diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,260314 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 14460, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006915629322268327, + "grad_norm": 3.9518067836761475, + "learning_rate": 3.4578146611341636e-08, + "log_odds_chosen": 0.4893013536930084, + "log_odds_ratio": -0.5827709436416626, + "logits/chosen": -1.040260672569275, + "logits/rejected": -1.050236701965332, + "logps/chosen": -2.8453192710876465, + "logps/rejected": -3.280632495880127, + "loss": 9.734, + "nll_loss": 2.3752317428588867, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.28453195095062256, + "rewards/margins": 0.043531302362680435, + "rewards/rejected": -0.3280632495880127, + "step": 1 + }, + { + "epoch": 0.0013831258644536654, + "grad_norm": 2.5233092308044434, + "learning_rate": 6.915629322268327e-08, + "log_odds_chosen": 0.0836641862988472, + "log_odds_ratio": -0.6804438233375549, + "logits/chosen": -1.2517393827438354, + "logits/rejected": -1.226942539215088, + "logps/chosen": -3.62654972076416, + "logps/rejected": -3.6911375522613525, + "loss": 7.2619, + "nll_loss": 1.747441291809082, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.36265498399734497, + "rewards/margins": 0.0064587704837322235, + "rewards/rejected": -0.3691137433052063, + "step": 2 + }, + { + "epoch": 0.002074688796680498, + "grad_norm": 2.5664076805114746, + "learning_rate": 1.037344398340249e-07, + "log_odds_chosen": -0.4574589431285858, + "log_odds_ratio": -0.9838818311691284, + "logits/chosen": -1.0710841417312622, + "logits/rejected": -1.0666425228118896, + "logps/chosen": -2.612703800201416, + "logps/rejected": -2.2216196060180664, + "loss": 7.7538, + "nll_loss": 1.840064525604248, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.2612703740596771, + "rewards/margins": -0.039108406752347946, + "rewards/rejected": -0.22216194868087769, + "step": 3 + }, + { + "epoch": 0.0027662517289073307, + "grad_norm": 3.470735788345337, + "learning_rate": 1.3831258644536654e-07, + "log_odds_chosen": -0.5373206734657288, + "log_odds_ratio": -1.1326806545257568, + "logits/chosen": -1.2449288368225098, + "logits/rejected": -1.2681998014450073, + "logps/chosen": -3.79819393157959, + "logps/rejected": -3.27523136138916, + "loss": 9.8075, + "nll_loss": 2.3386147022247314, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.3798193633556366, + "rewards/margins": -0.052296221256256104, + "rewards/rejected": -0.3275231420993805, + "step": 4 + }, + { + "epoch": 0.003457814661134163, + "grad_norm": 2.069977283477783, + "learning_rate": 1.7289073305670816e-07, + "log_odds_chosen": 0.06638160347938538, + "log_odds_ratio": -0.7242591977119446, + "logits/chosen": -0.8228747844696045, + "logits/rejected": -0.8221093416213989, + "logps/chosen": -2.0583748817443848, + "logps/rejected": -2.0935425758361816, + "loss": 7.1762, + "nll_loss": 1.7216312885284424, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20583748817443848, + "rewards/margins": 0.003516765311360359, + "rewards/rejected": -0.2093542516231537, + "step": 5 + }, + { + "epoch": 0.004149377593360996, + "grad_norm": 3.3388400077819824, + "learning_rate": 2.074688796680498e-07, + "log_odds_chosen": -0.002388477325439453, + "log_odds_ratio": -0.961333692073822, + "logits/chosen": -1.0542824268341064, + "logits/rejected": -1.0340723991394043, + "logps/chosen": -3.8489990234375, + "logps/rejected": -3.853835344314575, + "loss": 10.4565, + "nll_loss": 2.5179810523986816, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.38489991426467896, + "rewards/margins": 0.0004836246371269226, + "rewards/rejected": -0.3853835463523865, + "step": 6 + }, + { + "epoch": 0.004840940525587829, + "grad_norm": 2.113387107849121, + "learning_rate": 2.420470262793914e-07, + "log_odds_chosen": -0.07964649051427841, + "log_odds_ratio": -0.7827624678611755, + "logits/chosen": -1.0113275051116943, + "logits/rejected": -1.021159052848816, + "logps/chosen": -3.271230459213257, + "logps/rejected": -3.1815080642700195, + "loss": 7.4722, + "nll_loss": 1.7897660732269287, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.32712307572364807, + "rewards/margins": -0.008972249925136566, + "rewards/rejected": -0.3181508183479309, + "step": 7 + }, + { + "epoch": 0.005532503457814661, + "grad_norm": 2.046140670776367, + "learning_rate": 2.766251728907331e-07, + "log_odds_chosen": 0.09409669041633606, + "log_odds_ratio": -0.7343361377716064, + "logits/chosen": -1.4247064590454102, + "logits/rejected": -1.4529236555099487, + "logps/chosen": -2.1046290397644043, + "logps/rejected": -2.0878255367279053, + "loss": 8.2393, + "nll_loss": 1.9863979816436768, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21046291291713715, + "rewards/margins": -0.0016803601756691933, + "rewards/rejected": -0.20878255367279053, + "step": 8 + }, + { + "epoch": 0.006224066390041493, + "grad_norm": 4.004306793212891, + "learning_rate": 3.112033195020747e-07, + "log_odds_chosen": -0.39638814330101013, + "log_odds_ratio": -1.0122816562652588, + "logits/chosen": -1.4044495820999146, + "logits/rejected": -1.4218255281448364, + "logps/chosen": -4.350630760192871, + "logps/rejected": -3.954375982284546, + "loss": 10.5476, + "nll_loss": 2.5356690883636475, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.43506306409835815, + "rewards/margins": -0.03962545841932297, + "rewards/rejected": -0.395437628030777, + "step": 9 + }, + { + "epoch": 0.006915629322268326, + "grad_norm": 4.281415939331055, + "learning_rate": 3.457814661134163e-07, + "log_odds_chosen": 0.08228078484535217, + "log_odds_ratio": -0.9024434089660645, + "logits/chosen": -0.951551079750061, + "logits/rejected": -0.9782785177230835, + "logps/chosen": -3.5066018104553223, + "logps/rejected": -3.498887777328491, + "loss": 9.1385, + "nll_loss": 2.1943864822387695, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3506602346897125, + "rewards/margins": -0.0007714331150054932, + "rewards/rejected": -0.34988880157470703, + "step": 10 + }, + { + "epoch": 0.007607192254495159, + "grad_norm": 3.4988086223602295, + "learning_rate": 3.8035961272475794e-07, + "log_odds_chosen": -0.7721527218818665, + "log_odds_ratio": -1.4881058931350708, + "logits/chosen": -1.3219575881958008, + "logits/rejected": -1.3483083248138428, + "logps/chosen": -3.6868557929992676, + "logps/rejected": -2.9282896518707275, + "loss": 9.4678, + "nll_loss": 2.218129873275757, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.3686855435371399, + "rewards/margins": -0.07585658878087997, + "rewards/rejected": -0.2928289771080017, + "step": 11 + }, + { + "epoch": 0.008298755186721992, + "grad_norm": 3.7678544521331787, + "learning_rate": 4.149377593360996e-07, + "log_odds_chosen": -0.027068674564361572, + "log_odds_ratio": -0.837154746055603, + "logits/chosen": -1.2728548049926758, + "logits/rejected": -1.2553644180297852, + "logps/chosen": -3.4807701110839844, + "logps/rejected": -3.46420955657959, + "loss": 9.862, + "nll_loss": 2.381793260574341, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.34807702898979187, + "rewards/margins": -0.00165606290102005, + "rewards/rejected": -0.34642094373703003, + "step": 12 + }, + { + "epoch": 0.008990318118948824, + "grad_norm": 4.905233860015869, + "learning_rate": 4.495159059474412e-07, + "log_odds_chosen": -0.32843559980392456, + "log_odds_ratio": -1.060568928718567, + "logits/chosen": -1.2958381175994873, + "logits/rejected": -1.3102128505706787, + "logps/chosen": -3.4880740642547607, + "logps/rejected": -3.1222989559173584, + "loss": 9.009, + "nll_loss": 2.146188259124756, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3488073945045471, + "rewards/margins": -0.0365775004029274, + "rewards/rejected": -0.3122299015522003, + "step": 13 + }, + { + "epoch": 0.009681881051175657, + "grad_norm": 2.7104506492614746, + "learning_rate": 4.840940525587828e-07, + "log_odds_chosen": -0.008607611060142517, + "log_odds_ratio": -0.7274478673934937, + "logits/chosen": -1.0369820594787598, + "logits/rejected": -1.02875816822052, + "logps/chosen": -2.9381771087646484, + "logps/rejected": -2.95135498046875, + "loss": 8.9553, + "nll_loss": 2.166069507598877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2938177287578583, + "rewards/margins": 0.001317792572081089, + "rewards/rejected": -0.2951355278491974, + "step": 14 + }, + { + "epoch": 0.01037344398340249, + "grad_norm": 2.7053277492523193, + "learning_rate": 5.186721991701245e-07, + "log_odds_chosen": -0.06366106122732162, + "log_odds_ratio": -0.7992855310440063, + "logits/chosen": -1.388371229171753, + "logits/rejected": -1.3940556049346924, + "logps/chosen": -2.585225820541382, + "logps/rejected": -2.4857850074768066, + "loss": 7.9704, + "nll_loss": 1.9126801490783691, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2585225999355316, + "rewards/margins": -0.009944092482328415, + "rewards/rejected": -0.2485785037279129, + "step": 15 + }, + { + "epoch": 0.011065006915629323, + "grad_norm": 1.4869391918182373, + "learning_rate": 5.532503457814662e-07, + "log_odds_chosen": 0.05625841021537781, + "log_odds_ratio": -0.6945948600769043, + "logits/chosen": -1.3012064695358276, + "logits/rejected": -1.3060497045516968, + "logps/chosen": -2.5380234718322754, + "logps/rejected": -2.5695972442626953, + "loss": 6.1568, + "nll_loss": 1.46973717212677, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2538023591041565, + "rewards/margins": 0.003157366067171097, + "rewards/rejected": -0.2569597363471985, + "step": 16 + }, + { + "epoch": 0.011756569847856155, + "grad_norm": 4.137122631072998, + "learning_rate": 5.878284923928077e-07, + "log_odds_chosen": -0.10792502760887146, + "log_odds_ratio": -0.7970627546310425, + "logits/chosen": -1.3739025592803955, + "logits/rejected": -1.3377840518951416, + "logps/chosen": -3.974924325942993, + "logps/rejected": -3.8697359561920166, + "loss": 10.8296, + "nll_loss": 2.627690315246582, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.3974924385547638, + "rewards/margins": -0.010518837720155716, + "rewards/rejected": -0.38697361946105957, + "step": 17 + }, + { + "epoch": 0.012448132780082987, + "grad_norm": 2.9468846321105957, + "learning_rate": 6.224066390041494e-07, + "log_odds_chosen": 0.20884117484092712, + "log_odds_ratio": -0.723279595375061, + "logits/chosen": -1.262000560760498, + "logits/rejected": -1.3064842224121094, + "logps/chosen": -3.247279644012451, + "logps/rejected": -3.4324727058410645, + "loss": 8.8942, + "nll_loss": 2.151226758956909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.32472798228263855, + "rewards/margins": 0.018519282341003418, + "rewards/rejected": -0.3432472348213196, + "step": 18 + }, + { + "epoch": 0.01313969571230982, + "grad_norm": 3.104160785675049, + "learning_rate": 6.569847856154911e-07, + "log_odds_chosen": -0.3776034414768219, + "log_odds_ratio": -1.2113221883773804, + "logits/chosen": -1.3295966386795044, + "logits/rejected": -1.3142446279525757, + "logps/chosen": -3.445866584777832, + "logps/rejected": -3.0813324451446533, + "loss": 8.868, + "nll_loss": 2.0958707332611084, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.34458667039871216, + "rewards/margins": -0.03645342215895653, + "rewards/rejected": -0.30813324451446533, + "step": 19 + }, + { + "epoch": 0.013831258644536652, + "grad_norm": 2.2052125930786133, + "learning_rate": 6.915629322268326e-07, + "log_odds_chosen": 0.24842804670333862, + "log_odds_ratio": -0.7053789496421814, + "logits/chosen": -1.1727490425109863, + "logits/rejected": -1.2424461841583252, + "logps/chosen": -2.45582914352417, + "logps/rejected": -2.6469976902008057, + "loss": 8.6392, + "nll_loss": 2.0892579555511475, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2455829232931137, + "rewards/margins": 0.019116871058940887, + "rewards/rejected": -0.264699786901474, + "step": 20 + }, + { + "epoch": 0.014522821576763486, + "grad_norm": 2.8067734241485596, + "learning_rate": 7.261410788381743e-07, + "log_odds_chosen": 0.139600932598114, + "log_odds_ratio": -0.6512514352798462, + "logits/chosen": -1.1420315504074097, + "logits/rejected": -1.1386845111846924, + "logps/chosen": -3.4955501556396484, + "logps/rejected": -3.6302804946899414, + "loss": 9.1101, + "nll_loss": 2.2123937606811523, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3495550751686096, + "rewards/margins": 0.013473005034029484, + "rewards/rejected": -0.36302804946899414, + "step": 21 + }, + { + "epoch": 0.015214384508990318, + "grad_norm": 3.0798892974853516, + "learning_rate": 7.607192254495159e-07, + "log_odds_chosen": 0.3492021858692169, + "log_odds_ratio": -0.6532081961631775, + "logits/chosen": -1.3779373168945312, + "logits/rejected": -1.3605176210403442, + "logps/chosen": -3.1275863647460938, + "logps/rejected": -3.465847969055176, + "loss": 8.2735, + "nll_loss": 2.0030555725097656, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3127586543560028, + "rewards/margins": 0.03382612764835358, + "rewards/rejected": -0.3465847671031952, + "step": 22 + }, + { + "epoch": 0.01590594744121715, + "grad_norm": 3.224397659301758, + "learning_rate": 7.952973720608575e-07, + "log_odds_chosen": 0.9248179197311401, + "log_odds_ratio": -0.5056746006011963, + "logits/chosen": -0.9516175389289856, + "logits/rejected": -1.020263671875, + "logps/chosen": -2.944352149963379, + "logps/rejected": -3.7298731803894043, + "loss": 9.1044, + "nll_loss": 2.2255210876464844, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2944352328777313, + "rewards/margins": 0.07855206727981567, + "rewards/rejected": -0.3729872703552246, + "step": 23 + }, + { + "epoch": 0.016597510373443983, + "grad_norm": 2.069481611251831, + "learning_rate": 8.298755186721992e-07, + "log_odds_chosen": -0.09091458469629288, + "log_odds_ratio": -0.7582843899726868, + "logits/chosen": -0.9845443964004517, + "logits/rejected": -1.0020672082901, + "logps/chosen": -2.7381744384765625, + "logps/rejected": -2.6412293910980225, + "loss": 7.4084, + "nll_loss": 1.7762739658355713, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.27381742000579834, + "rewards/margins": -0.009694469161331654, + "rewards/rejected": -0.26412296295166016, + "step": 24 + }, + { + "epoch": 0.017289073305670817, + "grad_norm": 2.335552930831909, + "learning_rate": 8.644536652835409e-07, + "log_odds_chosen": -0.03683727979660034, + "log_odds_ratio": -0.7664669752120972, + "logits/chosen": -0.9482793807983398, + "logits/rejected": -0.9446390867233276, + "logps/chosen": -2.417475461959839, + "logps/rejected": -2.370396614074707, + "loss": 7.387, + "nll_loss": 1.7701003551483154, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24174755811691284, + "rewards/margins": -0.00470789335668087, + "rewards/rejected": -0.23703965544700623, + "step": 25 + }, + { + "epoch": 0.017980636237897647, + "grad_norm": 4.149723529815674, + "learning_rate": 8.990318118948824e-07, + "log_odds_chosen": 0.2577996253967285, + "log_odds_ratio": -0.634295642375946, + "logits/chosen": -1.2715983390808105, + "logits/rejected": -1.2856321334838867, + "logps/chosen": -3.455782413482666, + "logps/rejected": -3.7151052951812744, + "loss": 9.5517, + "nll_loss": 2.3244986534118652, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.34557828307151794, + "rewards/margins": 0.025932257995009422, + "rewards/rejected": -0.3715105354785919, + "step": 26 + }, + { + "epoch": 0.01867219917012448, + "grad_norm": 3.2110884189605713, + "learning_rate": 9.336099585062241e-07, + "log_odds_chosen": -0.0950632095336914, + "log_odds_ratio": -0.8977338075637817, + "logits/chosen": -0.823533296585083, + "logits/rejected": -0.8555487990379333, + "logps/chosen": -3.454474449157715, + "logps/rejected": -3.363515853881836, + "loss": 8.3972, + "nll_loss": 2.0095200538635254, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.34544745087623596, + "rewards/margins": -0.009095855057239532, + "rewards/rejected": -0.33635157346725464, + "step": 27 + }, + { + "epoch": 0.019363762102351315, + "grad_norm": 2.4229812622070312, + "learning_rate": 9.681881051175657e-07, + "log_odds_chosen": 0.26231566071510315, + "log_odds_ratio": -0.5937422513961792, + "logits/chosen": -1.199454426765442, + "logits/rejected": -1.2336453199386597, + "logps/chosen": -2.7520751953125, + "logps/rejected": -3.018535614013672, + "loss": 6.768, + "nll_loss": 1.6326138973236084, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.27520751953125, + "rewards/margins": 0.026646055281162262, + "rewards/rejected": -0.30185356736183167, + "step": 28 + }, + { + "epoch": 0.020055325034578148, + "grad_norm": 2.8843135833740234, + "learning_rate": 1.0027662517289075e-06, + "log_odds_chosen": 0.5056191682815552, + "log_odds_ratio": -0.5950756669044495, + "logits/chosen": -1.338259220123291, + "logits/rejected": -1.3438336849212646, + "logps/chosen": -2.64237642288208, + "logps/rejected": -3.1244683265686035, + "loss": 8.0822, + "nll_loss": 1.9610333442687988, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.264237642288208, + "rewards/margins": 0.048209186643362045, + "rewards/rejected": -0.31244683265686035, + "step": 29 + }, + { + "epoch": 0.02074688796680498, + "grad_norm": 4.502927303314209, + "learning_rate": 1.037344398340249e-06, + "log_odds_chosen": -0.34477755427360535, + "log_odds_ratio": -1.1222412586212158, + "logits/chosen": -0.8900103569030762, + "logits/rejected": -0.8709148168563843, + "logps/chosen": -3.582998275756836, + "logps/rejected": -3.256059169769287, + "loss": 7.2353, + "nll_loss": 1.696608543395996, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3582998216152191, + "rewards/margins": -0.032693929970264435, + "rewards/rejected": -0.3256058990955353, + "step": 30 + }, + { + "epoch": 0.021438450899031812, + "grad_norm": 2.193732976913452, + "learning_rate": 1.0719225449515906e-06, + "log_odds_chosen": -0.05980326980352402, + "log_odds_ratio": -0.740838348865509, + "logits/chosen": -1.2712316513061523, + "logits/rejected": -1.2823163270950317, + "logps/chosen": -2.9276375770568848, + "logps/rejected": -2.8619937896728516, + "loss": 7.2937, + "nll_loss": 1.749345302581787, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.29276376962661743, + "rewards/margins": -0.006564359180629253, + "rewards/rejected": -0.2861993908882141, + "step": 31 + }, + { + "epoch": 0.022130013831258646, + "grad_norm": 3.5711543560028076, + "learning_rate": 1.1065006915629324e-06, + "log_odds_chosen": 0.08109360188245773, + "log_odds_ratio": -0.9470595121383667, + "logits/chosen": -1.3242299556732178, + "logits/rejected": -1.3326642513275146, + "logps/chosen": -3.59979510307312, + "logps/rejected": -3.677622079849243, + "loss": 9.7337, + "nll_loss": 2.338721752166748, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.359979510307312, + "rewards/margins": 0.007782714441418648, + "rewards/rejected": -0.3677622079849243, + "step": 32 + }, + { + "epoch": 0.022821576763485476, + "grad_norm": 2.2873423099517822, + "learning_rate": 1.141078838174274e-06, + "log_odds_chosen": 0.6533421874046326, + "log_odds_ratio": -0.4733458459377289, + "logits/chosen": -1.2940324544906616, + "logits/rejected": -1.2814232110977173, + "logps/chosen": -3.2225732803344727, + "logps/rejected": -3.8556478023529053, + "loss": 7.6812, + "nll_loss": 1.8729662895202637, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3222573399543762, + "rewards/margins": 0.06330744922161102, + "rewards/rejected": -0.38556480407714844, + "step": 33 + }, + { + "epoch": 0.02351313969571231, + "grad_norm": 3.838637590408325, + "learning_rate": 1.1756569847856155e-06, + "log_odds_chosen": -0.2496761828660965, + "log_odds_ratio": -1.0469396114349365, + "logits/chosen": -1.2598438262939453, + "logits/rejected": -1.233662486076355, + "logps/chosen": -3.9419360160827637, + "logps/rejected": -3.6916069984436035, + "loss": 8.944, + "nll_loss": 2.1313016414642334, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3941935896873474, + "rewards/margins": -0.02503291144967079, + "rewards/rejected": -0.3691607117652893, + "step": 34 + }, + { + "epoch": 0.024204702627939143, + "grad_norm": 2.1356067657470703, + "learning_rate": 1.2102351313969573e-06, + "log_odds_chosen": 0.15863251686096191, + "log_odds_ratio": -0.6830345392227173, + "logits/chosen": -0.9747909307479858, + "logits/rejected": -0.9530578851699829, + "logps/chosen": -3.0132992267608643, + "logps/rejected": -3.1510977745056152, + "loss": 6.7208, + "nll_loss": 1.6118903160095215, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.30132991075515747, + "rewards/margins": 0.013779876753687859, + "rewards/rejected": -0.3151097893714905, + "step": 35 + }, + { + "epoch": 0.024896265560165973, + "grad_norm": 3.4909298419952393, + "learning_rate": 1.2448132780082988e-06, + "log_odds_chosen": -0.2739093005657196, + "log_odds_ratio": -1.0388438701629639, + "logits/chosen": -1.1628073453903198, + "logits/rejected": -1.1294801235198975, + "logps/chosen": -3.61557936668396, + "logps/rejected": -3.371711492538452, + "loss": 9.8512, + "nll_loss": 2.3589138984680176, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.3615579605102539, + "rewards/margins": -0.024386780336499214, + "rewards/rejected": -0.33717119693756104, + "step": 36 + }, + { + "epoch": 0.025587828492392807, + "grad_norm": 3.2615578174591064, + "learning_rate": 1.2793914246196404e-06, + "log_odds_chosen": 0.03353884071111679, + "log_odds_ratio": -0.7446186542510986, + "logits/chosen": -1.1005709171295166, + "logits/rejected": -1.0909405946731567, + "logps/chosen": -2.8883144855499268, + "logps/rejected": -2.917815923690796, + "loss": 9.5219, + "nll_loss": 2.3060121536254883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2888314723968506, + "rewards/margins": 0.0029501384124159813, + "rewards/rejected": -0.29178160429000854, + "step": 37 + }, + { + "epoch": 0.02627939142461964, + "grad_norm": 2.5613675117492676, + "learning_rate": 1.3139695712309822e-06, + "log_odds_chosen": -0.23992179334163666, + "log_odds_ratio": -0.9381309747695923, + "logits/chosen": -1.258825421333313, + "logits/rejected": -1.2358593940734863, + "logps/chosen": -3.4032936096191406, + "logps/rejected": -3.213279962539673, + "loss": 7.4326, + "nll_loss": 1.7643382549285889, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3403293788433075, + "rewards/margins": -0.019001349806785583, + "rewards/rejected": -0.3213280439376831, + "step": 38 + }, + { + "epoch": 0.026970954356846474, + "grad_norm": 2.939889669418335, + "learning_rate": 1.3485477178423237e-06, + "log_odds_chosen": -1.1127384901046753, + "log_odds_ratio": -1.6729886531829834, + "logits/chosen": -1.371311068534851, + "logits/rejected": -1.34170401096344, + "logps/chosen": -3.899885416030884, + "logps/rejected": -2.8833463191986084, + "loss": 8.3659, + "nll_loss": 1.9241644144058228, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.38998857140541077, + "rewards/margins": -0.10165394842624664, + "rewards/rejected": -0.2883346378803253, + "step": 39 + }, + { + "epoch": 0.027662517289073305, + "grad_norm": 2.1610751152038574, + "learning_rate": 1.3831258644536653e-06, + "log_odds_chosen": 0.07632875442504883, + "log_odds_ratio": -0.7144181728363037, + "logits/chosen": -1.1104373931884766, + "logits/rejected": -1.1167352199554443, + "logps/chosen": -2.4445266723632812, + "logps/rejected": -2.515613555908203, + "loss": 7.8256, + "nll_loss": 1.884954571723938, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.24445268511772156, + "rewards/margins": 0.0071086823008954525, + "rewards/rejected": -0.25156137347221375, + "step": 40 + }, + { + "epoch": 0.028354080221300138, + "grad_norm": 3.2550132274627686, + "learning_rate": 1.417704011065007e-06, + "log_odds_chosen": -0.283014178276062, + "log_odds_ratio": -0.8731192350387573, + "logits/chosen": -1.4890809059143066, + "logits/rejected": -1.45075523853302, + "logps/chosen": -3.078587770462036, + "logps/rejected": -2.8364899158477783, + "loss": 8.9882, + "nll_loss": 2.159730911254883, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.30785876512527466, + "rewards/margins": -0.02420978806912899, + "rewards/rejected": -0.2836489975452423, + "step": 41 + }, + { + "epoch": 0.029045643153526972, + "grad_norm": 2.4653944969177246, + "learning_rate": 1.4522821576763486e-06, + "log_odds_chosen": 0.2099519520998001, + "log_odds_ratio": -0.6373003125190735, + "logits/chosen": -1.3268687725067139, + "logits/rejected": -1.3488104343414307, + "logps/chosen": -2.963639974594116, + "logps/rejected": -3.1906325817108154, + "loss": 8.3625, + "nll_loss": 2.0268898010253906, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2963640093803406, + "rewards/margins": 0.022699259221553802, + "rewards/rejected": -0.319063276052475, + "step": 42 + }, + { + "epoch": 0.029737206085753802, + "grad_norm": 3.8058722019195557, + "learning_rate": 1.4868603042876902e-06, + "log_odds_chosen": -0.4854162335395813, + "log_odds_ratio": -1.0482676029205322, + "logits/chosen": -1.4056271314620972, + "logits/rejected": -1.3884409666061401, + "logps/chosen": -3.5880017280578613, + "logps/rejected": -3.1194963455200195, + "loss": 7.3837, + "nll_loss": 1.7411028146743774, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.35880017280578613, + "rewards/margins": -0.04685051366686821, + "rewards/rejected": -0.3119496703147888, + "step": 43 + }, + { + "epoch": 0.030428769017980636, + "grad_norm": 3.0560855865478516, + "learning_rate": 1.5214384508990318e-06, + "log_odds_chosen": -0.056353405117988586, + "log_odds_ratio": -0.8186872601509094, + "logits/chosen": -1.2890833616256714, + "logits/rejected": -1.2734991312026978, + "logps/chosen": -4.082864761352539, + "logps/rejected": -4.05389928817749, + "loss": 9.6216, + "nll_loss": 2.323543071746826, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.408286452293396, + "rewards/margins": -0.0028965286910533905, + "rewards/rejected": -0.4053899347782135, + "step": 44 + }, + { + "epoch": 0.03112033195020747, + "grad_norm": 3.232548475265503, + "learning_rate": 1.5560165975103735e-06, + "log_odds_chosen": 0.009376328438520432, + "log_odds_ratio": -0.7260125875473022, + "logits/chosen": -1.0361629724502563, + "logits/rejected": -1.0547370910644531, + "logps/chosen": -2.6392018795013428, + "logps/rejected": -2.6416492462158203, + "loss": 9.7193, + "nll_loss": 2.3572349548339844, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2639201879501343, + "rewards/margins": 0.00024472130462527275, + "rewards/rejected": -0.26416492462158203, + "step": 45 + }, + { + "epoch": 0.0318118948824343, + "grad_norm": 1.8216197490692139, + "learning_rate": 1.590594744121715e-06, + "log_odds_chosen": 0.4674333333969116, + "log_odds_ratio": -0.5335142612457275, + "logits/chosen": -1.0167092084884644, + "logits/rejected": -1.0238672494888306, + "logps/chosen": -2.955474376678467, + "logps/rejected": -3.4067180156707764, + "loss": 6.3723, + "nll_loss": 1.5397170782089233, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2955474257469177, + "rewards/margins": 0.0451243557035923, + "rewards/rejected": -0.3406718075275421, + "step": 46 + }, + { + "epoch": 0.03250345781466114, + "grad_norm": 3.082559823989868, + "learning_rate": 1.6251728907330569e-06, + "log_odds_chosen": -0.2621142566204071, + "log_odds_ratio": -0.874950110912323, + "logits/chosen": -1.1282384395599365, + "logits/rejected": -1.116199254989624, + "logps/chosen": -3.469964027404785, + "logps/rejected": -3.223210334777832, + "loss": 8.8783, + "nll_loss": 2.132092237472534, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3469964265823364, + "rewards/margins": -0.02467537298798561, + "rewards/rejected": -0.3223210573196411, + "step": 47 + }, + { + "epoch": 0.03319502074688797, + "grad_norm": 2.0938286781311035, + "learning_rate": 1.6597510373443984e-06, + "log_odds_chosen": 0.11893421411514282, + "log_odds_ratio": -0.8492321968078613, + "logits/chosen": -0.9938136339187622, + "logits/rejected": -0.9602646827697754, + "logps/chosen": -2.0542774200439453, + "logps/rejected": -2.28214955329895, + "loss": 6.8048, + "nll_loss": 1.6162786483764648, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.20542772114276886, + "rewards/margins": 0.022787224501371384, + "rewards/rejected": -0.22821494936943054, + "step": 48 + }, + { + "epoch": 0.0338865836791148, + "grad_norm": 2.0529825687408447, + "learning_rate": 1.69432918395574e-06, + "log_odds_chosen": 0.47180014848709106, + "log_odds_ratio": -0.5816795825958252, + "logits/chosen": -1.263287901878357, + "logits/rejected": -1.2728713750839233, + "logps/chosen": -3.0481815338134766, + "logps/rejected": -3.511841058731079, + "loss": 6.6536, + "nll_loss": 1.6052374839782715, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.30481815338134766, + "rewards/margins": 0.04636598750948906, + "rewards/rejected": -0.3511841297149658, + "step": 49 + }, + { + "epoch": 0.034578146611341634, + "grad_norm": 2.3218564987182617, + "learning_rate": 1.7289073305670818e-06, + "log_odds_chosen": -0.06447839736938477, + "log_odds_ratio": -0.772865355014801, + "logits/chosen": -1.3173282146453857, + "logits/rejected": -1.3115019798278809, + "logps/chosen": -2.42673659324646, + "logps/rejected": -2.3573877811431885, + "loss": 6.855, + "nll_loss": 1.6364696025848389, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24267366528511047, + "rewards/margins": -0.006934887729585171, + "rewards/rejected": -0.23573878407478333, + "step": 50 + }, + { + "epoch": 0.035269709543568464, + "grad_norm": 4.621151924133301, + "learning_rate": 1.7634854771784233e-06, + "log_odds_chosen": -0.05129563808441162, + "log_odds_ratio": -0.8750765323638916, + "logits/chosen": -1.0982708930969238, + "logits/rejected": -1.1156631708145142, + "logps/chosen": -2.895352363586426, + "logps/rejected": -2.7839388847351074, + "loss": 10.8387, + "nll_loss": 2.6221795082092285, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2895352244377136, + "rewards/margins": -0.011141344904899597, + "rewards/rejected": -0.2783938944339752, + "step": 51 + }, + { + "epoch": 0.035961272475795295, + "grad_norm": 1.4168874025344849, + "learning_rate": 1.798063623789765e-06, + "log_odds_chosen": -0.29925307631492615, + "log_odds_ratio": -0.8848166465759277, + "logits/chosen": -1.0895323753356934, + "logits/rejected": -1.0248337984085083, + "logps/chosen": -2.1519713401794434, + "logps/rejected": -1.9345920085906982, + "loss": 5.7303, + "nll_loss": 1.3440864086151123, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.2151971459388733, + "rewards/margins": -0.021737944334745407, + "rewards/rejected": -0.19345919787883759, + "step": 52 + }, + { + "epoch": 0.03665283540802213, + "grad_norm": 2.1725051403045654, + "learning_rate": 1.8326417704011067e-06, + "log_odds_chosen": 0.03270392119884491, + "log_odds_ratio": -0.7351056337356567, + "logits/chosen": -1.3653390407562256, + "logits/rejected": -1.3562443256378174, + "logps/chosen": -3.246081829071045, + "logps/rejected": -3.289919137954712, + "loss": 6.9984, + "nll_loss": 1.676098346710205, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.32460817694664, + "rewards/margins": 0.004383735358715057, + "rewards/rejected": -0.32899191975593567, + "step": 53 + }, + { + "epoch": 0.03734439834024896, + "grad_norm": 2.737290859222412, + "learning_rate": 1.8672199170124482e-06, + "log_odds_chosen": 0.3347844183444977, + "log_odds_ratio": -0.6068406105041504, + "logits/chosen": -1.3338276147842407, + "logits/rejected": -1.3446011543273926, + "logps/chosen": -3.3161258697509766, + "logps/rejected": -3.630776882171631, + "loss": 8.3089, + "nll_loss": 2.016543388366699, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.33161258697509766, + "rewards/margins": 0.03146512061357498, + "rewards/rejected": -0.36307770013809204, + "step": 54 + }, + { + "epoch": 0.03803596127247579, + "grad_norm": 2.663578510284424, + "learning_rate": 1.9017980636237896e-06, + "log_odds_chosen": 0.4287371337413788, + "log_odds_ratio": -0.5677598714828491, + "logits/chosen": -1.2210997343063354, + "logits/rejected": -1.272507667541504, + "logps/chosen": -2.382035732269287, + "logps/rejected": -2.683682918548584, + "loss": 7.7922, + "nll_loss": 1.8912622928619385, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23820357024669647, + "rewards/margins": 0.03016471117734909, + "rewards/rejected": -0.26836830377578735, + "step": 55 + }, + { + "epoch": 0.03872752420470263, + "grad_norm": 1.5175490379333496, + "learning_rate": 1.9363762102351314e-06, + "log_odds_chosen": -0.08139551430940628, + "log_odds_ratio": -0.7952121496200562, + "logits/chosen": -0.8203100562095642, + "logits/rejected": -0.8402999043464661, + "logps/chosen": -2.7972769737243652, + "logps/rejected": -2.7121500968933105, + "loss": 6.0176, + "nll_loss": 1.4248862266540527, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2797277271747589, + "rewards/margins": -0.008512692525982857, + "rewards/rejected": -0.27121502161026, + "step": 56 + }, + { + "epoch": 0.03941908713692946, + "grad_norm": 2.924398899078369, + "learning_rate": 1.970954356846473e-06, + "log_odds_chosen": -0.7466222643852234, + "log_odds_ratio": -1.2244271039962769, + "logits/chosen": -1.4082560539245605, + "logits/rejected": -1.334429383277893, + "logps/chosen": -3.302445888519287, + "logps/rejected": -2.6759796142578125, + "loss": 8.1842, + "nll_loss": 1.9236195087432861, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.3302445709705353, + "rewards/margins": -0.06264658272266388, + "rewards/rejected": -0.2675980031490326, + "step": 57 + }, + { + "epoch": 0.040110650069156296, + "grad_norm": 2.6948745250701904, + "learning_rate": 2.005532503457815e-06, + "log_odds_chosen": -0.20200856029987335, + "log_odds_ratio": -0.8478542566299438, + "logits/chosen": -0.728827714920044, + "logits/rejected": -0.7348635196685791, + "logps/chosen": -2.619049072265625, + "logps/rejected": -2.4454312324523926, + "loss": 7.6473, + "nll_loss": 1.8270339965820312, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.26190489530563354, + "rewards/margins": -0.01736176386475563, + "rewards/rejected": -0.2445431351661682, + "step": 58 + }, + { + "epoch": 0.04080221300138313, + "grad_norm": 2.7902441024780273, + "learning_rate": 2.0401106500691565e-06, + "log_odds_chosen": -0.33980101346969604, + "log_odds_ratio": -1.0024666786193848, + "logits/chosen": -0.9672735333442688, + "logits/rejected": -0.9773224592208862, + "logps/chosen": -2.3472862243652344, + "logps/rejected": -2.0535407066345215, + "loss": 8.2159, + "nll_loss": 1.9537204504013062, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2347286343574524, + "rewards/margins": -0.02937454544007778, + "rewards/rejected": -0.20535407960414886, + "step": 59 + }, + { + "epoch": 0.04149377593360996, + "grad_norm": 2.9321954250335693, + "learning_rate": 2.074688796680498e-06, + "log_odds_chosen": 0.14047515392303467, + "log_odds_ratio": -0.7460314035415649, + "logits/chosen": -1.103363037109375, + "logits/rejected": -1.0946812629699707, + "logps/chosen": -3.310117244720459, + "logps/rejected": -3.5168280601501465, + "loss": 6.8914, + "nll_loss": 1.6482346057891846, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.33101174235343933, + "rewards/margins": 0.020671118050813675, + "rewards/rejected": -0.3516828417778015, + "step": 60 + }, + { + "epoch": 0.042185338865836794, + "grad_norm": 2.5498883724212646, + "learning_rate": 2.1092669432918396e-06, + "log_odds_chosen": -0.5384204983711243, + "log_odds_ratio": -1.1760982275009155, + "logits/chosen": -0.9892525672912598, + "logits/rejected": -0.9684339761734009, + "logps/chosen": -2.563851833343506, + "logps/rejected": -2.093398332595825, + "loss": 7.0613, + "nll_loss": 1.6477240324020386, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.2563851773738861, + "rewards/margins": -0.04704533517360687, + "rewards/rejected": -0.20933984220027924, + "step": 61 + }, + { + "epoch": 0.042876901798063624, + "grad_norm": 4.019651889801025, + "learning_rate": 2.143845089903181e-06, + "log_odds_chosen": 0.28454190492630005, + "log_odds_ratio": -0.6172089576721191, + "logits/chosen": -0.7720848917961121, + "logits/rejected": -0.7707473039627075, + "logps/chosen": -2.709263801574707, + "logps/rejected": -3.0105907917022705, + "loss": 8.2458, + "nll_loss": 1.9997224807739258, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2709263563156128, + "rewards/margins": 0.030132703483104706, + "rewards/rejected": -0.3010590672492981, + "step": 62 + }, + { + "epoch": 0.043568464730290454, + "grad_norm": 3.3269991874694824, + "learning_rate": 2.1784232365145227e-06, + "log_odds_chosen": 0.11333783715963364, + "log_odds_ratio": -0.676737368106842, + "logits/chosen": -1.1614861488342285, + "logits/rejected": -1.1610134840011597, + "logps/chosen": -2.9012930393218994, + "logps/rejected": -3.0032124519348145, + "loss": 8.5807, + "nll_loss": 2.0775046348571777, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.29012930393218994, + "rewards/margins": 0.010191947221755981, + "rewards/rejected": -0.3003212511539459, + "step": 63 + }, + { + "epoch": 0.04426002766251729, + "grad_norm": 2.0815582275390625, + "learning_rate": 2.2130013831258647e-06, + "log_odds_chosen": 0.12025430798530579, + "log_odds_ratio": -0.7256815433502197, + "logits/chosen": -1.159436821937561, + "logits/rejected": -1.1309454441070557, + "logps/chosen": -3.0659329891204834, + "logps/rejected": -3.178705930709839, + "loss": 7.1109, + "nll_loss": 1.7051641941070557, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.30659329891204834, + "rewards/margins": 0.01127728819847107, + "rewards/rejected": -0.3178706169128418, + "step": 64 + }, + { + "epoch": 0.04495159059474412, + "grad_norm": 6.716753959655762, + "learning_rate": 2.2475795297372063e-06, + "log_odds_chosen": -0.30332645773887634, + "log_odds_ratio": -1.194240927696228, + "logits/chosen": -1.0181807279586792, + "logits/rejected": -1.0282114744186401, + "logps/chosen": -4.083698272705078, + "logps/rejected": -3.759035110473633, + "loss": 10.9353, + "nll_loss": 2.61440372467041, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.40836983919143677, + "rewards/margins": -0.032466333359479904, + "rewards/rejected": -0.37590354681015015, + "step": 65 + }, + { + "epoch": 0.04564315352697095, + "grad_norm": 4.329280853271484, + "learning_rate": 2.282157676348548e-06, + "log_odds_chosen": -0.043851837515830994, + "log_odds_ratio": -0.781743049621582, + "logits/chosen": -1.4313268661499023, + "logits/rejected": -1.453513741493225, + "logps/chosen": -3.223816156387329, + "logps/rejected": -3.164360761642456, + "loss": 9.5621, + "nll_loss": 2.312361717224121, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3223816156387329, + "rewards/margins": -0.0059455279260873795, + "rewards/rejected": -0.3164360821247101, + "step": 66 + }, + { + "epoch": 0.04633471645919779, + "grad_norm": 1.5088504552841187, + "learning_rate": 2.3167358229598894e-06, + "log_odds_chosen": 0.08667584508657455, + "log_odds_ratio": -0.6951309442520142, + "logits/chosen": -1.1596637964248657, + "logits/rejected": -1.134047031402588, + "logps/chosen": -1.920853614807129, + "logps/rejected": -1.9628230333328247, + "loss": 5.1904, + "nll_loss": 1.2280784845352173, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1920853555202484, + "rewards/margins": 0.004196947440505028, + "rewards/rejected": -0.196282297372818, + "step": 67 + }, + { + "epoch": 0.04702627939142462, + "grad_norm": 3.1953086853027344, + "learning_rate": 2.351313969571231e-06, + "log_odds_chosen": -0.16054373979568481, + "log_odds_ratio": -0.8236711025238037, + "logits/chosen": -0.9192508459091187, + "logits/rejected": -0.9316079616546631, + "logps/chosen": -2.5424296855926514, + "logps/rejected": -2.3846731185913086, + "loss": 8.2374, + "nll_loss": 1.976994514465332, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2542429566383362, + "rewards/margins": -0.015775656327605247, + "rewards/rejected": -0.23846730589866638, + "step": 68 + }, + { + "epoch": 0.04771784232365145, + "grad_norm": 1.7714017629623413, + "learning_rate": 2.3858921161825725e-06, + "log_odds_chosen": 0.5153631567955017, + "log_odds_ratio": -0.5464873313903809, + "logits/chosen": -1.1873700618743896, + "logits/rejected": -1.2427599430084229, + "logps/chosen": -1.9302818775177002, + "logps/rejected": -2.423107147216797, + "loss": 5.5537, + "nll_loss": 1.3337669372558594, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19302821159362793, + "rewards/margins": 0.049282513558864594, + "rewards/rejected": -0.24231071770191193, + "step": 69 + }, + { + "epoch": 0.048409405255878286, + "grad_norm": 2.594485282897949, + "learning_rate": 2.4204702627939145e-06, + "log_odds_chosen": 0.2057522088289261, + "log_odds_ratio": -0.6587037444114685, + "logits/chosen": -1.0115017890930176, + "logits/rejected": -0.994685709476471, + "logps/chosen": -2.51837420463562, + "logps/rejected": -2.7061476707458496, + "loss": 7.4934, + "nll_loss": 1.8074700832366943, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.25183743238449097, + "rewards/margins": 0.01877736859023571, + "rewards/rejected": -0.2706148028373718, + "step": 70 + }, + { + "epoch": 0.04910096818810512, + "grad_norm": 5.582400798797607, + "learning_rate": 2.455048409405256e-06, + "log_odds_chosen": -0.19556845724582672, + "log_odds_ratio": -0.8797652721405029, + "logits/chosen": -1.2655856609344482, + "logits/rejected": -1.286374807357788, + "logps/chosen": -3.659990072250366, + "logps/rejected": -3.4588847160339355, + "loss": 9.3168, + "nll_loss": 2.2412219047546387, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.3659989833831787, + "rewards/margins": -0.020110521465539932, + "rewards/rejected": -0.3458884656429291, + "step": 71 + }, + { + "epoch": 0.04979253112033195, + "grad_norm": 5.756114959716797, + "learning_rate": 2.4896265560165977e-06, + "log_odds_chosen": 0.04178621619939804, + "log_odds_ratio": -0.6979033946990967, + "logits/chosen": -1.0562987327575684, + "logits/rejected": -1.1279411315917969, + "logps/chosen": -3.996288299560547, + "logps/rejected": -4.0451226234436035, + "loss": 11.881, + "nll_loss": 2.9004478454589844, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3996288776397705, + "rewards/margins": 0.0048833731561899185, + "rewards/rejected": -0.4045122265815735, + "step": 72 + }, + { + "epoch": 0.050484094052558784, + "grad_norm": 3.1050422191619873, + "learning_rate": 2.5242047026279392e-06, + "log_odds_chosen": 0.377094030380249, + "log_odds_ratio": -0.6772429943084717, + "logits/chosen": -1.2963155508041382, + "logits/rejected": -1.33097505569458, + "logps/chosen": -2.747310161590576, + "logps/rejected": -3.087376356124878, + "loss": 9.158, + "nll_loss": 2.221768379211426, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.27473101019859314, + "rewards/margins": 0.03400661423802376, + "rewards/rejected": -0.3087376356124878, + "step": 73 + }, + { + "epoch": 0.051175656984785614, + "grad_norm": 2.4878885746002197, + "learning_rate": 2.5587828492392808e-06, + "log_odds_chosen": 0.13045404851436615, + "log_odds_ratio": -0.6519996523857117, + "logits/chosen": -1.1966079473495483, + "logits/rejected": -1.1773067712783813, + "logps/chosen": -2.2391517162323, + "logps/rejected": -2.336344003677368, + "loss": 7.4135, + "nll_loss": 1.7881792783737183, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.22391517460346222, + "rewards/margins": 0.009719207882881165, + "rewards/rejected": -0.23363438248634338, + "step": 74 + }, + { + "epoch": 0.05186721991701245, + "grad_norm": 4.009108066558838, + "learning_rate": 2.5933609958506228e-06, + "log_odds_chosen": -0.608934760093689, + "log_odds_ratio": -1.387421727180481, + "logits/chosen": -1.170054316520691, + "logits/rejected": -1.1777504682540894, + "logps/chosen": -3.128124237060547, + "logps/rejected": -2.479940891265869, + "loss": 10.3093, + "nll_loss": 2.438586711883545, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.312812477350235, + "rewards/margins": -0.0648183524608612, + "rewards/rejected": -0.2479940950870514, + "step": 75 + }, + { + "epoch": 0.05255878284923928, + "grad_norm": 3.367987871170044, + "learning_rate": 2.6279391424619643e-06, + "log_odds_chosen": -0.12288656830787659, + "log_odds_ratio": -0.7879306674003601, + "logits/chosen": -1.2347142696380615, + "logits/rejected": -1.2641103267669678, + "logps/chosen": -3.0773115158081055, + "logps/rejected": -2.97043776512146, + "loss": 7.4922, + "nll_loss": 1.794250726699829, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.30773118138313293, + "rewards/margins": -0.010687386617064476, + "rewards/rejected": -0.2970438003540039, + "step": 76 + }, + { + "epoch": 0.05325034578146611, + "grad_norm": 2.115560293197632, + "learning_rate": 2.662517289073306e-06, + "log_odds_chosen": 0.1649816632270813, + "log_odds_ratio": -0.658004879951477, + "logits/chosen": -1.1515793800354004, + "logits/rejected": -1.1574221849441528, + "logps/chosen": -2.2130730152130127, + "logps/rejected": -2.367478370666504, + "loss": 6.2837, + "nll_loss": 1.5051299333572388, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22130730748176575, + "rewards/margins": 0.015440553426742554, + "rewards/rejected": -0.2367478609085083, + "step": 77 + }, + { + "epoch": 0.05394190871369295, + "grad_norm": 3.061014175415039, + "learning_rate": 2.6970954356846475e-06, + "log_odds_chosen": -0.8962035775184631, + "log_odds_ratio": -1.6122883558273315, + "logits/chosen": -1.5006728172302246, + "logits/rejected": -1.491066575050354, + "logps/chosen": -3.76662015914917, + "logps/rejected": -2.943807363510132, + "loss": 8.4838, + "nll_loss": 1.9597277641296387, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.376662015914917, + "rewards/margins": -0.08228126168251038, + "rewards/rejected": -0.29438072443008423, + "step": 78 + }, + { + "epoch": 0.05463347164591978, + "grad_norm": 1.8718522787094116, + "learning_rate": 2.731673582295989e-06, + "log_odds_chosen": -0.046965256333351135, + "log_odds_ratio": -0.7528050541877747, + "logits/chosen": -1.0281174182891846, + "logits/rejected": -1.100132942199707, + "logps/chosen": -2.0911521911621094, + "logps/rejected": -2.0604071617126465, + "loss": 6.9623, + "nll_loss": 1.6652878522872925, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.20911523699760437, + "rewards/margins": -0.0030745062977075577, + "rewards/rejected": -0.20604071021080017, + "step": 79 + }, + { + "epoch": 0.05532503457814661, + "grad_norm": 3.6880903244018555, + "learning_rate": 2.7662517289073306e-06, + "log_odds_chosen": 0.20674392580986023, + "log_odds_ratio": -0.6412222981452942, + "logits/chosen": -0.9123594164848328, + "logits/rejected": -0.9457440972328186, + "logps/chosen": -2.468709945678711, + "logps/rejected": -2.680603265762329, + "loss": 7.7926, + "nll_loss": 1.8840311765670776, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2468709796667099, + "rewards/margins": 0.021189335733652115, + "rewards/rejected": -0.2680603265762329, + "step": 80 + }, + { + "epoch": 0.056016597510373446, + "grad_norm": 2.5244028568267822, + "learning_rate": 2.8008298755186726e-06, + "log_odds_chosen": 0.2665032744407654, + "log_odds_ratio": -0.5810218453407288, + "logits/chosen": -1.2331328392028809, + "logits/rejected": -1.3183355331420898, + "logps/chosen": -2.5980257987976074, + "logps/rejected": -2.8580169677734375, + "loss": 7.1341, + "nll_loss": 1.7254328727722168, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.25980257987976074, + "rewards/margins": 0.025999119505286217, + "rewards/rejected": -0.2858017086982727, + "step": 81 + }, + { + "epoch": 0.056708160442600276, + "grad_norm": 3.8314919471740723, + "learning_rate": 2.835408022130014e-06, + "log_odds_chosen": 0.8098757863044739, + "log_odds_ratio": -0.4692530632019043, + "logits/chosen": -1.3716188669204712, + "logits/rejected": -1.416751742362976, + "logps/chosen": -3.450601577758789, + "logps/rejected": -4.224173545837402, + "loss": 9.16, + "nll_loss": 2.2430672645568848, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.34506019949913025, + "rewards/margins": 0.07735716551542282, + "rewards/rejected": -0.42241737246513367, + "step": 82 + }, + { + "epoch": 0.05739972337482711, + "grad_norm": 3.0442752838134766, + "learning_rate": 2.8699861687413553e-06, + "log_odds_chosen": 0.2403930127620697, + "log_odds_ratio": -0.6385015249252319, + "logits/chosen": -1.2987475395202637, + "logits/rejected": -1.288628101348877, + "logps/chosen": -2.989830493927002, + "logps/rejected": -3.2324090003967285, + "loss": 8.18, + "nll_loss": 1.9811550378799438, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.29898306727409363, + "rewards/margins": 0.024257831275463104, + "rewards/rejected": -0.32324090600013733, + "step": 83 + }, + { + "epoch": 0.058091286307053944, + "grad_norm": 3.6023521423339844, + "learning_rate": 2.9045643153526973e-06, + "log_odds_chosen": 0.25399988889694214, + "log_odds_ratio": -0.6751829385757446, + "logits/chosen": -1.1019551753997803, + "logits/rejected": -1.1007038354873657, + "logps/chosen": -3.27341628074646, + "logps/rejected": -3.516066074371338, + "loss": 8.322, + "nll_loss": 2.0129876136779785, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.32734164595603943, + "rewards/margins": 0.024264976382255554, + "rewards/rejected": -0.3516066074371338, + "step": 84 + }, + { + "epoch": 0.058782849239280774, + "grad_norm": 1.5585123300552368, + "learning_rate": 2.939142461964039e-06, + "log_odds_chosen": -0.03407038748264313, + "log_odds_ratio": -0.7980966567993164, + "logits/chosen": -0.9366306066513062, + "logits/rejected": -0.9029936790466309, + "logps/chosen": -2.6618621349334717, + "logps/rejected": -2.6580753326416016, + "loss": 6.4503, + "nll_loss": 1.5327749252319336, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2661862075328827, + "rewards/margins": -0.0003786766901612282, + "rewards/rejected": -0.26580753922462463, + "step": 85 + }, + { + "epoch": 0.059474412171507604, + "grad_norm": 2.9539215564727783, + "learning_rate": 2.9737206085753804e-06, + "log_odds_chosen": -0.021699100732803345, + "log_odds_ratio": -0.7429624795913696, + "logits/chosen": -1.4628320932388306, + "logits/rejected": -1.4432883262634277, + "logps/chosen": -2.684389114379883, + "logps/rejected": -2.669247627258301, + "loss": 9.0412, + "nll_loss": 2.186001777648926, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2684389352798462, + "rewards/margins": -0.0015141433104872704, + "rewards/rejected": -0.26692479848861694, + "step": 86 + }, + { + "epoch": 0.06016597510373444, + "grad_norm": 3.238790512084961, + "learning_rate": 3.0082987551867224e-06, + "log_odds_chosen": -0.34586936235427856, + "log_odds_ratio": -1.0915687084197998, + "logits/chosen": -0.8685006499290466, + "logits/rejected": -0.86763596534729, + "logps/chosen": -1.9744906425476074, + "logps/rejected": -1.6153260469436646, + "loss": 6.9039, + "nll_loss": 1.6168253421783447, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19744905829429626, + "rewards/margins": -0.03591645881533623, + "rewards/rejected": -0.16153259575366974, + "step": 87 + }, + { + "epoch": 0.06085753803596127, + "grad_norm": 2.4273881912231445, + "learning_rate": 3.0428769017980635e-06, + "log_odds_chosen": 0.20019817352294922, + "log_odds_ratio": -0.8292399644851685, + "logits/chosen": -0.5661689043045044, + "logits/rejected": -0.5779998302459717, + "logps/chosen": -2.4900712966918945, + "logps/rejected": -2.707916259765625, + "loss": 6.937, + "nll_loss": 1.6513311862945557, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.24900715053081512, + "rewards/margins": 0.0217844620347023, + "rewards/rejected": -0.270791620016098, + "step": 88 + }, + { + "epoch": 0.06154910096818811, + "grad_norm": 2.613290309906006, + "learning_rate": 3.0774550484094055e-06, + "log_odds_chosen": -0.08278333395719528, + "log_odds_ratio": -0.9090401530265808, + "logits/chosen": -1.3825005292892456, + "logits/rejected": -1.3741035461425781, + "logps/chosen": -2.779364824295044, + "logps/rejected": -2.6913795471191406, + "loss": 6.6745, + "nll_loss": 1.57771635055542, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.27793648838996887, + "rewards/margins": -0.0087985098361969, + "rewards/rejected": -0.269137978553772, + "step": 89 + }, + { + "epoch": 0.06224066390041494, + "grad_norm": 4.615622043609619, + "learning_rate": 3.112033195020747e-06, + "log_odds_chosen": 0.27620837092399597, + "log_odds_ratio": -0.6446676850318909, + "logits/chosen": -1.2485114336013794, + "logits/rejected": -1.2515029907226562, + "logps/chosen": -3.3156979084014893, + "logps/rejected": -3.592181444168091, + "loss": 10.2327, + "nll_loss": 2.4937028884887695, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3315698206424713, + "rewards/margins": 0.027648335322737694, + "rewards/rejected": -0.35921815037727356, + "step": 90 + }, + { + "epoch": 0.06293222683264177, + "grad_norm": 3.800359010696411, + "learning_rate": 3.1466113416320886e-06, + "log_odds_chosen": -0.9488328099250793, + "log_odds_ratio": -1.5567295551300049, + "logits/chosen": -1.0518579483032227, + "logits/rejected": -1.051997423171997, + "logps/chosen": -3.5880908966064453, + "logps/rejected": -2.6912951469421387, + "loss": 7.9834, + "nll_loss": 1.8401869535446167, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.35880908370018005, + "rewards/margins": -0.08967956155538559, + "rewards/rejected": -0.26912954449653625, + "step": 91 + }, + { + "epoch": 0.0636237897648686, + "grad_norm": 3.3406009674072266, + "learning_rate": 3.18118948824343e-06, + "log_odds_chosen": -0.2876474857330322, + "log_odds_ratio": -0.9973258376121521, + "logits/chosen": -1.3440685272216797, + "logits/rejected": -1.3308429718017578, + "logps/chosen": -3.4420344829559326, + "logps/rejected": -3.1430163383483887, + "loss": 9.1634, + "nll_loss": 2.191114902496338, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.3442034423351288, + "rewards/margins": -0.029901809990406036, + "rewards/rejected": -0.31430163979530334, + "step": 92 + }, + { + "epoch": 0.06431535269709543, + "grad_norm": 4.966706275939941, + "learning_rate": 3.2157676348547718e-06, + "log_odds_chosen": 0.09749498963356018, + "log_odds_ratio": -0.6567263603210449, + "logits/chosen": -1.2255221605300903, + "logits/rejected": -1.2361114025115967, + "logps/chosen": -2.9487504959106445, + "logps/rejected": -3.0364670753479004, + "loss": 10.2881, + "nll_loss": 2.506345510482788, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.29487505555152893, + "rewards/margins": 0.008771630004048347, + "rewards/rejected": -0.30364668369293213, + "step": 93 + }, + { + "epoch": 0.06500691562932227, + "grad_norm": 2.9755330085754395, + "learning_rate": 3.2503457814661137e-06, + "log_odds_chosen": -0.014040261507034302, + "log_odds_ratio": -0.8920272588729858, + "logits/chosen": -1.2634459733963013, + "logits/rejected": -1.2027406692504883, + "logps/chosen": -3.3989415168762207, + "logps/rejected": -3.4355568885803223, + "loss": 7.9149, + "nll_loss": 1.8895316123962402, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.33989417552948, + "rewards/margins": 0.0036615319550037384, + "rewards/rejected": -0.3435557186603546, + "step": 94 + }, + { + "epoch": 0.0656984785615491, + "grad_norm": 4.007265567779541, + "learning_rate": 3.2849239280774553e-06, + "log_odds_chosen": -0.07402561604976654, + "log_odds_ratio": -0.7418888807296753, + "logits/chosen": -1.3279504776000977, + "logits/rejected": -1.3760790824890137, + "logps/chosen": -2.9946768283843994, + "logps/rejected": -2.9116311073303223, + "loss": 7.8239, + "nll_loss": 1.8817808628082275, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.29946768283843994, + "rewards/margins": -0.008304571732878685, + "rewards/rejected": -0.2911631166934967, + "step": 95 + }, + { + "epoch": 0.06639004149377593, + "grad_norm": 3.38915753364563, + "learning_rate": 3.319502074688797e-06, + "log_odds_chosen": 0.24476172029972076, + "log_odds_ratio": -0.7002239227294922, + "logits/chosen": -1.25504469871521, + "logits/rejected": -1.2935394048690796, + "logps/chosen": -2.665768623352051, + "logps/rejected": -2.892899513244629, + "loss": 8.7056, + "nll_loss": 2.1063828468322754, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.266576886177063, + "rewards/margins": 0.022713065147399902, + "rewards/rejected": -0.2892899513244629, + "step": 96 + }, + { + "epoch": 0.06708160442600276, + "grad_norm": 2.5748276710510254, + "learning_rate": 3.3540802213001384e-06, + "log_odds_chosen": -0.019003883004188538, + "log_odds_ratio": -0.7211747169494629, + "logits/chosen": -1.149442195892334, + "logits/rejected": -1.1671631336212158, + "logps/chosen": -3.067318916320801, + "logps/rejected": -3.0267174243927, + "loss": 6.695, + "nll_loss": 1.601643681526184, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.3067318797111511, + "rewards/margins": -0.004060110077261925, + "rewards/rejected": -0.30267176032066345, + "step": 97 + }, + { + "epoch": 0.0677731673582296, + "grad_norm": 4.646411895751953, + "learning_rate": 3.38865836791148e-06, + "log_odds_chosen": 0.02650478109717369, + "log_odds_ratio": -0.7107594013214111, + "logits/chosen": -1.3651975393295288, + "logits/rejected": -1.3814575672149658, + "logps/chosen": -2.978908061981201, + "logps/rejected": -2.998114824295044, + "loss": 10.4827, + "nll_loss": 2.54960560798645, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2978908121585846, + "rewards/margins": 0.0019206637516617775, + "rewards/rejected": -0.2998114824295044, + "step": 98 + }, + { + "epoch": 0.06846473029045644, + "grad_norm": 3.0044188499450684, + "learning_rate": 3.423236514522822e-06, + "log_odds_chosen": 0.23153075575828552, + "log_odds_ratio": -0.6025316715240479, + "logits/chosen": -0.852349579334259, + "logits/rejected": -0.8624266386032104, + "logps/chosen": -1.607686996459961, + "logps/rejected": -1.7681227922439575, + "loss": 7.2859, + "nll_loss": 1.7612199783325195, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16076868772506714, + "rewards/margins": 0.016043581068515778, + "rewards/rejected": -0.1768122762441635, + "step": 99 + }, + { + "epoch": 0.06915629322268327, + "grad_norm": 1.6636029481887817, + "learning_rate": 3.4578146611341635e-06, + "log_odds_chosen": -0.05533628910779953, + "log_odds_ratio": -0.7417080998420715, + "logits/chosen": -0.9845290184020996, + "logits/rejected": -0.9741227626800537, + "logps/chosen": -1.9345066547393799, + "logps/rejected": -1.8781304359436035, + "loss": 5.6163, + "nll_loss": 1.329894781112671, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1934506595134735, + "rewards/margins": -0.005637619644403458, + "rewards/rejected": -0.18781304359436035, + "step": 100 + }, + { + "epoch": 0.0698478561549101, + "grad_norm": 1.76613187789917, + "learning_rate": 3.492392807745505e-06, + "log_odds_chosen": -0.31366345286369324, + "log_odds_ratio": -0.9095494747161865, + "logits/chosen": -1.0726799964904785, + "logits/rejected": -1.057313084602356, + "logps/chosen": -2.6118955612182617, + "logps/rejected": -2.3254950046539307, + "loss": 5.3665, + "nll_loss": 1.2506669759750366, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2611895799636841, + "rewards/margins": -0.028640054166316986, + "rewards/rejected": -0.2325495034456253, + "step": 101 + }, + { + "epoch": 0.07053941908713693, + "grad_norm": 2.1601548194885254, + "learning_rate": 3.5269709543568467e-06, + "log_odds_chosen": 0.225947305560112, + "log_odds_ratio": -0.7366925477981567, + "logits/chosen": -0.9250924587249756, + "logits/rejected": -0.872596025466919, + "logps/chosen": -2.657607316970825, + "logps/rejected": -2.8689475059509277, + "loss": 6.3597, + "nll_loss": 1.516251564025879, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.26576074957847595, + "rewards/margins": 0.021134020760655403, + "rewards/rejected": -0.2868947684764862, + "step": 102 + }, + { + "epoch": 0.07123098201936376, + "grad_norm": 3.250379800796509, + "learning_rate": 3.5615491009681882e-06, + "log_odds_chosen": 0.41885554790496826, + "log_odds_ratio": -0.5761613845825195, + "logits/chosen": -0.9390544295310974, + "logits/rejected": -0.9515710473060608, + "logps/chosen": -2.112879753112793, + "logps/rejected": -2.4834203720092773, + "loss": 8.3838, + "nll_loss": 2.038335084915161, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2112879753112793, + "rewards/margins": 0.03705406188964844, + "rewards/rejected": -0.24834203720092773, + "step": 103 + }, + { + "epoch": 0.07192254495159059, + "grad_norm": 4.528255939483643, + "learning_rate": 3.59612724757953e-06, + "log_odds_chosen": 0.31654834747314453, + "log_odds_ratio": -0.6934396028518677, + "logits/chosen": -0.9241195917129517, + "logits/rejected": -0.8918487429618835, + "logps/chosen": -2.244730234146118, + "logps/rejected": -2.5783205032348633, + "loss": 8.1125, + "nll_loss": 1.958787441253662, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2244730293750763, + "rewards/margins": 0.03335902467370033, + "rewards/rejected": -0.25783205032348633, + "step": 104 + }, + { + "epoch": 0.07261410788381743, + "grad_norm": 3.3515546321868896, + "learning_rate": 3.6307053941908718e-06, + "log_odds_chosen": 0.019478052854537964, + "log_odds_ratio": -0.7779797315597534, + "logits/chosen": -1.184047818183899, + "logits/rejected": -1.2227816581726074, + "logps/chosen": -2.3940818309783936, + "logps/rejected": -2.4155819416046143, + "loss": 6.6174, + "nll_loss": 1.576558232307434, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2394081950187683, + "rewards/margins": 0.0021500196307897568, + "rewards/rejected": -0.24155820906162262, + "step": 105 + }, + { + "epoch": 0.07330567081604426, + "grad_norm": 4.590144634246826, + "learning_rate": 3.6652835408022133e-06, + "log_odds_chosen": -0.278724730014801, + "log_odds_ratio": -0.8659406304359436, + "logits/chosen": -1.0056959390640259, + "logits/rejected": -0.9648569822311401, + "logps/chosen": -3.568976879119873, + "logps/rejected": -3.291322708129883, + "loss": 8.3612, + "nll_loss": 2.0036964416503906, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3568977117538452, + "rewards/margins": -0.02776542864739895, + "rewards/rejected": -0.3291322588920593, + "step": 106 + }, + { + "epoch": 0.0739972337482711, + "grad_norm": 3.8044564723968506, + "learning_rate": 3.699861687413555e-06, + "log_odds_chosen": -0.07593946903944016, + "log_odds_ratio": -0.7997829914093018, + "logits/chosen": -1.1828924417495728, + "logits/rejected": -1.219299077987671, + "logps/chosen": -2.4561989307403564, + "logps/rejected": -2.3595938682556152, + "loss": 7.8965, + "nll_loss": 1.8941495418548584, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.24561990797519684, + "rewards/margins": -0.009660521522164345, + "rewards/rejected": -0.23595938086509705, + "step": 107 + }, + { + "epoch": 0.07468879668049792, + "grad_norm": 3.134294271469116, + "learning_rate": 3.7344398340248965e-06, + "log_odds_chosen": 0.5621838569641113, + "log_odds_ratio": -0.4798460006713867, + "logits/chosen": -1.1768224239349365, + "logits/rejected": -1.2381618022918701, + "logps/chosen": -1.9236397743225098, + "logps/rejected": -2.380974769592285, + "loss": 7.811, + "nll_loss": 1.904759168624878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1923639476299286, + "rewards/margins": 0.04573351517319679, + "rewards/rejected": -0.23809745907783508, + "step": 108 + }, + { + "epoch": 0.07538035961272475, + "grad_norm": 4.6720709800720215, + "learning_rate": 3.769017980636238e-06, + "log_odds_chosen": 0.4407891035079956, + "log_odds_ratio": -0.5491933822631836, + "logits/chosen": -1.250891923904419, + "logits/rejected": -1.251495599746704, + "logps/chosen": -2.444951057434082, + "logps/rejected": -2.8636553287506104, + "loss": 9.0964, + "nll_loss": 2.2191882133483887, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24449510872364044, + "rewards/margins": 0.04187043011188507, + "rewards/rejected": -0.2863655388355255, + "step": 109 + }, + { + "epoch": 0.07607192254495158, + "grad_norm": 2.9060957431793213, + "learning_rate": 3.803596127247579e-06, + "log_odds_chosen": 0.03529013693332672, + "log_odds_ratio": -0.7689645886421204, + "logits/chosen": -1.411515235900879, + "logits/rejected": -1.3903522491455078, + "logps/chosen": -2.516648054122925, + "logps/rejected": -2.6515069007873535, + "loss": 7.2097, + "nll_loss": 1.7255399227142334, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.25166481733322144, + "rewards/margins": 0.013485876843333244, + "rewards/rejected": -0.26515069603919983, + "step": 110 + }, + { + "epoch": 0.07676348547717843, + "grad_norm": 2.940915107727051, + "learning_rate": 3.838174273858921e-06, + "log_odds_chosen": 0.23484264314174652, + "log_odds_ratio": -0.5932224988937378, + "logits/chosen": -0.9165379405021667, + "logits/rejected": -0.9511304497718811, + "logps/chosen": -2.0779600143432617, + "logps/rejected": -2.3309521675109863, + "loss": 6.6854, + "nll_loss": 1.6120387315750122, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.20779599249362946, + "rewards/margins": 0.025299228727817535, + "rewards/rejected": -0.2330952286720276, + "step": 111 + }, + { + "epoch": 0.07745504840940526, + "grad_norm": 4.432619094848633, + "learning_rate": 3.872752420470263e-06, + "log_odds_chosen": 0.3886290490627289, + "log_odds_ratio": -0.5865916013717651, + "logits/chosen": -1.2094829082489014, + "logits/rejected": -1.2666633129119873, + "logps/chosen": -2.9593513011932373, + "logps/rejected": -3.270914077758789, + "loss": 8.8685, + "nll_loss": 2.158459186553955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.29593515396118164, + "rewards/margins": 0.03115629032254219, + "rewards/rejected": -0.32709142565727234, + "step": 112 + }, + { + "epoch": 0.07814661134163209, + "grad_norm": 4.220597743988037, + "learning_rate": 3.907330567081604e-06, + "log_odds_chosen": -0.2154398411512375, + "log_odds_ratio": -1.1117606163024902, + "logits/chosen": -1.3798332214355469, + "logits/rejected": -1.3210017681121826, + "logps/chosen": -2.220973014831543, + "logps/rejected": -2.12764835357666, + "loss": 8.4091, + "nll_loss": 1.9911069869995117, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.22209730744361877, + "rewards/margins": -0.009332460351288319, + "rewards/rejected": -0.21276485919952393, + "step": 113 + }, + { + "epoch": 0.07883817427385892, + "grad_norm": 4.478047847747803, + "learning_rate": 3.941908713692946e-06, + "log_odds_chosen": -0.19631054997444153, + "log_odds_ratio": -1.0323046445846558, + "logits/chosen": -1.1578409671783447, + "logits/rejected": -1.154388189315796, + "logps/chosen": -1.7710871696472168, + "logps/rejected": -1.6202861070632935, + "loss": 7.2636, + "nll_loss": 1.712680459022522, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17710871994495392, + "rewards/margins": -0.015080108307301998, + "rewards/rejected": -0.16202861070632935, + "step": 114 + }, + { + "epoch": 0.07952973720608575, + "grad_norm": 3.203106641769409, + "learning_rate": 3.976486860304287e-06, + "log_odds_chosen": -0.10156537592411041, + "log_odds_ratio": -0.7922493815422058, + "logits/chosen": -1.2877484560012817, + "logits/rejected": -1.2778607606887817, + "logps/chosen": -1.9843857288360596, + "logps/rejected": -1.888746738433838, + "loss": 7.4244, + "nll_loss": 1.7768635749816895, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1984385848045349, + "rewards/margins": -0.009563901461660862, + "rewards/rejected": -0.18887467682361603, + "step": 115 + }, + { + "epoch": 0.08022130013831259, + "grad_norm": 2.964470863342285, + "learning_rate": 4.01106500691563e-06, + "log_odds_chosen": 0.14609771966934204, + "log_odds_ratio": -0.6924587488174438, + "logits/chosen": -1.3852272033691406, + "logits/rejected": -1.3699078559875488, + "logps/chosen": -2.1615686416625977, + "logps/rejected": -2.2567572593688965, + "loss": 8.114, + "nll_loss": 1.9592458009719849, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21615687012672424, + "rewards/margins": 0.009518839418888092, + "rewards/rejected": -0.22567571699619293, + "step": 116 + }, + { + "epoch": 0.08091286307053942, + "grad_norm": 3.5704469680786133, + "learning_rate": 4.045643153526971e-06, + "log_odds_chosen": -0.2644822299480438, + "log_odds_ratio": -0.8610115647315979, + "logits/chosen": -0.7173007726669312, + "logits/rejected": -0.6642380952835083, + "logps/chosen": -2.625227451324463, + "logps/rejected": -2.385984420776367, + "loss": 7.9542, + "nll_loss": 1.9024397134780884, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.26252272725105286, + "rewards/margins": -0.02392430230975151, + "rewards/rejected": -0.23859843611717224, + "step": 117 + }, + { + "epoch": 0.08160442600276625, + "grad_norm": 4.570618629455566, + "learning_rate": 4.080221300138313e-06, + "log_odds_chosen": 0.9268831014633179, + "log_odds_ratio": -0.4015723764896393, + "logits/chosen": -1.1652560234069824, + "logits/rejected": -1.2067581415176392, + "logps/chosen": -1.5085891485214233, + "logps/rejected": -2.3109521865844727, + "loss": 8.9819, + "nll_loss": 2.205305814743042, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15085892379283905, + "rewards/margins": 0.08023631572723389, + "rewards/rejected": -0.23109523952007294, + "step": 118 + }, + { + "epoch": 0.08229598893499308, + "grad_norm": 2.8909966945648193, + "learning_rate": 4.1147994467496545e-06, + "log_odds_chosen": -0.18048861622810364, + "log_odds_ratio": -0.879106342792511, + "logits/chosen": -1.1618527173995972, + "logits/rejected": -1.1411261558532715, + "logps/chosen": -2.1716060638427734, + "logps/rejected": -2.0435569286346436, + "loss": 6.6815, + "nll_loss": 1.5824534893035889, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.21716058254241943, + "rewards/margins": -0.012804889120161533, + "rewards/rejected": -0.20435568690299988, + "step": 119 + }, + { + "epoch": 0.08298755186721991, + "grad_norm": 2.6412265300750732, + "learning_rate": 4.149377593360996e-06, + "log_odds_chosen": -0.01931702345609665, + "log_odds_ratio": -0.7372455596923828, + "logits/chosen": -1.2916526794433594, + "logits/rejected": -1.2799456119537354, + "logps/chosen": -2.639468193054199, + "logps/rejected": -2.6471922397613525, + "loss": 5.9978, + "nll_loss": 1.425737738609314, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.2639468014240265, + "rewards/margins": 0.0007724054157733917, + "rewards/rejected": -0.2647192180156708, + "step": 120 + }, + { + "epoch": 0.08367911479944674, + "grad_norm": 4.046600341796875, + "learning_rate": 4.183955739972338e-06, + "log_odds_chosen": -0.1946304440498352, + "log_odds_ratio": -0.8274456262588501, + "logits/chosen": -1.2523605823516846, + "logits/rejected": -1.2300465106964111, + "logps/chosen": -2.1097776889801025, + "logps/rejected": -1.9532036781311035, + "loss": 7.2348, + "nll_loss": 1.7259578704833984, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.21097779273986816, + "rewards/margins": -0.01565741002559662, + "rewards/rejected": -0.19532036781311035, + "step": 121 + }, + { + "epoch": 0.08437067773167359, + "grad_norm": 2.7409684658050537, + "learning_rate": 4.218533886583679e-06, + "log_odds_chosen": 0.13821080327033997, + "log_odds_ratio": -0.6783009767532349, + "logits/chosen": -1.0376652479171753, + "logits/rejected": -1.0456650257110596, + "logps/chosen": -2.253610372543335, + "logps/rejected": -2.3703272342681885, + "loss": 6.2914, + "nll_loss": 1.505028486251831, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22536104917526245, + "rewards/margins": 0.011671675369143486, + "rewards/rejected": -0.2370327264070511, + "step": 122 + }, + { + "epoch": 0.08506224066390042, + "grad_norm": 4.59444522857666, + "learning_rate": 4.253112033195021e-06, + "log_odds_chosen": 0.14218562841415405, + "log_odds_ratio": -0.6852483153343201, + "logits/chosen": -1.2118958234786987, + "logits/rejected": -1.2340459823608398, + "logps/chosen": -2.2804534435272217, + "logps/rejected": -2.4017200469970703, + "loss": 8.8156, + "nll_loss": 2.135375738143921, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.22804535925388336, + "rewards/margins": 0.012126652523875237, + "rewards/rejected": -0.24017199873924255, + "step": 123 + }, + { + "epoch": 0.08575380359612725, + "grad_norm": 2.6190905570983887, + "learning_rate": 4.287690179806362e-06, + "log_odds_chosen": 0.07235060632228851, + "log_odds_ratio": -0.66350257396698, + "logits/chosen": -1.0323631763458252, + "logits/rejected": -1.0517666339874268, + "logps/chosen": -1.3956609964370728, + "logps/rejected": -1.4603394269943237, + "loss": 7.0443, + "nll_loss": 1.6947202682495117, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1395660936832428, + "rewards/margins": 0.0064678434282541275, + "rewards/rejected": -0.14603394269943237, + "step": 124 + }, + { + "epoch": 0.08644536652835408, + "grad_norm": 4.491865634918213, + "learning_rate": 4.322268326417704e-06, + "log_odds_chosen": -0.17503008246421814, + "log_odds_ratio": -0.8976634740829468, + "logits/chosen": -1.0372291803359985, + "logits/rejected": -1.035827398300171, + "logps/chosen": -2.6728475093841553, + "logps/rejected": -2.502103090286255, + "loss": 8.9689, + "nll_loss": 2.152448892593384, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2672847509384155, + "rewards/margins": -0.017074448987841606, + "rewards/rejected": -0.25021031498908997, + "step": 125 + }, + { + "epoch": 0.08713692946058091, + "grad_norm": 5.2629780769348145, + "learning_rate": 4.3568464730290455e-06, + "log_odds_chosen": 0.5604333877563477, + "log_odds_ratio": -0.5766538381576538, + "logits/chosen": -1.2006797790527344, + "logits/rejected": -1.2117624282836914, + "logps/chosen": -2.1328866481781006, + "logps/rejected": -2.68172550201416, + "loss": 7.9791, + "nll_loss": 1.9371154308319092, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21328869462013245, + "rewards/margins": 0.054883863776922226, + "rewards/rejected": -0.26817256212234497, + "step": 126 + }, + { + "epoch": 0.08782849239280774, + "grad_norm": 2.4697422981262207, + "learning_rate": 4.391424619640387e-06, + "log_odds_chosen": 0.7080565690994263, + "log_odds_ratio": -0.46416789293289185, + "logits/chosen": -1.017878532409668, + "logits/rejected": -0.994848370552063, + "logps/chosen": -1.7571378946304321, + "logps/rejected": -2.2879459857940674, + "loss": 6.4419, + "nll_loss": 1.5640522241592407, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17571380734443665, + "rewards/margins": 0.05308079719543457, + "rewards/rejected": -0.22879458963871002, + "step": 127 + }, + { + "epoch": 0.08852005532503458, + "grad_norm": 3.796071767807007, + "learning_rate": 4.4260027662517294e-06, + "log_odds_chosen": -0.17505794763565063, + "log_odds_ratio": -0.8826912045478821, + "logits/chosen": -1.288775086402893, + "logits/rejected": -1.231229305267334, + "logps/chosen": -1.9284441471099854, + "logps/rejected": -1.7314453125, + "loss": 6.9746, + "nll_loss": 1.6553906202316284, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.192844420671463, + "rewards/margins": -0.019699882715940475, + "rewards/rejected": -0.17314454913139343, + "step": 128 + }, + { + "epoch": 0.08921161825726141, + "grad_norm": 2.491612672805786, + "learning_rate": 4.460580912863071e-06, + "log_odds_chosen": 0.39554885029792786, + "log_odds_ratio": -0.5392511487007141, + "logits/chosen": -1.2205698490142822, + "logits/rejected": -1.2139629125595093, + "logps/chosen": -1.0191541910171509, + "logps/rejected": -1.3029963970184326, + "loss": 6.6261, + "nll_loss": 1.6025913953781128, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10191541165113449, + "rewards/margins": 0.02838423103094101, + "rewards/rejected": -0.1302996575832367, + "step": 129 + }, + { + "epoch": 0.08990318118948824, + "grad_norm": 3.537321090698242, + "learning_rate": 4.4951590594744126e-06, + "log_odds_chosen": 0.21087250113487244, + "log_odds_ratio": -0.6249895095825195, + "logits/chosen": -0.9488554000854492, + "logits/rejected": -0.9826868772506714, + "logps/chosen": -1.5986173152923584, + "logps/rejected": -1.7431650161743164, + "loss": 7.216, + "nll_loss": 1.741507887840271, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1598617434501648, + "rewards/margins": 0.014454763382673264, + "rewards/rejected": -0.17431651055812836, + "step": 130 + }, + { + "epoch": 0.09059474412171507, + "grad_norm": 5.5984883308410645, + "learning_rate": 4.529737206085754e-06, + "log_odds_chosen": 0.40902745723724365, + "log_odds_ratio": -0.5497879981994629, + "logits/chosen": -1.3714349269866943, + "logits/rejected": -1.3984029293060303, + "logps/chosen": -1.4219186305999756, + "logps/rejected": -1.6851788759231567, + "loss": 10.107, + "nll_loss": 2.4717788696289062, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14219185709953308, + "rewards/margins": 0.026326032355427742, + "rewards/rejected": -0.16851788759231567, + "step": 131 + }, + { + "epoch": 0.0912863070539419, + "grad_norm": 5.334275722503662, + "learning_rate": 4.564315352697096e-06, + "log_odds_chosen": -0.11085011065006256, + "log_odds_ratio": -0.8024502396583557, + "logits/chosen": -1.4571048021316528, + "logits/rejected": -1.4007424116134644, + "logps/chosen": -1.4272351264953613, + "logps/rejected": -1.4018197059631348, + "loss": 8.9514, + "nll_loss": 2.157599449157715, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14272351562976837, + "rewards/margins": -0.0025415411219000816, + "rewards/rejected": -0.14018197357654572, + "step": 132 + }, + { + "epoch": 0.09197786998616875, + "grad_norm": 4.106137275695801, + "learning_rate": 4.598893499308437e-06, + "log_odds_chosen": 0.33311688899993896, + "log_odds_ratio": -0.6020650863647461, + "logits/chosen": -1.1125842332839966, + "logits/rejected": -1.1575082540512085, + "logps/chosen": -1.534233570098877, + "logps/rejected": -1.7893953323364258, + "loss": 7.5926, + "nll_loss": 1.8379466533660889, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15342335402965546, + "rewards/margins": 0.025516191497445107, + "rewards/rejected": -0.178939551115036, + "step": 133 + }, + { + "epoch": 0.09266943291839558, + "grad_norm": 2.852998733520508, + "learning_rate": 4.633471645919779e-06, + "log_odds_chosen": 0.2178751528263092, + "log_odds_ratio": -0.6271660327911377, + "logits/chosen": -1.0826423168182373, + "logits/rejected": -1.0785956382751465, + "logps/chosen": -1.284407138824463, + "logps/rejected": -1.4057343006134033, + "loss": 6.5956, + "nll_loss": 1.5861728191375732, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1284407079219818, + "rewards/margins": 0.01213272288441658, + "rewards/rejected": -0.1405734419822693, + "step": 134 + }, + { + "epoch": 0.09336099585062241, + "grad_norm": 3.6493396759033203, + "learning_rate": 4.66804979253112e-06, + "log_odds_chosen": 0.3888116478919983, + "log_odds_ratio": -0.6063941121101379, + "logits/chosen": -0.8740944862365723, + "logits/rejected": -0.8482604622840881, + "logps/chosen": -1.223939299583435, + "logps/rejected": -1.533890962600708, + "loss": 6.8399, + "nll_loss": 1.6493427753448486, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12239392846822739, + "rewards/margins": 0.030995164066553116, + "rewards/rejected": -0.1533890962600708, + "step": 135 + }, + { + "epoch": 0.09405255878284924, + "grad_norm": 2.6961162090301514, + "learning_rate": 4.702627939142462e-06, + "log_odds_chosen": -0.04538653790950775, + "log_odds_ratio": -0.7914632558822632, + "logits/chosen": -1.2952909469604492, + "logits/rejected": -1.305565595626831, + "logps/chosen": -1.4900434017181396, + "logps/rejected": -1.449129581451416, + "loss": 7.09, + "nll_loss": 1.6933479309082031, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14900435507297516, + "rewards/margins": -0.00409140158444643, + "rewards/rejected": -0.1449129581451416, + "step": 136 + }, + { + "epoch": 0.09474412171507607, + "grad_norm": 5.1169657707214355, + "learning_rate": 4.7372060857538035e-06, + "log_odds_chosen": 0.2797987759113312, + "log_odds_ratio": -0.6090816259384155, + "logits/chosen": -1.2741329669952393, + "logits/rejected": -1.330481767654419, + "logps/chosen": -1.1922301054000854, + "logps/rejected": -1.3819254636764526, + "loss": 9.6619, + "nll_loss": 2.3545656204223633, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11922300606966019, + "rewards/margins": 0.018969547003507614, + "rewards/rejected": -0.1381925493478775, + "step": 137 + }, + { + "epoch": 0.0954356846473029, + "grad_norm": 4.451641082763672, + "learning_rate": 4.771784232365145e-06, + "log_odds_chosen": 0.2165432572364807, + "log_odds_ratio": -0.6945505738258362, + "logits/chosen": -1.1947180032730103, + "logits/rejected": -1.1982449293136597, + "logps/chosen": -1.1427407264709473, + "logps/rejected": -1.402220606803894, + "loss": 8.8009, + "nll_loss": 2.130781888961792, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11427406966686249, + "rewards/margins": 0.025947997346520424, + "rewards/rejected": -0.14022207260131836, + "step": 138 + }, + { + "epoch": 0.09612724757952974, + "grad_norm": 3.2618391513824463, + "learning_rate": 4.8063623789764875e-06, + "log_odds_chosen": -0.11838006228208542, + "log_odds_ratio": -0.7790952920913696, + "logits/chosen": -1.2024139165878296, + "logits/rejected": -1.1736711263656616, + "logps/chosen": -1.2197315692901611, + "logps/rejected": -1.1040195226669312, + "loss": 7.4106, + "nll_loss": 1.774742603302002, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12197314947843552, + "rewards/margins": -0.011571199633181095, + "rewards/rejected": -0.11040195822715759, + "step": 139 + }, + { + "epoch": 0.09681881051175657, + "grad_norm": 3.434873580932617, + "learning_rate": 4.840940525587829e-06, + "log_odds_chosen": 0.010386921465396881, + "log_odds_ratio": -0.7327617406845093, + "logits/chosen": -1.3008506298065186, + "logits/rejected": -1.2888820171356201, + "logps/chosen": -1.301018476486206, + "logps/rejected": -1.288162112236023, + "loss": 7.1551, + "nll_loss": 1.7154977321624756, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13010185956954956, + "rewards/margins": -0.0012856395915150642, + "rewards/rejected": -0.12881621718406677, + "step": 140 + }, + { + "epoch": 0.0975103734439834, + "grad_norm": 2.7035653591156006, + "learning_rate": 4.875518672199171e-06, + "log_odds_chosen": -0.29188334941864014, + "log_odds_ratio": -0.8879889249801636, + "logits/chosen": -0.9949595332145691, + "logits/rejected": -0.992701530456543, + "logps/chosen": -1.429956316947937, + "logps/rejected": -1.2038675546646118, + "loss": 6.5933, + "nll_loss": 1.5595312118530273, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14299562573432922, + "rewards/margins": -0.02260887622833252, + "rewards/rejected": -0.1203867644071579, + "step": 141 + }, + { + "epoch": 0.09820193637621023, + "grad_norm": 3.0859532356262207, + "learning_rate": 4.910096818810512e-06, + "log_odds_chosen": 0.660336971282959, + "log_odds_ratio": -0.4694107472896576, + "logits/chosen": -0.798190712928772, + "logits/rejected": -0.847572922706604, + "logps/chosen": -0.8726248741149902, + "logps/rejected": -1.2767419815063477, + "loss": 6.6484, + "nll_loss": 1.6151642799377441, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08726249635219574, + "rewards/margins": 0.04041171073913574, + "rewards/rejected": -0.12767420709133148, + "step": 142 + }, + { + "epoch": 0.09889349930843706, + "grad_norm": 3.3809046745300293, + "learning_rate": 4.944674965421854e-06, + "log_odds_chosen": 0.15960107743740082, + "log_odds_ratio": -0.6858676671981812, + "logits/chosen": -1.0418691635131836, + "logits/rejected": -1.0430397987365723, + "logps/chosen": -1.2576993703842163, + "logps/rejected": -1.391213059425354, + "loss": 7.7321, + "nll_loss": 1.8644449710845947, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1257699429988861, + "rewards/margins": 0.013351368717849255, + "rewards/rejected": -0.13912130892276764, + "step": 143 + }, + { + "epoch": 0.0995850622406639, + "grad_norm": 3.0167365074157715, + "learning_rate": 4.979253112033195e-06, + "log_odds_chosen": 0.49617624282836914, + "log_odds_ratio": -0.5587316751480103, + "logits/chosen": -0.8929398059844971, + "logits/rejected": -0.9107470512390137, + "logps/chosen": -1.127401351928711, + "logps/rejected": -1.4853781461715698, + "loss": 7.3534, + "nll_loss": 1.7824782133102417, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11274014413356781, + "rewards/margins": 0.03579767793416977, + "rewards/rejected": -0.14853781461715698, + "step": 144 + }, + { + "epoch": 0.10027662517289074, + "grad_norm": 4.485063552856445, + "learning_rate": 5.013831258644537e-06, + "log_odds_chosen": 0.5366233587265015, + "log_odds_ratio": -0.46875566244125366, + "logits/chosen": -1.0820214748382568, + "logits/rejected": -1.1352897882461548, + "logps/chosen": -0.8813449144363403, + "logps/rejected": -1.1867763996124268, + "loss": 8.9173, + "nll_loss": 2.182441473007202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08813448995351791, + "rewards/margins": 0.030543144792318344, + "rewards/rejected": -0.11867764592170715, + "step": 145 + }, + { + "epoch": 0.10096818810511757, + "grad_norm": 3.3979671001434326, + "learning_rate": 5.0484094052558784e-06, + "log_odds_chosen": 0.3244428336620331, + "log_odds_ratio": -0.5853853225708008, + "logits/chosen": -0.9036159515380859, + "logits/rejected": -0.9359475374221802, + "logps/chosen": -0.820245087146759, + "logps/rejected": -1.015293002128601, + "loss": 6.9945, + "nll_loss": 1.690085768699646, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08202450722455978, + "rewards/margins": 0.019504791125655174, + "rewards/rejected": -0.10152930021286011, + "step": 146 + }, + { + "epoch": 0.1016597510373444, + "grad_norm": 3.1067051887512207, + "learning_rate": 5.08298755186722e-06, + "log_odds_chosen": 0.47029638290405273, + "log_odds_ratio": -0.5233011245727539, + "logits/chosen": -0.9140459299087524, + "logits/rejected": -0.9160831570625305, + "logps/chosen": -0.7125457525253296, + "logps/rejected": -0.9512070417404175, + "loss": 5.5303, + "nll_loss": 1.3302336931228638, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07125458121299744, + "rewards/margins": 0.02386613003909588, + "rewards/rejected": -0.09512070566415787, + "step": 147 + }, + { + "epoch": 0.10235131396957123, + "grad_norm": 2.4614651203155518, + "learning_rate": 5.1175656984785616e-06, + "log_odds_chosen": 0.17932581901550293, + "log_odds_ratio": -0.6718851923942566, + "logits/chosen": -0.9580909013748169, + "logits/rejected": -0.9631924033164978, + "logps/chosen": -0.7362526059150696, + "logps/rejected": -0.8400034308433533, + "loss": 6.4654, + "nll_loss": 1.5491598844528198, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.07362527400255203, + "rewards/margins": 0.010375075973570347, + "rewards/rejected": -0.08400034159421921, + "step": 148 + }, + { + "epoch": 0.10304287690179806, + "grad_norm": 3.6997735500335693, + "learning_rate": 5.152143845089903e-06, + "log_odds_chosen": 0.16198191046714783, + "log_odds_ratio": -0.6887600421905518, + "logits/chosen": -1.2218208312988281, + "logits/rejected": -1.1994661092758179, + "logps/chosen": -0.9160218238830566, + "logps/rejected": -0.9438376426696777, + "loss": 6.9703, + "nll_loss": 1.6736863851547241, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09160219132900238, + "rewards/margins": 0.002781575545668602, + "rewards/rejected": -0.09438376128673553, + "step": 149 + }, + { + "epoch": 0.1037344398340249, + "grad_norm": 4.104522228240967, + "learning_rate": 5.1867219917012455e-06, + "log_odds_chosen": 0.4990871548652649, + "log_odds_ratio": -0.5001032948493958, + "logits/chosen": -1.1245300769805908, + "logits/rejected": -1.146864414215088, + "logps/chosen": -0.7892501950263977, + "logps/rejected": -1.066573143005371, + "loss": 7.7041, + "nll_loss": 1.876022219657898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07892502099275589, + "rewards/margins": 0.027732297778129578, + "rewards/rejected": -0.10665731877088547, + "step": 150 + }, + { + "epoch": 0.10442600276625173, + "grad_norm": 4.101137638092041, + "learning_rate": 5.221300138312587e-06, + "log_odds_chosen": 0.6196571588516235, + "log_odds_ratio": -0.4808695316314697, + "logits/chosen": -1.1832971572875977, + "logits/rejected": -1.2204134464263916, + "logps/chosen": -0.8114309906959534, + "logps/rejected": -1.205329179763794, + "loss": 7.5169, + "nll_loss": 1.8311498165130615, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0811430960893631, + "rewards/margins": 0.03938981145620346, + "rewards/rejected": -0.12053291499614716, + "step": 151 + }, + { + "epoch": 0.10511756569847856, + "grad_norm": 3.7619707584381104, + "learning_rate": 5.255878284923929e-06, + "log_odds_chosen": 0.16248542070388794, + "log_odds_ratio": -0.6626776456832886, + "logits/chosen": -1.3932623863220215, + "logits/rejected": -1.4151134490966797, + "logps/chosen": -0.7183889746665955, + "logps/rejected": -0.81103515625, + "loss": 8.604, + "nll_loss": 2.084726095199585, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07183889299631119, + "rewards/margins": 0.009264619089663029, + "rewards/rejected": -0.08110351115465164, + "step": 152 + }, + { + "epoch": 0.10580912863070539, + "grad_norm": 3.2583189010620117, + "learning_rate": 5.29045643153527e-06, + "log_odds_chosen": 0.04566054046154022, + "log_odds_ratio": -0.7379659414291382, + "logits/chosen": -0.9627122282981873, + "logits/rejected": -0.9689441323280334, + "logps/chosen": -0.9377343058586121, + "logps/rejected": -0.9663422107696533, + "loss": 7.0429, + "nll_loss": 1.686922311782837, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09377343207597733, + "rewards/margins": 0.002860790118575096, + "rewards/rejected": -0.09663422405719757, + "step": 153 + }, + { + "epoch": 0.10650069156293222, + "grad_norm": 3.12202787399292, + "learning_rate": 5.325034578146612e-06, + "log_odds_chosen": -0.14066341519355774, + "log_odds_ratio": -0.8131635785102844, + "logits/chosen": -1.1361145973205566, + "logits/rejected": -1.161047339439392, + "logps/chosen": -0.8530935049057007, + "logps/rejected": -0.7701809406280518, + "loss": 7.1593, + "nll_loss": 1.7085163593292236, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.08530934900045395, + "rewards/margins": -0.008291250094771385, + "rewards/rejected": -0.07701809704303741, + "step": 154 + }, + { + "epoch": 0.10719225449515905, + "grad_norm": 2.6539435386657715, + "learning_rate": 5.359612724757953e-06, + "log_odds_chosen": 0.05288369208574295, + "log_odds_ratio": -0.6911187171936035, + "logits/chosen": -1.1930322647094727, + "logits/rejected": -1.2084239721298218, + "logps/chosen": -0.7693685293197632, + "logps/rejected": -0.7825278043746948, + "loss": 5.6941, + "nll_loss": 1.3544032573699951, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07693684101104736, + "rewards/margins": 0.0013159322552382946, + "rewards/rejected": -0.07825277745723724, + "step": 155 + }, + { + "epoch": 0.1078838174273859, + "grad_norm": 2.016812801361084, + "learning_rate": 5.394190871369295e-06, + "log_odds_chosen": -0.2215977907180786, + "log_odds_ratio": -0.8341439962387085, + "logits/chosen": -0.985969066619873, + "logits/rejected": -0.9674792289733887, + "logps/chosen": -0.8357418775558472, + "logps/rejected": -0.7325291633605957, + "loss": 5.6336, + "nll_loss": 1.324975848197937, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.08357419073581696, + "rewards/margins": -0.010321276262402534, + "rewards/rejected": -0.07325291633605957, + "step": 156 + }, + { + "epoch": 0.10857538035961273, + "grad_norm": 4.376221179962158, + "learning_rate": 5.4287690179806365e-06, + "log_odds_chosen": 0.7335000038146973, + "log_odds_ratio": -0.4274066686630249, + "logits/chosen": -1.0363941192626953, + "logits/rejected": -1.07184636592865, + "logps/chosen": -0.5886829495429993, + "logps/rejected": -0.9144682884216309, + "loss": 7.8089, + "nll_loss": 1.9094841480255127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05886829271912575, + "rewards/margins": 0.03257853537797928, + "rewards/rejected": -0.09144683182239532, + "step": 157 + }, + { + "epoch": 0.10926694329183956, + "grad_norm": 2.5418620109558105, + "learning_rate": 5.463347164591978e-06, + "log_odds_chosen": 0.11510226130485535, + "log_odds_ratio": -0.6988317370414734, + "logits/chosen": -0.9949272274971008, + "logits/rejected": -1.0054848194122314, + "logps/chosen": -0.6619945168495178, + "logps/rejected": -0.7326085567474365, + "loss": 6.3483, + "nll_loss": 1.5171840190887451, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.06619945168495178, + "rewards/margins": 0.007061406970024109, + "rewards/rejected": -0.07326085865497589, + "step": 158 + }, + { + "epoch": 0.10995850622406639, + "grad_norm": 3.0811150074005127, + "learning_rate": 5.49792531120332e-06, + "log_odds_chosen": 0.48675233125686646, + "log_odds_ratio": -0.5503637194633484, + "logits/chosen": -1.2820873260498047, + "logits/rejected": -1.2850234508514404, + "logps/chosen": -0.6293501853942871, + "logps/rejected": -0.831933319568634, + "loss": 6.8161, + "nll_loss": 1.6489897966384888, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06293501704931259, + "rewards/margins": 0.020258314907550812, + "rewards/rejected": -0.0831933319568634, + "step": 159 + }, + { + "epoch": 0.11065006915629322, + "grad_norm": 3.9759669303894043, + "learning_rate": 5.532503457814661e-06, + "log_odds_chosen": 0.592308759689331, + "log_odds_ratio": -0.4961738884449005, + "logits/chosen": -0.9351903200149536, + "logits/rejected": -0.9440701007843018, + "logps/chosen": -0.5135587453842163, + "logps/rejected": -0.8421783447265625, + "loss": 6.8458, + "nll_loss": 1.6618294715881348, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.05135587230324745, + "rewards/margins": 0.032861970365047455, + "rewards/rejected": -0.08421783894300461, + "step": 160 + }, + { + "epoch": 0.11134163208852006, + "grad_norm": 2.940592050552368, + "learning_rate": 5.5670816044260036e-06, + "log_odds_chosen": 0.3694240152835846, + "log_odds_ratio": -0.5937953591346741, + "logits/chosen": -1.092789888381958, + "logits/rejected": -1.086504578590393, + "logps/chosen": -0.37033939361572266, + "logps/rejected": -0.5343331098556519, + "loss": 6.7006, + "nll_loss": 1.61576247215271, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.037033941596746445, + "rewards/margins": 0.01639937050640583, + "rewards/rejected": -0.053433313965797424, + "step": 161 + }, + { + "epoch": 0.11203319502074689, + "grad_norm": 3.694589614868164, + "learning_rate": 5.601659751037345e-06, + "log_odds_chosen": 0.6592048406600952, + "log_odds_ratio": -0.4732610583305359, + "logits/chosen": -1.0874364376068115, + "logits/rejected": -1.0700212717056274, + "logps/chosen": -0.27317383885383606, + "logps/rejected": -0.4280146062374115, + "loss": 6.7253, + "nll_loss": 1.6339954137802124, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.027317382395267487, + "rewards/margins": 0.015484076924622059, + "rewards/rejected": -0.04280146211385727, + "step": 162 + }, + { + "epoch": 0.11272475795297372, + "grad_norm": 2.992234706878662, + "learning_rate": 5.636237897648687e-06, + "log_odds_chosen": 0.10452957451343536, + "log_odds_ratio": -0.6631104946136475, + "logits/chosen": -1.0844966173171997, + "logits/rejected": -1.0631647109985352, + "logps/chosen": -0.4913597106933594, + "logps/rejected": -0.5436203479766846, + "loss": 5.5863, + "nll_loss": 1.3302576541900635, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.049135975539684296, + "rewards/margins": 0.0052260663360357285, + "rewards/rejected": -0.054362036287784576, + "step": 163 + }, + { + "epoch": 0.11341632088520055, + "grad_norm": 2.8842968940734863, + "learning_rate": 5.670816044260028e-06, + "log_odds_chosen": 0.5779211521148682, + "log_odds_ratio": -0.5084534883499146, + "logits/chosen": -0.9483163356781006, + "logits/rejected": -0.9567909240722656, + "logps/chosen": -0.4137590825557709, + "logps/rejected": -0.6266723275184631, + "loss": 5.9067, + "nll_loss": 1.425826072692871, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.04137590900063515, + "rewards/margins": 0.021291323006153107, + "rewards/rejected": -0.06266723573207855, + "step": 164 + }, + { + "epoch": 0.11410788381742738, + "grad_norm": 3.21958589553833, + "learning_rate": 5.70539419087137e-06, + "log_odds_chosen": 0.6306442022323608, + "log_odds_ratio": -0.4906991720199585, + "logits/chosen": -1.0865769386291504, + "logits/rejected": -1.1414381265640259, + "logps/chosen": -0.38455918431282043, + "logps/rejected": -0.7448874711990356, + "loss": 5.6921, + "nll_loss": 1.3739588260650635, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03845591843128204, + "rewards/margins": 0.03603282943367958, + "rewards/rejected": -0.07448874413967133, + "step": 165 + }, + { + "epoch": 0.11479944674965421, + "grad_norm": 3.038536548614502, + "learning_rate": 5.7399723374827105e-06, + "log_odds_chosen": 0.3064265549182892, + "log_odds_ratio": -0.6025457382202148, + "logits/chosen": -1.0255924463272095, + "logits/rejected": -1.0379023551940918, + "logps/chosen": -0.43842604756355286, + "logps/rejected": -0.5147268772125244, + "loss": 4.9638, + "nll_loss": 1.1806881427764893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.043842606246471405, + "rewards/margins": 0.007630080450326204, + "rewards/rejected": -0.05147268623113632, + "step": 166 + }, + { + "epoch": 0.11549100968188106, + "grad_norm": 3.0498814582824707, + "learning_rate": 5.774550484094053e-06, + "log_odds_chosen": 0.2696775794029236, + "log_odds_ratio": -0.6612348556518555, + "logits/chosen": -1.0600786209106445, + "logits/rejected": -1.0842914581298828, + "logps/chosen": -0.30538222193717957, + "logps/rejected": -0.4055224061012268, + "loss": 7.08, + "nll_loss": 1.7038698196411133, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.030538223683834076, + "rewards/margins": 0.01001401711255312, + "rewards/rejected": -0.04055224359035492, + "step": 167 + }, + { + "epoch": 0.11618257261410789, + "grad_norm": 2.9541399478912354, + "learning_rate": 5.8091286307053945e-06, + "log_odds_chosen": 0.7906651496887207, + "log_odds_ratio": -0.4121550917625427, + "logits/chosen": -0.8397430181503296, + "logits/rejected": -0.8527986407279968, + "logps/chosen": -0.41424304246902466, + "logps/rejected": -0.7723820805549622, + "loss": 6.7591, + "nll_loss": 1.648551344871521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.041424304246902466, + "rewards/margins": 0.03581390529870987, + "rewards/rejected": -0.07723820954561234, + "step": 168 + }, + { + "epoch": 0.11687413554633472, + "grad_norm": 3.013000249862671, + "learning_rate": 5.843706777316736e-06, + "log_odds_chosen": 0.6137700080871582, + "log_odds_ratio": -0.4964900612831116, + "logits/chosen": -1.203710913658142, + "logits/rejected": -1.221206545829773, + "logps/chosen": -0.2094321846961975, + "logps/rejected": -0.32387834787368774, + "loss": 6.0946, + "nll_loss": 1.4740005731582642, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02094321884214878, + "rewards/margins": 0.011444617062807083, + "rewards/rejected": -0.032387834042310715, + "step": 169 + }, + { + "epoch": 0.11756569847856155, + "grad_norm": 3.396745204925537, + "learning_rate": 5.878284923928078e-06, + "log_odds_chosen": 0.7227403521537781, + "log_odds_ratio": -0.5156093239784241, + "logits/chosen": -1.0030102729797363, + "logits/rejected": -1.0096819400787354, + "logps/chosen": -0.2418069839477539, + "logps/rejected": -0.49622681736946106, + "loss": 6.7511, + "nll_loss": 1.6362210512161255, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02418069913983345, + "rewards/margins": 0.025441987439990044, + "rewards/rejected": -0.049622680991888046, + "step": 170 + }, + { + "epoch": 0.11825726141078838, + "grad_norm": 2.8235480785369873, + "learning_rate": 5.912863070539419e-06, + "log_odds_chosen": 1.0059127807617188, + "log_odds_ratio": -0.39074984192848206, + "logits/chosen": -0.79595547914505, + "logits/rejected": -0.8163602352142334, + "logps/chosen": -0.3028831481933594, + "logps/rejected": -0.5650444626808167, + "loss": 6.15, + "nll_loss": 1.4984172582626343, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.030288314446806908, + "rewards/margins": 0.026216134428977966, + "rewards/rejected": -0.05650445073843002, + "step": 171 + }, + { + "epoch": 0.11894882434301521, + "grad_norm": 3.315999984741211, + "learning_rate": 5.947441217150761e-06, + "log_odds_chosen": -0.772477388381958, + "log_odds_ratio": -1.4490861892700195, + "logits/chosen": -1.0406111478805542, + "logits/rejected": -1.0026965141296387, + "logps/chosen": -1.0363490581512451, + "logps/rejected": -0.5162218809127808, + "loss": 7.9681, + "nll_loss": 1.8471150398254395, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.10363490134477615, + "rewards/margins": -0.05201271176338196, + "rewards/rejected": -0.0516221821308136, + "step": 172 + }, + { + "epoch": 0.11964038727524205, + "grad_norm": 2.5711920261383057, + "learning_rate": 5.982019363762103e-06, + "log_odds_chosen": 1.1415014266967773, + "log_odds_ratio": -0.39422786235809326, + "logits/chosen": -0.7181402444839478, + "logits/rejected": -0.7053050994873047, + "logps/chosen": -0.2831200957298279, + "logps/rejected": -0.5622020959854126, + "loss": 5.6992, + "nll_loss": 1.3853861093521118, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02831200882792473, + "rewards/margins": 0.02790820226073265, + "rewards/rejected": -0.05622021108865738, + "step": 173 + }, + { + "epoch": 0.12033195020746888, + "grad_norm": 2.762683868408203, + "learning_rate": 6.016597510373445e-06, + "log_odds_chosen": 0.8880830407142639, + "log_odds_ratio": -0.45892333984375, + "logits/chosen": -0.9726389646530151, + "logits/rejected": -0.9073729515075684, + "logps/chosen": -0.3582124710083008, + "logps/rejected": -0.6857412457466125, + "loss": 6.6004, + "nll_loss": 1.6042182445526123, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03582124784588814, + "rewards/margins": 0.0327528715133667, + "rewards/rejected": -0.06857412308454514, + "step": 174 + }, + { + "epoch": 0.12102351313969571, + "grad_norm": 3.1388444900512695, + "learning_rate": 6.051175656984786e-06, + "log_odds_chosen": 0.3940921723842621, + "log_odds_ratio": -0.595094621181488, + "logits/chosen": -1.1326537132263184, + "logits/rejected": -1.1883403062820435, + "logps/chosen": -0.3753691613674164, + "logps/rejected": -0.4759666323661804, + "loss": 6.4509, + "nll_loss": 1.5532217025756836, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03753691911697388, + "rewards/margins": 0.010059747844934464, + "rewards/rejected": -0.04759666323661804, + "step": 175 + }, + { + "epoch": 0.12171507607192254, + "grad_norm": 2.726529598236084, + "learning_rate": 6.085753803596127e-06, + "log_odds_chosen": 0.3556770086288452, + "log_odds_ratio": -0.6936931014060974, + "logits/chosen": -1.009606957435608, + "logits/rejected": -1.0062367916107178, + "logps/chosen": -0.31316474080085754, + "logps/rejected": -0.47506463527679443, + "loss": 5.9535, + "nll_loss": 1.419015884399414, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.031316474080085754, + "rewards/margins": 0.01618999056518078, + "rewards/rejected": -0.04750646650791168, + "step": 176 + }, + { + "epoch": 0.12240663900414937, + "grad_norm": 4.527401924133301, + "learning_rate": 6.120331950207469e-06, + "log_odds_chosen": 0.00036665797233581543, + "log_odds_ratio": -0.826472282409668, + "logits/chosen": -1.197486400604248, + "logits/rejected": -1.1863048076629639, + "logps/chosen": -0.3690447211265564, + "logps/rejected": -0.4162423312664032, + "loss": 8.1314, + "nll_loss": 1.9502149820327759, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.036904476583004, + "rewards/margins": 0.00471975514665246, + "rewards/rejected": -0.04162422940135002, + "step": 177 + }, + { + "epoch": 0.12309820193637622, + "grad_norm": 3.836942195892334, + "learning_rate": 6.154910096818811e-06, + "log_odds_chosen": 0.45801472663879395, + "log_odds_ratio": -0.5604403614997864, + "logits/chosen": -0.7728670835494995, + "logits/rejected": -0.790350079536438, + "logps/chosen": -0.3449108898639679, + "logps/rejected": -0.458249568939209, + "loss": 6.2138, + "nll_loss": 1.497396469116211, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03449108451604843, + "rewards/margins": 0.011333871632814407, + "rewards/rejected": -0.04582495987415314, + "step": 178 + }, + { + "epoch": 0.12378976486860305, + "grad_norm": 4.046421051025391, + "learning_rate": 6.1894882434301526e-06, + "log_odds_chosen": 0.6479674577713013, + "log_odds_ratio": -0.5464097857475281, + "logits/chosen": -0.843544602394104, + "logits/rejected": -0.87617027759552, + "logps/chosen": -0.19211876392364502, + "logps/rejected": -0.3340243101119995, + "loss": 6.3723, + "nll_loss": 1.5384306907653809, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01921188086271286, + "rewards/margins": 0.01419055089354515, + "rewards/rejected": -0.03340243175625801, + "step": 179 + }, + { + "epoch": 0.12448132780082988, + "grad_norm": 2.370387554168701, + "learning_rate": 6.224066390041494e-06, + "log_odds_chosen": 1.467092514038086, + "log_odds_ratio": -0.3379737436771393, + "logits/chosen": -1.0904818773269653, + "logits/rejected": -1.1200087070465088, + "logps/chosen": -0.28249266743659973, + "logps/rejected": -0.6824603080749512, + "loss": 5.638, + "nll_loss": 1.375713586807251, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.028249267488718033, + "rewards/margins": 0.03999676555395126, + "rewards/rejected": -0.068246029317379, + "step": 180 + }, + { + "epoch": 0.1251728907330567, + "grad_norm": 4.152231693267822, + "learning_rate": 6.258644536652836e-06, + "log_odds_chosen": -0.037120670080184937, + "log_odds_ratio": -0.8663021922111511, + "logits/chosen": -1.0969313383102417, + "logits/rejected": -1.0759817361831665, + "logps/chosen": -0.4796963930130005, + "logps/rejected": -0.46426907181739807, + "loss": 6.8526, + "nll_loss": 1.626530647277832, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04796964302659035, + "rewards/margins": -0.0015427323523908854, + "rewards/rejected": -0.04642690718173981, + "step": 181 + }, + { + "epoch": 0.12586445366528354, + "grad_norm": 3.547257423400879, + "learning_rate": 6.293222683264177e-06, + "log_odds_chosen": -0.031091928482055664, + "log_odds_ratio": -1.1040213108062744, + "logits/chosen": -1.212889313697815, + "logits/rejected": -1.206860065460205, + "logps/chosen": -0.6664987802505493, + "logps/rejected": -0.3842780292034149, + "loss": 6.6819, + "nll_loss": 1.5600645542144775, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06664987653493881, + "rewards/margins": -0.028222069144248962, + "rewards/rejected": -0.03842780366539955, + "step": 182 + }, + { + "epoch": 0.12655601659751037, + "grad_norm": 3.0481033325195312, + "learning_rate": 6.327800829875519e-06, + "log_odds_chosen": 0.3628125488758087, + "log_odds_ratio": -0.628267765045166, + "logits/chosen": -1.2803454399108887, + "logits/rejected": -1.286865472793579, + "logps/chosen": -0.250302791595459, + "logps/rejected": -0.368587464094162, + "loss": 5.7693, + "nll_loss": 1.379507064819336, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.025030281394720078, + "rewards/margins": 0.0118284672498703, + "rewards/rejected": -0.03685874491930008, + "step": 183 + }, + { + "epoch": 0.1272475795297372, + "grad_norm": 2.8776659965515137, + "learning_rate": 6.36237897648686e-06, + "log_odds_chosen": 0.21505118906497955, + "log_odds_ratio": -0.6434811949729919, + "logits/chosen": -0.8477299809455872, + "logits/rejected": -0.8031063079833984, + "logps/chosen": -0.27206921577453613, + "logps/rejected": -0.312256395816803, + "loss": 6.5827, + "nll_loss": 1.5813323259353638, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.027206923812627792, + "rewards/margins": 0.0040187169797718525, + "rewards/rejected": -0.03122563846409321, + "step": 184 + }, + { + "epoch": 0.12793914246196403, + "grad_norm": 2.9824650287628174, + "learning_rate": 6.396957123098202e-06, + "log_odds_chosen": 0.3025757074356079, + "log_odds_ratio": -0.6380969882011414, + "logits/chosen": -0.8896502256393433, + "logits/rejected": -0.9428619146347046, + "logps/chosen": -0.25282663106918335, + "logps/rejected": -0.3300777077674866, + "loss": 5.2551, + "nll_loss": 1.249961256980896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.025282664224505424, + "rewards/margins": 0.0077251046895980835, + "rewards/rejected": -0.03300777077674866, + "step": 185 + }, + { + "epoch": 0.12863070539419086, + "grad_norm": 3.4677364826202393, + "learning_rate": 6.4315352697095435e-06, + "log_odds_chosen": 0.5208099484443665, + "log_odds_ratio": -0.70698082447052, + "logits/chosen": -0.7441154718399048, + "logits/rejected": -0.7471722960472107, + "logps/chosen": -0.2849894165992737, + "logps/rejected": -0.48318296670913696, + "loss": 6.2467, + "nll_loss": 1.4909782409667969, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.028498942032456398, + "rewards/margins": 0.01981935277581215, + "rewards/rejected": -0.0483182929456234, + "step": 186 + }, + { + "epoch": 0.12932226832641772, + "grad_norm": 3.294553518295288, + "learning_rate": 6.466113416320886e-06, + "log_odds_chosen": -0.1137196272611618, + "log_odds_ratio": -0.7959656715393066, + "logits/chosen": -0.5830647945404053, + "logits/rejected": -0.5787371397018433, + "logps/chosen": -0.2610514163970947, + "logps/rejected": -0.2726013958454132, + "loss": 6.2343, + "nll_loss": 1.47898530960083, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.026105143129825592, + "rewards/margins": 0.001154996920377016, + "rewards/rejected": -0.02726013958454132, + "step": 187 + }, + { + "epoch": 0.13001383125864455, + "grad_norm": 2.292361259460449, + "learning_rate": 6.5006915629322275e-06, + "log_odds_chosen": 0.7241369485855103, + "log_odds_ratio": -0.4350699782371521, + "logits/chosen": -0.7973431348800659, + "logits/rejected": -0.8062911033630371, + "logps/chosen": -0.3231046199798584, + "logps/rejected": -0.5692073702812195, + "loss": 4.8058, + "nll_loss": 1.1579421758651733, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03231046348810196, + "rewards/margins": 0.024610277265310287, + "rewards/rejected": -0.05692073702812195, + "step": 188 + }, + { + "epoch": 0.13070539419087138, + "grad_norm": 3.161369800567627, + "learning_rate": 6.535269709543569e-06, + "log_odds_chosen": 0.8151931762695312, + "log_odds_ratio": -0.5472856760025024, + "logits/chosen": -1.0429188013076782, + "logits/rejected": -1.051293134689331, + "logps/chosen": -0.27129557728767395, + "logps/rejected": -0.5605320930480957, + "loss": 5.7897, + "nll_loss": 1.392688274383545, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.027129555121064186, + "rewards/margins": 0.028923654928803444, + "rewards/rejected": -0.05605321004986763, + "step": 189 + }, + { + "epoch": 0.1313969571230982, + "grad_norm": 3.7443580627441406, + "learning_rate": 6.569847856154911e-06, + "log_odds_chosen": 0.6477518081665039, + "log_odds_ratio": -0.5179813504219055, + "logits/chosen": -1.2564938068389893, + "logits/rejected": -1.273054838180542, + "logps/chosen": -0.1866637021303177, + "logps/rejected": -0.3534211814403534, + "loss": 7.8754, + "nll_loss": 1.9170430898666382, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01866636984050274, + "rewards/margins": 0.01667574793100357, + "rewards/rejected": -0.03534211963415146, + "step": 190 + }, + { + "epoch": 0.13208852005532504, + "grad_norm": 3.5630128383636475, + "learning_rate": 6.604426002766252e-06, + "log_odds_chosen": 0.0035073384642601013, + "log_odds_ratio": -0.7637513875961304, + "logits/chosen": -0.7529496550559998, + "logits/rejected": -0.7476365566253662, + "logps/chosen": -0.3738434910774231, + "logps/rejected": -0.3310817778110504, + "loss": 5.1497, + "nll_loss": 1.2110416889190674, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03738434985280037, + "rewards/margins": -0.004276171792298555, + "rewards/rejected": -0.0331081785261631, + "step": 191 + }, + { + "epoch": 0.13278008298755187, + "grad_norm": 3.5751423835754395, + "learning_rate": 6.639004149377594e-06, + "log_odds_chosen": 0.4771386981010437, + "log_odds_ratio": -0.5333381295204163, + "logits/chosen": -1.2637674808502197, + "logits/rejected": -1.2599058151245117, + "logps/chosen": -0.24162539839744568, + "logps/rejected": -0.33872294425964355, + "loss": 6.5613, + "nll_loss": 1.5869882106781006, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02416253834962845, + "rewards/margins": 0.009709754958748817, + "rewards/rejected": -0.033872295171022415, + "step": 192 + }, + { + "epoch": 0.1334716459197787, + "grad_norm": 4.024544715881348, + "learning_rate": 6.673582295988935e-06, + "log_odds_chosen": 0.45041486620903015, + "log_odds_ratio": -0.6190093755722046, + "logits/chosen": -0.9153847098350525, + "logits/rejected": -0.9401689767837524, + "logps/chosen": -0.20653948187828064, + "logps/rejected": -0.3575013279914856, + "loss": 6.2296, + "nll_loss": 1.4955058097839355, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020653948187828064, + "rewards/margins": 0.015096185728907585, + "rewards/rejected": -0.0357501320540905, + "step": 193 + }, + { + "epoch": 0.13416320885200553, + "grad_norm": 2.5976641178131104, + "learning_rate": 6.708160442600277e-06, + "log_odds_chosen": 0.7277088761329651, + "log_odds_ratio": -0.4611130654811859, + "logits/chosen": -1.1048755645751953, + "logits/rejected": -1.107547402381897, + "logps/chosen": -0.2420302927494049, + "logps/rejected": -0.4254276156425476, + "loss": 6.6071, + "nll_loss": 1.6056554317474365, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02420303039252758, + "rewards/margins": 0.01833973452448845, + "rewards/rejected": -0.04254276305437088, + "step": 194 + }, + { + "epoch": 0.13485477178423236, + "grad_norm": 2.962162733078003, + "learning_rate": 6.7427385892116184e-06, + "log_odds_chosen": 0.519116997718811, + "log_odds_ratio": -0.5505663752555847, + "logits/chosen": -1.0663193464279175, + "logits/rejected": -1.0633552074432373, + "logps/chosen": -0.1306827962398529, + "logps/rejected": -0.23576460778713226, + "loss": 3.9006, + "nll_loss": 0.9200987219810486, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01306828111410141, + "rewards/margins": 0.010508181527256966, + "rewards/rejected": -0.023576460778713226, + "step": 195 + }, + { + "epoch": 0.1355463347164592, + "grad_norm": 4.697093486785889, + "learning_rate": 6.77731673582296e-06, + "log_odds_chosen": 0.3077636957168579, + "log_odds_ratio": -0.6371062397956848, + "logits/chosen": -1.100118637084961, + "logits/rejected": -1.0694329738616943, + "logps/chosen": -0.21723446249961853, + "logps/rejected": -0.3122878670692444, + "loss": 6.3028, + "nll_loss": 1.5120017528533936, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021723445504903793, + "rewards/margins": 0.00950533989816904, + "rewards/rejected": -0.03122878633439541, + "step": 196 + }, + { + "epoch": 0.13623789764868602, + "grad_norm": 2.884382486343384, + "learning_rate": 6.8118948824343016e-06, + "log_odds_chosen": -0.0943203717470169, + "log_odds_ratio": -0.7874737977981567, + "logits/chosen": -0.5834493637084961, + "logits/rejected": -0.5732121467590332, + "logps/chosen": -0.2202082872390747, + "logps/rejected": -0.23534724116325378, + "loss": 4.5543, + "nll_loss": 1.0598318576812744, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.02202082984149456, + "rewards/margins": 0.001513894647359848, + "rewards/rejected": -0.023534726351499557, + "step": 197 + }, + { + "epoch": 0.13692946058091288, + "grad_norm": 3.0852651596069336, + "learning_rate": 6.846473029045644e-06, + "log_odds_chosen": 1.2487186193466187, + "log_odds_ratio": -0.5184494256973267, + "logits/chosen": -0.833308756351471, + "logits/rejected": -0.8734216690063477, + "logps/chosen": -0.18357661366462708, + "logps/rejected": -0.36280807852745056, + "loss": 6.8616, + "nll_loss": 1.6635560989379883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018357660621404648, + "rewards/margins": 0.017923148348927498, + "rewards/rejected": -0.036280810832977295, + "step": 198 + }, + { + "epoch": 0.1376210235131397, + "grad_norm": 3.3805103302001953, + "learning_rate": 6.8810511756569855e-06, + "log_odds_chosen": 0.44542139768600464, + "log_odds_ratio": -0.5214876532554626, + "logits/chosen": -0.8738541603088379, + "logits/rejected": -0.8638968467712402, + "logps/chosen": -0.19244788587093353, + "logps/rejected": -0.28946179151535034, + "loss": 6.6065, + "nll_loss": 1.599472165107727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019244790077209473, + "rewards/margins": 0.009701389819383621, + "rewards/rejected": -0.028946179896593094, + "step": 199 + }, + { + "epoch": 0.13831258644536654, + "grad_norm": 4.198734760284424, + "learning_rate": 6.915629322268327e-06, + "log_odds_chosen": 0.34398406744003296, + "log_odds_ratio": -0.6945401430130005, + "logits/chosen": -0.725531280040741, + "logits/rejected": -0.7579768896102905, + "logps/chosen": -0.26485031843185425, + "logps/rejected": -0.25032472610473633, + "loss": 6.4356, + "nll_loss": 1.5394471883773804, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.026485033333301544, + "rewards/margins": -0.0014525633305311203, + "rewards/rejected": -0.025032471865415573, + "step": 200 + }, + { + "epoch": 0.13900414937759337, + "grad_norm": 4.543229103088379, + "learning_rate": 6.950207468879669e-06, + "log_odds_chosen": 1.7519770860671997, + "log_odds_ratio": -0.33452051877975464, + "logits/chosen": -1.173638939857483, + "logits/rejected": -1.2212574481964111, + "logps/chosen": -0.14706477522850037, + "logps/rejected": -0.7356031537055969, + "loss": 7.496, + "nll_loss": 1.8405449390411377, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014706477522850037, + "rewards/margins": 0.058853842318058014, + "rewards/rejected": -0.07356031984090805, + "step": 201 + }, + { + "epoch": 0.1396957123098202, + "grad_norm": 2.9318344593048096, + "learning_rate": 6.98478561549101e-06, + "log_odds_chosen": 0.624595582485199, + "log_odds_ratio": -0.4736407995223999, + "logits/chosen": -0.9576144218444824, + "logits/rejected": -0.9811232686042786, + "logps/chosen": -0.22369879484176636, + "logps/rejected": -0.393297016620636, + "loss": 6.1301, + "nll_loss": 1.485149621963501, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.022369876503944397, + "rewards/margins": 0.016959823668003082, + "rewards/rejected": -0.03932970389723778, + "step": 202 + }, + { + "epoch": 0.14038727524204703, + "grad_norm": 3.0044398307800293, + "learning_rate": 7.019363762102352e-06, + "log_odds_chosen": 0.9112159013748169, + "log_odds_ratio": -0.5825238227844238, + "logits/chosen": -1.040523886680603, + "logits/rejected": -1.0338996648788452, + "logps/chosen": -0.15042006969451904, + "logps/rejected": -0.40081560611724854, + "loss": 5.4207, + "nll_loss": 1.2969247102737427, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015042006969451904, + "rewards/margins": 0.02503955364227295, + "rewards/rejected": -0.04008156433701515, + "step": 203 + }, + { + "epoch": 0.14107883817427386, + "grad_norm": 4.994499206542969, + "learning_rate": 7.053941908713693e-06, + "log_odds_chosen": 0.053051501512527466, + "log_odds_ratio": -0.77434903383255, + "logits/chosen": -1.011791467666626, + "logits/rejected": -1.0063687562942505, + "logps/chosen": -0.29917120933532715, + "logps/rejected": -0.3704149127006531, + "loss": 6.1848, + "nll_loss": 1.4687684774398804, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.029917120933532715, + "rewards/margins": 0.007124368101358414, + "rewards/rejected": -0.03704149276018143, + "step": 204 + }, + { + "epoch": 0.1417704011065007, + "grad_norm": 5.865853786468506, + "learning_rate": 7.088520055325035e-06, + "log_odds_chosen": 0.41838449239730835, + "log_odds_ratio": -0.8382209539413452, + "logits/chosen": -0.9785934686660767, + "logits/rejected": -0.9925634264945984, + "logps/chosen": -0.21129783987998962, + "logps/rejected": -0.4044276475906372, + "loss": 6.6055, + "nll_loss": 1.5675466060638428, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021129783242940903, + "rewards/margins": 0.019312981516122818, + "rewards/rejected": -0.04044276475906372, + "step": 205 + }, + { + "epoch": 0.14246196403872752, + "grad_norm": 3.181340217590332, + "learning_rate": 7.1230982019363765e-06, + "log_odds_chosen": 1.314388632774353, + "log_odds_ratio": -0.3325830101966858, + "logits/chosen": -0.8861981630325317, + "logits/rejected": -0.8500977754592896, + "logps/chosen": -0.12594662606716156, + "logps/rejected": -0.34991195797920227, + "loss": 6.2821, + "nll_loss": 1.5372616052627563, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01259466353803873, + "rewards/margins": 0.022396530956029892, + "rewards/rejected": -0.034991197288036346, + "step": 206 + }, + { + "epoch": 0.14315352697095435, + "grad_norm": 3.157027006149292, + "learning_rate": 7.157676348547718e-06, + "log_odds_chosen": 0.8879727125167847, + "log_odds_ratio": -0.5650953054428101, + "logits/chosen": -0.612299919128418, + "logits/rejected": -0.6093465089797974, + "logps/chosen": -0.2196851372718811, + "logps/rejected": -0.3596701920032501, + "loss": 6.3127, + "nll_loss": 1.5216560363769531, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02196851372718811, + "rewards/margins": 0.013998505659401417, + "rewards/rejected": -0.03596701845526695, + "step": 207 + }, + { + "epoch": 0.14384508990318118, + "grad_norm": 4.110046863555908, + "learning_rate": 7.19225449515906e-06, + "log_odds_chosen": -0.11123251914978027, + "log_odds_ratio": -0.9420138001441956, + "logits/chosen": -0.8208776116371155, + "logits/rejected": -0.814195990562439, + "logps/chosen": -0.2516728937625885, + "logps/rejected": -0.26293325424194336, + "loss": 5.744, + "nll_loss": 1.3417918682098389, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02516729012131691, + "rewards/margins": 0.0011260367464274168, + "rewards/rejected": -0.026293326169252396, + "step": 208 + }, + { + "epoch": 0.14453665283540804, + "grad_norm": 3.6285037994384766, + "learning_rate": 7.226832641770402e-06, + "log_odds_chosen": 1.427751064300537, + "log_odds_ratio": -0.37405925989151, + "logits/chosen": -1.077873706817627, + "logits/rejected": -1.1133126020431519, + "logps/chosen": -0.12508371472358704, + "logps/rejected": -0.466911256313324, + "loss": 7.4872, + "nll_loss": 1.8343921899795532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012508371844887733, + "rewards/margins": 0.03418275713920593, + "rewards/rejected": -0.04669112712144852, + "step": 209 + }, + { + "epoch": 0.14522821576763487, + "grad_norm": 2.4061126708984375, + "learning_rate": 7.2614107883817436e-06, + "log_odds_chosen": 1.315793752670288, + "log_odds_ratio": -0.36149081587791443, + "logits/chosen": -0.8516741991043091, + "logits/rejected": -0.8345063924789429, + "logps/chosen": -0.12234769016504288, + "logps/rejected": -0.31906723976135254, + "loss": 4.86, + "nll_loss": 1.1788439750671387, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012234769761562347, + "rewards/margins": 0.019671954214572906, + "rewards/rejected": -0.031906723976135254, + "step": 210 + }, + { + "epoch": 0.1459197786998617, + "grad_norm": 5.4849395751953125, + "learning_rate": 7.295988934993085e-06, + "log_odds_chosen": 0.9772852659225464, + "log_odds_ratio": -0.6391040682792664, + "logits/chosen": -0.8843315243721008, + "logits/rejected": -0.9143941402435303, + "logps/chosen": -0.17293697595596313, + "logps/rejected": -0.3915655314922333, + "loss": 5.7228, + "nll_loss": 1.3667978048324585, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017293699085712433, + "rewards/margins": 0.021862853318452835, + "rewards/rejected": -0.039156556129455566, + "step": 211 + }, + { + "epoch": 0.14661134163208853, + "grad_norm": 3.6493422985076904, + "learning_rate": 7.330567081604427e-06, + "log_odds_chosen": -0.14418122172355652, + "log_odds_ratio": -1.1010364294052124, + "logits/chosen": -0.7637627720832825, + "logits/rejected": -0.8069720268249512, + "logps/chosen": -0.5388314723968506, + "logps/rejected": -0.20467592775821686, + "loss": 5.8703, + "nll_loss": 1.3574600219726562, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.053883146494627, + "rewards/margins": -0.033415548503398895, + "rewards/rejected": -0.020467594265937805, + "step": 212 + }, + { + "epoch": 0.14730290456431536, + "grad_norm": 6.45885705947876, + "learning_rate": 7.365145228215768e-06, + "log_odds_chosen": 0.9364676475524902, + "log_odds_ratio": -0.7672489285469055, + "logits/chosen": -1.0151984691619873, + "logits/rejected": -1.0229125022888184, + "logps/chosen": -0.21881504356861115, + "logps/rejected": -0.4902130365371704, + "loss": 6.6348, + "nll_loss": 1.581984043121338, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021881505846977234, + "rewards/margins": 0.027139799669384956, + "rewards/rejected": -0.04902130365371704, + "step": 213 + }, + { + "epoch": 0.1479944674965422, + "grad_norm": 2.9415462017059326, + "learning_rate": 7.39972337482711e-06, + "log_odds_chosen": 0.662085771560669, + "log_odds_ratio": -0.5639595985412598, + "logits/chosen": -0.6300270557403564, + "logits/rejected": -0.6536536812782288, + "logps/chosen": -0.17133358120918274, + "logps/rejected": -0.30744150280952454, + "loss": 4.2607, + "nll_loss": 1.0087754726409912, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017133358865976334, + "rewards/margins": 0.013610792346298695, + "rewards/rejected": -0.030744148418307304, + "step": 214 + }, + { + "epoch": 0.14868603042876902, + "grad_norm": 3.18498158454895, + "learning_rate": 7.434301521438451e-06, + "log_odds_chosen": 0.3586312234401703, + "log_odds_ratio": -0.8149222731590271, + "logits/chosen": -0.7855114936828613, + "logits/rejected": -0.7873052358627319, + "logps/chosen": -0.26449501514434814, + "logps/rejected": -0.2783412039279938, + "loss": 5.692, + "nll_loss": 1.3415006399154663, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.026449501514434814, + "rewards/margins": 0.0013846198562532663, + "rewards/rejected": -0.027834121137857437, + "step": 215 + }, + { + "epoch": 0.14937759336099585, + "grad_norm": 4.490957260131836, + "learning_rate": 7.468879668049793e-06, + "log_odds_chosen": 0.6908822059631348, + "log_odds_ratio": -0.6412980556488037, + "logits/chosen": -0.873936653137207, + "logits/rejected": -0.905531644821167, + "logps/chosen": -0.25872665643692017, + "logps/rejected": -0.318185031414032, + "loss": 5.5292, + "nll_loss": 1.318181037902832, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.025872664526104927, + "rewards/margins": 0.005945838056504726, + "rewards/rejected": -0.03181850537657738, + "step": 216 + }, + { + "epoch": 0.15006915629322268, + "grad_norm": 4.169113636016846, + "learning_rate": 7.5034578146611345e-06, + "log_odds_chosen": 0.30677735805511475, + "log_odds_ratio": -0.7200303077697754, + "logits/chosen": -1.0342464447021484, + "logits/rejected": -1.0058650970458984, + "logps/chosen": -0.2100173383951187, + "logps/rejected": -0.2714996933937073, + "loss": 5.6223, + "nll_loss": 1.3335614204406738, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02100173383951187, + "rewards/margins": 0.006148234941065311, + "rewards/rejected": -0.027149969711899757, + "step": 217 + }, + { + "epoch": 0.1507607192254495, + "grad_norm": 2.975595474243164, + "learning_rate": 7.538035961272476e-06, + "log_odds_chosen": 1.100553035736084, + "log_odds_ratio": -0.4032314419746399, + "logits/chosen": -0.943057119846344, + "logits/rejected": -0.9438403844833374, + "logps/chosen": -0.19745934009552002, + "logps/rejected": -0.48316293954849243, + "loss": 4.7155, + "nll_loss": 1.1385633945465088, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01974593475461006, + "rewards/margins": 0.02857036143541336, + "rewards/rejected": -0.04831629619002342, + "step": 218 + }, + { + "epoch": 0.15145228215767634, + "grad_norm": 4.872454643249512, + "learning_rate": 7.572614107883818e-06, + "log_odds_chosen": -0.028303883969783783, + "log_odds_ratio": -1.1446633338928223, + "logits/chosen": -1.0451685190200806, + "logits/rejected": -1.0431214570999146, + "logps/chosen": -0.34600648283958435, + "logps/rejected": -0.4508405923843384, + "loss": 6.3832, + "nll_loss": 1.4813305139541626, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.034600649029016495, + "rewards/margins": 0.010483408346772194, + "rewards/rejected": -0.04508405551314354, + "step": 219 + }, + { + "epoch": 0.15214384508990317, + "grad_norm": 4.090143203735352, + "learning_rate": 7.607192254495158e-06, + "log_odds_chosen": 0.5904234647750854, + "log_odds_ratio": -0.6713616847991943, + "logits/chosen": -0.8287036418914795, + "logits/rejected": -0.8195431232452393, + "logps/chosen": -0.4618722200393677, + "logps/rejected": -0.5354244112968445, + "loss": 6.1785, + "nll_loss": 1.4774805307388306, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04618722200393677, + "rewards/margins": 0.007355216890573502, + "rewards/rejected": -0.05354243889451027, + "step": 220 + }, + { + "epoch": 0.15283540802213003, + "grad_norm": 4.119716644287109, + "learning_rate": 7.641770401106502e-06, + "log_odds_chosen": 0.6317969560623169, + "log_odds_ratio": -0.6077767610549927, + "logits/chosen": -1.0859671831130981, + "logits/rejected": -1.0858830213546753, + "logps/chosen": -0.20056869089603424, + "logps/rejected": -0.3793398141860962, + "loss": 7.1166, + "nll_loss": 1.7183798551559448, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020056869834661484, + "rewards/margins": 0.017877109348773956, + "rewards/rejected": -0.03793397918343544, + "step": 221 + }, + { + "epoch": 0.15352697095435686, + "grad_norm": 3.8006575107574463, + "learning_rate": 7.676348547717842e-06, + "log_odds_chosen": 1.653045892715454, + "log_odds_ratio": -0.4079207479953766, + "logits/chosen": -0.7932633757591248, + "logits/rejected": -0.7943170666694641, + "logps/chosen": -0.15025781095027924, + "logps/rejected": -0.329763799905777, + "loss": 6.9906, + "nll_loss": 1.706859827041626, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015025781467556953, + "rewards/margins": 0.017950598150491714, + "rewards/rejected": -0.03297637775540352, + "step": 222 + }, + { + "epoch": 0.1542185338865837, + "grad_norm": 5.726372718811035, + "learning_rate": 7.710926694329184e-06, + "log_odds_chosen": 1.1458200216293335, + "log_odds_ratio": -0.6311984062194824, + "logits/chosen": -0.8566406965255737, + "logits/rejected": -0.8902969360351562, + "logps/chosen": -0.2128678560256958, + "logps/rejected": -0.3930707275867462, + "loss": 6.5217, + "nll_loss": 1.5672950744628906, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02128678746521473, + "rewards/margins": 0.018020285293459892, + "rewards/rejected": -0.03930707648396492, + "step": 223 + }, + { + "epoch": 0.15491009681881052, + "grad_norm": 3.6631853580474854, + "learning_rate": 7.745504840940525e-06, + "log_odds_chosen": 0.3010627329349518, + "log_odds_ratio": -0.6827666759490967, + "logits/chosen": -0.6696209907531738, + "logits/rejected": -0.6726163029670715, + "logps/chosen": -0.11562220007181168, + "logps/rejected": -0.16979828476905823, + "loss": 4.5242, + "nll_loss": 1.0627738237380981, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011562219820916653, + "rewards/margins": 0.00541760865598917, + "rewards/rejected": -0.016979828476905823, + "step": 224 + }, + { + "epoch": 0.15560165975103735, + "grad_norm": 2.3640875816345215, + "learning_rate": 7.780082987551867e-06, + "log_odds_chosen": 0.9647090435028076, + "log_odds_ratio": -0.3855966627597809, + "logits/chosen": -0.4787185788154602, + "logits/rejected": -0.4876733422279358, + "logps/chosen": -0.10393448173999786, + "logps/rejected": -0.26539182662963867, + "loss": 4.5303, + "nll_loss": 1.0940086841583252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010393448173999786, + "rewards/margins": 0.0161457359790802, + "rewards/rejected": -0.026539184153079987, + "step": 225 + }, + { + "epoch": 0.15629322268326418, + "grad_norm": 3.5224838256835938, + "learning_rate": 7.814661134163209e-06, + "log_odds_chosen": 1.3051297664642334, + "log_odds_ratio": -0.44781693816185, + "logits/chosen": -0.813154399394989, + "logits/rejected": -0.8059035539627075, + "logps/chosen": -0.1292162537574768, + "logps/rejected": -0.32095658779144287, + "loss": 6.3816, + "nll_loss": 1.5506187677383423, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012921624816954136, + "rewards/margins": 0.019174031913280487, + "rewards/rejected": -0.03209565579891205, + "step": 226 + }, + { + "epoch": 0.156984785615491, + "grad_norm": 3.95707106590271, + "learning_rate": 7.84923928077455e-06, + "log_odds_chosen": 0.27357932925224304, + "log_odds_ratio": -0.9965370297431946, + "logits/chosen": -0.703487753868103, + "logits/rejected": -0.6508299112319946, + "logps/chosen": -0.3244343400001526, + "logps/rejected": -0.35959532856941223, + "loss": 5.0346, + "nll_loss": 1.1590083837509155, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03244343400001526, + "rewards/margins": 0.0035160984843969345, + "rewards/rejected": -0.03595953434705734, + "step": 227 + }, + { + "epoch": 0.15767634854771784, + "grad_norm": 3.188214063644409, + "learning_rate": 7.883817427385892e-06, + "log_odds_chosen": 0.1925515979528427, + "log_odds_ratio": -0.6929178237915039, + "logits/chosen": -0.9470303058624268, + "logits/rejected": -0.9407349228858948, + "logps/chosen": -0.226731076836586, + "logps/rejected": -0.31980404257774353, + "loss": 4.4676, + "nll_loss": 1.0476175546646118, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0226731076836586, + "rewards/margins": 0.009307296946644783, + "rewards/rejected": -0.03198040649294853, + "step": 228 + }, + { + "epoch": 0.15836791147994467, + "grad_norm": 5.166046142578125, + "learning_rate": 7.918395573997233e-06, + "log_odds_chosen": -0.8341346979141235, + "log_odds_ratio": -1.5097215175628662, + "logits/chosen": -0.9731870889663696, + "logits/rejected": -0.960036039352417, + "logps/chosen": -0.4464319944381714, + "logps/rejected": -0.2061399519443512, + "loss": 6.0878, + "nll_loss": 1.370969533920288, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04464320093393326, + "rewards/margins": -0.024029206484556198, + "rewards/rejected": -0.02061399631202221, + "step": 229 + }, + { + "epoch": 0.1590594744121715, + "grad_norm": 3.6036903858184814, + "learning_rate": 7.952973720608575e-06, + "log_odds_chosen": 0.7378802299499512, + "log_odds_ratio": -0.5369628667831421, + "logits/chosen": -0.6612091064453125, + "logits/rejected": -0.6341161727905273, + "logps/chosen": -0.1552380621433258, + "logps/rejected": -0.2605130076408386, + "loss": 5.5298, + "nll_loss": 1.328743815422058, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015523807145655155, + "rewards/margins": 0.010527495294809341, + "rewards/rejected": -0.02605130337178707, + "step": 230 + }, + { + "epoch": 0.15975103734439833, + "grad_norm": 2.9829447269439697, + "learning_rate": 7.987551867219916e-06, + "log_odds_chosen": 0.7720410227775574, + "log_odds_ratio": -0.6032599210739136, + "logits/chosen": -0.9018489122390747, + "logits/rejected": -0.8468755483627319, + "logps/chosen": -0.24757295846939087, + "logps/rejected": -0.333136647939682, + "loss": 5.1035, + "nll_loss": 1.215556263923645, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.024757295846939087, + "rewards/margins": 0.008556367829442024, + "rewards/rejected": -0.03331366181373596, + "step": 231 + }, + { + "epoch": 0.16044260027662519, + "grad_norm": 4.110328197479248, + "learning_rate": 8.02213001383126e-06, + "log_odds_chosen": -0.3504614531993866, + "log_odds_ratio": -1.1943254470825195, + "logits/chosen": -0.8923564553260803, + "logits/rejected": -0.8823562860488892, + "logps/chosen": -0.5723345875740051, + "logps/rejected": -0.25823917984962463, + "loss": 6.3108, + "nll_loss": 1.4582581520080566, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05723346397280693, + "rewards/margins": -0.03140954673290253, + "rewards/rejected": -0.025823919102549553, + "step": 232 + }, + { + "epoch": 0.16113416320885202, + "grad_norm": 3.6569745540618896, + "learning_rate": 8.056708160442601e-06, + "log_odds_chosen": 0.28908300399780273, + "log_odds_ratio": -0.6737136840820312, + "logits/chosen": -0.9151521921157837, + "logits/rejected": -0.9005734920501709, + "logps/chosen": -0.37384504079818726, + "logps/rejected": -0.3533002734184265, + "loss": 6.6159, + "nll_loss": 1.5866096019744873, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.037384502589702606, + "rewards/margins": -0.0020544761791825294, + "rewards/rejected": -0.03533002734184265, + "step": 233 + }, + { + "epoch": 0.16182572614107885, + "grad_norm": 3.0560779571533203, + "learning_rate": 8.091286307053943e-06, + "log_odds_chosen": 0.14291715621948242, + "log_odds_ratio": -1.0155534744262695, + "logits/chosen": -0.9692038297653198, + "logits/rejected": -0.9579259157180786, + "logps/chosen": -0.44459283351898193, + "logps/rejected": -0.2261468470096588, + "loss": 6.0231, + "nll_loss": 1.4042236804962158, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04445928335189819, + "rewards/margins": -0.02184459939599037, + "rewards/rejected": -0.02261468581855297, + "step": 234 + }, + { + "epoch": 0.16251728907330568, + "grad_norm": 3.007359266281128, + "learning_rate": 8.125864453665284e-06, + "log_odds_chosen": 0.24351224303245544, + "log_odds_ratio": -0.6321843266487122, + "logits/chosen": -0.9844968318939209, + "logits/rejected": -0.9822818040847778, + "logps/chosen": -0.18725910782814026, + "logps/rejected": -0.19656959176063538, + "loss": 5.6756, + "nll_loss": 1.3556902408599854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018725909292697906, + "rewards/margins": 0.0009310483001172543, + "rewards/rejected": -0.019656959921121597, + "step": 235 + }, + { + "epoch": 0.1632088520055325, + "grad_norm": 3.4686291217803955, + "learning_rate": 8.160442600276626e-06, + "log_odds_chosen": 0.16042087972164154, + "log_odds_ratio": -0.8572741150856018, + "logits/chosen": -0.7890150547027588, + "logits/rejected": -0.7376941442489624, + "logps/chosen": -0.23788484930992126, + "logps/rejected": -0.29508593678474426, + "loss": 5.0317, + "nll_loss": 1.1721961498260498, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023788485676050186, + "rewards/margins": 0.005720109213143587, + "rewards/rejected": -0.029508594423532486, + "step": 236 + }, + { + "epoch": 0.16390041493775934, + "grad_norm": 2.9869203567504883, + "learning_rate": 8.195020746887967e-06, + "log_odds_chosen": 0.40226978063583374, + "log_odds_ratio": -0.7669799327850342, + "logits/chosen": -0.6808412671089172, + "logits/rejected": -0.635565459728241, + "logps/chosen": -0.21583965420722961, + "logps/rejected": -0.37887197732925415, + "loss": 6.1774, + "nll_loss": 1.4676564931869507, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021583963185548782, + "rewards/margins": 0.016303233802318573, + "rewards/rejected": -0.037887200713157654, + "step": 237 + }, + { + "epoch": 0.16459197786998617, + "grad_norm": 3.5267534255981445, + "learning_rate": 8.229598893499309e-06, + "log_odds_chosen": 0.41741740703582764, + "log_odds_ratio": -0.6975635290145874, + "logits/chosen": -0.8619732856750488, + "logits/rejected": -0.8443213701248169, + "logps/chosen": -0.24961000680923462, + "logps/rejected": -0.3617687225341797, + "loss": 6.1947, + "nll_loss": 1.4789154529571533, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02496100217103958, + "rewards/margins": 0.011215871199965477, + "rewards/rejected": -0.03617687523365021, + "step": 238 + }, + { + "epoch": 0.165283540802213, + "grad_norm": 3.9766554832458496, + "learning_rate": 8.26417704011065e-06, + "log_odds_chosen": 1.0255920886993408, + "log_odds_ratio": -0.6245083808898926, + "logits/chosen": -0.8572830557823181, + "logits/rejected": -0.8940591216087341, + "logps/chosen": -0.15826298296451569, + "logps/rejected": -0.5650017261505127, + "loss": 5.4706, + "nll_loss": 1.3052107095718384, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015826299786567688, + "rewards/margins": 0.0406738743185997, + "rewards/rejected": -0.05650017410516739, + "step": 239 + }, + { + "epoch": 0.16597510373443983, + "grad_norm": 3.8060593605041504, + "learning_rate": 8.298755186721992e-06, + "log_odds_chosen": 1.074886441230774, + "log_odds_ratio": -0.3917398750782013, + "logits/chosen": -0.9265488982200623, + "logits/rejected": -0.9145898222923279, + "logps/chosen": -0.16986651718616486, + "logps/rejected": -0.3676261305809021, + "loss": 6.0459, + "nll_loss": 1.4722979068756104, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016986653208732605, + "rewards/margins": 0.019775960594415665, + "rewards/rejected": -0.03676261380314827, + "step": 240 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 3.665224552154541, + "learning_rate": 8.333333333333334e-06, + "log_odds_chosen": 1.9552595615386963, + "log_odds_ratio": -0.2515340745449066, + "logits/chosen": -0.7696788907051086, + "logits/rejected": -0.7876995801925659, + "logps/chosen": -0.11934017390012741, + "logps/rejected": -0.5310408473014832, + "loss": 6.5986, + "nll_loss": 1.6245028972625732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011934017762541771, + "rewards/margins": 0.041170068085193634, + "rewards/rejected": -0.053104087710380554, + "step": 241 + }, + { + "epoch": 0.1673582295988935, + "grad_norm": 4.817811012268066, + "learning_rate": 8.367911479944675e-06, + "log_odds_chosen": -0.005749780684709549, + "log_odds_ratio": -0.7882553339004517, + "logits/chosen": -0.8050568103790283, + "logits/rejected": -0.8051145076751709, + "logps/chosen": -0.21885880827903748, + "logps/rejected": -0.1906094253063202, + "loss": 5.7043, + "nll_loss": 1.3472473621368408, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.021885879337787628, + "rewards/margins": -0.002824939088895917, + "rewards/rejected": -0.01906094141304493, + "step": 242 + }, + { + "epoch": 0.16804979253112035, + "grad_norm": 4.204524040222168, + "learning_rate": 8.402489626556017e-06, + "log_odds_chosen": 1.3039932250976562, + "log_odds_ratio": -0.36999383568763733, + "logits/chosen": -1.1626389026641846, + "logits/rejected": -1.1959856748580933, + "logps/chosen": -0.14168334007263184, + "logps/rejected": -0.3843676745891571, + "loss": 6.8428, + "nll_loss": 1.673712134361267, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014168335124850273, + "rewards/margins": 0.024268433451652527, + "rewards/rejected": -0.03843677043914795, + "step": 243 + }, + { + "epoch": 0.16874135546334718, + "grad_norm": 3.659269094467163, + "learning_rate": 8.437067773167358e-06, + "log_odds_chosen": 1.0554488897323608, + "log_odds_ratio": -0.5076719522476196, + "logits/chosen": -0.7935200929641724, + "logits/rejected": -0.8115878105163574, + "logps/chosen": -0.1356905847787857, + "logps/rejected": -0.25169292092323303, + "loss": 5.9024, + "nll_loss": 1.4248311519622803, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013569057919085026, + "rewards/margins": 0.011600232683122158, + "rewards/rejected": -0.025169292464852333, + "step": 244 + }, + { + "epoch": 0.169432918395574, + "grad_norm": 3.3517634868621826, + "learning_rate": 8.4716459197787e-06, + "log_odds_chosen": 0.6800482273101807, + "log_odds_ratio": -0.6337831020355225, + "logits/chosen": -0.9669154286384583, + "logits/rejected": -1.0145224332809448, + "logps/chosen": -0.19900593161582947, + "logps/rejected": -0.28002551198005676, + "loss": 6.2381, + "nll_loss": 1.4961349964141846, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019900593906641006, + "rewards/margins": 0.00810195691883564, + "rewards/rejected": -0.028002548962831497, + "step": 245 + }, + { + "epoch": 0.17012448132780084, + "grad_norm": 3.6895792484283447, + "learning_rate": 8.506224066390042e-06, + "log_odds_chosen": 0.8594756126403809, + "log_odds_ratio": -0.5369733572006226, + "logits/chosen": -1.1025549173355103, + "logits/rejected": -1.1240384578704834, + "logps/chosen": -0.14387311041355133, + "logps/rejected": -0.3072226941585541, + "loss": 5.7825, + "nll_loss": 1.391920804977417, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014387311413884163, + "rewards/margins": 0.016334956511855125, + "rewards/rejected": -0.03072226792573929, + "step": 246 + }, + { + "epoch": 0.17081604426002767, + "grad_norm": 3.3241751194000244, + "learning_rate": 8.540802213001383e-06, + "log_odds_chosen": 0.6280308961868286, + "log_odds_ratio": -0.4941891133785248, + "logits/chosen": -0.7816058397293091, + "logits/rejected": -0.7533829212188721, + "logps/chosen": -0.2505534887313843, + "logps/rejected": -0.37605032324790955, + "loss": 5.6624, + "nll_loss": 1.366180419921875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.025055352598428726, + "rewards/margins": 0.012549685314297676, + "rewards/rejected": -0.03760503977537155, + "step": 247 + }, + { + "epoch": 0.1715076071922545, + "grad_norm": 4.798497676849365, + "learning_rate": 8.575380359612725e-06, + "log_odds_chosen": -0.01965467631816864, + "log_odds_ratio": -0.8393452167510986, + "logits/chosen": -0.8515428304672241, + "logits/rejected": -0.8434562683105469, + "logps/chosen": -0.23914137482643127, + "logps/rejected": -0.2302568256855011, + "loss": 5.8272, + "nll_loss": 1.3728694915771484, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023914135992527008, + "rewards/margins": -0.0008884554263204336, + "rewards/rejected": -0.02302568219602108, + "step": 248 + }, + { + "epoch": 0.17219917012448133, + "grad_norm": 3.0810976028442383, + "learning_rate": 8.609958506224066e-06, + "log_odds_chosen": 0.8071850538253784, + "log_odds_ratio": -0.43749377131462097, + "logits/chosen": -0.8725613951683044, + "logits/rejected": -0.8765714168548584, + "logps/chosen": -0.15248632431030273, + "logps/rejected": -0.2783476710319519, + "loss": 5.7413, + "nll_loss": 1.391564130783081, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015248632058501244, + "rewards/margins": 0.012586135417222977, + "rewards/rejected": -0.02783476747572422, + "step": 249 + }, + { + "epoch": 0.17289073305670816, + "grad_norm": 5.421039581298828, + "learning_rate": 8.644536652835408e-06, + "log_odds_chosen": -0.3103007674217224, + "log_odds_ratio": -0.9351741671562195, + "logits/chosen": -0.8125675916671753, + "logits/rejected": -0.7800061702728271, + "logps/chosen": -0.19023606181144714, + "logps/rejected": -0.17538899183273315, + "loss": 5.8487, + "nll_loss": 1.3686484098434448, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019023608416318893, + "rewards/margins": -0.0014847079291939735, + "rewards/rejected": -0.017538899555802345, + "step": 250 + }, + { + "epoch": 0.173582295988935, + "grad_norm": 4.459559440612793, + "learning_rate": 8.67911479944675e-06, + "log_odds_chosen": 1.124354600906372, + "log_odds_ratio": -0.5342854261398315, + "logits/chosen": -1.0505995750427246, + "logits/rejected": -1.0487140417099, + "logps/chosen": -0.13768863677978516, + "logps/rejected": -0.33856356143951416, + "loss": 6.1951, + "nll_loss": 1.4953362941741943, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013768864795565605, + "rewards/margins": 0.02008748985826969, + "rewards/rejected": -0.0338563546538353, + "step": 251 + }, + { + "epoch": 0.17427385892116182, + "grad_norm": 4.052998065948486, + "learning_rate": 8.713692946058091e-06, + "log_odds_chosen": 0.19669348001480103, + "log_odds_ratio": -0.8254600763320923, + "logits/chosen": -0.9436452388763428, + "logits/rejected": -0.9478350877761841, + "logps/chosen": -0.24237194657325745, + "logps/rejected": -0.316573828458786, + "loss": 5.9555, + "nll_loss": 1.4063365459442139, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.024237193167209625, + "rewards/margins": 0.0074201906099915504, + "rewards/rejected": -0.0316573828458786, + "step": 252 + }, + { + "epoch": 0.17496542185338865, + "grad_norm": 3.448655605316162, + "learning_rate": 8.748271092669432e-06, + "log_odds_chosen": 0.10002302378416061, + "log_odds_ratio": -0.7270524501800537, + "logits/chosen": -0.8413020968437195, + "logits/rejected": -0.8493680953979492, + "logps/chosen": -0.30035310983657837, + "logps/rejected": -0.2797276973724365, + "loss": 6.0363, + "nll_loss": 1.4363651275634766, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.030035313218832016, + "rewards/margins": -0.0020625414326786995, + "rewards/rejected": -0.027972770854830742, + "step": 253 + }, + { + "epoch": 0.17565698478561548, + "grad_norm": 3.6428167819976807, + "learning_rate": 8.782849239280774e-06, + "log_odds_chosen": 0.5170423984527588, + "log_odds_ratio": -0.5325826406478882, + "logits/chosen": -0.8044297099113464, + "logits/rejected": -0.8021126389503479, + "logps/chosen": -0.20451143383979797, + "logps/rejected": -0.34033912420272827, + "loss": 6.0516, + "nll_loss": 1.4596498012542725, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020451147109270096, + "rewards/margins": 0.01358276791870594, + "rewards/rejected": -0.03403391316533089, + "step": 254 + }, + { + "epoch": 0.17634854771784234, + "grad_norm": 3.2239508628845215, + "learning_rate": 8.817427385892117e-06, + "log_odds_chosen": 1.6908828020095825, + "log_odds_ratio": -0.3723670542240143, + "logits/chosen": -0.9017342329025269, + "logits/rejected": -0.94149249792099, + "logps/chosen": -0.17674483358860016, + "logps/rejected": -0.42865657806396484, + "loss": 5.8309, + "nll_loss": 1.4204809665679932, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017674485221505165, + "rewards/margins": 0.0251911748200655, + "rewards/rejected": -0.04286566376686096, + "step": 255 + }, + { + "epoch": 0.17704011065006917, + "grad_norm": 3.5115952491760254, + "learning_rate": 8.852005532503459e-06, + "log_odds_chosen": 1.3778259754180908, + "log_odds_ratio": -0.43739527463912964, + "logits/chosen": -0.9482162594795227, + "logits/rejected": -0.9856005907058716, + "logps/chosen": -0.2021976113319397, + "logps/rejected": -0.8205811381340027, + "loss": 5.2779, + "nll_loss": 1.2757349014282227, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02021976187825203, + "rewards/margins": 0.06183835491538048, + "rewards/rejected": -0.08205811679363251, + "step": 256 + }, + { + "epoch": 0.177731673582296, + "grad_norm": 3.428536891937256, + "learning_rate": 8.8865836791148e-06, + "log_odds_chosen": 1.5049645900726318, + "log_odds_ratio": -0.4445021450519562, + "logits/chosen": -0.8670735359191895, + "logits/rejected": -0.9449098110198975, + "logps/chosen": -0.2028433382511139, + "logps/rejected": -0.5087571740150452, + "loss": 4.5936, + "nll_loss": 1.1039620637893677, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02028433419764042, + "rewards/margins": 0.03059137985110283, + "rewards/rejected": -0.0508757159113884, + "step": 257 + }, + { + "epoch": 0.17842323651452283, + "grad_norm": 3.801056385040283, + "learning_rate": 8.921161825726142e-06, + "log_odds_chosen": 1.4026226997375488, + "log_odds_ratio": -0.5079362988471985, + "logits/chosen": -0.9943914413452148, + "logits/rejected": -0.9608233571052551, + "logps/chosen": -0.18597161769866943, + "logps/rejected": -0.4813200831413269, + "loss": 4.6482, + "nll_loss": 1.1112600564956665, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.018597161397337914, + "rewards/margins": 0.029534848406910896, + "rewards/rejected": -0.04813200980424881, + "step": 258 + }, + { + "epoch": 0.17911479944674966, + "grad_norm": 3.898007869720459, + "learning_rate": 8.955739972337484e-06, + "log_odds_chosen": 0.8249939680099487, + "log_odds_ratio": -0.6307661533355713, + "logits/chosen": -1.0948408842086792, + "logits/rejected": -1.1247516870498657, + "logps/chosen": -0.21543753147125244, + "logps/rejected": -0.3504295349121094, + "loss": 6.9519, + "nll_loss": 1.6748905181884766, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021543754264712334, + "rewards/margins": 0.013499200344085693, + "rewards/rejected": -0.03504295274615288, + "step": 259 + }, + { + "epoch": 0.1798063623789765, + "grad_norm": 4.461514949798584, + "learning_rate": 8.990318118948825e-06, + "log_odds_chosen": -0.24836111068725586, + "log_odds_ratio": -1.4388465881347656, + "logits/chosen": -0.8681745529174805, + "logits/rejected": -0.8156797885894775, + "logps/chosen": -0.6750339865684509, + "logps/rejected": -0.2794366180896759, + "loss": 5.7843, + "nll_loss": 1.30219566822052, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.06750340014696121, + "rewards/margins": -0.0395597368478775, + "rewards/rejected": -0.02794366329908371, + "step": 260 + }, + { + "epoch": 0.18049792531120332, + "grad_norm": 3.908860683441162, + "learning_rate": 9.024896265560167e-06, + "log_odds_chosen": 0.9576330184936523, + "log_odds_ratio": -0.5267143845558167, + "logits/chosen": -1.064504861831665, + "logits/rejected": -1.1053556203842163, + "logps/chosen": -0.15372568368911743, + "logps/rejected": -0.30671191215515137, + "loss": 6.5403, + "nll_loss": 1.5824048519134521, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015372568741440773, + "rewards/margins": 0.015298626385629177, + "rewards/rejected": -0.030671194195747375, + "step": 261 + }, + { + "epoch": 0.18118948824343015, + "grad_norm": 3.937495231628418, + "learning_rate": 9.059474412171508e-06, + "log_odds_chosen": 0.8034919500350952, + "log_odds_ratio": -0.5796293616294861, + "logits/chosen": -0.7137233018875122, + "logits/rejected": -0.7320935130119324, + "logps/chosen": -0.17258089780807495, + "logps/rejected": -0.30214375257492065, + "loss": 5.7763, + "nll_loss": 1.3861002922058105, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017258090898394585, + "rewards/margins": 0.012956284917891026, + "rewards/rejected": -0.030214374884963036, + "step": 262 + }, + { + "epoch": 0.18188105117565698, + "grad_norm": 4.973979949951172, + "learning_rate": 9.09405255878285e-06, + "log_odds_chosen": -0.3208833336830139, + "log_odds_ratio": -1.0229796171188354, + "logits/chosen": -0.9595593214035034, + "logits/rejected": -0.9800918102264404, + "logps/chosen": -0.46374863386154175, + "logps/rejected": -0.2736175060272217, + "loss": 7.3563, + "nll_loss": 1.7367701530456543, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.046374864876270294, + "rewards/margins": -0.019013112410902977, + "rewards/rejected": -0.02736174874007702, + "step": 263 + }, + { + "epoch": 0.1825726141078838, + "grad_norm": 4.1658830642700195, + "learning_rate": 9.128630705394191e-06, + "log_odds_chosen": 0.6217221021652222, + "log_odds_ratio": -0.6416304111480713, + "logits/chosen": -0.7790597677230835, + "logits/rejected": -0.7822107672691345, + "logps/chosen": -0.17077568173408508, + "logps/rejected": -0.2453240156173706, + "loss": 6.015, + "nll_loss": 1.4395976066589355, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017077568918466568, + "rewards/margins": 0.007454832550138235, + "rewards/rejected": -0.02453240193426609, + "step": 264 + }, + { + "epoch": 0.18326417704011064, + "grad_norm": 3.1925108432769775, + "learning_rate": 9.163208852005533e-06, + "log_odds_chosen": 2.2674779891967773, + "log_odds_ratio": -0.3325195908546448, + "logits/chosen": -0.9382967948913574, + "logits/rejected": -0.9911866188049316, + "logps/chosen": -0.05271158739924431, + "logps/rejected": -0.5133439898490906, + "loss": 5.6839, + "nll_loss": 1.3877242803573608, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005271159112453461, + "rewards/margins": 0.04606323689222336, + "rewards/rejected": -0.05133439600467682, + "step": 265 + }, + { + "epoch": 0.1839557399723375, + "grad_norm": 4.016493320465088, + "learning_rate": 9.197786998616875e-06, + "log_odds_chosen": 0.9899934530258179, + "log_odds_ratio": -0.4765719175338745, + "logits/chosen": -1.0239133834838867, + "logits/rejected": -0.9974214434623718, + "logps/chosen": -0.14653241634368896, + "logps/rejected": -0.35717684030532837, + "loss": 6.1179, + "nll_loss": 1.4818254709243774, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014653241261839867, + "rewards/margins": 0.02106444165110588, + "rewards/rejected": -0.0357176810503006, + "step": 266 + }, + { + "epoch": 0.18464730290456433, + "grad_norm": 3.0796597003936768, + "learning_rate": 9.232365145228216e-06, + "log_odds_chosen": 0.2046814262866974, + "log_odds_ratio": -0.6776267290115356, + "logits/chosen": -0.9972403049468994, + "logits/rejected": -0.9939769506454468, + "logps/chosen": -0.2929823100566864, + "logps/rejected": -0.3252931833267212, + "loss": 6.4836, + "nll_loss": 1.5531431436538696, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02929823100566864, + "rewards/margins": 0.0032310900278389454, + "rewards/rejected": -0.0325293205678463, + "step": 267 + }, + { + "epoch": 0.18533886583679116, + "grad_norm": 4.9689459800720215, + "learning_rate": 9.266943291839558e-06, + "log_odds_chosen": 0.17572666704654694, + "log_odds_ratio": -0.7039728164672852, + "logits/chosen": -0.999485969543457, + "logits/rejected": -0.9911883473396301, + "logps/chosen": -0.22734355926513672, + "logps/rejected": -0.28024086356163025, + "loss": 6.8594, + "nll_loss": 1.6444549560546875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02273435704410076, + "rewards/margins": 0.005289729684591293, + "rewards/rejected": -0.028024086728692055, + "step": 268 + }, + { + "epoch": 0.18603042876901799, + "grad_norm": 3.828569173812866, + "learning_rate": 9.3015214384509e-06, + "log_odds_chosen": 1.1488004922866821, + "log_odds_ratio": -0.41310569643974304, + "logits/chosen": -0.7810618877410889, + "logits/rejected": -0.7955660820007324, + "logps/chosen": -0.1333785206079483, + "logps/rejected": -0.2772391140460968, + "loss": 5.4026, + "nll_loss": 1.3093348741531372, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01333785243332386, + "rewards/margins": 0.01438605785369873, + "rewards/rejected": -0.02772390842437744, + "step": 269 + }, + { + "epoch": 0.18672199170124482, + "grad_norm": 3.72995662689209, + "learning_rate": 9.33609958506224e-06, + "log_odds_chosen": 0.4392034411430359, + "log_odds_ratio": -0.5941404104232788, + "logits/chosen": -0.9812883138656616, + "logits/rejected": -0.988927960395813, + "logps/chosen": -0.16869939863681793, + "logps/rejected": -0.21304473280906677, + "loss": 5.6749, + "nll_loss": 1.359307885169983, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016869939863681793, + "rewards/margins": 0.004434533417224884, + "rewards/rejected": -0.021304473280906677, + "step": 270 + }, + { + "epoch": 0.18741355463347165, + "grad_norm": 2.949136734008789, + "learning_rate": 9.370677731673582e-06, + "log_odds_chosen": 2.5781455039978027, + "log_odds_ratio": -0.2511579096317291, + "logits/chosen": -0.6993355751037598, + "logits/rejected": -0.7362976670265198, + "logps/chosen": -0.08703863620758057, + "logps/rejected": -0.5672510862350464, + "loss": 5.5008, + "nll_loss": 1.350090503692627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008703864179551601, + "rewards/margins": 0.04802124947309494, + "rewards/rejected": -0.056725114583969116, + "step": 271 + }, + { + "epoch": 0.18810511756569848, + "grad_norm": 4.6766133308410645, + "learning_rate": 9.405255878284924e-06, + "log_odds_chosen": 0.4586489200592041, + "log_odds_ratio": -0.5706301927566528, + "logits/chosen": -0.8487648963928223, + "logits/rejected": -0.8466265201568604, + "logps/chosen": -0.20309209823608398, + "logps/rejected": -0.3035036325454712, + "loss": 5.498, + "nll_loss": 1.317443609237671, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0203092098236084, + "rewards/margins": 0.010041153989732265, + "rewards/rejected": -0.03035036474466324, + "step": 272 + }, + { + "epoch": 0.1887966804979253, + "grad_norm": 4.04107666015625, + "learning_rate": 9.439834024896265e-06, + "log_odds_chosen": 0.7465725541114807, + "log_odds_ratio": -0.6487242579460144, + "logits/chosen": -1.0556849241256714, + "logits/rejected": -1.0306531190872192, + "logps/chosen": -0.18017810583114624, + "logps/rejected": -0.3081243336200714, + "loss": 5.5121, + "nll_loss": 1.313159465789795, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018017811700701714, + "rewards/margins": 0.012794620357453823, + "rewards/rejected": -0.030812431126832962, + "step": 273 + }, + { + "epoch": 0.18948824343015214, + "grad_norm": 2.471160888671875, + "learning_rate": 9.474412171507607e-06, + "log_odds_chosen": 0.851750373840332, + "log_odds_ratio": -0.39355215430259705, + "logits/chosen": -0.7050144076347351, + "logits/rejected": -0.754703164100647, + "logps/chosen": -0.11717454344034195, + "logps/rejected": -0.22633624076843262, + "loss": 5.4761, + "nll_loss": 1.3296756744384766, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01171745453029871, + "rewards/margins": 0.010916169732809067, + "rewards/rejected": -0.02263362519443035, + "step": 274 + }, + { + "epoch": 0.19017980636237897, + "grad_norm": 3.6157355308532715, + "learning_rate": 9.508990318118949e-06, + "log_odds_chosen": 0.6131385564804077, + "log_odds_ratio": -0.5318353176116943, + "logits/chosen": -0.8138557076454163, + "logits/rejected": -0.817249059677124, + "logps/chosen": -0.14839550852775574, + "logps/rejected": -0.18212762475013733, + "loss": 6.5553, + "nll_loss": 1.5856428146362305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014839550480246544, + "rewards/margins": 0.003373212879523635, + "rewards/rejected": -0.018212763592600822, + "step": 275 + }, + { + "epoch": 0.1908713692946058, + "grad_norm": 2.5300960540771484, + "learning_rate": 9.54356846473029e-06, + "log_odds_chosen": 0.7017526626586914, + "log_odds_ratio": -0.44837111234664917, + "logits/chosen": -0.9915153980255127, + "logits/rejected": -1.0183674097061157, + "logps/chosen": -0.17959809303283691, + "logps/rejected": -0.3399566113948822, + "loss": 4.4278, + "nll_loss": 1.0621116161346436, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01795980893075466, + "rewards/margins": 0.01603585109114647, + "rewards/rejected": -0.03399566188454628, + "step": 276 + }, + { + "epoch": 0.19156293222683266, + "grad_norm": 3.8363876342773438, + "learning_rate": 9.578146611341633e-06, + "log_odds_chosen": 1.7807199954986572, + "log_odds_ratio": -0.407881498336792, + "logits/chosen": -0.7593604326248169, + "logits/rejected": -0.776255190372467, + "logps/chosen": -0.101393923163414, + "logps/rejected": -0.45290789008140564, + "loss": 5.9338, + "nll_loss": 1.442673683166504, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01013939268887043, + "rewards/margins": 0.0351513996720314, + "rewards/rejected": -0.04529079049825668, + "step": 277 + }, + { + "epoch": 0.19225449515905949, + "grad_norm": 3.2401390075683594, + "learning_rate": 9.612724757952975e-06, + "log_odds_chosen": 1.1467278003692627, + "log_odds_ratio": -0.39693349599838257, + "logits/chosen": -0.9011764526367188, + "logits/rejected": -0.9105724096298218, + "logps/chosen": -0.18877184391021729, + "logps/rejected": -0.4855310916900635, + "loss": 6.1813, + "nll_loss": 1.5056235790252686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0188771840184927, + "rewards/margins": 0.029675928875803947, + "rewards/rejected": -0.048553116619586945, + "step": 278 + }, + { + "epoch": 0.19294605809128632, + "grad_norm": 2.911855459213257, + "learning_rate": 9.647302904564317e-06, + "log_odds_chosen": 1.363403081893921, + "log_odds_ratio": -0.46704989671707153, + "logits/chosen": -0.6899612545967102, + "logits/rejected": -0.7372763752937317, + "logps/chosen": -0.13555309176445007, + "logps/rejected": -0.27910101413726807, + "loss": 4.6959, + "nll_loss": 1.1272612810134888, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013555308803915977, + "rewards/margins": 0.014354792423546314, + "rewards/rejected": -0.027910098433494568, + "step": 279 + }, + { + "epoch": 0.19363762102351315, + "grad_norm": 2.9546871185302734, + "learning_rate": 9.681881051175658e-06, + "log_odds_chosen": 1.2636113166809082, + "log_odds_ratio": -0.5243982672691345, + "logits/chosen": -1.0961809158325195, + "logits/rejected": -1.1375336647033691, + "logps/chosen": -0.23278352618217468, + "logps/rejected": -0.3730795383453369, + "loss": 5.5835, + "nll_loss": 1.3434245586395264, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02327835187315941, + "rewards/margins": 0.014029599726200104, + "rewards/rejected": -0.03730795159935951, + "step": 280 + }, + { + "epoch": 0.19432918395573998, + "grad_norm": 3.927760601043701, + "learning_rate": 9.716459197787e-06, + "log_odds_chosen": -0.1890764832496643, + "log_odds_ratio": -1.1451356410980225, + "logits/chosen": -0.680740475654602, + "logits/rejected": -0.6450449824333191, + "logps/chosen": -0.40619486570358276, + "logps/rejected": -0.12343654036521912, + "loss": 5.5365, + "nll_loss": 1.269623041152954, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.040619488805532455, + "rewards/margins": -0.028275832533836365, + "rewards/rejected": -0.012343653477728367, + "step": 281 + }, + { + "epoch": 0.1950207468879668, + "grad_norm": 2.726196050643921, + "learning_rate": 9.751037344398341e-06, + "log_odds_chosen": 0.6973831057548523, + "log_odds_ratio": -0.5315908193588257, + "logits/chosen": -0.8917272090911865, + "logits/rejected": -0.8878331184387207, + "logps/chosen": -0.21855196356773376, + "logps/rejected": -0.3041384816169739, + "loss": 4.9619, + "nll_loss": 1.1873137950897217, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021855197846889496, + "rewards/margins": 0.008558651432394981, + "rewards/rejected": -0.030413847416639328, + "step": 282 + }, + { + "epoch": 0.19571230982019364, + "grad_norm": 2.687487840652466, + "learning_rate": 9.785615491009683e-06, + "log_odds_chosen": 0.8881605863571167, + "log_odds_ratio": -0.49772194027900696, + "logits/chosen": -0.9113330841064453, + "logits/rejected": -0.9342383146286011, + "logps/chosen": -0.16241443157196045, + "logps/rejected": -0.31999391317367554, + "loss": 5.1659, + "nll_loss": 1.2416975498199463, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016241442412137985, + "rewards/margins": 0.015757950022816658, + "rewards/rejected": -0.03199939429759979, + "step": 283 + }, + { + "epoch": 0.19640387275242047, + "grad_norm": 4.079690933227539, + "learning_rate": 9.820193637621024e-06, + "log_odds_chosen": 0.7447575330734253, + "log_odds_ratio": -0.48378610610961914, + "logits/chosen": -1.0826882123947144, + "logits/rejected": -1.093569040298462, + "logps/chosen": -0.15584491193294525, + "logps/rejected": -0.24808132648468018, + "loss": 5.3322, + "nll_loss": 1.2846788167953491, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015584491193294525, + "rewards/margins": 0.009223640896379948, + "rewards/rejected": -0.024808134883642197, + "step": 284 + }, + { + "epoch": 0.1970954356846473, + "grad_norm": 3.588355302810669, + "learning_rate": 9.854771784232366e-06, + "log_odds_chosen": 0.44871243834495544, + "log_odds_ratio": -0.9932539463043213, + "logits/chosen": -0.5877626538276672, + "logits/rejected": -0.5795567035675049, + "logps/chosen": -0.435401052236557, + "logps/rejected": -0.32708725333213806, + "loss": 5.1331, + "nll_loss": 1.1839531660079956, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0435401052236557, + "rewards/margins": -0.01083137933164835, + "rewards/rejected": -0.032708726823329926, + "step": 285 + }, + { + "epoch": 0.19778699861687413, + "grad_norm": 3.3288917541503906, + "learning_rate": 9.889349930843707e-06, + "log_odds_chosen": 1.1832314729690552, + "log_odds_ratio": -0.5124855041503906, + "logits/chosen": -0.9348160624504089, + "logits/rejected": -0.898857593536377, + "logps/chosen": -0.15667709708213806, + "logps/rejected": -0.33472001552581787, + "loss": 5.1491, + "nll_loss": 1.2360328435897827, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015667710453271866, + "rewards/margins": 0.01780429482460022, + "rewards/rejected": -0.03347200155258179, + "step": 286 + }, + { + "epoch": 0.19847856154910096, + "grad_norm": 3.9891884326934814, + "learning_rate": 9.923928077455049e-06, + "log_odds_chosen": 1.312206506729126, + "log_odds_ratio": -0.41736629605293274, + "logits/chosen": -0.820662260055542, + "logits/rejected": -0.8876691460609436, + "logps/chosen": -0.15436021983623505, + "logps/rejected": -0.4415496587753296, + "loss": 6.3401, + "nll_loss": 1.543293833732605, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015436021611094475, + "rewards/margins": 0.028718942776322365, + "rewards/rejected": -0.04415496438741684, + "step": 287 + }, + { + "epoch": 0.1991701244813278, + "grad_norm": 3.754223346710205, + "learning_rate": 9.95850622406639e-06, + "log_odds_chosen": 2.2290995121002197, + "log_odds_ratio": -0.20051251351833344, + "logits/chosen": -0.7229880690574646, + "logits/rejected": -0.7098461985588074, + "logps/chosen": -0.08578348904848099, + "logps/rejected": -0.4376526176929474, + "loss": 6.0212, + "nll_loss": 1.485244870185852, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008578348904848099, + "rewards/margins": 0.03518691286444664, + "rewards/rejected": -0.04376526176929474, + "step": 288 + }, + { + "epoch": 0.19986168741355465, + "grad_norm": 3.978665351867676, + "learning_rate": 9.993084370677732e-06, + "log_odds_chosen": 0.7419009208679199, + "log_odds_ratio": -0.4755893647670746, + "logits/chosen": -0.6797462701797485, + "logits/rejected": -0.6705136299133301, + "logps/chosen": -0.14312395453453064, + "logps/rejected": -0.3239539563655853, + "loss": 5.3251, + "nll_loss": 1.2837040424346924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014312395825982094, + "rewards/margins": 0.01808299869298935, + "rewards/rejected": -0.03239539638161659, + "step": 289 + }, + { + "epoch": 0.20055325034578148, + "grad_norm": 4.12111759185791, + "learning_rate": 1.0027662517289074e-05, + "log_odds_chosen": 0.7455897331237793, + "log_odds_ratio": -0.8224204778671265, + "logits/chosen": -0.5983878970146179, + "logits/rejected": -0.623099148273468, + "logps/chosen": -0.2823043465614319, + "logps/rejected": -0.44747430086135864, + "loss": 5.7119, + "nll_loss": 1.3457216024398804, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.028230436146259308, + "rewards/margins": 0.016516994684934616, + "rewards/rejected": -0.044747427105903625, + "step": 290 + }, + { + "epoch": 0.2012448132780083, + "grad_norm": 2.2981762886047363, + "learning_rate": 1.0062240663900415e-05, + "log_odds_chosen": 1.614776849746704, + "log_odds_ratio": -0.5352373123168945, + "logits/chosen": -0.5616164207458496, + "logits/rejected": -0.5507243871688843, + "logps/chosen": -0.16399338841438293, + "logps/rejected": -0.41412293910980225, + "loss": 4.2938, + "nll_loss": 1.0199339389801025, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016399338841438293, + "rewards/margins": 0.02501295693218708, + "rewards/rejected": -0.04141229763627052, + "step": 291 + }, + { + "epoch": 0.20193637621023514, + "grad_norm": 3.037929058074951, + "learning_rate": 1.0096818810511757e-05, + "log_odds_chosen": 0.8889395594596863, + "log_odds_ratio": -0.5420886278152466, + "logits/chosen": -0.6951072812080383, + "logits/rejected": -0.7086105942726135, + "logps/chosen": -0.16874369978904724, + "logps/rejected": -0.29494836926460266, + "loss": 5.1379, + "nll_loss": 1.2302649021148682, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016874369233846664, + "rewards/margins": 0.012620468623936176, + "rewards/rejected": -0.029494838789105415, + "step": 292 + }, + { + "epoch": 0.20262793914246197, + "grad_norm": 2.978886842727661, + "learning_rate": 1.0131396957123098e-05, + "log_odds_chosen": 2.0868780612945557, + "log_odds_ratio": -0.25021910667419434, + "logits/chosen": -1.0118746757507324, + "logits/rejected": -1.016335129737854, + "logps/chosen": -0.1282978355884552, + "logps/rejected": -0.49944478273391724, + "loss": 5.2727, + "nll_loss": 1.2931417226791382, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01282978430390358, + "rewards/margins": 0.0371146984398365, + "rewards/rejected": -0.04994447901844978, + "step": 293 + }, + { + "epoch": 0.2033195020746888, + "grad_norm": 3.695957660675049, + "learning_rate": 1.016597510373444e-05, + "log_odds_chosen": 1.4583425521850586, + "log_odds_ratio": -0.4338352382183075, + "logits/chosen": -0.8413094878196716, + "logits/rejected": -0.8705133199691772, + "logps/chosen": -0.1513550579547882, + "logps/rejected": -0.33852750062942505, + "loss": 4.9433, + "nll_loss": 1.1924374103546143, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01513550616800785, + "rewards/margins": 0.018717240542173386, + "rewards/rejected": -0.033852748572826385, + "step": 294 + }, + { + "epoch": 0.20401106500691563, + "grad_norm": 4.3729681968688965, + "learning_rate": 1.0200553250345782e-05, + "log_odds_chosen": 0.8586447238922119, + "log_odds_ratio": -1.0533874034881592, + "logits/chosen": -0.963996410369873, + "logits/rejected": -0.934209406375885, + "logps/chosen": -0.44819894433021545, + "logps/rejected": -0.3105297088623047, + "loss": 5.5288, + "nll_loss": 1.2768583297729492, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.044819898903369904, + "rewards/margins": -0.013766927644610405, + "rewards/rejected": -0.031052973121404648, + "step": 295 + }, + { + "epoch": 0.20470262793914246, + "grad_norm": 3.3607981204986572, + "learning_rate": 1.0235131396957123e-05, + "log_odds_chosen": 2.0125203132629395, + "log_odds_ratio": -0.48399215936660767, + "logits/chosen": -0.8093291521072388, + "logits/rejected": -0.856208324432373, + "logps/chosen": -0.2095358669757843, + "logps/rejected": -0.6241459250450134, + "loss": 5.2864, + "nll_loss": 1.2732126712799072, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02095358818769455, + "rewards/margins": 0.04146100580692291, + "rewards/rejected": -0.062414590269327164, + "step": 296 + }, + { + "epoch": 0.2053941908713693, + "grad_norm": 3.651738405227661, + "learning_rate": 1.0269709543568465e-05, + "log_odds_chosen": 1.384002685546875, + "log_odds_ratio": -0.523183286190033, + "logits/chosen": -0.725758969783783, + "logits/rejected": -0.7471228241920471, + "logps/chosen": -0.16776351630687714, + "logps/rejected": -0.44493597745895386, + "loss": 6.5535, + "nll_loss": 1.5860533714294434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016776353120803833, + "rewards/margins": 0.027717245742678642, + "rewards/rejected": -0.044493597000837326, + "step": 297 + }, + { + "epoch": 0.20608575380359612, + "grad_norm": 2.7523751258850098, + "learning_rate": 1.0304287690179806e-05, + "log_odds_chosen": 1.5752019882202148, + "log_odds_ratio": -0.4772804379463196, + "logits/chosen": -0.8213521838188171, + "logits/rejected": -0.8141341805458069, + "logps/chosen": -0.12757200002670288, + "logps/rejected": -0.4987594485282898, + "loss": 5.6836, + "nll_loss": 1.373165249824524, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012757200747728348, + "rewards/margins": 0.03711874410510063, + "rewards/rejected": -0.04987594485282898, + "step": 298 + }, + { + "epoch": 0.20677731673582295, + "grad_norm": 3.8582205772399902, + "learning_rate": 1.0338865836791148e-05, + "log_odds_chosen": 1.3856735229492188, + "log_odds_ratio": -0.3291966915130615, + "logits/chosen": -0.5608149170875549, + "logits/rejected": -0.5706402063369751, + "logps/chosen": -0.08333322405815125, + "logps/rejected": -0.25464698672294617, + "loss": 5.537, + "nll_loss": 1.3513189554214478, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00833332259207964, + "rewards/margins": 0.017131377011537552, + "rewards/rejected": -0.025464700534939766, + "step": 299 + }, + { + "epoch": 0.2074688796680498, + "grad_norm": 5.307126522064209, + "learning_rate": 1.0373443983402491e-05, + "log_odds_chosen": 0.05142582952976227, + "log_odds_ratio": -0.7213780283927917, + "logits/chosen": -0.9054761528968811, + "logits/rejected": -0.8753982186317444, + "logps/chosen": -0.18677140772342682, + "logps/rejected": -0.16824619472026825, + "loss": 6.0093, + "nll_loss": 1.4301897287368774, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018677137792110443, + "rewards/margins": -0.0018525202758610249, + "rewards/rejected": -0.016824619844555855, + "step": 300 + }, + { + "epoch": 0.20816044260027664, + "grad_norm": 3.757661819458008, + "learning_rate": 1.0408022130013833e-05, + "log_odds_chosen": 0.9107809066772461, + "log_odds_ratio": -0.5286827087402344, + "logits/chosen": -0.7676786184310913, + "logits/rejected": -0.7682151198387146, + "logps/chosen": -0.18552105128765106, + "logps/rejected": -0.2996525168418884, + "loss": 5.3472, + "nll_loss": 1.2839436531066895, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018552104011178017, + "rewards/margins": 0.011413146741688251, + "rewards/rejected": -0.029965251684188843, + "step": 301 + }, + { + "epoch": 0.20885200553250347, + "grad_norm": 3.3357064723968506, + "learning_rate": 1.0442600276625174e-05, + "log_odds_chosen": 2.001539707183838, + "log_odds_ratio": -0.22806967794895172, + "logits/chosen": -1.101258397102356, + "logits/rejected": -1.1736907958984375, + "logps/chosen": -0.08202943950891495, + "logps/rejected": -0.5091502666473389, + "loss": 5.017, + "nll_loss": 1.2314308881759644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00820294488221407, + "rewards/margins": 0.04271208494901657, + "rewards/rejected": -0.050915028899908066, + "step": 302 + }, + { + "epoch": 0.2095435684647303, + "grad_norm": 5.6853108406066895, + "learning_rate": 1.0477178423236516e-05, + "log_odds_chosen": 0.8550553321838379, + "log_odds_ratio": -0.5609914660453796, + "logits/chosen": -0.7329128980636597, + "logits/rejected": -0.7598764896392822, + "logps/chosen": -0.35300227999687195, + "logps/rejected": -0.3239184021949768, + "loss": 6.053, + "nll_loss": 1.4571560621261597, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.035300228744745255, + "rewards/margins": -0.0029083851259201765, + "rewards/rejected": -0.03239184617996216, + "step": 303 + }, + { + "epoch": 0.21023513139695713, + "grad_norm": 4.545161724090576, + "learning_rate": 1.0511756569847857e-05, + "log_odds_chosen": 3.2867326736450195, + "log_odds_ratio": -0.21116581559181213, + "logits/chosen": -0.8189299702644348, + "logits/rejected": -0.8567197322845459, + "logps/chosen": -0.07318108528852463, + "logps/rejected": -0.832836925983429, + "loss": 7.3767, + "nll_loss": 1.8230527639389038, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007318108808249235, + "rewards/margins": 0.07596558332443237, + "rewards/rejected": -0.0832836925983429, + "step": 304 + }, + { + "epoch": 0.21092669432918396, + "grad_norm": 5.584658622741699, + "learning_rate": 1.0546334716459199e-05, + "log_odds_chosen": 2.03281569480896, + "log_odds_ratio": -0.4121776223182678, + "logits/chosen": -0.8407848477363586, + "logits/rejected": -0.8673202395439148, + "logps/chosen": -0.15794141590595245, + "logps/rejected": -0.48691418766975403, + "loss": 5.3831, + "nll_loss": 1.3045486211776733, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015794143080711365, + "rewards/margins": 0.03289727866649628, + "rewards/rejected": -0.048691414296627045, + "step": 305 + }, + { + "epoch": 0.21161825726141079, + "grad_norm": 3.8299202919006348, + "learning_rate": 1.058091286307054e-05, + "log_odds_chosen": 1.1094874143600464, + "log_odds_ratio": -0.4206291139125824, + "logits/chosen": -0.7327075004577637, + "logits/rejected": -0.6932054162025452, + "logps/chosen": -0.1686730682849884, + "logps/rejected": -0.2844252288341522, + "loss": 5.7894, + "nll_loss": 1.405289649963379, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01686730608344078, + "rewards/margins": 0.011575218290090561, + "rewards/rejected": -0.028442522510886192, + "step": 306 + }, + { + "epoch": 0.21230982019363762, + "grad_norm": 5.454954624176025, + "learning_rate": 1.0615491009681882e-05, + "log_odds_chosen": 0.04435592144727707, + "log_odds_ratio": -0.7941852807998657, + "logits/chosen": -0.6925509572029114, + "logits/rejected": -0.7195000052452087, + "logps/chosen": -0.23101723194122314, + "logps/rejected": -0.24052780866622925, + "loss": 6.023, + "nll_loss": 1.4263360500335693, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023101722821593285, + "rewards/margins": 0.0009510572999715805, + "rewards/rejected": -0.024052780121564865, + "step": 307 + }, + { + "epoch": 0.21300138312586445, + "grad_norm": 5.1639533042907715, + "learning_rate": 1.0650069156293224e-05, + "log_odds_chosen": 0.2002013623714447, + "log_odds_ratio": -0.8262504935264587, + "logits/chosen": -0.5839241147041321, + "logits/rejected": -0.5827579498291016, + "logps/chosen": -0.18781417608261108, + "logps/rejected": -0.16054357588291168, + "loss": 5.1734, + "nll_loss": 1.2107203006744385, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01878141611814499, + "rewards/margins": -0.0027270594146102667, + "rewards/rejected": -0.016054358333349228, + "step": 308 + }, + { + "epoch": 0.21369294605809128, + "grad_norm": 17.813648223876953, + "learning_rate": 1.0684647302904565e-05, + "log_odds_chosen": 0.9967387914657593, + "log_odds_ratio": -0.7046850919723511, + "logits/chosen": -0.9163264632225037, + "logits/rejected": -0.9688165187835693, + "logps/chosen": -0.3284730315208435, + "logps/rejected": -0.489883154630661, + "loss": 5.1107, + "nll_loss": 1.207213282585144, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03284730017185211, + "rewards/margins": 0.0161410141736269, + "rewards/rejected": -0.04898831248283386, + "step": 309 + }, + { + "epoch": 0.2143845089903181, + "grad_norm": 4.739478588104248, + "learning_rate": 1.0719225449515907e-05, + "log_odds_chosen": 1.4200026988983154, + "log_odds_ratio": -0.7030278444290161, + "logits/chosen": -0.8466711640357971, + "logits/rejected": -0.8449913263320923, + "logps/chosen": -0.18683403730392456, + "logps/rejected": -0.3169473707675934, + "loss": 5.5492, + "nll_loss": 1.3169974088668823, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018683403730392456, + "rewards/margins": 0.013011332601308823, + "rewards/rejected": -0.03169473633170128, + "step": 310 + }, + { + "epoch": 0.21507607192254496, + "grad_norm": 3.767282485961914, + "learning_rate": 1.0753803596127248e-05, + "log_odds_chosen": 1.637789011001587, + "log_odds_ratio": -0.30916017293930054, + "logits/chosen": -0.5770536661148071, + "logits/rejected": -0.6048831343650818, + "logps/chosen": -0.08278335630893707, + "logps/rejected": -0.23345719277858734, + "loss": 4.6282, + "nll_loss": 1.1261216402053833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008278336375951767, + "rewards/margins": 0.015067384578287601, + "rewards/rejected": -0.023345721885561943, + "step": 311 + }, + { + "epoch": 0.2157676348547718, + "grad_norm": 3.3942456245422363, + "learning_rate": 1.078838174273859e-05, + "log_odds_chosen": 2.3648693561553955, + "log_odds_ratio": -0.2751004099845886, + "logits/chosen": -0.6818414926528931, + "logits/rejected": -0.6695123910903931, + "logps/chosen": -0.09515105187892914, + "logps/rejected": -0.41627973318099976, + "loss": 5.145, + "nll_loss": 1.2587300539016724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009515105746686459, + "rewards/margins": 0.03211286664009094, + "rewards/rejected": -0.041627973318099976, + "step": 312 + }, + { + "epoch": 0.21645919778699863, + "grad_norm": 4.549081802368164, + "learning_rate": 1.0822959889349931e-05, + "log_odds_chosen": 1.1085354089736938, + "log_odds_ratio": -0.7170664072036743, + "logits/chosen": -0.559461772441864, + "logits/rejected": -0.5786145925521851, + "logps/chosen": -0.22476282715797424, + "logps/rejected": -0.42009395360946655, + "loss": 5.1471, + "nll_loss": 1.2150615453720093, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.022476285696029663, + "rewards/margins": 0.01953311078250408, + "rewards/rejected": -0.0420093908905983, + "step": 313 + }, + { + "epoch": 0.21715076071922546, + "grad_norm": 3.267533302307129, + "learning_rate": 1.0857538035961273e-05, + "log_odds_chosen": 1.3190233707427979, + "log_odds_ratio": -0.4539645314216614, + "logits/chosen": -0.19708727300167084, + "logits/rejected": -0.2204999327659607, + "logps/chosen": -0.17690463364124298, + "logps/rejected": -0.36837536096572876, + "loss": 3.9881, + "nll_loss": 0.9516271352767944, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017690464854240417, + "rewards/margins": 0.019147075712680817, + "rewards/rejected": -0.036837536841630936, + "step": 314 + }, + { + "epoch": 0.21784232365145229, + "grad_norm": 5.523169040679932, + "learning_rate": 1.0892116182572615e-05, + "log_odds_chosen": 0.7681819796562195, + "log_odds_ratio": -0.7182871103286743, + "logits/chosen": -0.8303021192550659, + "logits/rejected": -0.8364777565002441, + "logps/chosen": -0.22490473091602325, + "logps/rejected": -0.41915619373321533, + "loss": 6.7917, + "nll_loss": 1.626107931137085, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.022490475326776505, + "rewards/margins": 0.01942514255642891, + "rewards/rejected": -0.041915617883205414, + "step": 315 + }, + { + "epoch": 0.21853388658367912, + "grad_norm": 3.44968581199646, + "learning_rate": 1.0926694329183956e-05, + "log_odds_chosen": 0.487798810005188, + "log_odds_ratio": -0.575210452079773, + "logits/chosen": -0.806228518486023, + "logits/rejected": -0.788817822933197, + "logps/chosen": -0.17850428819656372, + "logps/rejected": -0.22373415529727936, + "loss": 4.2689, + "nll_loss": 1.0097146034240723, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017850428819656372, + "rewards/margins": 0.004522987641394138, + "rewards/rejected": -0.022373415529727936, + "step": 316 + }, + { + "epoch": 0.21922544951590595, + "grad_norm": 2.8417859077453613, + "learning_rate": 1.0961272475795298e-05, + "log_odds_chosen": 2.186145305633545, + "log_odds_ratio": -0.26623407006263733, + "logits/chosen": -0.7959799766540527, + "logits/rejected": -0.826103687286377, + "logps/chosen": -0.09745047241449356, + "logps/rejected": -0.4826028048992157, + "loss": 5.4007, + "nll_loss": 1.3235399723052979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009745046496391296, + "rewards/margins": 0.038515232503414154, + "rewards/rejected": -0.04826027899980545, + "step": 317 + }, + { + "epoch": 0.21991701244813278, + "grad_norm": 5.369155406951904, + "learning_rate": 1.099585062240664e-05, + "log_odds_chosen": -0.07499188184738159, + "log_odds_ratio": -0.9024602174758911, + "logits/chosen": -0.7481403350830078, + "logits/rejected": -0.7806286215782166, + "logps/chosen": -0.2302655577659607, + "logps/rejected": -0.2344481348991394, + "loss": 5.633, + "nll_loss": 1.3180158138275146, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02302655577659607, + "rewards/margins": 0.0004182555712759495, + "rewards/rejected": -0.02344481088221073, + "step": 318 + }, + { + "epoch": 0.2206085753803596, + "grad_norm": 5.221406936645508, + "learning_rate": 1.103042876901798e-05, + "log_odds_chosen": 1.0126641988754272, + "log_odds_ratio": -0.6377867460250854, + "logits/chosen": -0.9906193017959595, + "logits/rejected": -0.9781680703163147, + "logps/chosen": -0.24248827993869781, + "logps/rejected": -0.48205310106277466, + "loss": 6.2934, + "nll_loss": 1.5095751285552979, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02424883097410202, + "rewards/margins": 0.023956481367349625, + "rewards/rejected": -0.048205308616161346, + "step": 319 + }, + { + "epoch": 0.22130013831258644, + "grad_norm": 3.497264862060547, + "learning_rate": 1.1065006915629322e-05, + "log_odds_chosen": 2.176384925842285, + "log_odds_ratio": -0.29211416840553284, + "logits/chosen": -0.7758827805519104, + "logits/rejected": -0.8231465220451355, + "logps/chosen": -0.09156069159507751, + "logps/rejected": -0.3845745325088501, + "loss": 5.5199, + "nll_loss": 1.3507754802703857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009156068786978722, + "rewards/margins": 0.029301384463906288, + "rewards/rejected": -0.03845745325088501, + "step": 320 + }, + { + "epoch": 0.22199170124481327, + "grad_norm": 5.271878242492676, + "learning_rate": 1.1099585062240664e-05, + "log_odds_chosen": 2.6156089305877686, + "log_odds_ratio": -0.5581331253051758, + "logits/chosen": -0.7301532030105591, + "logits/rejected": -0.8199042677879333, + "logps/chosen": -0.206491619348526, + "logps/rejected": -0.6840195059776306, + "loss": 5.643, + "nll_loss": 1.3549339771270752, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02064916118979454, + "rewards/margins": 0.047752782702445984, + "rewards/rejected": -0.06840194761753082, + "step": 321 + }, + { + "epoch": 0.22268326417704012, + "grad_norm": 4.354135513305664, + "learning_rate": 1.1134163208852007e-05, + "log_odds_chosen": 1.7553741931915283, + "log_odds_ratio": -0.5252009034156799, + "logits/chosen": -0.9048160910606384, + "logits/rejected": -0.9343039393424988, + "logps/chosen": -0.18464252352714539, + "logps/rejected": -0.7623762488365173, + "loss": 6.0298, + "nll_loss": 1.4549338817596436, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01846425235271454, + "rewards/margins": 0.05777337774634361, + "rewards/rejected": -0.07623762637376785, + "step": 322 + }, + { + "epoch": 0.22337482710926695, + "grad_norm": 4.396636009216309, + "learning_rate": 1.1168741355463349e-05, + "log_odds_chosen": 1.224029302597046, + "log_odds_ratio": -0.7107768058776855, + "logits/chosen": -1.1329301595687866, + "logits/rejected": -1.1671746969223022, + "logps/chosen": -0.20167645812034607, + "logps/rejected": -0.4650338292121887, + "loss": 5.0124, + "nll_loss": 1.1820275783538818, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.020167646929621696, + "rewards/margins": 0.026335733011364937, + "rewards/rejected": -0.04650337994098663, + "step": 323 + }, + { + "epoch": 0.22406639004149378, + "grad_norm": 5.422794342041016, + "learning_rate": 1.120331950207469e-05, + "log_odds_chosen": 0.9470016360282898, + "log_odds_ratio": -0.6606626510620117, + "logits/chosen": -0.8801020979881287, + "logits/rejected": -0.8777154088020325, + "logps/chosen": -0.13853338360786438, + "logps/rejected": -0.35402578115463257, + "loss": 4.4383, + "nll_loss": 1.04351806640625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013853337615728378, + "rewards/margins": 0.021549241617321968, + "rewards/rejected": -0.0354025773704052, + "step": 324 + }, + { + "epoch": 0.22475795297372061, + "grad_norm": 3.7938013076782227, + "learning_rate": 1.1237897648686032e-05, + "log_odds_chosen": 2.538409948348999, + "log_odds_ratio": -0.17612087726593018, + "logits/chosen": -0.9144092798233032, + "logits/rejected": -0.9531471729278564, + "logps/chosen": -0.09165746718645096, + "logps/rejected": -0.6212788224220276, + "loss": 5.4653, + "nll_loss": 1.3487169742584229, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009165747091174126, + "rewards/margins": 0.05296213552355766, + "rewards/rejected": -0.06212788075208664, + "step": 325 + }, + { + "epoch": 0.22544951590594745, + "grad_norm": 3.0676229000091553, + "learning_rate": 1.1272475795297373e-05, + "log_odds_chosen": 1.3782696723937988, + "log_odds_ratio": -0.3701854646205902, + "logits/chosen": -0.8568637371063232, + "logits/rejected": -0.8561961650848389, + "logps/chosen": -0.10448767989873886, + "logps/rejected": -0.4444238543510437, + "loss": 4.5713, + "nll_loss": 1.105804681777954, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010448767803609371, + "rewards/margins": 0.033993616700172424, + "rewards/rejected": -0.04444238543510437, + "step": 326 + }, + { + "epoch": 0.22614107883817428, + "grad_norm": 5.147009372711182, + "learning_rate": 1.1307053941908715e-05, + "log_odds_chosen": 0.35377904772758484, + "log_odds_ratio": -0.753959059715271, + "logits/chosen": -0.8954100608825684, + "logits/rejected": -0.9011775851249695, + "logps/chosen": -0.2625162601470947, + "logps/rejected": -0.3287270665168762, + "loss": 4.7927, + "nll_loss": 1.122768521308899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.026251627132296562, + "rewards/margins": 0.006621081382036209, + "rewards/rejected": -0.03287270665168762, + "step": 327 + }, + { + "epoch": 0.2268326417704011, + "grad_norm": 2.6937825679779053, + "learning_rate": 1.1341632088520057e-05, + "log_odds_chosen": 1.5019047260284424, + "log_odds_ratio": -0.3758338391780853, + "logits/chosen": -1.226015567779541, + "logits/rejected": -1.298396110534668, + "logps/chosen": -0.22487083077430725, + "logps/rejected": -0.5187363624572754, + "loss": 6.2312, + "nll_loss": 1.5202105045318604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022487085312604904, + "rewards/margins": 0.029386552050709724, + "rewards/rejected": -0.05187363922595978, + "step": 328 + }, + { + "epoch": 0.22752420470262794, + "grad_norm": 4.040054798126221, + "learning_rate": 1.1376210235131398e-05, + "log_odds_chosen": 1.9598990678787231, + "log_odds_ratio": -0.292378693819046, + "logits/chosen": -0.742152988910675, + "logits/rejected": -0.7864880561828613, + "logps/chosen": -0.1894950270652771, + "logps/rejected": -0.5066390037536621, + "loss": 6.1423, + "nll_loss": 1.5063270330429077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01894950121641159, + "rewards/margins": 0.03171439841389656, + "rewards/rejected": -0.05066390335559845, + "step": 329 + }, + { + "epoch": 0.22821576763485477, + "grad_norm": 4.373732089996338, + "learning_rate": 1.141078838174274e-05, + "log_odds_chosen": 0.03542667627334595, + "log_odds_ratio": -1.1298328638076782, + "logits/chosen": -0.5621449947357178, + "logits/rejected": -0.5511815547943115, + "logps/chosen": -0.34871435165405273, + "logps/rejected": -0.2797914147377014, + "loss": 5.2115, + "nll_loss": 1.189887523651123, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03487143665552139, + "rewards/margins": -0.006892295554280281, + "rewards/rejected": -0.02797914296388626, + "step": 330 + }, + { + "epoch": 0.2289073305670816, + "grad_norm": 3.9931676387786865, + "learning_rate": 1.144536652835408e-05, + "log_odds_chosen": 1.8632118701934814, + "log_odds_ratio": -0.3370683789253235, + "logits/chosen": -0.7310777902603149, + "logits/rejected": -0.7843050360679626, + "logps/chosen": -0.08296102285385132, + "logps/rejected": -0.3721938729286194, + "loss": 5.4324, + "nll_loss": 1.3244011402130127, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008296102285385132, + "rewards/margins": 0.028923287987709045, + "rewards/rejected": -0.03721938654780388, + "step": 331 + }, + { + "epoch": 0.22959889349930843, + "grad_norm": 3.0267841815948486, + "learning_rate": 1.1479944674965421e-05, + "log_odds_chosen": 1.2454099655151367, + "log_odds_ratio": -0.33922553062438965, + "logits/chosen": -0.8828202486038208, + "logits/rejected": -0.8880357146263123, + "logps/chosen": -0.1263776272535324, + "logps/rejected": -0.3546895980834961, + "loss": 5.3861, + "nll_loss": 1.312612771987915, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01263776421546936, + "rewards/margins": 0.02283119410276413, + "rewards/rejected": -0.03546895831823349, + "step": 332 + }, + { + "epoch": 0.23029045643153526, + "grad_norm": 4.601929664611816, + "learning_rate": 1.1514522821576763e-05, + "log_odds_chosen": 0.9013376832008362, + "log_odds_ratio": -0.5258499979972839, + "logits/chosen": -0.814623236656189, + "logits/rejected": -0.8508963584899902, + "logps/chosen": -0.13411368429660797, + "logps/rejected": -0.23433759808540344, + "loss": 6.2399, + "nll_loss": 1.5073976516723633, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013411369174718857, + "rewards/margins": 0.010022390633821487, + "rewards/rejected": -0.023433759808540344, + "step": 333 + }, + { + "epoch": 0.23098201936376211, + "grad_norm": 7.3901753425598145, + "learning_rate": 1.1549100968188106e-05, + "log_odds_chosen": 1.8473443984985352, + "log_odds_ratio": -0.6285804510116577, + "logits/chosen": -0.7013222575187683, + "logits/rejected": -0.734618067741394, + "logps/chosen": -0.17842142283916473, + "logps/rejected": -0.4848504364490509, + "loss": 5.5798, + "nll_loss": 1.3320882320404053, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017842141911387444, + "rewards/margins": 0.030642900615930557, + "rewards/rejected": -0.04848504066467285, + "step": 334 + }, + { + "epoch": 0.23167358229598894, + "grad_norm": 3.9874866008758545, + "learning_rate": 1.1583679114799447e-05, + "log_odds_chosen": 2.6798810958862305, + "log_odds_ratio": -0.25364208221435547, + "logits/chosen": -0.820704996585846, + "logits/rejected": -0.8510127067565918, + "logps/chosen": -0.1315419226884842, + "logps/rejected": -0.7463638782501221, + "loss": 6.0871, + "nll_loss": 1.496415615081787, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013154192827641964, + "rewards/margins": 0.06148219481110573, + "rewards/rejected": -0.07463638484477997, + "step": 335 + }, + { + "epoch": 0.23236514522821577, + "grad_norm": 3.520430088043213, + "learning_rate": 1.1618257261410789e-05, + "log_odds_chosen": 1.1615909337997437, + "log_odds_ratio": -0.43198850750923157, + "logits/chosen": -0.7266113758087158, + "logits/rejected": -0.7631188631057739, + "logps/chosen": -0.15298506617546082, + "logps/rejected": -0.34811288118362427, + "loss": 4.4336, + "nll_loss": 1.0652047395706177, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015298506245017052, + "rewards/margins": 0.019512783735990524, + "rewards/rejected": -0.03481128811836243, + "step": 336 + }, + { + "epoch": 0.2330567081604426, + "grad_norm": 4.572351932525635, + "learning_rate": 1.165283540802213e-05, + "log_odds_chosen": 2.346097946166992, + "log_odds_ratio": -0.3432542383670807, + "logits/chosen": -1.0011042356491089, + "logits/rejected": -1.037811517715454, + "logps/chosen": -0.08525515347719193, + "logps/rejected": -0.3206099271774292, + "loss": 5.3928, + "nll_loss": 1.3138808012008667, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008525514975190163, + "rewards/margins": 0.023535478860139847, + "rewards/rejected": -0.03206099569797516, + "step": 337 + }, + { + "epoch": 0.23374827109266944, + "grad_norm": 6.0374908447265625, + "learning_rate": 1.1687413554633472e-05, + "log_odds_chosen": 0.3840335011482239, + "log_odds_ratio": -0.9351744055747986, + "logits/chosen": -0.5911035537719727, + "logits/rejected": -0.6028836965560913, + "logps/chosen": -0.28335726261138916, + "logps/rejected": -0.2893810570240021, + "loss": 5.1259, + "nll_loss": 1.1879698038101196, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.028335727751255035, + "rewards/margins": 0.0006023813039064407, + "rewards/rejected": -0.028938109055161476, + "step": 338 + }, + { + "epoch": 0.23443983402489627, + "grad_norm": 9.87275218963623, + "learning_rate": 1.1721991701244814e-05, + "log_odds_chosen": -1.1624599695205688, + "log_odds_ratio": -1.561909556388855, + "logits/chosen": -0.7426409125328064, + "logits/rejected": -0.7292524576187134, + "logps/chosen": -0.33654242753982544, + "logps/rejected": -0.15829023718833923, + "loss": 6.1163, + "nll_loss": 1.3728806972503662, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.03365424647927284, + "rewards/margins": -0.01782522350549698, + "rewards/rejected": -0.015829022973775864, + "step": 339 + }, + { + "epoch": 0.2351313969571231, + "grad_norm": 3.4978086948394775, + "learning_rate": 1.1756569847856155e-05, + "log_odds_chosen": 1.4125635623931885, + "log_odds_ratio": -0.3743363618850708, + "logits/chosen": -0.8770706653594971, + "logits/rejected": -0.8505789041519165, + "logps/chosen": -0.08787395060062408, + "logps/rejected": -0.3069489598274231, + "loss": 4.5954, + "nll_loss": 1.1114096641540527, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008787395432591438, + "rewards/margins": 0.02190750278532505, + "rewards/rejected": -0.03069489635527134, + "step": 340 + }, + { + "epoch": 0.23582295988934993, + "grad_norm": 5.122701168060303, + "learning_rate": 1.1791147994467497e-05, + "log_odds_chosen": 1.5185163021087646, + "log_odds_ratio": -0.5956940054893494, + "logits/chosen": -0.7384462952613831, + "logits/rejected": -0.7524312734603882, + "logps/chosen": -0.18539643287658691, + "logps/rejected": -0.4253931939601898, + "loss": 6.206, + "nll_loss": 1.4919191598892212, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01853964291512966, + "rewards/margins": 0.02399967610836029, + "rewards/rejected": -0.0425393208861351, + "step": 341 + }, + { + "epoch": 0.23651452282157676, + "grad_norm": 5.095941543579102, + "learning_rate": 1.1825726141078838e-05, + "log_odds_chosen": 1.302222728729248, + "log_odds_ratio": -0.48803675174713135, + "logits/chosen": -0.6658348441123962, + "logits/rejected": -0.6755991578102112, + "logps/chosen": -0.14942830801010132, + "logps/rejected": -0.4372924864292145, + "loss": 5.3003, + "nll_loss": 1.2762644290924072, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014942830428481102, + "rewards/margins": 0.028786418959498405, + "rewards/rejected": -0.04372924938797951, + "step": 342 + }, + { + "epoch": 0.23720608575380359, + "grad_norm": 4.108216762542725, + "learning_rate": 1.186030428769018e-05, + "log_odds_chosen": 1.0591542720794678, + "log_odds_ratio": -0.35927653312683105, + "logits/chosen": -0.7216286659240723, + "logits/rejected": -0.7346411347389221, + "logps/chosen": -0.17469891905784607, + "logps/rejected": -0.36906424164772034, + "loss": 5.0595, + "nll_loss": 1.228936791419983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017469894140958786, + "rewards/margins": 0.019436530768871307, + "rewards/rejected": -0.036906421184539795, + "step": 343 + }, + { + "epoch": 0.23789764868603042, + "grad_norm": 3.7954001426696777, + "learning_rate": 1.1894882434301522e-05, + "log_odds_chosen": 1.0637387037277222, + "log_odds_ratio": -0.4897671043872833, + "logits/chosen": -0.7606596350669861, + "logits/rejected": -0.7664926648139954, + "logps/chosen": -0.1042921245098114, + "logps/rejected": -0.3099287152290344, + "loss": 4.8263, + "nll_loss": 1.1575967073440552, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01042921282351017, + "rewards/margins": 0.020563658326864243, + "rewards/rejected": -0.030992871150374413, + "step": 344 + }, + { + "epoch": 0.23858921161825727, + "grad_norm": 2.977518081665039, + "learning_rate": 1.1929460580912865e-05, + "log_odds_chosen": 1.6749167442321777, + "log_odds_ratio": -0.4589526951313019, + "logits/chosen": -0.9313449859619141, + "logits/rejected": -0.9575750231742859, + "logps/chosen": -0.22524240612983704, + "logps/rejected": -0.42715808749198914, + "loss": 4.3979, + "nll_loss": 1.0535855293273926, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022524241358041763, + "rewards/margins": 0.02019157074391842, + "rewards/rejected": -0.04271581396460533, + "step": 345 + }, + { + "epoch": 0.2392807745504841, + "grad_norm": 4.326882839202881, + "learning_rate": 1.1964038727524206e-05, + "log_odds_chosen": 0.5998474359512329, + "log_odds_ratio": -0.6148593425750732, + "logits/chosen": -0.8653595447540283, + "logits/rejected": -0.9045172929763794, + "logps/chosen": -0.2513960301876068, + "logps/rejected": -0.4176265597343445, + "loss": 5.8248, + "nll_loss": 1.394715666770935, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02513960376381874, + "rewards/margins": 0.016623055562376976, + "rewards/rejected": -0.04176265746355057, + "step": 346 + }, + { + "epoch": 0.23997233748271093, + "grad_norm": 4.429263591766357, + "learning_rate": 1.1998616874135548e-05, + "log_odds_chosen": 1.5210059881210327, + "log_odds_ratio": -0.5017091035842896, + "logits/chosen": -0.9123212099075317, + "logits/rejected": -0.9576847553253174, + "logps/chosen": -0.14953172206878662, + "logps/rejected": -0.5459752082824707, + "loss": 5.5626, + "nll_loss": 1.340486764907837, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014953171834349632, + "rewards/margins": 0.039644353091716766, + "rewards/rejected": -0.05459752306342125, + "step": 347 + }, + { + "epoch": 0.24066390041493776, + "grad_norm": 3.6143527030944824, + "learning_rate": 1.203319502074689e-05, + "log_odds_chosen": 0.5170705914497375, + "log_odds_ratio": -0.5560952425003052, + "logits/chosen": -0.6124237775802612, + "logits/rejected": -0.5826144814491272, + "logps/chosen": -0.16328248381614685, + "logps/rejected": -0.24581731855869293, + "loss": 3.5029, + "nll_loss": 0.8201128840446472, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016328249126672745, + "rewards/margins": 0.008253482170403004, + "rewards/rejected": -0.024581734091043472, + "step": 348 + }, + { + "epoch": 0.2413554633471646, + "grad_norm": 3.123534917831421, + "learning_rate": 1.2067773167358231e-05, + "log_odds_chosen": 0.731002688407898, + "log_odds_ratio": -0.49530866742134094, + "logits/chosen": -0.8178795576095581, + "logits/rejected": -0.8388358950614929, + "logps/chosen": -0.1596156358718872, + "logps/rejected": -0.25716426968574524, + "loss": 5.6156, + "nll_loss": 1.354375958442688, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01596156321465969, + "rewards/margins": 0.009754862636327744, + "rewards/rejected": -0.025716423988342285, + "step": 349 + }, + { + "epoch": 0.24204702627939143, + "grad_norm": 3.329681396484375, + "learning_rate": 1.2102351313969573e-05, + "log_odds_chosen": 0.6945995092391968, + "log_odds_ratio": -0.5422481298446655, + "logits/chosen": -0.7990038990974426, + "logits/rejected": -0.8053203821182251, + "logps/chosen": -0.11132040619850159, + "logps/rejected": -0.16899347305297852, + "loss": 4.5106, + "nll_loss": 1.0734366178512573, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011132041923701763, + "rewards/margins": 0.00576730677857995, + "rewards/rejected": -0.016899349167943, + "step": 350 + }, + { + "epoch": 0.24273858921161826, + "grad_norm": 3.027214765548706, + "learning_rate": 1.2136929460580914e-05, + "log_odds_chosen": 1.642749547958374, + "log_odds_ratio": -0.41968217492103577, + "logits/chosen": -0.9147093296051025, + "logits/rejected": -0.9439373016357422, + "logps/chosen": -0.10925433784723282, + "logps/rejected": -0.3146194517612457, + "loss": 5.1456, + "nll_loss": 1.244423270225525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010925433598458767, + "rewards/margins": 0.0205365139991045, + "rewards/rejected": -0.03146194666624069, + "step": 351 + }, + { + "epoch": 0.24343015214384509, + "grad_norm": 3.000566005706787, + "learning_rate": 1.2171507607192254e-05, + "log_odds_chosen": 0.5409971475601196, + "log_odds_ratio": -0.5828170776367188, + "logits/chosen": -0.6988095045089722, + "logits/rejected": -0.6698228716850281, + "logps/chosen": -0.22581708431243896, + "logps/rejected": -0.3958743214607239, + "loss": 4.9495, + "nll_loss": 1.1790937185287476, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022581709548830986, + "rewards/margins": 0.01700572483241558, + "rewards/rejected": -0.03958743438124657, + "step": 352 + }, + { + "epoch": 0.24412171507607192, + "grad_norm": 3.9589076042175293, + "learning_rate": 1.2206085753803596e-05, + "log_odds_chosen": 0.8446111679077148, + "log_odds_ratio": -0.5762701630592346, + "logits/chosen": -0.7837926149368286, + "logits/rejected": -0.7676455974578857, + "logps/chosen": -0.17044731974601746, + "logps/rejected": -0.288826584815979, + "loss": 5.6256, + "nll_loss": 1.3487628698349, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017044732347130775, + "rewards/margins": 0.011837925761938095, + "rewards/rejected": -0.02888265810906887, + "step": 353 + }, + { + "epoch": 0.24481327800829875, + "grad_norm": 2.9041407108306885, + "learning_rate": 1.2240663900414937e-05, + "log_odds_chosen": 1.5135369300842285, + "log_odds_ratio": -0.5100223422050476, + "logits/chosen": -0.7986371517181396, + "logits/rejected": -0.8036400675773621, + "logps/chosen": -0.1771228313446045, + "logps/rejected": -0.48527291417121887, + "loss": 4.3205, + "nll_loss": 1.0291134119033813, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017712285742163658, + "rewards/margins": 0.0308150053024292, + "rewards/rejected": -0.04852728918194771, + "step": 354 + }, + { + "epoch": 0.24550484094052558, + "grad_norm": 4.075167655944824, + "learning_rate": 1.2275242047026279e-05, + "log_odds_chosen": 1.6503653526306152, + "log_odds_ratio": -0.3700273334980011, + "logits/chosen": -0.8282778263092041, + "logits/rejected": -0.8640443086624146, + "logps/chosen": -0.11108995229005814, + "logps/rejected": -0.3973962962627411, + "loss": 5.0046, + "nll_loss": 1.2141525745391846, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011108995415270329, + "rewards/margins": 0.028630636632442474, + "rewards/rejected": -0.03973962739109993, + "step": 355 + }, + { + "epoch": 0.24619640387275243, + "grad_norm": 4.823635101318359, + "learning_rate": 1.2309820193637622e-05, + "log_odds_chosen": 0.9693795442581177, + "log_odds_ratio": -0.41709911823272705, + "logits/chosen": -0.6768261194229126, + "logits/rejected": -0.7117671966552734, + "logps/chosen": -0.14925503730773926, + "logps/rejected": -0.3569108247756958, + "loss": 5.3088, + "nll_loss": 1.2854878902435303, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01492550503462553, + "rewards/margins": 0.020765576511621475, + "rewards/rejected": -0.03569108247756958, + "step": 356 + }, + { + "epoch": 0.24688796680497926, + "grad_norm": 4.039694786071777, + "learning_rate": 1.2344398340248964e-05, + "log_odds_chosen": 1.6349093914031982, + "log_odds_ratio": -0.30501067638397217, + "logits/chosen": -0.6676627397537231, + "logits/rejected": -0.6829565763473511, + "logps/chosen": -0.12125631421804428, + "logps/rejected": -0.4480995833873749, + "loss": 6.7252, + "nll_loss": 1.6507999897003174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012125632725656033, + "rewards/margins": 0.032684326171875, + "rewards/rejected": -0.04480995610356331, + "step": 357 + }, + { + "epoch": 0.2475795297372061, + "grad_norm": 4.4998884201049805, + "learning_rate": 1.2378976486860305e-05, + "log_odds_chosen": 2.042746067047119, + "log_odds_ratio": -0.30104365944862366, + "logits/chosen": -0.7379899621009827, + "logits/rejected": -0.7694678902626038, + "logps/chosen": -0.14233702421188354, + "logps/rejected": -0.5452017188072205, + "loss": 6.9534, + "nll_loss": 1.7082533836364746, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01423370186239481, + "rewards/margins": 0.04028647020459175, + "rewards/rejected": -0.054520174860954285, + "step": 358 + }, + { + "epoch": 0.24827109266943292, + "grad_norm": 4.526036262512207, + "learning_rate": 1.2413554633471647e-05, + "log_odds_chosen": 1.137697458267212, + "log_odds_ratio": -0.47182533144950867, + "logits/chosen": -0.5749889612197876, + "logits/rejected": -0.6108061075210571, + "logps/chosen": -0.12986072897911072, + "logps/rejected": -0.2841159403324127, + "loss": 5.0898, + "nll_loss": 1.225273847579956, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012986073270440102, + "rewards/margins": 0.015425523743033409, + "rewards/rejected": -0.02841159515082836, + "step": 359 + }, + { + "epoch": 0.24896265560165975, + "grad_norm": 4.702746391296387, + "learning_rate": 1.2448132780082988e-05, + "log_odds_chosen": 1.1218202114105225, + "log_odds_ratio": -0.3816485106945038, + "logits/chosen": -0.5540226697921753, + "logits/rejected": -0.6201716065406799, + "logps/chosen": -0.12513335049152374, + "logps/rejected": -0.3358425796031952, + "loss": 5.328, + "nll_loss": 1.2938361167907715, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012513335794210434, + "rewards/margins": 0.021070925518870354, + "rewards/rejected": -0.03358425945043564, + "step": 360 + }, + { + "epoch": 0.24965421853388658, + "grad_norm": 4.272010803222656, + "learning_rate": 1.248271092669433e-05, + "log_odds_chosen": 0.28873878717422485, + "log_odds_ratio": -0.9356129169464111, + "logits/chosen": -0.9438433647155762, + "logits/rejected": -0.9586924910545349, + "logps/chosen": -0.2660728096961975, + "logps/rejected": -0.1731991171836853, + "loss": 4.8786, + "nll_loss": 1.1260899305343628, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02660728059709072, + "rewards/margins": -0.009287366643548012, + "rewards/rejected": -0.01731991209089756, + "step": 361 + }, + { + "epoch": 0.2503457814661134, + "grad_norm": 4.26386833190918, + "learning_rate": 1.2517289073305671e-05, + "log_odds_chosen": 1.449477195739746, + "log_odds_ratio": -0.42578303813934326, + "logits/chosen": -0.6013892889022827, + "logits/rejected": -0.6026226282119751, + "logps/chosen": -0.1423933207988739, + "logps/rejected": -0.2896794378757477, + "loss": 5.13, + "nll_loss": 1.2399232387542725, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014239332638680935, + "rewards/margins": 0.014728610403835773, + "rewards/rejected": -0.02896794304251671, + "step": 362 + }, + { + "epoch": 0.25103734439834025, + "grad_norm": 4.793960094451904, + "learning_rate": 1.2551867219917013e-05, + "log_odds_chosen": 2.400606393814087, + "log_odds_ratio": -0.26553744077682495, + "logits/chosen": -0.5489617586135864, + "logits/rejected": -0.6443371176719666, + "logps/chosen": -0.07715193927288055, + "logps/rejected": -0.51331627368927, + "loss": 5.9403, + "nll_loss": 1.4585212469100952, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007715193554759026, + "rewards/margins": 0.04361642897129059, + "rewards/rejected": -0.05133162438869476, + "step": 363 + }, + { + "epoch": 0.2517289073305671, + "grad_norm": 5.656650543212891, + "learning_rate": 1.2586445366528355e-05, + "log_odds_chosen": 1.3067384958267212, + "log_odds_ratio": -0.5937015414237976, + "logits/chosen": -0.7901173830032349, + "logits/rejected": -0.8001323342323303, + "logps/chosen": -0.13530637323856354, + "logps/rejected": -0.39692115783691406, + "loss": 6.1923, + "nll_loss": 1.4887058734893799, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013530636206269264, + "rewards/margins": 0.026161476969718933, + "rewards/rejected": -0.03969211503863335, + "step": 364 + }, + { + "epoch": 0.2524204702627939, + "grad_norm": 3.2860074043273926, + "learning_rate": 1.2621023513139696e-05, + "log_odds_chosen": 0.46500369906425476, + "log_odds_ratio": -0.6647549867630005, + "logits/chosen": -0.6945218443870544, + "logits/rejected": -0.7753416299819946, + "logps/chosen": -0.21082139015197754, + "logps/rejected": -0.2310963124036789, + "loss": 4.4554, + "nll_loss": 1.0473787784576416, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021082140505313873, + "rewards/margins": 0.002027492504566908, + "rewards/rejected": -0.02310963161289692, + "step": 365 + }, + { + "epoch": 0.25311203319502074, + "grad_norm": 2.8510890007019043, + "learning_rate": 1.2655601659751038e-05, + "log_odds_chosen": 0.5070021748542786, + "log_odds_ratio": -0.5470277070999146, + "logits/chosen": -0.40646564960479736, + "logits/rejected": -0.4076330363750458, + "logps/chosen": -0.13049736618995667, + "logps/rejected": -0.24043381214141846, + "loss": 4.2234, + "nll_loss": 1.001145601272583, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013049736618995667, + "rewards/margins": 0.01099364273250103, + "rewards/rejected": -0.024043381214141846, + "step": 366 + }, + { + "epoch": 0.25380359612724757, + "grad_norm": 5.104526519775391, + "learning_rate": 1.269017980636238e-05, + "log_odds_chosen": 0.47252020239830017, + "log_odds_ratio": -0.8805572986602783, + "logits/chosen": -0.8176181316375732, + "logits/rejected": -0.8377338647842407, + "logps/chosen": -0.1574239730834961, + "logps/rejected": -0.26355281472206116, + "loss": 5.7228, + "nll_loss": 1.3426513671875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01574239507317543, + "rewards/margins": 0.010612884536385536, + "rewards/rejected": -0.026355283334851265, + "step": 367 + }, + { + "epoch": 0.2544951590594744, + "grad_norm": 4.936967372894287, + "learning_rate": 1.272475795297372e-05, + "log_odds_chosen": 2.6330790519714355, + "log_odds_ratio": -0.2361859381198883, + "logits/chosen": -0.6837531924247742, + "logits/rejected": -0.7225916385650635, + "logps/chosen": -0.12778867781162262, + "logps/rejected": -0.5990055203437805, + "loss": 6.8949, + "nll_loss": 1.7001042366027832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012778868898749352, + "rewards/margins": 0.04712168127298355, + "rewards/rejected": -0.05990055203437805, + "step": 368 + }, + { + "epoch": 0.2551867219917012, + "grad_norm": 4.205926418304443, + "learning_rate": 1.2759336099585062e-05, + "log_odds_chosen": 0.6637147068977356, + "log_odds_ratio": -0.665414035320282, + "logits/chosen": -0.7646222114562988, + "logits/rejected": -0.7779873013496399, + "logps/chosen": -0.16148130595684052, + "logps/rejected": -0.23958323895931244, + "loss": 4.5449, + "nll_loss": 1.0696804523468018, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01614813134074211, + "rewards/margins": 0.007810194510966539, + "rewards/rejected": -0.023958325386047363, + "step": 369 + }, + { + "epoch": 0.25587828492392806, + "grad_norm": 3.8479011058807373, + "learning_rate": 1.2793914246196404e-05, + "log_odds_chosen": 0.786837100982666, + "log_odds_ratio": -0.5847955942153931, + "logits/chosen": -0.795537531375885, + "logits/rejected": -0.8256769180297852, + "logps/chosen": -0.15839308500289917, + "logps/rejected": -0.3057611882686615, + "loss": 4.7425, + "nll_loss": 1.127145767211914, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015839308500289917, + "rewards/margins": 0.014736810699105263, + "rewards/rejected": -0.03057611919939518, + "step": 370 + }, + { + "epoch": 0.2565698478561549, + "grad_norm": 4.731762886047363, + "learning_rate": 1.2828492392807745e-05, + "log_odds_chosen": 1.866947054862976, + "log_odds_ratio": -0.45763030648231506, + "logits/chosen": -0.8834202289581299, + "logits/rejected": -0.8629869222640991, + "logps/chosen": -0.18499600887298584, + "logps/rejected": -0.4734047055244446, + "loss": 5.7119, + "nll_loss": 1.3822195529937744, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018499599769711494, + "rewards/margins": 0.028840873390436172, + "rewards/rejected": -0.04734047129750252, + "step": 371 + }, + { + "epoch": 0.2572614107883817, + "grad_norm": 3.759676218032837, + "learning_rate": 1.2863070539419087e-05, + "log_odds_chosen": 2.1450793743133545, + "log_odds_ratio": -0.3913061320781708, + "logits/chosen": -0.7757332921028137, + "logits/rejected": -0.7739498615264893, + "logps/chosen": -0.07313913851976395, + "logps/rejected": -0.2787635028362274, + "loss": 4.0562, + "nll_loss": 0.9749183654785156, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007313914131373167, + "rewards/margins": 0.020562436431646347, + "rewards/rejected": -0.0278763510286808, + "step": 372 + }, + { + "epoch": 0.2579529737206086, + "grad_norm": 8.916706085205078, + "learning_rate": 1.289764868603043e-05, + "log_odds_chosen": 0.3090290129184723, + "log_odds_ratio": -0.7983847856521606, + "logits/chosen": -1.282267451286316, + "logits/rejected": -1.275241494178772, + "logps/chosen": -0.16115368902683258, + "logps/rejected": -0.2360834777355194, + "loss": 6.7564, + "nll_loss": 1.6092727184295654, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016115369275212288, + "rewards/margins": 0.007492979057133198, + "rewards/rejected": -0.02360834740102291, + "step": 373 + }, + { + "epoch": 0.25864453665283543, + "grad_norm": 4.516635894775391, + "learning_rate": 1.2932226832641772e-05, + "log_odds_chosen": 1.2972509860992432, + "log_odds_ratio": -0.342338889837265, + "logits/chosen": -0.7201129198074341, + "logits/rejected": -0.7343860268592834, + "logps/chosen": -0.11302807927131653, + "logps/rejected": -0.33227887749671936, + "loss": 5.0235, + "nll_loss": 1.2216522693634033, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011302808299660683, + "rewards/margins": 0.021925078704953194, + "rewards/rejected": -0.033227887004613876, + "step": 374 + }, + { + "epoch": 0.25933609958506226, + "grad_norm": 3.1193737983703613, + "learning_rate": 1.2966804979253113e-05, + "log_odds_chosen": 1.8649317026138306, + "log_odds_ratio": -0.28167441487312317, + "logits/chosen": -0.6341934204101562, + "logits/rejected": -0.6485381126403809, + "logps/chosen": -0.0737493708729744, + "logps/rejected": -0.3434864580631256, + "loss": 4.0531, + "nll_loss": 0.98511803150177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007374937646090984, + "rewards/margins": 0.02697370946407318, + "rewards/rejected": -0.03434864804148674, + "step": 375 + }, + { + "epoch": 0.2600276625172891, + "grad_norm": 4.7184247970581055, + "learning_rate": 1.3001383125864455e-05, + "log_odds_chosen": 1.4624907970428467, + "log_odds_ratio": -0.5053262114524841, + "logits/chosen": -0.577054500579834, + "logits/rejected": -0.6243833303451538, + "logps/chosen": -0.22585873305797577, + "logps/rejected": -0.47850340604782104, + "loss": 6.4885, + "nll_loss": 1.5715917348861694, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.022585874423384666, + "rewards/margins": 0.025264466181397438, + "rewards/rejected": -0.047850340604782104, + "step": 376 + }, + { + "epoch": 0.2607192254495159, + "grad_norm": 5.825414180755615, + "learning_rate": 1.3035961272475797e-05, + "log_odds_chosen": 0.8218759894371033, + "log_odds_ratio": -0.774000883102417, + "logits/chosen": -0.6426214575767517, + "logits/rejected": -0.6425043344497681, + "logps/chosen": -0.18728148937225342, + "logps/rejected": -0.2688373327255249, + "loss": 5.8574, + "nll_loss": 1.3869421482086182, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.01872815005481243, + "rewards/margins": 0.00815558061003685, + "rewards/rejected": -0.02688373252749443, + "step": 377 + }, + { + "epoch": 0.26141078838174275, + "grad_norm": 4.1526780128479, + "learning_rate": 1.3070539419087138e-05, + "log_odds_chosen": 1.282230019569397, + "log_odds_ratio": -0.4600529670715332, + "logits/chosen": -0.9115185737609863, + "logits/rejected": -0.9095169305801392, + "logps/chosen": -0.18320423364639282, + "logps/rejected": -0.3419126272201538, + "loss": 5.7571, + "nll_loss": 1.3932691812515259, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018320422619581223, + "rewards/margins": 0.0158708393573761, + "rewards/rejected": -0.03419126197695732, + "step": 378 + }, + { + "epoch": 0.2621023513139696, + "grad_norm": 2.8454952239990234, + "learning_rate": 1.310511756569848e-05, + "log_odds_chosen": 1.661702275276184, + "log_odds_ratio": -0.20927149057388306, + "logits/chosen": -0.5011343955993652, + "logits/rejected": -0.49014002084732056, + "logps/chosen": -0.0761743038892746, + "logps/rejected": -0.3358593285083771, + "loss": 4.2203, + "nll_loss": 1.0341438055038452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0076174307614564896, + "rewards/margins": 0.025968503206968307, + "rewards/rejected": -0.03358593210577965, + "step": 379 + }, + { + "epoch": 0.2627939142461964, + "grad_norm": 6.023113250732422, + "learning_rate": 1.3139695712309821e-05, + "log_odds_chosen": 0.7916541695594788, + "log_odds_ratio": -0.5888239145278931, + "logits/chosen": -0.7584908604621887, + "logits/rejected": -0.7206529378890991, + "logps/chosen": -0.1440635770559311, + "logps/rejected": -0.3327956199645996, + "loss": 5.0561, + "nll_loss": 1.205130934715271, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014406357891857624, + "rewards/margins": 0.018873201683163643, + "rewards/rejected": -0.03327956050634384, + "step": 380 + }, + { + "epoch": 0.26348547717842324, + "grad_norm": 3.5837507247924805, + "learning_rate": 1.3174273858921163e-05, + "log_odds_chosen": 0.8221812844276428, + "log_odds_ratio": -0.4075019359588623, + "logits/chosen": -0.8715775012969971, + "logits/rejected": -0.9140419363975525, + "logps/chosen": -0.14378435909748077, + "logps/rejected": -0.31232964992523193, + "loss": 3.6665, + "nll_loss": 0.8758648037910461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014378435909748077, + "rewards/margins": 0.016854530200362206, + "rewards/rejected": -0.031232964247465134, + "step": 381 + }, + { + "epoch": 0.2641770401106501, + "grad_norm": 4.768825531005859, + "learning_rate": 1.3208852005532504e-05, + "log_odds_chosen": 2.0849530696868896, + "log_odds_ratio": -0.3738650679588318, + "logits/chosen": -0.9447451233863831, + "logits/rejected": -0.9916914701461792, + "logps/chosen": -0.11316876113414764, + "logps/rejected": -0.4777146577835083, + "loss": 6.7512, + "nll_loss": 1.6504106521606445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011316876858472824, + "rewards/margins": 0.036454588174819946, + "rewards/rejected": -0.04777146503329277, + "step": 382 + }, + { + "epoch": 0.2648686030428769, + "grad_norm": 4.187633037567139, + "learning_rate": 1.3243430152143846e-05, + "log_odds_chosen": 0.632165789604187, + "log_odds_ratio": -0.5006855130195618, + "logits/chosen": -0.6153163909912109, + "logits/rejected": -0.5733051300048828, + "logps/chosen": -0.17688298225402832, + "logps/rejected": -0.3255842924118042, + "loss": 4.8609, + "nll_loss": 1.1651612520217896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017688296735286713, + "rewards/margins": 0.014870131388306618, + "rewards/rejected": -0.03255842626094818, + "step": 383 + }, + { + "epoch": 0.26556016597510373, + "grad_norm": 5.697615623474121, + "learning_rate": 1.3278008298755187e-05, + "log_odds_chosen": 1.6235935688018799, + "log_odds_ratio": -0.8028425574302673, + "logits/chosen": -0.9089243412017822, + "logits/rejected": -0.9217929840087891, + "logps/chosen": -0.2022382915019989, + "logps/rejected": -0.4012358784675598, + "loss": 4.5655, + "nll_loss": 1.0610859394073486, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02022382989525795, + "rewards/margins": 0.019899757578969002, + "rewards/rejected": -0.0401235893368721, + "step": 384 + }, + { + "epoch": 0.26625172890733056, + "grad_norm": 2.4368789196014404, + "learning_rate": 1.3312586445366529e-05, + "log_odds_chosen": 2.746462821960449, + "log_odds_ratio": -0.1782023161649704, + "logits/chosen": -1.0373375415802002, + "logits/rejected": -1.0150465965270996, + "logps/chosen": -0.10444726049900055, + "logps/rejected": -0.7134073376655579, + "loss": 4.0557, + "nll_loss": 0.9960981607437134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01044472586363554, + "rewards/margins": 0.06089600920677185, + "rewards/rejected": -0.07134073227643967, + "step": 385 + }, + { + "epoch": 0.2669432918395574, + "grad_norm": 4.904059886932373, + "learning_rate": 1.334716459197787e-05, + "log_odds_chosen": 0.5463998317718506, + "log_odds_ratio": -0.7089335918426514, + "logits/chosen": -0.7584419250488281, + "logits/rejected": -0.6511543989181519, + "logps/chosen": -0.13503843545913696, + "logps/rejected": -0.19024111330509186, + "loss": 4.8678, + "nll_loss": 1.1460521221160889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013503843918442726, + "rewards/margins": 0.005520268343389034, + "rewards/rejected": -0.019024113193154335, + "step": 386 + }, + { + "epoch": 0.2676348547717842, + "grad_norm": 4.778861045837402, + "learning_rate": 1.3381742738589212e-05, + "log_odds_chosen": 0.6914415955543518, + "log_odds_ratio": -0.568891704082489, + "logits/chosen": -1.069885015487671, + "logits/rejected": -1.0567914247512817, + "logps/chosen": -0.20774585008621216, + "logps/rejected": -0.3966343104839325, + "loss": 5.8371, + "nll_loss": 1.4023927450180054, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020774584263563156, + "rewards/margins": 0.018888846039772034, + "rewards/rejected": -0.03966343030333519, + "step": 387 + }, + { + "epoch": 0.26832641770401106, + "grad_norm": 5.75725793838501, + "learning_rate": 1.3416320885200554e-05, + "log_odds_chosen": 0.8915666341781616, + "log_odds_ratio": -0.7970221042633057, + "logits/chosen": -0.6445469856262207, + "logits/rejected": -0.6691566109657288, + "logps/chosen": -0.18700812757015228, + "logps/rejected": -0.45797044038772583, + "loss": 4.3015, + "nll_loss": 0.9956653118133545, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01870081201195717, + "rewards/margins": 0.027096234261989594, + "rewards/rejected": -0.04579704999923706, + "step": 388 + }, + { + "epoch": 0.2690179806362379, + "grad_norm": 2.98270583152771, + "learning_rate": 1.3450899031811895e-05, + "log_odds_chosen": 1.9932804107666016, + "log_odds_ratio": -0.20717641711235046, + "logits/chosen": -0.4077128767967224, + "logits/rejected": -0.45207881927490234, + "logps/chosen": -0.0988541767001152, + "logps/rejected": -0.30075618624687195, + "loss": 4.0487, + "nll_loss": 0.991469144821167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00988541729748249, + "rewards/margins": 0.020190201699733734, + "rewards/rejected": -0.030075618997216225, + "step": 389 + }, + { + "epoch": 0.2697095435684647, + "grad_norm": 5.803657531738281, + "learning_rate": 1.3485477178423237e-05, + "log_odds_chosen": 1.3662338256835938, + "log_odds_ratio": -0.6121001839637756, + "logits/chosen": -0.6869480609893799, + "logits/rejected": -0.7207568287849426, + "logps/chosen": -0.1792462170124054, + "logps/rejected": -0.5913968086242676, + "loss": 6.2556, + "nll_loss": 1.5027010440826416, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01792462170124054, + "rewards/margins": 0.04121505841612816, + "rewards/rejected": -0.059139683842659, + "step": 390 + }, + { + "epoch": 0.27040110650069155, + "grad_norm": 4.610645771026611, + "learning_rate": 1.3520055325034578e-05, + "log_odds_chosen": 1.56792151927948, + "log_odds_ratio": -0.5591750741004944, + "logits/chosen": -0.744486391544342, + "logits/rejected": -0.7582099437713623, + "logps/chosen": -0.14866414666175842, + "logps/rejected": -0.6275578737258911, + "loss": 3.3825, + "nll_loss": 0.7897180318832397, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014866415411233902, + "rewards/margins": 0.04788937419652939, + "rewards/rejected": -0.06275579333305359, + "step": 391 + }, + { + "epoch": 0.2710926694329184, + "grad_norm": 5.005346775054932, + "learning_rate": 1.355463347164592e-05, + "log_odds_chosen": 1.5481021404266357, + "log_odds_ratio": -0.34036847949028015, + "logits/chosen": -0.9398497939109802, + "logits/rejected": -0.9297147393226624, + "logps/chosen": -0.14529621601104736, + "logps/rejected": -0.3949902057647705, + "loss": 5.6398, + "nll_loss": 1.3759061098098755, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014529621228575706, + "rewards/margins": 0.024969400838017464, + "rewards/rejected": -0.03949902206659317, + "step": 392 + }, + { + "epoch": 0.2717842323651452, + "grad_norm": 4.769950866699219, + "learning_rate": 1.3589211618257262e-05, + "log_odds_chosen": 0.877053439617157, + "log_odds_ratio": -0.5863583087921143, + "logits/chosen": -0.6077442765235901, + "logits/rejected": -0.5932273864746094, + "logps/chosen": -0.16600897908210754, + "logps/rejected": -0.3204789459705353, + "loss": 5.4064, + "nll_loss": 1.2929534912109375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016600897535681725, + "rewards/margins": 0.015446994453668594, + "rewards/rejected": -0.03204789012670517, + "step": 393 + }, + { + "epoch": 0.27247579529737204, + "grad_norm": 4.381612300872803, + "learning_rate": 1.3623789764868603e-05, + "log_odds_chosen": 1.4782297611236572, + "log_odds_ratio": -0.4741993546485901, + "logits/chosen": -1.0078778266906738, + "logits/rejected": -0.9697948694229126, + "logps/chosen": -0.15128880739212036, + "logps/rejected": -0.5114619731903076, + "loss": 5.2417, + "nll_loss": 1.2629996538162231, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015128879807889462, + "rewards/margins": 0.036017317324876785, + "rewards/rejected": -0.05114620178937912, + "step": 394 + }, + { + "epoch": 0.27316735822959887, + "grad_norm": 5.883626937866211, + "learning_rate": 1.3658367911479945e-05, + "log_odds_chosen": 1.8567073345184326, + "log_odds_ratio": -0.4506278932094574, + "logits/chosen": -0.8221858739852905, + "logits/rejected": -0.8678174018859863, + "logps/chosen": -0.1326407641172409, + "logps/rejected": -0.481861412525177, + "loss": 5.6703, + "nll_loss": 1.3725148439407349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013264075852930546, + "rewards/margins": 0.03492206707596779, + "rewards/rejected": -0.04818614572286606, + "step": 395 + }, + { + "epoch": 0.27385892116182575, + "grad_norm": 2.958195924758911, + "learning_rate": 1.3692946058091288e-05, + "log_odds_chosen": 1.946399450302124, + "log_odds_ratio": -0.2867945432662964, + "logits/chosen": -0.6446447372436523, + "logits/rejected": -0.6454988718032837, + "logps/chosen": -0.1034713089466095, + "logps/rejected": -0.30294114351272583, + "loss": 4.4459, + "nll_loss": 1.0828043222427368, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010347130708396435, + "rewards/margins": 0.019946984946727753, + "rewards/rejected": -0.030294114723801613, + "step": 396 + }, + { + "epoch": 0.2745504840940526, + "grad_norm": 7.297904968261719, + "learning_rate": 1.372752420470263e-05, + "log_odds_chosen": 0.1172141432762146, + "log_odds_ratio": -0.780999481678009, + "logits/chosen": -0.7667773365974426, + "logits/rejected": -0.7440715432167053, + "logps/chosen": -0.17577888071537018, + "logps/rejected": -0.20231610536575317, + "loss": 5.8445, + "nll_loss": 1.38302481174469, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017577888444066048, + "rewards/margins": 0.002653722185641527, + "rewards/rejected": -0.02023160830140114, + "step": 397 + }, + { + "epoch": 0.2752420470262794, + "grad_norm": 5.8652849197387695, + "learning_rate": 1.3762102351313971e-05, + "log_odds_chosen": 1.4110496044158936, + "log_odds_ratio": -0.7352426052093506, + "logits/chosen": -0.7150549292564392, + "logits/rejected": -0.704857349395752, + "logps/chosen": -0.18836373090744019, + "logps/rejected": -0.3111065924167633, + "loss": 4.5665, + "nll_loss": 1.0680984258651733, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01883637346327305, + "rewards/margins": 0.01227428950369358, + "rewards/rejected": -0.03111066296696663, + "step": 398 + }, + { + "epoch": 0.27593360995850624, + "grad_norm": 6.426219463348389, + "learning_rate": 1.3796680497925313e-05, + "log_odds_chosen": 0.8483314514160156, + "log_odds_ratio": -1.1913480758666992, + "logits/chosen": -1.009595513343811, + "logits/rejected": -1.0208425521850586, + "logps/chosen": -0.41472068428993225, + "logps/rejected": -0.3666849434375763, + "loss": 6.2569, + "nll_loss": 1.4450807571411133, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.041472069919109344, + "rewards/margins": -0.00480357650667429, + "rewards/rejected": -0.03666849434375763, + "step": 399 + }, + { + "epoch": 0.2766251728907331, + "grad_norm": 4.07353401184082, + "learning_rate": 1.3831258644536654e-05, + "log_odds_chosen": 0.9386905431747437, + "log_odds_ratio": -0.463863343000412, + "logits/chosen": -0.7032485008239746, + "logits/rejected": -0.7028173208236694, + "logps/chosen": -0.107542023062706, + "logps/rejected": -0.3094400465488434, + "loss": 4.3246, + "nll_loss": 1.0347694158554077, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010754203423857689, + "rewards/margins": 0.0201898030936718, + "rewards/rejected": -0.03094400465488434, + "step": 400 + }, + { + "epoch": 0.2773167358229599, + "grad_norm": 3.0598976612091064, + "learning_rate": 1.3865836791147996e-05, + "log_odds_chosen": 1.7289857864379883, + "log_odds_ratio": -0.2864301800727844, + "logits/chosen": -0.7190135717391968, + "logits/rejected": -0.7345994710922241, + "logps/chosen": -0.11391860246658325, + "logps/rejected": -0.3247866630554199, + "loss": 3.956, + "nll_loss": 0.9603667259216309, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011391859501600266, + "rewards/margins": 0.021086808294057846, + "rewards/rejected": -0.03247866779565811, + "step": 401 + }, + { + "epoch": 0.27800829875518673, + "grad_norm": 4.122477054595947, + "learning_rate": 1.3900414937759337e-05, + "log_odds_chosen": 1.7716256380081177, + "log_odds_ratio": -0.3900958299636841, + "logits/chosen": -0.848552942276001, + "logits/rejected": -0.9127116203308105, + "logps/chosen": -0.14013731479644775, + "logps/rejected": -0.5122645497322083, + "loss": 4.9266, + "nll_loss": 1.1926302909851074, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014013731852173805, + "rewards/margins": 0.03721272572875023, + "rewards/rejected": -0.05122645944356918, + "step": 402 + }, + { + "epoch": 0.27869986168741356, + "grad_norm": 4.57697868347168, + "learning_rate": 1.3934993084370679e-05, + "log_odds_chosen": 1.1865309476852417, + "log_odds_ratio": -0.5246220827102661, + "logits/chosen": -0.8770260810852051, + "logits/rejected": -0.8688606023788452, + "logps/chosen": -0.15459983050823212, + "logps/rejected": -0.391287624835968, + "loss": 5.626, + "nll_loss": 1.3540284633636475, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015459983609616756, + "rewards/margins": 0.02366877906024456, + "rewards/rejected": -0.03912876546382904, + "step": 403 + }, + { + "epoch": 0.2793914246196404, + "grad_norm": 5.066405296325684, + "learning_rate": 1.396957123098202e-05, + "log_odds_chosen": 2.0249691009521484, + "log_odds_ratio": -0.4475286602973938, + "logits/chosen": -0.9386860132217407, + "logits/rejected": -0.9849787354469299, + "logps/chosen": -0.1488751769065857, + "logps/rejected": -0.47698670625686646, + "loss": 6.2464, + "nll_loss": 1.5168423652648926, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014887519180774689, + "rewards/margins": 0.03281114995479584, + "rewards/rejected": -0.047698669135570526, + "step": 404 + }, + { + "epoch": 0.2800829875518672, + "grad_norm": 4.11107063293457, + "learning_rate": 1.4004149377593362e-05, + "log_odds_chosen": 1.035890817642212, + "log_odds_ratio": -0.4360055923461914, + "logits/chosen": -0.8740240335464478, + "logits/rejected": -0.88525390625, + "logps/chosen": -0.10174266248941422, + "logps/rejected": -0.2971685528755188, + "loss": 5.0939, + "nll_loss": 1.2298824787139893, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010174266062676907, + "rewards/margins": 0.019542589783668518, + "rewards/rejected": -0.029716856777668, + "step": 405 + }, + { + "epoch": 0.28077455048409405, + "grad_norm": 4.380761623382568, + "learning_rate": 1.4038727524204704e-05, + "log_odds_chosen": 1.4169279336929321, + "log_odds_ratio": -0.34448444843292236, + "logits/chosen": -0.9655833840370178, + "logits/rejected": -0.9509184956550598, + "logps/chosen": -0.11824437975883484, + "logps/rejected": -0.33905068039894104, + "loss": 4.5858, + "nll_loss": 1.111992597579956, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011824438348412514, + "rewards/margins": 0.02208063006401062, + "rewards/rejected": -0.03390507027506828, + "step": 406 + }, + { + "epoch": 0.2814661134163209, + "grad_norm": 4.629016399383545, + "learning_rate": 1.4073305670816045e-05, + "log_odds_chosen": 3.032594680786133, + "log_odds_ratio": -0.40330177545547485, + "logits/chosen": -0.8205009698867798, + "logits/rejected": -0.8420271873474121, + "logps/chosen": -0.0831131562590599, + "logps/rejected": -0.4899987578392029, + "loss": 4.7165, + "nll_loss": 1.1388006210327148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00831131637096405, + "rewards/margins": 0.04068855941295624, + "rewards/rejected": -0.04899987950921059, + "step": 407 + }, + { + "epoch": 0.2821576763485477, + "grad_norm": 5.947005748748779, + "learning_rate": 1.4107883817427387e-05, + "log_odds_chosen": 1.4556632041931152, + "log_odds_ratio": -0.5705589056015015, + "logits/chosen": -0.6047676205635071, + "logits/rejected": -0.6060796976089478, + "logps/chosen": -0.17796441912651062, + "logps/rejected": -0.3482607305049896, + "loss": 4.8253, + "nll_loss": 1.1492795944213867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01779644377529621, + "rewards/margins": 0.01702963188290596, + "rewards/rejected": -0.03482607752084732, + "step": 408 + }, + { + "epoch": 0.28284923928077454, + "grad_norm": 3.4069530963897705, + "learning_rate": 1.4142461964038728e-05, + "log_odds_chosen": 2.048111915588379, + "log_odds_ratio": -0.26843875646591187, + "logits/chosen": -0.6469959616661072, + "logits/rejected": -0.6459199786186218, + "logps/chosen": -0.08298209309577942, + "logps/rejected": -0.2996857762336731, + "loss": 4.1631, + "nll_loss": 1.0139367580413818, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008298208937048912, + "rewards/margins": 0.021670371294021606, + "rewards/rejected": -0.02996858023107052, + "step": 409 + }, + { + "epoch": 0.2835408022130014, + "grad_norm": 8.421577453613281, + "learning_rate": 1.417704011065007e-05, + "log_odds_chosen": 1.7312108278274536, + "log_odds_ratio": -0.6507084369659424, + "logits/chosen": -0.7071995735168457, + "logits/rejected": -0.7199549674987793, + "logps/chosen": -0.10671839118003845, + "logps/rejected": -0.22450372576713562, + "loss": 5.4683, + "nll_loss": 1.3020111322402954, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010671839118003845, + "rewards/margins": 0.011778535321354866, + "rewards/rejected": -0.02245037443935871, + "step": 410 + }, + { + "epoch": 0.2842323651452282, + "grad_norm": 4.508456230163574, + "learning_rate": 1.4211618257261411e-05, + "log_odds_chosen": 2.592770576477051, + "log_odds_ratio": -0.2583865523338318, + "logits/chosen": -0.7199490070343018, + "logits/rejected": -0.7735538482666016, + "logps/chosen": -0.04898786544799805, + "logps/rejected": -0.36356252431869507, + "loss": 5.7314, + "nll_loss": 1.4070085287094116, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004898787010461092, + "rewards/margins": 0.03145746514201164, + "rewards/rejected": -0.03635625168681145, + "step": 411 + }, + { + "epoch": 0.28492392807745504, + "grad_norm": 4.336709499359131, + "learning_rate": 1.4246196403872753e-05, + "log_odds_chosen": 1.3862974643707275, + "log_odds_ratio": -0.333371102809906, + "logits/chosen": -0.7664021253585815, + "logits/rejected": -0.7992305755615234, + "logps/chosen": -0.1225011795759201, + "logps/rejected": -0.3386220932006836, + "loss": 5.4525, + "nll_loss": 1.3297803401947021, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01225011795759201, + "rewards/margins": 0.021612092852592468, + "rewards/rejected": -0.03386221081018448, + "step": 412 + }, + { + "epoch": 0.28561549100968187, + "grad_norm": 5.788278102874756, + "learning_rate": 1.4280774550484095e-05, + "log_odds_chosen": -0.11120755970478058, + "log_odds_ratio": -0.7838518023490906, + "logits/chosen": -1.0839319229125977, + "logits/rejected": -1.0791661739349365, + "logps/chosen": -0.2397696077823639, + "logps/rejected": -0.22414036095142365, + "loss": 5.2104, + "nll_loss": 1.2242244482040405, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023976963013410568, + "rewards/margins": -0.00156292540486902, + "rewards/rejected": -0.022414034232497215, + "step": 413 + }, + { + "epoch": 0.2863070539419087, + "grad_norm": 3.2388267517089844, + "learning_rate": 1.4315352697095436e-05, + "log_odds_chosen": 2.4697952270507812, + "log_odds_ratio": -0.2609533369541168, + "logits/chosen": -0.9798930883407593, + "logits/rejected": -1.0141501426696777, + "logps/chosen": -0.10586879402399063, + "logps/rejected": -0.5839859247207642, + "loss": 5.1401, + "nll_loss": 1.2589378356933594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010586880147457123, + "rewards/margins": 0.047811709344387054, + "rewards/rejected": -0.05839858949184418, + "step": 414 + }, + { + "epoch": 0.2869986168741355, + "grad_norm": 6.667247772216797, + "learning_rate": 1.4349930843706778e-05, + "log_odds_chosen": 1.369337558746338, + "log_odds_ratio": -0.5773957371711731, + "logits/chosen": -0.7696110010147095, + "logits/rejected": -0.8015477657318115, + "logps/chosen": -0.14734028279781342, + "logps/rejected": -0.4369919002056122, + "loss": 4.6701, + "nll_loss": 1.1097811460494995, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014734027907252312, + "rewards/margins": 0.028965162113308907, + "rewards/rejected": -0.04369918629527092, + "step": 415 + }, + { + "epoch": 0.28769017980636236, + "grad_norm": 4.8606133460998535, + "learning_rate": 1.438450899031812e-05, + "log_odds_chosen": 1.2960329055786133, + "log_odds_ratio": -0.3151377737522125, + "logits/chosen": -0.7202938199043274, + "logits/rejected": -0.7407795190811157, + "logps/chosen": -0.1336207389831543, + "logps/rejected": -0.32754695415496826, + "loss": 6.095, + "nll_loss": 1.4922322034835815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01336207427084446, + "rewards/margins": 0.019392620772123337, + "rewards/rejected": -0.032754696905612946, + "step": 416 + }, + { + "epoch": 0.2883817427385892, + "grad_norm": 3.6973016262054443, + "learning_rate": 1.441908713692946e-05, + "log_odds_chosen": 1.6160345077514648, + "log_odds_ratio": -0.4387581944465637, + "logits/chosen": -0.5735676884651184, + "logits/rejected": -0.5724626779556274, + "logps/chosen": -0.16073375940322876, + "logps/rejected": -0.3444352149963379, + "loss": 3.3725, + "nll_loss": 0.7992392778396606, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016073375940322876, + "rewards/margins": 0.018370144069194794, + "rewards/rejected": -0.03444352000951767, + "step": 417 + }, + { + "epoch": 0.28907330567081607, + "grad_norm": 4.923066139221191, + "learning_rate": 1.4453665283540804e-05, + "log_odds_chosen": 2.589185953140259, + "log_odds_ratio": -0.3006196916103363, + "logits/chosen": -0.8475648164749146, + "logits/rejected": -0.8302005529403687, + "logps/chosen": -0.06312157213687897, + "logps/rejected": -0.41984111070632935, + "loss": 5.6696, + "nll_loss": 1.3873448371887207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006312157027423382, + "rewards/margins": 0.03567195683717728, + "rewards/rejected": -0.041984111070632935, + "step": 418 + }, + { + "epoch": 0.2897648686030429, + "grad_norm": 5.649261951446533, + "learning_rate": 1.4488243430152146e-05, + "log_odds_chosen": 0.3662152886390686, + "log_odds_ratio": -0.6572021245956421, + "logits/chosen": -0.748294472694397, + "logits/rejected": -0.7791862487792969, + "logps/chosen": -0.22678637504577637, + "logps/rejected": -0.34637874364852905, + "loss": 6.1642, + "nll_loss": 1.4753245115280151, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022678637877106667, + "rewards/margins": 0.011959237977862358, + "rewards/rejected": -0.034637875854969025, + "step": 419 + }, + { + "epoch": 0.29045643153526973, + "grad_norm": 10.321755409240723, + "learning_rate": 1.4522821576763487e-05, + "log_odds_chosen": 1.4131522178649902, + "log_odds_ratio": -0.981365442276001, + "logits/chosen": -0.7823167443275452, + "logits/rejected": -0.7747278213500977, + "logps/chosen": -0.23187300562858582, + "logps/rejected": -0.38720259070396423, + "loss": 5.0025, + "nll_loss": 1.1524810791015625, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02318730019032955, + "rewards/margins": 0.0155329629778862, + "rewards/rejected": -0.0387202613055706, + "step": 420 + }, + { + "epoch": 0.29114799446749656, + "grad_norm": 5.31191349029541, + "learning_rate": 1.4557399723374829e-05, + "log_odds_chosen": 2.2740554809570312, + "log_odds_ratio": -0.4833701252937317, + "logits/chosen": -0.9618872404098511, + "logits/rejected": -0.974646270275116, + "logps/chosen": -0.1343914270401001, + "logps/rejected": -0.6901106834411621, + "loss": 6.4381, + "nll_loss": 1.56119966506958, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01343914121389389, + "rewards/margins": 0.05557192862033844, + "rewards/rejected": -0.06901106983423233, + "step": 421 + }, + { + "epoch": 0.2918395573997234, + "grad_norm": 4.671203136444092, + "learning_rate": 1.459197786998617e-05, + "log_odds_chosen": 0.7123202085494995, + "log_odds_ratio": -0.5865266919136047, + "logits/chosen": -1.0744010210037231, + "logits/rejected": -1.1003762483596802, + "logps/chosen": -0.1542780101299286, + "logps/rejected": -0.2637343108654022, + "loss": 5.6411, + "nll_loss": 1.3516212701797485, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01542779989540577, + "rewards/margins": 0.010945630259811878, + "rewards/rejected": -0.026373429223895073, + "step": 422 + }, + { + "epoch": 0.2925311203319502, + "grad_norm": 7.3605265617370605, + "learning_rate": 1.4626556016597512e-05, + "log_odds_chosen": 1.9674979448318481, + "log_odds_ratio": -0.4501616656780243, + "logits/chosen": -0.8395421504974365, + "logits/rejected": -0.8268611431121826, + "logps/chosen": -0.14120453596115112, + "logps/rejected": -0.4928848147392273, + "loss": 5.3382, + "nll_loss": 1.2895439863204956, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014120453037321568, + "rewards/margins": 0.03516802936792374, + "rewards/rejected": -0.04928848147392273, + "step": 423 + }, + { + "epoch": 0.29322268326417705, + "grad_norm": 4.548860549926758, + "learning_rate": 1.4661134163208853e-05, + "log_odds_chosen": 1.915052890777588, + "log_odds_ratio": -0.5355948805809021, + "logits/chosen": -0.9863214492797852, + "logits/rejected": -1.032691240310669, + "logps/chosen": -0.1866421401500702, + "logps/rejected": -0.4246392846107483, + "loss": 6.0418, + "nll_loss": 1.4568803310394287, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01866421476006508, + "rewards/margins": 0.023799719288945198, + "rewards/rejected": -0.042463935911655426, + "step": 424 + }, + { + "epoch": 0.2939142461964039, + "grad_norm": 3.851501226425171, + "learning_rate": 1.4695712309820195e-05, + "log_odds_chosen": 1.2596676349639893, + "log_odds_ratio": -0.3075161278247833, + "logits/chosen": -0.7728994488716125, + "logits/rejected": -0.7785760760307312, + "logps/chosen": -0.0999259352684021, + "logps/rejected": -0.3053882122039795, + "loss": 4.9755, + "nll_loss": 1.213114619255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009992592968046665, + "rewards/margins": 0.02054622769355774, + "rewards/rejected": -0.03053881973028183, + "step": 425 + }, + { + "epoch": 0.2946058091286307, + "grad_norm": 5.264786243438721, + "learning_rate": 1.4730290456431537e-05, + "log_odds_chosen": 0.816191554069519, + "log_odds_ratio": -0.7403416633605957, + "logits/chosen": -1.103607177734375, + "logits/rejected": -1.0989108085632324, + "logps/chosen": -0.1591918021440506, + "logps/rejected": -0.33083173632621765, + "loss": 5.3931, + "nll_loss": 1.2742496728897095, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01591918058693409, + "rewards/margins": 0.017163995653390884, + "rewards/rejected": -0.033083174377679825, + "step": 426 + }, + { + "epoch": 0.29529737206085754, + "grad_norm": 4.415341854095459, + "learning_rate": 1.4764868603042878e-05, + "log_odds_chosen": 1.500899076461792, + "log_odds_ratio": -0.3782936930656433, + "logits/chosen": -0.8219119310379028, + "logits/rejected": -0.8497810363769531, + "logps/chosen": -0.062062859535217285, + "logps/rejected": -0.2865581512451172, + "loss": 4.4984, + "nll_loss": 1.0867613554000854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0062062861397862434, + "rewards/margins": 0.02244953066110611, + "rewards/rejected": -0.02865581586956978, + "step": 427 + }, + { + "epoch": 0.2959889349930844, + "grad_norm": 4.308657646179199, + "learning_rate": 1.479944674965422e-05, + "log_odds_chosen": 2.037611484527588, + "log_odds_ratio": -0.3803531527519226, + "logits/chosen": -0.5021131634712219, + "logits/rejected": -0.5083498358726501, + "logps/chosen": -0.07269049435853958, + "logps/rejected": -0.38383227586746216, + "loss": 5.7543, + "nll_loss": 1.4005486965179443, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007269049528986216, + "rewards/margins": 0.031114177778363228, + "rewards/rejected": -0.03838322311639786, + "step": 428 + }, + { + "epoch": 0.2966804979253112, + "grad_norm": 4.234870433807373, + "learning_rate": 1.4834024896265561e-05, + "log_odds_chosen": 2.4187357425689697, + "log_odds_ratio": -0.3537042737007141, + "logits/chosen": -0.7538959383964539, + "logits/rejected": -0.7366330623626709, + "logps/chosen": -0.08905172348022461, + "logps/rejected": -0.35023993253707886, + "loss": 3.5571, + "nll_loss": 0.853912353515625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008905172348022461, + "rewards/margins": 0.026118820533156395, + "rewards/rejected": -0.035023994743824005, + "step": 429 + }, + { + "epoch": 0.29737206085753803, + "grad_norm": 4.188929557800293, + "learning_rate": 1.4868603042876903e-05, + "log_odds_chosen": 2.099421739578247, + "log_odds_ratio": -0.36733826994895935, + "logits/chosen": -0.8708817362785339, + "logits/rejected": -0.8622686266899109, + "logps/chosen": -0.09997798502445221, + "logps/rejected": -0.2955341339111328, + "loss": 5.2648, + "nll_loss": 1.2794567346572876, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009997798129916191, + "rewards/margins": 0.01955561712384224, + "rewards/rejected": -0.02955341339111328, + "step": 430 + }, + { + "epoch": 0.29806362378976486, + "grad_norm": 6.499512672424316, + "learning_rate": 1.4903181189488244e-05, + "log_odds_chosen": 1.8117034435272217, + "log_odds_ratio": -0.44585707783699036, + "logits/chosen": -0.6863371133804321, + "logits/rejected": -0.7225992679595947, + "logps/chosen": -0.13132785260677338, + "logps/rejected": -0.5516153573989868, + "loss": 7.6748, + "nll_loss": 1.8741196393966675, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013132785446941853, + "rewards/margins": 0.0420287549495697, + "rewards/rejected": -0.05516153573989868, + "step": 431 + }, + { + "epoch": 0.2987551867219917, + "grad_norm": 5.252923965454102, + "learning_rate": 1.4937759336099586e-05, + "log_odds_chosen": 0.8260930776596069, + "log_odds_ratio": -0.5757949352264404, + "logits/chosen": -0.7838727831840515, + "logits/rejected": -0.7577035427093506, + "logps/chosen": -0.2402959018945694, + "logps/rejected": -0.39940059185028076, + "loss": 6.0088, + "nll_loss": 1.444616675376892, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02402959018945694, + "rewards/margins": 0.015910470858216286, + "rewards/rejected": -0.039940062910318375, + "step": 432 + }, + { + "epoch": 0.2994467496542185, + "grad_norm": 6.205774784088135, + "learning_rate": 1.4972337482710927e-05, + "log_odds_chosen": 0.7388364672660828, + "log_odds_ratio": -0.7631775140762329, + "logits/chosen": -1.18083655834198, + "logits/rejected": -1.2100516557693481, + "logps/chosen": -0.1917128562927246, + "logps/rejected": -0.25602954626083374, + "loss": 5.6047, + "nll_loss": 1.324852705001831, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01917128637433052, + "rewards/margins": 0.006431670393794775, + "rewards/rejected": -0.025602955371141434, + "step": 433 + }, + { + "epoch": 0.30013831258644535, + "grad_norm": 4.936527252197266, + "learning_rate": 1.5006915629322269e-05, + "log_odds_chosen": 1.6309081315994263, + "log_odds_ratio": -0.3397252559661865, + "logits/chosen": -0.7085134983062744, + "logits/rejected": -0.6994410157203674, + "logps/chosen": -0.09206433594226837, + "logps/rejected": -0.2645779252052307, + "loss": 4.6834, + "nll_loss": 1.1368815898895264, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009206433780491352, + "rewards/margins": 0.017251359298825264, + "rewards/rejected": -0.02645779401063919, + "step": 434 + }, + { + "epoch": 0.3008298755186722, + "grad_norm": 4.56192684173584, + "learning_rate": 1.504149377593361e-05, + "log_odds_chosen": 1.898086667060852, + "log_odds_ratio": -0.3460543155670166, + "logits/chosen": -1.0442228317260742, + "logits/rejected": -1.0632047653198242, + "logps/chosen": -0.11367730796337128, + "logps/rejected": -0.4655604064464569, + "loss": 4.9245, + "nll_loss": 1.196526288986206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011367732658982277, + "rewards/margins": 0.03518830984830856, + "rewards/rejected": -0.04655604064464569, + "step": 435 + }, + { + "epoch": 0.301521438450899, + "grad_norm": 4.4461469650268555, + "learning_rate": 1.5076071922544952e-05, + "log_odds_chosen": 3.3471388816833496, + "log_odds_ratio": -0.146846741437912, + "logits/chosen": -0.8048645257949829, + "logits/rejected": -0.7955081462860107, + "logps/chosen": -0.05524428188800812, + "logps/rejected": -0.6378879547119141, + "loss": 5.6174, + "nll_loss": 1.3896702527999878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005524428561329842, + "rewards/margins": 0.058264363557100296, + "rewards/rejected": -0.06378879398107529, + "step": 436 + }, + { + "epoch": 0.30221300138312585, + "grad_norm": 3.9305005073547363, + "learning_rate": 1.5110650069156294e-05, + "log_odds_chosen": 0.9514443874359131, + "log_odds_ratio": -0.4392205476760864, + "logits/chosen": -0.6176141500473022, + "logits/rejected": -0.6424593925476074, + "logps/chosen": -0.11313779652118683, + "logps/rejected": -0.22065243124961853, + "loss": 4.0434, + "nll_loss": 0.9669332504272461, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011313780210912228, + "rewards/margins": 0.010751464404165745, + "rewards/rejected": -0.022065244615077972, + "step": 437 + }, + { + "epoch": 0.3029045643153527, + "grad_norm": 5.2028045654296875, + "learning_rate": 1.5145228215767635e-05, + "log_odds_chosen": 1.9534803628921509, + "log_odds_ratio": -0.5827986598014832, + "logits/chosen": -0.9196781516075134, + "logits/rejected": -0.8753281235694885, + "logps/chosen": -0.11949843168258667, + "logps/rejected": -0.46784883737564087, + "loss": 4.4481, + "nll_loss": 1.053735375404358, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011949843727052212, + "rewards/margins": 0.03483504056930542, + "rewards/rejected": -0.046784885227680206, + "step": 438 + }, + { + "epoch": 0.3035961272475795, + "grad_norm": 4.5554518699646, + "learning_rate": 1.5179806362378977e-05, + "log_odds_chosen": 2.1872141361236572, + "log_odds_ratio": -0.5206718444824219, + "logits/chosen": -0.4848793148994446, + "logits/rejected": -0.508102297782898, + "logps/chosen": -0.18532368540763855, + "logps/rejected": -0.5864068865776062, + "loss": 4.0094, + "nll_loss": 0.9502801895141602, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018532367423176765, + "rewards/margins": 0.040108323097229004, + "rewards/rejected": -0.05864068865776062, + "step": 439 + }, + { + "epoch": 0.30428769017980634, + "grad_norm": 5.9851202964782715, + "learning_rate": 1.5214384508990317e-05, + "log_odds_chosen": 2.0922446250915527, + "log_odds_ratio": -0.5738884210586548, + "logits/chosen": -0.6798893213272095, + "logits/rejected": -0.7033019661903381, + "logps/chosen": -0.1237122043967247, + "logps/rejected": -0.3784821629524231, + "loss": 4.3193, + "nll_loss": 1.0224275588989258, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012371220625936985, + "rewards/margins": 0.02547699585556984, + "rewards/rejected": -0.03784821555018425, + "step": 440 + }, + { + "epoch": 0.3049792531120332, + "grad_norm": 6.054779529571533, + "learning_rate": 1.5248962655601662e-05, + "log_odds_chosen": 0.9233060479164124, + "log_odds_ratio": -0.6769165396690369, + "logits/chosen": -0.8891846537590027, + "logits/rejected": -0.9096454381942749, + "logps/chosen": -0.16924817860126495, + "logps/rejected": -0.2498570829629898, + "loss": 5.3895, + "nll_loss": 1.2796918153762817, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016924817115068436, + "rewards/margins": 0.00806089024990797, + "rewards/rejected": -0.02498571015894413, + "step": 441 + }, + { + "epoch": 0.30567081604426005, + "grad_norm": 5.956704139709473, + "learning_rate": 1.5283540802213005e-05, + "log_odds_chosen": 1.323585033416748, + "log_odds_ratio": -0.505527913570404, + "logits/chosen": -0.6470650434494019, + "logits/rejected": -0.6922782063484192, + "logps/chosen": -0.14604339003562927, + "logps/rejected": -0.30373379588127136, + "loss": 5.2503, + "nll_loss": 1.2620216608047485, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014604338444769382, + "rewards/margins": 0.01576904021203518, + "rewards/rejected": -0.030373381450772285, + "step": 442 + }, + { + "epoch": 0.3063623789764869, + "grad_norm": 7.952620506286621, + "learning_rate": 1.5318118948824346e-05, + "log_odds_chosen": 2.377955913543701, + "log_odds_ratio": -1.1432132720947266, + "logits/chosen": -0.7932155728340149, + "logits/rejected": -0.7838965058326721, + "logps/chosen": -0.27961477637290955, + "logps/rejected": -0.3812497854232788, + "loss": 4.0085, + "nll_loss": 0.887802243232727, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.027961477637290955, + "rewards/margins": 0.010163499042391777, + "rewards/rejected": -0.03812497854232788, + "step": 443 + }, + { + "epoch": 0.3070539419087137, + "grad_norm": 4.7465362548828125, + "learning_rate": 1.5352697095435685e-05, + "log_odds_chosen": 2.2108235359191895, + "log_odds_ratio": -0.5854645371437073, + "logits/chosen": -0.40235084295272827, + "logits/rejected": -0.43530935049057007, + "logps/chosen": -0.2257867455482483, + "logps/rejected": -0.4482637047767639, + "loss": 5.0511, + "nll_loss": 1.204227328300476, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02257867529988289, + "rewards/margins": 0.02224769815802574, + "rewards/rejected": -0.04482637345790863, + "step": 444 + }, + { + "epoch": 0.30774550484094054, + "grad_norm": 5.645203113555908, + "learning_rate": 1.5387275242047026e-05, + "log_odds_chosen": 2.5590360164642334, + "log_odds_ratio": -0.5274600982666016, + "logits/chosen": -0.9219954013824463, + "logits/rejected": -0.8904546499252319, + "logps/chosen": -0.13450196385383606, + "logps/rejected": -0.6550885438919067, + "loss": 7.0305, + "nll_loss": 1.7048795223236084, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013450197875499725, + "rewards/margins": 0.052058663219213486, + "rewards/rejected": -0.06550885736942291, + "step": 445 + }, + { + "epoch": 0.3084370677731674, + "grad_norm": 16.25078010559082, + "learning_rate": 1.5421853388658368e-05, + "log_odds_chosen": 3.1059811115264893, + "log_odds_ratio": -0.7972801923751831, + "logits/chosen": -0.3910766839981079, + "logits/rejected": -0.4266047775745392, + "logps/chosen": -0.13340765237808228, + "logps/rejected": -0.6035365462303162, + "loss": 3.8148, + "nll_loss": 0.873967707157135, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013340767472982407, + "rewards/margins": 0.04701288416981697, + "rewards/rejected": -0.06035365164279938, + "step": 446 + }, + { + "epoch": 0.3091286307053942, + "grad_norm": 5.219182014465332, + "learning_rate": 1.545643153526971e-05, + "log_odds_chosen": 1.8788645267486572, + "log_odds_ratio": -0.4780917465686798, + "logits/chosen": -0.788673996925354, + "logits/rejected": -0.8205811381340027, + "logps/chosen": -0.11047720164060593, + "logps/rejected": -0.3218545913696289, + "loss": 4.8663, + "nll_loss": 1.1687740087509155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011047719977796078, + "rewards/margins": 0.021137740463018417, + "rewards/rejected": -0.03218546137213707, + "step": 447 + }, + { + "epoch": 0.30982019363762103, + "grad_norm": 4.042296886444092, + "learning_rate": 1.549100968188105e-05, + "log_odds_chosen": 2.074093818664551, + "log_odds_ratio": -0.3665195107460022, + "logits/chosen": -0.6632601022720337, + "logits/rejected": -0.7092807292938232, + "logps/chosen": -0.10485205054283142, + "logps/rejected": -0.4138421416282654, + "loss": 4.8116, + "nll_loss": 1.1662427186965942, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010485205799341202, + "rewards/margins": 0.030899008736014366, + "rewards/rejected": -0.04138421267271042, + "step": 448 + }, + { + "epoch": 0.31051175656984786, + "grad_norm": 3.6253623962402344, + "learning_rate": 1.5525587828492392e-05, + "log_odds_chosen": 1.5242533683776855, + "log_odds_ratio": -0.42981696128845215, + "logits/chosen": -0.6863400340080261, + "logits/rejected": -0.679867684841156, + "logps/chosen": -0.151654452085495, + "logps/rejected": -0.3692028224468231, + "loss": 5.0138, + "nll_loss": 1.2104747295379639, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015165446326136589, + "rewards/margins": 0.021754834800958633, + "rewards/rejected": -0.03692027926445007, + "step": 449 + }, + { + "epoch": 0.3112033195020747, + "grad_norm": 5.387983322143555, + "learning_rate": 1.5560165975103734e-05, + "log_odds_chosen": 1.390764594078064, + "log_odds_ratio": -0.6059899926185608, + "logits/chosen": -1.0066139698028564, + "logits/rejected": -0.9922845363616943, + "logps/chosen": -0.167924165725708, + "logps/rejected": -0.27415433526039124, + "loss": 3.7726, + "nll_loss": 0.8825591802597046, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0167924165725708, + "rewards/margins": 0.010623017325997353, + "rewards/rejected": -0.027415433898568153, + "step": 450 + }, + { + "epoch": 0.3118948824343015, + "grad_norm": 6.010692596435547, + "learning_rate": 1.5594744121715076e-05, + "log_odds_chosen": 2.2689998149871826, + "log_odds_ratio": -0.353574275970459, + "logits/chosen": -1.0005192756652832, + "logits/rejected": -0.9740056991577148, + "logps/chosen": -0.11832346022129059, + "logps/rejected": -0.49686843156814575, + "loss": 6.1829, + "nll_loss": 1.5103559494018555, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011832346208393574, + "rewards/margins": 0.03785449266433716, + "rewards/rejected": -0.049686841666698456, + "step": 451 + }, + { + "epoch": 0.31258644536652835, + "grad_norm": 5.422815799713135, + "learning_rate": 1.5629322268326417e-05, + "log_odds_chosen": 1.0189049243927002, + "log_odds_ratio": -0.6779848337173462, + "logits/chosen": -0.7761744260787964, + "logits/rejected": -0.7803124189376831, + "logps/chosen": -0.23633910715579987, + "logps/rejected": -0.2328052967786789, + "loss": 4.2235, + "nll_loss": 0.988079845905304, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023633908480405807, + "rewards/margins": -0.0003533794078975916, + "rewards/rejected": -0.023280533030629158, + "step": 452 + }, + { + "epoch": 0.3132780082987552, + "grad_norm": 4.928466796875, + "learning_rate": 1.566390041493776e-05, + "log_odds_chosen": 0.30510926246643066, + "log_odds_ratio": -0.697333812713623, + "logits/chosen": -0.7703343629837036, + "logits/rejected": -0.7511740922927856, + "logps/chosen": -0.19591832160949707, + "logps/rejected": -0.2541714906692505, + "loss": 6.5869, + "nll_loss": 1.5769838094711304, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019591832533478737, + "rewards/margins": 0.005825321190059185, + "rewards/rejected": -0.025417150929570198, + "step": 453 + }, + { + "epoch": 0.313969571230982, + "grad_norm": 3.4888973236083984, + "learning_rate": 1.56984785615491e-05, + "log_odds_chosen": 0.9905411601066589, + "log_odds_ratio": -0.6393153667449951, + "logits/chosen": -0.6037004590034485, + "logits/rejected": -0.6011282801628113, + "logps/chosen": -0.18709006905555725, + "logps/rejected": -0.24275116622447968, + "loss": 3.9967, + "nll_loss": 0.9352476596832275, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.018709007650613785, + "rewards/margins": 0.005566108971834183, + "rewards/rejected": -0.024275116622447968, + "step": 454 + }, + { + "epoch": 0.31466113416320884, + "grad_norm": 3.14521861076355, + "learning_rate": 1.5733056708160442e-05, + "log_odds_chosen": 1.300163984298706, + "log_odds_ratio": -0.6069784164428711, + "logits/chosen": -0.6802898645401001, + "logits/rejected": -0.6661124229431152, + "logps/chosen": -0.09800460934638977, + "logps/rejected": -0.19043129682540894, + "loss": 3.8805, + "nll_loss": 0.909426212310791, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009800462052226067, + "rewards/margins": 0.009242668747901917, + "rewards/rejected": -0.019043128937482834, + "step": 455 + }, + { + "epoch": 0.3153526970954357, + "grad_norm": 5.763227939605713, + "learning_rate": 1.5767634854771783e-05, + "log_odds_chosen": 0.5036981701850891, + "log_odds_ratio": -0.6302422285079956, + "logits/chosen": -0.885701060295105, + "logits/rejected": -0.8422713875770569, + "logps/chosen": -0.08884412050247192, + "logps/rejected": -0.15536990761756897, + "loss": 4.7726, + "nll_loss": 1.1301307678222656, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008884412236511707, + "rewards/margins": 0.006652578711509705, + "rewards/rejected": -0.015536990016698837, + "step": 456 + }, + { + "epoch": 0.3160442600276625, + "grad_norm": 3.6244726181030273, + "learning_rate": 1.5802213001383125e-05, + "log_odds_chosen": -0.06441202759742737, + "log_odds_ratio": -0.7889564037322998, + "logits/chosen": -0.7942500114440918, + "logits/rejected": -0.7812097072601318, + "logps/chosen": -0.15385322272777557, + "logps/rejected": -0.15002594888210297, + "loss": 4.216, + "nll_loss": 0.9750944972038269, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.015385321341454983, + "rewards/margins": -0.0003827265463769436, + "rewards/rejected": -0.015002595260739326, + "step": 457 + }, + { + "epoch": 0.31673582295988933, + "grad_norm": 8.510712623596191, + "learning_rate": 1.5836791147994467e-05, + "log_odds_chosen": 2.3712868690490723, + "log_odds_ratio": -0.5050680041313171, + "logits/chosen": -0.6574930548667908, + "logits/rejected": -0.6933423280715942, + "logps/chosen": -0.1535794585943222, + "logps/rejected": -0.4186011552810669, + "loss": 7.2306, + "nll_loss": 1.757137656211853, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01535794697701931, + "rewards/margins": 0.02650216966867447, + "rewards/rejected": -0.04186011478304863, + "step": 458 + }, + { + "epoch": 0.31742738589211617, + "grad_norm": 6.765665531158447, + "learning_rate": 1.5871369294605808e-05, + "log_odds_chosen": 0.1631660908460617, + "log_odds_ratio": -0.7178246378898621, + "logits/chosen": -1.0220084190368652, + "logits/rejected": -1.027510643005371, + "logps/chosen": -0.1926945149898529, + "logps/rejected": -0.2280651032924652, + "loss": 5.968, + "nll_loss": 1.4202252626419067, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01926945522427559, + "rewards/margins": 0.003537057200446725, + "rewards/rejected": -0.02280651032924652, + "step": 459 + }, + { + "epoch": 0.318118948824343, + "grad_norm": 6.357319355010986, + "learning_rate": 1.590594744121715e-05, + "log_odds_chosen": 1.1054508686065674, + "log_odds_ratio": -0.6838944554328918, + "logits/chosen": -1.1939641237258911, + "logits/rejected": -1.2263739109039307, + "logps/chosen": -0.10918106883764267, + "logps/rejected": -0.3688642978668213, + "loss": 6.5195, + "nll_loss": 1.5614843368530273, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010918107815086842, + "rewards/margins": 0.02596832811832428, + "rewards/rejected": -0.03688643127679825, + "step": 460 + }, + { + "epoch": 0.3188105117565698, + "grad_norm": 3.7231040000915527, + "learning_rate": 1.594052558782849e-05, + "log_odds_chosen": 2.0059876441955566, + "log_odds_ratio": -0.4559151232242584, + "logits/chosen": -0.224339097738266, + "logits/rejected": -0.22532802820205688, + "logps/chosen": -0.10951988399028778, + "logps/rejected": -0.24661627411842346, + "loss": 3.3677, + "nll_loss": 0.7963347434997559, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010951988399028778, + "rewards/margins": 0.013709638267755508, + "rewards/rejected": -0.024661626666784286, + "step": 461 + }, + { + "epoch": 0.31950207468879666, + "grad_norm": 6.247403621673584, + "learning_rate": 1.5975103734439833e-05, + "log_odds_chosen": 1.5408384799957275, + "log_odds_ratio": -0.5715996623039246, + "logits/chosen": -0.9309489130973816, + "logits/rejected": -0.9221597909927368, + "logps/chosen": -0.19324921071529388, + "logps/rejected": -0.38850072026252747, + "loss": 5.1677, + "nll_loss": 1.23475980758667, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01932491920888424, + "rewards/margins": 0.019525151699781418, + "rewards/rejected": -0.038850072771310806, + "step": 462 + }, + { + "epoch": 0.3201936376210235, + "grad_norm": 4.389791965484619, + "learning_rate": 1.6009681881051174e-05, + "log_odds_chosen": 1.7772701978683472, + "log_odds_ratio": -0.446536660194397, + "logits/chosen": -1.0960631370544434, + "logits/rejected": -1.1086949110031128, + "logps/chosen": -0.11626395583152771, + "logps/rejected": -0.38875579833984375, + "loss": 5.5352, + "nll_loss": 1.3391516208648682, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01162639632821083, + "rewards/margins": 0.027249179780483246, + "rewards/rejected": -0.03887557610869408, + "step": 463 + }, + { + "epoch": 0.32088520055325037, + "grad_norm": 4.312671661376953, + "learning_rate": 1.604426002766252e-05, + "log_odds_chosen": 0.7777257561683655, + "log_odds_ratio": -0.5274251103401184, + "logits/chosen": -1.0016077756881714, + "logits/rejected": -1.0476646423339844, + "logps/chosen": -0.42495298385620117, + "logps/rejected": -0.5335454940795898, + "loss": 4.4051, + "nll_loss": 1.0485405921936035, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04249529913067818, + "rewards/margins": 0.01085924543440342, + "rewards/rejected": -0.053354546427726746, + "step": 464 + }, + { + "epoch": 0.3215767634854772, + "grad_norm": 3.821538209915161, + "learning_rate": 1.607883817427386e-05, + "log_odds_chosen": 1.589055061340332, + "log_odds_ratio": -0.5532294511795044, + "logits/chosen": -0.7510684728622437, + "logits/rejected": -0.753553569316864, + "logps/chosen": -0.0831575095653534, + "logps/rejected": -0.31230461597442627, + "loss": 3.5947, + "nll_loss": 0.8433531522750854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00831575132906437, + "rewards/margins": 0.022914709523320198, + "rewards/rejected": -0.031230460852384567, + "step": 465 + }, + { + "epoch": 0.32226832641770403, + "grad_norm": 4.866796493530273, + "learning_rate": 1.6113416320885202e-05, + "log_odds_chosen": 2.049978017807007, + "log_odds_ratio": -0.5020076036453247, + "logits/chosen": -1.1018893718719482, + "logits/rejected": -1.086807131767273, + "logps/chosen": -0.17358484864234924, + "logps/rejected": -0.4650211036205292, + "loss": 4.7312, + "nll_loss": 1.1325989961624146, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017358483746647835, + "rewards/margins": 0.029143624007701874, + "rewards/rejected": -0.04650210589170456, + "step": 466 + }, + { + "epoch": 0.32295988934993086, + "grad_norm": 4.3561906814575195, + "learning_rate": 1.6147994467496544e-05, + "log_odds_chosen": 1.2381858825683594, + "log_odds_ratio": -0.5811147689819336, + "logits/chosen": -1.0292110443115234, + "logits/rejected": -1.0253915786743164, + "logps/chosen": -0.21870014071464539, + "logps/rejected": -0.46153783798217773, + "loss": 4.2517, + "nll_loss": 1.004812240600586, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021870015189051628, + "rewards/margins": 0.024283768609166145, + "rewards/rejected": -0.04615378752350807, + "step": 467 + }, + { + "epoch": 0.3236514522821577, + "grad_norm": 6.4525017738342285, + "learning_rate": 1.6182572614107886e-05, + "log_odds_chosen": 1.469836950302124, + "log_odds_ratio": -0.6502451300621033, + "logits/chosen": -1.014503002166748, + "logits/rejected": -0.9917067289352417, + "logps/chosen": -0.2316511869430542, + "logps/rejected": -0.4777286648750305, + "loss": 6.1528, + "nll_loss": 1.473185420036316, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02316511794924736, + "rewards/margins": 0.024607747793197632, + "rewards/rejected": -0.04777286574244499, + "step": 468 + }, + { + "epoch": 0.3243430152143845, + "grad_norm": 5.878066539764404, + "learning_rate": 1.6217150760719227e-05, + "log_odds_chosen": 1.0376574993133545, + "log_odds_ratio": -0.5789130926132202, + "logits/chosen": -1.0515923500061035, + "logits/rejected": -1.0690171718597412, + "logps/chosen": -0.24930287897586823, + "logps/rejected": -0.3463584780693054, + "loss": 4.356, + "nll_loss": 1.0311205387115479, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.024930287152528763, + "rewards/margins": 0.009705559350550175, + "rewards/rejected": -0.03463584929704666, + "step": 469 + }, + { + "epoch": 0.32503457814661135, + "grad_norm": 3.6630163192749023, + "learning_rate": 1.625172890733057e-05, + "log_odds_chosen": 1.750572919845581, + "log_odds_ratio": -0.36723726987838745, + "logits/chosen": -0.8682456016540527, + "logits/rejected": -0.8956509828567505, + "logps/chosen": -0.17868489027023315, + "logps/rejected": -0.36978819966316223, + "loss": 4.0217, + "nll_loss": 0.968694806098938, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017868489027023315, + "rewards/margins": 0.01911032944917679, + "rewards/rejected": -0.036978818476200104, + "step": 470 + }, + { + "epoch": 0.3257261410788382, + "grad_norm": 4.763913631439209, + "learning_rate": 1.628630705394191e-05, + "log_odds_chosen": 0.9818230271339417, + "log_odds_ratio": -0.5156347751617432, + "logits/chosen": -0.7395144701004028, + "logits/rejected": -0.7224610447883606, + "logps/chosen": -0.1149940937757492, + "logps/rejected": -0.23291422426700592, + "loss": 4.7129, + "nll_loss": 1.126656413078308, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011499409563839436, + "rewards/margins": 0.011792012490332127, + "rewards/rejected": -0.023291420191526413, + "step": 471 + }, + { + "epoch": 0.326417704011065, + "grad_norm": 5.32498836517334, + "learning_rate": 1.6320885200553252e-05, + "log_odds_chosen": 0.628685712814331, + "log_odds_ratio": -0.44037503004074097, + "logits/chosen": -1.0388284921646118, + "logits/rejected": -1.0592763423919678, + "logps/chosen": -0.18162651360034943, + "logps/rejected": -0.32462698221206665, + "loss": 6.1673, + "nll_loss": 1.4977924823760986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018162650987505913, + "rewards/margins": 0.014300045557320118, + "rewards/rejected": -0.032462697476148605, + "step": 472 + }, + { + "epoch": 0.32710926694329184, + "grad_norm": 4.730746269226074, + "learning_rate": 1.6355463347164593e-05, + "log_odds_chosen": 2.820221424102783, + "log_odds_ratio": -0.34589338302612305, + "logits/chosen": -0.8554526567459106, + "logits/rejected": -0.8768529891967773, + "logps/chosen": -0.10701534152030945, + "logps/rejected": -0.7232824563980103, + "loss": 4.4983, + "nll_loss": 1.0899972915649414, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01070153433829546, + "rewards/margins": 0.06162671372294426, + "rewards/rejected": -0.07232824712991714, + "step": 473 + }, + { + "epoch": 0.3278008298755187, + "grad_norm": 5.415292263031006, + "learning_rate": 1.6390041493775935e-05, + "log_odds_chosen": 1.2005585432052612, + "log_odds_ratio": -0.6597185730934143, + "logits/chosen": -0.5219282507896423, + "logits/rejected": -0.523371160030365, + "logps/chosen": -0.1712622493505478, + "logps/rejected": -0.28718945384025574, + "loss": 5.7351, + "nll_loss": 1.3677911758422852, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01712622493505478, + "rewards/margins": 0.011592721566557884, + "rewards/rejected": -0.028718946501612663, + "step": 474 + }, + { + "epoch": 0.3284923928077455, + "grad_norm": 6.83495569229126, + "learning_rate": 1.6424619640387277e-05, + "log_odds_chosen": 1.5683974027633667, + "log_odds_ratio": -0.325946569442749, + "logits/chosen": -0.7668761610984802, + "logits/rejected": -0.8554536700248718, + "logps/chosen": -0.07481614500284195, + "logps/rejected": -0.23265601694583893, + "loss": 3.3869, + "nll_loss": 0.8141358494758606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007481614127755165, + "rewards/margins": 0.015783987939357758, + "rewards/rejected": -0.023265602067112923, + "step": 475 + }, + { + "epoch": 0.32918395573997233, + "grad_norm": 5.765621662139893, + "learning_rate": 1.6459197786998618e-05, + "log_odds_chosen": 0.0016551315784454346, + "log_odds_ratio": -0.9704076647758484, + "logits/chosen": -0.8410443067550659, + "logits/rejected": -0.8325227499008179, + "logps/chosen": -0.1430882066488266, + "logps/rejected": -0.13438117504119873, + "loss": 4.226, + "nll_loss": 0.959465503692627, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014308820478618145, + "rewards/margins": -0.000870703486725688, + "rewards/rejected": -0.013438117690384388, + "step": 476 + }, + { + "epoch": 0.32987551867219916, + "grad_norm": 4.890822887420654, + "learning_rate": 1.649377593360996e-05, + "log_odds_chosen": 1.6676287651062012, + "log_odds_ratio": -0.2915194034576416, + "logits/chosen": -0.7431758642196655, + "logits/rejected": -0.7679858803749084, + "logps/chosen": -0.1129474937915802, + "logps/rejected": -0.3145168125629425, + "loss": 4.9774, + "nll_loss": 1.2151939868927002, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011294749565422535, + "rewards/margins": 0.02015693299472332, + "rewards/rejected": -0.03145168349146843, + "step": 477 + }, + { + "epoch": 0.330567081604426, + "grad_norm": 4.508111953735352, + "learning_rate": 1.65283540802213e-05, + "log_odds_chosen": 1.216369867324829, + "log_odds_ratio": -0.42188069224357605, + "logits/chosen": -1.0423190593719482, + "logits/rejected": -1.047698974609375, + "logps/chosen": -0.10603365302085876, + "logps/rejected": -0.3019612729549408, + "loss": 4.6539, + "nll_loss": 1.121298909187317, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01060336735099554, + "rewards/margins": 0.019592760130763054, + "rewards/rejected": -0.03019612841308117, + "step": 478 + }, + { + "epoch": 0.3312586445366528, + "grad_norm": 3.8689982891082764, + "learning_rate": 1.6562932226832643e-05, + "log_odds_chosen": 1.1521224975585938, + "log_odds_ratio": -0.3827163279056549, + "logits/chosen": -0.7599689364433289, + "logits/rejected": -0.7738473415374756, + "logps/chosen": -0.11503064632415771, + "logps/rejected": -0.32514065504074097, + "loss": 4.676, + "nll_loss": 1.1307350397109985, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011503065004944801, + "rewards/margins": 0.021010998636484146, + "rewards/rejected": -0.0325140655040741, + "step": 479 + }, + { + "epoch": 0.33195020746887965, + "grad_norm": 6.303074359893799, + "learning_rate": 1.6597510373443984e-05, + "log_odds_chosen": 1.5216987133026123, + "log_odds_ratio": -0.578852117061615, + "logits/chosen": -0.43270474672317505, + "logits/rejected": -0.4453016519546509, + "logps/chosen": -0.15122368931770325, + "logps/rejected": -0.26229071617126465, + "loss": 4.962, + "nll_loss": 1.1826035976409912, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015122368931770325, + "rewards/margins": 0.01110670156776905, + "rewards/rejected": -0.026229072362184525, + "step": 480 + }, + { + "epoch": 0.3326417704011065, + "grad_norm": 4.696047306060791, + "learning_rate": 1.6632088520055326e-05, + "log_odds_chosen": 1.1306276321411133, + "log_odds_ratio": -0.5183489322662354, + "logits/chosen": -1.0037342309951782, + "logits/rejected": -1.044380784034729, + "logps/chosen": -0.1369670182466507, + "logps/rejected": -0.2618962526321411, + "loss": 4.4882, + "nll_loss": 1.0702087879180908, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013696704059839249, + "rewards/margins": 0.012492923997342587, + "rewards/rejected": -0.02618962712585926, + "step": 481 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 4.115095615386963, + "learning_rate": 1.6666666666666667e-05, + "log_odds_chosen": 1.7002249956130981, + "log_odds_ratio": -0.3261849284172058, + "logits/chosen": -0.8328145146369934, + "logits/rejected": -0.7903501391410828, + "logps/chosen": -0.10042671114206314, + "logps/rejected": -0.33854255080223083, + "loss": 4.6884, + "nll_loss": 1.1394823789596558, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010042671114206314, + "rewards/margins": 0.02381158620119095, + "rewards/rejected": -0.03385425731539726, + "step": 482 + }, + { + "epoch": 0.33402489626556015, + "grad_norm": 5.309091567993164, + "learning_rate": 1.670124481327801e-05, + "log_odds_chosen": -0.06614989042282104, + "log_odds_ratio": -0.7974898219108582, + "logits/chosen": -0.9340482354164124, + "logits/rejected": -0.9295551180839539, + "logps/chosen": -0.1734018474817276, + "logps/rejected": -0.1410864144563675, + "loss": 4.9642, + "nll_loss": 1.161311149597168, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01734018512070179, + "rewards/margins": -0.0032315438147634268, + "rewards/rejected": -0.014108642935752869, + "step": 483 + }, + { + "epoch": 0.334716459197787, + "grad_norm": 5.502237796783447, + "learning_rate": 1.673582295988935e-05, + "log_odds_chosen": 1.4345967769622803, + "log_odds_ratio": -0.4077805280685425, + "logits/chosen": -0.9030207395553589, + "logits/rejected": -0.9316298961639404, + "logps/chosen": -0.15604329109191895, + "logps/rejected": -0.4233133792877197, + "loss": 6.3053, + "nll_loss": 1.5355592966079712, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01560432743281126, + "rewards/margins": 0.026727013289928436, + "rewards/rejected": -0.04233134165406227, + "step": 484 + }, + { + "epoch": 0.3354080221300138, + "grad_norm": 4.085949420928955, + "learning_rate": 1.6770401106500692e-05, + "log_odds_chosen": 0.6022038459777832, + "log_odds_ratio": -0.5693349242210388, + "logits/chosen": -0.7707476019859314, + "logits/rejected": -0.7946466207504272, + "logps/chosen": -0.12312422692775726, + "logps/rejected": -0.3257462978363037, + "loss": 5.4492, + "nll_loss": 1.3053703308105469, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012312421575188637, + "rewards/margins": 0.020262207835912704, + "rewards/rejected": -0.03257462754845619, + "step": 485 + }, + { + "epoch": 0.3360995850622407, + "grad_norm": 5.308136463165283, + "learning_rate": 1.6804979253112034e-05, + "log_odds_chosen": 0.49736395478248596, + "log_odds_ratio": -0.5352747440338135, + "logits/chosen": -0.538204550743103, + "logits/rejected": -0.5477010607719421, + "logps/chosen": -0.18097350001335144, + "logps/rejected": -0.27122586965560913, + "loss": 4.8144, + "nll_loss": 1.1500778198242188, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018097348511219025, + "rewards/margins": 0.00902523659169674, + "rewards/rejected": -0.027122585102915764, + "step": 486 + }, + { + "epoch": 0.3367911479944675, + "grad_norm": 6.017159938812256, + "learning_rate": 1.6839557399723375e-05, + "log_odds_chosen": 2.9066498279571533, + "log_odds_ratio": -0.3275456428527832, + "logits/chosen": -0.9392266273498535, + "logits/rejected": -0.958389401435852, + "logps/chosen": -0.19047018885612488, + "logps/rejected": -0.7778450846672058, + "loss": 5.6669, + "nll_loss": 1.3839747905731201, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019047021865844727, + "rewards/margins": 0.058737486600875854, + "rewards/rejected": -0.07778450846672058, + "step": 487 + }, + { + "epoch": 0.33748271092669435, + "grad_norm": 8.302292823791504, + "learning_rate": 1.6874135546334717e-05, + "log_odds_chosen": 0.5107730031013489, + "log_odds_ratio": -1.322838544845581, + "logits/chosen": -0.8151763081550598, + "logits/rejected": -0.7991777062416077, + "logps/chosen": -0.42283517122268677, + "logps/rejected": -0.34093451499938965, + "loss": 4.967, + "nll_loss": 1.1094615459442139, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04228351637721062, + "rewards/margins": -0.008190065622329712, + "rewards/rejected": -0.034093454480171204, + "step": 488 + }, + { + "epoch": 0.3381742738589212, + "grad_norm": 3.8961689472198486, + "learning_rate": 1.690871369294606e-05, + "log_odds_chosen": 2.57572603225708, + "log_odds_ratio": -0.43755820393562317, + "logits/chosen": -0.8054549694061279, + "logits/rejected": -0.8180067539215088, + "logps/chosen": -0.06617649644613266, + "logps/rejected": -0.4114043116569519, + "loss": 4.2428, + "nll_loss": 1.016952395439148, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.006617650389671326, + "rewards/margins": 0.034522779285907745, + "rewards/rejected": -0.04114042967557907, + "step": 489 + }, + { + "epoch": 0.338865836791148, + "grad_norm": 7.237804889678955, + "learning_rate": 1.69432918395574e-05, + "log_odds_chosen": 1.2616724967956543, + "log_odds_ratio": -0.6169224381446838, + "logits/chosen": -1.0781421661376953, + "logits/rejected": -1.076696515083313, + "logps/chosen": -0.13765141367912292, + "logps/rejected": -0.36732688546180725, + "loss": 6.1128, + "nll_loss": 1.4665086269378662, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013765140436589718, + "rewards/margins": 0.022967549040913582, + "rewards/rejected": -0.036732688546180725, + "step": 490 + }, + { + "epoch": 0.33955739972337484, + "grad_norm": 3.6425857543945312, + "learning_rate": 1.697786998616874e-05, + "log_odds_chosen": 1.7106664180755615, + "log_odds_ratio": -0.46601614356040955, + "logits/chosen": -1.2055742740631104, + "logits/rejected": -1.2279021739959717, + "logps/chosen": -0.1318834125995636, + "logps/rejected": -0.2605992257595062, + "loss": 5.4442, + "nll_loss": 1.3144505023956299, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01318834163248539, + "rewards/margins": 0.012871582061052322, + "rewards/rejected": -0.02605992555618286, + "step": 491 + }, + { + "epoch": 0.34024896265560167, + "grad_norm": 5.997433185577393, + "learning_rate": 1.7012448132780083e-05, + "log_odds_chosen": 1.4230337142944336, + "log_odds_ratio": -0.8804874420166016, + "logits/chosen": -0.6543467044830322, + "logits/rejected": -0.6305510401725769, + "logps/chosen": -0.1759967803955078, + "logps/rejected": -0.5112686157226562, + "loss": 4.3876, + "nll_loss": 1.008862018585205, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0175996795296669, + "rewards/margins": 0.033527180552482605, + "rewards/rejected": -0.051126863807439804, + "step": 492 + }, + { + "epoch": 0.3409405255878285, + "grad_norm": 5.290637969970703, + "learning_rate": 1.7047026279391425e-05, + "log_odds_chosen": 1.3665871620178223, + "log_odds_ratio": -0.6540405750274658, + "logits/chosen": -0.7667418122291565, + "logits/rejected": -0.7568073272705078, + "logps/chosen": -0.22884142398834229, + "logps/rejected": -0.5813363194465637, + "loss": 4.7473, + "nll_loss": 1.1214134693145752, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02288414165377617, + "rewards/margins": 0.0352494940161705, + "rewards/rejected": -0.05813363194465637, + "step": 493 + }, + { + "epoch": 0.34163208852005533, + "grad_norm": 4.3733720779418945, + "learning_rate": 1.7081604426002766e-05, + "log_odds_chosen": 2.179429054260254, + "log_odds_ratio": -0.53110271692276, + "logits/chosen": -1.1724821329116821, + "logits/rejected": -1.217232346534729, + "logps/chosen": -0.1547495275735855, + "logps/rejected": -0.5294268727302551, + "loss": 5.0789, + "nll_loss": 1.216625452041626, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015474953688681126, + "rewards/margins": 0.03746773302555084, + "rewards/rejected": -0.05294268578290939, + "step": 494 + }, + { + "epoch": 0.34232365145228216, + "grad_norm": 4.039161682128906, + "learning_rate": 1.7116182572614108e-05, + "log_odds_chosen": 1.4472264051437378, + "log_odds_ratio": -0.47876089811325073, + "logits/chosen": -1.058305263519287, + "logits/rejected": -1.0702276229858398, + "logps/chosen": -0.1727418750524521, + "logps/rejected": -0.3430398106575012, + "loss": 4.4938, + "nll_loss": 1.0755620002746582, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01727418787777424, + "rewards/margins": 0.017029793933033943, + "rewards/rejected": -0.03430397808551788, + "step": 495 + }, + { + "epoch": 0.343015214384509, + "grad_norm": 4.610265731811523, + "learning_rate": 1.715076071922545e-05, + "log_odds_chosen": 2.3182146549224854, + "log_odds_ratio": -0.28784000873565674, + "logits/chosen": -0.9007983803749084, + "logits/rejected": -0.8986612558364868, + "logps/chosen": -0.1349295973777771, + "logps/rejected": -0.44597327709198, + "loss": 4.6976, + "nll_loss": 1.1456117630004883, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013492961414158344, + "rewards/margins": 0.031104369089007378, + "rewards/rejected": -0.044597327709198, + "step": 496 + }, + { + "epoch": 0.3437067773167358, + "grad_norm": 7.051185131072998, + "learning_rate": 1.718533886583679e-05, + "log_odds_chosen": 1.7162365913391113, + "log_odds_ratio": -1.1342723369598389, + "logits/chosen": -1.022646188735962, + "logits/rejected": -1.019441843032837, + "logps/chosen": -0.3917577266693115, + "logps/rejected": -0.5872882008552551, + "loss": 4.6114, + "nll_loss": 1.039417028427124, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03917577490210533, + "rewards/margins": 0.0195530503988266, + "rewards/rejected": -0.05872882157564163, + "step": 497 + }, + { + "epoch": 0.34439834024896265, + "grad_norm": 7.3869757652282715, + "learning_rate": 1.7219917012448132e-05, + "log_odds_chosen": 0.07555952668190002, + "log_odds_ratio": -0.8538734912872314, + "logits/chosen": -0.8446725606918335, + "logits/rejected": -0.8641011714935303, + "logps/chosen": -0.23065711557865143, + "logps/rejected": -0.21285629272460938, + "loss": 6.8209, + "nll_loss": 1.6198467016220093, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023065710440278053, + "rewards/margins": -0.0017800810746848583, + "rewards/rejected": -0.021285628899931908, + "step": 498 + }, + { + "epoch": 0.3450899031811895, + "grad_norm": 4.562398433685303, + "learning_rate": 1.7254495159059474e-05, + "log_odds_chosen": 1.819996953010559, + "log_odds_ratio": -0.4225596487522125, + "logits/chosen": -0.8365169763565063, + "logits/rejected": -0.8637485504150391, + "logps/chosen": -0.13593707978725433, + "logps/rejected": -0.42044973373413086, + "loss": 4.9074, + "nll_loss": 1.1845835447311401, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013593706302344799, + "rewards/margins": 0.028451265767216682, + "rewards/rejected": -0.04204497113823891, + "step": 499 + }, + { + "epoch": 0.3457814661134163, + "grad_norm": 4.474560260772705, + "learning_rate": 1.7289073305670816e-05, + "log_odds_chosen": 0.9567031860351562, + "log_odds_ratio": -0.42330771684646606, + "logits/chosen": -1.1170122623443604, + "logits/rejected": -1.1414347887039185, + "logps/chosen": -0.13176178932189941, + "logps/rejected": -0.3465428054332733, + "loss": 5.3409, + "nll_loss": 1.292906641960144, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01317618042230606, + "rewards/margins": 0.02147809974849224, + "rewards/rejected": -0.03465428203344345, + "step": 500 + }, + { + "epoch": 0.34647302904564314, + "grad_norm": 5.560184955596924, + "learning_rate": 1.7323651452282157e-05, + "log_odds_chosen": 2.8556575775146484, + "log_odds_ratio": -0.28292903304100037, + "logits/chosen": -0.7554357051849365, + "logits/rejected": -0.7841203212738037, + "logps/chosen": -0.14785116910934448, + "logps/rejected": -0.6094825267791748, + "loss": 6.0501, + "nll_loss": 1.484229564666748, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014785117469727993, + "rewards/margins": 0.04616313427686691, + "rewards/rejected": -0.060948245227336884, + "step": 501 + }, + { + "epoch": 0.34716459197787, + "grad_norm": 4.023012161254883, + "learning_rate": 1.73582295988935e-05, + "log_odds_chosen": 1.4860143661499023, + "log_odds_ratio": -0.3626514673233032, + "logits/chosen": -0.9559276103973389, + "logits/rejected": -0.9815624952316284, + "logps/chosen": -0.11361101269721985, + "logps/rejected": -0.34690365195274353, + "loss": 5.1345, + "nll_loss": 1.2473585605621338, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011361101642251015, + "rewards/margins": 0.02332925982773304, + "rewards/rejected": -0.034690361469984055, + "step": 502 + }, + { + "epoch": 0.3478561549100968, + "grad_norm": 3.716696262359619, + "learning_rate": 1.739280774550484e-05, + "log_odds_chosen": 1.6061586141586304, + "log_odds_ratio": -0.38605138659477234, + "logits/chosen": -0.9349660873413086, + "logits/rejected": -0.9320093393325806, + "logps/chosen": -0.14063169062137604, + "logps/rejected": -0.38299134373664856, + "loss": 5.7973, + "nll_loss": 1.410730004310608, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014063170179724693, + "rewards/margins": 0.02423596754670143, + "rewards/rejected": -0.038299135863780975, + "step": 503 + }, + { + "epoch": 0.34854771784232363, + "grad_norm": 4.08083438873291, + "learning_rate": 1.7427385892116182e-05, + "log_odds_chosen": 1.4298001527786255, + "log_odds_ratio": -0.4044135808944702, + "logits/chosen": -1.0546984672546387, + "logits/rejected": -1.0735547542572021, + "logps/chosen": -0.2744176983833313, + "logps/rejected": -0.5156873464584351, + "loss": 5.2015, + "nll_loss": 1.259932279586792, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02744176983833313, + "rewards/margins": 0.024126969277858734, + "rewards/rejected": -0.051568739116191864, + "step": 504 + }, + { + "epoch": 0.34923928077455046, + "grad_norm": 5.01504373550415, + "learning_rate": 1.7461964038727523e-05, + "log_odds_chosen": 3.2316083908081055, + "log_odds_ratio": -0.1560392826795578, + "logits/chosen": -0.6426920294761658, + "logits/rejected": -0.6856198906898499, + "logps/chosen": -0.07277484238147736, + "logps/rejected": -0.4405496120452881, + "loss": 5.1007, + "nll_loss": 1.2595752477645874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007277484517544508, + "rewards/margins": 0.03677747771143913, + "rewards/rejected": -0.04405496269464493, + "step": 505 + }, + { + "epoch": 0.3499308437067773, + "grad_norm": 3.568671941757202, + "learning_rate": 1.7496542185338865e-05, + "log_odds_chosen": 2.227147102355957, + "log_odds_ratio": -0.5110369324684143, + "logits/chosen": -0.7542319297790527, + "logits/rejected": -0.7737609148025513, + "logps/chosen": -0.17752686142921448, + "logps/rejected": -0.3617754578590393, + "loss": 3.9289, + "nll_loss": 0.9311113357543945, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017752686515450478, + "rewards/margins": 0.018424857407808304, + "rewards/rejected": -0.03617754578590393, + "step": 506 + }, + { + "epoch": 0.3506224066390041, + "grad_norm": 4.465025901794434, + "learning_rate": 1.7531120331950207e-05, + "log_odds_chosen": 1.4633152484893799, + "log_odds_ratio": -0.6008234620094299, + "logits/chosen": -0.7912360429763794, + "logits/rejected": -0.79715895652771, + "logps/chosen": -0.24662283062934875, + "logps/rejected": -0.4220726788043976, + "loss": 4.4915, + "nll_loss": 1.062793254852295, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.024662284180521965, + "rewards/margins": 0.017544984817504883, + "rewards/rejected": -0.0422072634100914, + "step": 507 + }, + { + "epoch": 0.35131396957123096, + "grad_norm": 3.592378616333008, + "learning_rate": 1.7565698478561548e-05, + "log_odds_chosen": 2.0615651607513428, + "log_odds_ratio": -0.3657073378562927, + "logits/chosen": -0.7249419093132019, + "logits/rejected": -0.7586057186126709, + "logps/chosen": -0.11939063668251038, + "logps/rejected": -0.4244507849216461, + "loss": 3.253, + "nll_loss": 0.7766897082328796, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011939063668251038, + "rewards/margins": 0.030506014823913574, + "rewards/rejected": -0.04244507849216461, + "step": 508 + }, + { + "epoch": 0.35200553250345784, + "grad_norm": 5.696559429168701, + "learning_rate": 1.7600276625172893e-05, + "log_odds_chosen": 1.1087409257888794, + "log_odds_ratio": -0.6127991676330566, + "logits/chosen": -0.8956174850463867, + "logits/rejected": -0.9211435914039612, + "logps/chosen": -0.21373379230499268, + "logps/rejected": -0.3666113615036011, + "loss": 5.2028, + "nll_loss": 1.2394108772277832, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021373379975557327, + "rewards/margins": 0.015287760645151138, + "rewards/rejected": -0.036661140620708466, + "step": 509 + }, + { + "epoch": 0.35269709543568467, + "grad_norm": 6.187593460083008, + "learning_rate": 1.7634854771784235e-05, + "log_odds_chosen": 0.7654345035552979, + "log_odds_ratio": -0.703902006149292, + "logits/chosen": -0.7929449081420898, + "logits/rejected": -0.7793622016906738, + "logps/chosen": -0.3261670768260956, + "logps/rejected": -0.6445503234863281, + "loss": 5.5787, + "nll_loss": 1.3242785930633545, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03261670842766762, + "rewards/margins": 0.031838320195674896, + "rewards/rejected": -0.06445503234863281, + "step": 510 + }, + { + "epoch": 0.3533886583679115, + "grad_norm": 7.367726802825928, + "learning_rate": 1.7669432918395576e-05, + "log_odds_chosen": 0.41668662428855896, + "log_odds_ratio": -0.8817450404167175, + "logits/chosen": -0.6256126165390015, + "logits/rejected": -0.6103242039680481, + "logps/chosen": -0.2497943639755249, + "logps/rejected": -0.33595484495162964, + "loss": 4.6828, + "nll_loss": 1.0825315713882446, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.02497943490743637, + "rewards/margins": 0.008616046980023384, + "rewards/rejected": -0.033595483750104904, + "step": 511 + }, + { + "epoch": 0.35408022130013833, + "grad_norm": 3.762199640274048, + "learning_rate": 1.7704011065006918e-05, + "log_odds_chosen": 3.9033827781677246, + "log_odds_ratio": -0.27078744769096375, + "logits/chosen": -0.9025722742080688, + "logits/rejected": -0.945976972579956, + "logps/chosen": -0.13871155679225922, + "logps/rejected": -0.6132323741912842, + "loss": 3.0666, + "nll_loss": 0.7395758032798767, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013871154747903347, + "rewards/margins": 0.047452084720134735, + "rewards/rejected": -0.06132324039936066, + "step": 512 + }, + { + "epoch": 0.35477178423236516, + "grad_norm": 5.514883041381836, + "learning_rate": 1.773858921161826e-05, + "log_odds_chosen": 1.5800803899765015, + "log_odds_ratio": -0.5254863500595093, + "logits/chosen": -0.8947157859802246, + "logits/rejected": -0.9294208288192749, + "logps/chosen": -0.19791993498802185, + "logps/rejected": -0.44762054085731506, + "loss": 6.4006, + "nll_loss": 1.5476136207580566, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019791992381215096, + "rewards/margins": 0.02497006021440029, + "rewards/rejected": -0.04476205259561539, + "step": 513 + }, + { + "epoch": 0.355463347164592, + "grad_norm": 5.778316497802734, + "learning_rate": 1.77731673582296e-05, + "log_odds_chosen": 3.0969834327697754, + "log_odds_ratio": -0.2676597535610199, + "logits/chosen": -0.7269794940948486, + "logits/rejected": -0.7525280714035034, + "logps/chosen": -0.09116362780332565, + "logps/rejected": -0.5763051509857178, + "loss": 4.9457, + "nll_loss": 1.2096540927886963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00911636371165514, + "rewards/margins": 0.04851415008306503, + "rewards/rejected": -0.0576305128633976, + "step": 514 + }, + { + "epoch": 0.3561549100968188, + "grad_norm": 5.426172256469727, + "learning_rate": 1.7807745504840942e-05, + "log_odds_chosen": 2.969529151916504, + "log_odds_ratio": -0.3110653758049011, + "logits/chosen": -0.7612947225570679, + "logits/rejected": -0.731289267539978, + "logps/chosen": -0.06461675465106964, + "logps/rejected": -0.4509395360946655, + "loss": 5.0502, + "nll_loss": 1.2314317226409912, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0064616757445037365, + "rewards/margins": 0.03863228112459183, + "rewards/rejected": -0.04509395360946655, + "step": 515 + }, + { + "epoch": 0.35684647302904565, + "grad_norm": 7.923834323883057, + "learning_rate": 1.7842323651452284e-05, + "log_odds_chosen": 0.44794902205467224, + "log_odds_ratio": -0.8729050159454346, + "logits/chosen": -0.8051487803459167, + "logits/rejected": -0.8108866214752197, + "logps/chosen": -0.1843547224998474, + "logps/rejected": -0.2477795034646988, + "loss": 5.1599, + "nll_loss": 1.2026830911636353, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018435470759868622, + "rewards/margins": 0.006342479493469, + "rewards/rejected": -0.024777952581644058, + "step": 516 + }, + { + "epoch": 0.3575380359612725, + "grad_norm": 4.760327339172363, + "learning_rate": 1.7876901798063626e-05, + "log_odds_chosen": 2.5328943729400635, + "log_odds_ratio": -0.30655527114868164, + "logits/chosen": -0.7165100574493408, + "logits/rejected": -0.7217994332313538, + "logps/chosen": -0.11191149055957794, + "logps/rejected": -0.6096492409706116, + "loss": 5.0692, + "nll_loss": 1.2366557121276855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011191150173544884, + "rewards/margins": 0.04977377504110336, + "rewards/rejected": -0.060964927077293396, + "step": 517 + }, + { + "epoch": 0.3582295988934993, + "grad_norm": 6.100254535675049, + "learning_rate": 1.7911479944674967e-05, + "log_odds_chosen": 1.5004901885986328, + "log_odds_ratio": -0.6379998922348022, + "logits/chosen": -0.43648284673690796, + "logits/rejected": -0.4436464011669159, + "logps/chosen": -0.11927513778209686, + "logps/rejected": -0.2644681930541992, + "loss": 4.0639, + "nll_loss": 0.9521628022193909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011927514337003231, + "rewards/margins": 0.014519304037094116, + "rewards/rejected": -0.026446819305419922, + "step": 518 + }, + { + "epoch": 0.35892116182572614, + "grad_norm": 8.609460830688477, + "learning_rate": 1.794605809128631e-05, + "log_odds_chosen": 1.5812023878097534, + "log_odds_ratio": -0.7274914979934692, + "logits/chosen": -0.9389444589614868, + "logits/rejected": -0.9337302446365356, + "logps/chosen": -0.2057965099811554, + "logps/rejected": -0.5808533430099487, + "loss": 5.5316, + "nll_loss": 1.310141682624817, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02057965099811554, + "rewards/margins": 0.03750568628311157, + "rewards/rejected": -0.058085329830646515, + "step": 519 + }, + { + "epoch": 0.359612724757953, + "grad_norm": 6.230218410491943, + "learning_rate": 1.798063623789765e-05, + "log_odds_chosen": 1.7745656967163086, + "log_odds_ratio": -0.3179192543029785, + "logits/chosen": -0.7594548463821411, + "logits/rejected": -0.7903056144714355, + "logps/chosen": -0.11681567132472992, + "logps/rejected": -0.470163494348526, + "loss": 4.6051, + "nll_loss": 1.1194841861724854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011681567877531052, + "rewards/margins": 0.03533478081226349, + "rewards/rejected": -0.04701634868979454, + "step": 520 + }, + { + "epoch": 0.3603042876901798, + "grad_norm": 5.00435733795166, + "learning_rate": 1.8015214384508992e-05, + "log_odds_chosen": 3.240253448486328, + "log_odds_ratio": -0.3954428434371948, + "logits/chosen": -0.695622980594635, + "logits/rejected": -0.7130659818649292, + "logps/chosen": -0.11247075349092484, + "logps/rejected": -0.6345089077949524, + "loss": 3.7368, + "nll_loss": 0.8946676850318909, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011247076094150543, + "rewards/margins": 0.052203819155693054, + "rewards/rejected": -0.0634508952498436, + "step": 521 + }, + { + "epoch": 0.36099585062240663, + "grad_norm": 3.5418221950531006, + "learning_rate": 1.8049792531120333e-05, + "log_odds_chosen": 2.4172143936157227, + "log_odds_ratio": -0.3795178234577179, + "logits/chosen": -0.7669139504432678, + "logits/rejected": -0.7880896925926208, + "logps/chosen": -0.13434094190597534, + "logps/rejected": -0.4309152066707611, + "loss": 5.098, + "nll_loss": 1.2365572452545166, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013434093445539474, + "rewards/margins": 0.029657430946826935, + "rewards/rejected": -0.04309152439236641, + "step": 522 + }, + { + "epoch": 0.36168741355463346, + "grad_norm": 3.2109227180480957, + "learning_rate": 1.8084370677731675e-05, + "log_odds_chosen": 3.076549530029297, + "log_odds_ratio": -0.1862131655216217, + "logits/chosen": -0.9441479444503784, + "logits/rejected": -0.9136902689933777, + "logps/chosen": -0.10313470661640167, + "logps/rejected": -0.6239364743232727, + "loss": 4.1863, + "nll_loss": 1.0279431343078613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010313471779227257, + "rewards/margins": 0.0520801767706871, + "rewards/rejected": -0.06239365041255951, + "step": 523 + }, + { + "epoch": 0.3623789764868603, + "grad_norm": 4.191302299499512, + "learning_rate": 1.8118948824343017e-05, + "log_odds_chosen": 3.1018226146698, + "log_odds_ratio": -0.3014015257358551, + "logits/chosen": -0.8339476585388184, + "logits/rejected": -0.7866331338882446, + "logps/chosen": -0.09632863104343414, + "logps/rejected": -0.4858103096485138, + "loss": 4.1661, + "nll_loss": 1.0113775730133057, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009632863104343414, + "rewards/margins": 0.038948170840740204, + "rewards/rejected": -0.04858103394508362, + "step": 524 + }, + { + "epoch": 0.3630705394190871, + "grad_norm": 4.952174663543701, + "learning_rate": 1.8153526970954358e-05, + "log_odds_chosen": 2.119576930999756, + "log_odds_ratio": -0.2787606120109558, + "logits/chosen": -0.666994571685791, + "logits/rejected": -0.6753822565078735, + "logps/chosen": -0.07383690774440765, + "logps/rejected": -0.3994881510734558, + "loss": 5.4373, + "nll_loss": 1.3314471244812012, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007383690681308508, + "rewards/margins": 0.032565124332904816, + "rewards/rejected": -0.03994881734251976, + "step": 525 + }, + { + "epoch": 0.36376210235131395, + "grad_norm": 9.110729217529297, + "learning_rate": 1.81881051175657e-05, + "log_odds_chosen": 1.177870750427246, + "log_odds_ratio": -1.0585416555404663, + "logits/chosen": -0.7838261127471924, + "logits/rejected": -0.7817600965499878, + "logps/chosen": -0.21810297667980194, + "logps/rejected": -0.24028678238391876, + "loss": 4.1757, + "nll_loss": 0.9380632638931274, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.021810296922922134, + "rewards/margins": 0.002218380104750395, + "rewards/rejected": -0.024028677493333817, + "step": 526 + }, + { + "epoch": 0.3644536652835408, + "grad_norm": 6.334846019744873, + "learning_rate": 1.822268326417704e-05, + "log_odds_chosen": 1.2779005765914917, + "log_odds_ratio": -0.7157418727874756, + "logits/chosen": -0.7719458341598511, + "logits/rejected": -0.8071249723434448, + "logps/chosen": -0.23595497012138367, + "logps/rejected": -0.523496150970459, + "loss": 4.9821, + "nll_loss": 1.1739587783813477, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023595497012138367, + "rewards/margins": 0.02875412628054619, + "rewards/rejected": -0.05234961956739426, + "step": 527 + }, + { + "epoch": 0.3651452282157676, + "grad_norm": 4.921584129333496, + "learning_rate": 1.8257261410788383e-05, + "log_odds_chosen": 1.9070172309875488, + "log_odds_ratio": -0.30627143383026123, + "logits/chosen": -0.8669978380203247, + "logits/rejected": -0.8720003366470337, + "logps/chosen": -0.12546661496162415, + "logps/rejected": -0.5315241813659668, + "loss": 5.7189, + "nll_loss": 1.3990854024887085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012546662241220474, + "rewards/margins": 0.040605757385492325, + "rewards/rejected": -0.0531524196267128, + "step": 528 + }, + { + "epoch": 0.36583679114799444, + "grad_norm": 4.885773181915283, + "learning_rate": 1.8291839557399724e-05, + "log_odds_chosen": 2.917936325073242, + "log_odds_ratio": -0.4400659203529358, + "logits/chosen": -0.7196451425552368, + "logits/rejected": -0.720146894454956, + "logps/chosen": -0.16993539035320282, + "logps/rejected": -0.5515943765640259, + "loss": 4.9134, + "nll_loss": 1.1843429803848267, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016993539407849312, + "rewards/margins": 0.038165897130966187, + "rewards/rejected": -0.05515943467617035, + "step": 529 + }, + { + "epoch": 0.3665283540802213, + "grad_norm": 3.7564799785614014, + "learning_rate": 1.8326417704011066e-05, + "log_odds_chosen": 2.123663902282715, + "log_odds_ratio": -0.2414604127407074, + "logits/chosen": -0.7857799530029297, + "logits/rejected": -0.8454867005348206, + "logps/chosen": -0.1178518757224083, + "logps/rejected": -0.5733444690704346, + "loss": 4.1923, + "nll_loss": 1.0239187479019165, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011785187758505344, + "rewards/margins": 0.04554925486445427, + "rewards/rejected": -0.05733444541692734, + "step": 530 + }, + { + "epoch": 0.36721991701244816, + "grad_norm": 4.089568614959717, + "learning_rate": 1.8360995850622407e-05, + "log_odds_chosen": 3.5009407997131348, + "log_odds_ratio": -0.24155713617801666, + "logits/chosen": -0.5445857048034668, + "logits/rejected": -0.5896902084350586, + "logps/chosen": -0.09425552189350128, + "logps/rejected": -0.7995942234992981, + "loss": 3.381, + "nll_loss": 0.8211044073104858, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009425551630556583, + "rewards/margins": 0.0705338716506958, + "rewards/rejected": -0.07995942234992981, + "step": 531 + }, + { + "epoch": 0.367911479944675, + "grad_norm": 3.8665342330932617, + "learning_rate": 1.839557399723375e-05, + "log_odds_chosen": 2.036942958831787, + "log_odds_ratio": -0.3373142182826996, + "logits/chosen": -0.8292122483253479, + "logits/rejected": -0.877589762210846, + "logps/chosen": -0.15983963012695312, + "logps/rejected": -0.5212113857269287, + "loss": 4.6423, + "nll_loss": 1.1268372535705566, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015983961522579193, + "rewards/margins": 0.036137182265520096, + "rewards/rejected": -0.05212114751338959, + "step": 532 + }, + { + "epoch": 0.3686030428769018, + "grad_norm": 6.855545997619629, + "learning_rate": 1.843015214384509e-05, + "log_odds_chosen": 1.7628200054168701, + "log_odds_ratio": -0.6212955713272095, + "logits/chosen": -0.6676622033119202, + "logits/rejected": -0.6422132849693298, + "logps/chosen": -0.1455618143081665, + "logps/rejected": -0.312483012676239, + "loss": 5.3469, + "nll_loss": 1.2745840549468994, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01455618068575859, + "rewards/margins": 0.01669212058186531, + "rewards/rejected": -0.03124830313026905, + "step": 533 + }, + { + "epoch": 0.36929460580912865, + "grad_norm": 3.9764904975891113, + "learning_rate": 1.8464730290456432e-05, + "log_odds_chosen": 1.620558500289917, + "log_odds_ratio": -0.552440881729126, + "logits/chosen": -1.1512162685394287, + "logits/rejected": -1.1916279792785645, + "logps/chosen": -0.2041110098361969, + "logps/rejected": -0.6735808253288269, + "loss": 4.6524, + "nll_loss": 1.1078450679779053, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02041110023856163, + "rewards/margins": 0.04694698005914688, + "rewards/rejected": -0.06735808402299881, + "step": 534 + }, + { + "epoch": 0.3699861687413555, + "grad_norm": 3.739170551300049, + "learning_rate": 1.8499308437067774e-05, + "log_odds_chosen": 4.318858623504639, + "log_odds_ratio": -0.27778083086013794, + "logits/chosen": -0.8107898235321045, + "logits/rejected": -0.853278636932373, + "logps/chosen": -0.0777362585067749, + "logps/rejected": -0.883296012878418, + "loss": 4.6828, + "nll_loss": 1.1429297924041748, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007773626130074263, + "rewards/margins": 0.0805559754371643, + "rewards/rejected": -0.08832959830760956, + "step": 535 + }, + { + "epoch": 0.3706777316735823, + "grad_norm": 4.370072841644287, + "learning_rate": 1.8533886583679115e-05, + "log_odds_chosen": 3.689222574234009, + "log_odds_ratio": -0.17596468329429626, + "logits/chosen": -0.7374777793884277, + "logits/rejected": -0.8291355967521667, + "logps/chosen": -0.06306464970111847, + "logps/rejected": -0.7058030366897583, + "loss": 4.2286, + "nll_loss": 1.0395509004592896, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006306465249508619, + "rewards/margins": 0.06427383422851562, + "rewards/rejected": -0.07058030366897583, + "step": 536 + }, + { + "epoch": 0.37136929460580914, + "grad_norm": 5.0150017738342285, + "learning_rate": 1.8568464730290457e-05, + "log_odds_chosen": 2.186718463897705, + "log_odds_ratio": -0.4063662886619568, + "logits/chosen": -0.8127549886703491, + "logits/rejected": -0.8689401149749756, + "logps/chosen": -0.11313273012638092, + "logps/rejected": -0.39278122782707214, + "loss": 4.6602, + "nll_loss": 1.1244102716445923, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011313272640109062, + "rewards/margins": 0.027964850887656212, + "rewards/rejected": -0.039278123527765274, + "step": 537 + }, + { + "epoch": 0.37206085753803597, + "grad_norm": 5.019992828369141, + "learning_rate": 1.86030428769018e-05, + "log_odds_chosen": 1.2885156869888306, + "log_odds_ratio": -0.6232368350028992, + "logits/chosen": -0.7190303206443787, + "logits/rejected": -0.7032252550125122, + "logps/chosen": -0.13489317893981934, + "logps/rejected": -0.19020210206508636, + "loss": 4.7953, + "nll_loss": 1.136505126953125, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013489319011569023, + "rewards/margins": 0.005530891008675098, + "rewards/rejected": -0.019020210951566696, + "step": 538 + }, + { + "epoch": 0.3727524204702628, + "grad_norm": 6.01793098449707, + "learning_rate": 1.863762102351314e-05, + "log_odds_chosen": 0.862777829170227, + "log_odds_ratio": -0.7624509334564209, + "logits/chosen": -0.5267829298973083, + "logits/rejected": -0.5604238510131836, + "logps/chosen": -0.264478474855423, + "logps/rejected": -0.33356383442878723, + "loss": 4.7699, + "nll_loss": 1.1162419319152832, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.026447847485542297, + "rewards/margins": 0.006908536888659, + "rewards/rejected": -0.03335638344287872, + "step": 539 + }, + { + "epoch": 0.37344398340248963, + "grad_norm": 5.9148030281066895, + "learning_rate": 1.867219917012448e-05, + "log_odds_chosen": 0.3767799139022827, + "log_odds_ratio": -0.5631717443466187, + "logits/chosen": -0.9739837646484375, + "logits/rejected": -1.014035940170288, + "logps/chosen": -0.13336896896362305, + "logps/rejected": -0.19003111124038696, + "loss": 6.9421, + "nll_loss": 1.679201602935791, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01333689596503973, + "rewards/margins": 0.005666215904057026, + "rewards/rejected": -0.019003111869096756, + "step": 540 + }, + { + "epoch": 0.37413554633471646, + "grad_norm": 7.738327503204346, + "learning_rate": 1.8706777316735823e-05, + "log_odds_chosen": 0.9757472276687622, + "log_odds_ratio": -1.0805482864379883, + "logits/chosen": -0.8048467040061951, + "logits/rejected": -0.8325028419494629, + "logps/chosen": -0.14648893475532532, + "logps/rejected": -0.49185705184936523, + "loss": 5.6322, + "nll_loss": 1.2999882698059082, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014648893848061562, + "rewards/margins": 0.03453681245446205, + "rewards/rejected": -0.04918570816516876, + "step": 541 + }, + { + "epoch": 0.3748271092669433, + "grad_norm": 4.354764938354492, + "learning_rate": 1.8741355463347165e-05, + "log_odds_chosen": 0.5504599809646606, + "log_odds_ratio": -0.5843324661254883, + "logits/chosen": -0.6657752990722656, + "logits/rejected": -0.6720460653305054, + "logps/chosen": -0.17936018109321594, + "logps/rejected": -0.31276610493659973, + "loss": 4.3939, + "nll_loss": 1.040050983428955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017936021089553833, + "rewards/margins": 0.013340589590370655, + "rewards/rejected": -0.03127660974860191, + "step": 542 + }, + { + "epoch": 0.3755186721991701, + "grad_norm": 6.16148567199707, + "learning_rate": 1.8775933609958506e-05, + "log_odds_chosen": 2.187774658203125, + "log_odds_ratio": -0.4758215844631195, + "logits/chosen": -0.6338940858840942, + "logits/rejected": -0.6519688367843628, + "logps/chosen": -0.13961473107337952, + "logps/rejected": -0.42610496282577515, + "loss": 4.8044, + "nll_loss": 1.153525471687317, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013961473479866982, + "rewards/margins": 0.028649020940065384, + "rewards/rejected": -0.042610496282577515, + "step": 543 + }, + { + "epoch": 0.37621023513139695, + "grad_norm": 4.357821941375732, + "learning_rate": 1.8810511756569848e-05, + "log_odds_chosen": 1.0460649728775024, + "log_odds_ratio": -0.49968603253364563, + "logits/chosen": -1.1150552034378052, + "logits/rejected": -1.1179910898208618, + "logps/chosen": -0.10702711343765259, + "logps/rejected": -0.2099991887807846, + "loss": 4.6849, + "nll_loss": 1.1212637424468994, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010702710598707199, + "rewards/margins": 0.010297207161784172, + "rewards/rejected": -0.02099991962313652, + "step": 544 + }, + { + "epoch": 0.3769017980636238, + "grad_norm": 3.573343276977539, + "learning_rate": 1.884508990318119e-05, + "log_odds_chosen": 0.6429002285003662, + "log_odds_ratio": -0.5074270963668823, + "logits/chosen": -0.8373913168907166, + "logits/rejected": -0.8756779432296753, + "logps/chosen": -0.18509633839130402, + "logps/rejected": -0.2932808995246887, + "loss": 4.0565, + "nll_loss": 0.9633736610412598, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0185096338391304, + "rewards/margins": 0.01081845723092556, + "rewards/rejected": -0.02932809107005596, + "step": 545 + }, + { + "epoch": 0.3775933609958506, + "grad_norm": 5.617794990539551, + "learning_rate": 1.887966804979253e-05, + "log_odds_chosen": 1.6232762336730957, + "log_odds_ratio": -0.6035907864570618, + "logits/chosen": -0.8689144849777222, + "logits/rejected": -0.8932918310165405, + "logps/chosen": -0.17928524315357208, + "logps/rejected": -0.34406036138534546, + "loss": 3.2081, + "nll_loss": 0.7416632175445557, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01792852394282818, + "rewards/margins": 0.016477510333061218, + "rewards/rejected": -0.034406036138534546, + "step": 546 + }, + { + "epoch": 0.37828492392807744, + "grad_norm": 4.233097076416016, + "learning_rate": 1.8914246196403872e-05, + "log_odds_chosen": 2.6939659118652344, + "log_odds_ratio": -0.30268973112106323, + "logits/chosen": -0.7411539554595947, + "logits/rejected": -0.764373242855072, + "logps/chosen": -0.1007307767868042, + "logps/rejected": -0.35213255882263184, + "loss": 4.1591, + "nll_loss": 1.0095144510269165, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01007307693362236, + "rewards/margins": 0.025140177458524704, + "rewards/rejected": -0.035213254392147064, + "step": 547 + }, + { + "epoch": 0.3789764868603043, + "grad_norm": 3.8841028213500977, + "learning_rate": 1.8948824343015214e-05, + "log_odds_chosen": 2.9004340171813965, + "log_odds_ratio": -0.23978981375694275, + "logits/chosen": -0.5224136114120483, + "logits/rejected": -0.525780439376831, + "logps/chosen": -0.09610556066036224, + "logps/rejected": -0.38277459144592285, + "loss": 4.026, + "nll_loss": 0.9825116395950317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009610556997358799, + "rewards/margins": 0.028666902333498, + "rewards/rejected": -0.038277462124824524, + "step": 548 + }, + { + "epoch": 0.3796680497925311, + "grad_norm": 4.983913898468018, + "learning_rate": 1.8983402489626556e-05, + "log_odds_chosen": 2.435208559036255, + "log_odds_ratio": -0.41046613454818726, + "logits/chosen": -0.8081510066986084, + "logits/rejected": -0.792250394821167, + "logps/chosen": -0.0936388149857521, + "logps/rejected": -0.2591051161289215, + "loss": 4.3009, + "nll_loss": 1.0341734886169434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00936388224363327, + "rewards/margins": 0.01654663123190403, + "rewards/rejected": -0.02591051161289215, + "step": 549 + }, + { + "epoch": 0.38035961272475793, + "grad_norm": 3.430642604827881, + "learning_rate": 1.9017980636237897e-05, + "log_odds_chosen": 3.36209774017334, + "log_odds_ratio": -0.246946781873703, + "logits/chosen": -0.8941298723220825, + "logits/rejected": -0.9081979990005493, + "logps/chosen": -0.13310852646827698, + "logps/rejected": -0.429534375667572, + "loss": 3.6823, + "nll_loss": 0.8958902359008789, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013310853391885757, + "rewards/margins": 0.029642587527632713, + "rewards/rejected": -0.04295343905687332, + "step": 550 + }, + { + "epoch": 0.38105117565698476, + "grad_norm": 3.9265270233154297, + "learning_rate": 1.905255878284924e-05, + "log_odds_chosen": 1.881701946258545, + "log_odds_ratio": -0.515370786190033, + "logits/chosen": -0.9915303587913513, + "logits/rejected": -0.9318628907203674, + "logps/chosen": -0.10293899476528168, + "logps/rejected": -0.3324930667877197, + "loss": 3.9277, + "nll_loss": 0.930385172367096, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010293899103999138, + "rewards/margins": 0.022955408319830894, + "rewards/rejected": -0.03324930742383003, + "step": 551 + }, + { + "epoch": 0.3817427385892116, + "grad_norm": 4.866530418395996, + "learning_rate": 1.908713692946058e-05, + "log_odds_chosen": 0.5543054342269897, + "log_odds_ratio": -0.7098829746246338, + "logits/chosen": -1.2122690677642822, + "logits/rejected": -1.1861469745635986, + "logps/chosen": -0.19430440664291382, + "logps/rejected": -0.2644846737384796, + "loss": 4.9296, + "nll_loss": 1.1614184379577637, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01943044178187847, + "rewards/margins": 0.00701802596449852, + "rewards/rejected": -0.02644846774637699, + "step": 552 + }, + { + "epoch": 0.3824343015214384, + "grad_norm": 4.363667964935303, + "learning_rate": 1.9121715076071922e-05, + "log_odds_chosen": 1.6609487533569336, + "log_odds_ratio": -0.22528058290481567, + "logits/chosen": -1.2467082738876343, + "logits/rejected": -1.256028413772583, + "logps/chosen": -0.07459308207035065, + "logps/rejected": -0.304107129573822, + "loss": 4.7053, + "nll_loss": 1.153801679611206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007459308486431837, + "rewards/margins": 0.022951405495405197, + "rewards/rejected": -0.03041071444749832, + "step": 553 + }, + { + "epoch": 0.3831258644536653, + "grad_norm": 4.976971626281738, + "learning_rate": 1.9156293222683267e-05, + "log_odds_chosen": 1.7708755731582642, + "log_odds_ratio": -0.37831786274909973, + "logits/chosen": -0.7834327220916748, + "logits/rejected": -0.7940959930419922, + "logps/chosen": -0.14502611756324768, + "logps/rejected": -0.36740654706954956, + "loss": 4.2815, + "nll_loss": 1.0325376987457275, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014502611942589283, + "rewards/margins": 0.022238047793507576, + "rewards/rejected": -0.036740656942129135, + "step": 554 + }, + { + "epoch": 0.38381742738589214, + "grad_norm": 3.4731082916259766, + "learning_rate": 1.919087136929461e-05, + "log_odds_chosen": 2.0094597339630127, + "log_odds_ratio": -0.44111326336860657, + "logits/chosen": -0.7475765943527222, + "logits/rejected": -0.764420747756958, + "logps/chosen": -0.116146519780159, + "logps/rejected": -0.338163286447525, + "loss": 3.6618, + "nll_loss": 0.8713344931602478, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01161465235054493, + "rewards/margins": 0.022201674059033394, + "rewards/rejected": -0.03381632640957832, + "step": 555 + }, + { + "epoch": 0.38450899031811897, + "grad_norm": 5.620976448059082, + "learning_rate": 1.922544951590595e-05, + "log_odds_chosen": 0.9785915017127991, + "log_odds_ratio": -0.5120671391487122, + "logits/chosen": -1.029029130935669, + "logits/rejected": -1.0345444679260254, + "logps/chosen": -0.14693522453308105, + "logps/rejected": -0.3012365996837616, + "loss": 4.3674, + "nll_loss": 1.040635108947754, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014693522825837135, + "rewards/margins": 0.015430137515068054, + "rewards/rejected": -0.03012366034090519, + "step": 556 + }, + { + "epoch": 0.3852005532503458, + "grad_norm": 6.698596477508545, + "learning_rate": 1.926002766251729e-05, + "log_odds_chosen": 1.5996103286743164, + "log_odds_ratio": -0.8160156607627869, + "logits/chosen": -0.6407162547111511, + "logits/rejected": -0.6249457001686096, + "logps/chosen": -0.18899376690387726, + "logps/rejected": -0.3072316646575928, + "loss": 3.864, + "nll_loss": 0.8844013214111328, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.018899379298090935, + "rewards/margins": 0.011823788285255432, + "rewards/rejected": -0.030723167583346367, + "step": 557 + }, + { + "epoch": 0.38589211618257263, + "grad_norm": 6.878204345703125, + "learning_rate": 1.9294605809128633e-05, + "log_odds_chosen": 2.2048490047454834, + "log_odds_ratio": -0.3432249128818512, + "logits/chosen": -0.7492395639419556, + "logits/rejected": -0.8243151307106018, + "logps/chosen": -0.0980885699391365, + "logps/rejected": -0.33490556478500366, + "loss": 5.5948, + "nll_loss": 1.364375114440918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00980885699391365, + "rewards/margins": 0.023681702092289925, + "rewards/rejected": -0.033490557223558426, + "step": 558 + }, + { + "epoch": 0.38658367911479946, + "grad_norm": 4.561729907989502, + "learning_rate": 1.9329183955739975e-05, + "log_odds_chosen": 2.502473831176758, + "log_odds_ratio": -0.32938480377197266, + "logits/chosen": -0.917141318321228, + "logits/rejected": -0.9629212021827698, + "logps/chosen": -0.13379818201065063, + "logps/rejected": -0.3232942819595337, + "loss": 4.1279, + "nll_loss": 0.9990299940109253, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013379817828536034, + "rewards/margins": 0.018949609249830246, + "rewards/rejected": -0.03232942894101143, + "step": 559 + }, + { + "epoch": 0.3872752420470263, + "grad_norm": 8.337691307067871, + "learning_rate": 1.9363762102351316e-05, + "log_odds_chosen": 2.1198501586914062, + "log_odds_ratio": -0.4438222646713257, + "logits/chosen": -0.7641869187355042, + "logits/rejected": -0.7751225233078003, + "logps/chosen": -0.14018933475017548, + "logps/rejected": -0.5707254409790039, + "loss": 5.6817, + "nll_loss": 1.3760305643081665, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014018935151398182, + "rewards/margins": 0.043053604662418365, + "rewards/rejected": -0.05707254260778427, + "step": 560 + }, + { + "epoch": 0.3879668049792531, + "grad_norm": 4.920252323150635, + "learning_rate": 1.9398340248962658e-05, + "log_odds_chosen": 0.6740873456001282, + "log_odds_ratio": -0.5108063220977783, + "logits/chosen": -0.8343955278396606, + "logits/rejected": -0.8382387161254883, + "logps/chosen": -0.13594703376293182, + "logps/rejected": -0.21453788876533508, + "loss": 4.8277, + "nll_loss": 1.1558473110198975, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013594703748822212, + "rewards/margins": 0.007859084755182266, + "rewards/rejected": -0.02145378850400448, + "step": 561 + }, + { + "epoch": 0.38865836791147995, + "grad_norm": 6.755950927734375, + "learning_rate": 1.9432918395574e-05, + "log_odds_chosen": 2.570904016494751, + "log_odds_ratio": -0.3146067261695862, + "logits/chosen": -0.9997011423110962, + "logits/rejected": -1.0078551769256592, + "logps/chosen": -0.12847094237804413, + "logps/rejected": -0.4903247654438019, + "loss": 5.5276, + "nll_loss": 1.350435495376587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012847093865275383, + "rewards/margins": 0.036185383796691895, + "rewards/rejected": -0.04903247952461243, + "step": 562 + }, + { + "epoch": 0.3893499308437068, + "grad_norm": 5.547338962554932, + "learning_rate": 1.946749654218534e-05, + "log_odds_chosen": 0.8543901443481445, + "log_odds_ratio": -0.570191502571106, + "logits/chosen": -0.7356718182563782, + "logits/rejected": -0.7660771012306213, + "logps/chosen": -0.19542016088962555, + "logps/rejected": -0.4686131477355957, + "loss": 5.2079, + "nll_loss": 1.2449650764465332, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019542016088962555, + "rewards/margins": 0.027319299057126045, + "rewards/rejected": -0.04686131328344345, + "step": 563 + }, + { + "epoch": 0.3900414937759336, + "grad_norm": 5.287246227264404, + "learning_rate": 1.9502074688796682e-05, + "log_odds_chosen": 2.0964884757995605, + "log_odds_ratio": -0.5565468668937683, + "logits/chosen": -0.9580271244049072, + "logits/rejected": -0.9527782797813416, + "logps/chosen": -0.08842720836400986, + "logps/rejected": -0.23043900728225708, + "loss": 3.9914, + "nll_loss": 0.9421975612640381, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008842721581459045, + "rewards/margins": 0.014201181009411812, + "rewards/rejected": -0.023043902590870857, + "step": 564 + }, + { + "epoch": 0.39073305670816044, + "grad_norm": 8.411381721496582, + "learning_rate": 1.9536652835408024e-05, + "log_odds_chosen": 1.5264617204666138, + "log_odds_ratio": -0.4682188034057617, + "logits/chosen": -0.8114876747131348, + "logits/rejected": -0.830116868019104, + "logps/chosen": -0.31981879472732544, + "logps/rejected": -0.5894278883934021, + "loss": 7.672, + "nll_loss": 1.8711817264556885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.031981877982616425, + "rewards/margins": 0.026960909366607666, + "rewards/rejected": -0.05894278734922409, + "step": 565 + }, + { + "epoch": 0.3914246196403873, + "grad_norm": 9.792768478393555, + "learning_rate": 1.9571230982019366e-05, + "log_odds_chosen": 1.3716531991958618, + "log_odds_ratio": -0.8037305474281311, + "logits/chosen": -0.8083940744400024, + "logits/rejected": -0.8525917530059814, + "logps/chosen": -0.10131937265396118, + "logps/rejected": -0.37159010767936707, + "loss": 4.41, + "nll_loss": 1.0221236944198608, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010131937451660633, + "rewards/margins": 0.0270270723849535, + "rewards/rejected": -0.03715901076793671, + "step": 566 + }, + { + "epoch": 0.3921161825726141, + "grad_norm": 4.492656707763672, + "learning_rate": 1.9605809128630707e-05, + "log_odds_chosen": 3.694186210632324, + "log_odds_ratio": -0.20484068989753723, + "logits/chosen": -0.467551052570343, + "logits/rejected": -0.4827098250389099, + "logps/chosen": -0.07522110641002655, + "logps/rejected": -0.4975152909755707, + "loss": 3.9541, + "nll_loss": 0.9680354595184326, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00752211082726717, + "rewards/margins": 0.04222942143678665, + "rewards/rejected": -0.049751535058021545, + "step": 567 + }, + { + "epoch": 0.39280774550484093, + "grad_norm": 3.082396984100342, + "learning_rate": 1.964038727524205e-05, + "log_odds_chosen": 1.8576481342315674, + "log_odds_ratio": -0.3983916640281677, + "logits/chosen": -0.9992802739143372, + "logits/rejected": -1.008885383605957, + "logps/chosen": -0.10009613633155823, + "logps/rejected": -0.3305109143257141, + "loss": 3.2665, + "nll_loss": 0.7767845392227173, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010009613819420338, + "rewards/margins": 0.023041479289531708, + "rewards/rejected": -0.03305109590291977, + "step": 568 + }, + { + "epoch": 0.39349930843706776, + "grad_norm": 5.877315998077393, + "learning_rate": 1.967496542185339e-05, + "log_odds_chosen": 2.1820318698883057, + "log_odds_ratio": -0.2573472261428833, + "logits/chosen": -1.050216794013977, + "logits/rejected": -1.063097596168518, + "logps/chosen": -0.08402914553880692, + "logps/rejected": -0.45616158843040466, + "loss": 5.5968, + "nll_loss": 1.3734532594680786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008402915671467781, + "rewards/margins": 0.037213243544101715, + "rewards/rejected": -0.04561615735292435, + "step": 569 + }, + { + "epoch": 0.3941908713692946, + "grad_norm": 3.782557487487793, + "learning_rate": 1.9709543568464732e-05, + "log_odds_chosen": 1.2247357368469238, + "log_odds_ratio": -0.5550665259361267, + "logits/chosen": -0.9844390153884888, + "logits/rejected": -0.9861627221107483, + "logps/chosen": -0.13897764682769775, + "logps/rejected": -0.3307046592235565, + "loss": 4.0865, + "nll_loss": 0.9661211967468262, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01389776449650526, + "rewards/margins": 0.019172698259353638, + "rewards/rejected": -0.03307046368718147, + "step": 570 + }, + { + "epoch": 0.3948824343015214, + "grad_norm": 4.290762901306152, + "learning_rate": 1.9744121715076073e-05, + "log_odds_chosen": 1.3584752082824707, + "log_odds_ratio": -0.4768679738044739, + "logits/chosen": -0.7764488458633423, + "logits/rejected": -0.7982219457626343, + "logps/chosen": -0.1681971698999405, + "logps/rejected": -0.33647996187210083, + "loss": 4.5738, + "nll_loss": 1.0957714319229126, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01681971736252308, + "rewards/margins": 0.016828283667564392, + "rewards/rejected": -0.03364799916744232, + "step": 571 + }, + { + "epoch": 0.39557399723374825, + "grad_norm": 4.797642230987549, + "learning_rate": 1.9778699861687415e-05, + "log_odds_chosen": 1.5044949054718018, + "log_odds_ratio": -0.3889736235141754, + "logits/chosen": -0.4714691638946533, + "logits/rejected": -0.5175361633300781, + "logps/chosen": -0.20814576745033264, + "logps/rejected": -0.6175678968429565, + "loss": 3.5804, + "nll_loss": 0.8561970591545105, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020814577117562294, + "rewards/margins": 0.04094220697879791, + "rewards/rejected": -0.061756789684295654, + "step": 572 + }, + { + "epoch": 0.3962655601659751, + "grad_norm": 6.371168613433838, + "learning_rate": 1.9813278008298757e-05, + "log_odds_chosen": 1.5652942657470703, + "log_odds_ratio": -0.6328588724136353, + "logits/chosen": -0.832341730594635, + "logits/rejected": -0.7894372940063477, + "logps/chosen": -0.07531416416168213, + "logps/rejected": -0.16684159636497498, + "loss": 3.9637, + "nll_loss": 0.9276465177536011, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007531417068094015, + "rewards/margins": 0.009152742102742195, + "rewards/rejected": -0.016684159636497498, + "step": 573 + }, + { + "epoch": 0.3969571230982019, + "grad_norm": 2.120250940322876, + "learning_rate": 1.9847856154910098e-05, + "log_odds_chosen": 3.8697755336761475, + "log_odds_ratio": -0.16896286606788635, + "logits/chosen": -1.1625442504882812, + "logits/rejected": -1.2002800703048706, + "logps/chosen": -0.06508542597293854, + "logps/rejected": -0.5326133966445923, + "loss": 2.7737, + "nll_loss": 0.6765269637107849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006508542690426111, + "rewards/margins": 0.046752795577049255, + "rewards/rejected": -0.05326133966445923, + "step": 574 + }, + { + "epoch": 0.39764868603042874, + "grad_norm": 5.065700531005859, + "learning_rate": 1.988243430152144e-05, + "log_odds_chosen": 1.8119397163391113, + "log_odds_ratio": -0.33278965950012207, + "logits/chosen": -0.9758837223052979, + "logits/rejected": -0.9816950559616089, + "logps/chosen": -0.1355278044939041, + "logps/rejected": -0.3939824402332306, + "loss": 5.7527, + "nll_loss": 1.404889464378357, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013552782125771046, + "rewards/margins": 0.025845464318990707, + "rewards/rejected": -0.03939824551343918, + "step": 575 + }, + { + "epoch": 0.3983402489626556, + "grad_norm": 5.197612762451172, + "learning_rate": 1.991701244813278e-05, + "log_odds_chosen": 2.7448010444641113, + "log_odds_ratio": -0.3634113669395447, + "logits/chosen": -1.033627986907959, + "logits/rejected": -1.049392580986023, + "logps/chosen": -0.10442063957452774, + "logps/rejected": -0.6225466728210449, + "loss": 4.248, + "nll_loss": 1.025671362876892, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010442064143717289, + "rewards/margins": 0.05181260406970978, + "rewards/rejected": -0.06225466728210449, + "step": 576 + }, + { + "epoch": 0.39903181189488246, + "grad_norm": 4.7536115646362305, + "learning_rate": 1.9951590594744123e-05, + "log_odds_chosen": 2.725403070449829, + "log_odds_ratio": -0.37399032711982727, + "logits/chosen": -1.0591670274734497, + "logits/rejected": -1.117004632949829, + "logps/chosen": -0.07127417623996735, + "logps/rejected": -0.3425430655479431, + "loss": 3.4978, + "nll_loss": 0.8370423316955566, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007127417717128992, + "rewards/margins": 0.027126889675855637, + "rewards/rejected": -0.03425430506467819, + "step": 577 + }, + { + "epoch": 0.3997233748271093, + "grad_norm": 4.59316873550415, + "learning_rate": 1.9986168741355464e-05, + "log_odds_chosen": 1.6278140544891357, + "log_odds_ratio": -0.3864939510822296, + "logits/chosen": -1.1396081447601318, + "logits/rejected": -1.1573009490966797, + "logps/chosen": -0.12526187300682068, + "logps/rejected": -0.4201142191886902, + "loss": 4.8807, + "nll_loss": 1.1815369129180908, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012526188045740128, + "rewards/margins": 0.02948523312807083, + "rewards/rejected": -0.04201142117381096, + "step": 578 + }, + { + "epoch": 0.4004149377593361, + "grad_norm": 7.344080924987793, + "learning_rate": 2.0020746887966806e-05, + "log_odds_chosen": 3.3043441772460938, + "log_odds_ratio": -0.28534916043281555, + "logits/chosen": -0.6253362894058228, + "logits/rejected": -0.6439019441604614, + "logps/chosen": -0.10288303345441818, + "logps/rejected": -0.6352183818817139, + "loss": 4.3995, + "nll_loss": 1.0713284015655518, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010288302786648273, + "rewards/margins": 0.05323353409767151, + "rewards/rejected": -0.0635218396782875, + "step": 579 + }, + { + "epoch": 0.40110650069156295, + "grad_norm": 5.617142200469971, + "learning_rate": 2.0055325034578147e-05, + "log_odds_chosen": 1.7103029489517212, + "log_odds_ratio": -0.7923794984817505, + "logits/chosen": -0.8352532386779785, + "logits/rejected": -0.8463157415390015, + "logps/chosen": -0.16378286480903625, + "logps/rejected": -0.38926514983177185, + "loss": 4.3002, + "nll_loss": 0.9958136677742004, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016378287225961685, + "rewards/margins": 0.02254822477698326, + "rewards/rejected": -0.038926515728235245, + "step": 580 + }, + { + "epoch": 0.4017980636237898, + "grad_norm": 3.304642677307129, + "learning_rate": 2.008990318118949e-05, + "log_odds_chosen": 3.5990259647369385, + "log_odds_ratio": -0.14686137437820435, + "logits/chosen": -0.756050705909729, + "logits/rejected": -0.8004224300384521, + "logps/chosen": -0.06934195756912231, + "logps/rejected": -0.5959687829017639, + "loss": 3.3677, + "nll_loss": 0.8272408843040466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0069341957569122314, + "rewards/margins": 0.0526626855134964, + "rewards/rejected": -0.05959688127040863, + "step": 581 + }, + { + "epoch": 0.4024896265560166, + "grad_norm": 4.192474842071533, + "learning_rate": 2.012448132780083e-05, + "log_odds_chosen": 1.6783723831176758, + "log_odds_ratio": -0.3463253378868103, + "logits/chosen": -0.640341579914093, + "logits/rejected": -0.6563262939453125, + "logps/chosen": -0.18390527367591858, + "logps/rejected": -0.6369175314903259, + "loss": 4.1321, + "nll_loss": 0.9983953833580017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018390528857707977, + "rewards/margins": 0.045301225036382675, + "rewards/rejected": -0.06369175761938095, + "step": 582 + }, + { + "epoch": 0.40318118948824344, + "grad_norm": 3.8522253036499023, + "learning_rate": 2.0159059474412172e-05, + "log_odds_chosen": 0.8378918170928955, + "log_odds_ratio": -0.5052666664123535, + "logits/chosen": -0.7618661522865295, + "logits/rejected": -0.7571154236793518, + "logps/chosen": -0.1491318643093109, + "logps/rejected": -0.29800164699554443, + "loss": 4.1378, + "nll_loss": 0.9839212894439697, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01491318829357624, + "rewards/margins": 0.014886979945003986, + "rewards/rejected": -0.029800167307257652, + "step": 583 + }, + { + "epoch": 0.40387275242047027, + "grad_norm": 4.542193412780762, + "learning_rate": 2.0193637621023514e-05, + "log_odds_chosen": 2.0368504524230957, + "log_odds_ratio": -0.3491406738758087, + "logits/chosen": -0.9571303129196167, + "logits/rejected": -1.0110360383987427, + "logps/chosen": -0.1318981647491455, + "logps/rejected": -0.48875343799591064, + "loss": 4.4299, + "nll_loss": 1.0725702047348022, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01318981684744358, + "rewards/margins": 0.03568553179502487, + "rewards/rejected": -0.048875343054533005, + "step": 584 + }, + { + "epoch": 0.4045643153526971, + "grad_norm": 4.351795673370361, + "learning_rate": 2.0228215767634855e-05, + "log_odds_chosen": 1.6439603567123413, + "log_odds_ratio": -0.4035801887512207, + "logits/chosen": -0.8380446434020996, + "logits/rejected": -0.9095944762229919, + "logps/chosen": -0.1933341771364212, + "logps/rejected": -0.36412349343299866, + "loss": 4.176, + "nll_loss": 1.003645896911621, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01933341845870018, + "rewards/margins": 0.017078930512070656, + "rewards/rejected": -0.036412350833415985, + "step": 585 + }, + { + "epoch": 0.40525587828492393, + "grad_norm": 6.966506004333496, + "learning_rate": 2.0262793914246197e-05, + "log_odds_chosen": 4.041241645812988, + "log_odds_ratio": -0.7675859928131104, + "logits/chosen": -0.9329093098640442, + "logits/rejected": -0.9795863628387451, + "logps/chosen": -0.1338355988264084, + "logps/rejected": -0.6674110889434814, + "loss": 4.2325, + "nll_loss": 0.9813593626022339, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013383558951318264, + "rewards/margins": 0.053357549011707306, + "rewards/rejected": -0.06674110889434814, + "step": 586 + }, + { + "epoch": 0.40594744121715076, + "grad_norm": 4.690940856933594, + "learning_rate": 2.029737206085754e-05, + "log_odds_chosen": 0.8209674954414368, + "log_odds_ratio": -0.5149362683296204, + "logits/chosen": -1.1141588687896729, + "logits/rejected": -1.1204617023468018, + "logps/chosen": -0.2986541986465454, + "logps/rejected": -0.5440924167633057, + "loss": 5.1435, + "nll_loss": 1.2343727350234985, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02986541949212551, + "rewards/margins": 0.024543821811676025, + "rewards/rejected": -0.054409243166446686, + "step": 587 + }, + { + "epoch": 0.4066390041493776, + "grad_norm": 4.465209007263184, + "learning_rate": 2.033195020746888e-05, + "log_odds_chosen": 2.6052603721618652, + "log_odds_ratio": -0.5808659791946411, + "logits/chosen": -0.8501052856445312, + "logits/rejected": -0.8835092782974243, + "logps/chosen": -0.2466723620891571, + "logps/rejected": -0.7465604543685913, + "loss": 4.3399, + "nll_loss": 1.026882290840149, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02466723695397377, + "rewards/margins": 0.04998881369829178, + "rewards/rejected": -0.07465604692697525, + "step": 588 + }, + { + "epoch": 0.4073305670816044, + "grad_norm": 4.006109714508057, + "learning_rate": 2.036652835408022e-05, + "log_odds_chosen": 1.892028570175171, + "log_odds_ratio": -0.562629759311676, + "logits/chosen": -0.8217758536338806, + "logits/rejected": -0.8682578802108765, + "logps/chosen": -0.20783497393131256, + "logps/rejected": -0.3692111372947693, + "loss": 3.471, + "nll_loss": 0.8114974498748779, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020783497020602226, + "rewards/margins": 0.016137616708874702, + "rewards/rejected": -0.03692111372947693, + "step": 589 + }, + { + "epoch": 0.40802213001383125, + "grad_norm": 3.260128974914551, + "learning_rate": 2.0401106500691563e-05, + "log_odds_chosen": 3.6313135623931885, + "log_odds_ratio": -0.28211459517478943, + "logits/chosen": -0.6204289793968201, + "logits/rejected": -0.6435818672180176, + "logps/chosen": -0.13274721801280975, + "logps/rejected": -0.507023811340332, + "loss": 4.3703, + "nll_loss": 1.0643757581710815, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0132747208699584, + "rewards/margins": 0.037427663803100586, + "rewards/rejected": -0.05070238560438156, + "step": 590 + }, + { + "epoch": 0.4087136929460581, + "grad_norm": 3.4640462398529053, + "learning_rate": 2.0435684647302905e-05, + "log_odds_chosen": 2.581511974334717, + "log_odds_ratio": -0.2007879614830017, + "logits/chosen": -1.0353610515594482, + "logits/rejected": -0.992326021194458, + "logps/chosen": -0.08307419717311859, + "logps/rejected": -0.6447066068649292, + "loss": 3.3652, + "nll_loss": 0.8212136030197144, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008307419717311859, + "rewards/margins": 0.0561632364988327, + "rewards/rejected": -0.06447066366672516, + "step": 591 + }, + { + "epoch": 0.4094052558782849, + "grad_norm": 3.2936601638793945, + "learning_rate": 2.0470262793914246e-05, + "log_odds_chosen": 5.063530921936035, + "log_odds_ratio": -0.11747785657644272, + "logits/chosen": -0.9911887645721436, + "logits/rejected": -0.9868468046188354, + "logps/chosen": -0.03351970762014389, + "logps/rejected": -0.7749127149581909, + "loss": 4.5265, + "nll_loss": 1.1198837757110596, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003351970575749874, + "rewards/margins": 0.0741392970085144, + "rewards/rejected": -0.07749126851558685, + "step": 592 + }, + { + "epoch": 0.41009681881051174, + "grad_norm": 4.080872058868408, + "learning_rate": 2.0504840940525588e-05, + "log_odds_chosen": 1.4647037982940674, + "log_odds_ratio": -0.47586244344711304, + "logits/chosen": -0.7562670707702637, + "logits/rejected": -0.7988969087600708, + "logps/chosen": -0.07946252077817917, + "logps/rejected": -0.2722882926464081, + "loss": 3.7475, + "nll_loss": 0.8892887234687805, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007946252822875977, + "rewards/margins": 0.01928257942199707, + "rewards/rejected": -0.027228832244873047, + "step": 593 + }, + { + "epoch": 0.4107883817427386, + "grad_norm": 2.9525630474090576, + "learning_rate": 2.053941908713693e-05, + "log_odds_chosen": 3.360999584197998, + "log_odds_ratio": -0.30566781759262085, + "logits/chosen": -0.8931617140769958, + "logits/rejected": -0.946820855140686, + "logps/chosen": -0.07567352056503296, + "logps/rejected": -0.4830871820449829, + "loss": 3.7883, + "nll_loss": 0.9165017604827881, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007567352615296841, + "rewards/margins": 0.040741365402936935, + "rewards/rejected": -0.04830871894955635, + "step": 594 + }, + { + "epoch": 0.4114799446749654, + "grad_norm": 3.956282377243042, + "learning_rate": 2.057399723374827e-05, + "log_odds_chosen": 1.7785038948059082, + "log_odds_ratio": -0.4785193204879761, + "logits/chosen": -1.0089384317398071, + "logits/rejected": -0.9966273307800293, + "logps/chosen": -0.1856726109981537, + "logps/rejected": -0.36326441168785095, + "loss": 4.9856, + "nll_loss": 1.1985530853271484, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018567262217402458, + "rewards/margins": 0.017759177833795547, + "rewards/rejected": -0.036326438188552856, + "step": 595 + }, + { + "epoch": 0.41217150760719223, + "grad_norm": 2.674511194229126, + "learning_rate": 2.0608575380359612e-05, + "log_odds_chosen": 3.8712658882141113, + "log_odds_ratio": -0.21342608332633972, + "logits/chosen": -0.9700406789779663, + "logits/rejected": -0.952599287033081, + "logps/chosen": -0.08244549483060837, + "logps/rejected": -0.7506368160247803, + "loss": 3.3317, + "nll_loss": 0.811578094959259, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008244549855589867, + "rewards/margins": 0.06681913137435913, + "rewards/rejected": -0.07506367564201355, + "step": 596 + }, + { + "epoch": 0.41286307053941906, + "grad_norm": 3.9791877269744873, + "learning_rate": 2.0643153526970954e-05, + "log_odds_chosen": 3.953857421875, + "log_odds_ratio": -0.22624865174293518, + "logits/chosen": -1.003037452697754, + "logits/rejected": -1.02415931224823, + "logps/chosen": -0.0745580866932869, + "logps/rejected": -0.60592120885849, + "loss": 4.6389, + "nll_loss": 1.137102484703064, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007455809041857719, + "rewards/margins": 0.05313631147146225, + "rewards/rejected": -0.06059212237596512, + "step": 597 + }, + { + "epoch": 0.4135546334716459, + "grad_norm": 4.131697177886963, + "learning_rate": 2.0677731673582296e-05, + "log_odds_chosen": 1.4341011047363281, + "log_odds_ratio": -0.36636972427368164, + "logits/chosen": -0.6704150438308716, + "logits/rejected": -0.6725776195526123, + "logps/chosen": -0.15876276791095734, + "logps/rejected": -0.4130246639251709, + "loss": 4.1232, + "nll_loss": 0.9941583871841431, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015876278281211853, + "rewards/margins": 0.025426192209124565, + "rewards/rejected": -0.04130247235298157, + "step": 598 + }, + { + "epoch": 0.4142461964038728, + "grad_norm": 3.8321592807769775, + "learning_rate": 2.071230982019364e-05, + "log_odds_chosen": 2.999331474304199, + "log_odds_ratio": -0.2566238045692444, + "logits/chosen": -0.7986801862716675, + "logits/rejected": -0.8267126679420471, + "logps/chosen": -0.15586483478546143, + "logps/rejected": -0.5835660696029663, + "loss": 3.7385, + "nll_loss": 0.9089583158493042, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015586483292281628, + "rewards/margins": 0.04277012497186661, + "rewards/rejected": -0.05835660919547081, + "step": 599 + }, + { + "epoch": 0.4149377593360996, + "grad_norm": 9.953721046447754, + "learning_rate": 2.0746887966804982e-05, + "log_odds_chosen": 2.450714588165283, + "log_odds_ratio": -0.8266931176185608, + "logits/chosen": -1.1641135215759277, + "logits/rejected": -1.2072113752365112, + "logps/chosen": -0.1697208136320114, + "logps/rejected": -0.699099600315094, + "loss": 4.7114, + "nll_loss": 1.095177412033081, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01697208173573017, + "rewards/margins": 0.05293788015842438, + "rewards/rejected": -0.0699099600315094, + "step": 600 + }, + { + "epoch": 0.41562932226832644, + "grad_norm": 4.421914100646973, + "learning_rate": 2.0781466113416324e-05, + "log_odds_chosen": 1.5719330310821533, + "log_odds_ratio": -0.40029793977737427, + "logits/chosen": -0.844862699508667, + "logits/rejected": -0.8587499856948853, + "logps/chosen": -0.20900601148605347, + "logps/rejected": -0.47964876890182495, + "loss": 4.1449, + "nll_loss": 0.9961846470832825, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020900603383779526, + "rewards/margins": 0.02706427499651909, + "rewards/rejected": -0.047964878380298615, + "step": 601 + }, + { + "epoch": 0.41632088520055327, + "grad_norm": 7.170130252838135, + "learning_rate": 2.0816044260027665e-05, + "log_odds_chosen": 2.6125829219818115, + "log_odds_ratio": -0.6497005224227905, + "logits/chosen": -0.9419523477554321, + "logits/rejected": -0.9898269176483154, + "logps/chosen": -0.13422423601150513, + "logps/rejected": -0.6038023829460144, + "loss": 3.9398, + "nll_loss": 0.9199838638305664, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013422423973679543, + "rewards/margins": 0.04695781320333481, + "rewards/rejected": -0.0603802390396595, + "step": 602 + }, + { + "epoch": 0.4170124481327801, + "grad_norm": 7.772970199584961, + "learning_rate": 2.0850622406639007e-05, + "log_odds_chosen": 3.680398464202881, + "log_odds_ratio": -0.5544787049293518, + "logits/chosen": -0.5327585339546204, + "logits/rejected": -0.5497394800186157, + "logps/chosen": -0.07278777658939362, + "logps/rejected": -0.6879750490188599, + "loss": 4.1216, + "nll_loss": 0.9749466180801392, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007278777658939362, + "rewards/margins": 0.06151873245835304, + "rewards/rejected": -0.0687975138425827, + "step": 603 + }, + { + "epoch": 0.41770401106500693, + "grad_norm": 4.757108688354492, + "learning_rate": 2.088520055325035e-05, + "log_odds_chosen": 2.0446386337280273, + "log_odds_ratio": -0.5148335695266724, + "logits/chosen": -0.6910528540611267, + "logits/rejected": -0.6742294430732727, + "logps/chosen": -0.14390459656715393, + "logps/rejected": -0.4189976751804352, + "loss": 5.4156, + "nll_loss": 1.3024276494979858, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014390461146831512, + "rewards/margins": 0.027509307488799095, + "rewards/rejected": -0.04189977049827576, + "step": 604 + }, + { + "epoch": 0.41839557399723376, + "grad_norm": 8.23976993560791, + "learning_rate": 2.091977869986169e-05, + "log_odds_chosen": 1.552129864692688, + "log_odds_ratio": -0.6863776445388794, + "logits/chosen": -0.557546854019165, + "logits/rejected": -0.6108566522598267, + "logps/chosen": -0.21034224331378937, + "logps/rejected": -0.514655590057373, + "loss": 6.1186, + "nll_loss": 1.4610066413879395, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.021034223958849907, + "rewards/margins": 0.030431339517235756, + "rewards/rejected": -0.05146556347608566, + "step": 605 + }, + { + "epoch": 0.4190871369294606, + "grad_norm": 3.723231077194214, + "learning_rate": 2.095435684647303e-05, + "log_odds_chosen": 1.9156570434570312, + "log_odds_ratio": -0.49034303426742554, + "logits/chosen": -1.0356037616729736, + "logits/rejected": -1.0161478519439697, + "logps/chosen": -0.10302520543336868, + "logps/rejected": -0.24236340820789337, + "loss": 4.8403, + "nll_loss": 1.1610405445098877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010302520357072353, + "rewards/margins": 0.013933821581304073, + "rewards/rejected": -0.024236343801021576, + "step": 606 + }, + { + "epoch": 0.4197786998616874, + "grad_norm": 7.568348407745361, + "learning_rate": 2.0988934993084373e-05, + "log_odds_chosen": 2.4463589191436768, + "log_odds_ratio": -0.4542555809020996, + "logits/chosen": -0.4883726239204407, + "logits/rejected": -0.5257099270820618, + "logps/chosen": -0.10223409533500671, + "logps/rejected": -0.5903890132904053, + "loss": 4.2713, + "nll_loss": 1.0224037170410156, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010223409160971642, + "rewards/margins": 0.04881549999117851, + "rewards/rejected": -0.059038907289505005, + "step": 607 + }, + { + "epoch": 0.42047026279391425, + "grad_norm": 7.222330570220947, + "learning_rate": 2.1023513139695715e-05, + "log_odds_chosen": 1.1048989295959473, + "log_odds_ratio": -0.7384801506996155, + "logits/chosen": -0.7793760299682617, + "logits/rejected": -0.7979192733764648, + "logps/chosen": -0.16994166374206543, + "logps/rejected": -0.4164144992828369, + "loss": 5.7197, + "nll_loss": 1.3560773134231567, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016994165256619453, + "rewards/margins": 0.024647288024425507, + "rewards/rejected": -0.04164145141839981, + "step": 608 + }, + { + "epoch": 0.4211618257261411, + "grad_norm": 5.54396915435791, + "learning_rate": 2.1058091286307056e-05, + "log_odds_chosen": 2.350543975830078, + "log_odds_ratio": -0.32651910185813904, + "logits/chosen": -0.6226840615272522, + "logits/rejected": -0.6696463227272034, + "logps/chosen": -0.14187708497047424, + "logps/rejected": -0.6169252395629883, + "loss": 5.5046, + "nll_loss": 1.343508005142212, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01418770756572485, + "rewards/margins": 0.047504812479019165, + "rewards/rejected": -0.06169252097606659, + "step": 609 + }, + { + "epoch": 0.4218533886583679, + "grad_norm": 7.533912181854248, + "learning_rate": 2.1092669432918398e-05, + "log_odds_chosen": 3.0908732414245605, + "log_odds_ratio": -0.41519248485565186, + "logits/chosen": -0.8241167068481445, + "logits/rejected": -0.9262205958366394, + "logps/chosen": -0.10551971942186356, + "logps/rejected": -0.5151045322418213, + "loss": 3.5644, + "nll_loss": 0.8495787382125854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010551970452070236, + "rewards/margins": 0.04095848649740219, + "rewards/rejected": -0.05151045694947243, + "step": 610 + }, + { + "epoch": 0.42254495159059474, + "grad_norm": 3.861281633377075, + "learning_rate": 2.112724757952974e-05, + "log_odds_chosen": 2.33772349357605, + "log_odds_ratio": -0.4349439740180969, + "logits/chosen": -0.7974127531051636, + "logits/rejected": -0.836585283279419, + "logps/chosen": -0.11797378957271576, + "logps/rejected": -0.3172440528869629, + "loss": 3.3181, + "nll_loss": 0.7860289812088013, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011797377839684486, + "rewards/margins": 0.019927026703953743, + "rewards/rejected": -0.03172440454363823, + "step": 611 + }, + { + "epoch": 0.42323651452282157, + "grad_norm": 5.57545280456543, + "learning_rate": 2.116182572614108e-05, + "log_odds_chosen": 1.7101471424102783, + "log_odds_ratio": -0.4371544122695923, + "logits/chosen": -0.9524636268615723, + "logits/rejected": -0.9920735359191895, + "logps/chosen": -0.15005937218666077, + "logps/rejected": -0.39813879132270813, + "loss": 5.6735, + "nll_loss": 1.374664306640625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015005936846137047, + "rewards/margins": 0.024807943031191826, + "rewards/rejected": -0.03981388360261917, + "step": 612 + }, + { + "epoch": 0.4239280774550484, + "grad_norm": 5.678771495819092, + "learning_rate": 2.1196403872752422e-05, + "log_odds_chosen": 0.9636862277984619, + "log_odds_ratio": -0.9654866456985474, + "logits/chosen": -0.7794173955917358, + "logits/rejected": -0.7428369522094727, + "logps/chosen": -0.10875187069177628, + "logps/rejected": -0.3289680778980255, + "loss": 5.6094, + "nll_loss": 1.3057971000671387, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010875186882913113, + "rewards/margins": 0.022021621465682983, + "rewards/rejected": -0.03289680555462837, + "step": 613 + }, + { + "epoch": 0.42461964038727523, + "grad_norm": 4.860713958740234, + "learning_rate": 2.1230982019363764e-05, + "log_odds_chosen": 1.2260526418685913, + "log_odds_ratio": -0.5476371645927429, + "logits/chosen": -0.8112804889678955, + "logits/rejected": -0.8208972215652466, + "logps/chosen": -0.12507055699825287, + "logps/rejected": -0.20496192574501038, + "loss": 3.966, + "nll_loss": 0.9367334246635437, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012507054954767227, + "rewards/margins": 0.007989136502146721, + "rewards/rejected": -0.020496191456913948, + "step": 614 + }, + { + "epoch": 0.42531120331950206, + "grad_norm": 4.082617282867432, + "learning_rate": 2.1265560165975106e-05, + "log_odds_chosen": 1.8591127395629883, + "log_odds_ratio": -0.3332652747631073, + "logits/chosen": -0.9899469614028931, + "logits/rejected": -0.9953739047050476, + "logps/chosen": -0.13035787642002106, + "logps/rejected": -0.4335346519947052, + "loss": 4.61, + "nll_loss": 1.1191822290420532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013035789132118225, + "rewards/margins": 0.030317679047584534, + "rewards/rejected": -0.04335346817970276, + "step": 615 + }, + { + "epoch": 0.4260027662517289, + "grad_norm": 3.619682550430298, + "learning_rate": 2.1300138312586447e-05, + "log_odds_chosen": 2.190673828125, + "log_odds_ratio": -0.4910670518875122, + "logits/chosen": -0.5571558475494385, + "logits/rejected": -0.5376981496810913, + "logps/chosen": -0.11940689384937286, + "logps/rejected": -0.520751953125, + "loss": 3.6402, + "nll_loss": 0.8609509468078613, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011940689757466316, + "rewards/margins": 0.04013450816273689, + "rewards/rejected": -0.05207519978284836, + "step": 616 + }, + { + "epoch": 0.4266943291839557, + "grad_norm": 2.6865475177764893, + "learning_rate": 2.133471645919779e-05, + "log_odds_chosen": 3.608241319656372, + "log_odds_ratio": -0.2808303236961365, + "logits/chosen": -0.7004544734954834, + "logits/rejected": -0.7063158750534058, + "logps/chosen": -0.10437381267547607, + "logps/rejected": -0.4118492007255554, + "loss": 2.8528, + "nll_loss": 0.6851093173027039, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010437380522489548, + "rewards/margins": 0.030747540295124054, + "rewards/rejected": -0.0411849245429039, + "step": 617 + }, + { + "epoch": 0.42738589211618255, + "grad_norm": 2.807631731033325, + "learning_rate": 2.136929460580913e-05, + "log_odds_chosen": 3.1713521480560303, + "log_odds_ratio": -0.2818281054496765, + "logits/chosen": -0.19942578673362732, + "logits/rejected": -0.23111185431480408, + "logps/chosen": -0.1086578443646431, + "logps/rejected": -0.46914684772491455, + "loss": 3.9685, + "nll_loss": 0.9639319777488708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01086578518152237, + "rewards/margins": 0.036048900336027145, + "rewards/rejected": -0.046914685517549515, + "step": 618 + }, + { + "epoch": 0.4280774550484094, + "grad_norm": 3.6067707538604736, + "learning_rate": 2.1403872752420472e-05, + "log_odds_chosen": 2.436103582382202, + "log_odds_ratio": -0.1551538109779358, + "logits/chosen": -0.8461130261421204, + "logits/rejected": -0.9088087677955627, + "logps/chosen": -0.1261477768421173, + "logps/rejected": -0.8308358192443848, + "loss": 3.525, + "nll_loss": 0.865744948387146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012614777311682701, + "rewards/margins": 0.07046880573034286, + "rewards/rejected": -0.08308358490467072, + "step": 619 + }, + { + "epoch": 0.4287690179806362, + "grad_norm": 3.759319305419922, + "learning_rate": 2.1438450899031813e-05, + "log_odds_chosen": 3.011648178100586, + "log_odds_ratio": -0.40124091506004333, + "logits/chosen": -0.7172807455062866, + "logits/rejected": -0.7169740796089172, + "logps/chosen": -0.17018908262252808, + "logps/rejected": -0.4564322233200073, + "loss": 4.2084, + "nll_loss": 1.0119715929031372, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017018906772136688, + "rewards/margins": 0.028624314814805984, + "rewards/rejected": -0.04564322531223297, + "step": 620 + }, + { + "epoch": 0.42946058091286304, + "grad_norm": 3.819620370864868, + "learning_rate": 2.147302904564315e-05, + "log_odds_chosen": 2.1871187686920166, + "log_odds_ratio": -0.30014118552207947, + "logits/chosen": -0.5669596791267395, + "logits/rejected": -0.5761108994483948, + "logps/chosen": -0.14405927062034607, + "logps/rejected": -0.31124773621559143, + "loss": 4.5619, + "nll_loss": 1.1104607582092285, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014405926689505577, + "rewards/margins": 0.016718847677111626, + "rewards/rejected": -0.031124770641326904, + "step": 621 + }, + { + "epoch": 0.43015214384508993, + "grad_norm": 3.3658599853515625, + "learning_rate": 2.1507607192254497e-05, + "log_odds_chosen": 1.3140965700149536, + "log_odds_ratio": -0.3687862455844879, + "logits/chosen": -0.48315146565437317, + "logits/rejected": -0.4790656566619873, + "logps/chosen": -0.14348377287387848, + "logps/rejected": -0.553962767124176, + "loss": 3.5421, + "nll_loss": 0.8486409187316895, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014348377473652363, + "rewards/margins": 0.041047900915145874, + "rewards/rejected": -0.055396273732185364, + "step": 622 + }, + { + "epoch": 0.43084370677731676, + "grad_norm": 4.934183120727539, + "learning_rate": 2.1542185338865838e-05, + "log_odds_chosen": 0.5347107648849487, + "log_odds_ratio": -0.8448858857154846, + "logits/chosen": -0.739535927772522, + "logits/rejected": -0.7089710235595703, + "logps/chosen": -0.21264883875846863, + "logps/rejected": -0.19111944735050201, + "loss": 4.5833, + "nll_loss": 1.0613362789154053, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.021264884620904922, + "rewards/margins": -0.0021529390942305326, + "rewards/rejected": -0.01911194622516632, + "step": 623 + }, + { + "epoch": 0.4315352697095436, + "grad_norm": 3.4687881469726562, + "learning_rate": 2.157676348547718e-05, + "log_odds_chosen": 2.592643976211548, + "log_odds_ratio": -0.42990919947624207, + "logits/chosen": -0.5274770259857178, + "logits/rejected": -0.564875066280365, + "logps/chosen": -0.14812719821929932, + "logps/rejected": -0.48280104994773865, + "loss": 4.1303, + "nll_loss": 0.9895771741867065, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014812720008194447, + "rewards/margins": 0.033467382192611694, + "rewards/rejected": -0.048280104994773865, + "step": 624 + }, + { + "epoch": 0.4322268326417704, + "grad_norm": 6.197664260864258, + "learning_rate": 2.161134163208852e-05, + "log_odds_chosen": 1.2445203065872192, + "log_odds_ratio": -0.5221874713897705, + "logits/chosen": -0.16059088706970215, + "logits/rejected": -0.23864303529262543, + "logps/chosen": -0.16100680828094482, + "logps/rejected": -0.32815873622894287, + "loss": 4.8393, + "nll_loss": 1.1576130390167236, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016100682318210602, + "rewards/margins": 0.016715193167328835, + "rewards/rejected": -0.03281587362289429, + "step": 625 + }, + { + "epoch": 0.43291839557399725, + "grad_norm": 5.179163455963135, + "learning_rate": 2.1645919778699863e-05, + "log_odds_chosen": 1.6051888465881348, + "log_odds_ratio": -0.40840965509414673, + "logits/chosen": -0.9315775036811829, + "logits/rejected": -0.9024972915649414, + "logps/chosen": -0.17119812965393066, + "logps/rejected": -0.3284410238265991, + "loss": 4.5274, + "nll_loss": 1.0910115242004395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017119813710451126, + "rewards/margins": 0.015724290162324905, + "rewards/rejected": -0.03284410387277603, + "step": 626 + }, + { + "epoch": 0.4336099585062241, + "grad_norm": 6.263176918029785, + "learning_rate": 2.1680497925311204e-05, + "log_odds_chosen": 2.8963382244110107, + "log_odds_ratio": -0.7403358817100525, + "logits/chosen": -0.8731557130813599, + "logits/rejected": -0.8686691522598267, + "logps/chosen": -0.11738395690917969, + "logps/rejected": -0.5990802049636841, + "loss": 3.758, + "nll_loss": 0.8654546141624451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011738396249711514, + "rewards/margins": 0.04816962778568268, + "rewards/rejected": -0.05990801751613617, + "step": 627 + }, + { + "epoch": 0.4343015214384509, + "grad_norm": 4.25787878036499, + "learning_rate": 2.1715076071922546e-05, + "log_odds_chosen": 3.804713487625122, + "log_odds_ratio": -0.11228744685649872, + "logits/chosen": -0.5167519450187683, + "logits/rejected": -0.5582841634750366, + "logps/chosen": -0.05870117247104645, + "logps/rejected": -0.5987078547477722, + "loss": 4.6895, + "nll_loss": 1.1611560583114624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005870117340236902, + "rewards/margins": 0.05400066822767258, + "rewards/rejected": -0.05987078696489334, + "step": 628 + }, + { + "epoch": 0.43499308437067774, + "grad_norm": 5.743461608886719, + "learning_rate": 2.1749654218533887e-05, + "log_odds_chosen": 0.03474217653274536, + "log_odds_ratio": -0.8090510964393616, + "logits/chosen": -0.890548586845398, + "logits/rejected": -0.907269299030304, + "logps/chosen": -0.3029828667640686, + "logps/rejected": -0.2693331241607666, + "loss": 4.6926, + "nll_loss": 1.0922552347183228, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03029828704893589, + "rewards/margins": -0.003364972770214081, + "rewards/rejected": -0.02693331427872181, + "step": 629 + }, + { + "epoch": 0.43568464730290457, + "grad_norm": 4.5716352462768555, + "learning_rate": 2.178423236514523e-05, + "log_odds_chosen": 0.03434586524963379, + "log_odds_ratio": -0.7138271331787109, + "logits/chosen": -0.7514895796775818, + "logits/rejected": -0.7773147821426392, + "logps/chosen": -0.21131566166877747, + "logps/rejected": -0.22896406054496765, + "loss": 5.0569, + "nll_loss": 1.192832589149475, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021131567656993866, + "rewards/margins": 0.0017648395150899887, + "rewards/rejected": -0.022896405309438705, + "step": 630 + }, + { + "epoch": 0.4363762102351314, + "grad_norm": 4.4561686515808105, + "learning_rate": 2.181881051175657e-05, + "log_odds_chosen": 1.4079740047454834, + "log_odds_ratio": -0.5032125115394592, + "logits/chosen": -0.927483320236206, + "logits/rejected": -0.8899856805801392, + "logps/chosen": -0.1549498587846756, + "logps/rejected": -0.4571008086204529, + "loss": 4.6199, + "nll_loss": 1.1046602725982666, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01549498364329338, + "rewards/margins": 0.030215097591280937, + "rewards/rejected": -0.04571007937192917, + "step": 631 + }, + { + "epoch": 0.43706777316735823, + "grad_norm": 4.596878528594971, + "learning_rate": 2.1853388658367912e-05, + "log_odds_chosen": -0.02922854572534561, + "log_odds_ratio": -0.8003402352333069, + "logits/chosen": -0.7364927530288696, + "logits/rejected": -0.7289812564849854, + "logps/chosen": -0.17805808782577515, + "logps/rejected": -0.1403406709432602, + "loss": 3.4764, + "nll_loss": 0.7890704870223999, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.017805809155106544, + "rewards/margins": -0.0037717418745160103, + "rewards/rejected": -0.014034068211913109, + "step": 632 + }, + { + "epoch": 0.43775933609958506, + "grad_norm": 3.0470612049102783, + "learning_rate": 2.1887966804979254e-05, + "log_odds_chosen": 1.3135905265808105, + "log_odds_ratio": -0.3760731816291809, + "logits/chosen": -0.9930360317230225, + "logits/rejected": -0.990397572517395, + "logps/chosen": -0.16257613897323608, + "logps/rejected": -0.36039984226226807, + "loss": 4.3786, + "nll_loss": 1.0570416450500488, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01625761389732361, + "rewards/margins": 0.019782373681664467, + "rewards/rejected": -0.036039985716342926, + "step": 633 + }, + { + "epoch": 0.4384508990318119, + "grad_norm": 4.420305252075195, + "learning_rate": 2.1922544951590595e-05, + "log_odds_chosen": 1.2010859251022339, + "log_odds_ratio": -0.34093958139419556, + "logits/chosen": -0.5854348540306091, + "logits/rejected": -0.6194449663162231, + "logps/chosen": -0.1107054203748703, + "logps/rejected": -0.3933059573173523, + "loss": 4.8822, + "nll_loss": 1.1864490509033203, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01107054203748703, + "rewards/margins": 0.028260057792067528, + "rewards/rejected": -0.03933060169219971, + "step": 634 + }, + { + "epoch": 0.4391424619640387, + "grad_norm": 4.327555179595947, + "learning_rate": 2.1957123098201937e-05, + "log_odds_chosen": 1.896942377090454, + "log_odds_ratio": -0.4892352819442749, + "logits/chosen": -0.8643075227737427, + "logits/rejected": -0.8878147006034851, + "logps/chosen": -0.08768102526664734, + "logps/rejected": -0.3476106524467468, + "loss": 4.4977, + "nll_loss": 1.0755122900009155, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008768102154135704, + "rewards/margins": 0.02599296346306801, + "rewards/rejected": -0.03476106375455856, + "step": 635 + }, + { + "epoch": 0.43983402489626555, + "grad_norm": 4.274322986602783, + "learning_rate": 2.199170124481328e-05, + "log_odds_chosen": 0.9763669371604919, + "log_odds_ratio": -0.4171779751777649, + "logits/chosen": -0.8166045546531677, + "logits/rejected": -0.8884382843971252, + "logps/chosen": -0.128249853849411, + "logps/rejected": -0.3486913740634918, + "loss": 5.1655, + "nll_loss": 1.2496671676635742, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012824985198676586, + "rewards/margins": 0.02204415202140808, + "rewards/rejected": -0.03486913815140724, + "step": 636 + }, + { + "epoch": 0.4405255878284924, + "grad_norm": 7.383416652679443, + "learning_rate": 2.202627939142462e-05, + "log_odds_chosen": 0.6402369737625122, + "log_odds_ratio": -0.9285612106323242, + "logits/chosen": -0.7861868143081665, + "logits/rejected": -0.7962783575057983, + "logps/chosen": -0.29655033349990845, + "logps/rejected": -0.42859429121017456, + "loss": 5.2194, + "nll_loss": 1.2119812965393066, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.029655033722519875, + "rewards/margins": 0.013204396702349186, + "rewards/rejected": -0.04285942763090134, + "step": 637 + }, + { + "epoch": 0.4412171507607192, + "grad_norm": 3.3463234901428223, + "learning_rate": 2.206085753803596e-05, + "log_odds_chosen": 1.1340889930725098, + "log_odds_ratio": -0.4663980007171631, + "logits/chosen": -1.0553393363952637, + "logits/rejected": -1.0700688362121582, + "logps/chosen": -0.16461165249347687, + "logps/rejected": -0.3293513357639313, + "loss": 4.5737, + "nll_loss": 1.0967905521392822, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016461165621876717, + "rewards/margins": 0.01647396758198738, + "rewards/rejected": -0.03293513134121895, + "step": 638 + }, + { + "epoch": 0.44190871369294604, + "grad_norm": 3.887077808380127, + "learning_rate": 2.2095435684647303e-05, + "log_odds_chosen": 1.06056809425354, + "log_odds_ratio": -0.3603925406932831, + "logits/chosen": -0.6518492102622986, + "logits/rejected": -0.6650881171226501, + "logps/chosen": -0.20834115147590637, + "logps/rejected": -0.42792510986328125, + "loss": 5.1296, + "nll_loss": 1.2463585138320923, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020834118127822876, + "rewards/margins": 0.021958395838737488, + "rewards/rejected": -0.042792513966560364, + "step": 639 + }, + { + "epoch": 0.4426002766251729, + "grad_norm": 3.2058866024017334, + "learning_rate": 2.2130013831258645e-05, + "log_odds_chosen": 1.502551794052124, + "log_odds_ratio": -0.31615087389945984, + "logits/chosen": -0.5435585975646973, + "logits/rejected": -0.5261815786361694, + "logps/chosen": -0.0911509096622467, + "logps/rejected": -0.2400660216808319, + "loss": 3.3395, + "nll_loss": 0.8032507300376892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009115091525018215, + "rewards/margins": 0.014891511760652065, + "rewards/rejected": -0.02400660328567028, + "step": 640 + }, + { + "epoch": 0.4432918395573997, + "grad_norm": 3.7415406703948975, + "learning_rate": 2.2164591977869986e-05, + "log_odds_chosen": 2.1081349849700928, + "log_odds_ratio": -0.25261473655700684, + "logits/chosen": -0.7255756855010986, + "logits/rejected": -0.7845942974090576, + "logps/chosen": -0.12538139522075653, + "logps/rejected": -0.41523393988609314, + "loss": 4.2104, + "nll_loss": 1.0273290872573853, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012538139708340168, + "rewards/margins": 0.02898525446653366, + "rewards/rejected": -0.041523393243551254, + "step": 641 + }, + { + "epoch": 0.44398340248962653, + "grad_norm": 4.018771171569824, + "learning_rate": 2.2199170124481328e-05, + "log_odds_chosen": 1.4001842737197876, + "log_odds_ratio": -0.4438078999519348, + "logits/chosen": -0.7838208675384521, + "logits/rejected": -0.8029565215110779, + "logps/chosen": -0.41601836681365967, + "logps/rejected": -0.5780717134475708, + "loss": 4.6326, + "nll_loss": 1.113771915435791, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.04160183668136597, + "rewards/margins": 0.016205335035920143, + "rewards/rejected": -0.05780716985464096, + "step": 642 + }, + { + "epoch": 0.44467496542185336, + "grad_norm": 4.972719192504883, + "learning_rate": 2.223374827109267e-05, + "log_odds_chosen": 0.3314962387084961, + "log_odds_ratio": -0.6846669316291809, + "logits/chosen": -0.8766312599182129, + "logits/rejected": -0.8915233612060547, + "logps/chosen": -0.2336045205593109, + "logps/rejected": -0.32554876804351807, + "loss": 4.8175, + "nll_loss": 1.1359007358551025, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02336045168340206, + "rewards/margins": 0.009194424375891685, + "rewards/rejected": -0.03255487605929375, + "step": 643 + }, + { + "epoch": 0.44536652835408025, + "grad_norm": 5.33884859085083, + "learning_rate": 2.2268326417704014e-05, + "log_odds_chosen": 1.5173817873001099, + "log_odds_ratio": -0.5441713929176331, + "logits/chosen": -0.808599054813385, + "logits/rejected": -0.8076343536376953, + "logps/chosen": -0.14385217428207397, + "logps/rejected": -0.31490224599838257, + "loss": 4.8053, + "nll_loss": 1.146899938583374, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014385217800736427, + "rewards/margins": 0.01710500568151474, + "rewards/rejected": -0.03149022161960602, + "step": 644 + }, + { + "epoch": 0.4460580912863071, + "grad_norm": 2.935650110244751, + "learning_rate": 2.2302904564315356e-05, + "log_odds_chosen": 2.2890067100524902, + "log_odds_ratio": -0.3063238263130188, + "logits/chosen": -0.9418952465057373, + "logits/rejected": -0.9721782207489014, + "logps/chosen": -0.14255841076374054, + "logps/rejected": -0.43673819303512573, + "loss": 3.4497, + "nll_loss": 0.8317903876304626, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014255841262638569, + "rewards/margins": 0.02941797859966755, + "rewards/rejected": -0.04367382079362869, + "step": 645 + }, + { + "epoch": 0.4467496542185339, + "grad_norm": 4.236213684082031, + "learning_rate": 2.2337482710926697e-05, + "log_odds_chosen": 1.5571013689041138, + "log_odds_ratio": -0.42031991481781006, + "logits/chosen": -1.0419297218322754, + "logits/rejected": -1.061880350112915, + "logps/chosen": -0.1531745195388794, + "logps/rejected": -0.385803759098053, + "loss": 5.3258, + "nll_loss": 1.289408564567566, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015317452140152454, + "rewards/margins": 0.02326292172074318, + "rewards/rejected": -0.03858037665486336, + "step": 646 + }, + { + "epoch": 0.44744121715076074, + "grad_norm": 2.988067150115967, + "learning_rate": 2.237206085753804e-05, + "log_odds_chosen": 2.180973768234253, + "log_odds_ratio": -0.29629412293434143, + "logits/chosen": -0.5734241604804993, + "logits/rejected": -0.5932707190513611, + "logps/chosen": -0.08009282499551773, + "logps/rejected": -0.2977696359157562, + "loss": 3.2365, + "nll_loss": 0.7794865369796753, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008009282872080803, + "rewards/margins": 0.02176768146455288, + "rewards/rejected": -0.029776964336633682, + "step": 647 + }, + { + "epoch": 0.44813278008298757, + "grad_norm": 4.176928520202637, + "learning_rate": 2.240663900414938e-05, + "log_odds_chosen": 0.4870775043964386, + "log_odds_ratio": -0.6710426807403564, + "logits/chosen": -0.9077112078666687, + "logits/rejected": -0.885935366153717, + "logps/chosen": -0.20027214288711548, + "logps/rejected": -0.21831238269805908, + "loss": 5.4343, + "nll_loss": 1.2914657592773438, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020027216523885727, + "rewards/margins": 0.0018040239810943604, + "rewards/rejected": -0.021831240504980087, + "step": 648 + }, + { + "epoch": 0.4488243430152144, + "grad_norm": 2.1371140480041504, + "learning_rate": 2.2441217150760722e-05, + "log_odds_chosen": 4.809844493865967, + "log_odds_ratio": -0.09976686537265778, + "logits/chosen": -0.4416744112968445, + "logits/rejected": -0.42568644881248474, + "logps/chosen": -0.04137096181511879, + "logps/rejected": -0.4853114187717438, + "loss": 2.7075, + "nll_loss": 0.6669005155563354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004137096460908651, + "rewards/margins": 0.04439404606819153, + "rewards/rejected": -0.048531144857406616, + "step": 649 + }, + { + "epoch": 0.44951590594744123, + "grad_norm": 4.71837854385376, + "learning_rate": 2.2475795297372064e-05, + "log_odds_chosen": 2.040015935897827, + "log_odds_ratio": -0.5045047998428345, + "logits/chosen": -0.932171106338501, + "logits/rejected": -0.9571213126182556, + "logps/chosen": -0.1530236005783081, + "logps/rejected": -0.4374780058860779, + "loss": 5.1655, + "nll_loss": 1.2409231662750244, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015302360989153385, + "rewards/margins": 0.02844543755054474, + "rewards/rejected": -0.04374779760837555, + "step": 650 + }, + { + "epoch": 0.45020746887966806, + "grad_norm": 3.8925278186798096, + "learning_rate": 2.2510373443983405e-05, + "log_odds_chosen": 0.5939846634864807, + "log_odds_ratio": -0.5840673446655273, + "logits/chosen": -0.775164008140564, + "logits/rejected": -0.7598456740379333, + "logps/chosen": -0.1721755415201187, + "logps/rejected": -0.2940378189086914, + "loss": 3.7779, + "nll_loss": 0.8860760927200317, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01721755415201187, + "rewards/margins": 0.012186229228973389, + "rewards/rejected": -0.02940378151834011, + "step": 651 + }, + { + "epoch": 0.4508990318118949, + "grad_norm": 5.848450660705566, + "learning_rate": 2.2544951590594747e-05, + "log_odds_chosen": 0.2048446238040924, + "log_odds_ratio": -0.6970038414001465, + "logits/chosen": -1.0518460273742676, + "logits/rejected": -1.0371158123016357, + "logps/chosen": -0.202000230550766, + "logps/rejected": -0.21628674864768982, + "loss": 4.8156, + "nll_loss": 1.1341984272003174, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.020200025290250778, + "rewards/margins": 0.0014286513905972242, + "rewards/rejected": -0.021628674119710922, + "step": 652 + }, + { + "epoch": 0.4515905947441217, + "grad_norm": 12.044407844543457, + "learning_rate": 2.257952973720609e-05, + "log_odds_chosen": 0.9392263889312744, + "log_odds_ratio": -1.0350747108459473, + "logits/chosen": -1.006348729133606, + "logits/rejected": -0.9927129149436951, + "logps/chosen": -0.37167733907699585, + "logps/rejected": -0.5737072229385376, + "loss": 5.473, + "nll_loss": 1.2647355794906616, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.037167735397815704, + "rewards/margins": 0.020202992483973503, + "rewards/rejected": -0.05737072601914406, + "step": 653 + }, + { + "epoch": 0.45228215767634855, + "grad_norm": 6.465015888214111, + "learning_rate": 2.261410788381743e-05, + "log_odds_chosen": 1.9693341255187988, + "log_odds_ratio": -0.6730993390083313, + "logits/chosen": -0.28224295377731323, + "logits/rejected": -0.2860146760940552, + "logps/chosen": -0.13423292338848114, + "logps/rejected": -0.28257089853286743, + "loss": 3.8267, + "nll_loss": 0.8893666863441467, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013423292897641659, + "rewards/margins": 0.01483379676938057, + "rewards/rejected": -0.028257090598344803, + "step": 654 + }, + { + "epoch": 0.4529737206085754, + "grad_norm": 3.4309816360473633, + "learning_rate": 2.264868603042877e-05, + "log_odds_chosen": 3.407935619354248, + "log_odds_ratio": -0.24094924330711365, + "logits/chosen": -0.5608742237091064, + "logits/rejected": -0.5527865290641785, + "logps/chosen": -0.08340831845998764, + "logps/rejected": -0.4136614501476288, + "loss": 2.9297, + "nll_loss": 0.7083350419998169, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008340831845998764, + "rewards/margins": 0.03302531689405441, + "rewards/rejected": -0.04136614501476288, + "step": 655 + }, + { + "epoch": 0.4536652835408022, + "grad_norm": 4.206521034240723, + "learning_rate": 2.2683264177040113e-05, + "log_odds_chosen": 3.273314952850342, + "log_odds_ratio": -0.3209017515182495, + "logits/chosen": -0.6433435678482056, + "logits/rejected": -0.6615561842918396, + "logps/chosen": -0.07162127643823624, + "logps/rejected": -0.4444481134414673, + "loss": 2.7651, + "nll_loss": 0.6591819524765015, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007162127643823624, + "rewards/margins": 0.037282682955265045, + "rewards/rejected": -0.04444481059908867, + "step": 656 + }, + { + "epoch": 0.45435684647302904, + "grad_norm": 4.922212600708008, + "learning_rate": 2.2717842323651455e-05, + "log_odds_chosen": 0.9490612745285034, + "log_odds_ratio": -0.6327435970306396, + "logits/chosen": -0.9662469029426575, + "logits/rejected": -0.9231171607971191, + "logps/chosen": -0.16946572065353394, + "logps/rejected": -0.3263300061225891, + "loss": 2.9991, + "nll_loss": 0.686488151550293, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016946572810411453, + "rewards/margins": 0.015686428174376488, + "rewards/rejected": -0.03263299912214279, + "step": 657 + }, + { + "epoch": 0.45504840940525587, + "grad_norm": 4.609391212463379, + "learning_rate": 2.2752420470262796e-05, + "log_odds_chosen": 1.5648826360702515, + "log_odds_ratio": -0.5906928777694702, + "logits/chosen": -0.6863613128662109, + "logits/rejected": -0.682285726070404, + "logps/chosen": -0.18679755926132202, + "logps/rejected": -0.4306481182575226, + "loss": 4.8784, + "nll_loss": 1.1605193614959717, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018679756671190262, + "rewards/margins": 0.024385055527091026, + "rewards/rejected": -0.04306481406092644, + "step": 658 + }, + { + "epoch": 0.4557399723374827, + "grad_norm": 4.7040181159973145, + "learning_rate": 2.2786998616874138e-05, + "log_odds_chosen": 3.1283419132232666, + "log_odds_ratio": -0.21017280220985413, + "logits/chosen": -0.9078444242477417, + "logits/rejected": -1.0034115314483643, + "logps/chosen": -0.06228271499276161, + "logps/rejected": -0.530816376209259, + "loss": 5.0095, + "nll_loss": 1.2313501834869385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006228271871805191, + "rewards/margins": 0.04685336351394653, + "rewards/rejected": -0.05308163911104202, + "step": 659 + }, + { + "epoch": 0.45643153526970953, + "grad_norm": 5.001986026763916, + "learning_rate": 2.282157676348548e-05, + "log_odds_chosen": 1.5463465452194214, + "log_odds_ratio": -0.49791088700294495, + "logits/chosen": -0.7297683954238892, + "logits/rejected": -0.729040265083313, + "logps/chosen": -0.14330022037029266, + "logps/rejected": -0.42162853479385376, + "loss": 5.4593, + "nll_loss": 1.3150358200073242, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014330022968351841, + "rewards/margins": 0.02783283218741417, + "rewards/rejected": -0.042162857949733734, + "step": 660 + }, + { + "epoch": 0.45712309820193636, + "grad_norm": 4.276454448699951, + "learning_rate": 2.285615491009682e-05, + "log_odds_chosen": 4.342514514923096, + "log_odds_ratio": -0.18762508034706116, + "logits/chosen": -0.3685300946235657, + "logits/rejected": -0.3769664466381073, + "logps/chosen": -0.03132067620754242, + "logps/rejected": -0.8504193425178528, + "loss": 3.3376, + "nll_loss": 0.8156321048736572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003132067620754242, + "rewards/margins": 0.08190987259149551, + "rewards/rejected": -0.08504194021224976, + "step": 661 + }, + { + "epoch": 0.4578146611341632, + "grad_norm": 7.172152519226074, + "learning_rate": 2.289073305670816e-05, + "log_odds_chosen": 1.3948019742965698, + "log_odds_ratio": -0.7182621955871582, + "logits/chosen": -0.9717065095901489, + "logits/rejected": -0.9662845134735107, + "logps/chosen": -0.14619432389736176, + "logps/rejected": -0.45164555311203003, + "loss": 4.7844, + "nll_loss": 1.1242810487747192, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014619432389736176, + "rewards/margins": 0.030545122921466827, + "rewards/rejected": -0.045164551585912704, + "step": 662 + }, + { + "epoch": 0.45850622406639, + "grad_norm": 3.811978578567505, + "learning_rate": 2.29253112033195e-05, + "log_odds_chosen": 3.057511806488037, + "log_odds_ratio": -0.3753212094306946, + "logits/chosen": -0.5274239778518677, + "logits/rejected": -0.5684012770652771, + "logps/chosen": -0.07948172837495804, + "logps/rejected": -0.3966066241264343, + "loss": 2.7919, + "nll_loss": 0.6604464054107666, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007948173210024834, + "rewards/margins": 0.03171249106526375, + "rewards/rejected": -0.03966066613793373, + "step": 663 + }, + { + "epoch": 0.45919778699861685, + "grad_norm": 3.786224365234375, + "learning_rate": 2.2959889349930842e-05, + "log_odds_chosen": 3.219557285308838, + "log_odds_ratio": -0.2720406949520111, + "logits/chosen": -0.828033983707428, + "logits/rejected": -0.866054356098175, + "logps/chosen": -0.09778165817260742, + "logps/rejected": -0.568712592124939, + "loss": 3.7211, + "nll_loss": 0.903061032295227, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009778165258467197, + "rewards/margins": 0.047093093395233154, + "rewards/rejected": -0.05687125772237778, + "step": 664 + }, + { + "epoch": 0.4598893499308437, + "grad_norm": 5.818863391876221, + "learning_rate": 2.2994467496542184e-05, + "log_odds_chosen": 4.029585361480713, + "log_odds_ratio": -0.19765400886535645, + "logits/chosen": -0.8430564403533936, + "logits/rejected": -0.8614147901535034, + "logps/chosen": -0.05954580008983612, + "logps/rejected": -0.5826296806335449, + "loss": 3.7868, + "nll_loss": 0.9269360303878784, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005954580381512642, + "rewards/margins": 0.05230838805437088, + "rewards/rejected": -0.058262962847948074, + "step": 665 + }, + { + "epoch": 0.4605809128630705, + "grad_norm": 4.759044170379639, + "learning_rate": 2.3029045643153525e-05, + "log_odds_chosen": 2.9879543781280518, + "log_odds_ratio": -0.40324172377586365, + "logits/chosen": -0.7793172597885132, + "logits/rejected": -0.7813615798950195, + "logps/chosen": -0.08944907784461975, + "logps/rejected": -0.5450009107589722, + "loss": 4.8858, + "nll_loss": 1.1811367273330688, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008944908156991005, + "rewards/margins": 0.045555178076028824, + "rewards/rejected": -0.05450008437037468, + "step": 666 + }, + { + "epoch": 0.4612724757952974, + "grad_norm": 5.577088832855225, + "learning_rate": 2.306362378976487e-05, + "log_odds_chosen": 3.68088436126709, + "log_odds_ratio": -0.4639522433280945, + "logits/chosen": -0.8919408321380615, + "logits/rejected": -0.9049323797225952, + "logps/chosen": -0.10777649283409119, + "logps/rejected": -0.7049047946929932, + "loss": 6.1401, + "nll_loss": 1.4886412620544434, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010777648538351059, + "rewards/margins": 0.05971283093094826, + "rewards/rejected": -0.07049047946929932, + "step": 667 + }, + { + "epoch": 0.46196403872752423, + "grad_norm": 8.479419708251953, + "learning_rate": 2.3098201936376212e-05, + "log_odds_chosen": -0.17054805159568787, + "log_odds_ratio": -1.0562233924865723, + "logits/chosen": -0.4395080804824829, + "logits/rejected": -0.4240494668483734, + "logps/chosen": -0.286432683467865, + "logps/rejected": -0.14448946714401245, + "loss": 4.8861, + "nll_loss": 1.1159002780914307, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.02864326722919941, + "rewards/margins": -0.014194320887327194, + "rewards/rejected": -0.01444894727319479, + "step": 668 + }, + { + "epoch": 0.46265560165975106, + "grad_norm": 5.375706672668457, + "learning_rate": 2.3132780082987553e-05, + "log_odds_chosen": 2.186450242996216, + "log_odds_ratio": -0.39149805903434753, + "logits/chosen": -0.852817714214325, + "logits/rejected": -0.8768041729927063, + "logps/chosen": -0.11213727295398712, + "logps/rejected": -0.42280256748199463, + "loss": 4.9077, + "nll_loss": 1.187782645225525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011213728226721287, + "rewards/margins": 0.03106653317809105, + "rewards/rejected": -0.04228026047348976, + "step": 669 + }, + { + "epoch": 0.4633471645919779, + "grad_norm": 6.580266952514648, + "learning_rate": 2.3167358229598895e-05, + "log_odds_chosen": 1.5665547847747803, + "log_odds_ratio": -0.7286292910575867, + "logits/chosen": -0.7778172492980957, + "logits/rejected": -0.7949341535568237, + "logps/chosen": -0.14386004209518433, + "logps/rejected": -0.4769167900085449, + "loss": 4.1815, + "nll_loss": 0.9725210666656494, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014386004768311977, + "rewards/margins": 0.03330567479133606, + "rewards/rejected": -0.04769168049097061, + "step": 670 + }, + { + "epoch": 0.4640387275242047, + "grad_norm": 5.457213878631592, + "learning_rate": 2.3201936376210237e-05, + "log_odds_chosen": 4.0841383934021, + "log_odds_ratio": -0.19488346576690674, + "logits/chosen": -0.9210847616195679, + "logits/rejected": -0.9380112886428833, + "logps/chosen": -0.06799730658531189, + "logps/rejected": -0.5339650511741638, + "loss": 5.0348, + "nll_loss": 1.2392032146453857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006799730472266674, + "rewards/margins": 0.04659678041934967, + "rewards/rejected": -0.05339650809764862, + "step": 671 + }, + { + "epoch": 0.46473029045643155, + "grad_norm": 3.573294162750244, + "learning_rate": 2.3236514522821578e-05, + "log_odds_chosen": 2.17598295211792, + "log_odds_ratio": -0.29349270462989807, + "logits/chosen": -0.749427855014801, + "logits/rejected": -0.7455752491950989, + "logps/chosen": -0.14027109742164612, + "logps/rejected": -0.6240657567977905, + "loss": 3.9887, + "nll_loss": 0.9678138494491577, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014027111232280731, + "rewards/margins": 0.048379458487033844, + "rewards/rejected": -0.062406569719314575, + "step": 672 + }, + { + "epoch": 0.4654218533886584, + "grad_norm": 6.060122013092041, + "learning_rate": 2.327109266943292e-05, + "log_odds_chosen": 4.223729610443115, + "log_odds_ratio": -0.28007790446281433, + "logits/chosen": -0.4201928377151489, + "logits/rejected": -0.44983333349227905, + "logps/chosen": -0.08497817814350128, + "logps/rejected": -0.754709005355835, + "loss": 3.8559, + "nll_loss": 0.9359645247459412, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008497818373143673, + "rewards/margins": 0.06697308272123337, + "rewards/rejected": -0.07547089457511902, + "step": 673 + }, + { + "epoch": 0.4661134163208852, + "grad_norm": 3.797612428665161, + "learning_rate": 2.330567081604426e-05, + "log_odds_chosen": 1.7717167139053345, + "log_odds_ratio": -0.39768701791763306, + "logits/chosen": -0.7274033427238464, + "logits/rejected": -0.7588223218917847, + "logps/chosen": -0.08553272485733032, + "logps/rejected": -0.3211662173271179, + "loss": 3.6084, + "nll_loss": 0.8623219728469849, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008553272113204002, + "rewards/margins": 0.02356335148215294, + "rewards/rejected": -0.03211662545800209, + "step": 674 + }, + { + "epoch": 0.46680497925311204, + "grad_norm": 6.884234428405762, + "learning_rate": 2.3340248962655603e-05, + "log_odds_chosen": 0.7624253630638123, + "log_odds_ratio": -0.7565903067588806, + "logits/chosen": -0.5982872843742371, + "logits/rejected": -0.6013323068618774, + "logps/chosen": -0.23801694810390472, + "logps/rejected": -0.3481021523475647, + "loss": 4.1929, + "nll_loss": 0.9725688099861145, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023801693692803383, + "rewards/margins": 0.011008523404598236, + "rewards/rejected": -0.03481021523475647, + "step": 675 + }, + { + "epoch": 0.46749654218533887, + "grad_norm": 4.248331069946289, + "learning_rate": 2.3374827109266944e-05, + "log_odds_chosen": 2.659592628479004, + "log_odds_ratio": -0.4246455729007721, + "logits/chosen": -0.5385680198669434, + "logits/rejected": -0.5579327940940857, + "logps/chosen": -0.14692561328411102, + "logps/rejected": -0.48084428906440735, + "loss": 4.525, + "nll_loss": 1.0887757539749146, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014692561700940132, + "rewards/margins": 0.03339186683297157, + "rewards/rejected": -0.048084430396556854, + "step": 676 + }, + { + "epoch": 0.4681881051175657, + "grad_norm": 6.2764387130737305, + "learning_rate": 2.3409405255878286e-05, + "log_odds_chosen": 2.887273073196411, + "log_odds_ratio": -0.6312262415885925, + "logits/chosen": -0.7242209911346436, + "logits/rejected": -0.7688803672790527, + "logps/chosen": -0.21251043677330017, + "logps/rejected": -0.6274941563606262, + "loss": 4.3248, + "nll_loss": 1.0180730819702148, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021251043304800987, + "rewards/margins": 0.041498374193906784, + "rewards/rejected": -0.06274942308664322, + "step": 677 + }, + { + "epoch": 0.46887966804979253, + "grad_norm": 3.9606966972351074, + "learning_rate": 2.3443983402489627e-05, + "log_odds_chosen": 1.724837303161621, + "log_odds_ratio": -0.35207831859588623, + "logits/chosen": -0.8217482566833496, + "logits/rejected": -0.8121352195739746, + "logps/chosen": -0.10145239531993866, + "logps/rejected": -0.26650118827819824, + "loss": 6.4763, + "nll_loss": 1.5838665962219238, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010145239531993866, + "rewards/margins": 0.016504880040884018, + "rewards/rejected": -0.026650119572877884, + "step": 678 + }, + { + "epoch": 0.46957123098201936, + "grad_norm": 4.277392387390137, + "learning_rate": 2.347856154910097e-05, + "log_odds_chosen": 3.5856690406799316, + "log_odds_ratio": -0.3550170361995697, + "logits/chosen": -0.8307151794433594, + "logits/rejected": -0.8088093996047974, + "logps/chosen": -0.07082566618919373, + "logps/rejected": -0.47188445925712585, + "loss": 3.7765, + "nll_loss": 0.9086235165596008, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007082565687596798, + "rewards/margins": 0.04010588303208351, + "rewards/rejected": -0.047188449651002884, + "step": 679 + }, + { + "epoch": 0.4702627939142462, + "grad_norm": 2.5866429805755615, + "learning_rate": 2.351313969571231e-05, + "log_odds_chosen": 6.325022220611572, + "log_odds_ratio": -0.11263729631900787, + "logits/chosen": -0.6943249702453613, + "logits/rejected": -0.7007040977478027, + "logps/chosen": -0.028736630454659462, + "logps/rejected": -0.8324602246284485, + "loss": 3.3625, + "nll_loss": 0.8293724060058594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002873663092032075, + "rewards/margins": 0.08037236332893372, + "rewards/rejected": -0.08324602246284485, + "step": 680 + }, + { + "epoch": 0.470954356846473, + "grad_norm": 3.6870453357696533, + "learning_rate": 2.3547717842323652e-05, + "log_odds_chosen": 3.5631234645843506, + "log_odds_ratio": -0.2938140630722046, + "logits/chosen": -0.8848822116851807, + "logits/rejected": -0.9235984086990356, + "logps/chosen": -0.07511557638645172, + "logps/rejected": -0.5606021285057068, + "loss": 4.6362, + "nll_loss": 1.1296803951263428, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007511558011174202, + "rewards/margins": 0.04854864999651909, + "rewards/rejected": -0.05606020987033844, + "step": 681 + }, + { + "epoch": 0.47164591977869985, + "grad_norm": 4.530055046081543, + "learning_rate": 2.3582295988934994e-05, + "log_odds_chosen": 2.7019920349121094, + "log_odds_ratio": -0.21998381614685059, + "logits/chosen": -0.38282451033592224, + "logits/rejected": -0.4395584464073181, + "logps/chosen": -0.04314936324954033, + "logps/rejected": -0.5612409710884094, + "loss": 4.021, + "nll_loss": 0.9832491278648376, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0043149362318217754, + "rewards/margins": 0.051809161901474, + "rewards/rejected": -0.05612409487366676, + "step": 682 + }, + { + "epoch": 0.4723374827109267, + "grad_norm": 5.116053581237793, + "learning_rate": 2.3616874135546335e-05, + "log_odds_chosen": 3.1295435428619385, + "log_odds_ratio": -0.4350450038909912, + "logits/chosen": -0.7074323892593384, + "logits/rejected": -0.696124255657196, + "logps/chosen": -0.13088738918304443, + "logps/rejected": -0.45735907554626465, + "loss": 3.6855, + "nll_loss": 0.877873957157135, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013088738545775414, + "rewards/margins": 0.03264717012643814, + "rewards/rejected": -0.045735906809568405, + "step": 683 + }, + { + "epoch": 0.4730290456431535, + "grad_norm": 3.400541067123413, + "learning_rate": 2.3651452282157677e-05, + "log_odds_chosen": 3.7050838470458984, + "log_odds_ratio": -0.19541694223880768, + "logits/chosen": -0.7514389753341675, + "logits/rejected": -0.7832955121994019, + "logps/chosen": -0.04506572708487511, + "logps/rejected": -0.648411750793457, + "loss": 2.8218, + "nll_loss": 0.6859157681465149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004506572615355253, + "rewards/margins": 0.06033460050821304, + "rewards/rejected": -0.06484117358922958, + "step": 684 + }, + { + "epoch": 0.47372060857538034, + "grad_norm": 3.607529640197754, + "learning_rate": 2.368603042876902e-05, + "log_odds_chosen": 5.2442169189453125, + "log_odds_ratio": -0.09644800424575806, + "logits/chosen": -0.5605584383010864, + "logits/rejected": -0.5699232220649719, + "logps/chosen": -0.0434856116771698, + "logps/rejected": -0.7973841428756714, + "loss": 3.877, + "nll_loss": 0.9596099853515625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00434856116771698, + "rewards/margins": 0.07538985460996628, + "rewards/rejected": -0.07973841577768326, + "step": 685 + }, + { + "epoch": 0.47441217150760717, + "grad_norm": 6.932708263397217, + "learning_rate": 2.372060857538036e-05, + "log_odds_chosen": 3.1771626472473145, + "log_odds_ratio": -0.4026772379875183, + "logits/chosen": -0.4725809693336487, + "logits/rejected": -0.4851277768611908, + "logps/chosen": -0.10029126703739166, + "logps/rejected": -0.3951825499534607, + "loss": 4.7662, + "nll_loss": 1.151286005973816, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010029126890003681, + "rewards/margins": 0.029489127919077873, + "rewards/rejected": -0.03951825574040413, + "step": 686 + }, + { + "epoch": 0.475103734439834, + "grad_norm": 3.666964054107666, + "learning_rate": 2.37551867219917e-05, + "log_odds_chosen": 3.696667432785034, + "log_odds_ratio": -0.41769081354141235, + "logits/chosen": -0.430128276348114, + "logits/rejected": -0.4578275680541992, + "logps/chosen": -0.13075147569179535, + "logps/rejected": -0.5719894766807556, + "loss": 3.1046, + "nll_loss": 0.7343854308128357, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013075148686766624, + "rewards/margins": 0.044123802334070206, + "rewards/rejected": -0.05719895288348198, + "step": 687 + }, + { + "epoch": 0.47579529737206083, + "grad_norm": 4.897703647613525, + "learning_rate": 2.3789764868603043e-05, + "log_odds_chosen": 2.9305665493011475, + "log_odds_ratio": -0.5501704812049866, + "logits/chosen": -0.8313249349594116, + "logits/rejected": -0.8332083225250244, + "logps/chosen": -0.1709379106760025, + "logps/rejected": -0.5386735200881958, + "loss": 4.0558, + "nll_loss": 0.9589261412620544, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01709379069507122, + "rewards/margins": 0.03677356243133545, + "rewards/rejected": -0.05386735126376152, + "step": 688 + }, + { + "epoch": 0.47648686030428766, + "grad_norm": 6.1797261238098145, + "learning_rate": 2.3824343015214385e-05, + "log_odds_chosen": 3.6837799549102783, + "log_odds_ratio": -0.4411526322364807, + "logits/chosen": -0.5774151086807251, + "logits/rejected": -0.5877612829208374, + "logps/chosen": -0.09230178594589233, + "logps/rejected": -0.6897770166397095, + "loss": 4.0497, + "nll_loss": 0.9682997465133667, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009230178780853748, + "rewards/margins": 0.059747517108917236, + "rewards/rejected": -0.06897769123315811, + "step": 689 + }, + { + "epoch": 0.47717842323651455, + "grad_norm": 8.8616361618042, + "learning_rate": 2.385892116182573e-05, + "log_odds_chosen": 1.3486244678497314, + "log_odds_ratio": -0.5516138076782227, + "logits/chosen": -0.569491982460022, + "logits/rejected": -0.5894758105278015, + "logps/chosen": -0.16861675679683685, + "logps/rejected": -0.512142539024353, + "loss": 5.723, + "nll_loss": 1.3755992650985718, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016861675307154655, + "rewards/margins": 0.03435257449746132, + "rewards/rejected": -0.051214251667261124, + "step": 690 + }, + { + "epoch": 0.4778699861687414, + "grad_norm": 3.650421380996704, + "learning_rate": 2.389349930843707e-05, + "log_odds_chosen": 4.223165988922119, + "log_odds_ratio": -0.2957286834716797, + "logits/chosen": -0.7020717263221741, + "logits/rejected": -0.6890726685523987, + "logps/chosen": -0.08652821183204651, + "logps/rejected": -0.5754883289337158, + "loss": 4.0655, + "nll_loss": 0.9868116974830627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008652821183204651, + "rewards/margins": 0.04889601469039917, + "rewards/rejected": -0.057548828423023224, + "step": 691 + }, + { + "epoch": 0.4785615491009682, + "grad_norm": 4.098056793212891, + "learning_rate": 2.3928077455048413e-05, + "log_odds_chosen": 5.177599906921387, + "log_odds_ratio": -0.1555010825395584, + "logits/chosen": -0.6356836557388306, + "logits/rejected": -0.6456239819526672, + "logps/chosen": -0.053202398121356964, + "logps/rejected": -0.9623237252235413, + "loss": 5.5101, + "nll_loss": 1.3619807958602905, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005320240277796984, + "rewards/margins": 0.09091213345527649, + "rewards/rejected": -0.09623237699270248, + "step": 692 + }, + { + "epoch": 0.47925311203319504, + "grad_norm": 4.213980674743652, + "learning_rate": 2.3962655601659754e-05, + "log_odds_chosen": 4.00429105758667, + "log_odds_ratio": -0.21531108021736145, + "logits/chosen": -0.5026724338531494, + "logits/rejected": -0.542472779750824, + "logps/chosen": -0.10158185660839081, + "logps/rejected": -0.9162179231643677, + "loss": 3.8501, + "nll_loss": 0.9409924745559692, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010158185847103596, + "rewards/margins": 0.08146360516548157, + "rewards/rejected": -0.09162179380655289, + "step": 693 + }, + { + "epoch": 0.47994467496542187, + "grad_norm": 5.7204909324646, + "learning_rate": 2.3997233748271096e-05, + "log_odds_chosen": 3.674548625946045, + "log_odds_ratio": -0.46117550134658813, + "logits/chosen": -0.7658097147941589, + "logits/rejected": -0.7818572521209717, + "logps/chosen": -0.1041916161775589, + "logps/rejected": -0.4919533431529999, + "loss": 2.8983, + "nll_loss": 0.6784451007843018, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01041916199028492, + "rewards/margins": 0.03877617418766022, + "rewards/rejected": -0.04919533431529999, + "step": 694 + }, + { + "epoch": 0.4806362378976487, + "grad_norm": 4.009372234344482, + "learning_rate": 2.4031811894882437e-05, + "log_odds_chosen": 2.4284112453460693, + "log_odds_ratio": -0.33100491762161255, + "logits/chosen": -0.6104992032051086, + "logits/rejected": -0.601692795753479, + "logps/chosen": -0.10431472957134247, + "logps/rejected": -0.3768067955970764, + "loss": 4.076, + "nll_loss": 0.9859074950218201, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010431474074721336, + "rewards/margins": 0.027249209582805634, + "rewards/rejected": -0.03768068179488182, + "step": 695 + }, + { + "epoch": 0.48132780082987553, + "grad_norm": 3.8886003494262695, + "learning_rate": 2.406639004149378e-05, + "log_odds_chosen": 3.0958616733551025, + "log_odds_ratio": -0.39199692010879517, + "logits/chosen": -0.9665194749832153, + "logits/rejected": -0.9609275460243225, + "logps/chosen": -0.0949786901473999, + "logps/rejected": -0.7616181969642639, + "loss": 3.6704, + "nll_loss": 0.8784018754959106, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009497868828475475, + "rewards/margins": 0.0666639506816864, + "rewards/rejected": -0.07616182416677475, + "step": 696 + }, + { + "epoch": 0.48201936376210236, + "grad_norm": 4.627381801605225, + "learning_rate": 2.410096818810512e-05, + "log_odds_chosen": 4.46881103515625, + "log_odds_ratio": -0.2650693953037262, + "logits/chosen": -0.435590922832489, + "logits/rejected": -0.48427075147628784, + "logps/chosen": -0.04191301390528679, + "logps/rejected": -1.0247564315795898, + "loss": 4.7758, + "nll_loss": 1.1674453020095825, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004191301297396421, + "rewards/margins": 0.09828434139490128, + "rewards/rejected": -0.10247564315795898, + "step": 697 + }, + { + "epoch": 0.4827109266943292, + "grad_norm": 4.1338982582092285, + "learning_rate": 2.4135546334716462e-05, + "log_odds_chosen": 3.408036947250366, + "log_odds_ratio": -0.18219085037708282, + "logits/chosen": -0.7029824256896973, + "logits/rejected": -0.7051295638084412, + "logps/chosen": -0.06247822940349579, + "logps/rejected": -0.5670195817947388, + "loss": 4.3019, + "nll_loss": 1.0572634935379028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006247823126614094, + "rewards/margins": 0.050454139709472656, + "rewards/rejected": -0.05670195817947388, + "step": 698 + }, + { + "epoch": 0.483402489626556, + "grad_norm": 3.902381420135498, + "learning_rate": 2.4170124481327804e-05, + "log_odds_chosen": 3.730391502380371, + "log_odds_ratio": -0.25784698128700256, + "logits/chosen": -0.6142232418060303, + "logits/rejected": -0.6449130773544312, + "logps/chosen": -0.054460309445858, + "logps/rejected": -0.5013003945350647, + "loss": 3.2071, + "nll_loss": 0.7759826183319092, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005446030758321285, + "rewards/margins": 0.04468400776386261, + "rewards/rejected": -0.05013003945350647, + "step": 699 + }, + { + "epoch": 0.48409405255878285, + "grad_norm": 3.9679691791534424, + "learning_rate": 2.4204702627939145e-05, + "log_odds_chosen": 2.903071880340576, + "log_odds_ratio": -0.3373691737651825, + "logits/chosen": -0.628058671951294, + "logits/rejected": -0.6574671268463135, + "logps/chosen": -0.05507644638419151, + "logps/rejected": -0.4640412926673889, + "loss": 3.3383, + "nll_loss": 0.8008279800415039, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0055076442658901215, + "rewards/margins": 0.04089648649096489, + "rewards/rejected": -0.04640413075685501, + "step": 700 + }, + { + "epoch": 0.4847856154910097, + "grad_norm": 4.491204738616943, + "learning_rate": 2.4239280774550487e-05, + "log_odds_chosen": 3.4920854568481445, + "log_odds_ratio": -0.4841329753398895, + "logits/chosen": -0.8375265002250671, + "logits/rejected": -0.8460544347763062, + "logps/chosen": -0.20640972256660461, + "logps/rejected": -0.8564853668212891, + "loss": 4.7305, + "nll_loss": 1.13421630859375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02064097486436367, + "rewards/margins": 0.06500756740570068, + "rewards/rejected": -0.0856485366821289, + "step": 701 + }, + { + "epoch": 0.4854771784232365, + "grad_norm": 3.5440473556518555, + "learning_rate": 2.427385892116183e-05, + "log_odds_chosen": 5.956630706787109, + "log_odds_ratio": -0.14299477636814117, + "logits/chosen": -0.589779257774353, + "logits/rejected": -0.6273432970046997, + "logps/chosen": -0.05705910921096802, + "logps/rejected": -1.2479016780853271, + "loss": 3.6198, + "nll_loss": 0.8906500339508057, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005705910734832287, + "rewards/margins": 0.11908426880836487, + "rewards/rejected": -0.12479016929864883, + "step": 702 + }, + { + "epoch": 0.48616874135546334, + "grad_norm": 5.295886516571045, + "learning_rate": 2.4308437067773167e-05, + "log_odds_chosen": 4.244320869445801, + "log_odds_ratio": -0.27056583762168884, + "logits/chosen": -0.7680187225341797, + "logits/rejected": -0.7666739821434021, + "logps/chosen": -0.0726771354675293, + "logps/rejected": -0.9253808856010437, + "loss": 5.6222, + "nll_loss": 1.3784937858581543, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007267713081091642, + "rewards/margins": 0.08527037501335144, + "rewards/rejected": -0.09253808110952377, + "step": 703 + }, + { + "epoch": 0.48686030428769017, + "grad_norm": 5.875594139099121, + "learning_rate": 2.4343015214384508e-05, + "log_odds_chosen": 3.2456061840057373, + "log_odds_ratio": -0.8249081373214722, + "logits/chosen": -0.8685834407806396, + "logits/rejected": -0.7868944406509399, + "logps/chosen": -0.166039377450943, + "logps/rejected": -0.7548103928565979, + "loss": 4.1563, + "nll_loss": 0.9565944075584412, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01660393737256527, + "rewards/margins": 0.05887710675597191, + "rewards/rejected": -0.07548104226589203, + "step": 704 + }, + { + "epoch": 0.487551867219917, + "grad_norm": 4.143042087554932, + "learning_rate": 2.437759336099585e-05, + "log_odds_chosen": 1.445640206336975, + "log_odds_ratio": -0.44271689653396606, + "logits/chosen": -0.541271984577179, + "logits/rejected": -0.5584791302680969, + "logps/chosen": -0.19485358893871307, + "logps/rejected": -0.5292760133743286, + "loss": 3.6438, + "nll_loss": 0.8666675686836243, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019485358148813248, + "rewards/margins": 0.03344224393367767, + "rewards/rejected": -0.05292759835720062, + "step": 705 + }, + { + "epoch": 0.48824343015214383, + "grad_norm": 7.924228668212891, + "learning_rate": 2.441217150760719e-05, + "log_odds_chosen": 2.327423334121704, + "log_odds_ratio": -0.5170378088951111, + "logits/chosen": -0.6705014705657959, + "logits/rejected": -0.6787251830101013, + "logps/chosen": -0.26046106219291687, + "logps/rejected": -0.5459519028663635, + "loss": 3.8784, + "nll_loss": 0.9178914427757263, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.026046108454465866, + "rewards/margins": 0.028549088165163994, + "rewards/rejected": -0.05459519475698471, + "step": 706 + }, + { + "epoch": 0.48893499308437066, + "grad_norm": 3.639187812805176, + "learning_rate": 2.4446749654218533e-05, + "log_odds_chosen": 7.317781448364258, + "log_odds_ratio": -0.0036757837515324354, + "logits/chosen": -0.6027560234069824, + "logits/rejected": -0.6349475383758545, + "logps/chosen": -0.008355571888387203, + "logps/rejected": -1.3751740455627441, + "loss": 3.8351, + "nll_loss": 0.9584081172943115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000835557235404849, + "rewards/margins": 0.13668186962604523, + "rewards/rejected": -0.13751742243766785, + "step": 707 + }, + { + "epoch": 0.4896265560165975, + "grad_norm": 4.836334228515625, + "learning_rate": 2.4481327800829874e-05, + "log_odds_chosen": 2.9217796325683594, + "log_odds_ratio": -0.2655583620071411, + "logits/chosen": -0.7226359248161316, + "logits/rejected": -0.7514448165893555, + "logps/chosen": -0.21701642870903015, + "logps/rejected": -0.7266488075256348, + "loss": 4.8098, + "nll_loss": 1.1758911609649658, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.021701643243432045, + "rewards/margins": 0.05096323788166046, + "rewards/rejected": -0.07266488671302795, + "step": 708 + }, + { + "epoch": 0.4903181189488243, + "grad_norm": 9.13145637512207, + "learning_rate": 2.4515905947441216e-05, + "log_odds_chosen": 1.2536616325378418, + "log_odds_ratio": -0.7132289409637451, + "logits/chosen": -0.544143795967102, + "logits/rejected": -0.5465924739837646, + "logps/chosen": -0.2198922038078308, + "logps/rejected": -0.5221154689788818, + "loss": 5.0716, + "nll_loss": 1.196586012840271, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02198921889066696, + "rewards/margins": 0.030222328379750252, + "rewards/rejected": -0.052211545407772064, + "step": 709 + }, + { + "epoch": 0.49100968188105115, + "grad_norm": 5.586406707763672, + "learning_rate": 2.4550484094052557e-05, + "log_odds_chosen": 3.5675199031829834, + "log_odds_ratio": -0.3161616921424866, + "logits/chosen": -0.4636297821998596, + "logits/rejected": -0.5131024122238159, + "logps/chosen": -0.07581526041030884, + "logps/rejected": -0.7153963446617126, + "loss": 4.9111, + "nll_loss": 1.1961618661880493, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007581526413559914, + "rewards/margins": 0.06395810842514038, + "rewards/rejected": -0.07153964042663574, + "step": 710 + }, + { + "epoch": 0.491701244813278, + "grad_norm": 3.736074924468994, + "learning_rate": 2.45850622406639e-05, + "log_odds_chosen": 3.691223382949829, + "log_odds_ratio": -0.321685254573822, + "logits/chosen": -0.819664716720581, + "logits/rejected": -0.7938543558120728, + "logps/chosen": -0.08634298294782639, + "logps/rejected": -0.7033805847167969, + "loss": 4.2003, + "nll_loss": 1.0179029703140259, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008634298108518124, + "rewards/margins": 0.06170375645160675, + "rewards/rejected": -0.07033805549144745, + "step": 711 + }, + { + "epoch": 0.49239280774550487, + "grad_norm": 7.242453575134277, + "learning_rate": 2.4619640387275244e-05, + "log_odds_chosen": 2.9408159255981445, + "log_odds_ratio": -0.3289014399051666, + "logits/chosen": -0.7875644564628601, + "logits/rejected": -0.7483938336372375, + "logps/chosen": -0.10421629995107651, + "logps/rejected": -0.5258691310882568, + "loss": 3.8019, + "nll_loss": 0.9175827503204346, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01042162999510765, + "rewards/margins": 0.04216528683900833, + "rewards/rejected": -0.052586913108825684, + "step": 712 + }, + { + "epoch": 0.4930843706777317, + "grad_norm": 4.52717399597168, + "learning_rate": 2.4654218533886586e-05, + "log_odds_chosen": 2.288606882095337, + "log_odds_ratio": -0.5677134394645691, + "logits/chosen": -0.9292905330657959, + "logits/rejected": -0.9273300170898438, + "logps/chosen": -0.22288769483566284, + "logps/rejected": -0.6668552160263062, + "loss": 6.0572, + "nll_loss": 1.4575316905975342, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022288773208856583, + "rewards/margins": 0.04439675435423851, + "rewards/rejected": -0.06668552756309509, + "step": 713 + }, + { + "epoch": 0.49377593360995853, + "grad_norm": 4.925570011138916, + "learning_rate": 2.4688796680497927e-05, + "log_odds_chosen": 2.577275037765503, + "log_odds_ratio": -0.3427557349205017, + "logits/chosen": -0.7094758152961731, + "logits/rejected": -0.7411022782325745, + "logps/chosen": -0.14292514324188232, + "logps/rejected": -0.5327209830284119, + "loss": 4.5115, + "nll_loss": 1.0935925245285034, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014292514882981777, + "rewards/margins": 0.038979582488536835, + "rewards/rejected": -0.053272098302841187, + "step": 714 + }, + { + "epoch": 0.49446749654218536, + "grad_norm": 4.740431308746338, + "learning_rate": 2.472337482710927e-05, + "log_odds_chosen": 4.266748428344727, + "log_odds_ratio": -0.1784973442554474, + "logits/chosen": -0.4813273549079895, + "logits/rejected": -0.5323498845100403, + "logps/chosen": -0.1453215330839157, + "logps/rejected": -0.9189692139625549, + "loss": 4.5187, + "nll_loss": 1.1118159294128418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014532153494656086, + "rewards/margins": 0.07736477255821228, + "rewards/rejected": -0.0918969213962555, + "step": 715 + }, + { + "epoch": 0.4951590594744122, + "grad_norm": 3.258787155151367, + "learning_rate": 2.475795297372061e-05, + "log_odds_chosen": 4.292454242706299, + "log_odds_ratio": -0.16503621637821198, + "logits/chosen": -0.5896936058998108, + "logits/rejected": -0.6260637640953064, + "logps/chosen": -0.05939153581857681, + "logps/rejected": -0.5779461860656738, + "loss": 3.8843, + "nll_loss": 0.9545656442642212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005939153954386711, + "rewards/margins": 0.051855459809303284, + "rewards/rejected": -0.057794615626335144, + "step": 716 + }, + { + "epoch": 0.495850622406639, + "grad_norm": 4.017149448394775, + "learning_rate": 2.4792531120331952e-05, + "log_odds_chosen": 1.6627793312072754, + "log_odds_ratio": -0.4310210049152374, + "logits/chosen": -0.9920613765716553, + "logits/rejected": -1.0068923234939575, + "logps/chosen": -0.17367520928382874, + "logps/rejected": -0.3792310655117035, + "loss": 4.0788, + "nll_loss": 0.9766014814376831, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017367523163557053, + "rewards/margins": 0.020555583760142326, + "rewards/rejected": -0.03792310506105423, + "step": 717 + }, + { + "epoch": 0.49654218533886585, + "grad_norm": 3.9915847778320312, + "learning_rate": 2.4827109266943293e-05, + "log_odds_chosen": 1.5953905582427979, + "log_odds_ratio": -0.3041524887084961, + "logits/chosen": -0.8089584112167358, + "logits/rejected": -0.8049341440200806, + "logps/chosen": -0.1256750226020813, + "logps/rejected": -0.4668002724647522, + "loss": 4.7225, + "nll_loss": 1.1501986980438232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01256750337779522, + "rewards/margins": 0.03411252424120903, + "rewards/rejected": -0.0466800294816494, + "step": 718 + }, + { + "epoch": 0.4972337482710927, + "grad_norm": 4.275820255279541, + "learning_rate": 2.4861687413554635e-05, + "log_odds_chosen": 3.3468992710113525, + "log_odds_ratio": -0.33127206563949585, + "logits/chosen": -0.7646849155426025, + "logits/rejected": -0.8305087685585022, + "logps/chosen": -0.19278240203857422, + "logps/rejected": -0.6699162721633911, + "loss": 4.3256, + "nll_loss": 1.0482611656188965, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01927824132144451, + "rewards/margins": 0.04771338775753975, + "rewards/rejected": -0.06699162721633911, + "step": 719 + }, + { + "epoch": 0.4979253112033195, + "grad_norm": 7.294179916381836, + "learning_rate": 2.4896265560165977e-05, + "log_odds_chosen": 1.5366660356521606, + "log_odds_ratio": -0.4828697443008423, + "logits/chosen": -0.7735751867294312, + "logits/rejected": -0.7869131565093994, + "logps/chosen": -0.10392677038908005, + "logps/rejected": -0.3954734206199646, + "loss": 4.8667, + "nll_loss": 1.1683766841888428, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01039267797023058, + "rewards/margins": 0.029154665768146515, + "rewards/rejected": -0.03954734280705452, + "step": 720 + }, + { + "epoch": 0.49861687413554634, + "grad_norm": 4.376179218292236, + "learning_rate": 2.4930843706777318e-05, + "log_odds_chosen": 2.7048628330230713, + "log_odds_ratio": -0.4194316565990448, + "logits/chosen": -0.9780904650688171, + "logits/rejected": -0.9741083383560181, + "logps/chosen": -0.09044212102890015, + "logps/rejected": -0.6422989368438721, + "loss": 4.7012, + "nll_loss": 1.1333608627319336, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009044213220477104, + "rewards/margins": 0.05518568307161331, + "rewards/rejected": -0.06422989815473557, + "step": 721 + }, + { + "epoch": 0.49930843706777317, + "grad_norm": 4.935683727264404, + "learning_rate": 2.496542185338866e-05, + "log_odds_chosen": 2.501511812210083, + "log_odds_ratio": -0.2555024325847626, + "logits/chosen": -0.6554673314094543, + "logits/rejected": -0.7062564492225647, + "logps/chosen": -0.10878726094961166, + "logps/rejected": -0.4665549397468567, + "loss": 4.8412, + "nll_loss": 1.1847527027130127, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010878726840019226, + "rewards/margins": 0.035776764154434204, + "rewards/rejected": -0.04665549099445343, + "step": 722 + }, + { + "epoch": 0.5, + "grad_norm": 4.417289733886719, + "learning_rate": 2.5e-05, + "log_odds_chosen": 5.634490489959717, + "log_odds_ratio": -0.17039619386196136, + "logits/chosen": -1.154792070388794, + "logits/rejected": -1.2090914249420166, + "logps/chosen": -0.06233625113964081, + "logps/rejected": -0.9839351177215576, + "loss": 5.0622, + "nll_loss": 1.248513102531433, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006233625113964081, + "rewards/margins": 0.09215987473726273, + "rewards/rejected": -0.0983935073018074, + "step": 723 + }, + { + "epoch": 0.5006915629322268, + "grad_norm": 7.064770698547363, + "learning_rate": 2.5034578146611343e-05, + "log_odds_chosen": 3.256854772567749, + "log_odds_ratio": -1.0175788402557373, + "logits/chosen": -0.9441323280334473, + "logits/rejected": -0.9441784620285034, + "logps/chosen": -0.1523509919643402, + "logps/rejected": -0.7931645512580872, + "loss": 4.4789, + "nll_loss": 1.0179661512374878, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015235099010169506, + "rewards/margins": 0.0640813559293747, + "rewards/rejected": -0.07931645214557648, + "step": 724 + }, + { + "epoch": 0.5013831258644537, + "grad_norm": 3.7516984939575195, + "learning_rate": 2.5069156293222684e-05, + "log_odds_chosen": 5.143672943115234, + "log_odds_ratio": -0.17888480424880981, + "logits/chosen": -0.6211791038513184, + "logits/rejected": -0.6152884364128113, + "logps/chosen": -0.03391317278146744, + "logps/rejected": -0.6649593710899353, + "loss": 4.3696, + "nll_loss": 1.0745129585266113, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003391317091882229, + "rewards/margins": 0.06310462206602097, + "rewards/rejected": -0.06649593263864517, + "step": 725 + }, + { + "epoch": 0.5020746887966805, + "grad_norm": 4.188735485076904, + "learning_rate": 2.5103734439834026e-05, + "log_odds_chosen": 5.950955390930176, + "log_odds_ratio": -0.1566586196422577, + "logits/chosen": -0.553804874420166, + "logits/rejected": -0.6219318509101868, + "logps/chosen": -0.058077793568372726, + "logps/rejected": -1.1520419120788574, + "loss": 3.492, + "nll_loss": 0.8573253154754639, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005807779263705015, + "rewards/margins": 0.10939642041921616, + "rewards/rejected": -0.11520420014858246, + "step": 726 + }, + { + "epoch": 0.5027662517289073, + "grad_norm": 5.206643104553223, + "learning_rate": 2.5138312586445367e-05, + "log_odds_chosen": 2.201714038848877, + "log_odds_ratio": -0.407717227935791, + "logits/chosen": -0.8526656627655029, + "logits/rejected": -0.8820828199386597, + "logps/chosen": -0.14064417779445648, + "logps/rejected": -0.46919354796409607, + "loss": 4.2531, + "nll_loss": 1.02250337600708, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014064418151974678, + "rewards/margins": 0.03285493701696396, + "rewards/rejected": -0.04691935330629349, + "step": 727 + }, + { + "epoch": 0.5034578146611342, + "grad_norm": 4.013926982879639, + "learning_rate": 2.517289073305671e-05, + "log_odds_chosen": 5.200564384460449, + "log_odds_ratio": -0.14862266182899475, + "logits/chosen": -0.7379950284957886, + "logits/rejected": -0.7829983234405518, + "logps/chosen": -0.06311299651861191, + "logps/rejected": -1.130431056022644, + "loss": 3.7501, + "nll_loss": 0.9226634502410889, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006311299279332161, + "rewards/margins": 0.10673180222511292, + "rewards/rejected": -0.11304309964179993, + "step": 728 + }, + { + "epoch": 0.504149377593361, + "grad_norm": 6.703171253204346, + "learning_rate": 2.520746887966805e-05, + "log_odds_chosen": 3.670536994934082, + "log_odds_ratio": -0.6451328992843628, + "logits/chosen": -0.9287649393081665, + "logits/rejected": -0.9407943487167358, + "logps/chosen": -0.14862267673015594, + "logps/rejected": -0.744902491569519, + "loss": 2.8785, + "nll_loss": 0.6551051139831543, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014862269163131714, + "rewards/margins": 0.05962797999382019, + "rewards/rejected": -0.0744902491569519, + "step": 729 + }, + { + "epoch": 0.5048409405255878, + "grad_norm": 5.829585075378418, + "learning_rate": 2.5242047026279392e-05, + "log_odds_chosen": 0.3746829032897949, + "log_odds_ratio": -0.7685449719429016, + "logits/chosen": -0.9504293203353882, + "logits/rejected": -0.9758710861206055, + "logps/chosen": -0.26119324564933777, + "logps/rejected": -0.31625595688819885, + "loss": 5.5506, + "nll_loss": 1.3107986450195312, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.026119325309991837, + "rewards/margins": 0.005506269633769989, + "rewards/rejected": -0.031625594943761826, + "step": 730 + }, + { + "epoch": 0.5055325034578146, + "grad_norm": 4.024864673614502, + "learning_rate": 2.5276625172890734e-05, + "log_odds_chosen": 1.6292681694030762, + "log_odds_ratio": -0.34728798270225525, + "logits/chosen": -0.9132955074310303, + "logits/rejected": -0.9273039102554321, + "logps/chosen": -0.1225036233663559, + "logps/rejected": -0.3228994607925415, + "loss": 5.56, + "nll_loss": 1.3552802801132202, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012250362895429134, + "rewards/margins": 0.02003958262503147, + "rewards/rejected": -0.03228994458913803, + "step": 731 + }, + { + "epoch": 0.5062240663900415, + "grad_norm": 4.193017482757568, + "learning_rate": 2.5311203319502075e-05, + "log_odds_chosen": 3.345766544342041, + "log_odds_ratio": -0.4248673617839813, + "logits/chosen": -0.6792013645172119, + "logits/rejected": -0.7000718116760254, + "logps/chosen": -0.10061212629079819, + "logps/rejected": -0.5930604338645935, + "loss": 3.9277, + "nll_loss": 0.9394264817237854, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010061212815344334, + "rewards/margins": 0.04924483224749565, + "rewards/rejected": -0.05930604785680771, + "step": 732 + }, + { + "epoch": 0.5069156293222683, + "grad_norm": 4.2471513748168945, + "learning_rate": 2.5345781466113417e-05, + "log_odds_chosen": 2.095512628555298, + "log_odds_ratio": -0.363492876291275, + "logits/chosen": -1.0475705862045288, + "logits/rejected": -1.0593514442443848, + "logps/chosen": -0.1103830561041832, + "logps/rejected": -0.4578365683555603, + "loss": 4.8793, + "nll_loss": 1.1834713220596313, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01103830523788929, + "rewards/margins": 0.03474535420536995, + "rewards/rejected": -0.04578366130590439, + "step": 733 + }, + { + "epoch": 0.5076071922544951, + "grad_norm": 4.612521171569824, + "learning_rate": 2.538035961272476e-05, + "log_odds_chosen": 3.1200380325317383, + "log_odds_ratio": -0.23247987031936646, + "logits/chosen": -0.8296999335289001, + "logits/rejected": -0.8448148965835571, + "logps/chosen": -0.06477497518062592, + "logps/rejected": -0.6362839937210083, + "loss": 4.8238, + "nll_loss": 1.1827137470245361, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0064774975180625916, + "rewards/margins": 0.05715090408921242, + "rewards/rejected": -0.06362840533256531, + "step": 734 + }, + { + "epoch": 0.508298755186722, + "grad_norm": 6.666332244873047, + "learning_rate": 2.54149377593361e-05, + "log_odds_chosen": 4.635190963745117, + "log_odds_ratio": -0.3607940673828125, + "logits/chosen": -0.6753288507461548, + "logits/rejected": -0.7206825017929077, + "logps/chosen": -0.06729499995708466, + "logps/rejected": -0.7631007432937622, + "loss": 3.8885, + "nll_loss": 0.9360435009002686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006729499902576208, + "rewards/margins": 0.0695805773139, + "rewards/rejected": -0.07631008327007294, + "step": 735 + }, + { + "epoch": 0.5089903181189488, + "grad_norm": 2.7993688583374023, + "learning_rate": 2.544951590594744e-05, + "log_odds_chosen": 4.7974700927734375, + "log_odds_ratio": -0.31564173102378845, + "logits/chosen": -0.3785450756549835, + "logits/rejected": -0.4010365903377533, + "logps/chosen": -0.09814214706420898, + "logps/rejected": -0.5331931114196777, + "loss": 3.3022, + "nll_loss": 0.7939915657043457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009814215824007988, + "rewards/margins": 0.043505098670721054, + "rewards/rejected": -0.05331931263208389, + "step": 736 + }, + { + "epoch": 0.5096818810511756, + "grad_norm": 4.751145362854004, + "learning_rate": 2.5484094052558783e-05, + "log_odds_chosen": 1.547588586807251, + "log_odds_ratio": -0.43941688537597656, + "logits/chosen": -0.5329767465591431, + "logits/rejected": -0.540837824344635, + "logps/chosen": -0.12904410064220428, + "logps/rejected": -0.3887450397014618, + "loss": 4.7745, + "nll_loss": 1.14969003200531, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012904411181807518, + "rewards/margins": 0.02597009390592575, + "rewards/rejected": -0.03887450322508812, + "step": 737 + }, + { + "epoch": 0.5103734439834025, + "grad_norm": 3.7940049171447754, + "learning_rate": 2.5518672199170125e-05, + "log_odds_chosen": 2.1066598892211914, + "log_odds_ratio": -0.34529027342796326, + "logits/chosen": -0.7043582201004028, + "logits/rejected": -0.7103002667427063, + "logps/chosen": -0.12666653096675873, + "logps/rejected": -0.4064810574054718, + "loss": 3.412, + "nll_loss": 0.8184600472450256, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012666651979088783, + "rewards/margins": 0.027981456369161606, + "rewards/rejected": -0.04064810648560524, + "step": 738 + }, + { + "epoch": 0.5110650069156293, + "grad_norm": 2.5742151737213135, + "learning_rate": 2.5553250345781466e-05, + "log_odds_chosen": 5.782053470611572, + "log_odds_ratio": -0.07086659222841263, + "logits/chosen": -0.6252003908157349, + "logits/rejected": -0.6271520853042603, + "logps/chosen": -0.016071034595370293, + "logps/rejected": -0.5072439908981323, + "loss": 2.7822, + "nll_loss": 0.6884604692459106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016071033896878362, + "rewards/margins": 0.04911729693412781, + "rewards/rejected": -0.05072439834475517, + "step": 739 + }, + { + "epoch": 0.5117565698478561, + "grad_norm": 3.867454767227173, + "learning_rate": 2.5587828492392808e-05, + "log_odds_chosen": 1.2175309658050537, + "log_odds_ratio": -0.43134674429893494, + "logits/chosen": -0.7617948651313782, + "logits/rejected": -0.7681587934494019, + "logps/chosen": -0.15212436020374298, + "logps/rejected": -0.3493318557739258, + "loss": 4.1321, + "nll_loss": 0.9898828864097595, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015212435275316238, + "rewards/margins": 0.01972074992954731, + "rewards/rejected": -0.0349331870675087, + "step": 740 + }, + { + "epoch": 0.5124481327800829, + "grad_norm": 4.407393932342529, + "learning_rate": 2.562240663900415e-05, + "log_odds_chosen": 3.897775888442993, + "log_odds_ratio": -0.16987872123718262, + "logits/chosen": -0.44695958495140076, + "logits/rejected": -0.5096914172172546, + "logps/chosen": -0.10248123109340668, + "logps/rejected": -0.7007458806037903, + "loss": 4.0115, + "nll_loss": 0.9858871102333069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010248124599456787, + "rewards/margins": 0.05982645973563194, + "rewards/rejected": -0.07007458806037903, + "step": 741 + }, + { + "epoch": 0.5131396957123098, + "grad_norm": 6.455469131469727, + "learning_rate": 2.565698478561549e-05, + "log_odds_chosen": 0.6308086514472961, + "log_odds_ratio": -0.6032850742340088, + "logits/chosen": -0.594819962978363, + "logits/rejected": -0.5979675650596619, + "logps/chosen": -0.19797390699386597, + "logps/rejected": -0.40863046050071716, + "loss": 6.5324, + "nll_loss": 1.572765827178955, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019797392189502716, + "rewards/margins": 0.02106565237045288, + "rewards/rejected": -0.0408630445599556, + "step": 742 + }, + { + "epoch": 0.5138312586445366, + "grad_norm": 6.121413707733154, + "learning_rate": 2.5691562932226832e-05, + "log_odds_chosen": 1.7726167440414429, + "log_odds_ratio": -0.4711247384548187, + "logits/chosen": -0.6016209125518799, + "logits/rejected": -0.6485509872436523, + "logps/chosen": -0.08414126187562943, + "logps/rejected": -0.5053392052650452, + "loss": 4.8448, + "nll_loss": 1.1640899181365967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008414126932621002, + "rewards/margins": 0.042119793593883514, + "rewards/rejected": -0.05053391680121422, + "step": 743 + }, + { + "epoch": 0.5145228215767634, + "grad_norm": 6.091677188873291, + "learning_rate": 2.5726141078838174e-05, + "log_odds_chosen": 2.197624444961548, + "log_odds_ratio": -0.5597094893455505, + "logits/chosen": -0.678301990032196, + "logits/rejected": -0.7075386643409729, + "logps/chosen": -0.11059334874153137, + "logps/rejected": -0.3694513738155365, + "loss": 4.874, + "nll_loss": 1.1625367403030396, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011059334501624107, + "rewards/margins": 0.02588580548763275, + "rewards/rejected": -0.03694513812661171, + "step": 744 + }, + { + "epoch": 0.5152143845089903, + "grad_norm": 4.404496192932129, + "learning_rate": 2.5760719225449516e-05, + "log_odds_chosen": 1.9973732233047485, + "log_odds_ratio": -0.35110753774642944, + "logits/chosen": -0.5603127479553223, + "logits/rejected": -0.5757501721382141, + "logps/chosen": -0.12605589628219604, + "logps/rejected": -0.4605715274810791, + "loss": 4.0381, + "nll_loss": 0.9744019508361816, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012605588883161545, + "rewards/margins": 0.033451564610004425, + "rewards/rejected": -0.04605715349316597, + "step": 745 + }, + { + "epoch": 0.5159059474412172, + "grad_norm": 3.317373514175415, + "learning_rate": 2.579529737206086e-05, + "log_odds_chosen": 3.7140681743621826, + "log_odds_ratio": -0.1961238533258438, + "logits/chosen": -0.9345548152923584, + "logits/rejected": -0.9548307061195374, + "logps/chosen": -0.08017171174287796, + "logps/rejected": -0.5122724771499634, + "loss": 4.3311, + "nll_loss": 1.0631747245788574, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008017171174287796, + "rewards/margins": 0.04321007430553436, + "rewards/rejected": -0.05122724547982216, + "step": 746 + }, + { + "epoch": 0.516597510373444, + "grad_norm": 3.674877405166626, + "learning_rate": 2.5829875518672202e-05, + "log_odds_chosen": 2.7474920749664307, + "log_odds_ratio": -0.2940402626991272, + "logits/chosen": -0.5442591309547424, + "logits/rejected": -0.563199520111084, + "logps/chosen": -0.10129649192094803, + "logps/rejected": -0.4285820722579956, + "loss": 4.7906, + "nll_loss": 1.168250322341919, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010129649192094803, + "rewards/margins": 0.03272855654358864, + "rewards/rejected": -0.04285820946097374, + "step": 747 + }, + { + "epoch": 0.5172890733056709, + "grad_norm": 7.059281349182129, + "learning_rate": 2.5864453665283544e-05, + "log_odds_chosen": -0.07221721112728119, + "log_odds_ratio": -0.8372195959091187, + "logits/chosen": -0.49470487236976624, + "logits/rejected": -0.5321720242500305, + "logps/chosen": -0.19111818075180054, + "logps/rejected": -0.20531783998012543, + "loss": 4.2752, + "nll_loss": 0.9850720167160034, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019111819565296173, + "rewards/margins": 0.0014199642464518547, + "rewards/rejected": -0.020531784743070602, + "step": 748 + }, + { + "epoch": 0.5179806362378977, + "grad_norm": 3.5188753604888916, + "learning_rate": 2.5899031811894885e-05, + "log_odds_chosen": 2.548219680786133, + "log_odds_ratio": -0.32754892110824585, + "logits/chosen": -0.8879270553588867, + "logits/rejected": -0.8918251395225525, + "logps/chosen": -0.14871710538864136, + "logps/rejected": -0.5499089956283569, + "loss": 4.0379, + "nll_loss": 0.9767143726348877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014871710911393166, + "rewards/margins": 0.040119193494319916, + "rewards/rejected": -0.05499090254306793, + "step": 749 + }, + { + "epoch": 0.5186721991701245, + "grad_norm": 3.5754287242889404, + "learning_rate": 2.5933609958506227e-05, + "log_odds_chosen": 3.1863934993743896, + "log_odds_ratio": -0.3843536376953125, + "logits/chosen": -0.5169445872306824, + "logits/rejected": -0.49316757917404175, + "logps/chosen": -0.11643863469362259, + "logps/rejected": -0.5823589563369751, + "loss": 3.4881, + "nll_loss": 0.8335927724838257, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011643863283097744, + "rewards/margins": 0.04659203439950943, + "rewards/rejected": -0.05823589116334915, + "step": 750 + }, + { + "epoch": 0.5193637621023514, + "grad_norm": 4.024445056915283, + "learning_rate": 2.596818810511757e-05, + "log_odds_chosen": 1.073103666305542, + "log_odds_ratio": -0.6491037607192993, + "logits/chosen": -1.0050249099731445, + "logits/rejected": -1.0319346189498901, + "logps/chosen": -0.16097313165664673, + "logps/rejected": -0.26376786828041077, + "loss": 4.6072, + "nll_loss": 1.0868946313858032, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016097312793135643, + "rewards/margins": 0.010279476642608643, + "rewards/rejected": -0.026376787573099136, + "step": 751 + }, + { + "epoch": 0.5200553250345782, + "grad_norm": 5.002845764160156, + "learning_rate": 2.600276625172891e-05, + "log_odds_chosen": 1.8316545486450195, + "log_odds_ratio": -0.5380937457084656, + "logits/chosen": -0.5348612666130066, + "logits/rejected": -0.5365390181541443, + "logps/chosen": -0.19655044376850128, + "logps/rejected": -0.3816303610801697, + "loss": 2.9643, + "nll_loss": 0.6872615814208984, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019655045121908188, + "rewards/margins": 0.01850799284875393, + "rewards/rejected": -0.03816303238272667, + "step": 752 + }, + { + "epoch": 0.520746887966805, + "grad_norm": 6.330991268157959, + "learning_rate": 2.603734439834025e-05, + "log_odds_chosen": 0.8655611872673035, + "log_odds_ratio": -0.6965842247009277, + "logits/chosen": -0.8606619834899902, + "logits/rejected": -0.8774101734161377, + "logps/chosen": -0.2293684184551239, + "logps/rejected": -0.4654915928840637, + "loss": 4.2237, + "nll_loss": 0.9862702488899231, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02293684333562851, + "rewards/margins": 0.023612316697835922, + "rewards/rejected": -0.04654916375875473, + "step": 753 + }, + { + "epoch": 0.5214384508990318, + "grad_norm": 4.153950214385986, + "learning_rate": 2.6071922544951593e-05, + "log_odds_chosen": 1.9040453433990479, + "log_odds_ratio": -0.5017755627632141, + "logits/chosen": -0.9090903997421265, + "logits/rejected": -0.9265241622924805, + "logps/chosen": -0.1252647340297699, + "logps/rejected": -0.46394142508506775, + "loss": 3.6527, + "nll_loss": 0.862996518611908, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012526473961770535, + "rewards/margins": 0.033867672085762024, + "rewards/rejected": -0.046394143253564835, + "step": 754 + }, + { + "epoch": 0.5221300138312587, + "grad_norm": 4.278303623199463, + "learning_rate": 2.6106500691562935e-05, + "log_odds_chosen": 1.5212517976760864, + "log_odds_ratio": -0.2745608985424042, + "logits/chosen": -0.5291388034820557, + "logits/rejected": -0.5695719718933105, + "logps/chosen": -0.11442254483699799, + "logps/rejected": -0.4772147238254547, + "loss": 4.0293, + "nll_loss": 0.9798673391342163, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011442254297435284, + "rewards/margins": 0.03627922013401985, + "rewards/rejected": -0.04772147536277771, + "step": 755 + }, + { + "epoch": 0.5228215767634855, + "grad_norm": 3.9194319248199463, + "learning_rate": 2.6141078838174276e-05, + "log_odds_chosen": 1.879309892654419, + "log_odds_ratio": -0.3652842044830322, + "logits/chosen": -0.893629789352417, + "logits/rejected": -0.8920744061470032, + "logps/chosen": -0.11437473446130753, + "logps/rejected": -0.43803346157073975, + "loss": 3.9646, + "nll_loss": 0.9546254873275757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011437472887337208, + "rewards/margins": 0.03236587345600128, + "rewards/rejected": -0.04380334913730621, + "step": 756 + }, + { + "epoch": 0.5235131396957123, + "grad_norm": 4.98897123336792, + "learning_rate": 2.6175656984785618e-05, + "log_odds_chosen": 1.3624794483184814, + "log_odds_ratio": -0.44211530685424805, + "logits/chosen": -0.565403163433075, + "logits/rejected": -0.5806471705436707, + "logps/chosen": -0.1290087103843689, + "logps/rejected": -0.2725811004638672, + "loss": 5.366, + "nll_loss": 1.2972995042800903, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012900871224701405, + "rewards/margins": 0.014357241801917553, + "rewards/rejected": -0.027258113026618958, + "step": 757 + }, + { + "epoch": 0.5242047026279392, + "grad_norm": 4.583791732788086, + "learning_rate": 2.621023513139696e-05, + "log_odds_chosen": 2.5393147468566895, + "log_odds_ratio": -0.3373655378818512, + "logits/chosen": -0.4986213147640228, + "logits/rejected": -0.487330824136734, + "logps/chosen": -0.1359880566596985, + "logps/rejected": -0.4418767988681793, + "loss": 4.0072, + "nll_loss": 0.9680536389350891, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013598806224763393, + "rewards/margins": 0.030588874593377113, + "rewards/rejected": -0.04418767988681793, + "step": 758 + }, + { + "epoch": 0.524896265560166, + "grad_norm": 3.971343517303467, + "learning_rate": 2.62448132780083e-05, + "log_odds_chosen": 1.4316984415054321, + "log_odds_ratio": -0.4938808083534241, + "logits/chosen": -0.5328791737556458, + "logits/rejected": -0.4855450391769409, + "logps/chosen": -0.1369304656982422, + "logps/rejected": -0.30632275342941284, + "loss": 3.7887, + "nll_loss": 0.8977863788604736, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013693045824766159, + "rewards/margins": 0.016939228400588036, + "rewards/rejected": -0.030632276087999344, + "step": 759 + }, + { + "epoch": 0.5255878284923928, + "grad_norm": 4.400160312652588, + "learning_rate": 2.6279391424619642e-05, + "log_odds_chosen": 0.7450700402259827, + "log_odds_ratio": -0.8866539001464844, + "logits/chosen": -0.5152993202209473, + "logits/rejected": -0.4968855381011963, + "logps/chosen": -0.1909049153327942, + "logps/rejected": -0.22761806845664978, + "loss": 3.5208, + "nll_loss": 0.7915401458740234, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01909049227833748, + "rewards/margins": 0.0036713131703436375, + "rewards/rejected": -0.022761806845664978, + "step": 760 + }, + { + "epoch": 0.5262793914246197, + "grad_norm": 4.3578338623046875, + "learning_rate": 2.6313969571230984e-05, + "log_odds_chosen": 1.7597806453704834, + "log_odds_ratio": -0.3029617369174957, + "logits/chosen": -0.6150257587432861, + "logits/rejected": -0.6218180656433105, + "logps/chosen": -0.1087150052189827, + "logps/rejected": -0.39284566044807434, + "loss": 4.5935, + "nll_loss": 1.11807119846344, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01087150163948536, + "rewards/margins": 0.028413068503141403, + "rewards/rejected": -0.03928457200527191, + "step": 761 + }, + { + "epoch": 0.5269709543568465, + "grad_norm": 3.5353329181671143, + "learning_rate": 2.6348547717842326e-05, + "log_odds_chosen": 1.9123175144195557, + "log_odds_ratio": -0.3525024354457855, + "logits/chosen": -0.8938580751419067, + "logits/rejected": -0.8758429288864136, + "logps/chosen": -0.13999508321285248, + "logps/rejected": -0.3457835912704468, + "loss": 4.4079, + "nll_loss": 1.0667307376861572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013999508693814278, + "rewards/margins": 0.02057885192334652, + "rewards/rejected": -0.0345783606171608, + "step": 762 + }, + { + "epoch": 0.5276625172890733, + "grad_norm": 5.203707695007324, + "learning_rate": 2.6383125864453667e-05, + "log_odds_chosen": 1.0678726434707642, + "log_odds_ratio": -0.6421834230422974, + "logits/chosen": -0.621665358543396, + "logits/rejected": -0.613940417766571, + "logps/chosen": -0.16835099458694458, + "logps/rejected": -0.440276563167572, + "loss": 4.4273, + "nll_loss": 1.042595386505127, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016835100948810577, + "rewards/margins": 0.027192555367946625, + "rewards/rejected": -0.0440276563167572, + "step": 763 + }, + { + "epoch": 0.5283540802213001, + "grad_norm": 3.169253349304199, + "learning_rate": 2.641770401106501e-05, + "log_odds_chosen": 2.8914332389831543, + "log_odds_ratio": -0.2715517282485962, + "logits/chosen": -0.822980523109436, + "logits/rejected": -0.853297233581543, + "logps/chosen": -0.06129853054881096, + "logps/rejected": -0.37394028902053833, + "loss": 4.1005, + "nll_loss": 0.9979735612869263, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006129853427410126, + "rewards/margins": 0.031264178454875946, + "rewards/rejected": -0.03739403188228607, + "step": 764 + }, + { + "epoch": 0.529045643153527, + "grad_norm": 4.1213531494140625, + "learning_rate": 2.645228215767635e-05, + "log_odds_chosen": 1.256605625152588, + "log_odds_ratio": -0.44500529766082764, + "logits/chosen": -0.7482407093048096, + "logits/rejected": -0.7464094161987305, + "logps/chosen": -0.1757386028766632, + "logps/rejected": -0.2952781319618225, + "loss": 3.8401, + "nll_loss": 0.9155247211456299, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01757385954260826, + "rewards/margins": 0.01195395179092884, + "rewards/rejected": -0.02952781319618225, + "step": 765 + }, + { + "epoch": 0.5297372060857538, + "grad_norm": 4.943138122558594, + "learning_rate": 2.6486860304287692e-05, + "log_odds_chosen": 1.1825425624847412, + "log_odds_ratio": -0.6412897109985352, + "logits/chosen": -0.9547351598739624, + "logits/rejected": -0.940011739730835, + "logps/chosen": -0.16075746715068817, + "logps/rejected": -0.27479812502861023, + "loss": 5.8234, + "nll_loss": 1.391717791557312, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016075747087597847, + "rewards/margins": 0.011404067277908325, + "rewards/rejected": -0.027479812502861023, + "step": 766 + }, + { + "epoch": 0.5304287690179806, + "grad_norm": 3.294407367706299, + "learning_rate": 2.6521438450899033e-05, + "log_odds_chosen": 2.2841694355010986, + "log_odds_ratio": -0.47490108013153076, + "logits/chosen": -0.9181256294250488, + "logits/rejected": -0.9499702453613281, + "logps/chosen": -0.08856260776519775, + "logps/rejected": -0.3723558783531189, + "loss": 4.6973, + "nll_loss": 1.1268422603607178, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008856261149048805, + "rewards/margins": 0.028379324823617935, + "rewards/rejected": -0.03723558783531189, + "step": 767 + }, + { + "epoch": 0.5311203319502075, + "grad_norm": 3.231011390686035, + "learning_rate": 2.6556016597510375e-05, + "log_odds_chosen": 2.726728916168213, + "log_odds_ratio": -0.4835449457168579, + "logits/chosen": -0.8287713527679443, + "logits/rejected": -0.869062066078186, + "logps/chosen": -0.15231871604919434, + "logps/rejected": -0.44504475593566895, + "loss": 3.9759, + "nll_loss": 0.9456151723861694, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015231871977448463, + "rewards/margins": 0.02927260287106037, + "rewards/rejected": -0.044504471123218536, + "step": 768 + }, + { + "epoch": 0.5318118948824343, + "grad_norm": 3.9974963665008545, + "learning_rate": 2.6590594744121717e-05, + "log_odds_chosen": 2.2256228923797607, + "log_odds_ratio": -0.3583628535270691, + "logits/chosen": -0.6432703733444214, + "logits/rejected": -0.6699737310409546, + "logps/chosen": -0.131565660238266, + "logps/rejected": -0.5114902853965759, + "loss": 5.9335, + "nll_loss": 1.4475429058074951, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013156566768884659, + "rewards/margins": 0.03799246624112129, + "rewards/rejected": -0.05114902928471565, + "step": 769 + }, + { + "epoch": 0.5325034578146611, + "grad_norm": 2.977198839187622, + "learning_rate": 2.6625172890733058e-05, + "log_odds_chosen": 2.8446381092071533, + "log_odds_ratio": -0.2956915497779846, + "logits/chosen": -0.6301093101501465, + "logits/rejected": -0.6782950162887573, + "logps/chosen": -0.1516442894935608, + "logps/rejected": -0.568672776222229, + "loss": 4.0353, + "nll_loss": 0.9792449474334717, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015164428390562534, + "rewards/margins": 0.04170284792780876, + "rewards/rejected": -0.05686727166175842, + "step": 770 + }, + { + "epoch": 0.533195020746888, + "grad_norm": 4.515598773956299, + "learning_rate": 2.66597510373444e-05, + "log_odds_chosen": 1.9685964584350586, + "log_odds_ratio": -0.4243180751800537, + "logits/chosen": -0.6195254921913147, + "logits/rejected": -0.6742951273918152, + "logps/chosen": -0.14887145161628723, + "logps/rejected": -0.7238699197769165, + "loss": 4.7484, + "nll_loss": 1.144672155380249, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014887145720422268, + "rewards/margins": 0.057499852031469345, + "rewards/rejected": -0.07238698750734329, + "step": 771 + }, + { + "epoch": 0.5338865836791148, + "grad_norm": 4.412965297698975, + "learning_rate": 2.669432918395574e-05, + "log_odds_chosen": 1.7204562425613403, + "log_odds_ratio": -0.39194899797439575, + "logits/chosen": -0.5892022848129272, + "logits/rejected": -0.6004505157470703, + "logps/chosen": -0.1321462094783783, + "logps/rejected": -0.3937058448791504, + "loss": 4.8, + "nll_loss": 1.1607953310012817, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013214620761573315, + "rewards/margins": 0.02615596167743206, + "rewards/rejected": -0.0393705815076828, + "step": 772 + }, + { + "epoch": 0.5345781466113416, + "grad_norm": 4.206930637359619, + "learning_rate": 2.6728907330567083e-05, + "log_odds_chosen": 1.9610211849212646, + "log_odds_ratio": -0.22896619141101837, + "logits/chosen": -0.5846484899520874, + "logits/rejected": -0.6280151605606079, + "logps/chosen": -0.09105009585618973, + "logps/rejected": -0.40164899826049805, + "loss": 4.0853, + "nll_loss": 0.9984228610992432, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009105009958148003, + "rewards/margins": 0.03105989098548889, + "rewards/rejected": -0.04016490280628204, + "step": 773 + }, + { + "epoch": 0.5352697095435685, + "grad_norm": 4.103181838989258, + "learning_rate": 2.6763485477178424e-05, + "log_odds_chosen": 1.097496747970581, + "log_odds_ratio": -0.5405707955360413, + "logits/chosen": -0.37573862075805664, + "logits/rejected": -0.43676427006721497, + "logps/chosen": -0.1625049114227295, + "logps/rejected": -0.3144679665565491, + "loss": 4.3009, + "nll_loss": 1.0211718082427979, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0162504930049181, + "rewards/margins": 0.015196304768323898, + "rewards/rejected": -0.031446799635887146, + "step": 774 + }, + { + "epoch": 0.5359612724757953, + "grad_norm": 4.65155029296875, + "learning_rate": 2.6798063623789766e-05, + "log_odds_chosen": 1.3845301866531372, + "log_odds_ratio": -0.39862924814224243, + "logits/chosen": -0.3924928307533264, + "logits/rejected": -0.40106141567230225, + "logps/chosen": -0.16831707954406738, + "logps/rejected": -0.39287295937538147, + "loss": 4.8806, + "nll_loss": 1.180276870727539, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016831709071993828, + "rewards/margins": 0.02245558425784111, + "rewards/rejected": -0.039287298917770386, + "step": 775 + }, + { + "epoch": 0.5366528354080221, + "grad_norm": 6.8850932121276855, + "learning_rate": 2.6832641770401107e-05, + "log_odds_chosen": 2.191964864730835, + "log_odds_ratio": -0.687903106212616, + "logits/chosen": -0.3750625252723694, + "logits/rejected": -0.4132746160030365, + "logps/chosen": -0.14285290241241455, + "logps/rejected": -0.40058577060699463, + "loss": 4.5572, + "nll_loss": 1.0705032348632812, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01428529154509306, + "rewards/margins": 0.025773286819458008, + "rewards/rejected": -0.040058575570583344, + "step": 776 + }, + { + "epoch": 0.5373443983402489, + "grad_norm": 2.687833786010742, + "learning_rate": 2.686721991701245e-05, + "log_odds_chosen": 3.121793270111084, + "log_odds_ratio": -0.29946577548980713, + "logits/chosen": -0.5395219922065735, + "logits/rejected": -0.5736823678016663, + "logps/chosen": -0.07659432291984558, + "logps/rejected": -0.3418726921081543, + "loss": 3.6865, + "nll_loss": 0.8916776776313782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007659432012587786, + "rewards/margins": 0.02652783691883087, + "rewards/rejected": -0.03418726846575737, + "step": 777 + }, + { + "epoch": 0.5380359612724758, + "grad_norm": 4.768846035003662, + "learning_rate": 2.690179806362379e-05, + "log_odds_chosen": 1.7344532012939453, + "log_odds_ratio": -0.6882243156433105, + "logits/chosen": -0.7784990668296814, + "logits/rejected": -0.8007603883743286, + "logps/chosen": -0.14528436958789825, + "logps/rejected": -0.3341407775878906, + "loss": 5.0192, + "nll_loss": 1.1859819889068604, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01452843751758337, + "rewards/margins": 0.018885640427470207, + "rewards/rejected": -0.033414077013731, + "step": 778 + }, + { + "epoch": 0.5387275242047026, + "grad_norm": 3.818828582763672, + "learning_rate": 2.6936376210235132e-05, + "log_odds_chosen": 2.687124252319336, + "log_odds_ratio": -0.5535922646522522, + "logits/chosen": -0.6772273182868958, + "logits/rejected": -0.7121256589889526, + "logps/chosen": -0.14388306438922882, + "logps/rejected": -0.5445559620857239, + "loss": 3.6195, + "nll_loss": 0.8495252132415771, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014388306066393852, + "rewards/margins": 0.040067292749881744, + "rewards/rejected": -0.054455600678920746, + "step": 779 + }, + { + "epoch": 0.5394190871369294, + "grad_norm": 4.158625602722168, + "learning_rate": 2.6970954356846474e-05, + "log_odds_chosen": 2.361208915710449, + "log_odds_ratio": -0.29354727268218994, + "logits/chosen": -0.3465491235256195, + "logits/rejected": -0.34471794962882996, + "logps/chosen": -0.1022605150938034, + "logps/rejected": -0.38724422454833984, + "loss": 4.1023, + "nll_loss": 0.9962158203125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010226051323115826, + "rewards/margins": 0.028498370200395584, + "rewards/rejected": -0.038724422454833984, + "step": 780 + }, + { + "epoch": 0.5401106500691563, + "grad_norm": 4.943760871887207, + "learning_rate": 2.7005532503457815e-05, + "log_odds_chosen": 1.5981311798095703, + "log_odds_ratio": -0.4409557580947876, + "logits/chosen": -0.7355484366416931, + "logits/rejected": -0.763656735420227, + "logps/chosen": -0.13593299686908722, + "logps/rejected": -0.4234578311443329, + "loss": 4.972, + "nll_loss": 1.1989084482192993, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013593301177024841, + "rewards/margins": 0.028752481564879417, + "rewards/rejected": -0.04234578460454941, + "step": 781 + }, + { + "epoch": 0.5408022130013831, + "grad_norm": 6.2170090675354, + "learning_rate": 2.7040110650069157e-05, + "log_odds_chosen": 0.6322706937789917, + "log_odds_ratio": -0.8293993473052979, + "logits/chosen": -0.7701730132102966, + "logits/rejected": -0.7138707637786865, + "logps/chosen": -0.26484400033950806, + "logps/rejected": -0.4796360731124878, + "loss": 5.606, + "nll_loss": 1.3185690641403198, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.026484401896595955, + "rewards/margins": 0.021479208022356033, + "rewards/rejected": -0.04796360805630684, + "step": 782 + }, + { + "epoch": 0.5414937759336099, + "grad_norm": 4.743947505950928, + "learning_rate": 2.70746887966805e-05, + "log_odds_chosen": 0.5518181324005127, + "log_odds_ratio": -0.5969145894050598, + "logits/chosen": -0.9111509919166565, + "logits/rejected": -0.8899242281913757, + "logps/chosen": -0.18800178170204163, + "logps/rejected": -0.3061884641647339, + "loss": 5.4442, + "nll_loss": 1.3013533353805542, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01880018040537834, + "rewards/margins": 0.011818666011095047, + "rewards/rejected": -0.03061884641647339, + "step": 783 + }, + { + "epoch": 0.5421853388658368, + "grad_norm": 3.736454725265503, + "learning_rate": 2.710926694329184e-05, + "log_odds_chosen": 2.629013776779175, + "log_odds_ratio": -0.38394689559936523, + "logits/chosen": -0.702704906463623, + "logits/rejected": -0.7145882248878479, + "logps/chosen": -0.08854182809591293, + "logps/rejected": -0.3831250071525574, + "loss": 3.3801, + "nll_loss": 0.8066269159317017, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008854183368384838, + "rewards/margins": 0.029458321630954742, + "rewards/rejected": -0.038312505930662155, + "step": 784 + }, + { + "epoch": 0.5428769017980636, + "grad_norm": 3.409085512161255, + "learning_rate": 2.714384508990318e-05, + "log_odds_chosen": 2.7632088661193848, + "log_odds_ratio": -0.2678602933883667, + "logits/chosen": -0.4413297176361084, + "logits/rejected": -0.48410317301750183, + "logps/chosen": -0.0839589387178421, + "logps/rejected": -0.5882033705711365, + "loss": 3.9799, + "nll_loss": 0.9681931734085083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008395894430577755, + "rewards/margins": 0.050424449145793915, + "rewards/rejected": -0.058820344507694244, + "step": 785 + }, + { + "epoch": 0.5435684647302904, + "grad_norm": 3.7073123455047607, + "learning_rate": 2.7178423236514523e-05, + "log_odds_chosen": 2.0540857315063477, + "log_odds_ratio": -0.2842232584953308, + "logits/chosen": -0.8638523817062378, + "logits/rejected": -0.8727890253067017, + "logps/chosen": -0.06774594634771347, + "logps/rejected": -0.3289458751678467, + "loss": 3.3233, + "nll_loss": 0.8023905158042908, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00677459454163909, + "rewards/margins": 0.02611999399960041, + "rewards/rejected": -0.03289458900690079, + "step": 786 + }, + { + "epoch": 0.5442600276625172, + "grad_norm": 4.251959323883057, + "learning_rate": 2.7213001383125865e-05, + "log_odds_chosen": 1.566872000694275, + "log_odds_ratio": -0.5363442897796631, + "logits/chosen": -0.6940356492996216, + "logits/rejected": -0.6940560340881348, + "logps/chosen": -0.14424169063568115, + "logps/rejected": -0.5402974486351013, + "loss": 4.3171, + "nll_loss": 1.0256503820419312, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014424169436097145, + "rewards/margins": 0.03960557281970978, + "rewards/rejected": -0.05402974411845207, + "step": 787 + }, + { + "epoch": 0.5449515905947441, + "grad_norm": 5.320130348205566, + "learning_rate": 2.7247579529737206e-05, + "log_odds_chosen": 1.577492356300354, + "log_odds_ratio": -0.40941229462623596, + "logits/chosen": -0.7909371256828308, + "logits/rejected": -0.7870829105377197, + "logps/chosen": -0.15442880988121033, + "logps/rejected": -0.3312009274959564, + "loss": 4.9657, + "nll_loss": 1.2004934549331665, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015442880801856518, + "rewards/margins": 0.01767721213400364, + "rewards/rejected": -0.03312009200453758, + "step": 788 + }, + { + "epoch": 0.5456431535269709, + "grad_norm": 2.6318728923797607, + "learning_rate": 2.7282157676348548e-05, + "log_odds_chosen": 2.872804880142212, + "log_odds_ratio": -0.3544527590274811, + "logits/chosen": -0.46935009956359863, + "logits/rejected": -0.47872787714004517, + "logps/chosen": -0.0999964103102684, + "logps/rejected": -0.28759926557540894, + "loss": 3.5248, + "nll_loss": 0.8457651138305664, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009999641217291355, + "rewards/margins": 0.018760286271572113, + "rewards/rejected": -0.028759926557540894, + "step": 789 + }, + { + "epoch": 0.5463347164591977, + "grad_norm": 4.349747180938721, + "learning_rate": 2.731673582295989e-05, + "log_odds_chosen": 2.838932991027832, + "log_odds_ratio": -0.14528866112232208, + "logits/chosen": -0.500427782535553, + "logits/rejected": -0.5386743545532227, + "logps/chosen": -0.059282850474119186, + "logps/rejected": -0.46896499395370483, + "loss": 5.0646, + "nll_loss": 1.2516143321990967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059282854199409485, + "rewards/margins": 0.040968216955661774, + "rewards/rejected": -0.04689650237560272, + "step": 790 + }, + { + "epoch": 0.5470262793914247, + "grad_norm": 6.711754322052002, + "learning_rate": 2.7351313969571234e-05, + "log_odds_chosen": 0.7262469530105591, + "log_odds_ratio": -0.6351161599159241, + "logits/chosen": -0.8433955907821655, + "logits/rejected": -0.8634489178657532, + "logps/chosen": -0.1538301706314087, + "logps/rejected": -0.20225130021572113, + "loss": 6.0995, + "nll_loss": 1.4613513946533203, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015383017249405384, + "rewards/margins": 0.004842113703489304, + "rewards/rejected": -0.020225130021572113, + "step": 791 + }, + { + "epoch": 0.5477178423236515, + "grad_norm": 7.227035045623779, + "learning_rate": 2.7385892116182576e-05, + "log_odds_chosen": 1.4565250873565674, + "log_odds_ratio": -0.5795712471008301, + "logits/chosen": -0.5510681867599487, + "logits/rejected": -0.5623601675033569, + "logps/chosen": -0.19853737950325012, + "logps/rejected": -0.4113742411136627, + "loss": 3.8831, + "nll_loss": 0.9128076434135437, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019853739067912102, + "rewards/margins": 0.02128368616104126, + "rewards/rejected": -0.04113742709159851, + "step": 792 + }, + { + "epoch": 0.5484094052558783, + "grad_norm": 4.128443717956543, + "learning_rate": 2.7420470262793917e-05, + "log_odds_chosen": 4.165518283843994, + "log_odds_ratio": -0.11458099633455276, + "logits/chosen": -0.634047269821167, + "logits/rejected": -0.6352896690368652, + "logps/chosen": -0.06181440129876137, + "logps/rejected": -0.6406897306442261, + "loss": 4.2237, + "nll_loss": 1.0444719791412354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006181440781801939, + "rewards/margins": 0.05788753554224968, + "rewards/rejected": -0.06406897306442261, + "step": 793 + }, + { + "epoch": 0.5491009681881052, + "grad_norm": 3.842010259628296, + "learning_rate": 2.745504840940526e-05, + "log_odds_chosen": 2.4175264835357666, + "log_odds_ratio": -0.5656532049179077, + "logits/chosen": -0.4226923882961273, + "logits/rejected": -0.3965950012207031, + "logps/chosen": -0.1884640008211136, + "logps/rejected": -0.30576592683792114, + "loss": 4.1704, + "nll_loss": 0.9860259890556335, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01884640008211136, + "rewards/margins": 0.011730191297829151, + "rewards/rejected": -0.030576592311263084, + "step": 794 + }, + { + "epoch": 0.549792531120332, + "grad_norm": 4.974619388580322, + "learning_rate": 2.74896265560166e-05, + "log_odds_chosen": 1.3438681364059448, + "log_odds_ratio": -0.6321280002593994, + "logits/chosen": -0.566962718963623, + "logits/rejected": -0.5704185366630554, + "logps/chosen": -0.1416710764169693, + "logps/rejected": -0.5122509598731995, + "loss": 4.0576, + "nll_loss": 0.951187789440155, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01416710764169693, + "rewards/margins": 0.037057988345623016, + "rewards/rejected": -0.05122509226202965, + "step": 795 + }, + { + "epoch": 0.5504840940525588, + "grad_norm": 3.7341768741607666, + "learning_rate": 2.7524204702627942e-05, + "log_odds_chosen": 2.616122007369995, + "log_odds_ratio": -0.29485952854156494, + "logits/chosen": -0.5127213001251221, + "logits/rejected": -0.49903106689453125, + "logps/chosen": -0.07982846349477768, + "logps/rejected": -0.44080740213394165, + "loss": 5.2754, + "nll_loss": 1.2893630266189575, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007982847280800343, + "rewards/margins": 0.03609789162874222, + "rewards/rejected": -0.044080741703510284, + "step": 796 + }, + { + "epoch": 0.5511756569847857, + "grad_norm": 4.109652042388916, + "learning_rate": 2.7558782849239284e-05, + "log_odds_chosen": 3.560600757598877, + "log_odds_ratio": -0.29116514325141907, + "logits/chosen": -0.7510650157928467, + "logits/rejected": -0.779525637626648, + "logps/chosen": -0.10667699575424194, + "logps/rejected": -0.9304628968238831, + "loss": 4.4969, + "nll_loss": 1.0951179265975952, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01066769938915968, + "rewards/margins": 0.08237859606742859, + "rewards/rejected": -0.09304629266262054, + "step": 797 + }, + { + "epoch": 0.5518672199170125, + "grad_norm": 4.035399436950684, + "learning_rate": 2.7593360995850625e-05, + "log_odds_chosen": 1.198608160018921, + "log_odds_ratio": -0.3582497835159302, + "logits/chosen": -0.7811927795410156, + "logits/rejected": -0.786125123500824, + "logps/chosen": -0.10331468284130096, + "logps/rejected": -0.3704620599746704, + "loss": 3.7105, + "nll_loss": 0.8917912840843201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010331467725336552, + "rewards/margins": 0.026714740321040154, + "rewards/rejected": -0.03704620897769928, + "step": 798 + }, + { + "epoch": 0.5525587828492393, + "grad_norm": 3.7303075790405273, + "learning_rate": 2.7627939142461967e-05, + "log_odds_chosen": 3.3303298950195312, + "log_odds_ratio": -0.1779707968235016, + "logits/chosen": -0.4753631055355072, + "logits/rejected": -0.5264405012130737, + "logps/chosen": -0.06457747519016266, + "logps/rejected": -0.7297971248626709, + "loss": 3.5538, + "nll_loss": 0.8706504702568054, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006457747891545296, + "rewards/margins": 0.06652196496725082, + "rewards/rejected": -0.07297971099615097, + "step": 799 + }, + { + "epoch": 0.5532503457814661, + "grad_norm": 4.2899322509765625, + "learning_rate": 2.766251728907331e-05, + "log_odds_chosen": 3.094510078430176, + "log_odds_ratio": -0.293547660112381, + "logits/chosen": -0.9138388633728027, + "logits/rejected": -0.9197445511817932, + "logps/chosen": -0.0937461331486702, + "logps/rejected": -0.564795970916748, + "loss": 5.1177, + "nll_loss": 1.250058650970459, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009374613873660564, + "rewards/margins": 0.047104984521865845, + "rewards/rejected": -0.05647960305213928, + "step": 800 + }, + { + "epoch": 0.553941908713693, + "grad_norm": 5.070873737335205, + "learning_rate": 2.769709543568465e-05, + "log_odds_chosen": 4.334895610809326, + "log_odds_ratio": -0.5068272948265076, + "logits/chosen": -0.6096988320350647, + "logits/rejected": -0.6185740232467651, + "logps/chosen": -0.1481603980064392, + "logps/rejected": -0.9830435514450073, + "loss": 4.8537, + "nll_loss": 1.1627322435379028, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014816039241850376, + "rewards/margins": 0.083488330245018, + "rewards/rejected": -0.09830436110496521, + "step": 801 + }, + { + "epoch": 0.5546334716459198, + "grad_norm": 4.983336925506592, + "learning_rate": 2.773167358229599e-05, + "log_odds_chosen": 1.7390892505645752, + "log_odds_ratio": -0.3052809536457062, + "logits/chosen": -0.759835422039032, + "logits/rejected": -0.7830816507339478, + "logps/chosen": -0.14674659073352814, + "logps/rejected": -0.4617050290107727, + "loss": 4.7161, + "nll_loss": 1.148496150970459, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014674659818410873, + "rewards/margins": 0.031495846807956696, + "rewards/rejected": -0.04617050662636757, + "step": 802 + }, + { + "epoch": 0.5553250345781466, + "grad_norm": 4.5399370193481445, + "learning_rate": 2.7766251728907333e-05, + "log_odds_chosen": 4.40491247177124, + "log_odds_ratio": -0.43303653597831726, + "logits/chosen": -0.6713570952415466, + "logits/rejected": -0.7115859985351562, + "logps/chosen": -0.152422696352005, + "logps/rejected": -0.7829738259315491, + "loss": 3.7393, + "nll_loss": 0.8915234804153442, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01524226926267147, + "rewards/margins": 0.0630551129579544, + "rewards/rejected": -0.07829738408327103, + "step": 803 + }, + { + "epoch": 0.5560165975103735, + "grad_norm": 3.6568808555603027, + "learning_rate": 2.7800829875518675e-05, + "log_odds_chosen": 2.0957438945770264, + "log_odds_ratio": -0.35637903213500977, + "logits/chosen": -0.6042817831039429, + "logits/rejected": -0.5805646777153015, + "logps/chosen": -0.1572953462600708, + "logps/rejected": -0.43727999925613403, + "loss": 4.2036, + "nll_loss": 1.0152617692947388, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01572953723371029, + "rewards/margins": 0.027998462319374084, + "rewards/rejected": -0.043727997690439224, + "step": 804 + }, + { + "epoch": 0.5567081604426003, + "grad_norm": 5.165635108947754, + "learning_rate": 2.7835408022130016e-05, + "log_odds_chosen": 1.8557673692703247, + "log_odds_ratio": -0.7694844007492065, + "logits/chosen": -0.5034834742546082, + "logits/rejected": -0.5410099029541016, + "logps/chosen": -0.22562721371650696, + "logps/rejected": -0.5017483830451965, + "loss": 3.6102, + "nll_loss": 0.8255925178527832, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.022562721744179726, + "rewards/margins": 0.027612116187810898, + "rewards/rejected": -0.050174832344055176, + "step": 805 + }, + { + "epoch": 0.5573997233748271, + "grad_norm": 8.340802192687988, + "learning_rate": 2.7869986168741358e-05, + "log_odds_chosen": 1.3203935623168945, + "log_odds_ratio": -1.065887212753296, + "logits/chosen": -0.4293314814567566, + "logits/rejected": -0.45345383882522583, + "logps/chosen": -0.22275424003601074, + "logps/rejected": -0.3493325114250183, + "loss": 4.5494, + "nll_loss": 1.0307643413543701, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022275425493717194, + "rewards/margins": 0.012657827697694302, + "rewards/rejected": -0.03493325412273407, + "step": 806 + }, + { + "epoch": 0.558091286307054, + "grad_norm": 3.9928297996520996, + "learning_rate": 2.79045643153527e-05, + "log_odds_chosen": 2.364372491836548, + "log_odds_ratio": -0.4788789749145508, + "logits/chosen": -0.8757091164588928, + "logits/rejected": -0.8417526483535767, + "logps/chosen": -0.17361877858638763, + "logps/rejected": -0.674676239490509, + "loss": 4.6235, + "nll_loss": 1.1079771518707275, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017361879348754883, + "rewards/margins": 0.0501057505607605, + "rewards/rejected": -0.06746762990951538, + "step": 807 + }, + { + "epoch": 0.5587828492392808, + "grad_norm": 4.5377044677734375, + "learning_rate": 2.793914246196404e-05, + "log_odds_chosen": 2.1746432781219482, + "log_odds_ratio": -0.482133686542511, + "logits/chosen": -0.872567355632782, + "logits/rejected": -0.8851162195205688, + "logps/chosen": -0.13028322160243988, + "logps/rejected": -0.3184550404548645, + "loss": 4.8049, + "nll_loss": 1.153007984161377, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013028322719037533, + "rewards/margins": 0.018817182630300522, + "rewards/rejected": -0.03184550628066063, + "step": 808 + }, + { + "epoch": 0.5594744121715076, + "grad_norm": 4.3767242431640625, + "learning_rate": 2.7973720608575382e-05, + "log_odds_chosen": 2.2641472816467285, + "log_odds_ratio": -0.2708294689655304, + "logits/chosen": -0.7464234232902527, + "logits/rejected": -0.7855645418167114, + "logps/chosen": -0.09002307802438736, + "logps/rejected": -0.38848984241485596, + "loss": 4.545, + "nll_loss": 1.1091712713241577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00900230836123228, + "rewards/margins": 0.02984667383134365, + "rewards/rejected": -0.03884898126125336, + "step": 809 + }, + { + "epoch": 0.5601659751037344, + "grad_norm": 3.8829071521759033, + "learning_rate": 2.8008298755186724e-05, + "log_odds_chosen": 3.297788143157959, + "log_odds_ratio": -0.1501597911119461, + "logits/chosen": -0.41120457649230957, + "logits/rejected": -0.4324203133583069, + "logps/chosen": -0.07470942288637161, + "logps/rejected": -0.6631616353988647, + "loss": 3.5935, + "nll_loss": 0.8833543062210083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007470941636711359, + "rewards/margins": 0.058845218271017075, + "rewards/rejected": -0.066316157579422, + "step": 810 + }, + { + "epoch": 0.5608575380359613, + "grad_norm": 5.252667427062988, + "learning_rate": 2.8042876901798066e-05, + "log_odds_chosen": 3.6831297874450684, + "log_odds_ratio": -0.18154287338256836, + "logits/chosen": -0.6489682197570801, + "logits/rejected": -0.6909961104393005, + "logps/chosen": -0.07913654297590256, + "logps/rejected": -0.6723751425743103, + "loss": 4.9607, + "nll_loss": 1.222014307975769, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007913654670119286, + "rewards/margins": 0.059323858469724655, + "rewards/rejected": -0.06723751127719879, + "step": 811 + }, + { + "epoch": 0.5615491009681881, + "grad_norm": 3.088599681854248, + "learning_rate": 2.8077455048409407e-05, + "log_odds_chosen": 4.496147632598877, + "log_odds_ratio": -0.31289637088775635, + "logits/chosen": -0.7372713685035706, + "logits/rejected": -0.821730375289917, + "logps/chosen": -0.07059605419635773, + "logps/rejected": -0.8317077159881592, + "loss": 3.4873, + "nll_loss": 0.8405301570892334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007059605326503515, + "rewards/margins": 0.07611117511987686, + "rewards/rejected": -0.08317077159881592, + "step": 812 + }, + { + "epoch": 0.5622406639004149, + "grad_norm": 4.271559715270996, + "learning_rate": 2.811203319502075e-05, + "log_odds_chosen": 3.0948903560638428, + "log_odds_ratio": -0.3525601029396057, + "logits/chosen": -0.4547148644924164, + "logits/rejected": -0.4978640675544739, + "logps/chosen": -0.12485533952713013, + "logps/rejected": -0.4798852205276489, + "loss": 3.5777, + "nll_loss": 0.8591761589050293, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012485533021390438, + "rewards/margins": 0.03550298511981964, + "rewards/rejected": -0.04798852279782295, + "step": 813 + }, + { + "epoch": 0.5629322268326418, + "grad_norm": 3.4795942306518555, + "learning_rate": 2.814661134163209e-05, + "log_odds_chosen": 5.109646797180176, + "log_odds_ratio": -0.19024419784545898, + "logits/chosen": -0.553877055644989, + "logits/rejected": -0.5855638980865479, + "logps/chosen": -0.058753401041030884, + "logps/rejected": -0.6228174567222595, + "loss": 3.5912, + "nll_loss": 0.8787802457809448, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005875340197235346, + "rewards/margins": 0.0564064085483551, + "rewards/rejected": -0.06228174269199371, + "step": 814 + }, + { + "epoch": 0.5636237897648686, + "grad_norm": 3.0252182483673096, + "learning_rate": 2.8181189488243432e-05, + "log_odds_chosen": 3.8306195735931396, + "log_odds_ratio": -0.2786122262477875, + "logits/chosen": -0.5956602096557617, + "logits/rejected": -0.6217089891433716, + "logps/chosen": -0.07585865259170532, + "logps/rejected": -0.5628564953804016, + "loss": 2.9095, + "nll_loss": 0.6995032429695129, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0075858645141124725, + "rewards/margins": 0.04869978502392769, + "rewards/rejected": -0.05628565326333046, + "step": 815 + }, + { + "epoch": 0.5643153526970954, + "grad_norm": 3.9304304122924805, + "learning_rate": 2.8215767634854773e-05, + "log_odds_chosen": 4.477867603302002, + "log_odds_ratio": -0.25606584548950195, + "logits/chosen": -0.47317975759506226, + "logits/rejected": -0.5208321809768677, + "logps/chosen": -0.07767429947853088, + "logps/rejected": -0.5357122421264648, + "loss": 4.6473, + "nll_loss": 1.1362117528915405, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0077674295753240585, + "rewards/margins": 0.045803800225257874, + "rewards/rejected": -0.05357122793793678, + "step": 816 + }, + { + "epoch": 0.5650069156293223, + "grad_norm": 3.6263046264648438, + "learning_rate": 2.8250345781466115e-05, + "log_odds_chosen": 4.530634880065918, + "log_odds_ratio": -0.16015692055225372, + "logits/chosen": -0.4547904431819916, + "logits/rejected": -0.44847387075424194, + "logps/chosen": -0.06347014009952545, + "logps/rejected": -0.7121883630752563, + "loss": 3.1737, + "nll_loss": 0.7773990631103516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00634701456874609, + "rewards/margins": 0.06487182527780533, + "rewards/rejected": -0.0712188333272934, + "step": 817 + }, + { + "epoch": 0.5656984785615491, + "grad_norm": 6.153628826141357, + "learning_rate": 2.8284923928077457e-05, + "log_odds_chosen": 3.2044761180877686, + "log_odds_ratio": -0.5079742670059204, + "logits/chosen": -0.5137223601341248, + "logits/rejected": -0.5485202074050903, + "logps/chosen": -0.15794017910957336, + "logps/rejected": -0.6775748133659363, + "loss": 4.6777, + "nll_loss": 1.118630051612854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015794018283486366, + "rewards/margins": 0.05196346342563629, + "rewards/rejected": -0.0677574872970581, + "step": 818 + }, + { + "epoch": 0.5663900414937759, + "grad_norm": 4.21480655670166, + "learning_rate": 2.8319502074688798e-05, + "log_odds_chosen": 2.6262288093566895, + "log_odds_ratio": -0.303774356842041, + "logits/chosen": -0.7704868912696838, + "logits/rejected": -0.8650259375572205, + "logps/chosen": -0.12395796179771423, + "logps/rejected": -0.4790630042552948, + "loss": 3.8007, + "nll_loss": 0.9198006391525269, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012395797297358513, + "rewards/margins": 0.03551050275564194, + "rewards/rejected": -0.0479063019156456, + "step": 819 + }, + { + "epoch": 0.5670816044260027, + "grad_norm": 3.816220283508301, + "learning_rate": 2.835408022130014e-05, + "log_odds_chosen": 2.4669811725616455, + "log_odds_ratio": -0.3575797379016876, + "logits/chosen": -0.5892040133476257, + "logits/rejected": -0.6085165739059448, + "logps/chosen": -0.13426099717617035, + "logps/rejected": -0.5866325497627258, + "loss": 3.0939, + "nll_loss": 0.7377179265022278, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01342609990388155, + "rewards/margins": 0.04523715749382973, + "rewards/rejected": -0.0586632564663887, + "step": 820 + }, + { + "epoch": 0.5677731673582296, + "grad_norm": 7.7992730140686035, + "learning_rate": 2.838865836791148e-05, + "log_odds_chosen": 4.493231773376465, + "log_odds_ratio": -0.5356466770172119, + "logits/chosen": -0.5167162418365479, + "logits/rejected": -0.5656682252883911, + "logps/chosen": -0.06463811546564102, + "logps/rejected": -0.8757993578910828, + "loss": 4.4222, + "nll_loss": 1.051975965499878, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006463811732828617, + "rewards/margins": 0.08111612498760223, + "rewards/rejected": -0.08757993578910828, + "step": 821 + }, + { + "epoch": 0.5684647302904564, + "grad_norm": 5.891249179840088, + "learning_rate": 2.8423236514522823e-05, + "log_odds_chosen": 2.585822343826294, + "log_odds_ratio": -0.6505037546157837, + "logits/chosen": -0.7809338569641113, + "logits/rejected": -0.7595021724700928, + "logps/chosen": -0.1175026148557663, + "logps/rejected": -0.3504663407802582, + "loss": 5.8074, + "nll_loss": 1.3868025541305542, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011750261299312115, + "rewards/margins": 0.023296372964978218, + "rewards/rejected": -0.03504663333296776, + "step": 822 + }, + { + "epoch": 0.5691562932226832, + "grad_norm": 7.554938316345215, + "learning_rate": 2.8457814661134164e-05, + "log_odds_chosen": 1.2075903415679932, + "log_odds_ratio": -0.7292794585227966, + "logits/chosen": -0.5420154333114624, + "logits/rejected": -0.5659103393554688, + "logps/chosen": -0.1520342379808426, + "logps/rejected": -0.40784332156181335, + "loss": 3.883, + "nll_loss": 0.8978164792060852, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015203425660729408, + "rewards/margins": 0.025580905377864838, + "rewards/rejected": -0.0407843291759491, + "step": 823 + }, + { + "epoch": 0.5698478561549101, + "grad_norm": 3.3298332691192627, + "learning_rate": 2.8492392807745506e-05, + "log_odds_chosen": 3.9135968685150146, + "log_odds_ratio": -0.3564820885658264, + "logits/chosen": -0.5905018448829651, + "logits/rejected": -0.6010125875473022, + "logps/chosen": -0.06891832500696182, + "logps/rejected": -0.408086359500885, + "loss": 4.5232, + "nll_loss": 1.0951402187347412, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006891832686960697, + "rewards/margins": 0.03391680121421814, + "rewards/rejected": -0.04080863296985626, + "step": 824 + }, + { + "epoch": 0.5705394190871369, + "grad_norm": 5.071108341217041, + "learning_rate": 2.8526970954356847e-05, + "log_odds_chosen": 3.3061747550964355, + "log_odds_ratio": -0.4244978129863739, + "logits/chosen": -0.6132084131240845, + "logits/rejected": -0.5790513753890991, + "logps/chosen": -0.17267945408821106, + "logps/rejected": -0.5895110368728638, + "loss": 4.9681, + "nll_loss": 1.1995717287063599, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017267946153879166, + "rewards/margins": 0.04168315604329109, + "rewards/rejected": -0.058951105922460556, + "step": 825 + }, + { + "epoch": 0.5712309820193637, + "grad_norm": 8.846132278442383, + "learning_rate": 2.856154910096819e-05, + "log_odds_chosen": 2.3505032062530518, + "log_odds_ratio": -0.9325801730155945, + "logits/chosen": -0.6876745820045471, + "logits/rejected": -0.7471765279769897, + "logps/chosen": -0.11827315390110016, + "logps/rejected": -0.461060494184494, + "loss": 5.6731, + "nll_loss": 1.3250163793563843, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011827315203845501, + "rewards/margins": 0.034278735518455505, + "rewards/rejected": -0.04610605165362358, + "step": 826 + }, + { + "epoch": 0.5719225449515906, + "grad_norm": 4.555541515350342, + "learning_rate": 2.859612724757953e-05, + "log_odds_chosen": 0.9776923060417175, + "log_odds_ratio": -0.4993519186973572, + "logits/chosen": -0.72479647397995, + "logits/rejected": -0.7874212265014648, + "logps/chosen": -0.15950685739517212, + "logps/rejected": -0.23940470814704895, + "loss": 5.5452, + "nll_loss": 1.336353063583374, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015950685366988182, + "rewards/margins": 0.007989783771336079, + "rewards/rejected": -0.023940470069646835, + "step": 827 + }, + { + "epoch": 0.5726141078838174, + "grad_norm": 3.23429536819458, + "learning_rate": 2.8630705394190872e-05, + "log_odds_chosen": 4.071626663208008, + "log_odds_ratio": -0.24565596878528595, + "logits/chosen": -0.5955303907394409, + "logits/rejected": -0.624784529209137, + "logps/chosen": -0.05326705053448677, + "logps/rejected": -0.3535764217376709, + "loss": 3.4374, + "nll_loss": 0.8347886800765991, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005326705053448677, + "rewards/margins": 0.030030936002731323, + "rewards/rejected": -0.03535763919353485, + "step": 828 + }, + { + "epoch": 0.5733056708160442, + "grad_norm": 2.840412139892578, + "learning_rate": 2.8665283540802214e-05, + "log_odds_chosen": 4.180113315582275, + "log_odds_ratio": -0.19920429587364197, + "logits/chosen": -0.9072915315628052, + "logits/rejected": -0.9142999649047852, + "logps/chosen": -0.04711640253663063, + "logps/rejected": -0.48545461893081665, + "loss": 3.5186, + "nll_loss": 0.8597191572189331, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0047116405330598354, + "rewards/margins": 0.04383382201194763, + "rewards/rejected": -0.048545461148023605, + "step": 829 + }, + { + "epoch": 0.573997233748271, + "grad_norm": 3.699284553527832, + "learning_rate": 2.8699861687413555e-05, + "log_odds_chosen": 2.9122934341430664, + "log_odds_ratio": -0.379080593585968, + "logits/chosen": -0.5995203256607056, + "logits/rejected": -0.6436635255813599, + "logps/chosen": -0.0784071683883667, + "logps/rejected": -0.46496570110321045, + "loss": 3.2445, + "nll_loss": 0.7732207179069519, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0078407172113657, + "rewards/margins": 0.038655854761600494, + "rewards/rejected": -0.046496570110321045, + "step": 830 + }, + { + "epoch": 0.5746887966804979, + "grad_norm": 3.74281644821167, + "learning_rate": 2.8734439834024897e-05, + "log_odds_chosen": 2.593203544616699, + "log_odds_ratio": -0.5199276208877563, + "logits/chosen": -0.15824517607688904, + "logits/rejected": -0.17762351036071777, + "logps/chosen": -0.06203662231564522, + "logps/rejected": -0.3429993987083435, + "loss": 3.3275, + "nll_loss": 0.7798757553100586, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.006203662138432264, + "rewards/margins": 0.028096279129385948, + "rewards/rejected": -0.03429993987083435, + "step": 831 + }, + { + "epoch": 0.5753803596127247, + "grad_norm": 4.471902847290039, + "learning_rate": 2.876901798063624e-05, + "log_odds_chosen": 2.951345920562744, + "log_odds_ratio": -0.42411351203918457, + "logits/chosen": -0.3936513662338257, + "logits/rejected": -0.41638216376304626, + "logps/chosen": -0.08869173377752304, + "logps/rejected": -0.4445950984954834, + "loss": 4.7829, + "nll_loss": 1.153320550918579, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008869173005223274, + "rewards/margins": 0.035590335726737976, + "rewards/rejected": -0.0444595068693161, + "step": 832 + }, + { + "epoch": 0.5760719225449515, + "grad_norm": 4.303694725036621, + "learning_rate": 2.880359612724758e-05, + "log_odds_chosen": 1.960557222366333, + "log_odds_ratio": -0.6316541433334351, + "logits/chosen": -0.927675187587738, + "logits/rejected": -0.9279346466064453, + "logps/chosen": -0.2110701948404312, + "logps/rejected": -0.47497111558914185, + "loss": 4.5291, + "nll_loss": 1.0691012144088745, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02110701985657215, + "rewards/margins": 0.026390092447400093, + "rewards/rejected": -0.047497112303972244, + "step": 833 + }, + { + "epoch": 0.5767634854771784, + "grad_norm": 4.320366859436035, + "learning_rate": 2.883817427385892e-05, + "log_odds_chosen": 3.7120985984802246, + "log_odds_ratio": -0.21159572899341583, + "logits/chosen": -0.8098641633987427, + "logits/rejected": -0.7814208269119263, + "logps/chosen": -0.06647158414125443, + "logps/rejected": -0.5353458523750305, + "loss": 4.282, + "nll_loss": 1.0493314266204834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006647157948464155, + "rewards/margins": 0.04688742756843567, + "rewards/rejected": -0.05353458225727081, + "step": 834 + }, + { + "epoch": 0.5774550484094052, + "grad_norm": 3.6654677391052246, + "learning_rate": 2.8872752420470263e-05, + "log_odds_chosen": 2.1516904830932617, + "log_odds_ratio": -0.4506683051586151, + "logits/chosen": -0.8107026815414429, + "logits/rejected": -0.8079274892807007, + "logps/chosen": -0.11864329874515533, + "logps/rejected": -0.40280842781066895, + "loss": 4.4068, + "nll_loss": 1.0566353797912598, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011864329688251019, + "rewards/margins": 0.028416510671377182, + "rewards/rejected": -0.040280841290950775, + "step": 835 + }, + { + "epoch": 0.5781466113416321, + "grad_norm": 3.123896360397339, + "learning_rate": 2.8907330567081608e-05, + "log_odds_chosen": 3.5088181495666504, + "log_odds_ratio": -0.23648536205291748, + "logits/chosen": -0.33534562587738037, + "logits/rejected": -0.31095805764198303, + "logps/chosen": -0.09103836119174957, + "logps/rejected": -0.34189480543136597, + "loss": 3.1665, + "nll_loss": 0.767978847026825, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009103836491703987, + "rewards/margins": 0.02508564665913582, + "rewards/rejected": -0.034189481288194656, + "step": 836 + }, + { + "epoch": 0.578838174273859, + "grad_norm": 4.09024715423584, + "learning_rate": 2.894190871369295e-05, + "log_odds_chosen": 2.4240686893463135, + "log_odds_ratio": -0.21396838128566742, + "logits/chosen": -0.7405085563659668, + "logits/rejected": -0.7897012233734131, + "logps/chosen": -0.092780202627182, + "logps/rejected": -0.3410795331001282, + "loss": 3.9187, + "nll_loss": 0.9582738876342773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00927801989018917, + "rewards/margins": 0.024829933419823647, + "rewards/rejected": -0.03410795331001282, + "step": 837 + }, + { + "epoch": 0.5795297372060858, + "grad_norm": 4.34940242767334, + "learning_rate": 2.897648686030429e-05, + "log_odds_chosen": 3.650613784790039, + "log_odds_ratio": -0.13455849885940552, + "logits/chosen": -0.6593092679977417, + "logits/rejected": -0.7048452496528625, + "logps/chosen": -0.06898073852062225, + "logps/rejected": -0.9512230753898621, + "loss": 4.6662, + "nll_loss": 1.1531026363372803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006898073945194483, + "rewards/margins": 0.08822423219680786, + "rewards/rejected": -0.0951223075389862, + "step": 838 + }, + { + "epoch": 0.5802213001383126, + "grad_norm": 4.64874792098999, + "learning_rate": 2.9011065006915633e-05, + "log_odds_chosen": 2.257146120071411, + "log_odds_ratio": -0.5997257232666016, + "logits/chosen": -0.4204868674278259, + "logits/rejected": -0.42794424295425415, + "logps/chosen": -0.21649272739887238, + "logps/rejected": -0.3857274055480957, + "loss": 3.7636, + "nll_loss": 0.8809358477592468, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021649271249771118, + "rewards/margins": 0.016923464834690094, + "rewards/rejected": -0.03857273608446121, + "step": 839 + }, + { + "epoch": 0.5809128630705395, + "grad_norm": 3.4235551357269287, + "learning_rate": 2.9045643153526974e-05, + "log_odds_chosen": 2.356065511703491, + "log_odds_ratio": -0.4604770839214325, + "logits/chosen": -0.7503741979598999, + "logits/rejected": -0.7579087018966675, + "logps/chosen": -0.20275188982486725, + "logps/rejected": -0.5521271228790283, + "loss": 4.1885, + "nll_loss": 1.0010693073272705, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020275190472602844, + "rewards/margins": 0.03493752330541611, + "rewards/rejected": -0.05521271377801895, + "step": 840 + }, + { + "epoch": 0.5816044260027663, + "grad_norm": 3.7340457439422607, + "learning_rate": 2.9080221300138316e-05, + "log_odds_chosen": 5.318211555480957, + "log_odds_ratio": -0.39543986320495605, + "logits/chosen": -0.5348352193832397, + "logits/rejected": -0.541537880897522, + "logps/chosen": -0.05745195224881172, + "logps/rejected": -0.6514012813568115, + "loss": 3.1775, + "nll_loss": 0.7548248171806335, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005745194852352142, + "rewards/margins": 0.05939492955803871, + "rewards/rejected": -0.06514012813568115, + "step": 841 + }, + { + "epoch": 0.5822959889349931, + "grad_norm": 4.628314971923828, + "learning_rate": 2.9114799446749657e-05, + "log_odds_chosen": 2.1264233589172363, + "log_odds_ratio": -0.5673332810401917, + "logits/chosen": -0.4301891028881073, + "logits/rejected": -0.45492786169052124, + "logps/chosen": -0.18683162331581116, + "logps/rejected": -0.4376612901687622, + "loss": 4.6018, + "nll_loss": 1.0937283039093018, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018683163449168205, + "rewards/margins": 0.025082964450120926, + "rewards/rejected": -0.04376612976193428, + "step": 842 + }, + { + "epoch": 0.58298755186722, + "grad_norm": 4.024649620056152, + "learning_rate": 2.9149377593361e-05, + "log_odds_chosen": 4.945640563964844, + "log_odds_ratio": -0.18473605811595917, + "logits/chosen": -0.7456772923469543, + "logits/rejected": -0.8036054372787476, + "logps/chosen": -0.06031443178653717, + "logps/rejected": -0.6921895146369934, + "loss": 4.4804, + "nll_loss": 1.1016205549240112, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006031442899256945, + "rewards/margins": 0.06318750977516174, + "rewards/rejected": -0.0692189484834671, + "step": 843 + }, + { + "epoch": 0.5836791147994468, + "grad_norm": 3.0777909755706787, + "learning_rate": 2.918395573997234e-05, + "log_odds_chosen": 3.7343509197235107, + "log_odds_ratio": -0.29216429591178894, + "logits/chosen": -0.6594283580780029, + "logits/rejected": -0.6560637354850769, + "logps/chosen": -0.068955197930336, + "logps/rejected": -0.4723179042339325, + "loss": 3.2491, + "nll_loss": 0.7830635905265808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0068955197930336, + "rewards/margins": 0.04033627361059189, + "rewards/rejected": -0.04723179340362549, + "step": 844 + }, + { + "epoch": 0.5843706777316736, + "grad_norm": 4.926107883453369, + "learning_rate": 2.9218533886583682e-05, + "log_odds_chosen": 2.450533628463745, + "log_odds_ratio": -0.4356068968772888, + "logits/chosen": -0.6373806595802307, + "logits/rejected": -0.626931369304657, + "logps/chosen": -0.195645272731781, + "logps/rejected": -0.5948528051376343, + "loss": 3.5552, + "nll_loss": 0.8452330231666565, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01956452801823616, + "rewards/margins": 0.03992075473070145, + "rewards/rejected": -0.05948528274893761, + "step": 845 + }, + { + "epoch": 0.5850622406639004, + "grad_norm": 7.288669109344482, + "learning_rate": 2.9253112033195024e-05, + "log_odds_chosen": 1.0796414613723755, + "log_odds_ratio": -0.7791949510574341, + "logits/chosen": -0.8181466460227966, + "logits/rejected": -0.7781654596328735, + "logps/chosen": -0.14210152626037598, + "logps/rejected": -0.24084502458572388, + "loss": 4.4744, + "nll_loss": 1.0406807661056519, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014210152439773083, + "rewards/margins": 0.009874352253973484, + "rewards/rejected": -0.024084504693746567, + "step": 846 + }, + { + "epoch": 0.5857538035961273, + "grad_norm": 5.573554515838623, + "learning_rate": 2.9287690179806365e-05, + "log_odds_chosen": 1.4154304265975952, + "log_odds_ratio": -0.5396012663841248, + "logits/chosen": -0.6344490051269531, + "logits/rejected": -0.6770589351654053, + "logps/chosen": -0.12416580319404602, + "logps/rejected": -0.306435227394104, + "loss": 5.0875, + "nll_loss": 1.217907428741455, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012416580691933632, + "rewards/margins": 0.01822694018483162, + "rewards/rejected": -0.0306435227394104, + "step": 847 + }, + { + "epoch": 0.5864453665283541, + "grad_norm": 3.9988412857055664, + "learning_rate": 2.9322268326417707e-05, + "log_odds_chosen": 1.2546517848968506, + "log_odds_ratio": -0.46386343240737915, + "logits/chosen": -0.44906535744667053, + "logits/rejected": -0.473858118057251, + "logps/chosen": -0.13993997871875763, + "logps/rejected": -0.3896486759185791, + "loss": 3.897, + "nll_loss": 0.9278663396835327, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013993998058140278, + "rewards/margins": 0.024970872327685356, + "rewards/rejected": -0.03896487131714821, + "step": 848 + }, + { + "epoch": 0.5871369294605809, + "grad_norm": 2.8597145080566406, + "learning_rate": 2.935684647302905e-05, + "log_odds_chosen": 2.0133144855499268, + "log_odds_ratio": -0.5181325078010559, + "logits/chosen": -0.5793277621269226, + "logits/rejected": -0.5990381240844727, + "logps/chosen": -0.12990695238113403, + "logps/rejected": -0.4745805859565735, + "loss": 2.9299, + "nll_loss": 0.6806671619415283, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012990695424377918, + "rewards/margins": 0.034467361867427826, + "rewards/rejected": -0.04745806008577347, + "step": 849 + }, + { + "epoch": 0.5878284923928078, + "grad_norm": 3.8037021160125732, + "learning_rate": 2.939142461964039e-05, + "log_odds_chosen": 1.8560802936553955, + "log_odds_ratio": -0.4328434467315674, + "logits/chosen": -0.9212394952774048, + "logits/rejected": -0.9172543287277222, + "logps/chosen": -0.08324627578258514, + "logps/rejected": -0.2946692705154419, + "loss": 3.972, + "nll_loss": 0.9497216939926147, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008324628695845604, + "rewards/margins": 0.021142300218343735, + "rewards/rejected": -0.02946692705154419, + "step": 850 + }, + { + "epoch": 0.5885200553250346, + "grad_norm": 3.4924614429473877, + "learning_rate": 2.942600276625173e-05, + "log_odds_chosen": 2.0488245487213135, + "log_odds_ratio": -0.2997685372829437, + "logits/chosen": -0.3865346610546112, + "logits/rejected": -0.42198455333709717, + "logps/chosen": -0.09593084454536438, + "logps/rejected": -0.37145107984542847, + "loss": 3.6509, + "nll_loss": 0.8827521204948425, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009593085385859013, + "rewards/margins": 0.02755202353000641, + "rewards/rejected": -0.03714510798454285, + "step": 851 + }, + { + "epoch": 0.5892116182572614, + "grad_norm": 3.511770009994507, + "learning_rate": 2.9460580912863073e-05, + "log_odds_chosen": 3.7782740592956543, + "log_odds_ratio": -0.24993540346622467, + "logits/chosen": -0.710189938545227, + "logits/rejected": -0.7517250776290894, + "logps/chosen": -0.09729503840208054, + "logps/rejected": -0.5149400234222412, + "loss": 3.9751, + "nll_loss": 0.9687862992286682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009729502722620964, + "rewards/margins": 0.04176449775695801, + "rewards/rejected": -0.05149400234222412, + "step": 852 + }, + { + "epoch": 0.5899031811894883, + "grad_norm": 4.0979390144348145, + "learning_rate": 2.9495159059474415e-05, + "log_odds_chosen": 2.0559842586517334, + "log_odds_ratio": -0.3502409756183624, + "logits/chosen": -0.7304658889770508, + "logits/rejected": -0.7085290551185608, + "logps/chosen": -0.11763148754835129, + "logps/rejected": -0.38747963309288025, + "loss": 4.2136, + "nll_loss": 1.0183758735656738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011763148941099644, + "rewards/margins": 0.026984816417098045, + "rewards/rejected": -0.038747966289520264, + "step": 853 + }, + { + "epoch": 0.5905947441217151, + "grad_norm": 4.737348556518555, + "learning_rate": 2.9529737206085756e-05, + "log_odds_chosen": 0.7259271144866943, + "log_odds_ratio": -0.5310872793197632, + "logits/chosen": -0.7076024413108826, + "logits/rejected": -0.6707653403282166, + "logps/chosen": -0.12364031374454498, + "logps/rejected": -0.22116045653820038, + "loss": 4.4756, + "nll_loss": 1.0657799243927002, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012364029884338379, + "rewards/margins": 0.009752015583217144, + "rewards/rejected": -0.022116046398878098, + "step": 854 + }, + { + "epoch": 0.5912863070539419, + "grad_norm": 4.541485786437988, + "learning_rate": 2.9564315352697098e-05, + "log_odds_chosen": 1.7745822668075562, + "log_odds_ratio": -0.3305359184741974, + "logits/chosen": -0.8056719899177551, + "logits/rejected": -0.8085699081420898, + "logps/chosen": -0.11804518103599548, + "logps/rejected": -0.3992602229118347, + "loss": 4.4872, + "nll_loss": 1.0887389183044434, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011804519221186638, + "rewards/margins": 0.028121504932641983, + "rewards/rejected": -0.03992602229118347, + "step": 855 + }, + { + "epoch": 0.5919778699861687, + "grad_norm": 4.4644951820373535, + "learning_rate": 2.959889349930844e-05, + "log_odds_chosen": 3.1081631183624268, + "log_odds_ratio": -0.11160407960414886, + "logits/chosen": -0.6001406908035278, + "logits/rejected": -0.6196090579032898, + "logps/chosen": -0.12042544782161713, + "logps/rejected": -1.225574016571045, + "loss": 3.2166, + "nll_loss": 0.7929803133010864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012042545713484287, + "rewards/margins": 0.11051484942436218, + "rewards/rejected": -0.1225573942065239, + "step": 856 + }, + { + "epoch": 0.5926694329183956, + "grad_norm": 4.001978397369385, + "learning_rate": 2.963347164591978e-05, + "log_odds_chosen": 4.053859233856201, + "log_odds_ratio": -0.2311854213476181, + "logits/chosen": -0.8147008419036865, + "logits/rejected": -0.7994644045829773, + "logps/chosen": -0.05535433441400528, + "logps/rejected": -0.6948223114013672, + "loss": 4.2184, + "nll_loss": 1.0314934253692627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005535434000194073, + "rewards/margins": 0.06394679844379425, + "rewards/rejected": -0.0694822371006012, + "step": 857 + }, + { + "epoch": 0.5933609958506224, + "grad_norm": 5.344448089599609, + "learning_rate": 2.9668049792531122e-05, + "log_odds_chosen": 1.0760059356689453, + "log_odds_ratio": -0.7446597218513489, + "logits/chosen": -0.8145299553871155, + "logits/rejected": -0.8130720853805542, + "logps/chosen": -0.1919110268354416, + "logps/rejected": -0.483597993850708, + "loss": 5.2387, + "nll_loss": 1.235212802886963, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01919110305607319, + "rewards/margins": 0.02916869707405567, + "rewards/rejected": -0.04835980013012886, + "step": 858 + }, + { + "epoch": 0.5940525587828492, + "grad_norm": 4.593982696533203, + "learning_rate": 2.9702627939142464e-05, + "log_odds_chosen": 3.4344637393951416, + "log_odds_ratio": -0.24414914846420288, + "logits/chosen": -0.5376627445220947, + "logits/rejected": -0.5609232187271118, + "logps/chosen": -0.07658465951681137, + "logps/rejected": -0.7597995400428772, + "loss": 4.6779, + "nll_loss": 1.1450719833374023, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007658466696739197, + "rewards/margins": 0.06832148879766464, + "rewards/rejected": -0.07597995549440384, + "step": 859 + }, + { + "epoch": 0.5947441217150761, + "grad_norm": 4.40407657623291, + "learning_rate": 2.9737206085753806e-05, + "log_odds_chosen": 2.1988892555236816, + "log_odds_ratio": -0.4513634443283081, + "logits/chosen": -0.8038565516471863, + "logits/rejected": -0.8830662369728088, + "logps/chosen": -0.1267254501581192, + "logps/rejected": -0.44491326808929443, + "loss": 5.182, + "nll_loss": 1.250370979309082, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012672546319663525, + "rewards/margins": 0.031818781048059464, + "rewards/rejected": -0.04449132829904556, + "step": 860 + }, + { + "epoch": 0.5954356846473029, + "grad_norm": 4.350531578063965, + "learning_rate": 2.9771784232365147e-05, + "log_odds_chosen": 5.146965503692627, + "log_odds_ratio": -0.09328575432300568, + "logits/chosen": -0.8370950818061829, + "logits/rejected": -0.8731504678726196, + "logps/chosen": -0.029911965131759644, + "logps/rejected": -1.0596965551376343, + "loss": 4.2535, + "nll_loss": 1.0540424585342407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029911966994404793, + "rewards/margins": 0.10297845304012299, + "rewards/rejected": -0.10596965253353119, + "step": 861 + }, + { + "epoch": 0.5961272475795297, + "grad_norm": 4.999265670776367, + "learning_rate": 2.980636237897649e-05, + "log_odds_chosen": 4.388033866882324, + "log_odds_ratio": -0.17923061549663544, + "logits/chosen": -0.7717255353927612, + "logits/rejected": -0.8007091879844666, + "logps/chosen": -0.07025519758462906, + "logps/rejected": -0.7537840604782104, + "loss": 4.2105, + "nll_loss": 1.034711480140686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007025519385933876, + "rewards/margins": 0.06835289299488068, + "rewards/rejected": -0.0753784030675888, + "step": 862 + }, + { + "epoch": 0.5968188105117566, + "grad_norm": 4.830355644226074, + "learning_rate": 2.984094052558783e-05, + "log_odds_chosen": 4.727936744689941, + "log_odds_ratio": -0.1790466606616974, + "logits/chosen": -0.5581704378128052, + "logits/rejected": -0.5863669514656067, + "logps/chosen": -0.3133222460746765, + "logps/rejected": -1.0449085235595703, + "loss": 4.303, + "nll_loss": 1.0578471422195435, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03133222460746765, + "rewards/margins": 0.0731586217880249, + "rewards/rejected": -0.10449084639549255, + "step": 863 + }, + { + "epoch": 0.5975103734439834, + "grad_norm": 5.1829447746276855, + "learning_rate": 2.9875518672199172e-05, + "log_odds_chosen": 1.112974762916565, + "log_odds_ratio": -0.6885846853256226, + "logits/chosen": -0.6578227281570435, + "logits/rejected": -0.626107931137085, + "logps/chosen": -0.17409192025661469, + "logps/rejected": -0.4953766167163849, + "loss": 3.5169, + "nll_loss": 0.8103781342506409, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01740919053554535, + "rewards/margins": 0.0321284681558609, + "rewards/rejected": -0.04953765869140625, + "step": 864 + }, + { + "epoch": 0.5982019363762102, + "grad_norm": 4.5902581214904785, + "learning_rate": 2.9910096818810513e-05, + "log_odds_chosen": 0.7052420377731323, + "log_odds_ratio": -0.5897258520126343, + "logits/chosen": -0.6574615836143494, + "logits/rejected": -0.6554303765296936, + "logps/chosen": -0.1836041957139969, + "logps/rejected": -0.3788834810256958, + "loss": 3.7562, + "nll_loss": 0.8800770044326782, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01836041919887066, + "rewards/margins": 0.01952793076634407, + "rewards/rejected": -0.03788834810256958, + "step": 865 + }, + { + "epoch": 0.598893499308437, + "grad_norm": 4.048480987548828, + "learning_rate": 2.9944674965421855e-05, + "log_odds_chosen": 1.4362030029296875, + "log_odds_ratio": -0.38652119040489197, + "logits/chosen": -0.8279319405555725, + "logits/rejected": -0.8762853145599365, + "logps/chosen": -0.14335329830646515, + "logps/rejected": -0.7207509875297546, + "loss": 3.5014, + "nll_loss": 0.8366998434066772, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014335330575704575, + "rewards/margins": 0.05773976817727089, + "rewards/rejected": -0.07207509875297546, + "step": 866 + }, + { + "epoch": 0.5995850622406639, + "grad_norm": 5.155201435089111, + "learning_rate": 2.9979253112033196e-05, + "log_odds_chosen": 2.813506603240967, + "log_odds_ratio": -0.5240182876586914, + "logits/chosen": -0.6967120170593262, + "logits/rejected": -0.6847906112670898, + "logps/chosen": -0.18069618940353394, + "logps/rejected": -0.5114045143127441, + "loss": 4.5955, + "nll_loss": 1.0964840650558472, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018069619312882423, + "rewards/margins": 0.03307083621621132, + "rewards/rejected": -0.05114045366644859, + "step": 867 + }, + { + "epoch": 0.6002766251728907, + "grad_norm": 3.6505987644195557, + "learning_rate": 3.0013831258644538e-05, + "log_odds_chosen": 0.4830799996852875, + "log_odds_ratio": -0.5487514138221741, + "logits/chosen": -0.7270222902297974, + "logits/rejected": -0.7637308835983276, + "logps/chosen": -0.22091691195964813, + "logps/rejected": -0.36204272508621216, + "loss": 4.494, + "nll_loss": 1.0686300992965698, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022091692313551903, + "rewards/margins": 0.014112580567598343, + "rewards/rejected": -0.036204271018505096, + "step": 868 + }, + { + "epoch": 0.6009681881051175, + "grad_norm": 6.205440044403076, + "learning_rate": 3.004840940525588e-05, + "log_odds_chosen": 3.524580955505371, + "log_odds_ratio": -0.4662216305732727, + "logits/chosen": -0.754102349281311, + "logits/rejected": -0.7670979499816895, + "logps/chosen": -0.13366849720478058, + "logps/rejected": -0.6823487281799316, + "loss": 4.7184, + "nll_loss": 1.1329686641693115, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013366851024329662, + "rewards/margins": 0.054868023842573166, + "rewards/rejected": -0.0682348757982254, + "step": 869 + }, + { + "epoch": 0.6016597510373444, + "grad_norm": 2.9457459449768066, + "learning_rate": 3.008298755186722e-05, + "log_odds_chosen": 2.1648120880126953, + "log_odds_ratio": -0.3084190785884857, + "logits/chosen": -0.7266798615455627, + "logits/rejected": -0.7544651031494141, + "logps/chosen": -0.09362323582172394, + "logps/rejected": -0.4190313518047333, + "loss": 3.7257, + "nll_loss": 0.9005783796310425, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009362323209643364, + "rewards/margins": 0.03254081681370735, + "rewards/rejected": -0.041903138160705566, + "step": 870 + }, + { + "epoch": 0.6023513139695712, + "grad_norm": 3.978710412979126, + "learning_rate": 3.0117565698478563e-05, + "log_odds_chosen": 3.242192029953003, + "log_odds_ratio": -0.1863611340522766, + "logits/chosen": -0.552879273891449, + "logits/rejected": -0.5906392335891724, + "logps/chosen": -0.11116364598274231, + "logps/rejected": -0.8105045557022095, + "loss": 3.6538, + "nll_loss": 0.8948162794113159, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01111636497080326, + "rewards/margins": 0.06993409246206284, + "rewards/rejected": -0.08105045557022095, + "step": 871 + }, + { + "epoch": 0.603042876901798, + "grad_norm": 4.808966636657715, + "learning_rate": 3.0152143845089904e-05, + "log_odds_chosen": 3.1818840503692627, + "log_odds_ratio": -0.5515273213386536, + "logits/chosen": -0.7102684378623962, + "logits/rejected": -0.7035820484161377, + "logps/chosen": -0.2942872941493988, + "logps/rejected": -0.8675200343132019, + "loss": 4.3676, + "nll_loss": 1.0367563962936401, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02942873351275921, + "rewards/margins": 0.05732327699661255, + "rewards/rejected": -0.0867520123720169, + "step": 872 + }, + { + "epoch": 0.6037344398340249, + "grad_norm": 5.278499126434326, + "learning_rate": 3.0186721991701246e-05, + "log_odds_chosen": 3.4745728969573975, + "log_odds_ratio": -0.4629146158695221, + "logits/chosen": -0.8261131048202515, + "logits/rejected": -0.8101270198822021, + "logps/chosen": -0.14608454704284668, + "logps/rejected": -0.45583587884902954, + "loss": 3.7457, + "nll_loss": 0.890127420425415, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014608454890549183, + "rewards/margins": 0.030975131317973137, + "rewards/rejected": -0.04558359086513519, + "step": 873 + }, + { + "epoch": 0.6044260027662517, + "grad_norm": 6.244517803192139, + "learning_rate": 3.0221300138312587e-05, + "log_odds_chosen": 1.8965070247650146, + "log_odds_ratio": -0.45901423692703247, + "logits/chosen": -0.3932605981826782, + "logits/rejected": -0.46693626046180725, + "logps/chosen": -0.086180180311203, + "logps/rejected": -0.41221320629119873, + "loss": 3.7194, + "nll_loss": 0.8839367628097534, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00861801765859127, + "rewards/margins": 0.03260330110788345, + "rewards/rejected": -0.04122132062911987, + "step": 874 + }, + { + "epoch": 0.6051175656984785, + "grad_norm": 3.957749843597412, + "learning_rate": 3.025587828492393e-05, + "log_odds_chosen": 3.5800201892852783, + "log_odds_ratio": -0.3158847391605377, + "logits/chosen": -0.8257095813751221, + "logits/rejected": -0.8298307657241821, + "logps/chosen": -0.12911739945411682, + "logps/rejected": -0.637715220451355, + "loss": 3.9396, + "nll_loss": 0.9532997608184814, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012911740690469742, + "rewards/margins": 0.050859782844781876, + "rewards/rejected": -0.06377153098583221, + "step": 875 + }, + { + "epoch": 0.6058091286307054, + "grad_norm": 4.251432418823242, + "learning_rate": 3.029045643153527e-05, + "log_odds_chosen": 4.258300304412842, + "log_odds_ratio": -0.1648968756198883, + "logits/chosen": -0.8471444845199585, + "logits/rejected": -0.891996443271637, + "logps/chosen": -0.04554177075624466, + "logps/rejected": -0.8233499526977539, + "loss": 4.0591, + "nll_loss": 0.9982973337173462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004554177634418011, + "rewards/margins": 0.0777808129787445, + "rewards/rejected": -0.08233499526977539, + "step": 876 + }, + { + "epoch": 0.6065006915629322, + "grad_norm": 3.713589668273926, + "learning_rate": 3.0325034578146612e-05, + "log_odds_chosen": 3.3973257541656494, + "log_odds_ratio": -0.20859216153621674, + "logits/chosen": -0.3923302888870239, + "logits/rejected": -0.4933090806007385, + "logps/chosen": -0.054292913526296616, + "logps/rejected": -0.6101047992706299, + "loss": 3.6514, + "nll_loss": 0.8919917941093445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005429290700703859, + "rewards/margins": 0.055581189692020416, + "rewards/rejected": -0.06101047992706299, + "step": 877 + }, + { + "epoch": 0.607192254495159, + "grad_norm": 6.882563591003418, + "learning_rate": 3.0359612724757954e-05, + "log_odds_chosen": 0.854179859161377, + "log_odds_ratio": -0.8967254161834717, + "logits/chosen": -0.9078612327575684, + "logits/rejected": -0.9028449058532715, + "logps/chosen": -0.11673790216445923, + "logps/rejected": -0.4252132177352905, + "loss": 5.0935, + "nll_loss": 1.1836953163146973, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011673791334033012, + "rewards/margins": 0.03084753267467022, + "rewards/rejected": -0.04252132400870323, + "step": 878 + }, + { + "epoch": 0.6078838174273858, + "grad_norm": 7.392794132232666, + "learning_rate": 3.0394190871369292e-05, + "log_odds_chosen": 0.07599025964736938, + "log_odds_ratio": -1.283529281616211, + "logits/chosen": -0.7483278512954712, + "logits/rejected": -0.7108883857727051, + "logps/chosen": -0.25827908515930176, + "logps/rejected": -0.20767498016357422, + "loss": 6.6526, + "nll_loss": 1.5347901582717896, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.025827907025814056, + "rewards/margins": -0.005060410127043724, + "rewards/rejected": -0.02076749876141548, + "step": 879 + }, + { + "epoch": 0.6085753803596127, + "grad_norm": 6.576107978820801, + "learning_rate": 3.0428769017980633e-05, + "log_odds_chosen": 2.8613975048065186, + "log_odds_ratio": -0.34593522548675537, + "logits/chosen": -0.30519038438796997, + "logits/rejected": -0.3554477393627167, + "logps/chosen": -0.07818441838026047, + "logps/rejected": -0.6126123666763306, + "loss": 6.0538, + "nll_loss": 1.4788503646850586, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007818441838026047, + "rewards/margins": 0.05344279855489731, + "rewards/rejected": -0.06126123666763306, + "step": 880 + }, + { + "epoch": 0.6092669432918395, + "grad_norm": 4.552968502044678, + "learning_rate": 3.0463347164591975e-05, + "log_odds_chosen": 2.4071388244628906, + "log_odds_ratio": -0.40522754192352295, + "logits/chosen": -0.6670210361480713, + "logits/rejected": -0.7009649872779846, + "logps/chosen": -0.12310780584812164, + "logps/rejected": -0.5360164642333984, + "loss": 3.7009, + "nll_loss": 0.8846949338912964, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012310780584812164, + "rewards/margins": 0.04129086434841156, + "rewards/rejected": -0.05360164865851402, + "step": 881 + }, + { + "epoch": 0.6099585062240664, + "grad_norm": 21.216053009033203, + "learning_rate": 3.0497925311203323e-05, + "log_odds_chosen": 0.16731399297714233, + "log_odds_ratio": -1.1148260831832886, + "logits/chosen": -0.40998703241348267, + "logits/rejected": -0.4283626079559326, + "logps/chosen": -0.38131338357925415, + "logps/rejected": -0.44890064001083374, + "loss": 4.4729, + "nll_loss": 1.0067414045333862, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.038131337612867355, + "rewards/margins": 0.006758726201951504, + "rewards/rejected": -0.044890061020851135, + "step": 882 + }, + { + "epoch": 0.6106500691562933, + "grad_norm": 5.321377754211426, + "learning_rate": 3.053250345781467e-05, + "log_odds_chosen": 2.30208420753479, + "log_odds_ratio": -0.3444046974182129, + "logits/chosen": -0.6480960845947266, + "logits/rejected": -0.6440442204475403, + "logps/chosen": -0.17282502353191376, + "logps/rejected": -0.43156111240386963, + "loss": 4.774, + "nll_loss": 1.159049391746521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017282500863075256, + "rewards/margins": 0.025873607024550438, + "rewards/rejected": -0.043156106024980545, + "step": 883 + }, + { + "epoch": 0.6113416320885201, + "grad_norm": 4.070542812347412, + "learning_rate": 3.056708160442601e-05, + "log_odds_chosen": 2.503066062927246, + "log_odds_ratio": -0.24399249255657196, + "logits/chosen": -0.6377800703048706, + "logits/rejected": -0.647244930267334, + "logps/chosen": -0.0947108268737793, + "logps/rejected": -0.521747350692749, + "loss": 4.3188, + "nll_loss": 1.0552964210510254, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00947108305990696, + "rewards/margins": 0.04270365089178085, + "rewards/rejected": -0.052174732089042664, + "step": 884 + }, + { + "epoch": 0.6120331950207469, + "grad_norm": 4.310357570648193, + "learning_rate": 3.060165975103735e-05, + "log_odds_chosen": 2.998178005218506, + "log_odds_ratio": -0.17078091204166412, + "logits/chosen": -0.6814748048782349, + "logits/rejected": -0.8221471905708313, + "logps/chosen": -0.03645121678709984, + "logps/rejected": -0.4962855875492096, + "loss": 3.8549, + "nll_loss": 0.9466458559036255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036451215855777264, + "rewards/margins": 0.045983437448740005, + "rewards/rejected": -0.04962855949997902, + "step": 885 + }, + { + "epoch": 0.6127247579529738, + "grad_norm": 3.07930850982666, + "learning_rate": 3.063623789764869e-05, + "log_odds_chosen": 3.6279428005218506, + "log_odds_ratio": -0.2710344195365906, + "logits/chosen": -0.4153643846511841, + "logits/rejected": -0.44188249111175537, + "logps/chosen": -0.044143207371234894, + "logps/rejected": -0.785947322845459, + "loss": 2.7167, + "nll_loss": 0.6520835161209106, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004414320457726717, + "rewards/margins": 0.07418042421340942, + "rewards/rejected": -0.07859473675489426, + "step": 886 + }, + { + "epoch": 0.6134163208852006, + "grad_norm": 6.346424102783203, + "learning_rate": 3.0670816044260035e-05, + "log_odds_chosen": 2.9766647815704346, + "log_odds_ratio": -0.5250827670097351, + "logits/chosen": -0.6835624575614929, + "logits/rejected": -0.7732921838760376, + "logps/chosen": -0.13457365334033966, + "logps/rejected": -0.6309829354286194, + "loss": 5.0592, + "nll_loss": 1.212296485900879, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013457365334033966, + "rewards/margins": 0.04964093118906021, + "rewards/rejected": -0.06309829652309418, + "step": 887 + }, + { + "epoch": 0.6141078838174274, + "grad_norm": 5.160597801208496, + "learning_rate": 3.070539419087137e-05, + "log_odds_chosen": 3.923093795776367, + "log_odds_ratio": -0.3125317096710205, + "logits/chosen": -0.7844764590263367, + "logits/rejected": -0.7793978452682495, + "logps/chosen": -0.10180672258138657, + "logps/rejected": -0.688077449798584, + "loss": 4.0323, + "nll_loss": 0.9768339395523071, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010180672630667686, + "rewards/margins": 0.05862707644701004, + "rewards/rejected": -0.06880774348974228, + "step": 888 + }, + { + "epoch": 0.6147994467496543, + "grad_norm": 26.145076751708984, + "learning_rate": 3.073997233748271e-05, + "log_odds_chosen": 1.9590518474578857, + "log_odds_ratio": -0.7331660389900208, + "logits/chosen": -0.5167617797851562, + "logits/rejected": -0.4817545413970947, + "logps/chosen": -0.07678233087062836, + "logps/rejected": -0.43516165018081665, + "loss": 3.6756, + "nll_loss": 0.8455724120140076, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007678233552724123, + "rewards/margins": 0.03583793342113495, + "rewards/rejected": -0.043516166508197784, + "step": 889 + }, + { + "epoch": 0.6154910096818811, + "grad_norm": 4.597845554351807, + "learning_rate": 3.077455048409405e-05, + "log_odds_chosen": 2.656071662902832, + "log_odds_ratio": -0.27559003233909607, + "logits/chosen": -0.49670350551605225, + "logits/rejected": -0.5645101070404053, + "logps/chosen": -0.0880126804113388, + "logps/rejected": -0.6273921132087708, + "loss": 4.4735, + "nll_loss": 1.0908229351043701, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00880126841366291, + "rewards/margins": 0.053937941789627075, + "rewards/rejected": -0.06273921579122543, + "step": 890 + }, + { + "epoch": 0.6161825726141079, + "grad_norm": 4.356814384460449, + "learning_rate": 3.0809128630705394e-05, + "log_odds_chosen": 1.726629614830017, + "log_odds_ratio": -0.3198900818824768, + "logits/chosen": -0.9252246618270874, + "logits/rejected": -0.9489607810974121, + "logps/chosen": -0.16357335448265076, + "logps/rejected": -0.5082889795303345, + "loss": 5.4556, + "nll_loss": 1.3318991661071777, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016357336193323135, + "rewards/margins": 0.03447156772017479, + "rewards/rejected": -0.050828903913497925, + "step": 891 + }, + { + "epoch": 0.6168741355463347, + "grad_norm": 5.273382663726807, + "learning_rate": 3.0843706777316736e-05, + "log_odds_chosen": 0.9887380599975586, + "log_odds_ratio": -0.9254065155982971, + "logits/chosen": -0.7355146408081055, + "logits/rejected": -0.6899512410163879, + "logps/chosen": -0.19425490498542786, + "logps/rejected": -0.30127257108688354, + "loss": 3.9442, + "nll_loss": 0.8935017585754395, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019425490871071815, + "rewards/margins": 0.01070176437497139, + "rewards/rejected": -0.030127257108688354, + "step": 892 + }, + { + "epoch": 0.6175656984785616, + "grad_norm": 3.6554532051086426, + "learning_rate": 3.087828492392808e-05, + "log_odds_chosen": 3.534128189086914, + "log_odds_ratio": -0.26558980345726013, + "logits/chosen": -0.4329376816749573, + "logits/rejected": -0.4630773067474365, + "logps/chosen": -0.08066828548908234, + "logps/rejected": -0.6910880208015442, + "loss": 3.9841, + "nll_loss": 0.9694664478302002, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008066828362643719, + "rewards/margins": 0.061041973531246185, + "rewards/rejected": -0.06910879909992218, + "step": 893 + }, + { + "epoch": 0.6182572614107884, + "grad_norm": 5.20051383972168, + "learning_rate": 3.091286307053942e-05, + "log_odds_chosen": 2.1171774864196777, + "log_odds_ratio": -0.4897395968437195, + "logits/chosen": -0.3613443374633789, + "logits/rejected": -0.3467557430267334, + "logps/chosen": -0.13655899465084076, + "logps/rejected": -0.35757774114608765, + "loss": 4.3682, + "nll_loss": 1.0430686473846436, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013655899092555046, + "rewards/margins": 0.022101877257227898, + "rewards/rejected": -0.035757772624492645, + "step": 894 + }, + { + "epoch": 0.6189488243430152, + "grad_norm": 6.48548698425293, + "learning_rate": 3.094744121715076e-05, + "log_odds_chosen": 1.8737776279449463, + "log_odds_ratio": -0.4800935387611389, + "logits/chosen": -0.7387460470199585, + "logits/rejected": -0.7719529867172241, + "logps/chosen": -0.1483597457408905, + "logps/rejected": -0.5151386857032776, + "loss": 4.0982, + "nll_loss": 0.9765384793281555, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014835975132882595, + "rewards/margins": 0.03667789697647095, + "rewards/rejected": -0.05151387304067612, + "step": 895 + }, + { + "epoch": 0.6196403872752421, + "grad_norm": 4.083328723907471, + "learning_rate": 3.09820193637621e-05, + "log_odds_chosen": 0.5870912671089172, + "log_odds_ratio": -0.6784539222717285, + "logits/chosen": -1.0754952430725098, + "logits/rejected": -1.0223438739776611, + "logps/chosen": -0.202505961060524, + "logps/rejected": -0.3597847819328308, + "loss": 5.1295, + "nll_loss": 1.2145276069641113, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0202505961060524, + "rewards/margins": 0.015727879479527473, + "rewards/rejected": -0.03597847744822502, + "step": 896 + }, + { + "epoch": 0.6203319502074689, + "grad_norm": 3.179802417755127, + "learning_rate": 3.1016597510373443e-05, + "log_odds_chosen": 3.1242868900299072, + "log_odds_ratio": -0.38213053345680237, + "logits/chosen": -0.7337120771408081, + "logits/rejected": -0.7521069049835205, + "logps/chosen": -0.09369072318077087, + "logps/rejected": -0.39017921686172485, + "loss": 4.2453, + "nll_loss": 1.023102879524231, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009369072504341602, + "rewards/margins": 0.02964884601533413, + "rewards/rejected": -0.039017919450998306, + "step": 897 + }, + { + "epoch": 0.6210235131396957, + "grad_norm": 4.312764644622803, + "learning_rate": 3.1051175656984785e-05, + "log_odds_chosen": 1.904249906539917, + "log_odds_ratio": -0.40307527780532837, + "logits/chosen": -1.055102825164795, + "logits/rejected": -1.0678390264511108, + "logps/chosen": -0.06856757402420044, + "logps/rejected": -0.3968680202960968, + "loss": 4.8316, + "nll_loss": 1.1675834655761719, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006856757681816816, + "rewards/margins": 0.032830044627189636, + "rewards/rejected": -0.03968679904937744, + "step": 898 + }, + { + "epoch": 0.6217150760719226, + "grad_norm": 3.331369638442993, + "learning_rate": 3.1085753803596127e-05, + "log_odds_chosen": 3.4465959072113037, + "log_odds_ratio": -0.17227791249752045, + "logits/chosen": -0.8975204825401306, + "logits/rejected": -0.9244405627250671, + "logps/chosen": -0.07354501634836197, + "logps/rejected": -0.8571603298187256, + "loss": 3.7298, + "nll_loss": 0.9152202010154724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007354501634836197, + "rewards/margins": 0.07836152613162994, + "rewards/rejected": -0.08571602404117584, + "step": 899 + }, + { + "epoch": 0.6224066390041494, + "grad_norm": 4.473353862762451, + "learning_rate": 3.112033195020747e-05, + "log_odds_chosen": 4.239065647125244, + "log_odds_ratio": -0.21452511847019196, + "logits/chosen": -0.5707802772521973, + "logits/rejected": -0.6279401183128357, + "logps/chosen": -0.061546772718429565, + "logps/rejected": -0.9604505896568298, + "loss": 4.919, + "nll_loss": 1.2083086967468262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006154676899313927, + "rewards/margins": 0.08989039063453674, + "rewards/rejected": -0.09604506194591522, + "step": 900 + }, + { + "epoch": 0.6230982019363762, + "grad_norm": 6.345496654510498, + "learning_rate": 3.115491009681881e-05, + "log_odds_chosen": 1.1958024501800537, + "log_odds_ratio": -0.41798198223114014, + "logits/chosen": -0.5380837917327881, + "logits/rejected": -0.5586702823638916, + "logps/chosen": -0.07194848358631134, + "logps/rejected": -0.2240828424692154, + "loss": 5.5867, + "nll_loss": 1.354879379272461, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007194849196821451, + "rewards/margins": 0.015213435515761375, + "rewards/rejected": -0.02240828424692154, + "step": 901 + }, + { + "epoch": 0.623789764868603, + "grad_norm": 3.472062349319458, + "learning_rate": 3.118948824343015e-05, + "log_odds_chosen": 2.3726320266723633, + "log_odds_ratio": -0.4545746147632599, + "logits/chosen": -0.6491698026657104, + "logits/rejected": -0.6731710433959961, + "logps/chosen": -0.13186895847320557, + "logps/rejected": -0.3443443775177002, + "loss": 3.0496, + "nll_loss": 0.7169334888458252, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013186894357204437, + "rewards/margins": 0.021247539669275284, + "rewards/rejected": -0.03443443775177002, + "step": 902 + }, + { + "epoch": 0.6244813278008299, + "grad_norm": 5.7245001792907715, + "learning_rate": 3.122406639004149e-05, + "log_odds_chosen": 2.1482057571411133, + "log_odds_ratio": -0.8374338150024414, + "logits/chosen": -0.5861244201660156, + "logits/rejected": -0.6452823877334595, + "logps/chosen": -0.17203915119171143, + "logps/rejected": -0.347610741853714, + "loss": 2.8378, + "nll_loss": 0.6257038116455078, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017203915864229202, + "rewards/margins": 0.017557159066200256, + "rewards/rejected": -0.03476107493042946, + "step": 903 + }, + { + "epoch": 0.6251728907330567, + "grad_norm": 4.456448554992676, + "learning_rate": 3.1258644536652834e-05, + "log_odds_chosen": 1.7303143739700317, + "log_odds_ratio": -0.4254305958747864, + "logits/chosen": -0.8845744132995605, + "logits/rejected": -0.9023687839508057, + "logps/chosen": -0.11853601038455963, + "logps/rejected": -0.25728434324264526, + "loss": 4.7616, + "nll_loss": 1.147857666015625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011853600852191448, + "rewards/margins": 0.013874834403395653, + "rewards/rejected": -0.025728434324264526, + "step": 904 + }, + { + "epoch": 0.6258644536652835, + "grad_norm": 4.575382709503174, + "learning_rate": 3.1293222683264176e-05, + "log_odds_chosen": 2.423657178878784, + "log_odds_ratio": -0.20692911744117737, + "logits/chosen": -0.6058048605918884, + "logits/rejected": -0.5127230882644653, + "logps/chosen": -0.15506532788276672, + "logps/rejected": -0.5397747159004211, + "loss": 4.3004, + "nll_loss": 1.0544087886810303, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015506532974541187, + "rewards/margins": 0.03847094252705574, + "rewards/rejected": -0.05397747457027435, + "step": 905 + }, + { + "epoch": 0.6265560165975104, + "grad_norm": 3.4776272773742676, + "learning_rate": 3.132780082987552e-05, + "log_odds_chosen": 3.638993263244629, + "log_odds_ratio": -0.19260406494140625, + "logits/chosen": -0.9499366879463196, + "logits/rejected": -0.9942508339881897, + "logps/chosen": -0.0676005631685257, + "logps/rejected": -0.580873429775238, + "loss": 3.8193, + "nll_loss": 0.935560941696167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006760057061910629, + "rewards/margins": 0.05132729187607765, + "rewards/rejected": -0.058087341487407684, + "step": 906 + }, + { + "epoch": 0.6272475795297372, + "grad_norm": 5.688107013702393, + "learning_rate": 3.136237897648686e-05, + "log_odds_chosen": 1.3792686462402344, + "log_odds_ratio": -0.5582557916641235, + "logits/chosen": -0.8317203521728516, + "logits/rejected": -0.7857211828231812, + "logps/chosen": -0.17600604891777039, + "logps/rejected": -0.444831520318985, + "loss": 5.0394, + "nll_loss": 1.2040131092071533, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01760060526430607, + "rewards/margins": 0.02688254788517952, + "rewards/rejected": -0.04448315501213074, + "step": 907 + }, + { + "epoch": 0.627939142461964, + "grad_norm": 7.277334213256836, + "learning_rate": 3.13969571230982e-05, + "log_odds_chosen": 2.3631396293640137, + "log_odds_ratio": -0.40137773752212524, + "logits/chosen": -0.7335744500160217, + "logits/rejected": -0.7411458492279053, + "logps/chosen": -0.06967581808567047, + "logps/rejected": -0.4501439332962036, + "loss": 2.5292, + "nll_loss": 0.5921643972396851, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006967581808567047, + "rewards/margins": 0.03804681450128555, + "rewards/rejected": -0.0450143963098526, + "step": 908 + }, + { + "epoch": 0.6286307053941909, + "grad_norm": 4.77232551574707, + "learning_rate": 3.143153526970954e-05, + "log_odds_chosen": 1.4729492664337158, + "log_odds_ratio": -0.35362690687179565, + "logits/chosen": -0.7603928446769714, + "logits/rejected": -0.696243405342102, + "logps/chosen": -0.07079069316387177, + "logps/rejected": -0.3067443072795868, + "loss": 6.0722, + "nll_loss": 1.4826915264129639, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007079069968312979, + "rewards/margins": 0.023595361039042473, + "rewards/rejected": -0.03067443147301674, + "step": 909 + }, + { + "epoch": 0.6293222683264177, + "grad_norm": 6.8746256828308105, + "learning_rate": 3.1466113416320884e-05, + "log_odds_chosen": 1.3585569858551025, + "log_odds_ratio": -0.8060304522514343, + "logits/chosen": -0.5674390196800232, + "logits/rejected": -0.5963701009750366, + "logps/chosen": -0.15420934557914734, + "logps/rejected": -0.35397300124168396, + "loss": 4.8346, + "nll_loss": 1.1280417442321777, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015420932322740555, + "rewards/margins": 0.019976366311311722, + "rewards/rejected": -0.03539729863405228, + "step": 910 + }, + { + "epoch": 0.6300138312586445, + "grad_norm": 3.980470895767212, + "learning_rate": 3.1500691562932225e-05, + "log_odds_chosen": 0.9345357418060303, + "log_odds_ratio": -0.5109595060348511, + "logits/chosen": -0.7937835454940796, + "logits/rejected": -0.8195587992668152, + "logps/chosen": -0.1285438984632492, + "logps/rejected": -0.294846773147583, + "loss": 4.4796, + "nll_loss": 1.0688003301620483, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01285438984632492, + "rewards/margins": 0.01663028635084629, + "rewards/rejected": -0.02948467805981636, + "step": 911 + }, + { + "epoch": 0.6307053941908713, + "grad_norm": 3.9688222408294678, + "learning_rate": 3.153526970954357e-05, + "log_odds_chosen": 2.540585994720459, + "log_odds_ratio": -0.36621564626693726, + "logits/chosen": -0.7523171305656433, + "logits/rejected": -0.6979089379310608, + "logps/chosen": -0.13078458607196808, + "logps/rejected": -0.5843268632888794, + "loss": 3.3149, + "nll_loss": 0.7921104431152344, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013078458607196808, + "rewards/margins": 0.04535423591732979, + "rewards/rejected": -0.0584326907992363, + "step": 912 + }, + { + "epoch": 0.6313969571230982, + "grad_norm": 4.612651348114014, + "learning_rate": 3.156984785615491e-05, + "log_odds_chosen": 0.6476094722747803, + "log_odds_ratio": -0.7385530471801758, + "logits/chosen": -0.8418199419975281, + "logits/rejected": -0.8664075136184692, + "logps/chosen": -0.1603190004825592, + "logps/rejected": -0.26439252495765686, + "loss": 4.0473, + "nll_loss": 0.9379769563674927, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01603190042078495, + "rewards/margins": 0.01040735188871622, + "rewards/rejected": -0.026439251378178596, + "step": 913 + }, + { + "epoch": 0.632088520055325, + "grad_norm": 4.260547161102295, + "learning_rate": 3.160442600276625e-05, + "log_odds_chosen": 3.119439125061035, + "log_odds_ratio": -0.19069647789001465, + "logits/chosen": -0.586380660533905, + "logits/rejected": -0.5719614624977112, + "logps/chosen": -0.06460949778556824, + "logps/rejected": -0.5709012746810913, + "loss": 4.0549, + "nll_loss": 0.9946677684783936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006460950244218111, + "rewards/margins": 0.050629179924726486, + "rewards/rejected": -0.05709013342857361, + "step": 914 + }, + { + "epoch": 0.6327800829875518, + "grad_norm": 5.091923713684082, + "learning_rate": 3.163900414937759e-05, + "log_odds_chosen": 3.6806349754333496, + "log_odds_ratio": -0.27678605914115906, + "logits/chosen": -0.7521594762802124, + "logits/rejected": -0.761591911315918, + "logps/chosen": -0.0646386444568634, + "logps/rejected": -0.819200873374939, + "loss": 4.513, + "nll_loss": 1.1005698442459106, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00646386481821537, + "rewards/margins": 0.07545622438192368, + "rewards/rejected": -0.0819200873374939, + "step": 915 + }, + { + "epoch": 0.6334716459197787, + "grad_norm": 6.1348443031311035, + "learning_rate": 3.167358229598893e-05, + "log_odds_chosen": 2.4885177612304688, + "log_odds_ratio": -0.6434838175773621, + "logits/chosen": -0.7858555316925049, + "logits/rejected": -0.8438479900360107, + "logps/chosen": -0.1499505490064621, + "logps/rejected": -0.693801760673523, + "loss": 3.9574, + "nll_loss": 0.925003170967102, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014995056204497814, + "rewards/margins": 0.05438512563705444, + "rewards/rejected": -0.06938017904758453, + "step": 916 + }, + { + "epoch": 0.6341632088520055, + "grad_norm": 4.475279808044434, + "learning_rate": 3.1708160442600275e-05, + "log_odds_chosen": 3.5078020095825195, + "log_odds_ratio": -0.4594971537590027, + "logits/chosen": -0.6189094185829163, + "logits/rejected": -0.638657808303833, + "logps/chosen": -0.11998427659273148, + "logps/rejected": -0.5195710062980652, + "loss": 3.0219, + "nll_loss": 0.7095255851745605, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011998428963124752, + "rewards/margins": 0.03995867446064949, + "rewards/rejected": -0.05195710062980652, + "step": 917 + }, + { + "epoch": 0.6348547717842323, + "grad_norm": 4.12563419342041, + "learning_rate": 3.1742738589211616e-05, + "log_odds_chosen": 2.7910380363464355, + "log_odds_ratio": -0.3635891079902649, + "logits/chosen": -0.7694523334503174, + "logits/rejected": -0.8305118680000305, + "logps/chosen": -0.1132592186331749, + "logps/rejected": -0.5803558230400085, + "loss": 4.9796, + "nll_loss": 1.2085505723953247, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01132592186331749, + "rewards/margins": 0.04670966416597366, + "rewards/rejected": -0.058035582304000854, + "step": 918 + }, + { + "epoch": 0.6355463347164592, + "grad_norm": 5.34503173828125, + "learning_rate": 3.177731673582296e-05, + "log_odds_chosen": 2.969501495361328, + "log_odds_ratio": -0.17896559834480286, + "logits/chosen": -0.6707146763801575, + "logits/rejected": -0.672049880027771, + "logps/chosen": -0.14243923127651215, + "logps/rejected": -0.5696920156478882, + "loss": 5.5494, + "nll_loss": 1.369441032409668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01424392405897379, + "rewards/margins": 0.04272527992725372, + "rewards/rejected": -0.05696920305490494, + "step": 919 + }, + { + "epoch": 0.636237897648686, + "grad_norm": 4.605804920196533, + "learning_rate": 3.18118948824343e-05, + "log_odds_chosen": 1.0992157459259033, + "log_odds_ratio": -0.33924224972724915, + "logits/chosen": -0.8458801507949829, + "logits/rejected": -0.8768494129180908, + "logps/chosen": -0.10855139791965485, + "logps/rejected": -0.28552332520484924, + "loss": 5.0427, + "nll_loss": 1.2267569303512573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010855140164494514, + "rewards/margins": 0.01769719272851944, + "rewards/rejected": -0.028552331030368805, + "step": 920 + }, + { + "epoch": 0.6369294605809128, + "grad_norm": 4.734560966491699, + "learning_rate": 3.184647302904564e-05, + "log_odds_chosen": 3.045750856399536, + "log_odds_ratio": -0.5812844634056091, + "logits/chosen": -0.5721637010574341, + "logits/rejected": -0.5491136312484741, + "logps/chosen": -0.10722295939922333, + "logps/rejected": -0.4818299412727356, + "loss": 4.2084, + "nll_loss": 0.9939660429954529, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010722294449806213, + "rewards/margins": 0.03746069595217705, + "rewards/rejected": -0.04818298667669296, + "step": 921 + }, + { + "epoch": 0.6376210235131397, + "grad_norm": 5.207472801208496, + "learning_rate": 3.188105117565698e-05, + "log_odds_chosen": 1.3426095247268677, + "log_odds_ratio": -0.6640761494636536, + "logits/chosen": -0.6079537868499756, + "logits/rejected": -0.6411263346672058, + "logps/chosen": -0.15399664640426636, + "logps/rejected": -0.3464754819869995, + "loss": 5.4171, + "nll_loss": 1.2878714799880981, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015399663709104061, + "rewards/margins": 0.019247887656092644, + "rewards/rejected": -0.03464755043387413, + "step": 922 + }, + { + "epoch": 0.6383125864453665, + "grad_norm": 4.346343994140625, + "learning_rate": 3.1915629322268324e-05, + "log_odds_chosen": 3.3377137184143066, + "log_odds_ratio": -0.1723647117614746, + "logits/chosen": -0.7857403755187988, + "logits/rejected": -0.7861360907554626, + "logps/chosen": -0.05585169792175293, + "logps/rejected": -0.5312471389770508, + "loss": 4.2025, + "nll_loss": 1.0333845615386963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005585169419646263, + "rewards/margins": 0.047539543360471725, + "rewards/rejected": -0.05312471464276314, + "step": 923 + }, + { + "epoch": 0.6390041493775933, + "grad_norm": 4.295398235321045, + "learning_rate": 3.1950207468879666e-05, + "log_odds_chosen": 2.227273941040039, + "log_odds_ratio": -0.32186567783355713, + "logits/chosen": -0.8246999382972717, + "logits/rejected": -0.8642113208770752, + "logps/chosen": -0.17864662408828735, + "logps/rejected": -0.7396684885025024, + "loss": 3.9963, + "nll_loss": 0.9668830633163452, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017864665016531944, + "rewards/margins": 0.05610219016671181, + "rewards/rejected": -0.0739668533205986, + "step": 924 + }, + { + "epoch": 0.6396957123098201, + "grad_norm": 4.727065086364746, + "learning_rate": 3.198478561549101e-05, + "log_odds_chosen": 0.795332133769989, + "log_odds_ratio": -0.49990129470825195, + "logits/chosen": -0.6169123649597168, + "logits/rejected": -0.6310234665870667, + "logps/chosen": -0.15110734105110168, + "logps/rejected": -0.28500136733055115, + "loss": 3.7686, + "nll_loss": 0.8921705484390259, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015110732987523079, + "rewards/margins": 0.01338940393179655, + "rewards/rejected": -0.028500137850642204, + "step": 925 + }, + { + "epoch": 0.640387275242047, + "grad_norm": 4.3016180992126465, + "learning_rate": 3.201936376210235e-05, + "log_odds_chosen": 2.2576334476470947, + "log_odds_ratio": -0.2811516225337982, + "logits/chosen": -0.672473669052124, + "logits/rejected": -0.7026387453079224, + "logps/chosen": -0.11066614091396332, + "logps/rejected": -0.376792311668396, + "loss": 4.3925, + "nll_loss": 1.0700031518936157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011066613718867302, + "rewards/margins": 0.026612618938088417, + "rewards/rejected": -0.03767923265695572, + "step": 926 + }, + { + "epoch": 0.6410788381742739, + "grad_norm": 4.539062023162842, + "learning_rate": 3.20539419087137e-05, + "log_odds_chosen": 1.6754204034805298, + "log_odds_ratio": -0.29951563477516174, + "logits/chosen": -1.038869857788086, + "logits/rejected": -1.0374137163162231, + "logps/chosen": -0.13555291295051575, + "logps/rejected": -0.4391994774341583, + "loss": 4.6734, + "nll_loss": 1.1384084224700928, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013555290177464485, + "rewards/margins": 0.030364658683538437, + "rewards/rejected": -0.04391995072364807, + "step": 927 + }, + { + "epoch": 0.6417704011065007, + "grad_norm": 3.144559621810913, + "learning_rate": 3.208852005532504e-05, + "log_odds_chosen": 3.9733846187591553, + "log_odds_ratio": -0.28225821256637573, + "logits/chosen": -0.5854853391647339, + "logits/rejected": -0.5669970512390137, + "logps/chosen": -0.08290040493011475, + "logps/rejected": -0.5470890998840332, + "loss": 3.6142, + "nll_loss": 0.8753182888031006, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008290041238069534, + "rewards/margins": 0.046418868005275726, + "rewards/rejected": -0.05470890551805496, + "step": 928 + }, + { + "epoch": 0.6424619640387276, + "grad_norm": 3.229943037033081, + "learning_rate": 3.212309820193638e-05, + "log_odds_chosen": 5.208747863769531, + "log_odds_ratio": -0.2483353614807129, + "logits/chosen": -0.9289498329162598, + "logits/rejected": -0.9372609257698059, + "logps/chosen": -0.04052822291851044, + "logps/rejected": -0.8483723402023315, + "loss": 4.7465, + "nll_loss": 1.1617987155914307, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004052822478115559, + "rewards/margins": 0.08078441023826599, + "rewards/rejected": -0.08483723551034927, + "step": 929 + }, + { + "epoch": 0.6431535269709544, + "grad_norm": 10.269536972045898, + "learning_rate": 3.215767634854772e-05, + "log_odds_chosen": 0.1895618438720703, + "log_odds_ratio": -1.680237889289856, + "logits/chosen": -0.7783693671226501, + "logits/rejected": -0.7193617820739746, + "logps/chosen": -0.32872065901756287, + "logps/rejected": -0.28012144565582275, + "loss": 4.6799, + "nll_loss": 1.001959204673767, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03287206590175629, + "rewards/margins": -0.004859922453761101, + "rewards/rejected": -0.028012147173285484, + "step": 930 + }, + { + "epoch": 0.6438450899031812, + "grad_norm": 4.42756462097168, + "learning_rate": 3.219225449515906e-05, + "log_odds_chosen": 2.1525275707244873, + "log_odds_ratio": -0.4749143719673157, + "logits/chosen": -0.4051012396812439, + "logits/rejected": -0.48455822467803955, + "logps/chosen": -0.15239334106445312, + "logps/rejected": -0.3337353467941284, + "loss": 3.7849, + "nll_loss": 0.8987392783164978, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015239333733916283, + "rewards/margins": 0.01813420280814171, + "rewards/rejected": -0.03337353840470314, + "step": 931 + }, + { + "epoch": 0.6445366528354081, + "grad_norm": 5.184959888458252, + "learning_rate": 3.2226832641770405e-05, + "log_odds_chosen": 2.667898654937744, + "log_odds_ratio": -0.5918496251106262, + "logits/chosen": -0.2000243365764618, + "logits/rejected": -0.23744788765907288, + "logps/chosen": -0.11377274245023727, + "logps/rejected": -0.44666174054145813, + "loss": 3.711, + "nll_loss": 0.8685758113861084, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011377274990081787, + "rewards/margins": 0.033288899809122086, + "rewards/rejected": -0.04466617479920387, + "step": 932 + }, + { + "epoch": 0.6452282157676349, + "grad_norm": 3.8456571102142334, + "learning_rate": 3.2261410788381746e-05, + "log_odds_chosen": 1.2180832624435425, + "log_odds_ratio": -0.4285384714603424, + "logits/chosen": -0.6114488244056702, + "logits/rejected": -0.6289840936660767, + "logps/chosen": -0.16820138692855835, + "logps/rejected": -0.4128754138946533, + "loss": 4.6335, + "nll_loss": 1.1155246496200562, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016820138320326805, + "rewards/margins": 0.024467404931783676, + "rewards/rejected": -0.04128754511475563, + "step": 933 + }, + { + "epoch": 0.6459197786998617, + "grad_norm": 7.9818806648254395, + "learning_rate": 3.229598893499309e-05, + "log_odds_chosen": 2.1372387409210205, + "log_odds_ratio": -0.6383996605873108, + "logits/chosen": -0.5187885761260986, + "logits/rejected": -0.5804282426834106, + "logps/chosen": -0.16250097751617432, + "logps/rejected": -0.6418223977088928, + "loss": 4.8151, + "nll_loss": 1.1399288177490234, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016250096261501312, + "rewards/margins": 0.04793214797973633, + "rewards/rejected": -0.06418224424123764, + "step": 934 + }, + { + "epoch": 0.6466113416320886, + "grad_norm": 6.263998985290527, + "learning_rate": 3.233056708160443e-05, + "log_odds_chosen": 2.2691001892089844, + "log_odds_ratio": -0.6636475324630737, + "logits/chosen": -0.638105571269989, + "logits/rejected": -0.6674371957778931, + "logps/chosen": -0.1204868033528328, + "logps/rejected": -0.5415738821029663, + "loss": 4.1406, + "nll_loss": 0.9687949419021606, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012048681266605854, + "rewards/margins": 0.04210871085524559, + "rewards/rejected": -0.05415739119052887, + "step": 935 + }, + { + "epoch": 0.6473029045643154, + "grad_norm": 5.25544548034668, + "learning_rate": 3.236514522821577e-05, + "log_odds_chosen": 3.077738046646118, + "log_odds_ratio": -0.4996723234653473, + "logits/chosen": -0.34365952014923096, + "logits/rejected": -0.3545984923839569, + "logps/chosen": -0.11812933534383774, + "logps/rejected": -0.3927255868911743, + "loss": 3.6792, + "nll_loss": 0.869831919670105, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011812932789325714, + "rewards/margins": 0.027459625154733658, + "rewards/rejected": -0.03927256166934967, + "step": 936 + }, + { + "epoch": 0.6479944674965422, + "grad_norm": 3.740233898162842, + "learning_rate": 3.239972337482711e-05, + "log_odds_chosen": 3.347403049468994, + "log_odds_ratio": -0.30564332008361816, + "logits/chosen": -0.6202876567840576, + "logits/rejected": -0.6746382117271423, + "logps/chosen": -0.09465580433607101, + "logps/rejected": -0.46282535791397095, + "loss": 4.0773, + "nll_loss": 0.9887527227401733, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009465579874813557, + "rewards/margins": 0.036816954612731934, + "rewards/rejected": -0.04628252983093262, + "step": 937 + }, + { + "epoch": 0.648686030428769, + "grad_norm": 4.68813419342041, + "learning_rate": 3.2434301521438454e-05, + "log_odds_chosen": 2.7533187866210938, + "log_odds_ratio": -0.23865221440792084, + "logits/chosen": -0.651775598526001, + "logits/rejected": -0.6854273080825806, + "logps/chosen": -0.10296142846345901, + "logps/rejected": -0.5382181406021118, + "loss": 5.4118, + "nll_loss": 1.3290754556655884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010296143591403961, + "rewards/margins": 0.04352566972374916, + "rewards/rejected": -0.05382181331515312, + "step": 938 + }, + { + "epoch": 0.6493775933609959, + "grad_norm": 5.684381484985352, + "learning_rate": 3.2468879668049796e-05, + "log_odds_chosen": 0.0025558993220329285, + "log_odds_ratio": -0.8680828809738159, + "logits/chosen": -0.7046631574630737, + "logits/rejected": -0.7345783114433289, + "logps/chosen": -0.14862655103206635, + "logps/rejected": -0.18110089004039764, + "loss": 6.4886, + "nll_loss": 1.5353366136550903, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014862654730677605, + "rewards/margins": 0.003247436136007309, + "rewards/rejected": -0.018110090866684914, + "step": 939 + }, + { + "epoch": 0.6500691562932227, + "grad_norm": 5.221529483795166, + "learning_rate": 3.250345781466114e-05, + "log_odds_chosen": 0.3629959523677826, + "log_odds_ratio": -0.6458583474159241, + "logits/chosen": -0.5722988247871399, + "logits/rejected": -0.5610537528991699, + "logps/chosen": -0.156322181224823, + "logps/rejected": -0.19696059823036194, + "loss": 4.7117, + "nll_loss": 1.1133334636688232, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01563221588730812, + "rewards/margins": 0.004063841886818409, + "rewards/rejected": -0.019696058705449104, + "step": 940 + }, + { + "epoch": 0.6507607192254495, + "grad_norm": 3.8440189361572266, + "learning_rate": 3.253803596127248e-05, + "log_odds_chosen": 1.7516545057296753, + "log_odds_ratio": -0.41569817066192627, + "logits/chosen": -0.7494470477104187, + "logits/rejected": -0.7926744222640991, + "logps/chosen": -0.1611669659614563, + "logps/rejected": -0.449934184551239, + "loss": 4.4423, + "nll_loss": 1.06900155544281, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01611669547855854, + "rewards/margins": 0.02887672558426857, + "rewards/rejected": -0.04499341920018196, + "step": 941 + }, + { + "epoch": 0.6514522821576764, + "grad_norm": 3.899761199951172, + "learning_rate": 3.257261410788382e-05, + "log_odds_chosen": 1.4607198238372803, + "log_odds_ratio": -0.3697126507759094, + "logits/chosen": -0.4832645654678345, + "logits/rejected": -0.5157491564750671, + "logps/chosen": -0.16678622364997864, + "logps/rejected": -0.49319201707839966, + "loss": 4.1484, + "nll_loss": 1.0001217126846313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016678621992468834, + "rewards/margins": 0.03264058008790016, + "rewards/rejected": -0.049319203943014145, + "step": 942 + }, + { + "epoch": 0.6521438450899032, + "grad_norm": 3.763258218765259, + "learning_rate": 3.260719225449516e-05, + "log_odds_chosen": 3.7982499599456787, + "log_odds_ratio": -0.47766488790512085, + "logits/chosen": -0.4156723916530609, + "logits/rejected": -0.4465107023715973, + "logps/chosen": -0.10961335897445679, + "logps/rejected": -0.44413048028945923, + "loss": 4.2851, + "nll_loss": 1.0235079526901245, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010961336083710194, + "rewards/margins": 0.033451713621616364, + "rewards/rejected": -0.04441304877400398, + "step": 943 + }, + { + "epoch": 0.65283540802213, + "grad_norm": 3.1957039833068848, + "learning_rate": 3.2641770401106504e-05, + "log_odds_chosen": 3.5332484245300293, + "log_odds_ratio": -0.33818644285202026, + "logits/chosen": -0.6256710886955261, + "logits/rejected": -0.6820761561393738, + "logps/chosen": -0.06437689810991287, + "logps/rejected": -0.5691992044448853, + "loss": 3.97, + "nll_loss": 0.9586867690086365, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006437689997255802, + "rewards/margins": 0.050482235848903656, + "rewards/rejected": -0.056919924914836884, + "step": 944 + }, + { + "epoch": 0.6535269709543569, + "grad_norm": 3.3025779724121094, + "learning_rate": 3.2676348547717845e-05, + "log_odds_chosen": 2.6594409942626953, + "log_odds_ratio": -0.34340739250183105, + "logits/chosen": -0.3092968165874481, + "logits/rejected": -0.3505038321018219, + "logps/chosen": -0.1379614770412445, + "logps/rejected": -0.5781947374343872, + "loss": 3.5048, + "nll_loss": 0.8418477773666382, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013796146959066391, + "rewards/margins": 0.04402332752943039, + "rewards/rejected": -0.05781947076320648, + "step": 945 + }, + { + "epoch": 0.6542185338865837, + "grad_norm": 7.668539524078369, + "learning_rate": 3.271092669432919e-05, + "log_odds_chosen": 1.77287757396698, + "log_odds_ratio": -0.6184110641479492, + "logits/chosen": -0.5389738082885742, + "logits/rejected": -0.5569124817848206, + "logps/chosen": -0.2962481677532196, + "logps/rejected": -0.37920287251472473, + "loss": 3.9189, + "nll_loss": 0.9178899526596069, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02962481416761875, + "rewards/margins": 0.008295468986034393, + "rewards/rejected": -0.037920285016298294, + "step": 946 + }, + { + "epoch": 0.6549100968188105, + "grad_norm": 3.6975250244140625, + "learning_rate": 3.274550484094053e-05, + "log_odds_chosen": 3.4721813201904297, + "log_odds_ratio": -0.22020241618156433, + "logits/chosen": -0.5824014544487, + "logits/rejected": -0.636989176273346, + "logps/chosen": -0.09637489169836044, + "logps/rejected": -0.657294511795044, + "loss": 3.4929, + "nll_loss": 0.8512168526649475, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00963748898357153, + "rewards/margins": 0.05609196051955223, + "rewards/rejected": -0.06572945415973663, + "step": 947 + }, + { + "epoch": 0.6556016597510373, + "grad_norm": 4.219089508056641, + "learning_rate": 3.278008298755187e-05, + "log_odds_chosen": 1.8522963523864746, + "log_odds_ratio": -0.2913023829460144, + "logits/chosen": -0.4794574975967407, + "logits/rejected": -0.4770664572715759, + "logps/chosen": -0.09961166977882385, + "logps/rejected": -0.43963801860809326, + "loss": 5.0723, + "nll_loss": 1.2389488220214844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009961167350411415, + "rewards/margins": 0.0340026319026947, + "rewards/rejected": -0.04396379739046097, + "step": 948 + }, + { + "epoch": 0.6562932226832642, + "grad_norm": 6.227601051330566, + "learning_rate": 3.281466113416321e-05, + "log_odds_chosen": 1.6492278575897217, + "log_odds_ratio": -0.4346155524253845, + "logits/chosen": -0.3798789978027344, + "logits/rejected": -0.3958047926425934, + "logps/chosen": -0.10809013247489929, + "logps/rejected": -0.4654456377029419, + "loss": 5.3598, + "nll_loss": 1.296478033065796, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010809013620018959, + "rewards/margins": 0.03573554754257202, + "rewards/rejected": -0.04654456302523613, + "step": 949 + }, + { + "epoch": 0.656984785615491, + "grad_norm": 4.8800482749938965, + "learning_rate": 3.284923928077455e-05, + "log_odds_chosen": 1.9015074968338013, + "log_odds_ratio": -0.40058067440986633, + "logits/chosen": -0.6047090888023376, + "logits/rejected": -0.6340633630752563, + "logps/chosen": -0.13578994572162628, + "logps/rejected": -0.4089638888835907, + "loss": 5.4725, + "nll_loss": 1.3280658721923828, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013578995130956173, + "rewards/margins": 0.02731739543378353, + "rewards/rejected": -0.04089638963341713, + "step": 950 + }, + { + "epoch": 0.6576763485477178, + "grad_norm": 4.019526481628418, + "learning_rate": 3.2883817427385895e-05, + "log_odds_chosen": 2.497021198272705, + "log_odds_ratio": -0.26466888189315796, + "logits/chosen": -0.5571470260620117, + "logits/rejected": -0.5579490661621094, + "logps/chosen": -0.09435532242059708, + "logps/rejected": -0.4960017800331116, + "loss": 4.1844, + "nll_loss": 1.0196443796157837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009435532614588737, + "rewards/margins": 0.04016464576125145, + "rewards/rejected": -0.04960017651319504, + "step": 951 + }, + { + "epoch": 0.6583679114799447, + "grad_norm": 3.2139434814453125, + "learning_rate": 3.2918395573997236e-05, + "log_odds_chosen": 1.3813042640686035, + "log_odds_ratio": -0.3880887031555176, + "logits/chosen": -0.6013690233230591, + "logits/rejected": -0.5642503499984741, + "logps/chosen": -0.07664715498685837, + "logps/rejected": -0.2166944146156311, + "loss": 4.2634, + "nll_loss": 1.027036190032959, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007664714939892292, + "rewards/margins": 0.014004725962877274, + "rewards/rejected": -0.02166944183409214, + "step": 952 + }, + { + "epoch": 0.6590594744121715, + "grad_norm": 3.341346502304077, + "learning_rate": 3.295297372060858e-05, + "log_odds_chosen": 3.3636043071746826, + "log_odds_ratio": -0.30477118492126465, + "logits/chosen": -0.7266377806663513, + "logits/rejected": -0.7696323990821838, + "logps/chosen": -0.08241377025842667, + "logps/rejected": -0.49398496747016907, + "loss": 3.9865, + "nll_loss": 0.966143012046814, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008241376839578152, + "rewards/margins": 0.04115711897611618, + "rewards/rejected": -0.04939849674701691, + "step": 953 + }, + { + "epoch": 0.6597510373443983, + "grad_norm": 3.837860107421875, + "learning_rate": 3.298755186721992e-05, + "log_odds_chosen": 3.0661492347717285, + "log_odds_ratio": -0.33581408858299255, + "logits/chosen": -0.4582061171531677, + "logits/rejected": -0.47436198592185974, + "logps/chosen": -0.09099718928337097, + "logps/rejected": -0.45954978466033936, + "loss": 3.3755, + "nll_loss": 0.8102924823760986, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009099719114601612, + "rewards/margins": 0.03685525804758072, + "rewards/rejected": -0.045954976230859756, + "step": 954 + }, + { + "epoch": 0.6604426002766252, + "grad_norm": 5.162682056427002, + "learning_rate": 3.302213001383126e-05, + "log_odds_chosen": 3.169241428375244, + "log_odds_ratio": -0.2741418182849884, + "logits/chosen": -0.7861517667770386, + "logits/rejected": -0.7951920628547668, + "logps/chosen": -0.09555049985647202, + "logps/rejected": -0.6090150475502014, + "loss": 5.6519, + "nll_loss": 1.38556969165802, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009555051103234291, + "rewards/margins": 0.05134645104408264, + "rewards/rejected": -0.06090150400996208, + "step": 955 + }, + { + "epoch": 0.661134163208852, + "grad_norm": 2.37520170211792, + "learning_rate": 3.30567081604426e-05, + "log_odds_chosen": 4.856169700622559, + "log_odds_ratio": -0.23580773174762726, + "logits/chosen": -0.3492242991924286, + "logits/rejected": -0.34554314613342285, + "logps/chosen": -0.06726567447185516, + "logps/rejected": -0.5879815816879272, + "loss": 3.3724, + "nll_loss": 0.8195255398750305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006726567167788744, + "rewards/margins": 0.05207158997654915, + "rewards/rejected": -0.058798156678676605, + "step": 956 + }, + { + "epoch": 0.6618257261410788, + "grad_norm": 4.020497798919678, + "learning_rate": 3.3091286307053944e-05, + "log_odds_chosen": 2.5861258506774902, + "log_odds_ratio": -0.47657981514930725, + "logits/chosen": -0.330152302980423, + "logits/rejected": -0.3692672550678253, + "logps/chosen": -0.13104590773582458, + "logps/rejected": -0.30247101187705994, + "loss": 4.1376, + "nll_loss": 0.9867503643035889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013104591518640518, + "rewards/margins": 0.017142511904239655, + "rewards/rejected": -0.030247103422880173, + "step": 957 + }, + { + "epoch": 0.6625172890733056, + "grad_norm": 4.300625801086426, + "learning_rate": 3.3125864453665286e-05, + "log_odds_chosen": 3.64050030708313, + "log_odds_ratio": -0.2507421374320984, + "logits/chosen": -0.21320414543151855, + "logits/rejected": -0.2004971206188202, + "logps/chosen": -0.04882103577256203, + "logps/rejected": -0.4051501452922821, + "loss": 3.7638, + "nll_loss": 0.9158720374107361, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004882104694843292, + "rewards/margins": 0.0356329083442688, + "rewards/rejected": -0.04051501303911209, + "step": 958 + }, + { + "epoch": 0.6632088520055325, + "grad_norm": 3.524125814437866, + "learning_rate": 3.316044260027663e-05, + "log_odds_chosen": 2.5185976028442383, + "log_odds_ratio": -0.3382371664047241, + "logits/chosen": -0.41052520275115967, + "logits/rejected": -0.41902029514312744, + "logps/chosen": -0.08878672868013382, + "logps/rejected": -0.34096887707710266, + "loss": 3.3122, + "nll_loss": 0.7942249178886414, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008878673426806927, + "rewards/margins": 0.025218214839696884, + "rewards/rejected": -0.034096889197826385, + "step": 959 + }, + { + "epoch": 0.6639004149377593, + "grad_norm": 3.816377878189087, + "learning_rate": 3.319502074688797e-05, + "log_odds_chosen": 3.3016445636749268, + "log_odds_ratio": -0.4189302325248718, + "logits/chosen": -0.4831903874874115, + "logits/rejected": -0.49403488636016846, + "logps/chosen": -0.14972633123397827, + "logps/rejected": -0.456408828496933, + "loss": 3.5794, + "nll_loss": 0.8529676198959351, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014972632750868797, + "rewards/margins": 0.03066825307905674, + "rewards/rejected": -0.04564088582992554, + "step": 960 + }, + { + "epoch": 0.6645919778699861, + "grad_norm": 6.777546405792236, + "learning_rate": 3.322959889349931e-05, + "log_odds_chosen": 0.5140173435211182, + "log_odds_ratio": -1.0868242979049683, + "logits/chosen": -0.7206689119338989, + "logits/rejected": -0.7224574089050293, + "logps/chosen": -0.19908304512500763, + "logps/rejected": -0.3203428387641907, + "loss": 3.4081, + "nll_loss": 0.7433361411094666, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019908303394913673, + "rewards/margins": 0.012125976383686066, + "rewards/rejected": -0.03203428164124489, + "step": 961 + }, + { + "epoch": 0.665283540802213, + "grad_norm": 5.210657119750977, + "learning_rate": 3.326417704011065e-05, + "log_odds_chosen": 3.3200583457946777, + "log_odds_ratio": -0.4891462028026581, + "logits/chosen": -0.5053502917289734, + "logits/rejected": -0.5233083367347717, + "logps/chosen": -0.1546093374490738, + "logps/rejected": -0.7421329021453857, + "loss": 3.9954, + "nll_loss": 0.9499325752258301, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015460933558642864, + "rewards/margins": 0.058752357959747314, + "rewards/rejected": -0.07421329617500305, + "step": 962 + }, + { + "epoch": 0.6659751037344398, + "grad_norm": 4.276523113250732, + "learning_rate": 3.329875518672199e-05, + "log_odds_chosen": 2.442716121673584, + "log_odds_ratio": -0.5946823954582214, + "logits/chosen": -0.3844939172267914, + "logits/rejected": -0.3767719864845276, + "logps/chosen": -0.11018381267786026, + "logps/rejected": -0.28107550740242004, + "loss": 4.1964, + "nll_loss": 0.9896374940872192, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011018382385373116, + "rewards/margins": 0.01708916947245598, + "rewards/rejected": -0.028107551857829094, + "step": 963 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 4.2431864738464355, + "learning_rate": 3.3333333333333335e-05, + "log_odds_chosen": 4.00262975692749, + "log_odds_ratio": -0.17702579498291016, + "logits/chosen": -0.20652252435684204, + "logits/rejected": -0.1833350956439972, + "logps/chosen": -0.08886405825614929, + "logps/rejected": -0.554391086101532, + "loss": 3.8919, + "nll_loss": 0.9552844762802124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008886406198143959, + "rewards/margins": 0.04655269905924797, + "rewards/rejected": -0.05543910712003708, + "step": 964 + }, + { + "epoch": 0.6673582295988935, + "grad_norm": 4.64008092880249, + "learning_rate": 3.3367911479944676e-05, + "log_odds_chosen": 4.577019691467285, + "log_odds_ratio": -0.2833351194858551, + "logits/chosen": -0.4747796952724457, + "logits/rejected": -0.4827622175216675, + "logps/chosen": -0.07018810510635376, + "logps/rejected": -0.5580646395683289, + "loss": 3.5238, + "nll_loss": 0.8526178002357483, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007018811535090208, + "rewards/margins": 0.04878764972090721, + "rewards/rejected": -0.055806465446949005, + "step": 965 + }, + { + "epoch": 0.6680497925311203, + "grad_norm": 4.885814666748047, + "learning_rate": 3.340248962655602e-05, + "log_odds_chosen": 1.4906724691390991, + "log_odds_ratio": -0.3242151439189911, + "logits/chosen": -0.8521069288253784, + "logits/rejected": -0.8305846452713013, + "logps/chosen": -0.10241183638572693, + "logps/rejected": -0.30071839690208435, + "loss": 4.6939, + "nll_loss": 1.1410484313964844, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010241183452308178, + "rewards/margins": 0.019830655306577682, + "rewards/rejected": -0.030071841552853584, + "step": 966 + }, + { + "epoch": 0.6687413554633471, + "grad_norm": 4.377496242523193, + "learning_rate": 3.343706777316736e-05, + "log_odds_chosen": 1.4498246908187866, + "log_odds_ratio": -0.43261057138442993, + "logits/chosen": -0.8319251537322998, + "logits/rejected": -0.8602160215377808, + "logps/chosen": -0.1076955646276474, + "logps/rejected": -0.38744795322418213, + "loss": 4.8181, + "nll_loss": 1.1612600088119507, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0107695572078228, + "rewards/margins": 0.027975236997008324, + "rewards/rejected": -0.03874479606747627, + "step": 967 + }, + { + "epoch": 0.669432918395574, + "grad_norm": 5.273506164550781, + "learning_rate": 3.34716459197787e-05, + "log_odds_chosen": 2.5594263076782227, + "log_odds_ratio": -0.44265487790107727, + "logits/chosen": -0.827867865562439, + "logits/rejected": -0.8279743194580078, + "logps/chosen": -0.15520818531513214, + "logps/rejected": -0.5966611504554749, + "loss": 4.861, + "nll_loss": 1.170979380607605, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015520821325480938, + "rewards/margins": 0.04414529725909233, + "rewards/rejected": -0.059666119515895844, + "step": 968 + }, + { + "epoch": 0.6701244813278008, + "grad_norm": 2.88142991065979, + "learning_rate": 3.350622406639004e-05, + "log_odds_chosen": 3.7562472820281982, + "log_odds_ratio": -0.11612822115421295, + "logits/chosen": -0.774664044380188, + "logits/rejected": -0.7495312690734863, + "logps/chosen": -0.04824502021074295, + "logps/rejected": -0.5066708326339722, + "loss": 3.2582, + "nll_loss": 0.8029303550720215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004824501927942038, + "rewards/margins": 0.04584258794784546, + "rewards/rejected": -0.050667084753513336, + "step": 969 + }, + { + "epoch": 0.6708160442600276, + "grad_norm": 4.272217273712158, + "learning_rate": 3.3540802213001384e-05, + "log_odds_chosen": 1.9701576232910156, + "log_odds_ratio": -0.5143850445747375, + "logits/chosen": -0.7145799398422241, + "logits/rejected": -0.7090466022491455, + "logps/chosen": -0.1732991337776184, + "logps/rejected": -0.506736159324646, + "loss": 4.3177, + "nll_loss": 1.0279784202575684, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01732991263270378, + "rewards/margins": 0.03334370255470276, + "rewards/rejected": -0.05067361518740654, + "step": 970 + }, + { + "epoch": 0.6715076071922544, + "grad_norm": 6.0354390144348145, + "learning_rate": 3.3575380359612726e-05, + "log_odds_chosen": 0.18285652995109558, + "log_odds_ratio": -0.6881224513053894, + "logits/chosen": -0.9916139841079712, + "logits/rejected": -0.9856917262077332, + "logps/chosen": -0.21898028254508972, + "logps/rejected": -0.2937367558479309, + "loss": 5.0391, + "nll_loss": 1.190969467163086, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021898027509450912, + "rewards/margins": 0.00747564947232604, + "rewards/rejected": -0.02937367744743824, + "step": 971 + }, + { + "epoch": 0.6721991701244814, + "grad_norm": 3.3654420375823975, + "learning_rate": 3.360995850622407e-05, + "log_odds_chosen": 2.4569759368896484, + "log_odds_ratio": -0.2657202482223511, + "logits/chosen": -0.3326878547668457, + "logits/rejected": -0.3718082010746002, + "logps/chosen": -0.0956912636756897, + "logps/rejected": -0.3199200928211212, + "loss": 3.6996, + "nll_loss": 0.8983267545700073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00956912711262703, + "rewards/margins": 0.02242288552224636, + "rewards/rejected": -0.03199201449751854, + "step": 972 + }, + { + "epoch": 0.6728907330567082, + "grad_norm": 3.7501866817474365, + "learning_rate": 3.364453665283541e-05, + "log_odds_chosen": 1.6184512376785278, + "log_odds_ratio": -0.3203818202018738, + "logits/chosen": -0.746425986289978, + "logits/rejected": -0.7444495558738708, + "logps/chosen": -0.10139751434326172, + "logps/rejected": -0.3613331913948059, + "loss": 4.0865, + "nll_loss": 0.9895828366279602, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010139752179384232, + "rewards/margins": 0.02599356882274151, + "rewards/rejected": -0.03613331913948059, + "step": 973 + }, + { + "epoch": 0.673582295988935, + "grad_norm": 3.471388816833496, + "learning_rate": 3.367911479944675e-05, + "log_odds_chosen": 3.8406763076782227, + "log_odds_ratio": -0.4341563284397125, + "logits/chosen": -0.47200706601142883, + "logits/rejected": -0.4881913661956787, + "logps/chosen": -0.07415127754211426, + "logps/rejected": -0.40192297101020813, + "loss": 3.2404, + "nll_loss": 0.7666944265365601, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007415127940475941, + "rewards/margins": 0.032777171581983566, + "rewards/rejected": -0.040192294865846634, + "step": 974 + }, + { + "epoch": 0.6742738589211619, + "grad_norm": 3.176514148712158, + "learning_rate": 3.371369294605809e-05, + "log_odds_chosen": 1.1546623706817627, + "log_odds_ratio": -0.39299866557121277, + "logits/chosen": -0.36612606048583984, + "logits/rejected": -0.35044723749160767, + "logps/chosen": -0.16547030210494995, + "logps/rejected": -0.4321480691432953, + "loss": 3.0503, + "nll_loss": 0.7232798337936401, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016547029837965965, + "rewards/margins": 0.026667779311537743, + "rewards/rejected": -0.04321480542421341, + "step": 975 + }, + { + "epoch": 0.6749654218533887, + "grad_norm": 4.0449538230896, + "learning_rate": 3.3748271092669434e-05, + "log_odds_chosen": 3.5713706016540527, + "log_odds_ratio": -0.3042868673801422, + "logits/chosen": -0.46112552285194397, + "logits/rejected": -0.5051918625831604, + "logps/chosen": -0.0884426087141037, + "logps/rejected": -0.48638951778411865, + "loss": 4.5366, + "nll_loss": 1.1037156581878662, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00884426198899746, + "rewards/margins": 0.039794694632291794, + "rewards/rejected": -0.048638954758644104, + "step": 976 + }, + { + "epoch": 0.6756569847856155, + "grad_norm": 3.9816436767578125, + "learning_rate": 3.3782849239280775e-05, + "log_odds_chosen": 3.7466630935668945, + "log_odds_ratio": -0.3648548126220703, + "logits/chosen": -0.6325836777687073, + "logits/rejected": -0.6226431727409363, + "logps/chosen": -0.07730133831501007, + "logps/rejected": -0.7135946154594421, + "loss": 3.2022, + "nll_loss": 0.7640625238418579, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007730133831501007, + "rewards/margins": 0.06362932920455933, + "rewards/rejected": -0.07135946303606033, + "step": 977 + }, + { + "epoch": 0.6763485477178424, + "grad_norm": 3.3771657943725586, + "learning_rate": 3.381742738589212e-05, + "log_odds_chosen": 3.9462733268737793, + "log_odds_ratio": -0.3088147044181824, + "logits/chosen": -0.8675793409347534, + "logits/rejected": -0.8908983469009399, + "logps/chosen": -0.10656030476093292, + "logps/rejected": -0.7457081079483032, + "loss": 3.7328, + "nll_loss": 0.9023140668869019, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010656031779944897, + "rewards/margins": 0.06391478329896927, + "rewards/rejected": -0.07457081228494644, + "step": 978 + }, + { + "epoch": 0.6770401106500692, + "grad_norm": 4.066298007965088, + "learning_rate": 3.385200553250346e-05, + "log_odds_chosen": 2.962782859802246, + "log_odds_ratio": -0.3244636654853821, + "logits/chosen": -0.6073873043060303, + "logits/rejected": -0.6426203846931458, + "logps/chosen": -0.08333338052034378, + "logps/rejected": -0.5626384615898132, + "loss": 3.7632, + "nll_loss": 0.9083600044250488, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008333337493240833, + "rewards/margins": 0.047930508852005005, + "rewards/rejected": -0.05626384913921356, + "step": 979 + }, + { + "epoch": 0.677731673582296, + "grad_norm": 5.754980564117432, + "learning_rate": 3.38865836791148e-05, + "log_odds_chosen": 3.0913007259368896, + "log_odds_ratio": -0.393584668636322, + "logits/chosen": -0.5925887823104858, + "logits/rejected": -0.6561344265937805, + "logps/chosen": -0.1347353160381317, + "logps/rejected": -0.5746136903762817, + "loss": 4.7798, + "nll_loss": 1.1555871963500977, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01347353309392929, + "rewards/margins": 0.043987832963466644, + "rewards/rejected": -0.057461366057395935, + "step": 980 + }, + { + "epoch": 0.6784232365145229, + "grad_norm": 4.209239959716797, + "learning_rate": 3.392116182572614e-05, + "log_odds_chosen": 2.974820613861084, + "log_odds_ratio": -0.3493000268936157, + "logits/chosen": -0.980753481388092, + "logits/rejected": -1.0084547996520996, + "logps/chosen": -0.05953915789723396, + "logps/rejected": -0.46828117966651917, + "loss": 4.2893, + "nll_loss": 1.037407398223877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005953915882855654, + "rewards/margins": 0.04087420180439949, + "rewards/rejected": -0.04682811722159386, + "step": 981 + }, + { + "epoch": 0.6791147994467497, + "grad_norm": 5.047092914581299, + "learning_rate": 3.395573997233748e-05, + "log_odds_chosen": -0.3905660808086395, + "log_odds_ratio": -0.9512245655059814, + "logits/chosen": -0.4087103009223938, + "logits/rejected": -0.38396310806274414, + "logps/chosen": -0.19242680072784424, + "logps/rejected": -0.1307140290737152, + "loss": 4.7201, + "nll_loss": 1.0849030017852783, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.019242681562900543, + "rewards/margins": -0.006171277724206448, + "rewards/rejected": -0.01307140477001667, + "step": 982 + }, + { + "epoch": 0.6798063623789765, + "grad_norm": 5.881921291351318, + "learning_rate": 3.3990318118948825e-05, + "log_odds_chosen": 1.5944758653640747, + "log_odds_ratio": -0.7979428172111511, + "logits/chosen": -0.5588828921318054, + "logits/rejected": -0.5556572675704956, + "logps/chosen": -0.13492585718631744, + "logps/rejected": -0.4783182144165039, + "loss": 4.2007, + "nll_loss": 0.9703859090805054, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013492586091160774, + "rewards/margins": 0.03433923423290253, + "rewards/rejected": -0.04783182218670845, + "step": 983 + }, + { + "epoch": 0.6804979253112033, + "grad_norm": 5.97307014465332, + "learning_rate": 3.4024896265560166e-05, + "log_odds_chosen": 4.040063381195068, + "log_odds_ratio": -0.5749695301055908, + "logits/chosen": -0.43717265129089355, + "logits/rejected": -0.4032226800918579, + "logps/chosen": -0.08708241581916809, + "logps/rejected": -0.4701418876647949, + "loss": 3.4376, + "nll_loss": 0.8019071221351624, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008708241395652294, + "rewards/margins": 0.03830594941973686, + "rewards/rejected": -0.04701419174671173, + "step": 984 + }, + { + "epoch": 0.6811894882434302, + "grad_norm": 4.5437116622924805, + "learning_rate": 3.405947441217151e-05, + "log_odds_chosen": 1.420559287071228, + "log_odds_ratio": -0.32596197724342346, + "logits/chosen": -0.41524261236190796, + "logits/rejected": -0.41757941246032715, + "logps/chosen": -0.1719023883342743, + "logps/rejected": -0.5850471258163452, + "loss": 3.5266, + "nll_loss": 0.8490634560585022, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01719024032354355, + "rewards/margins": 0.04131447523832321, + "rewards/rejected": -0.05850471183657646, + "step": 985 + }, + { + "epoch": 0.681881051175657, + "grad_norm": 4.771402835845947, + "learning_rate": 3.409405255878285e-05, + "log_odds_chosen": 2.0101003646850586, + "log_odds_ratio": -0.5677647590637207, + "logits/chosen": -0.7287096977233887, + "logits/rejected": -0.753067135810852, + "logps/chosen": -0.16982880234718323, + "logps/rejected": -0.48460328578948975, + "loss": 5.6082, + "nll_loss": 1.3452624082565308, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016982881352305412, + "rewards/margins": 0.031477443873882294, + "rewards/rejected": -0.048460330814123154, + "step": 986 + }, + { + "epoch": 0.6825726141078838, + "grad_norm": 4.040125846862793, + "learning_rate": 3.412863070539419e-05, + "log_odds_chosen": 3.0267107486724854, + "log_odds_ratio": -0.4347437024116516, + "logits/chosen": -0.8169975280761719, + "logits/rejected": -0.8093977570533752, + "logps/chosen": -0.11449096351861954, + "logps/rejected": -0.5360022187232971, + "loss": 4.6192, + "nll_loss": 1.1113377809524536, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011449096724390984, + "rewards/margins": 0.04215112701058388, + "rewards/rejected": -0.05360021814703941, + "step": 987 + }, + { + "epoch": 0.6832641770401107, + "grad_norm": 7.9208807945251465, + "learning_rate": 3.416320885200553e-05, + "log_odds_chosen": -0.30854225158691406, + "log_odds_ratio": -1.0147993564605713, + "logits/chosen": -0.2861216962337494, + "logits/rejected": -0.29731225967407227, + "logps/chosen": -0.4028143882751465, + "logps/rejected": -0.21703889966011047, + "loss": 5.3238, + "nll_loss": 1.2294610738754272, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04028144106268883, + "rewards/margins": -0.01857755146920681, + "rewards/rejected": -0.02170388773083687, + "step": 988 + }, + { + "epoch": 0.6839557399723375, + "grad_norm": 5.581965446472168, + "learning_rate": 3.4197786998616874e-05, + "log_odds_chosen": 2.1221306324005127, + "log_odds_ratio": -0.3847159743309021, + "logits/chosen": -0.6367366909980774, + "logits/rejected": -0.6817864775657654, + "logps/chosen": -0.11510385572910309, + "logps/rejected": -0.44107532501220703, + "loss": 4.8327, + "nll_loss": 1.169701099395752, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011510386131703854, + "rewards/margins": 0.032597146928310394, + "rewards/rejected": -0.04410753399133682, + "step": 989 + }, + { + "epoch": 0.6846473029045643, + "grad_norm": 5.5574140548706055, + "learning_rate": 3.4232365145228216e-05, + "log_odds_chosen": 1.8708701133728027, + "log_odds_ratio": -0.7869781851768494, + "logits/chosen": -0.6626830697059631, + "logits/rejected": -0.6924288272857666, + "logps/chosen": -0.21340054273605347, + "logps/rejected": -0.3987637758255005, + "loss": 4.356, + "nll_loss": 1.0102986097335815, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021340053528547287, + "rewards/margins": 0.01853632554411888, + "rewards/rejected": -0.03987637907266617, + "step": 990 + }, + { + "epoch": 0.6853388658367912, + "grad_norm": 3.7821218967437744, + "learning_rate": 3.426694329183956e-05, + "log_odds_chosen": 3.599870204925537, + "log_odds_ratio": -0.39365655183792114, + "logits/chosen": -0.8973872661590576, + "logits/rejected": -0.9051483273506165, + "logps/chosen": -0.14106644690036774, + "logps/rejected": -0.3728795647621155, + "loss": 3.5174, + "nll_loss": 0.83997642993927, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014106645248830318, + "rewards/margins": 0.023181311786174774, + "rewards/rejected": -0.03728795796632767, + "step": 991 + }, + { + "epoch": 0.686030428769018, + "grad_norm": 4.836569786071777, + "learning_rate": 3.43015214384509e-05, + "log_odds_chosen": 1.069951057434082, + "log_odds_ratio": -0.5975240468978882, + "logits/chosen": -0.19024032354354858, + "logits/rejected": -0.17968958616256714, + "logps/chosen": -0.11591685563325882, + "logps/rejected": -0.27811747789382935, + "loss": 4.2014, + "nll_loss": 0.9905920028686523, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011591685004532337, + "rewards/margins": 0.01622006483376026, + "rewards/rejected": -0.027811748906970024, + "step": 992 + }, + { + "epoch": 0.6867219917012448, + "grad_norm": 2.7467453479766846, + "learning_rate": 3.433609958506224e-05, + "log_odds_chosen": 3.485941171646118, + "log_odds_ratio": -0.3185138404369354, + "logits/chosen": -0.394996702671051, + "logits/rejected": -0.4043181836605072, + "logps/chosen": -0.10663348436355591, + "logps/rejected": -0.38438695669174194, + "loss": 3.4745, + "nll_loss": 0.8367683291435242, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010663348250091076, + "rewards/margins": 0.027775347232818604, + "rewards/rejected": -0.038438692688941956, + "step": 993 + }, + { + "epoch": 0.6874135546334716, + "grad_norm": 11.837431907653809, + "learning_rate": 3.437067773167358e-05, + "log_odds_chosen": 2.9741313457489014, + "log_odds_ratio": -0.7015028595924377, + "logits/chosen": -0.34724104404449463, + "logits/rejected": -0.3198005259037018, + "logps/chosen": -0.10866589844226837, + "logps/rejected": -0.4531497359275818, + "loss": 3.1967, + "nll_loss": 0.7290204763412476, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010866588912904263, + "rewards/margins": 0.03444838523864746, + "rewards/rejected": -0.0453149750828743, + "step": 994 + }, + { + "epoch": 0.6881051175656985, + "grad_norm": 3.6138124465942383, + "learning_rate": 3.4405255878284923e-05, + "log_odds_chosen": 2.874924659729004, + "log_odds_ratio": -0.4109228849411011, + "logits/chosen": -0.34337174892425537, + "logits/rejected": -0.34686705470085144, + "logps/chosen": -0.1123964712023735, + "logps/rejected": -0.38113945722579956, + "loss": 4.2389, + "nll_loss": 1.0186246633529663, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01123964786529541, + "rewards/margins": 0.026874300092458725, + "rewards/rejected": -0.038113947957754135, + "step": 995 + }, + { + "epoch": 0.6887966804979253, + "grad_norm": 3.863123893737793, + "learning_rate": 3.4439834024896265e-05, + "log_odds_chosen": 1.4307935237884521, + "log_odds_ratio": -0.7987613081932068, + "logits/chosen": -0.45763593912124634, + "logits/rejected": -0.4397510886192322, + "logps/chosen": -0.17323186993598938, + "logps/rejected": -0.29393959045410156, + "loss": 4.0577, + "nll_loss": 0.9345569610595703, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017323188483715057, + "rewards/margins": 0.012070773169398308, + "rewards/rejected": -0.029393963515758514, + "step": 996 + }, + { + "epoch": 0.6894882434301521, + "grad_norm": 2.736734628677368, + "learning_rate": 3.4474412171507607e-05, + "log_odds_chosen": 1.240825891494751, + "log_odds_ratio": -0.33845093846321106, + "logits/chosen": -0.5953700542449951, + "logits/rejected": -0.6042296886444092, + "logps/chosen": -0.13993823528289795, + "logps/rejected": -0.43376731872558594, + "loss": 4.281, + "nll_loss": 1.0364121198654175, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013993822038173676, + "rewards/margins": 0.029382910579442978, + "rewards/rejected": -0.04337673261761665, + "step": 997 + }, + { + "epoch": 0.690179806362379, + "grad_norm": 2.9601619243621826, + "learning_rate": 3.450899031811895e-05, + "log_odds_chosen": 2.210240125656128, + "log_odds_ratio": -0.4595649838447571, + "logits/chosen": -0.5523463487625122, + "logits/rejected": -0.6149584054946899, + "logps/chosen": -0.07959660142660141, + "logps/rejected": -0.25229543447494507, + "loss": 3.0791, + "nll_loss": 0.7238231301307678, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007959661073982716, + "rewards/margins": 0.017269885167479515, + "rewards/rejected": -0.025229543447494507, + "step": 998 + }, + { + "epoch": 0.6908713692946058, + "grad_norm": 3.568354368209839, + "learning_rate": 3.454356846473029e-05, + "log_odds_chosen": 1.4892652034759521, + "log_odds_ratio": -0.4482731223106384, + "logits/chosen": -0.5335237383842468, + "logits/rejected": -0.5101516246795654, + "logps/chosen": -0.13420335948467255, + "logps/rejected": -0.4021463394165039, + "loss": 3.3719, + "nll_loss": 0.7981423139572144, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01342033687978983, + "rewards/margins": 0.026794295758008957, + "rewards/rejected": -0.04021463543176651, + "step": 999 + }, + { + "epoch": 0.6915629322268326, + "grad_norm": 3.095843553543091, + "learning_rate": 3.457814661134163e-05, + "log_odds_chosen": 4.331266403198242, + "log_odds_ratio": -0.22867438197135925, + "logits/chosen": -0.10455627739429474, + "logits/rejected": -0.1381661295890808, + "logps/chosen": -0.04887394607067108, + "logps/rejected": -0.48447513580322266, + "loss": 2.9581, + "nll_loss": 0.7166623473167419, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004887394607067108, + "rewards/margins": 0.04356011748313904, + "rewards/rejected": -0.048447512090206146, + "step": 1000 + }, + { + "epoch": 0.6922544951590595, + "grad_norm": 3.872911214828491, + "learning_rate": 3.461272475795297e-05, + "log_odds_chosen": 1.9170019626617432, + "log_odds_ratio": -0.5383463501930237, + "logits/chosen": -0.24512067437171936, + "logits/rejected": -0.24772781133651733, + "logps/chosen": -0.10138186067342758, + "logps/rejected": -0.3690054416656494, + "loss": 2.9505, + "nll_loss": 0.6837884187698364, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010138185694813728, + "rewards/margins": 0.026762360706925392, + "rewards/rejected": -0.03690054640173912, + "step": 1001 + }, + { + "epoch": 0.6929460580912863, + "grad_norm": 2.6410980224609375, + "learning_rate": 3.4647302904564314e-05, + "log_odds_chosen": 0.46685516834259033, + "log_odds_ratio": -0.5900712013244629, + "logits/chosen": -0.41452556848526, + "logits/rejected": -0.3784329295158386, + "logps/chosen": -0.13433943688869476, + "logps/rejected": -0.21028949320316315, + "loss": 3.0342, + "nll_loss": 0.6995489597320557, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013433944433927536, + "rewards/margins": 0.007595007307827473, + "rewards/rejected": -0.021028950810432434, + "step": 1002 + }, + { + "epoch": 0.6936376210235131, + "grad_norm": 3.552522897720337, + "learning_rate": 3.4681881051175656e-05, + "log_odds_chosen": 4.3343186378479, + "log_odds_ratio": -0.1376817226409912, + "logits/chosen": -0.17142628133296967, + "logits/rejected": -0.1732548475265503, + "logps/chosen": -0.05082971975207329, + "logps/rejected": -0.39188894629478455, + "loss": 3.8044, + "nll_loss": 0.9373430013656616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005082972347736359, + "rewards/margins": 0.034105923026800156, + "rewards/rejected": -0.039188895374536514, + "step": 1003 + }, + { + "epoch": 0.69432918395574, + "grad_norm": 3.4876062870025635, + "learning_rate": 3.4716459197787e-05, + "log_odds_chosen": 0.7625752091407776, + "log_odds_ratio": -0.45943683385849, + "logits/chosen": -0.6727699637413025, + "logits/rejected": -0.6510109901428223, + "logps/chosen": -0.16815423965454102, + "logps/rejected": -0.35847675800323486, + "loss": 2.9769, + "nll_loss": 0.6982860565185547, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0168154239654541, + "rewards/margins": 0.019032252952456474, + "rewards/rejected": -0.035847678780555725, + "step": 1004 + }, + { + "epoch": 0.6950207468879668, + "grad_norm": 4.408685207366943, + "learning_rate": 3.475103734439834e-05, + "log_odds_chosen": 2.3867549896240234, + "log_odds_ratio": -0.314208984375, + "logits/chosen": -0.6479254364967346, + "logits/rejected": -0.6886224150657654, + "logps/chosen": -0.10548969358205795, + "logps/rejected": -0.4884876310825348, + "loss": 4.383, + "nll_loss": 1.0643311738967896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010548969730734825, + "rewards/margins": 0.038299791514873505, + "rewards/rejected": -0.04884876310825348, + "step": 1005 + }, + { + "epoch": 0.6957123098201936, + "grad_norm": 3.915799617767334, + "learning_rate": 3.478561549100968e-05, + "log_odds_chosen": 3.446237087249756, + "log_odds_ratio": -0.3558885157108307, + "logits/chosen": -0.6331568956375122, + "logits/rejected": -0.6514267921447754, + "logps/chosen": -0.14412257075309753, + "logps/rejected": -0.6114010810852051, + "loss": 3.2002, + "nll_loss": 0.7644554376602173, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014412256889045238, + "rewards/margins": 0.046727851033210754, + "rewards/rejected": -0.061140112578868866, + "step": 1006 + }, + { + "epoch": 0.6964038727524204, + "grad_norm": 4.497232437133789, + "learning_rate": 3.482019363762102e-05, + "log_odds_chosen": 1.839732050895691, + "log_odds_ratio": -0.3929927349090576, + "logits/chosen": -0.3407534062862396, + "logits/rejected": -0.36043086647987366, + "logps/chosen": -0.07634704560041428, + "logps/rejected": -0.40131884813308716, + "loss": 4.2111, + "nll_loss": 1.0134646892547607, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007634705863893032, + "rewards/margins": 0.03249718248844147, + "rewards/rejected": -0.040131889283657074, + "step": 1007 + }, + { + "epoch": 0.6970954356846473, + "grad_norm": 3.4116599559783936, + "learning_rate": 3.4854771784232364e-05, + "log_odds_chosen": 2.439486026763916, + "log_odds_ratio": -0.20432066917419434, + "logits/chosen": -0.6585232615470886, + "logits/rejected": -0.7056906223297119, + "logps/chosen": -0.15550759434700012, + "logps/rejected": -0.7698625922203064, + "loss": 3.8301, + "nll_loss": 0.9370852708816528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015550761483609676, + "rewards/margins": 0.06143549457192421, + "rewards/rejected": -0.07698626071214676, + "step": 1008 + }, + { + "epoch": 0.6977869986168741, + "grad_norm": 4.420785903930664, + "learning_rate": 3.4889349930843705e-05, + "log_odds_chosen": 0.5230880379676819, + "log_odds_ratio": -0.551123321056366, + "logits/chosen": -0.7153730988502502, + "logits/rejected": -0.759360134601593, + "logps/chosen": -0.14089688658714294, + "logps/rejected": -0.31493067741394043, + "loss": 5.1094, + "nll_loss": 1.222233772277832, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014089690521359444, + "rewards/margins": 0.0174033772200346, + "rewards/rejected": -0.03149306774139404, + "step": 1009 + }, + { + "epoch": 0.6984785615491009, + "grad_norm": 4.9378557205200195, + "learning_rate": 3.492392807745505e-05, + "log_odds_chosen": 3.2825350761413574, + "log_odds_ratio": -0.2590652406215668, + "logits/chosen": -0.8463261127471924, + "logits/rejected": -0.9359209537506104, + "logps/chosen": -0.08437243103981018, + "logps/rejected": -0.7198293805122375, + "loss": 5.7714, + "nll_loss": 1.416931390762329, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008437243290245533, + "rewards/margins": 0.06354568898677826, + "rewards/rejected": -0.07198293507099152, + "step": 1010 + }, + { + "epoch": 0.6991701244813278, + "grad_norm": 3.547576427459717, + "learning_rate": 3.495850622406639e-05, + "log_odds_chosen": 2.6140286922454834, + "log_odds_ratio": -0.29810887575149536, + "logits/chosen": -0.7257479429244995, + "logits/rejected": -0.7237606048583984, + "logps/chosen": -0.06791023164987564, + "logps/rejected": -0.41881442070007324, + "loss": 4.1977, + "nll_loss": 1.0196094512939453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006791023537516594, + "rewards/margins": 0.03509042412042618, + "rewards/rejected": -0.04188144952058792, + "step": 1011 + }, + { + "epoch": 0.6998616874135546, + "grad_norm": 4.760746002197266, + "learning_rate": 3.499308437067773e-05, + "log_odds_chosen": 3.2602224349975586, + "log_odds_ratio": -0.4387058615684509, + "logits/chosen": -0.5019223690032959, + "logits/rejected": -0.5217973589897156, + "logps/chosen": -0.08388447761535645, + "logps/rejected": -0.6735737919807434, + "loss": 4.8476, + "nll_loss": 1.1680355072021484, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008388448506593704, + "rewards/margins": 0.058968931436538696, + "rewards/rejected": -0.0673573836684227, + "step": 1012 + }, + { + "epoch": 0.7005532503457814, + "grad_norm": 4.929751873016357, + "learning_rate": 3.502766251728907e-05, + "log_odds_chosen": 1.7852381467819214, + "log_odds_ratio": -0.545844554901123, + "logits/chosen": -0.4042653739452362, + "logits/rejected": -0.44106078147888184, + "logps/chosen": -0.1827096939086914, + "logps/rejected": -0.5603499412536621, + "loss": 4.9799, + "nll_loss": 1.1903879642486572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01827096939086914, + "rewards/margins": 0.03776402026414871, + "rewards/rejected": -0.05603498965501785, + "step": 1013 + }, + { + "epoch": 0.7012448132780082, + "grad_norm": 4.870401382446289, + "learning_rate": 3.506224066390041e-05, + "log_odds_chosen": 3.7482364177703857, + "log_odds_ratio": -0.3262961506843567, + "logits/chosen": -0.794061541557312, + "logits/rejected": -0.7662990093231201, + "logps/chosen": -0.06687057763338089, + "logps/rejected": -0.5857488512992859, + "loss": 4.0956, + "nll_loss": 0.9912663698196411, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006687058135867119, + "rewards/margins": 0.05188782885670662, + "rewards/rejected": -0.05857488512992859, + "step": 1014 + }, + { + "epoch": 0.7019363762102351, + "grad_norm": 5.270098686218262, + "learning_rate": 3.5096818810511755e-05, + "log_odds_chosen": 1.8332812786102295, + "log_odds_ratio": -0.5245056748390198, + "logits/chosen": -0.7074244022369385, + "logits/rejected": -0.7468313574790955, + "logps/chosen": -0.11609043180942535, + "logps/rejected": -0.3391810357570648, + "loss": 4.5547, + "nll_loss": 1.0862311124801636, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011609042063355446, + "rewards/margins": 0.022309059277176857, + "rewards/rejected": -0.0339181050658226, + "step": 1015 + }, + { + "epoch": 0.7026279391424619, + "grad_norm": 4.283554553985596, + "learning_rate": 3.5131396957123096e-05, + "log_odds_chosen": 1.1753572225570679, + "log_odds_ratio": -0.36678510904312134, + "logits/chosen": -0.4675461947917938, + "logits/rejected": -0.479078471660614, + "logps/chosen": -0.12072758376598358, + "logps/rejected": -0.33430609107017517, + "loss": 4.0677, + "nll_loss": 0.980250895023346, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012072758749127388, + "rewards/margins": 0.02135784924030304, + "rewards/rejected": -0.03343060612678528, + "step": 1016 + }, + { + "epoch": 0.7033195020746889, + "grad_norm": 4.776187896728516, + "learning_rate": 3.5165975103734445e-05, + "log_odds_chosen": 3.0475525856018066, + "log_odds_ratio": -0.21066389977931976, + "logits/chosen": -0.6627320051193237, + "logits/rejected": -0.6491464376449585, + "logps/chosen": -0.09881362318992615, + "logps/rejected": -0.5803578495979309, + "loss": 4.6229, + "nll_loss": 1.1346709728240967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00988136138767004, + "rewards/margins": 0.048154428601264954, + "rewards/rejected": -0.05803578719496727, + "step": 1017 + }, + { + "epoch": 0.7040110650069157, + "grad_norm": 3.657235860824585, + "learning_rate": 3.5200553250345786e-05, + "log_odds_chosen": 1.733577847480774, + "log_odds_ratio": -0.5284569263458252, + "logits/chosen": -0.42558619379997253, + "logits/rejected": -0.48167991638183594, + "logps/chosen": -0.1381000578403473, + "logps/rejected": -0.2790611982345581, + "loss": 3.4075, + "nll_loss": 0.7990308403968811, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01381000503897667, + "rewards/margins": 0.014096113853156567, + "rewards/rejected": -0.02790611982345581, + "step": 1018 + }, + { + "epoch": 0.7047026279391425, + "grad_norm": 3.4598958492279053, + "learning_rate": 3.523513139695713e-05, + "log_odds_chosen": 3.601151466369629, + "log_odds_ratio": -0.22872741520404816, + "logits/chosen": -0.6099940538406372, + "logits/rejected": -0.6158989071846008, + "logps/chosen": -0.04661906510591507, + "logps/rejected": -0.6002925634384155, + "loss": 3.4816, + "nll_loss": 0.8475351333618164, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004661906510591507, + "rewards/margins": 0.055367350578308105, + "rewards/rejected": -0.06002925708889961, + "step": 1019 + }, + { + "epoch": 0.7053941908713693, + "grad_norm": 4.966454029083252, + "learning_rate": 3.526970954356847e-05, + "log_odds_chosen": 1.2062649726867676, + "log_odds_ratio": -0.505730390548706, + "logits/chosen": -0.8687747716903687, + "logits/rejected": -0.9171357750892639, + "logps/chosen": -0.15147417783737183, + "logps/rejected": -0.4396016597747803, + "loss": 5.3178, + "nll_loss": 1.2788842916488647, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015147417783737183, + "rewards/margins": 0.028812747448682785, + "rewards/rejected": -0.043960168957710266, + "step": 1020 + }, + { + "epoch": 0.7060857538035962, + "grad_norm": 2.3341596126556396, + "learning_rate": 3.530428769017981e-05, + "log_odds_chosen": 5.564569473266602, + "log_odds_ratio": -0.04462364688515663, + "logits/chosen": -0.21850377321243286, + "logits/rejected": -0.2372198849916458, + "logps/chosen": -0.03681230917572975, + "logps/rejected": -0.7510018348693848, + "loss": 3.1713, + "nll_loss": 0.7883737683296204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036812310572713614, + "rewards/margins": 0.07141894847154617, + "rewards/rejected": -0.07510018348693848, + "step": 1021 + }, + { + "epoch": 0.706777316735823, + "grad_norm": 4.633392333984375, + "learning_rate": 3.533886583679115e-05, + "log_odds_chosen": 4.466066360473633, + "log_odds_ratio": -0.2729628086090088, + "logits/chosen": -0.52900230884552, + "logits/rejected": -0.5783476829528809, + "logps/chosen": -0.06350569427013397, + "logps/rejected": -0.6221787333488464, + "loss": 3.6471, + "nll_loss": 0.8844875693321228, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006350569427013397, + "rewards/margins": 0.055867306888103485, + "rewards/rejected": -0.062217868864536285, + "step": 1022 + }, + { + "epoch": 0.7074688796680498, + "grad_norm": 4.214491367340088, + "learning_rate": 3.5373443983402494e-05, + "log_odds_chosen": 3.1329104900360107, + "log_odds_ratio": -0.3419583737850189, + "logits/chosen": -0.6461799144744873, + "logits/rejected": -0.6264448165893555, + "logps/chosen": -0.0748824030160904, + "logps/rejected": -0.5373539924621582, + "loss": 3.7843, + "nll_loss": 0.9118846654891968, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007488240487873554, + "rewards/margins": 0.04624716192483902, + "rewards/rejected": -0.05373540148139, + "step": 1023 + }, + { + "epoch": 0.7081604426002767, + "grad_norm": 4.893476486206055, + "learning_rate": 3.5408022130013836e-05, + "log_odds_chosen": 0.9490301609039307, + "log_odds_ratio": -0.8348803520202637, + "logits/chosen": -0.8363821506500244, + "logits/rejected": -0.8834267854690552, + "logps/chosen": -0.17236392199993134, + "logps/rejected": -0.4684886634349823, + "loss": 4.0871, + "nll_loss": 0.9382818937301636, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017236391082406044, + "rewards/margins": 0.029612472280859947, + "rewards/rejected": -0.04684887081384659, + "step": 1024 + }, + { + "epoch": 0.7088520055325035, + "grad_norm": 4.346531867980957, + "learning_rate": 3.544260027662518e-05, + "log_odds_chosen": 5.200535297393799, + "log_odds_ratio": -0.0615101084113121, + "logits/chosen": -0.8259227275848389, + "logits/rejected": -0.8945444822311401, + "logps/chosen": -0.018674220889806747, + "logps/rejected": -0.633916437625885, + "loss": 4.3103, + "nll_loss": 1.0714225769042969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018674221355468035, + "rewards/margins": 0.061524223536252975, + "rewards/rejected": -0.06339164078235626, + "step": 1025 + }, + { + "epoch": 0.7095435684647303, + "grad_norm": 3.327759027481079, + "learning_rate": 3.547717842323652e-05, + "log_odds_chosen": 5.512311935424805, + "log_odds_ratio": -0.16451141238212585, + "logits/chosen": -0.8345467448234558, + "logits/rejected": -0.8430370092391968, + "logps/chosen": -0.07801201939582825, + "logps/rejected": -0.8770555257797241, + "loss": 3.289, + "nll_loss": 0.8057981729507446, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007801203057169914, + "rewards/margins": 0.07990435510873795, + "rewards/rejected": -0.08770555257797241, + "step": 1026 + }, + { + "epoch": 0.7102351313969572, + "grad_norm": 3.0856127738952637, + "learning_rate": 3.551175656984786e-05, + "log_odds_chosen": 2.5221033096313477, + "log_odds_ratio": -0.3784581124782562, + "logits/chosen": -0.287615031003952, + "logits/rejected": -0.27814438939094543, + "logps/chosen": -0.15040525794029236, + "logps/rejected": -0.49353331327438354, + "loss": 2.9133, + "nll_loss": 0.6904860734939575, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01504052709788084, + "rewards/margins": 0.03431280702352524, + "rewards/rejected": -0.04935333877801895, + "step": 1027 + }, + { + "epoch": 0.710926694329184, + "grad_norm": 3.6601574420928955, + "learning_rate": 3.55463347164592e-05, + "log_odds_chosen": 1.9526305198669434, + "log_odds_ratio": -0.49178487062454224, + "logits/chosen": -0.574364423751831, + "logits/rejected": -0.5638930797576904, + "logps/chosen": -0.15838921070098877, + "logps/rejected": -0.48088887333869934, + "loss": 3.962, + "nll_loss": 0.9413228034973145, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015838919207453728, + "rewards/margins": 0.03224996477365494, + "rewards/rejected": -0.048088885843753815, + "step": 1028 + }, + { + "epoch": 0.7116182572614108, + "grad_norm": 6.44592809677124, + "learning_rate": 3.558091286307054e-05, + "log_odds_chosen": 3.593794345855713, + "log_odds_ratio": -0.5041420459747314, + "logits/chosen": -0.7599477767944336, + "logits/rejected": -0.7372944355010986, + "logps/chosen": -0.18992546200752258, + "logps/rejected": -0.566834568977356, + "loss": 4.0358, + "nll_loss": 0.9585399627685547, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018992546945810318, + "rewards/margins": 0.0376909077167511, + "rewards/rejected": -0.056683458387851715, + "step": 1029 + }, + { + "epoch": 0.7123098201936376, + "grad_norm": 7.031370639801025, + "learning_rate": 3.5615491009681885e-05, + "log_odds_chosen": 3.2854695320129395, + "log_odds_ratio": -0.435203492641449, + "logits/chosen": -0.45540231466293335, + "logits/rejected": -0.513881504535675, + "logps/chosen": -0.0969720408320427, + "logps/rejected": -0.5342581272125244, + "loss": 3.0684, + "nll_loss": 0.7235726714134216, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0096972044557333, + "rewards/margins": 0.04372861236333847, + "rewards/rejected": -0.05342581868171692, + "step": 1030 + }, + { + "epoch": 0.7130013831258645, + "grad_norm": 6.708185195922852, + "learning_rate": 3.5650069156293226e-05, + "log_odds_chosen": 3.589202642440796, + "log_odds_ratio": -0.20989595353603363, + "logits/chosen": -0.4838123321533203, + "logits/rejected": -0.5778621435165405, + "logps/chosen": -0.10837168246507645, + "logps/rejected": -0.9984228610992432, + "loss": 3.532, + "nll_loss": 0.8619996905326843, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01083716843277216, + "rewards/margins": 0.08900512754917145, + "rewards/rejected": -0.09984229505062103, + "step": 1031 + }, + { + "epoch": 0.7136929460580913, + "grad_norm": 3.7739248275756836, + "learning_rate": 3.568464730290457e-05, + "log_odds_chosen": 2.3249645233154297, + "log_odds_ratio": -0.4324222803115845, + "logits/chosen": -0.8024967908859253, + "logits/rejected": -0.7371588945388794, + "logps/chosen": -0.12025891244411469, + "logps/rejected": -0.7280334234237671, + "loss": 3.3625, + "nll_loss": 0.7973849177360535, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012025891803205013, + "rewards/margins": 0.0607774518430233, + "rewards/rejected": -0.07280334085226059, + "step": 1032 + }, + { + "epoch": 0.7143845089903181, + "grad_norm": 4.37109899520874, + "learning_rate": 3.571922544951591e-05, + "log_odds_chosen": 1.8722158670425415, + "log_odds_ratio": -0.4846808910369873, + "logits/chosen": -0.4866708517074585, + "logits/rejected": -0.5177431106567383, + "logps/chosen": -0.03973466157913208, + "logps/rejected": -0.30102258920669556, + "loss": 4.1712, + "nll_loss": 0.9943400025367737, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003973466344177723, + "rewards/margins": 0.026128794997930527, + "rewards/rejected": -0.030102260410785675, + "step": 1033 + }, + { + "epoch": 0.715076071922545, + "grad_norm": 4.285762786865234, + "learning_rate": 3.575380359612725e-05, + "log_odds_chosen": 3.012889862060547, + "log_odds_ratio": -0.2758004665374756, + "logits/chosen": -0.22630202770233154, + "logits/rejected": -0.21843719482421875, + "logps/chosen": -0.18059399724006653, + "logps/rejected": -0.6662799119949341, + "loss": 3.9736, + "nll_loss": 0.9658151865005493, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018059398978948593, + "rewards/margins": 0.048568591475486755, + "rewards/rejected": -0.06662799417972565, + "step": 1034 + }, + { + "epoch": 0.7157676348547718, + "grad_norm": 5.383205413818359, + "learning_rate": 3.578838174273859e-05, + "log_odds_chosen": 1.5302259922027588, + "log_odds_ratio": -0.45822760462760925, + "logits/chosen": -0.4184732437133789, + "logits/rejected": -0.3977556824684143, + "logps/chosen": -0.15078751742839813, + "logps/rejected": -0.4710010290145874, + "loss": 4.7171, + "nll_loss": 1.133442759513855, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015078751370310783, + "rewards/margins": 0.03202135115861893, + "rewards/rejected": -0.04710010439157486, + "step": 1035 + }, + { + "epoch": 0.7164591977869986, + "grad_norm": 3.5321078300476074, + "learning_rate": 3.5822959889349934e-05, + "log_odds_chosen": 1.6457781791687012, + "log_odds_ratio": -0.3123471438884735, + "logits/chosen": -0.505487859249115, + "logits/rejected": -0.47095245122909546, + "logps/chosen": -0.17606432735919952, + "logps/rejected": -0.6406019926071167, + "loss": 3.0116, + "nll_loss": 0.7216607928276062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017606433480978012, + "rewards/margins": 0.04645375907421112, + "rewards/rejected": -0.06406019628047943, + "step": 1036 + }, + { + "epoch": 0.7171507607192255, + "grad_norm": 4.82590389251709, + "learning_rate": 3.5857538035961276e-05, + "log_odds_chosen": 3.8188605308532715, + "log_odds_ratio": -0.1875026524066925, + "logits/chosen": -0.3388659954071045, + "logits/rejected": -0.4199290871620178, + "logps/chosen": -0.08805263042449951, + "logps/rejected": -0.7061742544174194, + "loss": 3.9242, + "nll_loss": 0.9623033404350281, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00880526378750801, + "rewards/margins": 0.061812154948711395, + "rewards/rejected": -0.0706174224615097, + "step": 1037 + }, + { + "epoch": 0.7178423236514523, + "grad_norm": 4.746621131896973, + "learning_rate": 3.589211618257262e-05, + "log_odds_chosen": 2.7862987518310547, + "log_odds_ratio": -0.8785898685455322, + "logits/chosen": -0.6080033779144287, + "logits/rejected": -0.6072147488594055, + "logps/chosen": -0.12437206506729126, + "logps/rejected": -0.4387931823730469, + "loss": 3.3452, + "nll_loss": 0.7484517693519592, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012437205761671066, + "rewards/margins": 0.03144211322069168, + "rewards/rejected": -0.04387931898236275, + "step": 1038 + }, + { + "epoch": 0.7185338865836791, + "grad_norm": 5.579493522644043, + "learning_rate": 3.592669432918396e-05, + "log_odds_chosen": 3.3001158237457275, + "log_odds_ratio": -0.24173426628112793, + "logits/chosen": -0.777419924736023, + "logits/rejected": -0.7728596925735474, + "logps/chosen": -0.06899379193782806, + "logps/rejected": -0.6735274195671082, + "loss": 4.8171, + "nll_loss": 1.1800899505615234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006899379659444094, + "rewards/margins": 0.06045336276292801, + "rewards/rejected": -0.06735274195671082, + "step": 1039 + }, + { + "epoch": 0.719225449515906, + "grad_norm": 4.938050746917725, + "learning_rate": 3.59612724757953e-05, + "log_odds_chosen": 1.087746262550354, + "log_odds_ratio": -0.5268850326538086, + "logits/chosen": -0.5052182078361511, + "logits/rejected": -0.5489234328269958, + "logps/chosen": -0.17366079986095428, + "logps/rejected": -0.4371531307697296, + "loss": 4.0872, + "nll_loss": 0.969113826751709, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0173660796135664, + "rewards/margins": 0.026349231600761414, + "rewards/rejected": -0.04371531680226326, + "step": 1040 + }, + { + "epoch": 0.7199170124481328, + "grad_norm": 5.435187339782715, + "learning_rate": 3.599585062240664e-05, + "log_odds_chosen": 2.283799171447754, + "log_odds_ratio": -0.5793908834457397, + "logits/chosen": -0.6914126873016357, + "logits/rejected": -0.7008126974105835, + "logps/chosen": -0.1254206746816635, + "logps/rejected": -0.3240630030632019, + "loss": 4.2092, + "nll_loss": 0.9943568110466003, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012542067095637321, + "rewards/margins": 0.01986423321068287, + "rewards/rejected": -0.03240630030632019, + "step": 1041 + }, + { + "epoch": 0.7206085753803596, + "grad_norm": 4.84965705871582, + "learning_rate": 3.6030428769017984e-05, + "log_odds_chosen": 4.489874839782715, + "log_odds_ratio": -0.29299384355545044, + "logits/chosen": -0.6419664025306702, + "logits/rejected": -0.6491258144378662, + "logps/chosen": -0.13277804851531982, + "logps/rejected": -0.714569628238678, + "loss": 4.3872, + "nll_loss": 1.067491888999939, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013277805410325527, + "rewards/margins": 0.058179158717393875, + "rewards/rejected": -0.07145696133375168, + "step": 1042 + }, + { + "epoch": 0.7213001383125864, + "grad_norm": 4.387578010559082, + "learning_rate": 3.6065006915629325e-05, + "log_odds_chosen": 2.601278781890869, + "log_odds_ratio": -0.3603097200393677, + "logits/chosen": -0.5618245601654053, + "logits/rejected": -0.5624284744262695, + "logps/chosen": -0.08398522436618805, + "logps/rejected": -0.556341290473938, + "loss": 4.4761, + "nll_loss": 1.082999587059021, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00839852262288332, + "rewards/margins": 0.04723560810089111, + "rewards/rejected": -0.05563412979245186, + "step": 1043 + }, + { + "epoch": 0.7219917012448133, + "grad_norm": 4.411139965057373, + "learning_rate": 3.609958506224067e-05, + "log_odds_chosen": 2.73284649848938, + "log_odds_ratio": -0.34536677598953247, + "logits/chosen": -0.6625887155532837, + "logits/rejected": -0.6966568231582642, + "logps/chosen": -0.1273365020751953, + "logps/rejected": -0.7267633676528931, + "loss": 3.3589, + "nll_loss": 0.8051877617835999, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012733649462461472, + "rewards/margins": 0.059942688792943954, + "rewards/rejected": -0.07267633825540543, + "step": 1044 + }, + { + "epoch": 0.7226832641770401, + "grad_norm": 5.204629421234131, + "learning_rate": 3.613416320885201e-05, + "log_odds_chosen": 2.503190040588379, + "log_odds_ratio": -0.4112352132797241, + "logits/chosen": -0.7123258113861084, + "logits/rejected": -0.7635508179664612, + "logps/chosen": -0.14418675005435944, + "logps/rejected": -0.5068868398666382, + "loss": 4.7637, + "nll_loss": 1.1498039960861206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014418675564229488, + "rewards/margins": 0.03627001494169235, + "rewards/rejected": -0.05068868398666382, + "step": 1045 + }, + { + "epoch": 0.7233748271092669, + "grad_norm": 4.64717960357666, + "learning_rate": 3.616874135546335e-05, + "log_odds_chosen": 2.7680552005767822, + "log_odds_ratio": -0.22051768004894257, + "logits/chosen": -0.5189613103866577, + "logits/rejected": -0.5583252906799316, + "logps/chosen": -0.08537647873163223, + "logps/rejected": -0.5342350006103516, + "loss": 3.1143, + "nll_loss": 0.7565240859985352, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008537648245692253, + "rewards/margins": 0.04488585889339447, + "rewards/rejected": -0.053423501551151276, + "step": 1046 + }, + { + "epoch": 0.7240663900414938, + "grad_norm": 3.2733585834503174, + "learning_rate": 3.620331950207469e-05, + "log_odds_chosen": 6.08332633972168, + "log_odds_ratio": -0.16748517751693726, + "logits/chosen": -0.35235828161239624, + "logits/rejected": -0.37172678112983704, + "logps/chosen": -0.06972315907478333, + "logps/rejected": -0.7122898101806641, + "loss": 3.2545, + "nll_loss": 0.796885073184967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006972315721213818, + "rewards/margins": 0.06425667554140091, + "rewards/rejected": -0.071228988468647, + "step": 1047 + }, + { + "epoch": 0.7247579529737206, + "grad_norm": 4.058255672454834, + "learning_rate": 3.623789764868603e-05, + "log_odds_chosen": 1.6902698278427124, + "log_odds_ratio": -0.3691096305847168, + "logits/chosen": -0.6874192953109741, + "logits/rejected": -0.6792905330657959, + "logps/chosen": -0.14544987678527832, + "logps/rejected": -0.43885865807533264, + "loss": 4.3441, + "nll_loss": 1.0491060018539429, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014544988051056862, + "rewards/margins": 0.02934087999165058, + "rewards/rejected": -0.04388586804270744, + "step": 1048 + }, + { + "epoch": 0.7254495159059474, + "grad_norm": 5.58115816116333, + "learning_rate": 3.6272475795297375e-05, + "log_odds_chosen": 3.4147226810455322, + "log_odds_ratio": -0.45109570026397705, + "logits/chosen": -0.7348883748054504, + "logits/rejected": -0.7098078727722168, + "logps/chosen": -0.10721461474895477, + "logps/rejected": -0.6155630350112915, + "loss": 4.19, + "nll_loss": 1.0024001598358154, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010721461847424507, + "rewards/margins": 0.05083484202623367, + "rewards/rejected": -0.06155630201101303, + "step": 1049 + }, + { + "epoch": 0.7261410788381742, + "grad_norm": 4.20112419128418, + "learning_rate": 3.6307053941908716e-05, + "log_odds_chosen": 4.499558925628662, + "log_odds_ratio": -0.2270698845386505, + "logits/chosen": -0.23923638463020325, + "logits/rejected": -0.28563469648361206, + "logps/chosen": -0.07210483402013779, + "logps/rejected": -0.4738559126853943, + "loss": 3.026, + "nll_loss": 0.7338007688522339, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007210483308881521, + "rewards/margins": 0.04017511010169983, + "rewards/rejected": -0.04738559573888779, + "step": 1050 + }, + { + "epoch": 0.7268326417704011, + "grad_norm": 5.781348705291748, + "learning_rate": 3.634163208852006e-05, + "log_odds_chosen": 1.436905860900879, + "log_odds_ratio": -0.41141462326049805, + "logits/chosen": -0.6144793033599854, + "logits/rejected": -0.6271205544471741, + "logps/chosen": -0.13432197272777557, + "logps/rejected": -0.6162081360816956, + "loss": 5.1217, + "nll_loss": 1.2392876148223877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013432197272777557, + "rewards/margins": 0.04818861931562424, + "rewards/rejected": -0.061620816588401794, + "step": 1051 + }, + { + "epoch": 0.7275242047026279, + "grad_norm": 6.011894226074219, + "learning_rate": 3.63762102351314e-05, + "log_odds_chosen": 0.6937634944915771, + "log_odds_ratio": -0.5657732486724854, + "logits/chosen": -0.5936172008514404, + "logits/rejected": -0.6089737415313721, + "logps/chosen": -0.17231324315071106, + "logps/rejected": -0.3177986145019531, + "loss": 6.265, + "nll_loss": 1.5096734762191772, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017231326550245285, + "rewards/margins": 0.01454853918403387, + "rewards/rejected": -0.03177986294031143, + "step": 1052 + }, + { + "epoch": 0.7282157676348547, + "grad_norm": 5.030375003814697, + "learning_rate": 3.641078838174274e-05, + "log_odds_chosen": 2.0339081287384033, + "log_odds_ratio": -0.3970106840133667, + "logits/chosen": -0.33543309569358826, + "logits/rejected": -0.4022204279899597, + "logps/chosen": -0.09676516801118851, + "logps/rejected": -0.38274016976356506, + "loss": 3.5718, + "nll_loss": 0.8532538414001465, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009676516987383366, + "rewards/margins": 0.028597503900527954, + "rewards/rejected": -0.038274019956588745, + "step": 1053 + }, + { + "epoch": 0.7289073305670816, + "grad_norm": 5.222143173217773, + "learning_rate": 3.644536652835408e-05, + "log_odds_chosen": 3.022456169128418, + "log_odds_ratio": -0.36239343881607056, + "logits/chosen": -0.23395097255706787, + "logits/rejected": -0.18252798914909363, + "logps/chosen": -0.0783625915646553, + "logps/rejected": -0.2937096357345581, + "loss": 4.2201, + "nll_loss": 1.018797755241394, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00783625990152359, + "rewards/margins": 0.02153470367193222, + "rewards/rejected": -0.02937096357345581, + "step": 1054 + }, + { + "epoch": 0.7295988934993084, + "grad_norm": 4.244716644287109, + "learning_rate": 3.6479944674965424e-05, + "log_odds_chosen": 1.170404314994812, + "log_odds_ratio": -0.6230608820915222, + "logits/chosen": -0.7283819913864136, + "logits/rejected": -0.7557381391525269, + "logps/chosen": -0.17400789260864258, + "logps/rejected": -0.3624713122844696, + "loss": 4.0401, + "nll_loss": 0.9477148056030273, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01740078628063202, + "rewards/margins": 0.01884634606540203, + "rewards/rejected": -0.0362471342086792, + "step": 1055 + }, + { + "epoch": 0.7302904564315352, + "grad_norm": 5.602895259857178, + "learning_rate": 3.6514522821576766e-05, + "log_odds_chosen": 4.399355888366699, + "log_odds_ratio": -0.20333260297775269, + "logits/chosen": -0.5755096077919006, + "logits/rejected": -0.598081111907959, + "logps/chosen": -0.09461069107055664, + "logps/rejected": -0.7363580465316772, + "loss": 4.7825, + "nll_loss": 1.1752852201461792, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009461069479584694, + "rewards/margins": 0.06417473405599594, + "rewards/rejected": -0.07363580167293549, + "step": 1056 + }, + { + "epoch": 0.7309820193637621, + "grad_norm": 4.749327659606934, + "learning_rate": 3.654910096818811e-05, + "log_odds_chosen": 0.8630741834640503, + "log_odds_ratio": -0.504383385181427, + "logits/chosen": -0.5899327993392944, + "logits/rejected": -0.6016180515289307, + "logps/chosen": -0.15858584642410278, + "logps/rejected": -0.31086573004722595, + "loss": 5.0885, + "nll_loss": 1.2216897010803223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015858585014939308, + "rewards/margins": 0.015227987430989742, + "rewards/rejected": -0.031086573377251625, + "step": 1057 + }, + { + "epoch": 0.7316735822959889, + "grad_norm": 4.7271223068237305, + "learning_rate": 3.658367911479945e-05, + "log_odds_chosen": 3.1375505924224854, + "log_odds_ratio": -0.23146921396255493, + "logits/chosen": -0.532660961151123, + "logits/rejected": -0.580690860748291, + "logps/chosen": -0.10345923900604248, + "logps/rejected": -0.5149369835853577, + "loss": 4.4799, + "nll_loss": 1.0968393087387085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010345923714339733, + "rewards/margins": 0.04114777594804764, + "rewards/rejected": -0.05149369686841965, + "step": 1058 + }, + { + "epoch": 0.7323651452282157, + "grad_norm": 3.9870223999023438, + "learning_rate": 3.661825726141079e-05, + "log_odds_chosen": 4.208795070648193, + "log_odds_ratio": -0.29262298345565796, + "logits/chosen": -0.4202018082141876, + "logits/rejected": -0.4788719117641449, + "logps/chosen": -0.06881583482027054, + "logps/rejected": -0.4822113513946533, + "loss": 4.1821, + "nll_loss": 1.0162742137908936, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006881583947688341, + "rewards/margins": 0.04133955016732216, + "rewards/rejected": -0.04822114109992981, + "step": 1059 + }, + { + "epoch": 0.7330567081604425, + "grad_norm": 4.029439449310303, + "learning_rate": 3.665283540802213e-05, + "log_odds_chosen": 1.8435285091400146, + "log_odds_ratio": -0.5828537344932556, + "logits/chosen": -0.3349389135837555, + "logits/rejected": -0.35388249158859253, + "logps/chosen": -0.13364273309707642, + "logps/rejected": -0.29924899339675903, + "loss": 3.2881, + "nll_loss": 0.7637304663658142, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013364273123443127, + "rewards/margins": 0.016560625284910202, + "rewards/rejected": -0.029924899339675903, + "step": 1060 + }, + { + "epoch": 0.7337482710926694, + "grad_norm": 5.114047527313232, + "learning_rate": 3.668741355463347e-05, + "log_odds_chosen": 2.7002954483032227, + "log_odds_ratio": -0.3071057200431824, + "logits/chosen": -0.5675312876701355, + "logits/rejected": -0.5511829257011414, + "logps/chosen": -0.10216303169727325, + "logps/rejected": -0.4013533294200897, + "loss": 4.0426, + "nll_loss": 0.9799474477767944, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010216303169727325, + "rewards/margins": 0.029919028282165527, + "rewards/rejected": -0.04013533145189285, + "step": 1061 + }, + { + "epoch": 0.7344398340248963, + "grad_norm": 4.512507438659668, + "learning_rate": 3.6721991701244815e-05, + "log_odds_chosen": 2.5141499042510986, + "log_odds_ratio": -0.3996974229812622, + "logits/chosen": -0.46067526936531067, + "logits/rejected": -0.4886988699436188, + "logps/chosen": -0.14718782901763916, + "logps/rejected": -0.6637516021728516, + "loss": 3.6508, + "nll_loss": 0.8727205991744995, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014718784019351006, + "rewards/margins": 0.05165638029575348, + "rewards/rejected": -0.06637516617774963, + "step": 1062 + }, + { + "epoch": 0.7351313969571232, + "grad_norm": 4.6516900062561035, + "learning_rate": 3.6756569847856156e-05, + "log_odds_chosen": 3.213118553161621, + "log_odds_ratio": -0.41739317774772644, + "logits/chosen": -0.7525394558906555, + "logits/rejected": -0.7624413967132568, + "logps/chosen": -0.13979551196098328, + "logps/rejected": -0.6717532277107239, + "loss": 4.4842, + "nll_loss": 1.0793054103851318, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013979552313685417, + "rewards/margins": 0.053195770829916, + "rewards/rejected": -0.06717532873153687, + "step": 1063 + }, + { + "epoch": 0.73582295988935, + "grad_norm": 4.892207145690918, + "learning_rate": 3.67911479944675e-05, + "log_odds_chosen": 1.7997876405715942, + "log_odds_ratio": -0.44318562746047974, + "logits/chosen": -0.18329492211341858, + "logits/rejected": -0.20506246387958527, + "logps/chosen": -0.15136878192424774, + "logps/rejected": -0.5139152407646179, + "loss": 3.9923, + "nll_loss": 0.9537680149078369, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015136879868805408, + "rewards/margins": 0.0362546443939209, + "rewards/rejected": -0.05139152333140373, + "step": 1064 + }, + { + "epoch": 0.7365145228215768, + "grad_norm": 3.9588499069213867, + "learning_rate": 3.682572614107884e-05, + "log_odds_chosen": 1.91678786277771, + "log_odds_ratio": -0.4147067368030548, + "logits/chosen": -0.45388561487197876, + "logits/rejected": -0.4534473419189453, + "logps/chosen": -0.09650453925132751, + "logps/rejected": -0.3927268981933594, + "loss": 3.6191, + "nll_loss": 0.8632949590682983, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009650452993810177, + "rewards/margins": 0.029622236266732216, + "rewards/rejected": -0.03927268832921982, + "step": 1065 + }, + { + "epoch": 0.7372060857538036, + "grad_norm": 2.8396570682525635, + "learning_rate": 3.686030428769018e-05, + "log_odds_chosen": 3.650707483291626, + "log_odds_ratio": -0.38111308217048645, + "logits/chosen": -0.5955278873443604, + "logits/rejected": -0.5792263746261597, + "logps/chosen": -0.09282395243644714, + "logps/rejected": -0.5960294008255005, + "loss": 2.7555, + "nll_loss": 0.650758683681488, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009282395243644714, + "rewards/margins": 0.050320543348789215, + "rewards/rejected": -0.05960294231772423, + "step": 1066 + }, + { + "epoch": 0.7378976486860305, + "grad_norm": 3.476088285446167, + "learning_rate": 3.689488243430152e-05, + "log_odds_chosen": 2.3841819763183594, + "log_odds_ratio": -0.3378468155860901, + "logits/chosen": -0.7314380407333374, + "logits/rejected": -0.7951211333274841, + "logps/chosen": -0.11184002459049225, + "logps/rejected": -0.3191815912723541, + "loss": 4.3461, + "nll_loss": 1.0527474880218506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01118400227278471, + "rewards/margins": 0.020734157413244247, + "rewards/rejected": -0.03191816061735153, + "step": 1067 + }, + { + "epoch": 0.7385892116182573, + "grad_norm": 4.4976935386657715, + "learning_rate": 3.6929460580912864e-05, + "log_odds_chosen": 4.20717191696167, + "log_odds_ratio": -0.15988406538963318, + "logits/chosen": -0.45512500405311584, + "logits/rejected": -0.4465624988079071, + "logps/chosen": -0.03990020975470543, + "logps/rejected": -0.5817291736602783, + "loss": 4.7337, + "nll_loss": 1.1674458980560303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0039900210686028, + "rewards/margins": 0.05418289452791214, + "rewards/rejected": -0.05817291885614395, + "step": 1068 + }, + { + "epoch": 0.7392807745504841, + "grad_norm": 3.743703603744507, + "learning_rate": 3.6964038727524206e-05, + "log_odds_chosen": 2.3907783031463623, + "log_odds_ratio": -0.3446536362171173, + "logits/chosen": -0.4428321421146393, + "logits/rejected": -0.43401190638542175, + "logps/chosen": -0.08195048570632935, + "logps/rejected": -0.3324810266494751, + "loss": 3.4512, + "nll_loss": 0.8283307552337646, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00819504912942648, + "rewards/margins": 0.025053054094314575, + "rewards/rejected": -0.03324810042977333, + "step": 1069 + }, + { + "epoch": 0.739972337482711, + "grad_norm": 4.063248157501221, + "learning_rate": 3.699861687413555e-05, + "log_odds_chosen": 4.055078983306885, + "log_odds_ratio": -0.1567329466342926, + "logits/chosen": -0.1771300733089447, + "logits/rejected": -0.2207050770521164, + "logps/chosen": -0.05447046458721161, + "logps/rejected": -0.7913841009140015, + "loss": 3.1308, + "nll_loss": 0.767034649848938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005447045899927616, + "rewards/margins": 0.07369136810302734, + "rewards/rejected": -0.07913841307163239, + "step": 1070 + }, + { + "epoch": 0.7406639004149378, + "grad_norm": 2.8043837547302246, + "learning_rate": 3.703319502074689e-05, + "log_odds_chosen": 2.3212316036224365, + "log_odds_ratio": -0.3333531320095062, + "logits/chosen": -0.09415624290704727, + "logits/rejected": -0.09188832342624664, + "logps/chosen": -0.13778002560138702, + "logps/rejected": -0.5300951600074768, + "loss": 3.8473, + "nll_loss": 0.9284874200820923, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013778002932667732, + "rewards/margins": 0.03923151642084122, + "rewards/rejected": -0.0530095174908638, + "step": 1071 + }, + { + "epoch": 0.7413554633471646, + "grad_norm": 4.631345748901367, + "learning_rate": 3.706777316735823e-05, + "log_odds_chosen": 3.794403553009033, + "log_odds_ratio": -0.16763067245483398, + "logits/chosen": -0.21735386550426483, + "logits/rejected": -0.21720963716506958, + "logps/chosen": -0.04987889155745506, + "logps/rejected": -0.5241183042526245, + "loss": 3.927, + "nll_loss": 0.9649972915649414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004987888969480991, + "rewards/margins": 0.047423943877220154, + "rewards/rejected": -0.05241183191537857, + "step": 1072 + }, + { + "epoch": 0.7420470262793915, + "grad_norm": 5.381296634674072, + "learning_rate": 3.710235131396957e-05, + "log_odds_chosen": 2.529040813446045, + "log_odds_ratio": -0.21126320958137512, + "logits/chosen": -0.2269861251115799, + "logits/rejected": -0.23414316773414612, + "logps/chosen": -0.07538623362779617, + "logps/rejected": -0.47442883253097534, + "loss": 4.7342, + "nll_loss": 1.1624130010604858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00753862364217639, + "rewards/margins": 0.03990425914525986, + "rewards/rejected": -0.047442883253097534, + "step": 1073 + }, + { + "epoch": 0.7427385892116183, + "grad_norm": 3.97910213470459, + "learning_rate": 3.7136929460580914e-05, + "log_odds_chosen": 4.230005264282227, + "log_odds_ratio": -0.14120924472808838, + "logits/chosen": -0.521894097328186, + "logits/rejected": -0.5772715210914612, + "logps/chosen": -0.03921622037887573, + "logps/rejected": -0.6114540100097656, + "loss": 3.8008, + "nll_loss": 0.9360888004302979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003921622410416603, + "rewards/margins": 0.057223785668611526, + "rewards/rejected": -0.06114540249109268, + "step": 1074 + }, + { + "epoch": 0.7434301521438451, + "grad_norm": 2.6997873783111572, + "learning_rate": 3.7171507607192255e-05, + "log_odds_chosen": 5.320131778717041, + "log_odds_ratio": -0.1742401272058487, + "logits/chosen": -0.2050633281469345, + "logits/rejected": -0.2097817212343216, + "logps/chosen": -0.05946308746933937, + "logps/rejected": -0.6085449457168579, + "loss": 2.5591, + "nll_loss": 0.622360348701477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059463088400661945, + "rewards/margins": 0.05490818992257118, + "rewards/rejected": -0.06085449829697609, + "step": 1075 + }, + { + "epoch": 0.7441217150760719, + "grad_norm": 5.3858642578125, + "learning_rate": 3.72060857538036e-05, + "log_odds_chosen": 2.5684211254119873, + "log_odds_ratio": -0.2726511061191559, + "logits/chosen": -0.8184974193572998, + "logits/rejected": -0.8305962085723877, + "logps/chosen": -0.08273748308420181, + "logps/rejected": -0.5238097906112671, + "loss": 5.5739, + "nll_loss": 1.3662147521972656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008273748680949211, + "rewards/margins": 0.044107235968112946, + "rewards/rejected": -0.05238097906112671, + "step": 1076 + }, + { + "epoch": 0.7448132780082988, + "grad_norm": 4.981190204620361, + "learning_rate": 3.724066390041494e-05, + "log_odds_chosen": 2.198141574859619, + "log_odds_ratio": -0.4150627851486206, + "logits/chosen": -0.5096691250801086, + "logits/rejected": -0.5472338199615479, + "logps/chosen": -0.11538825929164886, + "logps/rejected": -0.4828852415084839, + "loss": 4.129, + "nll_loss": 0.9907474517822266, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011538825929164886, + "rewards/margins": 0.0367497019469738, + "rewards/rejected": -0.04828852415084839, + "step": 1077 + }, + { + "epoch": 0.7455048409405256, + "grad_norm": 3.9157955646514893, + "learning_rate": 3.727524204702628e-05, + "log_odds_chosen": 2.297726631164551, + "log_odds_ratio": -0.371995210647583, + "logits/chosen": -0.45065683126449585, + "logits/rejected": -0.4426935315132141, + "logps/chosen": -0.08662683516740799, + "logps/rejected": -0.5210797786712646, + "loss": 3.581, + "nll_loss": 0.8580514192581177, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008662683889269829, + "rewards/margins": 0.043445296585559845, + "rewards/rejected": -0.05210798233747482, + "step": 1078 + }, + { + "epoch": 0.7461964038727524, + "grad_norm": 4.103110313415527, + "learning_rate": 3.730982019363762e-05, + "log_odds_chosen": 2.5692861080169678, + "log_odds_ratio": -0.34731459617614746, + "logits/chosen": -0.07757198810577393, + "logits/rejected": -0.10431862622499466, + "logps/chosen": -0.13760052621364594, + "logps/rejected": -0.558710515499115, + "loss": 3.3979, + "nll_loss": 0.8147333264350891, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013760052621364594, + "rewards/margins": 0.042111001908779144, + "rewards/rejected": -0.05587105453014374, + "step": 1079 + }, + { + "epoch": 0.7468879668049793, + "grad_norm": 4.3268609046936035, + "learning_rate": 3.734439834024896e-05, + "log_odds_chosen": 3.42661190032959, + "log_odds_ratio": -0.2674722969532013, + "logits/chosen": -0.5790812373161316, + "logits/rejected": -0.6419976949691772, + "logps/chosen": -0.1267891228199005, + "logps/rejected": -1.1352012157440186, + "loss": 3.6002, + "nll_loss": 0.8733097314834595, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012678911909461021, + "rewards/margins": 0.10084120184183121, + "rewards/rejected": -0.11352010816335678, + "step": 1080 + }, + { + "epoch": 0.7475795297372061, + "grad_norm": 8.72693157196045, + "learning_rate": 3.7378976486860305e-05, + "log_odds_chosen": 2.839428424835205, + "log_odds_ratio": -0.6887301206588745, + "logits/chosen": -0.11714953929185867, + "logits/rejected": -0.09862416982650757, + "logps/chosen": -0.16352665424346924, + "logps/rejected": -0.43877363204956055, + "loss": 4.117, + "nll_loss": 0.9603717923164368, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016352666541934013, + "rewards/margins": 0.027524694800376892, + "rewards/rejected": -0.043877359479665756, + "step": 1081 + }, + { + "epoch": 0.7482710926694329, + "grad_norm": 5.499275207519531, + "learning_rate": 3.7413554633471646e-05, + "log_odds_chosen": 2.351445198059082, + "log_odds_ratio": -0.3783484697341919, + "logits/chosen": -0.5679388642311096, + "logits/rejected": -0.6132568120956421, + "logps/chosen": -0.1187373548746109, + "logps/rejected": -0.541204035282135, + "loss": 4.3354, + "nll_loss": 1.046006679534912, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011873736046254635, + "rewards/margins": 0.04224666580557823, + "rewards/rejected": -0.05412040278315544, + "step": 1082 + }, + { + "epoch": 0.7489626556016598, + "grad_norm": 5.204410076141357, + "learning_rate": 3.744813278008299e-05, + "log_odds_chosen": 3.1782922744750977, + "log_odds_ratio": -0.5147818922996521, + "logits/chosen": -0.44969701766967773, + "logits/rejected": -0.5115452408790588, + "logps/chosen": -0.21525892615318298, + "logps/rejected": -0.6728153228759766, + "loss": 4.2166, + "nll_loss": 1.0026686191558838, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.021525893360376358, + "rewards/margins": 0.04575563967227936, + "rewards/rejected": -0.06728152930736542, + "step": 1083 + }, + { + "epoch": 0.7496542185338866, + "grad_norm": 4.421257019042969, + "learning_rate": 3.748271092669433e-05, + "log_odds_chosen": 3.770069122314453, + "log_odds_ratio": -0.2764526605606079, + "logits/chosen": -0.5885463953018188, + "logits/rejected": -0.6559880971908569, + "logps/chosen": -0.1313014030456543, + "logps/rejected": -0.7071733474731445, + "loss": 3.618, + "nll_loss": 0.8768469095230103, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01313013769686222, + "rewards/margins": 0.057587191462516785, + "rewards/rejected": -0.07071733474731445, + "step": 1084 + }, + { + "epoch": 0.7503457814661134, + "grad_norm": 5.926955223083496, + "learning_rate": 3.751728907330567e-05, + "log_odds_chosen": 2.0917410850524902, + "log_odds_ratio": -0.3241581320762634, + "logits/chosen": -0.5161243677139282, + "logits/rejected": -0.5618958473205566, + "logps/chosen": -0.18530499935150146, + "logps/rejected": -0.7070559859275818, + "loss": 5.7767, + "nll_loss": 1.4117603302001953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018530499190092087, + "rewards/margins": 0.05217510089278221, + "rewards/rejected": -0.0707056000828743, + "step": 1085 + }, + { + "epoch": 0.7510373443983402, + "grad_norm": 4.207293510437012, + "learning_rate": 3.755186721991701e-05, + "log_odds_chosen": 2.306514263153076, + "log_odds_ratio": -0.42577362060546875, + "logits/chosen": -0.7863785624504089, + "logits/rejected": -0.8544211983680725, + "logps/chosen": -0.18335390090942383, + "logps/rejected": -0.6212817430496216, + "loss": 3.0202, + "nll_loss": 0.71247798204422, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018335388973355293, + "rewards/margins": 0.043792787939310074, + "rewards/rejected": -0.062128178775310516, + "step": 1086 + }, + { + "epoch": 0.7517289073305671, + "grad_norm": 5.270834445953369, + "learning_rate": 3.7586445366528354e-05, + "log_odds_chosen": 2.643711566925049, + "log_odds_ratio": -0.4310038983821869, + "logits/chosen": -0.4614013135433197, + "logits/rejected": -0.4456390142440796, + "logps/chosen": -0.15072834491729736, + "logps/rejected": -0.5333762168884277, + "loss": 2.8166, + "nll_loss": 0.6610429883003235, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015072835609316826, + "rewards/margins": 0.038264788687229156, + "rewards/rejected": -0.05333762615919113, + "step": 1087 + }, + { + "epoch": 0.7524204702627939, + "grad_norm": 5.288963317871094, + "learning_rate": 3.7621023513139696e-05, + "log_odds_chosen": 3.5286176204681396, + "log_odds_ratio": -0.40869560837745667, + "logits/chosen": -0.7788718938827515, + "logits/rejected": -0.8168633580207825, + "logps/chosen": -0.1163693368434906, + "logps/rejected": -0.8496063947677612, + "loss": 5.0915, + "nll_loss": 1.2319990396499634, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01163693517446518, + "rewards/margins": 0.07332369685173035, + "rewards/rejected": -0.08496063202619553, + "step": 1088 + }, + { + "epoch": 0.7531120331950207, + "grad_norm": 4.082790374755859, + "learning_rate": 3.765560165975104e-05, + "log_odds_chosen": 6.779488563537598, + "log_odds_ratio": -0.03812899813055992, + "logits/chosen": -0.6678798198699951, + "logits/rejected": -0.7273062467575073, + "logps/chosen": -0.011440301313996315, + "logps/rejected": -1.4002537727355957, + "loss": 4.1497, + "nll_loss": 1.0336132049560547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011440301313996315, + "rewards/margins": 0.1388813555240631, + "rewards/rejected": -0.14002537727355957, + "step": 1089 + }, + { + "epoch": 0.7538035961272476, + "grad_norm": 7.497416973114014, + "learning_rate": 3.769017980636238e-05, + "log_odds_chosen": 2.679985523223877, + "log_odds_ratio": -0.48144277930259705, + "logits/chosen": -0.7045676708221436, + "logits/rejected": -0.7712850570678711, + "logps/chosen": -0.18166804313659668, + "logps/rejected": -0.6683666706085205, + "loss": 5.1019, + "nll_loss": 1.2273311614990234, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018166804686188698, + "rewards/margins": 0.048669859766960144, + "rewards/rejected": -0.06683666259050369, + "step": 1090 + }, + { + "epoch": 0.7544951590594744, + "grad_norm": 11.02239990234375, + "learning_rate": 3.772475795297372e-05, + "log_odds_chosen": 5.144924640655518, + "log_odds_ratio": -0.21654638648033142, + "logits/chosen": -1.0382695198059082, + "logits/rejected": -1.0143285989761353, + "logps/chosen": -0.05242515355348587, + "logps/rejected": -0.6436967849731445, + "loss": 3.9557, + "nll_loss": 0.9672713875770569, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005242515355348587, + "rewards/margins": 0.05912715941667557, + "rewards/rejected": -0.06436967849731445, + "step": 1091 + }, + { + "epoch": 0.7551867219917012, + "grad_norm": 7.538413047790527, + "learning_rate": 3.775933609958506e-05, + "log_odds_chosen": 2.380988597869873, + "log_odds_ratio": -0.8324184417724609, + "logits/chosen": -0.6824043989181519, + "logits/rejected": -0.6923583745956421, + "logps/chosen": -0.19706298410892487, + "logps/rejected": -0.5978980660438538, + "loss": 4.4174, + "nll_loss": 1.0211037397384644, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019706297665834427, + "rewards/margins": 0.04008351266384125, + "rewards/rejected": -0.059789810329675674, + "step": 1092 + }, + { + "epoch": 0.7558782849239281, + "grad_norm": 4.259000778198242, + "learning_rate": 3.7793914246196403e-05, + "log_odds_chosen": 2.0974249839782715, + "log_odds_ratio": -0.2188202440738678, + "logits/chosen": -0.6757645010948181, + "logits/rejected": -0.6860700845718384, + "logps/chosen": -0.10299341380596161, + "logps/rejected": -0.5411241054534912, + "loss": 3.923, + "nll_loss": 0.9588569402694702, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01029934175312519, + "rewards/margins": 0.0438130684196949, + "rewards/rejected": -0.054112404584884644, + "step": 1093 + }, + { + "epoch": 0.7565698478561549, + "grad_norm": 3.8153839111328125, + "learning_rate": 3.7828492392807745e-05, + "log_odds_chosen": 4.307488441467285, + "log_odds_ratio": -0.2924882769584656, + "logits/chosen": -0.5414397716522217, + "logits/rejected": -0.5879523158073425, + "logps/chosen": -0.08990863710641861, + "logps/rejected": -0.6482211947441101, + "loss": 3.6132, + "nll_loss": 0.8740568161010742, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008990864269435406, + "rewards/margins": 0.05583126097917557, + "rewards/rejected": -0.06482212245464325, + "step": 1094 + }, + { + "epoch": 0.7572614107883817, + "grad_norm": 4.142021656036377, + "learning_rate": 3.7863070539419087e-05, + "log_odds_chosen": 4.343199253082275, + "log_odds_ratio": -0.22691279649734497, + "logits/chosen": -0.778051495552063, + "logits/rejected": -0.840363621711731, + "logps/chosen": -0.048902321606874466, + "logps/rejected": -0.7048326730728149, + "loss": 3.5437, + "nll_loss": 0.8632230758666992, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0048902323469519615, + "rewards/margins": 0.06559304147958755, + "rewards/rejected": -0.0704832673072815, + "step": 1095 + }, + { + "epoch": 0.7579529737206085, + "grad_norm": 3.198138952255249, + "learning_rate": 3.789764868603043e-05, + "log_odds_chosen": 4.851378440856934, + "log_odds_ratio": -0.3832613527774811, + "logits/chosen": -0.6265957355499268, + "logits/rejected": -0.6354972124099731, + "logps/chosen": -0.13120141625404358, + "logps/rejected": -0.8701565265655518, + "loss": 3.1297, + "nll_loss": 0.7441052794456482, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013120142742991447, + "rewards/margins": 0.07389551401138306, + "rewards/rejected": -0.08701566606760025, + "step": 1096 + }, + { + "epoch": 0.7586445366528354, + "grad_norm": 5.956087112426758, + "learning_rate": 3.793222683264177e-05, + "log_odds_chosen": 3.1593127250671387, + "log_odds_ratio": -0.5399028062820435, + "logits/chosen": -0.7491406202316284, + "logits/rejected": -0.7200503945350647, + "logps/chosen": -0.10669641196727753, + "logps/rejected": -0.507939875125885, + "loss": 3.3775, + "nll_loss": 0.7903934717178345, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010669643059372902, + "rewards/margins": 0.04012434929609299, + "rewards/rejected": -0.05079399049282074, + "step": 1097 + }, + { + "epoch": 0.7593360995850622, + "grad_norm": 3.8274338245391846, + "learning_rate": 3.796680497925311e-05, + "log_odds_chosen": 5.283022403717041, + "log_odds_ratio": -0.122711181640625, + "logits/chosen": -0.7252033948898315, + "logits/rejected": -0.7651737332344055, + "logps/chosen": -0.031005796045064926, + "logps/rejected": -0.9215719699859619, + "loss": 4.2154, + "nll_loss": 1.0415713787078857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031005796045064926, + "rewards/margins": 0.08905662596225739, + "rewards/rejected": -0.09215720742940903, + "step": 1098 + }, + { + "epoch": 0.760027662517289, + "grad_norm": 6.531271457672119, + "learning_rate": 3.800138312586445e-05, + "log_odds_chosen": 2.260535717010498, + "log_odds_ratio": -0.5166959166526794, + "logits/chosen": -0.5828487873077393, + "logits/rejected": -0.6525802612304688, + "logps/chosen": -0.12451886385679245, + "logps/rejected": -0.7832128405570984, + "loss": 4.359, + "nll_loss": 1.0380773544311523, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01245188619941473, + "rewards/margins": 0.06586939096450806, + "rewards/rejected": -0.07832127809524536, + "step": 1099 + }, + { + "epoch": 0.7607192254495159, + "grad_norm": 6.007966041564941, + "learning_rate": 3.8035961272475794e-05, + "log_odds_chosen": 0.9574238061904907, + "log_odds_ratio": -0.7949805855751038, + "logits/chosen": -0.5290077924728394, + "logits/rejected": -0.5131245255470276, + "logps/chosen": -0.3000096380710602, + "logps/rejected": -0.44282999634742737, + "loss": 4.4522, + "nll_loss": 1.0335532426834106, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.030000966042280197, + "rewards/margins": 0.01428203471004963, + "rewards/rejected": -0.044283002614974976, + "step": 1100 + }, + { + "epoch": 0.7614107883817427, + "grad_norm": 4.777920722961426, + "learning_rate": 3.8070539419087136e-05, + "log_odds_chosen": 2.418083667755127, + "log_odds_ratio": -0.36667677760124207, + "logits/chosen": -0.6750804781913757, + "logits/rejected": -0.737511396408081, + "logps/chosen": -0.0945938378572464, + "logps/rejected": -0.6871483325958252, + "loss": 4.6545, + "nll_loss": 1.1269659996032715, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009459384717047215, + "rewards/margins": 0.0592554472386837, + "rewards/rejected": -0.06871482729911804, + "step": 1101 + }, + { + "epoch": 0.7621023513139695, + "grad_norm": 4.31030797958374, + "learning_rate": 3.810511756569848e-05, + "log_odds_chosen": 1.0872337818145752, + "log_odds_ratio": -0.3637908697128296, + "logits/chosen": -0.8375824689865112, + "logits/rejected": -0.8417525291442871, + "logps/chosen": -0.15087522566318512, + "logps/rejected": -0.3603815734386444, + "loss": 4.4087, + "nll_loss": 1.065795660018921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015087523497641087, + "rewards/margins": 0.02095063589513302, + "rewards/rejected": -0.03603815659880638, + "step": 1102 + }, + { + "epoch": 0.7627939142461964, + "grad_norm": 4.248500823974609, + "learning_rate": 3.813969571230982e-05, + "log_odds_chosen": 2.1251821517944336, + "log_odds_ratio": -0.5379229784011841, + "logits/chosen": -0.8146258592605591, + "logits/rejected": -0.8227401971817017, + "logps/chosen": -0.25022798776626587, + "logps/rejected": -0.43799692392349243, + "loss": 3.9286, + "nll_loss": 0.9283566474914551, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.025022799149155617, + "rewards/margins": 0.018776895478367805, + "rewards/rejected": -0.043799690902233124, + "step": 1103 + }, + { + "epoch": 0.7634854771784232, + "grad_norm": 6.478932857513428, + "learning_rate": 3.817427385892116e-05, + "log_odds_chosen": 3.2808871269226074, + "log_odds_ratio": -0.6370083689689636, + "logits/chosen": -0.5142711400985718, + "logits/rejected": -0.5442649722099304, + "logps/chosen": -0.17024749517440796, + "logps/rejected": -0.7139173150062561, + "loss": 4.3995, + "nll_loss": 1.0361793041229248, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017024749889969826, + "rewards/margins": 0.05436699092388153, + "rewards/rejected": -0.07139173895120621, + "step": 1104 + }, + { + "epoch": 0.76417704011065, + "grad_norm": 6.064324378967285, + "learning_rate": 3.82088520055325e-05, + "log_odds_chosen": 2.0439133644104004, + "log_odds_ratio": -0.6495869755744934, + "logits/chosen": -0.5766505002975464, + "logits/rejected": -0.6375952363014221, + "logps/chosen": -0.11183735728263855, + "logps/rejected": -0.4476245641708374, + "loss": 4.1578, + "nll_loss": 0.9744910001754761, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01118373591452837, + "rewards/margins": 0.033578719943761826, + "rewards/rejected": -0.04476245492696762, + "step": 1105 + }, + { + "epoch": 0.7648686030428768, + "grad_norm": 5.818049430847168, + "learning_rate": 3.8243430152143844e-05, + "log_odds_chosen": 3.604917526245117, + "log_odds_ratio": -0.20862287282943726, + "logits/chosen": -0.40988677740097046, + "logits/rejected": -0.541254460811615, + "logps/chosen": -0.1304902732372284, + "logps/rejected": -0.8513497114181519, + "loss": 4.8317, + "nll_loss": 1.1870533227920532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01304902695119381, + "rewards/margins": 0.07208594679832458, + "rewards/rejected": -0.08513498306274414, + "step": 1106 + }, + { + "epoch": 0.7655601659751037, + "grad_norm": 4.074938774108887, + "learning_rate": 3.8278008298755185e-05, + "log_odds_chosen": 3.30131459236145, + "log_odds_ratio": -0.4436090588569641, + "logits/chosen": -0.2585781514644623, + "logits/rejected": -0.2200646549463272, + "logps/chosen": -0.17219777405261993, + "logps/rejected": -0.4860021770000458, + "loss": 3.6635, + "nll_loss": 0.8715248703956604, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017219776287674904, + "rewards/margins": 0.031380441039800644, + "rewards/rejected": -0.0486002191901207, + "step": 1107 + }, + { + "epoch": 0.7662517289073306, + "grad_norm": 4.7798752784729, + "learning_rate": 3.8312586445366534e-05, + "log_odds_chosen": 3.8130784034729004, + "log_odds_ratio": -0.46914446353912354, + "logits/chosen": -0.3986022472381592, + "logits/rejected": -0.44066357612609863, + "logps/chosen": -0.11577075719833374, + "logps/rejected": -0.608155369758606, + "loss": 2.5063, + "nll_loss": 0.5796663761138916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011577075347304344, + "rewards/margins": 0.04923846200108528, + "rewards/rejected": -0.060815539211034775, + "step": 1108 + }, + { + "epoch": 0.7669432918395575, + "grad_norm": 4.062282085418701, + "learning_rate": 3.8347164591977875e-05, + "log_odds_chosen": 2.94562029838562, + "log_odds_ratio": -0.3354604244232178, + "logits/chosen": -0.9019069671630859, + "logits/rejected": -0.8851369619369507, + "logps/chosen": -0.13271500170230865, + "logps/rejected": -0.5042838454246521, + "loss": 4.0718, + "nll_loss": 0.9844123125076294, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013271501287817955, + "rewards/margins": 0.03715688735246658, + "rewards/rejected": -0.05042839050292969, + "step": 1109 + }, + { + "epoch": 0.7676348547717843, + "grad_norm": 3.5688350200653076, + "learning_rate": 3.838174273858922e-05, + "log_odds_chosen": 3.1504390239715576, + "log_odds_ratio": -0.1645454466342926, + "logits/chosen": -0.5958057641983032, + "logits/rejected": -0.5955528020858765, + "logps/chosen": -0.07106180489063263, + "logps/rejected": -0.6669843792915344, + "loss": 3.9271, + "nll_loss": 0.9653175473213196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007106180768460035, + "rewards/margins": 0.05959225445985794, + "rewards/rejected": -0.06669843941926956, + "step": 1110 + }, + { + "epoch": 0.7683264177040111, + "grad_norm": 3.4419846534729004, + "learning_rate": 3.841632088520056e-05, + "log_odds_chosen": 2.8391995429992676, + "log_odds_ratio": -0.2667177617549896, + "logits/chosen": -0.47049030661582947, + "logits/rejected": -0.4641495943069458, + "logps/chosen": -0.15538278222084045, + "logps/rejected": -0.5239760279655457, + "loss": 3.7415, + "nll_loss": 0.9087094068527222, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015538278967142105, + "rewards/margins": 0.03685932606458664, + "rewards/rejected": -0.052397601306438446, + "step": 1111 + }, + { + "epoch": 0.7690179806362379, + "grad_norm": 4.275797367095947, + "learning_rate": 3.84508990318119e-05, + "log_odds_chosen": 1.1996444463729858, + "log_odds_ratio": -0.38345867395401, + "logits/chosen": -0.4088421165943146, + "logits/rejected": -0.4383625388145447, + "logps/chosen": -0.14941827952861786, + "logps/rejected": -0.30568715929985046, + "loss": 4.1739, + "nll_loss": 1.005133032798767, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014941826462745667, + "rewards/margins": 0.01562688872218132, + "rewards/rejected": -0.030568715184926987, + "step": 1112 + }, + { + "epoch": 0.7697095435684648, + "grad_norm": 4.479307651519775, + "learning_rate": 3.848547717842324e-05, + "log_odds_chosen": 2.365272283554077, + "log_odds_ratio": -0.3554462790489197, + "logits/chosen": -0.8425488471984863, + "logits/rejected": -0.8144983053207397, + "logps/chosen": -0.1148102805018425, + "logps/rejected": -0.4265750050544739, + "loss": 4.7071, + "nll_loss": 1.1412395238876343, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01148102805018425, + "rewards/margins": 0.031176473945379257, + "rewards/rejected": -0.042657505720853806, + "step": 1113 + }, + { + "epoch": 0.7704011065006916, + "grad_norm": 4.078261852264404, + "learning_rate": 3.852005532503458e-05, + "log_odds_chosen": 2.991490602493286, + "log_odds_ratio": -0.22407673299312592, + "logits/chosen": -0.5358573198318481, + "logits/rejected": -0.5682927370071411, + "logps/chosen": -0.10035638511180878, + "logps/rejected": -0.6249147057533264, + "loss": 3.4856, + "nll_loss": 0.848996639251709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010035638697445393, + "rewards/margins": 0.052455835044384, + "rewards/rejected": -0.06249146908521652, + "step": 1114 + }, + { + "epoch": 0.7710926694329184, + "grad_norm": 4.2970662117004395, + "learning_rate": 3.8554633471645925e-05, + "log_odds_chosen": 1.0146849155426025, + "log_odds_ratio": -0.46182528138160706, + "logits/chosen": -0.7279335856437683, + "logits/rejected": -0.7823548316955566, + "logps/chosen": -0.16933849453926086, + "logps/rejected": -0.41459232568740845, + "loss": 4.9735, + "nll_loss": 1.1972006559371948, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016933850944042206, + "rewards/margins": 0.02452538162469864, + "rewards/rejected": -0.041459232568740845, + "step": 1115 + }, + { + "epoch": 0.7717842323651453, + "grad_norm": 4.411706447601318, + "learning_rate": 3.8589211618257266e-05, + "log_odds_chosen": 4.425901412963867, + "log_odds_ratio": -0.20330864191055298, + "logits/chosen": -0.5573688745498657, + "logits/rejected": -0.6300353407859802, + "logps/chosen": -0.08963973075151443, + "logps/rejected": -0.8802515864372253, + "loss": 3.438, + "nll_loss": 0.8391615152359009, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008963974192738533, + "rewards/margins": 0.07906118780374527, + "rewards/rejected": -0.08802516013383865, + "step": 1116 + }, + { + "epoch": 0.7724757952973721, + "grad_norm": 4.105024814605713, + "learning_rate": 3.862378976486861e-05, + "log_odds_chosen": 3.935359001159668, + "log_odds_ratio": -0.12453159689903259, + "logits/chosen": -0.7120730876922607, + "logits/rejected": -0.7461434602737427, + "logps/chosen": -0.08041106164455414, + "logps/rejected": -0.8631589412689209, + "loss": 3.263, + "nll_loss": 0.8032896518707275, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008041106164455414, + "rewards/margins": 0.07827478647232056, + "rewards/rejected": -0.08631589263677597, + "step": 1117 + }, + { + "epoch": 0.7731673582295989, + "grad_norm": 3.715104579925537, + "learning_rate": 3.865836791147995e-05, + "log_odds_chosen": 3.0843594074249268, + "log_odds_ratio": -0.3770461082458496, + "logits/chosen": -0.47825729846954346, + "logits/rejected": -0.5002142190933228, + "logps/chosen": -0.12397563457489014, + "logps/rejected": -0.5420525074005127, + "loss": 3.286, + "nll_loss": 0.7838032245635986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012397563084959984, + "rewards/margins": 0.041807692497968674, + "rewards/rejected": -0.05420524999499321, + "step": 1118 + }, + { + "epoch": 0.7738589211618258, + "grad_norm": 3.9022834300994873, + "learning_rate": 3.869294605809129e-05, + "log_odds_chosen": 4.696831703186035, + "log_odds_ratio": -0.1862536519765854, + "logits/chosen": -0.680198609828949, + "logits/rejected": -0.6572217345237732, + "logps/chosen": -0.023391971364617348, + "logps/rejected": -0.6749863624572754, + "loss": 3.9703, + "nll_loss": 0.9739567041397095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002339197089895606, + "rewards/margins": 0.06515943259000778, + "rewards/rejected": -0.06749863177537918, + "step": 1119 + }, + { + "epoch": 0.7745504840940526, + "grad_norm": 3.49310040473938, + "learning_rate": 3.872752420470263e-05, + "log_odds_chosen": 3.1153817176818848, + "log_odds_ratio": -0.3262585699558258, + "logits/chosen": -0.4032193422317505, + "logits/rejected": -0.4326457679271698, + "logps/chosen": -0.2284383922815323, + "logps/rejected": -0.5586980581283569, + "loss": 4.6027, + "nll_loss": 1.1180399656295776, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02284383960068226, + "rewards/margins": 0.033025968819856644, + "rewards/rejected": -0.05586981028318405, + "step": 1120 + }, + { + "epoch": 0.7752420470262794, + "grad_norm": 4.8784637451171875, + "learning_rate": 3.8762102351313974e-05, + "log_odds_chosen": 1.9118473529815674, + "log_odds_ratio": -0.49750471115112305, + "logits/chosen": -0.6266558170318604, + "logits/rejected": -0.6750451326370239, + "logps/chosen": -0.19696509838104248, + "logps/rejected": -0.567264199256897, + "loss": 4.9556, + "nll_loss": 1.1891417503356934, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019696509465575218, + "rewards/margins": 0.03702991455793381, + "rewards/rejected": -0.056726425886154175, + "step": 1121 + }, + { + "epoch": 0.7759336099585062, + "grad_norm": 4.29984712600708, + "learning_rate": 3.8796680497925316e-05, + "log_odds_chosen": 3.8260724544525146, + "log_odds_ratio": -0.5127156972885132, + "logits/chosen": -0.7006309032440186, + "logits/rejected": -0.6704519391059875, + "logps/chosen": -0.26238536834716797, + "logps/rejected": -0.6787563562393188, + "loss": 3.134, + "nll_loss": 0.7322263717651367, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.026238534599542618, + "rewards/margins": 0.04163710027933121, + "rewards/rejected": -0.06787563860416412, + "step": 1122 + }, + { + "epoch": 0.7766251728907331, + "grad_norm": 3.7785251140594482, + "learning_rate": 3.883125864453666e-05, + "log_odds_chosen": 3.391031265258789, + "log_odds_ratio": -0.25807151198387146, + "logits/chosen": -0.5419265031814575, + "logits/rejected": -0.5877819061279297, + "logps/chosen": -0.06500020623207092, + "logps/rejected": -0.5630817413330078, + "loss": 4.2755, + "nll_loss": 1.043055772781372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006500020623207092, + "rewards/margins": 0.049808159470558167, + "rewards/rejected": -0.05630818009376526, + "step": 1123 + }, + { + "epoch": 0.7773167358229599, + "grad_norm": 4.714626312255859, + "learning_rate": 3.8865836791148e-05, + "log_odds_chosen": 1.7221829891204834, + "log_odds_ratio": -0.3858616054058075, + "logits/chosen": -0.7497819662094116, + "logits/rejected": -0.7688895463943481, + "logps/chosen": -0.12314295023679733, + "logps/rejected": -0.5708560347557068, + "loss": 4.1738, + "nll_loss": 1.0048553943634033, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012314295396208763, + "rewards/margins": 0.04477131739258766, + "rewards/rejected": -0.05708560720086098, + "step": 1124 + }, + { + "epoch": 0.7780082987551867, + "grad_norm": 4.4347004890441895, + "learning_rate": 3.890041493775934e-05, + "log_odds_chosen": 3.8225934505462646, + "log_odds_ratio": -0.278799831867218, + "logits/chosen": -0.5871791839599609, + "logits/rejected": -0.6095415353775024, + "logps/chosen": -0.05089031159877777, + "logps/rejected": -0.5935240387916565, + "loss": 3.8368, + "nll_loss": 0.9313230514526367, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005089031532406807, + "rewards/margins": 0.05426337197422981, + "rewards/rejected": -0.05935240536928177, + "step": 1125 + }, + { + "epoch": 0.7786998616874136, + "grad_norm": 5.964032173156738, + "learning_rate": 3.893499308437068e-05, + "log_odds_chosen": 1.3911092281341553, + "log_odds_ratio": -0.6403376460075378, + "logits/chosen": -0.45489662885665894, + "logits/rejected": -0.45509597659111023, + "logps/chosen": -0.20690639317035675, + "logps/rejected": -0.44346410036087036, + "loss": 4.6253, + "nll_loss": 1.0922995805740356, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020690638571977615, + "rewards/margins": 0.02365577220916748, + "rewards/rejected": -0.0443464070558548, + "step": 1126 + }, + { + "epoch": 0.7793914246196404, + "grad_norm": 4.736138820648193, + "learning_rate": 3.896957123098202e-05, + "log_odds_chosen": 1.951234221458435, + "log_odds_ratio": -0.2717779278755188, + "logits/chosen": -0.4384583830833435, + "logits/rejected": -0.45516031980514526, + "logps/chosen": -0.180232971906662, + "logps/rejected": -0.49474045634269714, + "loss": 4.4218, + "nll_loss": 1.0782605409622192, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0180232971906662, + "rewards/margins": 0.031450752168893814, + "rewards/rejected": -0.04947404935956001, + "step": 1127 + }, + { + "epoch": 0.7800829875518672, + "grad_norm": 4.4454121589660645, + "learning_rate": 3.9004149377593365e-05, + "log_odds_chosen": 0.8737515211105347, + "log_odds_ratio": -0.48570409417152405, + "logits/chosen": -0.5742661952972412, + "logits/rejected": -0.6075069308280945, + "logps/chosen": -0.19278815388679504, + "logps/rejected": -0.38309305906295776, + "loss": 3.8146, + "nll_loss": 0.9050906896591187, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019278815016150475, + "rewards/margins": 0.019030490890145302, + "rewards/rejected": -0.038309305906295776, + "step": 1128 + }, + { + "epoch": 0.780774550484094, + "grad_norm": 3.730633497238159, + "learning_rate": 3.9038727524204706e-05, + "log_odds_chosen": 4.613859176635742, + "log_odds_ratio": -0.2876426875591278, + "logits/chosen": -0.618686318397522, + "logits/rejected": -0.630027711391449, + "logps/chosen": -0.0700341984629631, + "logps/rejected": -0.9240524172782898, + "loss": 3.7821, + "nll_loss": 0.9167693853378296, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007003419566899538, + "rewards/margins": 0.08540181815624237, + "rewards/rejected": -0.09240524470806122, + "step": 1129 + }, + { + "epoch": 0.7814661134163209, + "grad_norm": 3.9620399475097656, + "learning_rate": 3.907330567081605e-05, + "log_odds_chosen": 5.74013614654541, + "log_odds_ratio": -0.04481692984700203, + "logits/chosen": -0.5398914813995361, + "logits/rejected": -0.6123236417770386, + "logps/chosen": -0.020203545689582825, + "logps/rejected": -0.8377431631088257, + "loss": 3.3303, + "nll_loss": 0.8280977010726929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002020354615524411, + "rewards/margins": 0.08175395429134369, + "rewards/rejected": -0.08377431333065033, + "step": 1130 + }, + { + "epoch": 0.7821576763485477, + "grad_norm": 4.754410743713379, + "learning_rate": 3.910788381742739e-05, + "log_odds_chosen": 3.7495689392089844, + "log_odds_ratio": -0.6508976221084595, + "logits/chosen": -0.24432966113090515, + "logits/rejected": -0.3001052737236023, + "logps/chosen": -0.12211127579212189, + "logps/rejected": -0.8259966969490051, + "loss": 3.4209, + "nll_loss": 0.7901304960250854, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012211127206683159, + "rewards/margins": 0.0703885406255722, + "rewards/rejected": -0.08259966969490051, + "step": 1131 + }, + { + "epoch": 0.7828492392807745, + "grad_norm": 5.5947771072387695, + "learning_rate": 3.914246196403873e-05, + "log_odds_chosen": 2.722187042236328, + "log_odds_ratio": -0.1351226270198822, + "logits/chosen": -0.27186745405197144, + "logits/rejected": -0.29344063997268677, + "logps/chosen": -0.045510705560445786, + "logps/rejected": -0.46628302335739136, + "loss": 4.7851, + "nll_loss": 1.1827595233917236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0045510707423090935, + "rewards/margins": 0.042077235877513885, + "rewards/rejected": -0.04662831127643585, + "step": 1132 + }, + { + "epoch": 0.7835408022130014, + "grad_norm": 4.867863178253174, + "learning_rate": 3.917704011065007e-05, + "log_odds_chosen": 5.198690891265869, + "log_odds_ratio": -0.270771861076355, + "logits/chosen": -0.4652605950832367, + "logits/rejected": -0.4993474781513214, + "logps/chosen": -0.16884656250476837, + "logps/rejected": -0.8110989332199097, + "loss": 3.881, + "nll_loss": 0.9431832432746887, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016884656623005867, + "rewards/margins": 0.06422524154186249, + "rewards/rejected": -0.0811098963022232, + "step": 1133 + }, + { + "epoch": 0.7842323651452282, + "grad_norm": 4.890600681304932, + "learning_rate": 3.9211618257261414e-05, + "log_odds_chosen": 0.5457233786582947, + "log_odds_ratio": -0.7434303760528564, + "logits/chosen": -0.41072139143943787, + "logits/rejected": -0.45201027393341064, + "logps/chosen": -0.30787140130996704, + "logps/rejected": -0.31583571434020996, + "loss": 4.3617, + "nll_loss": 1.016086459159851, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.030787140130996704, + "rewards/margins": 0.0007964321412146091, + "rewards/rejected": -0.03158356994390488, + "step": 1134 + }, + { + "epoch": 0.784923928077455, + "grad_norm": 3.413855791091919, + "learning_rate": 3.9246196403872756e-05, + "log_odds_chosen": 3.549720287322998, + "log_odds_ratio": -0.19492925703525543, + "logits/chosen": -0.4394097328186035, + "logits/rejected": -0.4626482427120209, + "logps/chosen": -0.14576005935668945, + "logps/rejected": -0.8169733881950378, + "loss": 2.9895, + "nll_loss": 0.7278827428817749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014576006680727005, + "rewards/margins": 0.06712133437395096, + "rewards/rejected": -0.08169733732938766, + "step": 1135 + }, + { + "epoch": 0.7856154910096819, + "grad_norm": 5.547590255737305, + "learning_rate": 3.92807745504841e-05, + "log_odds_chosen": 2.836033344268799, + "log_odds_ratio": -0.4607436954975128, + "logits/chosen": -0.47262442111968994, + "logits/rejected": -0.46149805188179016, + "logps/chosen": -0.2260802686214447, + "logps/rejected": -0.8693596124649048, + "loss": 4.2696, + "nll_loss": 1.0213253498077393, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02260802686214447, + "rewards/margins": 0.06432792544364929, + "rewards/rejected": -0.08693595230579376, + "step": 1136 + }, + { + "epoch": 0.7863070539419087, + "grad_norm": 6.455709934234619, + "learning_rate": 3.931535269709544e-05, + "log_odds_chosen": 4.0131916999816895, + "log_odds_ratio": -0.9443694353103638, + "logits/chosen": -0.1703692525625229, + "logits/rejected": -0.21043118834495544, + "logps/chosen": -0.19897066056728363, + "logps/rejected": -0.6492530703544617, + "loss": 3.9973, + "nll_loss": 0.9048901200294495, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019897066056728363, + "rewards/margins": 0.04502824321389198, + "rewards/rejected": -0.06492530554533005, + "step": 1137 + }, + { + "epoch": 0.7869986168741355, + "grad_norm": 4.839828014373779, + "learning_rate": 3.934993084370678e-05, + "log_odds_chosen": 2.5431089401245117, + "log_odds_ratio": -0.36189424991607666, + "logits/chosen": -0.45434796810150146, + "logits/rejected": -0.48614394664764404, + "logps/chosen": -0.12932661175727844, + "logps/rejected": -0.487295538187027, + "loss": 4.8545, + "nll_loss": 1.177430510520935, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012932661920785904, + "rewards/margins": 0.035796891897916794, + "rewards/rejected": -0.0487295538187027, + "step": 1138 + }, + { + "epoch": 0.7876901798063624, + "grad_norm": 6.385467052459717, + "learning_rate": 3.938450899031812e-05, + "log_odds_chosen": 0.4432275593280792, + "log_odds_ratio": -0.8290820717811584, + "logits/chosen": -0.5175051093101501, + "logits/rejected": -0.5431155562400818, + "logps/chosen": -0.18120020627975464, + "logps/rejected": -0.2309972047805786, + "loss": 5.9582, + "nll_loss": 1.4066460132598877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018120020627975464, + "rewards/margins": 0.004979700315743685, + "rewards/rejected": -0.02309972234070301, + "step": 1139 + }, + { + "epoch": 0.7883817427385892, + "grad_norm": 3.1535542011260986, + "learning_rate": 3.9419087136929464e-05, + "log_odds_chosen": 2.2779955863952637, + "log_odds_ratio": -0.40133097767829895, + "logits/chosen": -0.2673976421356201, + "logits/rejected": -0.325452595949173, + "logps/chosen": -0.09485425055027008, + "logps/rejected": -0.4404667019844055, + "loss": 3.3564, + "nll_loss": 0.7989755868911743, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009485425427556038, + "rewards/margins": 0.03456124663352966, + "rewards/rejected": -0.04404667019844055, + "step": 1140 + }, + { + "epoch": 0.789073305670816, + "grad_norm": 6.922438621520996, + "learning_rate": 3.9453665283540805e-05, + "log_odds_chosen": 1.6342426538467407, + "log_odds_ratio": -0.5387409925460815, + "logits/chosen": -0.34957778453826904, + "logits/rejected": -0.3884001076221466, + "logps/chosen": -0.12367647886276245, + "logps/rejected": -0.31333163380622864, + "loss": 4.4978, + "nll_loss": 1.0705667734146118, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012367649003863335, + "rewards/margins": 0.01896551437675953, + "rewards/rejected": -0.031333163380622864, + "step": 1141 + }, + { + "epoch": 0.7897648686030428, + "grad_norm": 4.580284118652344, + "learning_rate": 3.948824343015215e-05, + "log_odds_chosen": 1.7318023443222046, + "log_odds_ratio": -0.6523119807243347, + "logits/chosen": -0.6493903994560242, + "logits/rejected": -0.6705228686332703, + "logps/chosen": -0.27369049191474915, + "logps/rejected": -0.6649531722068787, + "loss": 4.2986, + "nll_loss": 1.0094248056411743, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.027369048446416855, + "rewards/margins": 0.03912627696990967, + "rewards/rejected": -0.06649532169103622, + "step": 1142 + }, + { + "epoch": 0.7904564315352697, + "grad_norm": 5.137827396392822, + "learning_rate": 3.952282157676349e-05, + "log_odds_chosen": 2.366032600402832, + "log_odds_ratio": -0.4727362394332886, + "logits/chosen": -0.20692262053489685, + "logits/rejected": -0.2002687156200409, + "logps/chosen": -0.1281823217868805, + "logps/rejected": -0.4092061221599579, + "loss": 3.2961, + "nll_loss": 0.7767484188079834, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012818234041333199, + "rewards/margins": 0.02810238115489483, + "rewards/rejected": -0.04092061519622803, + "step": 1143 + }, + { + "epoch": 0.7911479944674965, + "grad_norm": 4.259830474853516, + "learning_rate": 3.955739972337483e-05, + "log_odds_chosen": 3.2707371711730957, + "log_odds_ratio": -0.2883046269416809, + "logits/chosen": -0.1852089762687683, + "logits/rejected": -0.2368299812078476, + "logps/chosen": -0.1016981303691864, + "logps/rejected": -0.8447484970092773, + "loss": 2.9771, + "nll_loss": 0.7154471278190613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010169814340770245, + "rewards/margins": 0.07430503517389297, + "rewards/rejected": -0.0844748467206955, + "step": 1144 + }, + { + "epoch": 0.7918395573997233, + "grad_norm": 3.689413070678711, + "learning_rate": 3.959197786998617e-05, + "log_odds_chosen": 3.4060113430023193, + "log_odds_ratio": -0.25230181217193604, + "logits/chosen": -0.25829124450683594, + "logits/rejected": -0.25649240612983704, + "logps/chosen": -0.08521488308906555, + "logps/rejected": -0.5981854796409607, + "loss": 3.8064, + "nll_loss": 0.9263700246810913, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0085214888677001, + "rewards/margins": 0.05129706487059593, + "rewards/rejected": -0.05981855094432831, + "step": 1145 + }, + { + "epoch": 0.7925311203319502, + "grad_norm": 5.098305702209473, + "learning_rate": 3.962655601659751e-05, + "log_odds_chosen": 2.6655032634735107, + "log_odds_ratio": -0.24669036269187927, + "logits/chosen": -0.4905535578727722, + "logits/rejected": -0.5256434082984924, + "logps/chosen": -0.10529651492834091, + "logps/rejected": -0.5505136847496033, + "loss": 4.5956, + "nll_loss": 1.1242369413375854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010529652237892151, + "rewards/margins": 0.04452171549201012, + "rewards/rejected": -0.05505136772990227, + "step": 1146 + }, + { + "epoch": 0.793222683264177, + "grad_norm": 3.806218385696411, + "learning_rate": 3.9661134163208855e-05, + "log_odds_chosen": 3.110368251800537, + "log_odds_ratio": -0.30972862243652344, + "logits/chosen": -0.15990647673606873, + "logits/rejected": -0.1701805144548416, + "logps/chosen": -0.10173983126878738, + "logps/rejected": -0.4115472435951233, + "loss": 3.4048, + "nll_loss": 0.8202356696128845, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010173983871936798, + "rewards/margins": 0.03098073974251747, + "rewards/rejected": -0.04115472361445427, + "step": 1147 + }, + { + "epoch": 0.7939142461964038, + "grad_norm": 3.248661994934082, + "learning_rate": 3.9695712309820196e-05, + "log_odds_chosen": 4.2546844482421875, + "log_odds_ratio": -0.14933958649635315, + "logits/chosen": -0.2126917541027069, + "logits/rejected": -0.26696938276290894, + "logps/chosen": -0.02835908532142639, + "logps/rejected": -0.4520314931869507, + "loss": 3.2796, + "nll_loss": 0.8049613237380981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028359086718410254, + "rewards/margins": 0.04236724227666855, + "rewards/rejected": -0.04520314931869507, + "step": 1148 + }, + { + "epoch": 0.7946058091286307, + "grad_norm": 4.2895588874816895, + "learning_rate": 3.973029045643154e-05, + "log_odds_chosen": 1.050495982170105, + "log_odds_ratio": -0.6158413290977478, + "logits/chosen": -0.6027485728263855, + "logits/rejected": -0.5986880660057068, + "logps/chosen": -0.17323151230812073, + "logps/rejected": -0.3859601616859436, + "loss": 3.2588, + "nll_loss": 0.7531127333641052, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017323151230812073, + "rewards/margins": 0.021272864192724228, + "rewards/rejected": -0.0385960191488266, + "step": 1149 + }, + { + "epoch": 0.7952973720608575, + "grad_norm": 3.7696895599365234, + "learning_rate": 3.976486860304288e-05, + "log_odds_chosen": 3.539609909057617, + "log_odds_ratio": -0.25380614399909973, + "logits/chosen": -0.3072446584701538, + "logits/rejected": -0.3367466628551483, + "logps/chosen": -0.07568614184856415, + "logps/rejected": -0.502180814743042, + "loss": 3.3981, + "nll_loss": 0.824151337146759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007568614557385445, + "rewards/margins": 0.042649466544389725, + "rewards/rejected": -0.05021808296442032, + "step": 1150 + }, + { + "epoch": 0.7959889349930843, + "grad_norm": 3.330585241317749, + "learning_rate": 3.979944674965422e-05, + "log_odds_chosen": 2.6073431968688965, + "log_odds_ratio": -0.2579598128795624, + "logits/chosen": -0.23022376000881195, + "logits/rejected": -0.26469650864601135, + "logps/chosen": -0.0766703188419342, + "logps/rejected": -0.3181542754173279, + "loss": 4.0204, + "nll_loss": 0.9793111085891724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007667032536119223, + "rewards/margins": 0.024148397147655487, + "rewards/rejected": -0.03181542828679085, + "step": 1151 + }, + { + "epoch": 0.7966804979253111, + "grad_norm": 4.687983512878418, + "learning_rate": 3.983402489626556e-05, + "log_odds_chosen": 1.84706449508667, + "log_odds_ratio": -0.33897465467453003, + "logits/chosen": -0.7606194019317627, + "logits/rejected": -0.7207037210464478, + "logps/chosen": -0.12923657894134521, + "logps/rejected": -0.43369144201278687, + "loss": 5.1788, + "nll_loss": 1.2607989311218262, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012923657894134521, + "rewards/margins": 0.030445488169789314, + "rewards/rejected": -0.043369147926568985, + "step": 1152 + }, + { + "epoch": 0.7973720608575381, + "grad_norm": 6.274543762207031, + "learning_rate": 3.9868603042876904e-05, + "log_odds_chosen": 0.8526477217674255, + "log_odds_ratio": -0.5951778888702393, + "logits/chosen": -0.20541231334209442, + "logits/rejected": -0.23336751759052277, + "logps/chosen": -0.19507427513599396, + "logps/rejected": -0.3879839777946472, + "loss": 3.8535, + "nll_loss": 0.9038695096969604, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019507426768541336, + "rewards/margins": 0.019290970638394356, + "rewards/rejected": -0.03879839554429054, + "step": 1153 + }, + { + "epoch": 0.7980636237897649, + "grad_norm": 4.864806652069092, + "learning_rate": 3.9903181189488246e-05, + "log_odds_chosen": 2.4882702827453613, + "log_odds_ratio": -0.3174005150794983, + "logits/chosen": -0.8158073425292969, + "logits/rejected": -0.8330868482589722, + "logps/chosen": -0.09785200655460358, + "logps/rejected": -0.4712101221084595, + "loss": 4.5925, + "nll_loss": 1.1163914203643799, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009785201400518417, + "rewards/margins": 0.03733580932021141, + "rewards/rejected": -0.04712101072072983, + "step": 1154 + }, + { + "epoch": 0.7987551867219918, + "grad_norm": 4.701379776000977, + "learning_rate": 3.993775933609959e-05, + "log_odds_chosen": 2.112551689147949, + "log_odds_ratio": -0.37846639752388, + "logits/chosen": -0.275676965713501, + "logits/rejected": -0.2844288647174835, + "logps/chosen": -0.12924633920192719, + "logps/rejected": -0.33982053399086, + "loss": 4.3999, + "nll_loss": 1.0621216297149658, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012924633920192719, + "rewards/margins": 0.02105741947889328, + "rewards/rejected": -0.033982053399086, + "step": 1155 + }, + { + "epoch": 0.7994467496542186, + "grad_norm": 4.104145526885986, + "learning_rate": 3.997233748271093e-05, + "log_odds_chosen": 0.3516320288181305, + "log_odds_ratio": -0.6141179800033569, + "logits/chosen": -0.4469658136367798, + "logits/rejected": -0.468797504901886, + "logps/chosen": -0.20192989706993103, + "logps/rejected": -0.29918792843818665, + "loss": 4.2525, + "nll_loss": 1.0017073154449463, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.020192990079522133, + "rewards/margins": 0.009725801646709442, + "rewards/rejected": -0.029918791726231575, + "step": 1156 + }, + { + "epoch": 0.8001383125864454, + "grad_norm": 3.982905864715576, + "learning_rate": 4.000691562932227e-05, + "log_odds_chosen": 2.0321569442749023, + "log_odds_ratio": -0.38790351152420044, + "logits/chosen": -0.2542264461517334, + "logits/rejected": -0.23322290182113647, + "logps/chosen": -0.2226470708847046, + "logps/rejected": -0.647704541683197, + "loss": 3.7577, + "nll_loss": 0.9006452560424805, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02226470783352852, + "rewards/margins": 0.04250574856996536, + "rewards/rejected": -0.06477045267820358, + "step": 1157 + }, + { + "epoch": 0.8008298755186722, + "grad_norm": 4.697607040405273, + "learning_rate": 4.004149377593361e-05, + "log_odds_chosen": 1.7568845748901367, + "log_odds_ratio": -0.5033901333808899, + "logits/chosen": -0.6665538549423218, + "logits/rejected": -0.6717506647109985, + "logps/chosen": -0.11687202751636505, + "logps/rejected": -0.410500168800354, + "loss": 4.2899, + "nll_loss": 1.0221455097198486, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011687202379107475, + "rewards/margins": 0.029362818226218224, + "rewards/rejected": -0.0410500168800354, + "step": 1158 + }, + { + "epoch": 0.8015214384508991, + "grad_norm": 4.458200454711914, + "learning_rate": 4.007607192254495e-05, + "log_odds_chosen": 2.649742603302002, + "log_odds_ratio": -0.2814682722091675, + "logits/chosen": -0.3529450297355652, + "logits/rejected": -0.32280710339546204, + "logps/chosen": -0.13715879619121552, + "logps/rejected": -0.5732930302619934, + "loss": 3.8533, + "nll_loss": 0.9351730346679688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01371588185429573, + "rewards/margins": 0.04361342638731003, + "rewards/rejected": -0.05732930451631546, + "step": 1159 + }, + { + "epoch": 0.8022130013831259, + "grad_norm": 5.326199054718018, + "learning_rate": 4.0110650069156295e-05, + "log_odds_chosen": 1.607958436012268, + "log_odds_ratio": -1.345959186553955, + "logits/chosen": -0.11324809491634369, + "logits/rejected": -0.07060239464044571, + "logps/chosen": -0.27737849950790405, + "logps/rejected": -0.5359267592430115, + "loss": 3.6059, + "nll_loss": 0.7668741941452026, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.027737848460674286, + "rewards/margins": 0.02585482969880104, + "rewards/rejected": -0.05359267443418503, + "step": 1160 + }, + { + "epoch": 0.8029045643153527, + "grad_norm": 4.502684116363525, + "learning_rate": 4.0145228215767636e-05, + "log_odds_chosen": 2.724449872970581, + "log_odds_ratio": -0.3041455149650574, + "logits/chosen": -0.10399705171585083, + "logits/rejected": -0.0837821215391159, + "logps/chosen": -0.07750361412763596, + "logps/rejected": -0.46520230174064636, + "loss": 3.4773, + "nll_loss": 0.8389115333557129, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007750361226499081, + "rewards/margins": 0.03876987099647522, + "rewards/rejected": -0.046520233154296875, + "step": 1161 + }, + { + "epoch": 0.8035961272475796, + "grad_norm": 4.772683620452881, + "learning_rate": 4.017980636237898e-05, + "log_odds_chosen": 2.803502321243286, + "log_odds_ratio": -0.5766577124595642, + "logits/chosen": -0.05678505823016167, + "logits/rejected": -0.0759805291891098, + "logps/chosen": -0.17404711246490479, + "logps/rejected": -0.5554315447807312, + "loss": 3.6957, + "nll_loss": 0.8662543296813965, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017404712736606598, + "rewards/margins": 0.038138438016176224, + "rewards/rejected": -0.05554314702749252, + "step": 1162 + }, + { + "epoch": 0.8042876901798064, + "grad_norm": 3.36564564704895, + "learning_rate": 4.021438450899032e-05, + "log_odds_chosen": 2.6950929164886475, + "log_odds_ratio": -0.3946307301521301, + "logits/chosen": -0.3952489197254181, + "logits/rejected": -0.41110169887542725, + "logps/chosen": -0.13186201453208923, + "logps/rejected": -0.4083203077316284, + "loss": 3.2633, + "nll_loss": 0.7763731479644775, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013186202384531498, + "rewards/margins": 0.027645830065011978, + "rewards/rejected": -0.0408320352435112, + "step": 1163 + }, + { + "epoch": 0.8049792531120332, + "grad_norm": 5.37779426574707, + "learning_rate": 4.024896265560166e-05, + "log_odds_chosen": 1.5617663860321045, + "log_odds_ratio": -0.6627476215362549, + "logits/chosen": -0.3280797600746155, + "logits/rejected": -0.38121557235717773, + "logps/chosen": -0.26118674874305725, + "logps/rejected": -0.4342179000377655, + "loss": 4.6767, + "nll_loss": 1.1028947830200195, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.026118673384189606, + "rewards/margins": 0.017303116619586945, + "rewards/rejected": -0.04342179000377655, + "step": 1164 + }, + { + "epoch": 0.80567081604426, + "grad_norm": 5.427413463592529, + "learning_rate": 4.0283540802213e-05, + "log_odds_chosen": 2.0146398544311523, + "log_odds_ratio": -0.37571975588798523, + "logits/chosen": -0.5844836235046387, + "logits/rejected": -0.6303712725639343, + "logps/chosen": -0.13081598281860352, + "logps/rejected": -0.4544587731361389, + "loss": 3.4184, + "nll_loss": 0.8170157670974731, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013081599026918411, + "rewards/margins": 0.03236427530646324, + "rewards/rejected": -0.04544587433338165, + "step": 1165 + }, + { + "epoch": 0.8063623789764869, + "grad_norm": 5.316728591918945, + "learning_rate": 4.0318118948824344e-05, + "log_odds_chosen": 1.9966132640838623, + "log_odds_ratio": -0.5896454453468323, + "logits/chosen": -0.23118741810321808, + "logits/rejected": -0.19517435133457184, + "logps/chosen": -0.19767743349075317, + "logps/rejected": -0.36030542850494385, + "loss": 3.8185, + "nll_loss": 0.8956526517868042, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019767742604017258, + "rewards/margins": 0.016262799501419067, + "rewards/rejected": -0.036030545830726624, + "step": 1166 + }, + { + "epoch": 0.8070539419087137, + "grad_norm": 5.237853050231934, + "learning_rate": 4.0352697095435686e-05, + "log_odds_chosen": 2.7189266681671143, + "log_odds_ratio": -0.45107319951057434, + "logits/chosen": -0.3652513921260834, + "logits/rejected": -0.3933391571044922, + "logps/chosen": -0.16551122069358826, + "logps/rejected": -0.4190428555011749, + "loss": 3.4939, + "nll_loss": 0.8283703327178955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016551122069358826, + "rewards/margins": 0.025353163480758667, + "rewards/rejected": -0.04190428555011749, + "step": 1167 + }, + { + "epoch": 0.8077455048409405, + "grad_norm": 3.3878939151763916, + "learning_rate": 4.038727524204703e-05, + "log_odds_chosen": 4.432202339172363, + "log_odds_ratio": -0.1157660111784935, + "logits/chosen": -0.4019083082675934, + "logits/rejected": -0.39598947763442993, + "logps/chosen": -0.0686732605099678, + "logps/rejected": -0.6455079913139343, + "loss": 3.2299, + "nll_loss": 0.7959014773368835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0068673258647322655, + "rewards/margins": 0.05768347159028053, + "rewards/rejected": -0.06455080211162567, + "step": 1168 + }, + { + "epoch": 0.8084370677731674, + "grad_norm": 5.72116756439209, + "learning_rate": 4.042185338865837e-05, + "log_odds_chosen": 2.8803868293762207, + "log_odds_ratio": -0.2445899397134781, + "logits/chosen": -0.35858699679374695, + "logits/rejected": -0.4009200632572174, + "logps/chosen": -0.1210114136338234, + "logps/rejected": -0.600544810295105, + "loss": 4.0163, + "nll_loss": 0.9796262979507446, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01210114173591137, + "rewards/margins": 0.04795333743095398, + "rewards/rejected": -0.0600544810295105, + "step": 1169 + }, + { + "epoch": 0.8091286307053942, + "grad_norm": 4.961204528808594, + "learning_rate": 4.045643153526971e-05, + "log_odds_chosen": 1.2866220474243164, + "log_odds_ratio": -0.6377352476119995, + "logits/chosen": -0.7465688586235046, + "logits/rejected": -0.7543196678161621, + "logps/chosen": -0.20632582902908325, + "logps/rejected": -0.4361448287963867, + "loss": 4.3729, + "nll_loss": 1.029453992843628, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.020632583647966385, + "rewards/margins": 0.022981898859143257, + "rewards/rejected": -0.04361448436975479, + "step": 1170 + }, + { + "epoch": 0.809820193637621, + "grad_norm": 3.8165488243103027, + "learning_rate": 4.049100968188105e-05, + "log_odds_chosen": 1.9424272775650024, + "log_odds_ratio": -0.5050072073936462, + "logits/chosen": -0.6732468605041504, + "logits/rejected": -0.657630205154419, + "logps/chosen": -0.14212819933891296, + "logps/rejected": -0.31241375207901, + "loss": 3.3255, + "nll_loss": 0.7808791399002075, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01421282161027193, + "rewards/margins": 0.017028555274009705, + "rewards/rejected": -0.03124137595295906, + "step": 1171 + }, + { + "epoch": 0.8105117565698479, + "grad_norm": 4.526830673217773, + "learning_rate": 4.0525587828492394e-05, + "log_odds_chosen": 3.6338460445404053, + "log_odds_ratio": -0.3146744966506958, + "logits/chosen": -0.6114726662635803, + "logits/rejected": -0.6514254808425903, + "logps/chosen": -0.10209785401821136, + "logps/rejected": -0.5759584903717041, + "loss": 4.2295, + "nll_loss": 1.0259071588516235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010209785774350166, + "rewards/margins": 0.04738606512546539, + "rewards/rejected": -0.05759584903717041, + "step": 1172 + }, + { + "epoch": 0.8112033195020747, + "grad_norm": 4.934037685394287, + "learning_rate": 4.0560165975103735e-05, + "log_odds_chosen": 1.4841629266738892, + "log_odds_ratio": -0.3995290696620941, + "logits/chosen": -0.682904839515686, + "logits/rejected": -0.7407668828964233, + "logps/chosen": -0.14081400632858276, + "logps/rejected": -0.4084875285625458, + "loss": 5.494, + "nll_loss": 1.3335497379302979, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014081399887800217, + "rewards/margins": 0.02676735632121563, + "rewards/rejected": -0.0408487543463707, + "step": 1173 + }, + { + "epoch": 0.8118948824343015, + "grad_norm": 4.349318981170654, + "learning_rate": 4.059474412171508e-05, + "log_odds_chosen": 1.8717788457870483, + "log_odds_ratio": -0.38216298818588257, + "logits/chosen": -0.5308178067207336, + "logits/rejected": -0.5366460680961609, + "logps/chosen": -0.10794338583946228, + "logps/rejected": -0.34007906913757324, + "loss": 4.0714, + "nll_loss": 0.9796421527862549, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010794337838888168, + "rewards/margins": 0.023213567212224007, + "rewards/rejected": -0.034007906913757324, + "step": 1174 + }, + { + "epoch": 0.8125864453665284, + "grad_norm": 6.510798454284668, + "learning_rate": 4.062932226832642e-05, + "log_odds_chosen": 1.6442862749099731, + "log_odds_ratio": -0.40282881259918213, + "logits/chosen": -0.7126697301864624, + "logits/rejected": -0.7322872281074524, + "logps/chosen": -0.13879220187664032, + "logps/rejected": -0.5311157703399658, + "loss": 6.3191, + "nll_loss": 1.5394930839538574, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013879221864044666, + "rewards/margins": 0.03923235461115837, + "rewards/rejected": -0.05311157554388046, + "step": 1175 + }, + { + "epoch": 0.8132780082987552, + "grad_norm": 3.506040096282959, + "learning_rate": 4.066390041493776e-05, + "log_odds_chosen": 5.572505950927734, + "log_odds_ratio": -0.1323142647743225, + "logits/chosen": 0.40928786993026733, + "logits/rejected": 0.3669503927230835, + "logps/chosen": -0.07105830311775208, + "logps/rejected": -0.9678250551223755, + "loss": 3.5214, + "nll_loss": 0.8671307563781738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007105831056833267, + "rewards/margins": 0.08967668563127518, + "rewards/rejected": -0.09678251296281815, + "step": 1176 + }, + { + "epoch": 0.813969571230982, + "grad_norm": 4.736689567565918, + "learning_rate": 4.06984785615491e-05, + "log_odds_chosen": 2.2772843837738037, + "log_odds_ratio": -0.5770815014839172, + "logits/chosen": -0.677619218826294, + "logits/rejected": -0.6756543517112732, + "logps/chosen": -0.14869184792041779, + "logps/rejected": -0.6958863735198975, + "loss": 4.5143, + "nll_loss": 1.0708606243133545, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014869185164570808, + "rewards/margins": 0.054719455540180206, + "rewards/rejected": -0.06958863884210587, + "step": 1177 + }, + { + "epoch": 0.8146611341632088, + "grad_norm": 5.372425556182861, + "learning_rate": 4.073305670816044e-05, + "log_odds_chosen": 1.6330846548080444, + "log_odds_ratio": -0.6535148620605469, + "logits/chosen": -0.4990033805370331, + "logits/rejected": -0.548502504825592, + "logps/chosen": -0.1866636574268341, + "logps/rejected": -0.5533938407897949, + "loss": 3.2122, + "nll_loss": 0.7376972436904907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01866636611521244, + "rewards/margins": 0.03667302057147026, + "rewards/rejected": -0.05533938482403755, + "step": 1178 + }, + { + "epoch": 0.8153526970954357, + "grad_norm": 5.9537529945373535, + "learning_rate": 4.0767634854771785e-05, + "log_odds_chosen": 4.260707378387451, + "log_odds_ratio": -0.28442054986953735, + "logits/chosen": -0.19398483633995056, + "logits/rejected": -0.26974254846572876, + "logps/chosen": -0.1344795972108841, + "logps/rejected": -0.9466410875320435, + "loss": 4.0131, + "nll_loss": 0.9748204946517944, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013447960838675499, + "rewards/margins": 0.08121615648269653, + "rewards/rejected": -0.09466411918401718, + "step": 1179 + }, + { + "epoch": 0.8160442600276625, + "grad_norm": 5.193739891052246, + "learning_rate": 4.0802213001383126e-05, + "log_odds_chosen": 1.9125900268554688, + "log_odds_ratio": -0.5270196795463562, + "logits/chosen": -0.29638856649398804, + "logits/rejected": -0.3578253984451294, + "logps/chosen": -0.17957088351249695, + "logps/rejected": -0.4569128155708313, + "loss": 4.2902, + "nll_loss": 1.0198581218719482, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017957089468836784, + "rewards/margins": 0.027734192088246346, + "rewards/rejected": -0.04569128155708313, + "step": 1180 + }, + { + "epoch": 0.8167358229598893, + "grad_norm": 5.224289894104004, + "learning_rate": 4.083679114799447e-05, + "log_odds_chosen": 2.933523654937744, + "log_odds_ratio": -0.5448406934738159, + "logits/chosen": -0.3297680616378784, + "logits/rejected": -0.355973482131958, + "logps/chosen": -0.0969846174120903, + "logps/rejected": -0.5666028261184692, + "loss": 3.8264, + "nll_loss": 0.9021034836769104, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00969846174120903, + "rewards/margins": 0.046961817890405655, + "rewards/rejected": -0.056660279631614685, + "step": 1181 + }, + { + "epoch": 0.8174273858921162, + "grad_norm": 3.4837048053741455, + "learning_rate": 4.087136929460581e-05, + "log_odds_chosen": 3.5890541076660156, + "log_odds_ratio": -0.33530038595199585, + "logits/chosen": 0.014912977814674377, + "logits/rejected": 0.02515430748462677, + "logps/chosen": -0.10411328077316284, + "logps/rejected": -0.4772682189941406, + "loss": 3.3812, + "nll_loss": 0.811762809753418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010411329567432404, + "rewards/margins": 0.0373154915869236, + "rewards/rejected": -0.0477268248796463, + "step": 1182 + }, + { + "epoch": 0.818118948824343, + "grad_norm": 5.149781227111816, + "learning_rate": 4.090594744121715e-05, + "log_odds_chosen": 2.0537867546081543, + "log_odds_ratio": -0.48002955317497253, + "logits/chosen": -0.6776705980300903, + "logits/rejected": -0.7077109813690186, + "logps/chosen": -0.14051635563373566, + "logps/rejected": -0.6583629846572876, + "loss": 5.4693, + "nll_loss": 1.3193252086639404, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01405163574963808, + "rewards/margins": 0.051784664392471313, + "rewards/rejected": -0.06583630293607712, + "step": 1183 + }, + { + "epoch": 0.8188105117565698, + "grad_norm": 11.356409072875977, + "learning_rate": 4.094052558782849e-05, + "log_odds_chosen": 1.8903353214263916, + "log_odds_ratio": -0.8911072611808777, + "logits/chosen": -0.31842726469039917, + "logits/rejected": -0.4309629201889038, + "logps/chosen": -0.2517080307006836, + "logps/rejected": -0.5393136739730835, + "loss": 4.4805, + "nll_loss": 1.0310028791427612, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02517080307006836, + "rewards/margins": 0.02876056358218193, + "rewards/rejected": -0.05393137037754059, + "step": 1184 + }, + { + "epoch": 0.8195020746887967, + "grad_norm": 4.158552646636963, + "learning_rate": 4.0975103734439834e-05, + "log_odds_chosen": 1.3254547119140625, + "log_odds_ratio": -0.41943395137786865, + "logits/chosen": -0.2344731092453003, + "logits/rejected": -0.25372275710105896, + "logps/chosen": -0.095099076628685, + "logps/rejected": -0.2825375199317932, + "loss": 3.0469, + "nll_loss": 0.719789981842041, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00950990803539753, + "rewards/margins": 0.018743846565485, + "rewards/rejected": -0.02825375273823738, + "step": 1185 + }, + { + "epoch": 0.8201936376210235, + "grad_norm": 4.228912353515625, + "learning_rate": 4.1009681881051176e-05, + "log_odds_chosen": 1.841248631477356, + "log_odds_ratio": -0.42090776562690735, + "logits/chosen": -0.5038369297981262, + "logits/rejected": -0.577272891998291, + "logps/chosen": -0.3570842444896698, + "logps/rejected": -0.5847877264022827, + "loss": 4.0669, + "nll_loss": 0.9746286869049072, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03570842370390892, + "rewards/margins": 0.02277034893631935, + "rewards/rejected": -0.05847877264022827, + "step": 1186 + }, + { + "epoch": 0.8208852005532503, + "grad_norm": 3.153090238571167, + "learning_rate": 4.104426002766252e-05, + "log_odds_chosen": 2.0451748371124268, + "log_odds_ratio": -0.3823314309120178, + "logits/chosen": -0.6597636342048645, + "logits/rejected": -0.6733765006065369, + "logps/chosen": -0.09847903251647949, + "logps/rejected": -0.280222624540329, + "loss": 3.309, + "nll_loss": 0.7890222072601318, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009847903624176979, + "rewards/margins": 0.01817435771226883, + "rewards/rejected": -0.02802225947380066, + "step": 1187 + }, + { + "epoch": 0.8215767634854771, + "grad_norm": 4.841866970062256, + "learning_rate": 4.107883817427386e-05, + "log_odds_chosen": 1.6958292722702026, + "log_odds_ratio": -0.3945018947124481, + "logits/chosen": -0.5676984190940857, + "logits/rejected": -0.5837230682373047, + "logps/chosen": -0.08179913461208344, + "logps/rejected": -0.3788478672504425, + "loss": 3.481, + "nll_loss": 0.8307971954345703, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008179914206266403, + "rewards/margins": 0.029704870656132698, + "rewards/rejected": -0.03788478672504425, + "step": 1188 + }, + { + "epoch": 0.822268326417704, + "grad_norm": 4.054405689239502, + "learning_rate": 4.11134163208852e-05, + "log_odds_chosen": 2.147653818130493, + "log_odds_ratio": -0.4769085645675659, + "logits/chosen": -0.5489739179611206, + "logits/rejected": -0.5161327123641968, + "logps/chosen": -0.17612148821353912, + "logps/rejected": -0.3499046266078949, + "loss": 4.3547, + "nll_loss": 1.0409873723983765, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017612148076295853, + "rewards/margins": 0.017378315329551697, + "rewards/rejected": -0.03499046340584755, + "step": 1189 + }, + { + "epoch": 0.8229598893499308, + "grad_norm": 4.69357442855835, + "learning_rate": 4.114799446749654e-05, + "log_odds_chosen": 1.723274827003479, + "log_odds_ratio": -0.33592188358306885, + "logits/chosen": -0.20577648282051086, + "logits/rejected": -0.2108028531074524, + "logps/chosen": -0.11633970588445663, + "logps/rejected": -0.30491918325424194, + "loss": 2.9418, + "nll_loss": 0.7018574476242065, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011633969843387604, + "rewards/margins": 0.01885795034468174, + "rewards/rejected": -0.030491922050714493, + "step": 1190 + }, + { + "epoch": 0.8236514522821576, + "grad_norm": 6.797853946685791, + "learning_rate": 4.118257261410788e-05, + "log_odds_chosen": 2.888835906982422, + "log_odds_ratio": -0.3923826813697815, + "logits/chosen": -0.25407689809799194, + "logits/rejected": -0.2665678858757019, + "logps/chosen": -0.0858723446726799, + "logps/rejected": -0.7471990585327148, + "loss": 3.7509, + "nll_loss": 0.8984828591346741, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008587234653532505, + "rewards/margins": 0.06613267213106155, + "rewards/rejected": -0.07471990585327148, + "step": 1191 + }, + { + "epoch": 0.8243430152143845, + "grad_norm": 6.593566417694092, + "learning_rate": 4.1217150760719225e-05, + "log_odds_chosen": 2.051375150680542, + "log_odds_ratio": -0.8040463924407959, + "logits/chosen": -0.5109673738479614, + "logits/rejected": -0.4858384430408478, + "logps/chosen": -0.14845474064350128, + "logps/rejected": -0.3342825770378113, + "loss": 4.0657, + "nll_loss": 0.9360308051109314, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014845474623143673, + "rewards/margins": 0.018582783639431, + "rewards/rejected": -0.03342825919389725, + "step": 1192 + }, + { + "epoch": 0.8250345781466113, + "grad_norm": 6.4309892654418945, + "learning_rate": 4.1251728907330567e-05, + "log_odds_chosen": 1.1407532691955566, + "log_odds_ratio": -0.4700060486793518, + "logits/chosen": -0.5878971815109253, + "logits/rejected": -0.6174403429031372, + "logps/chosen": -0.09566640853881836, + "logps/rejected": -0.34187084436416626, + "loss": 4.4896, + "nll_loss": 1.0753902196884155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00956664141267538, + "rewards/margins": 0.02462044358253479, + "rewards/rejected": -0.03418708220124245, + "step": 1193 + }, + { + "epoch": 0.8257261410788381, + "grad_norm": 3.923752784729004, + "learning_rate": 4.128630705394191e-05, + "log_odds_chosen": 4.222731113433838, + "log_odds_ratio": -0.24456751346588135, + "logits/chosen": -0.7110618352890015, + "logits/rejected": -0.7427866458892822, + "logps/chosen": -0.09104707837104797, + "logps/rejected": -0.7605820894241333, + "loss": 3.5788, + "nll_loss": 0.8702382445335388, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009104708209633827, + "rewards/margins": 0.06695350259542465, + "rewards/rejected": -0.07605820894241333, + "step": 1194 + }, + { + "epoch": 0.826417704011065, + "grad_norm": 4.21205472946167, + "learning_rate": 4.132088520055325e-05, + "log_odds_chosen": 4.177135944366455, + "log_odds_ratio": -0.1285741925239563, + "logits/chosen": -0.6350801587104797, + "logits/rejected": -0.7226979732513428, + "logps/chosen": -0.0693359375, + "logps/rejected": -0.6927146315574646, + "loss": 4.1323, + "nll_loss": 1.0202298164367676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0069335936568677425, + "rewards/margins": 0.06233787164092064, + "rewards/rejected": -0.06927146017551422, + "step": 1195 + }, + { + "epoch": 0.8271092669432918, + "grad_norm": 4.5966105461120605, + "learning_rate": 4.135546334716459e-05, + "log_odds_chosen": 4.998106002807617, + "log_odds_ratio": -0.09226921945810318, + "logits/chosen": -0.345980167388916, + "logits/rejected": -0.3663310110569, + "logps/chosen": -0.04563899710774422, + "logps/rejected": -0.8056957721710205, + "loss": 3.638, + "nll_loss": 0.9002783298492432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004563899710774422, + "rewards/margins": 0.07600568234920502, + "rewards/rejected": -0.08056958019733429, + "step": 1196 + }, + { + "epoch": 0.8278008298755186, + "grad_norm": 5.137960433959961, + "learning_rate": 4.139004149377593e-05, + "log_odds_chosen": 3.515726089477539, + "log_odds_ratio": -0.2497054785490036, + "logits/chosen": -0.6006046533584595, + "logits/rejected": -0.5914702415466309, + "logps/chosen": -0.1347048431634903, + "logps/rejected": -0.7286680340766907, + "loss": 3.5016, + "nll_loss": 0.8504340648651123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013470484875142574, + "rewards/margins": 0.05939631909132004, + "rewards/rejected": -0.07286680489778519, + "step": 1197 + }, + { + "epoch": 0.8284923928077456, + "grad_norm": 7.066193103790283, + "learning_rate": 4.142461964038728e-05, + "log_odds_chosen": 2.3033316135406494, + "log_odds_ratio": -0.28070491552352905, + "logits/chosen": -0.29856306314468384, + "logits/rejected": -0.32309019565582275, + "logps/chosen": -0.09830452501773834, + "logps/rejected": -0.35360878705978394, + "loss": 3.6397, + "nll_loss": 0.8818494081497192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009830452501773834, + "rewards/margins": 0.02553042583167553, + "rewards/rejected": -0.035360876470804214, + "step": 1198 + }, + { + "epoch": 0.8291839557399724, + "grad_norm": 6.882563591003418, + "learning_rate": 4.145919778699862e-05, + "log_odds_chosen": 3.472243309020996, + "log_odds_ratio": -0.602714478969574, + "logits/chosen": -0.27655768394470215, + "logits/rejected": -0.3242935836315155, + "logps/chosen": -0.3016278147697449, + "logps/rejected": -0.7091034650802612, + "loss": 3.4584, + "nll_loss": 0.8043214678764343, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.030162781476974487, + "rewards/margins": 0.040747564285993576, + "rewards/rejected": -0.07091034948825836, + "step": 1199 + }, + { + "epoch": 0.8298755186721992, + "grad_norm": 5.0919013023376465, + "learning_rate": 4.1493775933609964e-05, + "log_odds_chosen": 2.8356375694274902, + "log_odds_ratio": -0.49583184719085693, + "logits/chosen": -0.5040990114212036, + "logits/rejected": -0.5087206363677979, + "logps/chosen": -0.18786540627479553, + "logps/rejected": -0.3985045552253723, + "loss": 4.3429, + "nll_loss": 1.0361515283584595, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018786542117595673, + "rewards/margins": 0.02106391452252865, + "rewards/rejected": -0.03985045477747917, + "step": 1200 + }, + { + "epoch": 0.830567081604426, + "grad_norm": 4.154881954193115, + "learning_rate": 4.1528354080221306e-05, + "log_odds_chosen": 2.0494563579559326, + "log_odds_ratio": -0.46288198232650757, + "logits/chosen": -0.6752278208732605, + "logits/rejected": -0.7129898071289062, + "logps/chosen": -0.1519179791212082, + "logps/rejected": -0.5215798020362854, + "loss": 4.4414, + "nll_loss": 1.0640610456466675, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015191798098385334, + "rewards/margins": 0.03696617856621742, + "rewards/rejected": -0.05215797945857048, + "step": 1201 + }, + { + "epoch": 0.8312586445366529, + "grad_norm": 6.282675743103027, + "learning_rate": 4.156293222683265e-05, + "log_odds_chosen": 1.862894058227539, + "log_odds_ratio": -0.7544838786125183, + "logits/chosen": -0.5247619152069092, + "logits/rejected": -0.5509651899337769, + "logps/chosen": -0.31346026062965393, + "logps/rejected": -0.48862025141716003, + "loss": 3.144, + "nll_loss": 0.7105435132980347, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03134603053331375, + "rewards/margins": 0.017515994608402252, + "rewards/rejected": -0.048862025141716, + "step": 1202 + }, + { + "epoch": 0.8319502074688797, + "grad_norm": 4.217153072357178, + "learning_rate": 4.159751037344399e-05, + "log_odds_chosen": 3.424147367477417, + "log_odds_ratio": -0.4018994867801666, + "logits/chosen": -0.4903850555419922, + "logits/rejected": -0.4391288459300995, + "logps/chosen": -0.13487458229064941, + "logps/rejected": -0.5529087781906128, + "loss": 3.0789, + "nll_loss": 0.7295363545417786, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013487459160387516, + "rewards/margins": 0.04180341958999634, + "rewards/rejected": -0.05529087781906128, + "step": 1203 + }, + { + "epoch": 0.8326417704011065, + "grad_norm": 4.596008777618408, + "learning_rate": 4.163208852005533e-05, + "log_odds_chosen": 5.339834213256836, + "log_odds_ratio": -0.16341853141784668, + "logits/chosen": -0.0553714781999588, + "logits/rejected": -0.09893985092639923, + "logps/chosen": -0.06320115923881531, + "logps/rejected": -0.8495229482650757, + "loss": 3.7488, + "nll_loss": 0.9208630919456482, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006320116110146046, + "rewards/margins": 0.0786321833729744, + "rewards/rejected": -0.08495229482650757, + "step": 1204 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 5.455927848815918, + "learning_rate": 4.166666666666667e-05, + "log_odds_chosen": 4.1446943283081055, + "log_odds_ratio": -0.21529428660869598, + "logits/chosen": -0.5013213753700256, + "logits/rejected": -0.5345667004585266, + "logps/chosen": -0.06707189232110977, + "logps/rejected": -0.7442373037338257, + "loss": 4.5042, + "nll_loss": 1.1045323610305786, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006707189604640007, + "rewards/margins": 0.06771654635667801, + "rewards/rejected": -0.07442373782396317, + "step": 1205 + }, + { + "epoch": 0.8340248962655602, + "grad_norm": 3.082413673400879, + "learning_rate": 4.1701244813278014e-05, + "log_odds_chosen": 4.069175720214844, + "log_odds_ratio": -0.1851607710123062, + "logits/chosen": -0.35634279251098633, + "logits/rejected": -0.36941099166870117, + "logps/chosen": -0.12784144282341003, + "logps/rejected": -0.7375493049621582, + "loss": 2.9003, + "nll_loss": 0.7065519094467163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012784144841134548, + "rewards/margins": 0.060970790684223175, + "rewards/rejected": -0.0737549364566803, + "step": 1206 + }, + { + "epoch": 0.834716459197787, + "grad_norm": 7.601669788360596, + "learning_rate": 4.1735822959889355e-05, + "log_odds_chosen": 0.334051251411438, + "log_odds_ratio": -1.0398133993148804, + "logits/chosen": -0.4660201072692871, + "logits/rejected": -0.48117709159851074, + "logps/chosen": -0.1395268589258194, + "logps/rejected": -0.13496336340904236, + "loss": 5.0357, + "nll_loss": 1.1549324989318848, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.01395268552005291, + "rewards/margins": -0.0004563478287309408, + "rewards/rejected": -0.013496337458491325, + "step": 1207 + }, + { + "epoch": 0.8354080221300139, + "grad_norm": 4.260904788970947, + "learning_rate": 4.17704011065007e-05, + "log_odds_chosen": 4.091191291809082, + "log_odds_ratio": -0.24443864822387695, + "logits/chosen": -0.08303473889827728, + "logits/rejected": -0.11515428870916367, + "logps/chosen": -0.04653660207986832, + "logps/rejected": -0.5629119277000427, + "loss": 3.546, + "nll_loss": 0.8620575070381165, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004653660114854574, + "rewards/margins": 0.05163753405213356, + "rewards/rejected": -0.05629119649529457, + "step": 1208 + }, + { + "epoch": 0.8360995850622407, + "grad_norm": 4.505746364593506, + "learning_rate": 4.180497925311204e-05, + "log_odds_chosen": 1.8157931566238403, + "log_odds_ratio": -0.4644305109977722, + "logits/chosen": -0.4604804813861847, + "logits/rejected": -0.4551182687282562, + "logps/chosen": -0.115799680352211, + "logps/rejected": -0.3373379707336426, + "loss": 3.9787, + "nll_loss": 0.9482215642929077, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0115799680352211, + "rewards/margins": 0.0221538282930851, + "rewards/rejected": -0.0337337963283062, + "step": 1209 + }, + { + "epoch": 0.8367911479944675, + "grad_norm": 4.050663948059082, + "learning_rate": 4.183955739972338e-05, + "log_odds_chosen": 1.7767119407653809, + "log_odds_ratio": -0.5598441362380981, + "logits/chosen": 0.06878723204135895, + "logits/rejected": 0.024741366505622864, + "logps/chosen": -0.1255115121603012, + "logps/rejected": -0.39500701427459717, + "loss": 3.7783, + "nll_loss": 0.8885964155197144, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012551150284707546, + "rewards/margins": 0.026949552819132805, + "rewards/rejected": -0.039500705897808075, + "step": 1210 + }, + { + "epoch": 0.8374827109266944, + "grad_norm": 5.5279221534729, + "learning_rate": 4.187413554633472e-05, + "log_odds_chosen": 2.310128927230835, + "log_odds_ratio": -0.36447837948799133, + "logits/chosen": -0.4867563545703888, + "logits/rejected": -0.48124808073043823, + "logps/chosen": -0.1323777288198471, + "logps/rejected": -0.5982121229171753, + "loss": 3.9506, + "nll_loss": 0.9512027502059937, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013237773440778255, + "rewards/margins": 0.04658343642950058, + "rewards/rejected": -0.05982121080160141, + "step": 1211 + }, + { + "epoch": 0.8381742738589212, + "grad_norm": 3.9652137756347656, + "learning_rate": 4.190871369294606e-05, + "log_odds_chosen": 4.73415994644165, + "log_odds_ratio": -0.16974598169326782, + "logits/chosen": -0.6232622861862183, + "logits/rejected": -0.7259473204612732, + "logps/chosen": -0.07471007108688354, + "logps/rejected": -0.8358129858970642, + "loss": 3.8503, + "nll_loss": 0.9455909729003906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007471007294952869, + "rewards/margins": 0.07611028850078583, + "rewards/rejected": -0.08358129858970642, + "step": 1212 + }, + { + "epoch": 0.838865836791148, + "grad_norm": 4.576741695404053, + "learning_rate": 4.1943291839557405e-05, + "log_odds_chosen": 1.4289312362670898, + "log_odds_ratio": -0.35713696479797363, + "logits/chosen": -0.029139623045921326, + "logits/rejected": -0.03655124455690384, + "logps/chosen": -0.1455044150352478, + "logps/rejected": -0.6169479489326477, + "loss": 3.7704, + "nll_loss": 0.906876802444458, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01455044187605381, + "rewards/margins": 0.04714436084032059, + "rewards/rejected": -0.06169480085372925, + "step": 1213 + }, + { + "epoch": 0.8395573997233748, + "grad_norm": 5.6564483642578125, + "learning_rate": 4.1977869986168746e-05, + "log_odds_chosen": 1.9149457216262817, + "log_odds_ratio": -0.5468197464942932, + "logits/chosen": -0.6435422897338867, + "logits/rejected": -0.6532891392707825, + "logps/chosen": -0.13478265702724457, + "logps/rejected": -0.4897097945213318, + "loss": 4.3445, + "nll_loss": 1.0314515829086304, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013478267006576061, + "rewards/margins": 0.03549271076917648, + "rewards/rejected": -0.04897098243236542, + "step": 1214 + }, + { + "epoch": 0.8402489626556017, + "grad_norm": 3.8609166145324707, + "learning_rate": 4.201244813278009e-05, + "log_odds_chosen": 3.7545604705810547, + "log_odds_ratio": -0.17774531245231628, + "logits/chosen": -0.030912477523088455, + "logits/rejected": -0.054544124752283096, + "logps/chosen": -0.20503610372543335, + "logps/rejected": -0.9788771867752075, + "loss": 2.4621, + "nll_loss": 0.5977532863616943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020503612235188484, + "rewards/margins": 0.0773840993642807, + "rewards/rejected": -0.09788771718740463, + "step": 1215 + }, + { + "epoch": 0.8409405255878285, + "grad_norm": 5.178175449371338, + "learning_rate": 4.204702627939143e-05, + "log_odds_chosen": 3.330840587615967, + "log_odds_ratio": -0.45695760846138, + "logits/chosen": -0.4083555340766907, + "logits/rejected": -0.3598038852214813, + "logps/chosen": -0.1588767021894455, + "logps/rejected": -0.6059430241584778, + "loss": 3.7545, + "nll_loss": 0.8929381966590881, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01588767021894455, + "rewards/margins": 0.044706642627716064, + "rewards/rejected": -0.060594312846660614, + "step": 1216 + }, + { + "epoch": 0.8416320885200553, + "grad_norm": 4.9921793937683105, + "learning_rate": 4.208160442600277e-05, + "log_odds_chosen": 3.022162437438965, + "log_odds_ratio": -0.4056708812713623, + "logits/chosen": -0.6954939365386963, + "logits/rejected": -0.6824483275413513, + "logps/chosen": -0.07887732237577438, + "logps/rejected": -0.39871978759765625, + "loss": 4.3949, + "nll_loss": 1.0581598281860352, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007887732237577438, + "rewards/margins": 0.031984247267246246, + "rewards/rejected": -0.039871979504823685, + "step": 1217 + }, + { + "epoch": 0.8423236514522822, + "grad_norm": 7.773573398590088, + "learning_rate": 4.211618257261411e-05, + "log_odds_chosen": 1.5267056226730347, + "log_odds_ratio": -0.7812448740005493, + "logits/chosen": -0.2832191288471222, + "logits/rejected": -0.28594040870666504, + "logps/chosen": -0.181090846657753, + "logps/rejected": -0.46923351287841797, + "loss": 4.4109, + "nll_loss": 1.0245888233184814, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01810908503830433, + "rewards/margins": 0.02881426364183426, + "rewards/rejected": -0.046923354268074036, + "step": 1218 + }, + { + "epoch": 0.843015214384509, + "grad_norm": 5.341215133666992, + "learning_rate": 4.2150760719225454e-05, + "log_odds_chosen": 2.800717353820801, + "log_odds_ratio": -0.35339123010635376, + "logits/chosen": -0.278065949678421, + "logits/rejected": -0.24943991005420685, + "logps/chosen": -0.10523054003715515, + "logps/rejected": -0.4062395989894867, + "loss": 3.6332, + "nll_loss": 0.8729555606842041, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010523054748773575, + "rewards/margins": 0.030100908130407333, + "rewards/rejected": -0.04062396287918091, + "step": 1219 + }, + { + "epoch": 0.8437067773167358, + "grad_norm": 6.229884624481201, + "learning_rate": 4.2185338865836796e-05, + "log_odds_chosen": 3.69413685798645, + "log_odds_ratio": -0.267327219247818, + "logits/chosen": -0.2839363217353821, + "logits/rejected": -0.33933788537979126, + "logps/chosen": -0.0874333381652832, + "logps/rejected": -0.7815597653388977, + "loss": 4.2217, + "nll_loss": 1.0287011861801147, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00874333456158638, + "rewards/margins": 0.06941264122724533, + "rewards/rejected": -0.07815597206354141, + "step": 1220 + }, + { + "epoch": 0.8443983402489627, + "grad_norm": 5.169261455535889, + "learning_rate": 4.221991701244814e-05, + "log_odds_chosen": 1.908298134803772, + "log_odds_ratio": -0.32451504468917847, + "logits/chosen": -0.8838093280792236, + "logits/rejected": -0.8724334836006165, + "logps/chosen": -0.18538136780261993, + "logps/rejected": -0.5294378995895386, + "loss": 5.8003, + "nll_loss": 1.417634129524231, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018538137897849083, + "rewards/margins": 0.034405652433633804, + "rewards/rejected": -0.05294378846883774, + "step": 1221 + }, + { + "epoch": 0.8450899031811895, + "grad_norm": 7.863767623901367, + "learning_rate": 4.225449515905948e-05, + "log_odds_chosen": 2.2556004524230957, + "log_odds_ratio": -0.3413243889808655, + "logits/chosen": -0.6394181251525879, + "logits/rejected": -0.670673668384552, + "logps/chosen": -0.14786472916603088, + "logps/rejected": -0.6458082795143127, + "loss": 4.1597, + "nll_loss": 1.0057915449142456, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014786472544074059, + "rewards/margins": 0.049794360995292664, + "rewards/rejected": -0.06458082795143127, + "step": 1222 + }, + { + "epoch": 0.8457814661134163, + "grad_norm": 5.6627936363220215, + "learning_rate": 4.228907330567082e-05, + "log_odds_chosen": 4.577823162078857, + "log_odds_ratio": -0.4264640808105469, + "logits/chosen": -0.3070671260356903, + "logits/rejected": -0.27830278873443604, + "logps/chosen": -0.05938584357500076, + "logps/rejected": -0.8323963284492493, + "loss": 2.6459, + "nll_loss": 0.6188327670097351, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005938584450632334, + "rewards/margins": 0.07730104774236679, + "rewards/rejected": -0.08323963731527328, + "step": 1223 + }, + { + "epoch": 0.8464730290456431, + "grad_norm": 3.9208574295043945, + "learning_rate": 4.232365145228216e-05, + "log_odds_chosen": 4.880911827087402, + "log_odds_ratio": -0.2090207040309906, + "logits/chosen": -0.39382532238960266, + "logits/rejected": -0.4027000665664673, + "logps/chosen": -0.08941195905208588, + "logps/rejected": -0.6881061792373657, + "loss": 3.1003, + "nll_loss": 0.7541638612747192, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008941195905208588, + "rewards/margins": 0.059869423508644104, + "rewards/rejected": -0.06881061941385269, + "step": 1224 + }, + { + "epoch": 0.84716459197787, + "grad_norm": 4.687501430511475, + "learning_rate": 4.23582295988935e-05, + "log_odds_chosen": 3.7448058128356934, + "log_odds_ratio": -0.3105209469795227, + "logits/chosen": -0.4358472228050232, + "logits/rejected": -0.42677658796310425, + "logps/chosen": -0.08816111832857132, + "logps/rejected": -0.5220236778259277, + "loss": 4.3466, + "nll_loss": 1.0556085109710693, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008816111832857132, + "rewards/margins": 0.04338625445961952, + "rewards/rejected": -0.052202366292476654, + "step": 1225 + }, + { + "epoch": 0.8478561549100968, + "grad_norm": 4.883336067199707, + "learning_rate": 4.2392807745504845e-05, + "log_odds_chosen": 2.8365492820739746, + "log_odds_ratio": -0.33779749274253845, + "logits/chosen": -0.3609054386615753, + "logits/rejected": -0.39662984013557434, + "logps/chosen": -0.20170946419239044, + "logps/rejected": -0.7530872821807861, + "loss": 4.0256, + "nll_loss": 0.9726265072822571, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020170947536826134, + "rewards/margins": 0.05513777956366539, + "rewards/rejected": -0.07530872523784637, + "step": 1226 + }, + { + "epoch": 0.8485477178423236, + "grad_norm": 3.9957022666931152, + "learning_rate": 4.2427385892116186e-05, + "log_odds_chosen": 1.8730344772338867, + "log_odds_ratio": -0.26740092039108276, + "logits/chosen": -0.2765858769416809, + "logits/rejected": -0.306190550327301, + "logps/chosen": -0.09251527488231659, + "logps/rejected": -0.4008828401565552, + "loss": 3.7071, + "nll_loss": 0.9000409245491028, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009251527488231659, + "rewards/margins": 0.03083675727248192, + "rewards/rejected": -0.04008828476071358, + "step": 1227 + }, + { + "epoch": 0.8492392807745505, + "grad_norm": 5.592484951019287, + "learning_rate": 4.246196403872753e-05, + "log_odds_chosen": 2.1070966720581055, + "log_odds_ratio": -0.45303577184677124, + "logits/chosen": -0.2678883373737335, + "logits/rejected": -0.3001551628112793, + "logps/chosen": -0.0664995014667511, + "logps/rejected": -0.4867702126502991, + "loss": 4.9924, + "nll_loss": 1.2028013467788696, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006649950053542852, + "rewards/margins": 0.042027074843645096, + "rewards/rejected": -0.04867701977491379, + "step": 1228 + }, + { + "epoch": 0.8499308437067773, + "grad_norm": 3.897392988204956, + "learning_rate": 4.249654218533887e-05, + "log_odds_chosen": 3.5367512702941895, + "log_odds_ratio": -0.3471705913543701, + "logits/chosen": -0.1914403736591339, + "logits/rejected": -0.2182171642780304, + "logps/chosen": -0.0768127590417862, + "logps/rejected": -0.5251092314720154, + "loss": 2.98, + "nll_loss": 0.7102901935577393, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0076812757179141045, + "rewards/margins": 0.04482964053750038, + "rewards/rejected": -0.05251092091202736, + "step": 1229 + }, + { + "epoch": 0.8506224066390041, + "grad_norm": 6.20721435546875, + "learning_rate": 4.253112033195021e-05, + "log_odds_chosen": 1.3702822923660278, + "log_odds_ratio": -0.45010262727737427, + "logits/chosen": -0.7466237545013428, + "logits/rejected": -0.7181574702262878, + "logps/chosen": -0.14990904927253723, + "logps/rejected": -0.45504340529441833, + "loss": 5.2804, + "nll_loss": 1.2751015424728394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014990905299782753, + "rewards/margins": 0.03051343560218811, + "rewards/rejected": -0.04550434276461601, + "step": 1230 + }, + { + "epoch": 0.851313969571231, + "grad_norm": 5.15730619430542, + "learning_rate": 4.256569847856155e-05, + "log_odds_chosen": 3.9428796768188477, + "log_odds_ratio": -0.23902566730976105, + "logits/chosen": -0.10978386551141739, + "logits/rejected": -0.1629532277584076, + "logps/chosen": -0.10805842280387878, + "logps/rejected": -0.7425420880317688, + "loss": 2.8282, + "nll_loss": 0.6831561923027039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010805842466652393, + "rewards/margins": 0.06344836950302124, + "rewards/rejected": -0.07425420731306076, + "step": 1231 + }, + { + "epoch": 0.8520055325034578, + "grad_norm": 5.834319591522217, + "learning_rate": 4.2600276625172894e-05, + "log_odds_chosen": 3.8613672256469727, + "log_odds_ratio": -0.2609718143939972, + "logits/chosen": -0.2552832365036011, + "logits/rejected": -0.30748167634010315, + "logps/chosen": -0.10061614215373993, + "logps/rejected": -0.9150368571281433, + "loss": 3.4123, + "nll_loss": 0.8269892930984497, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010061614215373993, + "rewards/margins": 0.08144207298755646, + "rewards/rejected": -0.09150368720293045, + "step": 1232 + }, + { + "epoch": 0.8526970954356846, + "grad_norm": 5.265030860900879, + "learning_rate": 4.2634854771784236e-05, + "log_odds_chosen": 3.142730236053467, + "log_odds_ratio": -0.35933810472488403, + "logits/chosen": -0.662431001663208, + "logits/rejected": -0.7470867037773132, + "logps/chosen": -0.06795337051153183, + "logps/rejected": -0.4906230568885803, + "loss": 3.967, + "nll_loss": 0.9558151364326477, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006795337423682213, + "rewards/margins": 0.04226697236299515, + "rewards/rejected": -0.04906231164932251, + "step": 1233 + }, + { + "epoch": 0.8533886583679114, + "grad_norm": 3.716066360473633, + "learning_rate": 4.266943291839558e-05, + "log_odds_chosen": 2.2964534759521484, + "log_odds_ratio": -0.5118023157119751, + "logits/chosen": -0.397636353969574, + "logits/rejected": -0.4446547031402588, + "logps/chosen": -0.19349884986877441, + "logps/rejected": -0.4441201090812683, + "loss": 3.864, + "nll_loss": 0.9148226976394653, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01934988610446453, + "rewards/margins": 0.02506212517619133, + "rewards/rejected": -0.04441201686859131, + "step": 1234 + }, + { + "epoch": 0.8540802213001383, + "grad_norm": 5.476595401763916, + "learning_rate": 4.270401106500692e-05, + "log_odds_chosen": 3.079970598220825, + "log_odds_ratio": -0.22505627572536469, + "logits/chosen": -0.44705498218536377, + "logits/rejected": -0.4541969299316406, + "logps/chosen": -0.08623314648866653, + "logps/rejected": -0.7812601327896118, + "loss": 4.4526, + "nll_loss": 1.090645670890808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008623314090073109, + "rewards/margins": 0.06950270384550095, + "rewards/rejected": -0.07812602072954178, + "step": 1235 + }, + { + "epoch": 0.8547717842323651, + "grad_norm": 4.144224166870117, + "learning_rate": 4.273858921161826e-05, + "log_odds_chosen": 3.158641815185547, + "log_odds_ratio": -0.24196459352970123, + "logits/chosen": -0.33807045221328735, + "logits/rejected": -0.30896690487861633, + "logps/chosen": -0.12600083649158478, + "logps/rejected": -0.7073169946670532, + "loss": 3.5475, + "nll_loss": 0.8626745343208313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012600085698068142, + "rewards/margins": 0.058131616562604904, + "rewards/rejected": -0.07073169946670532, + "step": 1236 + }, + { + "epoch": 0.8554633471645919, + "grad_norm": 4.101622581481934, + "learning_rate": 4.27731673582296e-05, + "log_odds_chosen": 4.937752723693848, + "log_odds_ratio": -0.21346403658390045, + "logits/chosen": -0.29906758666038513, + "logits/rejected": -0.35858187079429626, + "logps/chosen": -0.09986451268196106, + "logps/rejected": -0.7788125872612, + "loss": 3.6658, + "nll_loss": 0.895104706287384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00998645182698965, + "rewards/margins": 0.06789480149745941, + "rewards/rejected": -0.07788125425577164, + "step": 1237 + }, + { + "epoch": 0.8561549100968188, + "grad_norm": 10.665369987487793, + "learning_rate": 4.2807745504840944e-05, + "log_odds_chosen": 0.5083409547805786, + "log_odds_ratio": -0.9532041549682617, + "logits/chosen": -0.5494678020477295, + "logits/rejected": -0.549345850944519, + "logps/chosen": -0.2034616321325302, + "logps/rejected": -0.4447656571865082, + "loss": 5.7725, + "nll_loss": 1.3478106260299683, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02034616470336914, + "rewards/margins": 0.024130402132868767, + "rewards/rejected": -0.04447656869888306, + "step": 1238 + }, + { + "epoch": 0.8568464730290456, + "grad_norm": 4.152436256408691, + "learning_rate": 4.2842323651452285e-05, + "log_odds_chosen": 3.5537776947021484, + "log_odds_ratio": -0.4564933776855469, + "logits/chosen": -0.6331797242164612, + "logits/rejected": -0.64528888463974, + "logps/chosen": -0.11556783318519592, + "logps/rejected": -0.4677344262599945, + "loss": 3.8412, + "nll_loss": 0.9146407842636108, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011556783691048622, + "rewards/margins": 0.03521666303277016, + "rewards/rejected": -0.04677344113588333, + "step": 1239 + }, + { + "epoch": 0.8575380359612724, + "grad_norm": 4.356481552124023, + "learning_rate": 4.287690179806363e-05, + "log_odds_chosen": 2.6086325645446777, + "log_odds_ratio": -0.24501118063926697, + "logits/chosen": -0.3237851858139038, + "logits/rejected": -0.3254019618034363, + "logps/chosen": -0.10340925306081772, + "logps/rejected": -0.3051115870475769, + "loss": 3.9994, + "nll_loss": 0.9753445386886597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010340925306081772, + "rewards/margins": 0.02017023414373398, + "rewards/rejected": -0.03051115944981575, + "step": 1240 + }, + { + "epoch": 0.8582295988934993, + "grad_norm": 3.881535530090332, + "learning_rate": 4.291147994467496e-05, + "log_odds_chosen": 3.739959716796875, + "log_odds_ratio": -0.20326995849609375, + "logits/chosen": -0.5520289540290833, + "logits/rejected": -0.5822651386260986, + "logps/chosen": -0.13151343166828156, + "logps/rejected": -0.7232505679130554, + "loss": 3.8902, + "nll_loss": 0.9522209167480469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013151343911886215, + "rewards/margins": 0.059173714369535446, + "rewards/rejected": -0.07232505828142166, + "step": 1241 + }, + { + "epoch": 0.8589211618257261, + "grad_norm": 5.007400989532471, + "learning_rate": 4.29460580912863e-05, + "log_odds_chosen": 0.6331380009651184, + "log_odds_ratio": -0.6973768472671509, + "logits/chosen": -0.3870214521884918, + "logits/rejected": -0.386497437953949, + "logps/chosen": -0.23055224120616913, + "logps/rejected": -0.3669086992740631, + "loss": 4.5481, + "nll_loss": 1.0672754049301147, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023055225610733032, + "rewards/margins": 0.013635647483170033, + "rewards/rejected": -0.03669087216258049, + "step": 1242 + }, + { + "epoch": 0.859612724757953, + "grad_norm": 4.997777462005615, + "learning_rate": 4.298063623789765e-05, + "log_odds_chosen": 2.6052985191345215, + "log_odds_ratio": -0.16318482160568237, + "logits/chosen": -0.5671769380569458, + "logits/rejected": -0.607761800289154, + "logps/chosen": -0.06360312551259995, + "logps/rejected": -0.5905706286430359, + "loss": 4.4011, + "nll_loss": 1.0839494466781616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006360312458127737, + "rewards/margins": 0.052696749567985535, + "rewards/rejected": -0.05905706062912941, + "step": 1243 + }, + { + "epoch": 0.8603042876901799, + "grad_norm": 3.7530622482299805, + "learning_rate": 4.301521438450899e-05, + "log_odds_chosen": 3.010711669921875, + "log_odds_ratio": -0.33168599009513855, + "logits/chosen": -0.6687760353088379, + "logits/rejected": -0.7112399339675903, + "logps/chosen": -0.12361060082912445, + "logps/rejected": -0.4488832950592041, + "loss": 3.901, + "nll_loss": 0.9420774579048157, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01236105989664793, + "rewards/margins": 0.032527267932891846, + "rewards/rejected": -0.04488833248615265, + "step": 1244 + }, + { + "epoch": 0.8609958506224067, + "grad_norm": 3.3816781044006348, + "learning_rate": 4.3049792531120335e-05, + "log_odds_chosen": 5.055641174316406, + "log_odds_ratio": -0.3109224736690521, + "logits/chosen": -0.47822022438049316, + "logits/rejected": -0.48512572050094604, + "logps/chosen": -0.06640556454658508, + "logps/rejected": -0.7292112708091736, + "loss": 2.9014, + "nll_loss": 0.6942633986473083, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006640556268393993, + "rewards/margins": 0.06628057360649109, + "rewards/rejected": -0.07292113453149796, + "step": 1245 + }, + { + "epoch": 0.8616874135546335, + "grad_norm": 4.799623489379883, + "learning_rate": 4.3084370677731676e-05, + "log_odds_chosen": 3.1673483848571777, + "log_odds_ratio": -0.23026300966739655, + "logits/chosen": -0.5659746527671814, + "logits/rejected": -0.6104631423950195, + "logps/chosen": -0.1196167916059494, + "logps/rejected": -0.6679284572601318, + "loss": 3.9385, + "nll_loss": 0.9616028666496277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011961679905653, + "rewards/margins": 0.05483117699623108, + "rewards/rejected": -0.06679285317659378, + "step": 1246 + }, + { + "epoch": 0.8623789764868603, + "grad_norm": 4.420276165008545, + "learning_rate": 4.311894882434302e-05, + "log_odds_chosen": 2.7055413722991943, + "log_odds_ratio": -0.4239078760147095, + "logits/chosen": -0.6248461604118347, + "logits/rejected": -0.5993098616600037, + "logps/chosen": -0.15310505032539368, + "logps/rejected": -0.5843885540962219, + "loss": 3.6267, + "nll_loss": 0.8642741441726685, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015310506336390972, + "rewards/margins": 0.043128348886966705, + "rewards/rejected": -0.05843885987997055, + "step": 1247 + }, + { + "epoch": 0.8630705394190872, + "grad_norm": 5.892736911773682, + "learning_rate": 4.315352697095436e-05, + "log_odds_chosen": 0.3708970546722412, + "log_odds_ratio": -0.6783088445663452, + "logits/chosen": -0.5890284180641174, + "logits/rejected": -0.6571257710456848, + "logps/chosen": -0.2017858624458313, + "logps/rejected": -0.2839735746383667, + "loss": 4.9916, + "nll_loss": 1.1800813674926758, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02017858810722828, + "rewards/margins": 0.008218769915401936, + "rewards/rejected": -0.02839735709130764, + "step": 1248 + }, + { + "epoch": 0.863762102351314, + "grad_norm": 4.3697614669799805, + "learning_rate": 4.31881051175657e-05, + "log_odds_chosen": 3.982241153717041, + "log_odds_ratio": -0.3116607069969177, + "logits/chosen": -0.20677393674850464, + "logits/rejected": -0.2635921537876129, + "logps/chosen": -0.052336186170578, + "logps/rejected": -0.4851013123989105, + "loss": 3.9766, + "nll_loss": 0.9629923105239868, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005233618896454573, + "rewards/margins": 0.04327651113271713, + "rewards/rejected": -0.04851013422012329, + "step": 1249 + }, + { + "epoch": 0.8644536652835408, + "grad_norm": 5.986281394958496, + "learning_rate": 4.322268326417704e-05, + "log_odds_chosen": 3.4355010986328125, + "log_odds_ratio": -0.6999623775482178, + "logits/chosen": -0.4995029866695404, + "logits/rejected": -0.517444372177124, + "logps/chosen": -0.18629102408885956, + "logps/rejected": -0.836786687374115, + "loss": 4.1068, + "nll_loss": 0.9566999673843384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018629100173711777, + "rewards/margins": 0.06504955887794495, + "rewards/rejected": -0.08367866277694702, + "step": 1250 + }, + { + "epoch": 0.8651452282157677, + "grad_norm": 5.7515363693237305, + "learning_rate": 4.3257261410788384e-05, + "log_odds_chosen": 1.052149772644043, + "log_odds_ratio": -0.4477373957633972, + "logits/chosen": -0.34615832567214966, + "logits/rejected": -0.34621891379356384, + "logps/chosen": -0.11608318239450455, + "logps/rejected": -0.31631356477737427, + "loss": 4.7191, + "nll_loss": 1.1350071430206299, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011608317494392395, + "rewards/margins": 0.020023038610816002, + "rewards/rejected": -0.031631357967853546, + "step": 1251 + }, + { + "epoch": 0.8658367911479945, + "grad_norm": 5.770718574523926, + "learning_rate": 4.3291839557399726e-05, + "log_odds_chosen": 1.1486060619354248, + "log_odds_ratio": -0.5084373950958252, + "logits/chosen": -0.21859028935432434, + "logits/rejected": -0.2105821967124939, + "logps/chosen": -0.16546833515167236, + "logps/rejected": -0.3545966148376465, + "loss": 4.3302, + "nll_loss": 1.0317028760910034, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016546836122870445, + "rewards/margins": 0.018912825733423233, + "rewards/rejected": -0.03545965999364853, + "step": 1252 + }, + { + "epoch": 0.8665283540802213, + "grad_norm": 4.072329998016357, + "learning_rate": 4.332641770401107e-05, + "log_odds_chosen": 4.862940788269043, + "log_odds_ratio": -0.339602530002594, + "logits/chosen": 0.11724947392940521, + "logits/rejected": 0.12245957553386688, + "logps/chosen": -0.05847052484750748, + "logps/rejected": -0.5670610070228577, + "loss": 2.1563, + "nll_loss": 0.5051190853118896, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005847052205353975, + "rewards/margins": 0.05085904896259308, + "rewards/rejected": -0.05670610070228577, + "step": 1253 + }, + { + "epoch": 0.8672199170124482, + "grad_norm": 3.4656217098236084, + "learning_rate": 4.336099585062241e-05, + "log_odds_chosen": 3.4801251888275146, + "log_odds_ratio": -0.31768786907196045, + "logits/chosen": -0.5136781930923462, + "logits/rejected": -0.5579401850700378, + "logps/chosen": -0.18964815139770508, + "logps/rejected": -0.8065962791442871, + "loss": 3.0265, + "nll_loss": 0.7248650193214417, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01896481402218342, + "rewards/margins": 0.06169482320547104, + "rewards/rejected": -0.08065963536500931, + "step": 1254 + }, + { + "epoch": 0.867911479944675, + "grad_norm": 7.574423313140869, + "learning_rate": 4.339557399723375e-05, + "log_odds_chosen": 2.4200801849365234, + "log_odds_ratio": -0.5579121112823486, + "logits/chosen": -0.8971307873725891, + "logits/rejected": -0.8899210691452026, + "logps/chosen": -0.07807845622301102, + "logps/rejected": -0.6198257207870483, + "loss": 3.4674, + "nll_loss": 0.8110649585723877, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0078078461810946465, + "rewards/margins": 0.054174721240997314, + "rewards/rejected": -0.061982572078704834, + "step": 1255 + }, + { + "epoch": 0.8686030428769018, + "grad_norm": 2.9485676288604736, + "learning_rate": 4.343015214384509e-05, + "log_odds_chosen": 5.759621620178223, + "log_odds_ratio": -0.08616334944963455, + "logits/chosen": -0.1814260631799698, + "logits/rejected": -0.18436534702777863, + "logps/chosen": -0.032411232590675354, + "logps/rejected": -0.5909621119499207, + "loss": 2.5601, + "nll_loss": 0.6313992142677307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003241123864427209, + "rewards/margins": 0.05585508793592453, + "rewards/rejected": -0.059096213430166245, + "step": 1256 + }, + { + "epoch": 0.8692946058091287, + "grad_norm": 5.415265083312988, + "learning_rate": 4.346473029045643e-05, + "log_odds_chosen": 2.480560779571533, + "log_odds_ratio": -0.4247099459171295, + "logits/chosen": -0.4166119694709778, + "logits/rejected": -0.4697090685367584, + "logps/chosen": -0.14666330814361572, + "logps/rejected": -0.5061662793159485, + "loss": 4.6707, + "nll_loss": 1.1251946687698364, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014666330069303513, + "rewards/margins": 0.03595029562711716, + "rewards/rejected": -0.05061662942171097, + "step": 1257 + }, + { + "epoch": 0.8699861687413555, + "grad_norm": 3.902346611022949, + "learning_rate": 4.3499308437067775e-05, + "log_odds_chosen": 2.743802070617676, + "log_odds_ratio": -0.3696817457675934, + "logits/chosen": -0.5543731451034546, + "logits/rejected": -0.5735875368118286, + "logps/chosen": -0.08180180191993713, + "logps/rejected": -0.4036181569099426, + "loss": 3.124, + "nll_loss": 0.7440320253372192, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008180180564522743, + "rewards/margins": 0.03218163549900055, + "rewards/rejected": -0.04036181420087814, + "step": 1258 + }, + { + "epoch": 0.8706777316735823, + "grad_norm": 5.200778961181641, + "learning_rate": 4.3533886583679116e-05, + "log_odds_chosen": 0.7463172078132629, + "log_odds_ratio": -0.7367535829544067, + "logits/chosen": -0.5876007676124573, + "logits/rejected": -0.5731717944145203, + "logps/chosen": -0.2275894433259964, + "logps/rejected": -0.3888647258281708, + "loss": 3.7748, + "nll_loss": 0.8700259923934937, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02275894396007061, + "rewards/margins": 0.016127530485391617, + "rewards/rejected": -0.03888647258281708, + "step": 1259 + }, + { + "epoch": 0.8713692946058091, + "grad_norm": 6.475817680358887, + "learning_rate": 4.356846473029046e-05, + "log_odds_chosen": 0.4220742881298065, + "log_odds_ratio": -0.5629880428314209, + "logits/chosen": -0.4277142286300659, + "logits/rejected": -0.4545121192932129, + "logps/chosen": -0.16203731298446655, + "logps/rejected": -0.28819459676742554, + "loss": 5.9425, + "nll_loss": 1.4293360710144043, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016203733161091805, + "rewards/margins": 0.012615729123353958, + "rewards/rejected": -0.028819462284445763, + "step": 1260 + }, + { + "epoch": 0.872060857538036, + "grad_norm": 3.48825740814209, + "learning_rate": 4.36030428769018e-05, + "log_odds_chosen": 4.7352705001831055, + "log_odds_ratio": -0.20084872841835022, + "logits/chosen": -0.5206946134567261, + "logits/rejected": -0.561223030090332, + "logps/chosen": -0.06274496763944626, + "logps/rejected": -0.6954832077026367, + "loss": 3.657, + "nll_loss": 0.8941764235496521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006274497136473656, + "rewards/margins": 0.0632738322019577, + "rewards/rejected": -0.06954832375049591, + "step": 1261 + }, + { + "epoch": 0.8727524204702628, + "grad_norm": 4.392824649810791, + "learning_rate": 4.363762102351314e-05, + "log_odds_chosen": 1.6250078678131104, + "log_odds_ratio": -0.30818629264831543, + "logits/chosen": -0.6393093466758728, + "logits/rejected": -0.6915218830108643, + "logps/chosen": -0.08740460127592087, + "logps/rejected": -0.2715250849723816, + "loss": 4.279, + "nll_loss": 1.038927435874939, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008740460500121117, + "rewards/margins": 0.018412049859762192, + "rewards/rejected": -0.02715251035988331, + "step": 1262 + }, + { + "epoch": 0.8734439834024896, + "grad_norm": 7.467405796051025, + "learning_rate": 4.367219917012448e-05, + "log_odds_chosen": 2.354684829711914, + "log_odds_ratio": -0.36135029792785645, + "logits/chosen": -0.4317726492881775, + "logits/rejected": -0.49516892433166504, + "logps/chosen": -0.13952602446079254, + "logps/rejected": -0.6113516092300415, + "loss": 4.828, + "nll_loss": 1.170866847038269, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013952603563666344, + "rewards/margins": 0.04718255624175072, + "rewards/rejected": -0.06113515794277191, + "step": 1263 + }, + { + "epoch": 0.8741355463347165, + "grad_norm": 4.659549236297607, + "learning_rate": 4.3706777316735824e-05, + "log_odds_chosen": 3.3404979705810547, + "log_odds_ratio": -0.2198869287967682, + "logits/chosen": -0.44002625346183777, + "logits/rejected": -0.5099364519119263, + "logps/chosen": -0.07723425328731537, + "logps/rejected": -0.7068568468093872, + "loss": 3.4741, + "nll_loss": 0.8465284109115601, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0077234250493347645, + "rewards/margins": 0.06296226382255554, + "rewards/rejected": -0.07068568468093872, + "step": 1264 + }, + { + "epoch": 0.8748271092669433, + "grad_norm": 3.586311101913452, + "learning_rate": 4.3741355463347166e-05, + "log_odds_chosen": 2.737506151199341, + "log_odds_ratio": -0.3485250771045685, + "logits/chosen": -0.48119568824768066, + "logits/rejected": -0.4383758306503296, + "logps/chosen": -0.12813690304756165, + "logps/rejected": -0.4432229995727539, + "loss": 3.5006, + "nll_loss": 0.8403025269508362, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012813691049814224, + "rewards/margins": 0.031508613377809525, + "rewards/rejected": -0.04432230070233345, + "step": 1265 + }, + { + "epoch": 0.8755186721991701, + "grad_norm": 5.795801639556885, + "learning_rate": 4.377593360995851e-05, + "log_odds_chosen": 1.0400015115737915, + "log_odds_ratio": -0.7847639322280884, + "logits/chosen": -0.7246870994567871, + "logits/rejected": -0.7208892107009888, + "logps/chosen": -0.1682136058807373, + "logps/rejected": -0.30135980248451233, + "loss": 5.2164, + "nll_loss": 1.2256345748901367, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01682136207818985, + "rewards/margins": 0.013314621523022652, + "rewards/rejected": -0.030135981738567352, + "step": 1266 + }, + { + "epoch": 0.876210235131397, + "grad_norm": 4.90360164642334, + "learning_rate": 4.381051175656985e-05, + "log_odds_chosen": 1.7198656797409058, + "log_odds_ratio": -0.39128538966178894, + "logits/chosen": -0.7646574974060059, + "logits/rejected": -0.8175445795059204, + "logps/chosen": -0.1509498655796051, + "logps/rejected": -0.6070072650909424, + "loss": 4.1454, + "nll_loss": 0.9972254037857056, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01509498618543148, + "rewards/margins": 0.04560573399066925, + "rewards/rejected": -0.06070072203874588, + "step": 1267 + }, + { + "epoch": 0.8769017980636238, + "grad_norm": 4.177268028259277, + "learning_rate": 4.384508990318119e-05, + "log_odds_chosen": 0.8822661638259888, + "log_odds_ratio": -0.5075497627258301, + "logits/chosen": -0.3998650908470154, + "logits/rejected": -0.40977737307548523, + "logps/chosen": -0.173186793923378, + "logps/rejected": -0.38849419355392456, + "loss": 3.9601, + "nll_loss": 0.9392576217651367, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01731867901980877, + "rewards/margins": 0.021530739963054657, + "rewards/rejected": -0.03884941712021828, + "step": 1268 + }, + { + "epoch": 0.8775933609958506, + "grad_norm": 2.707632303237915, + "learning_rate": 4.387966804979253e-05, + "log_odds_chosen": 1.4121384620666504, + "log_odds_ratio": -0.3443738520145416, + "logits/chosen": -0.6001982092857361, + "logits/rejected": -0.606619119644165, + "logps/chosen": -0.09227811545133591, + "logps/rejected": -0.4715130925178528, + "loss": 3.0712, + "nll_loss": 0.7333630919456482, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009227811358869076, + "rewards/margins": 0.03792349994182587, + "rewards/rejected": -0.04715131223201752, + "step": 1269 + }, + { + "epoch": 0.8782849239280774, + "grad_norm": 5.621554374694824, + "learning_rate": 4.3914246196403874e-05, + "log_odds_chosen": 2.2095069885253906, + "log_odds_ratio": -0.3407396078109741, + "logits/chosen": -0.4042474031448364, + "logits/rejected": -0.461988240480423, + "logps/chosen": -0.0528687983751297, + "logps/rejected": -0.4888181686401367, + "loss": 4.8491, + "nll_loss": 1.1781988143920898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005286880303174257, + "rewards/margins": 0.04359494149684906, + "rewards/rejected": -0.04888181760907173, + "step": 1270 + }, + { + "epoch": 0.8789764868603043, + "grad_norm": 4.035280704498291, + "learning_rate": 4.3948824343015215e-05, + "log_odds_chosen": 1.842458963394165, + "log_odds_ratio": -0.353007972240448, + "logits/chosen": -0.1685333549976349, + "logits/rejected": -0.2191891223192215, + "logps/chosen": -0.09284445643424988, + "logps/rejected": -0.5506631135940552, + "loss": 3.2703, + "nll_loss": 0.782278835773468, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009284445084631443, + "rewards/margins": 0.04578186571598053, + "rewards/rejected": -0.0550663098692894, + "step": 1271 + }, + { + "epoch": 0.8796680497925311, + "grad_norm": 6.146232604980469, + "learning_rate": 4.398340248962656e-05, + "log_odds_chosen": 2.9054505825042725, + "log_odds_ratio": -0.2413417100906372, + "logits/chosen": -0.11257967352867126, + "logits/rejected": -0.18791714310646057, + "logps/chosen": -0.0745718702673912, + "logps/rejected": -0.5622603893280029, + "loss": 5.5758, + "nll_loss": 1.3698159456253052, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007457186467945576, + "rewards/margins": 0.048768848180770874, + "rewards/rejected": -0.056226037442684174, + "step": 1272 + }, + { + "epoch": 0.8803596127247579, + "grad_norm": 4.25452184677124, + "learning_rate": 4.40179806362379e-05, + "log_odds_chosen": 3.095830202102661, + "log_odds_ratio": -0.48948919773101807, + "logits/chosen": -0.1420711725950241, + "logits/rejected": -0.12481048703193665, + "logps/chosen": -0.09185618162155151, + "logps/rejected": -0.44011950492858887, + "loss": 3.1873, + "nll_loss": 0.7478775382041931, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009185617789626122, + "rewards/margins": 0.034826330840587616, + "rewards/rejected": -0.04401195049285889, + "step": 1273 + }, + { + "epoch": 0.8810511756569848, + "grad_norm": 2.990342855453491, + "learning_rate": 4.405255878284924e-05, + "log_odds_chosen": 4.47576904296875, + "log_odds_ratio": -0.22933019697666168, + "logits/chosen": -0.43872299790382385, + "logits/rejected": -0.47745269536972046, + "logps/chosen": -0.11944451928138733, + "logps/rejected": -0.690141499042511, + "loss": 2.723, + "nll_loss": 0.6578062772750854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011944452300667763, + "rewards/margins": 0.057069696485996246, + "rewards/rejected": -0.06901414692401886, + "step": 1274 + }, + { + "epoch": 0.8817427385892116, + "grad_norm": 4.838727951049805, + "learning_rate": 4.408713692946058e-05, + "log_odds_chosen": 0.7123834490776062, + "log_odds_ratio": -0.63567054271698, + "logits/chosen": -0.774634599685669, + "logits/rejected": -0.7992992401123047, + "logps/chosen": -0.16042813658714294, + "logps/rejected": -0.3580513894557953, + "loss": 4.8929, + "nll_loss": 1.1596554517745972, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.016042813658714294, + "rewards/margins": 0.019762322306632996, + "rewards/rejected": -0.03580513596534729, + "step": 1275 + }, + { + "epoch": 0.8824343015214384, + "grad_norm": 3.9668753147125244, + "learning_rate": 4.412171507607192e-05, + "log_odds_chosen": 4.125424385070801, + "log_odds_ratio": -0.0830477774143219, + "logits/chosen": -0.3952116370201111, + "logits/rejected": -0.45113319158554077, + "logps/chosen": -0.04570968449115753, + "logps/rejected": -0.5450409054756165, + "loss": 3.504, + "nll_loss": 0.8676958084106445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004570968449115753, + "rewards/margins": 0.04993312433362007, + "rewards/rejected": -0.054504092782735825, + "step": 1276 + }, + { + "epoch": 0.8831258644536653, + "grad_norm": 3.731466770172119, + "learning_rate": 4.4156293222683265e-05, + "log_odds_chosen": 1.79964280128479, + "log_odds_ratio": -0.48185083270072937, + "logits/chosen": -0.6289352774620056, + "logits/rejected": -0.6494641304016113, + "logps/chosen": -0.13553780317306519, + "logps/rejected": -0.45093098282814026, + "loss": 3.3266, + "nll_loss": 0.7834726572036743, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013553779572248459, + "rewards/margins": 0.031539320945739746, + "rewards/rejected": -0.045093100517988205, + "step": 1277 + }, + { + "epoch": 0.8838174273858921, + "grad_norm": 4.510237216949463, + "learning_rate": 4.4190871369294606e-05, + "log_odds_chosen": 1.7436089515686035, + "log_odds_ratio": -0.4460057020187378, + "logits/chosen": 0.017364047467708588, + "logits/rejected": 0.0052915215492248535, + "logps/chosen": -0.13879433274269104, + "logps/rejected": -0.4545135200023651, + "loss": 3.6902, + "nll_loss": 0.877946138381958, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013879433274269104, + "rewards/margins": 0.031571924686431885, + "rewards/rejected": -0.04545135423541069, + "step": 1278 + }, + { + "epoch": 0.8845089903181189, + "grad_norm": 3.648057222366333, + "learning_rate": 4.422544951590595e-05, + "log_odds_chosen": 3.3090474605560303, + "log_odds_ratio": -0.20653875172138214, + "logits/chosen": -0.4803401529788971, + "logits/rejected": -0.516859769821167, + "logps/chosen": -0.08723768591880798, + "logps/rejected": -0.630642294883728, + "loss": 3.0028, + "nll_loss": 0.7300387620925903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008723769336938858, + "rewards/margins": 0.054340463131666183, + "rewards/rejected": -0.06306423246860504, + "step": 1279 + }, + { + "epoch": 0.8852005532503457, + "grad_norm": 4.398556232452393, + "learning_rate": 4.426002766251729e-05, + "log_odds_chosen": 1.5525989532470703, + "log_odds_ratio": -0.5788559317588806, + "logits/chosen": -0.4554845690727234, + "logits/rejected": -0.44896024465560913, + "logps/chosen": -0.17942111194133759, + "logps/rejected": -0.49931180477142334, + "loss": 3.7082, + "nll_loss": 0.8691673874855042, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017942111939191818, + "rewards/margins": 0.031989071518182755, + "rewards/rejected": -0.04993118345737457, + "step": 1280 + }, + { + "epoch": 0.8858921161825726, + "grad_norm": 5.154435634613037, + "learning_rate": 4.429460580912863e-05, + "log_odds_chosen": 1.6350841522216797, + "log_odds_ratio": -0.35474780201911926, + "logits/chosen": -0.5933663249015808, + "logits/rejected": -0.6430088877677917, + "logps/chosen": -0.11515937745571136, + "logps/rejected": -0.4523059129714966, + "loss": 4.1953, + "nll_loss": 1.0133419036865234, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011515937745571136, + "rewards/margins": 0.0337146520614624, + "rewards/rejected": -0.04523058980703354, + "step": 1281 + }, + { + "epoch": 0.8865836791147994, + "grad_norm": 4.977684020996094, + "learning_rate": 4.432918395573997e-05, + "log_odds_chosen": 5.273676872253418, + "log_odds_ratio": -0.21022650599479675, + "logits/chosen": -0.4650052487850189, + "logits/rejected": -0.5259711742401123, + "logps/chosen": -0.09879347681999207, + "logps/rejected": -0.7697173357009888, + "loss": 3.1604, + "nll_loss": 0.7690898180007935, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009879347868263721, + "rewards/margins": 0.06709238886833191, + "rewards/rejected": -0.07697173207998276, + "step": 1282 + }, + { + "epoch": 0.8872752420470262, + "grad_norm": 3.8348000049591064, + "learning_rate": 4.4363762102351314e-05, + "log_odds_chosen": 4.379862308502197, + "log_odds_ratio": -0.35318854451179504, + "logits/chosen": -0.4663954973220825, + "logits/rejected": -0.5029880404472351, + "logps/chosen": -0.07573398947715759, + "logps/rejected": -0.7639623880386353, + "loss": 2.9837, + "nll_loss": 0.7106069922447205, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007573399692773819, + "rewards/margins": 0.06882283836603165, + "rewards/rejected": -0.07639623433351517, + "step": 1283 + }, + { + "epoch": 0.8879668049792531, + "grad_norm": 5.035210609436035, + "learning_rate": 4.4398340248962656e-05, + "log_odds_chosen": 1.4314640760421753, + "log_odds_ratio": -0.4312036335468292, + "logits/chosen": -0.406145840883255, + "logits/rejected": -0.36703863739967346, + "logps/chosen": -0.12340890616178513, + "logps/rejected": -0.3170912265777588, + "loss": 4.0451, + "nll_loss": 0.9681638479232788, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012340890243649483, + "rewards/margins": 0.019368231296539307, + "rewards/rejected": -0.03170912340283394, + "step": 1284 + }, + { + "epoch": 0.8886583679114799, + "grad_norm": 5.095395088195801, + "learning_rate": 4.4432918395574e-05, + "log_odds_chosen": 0.9760552048683167, + "log_odds_ratio": -0.7666757702827454, + "logits/chosen": -0.4045482277870178, + "logits/rejected": -0.4359784722328186, + "logps/chosen": -0.3768349587917328, + "logps/rejected": -0.4175845980644226, + "loss": 4.5482, + "nll_loss": 1.060389518737793, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03768349438905716, + "rewards/margins": 0.00407496839761734, + "rewards/rejected": -0.0417584627866745, + "step": 1285 + }, + { + "epoch": 0.8893499308437067, + "grad_norm": 2.2948174476623535, + "learning_rate": 4.446749654218534e-05, + "log_odds_chosen": 3.8000855445861816, + "log_odds_ratio": -0.1091739609837532, + "logits/chosen": -0.6121377944946289, + "logits/rejected": -0.568754255771637, + "logps/chosen": -0.044313665479421616, + "logps/rejected": -0.5707491636276245, + "loss": 2.6511, + "nll_loss": 0.6518504619598389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004431366454809904, + "rewards/margins": 0.0526435561478138, + "rewards/rejected": -0.05707491934299469, + "step": 1286 + }, + { + "epoch": 0.8900414937759336, + "grad_norm": 4.04536247253418, + "learning_rate": 4.450207468879668e-05, + "log_odds_chosen": 3.1700429916381836, + "log_odds_ratio": -0.4066445827484131, + "logits/chosen": -0.6255698204040527, + "logits/rejected": -0.6513446569442749, + "logps/chosen": -0.11673114448785782, + "logps/rejected": -0.700387716293335, + "loss": 3.6983, + "nll_loss": 0.8839007616043091, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011673114262521267, + "rewards/margins": 0.058365657925605774, + "rewards/rejected": -0.07003877311944962, + "step": 1287 + }, + { + "epoch": 0.8907330567081605, + "grad_norm": 7.219163417816162, + "learning_rate": 4.453665283540803e-05, + "log_odds_chosen": 3.674023151397705, + "log_odds_ratio": -0.47398021817207336, + "logits/chosen": -0.39232513308525085, + "logits/rejected": -0.41735202074050903, + "logps/chosen": -0.10538452863693237, + "logps/rejected": -0.6500795483589172, + "loss": 4.1308, + "nll_loss": 0.9852948188781738, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010538453236222267, + "rewards/margins": 0.054469503462314606, + "rewards/rejected": -0.06500795483589172, + "step": 1288 + }, + { + "epoch": 0.8914246196403873, + "grad_norm": 4.936799049377441, + "learning_rate": 4.457123098201937e-05, + "log_odds_chosen": 1.5761115550994873, + "log_odds_ratio": -0.6042423248291016, + "logits/chosen": -0.5079135894775391, + "logits/rejected": -0.5012636780738831, + "logps/chosen": -0.20412951707839966, + "logps/rejected": -0.3674103915691376, + "loss": 3.8832, + "nll_loss": 0.9103636741638184, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.020412951707839966, + "rewards/margins": 0.016328085213899612, + "rewards/rejected": -0.03674103692173958, + "step": 1289 + }, + { + "epoch": 0.8921161825726142, + "grad_norm": 3.823768377304077, + "learning_rate": 4.460580912863071e-05, + "log_odds_chosen": 6.826704502105713, + "log_odds_ratio": -0.05582105368375778, + "logits/chosen": -0.3447270691394806, + "logits/rejected": -0.4047142565250397, + "logps/chosen": -0.023953121155500412, + "logps/rejected": -0.9755678772926331, + "loss": 3.1965, + "nll_loss": 0.793531060218811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002395312301814556, + "rewards/margins": 0.09516146779060364, + "rewards/rejected": -0.09755679219961166, + "step": 1290 + }, + { + "epoch": 0.892807745504841, + "grad_norm": 5.092657566070557, + "learning_rate": 4.464038727524205e-05, + "log_odds_chosen": 2.33146071434021, + "log_odds_ratio": -0.4566318988800049, + "logits/chosen": -0.6432561278343201, + "logits/rejected": -0.6718426942825317, + "logps/chosen": -0.12250132858753204, + "logps/rejected": -0.5979148149490356, + "loss": 4.1411, + "nll_loss": 0.9896198511123657, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012250132858753204, + "rewards/margins": 0.04754134267568588, + "rewards/rejected": -0.059791479259729385, + "step": 1291 + }, + { + "epoch": 0.8934993084370678, + "grad_norm": 4.353096008300781, + "learning_rate": 4.4674965421853395e-05, + "log_odds_chosen": 4.498590469360352, + "log_odds_ratio": -0.15228214859962463, + "logits/chosen": -0.6290316581726074, + "logits/rejected": -0.6855980157852173, + "logps/chosen": -0.06508232653141022, + "logps/rejected": -0.8101258277893066, + "loss": 3.72, + "nll_loss": 0.914771318435669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006508233025670052, + "rewards/margins": 0.07450436055660248, + "rewards/rejected": -0.08101259171962738, + "step": 1292 + }, + { + "epoch": 0.8941908713692946, + "grad_norm": 3.421708583831787, + "learning_rate": 4.4709543568464736e-05, + "log_odds_chosen": 4.2662034034729, + "log_odds_ratio": -0.1641775369644165, + "logits/chosen": -0.7522150278091431, + "logits/rejected": -0.8374617695808411, + "logps/chosen": -0.08146195113658905, + "logps/rejected": -0.6217236518859863, + "loss": 3.0554, + "nll_loss": 0.7474253177642822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008146194741129875, + "rewards/margins": 0.05402617156505585, + "rewards/rejected": -0.06217236444354057, + "step": 1293 + }, + { + "epoch": 0.8948824343015215, + "grad_norm": 5.032227039337158, + "learning_rate": 4.474412171507608e-05, + "log_odds_chosen": 2.9761674404144287, + "log_odds_ratio": -0.3079957664012909, + "logits/chosen": -0.37861043214797974, + "logits/rejected": -0.4537746012210846, + "logps/chosen": -0.12657591700553894, + "logps/rejected": -0.6328597068786621, + "loss": 3.3381, + "nll_loss": 0.8037294149398804, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01265759114176035, + "rewards/margins": 0.050628382712602615, + "rewards/rejected": -0.06328596919775009, + "step": 1294 + }, + { + "epoch": 0.8955739972337483, + "grad_norm": 3.9334354400634766, + "learning_rate": 4.477869986168742e-05, + "log_odds_chosen": 2.1492156982421875, + "log_odds_ratio": -0.38227516412734985, + "logits/chosen": -0.7759724259376526, + "logits/rejected": -0.8038296699523926, + "logps/chosen": -0.10162755846977234, + "logps/rejected": -0.4932019114494324, + "loss": 4.4286, + "nll_loss": 1.0689126253128052, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010162755846977234, + "rewards/margins": 0.039157435297966, + "rewards/rejected": -0.04932019114494324, + "step": 1295 + }, + { + "epoch": 0.8962655601659751, + "grad_norm": 4.790040016174316, + "learning_rate": 4.481327800829876e-05, + "log_odds_chosen": 3.400041341781616, + "log_odds_ratio": -0.3399104177951813, + "logits/chosen": -0.45178931951522827, + "logits/rejected": -0.46649065613746643, + "logps/chosen": -0.08820350468158722, + "logps/rejected": -0.8288697600364685, + "loss": 3.4904, + "nll_loss": 0.83861243724823, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008820349350571632, + "rewards/margins": 0.07406662404537201, + "rewards/rejected": -0.08288698643445969, + "step": 1296 + }, + { + "epoch": 0.896957123098202, + "grad_norm": 4.286818981170654, + "learning_rate": 4.48478561549101e-05, + "log_odds_chosen": 3.6408259868621826, + "log_odds_ratio": -0.3869631886482239, + "logits/chosen": -0.10497380793094635, + "logits/rejected": -0.15320220589637756, + "logps/chosen": -0.08386063575744629, + "logps/rejected": -0.34298986196517944, + "loss": 3.1143, + "nll_loss": 0.7398838996887207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00838606245815754, + "rewards/margins": 0.025912927463650703, + "rewards/rejected": -0.034298986196517944, + "step": 1297 + }, + { + "epoch": 0.8976486860304288, + "grad_norm": 3.562063694000244, + "learning_rate": 4.4882434301521444e-05, + "log_odds_chosen": 5.407161712646484, + "log_odds_ratio": -0.23755621910095215, + "logits/chosen": -0.35799068212509155, + "logits/rejected": -0.3617667555809021, + "logps/chosen": -0.10267575085163116, + "logps/rejected": -0.901630163192749, + "loss": 2.9682, + "nll_loss": 0.718295693397522, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010267574340105057, + "rewards/margins": 0.07989544421434402, + "rewards/rejected": -0.09016300737857819, + "step": 1298 + }, + { + "epoch": 0.8983402489626556, + "grad_norm": 5.247976779937744, + "learning_rate": 4.4917012448132786e-05, + "log_odds_chosen": 2.938415288925171, + "log_odds_ratio": -0.2645170986652374, + "logits/chosen": -0.678473711013794, + "logits/rejected": -0.7426837682723999, + "logps/chosen": -0.15398818254470825, + "logps/rejected": -0.6524811387062073, + "loss": 4.3283, + "nll_loss": 1.0556238889694214, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01539881806820631, + "rewards/margins": 0.04984929785132408, + "rewards/rejected": -0.06524811685085297, + "step": 1299 + }, + { + "epoch": 0.8990318118948825, + "grad_norm": 4.963733196258545, + "learning_rate": 4.495159059474413e-05, + "log_odds_chosen": 4.816115856170654, + "log_odds_ratio": -0.1597924530506134, + "logits/chosen": -0.5552168488502502, + "logits/rejected": -0.6095180511474609, + "logps/chosen": -0.03244926035404205, + "logps/rejected": -0.570087194442749, + "loss": 2.9926, + "nll_loss": 0.7321832180023193, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032449259888380766, + "rewards/margins": 0.053763799369335175, + "rewards/rejected": -0.05700872093439102, + "step": 1300 + }, + { + "epoch": 0.8997233748271093, + "grad_norm": 3.502925157546997, + "learning_rate": 4.498616874135547e-05, + "log_odds_chosen": 5.5950822830200195, + "log_odds_ratio": -0.08565986901521683, + "logits/chosen": -0.6689096689224243, + "logits/rejected": -0.7246332764625549, + "logps/chosen": -0.06416517496109009, + "logps/rejected": -0.8265728950500488, + "loss": 2.5299, + "nll_loss": 0.6238970756530762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0064165182411670685, + "rewards/margins": 0.07624077051877975, + "rewards/rejected": -0.08265729248523712, + "step": 1301 + }, + { + "epoch": 0.9004149377593361, + "grad_norm": 6.148613929748535, + "learning_rate": 4.502074688796681e-05, + "log_odds_chosen": 1.8418501615524292, + "log_odds_ratio": -0.475241482257843, + "logits/chosen": -0.6767906546592712, + "logits/rejected": -0.7121725678443909, + "logps/chosen": -0.11665619909763336, + "logps/rejected": -0.456548810005188, + "loss": 4.6573, + "nll_loss": 1.11681067943573, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01166562084108591, + "rewards/margins": 0.03398926556110382, + "rewards/rejected": -0.04565488174557686, + "step": 1302 + }, + { + "epoch": 0.901106500691563, + "grad_norm": 4.376565456390381, + "learning_rate": 4.505532503457815e-05, + "log_odds_chosen": 2.30222225189209, + "log_odds_ratio": -0.2232993245124817, + "logits/chosen": -0.7871577739715576, + "logits/rejected": -0.7928929328918457, + "logps/chosen": -0.08736582845449448, + "logps/rejected": -0.4544295072555542, + "loss": 3.7713, + "nll_loss": 0.9204996824264526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008736583404242992, + "rewards/margins": 0.03670636564493179, + "rewards/rejected": -0.04544294998049736, + "step": 1303 + }, + { + "epoch": 0.9017980636237898, + "grad_norm": 4.588575839996338, + "learning_rate": 4.5089903181189494e-05, + "log_odds_chosen": 2.5985970497131348, + "log_odds_ratio": -0.37684494256973267, + "logits/chosen": -0.44517451524734497, + "logits/rejected": -0.4426910877227783, + "logps/chosen": -0.13602550327777863, + "logps/rejected": -0.566986083984375, + "loss": 4.3455, + "nll_loss": 1.0486829280853271, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013602551072835922, + "rewards/margins": 0.04309605434536934, + "rewards/rejected": -0.05669860541820526, + "step": 1304 + }, + { + "epoch": 0.9024896265560166, + "grad_norm": 6.157674312591553, + "learning_rate": 4.5124481327800835e-05, + "log_odds_chosen": 2.6842451095581055, + "log_odds_ratio": -0.5304502248764038, + "logits/chosen": -0.574425995349884, + "logits/rejected": -0.6412628889083862, + "logps/chosen": -0.10813743621110916, + "logps/rejected": -0.5658566951751709, + "loss": 4.5154, + "nll_loss": 1.0758013725280762, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010813743807375431, + "rewards/margins": 0.04577192664146423, + "rewards/rejected": -0.05658566579222679, + "step": 1305 + }, + { + "epoch": 0.9031811894882434, + "grad_norm": 4.149651050567627, + "learning_rate": 4.515905947441218e-05, + "log_odds_chosen": 2.3158023357391357, + "log_odds_ratio": -0.18550823628902435, + "logits/chosen": -0.5790281295776367, + "logits/rejected": -0.6453101634979248, + "logps/chosen": -0.11300928145647049, + "logps/rejected": -0.7054611444473267, + "loss": 3.7652, + "nll_loss": 0.9227438569068909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011300928890705109, + "rewards/margins": 0.059245187789201736, + "rewards/rejected": -0.07054612040519714, + "step": 1306 + }, + { + "epoch": 0.9038727524204703, + "grad_norm": 4.822201728820801, + "learning_rate": 4.519363762102352e-05, + "log_odds_chosen": 4.911643028259277, + "log_odds_ratio": -0.1145109161734581, + "logits/chosen": -0.47414761781692505, + "logits/rejected": -0.4934576451778412, + "logps/chosen": -0.05742540955543518, + "logps/rejected": -0.8317654132843018, + "loss": 4.7059, + "nll_loss": 1.165034294128418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005742541514337063, + "rewards/margins": 0.0774339959025383, + "rewards/rejected": -0.08317653834819794, + "step": 1307 + }, + { + "epoch": 0.9045643153526971, + "grad_norm": 35.87089538574219, + "learning_rate": 4.522821576763486e-05, + "log_odds_chosen": 1.5534108877182007, + "log_odds_ratio": -0.5431495904922485, + "logits/chosen": -0.615991473197937, + "logits/rejected": -0.6219574809074402, + "logps/chosen": -0.1171765848994255, + "logps/rejected": -0.3493393659591675, + "loss": 3.5324, + "nll_loss": 0.8287971615791321, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01171765848994255, + "rewards/margins": 0.023216277360916138, + "rewards/rejected": -0.03493393585085869, + "step": 1308 + }, + { + "epoch": 0.9052558782849239, + "grad_norm": 5.447350978851318, + "learning_rate": 4.52627939142462e-05, + "log_odds_chosen": 1.6186761856079102, + "log_odds_ratio": -0.40729713439941406, + "logits/chosen": -0.7507280111312866, + "logits/rejected": -0.7362810373306274, + "logps/chosen": -0.09505996853113174, + "logps/rejected": -0.25165241956710815, + "loss": 4.7086, + "nll_loss": 1.1364130973815918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00950599741190672, + "rewards/margins": 0.015659242868423462, + "rewards/rejected": -0.025165241211652756, + "step": 1309 + }, + { + "epoch": 0.9059474412171508, + "grad_norm": 7.766451835632324, + "learning_rate": 4.529737206085754e-05, + "log_odds_chosen": 2.389831304550171, + "log_odds_ratio": -0.5831612944602966, + "logits/chosen": -0.44988909363746643, + "logits/rejected": -0.447091281414032, + "logps/chosen": -0.15839967131614685, + "logps/rejected": -0.7640863060951233, + "loss": 3.9613, + "nll_loss": 0.9320147037506104, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015839967876672745, + "rewards/margins": 0.060568664222955704, + "rewards/rejected": -0.07640863209962845, + "step": 1310 + }, + { + "epoch": 0.9066390041493776, + "grad_norm": 2.5082616806030273, + "learning_rate": 4.5331950207468885e-05, + "log_odds_chosen": 4.344583988189697, + "log_odds_ratio": -0.14952819049358368, + "logits/chosen": -0.5728102326393127, + "logits/rejected": -0.5895368456840515, + "logps/chosen": -0.056445784866809845, + "logps/rejected": -0.4942803978919983, + "loss": 2.454, + "nll_loss": 0.5985533595085144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0056445784866809845, + "rewards/margins": 0.043783463537693024, + "rewards/rejected": -0.04942803829908371, + "step": 1311 + }, + { + "epoch": 0.9073305670816044, + "grad_norm": 4.3802170753479, + "learning_rate": 4.5366528354080226e-05, + "log_odds_chosen": 3.1736373901367188, + "log_odds_ratio": -0.21422179043293, + "logits/chosen": -0.7820005416870117, + "logits/rejected": -0.8080264925956726, + "logps/chosen": -0.09875577688217163, + "logps/rejected": -0.523554265499115, + "loss": 4.3245, + "nll_loss": 1.0596930980682373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009875577874481678, + "rewards/margins": 0.042479850351810455, + "rewards/rejected": -0.05235542729496956, + "step": 1312 + }, + { + "epoch": 0.9080221300138313, + "grad_norm": 3.8520901203155518, + "learning_rate": 4.540110650069157e-05, + "log_odds_chosen": 4.601001739501953, + "log_odds_ratio": -0.07977695763111115, + "logits/chosen": -0.6756404638290405, + "logits/rejected": -0.7021632790565491, + "logps/chosen": -0.05860934406518936, + "logps/rejected": -0.9899218082427979, + "loss": 2.8374, + "nll_loss": 0.7013697624206543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005860934499651194, + "rewards/margins": 0.09313125163316727, + "rewards/rejected": -0.09899218380451202, + "step": 1313 + }, + { + "epoch": 0.9087136929460581, + "grad_norm": 5.126208305358887, + "learning_rate": 4.543568464730291e-05, + "log_odds_chosen": 3.0823912620544434, + "log_odds_ratio": -0.6001900434494019, + "logits/chosen": -0.793992280960083, + "logits/rejected": -0.8149877786636353, + "logps/chosen": -0.15848688781261444, + "logps/rejected": -0.7364607453346252, + "loss": 3.3215, + "nll_loss": 0.7703518867492676, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015848688781261444, + "rewards/margins": 0.0577973872423172, + "rewards/rejected": -0.07364607602357864, + "step": 1314 + }, + { + "epoch": 0.9094052558782849, + "grad_norm": 5.542551517486572, + "learning_rate": 4.547026279391425e-05, + "log_odds_chosen": 2.469801902770996, + "log_odds_ratio": -0.21200266480445862, + "logits/chosen": -0.4572201669216156, + "logits/rejected": -0.5229408144950867, + "logps/chosen": -0.08554461598396301, + "logps/rejected": -0.7156955599784851, + "loss": 5.0535, + "nll_loss": 1.2421646118164062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008554462343454361, + "rewards/margins": 0.06301509588956833, + "rewards/rejected": -0.07156955450773239, + "step": 1315 + }, + { + "epoch": 0.9100968188105117, + "grad_norm": 5.043685436248779, + "learning_rate": 4.550484094052559e-05, + "log_odds_chosen": 1.3790621757507324, + "log_odds_ratio": -0.7095733880996704, + "logits/chosen": -0.579319417476654, + "logits/rejected": -0.5887928605079651, + "logps/chosen": -0.18025648593902588, + "logps/rejected": -0.34477776288986206, + "loss": 4.1403, + "nll_loss": 0.96412593126297, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018025647848844528, + "rewards/margins": 0.016452128067612648, + "rewards/rejected": -0.034477777779102325, + "step": 1316 + }, + { + "epoch": 0.9107883817427386, + "grad_norm": 4.892166614532471, + "learning_rate": 4.5539419087136934e-05, + "log_odds_chosen": 3.1222496032714844, + "log_odds_ratio": -0.26753729581832886, + "logits/chosen": -0.7339975237846375, + "logits/rejected": -0.7486129999160767, + "logps/chosen": -0.09540008753538132, + "logps/rejected": -0.5913593769073486, + "loss": 4.3887, + "nll_loss": 1.070409893989563, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009540008381009102, + "rewards/margins": 0.04959592968225479, + "rewards/rejected": -0.059135936200618744, + "step": 1317 + }, + { + "epoch": 0.9114799446749654, + "grad_norm": 3.516747236251831, + "learning_rate": 4.5573997233748275e-05, + "log_odds_chosen": 6.2766432762146, + "log_odds_ratio": -0.05572151765227318, + "logits/chosen": -0.5407491326332092, + "logits/rejected": -0.5907287001609802, + "logps/chosen": -0.05054568871855736, + "logps/rejected": -1.1098434925079346, + "loss": 3.5414, + "nll_loss": 0.8797795176506042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0050545684061944485, + "rewards/margins": 0.10592978447675705, + "rewards/rejected": -0.11098435521125793, + "step": 1318 + }, + { + "epoch": 0.9121715076071922, + "grad_norm": 5.598543643951416, + "learning_rate": 4.560857538035962e-05, + "log_odds_chosen": 3.204986572265625, + "log_odds_ratio": -0.42890167236328125, + "logits/chosen": -0.6990206837654114, + "logits/rejected": -0.7492403984069824, + "logps/chosen": -0.10166987776756287, + "logps/rejected": -0.813813328742981, + "loss": 4.8704, + "nll_loss": 1.1747119426727295, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010166987776756287, + "rewards/margins": 0.07121434807777405, + "rewards/rejected": -0.08138133585453033, + "step": 1319 + }, + { + "epoch": 0.9128630705394191, + "grad_norm": 3.9839718341827393, + "learning_rate": 4.564315352697096e-05, + "log_odds_chosen": 1.6930179595947266, + "log_odds_ratio": -0.44334709644317627, + "logits/chosen": -0.43449825048446655, + "logits/rejected": -0.41399112343788147, + "logps/chosen": -0.1631818413734436, + "logps/rejected": -0.24806104600429535, + "loss": 2.9659, + "nll_loss": 0.6971408724784851, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01631818525493145, + "rewards/margins": 0.00848792027682066, + "rewards/rejected": -0.024806104600429535, + "step": 1320 + }, + { + "epoch": 0.9135546334716459, + "grad_norm": 4.427506446838379, + "learning_rate": 4.56777316735823e-05, + "log_odds_chosen": 3.5876333713531494, + "log_odds_ratio": -0.33876973390579224, + "logits/chosen": -0.6090391874313354, + "logits/rejected": -0.6520763635635376, + "logps/chosen": -0.1511664092540741, + "logps/rejected": -0.7406303286552429, + "loss": 3.6463, + "nll_loss": 0.8776899576187134, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01511664129793644, + "rewards/margins": 0.0589463897049427, + "rewards/rejected": -0.07406303286552429, + "step": 1321 + }, + { + "epoch": 0.9142461964038727, + "grad_norm": 7.1556291580200195, + "learning_rate": 4.571230982019364e-05, + "log_odds_chosen": 0.20019125938415527, + "log_odds_ratio": -0.9610413908958435, + "logits/chosen": -0.6211057305335999, + "logits/rejected": -0.6151083111763, + "logps/chosen": -0.19979843497276306, + "logps/rejected": -0.21615344285964966, + "loss": 3.8671, + "nll_loss": 0.870674192905426, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019979843869805336, + "rewards/margins": 0.0016355020925402641, + "rewards/rejected": -0.021615345031023026, + "step": 1322 + }, + { + "epoch": 0.9149377593360996, + "grad_norm": 5.3502197265625, + "learning_rate": 4.5746887966804977e-05, + "log_odds_chosen": 3.4048633575439453, + "log_odds_ratio": -0.4762808084487915, + "logits/chosen": -0.82079017162323, + "logits/rejected": -0.8388082981109619, + "logps/chosen": -0.13025324046611786, + "logps/rejected": -0.6816924214363098, + "loss": 4.202, + "nll_loss": 1.0028724670410156, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013025323860347271, + "rewards/margins": 0.055143918842077255, + "rewards/rejected": -0.0681692436337471, + "step": 1323 + }, + { + "epoch": 0.9156293222683264, + "grad_norm": 4.137228488922119, + "learning_rate": 4.578146611341632e-05, + "log_odds_chosen": 4.749207496643066, + "log_odds_ratio": -0.20173031091690063, + "logits/chosen": -0.842710018157959, + "logits/rejected": -0.8378841280937195, + "logps/chosen": -0.06035904958844185, + "logps/rejected": -0.8911387920379639, + "loss": 4.0766, + "nll_loss": 0.9989691972732544, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006035904865711927, + "rewards/margins": 0.08307798206806183, + "rewards/rejected": -0.08911389112472534, + "step": 1324 + }, + { + "epoch": 0.9163208852005532, + "grad_norm": 4.485111713409424, + "learning_rate": 4.581604426002766e-05, + "log_odds_chosen": 2.276975154876709, + "log_odds_ratio": -0.1457901895046234, + "logits/chosen": -0.38137882947921753, + "logits/rejected": -0.37996378540992737, + "logps/chosen": -0.11912259459495544, + "logps/rejected": -0.6932668089866638, + "loss": 3.2816, + "nll_loss": 0.8058204650878906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011912260204553604, + "rewards/margins": 0.057414423674345016, + "rewards/rejected": -0.06932668387889862, + "step": 1325 + }, + { + "epoch": 0.91701244813278, + "grad_norm": 6.287432670593262, + "learning_rate": 4.5850622406639e-05, + "log_odds_chosen": 2.9762678146362305, + "log_odds_ratio": -0.40890878438949585, + "logits/chosen": -0.8684566617012024, + "logits/rejected": -0.8920563459396362, + "logps/chosen": -0.12412737309932709, + "logps/rejected": -0.7340701818466187, + "loss": 4.3594, + "nll_loss": 1.0489689111709595, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012412738054990768, + "rewards/margins": 0.06099428981542587, + "rewards/rejected": -0.07340703159570694, + "step": 1326 + }, + { + "epoch": 0.9177040110650069, + "grad_norm": 3.8076276779174805, + "learning_rate": 4.588520055325034e-05, + "log_odds_chosen": 1.818742275238037, + "log_odds_ratio": -0.36743277311325073, + "logits/chosen": -0.750927209854126, + "logits/rejected": -0.7433460354804993, + "logps/chosen": -0.1195850744843483, + "logps/rejected": -0.4962850511074066, + "loss": 2.9064, + "nll_loss": 0.6898566484451294, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01195850782096386, + "rewards/margins": 0.03766999766230583, + "rewards/rejected": -0.04962850734591484, + "step": 1327 + }, + { + "epoch": 0.9183955739972337, + "grad_norm": 6.428443431854248, + "learning_rate": 4.5919778699861684e-05, + "log_odds_chosen": 0.8354544639587402, + "log_odds_ratio": -0.48252594470977783, + "logits/chosen": -0.9051334261894226, + "logits/rejected": -0.906627893447876, + "logps/chosen": -0.10690341144800186, + "logps/rejected": -0.27170899510383606, + "loss": 4.5899, + "nll_loss": 1.0992116928100586, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010690340772271156, + "rewards/margins": 0.01648055762052536, + "rewards/rejected": -0.027170900255441666, + "step": 1328 + }, + { + "epoch": 0.9190871369294605, + "grad_norm": 6.3958353996276855, + "learning_rate": 4.5954356846473026e-05, + "log_odds_chosen": 1.6040034294128418, + "log_odds_ratio": -0.26059266924858093, + "logits/chosen": -0.4876313805580139, + "logits/rejected": -0.5052316188812256, + "logps/chosen": -0.09867852926254272, + "logps/rejected": -0.5049740076065063, + "loss": 5.1261, + "nll_loss": 1.2554727792739868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009867853485047817, + "rewards/margins": 0.0406295470893383, + "rewards/rejected": -0.050497397780418396, + "step": 1329 + }, + { + "epoch": 0.9197786998616874, + "grad_norm": 3.9454269409179688, + "learning_rate": 4.598893499308437e-05, + "log_odds_chosen": 3.4766697883605957, + "log_odds_ratio": -0.3413624167442322, + "logits/chosen": -0.6844198107719421, + "logits/rejected": -0.6933927536010742, + "logps/chosen": -0.10315560549497604, + "logps/rejected": -0.6214383840560913, + "loss": 3.0776, + "nll_loss": 0.7352601885795593, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010315561667084694, + "rewards/margins": 0.05182827636599541, + "rewards/rejected": -0.06214383617043495, + "step": 1330 + }, + { + "epoch": 0.9204702627939142, + "grad_norm": 6.752826690673828, + "learning_rate": 4.602351313969571e-05, + "log_odds_chosen": 2.6723358631134033, + "log_odds_ratio": -0.40230464935302734, + "logits/chosen": -0.5666855573654175, + "logits/rejected": -0.6329537630081177, + "logps/chosen": -0.11216460913419724, + "logps/rejected": -0.4415185749530792, + "loss": 4.4679, + "nll_loss": 1.076737642288208, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011216461658477783, + "rewards/margins": 0.03293539956212044, + "rewards/rejected": -0.04415185749530792, + "step": 1331 + }, + { + "epoch": 0.921161825726141, + "grad_norm": 3.5908589363098145, + "learning_rate": 4.605809128630705e-05, + "log_odds_chosen": 3.980630874633789, + "log_odds_ratio": -0.30837661027908325, + "logits/chosen": -0.786279559135437, + "logits/rejected": -0.8102754354476929, + "logps/chosen": -0.09069174528121948, + "logps/rejected": -0.7721251249313354, + "loss": 3.2405, + "nll_loss": 0.7792943120002747, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009069174528121948, + "rewards/margins": 0.06814335286617279, + "rewards/rejected": -0.07721251249313354, + "step": 1332 + }, + { + "epoch": 0.9218533886583679, + "grad_norm": 5.429046630859375, + "learning_rate": 4.609266943291839e-05, + "log_odds_chosen": 4.113162517547607, + "log_odds_ratio": -0.20421762764453888, + "logits/chosen": -0.6347546577453613, + "logits/rejected": -0.6887112259864807, + "logps/chosen": -0.09734141826629639, + "logps/rejected": -0.7304251790046692, + "loss": 4.5229, + "nll_loss": 1.1103060245513916, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009734141640365124, + "rewards/margins": 0.06330837309360504, + "rewards/rejected": -0.07304251939058304, + "step": 1333 + }, + { + "epoch": 0.9225449515905948, + "grad_norm": 3.887255907058716, + "learning_rate": 4.612724757952974e-05, + "log_odds_chosen": 4.6803507804870605, + "log_odds_ratio": -0.11616200953722, + "logits/chosen": -0.1559465080499649, + "logits/rejected": -0.15856684744358063, + "logps/chosen": -0.04484165087342262, + "logps/rejected": -0.7281444072723389, + "loss": 2.6469, + "nll_loss": 0.6501127481460571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004484164994210005, + "rewards/margins": 0.06833028048276901, + "rewards/rejected": -0.07281444221735, + "step": 1334 + }, + { + "epoch": 0.9232365145228216, + "grad_norm": 3.4572935104370117, + "learning_rate": 4.616182572614108e-05, + "log_odds_chosen": 3.0293996334075928, + "log_odds_ratio": -0.17892718315124512, + "logits/chosen": -0.6547147035598755, + "logits/rejected": -0.7216066718101501, + "logps/chosen": -0.075624480843544, + "logps/rejected": -0.41122573614120483, + "loss": 2.958, + "nll_loss": 0.7216038703918457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0075624482706189156, + "rewards/margins": 0.0335601270198822, + "rewards/rejected": -0.04112257435917854, + "step": 1335 + }, + { + "epoch": 0.9239280774550485, + "grad_norm": 3.7187869548797607, + "learning_rate": 4.6196403872752424e-05, + "log_odds_chosen": 3.6043601036071777, + "log_odds_ratio": -0.4560887813568115, + "logits/chosen": -0.4577183723449707, + "logits/rejected": -0.41571831703186035, + "logps/chosen": -0.0708552822470665, + "logps/rejected": -0.3021210730075836, + "loss": 2.6082, + "nll_loss": 0.6064510345458984, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00708552822470665, + "rewards/margins": 0.02312657982110977, + "rewards/rejected": -0.03021210804581642, + "step": 1336 + }, + { + "epoch": 0.9246196403872753, + "grad_norm": 6.289156913757324, + "learning_rate": 4.6230982019363765e-05, + "log_odds_chosen": 1.629589319229126, + "log_odds_ratio": -0.3251434564590454, + "logits/chosen": -0.7396148443222046, + "logits/rejected": -0.799910843372345, + "logps/chosen": -0.094536691904068, + "logps/rejected": -0.34461310505867004, + "loss": 4.7423, + "nll_loss": 1.153063178062439, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0094536691904068, + "rewards/margins": 0.025007642805576324, + "rewards/rejected": -0.034461311995983124, + "step": 1337 + }, + { + "epoch": 0.9253112033195021, + "grad_norm": 5.073808193206787, + "learning_rate": 4.626556016597511e-05, + "log_odds_chosen": 2.9201889038085938, + "log_odds_ratio": -0.35377949476242065, + "logits/chosen": -0.5850831270217896, + "logits/rejected": -0.5737963914871216, + "logps/chosen": -0.1539689302444458, + "logps/rejected": -0.5299727916717529, + "loss": 4.3157, + "nll_loss": 1.043558120727539, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01539689302444458, + "rewards/margins": 0.03760039061307907, + "rewards/rejected": -0.05299727991223335, + "step": 1338 + }, + { + "epoch": 0.926002766251729, + "grad_norm": 5.053715705871582, + "learning_rate": 4.630013831258645e-05, + "log_odds_chosen": 3.1605682373046875, + "log_odds_ratio": -0.39162132143974304, + "logits/chosen": -0.7348707318305969, + "logits/rejected": -0.7419517636299133, + "logps/chosen": -0.18457387387752533, + "logps/rejected": -0.8330814242362976, + "loss": 4.2734, + "nll_loss": 1.0291779041290283, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018457388505339622, + "rewards/margins": 0.06485076248645782, + "rewards/rejected": -0.083308145403862, + "step": 1339 + }, + { + "epoch": 0.9266943291839558, + "grad_norm": 4.149688243865967, + "learning_rate": 4.633471645919779e-05, + "log_odds_chosen": 2.2877182960510254, + "log_odds_ratio": -0.20909997820854187, + "logits/chosen": -0.8047329187393188, + "logits/rejected": -0.8140844702720642, + "logps/chosen": -0.12095511704683304, + "logps/rejected": -0.6085599660873413, + "loss": 3.9157, + "nll_loss": 0.9580215215682983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012095511890947819, + "rewards/margins": 0.048760488629341125, + "rewards/rejected": -0.06085599958896637, + "step": 1340 + }, + { + "epoch": 0.9273858921161826, + "grad_norm": 5.761998653411865, + "learning_rate": 4.636929460580913e-05, + "log_odds_chosen": 3.480170249938965, + "log_odds_ratio": -0.35071712732315063, + "logits/chosen": -0.6200214624404907, + "logits/rejected": -0.6776783466339111, + "logps/chosen": -0.07922127842903137, + "logps/rejected": -0.7400749921798706, + "loss": 4.1748, + "nll_loss": 1.0086222887039185, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007922128774225712, + "rewards/margins": 0.06608536839485168, + "rewards/rejected": -0.07400749623775482, + "step": 1341 + }, + { + "epoch": 0.9280774550484094, + "grad_norm": 6.504542827606201, + "learning_rate": 4.640387275242047e-05, + "log_odds_chosen": 2.966991662979126, + "log_odds_ratio": -0.9646126627922058, + "logits/chosen": -0.900704562664032, + "logits/rejected": -0.9117063283920288, + "logps/chosen": -0.09276466071605682, + "logps/rejected": -0.6016995906829834, + "loss": 5.0409, + "nll_loss": 1.1637513637542725, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009276467375457287, + "rewards/margins": 0.05089349299669266, + "rewards/rejected": -0.06016996130347252, + "step": 1342 + }, + { + "epoch": 0.9287690179806363, + "grad_norm": 5.371675491333008, + "learning_rate": 4.6438450899031815e-05, + "log_odds_chosen": 3.9872875213623047, + "log_odds_ratio": -0.5040350556373596, + "logits/chosen": -0.6492640376091003, + "logits/rejected": -0.7425565719604492, + "logps/chosen": -0.10791552066802979, + "logps/rejected": -0.619098961353302, + "loss": 2.7258, + "nll_loss": 0.6310492753982544, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010791551321744919, + "rewards/margins": 0.05111834406852722, + "rewards/rejected": -0.06190989911556244, + "step": 1343 + }, + { + "epoch": 0.9294605809128631, + "grad_norm": 3.99408221244812, + "learning_rate": 4.6473029045643156e-05, + "log_odds_chosen": 3.311504602432251, + "log_odds_ratio": -0.15633322298526764, + "logits/chosen": -0.9153397083282471, + "logits/rejected": -0.9816678762435913, + "logps/chosen": -0.038775935769081116, + "logps/rejected": -0.48394450545310974, + "loss": 2.6258, + "nll_loss": 0.6408182382583618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038775932043790817, + "rewards/margins": 0.04451685771346092, + "rewards/rejected": -0.04839445278048515, + "step": 1344 + }, + { + "epoch": 0.9301521438450899, + "grad_norm": 3.6404597759246826, + "learning_rate": 4.65076071922545e-05, + "log_odds_chosen": 2.5649046897888184, + "log_odds_ratio": -0.29447752237319946, + "logits/chosen": -0.5716572403907776, + "logits/rejected": -0.6139044761657715, + "logps/chosen": -0.07814640551805496, + "logps/rejected": -0.4611126184463501, + "loss": 2.5634, + "nll_loss": 0.6114119291305542, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007814640179276466, + "rewards/margins": 0.038296617567539215, + "rewards/rejected": -0.04611125960946083, + "step": 1345 + }, + { + "epoch": 0.9308437067773168, + "grad_norm": 6.4514384269714355, + "learning_rate": 4.654218533886584e-05, + "log_odds_chosen": 1.3883261680603027, + "log_odds_ratio": -0.6939107179641724, + "logits/chosen": -0.6356791257858276, + "logits/rejected": -0.6466587781906128, + "logps/chosen": -0.1649370789527893, + "logps/rejected": -0.4422614574432373, + "loss": 5.3489, + "nll_loss": 1.2678287029266357, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01649370789527893, + "rewards/margins": 0.027732431888580322, + "rewards/rejected": -0.04422614350914955, + "step": 1346 + }, + { + "epoch": 0.9315352697095436, + "grad_norm": 5.485098361968994, + "learning_rate": 4.657676348547718e-05, + "log_odds_chosen": 3.7240209579467773, + "log_odds_ratio": -0.5168907642364502, + "logits/chosen": -0.23317617177963257, + "logits/rejected": -0.2783960700035095, + "logps/chosen": -0.11563809216022491, + "logps/rejected": -0.6522781848907471, + "loss": 3.8546, + "nll_loss": 0.9119526147842407, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011563808657228947, + "rewards/margins": 0.05366401746869087, + "rewards/rejected": -0.06522782146930695, + "step": 1347 + }, + { + "epoch": 0.9322268326417704, + "grad_norm": 3.336510181427002, + "learning_rate": 4.661134163208852e-05, + "log_odds_chosen": 1.6070709228515625, + "log_odds_ratio": -0.3758274018764496, + "logits/chosen": -0.7622817158699036, + "logits/rejected": -0.7541989684104919, + "logps/chosen": -0.13546738028526306, + "logps/rejected": -0.6061623096466064, + "loss": 2.603, + "nll_loss": 0.6131584644317627, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013546737842261791, + "rewards/margins": 0.0470694974064827, + "rewards/rejected": -0.060616232454776764, + "step": 1348 + }, + { + "epoch": 0.9329183955739973, + "grad_norm": 5.757965564727783, + "learning_rate": 4.6645919778699864e-05, + "log_odds_chosen": 3.743828058242798, + "log_odds_ratio": -0.37967419624328613, + "logits/chosen": -0.5838915705680847, + "logits/rejected": -0.5985926985740662, + "logps/chosen": -0.14416922628879547, + "logps/rejected": -0.8762768507003784, + "loss": 3.4319, + "nll_loss": 0.820005476474762, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014416922815144062, + "rewards/margins": 0.07321076840162277, + "rewards/rejected": -0.08762768656015396, + "step": 1349 + }, + { + "epoch": 0.9336099585062241, + "grad_norm": 5.270911693572998, + "learning_rate": 4.6680497925311206e-05, + "log_odds_chosen": 2.170856237411499, + "log_odds_ratio": -0.519264280796051, + "logits/chosen": -0.4287693500518799, + "logits/rejected": -0.49412840604782104, + "logps/chosen": -0.13331323862075806, + "logps/rejected": -0.5560269951820374, + "loss": 3.0761, + "nll_loss": 0.7170995473861694, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013331323862075806, + "rewards/margins": 0.04227137565612793, + "rewards/rejected": -0.055602699518203735, + "step": 1350 + }, + { + "epoch": 0.9343015214384509, + "grad_norm": 5.0149126052856445, + "learning_rate": 4.671507607192255e-05, + "log_odds_chosen": 4.14609956741333, + "log_odds_ratio": -0.19730296730995178, + "logits/chosen": -0.40930646657943726, + "logits/rejected": -0.42448240518569946, + "logps/chosen": -0.08510898798704147, + "logps/rejected": -0.7383308410644531, + "loss": 3.784, + "nll_loss": 0.926262617111206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008510898798704147, + "rewards/margins": 0.06532219052314758, + "rewards/rejected": -0.07383308559656143, + "step": 1351 + }, + { + "epoch": 0.9349930843706777, + "grad_norm": 6.72976016998291, + "learning_rate": 4.674965421853389e-05, + "log_odds_chosen": 1.5673390626907349, + "log_odds_ratio": -0.6566497683525085, + "logits/chosen": -0.3811492919921875, + "logits/rejected": -0.3969017565250397, + "logps/chosen": -0.14099657535552979, + "logps/rejected": -0.5028352737426758, + "loss": 4.0345, + "nll_loss": 0.9429715871810913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014099658466875553, + "rewards/margins": 0.03618386387825012, + "rewards/rejected": -0.0502835214138031, + "step": 1352 + }, + { + "epoch": 0.9356846473029046, + "grad_norm": 4.536650657653809, + "learning_rate": 4.678423236514523e-05, + "log_odds_chosen": 1.6238508224487305, + "log_odds_ratio": -0.408079594373703, + "logits/chosen": -0.493243932723999, + "logits/rejected": -0.49688708782196045, + "logps/chosen": -0.11562632769346237, + "logps/rejected": -0.42109858989715576, + "loss": 2.8957, + "nll_loss": 0.6831123232841492, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011562633328139782, + "rewards/margins": 0.03054722398519516, + "rewards/rejected": -0.04210985451936722, + "step": 1353 + }, + { + "epoch": 0.9363762102351314, + "grad_norm": 4.0943474769592285, + "learning_rate": 4.681881051175657e-05, + "log_odds_chosen": 1.6092420816421509, + "log_odds_ratio": -0.41765856742858887, + "logits/chosen": -0.2855399250984192, + "logits/rejected": -0.2806923985481262, + "logps/chosen": -0.1413896083831787, + "logps/rejected": -0.35888469219207764, + "loss": 3.0766, + "nll_loss": 0.7273715734481812, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014138958416879177, + "rewards/margins": 0.02174951136112213, + "rewards/rejected": -0.03588847070932388, + "step": 1354 + }, + { + "epoch": 0.9370677731673582, + "grad_norm": 6.500419616699219, + "learning_rate": 4.685338865836791e-05, + "log_odds_chosen": 2.8715126514434814, + "log_odds_ratio": -0.16753099858760834, + "logits/chosen": -0.4005013704299927, + "logits/rejected": -0.3878091871738434, + "logps/chosen": -0.0721023827791214, + "logps/rejected": -0.5393887162208557, + "loss": 5.1474, + "nll_loss": 1.2700891494750977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007210238371044397, + "rewards/margins": 0.04672863706946373, + "rewards/rejected": -0.05393887311220169, + "step": 1355 + }, + { + "epoch": 0.9377593360995851, + "grad_norm": 4.416325092315674, + "learning_rate": 4.6887966804979255e-05, + "log_odds_chosen": 2.915926218032837, + "log_odds_ratio": -0.13118229806423187, + "logits/chosen": -0.904473066329956, + "logits/rejected": -0.9156371355056763, + "logps/chosen": -0.060720786452293396, + "logps/rejected": -0.7894104719161987, + "loss": 3.4559, + "nll_loss": 0.8508535623550415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0060720788314938545, + "rewards/margins": 0.07286897301673889, + "rewards/rejected": -0.07894104719161987, + "step": 1356 + }, + { + "epoch": 0.9384508990318119, + "grad_norm": 4.253722667694092, + "learning_rate": 4.6922544951590596e-05, + "log_odds_chosen": 2.4106616973876953, + "log_odds_ratio": -0.37549322843551636, + "logits/chosen": -0.8365063071250916, + "logits/rejected": -0.8938258290290833, + "logps/chosen": -0.09556761384010315, + "logps/rejected": -0.48990726470947266, + "loss": 4.3275, + "nll_loss": 1.044328212738037, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009556761011481285, + "rewards/margins": 0.03943396359682083, + "rewards/rejected": -0.048990726470947266, + "step": 1357 + }, + { + "epoch": 0.9391424619640387, + "grad_norm": 4.91149377822876, + "learning_rate": 4.695712309820194e-05, + "log_odds_chosen": 4.468101501464844, + "log_odds_ratio": -0.18852251768112183, + "logits/chosen": -0.057901933789253235, + "logits/rejected": -0.11767878383398056, + "logps/chosen": -0.031196830794215202, + "logps/rejected": -0.6462098360061646, + "loss": 3.5735, + "nll_loss": 0.8745160102844238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003119683125987649, + "rewards/margins": 0.06150130555033684, + "rewards/rejected": -0.0646209865808487, + "step": 1358 + }, + { + "epoch": 0.9398340248962656, + "grad_norm": 4.495655059814453, + "learning_rate": 4.699170124481328e-05, + "log_odds_chosen": 2.976381301879883, + "log_odds_ratio": -0.24520474672317505, + "logits/chosen": -0.6155415773391724, + "logits/rejected": -0.5602388381958008, + "logps/chosen": -0.08388067781925201, + "logps/rejected": -0.5136565566062927, + "loss": 4.3834, + "nll_loss": 1.0713173151016235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008388067595660686, + "rewards/margins": 0.04297758638858795, + "rewards/rejected": -0.05136565491557121, + "step": 1359 + }, + { + "epoch": 0.9405255878284924, + "grad_norm": 7.075529098510742, + "learning_rate": 4.702627939142462e-05, + "log_odds_chosen": 1.5126800537109375, + "log_odds_ratio": -0.40790507197380066, + "logits/chosen": -0.5208859443664551, + "logits/rejected": -0.5781892538070679, + "logps/chosen": -0.1379278004169464, + "logps/rejected": -0.4327913820743561, + "loss": 5.0908, + "nll_loss": 1.2318994998931885, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01379278115928173, + "rewards/margins": 0.029486358165740967, + "rewards/rejected": -0.04327914118766785, + "step": 1360 + }, + { + "epoch": 0.9412171507607192, + "grad_norm": 3.1823527812957764, + "learning_rate": 4.706085753803596e-05, + "log_odds_chosen": 2.7076611518859863, + "log_odds_ratio": -0.2057853639125824, + "logits/chosen": -0.2056216150522232, + "logits/rejected": -0.2584799528121948, + "logps/chosen": -0.06254203617572784, + "logps/rejected": -0.3413214087486267, + "loss": 2.9938, + "nll_loss": 0.7278686761856079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006254204083234072, + "rewards/margins": 0.027877936139702797, + "rewards/rejected": -0.03413214161992073, + "step": 1361 + }, + { + "epoch": 0.941908713692946, + "grad_norm": 5.588365077972412, + "learning_rate": 4.7095435684647304e-05, + "log_odds_chosen": 2.5802206993103027, + "log_odds_ratio": -0.31722715497016907, + "logits/chosen": -0.3173674941062927, + "logits/rejected": -0.4348156154155731, + "logps/chosen": -0.09981952607631683, + "logps/rejected": -0.6218388080596924, + "loss": 4.6298, + "nll_loss": 1.1257364749908447, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009981952607631683, + "rewards/margins": 0.052201926708221436, + "rewards/rejected": -0.06218387186527252, + "step": 1362 + }, + { + "epoch": 0.9426002766251729, + "grad_norm": 6.2187299728393555, + "learning_rate": 4.7130013831258646e-05, + "log_odds_chosen": 3.495363473892212, + "log_odds_ratio": -0.3896557092666626, + "logits/chosen": -0.9022430181503296, + "logits/rejected": -0.902992308139801, + "logps/chosen": -0.08296022564172745, + "logps/rejected": -0.7100075483322144, + "loss": 5.2592, + "nll_loss": 1.275832176208496, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008296022191643715, + "rewards/margins": 0.06270474195480347, + "rewards/rejected": -0.07100075483322144, + "step": 1363 + }, + { + "epoch": 0.9432918395573997, + "grad_norm": 4.568310260772705, + "learning_rate": 4.716459197786999e-05, + "log_odds_chosen": 3.555234432220459, + "log_odds_ratio": -0.4674950838088989, + "logits/chosen": -0.5476137399673462, + "logits/rejected": -0.5729899406433105, + "logps/chosen": -0.11827027052640915, + "logps/rejected": -0.7366992235183716, + "loss": 2.5127, + "nll_loss": 0.5814200043678284, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011827027425169945, + "rewards/margins": 0.061842888593673706, + "rewards/rejected": -0.0736699178814888, + "step": 1364 + }, + { + "epoch": 0.9439834024896265, + "grad_norm": 3.968846321105957, + "learning_rate": 4.719917012448133e-05, + "log_odds_chosen": 4.848783493041992, + "log_odds_ratio": -0.16408772766590118, + "logits/chosen": -0.6661707162857056, + "logits/rejected": -0.7089472413063049, + "logps/chosen": -0.07180530577898026, + "logps/rejected": -0.8554694652557373, + "loss": 3.0071, + "nll_loss": 0.7353719472885132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007180530112236738, + "rewards/margins": 0.07836642116308212, + "rewards/rejected": -0.08554694801568985, + "step": 1365 + }, + { + "epoch": 0.9446749654218534, + "grad_norm": 5.436463832855225, + "learning_rate": 4.723374827109267e-05, + "log_odds_chosen": 1.8622217178344727, + "log_odds_ratio": -0.3799074590206146, + "logits/chosen": -0.7138546109199524, + "logits/rejected": -0.7268157005310059, + "logps/chosen": -0.11516305804252625, + "logps/rejected": -0.5172901153564453, + "loss": 4.521, + "nll_loss": 1.0922627449035645, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01151630561798811, + "rewards/margins": 0.04021270573139191, + "rewards/rejected": -0.05172900855541229, + "step": 1366 + }, + { + "epoch": 0.9453665283540802, + "grad_norm": 4.4956955909729, + "learning_rate": 4.726832641770401e-05, + "log_odds_chosen": 1.9010629653930664, + "log_odds_ratio": -0.56339430809021, + "logits/chosen": -0.1682775616645813, + "logits/rejected": -0.1694241464138031, + "logps/chosen": -0.1430377960205078, + "logps/rejected": -0.5368797183036804, + "loss": 3.296, + "nll_loss": 0.7676615715026855, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014303779229521751, + "rewards/margins": 0.03938418999314308, + "rewards/rejected": -0.05368797108530998, + "step": 1367 + }, + { + "epoch": 0.946058091286307, + "grad_norm": 5.558333873748779, + "learning_rate": 4.7302904564315354e-05, + "log_odds_chosen": 3.761000871658325, + "log_odds_ratio": -0.17737360298633575, + "logits/chosen": -0.46184465289115906, + "logits/rejected": -0.4802972674369812, + "logps/chosen": -0.08399471640586853, + "logps/rejected": -1.0271053314208984, + "loss": 4.8689, + "nll_loss": 1.1994938850402832, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008399471640586853, + "rewards/margins": 0.09431107342243195, + "rewards/rejected": -0.1027105450630188, + "step": 1368 + }, + { + "epoch": 0.9467496542185339, + "grad_norm": 4.409332275390625, + "learning_rate": 4.7337482710926695e-05, + "log_odds_chosen": 2.9365220069885254, + "log_odds_ratio": -0.38205763697624207, + "logits/chosen": -0.6523790955543518, + "logits/rejected": -0.6939494609832764, + "logps/chosen": -0.15215745568275452, + "logps/rejected": -0.7071554660797119, + "loss": 3.9908, + "nll_loss": 0.9594962000846863, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015215746127068996, + "rewards/margins": 0.05549979954957962, + "rewards/rejected": -0.07071554660797119, + "step": 1369 + }, + { + "epoch": 0.9474412171507607, + "grad_norm": 2.9507594108581543, + "learning_rate": 4.737206085753804e-05, + "log_odds_chosen": 4.197795867919922, + "log_odds_ratio": -0.3220274746417999, + "logits/chosen": -0.5537489652633667, + "logits/rejected": -0.5800063610076904, + "logps/chosen": -0.09319345653057098, + "logps/rejected": -0.5641083121299744, + "loss": 2.8204, + "nll_loss": 0.6729087233543396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009319345466792583, + "rewards/margins": 0.04709148779511452, + "rewards/rejected": -0.056410834193229675, + "step": 1370 + }, + { + "epoch": 0.9481327800829875, + "grad_norm": 4.980175971984863, + "learning_rate": 4.740663900414938e-05, + "log_odds_chosen": 5.336559295654297, + "log_odds_ratio": -0.10088712722063065, + "logits/chosen": -0.3527514636516571, + "logits/rejected": -0.37984007596969604, + "logps/chosen": -0.05419841408729553, + "logps/rejected": -0.985955536365509, + "loss": 3.0996, + "nll_loss": 0.7648058533668518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005419841967523098, + "rewards/margins": 0.09317570924758911, + "rewards/rejected": -0.09859554469585419, + "step": 1371 + }, + { + "epoch": 0.9488243430152143, + "grad_norm": 6.840479373931885, + "learning_rate": 4.744121715076072e-05, + "log_odds_chosen": 1.5749034881591797, + "log_odds_ratio": -0.4523867964744568, + "logits/chosen": -0.6053675413131714, + "logits/rejected": -0.621617317199707, + "logps/chosen": -0.12378253042697906, + "logps/rejected": -0.3877543807029724, + "loss": 4.6367, + "nll_loss": 1.1139403581619263, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012378253042697906, + "rewards/margins": 0.026397183537483215, + "rewards/rejected": -0.03877543285489082, + "step": 1372 + }, + { + "epoch": 0.9495159059474412, + "grad_norm": 6.94858455657959, + "learning_rate": 4.747579529737206e-05, + "log_odds_chosen": 3.1703052520751953, + "log_odds_ratio": -0.5808858871459961, + "logits/chosen": -0.8809736967086792, + "logits/rejected": -0.8974775075912476, + "logps/chosen": -0.10179749131202698, + "logps/rejected": -0.544264554977417, + "loss": 4.1248, + "nll_loss": 0.9731166362762451, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010179748758673668, + "rewards/margins": 0.04424671083688736, + "rewards/rejected": -0.05442645773291588, + "step": 1373 + }, + { + "epoch": 0.950207468879668, + "grad_norm": 6.224476337432861, + "learning_rate": 4.75103734439834e-05, + "log_odds_chosen": 2.24906587600708, + "log_odds_ratio": -0.36297982931137085, + "logits/chosen": -0.5064922571182251, + "logits/rejected": -0.5028213262557983, + "logps/chosen": -0.14730899035930634, + "logps/rejected": -0.5468143820762634, + "loss": 4.5969, + "nll_loss": 1.112921953201294, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014730898663401604, + "rewards/margins": 0.03995054215192795, + "rewards/rejected": -0.0546814389526844, + "step": 1374 + }, + { + "epoch": 0.9508990318118948, + "grad_norm": 5.267735958099365, + "learning_rate": 4.7544951590594745e-05, + "log_odds_chosen": 0.8886209726333618, + "log_odds_ratio": -0.6777921319007874, + "logits/chosen": -0.5937820076942444, + "logits/rejected": -0.5795533061027527, + "logps/chosen": -0.16547569632530212, + "logps/rejected": -0.3152911067008972, + "loss": 4.4504, + "nll_loss": 1.0448328256607056, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.016547568142414093, + "rewards/margins": 0.014981540851294994, + "rewards/rejected": -0.03152911365032196, + "step": 1375 + }, + { + "epoch": 0.9515905947441217, + "grad_norm": 4.287798881530762, + "learning_rate": 4.7579529737206086e-05, + "log_odds_chosen": 1.8869426250457764, + "log_odds_ratio": -0.5077506303787231, + "logits/chosen": -0.6354760527610779, + "logits/rejected": -0.6710132360458374, + "logps/chosen": -0.16248366236686707, + "logps/rejected": -0.6342530250549316, + "loss": 3.3212, + "nll_loss": 0.7795161008834839, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016248365864157677, + "rewards/margins": 0.04717693477869034, + "rewards/rejected": -0.06342530250549316, + "step": 1376 + }, + { + "epoch": 0.9522821576763485, + "grad_norm": 4.387780666351318, + "learning_rate": 4.761410788381743e-05, + "log_odds_chosen": 3.9893524646759033, + "log_odds_ratio": -0.2768661379814148, + "logits/chosen": -0.9158992767333984, + "logits/rejected": -0.9356086850166321, + "logps/chosen": -0.08836707472801208, + "logps/rejected": -0.5554932951927185, + "loss": 4.7146, + "nll_loss": 1.1509664058685303, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008836708031594753, + "rewards/margins": 0.04671262204647064, + "rewards/rejected": -0.05554932728409767, + "step": 1377 + }, + { + "epoch": 0.9529737206085753, + "grad_norm": 4.432811737060547, + "learning_rate": 4.764868603042877e-05, + "log_odds_chosen": 2.32497501373291, + "log_odds_ratio": -0.3427910804748535, + "logits/chosen": -0.5060633420944214, + "logits/rejected": -0.5108827352523804, + "logps/chosen": -0.11491291224956512, + "logps/rejected": -0.4623175859451294, + "loss": 3.8105, + "nll_loss": 0.9183489084243774, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011491292156279087, + "rewards/margins": 0.03474046662449837, + "rewards/rejected": -0.04623176157474518, + "step": 1378 + }, + { + "epoch": 0.9536652835408023, + "grad_norm": 5.681545257568359, + "learning_rate": 4.768326417704012e-05, + "log_odds_chosen": 2.833235740661621, + "log_odds_ratio": -0.5704039335250854, + "logits/chosen": -0.6774800419807434, + "logits/rejected": -0.749790370464325, + "logps/chosen": -0.10722468048334122, + "logps/rejected": -0.47469180822372437, + "loss": 4.0847, + "nll_loss": 0.9641448259353638, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010722469538450241, + "rewards/margins": 0.036746710538864136, + "rewards/rejected": -0.047469183802604675, + "step": 1379 + }, + { + "epoch": 0.9543568464730291, + "grad_norm": 5.541007995605469, + "learning_rate": 4.771784232365146e-05, + "log_odds_chosen": 3.154003858566284, + "log_odds_ratio": -0.16309258341789246, + "logits/chosen": -0.40195342898368835, + "logits/rejected": -0.4969612956047058, + "logps/chosen": -0.18587008118629456, + "logps/rejected": -0.8497620224952698, + "loss": 4.5835, + "nll_loss": 1.1295685768127441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018587008118629456, + "rewards/margins": 0.06638918817043304, + "rewards/rejected": -0.0849761962890625, + "step": 1380 + }, + { + "epoch": 0.9550484094052559, + "grad_norm": 4.687609672546387, + "learning_rate": 4.77524204702628e-05, + "log_odds_chosen": 1.5811307430267334, + "log_odds_ratio": -0.7333714365959167, + "logits/chosen": -0.6358388662338257, + "logits/rejected": -0.6384164690971375, + "logps/chosen": -0.21824342012405396, + "logps/rejected": -0.5271948575973511, + "loss": 3.788, + "nll_loss": 0.8736591339111328, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021824343129992485, + "rewards/margins": 0.030895143747329712, + "rewards/rejected": -0.052719488739967346, + "step": 1381 + }, + { + "epoch": 0.9557399723374828, + "grad_norm": 5.607902526855469, + "learning_rate": 4.778699861687414e-05, + "log_odds_chosen": 1.194372534751892, + "log_odds_ratio": -0.5874398946762085, + "logits/chosen": -0.7090533971786499, + "logits/rejected": -0.732360303401947, + "logps/chosen": -0.26333528757095337, + "logps/rejected": -0.5452054142951965, + "loss": 4.5204, + "nll_loss": 1.0713554620742798, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.026333527639508247, + "rewards/margins": 0.028187017887830734, + "rewards/rejected": -0.05452054366469383, + "step": 1382 + }, + { + "epoch": 0.9564315352697096, + "grad_norm": 4.289752960205078, + "learning_rate": 4.7821576763485484e-05, + "log_odds_chosen": 3.3134241104125977, + "log_odds_ratio": -0.19969701766967773, + "logits/chosen": -0.6987218260765076, + "logits/rejected": -0.7583480477333069, + "logps/chosen": -0.06717553734779358, + "logps/rejected": -0.5206062197685242, + "loss": 3.9022, + "nll_loss": 0.9555697441101074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006717554293572903, + "rewards/margins": 0.045343067497015, + "rewards/rejected": -0.052060626447200775, + "step": 1383 + }, + { + "epoch": 0.9571230982019364, + "grad_norm": 3.9954605102539062, + "learning_rate": 4.7856154910096825e-05, + "log_odds_chosen": 2.87797474861145, + "log_odds_ratio": -0.31735920906066895, + "logits/chosen": -0.41059356927871704, + "logits/rejected": -0.42806077003479004, + "logps/chosen": -0.1289018988609314, + "logps/rejected": -0.5481069684028625, + "loss": 3.6808, + "nll_loss": 0.8884710669517517, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012890191748738289, + "rewards/margins": 0.041920505464076996, + "rewards/rejected": -0.054810699075460434, + "step": 1384 + }, + { + "epoch": 0.9578146611341632, + "grad_norm": 2.9284474849700928, + "learning_rate": 4.789073305670817e-05, + "log_odds_chosen": 1.96919584274292, + "log_odds_ratio": -0.22860193252563477, + "logits/chosen": -0.6298066973686218, + "logits/rejected": -0.6608700752258301, + "logps/chosen": -0.10201866924762726, + "logps/rejected": -0.5613384246826172, + "loss": 2.7856, + "nll_loss": 0.6735316514968872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010201867669820786, + "rewards/margins": 0.04593197628855705, + "rewards/rejected": -0.05613384395837784, + "step": 1385 + }, + { + "epoch": 0.9585062240663901, + "grad_norm": 4.66770076751709, + "learning_rate": 4.792531120331951e-05, + "log_odds_chosen": 2.5473694801330566, + "log_odds_ratio": -0.3094061017036438, + "logits/chosen": -0.564201831817627, + "logits/rejected": -0.5761914253234863, + "logps/chosen": -0.11359382420778275, + "logps/rejected": -0.4843006730079651, + "loss": 4.2746, + "nll_loss": 1.037712812423706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011359382420778275, + "rewards/margins": 0.03707068786025047, + "rewards/rejected": -0.04843007028102875, + "step": 1386 + }, + { + "epoch": 0.9591977869986169, + "grad_norm": 3.5193567276000977, + "learning_rate": 4.795988934993085e-05, + "log_odds_chosen": 1.3455833196640015, + "log_odds_ratio": -0.5004943609237671, + "logits/chosen": -0.5322783589363098, + "logits/rejected": -0.5484592914581299, + "logps/chosen": -0.12864184379577637, + "logps/rejected": -0.31443101167678833, + "loss": 3.4441, + "nll_loss": 0.8109761476516724, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012864183634519577, + "rewards/margins": 0.018578914925456047, + "rewards/rejected": -0.03144310042262077, + "step": 1387 + }, + { + "epoch": 0.9598893499308437, + "grad_norm": 4.228249549865723, + "learning_rate": 4.799446749654219e-05, + "log_odds_chosen": 2.267373561859131, + "log_odds_ratio": -0.4458346962928772, + "logits/chosen": -0.5130159258842468, + "logits/rejected": -0.5397688150405884, + "logps/chosen": -0.1408432275056839, + "logps/rejected": -0.542976975440979, + "loss": 2.7079, + "nll_loss": 0.632402777671814, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01408432237803936, + "rewards/margins": 0.04021337628364563, + "rewards/rejected": -0.05429770052433014, + "step": 1388 + }, + { + "epoch": 0.9605809128630706, + "grad_norm": 5.8462677001953125, + "learning_rate": 4.802904564315353e-05, + "log_odds_chosen": 2.109428882598877, + "log_odds_ratio": -0.608425498008728, + "logits/chosen": -0.33053314685821533, + "logits/rejected": -0.3880746364593506, + "logps/chosen": -0.1225154846906662, + "logps/rejected": -0.3235834836959839, + "loss": 3.6282, + "nll_loss": 0.8462128639221191, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01225154846906662, + "rewards/margins": 0.02010679990053177, + "rewards/rejected": -0.03235834836959839, + "step": 1389 + }, + { + "epoch": 0.9612724757952974, + "grad_norm": 6.464356422424316, + "learning_rate": 4.8063623789764875e-05, + "log_odds_chosen": 1.8575029373168945, + "log_odds_ratio": -0.40052133798599243, + "logits/chosen": -0.4968864321708679, + "logits/rejected": -0.5065348744392395, + "logps/chosen": -0.08285154402256012, + "logps/rejected": -0.35983797907829285, + "loss": 4.0866, + "nll_loss": 0.9815993905067444, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008285155519843102, + "rewards/margins": 0.027698643505573273, + "rewards/rejected": -0.03598380088806152, + "step": 1390 + }, + { + "epoch": 0.9619640387275242, + "grad_norm": 4.375123500823975, + "learning_rate": 4.8098201936376216e-05, + "log_odds_chosen": 2.18118953704834, + "log_odds_ratio": -0.489065557718277, + "logits/chosen": -0.7576559782028198, + "logits/rejected": -0.7377809882164001, + "logps/chosen": -0.12921804189682007, + "logps/rejected": -0.48704081773757935, + "loss": 3.0138, + "nll_loss": 0.7045431137084961, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012921803630888462, + "rewards/margins": 0.03578227758407593, + "rewards/rejected": -0.048704084008932114, + "step": 1391 + }, + { + "epoch": 0.9626556016597511, + "grad_norm": 6.688558578491211, + "learning_rate": 4.813278008298756e-05, + "log_odds_chosen": 3.997199535369873, + "log_odds_ratio": -0.18422558903694153, + "logits/chosen": -0.009374737739562988, + "logits/rejected": -0.01648634299635887, + "logps/chosen": -0.06665275990962982, + "logps/rejected": -0.8346791863441467, + "loss": 3.2844, + "nll_loss": 0.8026823997497559, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006665276363492012, + "rewards/margins": 0.07680264115333557, + "rewards/rejected": -0.08346791565418243, + "step": 1392 + }, + { + "epoch": 0.9633471645919779, + "grad_norm": 6.806331157684326, + "learning_rate": 4.81673582295989e-05, + "log_odds_chosen": 2.9461169242858887, + "log_odds_ratio": -0.3872566819190979, + "logits/chosen": -0.506131649017334, + "logits/rejected": -0.595481276512146, + "logps/chosen": -0.10255613178014755, + "logps/rejected": -0.7516794204711914, + "loss": 4.0962, + "nll_loss": 0.985315203666687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010255612432956696, + "rewards/margins": 0.0649123340845108, + "rewards/rejected": -0.0751679390668869, + "step": 1393 + }, + { + "epoch": 0.9640387275242047, + "grad_norm": 5.865633487701416, + "learning_rate": 4.820193637621024e-05, + "log_odds_chosen": 2.411072254180908, + "log_odds_ratio": -0.27777040004730225, + "logits/chosen": -0.9139069318771362, + "logits/rejected": -0.9897363781929016, + "logps/chosen": -0.09145885705947876, + "logps/rejected": -0.3880517780780792, + "loss": 5.0715, + "nll_loss": 1.2401103973388672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009145885705947876, + "rewards/margins": 0.029659289866685867, + "rewards/rejected": -0.03880517557263374, + "step": 1394 + }, + { + "epoch": 0.9647302904564315, + "grad_norm": 6.374701499938965, + "learning_rate": 4.823651452282158e-05, + "log_odds_chosen": 2.4968247413635254, + "log_odds_ratio": -0.248407244682312, + "logits/chosen": -0.1413920819759369, + "logits/rejected": -0.19391274452209473, + "logps/chosen": -0.06600844860076904, + "logps/rejected": -0.3738167881965637, + "loss": 4.7476, + "nll_loss": 1.1620688438415527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006600845605134964, + "rewards/margins": 0.030780835077166557, + "rewards/rejected": -0.03738168254494667, + "step": 1395 + }, + { + "epoch": 0.9654218533886584, + "grad_norm": 4.07810640335083, + "learning_rate": 4.8271092669432924e-05, + "log_odds_chosen": 3.7929351329803467, + "log_odds_ratio": -0.1519448459148407, + "logits/chosen": -0.19091452658176422, + "logits/rejected": -0.24754397571086884, + "logps/chosen": -0.044879138469696045, + "logps/rejected": -0.5073044896125793, + "loss": 2.6384, + "nll_loss": 0.644395649433136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004487914033234119, + "rewards/margins": 0.04624253511428833, + "rewards/rejected": -0.05073045194149017, + "step": 1396 + }, + { + "epoch": 0.9661134163208852, + "grad_norm": 3.521620512008667, + "learning_rate": 4.8305670816044266e-05, + "log_odds_chosen": 3.7254812717437744, + "log_odds_ratio": -0.23378178477287292, + "logits/chosen": -0.027941592037677765, + "logits/rejected": -0.03508976846933365, + "logps/chosen": -0.07573119550943375, + "logps/rejected": -0.5350953340530396, + "loss": 3.7968, + "nll_loss": 0.9258334636688232, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00757311936467886, + "rewards/margins": 0.04593641683459282, + "rewards/rejected": -0.053509537130594254, + "step": 1397 + }, + { + "epoch": 0.966804979253112, + "grad_norm": 5.105077743530273, + "learning_rate": 4.834024896265561e-05, + "log_odds_chosen": 3.2875313758850098, + "log_odds_ratio": -0.2844654321670532, + "logits/chosen": -0.19148701429367065, + "logits/rejected": -0.2091885507106781, + "logps/chosen": -0.09642978012561798, + "logps/rejected": -0.5320587754249573, + "loss": 3.3054, + "nll_loss": 0.7979139089584351, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009642978198826313, + "rewards/margins": 0.04356290400028229, + "rewards/rejected": -0.053205881267786026, + "step": 1398 + }, + { + "epoch": 0.9674965421853389, + "grad_norm": 4.500959396362305, + "learning_rate": 4.837482710926695e-05, + "log_odds_chosen": 2.458662748336792, + "log_odds_ratio": -0.47104576230049133, + "logits/chosen": -0.5823588967323303, + "logits/rejected": -0.585030734539032, + "logps/chosen": -0.18328362703323364, + "logps/rejected": -0.5744332671165466, + "loss": 3.0462, + "nll_loss": 0.7144502997398376, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018328363075852394, + "rewards/margins": 0.039114970713853836, + "rewards/rejected": -0.05744332820177078, + "step": 1399 + }, + { + "epoch": 0.9681881051175657, + "grad_norm": 4.3684401512146, + "learning_rate": 4.840940525587829e-05, + "log_odds_chosen": 1.7222740650177002, + "log_odds_ratio": -0.4732230305671692, + "logits/chosen": -0.4307154417037964, + "logits/rejected": -0.4691559970378876, + "logps/chosen": -0.18791911005973816, + "logps/rejected": -0.42685210704803467, + "loss": 3.9824, + "nll_loss": 0.9482815265655518, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018791913986206055, + "rewards/margins": 0.02389329858124256, + "rewards/rejected": -0.04268521070480347, + "step": 1400 + }, + { + "epoch": 0.9688796680497925, + "grad_norm": 6.100607395172119, + "learning_rate": 4.844398340248963e-05, + "log_odds_chosen": 2.7508108615875244, + "log_odds_ratio": -0.2012925148010254, + "logits/chosen": -0.5389729142189026, + "logits/rejected": -0.5223175883293152, + "logps/chosen": -0.10300426185131073, + "logps/rejected": -0.6810008883476257, + "loss": 4.3147, + "nll_loss": 1.058542251586914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010300425812602043, + "rewards/margins": 0.05779966339468956, + "rewards/rejected": -0.06810008734464645, + "step": 1401 + }, + { + "epoch": 0.9695712309820194, + "grad_norm": 5.2680768966674805, + "learning_rate": 4.8478561549100974e-05, + "log_odds_chosen": 3.3759124279022217, + "log_odds_ratio": -0.28325340151786804, + "logits/chosen": -0.36735105514526367, + "logits/rejected": -0.39844614267349243, + "logps/chosen": -0.07357937842607498, + "logps/rejected": -0.695828914642334, + "loss": 3.9927, + "nll_loss": 0.9698481559753418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007357938215136528, + "rewards/margins": 0.06222495436668396, + "rewards/rejected": -0.06958289444446564, + "step": 1402 + }, + { + "epoch": 0.9702627939142462, + "grad_norm": 6.146441459655762, + "learning_rate": 4.8513139695712315e-05, + "log_odds_chosen": 2.8977818489074707, + "log_odds_ratio": -0.34373605251312256, + "logits/chosen": -0.08919993788003922, + "logits/rejected": -0.1025874987244606, + "logps/chosen": -0.12175912410020828, + "logps/rejected": -0.4791489541530609, + "loss": 4.2676, + "nll_loss": 1.0325186252593994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012175912037491798, + "rewards/margins": 0.0357389822602272, + "rewards/rejected": -0.04791489243507385, + "step": 1403 + }, + { + "epoch": 0.970954356846473, + "grad_norm": 5.012028217315674, + "learning_rate": 4.854771784232366e-05, + "log_odds_chosen": 4.039868354797363, + "log_odds_ratio": -0.1392008513212204, + "logits/chosen": -0.4420851469039917, + "logits/rejected": -0.4822046160697937, + "logps/chosen": -0.062176115810871124, + "logps/rejected": -0.6904194951057434, + "loss": 3.8251, + "nll_loss": 0.942352831363678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006217611953616142, + "rewards/margins": 0.06282433867454529, + "rewards/rejected": -0.06904194504022598, + "step": 1404 + }, + { + "epoch": 0.9716459197786999, + "grad_norm": 6.326803684234619, + "learning_rate": 4.858229598893499e-05, + "log_odds_chosen": 5.957394599914551, + "log_odds_ratio": -0.08176662027835846, + "logits/chosen": -0.33497846126556396, + "logits/rejected": -0.35597074031829834, + "logps/chosen": -0.040950141847133636, + "logps/rejected": -1.1453843116760254, + "loss": 4.6783, + "nll_loss": 1.1614075899124146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004095014184713364, + "rewards/margins": 0.11044342070817947, + "rewards/rejected": -0.11453843861818314, + "step": 1405 + }, + { + "epoch": 0.9723374827109267, + "grad_norm": 4.005640029907227, + "learning_rate": 4.861687413554633e-05, + "log_odds_chosen": 1.7429275512695312, + "log_odds_ratio": -0.5316041111946106, + "logits/chosen": -0.5472002029418945, + "logits/rejected": -0.5267659425735474, + "logps/chosen": -0.10989716649055481, + "logps/rejected": -0.5483049154281616, + "loss": 3.148, + "nll_loss": 0.7338396310806274, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010989716276526451, + "rewards/margins": 0.04384077712893486, + "rewards/rejected": -0.05483049526810646, + "step": 1406 + }, + { + "epoch": 0.9730290456431535, + "grad_norm": 4.7602219581604, + "learning_rate": 4.8651452282157675e-05, + "log_odds_chosen": 3.336592674255371, + "log_odds_ratio": -0.26619279384613037, + "logits/chosen": -0.4598739743232727, + "logits/rejected": -0.4864497780799866, + "logps/chosen": -0.08146242797374725, + "logps/rejected": -0.5847065448760986, + "loss": 4.358, + "nll_loss": 1.0628809928894043, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008146243169903755, + "rewards/margins": 0.05032441020011902, + "rewards/rejected": -0.058470651507377625, + "step": 1407 + }, + { + "epoch": 0.9737206085753803, + "grad_norm": 4.0709357261657715, + "learning_rate": 4.8686030428769016e-05, + "log_odds_chosen": 3.5210342407226562, + "log_odds_ratio": -0.22207771241664886, + "logits/chosen": -0.4946010708808899, + "logits/rejected": -0.5341157913208008, + "logps/chosen": -0.08640432357788086, + "logps/rejected": -0.6819725036621094, + "loss": 2.8891, + "nll_loss": 0.7000560760498047, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008640431798994541, + "rewards/margins": 0.05955682322382927, + "rewards/rejected": -0.06819725036621094, + "step": 1408 + }, + { + "epoch": 0.9744121715076072, + "grad_norm": 4.645059108734131, + "learning_rate": 4.872060857538036e-05, + "log_odds_chosen": 3.6667566299438477, + "log_odds_ratio": -0.19257347285747528, + "logits/chosen": -0.4007118344306946, + "logits/rejected": -0.4578131139278412, + "logps/chosen": -0.09479233622550964, + "logps/rejected": -0.5741965770721436, + "loss": 3.3919, + "nll_loss": 0.8287203907966614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009479233995079994, + "rewards/margins": 0.04794042184948921, + "rewards/rejected": -0.057419657707214355, + "step": 1409 + }, + { + "epoch": 0.975103734439834, + "grad_norm": 5.395598888397217, + "learning_rate": 4.87551867219917e-05, + "log_odds_chosen": 2.5936923027038574, + "log_odds_ratio": -0.3879097104072571, + "logits/chosen": -0.3431122601032257, + "logits/rejected": -0.39480048418045044, + "logps/chosen": -0.12003860622644424, + "logps/rejected": -0.5975310206413269, + "loss": 4.2286, + "nll_loss": 1.0183528661727905, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012003861367702484, + "rewards/margins": 0.047749243676662445, + "rewards/rejected": -0.05975310876965523, + "step": 1410 + }, + { + "epoch": 0.9757952973720608, + "grad_norm": 5.991229057312012, + "learning_rate": 4.878976486860304e-05, + "log_odds_chosen": 4.690047740936279, + "log_odds_ratio": -0.18323415517807007, + "logits/chosen": -0.48473188281059265, + "logits/rejected": -0.5005260109901428, + "logps/chosen": -0.06240301951766014, + "logps/rejected": -1.179287075996399, + "loss": 3.4513, + "nll_loss": 0.844507098197937, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0062403022311627865, + "rewards/margins": 0.11168840527534485, + "rewards/rejected": -0.11792870610952377, + "step": 1411 + }, + { + "epoch": 0.9764868603042877, + "grad_norm": 4.65676736831665, + "learning_rate": 4.882434301521438e-05, + "log_odds_chosen": 0.7128917574882507, + "log_odds_ratio": -0.48616403341293335, + "logits/chosen": -0.7598358392715454, + "logits/rejected": -0.7617342472076416, + "logps/chosen": -0.09561615437269211, + "logps/rejected": -0.18550316989421844, + "loss": 3.743, + "nll_loss": 0.8871452212333679, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009561615064740181, + "rewards/margins": 0.008988700807094574, + "rewards/rejected": -0.018550317734479904, + "step": 1412 + }, + { + "epoch": 0.9771784232365145, + "grad_norm": 5.093491077423096, + "learning_rate": 4.8858921161825724e-05, + "log_odds_chosen": 1.3392752408981323, + "log_odds_ratio": -0.48251235485076904, + "logits/chosen": -0.477012574672699, + "logits/rejected": -0.4597123861312866, + "logps/chosen": -0.22213858366012573, + "logps/rejected": -0.3907717764377594, + "loss": 3.4779, + "nll_loss": 0.8212242126464844, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.022213861346244812, + "rewards/margins": 0.016863318160176277, + "rewards/rejected": -0.03907717764377594, + "step": 1413 + }, + { + "epoch": 0.9778699861687413, + "grad_norm": 10.657920837402344, + "learning_rate": 4.8893499308437066e-05, + "log_odds_chosen": 0.4027661979198456, + "log_odds_ratio": -1.558465600013733, + "logits/chosen": -0.6197187900543213, + "logits/rejected": -0.6360077857971191, + "logps/chosen": -0.3210720717906952, + "logps/rejected": -0.2586687505245209, + "loss": 5.5928, + "nll_loss": 1.2423492670059204, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03210721164941788, + "rewards/margins": -0.0062403371557593346, + "rewards/rejected": -0.025866875424981117, + "step": 1414 + }, + { + "epoch": 0.9785615491009682, + "grad_norm": 6.654989719390869, + "learning_rate": 4.892807745504841e-05, + "log_odds_chosen": 2.0434505939483643, + "log_odds_ratio": -0.43897897005081177, + "logits/chosen": -0.6745843887329102, + "logits/rejected": -0.6595047116279602, + "logps/chosen": -0.11813121289014816, + "logps/rejected": -0.3862118721008301, + "loss": 4.2663, + "nll_loss": 1.0226771831512451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011813120916485786, + "rewards/margins": 0.02680806629359722, + "rewards/rejected": -0.038621190935373306, + "step": 1415 + }, + { + "epoch": 0.979253112033195, + "grad_norm": 5.987014293670654, + "learning_rate": 4.896265560165975e-05, + "log_odds_chosen": 3.5931477546691895, + "log_odds_ratio": -0.3616059124469757, + "logits/chosen": -0.3304971158504486, + "logits/rejected": -0.4050114154815674, + "logps/chosen": -0.07955209910869598, + "logps/rejected": -0.7297384738922119, + "loss": 4.1158, + "nll_loss": 0.9927953481674194, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007955210283398628, + "rewards/margins": 0.06501863896846771, + "rewards/rejected": -0.0729738399386406, + "step": 1416 + }, + { + "epoch": 0.9799446749654218, + "grad_norm": 7.944802761077881, + "learning_rate": 4.899723374827109e-05, + "log_odds_chosen": 2.194571018218994, + "log_odds_ratio": -0.7392339110374451, + "logits/chosen": -0.18101269006729126, + "logits/rejected": -0.1946060061454773, + "logps/chosen": -0.08568310737609863, + "logps/rejected": -0.389140784740448, + "loss": 3.6976, + "nll_loss": 0.8504800796508789, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008568311110138893, + "rewards/margins": 0.030345769599080086, + "rewards/rejected": -0.03891407698392868, + "step": 1417 + }, + { + "epoch": 0.9806362378976486, + "grad_norm": 5.2776312828063965, + "learning_rate": 4.903181189488243e-05, + "log_odds_chosen": 3.4539592266082764, + "log_odds_ratio": -0.17639483511447906, + "logits/chosen": -0.6359360814094543, + "logits/rejected": -0.681241512298584, + "logps/chosen": -0.07430019974708557, + "logps/rejected": -0.6378059387207031, + "loss": 3.5222, + "nll_loss": 0.8629008531570435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007430019788444042, + "rewards/margins": 0.056350577622652054, + "rewards/rejected": -0.06378059834241867, + "step": 1418 + }, + { + "epoch": 0.9813278008298755, + "grad_norm": 7.109945774078369, + "learning_rate": 4.9066390041493773e-05, + "log_odds_chosen": 3.779050350189209, + "log_odds_ratio": -0.6532815098762512, + "logits/chosen": -0.6393461227416992, + "logits/rejected": -0.6872209310531616, + "logps/chosen": -0.11103115975856781, + "logps/rejected": -0.819476842880249, + "loss": 5.2681, + "nll_loss": 1.2516968250274658, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011103115975856781, + "rewards/margins": 0.07084456831216812, + "rewards/rejected": -0.0819476842880249, + "step": 1419 + }, + { + "epoch": 0.9820193637621023, + "grad_norm": 5.775816440582275, + "learning_rate": 4.9100968188105115e-05, + "log_odds_chosen": 1.6225054264068604, + "log_odds_ratio": -0.5428125858306885, + "logits/chosen": -0.7603781223297119, + "logits/rejected": -0.7697376608848572, + "logps/chosen": -0.14464308321475983, + "logps/rejected": -0.5490061640739441, + "loss": 4.3808, + "nll_loss": 1.0409178733825684, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014464308507740498, + "rewards/margins": 0.04043630510568619, + "rewards/rejected": -0.05490061640739441, + "step": 1420 + }, + { + "epoch": 0.9827109266943291, + "grad_norm": 5.920970439910889, + "learning_rate": 4.9135546334716457e-05, + "log_odds_chosen": 4.283205986022949, + "log_odds_ratio": -0.17093268036842346, + "logits/chosen": -0.7627154588699341, + "logits/rejected": -0.8561334013938904, + "logps/chosen": -0.04168498143553734, + "logps/rejected": -0.7488090991973877, + "loss": 4.9513, + "nll_loss": 1.2207205295562744, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004168498329818249, + "rewards/margins": 0.07071240991353989, + "rewards/rejected": -0.07488091289997101, + "step": 1421 + }, + { + "epoch": 0.983402489626556, + "grad_norm": 6.730568885803223, + "learning_rate": 4.91701244813278e-05, + "log_odds_chosen": 1.5627930164337158, + "log_odds_ratio": -0.3418138027191162, + "logits/chosen": -0.39804694056510925, + "logits/rejected": -0.4181201756000519, + "logps/chosen": -0.14453352987766266, + "logps/rejected": -0.5822803974151611, + "loss": 4.5374, + "nll_loss": 1.1001561880111694, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014453353360295296, + "rewards/margins": 0.04377468675374985, + "rewards/rejected": -0.05822804570198059, + "step": 1422 + }, + { + "epoch": 0.9840940525587828, + "grad_norm": 4.172337532043457, + "learning_rate": 4.920470262793914e-05, + "log_odds_chosen": 3.565321922302246, + "log_odds_ratio": -0.16967037320137024, + "logits/chosen": -0.4934771955013275, + "logits/rejected": -0.5961635112762451, + "logps/chosen": -0.08974127471446991, + "logps/rejected": -0.806494951248169, + "loss": 2.8905, + "nll_loss": 0.7056623697280884, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008974128402769566, + "rewards/margins": 0.0716753676533699, + "rewards/rejected": -0.0806494951248169, + "step": 1423 + }, + { + "epoch": 0.9847856154910097, + "grad_norm": 3.726513385772705, + "learning_rate": 4.923928077455049e-05, + "log_odds_chosen": 3.001035213470459, + "log_odds_ratio": -0.4421585202217102, + "logits/chosen": -0.6121970415115356, + "logits/rejected": -0.6204871535301208, + "logps/chosen": -0.0803869366645813, + "logps/rejected": -0.4731726050376892, + "loss": 2.6134, + "nll_loss": 0.609136164188385, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008038693107664585, + "rewards/margins": 0.03927857056260109, + "rewards/rejected": -0.0473172664642334, + "step": 1424 + }, + { + "epoch": 0.9854771784232366, + "grad_norm": 5.099123954772949, + "learning_rate": 4.927385892116183e-05, + "log_odds_chosen": 2.7881312370300293, + "log_odds_ratio": -0.2055206596851349, + "logits/chosen": -0.5875111818313599, + "logits/rejected": -0.7261995077133179, + "logps/chosen": -0.06383083760738373, + "logps/rejected": -0.47276073694229126, + "loss": 3.7443, + "nll_loss": 0.9155149459838867, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006383083760738373, + "rewards/margins": 0.040892988443374634, + "rewards/rejected": -0.04727607220411301, + "step": 1425 + }, + { + "epoch": 0.9861687413554634, + "grad_norm": 5.290557384490967, + "learning_rate": 4.930843706777317e-05, + "log_odds_chosen": 3.4888155460357666, + "log_odds_ratio": -0.18133285641670227, + "logits/chosen": -0.3766958713531494, + "logits/rejected": -0.439983606338501, + "logps/chosen": -0.08132694661617279, + "logps/rejected": -0.933536171913147, + "loss": 3.4533, + "nll_loss": 0.8451938629150391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008132695220410824, + "rewards/margins": 0.08522091805934906, + "rewards/rejected": -0.09335361421108246, + "step": 1426 + }, + { + "epoch": 0.9868603042876902, + "grad_norm": 6.354071140289307, + "learning_rate": 4.934301521438451e-05, + "log_odds_chosen": 3.1632020473480225, + "log_odds_ratio": -0.21364012360572815, + "logits/chosen": -0.4825863242149353, + "logits/rejected": -0.5225251317024231, + "logps/chosen": -0.05160689353942871, + "logps/rejected": -0.6081209182739258, + "loss": 5.2659, + "nll_loss": 1.2951115369796753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005160689353942871, + "rewards/margins": 0.055651403963565826, + "rewards/rejected": -0.0608120895922184, + "step": 1427 + }, + { + "epoch": 0.9875518672199171, + "grad_norm": 5.97199821472168, + "learning_rate": 4.9377593360995854e-05, + "log_odds_chosen": 3.9173221588134766, + "log_odds_ratio": -0.1347779929637909, + "logits/chosen": -0.5775250196456909, + "logits/rejected": -0.624565839767456, + "logps/chosen": -0.0501507967710495, + "logps/rejected": -0.64739990234375, + "loss": 3.8972, + "nll_loss": 0.9608168601989746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005015079397708178, + "rewards/margins": 0.05972491204738617, + "rewards/rejected": -0.06473998725414276, + "step": 1428 + }, + { + "epoch": 0.9882434301521439, + "grad_norm": 4.286798477172852, + "learning_rate": 4.9412171507607196e-05, + "log_odds_chosen": 2.3756041526794434, + "log_odds_ratio": -0.3553531765937805, + "logits/chosen": -0.7835421562194824, + "logits/rejected": -0.8416650891304016, + "logps/chosen": -0.07701346278190613, + "logps/rejected": -0.359640896320343, + "loss": 3.0372, + "nll_loss": 0.7237566709518433, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007701346650719643, + "rewards/margins": 0.02826274186372757, + "rewards/rejected": -0.03596408665180206, + "step": 1429 + }, + { + "epoch": 0.9889349930843707, + "grad_norm": 4.213981628417969, + "learning_rate": 4.944674965421854e-05, + "log_odds_chosen": 3.0098376274108887, + "log_odds_ratio": -0.2841997742652893, + "logits/chosen": -0.809171199798584, + "logits/rejected": -0.837843656539917, + "logps/chosen": -0.1136767566204071, + "logps/rejected": -0.6050674319267273, + "loss": 3.5371, + "nll_loss": 0.8558429479598999, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01136767491698265, + "rewards/margins": 0.04913906753063202, + "rewards/rejected": -0.06050674617290497, + "step": 1430 + }, + { + "epoch": 0.9896265560165975, + "grad_norm": 3.492421865463257, + "learning_rate": 4.948132780082988e-05, + "log_odds_chosen": 3.6703217029571533, + "log_odds_ratio": -0.12424807995557785, + "logits/chosen": -0.3869936466217041, + "logits/rejected": -0.4417737126350403, + "logps/chosen": -0.08608690649271011, + "logps/rejected": -0.9713066816329956, + "loss": 2.9985, + "nll_loss": 0.7371969819068909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008608691394329071, + "rewards/margins": 0.08852197974920273, + "rewards/rejected": -0.0971306711435318, + "step": 1431 + }, + { + "epoch": 0.9903181189488244, + "grad_norm": 3.1567420959472656, + "learning_rate": 4.951590594744122e-05, + "log_odds_chosen": 5.578029632568359, + "log_odds_ratio": -0.166071355342865, + "logits/chosen": -0.6140339970588684, + "logits/rejected": -0.6234652996063232, + "logps/chosen": -0.05416777357459068, + "logps/rejected": -0.7071319818496704, + "loss": 3.3025, + "nll_loss": 0.8090150356292725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005416777450591326, + "rewards/margins": 0.06529641896486282, + "rewards/rejected": -0.07071319967508316, + "step": 1432 + }, + { + "epoch": 0.9910096818810512, + "grad_norm": 5.914407730102539, + "learning_rate": 4.955048409405256e-05, + "log_odds_chosen": 2.671858310699463, + "log_odds_ratio": -0.27089083194732666, + "logits/chosen": -0.4663957953453064, + "logits/rejected": -0.5192126035690308, + "logps/chosen": -0.08659430593252182, + "logps/rejected": -0.5890503525733948, + "loss": 4.767, + "nll_loss": 1.164661169052124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008659430779516697, + "rewards/margins": 0.050245605409145355, + "rewards/rejected": -0.05890503525733948, + "step": 1433 + }, + { + "epoch": 0.991701244813278, + "grad_norm": 3.4547741413116455, + "learning_rate": 4.9585062240663904e-05, + "log_odds_chosen": 3.578338861465454, + "log_odds_ratio": -0.12430144101381302, + "logits/chosen": -0.6321961879730225, + "logits/rejected": -0.6485381126403809, + "logps/chosen": -0.07194468379020691, + "logps/rejected": -0.7826632261276245, + "loss": 3.1203, + "nll_loss": 0.7676528692245483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007194467820227146, + "rewards/margins": 0.07107185572385788, + "rewards/rejected": -0.07826632261276245, + "step": 1434 + }, + { + "epoch": 0.9923928077455049, + "grad_norm": 4.012014389038086, + "learning_rate": 4.9619640387275245e-05, + "log_odds_chosen": 2.3743512630462646, + "log_odds_ratio": -0.5197234153747559, + "logits/chosen": -0.1886919140815735, + "logits/rejected": -0.19670218229293823, + "logps/chosen": -0.14259615540504456, + "logps/rejected": -0.418677419424057, + "loss": 3.1601, + "nll_loss": 0.7380492687225342, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014259614050388336, + "rewards/margins": 0.027608126401901245, + "rewards/rejected": -0.04186774045228958, + "step": 1435 + }, + { + "epoch": 0.9930843706777317, + "grad_norm": 4.641087532043457, + "learning_rate": 4.965421853388659e-05, + "log_odds_chosen": 3.459301471710205, + "log_odds_ratio": -0.30342578887939453, + "logits/chosen": -0.8673899173736572, + "logits/rejected": -0.9011232256889343, + "logps/chosen": -0.043145108968019485, + "logps/rejected": -0.3700050115585327, + "loss": 4.9722, + "nll_loss": 1.2126994132995605, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004314510617405176, + "rewards/margins": 0.03268599137663841, + "rewards/rejected": -0.03700050339102745, + "step": 1436 + }, + { + "epoch": 0.9937759336099585, + "grad_norm": 5.460515022277832, + "learning_rate": 4.968879668049793e-05, + "log_odds_chosen": 3.272472381591797, + "log_odds_ratio": -0.13704834878444672, + "logits/chosen": -0.2897428870201111, + "logits/rejected": -0.35923218727111816, + "logps/chosen": -0.05391785502433777, + "logps/rejected": -0.46236205101013184, + "loss": 3.6047, + "nll_loss": 0.8874602317810059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005391785409301519, + "rewards/margins": 0.040844421833753586, + "rewards/rejected": -0.04623620584607124, + "step": 1437 + }, + { + "epoch": 0.9944674965421854, + "grad_norm": 4.706887245178223, + "learning_rate": 4.972337482710927e-05, + "log_odds_chosen": 3.8396406173706055, + "log_odds_ratio": -0.22891446948051453, + "logits/chosen": -0.4871414601802826, + "logits/rejected": -0.565714418888092, + "logps/chosen": -0.057451147586107254, + "logps/rejected": -0.5715197324752808, + "loss": 4.5186, + "nll_loss": 1.1067641973495483, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005745114758610725, + "rewards/margins": 0.0514068529009819, + "rewards/rejected": -0.05715196952223778, + "step": 1438 + }, + { + "epoch": 0.9951590594744122, + "grad_norm": 3.4306483268737793, + "learning_rate": 4.975795297372061e-05, + "log_odds_chosen": 5.28213357925415, + "log_odds_ratio": -0.038520678877830505, + "logits/chosen": -0.7845959663391113, + "logits/rejected": -0.7963880300521851, + "logps/chosen": -0.012803150340914726, + "logps/rejected": -0.6970179080963135, + "loss": 3.6504, + "nll_loss": 0.9087599515914917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001280314987525344, + "rewards/margins": 0.06842147558927536, + "rewards/rejected": -0.06970179080963135, + "step": 1439 + }, + { + "epoch": 0.995850622406639, + "grad_norm": 7.943667411804199, + "learning_rate": 4.979253112033195e-05, + "log_odds_chosen": 1.939913272857666, + "log_odds_ratio": -0.914853572845459, + "logits/chosen": -0.6151790022850037, + "logits/rejected": -0.6059738397598267, + "logps/chosen": -0.24091029167175293, + "logps/rejected": -0.6626577377319336, + "loss": 4.3262, + "nll_loss": 0.9900734424591064, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.024091029539704323, + "rewards/margins": 0.04217474162578583, + "rewards/rejected": -0.0662657767534256, + "step": 1440 + }, + { + "epoch": 0.9965421853388658, + "grad_norm": 5.455565929412842, + "learning_rate": 4.9827109266943295e-05, + "log_odds_chosen": 4.995009899139404, + "log_odds_ratio": -0.1817580908536911, + "logits/chosen": -0.30914661288261414, + "logits/rejected": -0.31398558616638184, + "logps/chosen": -0.084615059196949, + "logps/rejected": -0.8779784440994263, + "loss": 3.2643, + "nll_loss": 0.797911524772644, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008461506105959415, + "rewards/margins": 0.07933633774518967, + "rewards/rejected": -0.08779783546924591, + "step": 1441 + }, + { + "epoch": 0.9972337482710927, + "grad_norm": 5.774052143096924, + "learning_rate": 4.9861687413554636e-05, + "log_odds_chosen": 3.2858119010925293, + "log_odds_ratio": -0.4178237318992615, + "logits/chosen": -0.9737169742584229, + "logits/rejected": -1.011305570602417, + "logps/chosen": -0.04260983318090439, + "logps/rejected": -0.5319869518280029, + "loss": 3.9489, + "nll_loss": 0.9454514980316162, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004260983318090439, + "rewards/margins": 0.048937711864709854, + "rewards/rejected": -0.05319869518280029, + "step": 1442 + }, + { + "epoch": 0.9979253112033195, + "grad_norm": 5.342999458312988, + "learning_rate": 4.989626556016598e-05, + "log_odds_chosen": 2.6681790351867676, + "log_odds_ratio": -0.4021229147911072, + "logits/chosen": -0.6921200752258301, + "logits/rejected": -0.7400314807891846, + "logps/chosen": -0.14808860421180725, + "logps/rejected": -0.8047324419021606, + "loss": 4.6528, + "nll_loss": 1.122977614402771, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01480886060744524, + "rewards/margins": 0.0656643807888031, + "rewards/rejected": -0.08047324419021606, + "step": 1443 + }, + { + "epoch": 0.9986168741355463, + "grad_norm": 4.999363899230957, + "learning_rate": 4.993084370677732e-05, + "log_odds_chosen": 2.2260918617248535, + "log_odds_ratio": -0.40468379855155945, + "logits/chosen": -0.6280920505523682, + "logits/rejected": -0.6217027902603149, + "logps/chosen": -0.11846964806318283, + "logps/rejected": -0.38483330607414246, + "loss": 3.5864, + "nll_loss": 0.856123685836792, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011846965178847313, + "rewards/margins": 0.026636367663741112, + "rewards/rejected": -0.038483329117298126, + "step": 1444 + }, + { + "epoch": 0.9993084370677732, + "grad_norm": 25.162742614746094, + "learning_rate": 4.996542185338866e-05, + "log_odds_chosen": 0.30654793977737427, + "log_odds_ratio": -1.2838389873504639, + "logits/chosen": -0.40273237228393555, + "logits/rejected": -0.3698718249797821, + "logps/chosen": -0.3533599376678467, + "logps/rejected": -0.33225274085998535, + "loss": 3.3748, + "nll_loss": 0.7153055667877197, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03533599525690079, + "rewards/margins": -0.0021107199136167765, + "rewards/rejected": -0.033225271850824356, + "step": 1445 + }, + { + "epoch": 1.0, + "grad_norm": 3.8861796855926514, + "learning_rate": 5e-05, + "log_odds_chosen": 3.859959602355957, + "log_odds_ratio": -0.33482107520103455, + "logits/chosen": -0.59022057056427, + "logits/rejected": -0.5907670259475708, + "logps/chosen": -0.17323844134807587, + "logps/rejected": -0.8100842237472534, + "loss": 2.379, + "nll_loss": 0.561260461807251, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017323845997452736, + "rewards/margins": 0.06368458271026611, + "rewards/rejected": -0.0810084268450737, + "step": 1446 + }, + { + "epoch": 1.0006915629322268, + "grad_norm": 4.5464582443237305, + "learning_rate": 4.9996157983709855e-05, + "log_odds_chosen": 3.976539134979248, + "log_odds_ratio": -0.40323778986930847, + "logits/chosen": -0.837387204170227, + "logits/rejected": -0.9365677833557129, + "logps/chosen": -0.054401617497205734, + "logps/rejected": -0.6226985454559326, + "loss": 4.0711, + "nll_loss": 0.9774460792541504, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0054401615634560585, + "rewards/margins": 0.05682969093322754, + "rewards/rejected": -0.06226985156536102, + "step": 1447 + }, + { + "epoch": 1.0013831258644537, + "grad_norm": 4.172989845275879, + "learning_rate": 4.999231596741971e-05, + "log_odds_chosen": 3.1294639110565186, + "log_odds_ratio": -0.13460323214530945, + "logits/chosen": -0.662013828754425, + "logits/rejected": -0.6416223049163818, + "logps/chosen": -0.11852079629898071, + "logps/rejected": -0.8950018882751465, + "loss": 2.598, + "nll_loss": 0.6360316276550293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011852080002427101, + "rewards/margins": 0.0776481032371521, + "rewards/rejected": -0.08950018882751465, + "step": 1448 + }, + { + "epoch": 1.0020746887966805, + "grad_norm": 4.302820205688477, + "learning_rate": 4.998847395112955e-05, + "log_odds_chosen": 4.37247371673584, + "log_odds_ratio": -0.2136344015598297, + "logits/chosen": -0.5137135982513428, + "logits/rejected": -0.5863500237464905, + "logps/chosen": -0.0761384591460228, + "logps/rejected": -1.1133251190185547, + "loss": 3.1741, + "nll_loss": 0.7721676826477051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007613845635205507, + "rewards/margins": 0.10371865332126617, + "rewards/rejected": -0.11133251339197159, + "step": 1449 + }, + { + "epoch": 1.0027662517289073, + "grad_norm": 6.997933864593506, + "learning_rate": 4.9984631934839405e-05, + "log_odds_chosen": 2.459064483642578, + "log_odds_ratio": -0.46886980533599854, + "logits/chosen": -0.599526047706604, + "logits/rejected": -0.610713005065918, + "logps/chosen": -0.09736969321966171, + "logps/rejected": -0.683413565158844, + "loss": 4.5402, + "nll_loss": 1.0881648063659668, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009736969135701656, + "rewards/margins": 0.05860438942909241, + "rewards/rejected": -0.06834135949611664, + "step": 1450 + }, + { + "epoch": 1.0034578146611342, + "grad_norm": 3.562448024749756, + "learning_rate": 4.998078991854926e-05, + "log_odds_chosen": 5.7006330490112305, + "log_odds_ratio": -0.06817111372947693, + "logits/chosen": -0.598158061504364, + "logits/rejected": -0.6181397438049316, + "logps/chosen": -0.03915969282388687, + "logps/rejected": -0.7007030248641968, + "loss": 2.7675, + "nll_loss": 0.685059666633606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003915969282388687, + "rewards/margins": 0.06615433096885681, + "rewards/rejected": -0.0700703039765358, + "step": 1451 + }, + { + "epoch": 1.004149377593361, + "grad_norm": 4.6683549880981445, + "learning_rate": 4.997694790225911e-05, + "log_odds_chosen": 1.3357759714126587, + "log_odds_ratio": -0.4112657308578491, + "logits/chosen": -1.0096291303634644, + "logits/rejected": -1.0104484558105469, + "logps/chosen": -0.13406968116760254, + "logps/rejected": -0.3350994288921356, + "loss": 4.2212, + "nll_loss": 1.0141762495040894, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013406967744231224, + "rewards/margins": 0.020102977752685547, + "rewards/rejected": -0.03350994735956192, + "step": 1452 + }, + { + "epoch": 1.0048409405255878, + "grad_norm": 5.696212291717529, + "learning_rate": 4.9973105885968956e-05, + "log_odds_chosen": 4.117009162902832, + "log_odds_ratio": -0.2986528277397156, + "logits/chosen": -0.7022649049758911, + "logits/rejected": -0.7363672256469727, + "logps/chosen": -0.05535222589969635, + "logps/rejected": -0.8268040418624878, + "loss": 3.4259, + "nll_loss": 0.8266156911849976, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005535222589969635, + "rewards/margins": 0.07714518904685974, + "rewards/rejected": -0.08268041163682938, + "step": 1453 + }, + { + "epoch": 1.0055325034578146, + "grad_norm": 5.526662349700928, + "learning_rate": 4.9969263869678815e-05, + "log_odds_chosen": 0.7705850601196289, + "log_odds_ratio": -0.43338674306869507, + "logits/chosen": -0.6624119281768799, + "logits/rejected": -0.6808663606643677, + "logps/chosen": -0.18742117285728455, + "logps/rejected": -0.2641259431838989, + "loss": 3.6687, + "nll_loss": 0.8738378882408142, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018742118030786514, + "rewards/margins": 0.007670475170016289, + "rewards/rejected": -0.026412591338157654, + "step": 1454 + }, + { + "epoch": 1.0062240663900415, + "grad_norm": 7.230208873748779, + "learning_rate": 4.996542185338866e-05, + "log_odds_chosen": 4.472329139709473, + "log_odds_ratio": -0.23803406953811646, + "logits/chosen": -0.6536309719085693, + "logits/rejected": -0.6856323480606079, + "logps/chosen": -0.09958557039499283, + "logps/rejected": -0.8282464742660522, + "loss": 3.4442, + "nll_loss": 0.8372559547424316, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009958556853234768, + "rewards/margins": 0.07286608964204788, + "rewards/rejected": -0.08282465487718582, + "step": 1455 + }, + { + "epoch": 1.0069156293222683, + "grad_norm": 5.879416465759277, + "learning_rate": 4.996157983709851e-05, + "log_odds_chosen": 3.733511447906494, + "log_odds_ratio": -0.40536871552467346, + "logits/chosen": -0.424949586391449, + "logits/rejected": -0.47743141651153564, + "logps/chosen": -0.04454517364501953, + "logps/rejected": -0.724551796913147, + "loss": 3.1424, + "nll_loss": 0.7450686097145081, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004454517271369696, + "rewards/margins": 0.0680006593465805, + "rewards/rejected": -0.07245517522096634, + "step": 1456 + }, + { + "epoch": 1.0076071922544951, + "grad_norm": 5.5041728019714355, + "learning_rate": 4.9957737820808366e-05, + "log_odds_chosen": 0.9899818301200867, + "log_odds_ratio": -0.47608351707458496, + "logits/chosen": -0.32593727111816406, + "logits/rejected": -0.3411751985549927, + "logps/chosen": -0.13348256051540375, + "logps/rejected": -0.40093713998794556, + "loss": 3.9442, + "nll_loss": 0.9384334087371826, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01334825623780489, + "rewards/margins": 0.02674545720219612, + "rewards/rejected": -0.040093712508678436, + "step": 1457 + }, + { + "epoch": 1.008298755186722, + "grad_norm": 4.41768741607666, + "learning_rate": 4.995389580451821e-05, + "log_odds_chosen": 5.170443534851074, + "log_odds_ratio": -0.08081326633691788, + "logits/chosen": -0.5705434679985046, + "logits/rejected": -0.7547807097434998, + "logps/chosen": -0.059746697545051575, + "logps/rejected": -0.8604313135147095, + "loss": 3.2622, + "nll_loss": 0.807471752166748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005974669940769672, + "rewards/margins": 0.08006846159696579, + "rewards/rejected": -0.08604312688112259, + "step": 1458 + }, + { + "epoch": 1.0089903181189488, + "grad_norm": 5.407264232635498, + "learning_rate": 4.9950053788228064e-05, + "log_odds_chosen": 7.331927299499512, + "log_odds_ratio": -0.031412504613399506, + "logits/chosen": -0.2545586824417114, + "logits/rejected": -0.35101965069770813, + "logps/chosen": -0.01821615919470787, + "logps/rejected": -1.1783541440963745, + "loss": 4.2907, + "nll_loss": 1.0695462226867676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018216159660369158, + "rewards/margins": 0.11601380258798599, + "rewards/rejected": -0.11783542484045029, + "step": 1459 + }, + { + "epoch": 1.0096818810511756, + "grad_norm": 3.847421884536743, + "learning_rate": 4.9946211771937916e-05, + "log_odds_chosen": 2.8356752395629883, + "log_odds_ratio": -0.29734063148498535, + "logits/chosen": -0.7546320557594299, + "logits/rejected": -0.7500847578048706, + "logps/chosen": -0.1304139792919159, + "logps/rejected": -0.783536434173584, + "loss": 3.2944, + "nll_loss": 0.7938747406005859, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01304139755666256, + "rewards/margins": 0.06531224399805069, + "rewards/rejected": -0.0783536434173584, + "step": 1460 + }, + { + "epoch": 1.0103734439834025, + "grad_norm": 5.793185234069824, + "learning_rate": 4.994236975564777e-05, + "log_odds_chosen": 3.1138916015625, + "log_odds_ratio": -0.4364016652107239, + "logits/chosen": -0.3820783495903015, + "logits/rejected": -0.4282950758934021, + "logps/chosen": -0.15490713715553284, + "logps/rejected": -0.8658761978149414, + "loss": 4.3823, + "nll_loss": 1.0519235134124756, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015490712597966194, + "rewards/margins": 0.07109691202640533, + "rewards/rejected": -0.08658762276172638, + "step": 1461 + }, + { + "epoch": 1.0110650069156293, + "grad_norm": 7.400942802429199, + "learning_rate": 4.9938527739357614e-05, + "log_odds_chosen": 2.564420461654663, + "log_odds_ratio": -0.31582629680633545, + "logits/chosen": -0.6081953048706055, + "logits/rejected": -0.6425383687019348, + "logps/chosen": -0.12189154326915741, + "logps/rejected": -0.6753732562065125, + "loss": 4.3823, + "nll_loss": 1.063995599746704, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01218915544450283, + "rewards/margins": 0.055348172783851624, + "rewards/rejected": -0.067537322640419, + "step": 1462 + }, + { + "epoch": 1.0117565698478561, + "grad_norm": 3.8683624267578125, + "learning_rate": 4.9934685723067474e-05, + "log_odds_chosen": 3.5754759311676025, + "log_odds_ratio": -0.2153715193271637, + "logits/chosen": -0.5791640877723694, + "logits/rejected": -0.5993859767913818, + "logps/chosen": -0.09025327861309052, + "logps/rejected": -0.6351896524429321, + "loss": 3.3673, + "nll_loss": 0.8202858567237854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009025328792631626, + "rewards/margins": 0.05449363589286804, + "rewards/rejected": -0.0635189637541771, + "step": 1463 + }, + { + "epoch": 1.012448132780083, + "grad_norm": 12.99779987335205, + "learning_rate": 4.993084370677732e-05, + "log_odds_chosen": 2.696488618850708, + "log_odds_ratio": -0.5498002767562866, + "logits/chosen": -0.7150594592094421, + "logits/rejected": -0.7320340871810913, + "logps/chosen": -0.15543246269226074, + "logps/rejected": -0.6988484859466553, + "loss": 5.2035, + "nll_loss": 1.2458841800689697, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015543246641755104, + "rewards/margins": 0.05434160679578781, + "rewards/rejected": -0.06988485157489777, + "step": 1464 + }, + { + "epoch": 1.0131396957123098, + "grad_norm": 3.1184329986572266, + "learning_rate": 4.992700169048717e-05, + "log_odds_chosen": 4.265319347381592, + "log_odds_ratio": -0.10288287699222565, + "logits/chosen": -0.7221165299415588, + "logits/rejected": -0.7270498275756836, + "logps/chosen": -0.09049959480762482, + "logps/rejected": -0.8920851349830627, + "loss": 2.8165, + "nll_loss": 0.6938475370407104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009049959480762482, + "rewards/margins": 0.08015856146812439, + "rewards/rejected": -0.08920852094888687, + "step": 1465 + }, + { + "epoch": 1.0138312586445366, + "grad_norm": 5.818999290466309, + "learning_rate": 4.9923159674197024e-05, + "log_odds_chosen": 5.4633588790893555, + "log_odds_ratio": -0.30595359206199646, + "logits/chosen": -0.7791047096252441, + "logits/rejected": -0.7961665391921997, + "logps/chosen": -0.08309157937765121, + "logps/rejected": -1.0019042491912842, + "loss": 2.8203, + "nll_loss": 0.6744802594184875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008309157565236092, + "rewards/margins": 0.09188126027584076, + "rewards/rejected": -0.1001904308795929, + "step": 1466 + }, + { + "epoch": 1.0145228215767634, + "grad_norm": 5.464219570159912, + "learning_rate": 4.991931765790687e-05, + "log_odds_chosen": 3.736652135848999, + "log_odds_ratio": -0.217147096991539, + "logits/chosen": -0.9352800846099854, + "logits/rejected": -0.9811190366744995, + "logps/chosen": -0.0618901327252388, + "logps/rejected": -0.6639367341995239, + "loss": 3.7076, + "nll_loss": 0.9051859974861145, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0061890133656561375, + "rewards/margins": 0.06020466610789299, + "rewards/rejected": -0.06639367341995239, + "step": 1467 + }, + { + "epoch": 1.0152143845089903, + "grad_norm": 13.700241088867188, + "learning_rate": 4.991547564161672e-05, + "log_odds_chosen": 2.2971251010894775, + "log_odds_ratio": -0.5517905354499817, + "logits/chosen": -0.8629281520843506, + "logits/rejected": -0.8985836505889893, + "logps/chosen": -0.12846173346042633, + "logps/rejected": -0.543826699256897, + "loss": 3.8535, + "nll_loss": 0.908200204372406, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012846173718571663, + "rewards/margins": 0.04153650254011154, + "rewards/rejected": -0.054382674396038055, + "step": 1468 + }, + { + "epoch": 1.015905947441217, + "grad_norm": 5.728583812713623, + "learning_rate": 4.9911633625326575e-05, + "log_odds_chosen": 4.474362850189209, + "log_odds_ratio": -0.3257821500301361, + "logits/chosen": -0.736266016960144, + "logits/rejected": -0.7569507360458374, + "logps/chosen": -0.05592148005962372, + "logps/rejected": -0.8763000965118408, + "loss": 3.1086, + "nll_loss": 0.7445786595344543, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005592147819697857, + "rewards/margins": 0.08203786611557007, + "rewards/rejected": -0.0876300036907196, + "step": 1469 + }, + { + "epoch": 1.016597510373444, + "grad_norm": 6.4466657638549805, + "learning_rate": 4.990779160903643e-05, + "log_odds_chosen": 3.7242801189422607, + "log_odds_ratio": -0.2941632866859436, + "logits/chosen": -0.4376232624053955, + "logits/rejected": -0.5244469046592712, + "logps/chosen": -0.07507316023111343, + "logps/rejected": -0.6646156311035156, + "loss": 4.0845, + "nll_loss": 0.9917135238647461, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0075073158368468285, + "rewards/margins": 0.05895423889160156, + "rewards/rejected": -0.06646155565977097, + "step": 1470 + }, + { + "epoch": 1.0172890733056708, + "grad_norm": 6.7467474937438965, + "learning_rate": 4.990394959274627e-05, + "log_odds_chosen": 2.9118337631225586, + "log_odds_ratio": -0.2949202060699463, + "logits/chosen": -0.7435550689697266, + "logits/rejected": -0.7765440940856934, + "logps/chosen": -0.060861919075250626, + "logps/rejected": -0.4571167826652527, + "loss": 4.0488, + "nll_loss": 0.9826978445053101, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0060861920937895775, + "rewards/margins": 0.039625488221645355, + "rewards/rejected": -0.04571168124675751, + "step": 1471 + }, + { + "epoch": 1.0179806362378976, + "grad_norm": 3.7251908779144287, + "learning_rate": 4.990010757645613e-05, + "log_odds_chosen": 5.3352484703063965, + "log_odds_ratio": -0.22203417122364044, + "logits/chosen": -0.6861305832862854, + "logits/rejected": -0.6410164833068848, + "logps/chosen": -0.05618961900472641, + "logps/rejected": -0.7430349588394165, + "loss": 3.0389, + "nll_loss": 0.7375138401985168, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005618962924927473, + "rewards/margins": 0.06868454813957214, + "rewards/rejected": -0.0743035078048706, + "step": 1472 + }, + { + "epoch": 1.0186721991701244, + "grad_norm": 4.516066074371338, + "learning_rate": 4.989626556016598e-05, + "log_odds_chosen": 4.567673683166504, + "log_odds_ratio": -0.12776193022727966, + "logits/chosen": -0.34971410036087036, + "logits/rejected": -0.3900047540664673, + "logps/chosen": -0.07080184668302536, + "logps/rejected": -0.6615493893623352, + "loss": 3.6239, + "nll_loss": 0.8931936025619507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007080184295773506, + "rewards/margins": 0.0590747594833374, + "rewards/rejected": -0.06615494191646576, + "step": 1473 + }, + { + "epoch": 1.0193637621023512, + "grad_norm": 6.061563014984131, + "learning_rate": 4.989242354387583e-05, + "log_odds_chosen": 3.5704660415649414, + "log_odds_ratio": -0.33379295468330383, + "logits/chosen": -1.1629607677459717, + "logits/rejected": -1.2340478897094727, + "logps/chosen": -0.14167319238185883, + "logps/rejected": -1.1132009029388428, + "loss": 4.6314, + "nll_loss": 1.124472737312317, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014167319051921368, + "rewards/margins": 0.09715276211500168, + "rewards/rejected": -0.11132007837295532, + "step": 1474 + }, + { + "epoch": 1.020055325034578, + "grad_norm": 6.204284191131592, + "learning_rate": 4.988858152758568e-05, + "log_odds_chosen": 4.754334926605225, + "log_odds_ratio": -0.14875973761081696, + "logits/chosen": -0.5835494995117188, + "logits/rejected": -0.6686667203903198, + "logps/chosen": -0.04585752636194229, + "logps/rejected": -0.9479017853736877, + "loss": 3.5374, + "nll_loss": 0.8694802522659302, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004585752729326487, + "rewards/margins": 0.09020442515611649, + "rewards/rejected": -0.09479017555713654, + "step": 1475 + }, + { + "epoch": 1.020746887966805, + "grad_norm": 4.354371070861816, + "learning_rate": 4.988473951129553e-05, + "log_odds_chosen": 3.4431748390197754, + "log_odds_ratio": -0.3371661901473999, + "logits/chosen": -0.6801312565803528, + "logits/rejected": -0.715042769908905, + "logps/chosen": -0.09811490774154663, + "logps/rejected": -0.7584939002990723, + "loss": 3.0323, + "nll_loss": 0.7243598103523254, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009811490774154663, + "rewards/margins": 0.06603790074586868, + "rewards/rejected": -0.07584939152002335, + "step": 1476 + }, + { + "epoch": 1.0214384508990317, + "grad_norm": 4.167712688446045, + "learning_rate": 4.988089749500538e-05, + "log_odds_chosen": 1.9616563320159912, + "log_odds_ratio": -0.30320820212364197, + "logits/chosen": -0.867897629737854, + "logits/rejected": -0.9229037761688232, + "logps/chosen": -0.13874275982379913, + "logps/rejected": -0.36793744564056396, + "loss": 2.7929, + "nll_loss": 0.6678975820541382, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013874277472496033, + "rewards/margins": 0.022919466719031334, + "rewards/rejected": -0.036793746054172516, + "step": 1477 + }, + { + "epoch": 1.0221300138312586, + "grad_norm": 5.813611030578613, + "learning_rate": 4.987705547871523e-05, + "log_odds_chosen": 4.935079574584961, + "log_odds_ratio": -0.2710667550563812, + "logits/chosen": -0.6112565398216248, + "logits/rejected": -0.6051702499389648, + "logps/chosen": -0.08972442150115967, + "logps/rejected": -0.6998757123947144, + "loss": 2.919, + "nll_loss": 0.702637791633606, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008972441777586937, + "rewards/margins": 0.061015136539936066, + "rewards/rejected": -0.06998757272958755, + "step": 1478 + }, + { + "epoch": 1.0228215767634854, + "grad_norm": 5.390235424041748, + "learning_rate": 4.9873213462425086e-05, + "log_odds_chosen": 6.563016414642334, + "log_odds_ratio": -0.127326101064682, + "logits/chosen": -0.4359290897846222, + "logits/rejected": -0.52577805519104, + "logps/chosen": -0.06370159238576889, + "logps/rejected": -1.0844309329986572, + "loss": 3.073, + "nll_loss": 0.7555067539215088, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006370158866047859, + "rewards/margins": 0.10207293927669525, + "rewards/rejected": -0.10844309628009796, + "step": 1479 + }, + { + "epoch": 1.0235131396957122, + "grad_norm": 5.924533367156982, + "learning_rate": 4.986937144613493e-05, + "log_odds_chosen": 4.72926139831543, + "log_odds_ratio": -0.29802221059799194, + "logits/chosen": -0.8602408170700073, + "logits/rejected": -0.9133716225624084, + "logps/chosen": -0.06283199042081833, + "logps/rejected": -1.035302996635437, + "loss": 3.3873, + "nll_loss": 0.8170216083526611, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006283198483288288, + "rewards/margins": 0.09724709391593933, + "rewards/rejected": -0.10353029519319534, + "step": 1480 + }, + { + "epoch": 1.024204702627939, + "grad_norm": 5.796539783477783, + "learning_rate": 4.986552942984479e-05, + "log_odds_chosen": 5.10892391204834, + "log_odds_ratio": -0.0661209300160408, + "logits/chosen": -0.41841191053390503, + "logits/rejected": -0.49035269021987915, + "logps/chosen": -0.05405154824256897, + "logps/rejected": -0.9556215405464172, + "loss": 3.9195, + "nll_loss": 0.9732711315155029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005405155010521412, + "rewards/margins": 0.09015700221061707, + "rewards/rejected": -0.0955621600151062, + "step": 1481 + }, + { + "epoch": 1.0248962655601659, + "grad_norm": 7.5419440269470215, + "learning_rate": 4.9861687413554636e-05, + "log_odds_chosen": 3.7392358779907227, + "log_odds_ratio": -0.3376656770706177, + "logits/chosen": -0.5626893043518066, + "logits/rejected": -0.6578677892684937, + "logps/chosen": -0.05953347682952881, + "logps/rejected": -1.008660078048706, + "loss": 3.9953, + "nll_loss": 0.965064525604248, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005953347310423851, + "rewards/margins": 0.09491265565156937, + "rewards/rejected": -0.10086600482463837, + "step": 1482 + }, + { + "epoch": 1.0255878284923927, + "grad_norm": 4.64001989364624, + "learning_rate": 4.985784539726449e-05, + "log_odds_chosen": 2.772674322128296, + "log_odds_ratio": -0.33215615153312683, + "logits/chosen": -0.6052103042602539, + "logits/rejected": -0.7159792184829712, + "logps/chosen": -0.09696868807077408, + "logps/rejected": -0.4555617570877075, + "loss": 3.1045, + "nll_loss": 0.7428995370864868, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009696869179606438, + "rewards/margins": 0.035859305411577225, + "rewards/rejected": -0.04555617272853851, + "step": 1483 + }, + { + "epoch": 1.0262793914246195, + "grad_norm": 4.410762310028076, + "learning_rate": 4.985400338097434e-05, + "log_odds_chosen": 3.116834878921509, + "log_odds_ratio": -0.173253133893013, + "logits/chosen": -0.5153848528862, + "logits/rejected": -0.5361400246620178, + "logps/chosen": -0.16222445666790009, + "logps/rejected": -1.062785029411316, + "loss": 2.8669, + "nll_loss": 0.6994096636772156, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016222447156906128, + "rewards/margins": 0.09005605429410934, + "rewards/rejected": -0.10627850890159607, + "step": 1484 + }, + { + "epoch": 1.0269709543568464, + "grad_norm": 4.671135425567627, + "learning_rate": 4.985016136468419e-05, + "log_odds_chosen": 3.684541702270508, + "log_odds_ratio": -0.36840176582336426, + "logits/chosen": -0.8689281940460205, + "logits/rejected": -0.9373396039009094, + "logps/chosen": -0.13366033136844635, + "logps/rejected": -1.042647361755371, + "loss": 2.7474, + "nll_loss": 0.6500199437141418, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013366032391786575, + "rewards/margins": 0.09089870750904083, + "rewards/rejected": -0.10426473617553711, + "step": 1485 + }, + { + "epoch": 1.0276625172890732, + "grad_norm": 5.958430767059326, + "learning_rate": 4.984631934839404e-05, + "log_odds_chosen": 4.020816802978516, + "log_odds_ratio": -0.1794414073228836, + "logits/chosen": -0.8697519302368164, + "logits/rejected": -0.9032071828842163, + "logps/chosen": -0.0766788125038147, + "logps/rejected": -0.9268592596054077, + "loss": 4.5648, + "nll_loss": 1.123245120048523, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007667881436645985, + "rewards/margins": 0.08501805365085602, + "rewards/rejected": -0.09268593043088913, + "step": 1486 + }, + { + "epoch": 1.0283540802213, + "grad_norm": 5.243428707122803, + "learning_rate": 4.984247733210389e-05, + "log_odds_chosen": 4.724527359008789, + "log_odds_ratio": -0.1316288858652115, + "logits/chosen": -0.6422492265701294, + "logits/rejected": -0.6679896116256714, + "logps/chosen": -0.09851549565792084, + "logps/rejected": -0.9931231141090393, + "loss": 3.0589, + "nll_loss": 0.7515564560890198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009851549752056599, + "rewards/margins": 0.08946076780557632, + "rewards/rejected": -0.09931232035160065, + "step": 1487 + }, + { + "epoch": 1.0290456431535269, + "grad_norm": 4.835902214050293, + "learning_rate": 4.9838635315813744e-05, + "log_odds_chosen": 5.668395519256592, + "log_odds_ratio": -0.05924345925450325, + "logits/chosen": -0.5757212042808533, + "logits/rejected": -0.6555768251419067, + "logps/chosen": -0.0362117774784565, + "logps/rejected": -1.1513760089874268, + "loss": 2.7235, + "nll_loss": 0.6749587059020996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036211777478456497, + "rewards/margins": 0.11151641607284546, + "rewards/rejected": -0.11513759940862656, + "step": 1488 + }, + { + "epoch": 1.0297372060857537, + "grad_norm": 7.987643718719482, + "learning_rate": 4.983479329952359e-05, + "log_odds_chosen": 1.2246575355529785, + "log_odds_ratio": -0.5338953137397766, + "logits/chosen": -0.8425899744033813, + "logits/rejected": -0.8281242847442627, + "logps/chosen": -0.10856227576732635, + "logps/rejected": -0.3495185673236847, + "loss": 4.8577, + "nll_loss": 1.1610369682312012, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010856227949261665, + "rewards/margins": 0.024095630273222923, + "rewards/rejected": -0.03495185822248459, + "step": 1489 + }, + { + "epoch": 1.0304287690179805, + "grad_norm": 5.155649185180664, + "learning_rate": 4.983095128323345e-05, + "log_odds_chosen": 4.663093566894531, + "log_odds_ratio": -0.16769303381443024, + "logits/chosen": -0.8803797364234924, + "logits/rejected": -0.9577143788337708, + "logps/chosen": -0.11833354085683823, + "logps/rejected": -1.161036491394043, + "loss": 3.4158, + "nll_loss": 0.8371741771697998, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011833353899419308, + "rewards/margins": 0.10427028685808182, + "rewards/rejected": -0.1161036491394043, + "step": 1490 + }, + { + "epoch": 1.0311203319502074, + "grad_norm": 6.939423561096191, + "learning_rate": 4.9827109266943295e-05, + "log_odds_chosen": 2.6805033683776855, + "log_odds_ratio": -0.2592310607433319, + "logits/chosen": -0.8418576121330261, + "logits/rejected": -0.8511228561401367, + "logps/chosen": -0.08779346942901611, + "logps/rejected": -0.6378369331359863, + "loss": 4.4381, + "nll_loss": 1.0835912227630615, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008779346942901611, + "rewards/margins": 0.05500435084104538, + "rewards/rejected": -0.06378369778394699, + "step": 1491 + }, + { + "epoch": 1.0318118948824342, + "grad_norm": 3.8663382530212402, + "learning_rate": 4.982326725065315e-05, + "log_odds_chosen": 4.59311580657959, + "log_odds_ratio": -0.34453898668289185, + "logits/chosen": -0.5843978524208069, + "logits/rejected": -0.6239404082298279, + "logps/chosen": -0.08339428901672363, + "logps/rejected": -0.8839590549468994, + "loss": 2.7118, + "nll_loss": 0.6434944272041321, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008339428342878819, + "rewards/margins": 0.08005647361278534, + "rewards/rejected": -0.08839590102434158, + "step": 1492 + }, + { + "epoch": 1.0325034578146612, + "grad_norm": 3.709350109100342, + "learning_rate": 4.9819425234363e-05, + "log_odds_chosen": 3.1544461250305176, + "log_odds_ratio": -0.24927958846092224, + "logits/chosen": -0.38115400075912476, + "logits/rejected": -0.33510446548461914, + "logps/chosen": -0.08069927990436554, + "logps/rejected": -0.5364916324615479, + "loss": 2.9121, + "nll_loss": 0.7030921578407288, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008069928735494614, + "rewards/margins": 0.04557923972606659, + "rewards/rejected": -0.053649164736270905, + "step": 1493 + }, + { + "epoch": 1.033195020746888, + "grad_norm": 6.313911437988281, + "learning_rate": 4.9815583218072845e-05, + "log_odds_chosen": 1.3357501029968262, + "log_odds_ratio": -0.5104138255119324, + "logits/chosen": -0.9382165670394897, + "logits/rejected": -0.9525442123413086, + "logps/chosen": -0.1589067131280899, + "logps/rejected": -0.6409029960632324, + "loss": 4.2722, + "nll_loss": 1.0169986486434937, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01589067280292511, + "rewards/margins": 0.048199623823165894, + "rewards/rejected": -0.064090296626091, + "step": 1494 + }, + { + "epoch": 1.033886583679115, + "grad_norm": 5.793435096740723, + "learning_rate": 4.98117412017827e-05, + "log_odds_chosen": 3.6047868728637695, + "log_odds_ratio": -0.3205690383911133, + "logits/chosen": -0.4956324100494385, + "logits/rejected": -0.5108366012573242, + "logps/chosen": -0.07623106241226196, + "logps/rejected": -0.5219119787216187, + "loss": 3.1138, + "nll_loss": 0.7464025020599365, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007623106241226196, + "rewards/margins": 0.04456809163093567, + "rewards/rejected": -0.052191197872161865, + "step": 1495 + }, + { + "epoch": 1.0345781466113417, + "grad_norm": 6.194908142089844, + "learning_rate": 4.980789918549255e-05, + "log_odds_chosen": 3.547358989715576, + "log_odds_ratio": -0.19596754014492035, + "logits/chosen": -0.6217373013496399, + "logits/rejected": -0.6675953269004822, + "logps/chosen": -0.05426674336194992, + "logps/rejected": -0.7067912817001343, + "loss": 3.5106, + "nll_loss": 0.8580626249313354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005426674149930477, + "rewards/margins": 0.06525244563817978, + "rewards/rejected": -0.07067912071943283, + "step": 1496 + }, + { + "epoch": 1.0352697095435686, + "grad_norm": 5.629507064819336, + "learning_rate": 4.98040571692024e-05, + "log_odds_chosen": 3.7559783458709717, + "log_odds_ratio": -0.2629086971282959, + "logits/chosen": -0.4892784655094147, + "logits/rejected": -0.4466947019100189, + "logps/chosen": -0.04877060279250145, + "logps/rejected": -0.835755467414856, + "loss": 3.1084, + "nll_loss": 0.7508119940757751, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004877060651779175, + "rewards/margins": 0.07869848608970642, + "rewards/rejected": -0.0835755467414856, + "step": 1497 + }, + { + "epoch": 1.0359612724757954, + "grad_norm": 5.053825855255127, + "learning_rate": 4.980021515291225e-05, + "log_odds_chosen": 4.632743835449219, + "log_odds_ratio": -0.11885064840316772, + "logits/chosen": -0.49171894788742065, + "logits/rejected": -0.5932501554489136, + "logps/chosen": -0.029469992965459824, + "logps/rejected": -0.7572300434112549, + "loss": 3.4089, + "nll_loss": 0.8403362035751343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029469996225088835, + "rewards/margins": 0.07277600467205048, + "rewards/rejected": -0.07572300732135773, + "step": 1498 + }, + { + "epoch": 1.0366528354080222, + "grad_norm": 5.869250297546387, + "learning_rate": 4.979637313662211e-05, + "log_odds_chosen": 2.212305784225464, + "log_odds_ratio": -0.5324792861938477, + "logits/chosen": -0.38662075996398926, + "logits/rejected": -0.4400796890258789, + "logps/chosen": -0.11539213359355927, + "logps/rejected": -0.5057278871536255, + "loss": 3.3367, + "nll_loss": 0.7809147834777832, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011539213359355927, + "rewards/margins": 0.03903357312083244, + "rewards/rejected": -0.05057279020547867, + "step": 1499 + }, + { + "epoch": 1.037344398340249, + "grad_norm": 4.107361316680908, + "learning_rate": 4.979253112033195e-05, + "log_odds_chosen": 3.1276094913482666, + "log_odds_ratio": -0.2214275598526001, + "logits/chosen": -0.7829334735870361, + "logits/rejected": -0.8359056711196899, + "logps/chosen": -0.08103692531585693, + "logps/rejected": -0.7872143983840942, + "loss": 2.925, + "nll_loss": 0.7091047167778015, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008103692904114723, + "rewards/margins": 0.07061775028705597, + "rewards/rejected": -0.07872144877910614, + "step": 1500 + }, + { + "epoch": 1.0380359612724759, + "grad_norm": 7.110342502593994, + "learning_rate": 4.9788689104041805e-05, + "log_odds_chosen": 3.4415764808654785, + "log_odds_ratio": -0.21897970139980316, + "logits/chosen": -0.5591660141944885, + "logits/rejected": -0.5564761161804199, + "logps/chosen": -0.09651856124401093, + "logps/rejected": -0.7514764666557312, + "loss": 3.952, + "nll_loss": 0.9661027789115906, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009651856496930122, + "rewards/margins": 0.0654957965016365, + "rewards/rejected": -0.07514764368534088, + "step": 1501 + }, + { + "epoch": 1.0387275242047027, + "grad_norm": 4.894821643829346, + "learning_rate": 4.978484708775166e-05, + "log_odds_chosen": 5.221924781799316, + "log_odds_ratio": -0.025731677189469337, + "logits/chosen": -0.33677947521209717, + "logits/rejected": -0.4343298077583313, + "logps/chosen": -0.021347129717469215, + "logps/rejected": -0.7246700525283813, + "loss": 3.2321, + "nll_loss": 0.8054642677307129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021347124129533768, + "rewards/margins": 0.07033228874206543, + "rewards/rejected": -0.07246700674295425, + "step": 1502 + }, + { + "epoch": 1.0394190871369295, + "grad_norm": 4.469569683074951, + "learning_rate": 4.9781005071461504e-05, + "log_odds_chosen": 5.758516788482666, + "log_odds_ratio": -0.22051161527633667, + "logits/chosen": -0.9311298131942749, + "logits/rejected": -1.0032835006713867, + "logps/chosen": -0.09000095725059509, + "logps/rejected": -0.904339611530304, + "loss": 2.8294, + "nll_loss": 0.6853088140487671, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009000095538794994, + "rewards/margins": 0.08143387734889984, + "rewards/rejected": -0.09043397009372711, + "step": 1503 + }, + { + "epoch": 1.0401106500691564, + "grad_norm": 5.033740520477295, + "learning_rate": 4.9777163055171356e-05, + "log_odds_chosen": 4.276998519897461, + "log_odds_ratio": -0.09011676162481308, + "logits/chosen": -0.8271830677986145, + "logits/rejected": -0.8114771842956543, + "logps/chosen": -0.043199293315410614, + "logps/rejected": -0.6810850501060486, + "loss": 4.096, + "nll_loss": 1.014995813369751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004319929517805576, + "rewards/margins": 0.06378857791423798, + "rewards/rejected": -0.06810849905014038, + "step": 1504 + }, + { + "epoch": 1.0408022130013832, + "grad_norm": 6.084150791168213, + "learning_rate": 4.977332103888121e-05, + "log_odds_chosen": 4.076897621154785, + "log_odds_ratio": -0.08082269877195358, + "logits/chosen": -0.5791704654693604, + "logits/rejected": -0.6189771294593811, + "logps/chosen": -0.06673917174339294, + "logps/rejected": -1.0344374179840088, + "loss": 3.3458, + "nll_loss": 0.8283798098564148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006673917640000582, + "rewards/margins": 0.09676983207464218, + "rewards/rejected": -0.10344374179840088, + "step": 1505 + }, + { + "epoch": 1.04149377593361, + "grad_norm": 3.709134578704834, + "learning_rate": 4.976947902259106e-05, + "log_odds_chosen": 7.363162994384766, + "log_odds_ratio": -0.011417560279369354, + "logits/chosen": -0.33076947927474976, + "logits/rejected": -0.43420493602752686, + "logps/chosen": -0.029158981516957283, + "logps/rejected": -1.3433349132537842, + "loss": 2.1727, + "nll_loss": 0.5420453548431396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029158983379602432, + "rewards/margins": 0.13141758739948273, + "rewards/rejected": -0.13433349132537842, + "step": 1506 + }, + { + "epoch": 1.0421853388658369, + "grad_norm": 6.592757225036621, + "learning_rate": 4.9765637006300907e-05, + "log_odds_chosen": 3.5828444957733154, + "log_odds_ratio": -0.3698550760746002, + "logits/chosen": -0.7420531511306763, + "logits/rejected": -0.7805401086807251, + "logps/chosen": -0.10293908417224884, + "logps/rejected": -0.9354575872421265, + "loss": 3.6139, + "nll_loss": 0.866478443145752, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010293908417224884, + "rewards/margins": 0.08325185626745224, + "rewards/rejected": -0.09354576468467712, + "step": 1507 + }, + { + "epoch": 1.0428769017980637, + "grad_norm": 5.358985424041748, + "learning_rate": 4.9761794990010766e-05, + "log_odds_chosen": 3.160102367401123, + "log_odds_ratio": -0.2559373378753662, + "logits/chosen": -0.5014327764511108, + "logits/rejected": -0.5404451489448547, + "logps/chosen": -0.07068739831447601, + "logps/rejected": -0.4019555151462555, + "loss": 3.1128, + "nll_loss": 0.7525964379310608, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007068739738315344, + "rewards/margins": 0.033126816153526306, + "rewards/rejected": -0.04019555449485779, + "step": 1508 + }, + { + "epoch": 1.0435684647302905, + "grad_norm": 5.2016072273254395, + "learning_rate": 4.975795297372061e-05, + "log_odds_chosen": 3.912111520767212, + "log_odds_ratio": -0.325267493724823, + "logits/chosen": -0.4946413040161133, + "logits/rejected": -0.5560250282287598, + "logps/chosen": -0.15734195709228516, + "logps/rejected": -0.8032873272895813, + "loss": 3.3712, + "nll_loss": 0.8102628588676453, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015734193846583366, + "rewards/margins": 0.06459453701972961, + "rewards/rejected": -0.08032873272895813, + "step": 1509 + }, + { + "epoch": 1.0442600276625174, + "grad_norm": 6.495105743408203, + "learning_rate": 4.9754110957430464e-05, + "log_odds_chosen": 2.503323554992676, + "log_odds_ratio": -0.8614378571510315, + "logits/chosen": -0.8828580379486084, + "logits/rejected": -0.9524918794631958, + "logps/chosen": -0.1063104122877121, + "logps/rejected": -0.4049009084701538, + "loss": 3.8731, + "nll_loss": 0.8821337223052979, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01063104160130024, + "rewards/margins": 0.02985905110836029, + "rewards/rejected": -0.04049009084701538, + "step": 1510 + }, + { + "epoch": 1.0449515905947442, + "grad_norm": 6.673771858215332, + "learning_rate": 4.9750268941140316e-05, + "log_odds_chosen": 2.5765671730041504, + "log_odds_ratio": -0.24561844766139984, + "logits/chosen": -0.6686314344406128, + "logits/rejected": -0.6705050468444824, + "logps/chosen": -0.14821584522724152, + "logps/rejected": -0.7514455914497375, + "loss": 3.3937, + "nll_loss": 0.8238645792007446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014821582473814487, + "rewards/margins": 0.06032297760248184, + "rewards/rejected": -0.07514456659555435, + "step": 1511 + }, + { + "epoch": 1.045643153526971, + "grad_norm": 4.638186454772949, + "learning_rate": 4.974642692485016e-05, + "log_odds_chosen": 5.006286144256592, + "log_odds_ratio": -0.1460382491350174, + "logits/chosen": -1.0378838777542114, + "logits/rejected": -1.0734418630599976, + "logps/chosen": -0.036317795515060425, + "logps/rejected": -1.1099814176559448, + "loss": 3.5714, + "nll_loss": 0.8782393932342529, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003631779458373785, + "rewards/margins": 0.10736636072397232, + "rewards/rejected": -0.11099813878536224, + "step": 1512 + }, + { + "epoch": 1.0463347164591978, + "grad_norm": 4.667377948760986, + "learning_rate": 4.9742584908560014e-05, + "log_odds_chosen": 3.783291816711426, + "log_odds_ratio": -0.10481594502925873, + "logits/chosen": -0.5721790790557861, + "logits/rejected": -0.6407243013381958, + "logps/chosen": -0.06656525284051895, + "logps/rejected": -0.7319782972335815, + "loss": 3.7715, + "nll_loss": 0.9324032664299011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006656525656580925, + "rewards/margins": 0.06654130667448044, + "rewards/rejected": -0.07319782674312592, + "step": 1513 + }, + { + "epoch": 1.0470262793914247, + "grad_norm": 6.9356279373168945, + "learning_rate": 4.973874289226986e-05, + "log_odds_chosen": 3.897022008895874, + "log_odds_ratio": -0.3799517750740051, + "logits/chosen": -0.6409194469451904, + "logits/rejected": -0.6361832618713379, + "logps/chosen": -0.1504475176334381, + "logps/rejected": -0.7533466219902039, + "loss": 3.8866, + "nll_loss": 0.9336593747138977, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015044751577079296, + "rewards/margins": 0.060289908200502396, + "rewards/rejected": -0.07533465325832367, + "step": 1514 + }, + { + "epoch": 1.0477178423236515, + "grad_norm": 4.3579816818237305, + "learning_rate": 4.973490087597972e-05, + "log_odds_chosen": 4.464286804199219, + "log_odds_ratio": -0.4597773551940918, + "logits/chosen": -0.7858383655548096, + "logits/rejected": -0.7813628911972046, + "logps/chosen": -0.14199215173721313, + "logps/rejected": -0.5497534871101379, + "loss": 2.8294, + "nll_loss": 0.6613786220550537, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014199215918779373, + "rewards/margins": 0.04077612981200218, + "rewards/rejected": -0.054975349456071854, + "step": 1515 + }, + { + "epoch": 1.0484094052558783, + "grad_norm": 4.271621227264404, + "learning_rate": 4.9731058859689565e-05, + "log_odds_chosen": 4.928406238555908, + "log_odds_ratio": -0.06683094799518585, + "logits/chosen": -1.0744023323059082, + "logits/rejected": -1.1269543170928955, + "logps/chosen": -0.045706942677497864, + "logps/rejected": -1.0398142337799072, + "loss": 4.0336, + "nll_loss": 1.001724362373352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004570694640278816, + "rewards/margins": 0.09941072762012482, + "rewards/rejected": -0.10398142784833908, + "step": 1516 + }, + { + "epoch": 1.0491009681881052, + "grad_norm": 6.2232232093811035, + "learning_rate": 4.972721684339942e-05, + "log_odds_chosen": 4.167832851409912, + "log_odds_ratio": -0.151000514626503, + "logits/chosen": -0.5242301821708679, + "logits/rejected": -0.6229081153869629, + "logps/chosen": -0.09259649366140366, + "logps/rejected": -0.8858538866043091, + "loss": 3.5524, + "nll_loss": 0.8730012774467468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009259650483727455, + "rewards/margins": 0.07932574301958084, + "rewards/rejected": -0.08858539164066315, + "step": 1517 + }, + { + "epoch": 1.049792531120332, + "grad_norm": 6.540098667144775, + "learning_rate": 4.972337482710927e-05, + "log_odds_chosen": 3.922050714492798, + "log_odds_ratio": -0.5066487789154053, + "logits/chosen": -0.8467140793800354, + "logits/rejected": -0.8433650732040405, + "logps/chosen": -0.0856909528374672, + "logps/rejected": -0.6936833262443542, + "loss": 3.9991, + "nll_loss": 0.9490994811058044, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00856909528374672, + "rewards/margins": 0.060799237340688705, + "rewards/rejected": -0.06936833262443542, + "step": 1518 + }, + { + "epoch": 1.0504840940525588, + "grad_norm": 5.885974884033203, + "learning_rate": 4.971953281081912e-05, + "log_odds_chosen": 2.73998761177063, + "log_odds_ratio": -0.43364351987838745, + "logits/chosen": -0.48864781856536865, + "logits/rejected": -0.48220106959342957, + "logps/chosen": -0.12833350896835327, + "logps/rejected": -0.44956284761428833, + "loss": 3.5138, + "nll_loss": 0.8350796699523926, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012833353132009506, + "rewards/margins": 0.03212292864918709, + "rewards/rejected": -0.044956281781196594, + "step": 1519 + }, + { + "epoch": 1.0511756569847857, + "grad_norm": 4.99786901473999, + "learning_rate": 4.971569079452897e-05, + "log_odds_chosen": 3.619704246520996, + "log_odds_ratio": -0.3211745321750641, + "logits/chosen": -0.6291632652282715, + "logits/rejected": -0.6346349716186523, + "logps/chosen": -0.08411780744791031, + "logps/rejected": -0.5153727531433105, + "loss": 3.7475, + "nll_loss": 0.9047644734382629, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00841178186237812, + "rewards/margins": 0.043125495314598083, + "rewards/rejected": -0.051537275314331055, + "step": 1520 + }, + { + "epoch": 1.0518672199170125, + "grad_norm": 3.7386040687561035, + "learning_rate": 4.971184877823882e-05, + "log_odds_chosen": 4.540435791015625, + "log_odds_ratio": -0.13528023660182953, + "logits/chosen": -0.40115034580230713, + "logits/rejected": -0.415203332901001, + "logps/chosen": -0.02656002901494503, + "logps/rejected": -0.8941652178764343, + "loss": 2.5831, + "nll_loss": 0.6322515606880188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026560029946267605, + "rewards/margins": 0.0867605209350586, + "rewards/rejected": -0.08941652625799179, + "step": 1521 + }, + { + "epoch": 1.0525587828492393, + "grad_norm": 4.414981365203857, + "learning_rate": 4.970800676194867e-05, + "log_odds_chosen": 6.532523155212402, + "log_odds_ratio": -0.03326879441738129, + "logits/chosen": -0.522810161113739, + "logits/rejected": -0.597527027130127, + "logps/chosen": -0.032224591821432114, + "logps/rejected": -1.2256418466567993, + "loss": 3.1706, + "nll_loss": 0.7893306612968445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00322245922870934, + "rewards/margins": 0.11934173107147217, + "rewards/rejected": -0.12256418168544769, + "step": 1522 + }, + { + "epoch": 1.0532503457814661, + "grad_norm": 6.495909214019775, + "learning_rate": 4.970416474565852e-05, + "log_odds_chosen": 1.773823618888855, + "log_odds_ratio": -0.5499988198280334, + "logits/chosen": -0.6086165308952332, + "logits/rejected": -0.5990644693374634, + "logps/chosen": -0.1282849758863449, + "logps/rejected": -0.4344036281108856, + "loss": 4.7242, + "nll_loss": 1.1260621547698975, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012828497216105461, + "rewards/margins": 0.03061186708509922, + "rewards/rejected": -0.04344036430120468, + "step": 1523 + }, + { + "epoch": 1.053941908713693, + "grad_norm": 5.455717086791992, + "learning_rate": 4.970032272936838e-05, + "log_odds_chosen": 3.1658692359924316, + "log_odds_ratio": -0.3641965687274933, + "logits/chosen": -0.8424069285392761, + "logits/rejected": -0.8386783599853516, + "logps/chosen": -0.14120902121067047, + "logps/rejected": -0.5841333866119385, + "loss": 3.4216, + "nll_loss": 0.8189803957939148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014120901934802532, + "rewards/margins": 0.04429244622588158, + "rewards/rejected": -0.058413345366716385, + "step": 1524 + }, + { + "epoch": 1.0546334716459198, + "grad_norm": 5.827929496765137, + "learning_rate": 4.9696480713078223e-05, + "log_odds_chosen": 2.437635898590088, + "log_odds_ratio": -0.5703208446502686, + "logits/chosen": -0.6680049896240234, + "logits/rejected": -0.6942263841629028, + "logps/chosen": -0.22066247463226318, + "logps/rejected": -0.478730171918869, + "loss": 3.363, + "nll_loss": 0.7837151288986206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02206624671816826, + "rewards/margins": 0.025806769728660583, + "rewards/rejected": -0.04787301644682884, + "step": 1525 + }, + { + "epoch": 1.0553250345781466, + "grad_norm": 4.877053737640381, + "learning_rate": 4.9692638696788076e-05, + "log_odds_chosen": 3.6916658878326416, + "log_odds_ratio": -0.19331425428390503, + "logits/chosen": -0.19534748792648315, + "logits/rejected": -0.24064458906650543, + "logps/chosen": -0.08929431438446045, + "logps/rejected": -0.7749847769737244, + "loss": 2.8245, + "nll_loss": 0.686805784702301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008929431438446045, + "rewards/margins": 0.06856904923915863, + "rewards/rejected": -0.07749848067760468, + "step": 1526 + }, + { + "epoch": 1.0560165975103735, + "grad_norm": 5.007440090179443, + "learning_rate": 4.968879668049793e-05, + "log_odds_chosen": 3.8080079555511475, + "log_odds_ratio": -0.28198474645614624, + "logits/chosen": -0.7920790314674377, + "logits/rejected": -0.8061708211898804, + "logps/chosen": -0.059589091688394547, + "logps/rejected": -0.8138545751571655, + "loss": 3.2012, + "nll_loss": 0.7721074223518372, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005958909168839455, + "rewards/margins": 0.07542654871940613, + "rewards/rejected": -0.08138545602560043, + "step": 1527 + }, + { + "epoch": 1.0567081604426003, + "grad_norm": 7.326679229736328, + "learning_rate": 4.968495466420778e-05, + "log_odds_chosen": 2.553607940673828, + "log_odds_ratio": -0.20654824376106262, + "logits/chosen": -0.5083712339401245, + "logits/rejected": -0.5545042157173157, + "logps/chosen": -0.11978105455636978, + "logps/rejected": -0.673089861869812, + "loss": 4.8292, + "nll_loss": 1.186640739440918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011978104710578918, + "rewards/margins": 0.05533087998628616, + "rewards/rejected": -0.06730898469686508, + "step": 1528 + }, + { + "epoch": 1.0573997233748271, + "grad_norm": 6.929790496826172, + "learning_rate": 4.9681112647917626e-05, + "log_odds_chosen": 4.561022758483887, + "log_odds_ratio": -0.0534825474023819, + "logits/chosen": -0.5944857597351074, + "logits/rejected": -0.6960824728012085, + "logps/chosen": -0.057704515755176544, + "logps/rejected": -0.9158083200454712, + "loss": 4.3409, + "nll_loss": 1.0798721313476562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005770451854914427, + "rewards/margins": 0.08581038564443588, + "rewards/rejected": -0.0915808379650116, + "step": 1529 + }, + { + "epoch": 1.058091286307054, + "grad_norm": 5.326831817626953, + "learning_rate": 4.967727063162748e-05, + "log_odds_chosen": 2.998952865600586, + "log_odds_ratio": -0.31420373916625977, + "logits/chosen": -0.6921024322509766, + "logits/rejected": -0.7197988033294678, + "logps/chosen": -0.16037589311599731, + "logps/rejected": -0.823391318321228, + "loss": 3.7279, + "nll_loss": 0.9005526304244995, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01603759080171585, + "rewards/margins": 0.06630153954029083, + "rewards/rejected": -0.08233913779258728, + "step": 1530 + }, + { + "epoch": 1.0587828492392808, + "grad_norm": 5.03424596786499, + "learning_rate": 4.967342861533733e-05, + "log_odds_chosen": 3.576352834701538, + "log_odds_ratio": -0.19618278741836548, + "logits/chosen": -0.3750036954879761, + "logits/rejected": -0.3847036361694336, + "logps/chosen": -0.0660768672823906, + "logps/rejected": -0.5405435562133789, + "loss": 3.5426, + "nll_loss": 0.8660234808921814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006607687100768089, + "rewards/margins": 0.04744666814804077, + "rewards/rejected": -0.05405435711145401, + "step": 1531 + }, + { + "epoch": 1.0594744121715076, + "grad_norm": 6.5278472900390625, + "learning_rate": 4.966958659904718e-05, + "log_odds_chosen": 5.937303066253662, + "log_odds_ratio": -0.061495859175920486, + "logits/chosen": -0.7140944600105286, + "logits/rejected": -0.7931259870529175, + "logps/chosen": -0.03675852343440056, + "logps/rejected": -1.057922124862671, + "loss": 4.0925, + "nll_loss": 1.0169788599014282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036758524365723133, + "rewards/margins": 0.10211636126041412, + "rewards/rejected": -0.10579221695661545, + "step": 1532 + }, + { + "epoch": 1.0601659751037344, + "grad_norm": 8.326800346374512, + "learning_rate": 4.9665744582757036e-05, + "log_odds_chosen": 4.401022434234619, + "log_odds_ratio": -0.22211284935474396, + "logits/chosen": -0.6128544211387634, + "logits/rejected": -0.6509073972702026, + "logps/chosen": -0.060591697692871094, + "logps/rejected": -0.8965548276901245, + "loss": 4.3236, + "nll_loss": 1.0586953163146973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006059169769287109, + "rewards/margins": 0.08359631896018982, + "rewards/rejected": -0.08965548872947693, + "step": 1533 + }, + { + "epoch": 1.0608575380359613, + "grad_norm": 6.853762149810791, + "learning_rate": 4.966190256646688e-05, + "log_odds_chosen": 3.25315523147583, + "log_odds_ratio": -0.30279356241226196, + "logits/chosen": -0.45043084025382996, + "logits/rejected": -0.4535585641860962, + "logps/chosen": -0.060727495700120926, + "logps/rejected": -0.5386378169059753, + "loss": 4.0742, + "nll_loss": 0.9882668256759644, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006072749383747578, + "rewards/margins": 0.04779103398323059, + "rewards/rejected": -0.053863782435655594, + "step": 1534 + }, + { + "epoch": 1.061549100968188, + "grad_norm": 9.890820503234863, + "learning_rate": 4.9658060550176734e-05, + "log_odds_chosen": 4.420248031616211, + "log_odds_ratio": -0.46526646614074707, + "logits/chosen": -0.45639920234680176, + "logits/rejected": -0.5076233148574829, + "logps/chosen": -0.08660363405942917, + "logps/rejected": -0.7423797845840454, + "loss": 3.3507, + "nll_loss": 0.791154146194458, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008660363964736462, + "rewards/margins": 0.06557761132717133, + "rewards/rejected": -0.07423797994852066, + "step": 1535 + }, + { + "epoch": 1.062240663900415, + "grad_norm": 5.7195658683776855, + "learning_rate": 4.965421853388659e-05, + "log_odds_chosen": 3.7188186645507812, + "log_odds_ratio": -0.18315275013446808, + "logits/chosen": -1.041092872619629, + "logits/rejected": -1.1016302108764648, + "logps/chosen": -0.06354139000177383, + "logps/rejected": -0.5816217660903931, + "loss": 5.1011, + "nll_loss": 1.2569714784622192, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006354139186441898, + "rewards/margins": 0.051808036863803864, + "rewards/rejected": -0.05816217511892319, + "step": 1536 + }, + { + "epoch": 1.0629322268326418, + "grad_norm": 6.2236223220825195, + "learning_rate": 4.965037651759644e-05, + "log_odds_chosen": 2.0946202278137207, + "log_odds_ratio": -0.40443798899650574, + "logits/chosen": -0.6937721371650696, + "logits/rejected": -0.7108144760131836, + "logps/chosen": -0.14707300066947937, + "logps/rejected": -0.6219024658203125, + "loss": 3.6966, + "nll_loss": 0.8837115168571472, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014707300812005997, + "rewards/margins": 0.047482945024967194, + "rewards/rejected": -0.06219024583697319, + "step": 1537 + }, + { + "epoch": 1.0636237897648686, + "grad_norm": 6.805111408233643, + "learning_rate": 4.9646534501306285e-05, + "log_odds_chosen": 1.7916090488433838, + "log_odds_ratio": -0.5535953044891357, + "logits/chosen": -0.5534647703170776, + "logits/rejected": -0.5669266581535339, + "logps/chosen": -0.19315959513187408, + "logps/rejected": -0.5117899775505066, + "loss": 4.121, + "nll_loss": 0.9748976826667786, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.019315961748361588, + "rewards/margins": 0.03186304122209549, + "rewards/rejected": -0.05117899924516678, + "step": 1538 + }, + { + "epoch": 1.0643153526970954, + "grad_norm": 4.819196701049805, + "learning_rate": 4.964269248501614e-05, + "log_odds_chosen": 6.569972038269043, + "log_odds_ratio": -0.09104802459478378, + "logits/chosen": -0.7071633338928223, + "logits/rejected": -0.7946109175682068, + "logps/chosen": -0.030333345755934715, + "logps/rejected": -0.6713792085647583, + "loss": 3.5293, + "nll_loss": 0.8732243180274963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003033334854990244, + "rewards/margins": 0.0641045868396759, + "rewards/rejected": -0.06713791936635971, + "step": 1539 + }, + { + "epoch": 1.0650069156293223, + "grad_norm": 5.15584659576416, + "learning_rate": 4.963885046872599e-05, + "log_odds_chosen": 4.203775405883789, + "log_odds_ratio": -0.19854849576950073, + "logits/chosen": -0.2534320652484894, + "logits/rejected": -0.2955503761768341, + "logps/chosen": -0.03829387575387955, + "logps/rejected": -0.6372541189193726, + "loss": 4.1082, + "nll_loss": 1.0071941614151, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038293874822556973, + "rewards/margins": 0.059896018356084824, + "rewards/rejected": -0.06372541189193726, + "step": 1540 + }, + { + "epoch": 1.065698478561549, + "grad_norm": 6.698187828063965, + "learning_rate": 4.9635008452435835e-05, + "log_odds_chosen": 4.564090728759766, + "log_odds_ratio": -0.19210776686668396, + "logits/chosen": -0.3504962921142578, + "logits/rejected": -0.4118233323097229, + "logps/chosen": -0.09770353138446808, + "logps/rejected": -0.9626650810241699, + "loss": 3.631, + "nll_loss": 0.888546347618103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009770354256033897, + "rewards/margins": 0.08649615198373795, + "rewards/rejected": -0.09626650810241699, + "step": 1541 + }, + { + "epoch": 1.066390041493776, + "grad_norm": 6.590559005737305, + "learning_rate": 4.9631166436145695e-05, + "log_odds_chosen": 0.892957329750061, + "log_odds_ratio": -0.48052775859832764, + "logits/chosen": -0.5946922898292542, + "logits/rejected": -0.6033682823181152, + "logps/chosen": -0.1246226578950882, + "logps/rejected": -0.23645266890525818, + "loss": 3.8066, + "nll_loss": 0.9035993218421936, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01246226578950882, + "rewards/margins": 0.011183001101016998, + "rewards/rejected": -0.023645266890525818, + "step": 1542 + }, + { + "epoch": 1.0670816044260027, + "grad_norm": 6.689427375793457, + "learning_rate": 4.962732441985554e-05, + "log_odds_chosen": 1.9284253120422363, + "log_odds_ratio": -0.5196525454521179, + "logits/chosen": -0.7123900055885315, + "logits/rejected": -0.7312474846839905, + "logps/chosen": -0.294888973236084, + "logps/rejected": -0.5123586654663086, + "loss": 3.0642, + "nll_loss": 0.7140846252441406, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02948889695107937, + "rewards/margins": 0.02174697443842888, + "rewards/rejected": -0.0512358695268631, + "step": 1543 + }, + { + "epoch": 1.0677731673582296, + "grad_norm": 7.583176136016846, + "learning_rate": 4.962348240356539e-05, + "log_odds_chosen": 2.621812105178833, + "log_odds_ratio": -0.6792709827423096, + "logits/chosen": -0.6956069469451904, + "logits/rejected": -0.7238144874572754, + "logps/chosen": -0.14746025204658508, + "logps/rejected": -0.49026352167129517, + "loss": 3.7706, + "nll_loss": 0.8747116923332214, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014746023342013359, + "rewards/margins": 0.03428032994270325, + "rewards/rejected": -0.049026355147361755, + "step": 1544 + }, + { + "epoch": 1.0684647302904564, + "grad_norm": 7.3789849281311035, + "learning_rate": 4.9619640387275245e-05, + "log_odds_chosen": 4.767126083374023, + "log_odds_ratio": -0.3121826648712158, + "logits/chosen": -0.7488409876823425, + "logits/rejected": -0.8396613001823425, + "logps/chosen": -0.07528477907180786, + "logps/rejected": -0.9295564293861389, + "loss": 4.9592, + "nll_loss": 1.208593726158142, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007528477814048529, + "rewards/margins": 0.0854271650314331, + "rewards/rejected": -0.09295564889907837, + "step": 1545 + }, + { + "epoch": 1.0691562932226832, + "grad_norm": 4.530989646911621, + "learning_rate": 4.96157983709851e-05, + "log_odds_chosen": 3.770918130874634, + "log_odds_ratio": -0.25483426451683044, + "logits/chosen": -0.286983460187912, + "logits/rejected": -0.2833419144153595, + "logps/chosen": -0.09215762466192245, + "logps/rejected": -0.7823206186294556, + "loss": 4.1782, + "nll_loss": 1.0190585851669312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009215762838721275, + "rewards/margins": 0.06901630014181137, + "rewards/rejected": -0.0782320648431778, + "step": 1546 + }, + { + "epoch": 1.06984785615491, + "grad_norm": 7.37415885925293, + "learning_rate": 4.961195635469494e-05, + "log_odds_chosen": 2.621229410171509, + "log_odds_ratio": -0.9123156070709229, + "logits/chosen": -0.5532602667808533, + "logits/rejected": -0.5623765587806702, + "logps/chosen": -0.23067545890808105, + "logps/rejected": -0.7292701005935669, + "loss": 3.1782, + "nll_loss": 0.703306257724762, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.023067545145750046, + "rewards/margins": 0.049859460443258286, + "rewards/rejected": -0.07292701303958893, + "step": 1547 + }, + { + "epoch": 1.070539419087137, + "grad_norm": 2.7616488933563232, + "learning_rate": 4.9608114338404796e-05, + "log_odds_chosen": 2.966463088989258, + "log_odds_ratio": -0.22822074592113495, + "logits/chosen": -0.6201618909835815, + "logits/rejected": -0.6020743250846863, + "logps/chosen": -0.07064022868871689, + "logps/rejected": -0.6099626421928406, + "loss": 3.038, + "nll_loss": 0.7366786003112793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0070640225894749165, + "rewards/margins": 0.05393224209547043, + "rewards/rejected": -0.06099626421928406, + "step": 1548 + }, + { + "epoch": 1.0712309820193637, + "grad_norm": 5.657704830169678, + "learning_rate": 4.960427232211465e-05, + "log_odds_chosen": 5.151440620422363, + "log_odds_ratio": -0.034663643687963486, + "logits/chosen": -0.41759589314460754, + "logits/rejected": -0.4986497759819031, + "logps/chosen": -0.03338145092129707, + "logps/rejected": -0.799149215221405, + "loss": 3.465, + "nll_loss": 0.8627803325653076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033381450921297073, + "rewards/margins": 0.07657677680253983, + "rewards/rejected": -0.07991492748260498, + "step": 1549 + }, + { + "epoch": 1.0719225449515906, + "grad_norm": 4.098145961761475, + "learning_rate": 4.9600430305824494e-05, + "log_odds_chosen": 3.907048463821411, + "log_odds_ratio": -0.25910213589668274, + "logits/chosen": -0.28188061714172363, + "logits/rejected": -0.40207451581954956, + "logps/chosen": -0.07275703549385071, + "logps/rejected": -0.8932787179946899, + "loss": 2.4006, + "nll_loss": 0.5742417573928833, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007275703828781843, + "rewards/margins": 0.08205216377973557, + "rewards/rejected": -0.089327871799469, + "step": 1550 + }, + { + "epoch": 1.0726141078838174, + "grad_norm": 6.418388843536377, + "learning_rate": 4.959658828953435e-05, + "log_odds_chosen": 2.885356903076172, + "log_odds_ratio": -0.3832821249961853, + "logits/chosen": -0.6497433185577393, + "logits/rejected": -0.7168554663658142, + "logps/chosen": -0.14998841285705566, + "logps/rejected": -0.7326866388320923, + "loss": 4.6042, + "nll_loss": 1.1127233505249023, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014998842030763626, + "rewards/margins": 0.05826983600854874, + "rewards/rejected": -0.07326867431402206, + "step": 1551 + }, + { + "epoch": 1.0733056708160442, + "grad_norm": 6.9985833168029785, + "learning_rate": 4.95927462732442e-05, + "log_odds_chosen": 1.6743195056915283, + "log_odds_ratio": -0.39890745282173157, + "logits/chosen": -0.5378249287605286, + "logits/rejected": -0.5748019218444824, + "logps/chosen": -0.11988474428653717, + "logps/rejected": -0.4952792525291443, + "loss": 3.4803, + "nll_loss": 0.8301811814308167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011988474056124687, + "rewards/margins": 0.03753945231437683, + "rewards/rejected": -0.04952792450785637, + "step": 1552 + }, + { + "epoch": 1.073997233748271, + "grad_norm": 4.325546741485596, + "learning_rate": 4.958890425695405e-05, + "log_odds_chosen": 3.9147095680236816, + "log_odds_ratio": -0.21128058433532715, + "logits/chosen": -0.307517945766449, + "logits/rejected": -0.3628294765949249, + "logps/chosen": -0.0758337453007698, + "logps/rejected": -0.78581303358078, + "loss": 2.5512, + "nll_loss": 0.6166709661483765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007583374157547951, + "rewards/margins": 0.0709979385137558, + "rewards/rejected": -0.078581303358078, + "step": 1553 + }, + { + "epoch": 1.0746887966804979, + "grad_norm": 3.2370855808258057, + "learning_rate": 4.9585062240663904e-05, + "log_odds_chosen": 3.607590436935425, + "log_odds_ratio": -0.2519790828227997, + "logits/chosen": -0.37377700209617615, + "logits/rejected": -0.3569980263710022, + "logps/chosen": -0.06945005059242249, + "logps/rejected": -0.809980571269989, + "loss": 2.6024, + "nll_loss": 0.6253975033760071, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006945005152374506, + "rewards/margins": 0.07405305653810501, + "rewards/rejected": -0.08099806308746338, + "step": 1554 + }, + { + "epoch": 1.0753803596127247, + "grad_norm": 5.439840793609619, + "learning_rate": 4.9581220224373756e-05, + "log_odds_chosen": 4.531608581542969, + "log_odds_ratio": -0.266179621219635, + "logits/chosen": -0.5603072047233582, + "logits/rejected": -0.5774667263031006, + "logps/chosen": -0.039245884865522385, + "logps/rejected": -0.5564451813697815, + "loss": 3.4032, + "nll_loss": 0.8241841793060303, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003924589138478041, + "rewards/margins": 0.05171992629766464, + "rewards/rejected": -0.05564451962709427, + "step": 1555 + }, + { + "epoch": 1.0760719225449515, + "grad_norm": 7.069887161254883, + "learning_rate": 4.95773782080836e-05, + "log_odds_chosen": 1.0571117401123047, + "log_odds_ratio": -0.4433063864707947, + "logits/chosen": -0.7253471612930298, + "logits/rejected": -0.7856532335281372, + "logps/chosen": -0.17213042080402374, + "logps/rejected": -0.4319838881492615, + "loss": 3.9714, + "nll_loss": 0.9485308527946472, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017213044688105583, + "rewards/margins": 0.025985345244407654, + "rewards/rejected": -0.043198391795158386, + "step": 1556 + }, + { + "epoch": 1.0767634854771784, + "grad_norm": 6.502851963043213, + "learning_rate": 4.9573536191793454e-05, + "log_odds_chosen": 2.863762855529785, + "log_odds_ratio": -0.33171597123146057, + "logits/chosen": -0.6376508474349976, + "logits/rejected": -0.7480642795562744, + "logps/chosen": -0.10218179225921631, + "logps/rejected": -0.664842963218689, + "loss": 2.8928, + "nll_loss": 0.6900296807289124, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010218179784715176, + "rewards/margins": 0.056266117841005325, + "rewards/rejected": -0.06648429483175278, + "step": 1557 + }, + { + "epoch": 1.0774550484094052, + "grad_norm": 6.30776834487915, + "learning_rate": 4.956969417550331e-05, + "log_odds_chosen": 4.367179870605469, + "log_odds_ratio": -0.41399842500686646, + "logits/chosen": -0.3242146968841553, + "logits/rejected": -0.4026827812194824, + "logps/chosen": -0.09431909769773483, + "logps/rejected": -0.5847798585891724, + "loss": 3.5125, + "nll_loss": 0.8367260694503784, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009431909769773483, + "rewards/margins": 0.04904608428478241, + "rewards/rejected": -0.058477990329265594, + "step": 1558 + }, + { + "epoch": 1.078146611341632, + "grad_norm": 8.217262268066406, + "learning_rate": 4.956585215921315e-05, + "log_odds_chosen": 3.6869709491729736, + "log_odds_ratio": -0.5149865746498108, + "logits/chosen": -0.9258292317390442, + "logits/rejected": -0.9052362442016602, + "logps/chosen": -0.12747938930988312, + "logps/rejected": -0.7195074558258057, + "loss": 4.7971, + "nll_loss": 1.1477842330932617, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012747939676046371, + "rewards/margins": 0.059202805161476135, + "rewards/rejected": -0.07195074111223221, + "step": 1559 + }, + { + "epoch": 1.0788381742738589, + "grad_norm": 4.424699306488037, + "learning_rate": 4.956201014292301e-05, + "log_odds_chosen": 3.942409038543701, + "log_odds_ratio": -0.08518670499324799, + "logits/chosen": -0.4492560029029846, + "logits/rejected": -0.4492393732070923, + "logps/chosen": -0.05345804616808891, + "logps/rejected": -0.7698019742965698, + "loss": 2.9677, + "nll_loss": 0.7333984375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005345804616808891, + "rewards/margins": 0.07163438946008682, + "rewards/rejected": -0.07698019593954086, + "step": 1560 + }, + { + "epoch": 1.0795297372060857, + "grad_norm": 5.800602436065674, + "learning_rate": 4.955816812663286e-05, + "log_odds_chosen": 5.187628269195557, + "log_odds_ratio": -0.12586824595928192, + "logits/chosen": -0.7637317180633545, + "logits/rejected": -0.8406319618225098, + "logps/chosen": -0.08760840445756912, + "logps/rejected": -0.8338153958320618, + "loss": 3.7892, + "nll_loss": 0.9347092509269714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008760839700698853, + "rewards/margins": 0.07462070137262344, + "rewards/rejected": -0.0833815410733223, + "step": 1561 + }, + { + "epoch": 1.0802213001383125, + "grad_norm": 4.874590873718262, + "learning_rate": 4.955432611034271e-05, + "log_odds_chosen": 3.5219225883483887, + "log_odds_ratio": -0.30585017800331116, + "logits/chosen": -0.8220917582511902, + "logits/rejected": -0.8466977477073669, + "logps/chosen": -0.0972166433930397, + "logps/rejected": -0.4325979948043823, + "loss": 3.2432, + "nll_loss": 0.7802194952964783, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009721663780510426, + "rewards/margins": 0.03353814035654068, + "rewards/rejected": -0.04325980320572853, + "step": 1562 + }, + { + "epoch": 1.0809128630705394, + "grad_norm": 6.767172336578369, + "learning_rate": 4.955048409405256e-05, + "log_odds_chosen": 3.9182686805725098, + "log_odds_ratio": -0.2959219813346863, + "logits/chosen": -0.6551793813705444, + "logits/rejected": -0.7170536518096924, + "logps/chosen": -0.08494836837053299, + "logps/rejected": -0.7065892219543457, + "loss": 3.554, + "nll_loss": 0.8589138984680176, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008494837209582329, + "rewards/margins": 0.06216409057378769, + "rewards/rejected": -0.07065892964601517, + "step": 1563 + }, + { + "epoch": 1.0816044260027662, + "grad_norm": 3.542426824569702, + "learning_rate": 4.9546642077762415e-05, + "log_odds_chosen": 4.997422695159912, + "log_odds_ratio": -0.1264844536781311, + "logits/chosen": -0.6658202409744263, + "logits/rejected": -0.7189267873764038, + "logps/chosen": -0.10747257620096207, + "logps/rejected": -0.9709796905517578, + "loss": 2.1569, + "nll_loss": 0.5265790820121765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010747257620096207, + "rewards/margins": 0.0863507091999054, + "rewards/rejected": -0.0970979705452919, + "step": 1564 + }, + { + "epoch": 1.082295988934993, + "grad_norm": 5.220712661743164, + "learning_rate": 4.954280006147226e-05, + "log_odds_chosen": 2.417450428009033, + "log_odds_ratio": -0.3215015232563019, + "logits/chosen": -0.6923054456710815, + "logits/rejected": -0.6714261770248413, + "logps/chosen": -0.0629698857665062, + "logps/rejected": -0.37886086106300354, + "loss": 3.6002, + "nll_loss": 0.8678989410400391, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0062969885766506195, + "rewards/margins": 0.031589098274707794, + "rewards/rejected": -0.037886083126068115, + "step": 1565 + }, + { + "epoch": 1.0829875518672198, + "grad_norm": 5.408576488494873, + "learning_rate": 4.953895804518211e-05, + "log_odds_chosen": 2.9714107513427734, + "log_odds_ratio": -0.2437531054019928, + "logits/chosen": -0.7561995983123779, + "logits/rejected": -0.7987399697303772, + "logps/chosen": -0.09682756662368774, + "logps/rejected": -0.8127809166908264, + "loss": 2.9384, + "nll_loss": 0.7102223634719849, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009682757779955864, + "rewards/margins": 0.07159534096717834, + "rewards/rejected": -0.08127809315919876, + "step": 1566 + }, + { + "epoch": 1.0836791147994467, + "grad_norm": 7.883374214172363, + "learning_rate": 4.9535116028891965e-05, + "log_odds_chosen": 0.3866366147994995, + "log_odds_ratio": -0.6445382237434387, + "logits/chosen": -1.159950613975525, + "logits/rejected": -1.1587477922439575, + "logps/chosen": -0.1852284073829651, + "logps/rejected": -0.28827083110809326, + "loss": 6.2101, + "nll_loss": 1.4880727529525757, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01852283999323845, + "rewards/margins": 0.010304244235157967, + "rewards/rejected": -0.028827082365751266, + "step": 1567 + }, + { + "epoch": 1.0843706777316735, + "grad_norm": 7.807645797729492, + "learning_rate": 4.953127401260181e-05, + "log_odds_chosen": 2.2896320819854736, + "log_odds_ratio": -0.406283974647522, + "logits/chosen": -1.0614399909973145, + "logits/rejected": -1.1143429279327393, + "logps/chosen": -0.33437466621398926, + "logps/rejected": -0.8825758695602417, + "loss": 5.8773, + "nll_loss": 1.4286930561065674, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.033437468111515045, + "rewards/margins": 0.05482013151049614, + "rewards/rejected": -0.08825759589672089, + "step": 1568 + }, + { + "epoch": 1.0850622406639003, + "grad_norm": 7.627025604248047, + "learning_rate": 4.952743199631167e-05, + "log_odds_chosen": 4.397810459136963, + "log_odds_ratio": -0.48821020126342773, + "logits/chosen": -0.5610281229019165, + "logits/rejected": -0.611020565032959, + "logps/chosen": -0.06754257529973984, + "logps/rejected": -0.7311649918556213, + "loss": 4.1127, + "nll_loss": 0.9793562889099121, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006754256784915924, + "rewards/margins": 0.06636224687099457, + "rewards/rejected": -0.07311650365591049, + "step": 1569 + }, + { + "epoch": 1.0857538035961272, + "grad_norm": 9.063570976257324, + "learning_rate": 4.9523589980021516e-05, + "log_odds_chosen": 3.481013298034668, + "log_odds_ratio": -0.37235239148139954, + "logits/chosen": -0.8404412269592285, + "logits/rejected": -0.7990936040878296, + "logps/chosen": -0.1394955813884735, + "logps/rejected": -0.6244330406188965, + "loss": 3.5202, + "nll_loss": 0.8428088426589966, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013949558138847351, + "rewards/margins": 0.04849374666810036, + "rewards/rejected": -0.06244330108165741, + "step": 1570 + }, + { + "epoch": 1.086445366528354, + "grad_norm": 6.26206111907959, + "learning_rate": 4.951974796373137e-05, + "log_odds_chosen": 3.6194040775299072, + "log_odds_ratio": -0.18340617418289185, + "logits/chosen": -0.8630995750427246, + "logits/rejected": -0.9298602938652039, + "logps/chosen": -0.10304134339094162, + "logps/rejected": -0.6944648623466492, + "loss": 4.4837, + "nll_loss": 1.1025748252868652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010304134339094162, + "rewards/margins": 0.0591423474252224, + "rewards/rejected": -0.06944648176431656, + "step": 1571 + }, + { + "epoch": 1.0871369294605808, + "grad_norm": 5.816069602966309, + "learning_rate": 4.951590594744122e-05, + "log_odds_chosen": 5.30167293548584, + "log_odds_ratio": -0.15548737347126007, + "logits/chosen": -0.26609960198402405, + "logits/rejected": -0.312613308429718, + "logps/chosen": -0.041022010147571564, + "logps/rejected": -0.6375818252563477, + "loss": 3.3537, + "nll_loss": 0.8228654861450195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004102201201021671, + "rewards/margins": 0.05965597555041313, + "rewards/rejected": -0.06375817954540253, + "step": 1572 + }, + { + "epoch": 1.0878284923928077, + "grad_norm": 8.704594612121582, + "learning_rate": 4.951206393115107e-05, + "log_odds_chosen": 3.002938985824585, + "log_odds_ratio": -0.4912012815475464, + "logits/chosen": -0.9212777614593506, + "logits/rejected": -0.9318854808807373, + "logps/chosen": -0.09731482714414597, + "logps/rejected": -0.6755044460296631, + "loss": 4.4402, + "nll_loss": 1.0609227418899536, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009731482714414597, + "rewards/margins": 0.05781896412372589, + "rewards/rejected": -0.06755045056343079, + "step": 1573 + }, + { + "epoch": 1.0885200553250345, + "grad_norm": 5.034717082977295, + "learning_rate": 4.950822191486092e-05, + "log_odds_chosen": 2.4043898582458496, + "log_odds_ratio": -0.4333747625350952, + "logits/chosen": -0.6324424743652344, + "logits/rejected": -0.662071168422699, + "logps/chosen": -0.1544969230890274, + "logps/rejected": -0.5821070075035095, + "loss": 3.2963, + "nll_loss": 0.7807316780090332, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015449692495167255, + "rewards/margins": 0.04276100918650627, + "rewards/rejected": -0.05821070447564125, + "step": 1574 + }, + { + "epoch": 1.0892116182572613, + "grad_norm": 5.510666847229004, + "learning_rate": 4.950437989857077e-05, + "log_odds_chosen": 5.295717716217041, + "log_odds_ratio": -0.21159470081329346, + "logits/chosen": -0.6146509647369385, + "logits/rejected": -0.6928945779800415, + "logps/chosen": -0.06159878149628639, + "logps/rejected": -0.9186801314353943, + "loss": 3.8207, + "nll_loss": 0.9340190291404724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006159878335893154, + "rewards/margins": 0.0857081338763237, + "rewards/rejected": -0.09186801314353943, + "step": 1575 + }, + { + "epoch": 1.0899031811894881, + "grad_norm": 5.914917469024658, + "learning_rate": 4.9500537882280624e-05, + "log_odds_chosen": 4.013111114501953, + "log_odds_ratio": -0.19167517125606537, + "logits/chosen": -0.5529786348342896, + "logits/rejected": -0.5917526483535767, + "logps/chosen": -0.04883315786719322, + "logps/rejected": -0.667991042137146, + "loss": 2.7463, + "nll_loss": 0.6674108505249023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004883316345512867, + "rewards/margins": 0.06191578879952431, + "rewards/rejected": -0.0667991042137146, + "step": 1576 + }, + { + "epoch": 1.090594744121715, + "grad_norm": 6.839190483093262, + "learning_rate": 4.949669586599047e-05, + "log_odds_chosen": 5.532386779785156, + "log_odds_ratio": -0.0764627605676651, + "logits/chosen": -0.4853833019733429, + "logits/rejected": -0.5567290782928467, + "logps/chosen": -0.03992730379104614, + "logps/rejected": -1.2442706823349, + "loss": 4.5269, + "nll_loss": 1.124070167541504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003992730751633644, + "rewards/margins": 0.12043432891368866, + "rewards/rejected": -0.12442706525325775, + "step": 1577 + }, + { + "epoch": 1.0912863070539418, + "grad_norm": 4.283989906311035, + "learning_rate": 4.949285384970033e-05, + "log_odds_chosen": 5.226707458496094, + "log_odds_ratio": -0.2467494159936905, + "logits/chosen": -0.5581457018852234, + "logits/rejected": -0.6312193870544434, + "logps/chosen": -0.07011242210865021, + "logps/rejected": -0.948968231678009, + "loss": 2.6167, + "nll_loss": 0.6295046210289001, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007011242676526308, + "rewards/margins": 0.08788558095693588, + "rewards/rejected": -0.0948968306183815, + "step": 1578 + }, + { + "epoch": 1.0919778699861689, + "grad_norm": 7.571458339691162, + "learning_rate": 4.9489011833410174e-05, + "log_odds_chosen": 2.7708773612976074, + "log_odds_ratio": -0.4572438895702362, + "logits/chosen": -0.45970478653907776, + "logits/rejected": -0.49945998191833496, + "logps/chosen": -0.2505786120891571, + "logps/rejected": -0.6798125505447388, + "loss": 4.4457, + "nll_loss": 1.0656884908676147, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02505786530673504, + "rewards/margins": 0.042923398315906525, + "rewards/rejected": -0.06798125803470612, + "step": 1579 + }, + { + "epoch": 1.0926694329183957, + "grad_norm": 6.045781135559082, + "learning_rate": 4.9485169817120027e-05, + "log_odds_chosen": 1.9347407817840576, + "log_odds_ratio": -0.558551549911499, + "logits/chosen": -0.6301894187927246, + "logits/rejected": -0.6606278419494629, + "logps/chosen": -0.1072370707988739, + "logps/rejected": -0.4773583710193634, + "loss": 3.232, + "nll_loss": 0.7521458268165588, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01072370633482933, + "rewards/margins": 0.03701213374733925, + "rewards/rejected": -0.04773584008216858, + "step": 1580 + }, + { + "epoch": 1.0933609958506225, + "grad_norm": 10.899459838867188, + "learning_rate": 4.948132780082988e-05, + "log_odds_chosen": 5.716094970703125, + "log_odds_ratio": -0.26957541704177856, + "logits/chosen": -0.3576154112815857, + "logits/rejected": -0.40196752548217773, + "logps/chosen": -0.06499480456113815, + "logps/rejected": -1.0065155029296875, + "loss": 3.6276, + "nll_loss": 0.8799489140510559, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006499480456113815, + "rewards/margins": 0.09415207803249359, + "rewards/rejected": -0.10065155476331711, + "step": 1581 + }, + { + "epoch": 1.0940525587828493, + "grad_norm": 3.8481316566467285, + "learning_rate": 4.947748578453973e-05, + "log_odds_chosen": 4.175989627838135, + "log_odds_ratio": -0.0638933852314949, + "logits/chosen": -0.9346051216125488, + "logits/rejected": -0.9710807800292969, + "logps/chosen": -0.04190801829099655, + "logps/rejected": -0.6167564392089844, + "loss": 3.0485, + "nll_loss": 0.7557366490364075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00419080164283514, + "rewards/margins": 0.05748484656214714, + "rewards/rejected": -0.06167564168572426, + "step": 1582 + }, + { + "epoch": 1.0947441217150762, + "grad_norm": 3.2850422859191895, + "learning_rate": 4.947364376824958e-05, + "log_odds_chosen": 4.834670543670654, + "log_odds_ratio": -0.07640227675437927, + "logits/chosen": -0.4402886629104614, + "logits/rejected": -0.44064515829086304, + "logps/chosen": -0.05761062353849411, + "logps/rejected": -0.8796223998069763, + "loss": 2.8049, + "nll_loss": 0.6935828328132629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005761062726378441, + "rewards/margins": 0.08220118284225464, + "rewards/rejected": -0.08796224743127823, + "step": 1583 + }, + { + "epoch": 1.095435684647303, + "grad_norm": 7.48633337020874, + "learning_rate": 4.946980175195943e-05, + "log_odds_chosen": 2.777125597000122, + "log_odds_ratio": -0.2875290513038635, + "logits/chosen": -0.4086695611476898, + "logits/rejected": -0.44259917736053467, + "logps/chosen": -0.12264515459537506, + "logps/rejected": -0.5968244075775146, + "loss": 3.8138, + "nll_loss": 0.924686074256897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012264516204595566, + "rewards/margins": 0.04741792380809784, + "rewards/rejected": -0.059682440012693405, + "step": 1584 + }, + { + "epoch": 1.0961272475795298, + "grad_norm": 5.133641242980957, + "learning_rate": 4.946595973566928e-05, + "log_odds_chosen": 0.8376002311706543, + "log_odds_ratio": -0.7386839389801025, + "logits/chosen": -0.661615788936615, + "logits/rejected": -0.6790576577186584, + "logps/chosen": -0.2840120792388916, + "logps/rejected": -0.3249834179878235, + "loss": 3.1574, + "nll_loss": 0.7154770493507385, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0284012071788311, + "rewards/margins": 0.0040971338748931885, + "rewards/rejected": -0.03249834105372429, + "step": 1585 + }, + { + "epoch": 1.0968188105117567, + "grad_norm": 5.681310653686523, + "learning_rate": 4.946211771937913e-05, + "log_odds_chosen": 5.562228202819824, + "log_odds_ratio": -0.19052885472774506, + "logits/chosen": -0.5465707778930664, + "logits/rejected": -0.5676652193069458, + "logps/chosen": -0.04916710406541824, + "logps/rejected": -0.8330110311508179, + "loss": 3.2122, + "nll_loss": 0.7839978337287903, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004916710779070854, + "rewards/margins": 0.0783843994140625, + "rewards/rejected": -0.0833011046051979, + "step": 1586 + }, + { + "epoch": 1.0975103734439835, + "grad_norm": 3.980339527130127, + "learning_rate": 4.945827570308899e-05, + "log_odds_chosen": 4.279679298400879, + "log_odds_ratio": -0.30568966269493103, + "logits/chosen": -0.33977723121643066, + "logits/rejected": -0.33284032344818115, + "logps/chosen": -0.09073701500892639, + "logps/rejected": -0.6302847266197205, + "loss": 2.272, + "nll_loss": 0.5374258160591125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009073702618479729, + "rewards/margins": 0.053954772651195526, + "rewards/rejected": -0.06302846968173981, + "step": 1587 + }, + { + "epoch": 1.0982019363762103, + "grad_norm": 6.770112991333008, + "learning_rate": 4.945443368679883e-05, + "log_odds_chosen": 4.432427406311035, + "log_odds_ratio": -0.5763689875602722, + "logits/chosen": -0.7433520555496216, + "logits/rejected": -0.7090204954147339, + "logps/chosen": -0.2709885835647583, + "logps/rejected": -0.893592357635498, + "loss": 2.7834, + "nll_loss": 0.6382165551185608, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02709886059165001, + "rewards/margins": 0.062260378152132034, + "rewards/rejected": -0.08935923129320145, + "step": 1588 + }, + { + "epoch": 1.0988934993084372, + "grad_norm": 6.708295822143555, + "learning_rate": 4.9450591670508685e-05, + "log_odds_chosen": 3.567749500274658, + "log_odds_ratio": -0.39118099212646484, + "logits/chosen": -0.2514309287071228, + "logits/rejected": -0.2853849232196808, + "logps/chosen": -0.09239472448825836, + "logps/rejected": -0.3336440920829773, + "loss": 3.9691, + "nll_loss": 0.9531650543212891, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00923947338014841, + "rewards/margins": 0.024124938994646072, + "rewards/rejected": -0.03336441144347191, + "step": 1589 + }, + { + "epoch": 1.099585062240664, + "grad_norm": 10.586297988891602, + "learning_rate": 4.944674965421854e-05, + "log_odds_chosen": 4.613847732543945, + "log_odds_ratio": -0.29507291316986084, + "logits/chosen": -0.6949521899223328, + "logits/rejected": -0.7418277859687805, + "logps/chosen": -0.08647487312555313, + "logps/rejected": -0.9586604237556458, + "loss": 3.1212, + "nll_loss": 0.750788688659668, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008647486567497253, + "rewards/margins": 0.08721855282783508, + "rewards/rejected": -0.09586603939533234, + "step": 1590 + }, + { + "epoch": 1.1002766251728908, + "grad_norm": 6.597928047180176, + "learning_rate": 4.944290763792839e-05, + "log_odds_chosen": 1.6849793195724487, + "log_odds_ratio": -0.6137993931770325, + "logits/chosen": -0.20653149485588074, + "logits/rejected": -0.31308573484420776, + "logps/chosen": -0.1943899691104889, + "logps/rejected": -0.5025385618209839, + "loss": 3.4048, + "nll_loss": 0.7898290157318115, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01943899691104889, + "rewards/margins": 0.030814863741397858, + "rewards/rejected": -0.05025385692715645, + "step": 1591 + }, + { + "epoch": 1.1009681881051177, + "grad_norm": 7.803439140319824, + "learning_rate": 4.9439065621638235e-05, + "log_odds_chosen": 2.9654581546783447, + "log_odds_ratio": -0.6031391620635986, + "logits/chosen": -0.4306519627571106, + "logits/rejected": -0.5081591010093689, + "logps/chosen": -0.17305631935596466, + "logps/rejected": -0.736358642578125, + "loss": 4.1712, + "nll_loss": 0.9824913740158081, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017305633053183556, + "rewards/margins": 0.05633023381233215, + "rewards/rejected": -0.07363586127758026, + "step": 1592 + }, + { + "epoch": 1.1016597510373445, + "grad_norm": 4.649277687072754, + "learning_rate": 4.943522360534809e-05, + "log_odds_chosen": 4.267962455749512, + "log_odds_ratio": -0.23143798112869263, + "logits/chosen": -0.6768631339073181, + "logits/rejected": -0.6536177396774292, + "logps/chosen": -0.06421162188053131, + "logps/rejected": -0.9346715807914734, + "loss": 3.2449, + "nll_loss": 0.7880828976631165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0064211622811853886, + "rewards/margins": 0.08704599738121033, + "rewards/rejected": -0.09346716105937958, + "step": 1593 + }, + { + "epoch": 1.1023513139695713, + "grad_norm": 23.250423431396484, + "learning_rate": 4.943138158905794e-05, + "log_odds_chosen": 2.1543641090393066, + "log_odds_ratio": -0.31085923314094543, + "logits/chosen": -0.4932831823825836, + "logits/rejected": -0.4758214056491852, + "logps/chosen": -0.061881616711616516, + "logps/rejected": -0.518766462802887, + "loss": 3.1535, + "nll_loss": 0.757276713848114, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006188162136822939, + "rewards/margins": 0.04568849131464958, + "rewards/rejected": -0.051876652985811234, + "step": 1594 + }, + { + "epoch": 1.1030428769017981, + "grad_norm": 6.492717266082764, + "learning_rate": 4.9427539572767786e-05, + "log_odds_chosen": 2.071760654449463, + "log_odds_ratio": -0.36288756132125854, + "logits/chosen": -0.6168765425682068, + "logits/rejected": -0.6260075569152832, + "logps/chosen": -0.14397895336151123, + "logps/rejected": -0.5518575310707092, + "loss": 4.0722, + "nll_loss": 0.9817653298377991, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014397896826267242, + "rewards/margins": 0.04078786075115204, + "rewards/rejected": -0.05518575757741928, + "step": 1595 + }, + { + "epoch": 1.103734439834025, + "grad_norm": 5.582458019256592, + "learning_rate": 4.9423697556477645e-05, + "log_odds_chosen": 4.845449924468994, + "log_odds_ratio": -0.17791111767292023, + "logits/chosen": -0.2792462706565857, + "logits/rejected": -0.3469136357307434, + "logps/chosen": -0.07031789422035217, + "logps/rejected": -0.8743381500244141, + "loss": 2.9334, + "nll_loss": 0.7155571579933167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0070317890495061874, + "rewards/margins": 0.08040202409029007, + "rewards/rejected": -0.0874338150024414, + "step": 1596 + }, + { + "epoch": 1.1044260027662518, + "grad_norm": 5.206540107727051, + "learning_rate": 4.941985554018749e-05, + "log_odds_chosen": 5.46361780166626, + "log_odds_ratio": -0.21550363302230835, + "logits/chosen": -0.44038888812065125, + "logits/rejected": -0.44270017743110657, + "logps/chosen": -0.08260742574930191, + "logps/rejected": -0.9547374248504639, + "loss": 3.4688, + "nll_loss": 0.8456557989120483, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008260741829872131, + "rewards/margins": 0.08721300959587097, + "rewards/rejected": -0.0954737514257431, + "step": 1597 + }, + { + "epoch": 1.1051175656984786, + "grad_norm": 6.907774448394775, + "learning_rate": 4.941601352389734e-05, + "log_odds_chosen": 5.112415313720703, + "log_odds_ratio": -0.15309226512908936, + "logits/chosen": -0.5420889854431152, + "logits/rejected": -0.6159816980361938, + "logps/chosen": -0.04379098117351532, + "logps/rejected": -1.0684349536895752, + "loss": 3.6694, + "nll_loss": 0.9020520448684692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00437909783795476, + "rewards/margins": 0.10246440768241882, + "rewards/rejected": -0.106843501329422, + "step": 1598 + }, + { + "epoch": 1.1058091286307055, + "grad_norm": 4.947270393371582, + "learning_rate": 4.9412171507607196e-05, + "log_odds_chosen": 4.351035118103027, + "log_odds_ratio": -0.1412781924009323, + "logits/chosen": -0.5069587230682373, + "logits/rejected": -0.5466160178184509, + "logps/chosen": -0.11968033015727997, + "logps/rejected": -0.9916271567344666, + "loss": 3.2332, + "nll_loss": 0.79417884349823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011968032456934452, + "rewards/margins": 0.08719468861818314, + "rewards/rejected": -0.09916272759437561, + "step": 1599 + }, + { + "epoch": 1.1065006915629323, + "grad_norm": 3.7760555744171143, + "learning_rate": 4.940832949131705e-05, + "log_odds_chosen": 5.466026782989502, + "log_odds_ratio": -0.09592100977897644, + "logits/chosen": -0.661428689956665, + "logits/rejected": -0.7313367128372192, + "logps/chosen": -0.03490148112177849, + "logps/rejected": -0.9429963231086731, + "loss": 2.319, + "nll_loss": 0.5701475739479065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003490148112177849, + "rewards/margins": 0.09080948680639267, + "rewards/rejected": -0.09429963678121567, + "step": 1600 + }, + { + "epoch": 1.1071922544951591, + "grad_norm": 3.3002185821533203, + "learning_rate": 4.9404487475026894e-05, + "log_odds_chosen": 4.276036739349365, + "log_odds_ratio": -0.07263210415840149, + "logits/chosen": -0.6134432554244995, + "logits/rejected": -0.6024748682975769, + "logps/chosen": -0.05085566267371178, + "logps/rejected": -0.7766672372817993, + "loss": 3.3723, + "nll_loss": 0.8358083963394165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00508556654676795, + "rewards/margins": 0.07258116453886032, + "rewards/rejected": -0.07766672968864441, + "step": 1601 + }, + { + "epoch": 1.107883817427386, + "grad_norm": 7.756932735443115, + "learning_rate": 4.9400645458736746e-05, + "log_odds_chosen": 2.1110987663269043, + "log_odds_ratio": -1.3031134605407715, + "logits/chosen": -0.41942086815834045, + "logits/rejected": -0.48380038142204285, + "logps/chosen": -0.2565928101539612, + "logps/rejected": -0.5842873454093933, + "loss": 4.4358, + "nll_loss": 0.9786398410797119, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.025659281760454178, + "rewards/margins": 0.032769449055194855, + "rewards/rejected": -0.05842873081564903, + "step": 1602 + }, + { + "epoch": 1.1085753803596128, + "grad_norm": 6.662200927734375, + "learning_rate": 4.93968034424466e-05, + "log_odds_chosen": 3.6231627464294434, + "log_odds_ratio": -0.18868622183799744, + "logits/chosen": -0.5040012001991272, + "logits/rejected": -0.5356638431549072, + "logps/chosen": -0.10449366271495819, + "logps/rejected": -0.7985894680023193, + "loss": 2.8902, + "nll_loss": 0.7036839127540588, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010449366644024849, + "rewards/margins": 0.0694095715880394, + "rewards/rejected": -0.0798589363694191, + "step": 1603 + }, + { + "epoch": 1.1092669432918396, + "grad_norm": 5.240853309631348, + "learning_rate": 4.9392961426156444e-05, + "log_odds_chosen": 1.5222573280334473, + "log_odds_ratio": -0.4411067068576813, + "logits/chosen": -0.6297593116760254, + "logits/rejected": -0.6809091567993164, + "logps/chosen": -0.0897793397307396, + "logps/rejected": -0.4441949725151062, + "loss": 4.194, + "nll_loss": 1.0043995380401611, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008977933786809444, + "rewards/margins": 0.0354415625333786, + "rewards/rejected": -0.04441949725151062, + "step": 1604 + }, + { + "epoch": 1.1099585062240664, + "grad_norm": 4.411398887634277, + "learning_rate": 4.9389119409866304e-05, + "log_odds_chosen": 2.3982813358306885, + "log_odds_ratio": -0.3286263644695282, + "logits/chosen": -0.13925503194332123, + "logits/rejected": -0.11078141629695892, + "logps/chosen": -0.08166931569576263, + "logps/rejected": -0.3501182794570923, + "loss": 3.8694, + "nll_loss": 0.9344936609268188, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008166931569576263, + "rewards/margins": 0.026844896376132965, + "rewards/rejected": -0.03501182794570923, + "step": 1605 + }, + { + "epoch": 1.1106500691562933, + "grad_norm": 5.958127975463867, + "learning_rate": 4.938527739357615e-05, + "log_odds_chosen": 3.813007354736328, + "log_odds_ratio": -0.12042003870010376, + "logits/chosen": -0.8060808777809143, + "logits/rejected": -0.8647295832633972, + "logps/chosen": -0.08172804862260818, + "logps/rejected": -0.9784216284751892, + "loss": 4.7046, + "nll_loss": 1.1641055345535278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008172805421054363, + "rewards/margins": 0.0896693542599678, + "rewards/rejected": -0.09784215688705444, + "step": 1606 + }, + { + "epoch": 1.11134163208852, + "grad_norm": 7.819185256958008, + "learning_rate": 4.9381435377286e-05, + "log_odds_chosen": 2.3151051998138428, + "log_odds_ratio": -0.5686360597610474, + "logits/chosen": -0.5417439937591553, + "logits/rejected": -0.5752671957015991, + "logps/chosen": -0.1530493199825287, + "logps/rejected": -0.4273419976234436, + "loss": 3.5647, + "nll_loss": 0.8343040943145752, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015304931439459324, + "rewards/margins": 0.02742926776409149, + "rewards/rejected": -0.04273419827222824, + "step": 1607 + }, + { + "epoch": 1.112033195020747, + "grad_norm": 5.927016258239746, + "learning_rate": 4.9377593360995854e-05, + "log_odds_chosen": 3.2013847827911377, + "log_odds_ratio": -0.33018064498901367, + "logits/chosen": -0.6260197162628174, + "logits/rejected": -0.6978428363800049, + "logps/chosen": -0.06922274827957153, + "logps/rejected": -0.8326290249824524, + "loss": 3.1934, + "nll_loss": 0.7653228640556335, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006922274827957153, + "rewards/margins": 0.07634063065052032, + "rewards/rejected": -0.08326290547847748, + "step": 1608 + }, + { + "epoch": 1.1127247579529738, + "grad_norm": 3.3232882022857666, + "learning_rate": 4.937375134470571e-05, + "log_odds_chosen": 3.8514456748962402, + "log_odds_ratio": -0.26576095819473267, + "logits/chosen": -0.2728078067302704, + "logits/rejected": -0.26904305815696716, + "logps/chosen": -0.08268368989229202, + "logps/rejected": -0.513020396232605, + "loss": 2.8992, + "nll_loss": 0.6982300877571106, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008268369361758232, + "rewards/margins": 0.043033670634031296, + "rewards/rejected": -0.05130203813314438, + "step": 1609 + }, + { + "epoch": 1.1134163208852006, + "grad_norm": 3.1397273540496826, + "learning_rate": 4.936990932841555e-05, + "log_odds_chosen": 3.1608638763427734, + "log_odds_ratio": -0.3535284996032715, + "logits/chosen": -0.3686169683933258, + "logits/rejected": -0.38490238785743713, + "logps/chosen": -0.10619036853313446, + "logps/rejected": -0.6823399662971497, + "loss": 2.3135, + "nll_loss": 0.5430222153663635, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010619036853313446, + "rewards/margins": 0.05761495977640152, + "rewards/rejected": -0.06823400408029556, + "step": 1610 + }, + { + "epoch": 1.1141078838174274, + "grad_norm": 5.7114338874816895, + "learning_rate": 4.9366067312125405e-05, + "log_odds_chosen": 0.835321307182312, + "log_odds_ratio": -0.516608715057373, + "logits/chosen": -0.9049027562141418, + "logits/rejected": -0.876893162727356, + "logps/chosen": -0.149239644408226, + "logps/rejected": -0.3646370470523834, + "loss": 3.99, + "nll_loss": 0.9458338618278503, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014923964627087116, + "rewards/margins": 0.02153974026441574, + "rewards/rejected": -0.03646370768547058, + "step": 1611 + }, + { + "epoch": 1.1147994467496543, + "grad_norm": 7.030287742614746, + "learning_rate": 4.936222529583526e-05, + "log_odds_chosen": 3.394101619720459, + "log_odds_ratio": -0.3108062744140625, + "logits/chosen": -0.19622401893138885, + "logits/rejected": -0.26739972829818726, + "logps/chosen": -0.05431075766682625, + "logps/rejected": -0.4297984838485718, + "loss": 2.801, + "nll_loss": 0.6691676378250122, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00543107558041811, + "rewards/margins": 0.03754877299070358, + "rewards/rejected": -0.042979851365089417, + "step": 1612 + }, + { + "epoch": 1.115491009681881, + "grad_norm": 5.743906021118164, + "learning_rate": 4.93583832795451e-05, + "log_odds_chosen": 3.954920768737793, + "log_odds_ratio": -0.15968072414398193, + "logits/chosen": -0.3926483392715454, + "logits/rejected": -0.47625932097435, + "logps/chosen": -0.06609839200973511, + "logps/rejected": -0.8475942015647888, + "loss": 3.3477, + "nll_loss": 0.8209632635116577, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006609839387238026, + "rewards/margins": 0.07814957201480865, + "rewards/rejected": -0.0847594141960144, + "step": 1613 + }, + { + "epoch": 1.116182572614108, + "grad_norm": 7.3016510009765625, + "learning_rate": 4.935454126325496e-05, + "log_odds_chosen": 3.8284425735473633, + "log_odds_ratio": -0.25773143768310547, + "logits/chosen": -0.48374220728874207, + "logits/rejected": -0.5044052004814148, + "logps/chosen": -0.08846524357795715, + "logps/rejected": -0.7487151026725769, + "loss": 3.6295, + "nll_loss": 0.8816075325012207, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008846525102853775, + "rewards/margins": 0.06602498888969421, + "rewards/rejected": -0.07487151026725769, + "step": 1614 + }, + { + "epoch": 1.1168741355463347, + "grad_norm": 11.499844551086426, + "learning_rate": 4.935069924696481e-05, + "log_odds_chosen": 3.526914596557617, + "log_odds_ratio": -0.6500409841537476, + "logits/chosen": -0.8252017498016357, + "logits/rejected": -0.8671191334724426, + "logps/chosen": -0.13813264667987823, + "logps/rejected": -0.5451509356498718, + "loss": 4.4419, + "nll_loss": 1.045462727546692, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013813264667987823, + "rewards/margins": 0.04070183262228966, + "rewards/rejected": -0.05451509729027748, + "step": 1615 + }, + { + "epoch": 1.1175656984785616, + "grad_norm": 4.009091854095459, + "learning_rate": 4.934685723067466e-05, + "log_odds_chosen": 6.222552299499512, + "log_odds_ratio": -0.047023192048072815, + "logits/chosen": -0.5882456302642822, + "logits/rejected": -0.5744817852973938, + "logps/chosen": -0.03364326059818268, + "logps/rejected": -0.5920311212539673, + "loss": 2.6308, + "nll_loss": 0.6530083417892456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033643266651779413, + "rewards/margins": 0.05583879351615906, + "rewards/rejected": -0.05920311436057091, + "step": 1616 + }, + { + "epoch": 1.1182572614107884, + "grad_norm": 5.012261390686035, + "learning_rate": 4.934301521438451e-05, + "log_odds_chosen": 1.9860203266143799, + "log_odds_ratio": -0.3052294850349426, + "logits/chosen": -0.20576004683971405, + "logits/rejected": -0.1735822856426239, + "logps/chosen": -0.10027210414409637, + "logps/rejected": -0.3483325242996216, + "loss": 3.0656, + "nll_loss": 0.7358713746070862, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010027211159467697, + "rewards/margins": 0.02480604313313961, + "rewards/rejected": -0.03483325615525246, + "step": 1617 + }, + { + "epoch": 1.1189488243430152, + "grad_norm": 4.4571919441223145, + "learning_rate": 4.9339173198094365e-05, + "log_odds_chosen": 4.763401508331299, + "log_odds_ratio": -0.167281836271286, + "logits/chosen": -0.5365985631942749, + "logits/rejected": -0.5737805366516113, + "logps/chosen": -0.08558344095945358, + "logps/rejected": -0.7303501963615417, + "loss": 3.3979, + "nll_loss": 0.8327397704124451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008558344095945358, + "rewards/margins": 0.06447667628526688, + "rewards/rejected": -0.07303501665592194, + "step": 1618 + }, + { + "epoch": 1.119640387275242, + "grad_norm": 5.40239953994751, + "learning_rate": 4.933533118180421e-05, + "log_odds_chosen": 4.589512825012207, + "log_odds_ratio": -0.21769876778125763, + "logits/chosen": -0.66706782579422, + "logits/rejected": -0.6870805025100708, + "logps/chosen": -0.036408498883247375, + "logps/rejected": -0.6844362020492554, + "loss": 3.0529, + "nll_loss": 0.7414535284042358, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036408500745892525, + "rewards/margins": 0.06480278074741364, + "rewards/rejected": -0.06844362616539001, + "step": 1619 + }, + { + "epoch": 1.120331950207469, + "grad_norm": 4.73685884475708, + "learning_rate": 4.933148916551406e-05, + "log_odds_chosen": 0.8199967741966248, + "log_odds_ratio": -0.4512670636177063, + "logits/chosen": -0.333723247051239, + "logits/rejected": -0.3674015700817108, + "logps/chosen": -0.12762649357318878, + "logps/rejected": -0.306660532951355, + "loss": 3.2674, + "nll_loss": 0.7717109322547913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012762648984789848, + "rewards/margins": 0.0179034024477005, + "rewards/rejected": -0.030666053295135498, + "step": 1620 + }, + { + "epoch": 1.1210235131396957, + "grad_norm": 5.175172328948975, + "learning_rate": 4.9327647149223916e-05, + "log_odds_chosen": 2.6243436336517334, + "log_odds_ratio": -0.4764016568660736, + "logits/chosen": -0.7795975208282471, + "logits/rejected": -0.782432496547699, + "logps/chosen": -0.051674984395504, + "logps/rejected": -0.7004712820053101, + "loss": 3.3344, + "nll_loss": 0.7859505414962769, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005167498253285885, + "rewards/margins": 0.06487962603569031, + "rewards/rejected": -0.07004712522029877, + "step": 1621 + }, + { + "epoch": 1.1217150760719226, + "grad_norm": 4.103938579559326, + "learning_rate": 4.932380513293376e-05, + "log_odds_chosen": 4.642533302307129, + "log_odds_ratio": -0.101071797311306, + "logits/chosen": -0.6278355717658997, + "logits/rejected": -0.6976598501205444, + "logps/chosen": -0.05617416650056839, + "logps/rejected": -0.7917832136154175, + "loss": 2.8799, + "nll_loss": 0.709868848323822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005617417395114899, + "rewards/margins": 0.0735609158873558, + "rewards/rejected": -0.0791783332824707, + "step": 1622 + }, + { + "epoch": 1.1224066390041494, + "grad_norm": 4.957241058349609, + "learning_rate": 4.931996311664362e-05, + "log_odds_chosen": 6.685695648193359, + "log_odds_ratio": -0.04919375851750374, + "logits/chosen": -0.6535751223564148, + "logits/rejected": -0.6850297451019287, + "logps/chosen": -0.06118635833263397, + "logps/rejected": -1.2937774658203125, + "loss": 3.101, + "nll_loss": 0.770325779914856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00611863611266017, + "rewards/margins": 0.12325912714004517, + "rewards/rejected": -0.12937775254249573, + "step": 1623 + }, + { + "epoch": 1.1230982019363762, + "grad_norm": 9.584799766540527, + "learning_rate": 4.9316121100353466e-05, + "log_odds_chosen": 3.091644763946533, + "log_odds_ratio": -0.31044328212738037, + "logits/chosen": -0.4470682740211487, + "logits/rejected": -0.5123588442802429, + "logps/chosen": -0.07803031802177429, + "logps/rejected": -0.7579048871994019, + "loss": 4.4858, + "nll_loss": 1.090402603149414, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007803032640367746, + "rewards/margins": 0.06798745691776276, + "rewards/rejected": -0.07579049468040466, + "step": 1624 + }, + { + "epoch": 1.123789764868603, + "grad_norm": 5.5097174644470215, + "learning_rate": 4.931227908406332e-05, + "log_odds_chosen": 4.695748805999756, + "log_odds_ratio": -0.1543882191181183, + "logits/chosen": -0.15720303356647491, + "logits/rejected": -0.1915234476327896, + "logps/chosen": -0.0845288410782814, + "logps/rejected": -0.7179161310195923, + "loss": 2.4497, + "nll_loss": 0.5969738960266113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008452883921563625, + "rewards/margins": 0.06333872675895691, + "rewards/rejected": -0.07179160416126251, + "step": 1625 + }, + { + "epoch": 1.1244813278008299, + "grad_norm": 8.026557922363281, + "learning_rate": 4.930843706777317e-05, + "log_odds_chosen": 3.344681739807129, + "log_odds_ratio": -1.0812357664108276, + "logits/chosen": -0.48144322633743286, + "logits/rejected": -0.4669128954410553, + "logps/chosen": -0.21000587940216064, + "logps/rejected": -0.7212158441543579, + "loss": 4.0158, + "nll_loss": 0.8958293795585632, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.021000590175390244, + "rewards/margins": 0.05112099647521973, + "rewards/rejected": -0.07212159037590027, + "step": 1626 + }, + { + "epoch": 1.1251728907330567, + "grad_norm": 9.976818084716797, + "learning_rate": 4.9304595051483024e-05, + "log_odds_chosen": 0.8328192234039307, + "log_odds_ratio": -0.9626883864402771, + "logits/chosen": -0.653387188911438, + "logits/rejected": -0.6768147945404053, + "logps/chosen": -0.20581936836242676, + "logps/rejected": -0.33645424246788025, + "loss": 3.9907, + "nll_loss": 0.9014164805412292, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020581936463713646, + "rewards/margins": 0.013063488528132439, + "rewards/rejected": -0.03364542871713638, + "step": 1627 + }, + { + "epoch": 1.1258644536652835, + "grad_norm": 5.68765926361084, + "learning_rate": 4.930075303519287e-05, + "log_odds_chosen": 4.571466445922852, + "log_odds_ratio": -0.20608262717723846, + "logits/chosen": -0.6150608658790588, + "logits/rejected": -0.6334342360496521, + "logps/chosen": -0.05543144419789314, + "logps/rejected": -0.8365674018859863, + "loss": 4.3687, + "nll_loss": 1.0715715885162354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005543144885450602, + "rewards/margins": 0.0781136006116867, + "rewards/rejected": -0.08365673571825027, + "step": 1628 + }, + { + "epoch": 1.1265560165975104, + "grad_norm": 4.4451494216918945, + "learning_rate": 4.929691101890272e-05, + "log_odds_chosen": 3.742349863052368, + "log_odds_ratio": -0.23910017311573029, + "logits/chosen": -0.3370603024959564, + "logits/rejected": -0.3568029999732971, + "logps/chosen": -0.16225500404834747, + "logps/rejected": -0.7111620903015137, + "loss": 3.7668, + "nll_loss": 0.9177911877632141, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016225501894950867, + "rewards/margins": 0.0548907108604908, + "rewards/rejected": -0.07111620903015137, + "step": 1629 + }, + { + "epoch": 1.1272475795297372, + "grad_norm": 3.7970314025878906, + "learning_rate": 4.9293069002612574e-05, + "log_odds_chosen": 2.94815731048584, + "log_odds_ratio": -0.15021851658821106, + "logits/chosen": -0.5437855124473572, + "logits/rejected": -0.5586941838264465, + "logps/chosen": -0.12474802136421204, + "logps/rejected": -0.7907891273498535, + "loss": 2.7239, + "nll_loss": 0.6659583449363708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012474801391363144, + "rewards/margins": 0.06660410761833191, + "rewards/rejected": -0.07907891273498535, + "step": 1630 + }, + { + "epoch": 1.127939142461964, + "grad_norm": 8.042040824890137, + "learning_rate": 4.9289226986322427e-05, + "log_odds_chosen": 3.3266515731811523, + "log_odds_ratio": -0.4503074884414673, + "logits/chosen": -0.45929020643234253, + "logits/rejected": -0.4860483407974243, + "logps/chosen": -0.11026670038700104, + "logps/rejected": -0.6502195596694946, + "loss": 4.1101, + "nll_loss": 0.9824921488761902, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011026670224964619, + "rewards/margins": 0.0539952851831913, + "rewards/rejected": -0.06502196192741394, + "step": 1631 + }, + { + "epoch": 1.1286307053941909, + "grad_norm": 4.391267776489258, + "learning_rate": 4.928538497003228e-05, + "log_odds_chosen": 5.245411396026611, + "log_odds_ratio": -0.34077537059783936, + "logits/chosen": -0.699263870716095, + "logits/rejected": -0.7080675363540649, + "logps/chosen": -0.10102634876966476, + "logps/rejected": -0.7285547256469727, + "loss": 2.3565, + "nll_loss": 0.5550588369369507, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010102634318172932, + "rewards/margins": 0.06275284290313721, + "rewards/rejected": -0.07285548001527786, + "step": 1632 + }, + { + "epoch": 1.1293222683264177, + "grad_norm": 4.450112819671631, + "learning_rate": 4.9281542953742125e-05, + "log_odds_chosen": 7.433021545410156, + "log_odds_ratio": -0.0016825036145746708, + "logits/chosen": -0.3040405511856079, + "logits/rejected": -0.34822529554367065, + "logps/chosen": -0.00632679695263505, + "logps/rejected": -1.2802190780639648, + "loss": 2.5501, + "nll_loss": 0.637348473072052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006326796719804406, + "rewards/margins": 0.12738922238349915, + "rewards/rejected": -0.12802191078662872, + "step": 1633 + }, + { + "epoch": 1.1300138312586445, + "grad_norm": 5.715757369995117, + "learning_rate": 4.927770093745198e-05, + "log_odds_chosen": 3.635809898376465, + "log_odds_ratio": -0.4132714867591858, + "logits/chosen": -0.35934287309646606, + "logits/rejected": -0.3480125963687897, + "logps/chosen": -0.11658094823360443, + "logps/rejected": -0.7394274473190308, + "loss": 3.198, + "nll_loss": 0.7581678032875061, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011658095754683018, + "rewards/margins": 0.062284644693136215, + "rewards/rejected": -0.07394274324178696, + "step": 1634 + }, + { + "epoch": 1.1307053941908713, + "grad_norm": 7.623841762542725, + "learning_rate": 4.927385892116183e-05, + "log_odds_chosen": 2.3251943588256836, + "log_odds_ratio": -0.43498295545578003, + "logits/chosen": -0.5964546799659729, + "logits/rejected": -0.5950238704681396, + "logps/chosen": -0.12847104668617249, + "logps/rejected": -0.5509581565856934, + "loss": 4.8888, + "nll_loss": 1.1787103414535522, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012847105041146278, + "rewards/margins": 0.042248714715242386, + "rewards/rejected": -0.055095817893743515, + "step": 1635 + }, + { + "epoch": 1.1313969571230982, + "grad_norm": 4.627635955810547, + "learning_rate": 4.927001690487168e-05, + "log_odds_chosen": 6.360747337341309, + "log_odds_ratio": -0.03663550317287445, + "logits/chosen": -0.3691325783729553, + "logits/rejected": -0.4044398069381714, + "logps/chosen": -0.05212727189064026, + "logps/rejected": -1.1166536808013916, + "loss": 2.5455, + "nll_loss": 0.6327031850814819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005212727934122086, + "rewards/margins": 0.10645262897014618, + "rewards/rejected": -0.11166536808013916, + "step": 1636 + }, + { + "epoch": 1.132088520055325, + "grad_norm": 6.78596830368042, + "learning_rate": 4.926617488858153e-05, + "log_odds_chosen": 2.127685070037842, + "log_odds_ratio": -0.6240582466125488, + "logits/chosen": -0.4540773630142212, + "logits/rejected": -0.49073025584220886, + "logps/chosen": -0.16882912814617157, + "logps/rejected": -0.4076695740222931, + "loss": 4.0789, + "nll_loss": 0.9573305249214172, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016882915049791336, + "rewards/margins": 0.02388404682278633, + "rewards/rejected": -0.04076696187257767, + "step": 1637 + }, + { + "epoch": 1.1327800829875518, + "grad_norm": 6.193438529968262, + "learning_rate": 4.926233287229138e-05, + "log_odds_chosen": 5.44583797454834, + "log_odds_ratio": -0.17725984752178192, + "logits/chosen": -0.003792904317378998, + "logits/rejected": -0.11088617146015167, + "logps/chosen": -0.043570052832365036, + "logps/rejected": -1.0186293125152588, + "loss": 3.391, + "nll_loss": 0.8300263285636902, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0043570054695010185, + "rewards/margins": 0.09750592708587646, + "rewards/rejected": -0.10186292976140976, + "step": 1638 + }, + { + "epoch": 1.1334716459197787, + "grad_norm": 7.459033489227295, + "learning_rate": 4.925849085600123e-05, + "log_odds_chosen": 3.1296372413635254, + "log_odds_ratio": -0.23613426089286804, + "logits/chosen": -0.49093225598335266, + "logits/rejected": -0.5520147681236267, + "logps/chosen": -0.08632385730743408, + "logps/rejected": -0.6004340648651123, + "loss": 3.683, + "nll_loss": 0.8971471786499023, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008632385171949863, + "rewards/margins": 0.05141102522611618, + "rewards/rejected": -0.06004340946674347, + "step": 1639 + }, + { + "epoch": 1.1341632088520055, + "grad_norm": 5.153171539306641, + "learning_rate": 4.9254648839711085e-05, + "log_odds_chosen": 4.974581718444824, + "log_odds_ratio": -0.2203356772661209, + "logits/chosen": -0.34750106930732727, + "logits/rejected": -0.3961317241191864, + "logps/chosen": -0.062355298548936844, + "logps/rejected": -0.7640964388847351, + "loss": 2.6511, + "nll_loss": 0.6407467722892761, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006235530134290457, + "rewards/margins": 0.07017411291599274, + "rewards/rejected": -0.07640964537858963, + "step": 1640 + }, + { + "epoch": 1.1348547717842323, + "grad_norm": 4.96319055557251, + "learning_rate": 4.925080682342094e-05, + "log_odds_chosen": 3.6856021881103516, + "log_odds_ratio": -0.34094586968421936, + "logits/chosen": -0.618395209312439, + "logits/rejected": -0.6706283092498779, + "logps/chosen": -0.17205843329429626, + "logps/rejected": -0.5053707361221313, + "loss": 3.246, + "nll_loss": 0.777412474155426, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017205843701958656, + "rewards/margins": 0.03333123028278351, + "rewards/rejected": -0.050537072122097015, + "step": 1641 + }, + { + "epoch": 1.1355463347164592, + "grad_norm": 6.023739814758301, + "learning_rate": 4.924696480713078e-05, + "log_odds_chosen": 2.418586492538452, + "log_odds_ratio": -0.3684270977973938, + "logits/chosen": -0.7153379321098328, + "logits/rejected": -0.7132663130760193, + "logps/chosen": -0.10732871294021606, + "logps/rejected": -0.4773138761520386, + "loss": 3.5234, + "nll_loss": 0.8440024852752686, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010732870548963547, + "rewards/margins": 0.03699852153658867, + "rewards/rejected": -0.047731392085552216, + "step": 1642 + }, + { + "epoch": 1.136237897648686, + "grad_norm": 3.6020874977111816, + "learning_rate": 4.9243122790840636e-05, + "log_odds_chosen": 3.060307502746582, + "log_odds_ratio": -0.13144780695438385, + "logits/chosen": -0.6625022888183594, + "logits/rejected": -0.6797756552696228, + "logps/chosen": -0.09465555101633072, + "logps/rejected": -0.6426516175270081, + "loss": 3.4357, + "nll_loss": 0.8457767963409424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009465554729104042, + "rewards/margins": 0.054799605160951614, + "rewards/rejected": -0.0642651617527008, + "step": 1643 + }, + { + "epoch": 1.1369294605809128, + "grad_norm": 4.302525520324707, + "learning_rate": 4.923928077455049e-05, + "log_odds_chosen": 2.8316779136657715, + "log_odds_ratio": -0.469307005405426, + "logits/chosen": -0.5069953203201294, + "logits/rejected": -0.511053204536438, + "logps/chosen": -0.10691452026367188, + "logps/rejected": -0.5705878734588623, + "loss": 3.149, + "nll_loss": 0.7403192520141602, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010691452771425247, + "rewards/margins": 0.0463673360645771, + "rewards/rejected": -0.05705878883600235, + "step": 1644 + }, + { + "epoch": 1.1376210235131397, + "grad_norm": 7.63559103012085, + "learning_rate": 4.923543875826034e-05, + "log_odds_chosen": 4.579002857208252, + "log_odds_ratio": -0.09951350837945938, + "logits/chosen": 0.05032390356063843, + "logits/rejected": -0.00823403149843216, + "logps/chosen": -0.02870844677090645, + "logps/rejected": -0.9091194272041321, + "loss": 4.3199, + "nll_loss": 1.070017695426941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028708449099212885, + "rewards/margins": 0.08804109692573547, + "rewards/rejected": -0.09091193974018097, + "step": 1645 + }, + { + "epoch": 1.1383125864453665, + "grad_norm": 4.535558700561523, + "learning_rate": 4.9231596741970186e-05, + "log_odds_chosen": 3.8096237182617188, + "log_odds_ratio": -0.36003577709198, + "logits/chosen": -0.8161823153495789, + "logits/rejected": -0.8410078287124634, + "logps/chosen": -0.07835372537374496, + "logps/rejected": -0.7174544334411621, + "loss": 3.0849, + "nll_loss": 0.735215425491333, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007835373282432556, + "rewards/margins": 0.06391007453203201, + "rewards/rejected": -0.07174544036388397, + "step": 1646 + }, + { + "epoch": 1.1390041493775933, + "grad_norm": 4.016298294067383, + "learning_rate": 4.9227754725680045e-05, + "log_odds_chosen": 3.7809200286865234, + "log_odds_ratio": -0.2413710504770279, + "logits/chosen": -0.42067304253578186, + "logits/rejected": -0.37396830320358276, + "logps/chosen": -0.09345149993896484, + "logps/rejected": -0.6482632160186768, + "loss": 2.8125, + "nll_loss": 0.6789858341217041, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00934515055269003, + "rewards/margins": 0.05548117309808731, + "rewards/rejected": -0.06482632458209991, + "step": 1647 + }, + { + "epoch": 1.1396957123098201, + "grad_norm": 7.45605993270874, + "learning_rate": 4.922391270938989e-05, + "log_odds_chosen": 2.0952701568603516, + "log_odds_ratio": -0.37152040004730225, + "logits/chosen": -0.580081582069397, + "logits/rejected": -0.6073364019393921, + "logps/chosen": -0.1313703954219818, + "logps/rejected": -0.597480058670044, + "loss": 4.4212, + "nll_loss": 1.0681456327438354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01313704065978527, + "rewards/margins": 0.04661097005009651, + "rewards/rejected": -0.05974800884723663, + "step": 1648 + }, + { + "epoch": 1.140387275242047, + "grad_norm": 5.093718528747559, + "learning_rate": 4.9220070693099743e-05, + "log_odds_chosen": 1.542366623878479, + "log_odds_ratio": -0.34795552492141724, + "logits/chosen": -0.5907109975814819, + "logits/rejected": -0.5933347940444946, + "logps/chosen": -0.09473934769630432, + "logps/rejected": -0.30543074011802673, + "loss": 3.4758, + "nll_loss": 0.834161639213562, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009473934769630432, + "rewards/margins": 0.02106913924217224, + "rewards/rejected": -0.030543074011802673, + "step": 1649 + }, + { + "epoch": 1.1410788381742738, + "grad_norm": 10.639129638671875, + "learning_rate": 4.921622867680959e-05, + "log_odds_chosen": 2.3673408031463623, + "log_odds_ratio": -0.22872783243656158, + "logits/chosen": -0.2898896634578705, + "logits/rejected": -0.39120471477508545, + "logps/chosen": -0.18079392611980438, + "logps/rejected": -0.9120924472808838, + "loss": 5.7336, + "nll_loss": 1.4105350971221924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018079392611980438, + "rewards/margins": 0.07312985509634018, + "rewards/rejected": -0.09120924770832062, + "step": 1650 + }, + { + "epoch": 1.1417704011065006, + "grad_norm": 6.867001533508301, + "learning_rate": 4.921238666051944e-05, + "log_odds_chosen": 2.202517509460449, + "log_odds_ratio": -0.3454076945781708, + "logits/chosen": -0.6197003126144409, + "logits/rejected": -0.6169849634170532, + "logps/chosen": -0.09023884683847427, + "logps/rejected": -0.3344111442565918, + "loss": 5.8866, + "nll_loss": 1.4371070861816406, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009023885242640972, + "rewards/margins": 0.024417227134108543, + "rewards/rejected": -0.03344111517071724, + "step": 1651 + }, + { + "epoch": 1.1424619640387275, + "grad_norm": 5.405807971954346, + "learning_rate": 4.9208544644229294e-05, + "log_odds_chosen": 2.6169772148132324, + "log_odds_ratio": -0.48516252636909485, + "logits/chosen": -0.5111470818519592, + "logits/rejected": -0.5195670127868652, + "logps/chosen": -0.12113036215305328, + "logps/rejected": -0.5095824599266052, + "loss": 3.1151, + "nll_loss": 0.7302597761154175, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012113036587834358, + "rewards/margins": 0.038845207542181015, + "rewards/rejected": -0.05095824971795082, + "step": 1652 + }, + { + "epoch": 1.1431535269709543, + "grad_norm": 5.10026741027832, + "learning_rate": 4.920470262793914e-05, + "log_odds_chosen": 3.596342086791992, + "log_odds_ratio": -0.4324381947517395, + "logits/chosen": -0.4986104369163513, + "logits/rejected": -0.5461660027503967, + "logps/chosen": -0.15873593091964722, + "logps/rejected": -0.6344982385635376, + "loss": 3.1703, + "nll_loss": 0.7493206262588501, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015873592346906662, + "rewards/margins": 0.04757623374462128, + "rewards/rejected": -0.06344982981681824, + "step": 1653 + }, + { + "epoch": 1.1438450899031811, + "grad_norm": 4.311457633972168, + "learning_rate": 4.9200860611649e-05, + "log_odds_chosen": 3.192807197570801, + "log_odds_ratio": -0.24010953307151794, + "logits/chosen": -0.3876314163208008, + "logits/rejected": -0.4332069456577301, + "logps/chosen": -0.08185150474309921, + "logps/rejected": -0.5942113399505615, + "loss": 2.6524, + "nll_loss": 0.6390830874443054, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008185150101780891, + "rewards/margins": 0.051235977560281754, + "rewards/rejected": -0.05942113697528839, + "step": 1654 + }, + { + "epoch": 1.144536652835408, + "grad_norm": 4.532189846038818, + "learning_rate": 4.9197018595358845e-05, + "log_odds_chosen": 3.096100091934204, + "log_odds_ratio": -0.23214709758758545, + "logits/chosen": -0.6746965646743774, + "logits/rejected": -0.6621944308280945, + "logps/chosen": -0.10050790756940842, + "logps/rejected": -0.555006742477417, + "loss": 3.6235, + "nll_loss": 0.8826611042022705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010050790384411812, + "rewards/margins": 0.0454498827457428, + "rewards/rejected": -0.05550067499279976, + "step": 1655 + }, + { + "epoch": 1.1452282157676348, + "grad_norm": 4.945940971374512, + "learning_rate": 4.91931765790687e-05, + "log_odds_chosen": 2.6006646156311035, + "log_odds_ratio": -0.2788270115852356, + "logits/chosen": -0.6870574355125427, + "logits/rejected": -0.6803454756736755, + "logps/chosen": -0.06980787962675095, + "logps/rejected": -0.5878862142562866, + "loss": 3.3184, + "nll_loss": 0.8017243146896362, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006980787497013807, + "rewards/margins": 0.05180782452225685, + "rewards/rejected": -0.058788616210222244, + "step": 1656 + }, + { + "epoch": 1.1459197786998616, + "grad_norm": 10.107165336608887, + "learning_rate": 4.918933456277855e-05, + "log_odds_chosen": 3.2974982261657715, + "log_odds_ratio": -0.4954441785812378, + "logits/chosen": -0.3904740512371063, + "logits/rejected": -0.378157377243042, + "logps/chosen": -0.10745969414710999, + "logps/rejected": -0.4751533567905426, + "loss": 4.543, + "nll_loss": 1.0862106084823608, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010745970532298088, + "rewards/margins": 0.03676936775445938, + "rewards/rejected": -0.04751533642411232, + "step": 1657 + }, + { + "epoch": 1.1466113416320884, + "grad_norm": 5.9534173011779785, + "learning_rate": 4.91854925464884e-05, + "log_odds_chosen": 5.842714309692383, + "log_odds_ratio": -0.09394479542970657, + "logits/chosen": -0.4603232443332672, + "logits/rejected": -0.5066232681274414, + "logps/chosen": -0.058203209191560745, + "logps/rejected": -1.0633374452590942, + "loss": 3.2159, + "nll_loss": 0.7945787310600281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005820321384817362, + "rewards/margins": 0.10051342844963074, + "rewards/rejected": -0.10633374750614166, + "step": 1658 + }, + { + "epoch": 1.1473029045643153, + "grad_norm": 5.841182708740234, + "learning_rate": 4.918165053019825e-05, + "log_odds_chosen": 4.049822807312012, + "log_odds_ratio": -0.23675966262817383, + "logits/chosen": -0.3512836694717407, + "logits/rejected": -0.46696898341178894, + "logps/chosen": -0.08876635134220123, + "logps/rejected": -1.013091802597046, + "loss": 4.286, + "nll_loss": 1.04783034324646, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008876635693013668, + "rewards/margins": 0.09243255108594894, + "rewards/rejected": -0.10130918771028519, + "step": 1659 + }, + { + "epoch": 1.147994467496542, + "grad_norm": 4.923659801483154, + "learning_rate": 4.91778085139081e-05, + "log_odds_chosen": 5.361041069030762, + "log_odds_ratio": -0.16646316647529602, + "logits/chosen": -0.47271567583084106, + "logits/rejected": -0.4884890019893646, + "logps/chosen": -0.0668402761220932, + "logps/rejected": -0.7941054105758667, + "loss": 2.6559, + "nll_loss": 0.647331714630127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00668402761220932, + "rewards/margins": 0.07272651046514511, + "rewards/rejected": -0.07941053807735443, + "step": 1660 + }, + { + "epoch": 1.148686030428769, + "grad_norm": 5.230505466461182, + "learning_rate": 4.917396649761795e-05, + "log_odds_chosen": 4.7292022705078125, + "log_odds_ratio": -0.158156618475914, + "logits/chosen": -0.8374958038330078, + "logits/rejected": -0.8386293649673462, + "logps/chosen": -0.06256973743438721, + "logps/rejected": -0.8360474705696106, + "loss": 4.0505, + "nll_loss": 0.9968149662017822, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0062569743022322655, + "rewards/margins": 0.0773477777838707, + "rewards/rejected": -0.08360475301742554, + "step": 1661 + }, + { + "epoch": 1.1493775933609958, + "grad_norm": 6.226810455322266, + "learning_rate": 4.91701244813278e-05, + "log_odds_chosen": 3.0641565322875977, + "log_odds_ratio": -0.2409423142671585, + "logits/chosen": -0.569155216217041, + "logits/rejected": -0.5948754549026489, + "logps/chosen": -0.06277336925268173, + "logps/rejected": -0.5739333033561707, + "loss": 4.5011, + "nll_loss": 1.1011693477630615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006277337204664946, + "rewards/margins": 0.05111599713563919, + "rewards/rejected": -0.057393334805965424, + "step": 1662 + }, + { + "epoch": 1.1500691562932226, + "grad_norm": 7.280765056610107, + "learning_rate": 4.916628246503766e-05, + "log_odds_chosen": 2.8586554527282715, + "log_odds_ratio": -0.18323729932308197, + "logits/chosen": -0.5155065059661865, + "logits/rejected": -0.5760002732276917, + "logps/chosen": -0.07034897804260254, + "logps/rejected": -0.602337121963501, + "loss": 5.7805, + "nll_loss": 1.4267985820770264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007034897338598967, + "rewards/margins": 0.053198814392089844, + "rewards/rejected": -0.0602337121963501, + "step": 1663 + }, + { + "epoch": 1.1507607192254494, + "grad_norm": 7.845277309417725, + "learning_rate": 4.91624404487475e-05, + "log_odds_chosen": 5.0160417556762695, + "log_odds_ratio": -0.2870984673500061, + "logits/chosen": -0.3389076590538025, + "logits/rejected": -0.3596407175064087, + "logps/chosen": -0.055796217173337936, + "logps/rejected": -1.035224437713623, + "loss": 4.6075, + "nll_loss": 1.1231759786605835, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005579621996730566, + "rewards/margins": 0.09794282913208008, + "rewards/rejected": -0.10352244973182678, + "step": 1664 + }, + { + "epoch": 1.1514522821576763, + "grad_norm": 5.204296588897705, + "learning_rate": 4.9158598432457355e-05, + "log_odds_chosen": 6.408123970031738, + "log_odds_ratio": -0.12883061170578003, + "logits/chosen": -0.22733397781848907, + "logits/rejected": -0.31913691759109497, + "logps/chosen": -0.05134060978889465, + "logps/rejected": -0.7932345271110535, + "loss": 2.6177, + "nll_loss": 0.6415426135063171, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005134060978889465, + "rewards/margins": 0.07418939471244812, + "rewards/rejected": -0.07932344824075699, + "step": 1665 + }, + { + "epoch": 1.152143845089903, + "grad_norm": 4.909811973571777, + "learning_rate": 4.915475641616721e-05, + "log_odds_chosen": 4.934022903442383, + "log_odds_ratio": -0.12989267706871033, + "logits/chosen": -0.3042985796928406, + "logits/rejected": -0.3891471028327942, + "logps/chosen": -0.07688090205192566, + "logps/rejected": -0.896817684173584, + "loss": 2.7567, + "nll_loss": 0.676192045211792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007688090205192566, + "rewards/margins": 0.08199368417263031, + "rewards/rejected": -0.08968178182840347, + "step": 1666 + }, + { + "epoch": 1.15283540802213, + "grad_norm": 5.710595607757568, + "learning_rate": 4.915091439987706e-05, + "log_odds_chosen": 3.5815577507019043, + "log_odds_ratio": -0.2634783983230591, + "logits/chosen": -0.7198811769485474, + "logits/rejected": -0.7444949150085449, + "logps/chosen": -0.11194150894880295, + "logps/rejected": -0.595949113368988, + "loss": 4.0449, + "nll_loss": 0.9848828911781311, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011194150894880295, + "rewards/margins": 0.04840076342225075, + "rewards/rejected": -0.05959491431713104, + "step": 1667 + }, + { + "epoch": 1.1535269709543567, + "grad_norm": 8.014622688293457, + "learning_rate": 4.9147072383586906e-05, + "log_odds_chosen": 2.484999418258667, + "log_odds_ratio": -0.5048198103904724, + "logits/chosen": 0.004195423796772957, + "logits/rejected": -0.047088563442230225, + "logps/chosen": -0.18642094731330872, + "logps/rejected": -0.5310749411582947, + "loss": 4.4745, + "nll_loss": 1.0681509971618652, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018642093986272812, + "rewards/margins": 0.034465398639440536, + "rewards/rejected": -0.05310749262571335, + "step": 1668 + }, + { + "epoch": 1.1542185338865836, + "grad_norm": 5.527405738830566, + "learning_rate": 4.914323036729676e-05, + "log_odds_chosen": 2.0551114082336426, + "log_odds_ratio": -0.18518784642219543, + "logits/chosen": -0.3637649118900299, + "logits/rejected": -0.3871780037879944, + "logps/chosen": -0.09218564629554749, + "logps/rejected": -0.518540620803833, + "loss": 3.6033, + "nll_loss": 0.8823060393333435, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009218564257025719, + "rewards/margins": 0.042635492980480194, + "rewards/rejected": -0.05185405910015106, + "step": 1669 + }, + { + "epoch": 1.1549100968188104, + "grad_norm": 4.045838356018066, + "learning_rate": 4.913938835100661e-05, + "log_odds_chosen": 5.368803977966309, + "log_odds_ratio": -0.06457238644361496, + "logits/chosen": -0.7588016986846924, + "logits/rejected": -0.7690705060958862, + "logps/chosen": -0.015393403358757496, + "logps/rejected": -0.855527400970459, + "loss": 2.7714, + "nll_loss": 0.6863961219787598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001539340359158814, + "rewards/margins": 0.08401340246200562, + "rewards/rejected": -0.08555274456739426, + "step": 1670 + }, + { + "epoch": 1.1556016597510372, + "grad_norm": 5.965897083282471, + "learning_rate": 4.9135546334716457e-05, + "log_odds_chosen": 2.9094269275665283, + "log_odds_ratio": -0.46307048201560974, + "logits/chosen": -0.2961413860321045, + "logits/rejected": -0.27826306223869324, + "logps/chosen": -0.09813665598630905, + "logps/rejected": -0.6231961250305176, + "loss": 3.5255, + "nll_loss": 0.835060715675354, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009813666343688965, + "rewards/margins": 0.05250595137476921, + "rewards/rejected": -0.062319621443748474, + "step": 1671 + }, + { + "epoch": 1.156293222683264, + "grad_norm": 4.455894470214844, + "learning_rate": 4.9131704318426316e-05, + "log_odds_chosen": 4.973597049713135, + "log_odds_ratio": -0.09037226438522339, + "logits/chosen": -0.6276228427886963, + "logits/rejected": -0.6904111504554749, + "logps/chosen": -0.06606549024581909, + "logps/rejected": -1.0546118021011353, + "loss": 3.0021, + "nll_loss": 0.7414852380752563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006606548558920622, + "rewards/margins": 0.09885463863611221, + "rewards/rejected": -0.10546118021011353, + "step": 1672 + }, + { + "epoch": 1.156984785615491, + "grad_norm": 4.100534439086914, + "learning_rate": 4.912786230213616e-05, + "log_odds_chosen": 5.852409362792969, + "log_odds_ratio": -0.06742922961711884, + "logits/chosen": -0.6525583863258362, + "logits/rejected": -0.7228882908821106, + "logps/chosen": -0.023829951882362366, + "logps/rejected": -1.2559055089950562, + "loss": 3.5361, + "nll_loss": 0.8772757649421692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002382995095103979, + "rewards/margins": 0.12320756167173386, + "rewards/rejected": -0.12559056282043457, + "step": 1673 + }, + { + "epoch": 1.1576763485477177, + "grad_norm": 7.9378790855407715, + "learning_rate": 4.9124020285846014e-05, + "log_odds_chosen": 2.524151563644409, + "log_odds_ratio": -0.6948474049568176, + "logits/chosen": -0.7985988855361938, + "logits/rejected": -0.8701484799385071, + "logps/chosen": -0.11264081299304962, + "logps/rejected": -0.5082597732543945, + "loss": 5.014, + "nll_loss": 1.1840271949768066, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011264080181717873, + "rewards/margins": 0.03956189751625061, + "rewards/rejected": -0.050825975835323334, + "step": 1674 + }, + { + "epoch": 1.1583679114799446, + "grad_norm": 6.94173002243042, + "learning_rate": 4.9120178269555866e-05, + "log_odds_chosen": 4.101590156555176, + "log_odds_ratio": -0.3209138512611389, + "logits/chosen": -0.3075600564479828, + "logits/rejected": -0.34465670585632324, + "logps/chosen": -0.13615387678146362, + "logps/rejected": -1.0777006149291992, + "loss": 3.8975, + "nll_loss": 0.9422775506973267, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013615388423204422, + "rewards/margins": 0.09415467083454132, + "rewards/rejected": -0.10777007043361664, + "step": 1675 + }, + { + "epoch": 1.1590594744121714, + "grad_norm": 4.6008195877075195, + "learning_rate": 4.911633625326572e-05, + "log_odds_chosen": 4.601840972900391, + "log_odds_ratio": -0.21844017505645752, + "logits/chosen": -0.18146252632141113, + "logits/rejected": -0.19315530359745026, + "logps/chosen": -0.11097032576799393, + "logps/rejected": -0.8364883661270142, + "loss": 3.2465, + "nll_loss": 0.7897868752479553, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011097033508121967, + "rewards/margins": 0.07255180180072784, + "rewards/rejected": -0.08364883810281754, + "step": 1676 + }, + { + "epoch": 1.1597510373443982, + "grad_norm": 5.564633846282959, + "learning_rate": 4.9112494236975564e-05, + "log_odds_chosen": 3.663455009460449, + "log_odds_ratio": -0.11242678761482239, + "logits/chosen": -0.7468219995498657, + "logits/rejected": -0.7474039196968079, + "logps/chosen": -0.133896142244339, + "logps/rejected": -1.2125940322875977, + "loss": 3.4401, + "nll_loss": 0.8487862348556519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013389615342020988, + "rewards/margins": 0.10786978900432587, + "rewards/rejected": -0.12125939875841141, + "step": 1677 + }, + { + "epoch": 1.1604426002766253, + "grad_norm": 5.151391506195068, + "learning_rate": 4.910865222068542e-05, + "log_odds_chosen": 3.6544852256774902, + "log_odds_ratio": -0.3815726935863495, + "logits/chosen": -0.5739408135414124, + "logits/rejected": -0.6253122687339783, + "logps/chosen": -0.1026638001203537, + "logps/rejected": -0.4522859752178192, + "loss": 3.6439, + "nll_loss": 0.8728081583976746, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010266379453241825, + "rewards/margins": 0.03496221452951431, + "rewards/rejected": -0.04522860050201416, + "step": 1678 + }, + { + "epoch": 1.161134163208852, + "grad_norm": 4.976283550262451, + "learning_rate": 4.910481020439527e-05, + "log_odds_chosen": 5.420558929443359, + "log_odds_ratio": -0.09251780807971954, + "logits/chosen": -0.658862292766571, + "logits/rejected": -0.6868621706962585, + "logps/chosen": -0.03994268178939819, + "logps/rejected": -0.7436200380325317, + "loss": 2.3847, + "nll_loss": 0.5869274139404297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003994268365204334, + "rewards/margins": 0.0703677386045456, + "rewards/rejected": -0.07436200976371765, + "step": 1679 + }, + { + "epoch": 1.161825726141079, + "grad_norm": 4.847886562347412, + "learning_rate": 4.9100968188105115e-05, + "log_odds_chosen": 3.639221429824829, + "log_odds_ratio": -0.24989987909793854, + "logits/chosen": -0.5126394629478455, + "logits/rejected": -0.5004561543464661, + "logps/chosen": -0.07378698885440826, + "logps/rejected": -0.6940549612045288, + "loss": 3.1331, + "nll_loss": 0.7582850456237793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007378697860985994, + "rewards/margins": 0.062026798725128174, + "rewards/rejected": -0.06940549612045288, + "step": 1680 + }, + { + "epoch": 1.1625172890733058, + "grad_norm": 6.826456546783447, + "learning_rate": 4.9097126171814974e-05, + "log_odds_chosen": 4.123432636260986, + "log_odds_ratio": -0.23046131432056427, + "logits/chosen": -0.5177972912788391, + "logits/rejected": -0.562575101852417, + "logps/chosen": -0.055708374828100204, + "logps/rejected": -0.8523918390274048, + "loss": 3.4889, + "nll_loss": 0.8491816520690918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005570837762206793, + "rewards/margins": 0.07966834306716919, + "rewards/rejected": -0.08523918688297272, + "step": 1681 + }, + { + "epoch": 1.1632088520055326, + "grad_norm": 4.31039571762085, + "learning_rate": 4.909328415552482e-05, + "log_odds_chosen": 3.9521539211273193, + "log_odds_ratio": -0.318380206823349, + "logits/chosen": -0.30143076181411743, + "logits/rejected": -0.30992391705513, + "logps/chosen": -0.1237991452217102, + "logps/rejected": -0.7632213234901428, + "loss": 2.6952, + "nll_loss": 0.6419578194618225, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01237991638481617, + "rewards/margins": 0.06394222378730774, + "rewards/rejected": -0.07632213830947876, + "step": 1682 + }, + { + "epoch": 1.1639004149377594, + "grad_norm": 5.163027286529541, + "learning_rate": 4.908944213923467e-05, + "log_odds_chosen": 4.225794315338135, + "log_odds_ratio": -0.20646844804286957, + "logits/chosen": -0.170430988073349, + "logits/rejected": -0.26145029067993164, + "logps/chosen": -0.08614880591630936, + "logps/rejected": -0.6882296204566956, + "loss": 2.9342, + "nll_loss": 0.7129086852073669, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008614880032837391, + "rewards/margins": 0.06020808592438698, + "rewards/rejected": -0.0688229650259018, + "step": 1683 + }, + { + "epoch": 1.1645919778699863, + "grad_norm": 6.314855575561523, + "learning_rate": 4.9085600122944525e-05, + "log_odds_chosen": 6.348881721496582, + "log_odds_ratio": -0.03455987200140953, + "logits/chosen": -0.7655620574951172, + "logits/rejected": -0.794634222984314, + "logps/chosen": -0.01900785230100155, + "logps/rejected": -1.014559030532837, + "loss": 3.2221, + "nll_loss": 0.8020722270011902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001900785369798541, + "rewards/margins": 0.0995551124215126, + "rewards/rejected": -0.10145590454339981, + "step": 1684 + }, + { + "epoch": 1.165283540802213, + "grad_norm": 6.513770580291748, + "learning_rate": 4.908175810665438e-05, + "log_odds_chosen": 4.891922473907471, + "log_odds_ratio": -0.09184511005878448, + "logits/chosen": -0.4809439778327942, + "logits/rejected": -0.5717316269874573, + "logps/chosen": -0.06514959037303925, + "logps/rejected": -0.9885765314102173, + "loss": 4.2231, + "nll_loss": 1.0465795993804932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006514958571642637, + "rewards/margins": 0.09234270453453064, + "rewards/rejected": -0.09885765612125397, + "step": 1685 + }, + { + "epoch": 1.16597510373444, + "grad_norm": 6.499868392944336, + "learning_rate": 4.907791609036422e-05, + "log_odds_chosen": 5.693251609802246, + "log_odds_ratio": -0.3279362618923187, + "logits/chosen": -0.574987530708313, + "logits/rejected": -0.6270314455032349, + "logps/chosen": -0.06349683552980423, + "logps/rejected": -0.9440611600875854, + "loss": 1.991, + "nll_loss": 0.4649551510810852, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006349683273583651, + "rewards/margins": 0.08805642277002335, + "rewards/rejected": -0.0944061130285263, + "step": 1686 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 7.496671676635742, + "learning_rate": 4.9074074074074075e-05, + "log_odds_chosen": 3.44323992729187, + "log_odds_ratio": -0.5197282433509827, + "logits/chosen": -0.415819376707077, + "logits/rejected": -0.40631169080734253, + "logps/chosen": -0.12665203213691711, + "logps/rejected": -0.8945168256759644, + "loss": 3.9831, + "nll_loss": 0.9438073039054871, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01266520470380783, + "rewards/margins": 0.07678647339344025, + "rewards/rejected": -0.08945168554782867, + "step": 1687 + }, + { + "epoch": 1.1673582295988936, + "grad_norm": 5.101566314697266, + "learning_rate": 4.907023205778393e-05, + "log_odds_chosen": 3.5672669410705566, + "log_odds_ratio": -0.20680952072143555, + "logits/chosen": -0.42624789476394653, + "logits/rejected": -0.466632604598999, + "logps/chosen": -0.1136215478181839, + "logps/rejected": -0.874742329120636, + "loss": 3.3617, + "nll_loss": 0.8197535276412964, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011362154968082905, + "rewards/margins": 0.07611207664012909, + "rewards/rejected": -0.08747424185276031, + "step": 1688 + }, + { + "epoch": 1.1680497925311204, + "grad_norm": 6.7860283851623535, + "learning_rate": 4.9066390041493773e-05, + "log_odds_chosen": 4.040188312530518, + "log_odds_ratio": -0.37084370851516724, + "logits/chosen": -0.4637543261051178, + "logits/rejected": -0.5015013813972473, + "logps/chosen": -0.1526714712381363, + "logps/rejected": -0.9633198976516724, + "loss": 4.2893, + "nll_loss": 1.0352481603622437, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015267147682607174, + "rewards/margins": 0.08106484264135361, + "rewards/rejected": -0.09633199125528336, + "step": 1689 + }, + { + "epoch": 1.1687413554633472, + "grad_norm": 7.010456085205078, + "learning_rate": 4.906254802520363e-05, + "log_odds_chosen": 2.909613609313965, + "log_odds_ratio": -0.18302452564239502, + "logits/chosen": -0.15228617191314697, + "logits/rejected": -0.10386423766613007, + "logps/chosen": -0.09300586581230164, + "logps/rejected": -0.5666298270225525, + "loss": 4.2026, + "nll_loss": 1.0323392152786255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009300586767494678, + "rewards/margins": 0.047362394630908966, + "rewards/rejected": -0.05666298419237137, + "step": 1690 + }, + { + "epoch": 1.169432918395574, + "grad_norm": 5.009253025054932, + "learning_rate": 4.905870600891348e-05, + "log_odds_chosen": 4.034602642059326, + "log_odds_ratio": -0.22675380110740662, + "logits/chosen": -0.3099116086959839, + "logits/rejected": -0.34099122881889343, + "logps/chosen": -0.1059737503528595, + "logps/rejected": -0.7704473733901978, + "loss": 3.7081, + "nll_loss": 0.9043589234352112, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010597375221550465, + "rewards/margins": 0.06644736230373383, + "rewards/rejected": -0.07704474031925201, + "step": 1691 + }, + { + "epoch": 1.170124481327801, + "grad_norm": 6.25744104385376, + "learning_rate": 4.905486399262333e-05, + "log_odds_chosen": 4.162661075592041, + "log_odds_ratio": -0.06848907470703125, + "logits/chosen": -0.5197206735610962, + "logits/rejected": -0.5505616664886475, + "logps/chosen": -0.03739035129547119, + "logps/rejected": -0.6413342952728271, + "loss": 4.7416, + "nll_loss": 1.1785529851913452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003739034989848733, + "rewards/margins": 0.06039439141750336, + "rewards/rejected": -0.0641334280371666, + "step": 1692 + }, + { + "epoch": 1.1708160442600277, + "grad_norm": 10.99494457244873, + "learning_rate": 4.905102197633318e-05, + "log_odds_chosen": 5.198581218719482, + "log_odds_ratio": -0.0648670345544815, + "logits/chosen": -0.4666910767555237, + "logits/rejected": -0.47967618703842163, + "logps/chosen": -0.07335232943296432, + "logps/rejected": -0.8208720684051514, + "loss": 2.714, + "nll_loss": 0.6720219850540161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00733523303642869, + "rewards/margins": 0.07475197315216064, + "rewards/rejected": -0.0820872038602829, + "step": 1693 + }, + { + "epoch": 1.1715076071922546, + "grad_norm": 4.8345441818237305, + "learning_rate": 4.9047179960043036e-05, + "log_odds_chosen": 2.570599317550659, + "log_odds_ratio": -0.496211975812912, + "logits/chosen": -0.5026839375495911, + "logits/rejected": -0.4709765911102295, + "logps/chosen": -0.16171272099018097, + "logps/rejected": -0.49773257970809937, + "loss": 2.9871, + "nll_loss": 0.6971441507339478, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016171272844076157, + "rewards/margins": 0.03360198438167572, + "rewards/rejected": -0.04977325722575188, + "step": 1694 + }, + { + "epoch": 1.1721991701244814, + "grad_norm": 5.599493980407715, + "learning_rate": 4.904333794375288e-05, + "log_odds_chosen": 6.313642501831055, + "log_odds_ratio": -0.23186209797859192, + "logits/chosen": -0.17842333018779755, + "logits/rejected": -0.20784892141819, + "logps/chosen": -0.10879230499267578, + "logps/rejected": -0.7824175357818604, + "loss": 3.2663, + "nll_loss": 0.793390154838562, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010879230685532093, + "rewards/margins": 0.06736251711845398, + "rewards/rejected": -0.0782417505979538, + "step": 1695 + }, + { + "epoch": 1.1728907330567082, + "grad_norm": 4.865135192871094, + "learning_rate": 4.9039495927462734e-05, + "log_odds_chosen": 2.9153175354003906, + "log_odds_ratio": -0.29287028312683105, + "logits/chosen": -0.3575715720653534, + "logits/rejected": -0.3803892731666565, + "logps/chosen": -0.0807889997959137, + "logps/rejected": -0.5074627995491028, + "loss": 3.2277, + "nll_loss": 0.7776476740837097, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00807889923453331, + "rewards/margins": 0.04266738519072533, + "rewards/rejected": -0.050746284425258636, + "step": 1696 + }, + { + "epoch": 1.173582295988935, + "grad_norm": 5.336679935455322, + "learning_rate": 4.9035653911172586e-05, + "log_odds_chosen": 7.413576602935791, + "log_odds_ratio": -0.004513254389166832, + "logits/chosen": -0.46858909726142883, + "logits/rejected": -0.4611837863922119, + "logps/chosen": -0.01939975842833519, + "logps/rejected": -1.1275957822799683, + "loss": 3.077, + "nll_loss": 0.7687873840332031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019399760058149695, + "rewards/margins": 0.11081959307193756, + "rewards/rejected": -0.11275956779718399, + "step": 1697 + }, + { + "epoch": 1.1742738589211619, + "grad_norm": 4.496361255645752, + "learning_rate": 4.903181189488243e-05, + "log_odds_chosen": 5.660953521728516, + "log_odds_ratio": -0.05301395803689957, + "logits/chosen": -0.27618223428726196, + "logits/rejected": -0.31661874055862427, + "logps/chosen": -0.02332213707268238, + "logps/rejected": -0.6976031064987183, + "loss": 2.9289, + "nll_loss": 0.726930558681488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002332213567569852, + "rewards/margins": 0.0674280971288681, + "rewards/rejected": -0.06976031512022018, + "step": 1698 + }, + { + "epoch": 1.1749654218533887, + "grad_norm": 6.71616268157959, + "learning_rate": 4.902796987859229e-05, + "log_odds_chosen": 2.6607582569122314, + "log_odds_ratio": -0.28555721044540405, + "logits/chosen": -0.7132636904716492, + "logits/rejected": -0.7477965354919434, + "logps/chosen": -0.15023541450500488, + "logps/rejected": -0.6951221227645874, + "loss": 4.5801, + "nll_loss": 1.1164746284484863, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015023542568087578, + "rewards/margins": 0.05448867008090019, + "rewards/rejected": -0.06951221823692322, + "step": 1699 + }, + { + "epoch": 1.1756569847856155, + "grad_norm": 5.5194783210754395, + "learning_rate": 4.902412786230214e-05, + "log_odds_chosen": 2.8025269508361816, + "log_odds_ratio": -0.5562505722045898, + "logits/chosen": -0.6272051334381104, + "logits/rejected": -0.6220443248748779, + "logps/chosen": -0.1548004150390625, + "logps/rejected": -0.5177695751190186, + "loss": 3.2813, + "nll_loss": 0.7647000551223755, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01548004150390625, + "rewards/margins": 0.036296918988227844, + "rewards/rejected": -0.051776956766843796, + "step": 1700 + }, + { + "epoch": 1.1763485477178424, + "grad_norm": 4.19633150100708, + "learning_rate": 4.902028584601199e-05, + "log_odds_chosen": 5.554427146911621, + "log_odds_ratio": -0.12019255757331848, + "logits/chosen": -0.19540315866470337, + "logits/rejected": -0.2124204933643341, + "logps/chosen": -0.039816707372665405, + "logps/rejected": -1.157930612564087, + "loss": 3.2301, + "nll_loss": 0.7955155968666077, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003981670830398798, + "rewards/margins": 0.11181138455867767, + "rewards/rejected": -0.11579305678606033, + "step": 1701 + }, + { + "epoch": 1.1770401106500692, + "grad_norm": 4.260463237762451, + "learning_rate": 4.901644382972184e-05, + "log_odds_chosen": 5.628140449523926, + "log_odds_ratio": -0.08788633346557617, + "logits/chosen": -0.7331647872924805, + "logits/rejected": -0.7790195345878601, + "logps/chosen": -0.025510985404253006, + "logps/rejected": -0.7099208235740662, + "loss": 2.6703, + "nll_loss": 0.6587742567062378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025510985869914293, + "rewards/margins": 0.0684409886598587, + "rewards/rejected": -0.07099208235740662, + "step": 1702 + }, + { + "epoch": 1.177731673582296, + "grad_norm": 5.894955158233643, + "learning_rate": 4.9012601813431694e-05, + "log_odds_chosen": 1.5804680585861206, + "log_odds_ratio": -0.6186449527740479, + "logits/chosen": -0.4635010361671448, + "logits/rejected": -0.5034515857696533, + "logps/chosen": -0.1285967230796814, + "logps/rejected": -0.4574553966522217, + "loss": 3.4464, + "nll_loss": 0.7997399568557739, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01285967230796814, + "rewards/margins": 0.032885871827602386, + "rewards/rejected": -0.045745544135570526, + "step": 1703 + }, + { + "epoch": 1.1784232365145229, + "grad_norm": 4.228522777557373, + "learning_rate": 4.900875979714154e-05, + "log_odds_chosen": 5.511290550231934, + "log_odds_ratio": -0.052493322640657425, + "logits/chosen": -0.354582816362381, + "logits/rejected": -0.3201233744621277, + "logps/chosen": -0.045021433383226395, + "logps/rejected": -0.7105008959770203, + "loss": 2.8926, + "nll_loss": 0.7179022431373596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004502143245190382, + "rewards/margins": 0.0665479451417923, + "rewards/rejected": -0.07105008512735367, + "step": 1704 + }, + { + "epoch": 1.1791147994467497, + "grad_norm": 6.06090784072876, + "learning_rate": 4.900491778085139e-05, + "log_odds_chosen": 4.680421352386475, + "log_odds_ratio": -0.04448583722114563, + "logits/chosen": -0.6964292526245117, + "logits/rejected": -0.6882299780845642, + "logps/chosen": -0.027521885931491852, + "logps/rejected": -0.6695336699485779, + "loss": 3.5434, + "nll_loss": 0.881403386592865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002752188825979829, + "rewards/margins": 0.06420118361711502, + "rewards/rejected": -0.06695336848497391, + "step": 1705 + }, + { + "epoch": 1.1798063623789765, + "grad_norm": 5.982486248016357, + "learning_rate": 4.9001075764561245e-05, + "log_odds_chosen": 4.029968738555908, + "log_odds_ratio": -0.15640224516391754, + "logits/chosen": -0.42293068766593933, + "logits/rejected": -0.5107755661010742, + "logps/chosen": -0.08865299820899963, + "logps/rejected": -0.9007152318954468, + "loss": 3.6548, + "nll_loss": 0.898070752620697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008865299634635448, + "rewards/margins": 0.08120621740818024, + "rewards/rejected": -0.09007152169942856, + "step": 1706 + }, + { + "epoch": 1.1804979253112033, + "grad_norm": 8.436861991882324, + "learning_rate": 4.899723374827109e-05, + "log_odds_chosen": 3.1094229221343994, + "log_odds_ratio": -0.23974156379699707, + "logits/chosen": -0.6728495955467224, + "logits/rejected": -0.7456769943237305, + "logps/chosen": -0.07639665901660919, + "logps/rejected": -0.45884567499160767, + "loss": 4.2164, + "nll_loss": 1.0301259756088257, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007639665622264147, + "rewards/margins": 0.03824490308761597, + "rewards/rejected": -0.045884571969509125, + "step": 1707 + }, + { + "epoch": 1.1811894882434302, + "grad_norm": 6.517073154449463, + "learning_rate": 4.899339173198095e-05, + "log_odds_chosen": 4.08290433883667, + "log_odds_ratio": -0.45967811346054077, + "logits/chosen": -0.3719734251499176, + "logits/rejected": -0.4058666527271271, + "logps/chosen": -0.04212799295783043, + "logps/rejected": -0.5582305192947388, + "loss": 3.415, + "nll_loss": 0.8077764511108398, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004212799482047558, + "rewards/margins": 0.051610250025987625, + "rewards/rejected": -0.05582305043935776, + "step": 1708 + }, + { + "epoch": 1.181881051175657, + "grad_norm": 6.491971969604492, + "learning_rate": 4.8989549715690795e-05, + "log_odds_chosen": 2.971602201461792, + "log_odds_ratio": -0.25535744428634644, + "logits/chosen": -0.6482892036437988, + "logits/rejected": -0.642250657081604, + "logps/chosen": -0.11500442028045654, + "logps/rejected": -0.7281869053840637, + "loss": 3.9964, + "nll_loss": 0.9735685586929321, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011500442400574684, + "rewards/margins": 0.06131824851036072, + "rewards/rejected": -0.07281868904829025, + "step": 1709 + }, + { + "epoch": 1.1825726141078838, + "grad_norm": 4.486275672912598, + "learning_rate": 4.898570769940065e-05, + "log_odds_chosen": 5.068606376647949, + "log_odds_ratio": -0.21615208685398102, + "logits/chosen": -0.7731133699417114, + "logits/rejected": -0.7959321737289429, + "logps/chosen": -0.10496488958597183, + "logps/rejected": -0.5765972137451172, + "loss": 3.7558, + "nll_loss": 0.9173317551612854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010496489703655243, + "rewards/margins": 0.047163236886262894, + "rewards/rejected": -0.05765972286462784, + "step": 1710 + }, + { + "epoch": 1.1832641770401107, + "grad_norm": 6.759716987609863, + "learning_rate": 4.89818656831105e-05, + "log_odds_chosen": 4.021801948547363, + "log_odds_ratio": -0.2717605531215668, + "logits/chosen": -0.3650795817375183, + "logits/rejected": -0.4151354432106018, + "logps/chosen": -0.06702595949172974, + "logps/rejected": -0.6509315967559814, + "loss": 3.7085, + "nll_loss": 0.899939775466919, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006702595390379429, + "rewards/margins": 0.05839055776596069, + "rewards/rejected": -0.06509315967559814, + "step": 1711 + }, + { + "epoch": 1.1839557399723375, + "grad_norm": 5.515722751617432, + "learning_rate": 4.897802366682035e-05, + "log_odds_chosen": 2.76090145111084, + "log_odds_ratio": -0.47430720925331116, + "logits/chosen": -0.1472322642803192, + "logits/rejected": -0.19560343027114868, + "logps/chosen": -0.09127159416675568, + "logps/rejected": -0.47359779477119446, + "loss": 3.639, + "nll_loss": 0.8623095154762268, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009127158671617508, + "rewards/margins": 0.03823261708021164, + "rewards/rejected": -0.047359779477119446, + "step": 1712 + }, + { + "epoch": 1.1846473029045643, + "grad_norm": 5.288144111633301, + "learning_rate": 4.89741816505302e-05, + "log_odds_chosen": 4.28141975402832, + "log_odds_ratio": -0.12948086857795715, + "logits/chosen": 0.026375465095043182, + "logits/rejected": 0.011478882282972336, + "logps/chosen": -0.07874485850334167, + "logps/rejected": -0.8107168674468994, + "loss": 2.7414, + "nll_loss": 0.6723971366882324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007874486967921257, + "rewards/margins": 0.07319720089435577, + "rewards/rejected": -0.08107168227434158, + "step": 1713 + }, + { + "epoch": 1.1853388658367912, + "grad_norm": 5.372937202453613, + "learning_rate": 4.897033963424005e-05, + "log_odds_chosen": 2.137545585632324, + "log_odds_ratio": -0.44550761580467224, + "logits/chosen": -0.6334435939788818, + "logits/rejected": -0.6503807902336121, + "logps/chosen": -0.10513751208782196, + "logps/rejected": -0.3197248876094818, + "loss": 3.7874, + "nll_loss": 0.9022948741912842, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010513750836253166, + "rewards/margins": 0.021458739414811134, + "rewards/rejected": -0.031972486525774, + "step": 1714 + }, + { + "epoch": 1.186030428769018, + "grad_norm": 4.750962734222412, + "learning_rate": 4.89664976179499e-05, + "log_odds_chosen": 3.8123714923858643, + "log_odds_ratio": -0.2624880075454712, + "logits/chosen": -0.5540913343429565, + "logits/rejected": -0.5961364507675171, + "logps/chosen": -0.08316250890493393, + "logps/rejected": -0.7523815631866455, + "loss": 3.0809, + "nll_loss": 0.7439756989479065, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008316251449286938, + "rewards/margins": 0.0669219046831131, + "rewards/rejected": -0.07523815333843231, + "step": 1715 + }, + { + "epoch": 1.1867219917012448, + "grad_norm": 8.46352481842041, + "learning_rate": 4.896265560165975e-05, + "log_odds_chosen": 5.548409938812256, + "log_odds_ratio": -0.22372980415821075, + "logits/chosen": -0.20582397282123566, + "logits/rejected": -0.1781938374042511, + "logps/chosen": -0.06882276386022568, + "logps/rejected": -0.8869989514350891, + "loss": 4.0499, + "nll_loss": 0.9900984764099121, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006882276386022568, + "rewards/margins": 0.0818176195025444, + "rewards/rejected": -0.08869989961385727, + "step": 1716 + }, + { + "epoch": 1.1874135546334716, + "grad_norm": 5.552541732788086, + "learning_rate": 4.895881358536961e-05, + "log_odds_chosen": 5.818368911743164, + "log_odds_ratio": -0.052378714084625244, + "logits/chosen": -0.19172891974449158, + "logits/rejected": -0.352263867855072, + "logps/chosen": -0.016883784905076027, + "logps/rejected": -0.8089572191238403, + "loss": 2.9399, + "nll_loss": 0.7297303080558777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016883786302059889, + "rewards/margins": 0.07920734584331512, + "rewards/rejected": -0.08089572191238403, + "step": 1717 + }, + { + "epoch": 1.1881051175656985, + "grad_norm": 5.634227275848389, + "learning_rate": 4.8954971569079454e-05, + "log_odds_chosen": 4.411200046539307, + "log_odds_ratio": -0.13465569913387299, + "logits/chosen": -0.39296990633010864, + "logits/rejected": -0.44110405445098877, + "logps/chosen": -0.041829999536275864, + "logps/rejected": -0.8756586909294128, + "loss": 3.3538, + "nll_loss": 0.8249937891960144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004182999953627586, + "rewards/margins": 0.08338287472724915, + "rewards/rejected": -0.08756587654352188, + "step": 1718 + }, + { + "epoch": 1.1887966804979253, + "grad_norm": 6.191216468811035, + "learning_rate": 4.8951129552789306e-05, + "log_odds_chosen": 4.064810752868652, + "log_odds_ratio": -0.2015867829322815, + "logits/chosen": -0.4229384958744049, + "logits/rejected": -0.4340212047100067, + "logps/chosen": -0.04063744470477104, + "logps/rejected": -0.73988938331604, + "loss": 2.6727, + "nll_loss": 0.6480090618133545, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004063744563609362, + "rewards/margins": 0.0699252039194107, + "rewards/rejected": -0.07398894429206848, + "step": 1719 + }, + { + "epoch": 1.1894882434301521, + "grad_norm": 3.5908620357513428, + "learning_rate": 4.894728753649916e-05, + "log_odds_chosen": 4.018005847930908, + "log_odds_ratio": -0.30734917521476746, + "logits/chosen": -0.5695242881774902, + "logits/rejected": -0.5691094994544983, + "logps/chosen": -0.08586877584457397, + "logps/rejected": -0.659201979637146, + "loss": 2.8098, + "nll_loss": 0.6717254519462585, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008586877025663853, + "rewards/margins": 0.057333312928676605, + "rewards/rejected": -0.06592019647359848, + "step": 1720 + }, + { + "epoch": 1.190179806362379, + "grad_norm": 5.031204700469971, + "learning_rate": 4.894344552020901e-05, + "log_odds_chosen": 3.613013505935669, + "log_odds_ratio": -0.14017079770565033, + "logits/chosen": -0.6082075834274292, + "logits/rejected": -0.640134871006012, + "logps/chosen": -0.06326667964458466, + "logps/rejected": -0.5767212510108948, + "loss": 2.7309, + "nll_loss": 0.6687150001525879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006326667964458466, + "rewards/margins": 0.05134545639157295, + "rewards/rejected": -0.05767212063074112, + "step": 1721 + }, + { + "epoch": 1.1908713692946058, + "grad_norm": 4.486302852630615, + "learning_rate": 4.8939603503918857e-05, + "log_odds_chosen": 3.342613697052002, + "log_odds_ratio": -0.20768174529075623, + "logits/chosen": -0.2848474383354187, + "logits/rejected": -0.29988640546798706, + "logps/chosen": -0.09562725573778152, + "logps/rejected": -0.66987544298172, + "loss": 2.6715, + "nll_loss": 0.6471147537231445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009562725201249123, + "rewards/margins": 0.057424817234277725, + "rewards/rejected": -0.066987544298172, + "step": 1722 + }, + { + "epoch": 1.1915629322268326, + "grad_norm": 8.49174690246582, + "learning_rate": 4.893576148762871e-05, + "log_odds_chosen": 3.0292856693267822, + "log_odds_ratio": -0.30213072896003723, + "logits/chosen": -0.6463524103164673, + "logits/rejected": -0.6709821224212646, + "logps/chosen": -0.15487344563007355, + "logps/rejected": -0.8251013159751892, + "loss": 4.1571, + "nll_loss": 1.0090569257736206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015487344935536385, + "rewards/margins": 0.06702278554439545, + "rewards/rejected": -0.08251012861728668, + "step": 1723 + }, + { + "epoch": 1.1922544951590595, + "grad_norm": 7.511002063751221, + "learning_rate": 4.893191947133856e-05, + "log_odds_chosen": 4.571445465087891, + "log_odds_ratio": -0.4141373634338379, + "logits/chosen": -0.4878627359867096, + "logits/rejected": -0.5303633809089661, + "logps/chosen": -0.248141810297966, + "logps/rejected": -0.6373559832572937, + "loss": 3.8155, + "nll_loss": 0.9124590158462524, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02481417916715145, + "rewards/margins": 0.03892141580581665, + "rewards/rejected": -0.06373559683561325, + "step": 1724 + }, + { + "epoch": 1.1929460580912863, + "grad_norm": 7.655296802520752, + "learning_rate": 4.892807745504841e-05, + "log_odds_chosen": 3.3585290908813477, + "log_odds_ratio": -0.39322608709335327, + "logits/chosen": -0.6439844965934753, + "logits/rejected": -0.6731945276260376, + "logps/chosen": -0.12331248819828033, + "logps/rejected": -1.0248024463653564, + "loss": 5.2115, + "nll_loss": 1.2635518312454224, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012331248261034489, + "rewards/margins": 0.09014899283647537, + "rewards/rejected": -0.10248024016618729, + "step": 1725 + }, + { + "epoch": 1.1936376210235131, + "grad_norm": 5.599480152130127, + "learning_rate": 4.8924235438758266e-05, + "log_odds_chosen": 5.788207530975342, + "log_odds_ratio": -0.25669771432876587, + "logits/chosen": -0.7034620046615601, + "logits/rejected": -0.7278566360473633, + "logps/chosen": -0.03290760889649391, + "logps/rejected": -0.6548459529876709, + "loss": 3.2463, + "nll_loss": 0.7858980298042297, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032907610293477774, + "rewards/margins": 0.06219382956624031, + "rewards/rejected": -0.06548459082841873, + "step": 1726 + }, + { + "epoch": 1.19432918395574, + "grad_norm": 8.53808879852295, + "learning_rate": 4.892039342246811e-05, + "log_odds_chosen": 4.921740531921387, + "log_odds_ratio": -0.2440432608127594, + "logits/chosen": -0.38131314516067505, + "logits/rejected": -0.44898858666419983, + "logps/chosen": -0.12483307719230652, + "logps/rejected": -1.2572139501571655, + "loss": 4.7227, + "nll_loss": 1.1562637090682983, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012483308091759682, + "rewards/margins": 0.11323809623718262, + "rewards/rejected": -0.12572139501571655, + "step": 1727 + }, + { + "epoch": 1.1950207468879668, + "grad_norm": 7.684170722961426, + "learning_rate": 4.8916551406177964e-05, + "log_odds_chosen": 5.221076965332031, + "log_odds_ratio": -0.22764191031455994, + "logits/chosen": -0.4904865622520447, + "logits/rejected": -0.4865496754646301, + "logps/chosen": -0.08739637583494186, + "logps/rejected": -0.7149273157119751, + "loss": 4.4827, + "nll_loss": 1.0979145765304565, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008739637210965157, + "rewards/margins": 0.0627530962228775, + "rewards/rejected": -0.0714927390217781, + "step": 1728 + }, + { + "epoch": 1.1957123098201936, + "grad_norm": 4.598145961761475, + "learning_rate": 4.891270938988782e-05, + "log_odds_chosen": 4.581998825073242, + "log_odds_ratio": -0.14316655695438385, + "logits/chosen": -0.6146881580352783, + "logits/rejected": -0.6451320648193359, + "logps/chosen": -0.03969764709472656, + "logps/rejected": -0.9337931871414185, + "loss": 3.057, + "nll_loss": 0.7499375343322754, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003969764802604914, + "rewards/margins": 0.08940955996513367, + "rewards/rejected": -0.09337931871414185, + "step": 1729 + }, + { + "epoch": 1.1964038727524204, + "grad_norm": 5.302453517913818, + "learning_rate": 4.890886737359767e-05, + "log_odds_chosen": 2.8931965827941895, + "log_odds_ratio": -0.29582732915878296, + "logits/chosen": -0.23103663325309753, + "logits/rejected": -0.19957208633422852, + "logps/chosen": -0.09102919697761536, + "logps/rejected": -0.4125361740589142, + "loss": 3.5228, + "nll_loss": 0.8511098027229309, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009102920070290565, + "rewards/margins": 0.03215069696307182, + "rewards/rejected": -0.04125361889600754, + "step": 1730 + }, + { + "epoch": 1.1970954356846473, + "grad_norm": 3.398452043533325, + "learning_rate": 4.8905025357307515e-05, + "log_odds_chosen": 4.475583553314209, + "log_odds_ratio": -0.29882586002349854, + "logits/chosen": -0.21776120364665985, + "logits/rejected": -0.20349350571632385, + "logps/chosen": -0.10076668113470078, + "logps/rejected": -0.5111818909645081, + "loss": 2.7408, + "nll_loss": 0.6553149819374084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010076668113470078, + "rewards/margins": 0.04104151949286461, + "rewards/rejected": -0.051118187606334686, + "step": 1731 + }, + { + "epoch": 1.197786998616874, + "grad_norm": 3.415905714035034, + "learning_rate": 4.890118334101737e-05, + "log_odds_chosen": 7.551507949829102, + "log_odds_ratio": -0.020409587770700455, + "logits/chosen": -0.6365310549736023, + "logits/rejected": -0.6799354553222656, + "logps/chosen": -0.01089848019182682, + "logps/rejected": -0.8678996562957764, + "loss": 2.3784, + "nll_loss": 0.5925613641738892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010898481123149395, + "rewards/margins": 0.08570010960102081, + "rewards/rejected": -0.08678996562957764, + "step": 1732 + }, + { + "epoch": 1.198478561549101, + "grad_norm": 5.842837810516357, + "learning_rate": 4.889734132472722e-05, + "log_odds_chosen": 4.548971176147461, + "log_odds_ratio": -0.4501718282699585, + "logits/chosen": -0.5090807676315308, + "logits/rejected": -0.524118185043335, + "logps/chosen": -0.1523047387599945, + "logps/rejected": -0.854877769947052, + "loss": 2.9738, + "nll_loss": 0.6984264254570007, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015230474062263966, + "rewards/margins": 0.07025731354951859, + "rewards/rejected": -0.08548778295516968, + "step": 1733 + }, + { + "epoch": 1.1991701244813278, + "grad_norm": 5.5528082847595215, + "learning_rate": 4.8893499308437066e-05, + "log_odds_chosen": 4.443816184997559, + "log_odds_ratio": -0.20689380168914795, + "logits/chosen": -0.1926521509885788, + "logits/rejected": -0.238718181848526, + "logps/chosen": -0.06474921107292175, + "logps/rejected": -0.8699467182159424, + "loss": 3.2818, + "nll_loss": 0.799752950668335, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006474921479821205, + "rewards/margins": 0.08051975071430206, + "rewards/rejected": -0.08699467033147812, + "step": 1734 + }, + { + "epoch": 1.1998616874135546, + "grad_norm": 6.228503227233887, + "learning_rate": 4.8889657292146925e-05, + "log_odds_chosen": 3.841547727584839, + "log_odds_ratio": -0.2785239517688751, + "logits/chosen": -0.6458946466445923, + "logits/rejected": -0.6550995707511902, + "logps/chosen": -0.10351575911045074, + "logps/rejected": -0.9910328388214111, + "loss": 4.1892, + "nll_loss": 1.019450068473816, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010351575911045074, + "rewards/margins": 0.08875171840190887, + "rewards/rejected": -0.09910327941179276, + "step": 1735 + }, + { + "epoch": 1.2005532503457814, + "grad_norm": 8.065655708312988, + "learning_rate": 4.888581527585677e-05, + "log_odds_chosen": 4.7300567626953125, + "log_odds_ratio": -0.24043934047222137, + "logits/chosen": -0.40952181816101074, + "logits/rejected": -0.43163684010505676, + "logps/chosen": -0.12518151104450226, + "logps/rejected": -1.2267175912857056, + "loss": 2.9163, + "nll_loss": 0.7050365209579468, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012518150731921196, + "rewards/margins": 0.11015360057353973, + "rewards/rejected": -0.12267175316810608, + "step": 1736 + }, + { + "epoch": 1.2012448132780082, + "grad_norm": 6.8196282386779785, + "learning_rate": 4.888197325956662e-05, + "log_odds_chosen": 4.278225898742676, + "log_odds_ratio": -0.2729211151599884, + "logits/chosen": -0.6910278797149658, + "logits/rejected": -0.7113723158836365, + "logps/chosen": -0.09786369651556015, + "logps/rejected": -0.619482159614563, + "loss": 3.8214, + "nll_loss": 0.9280692338943481, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009786369279026985, + "rewards/margins": 0.05216185003519058, + "rewards/rejected": -0.06194822117686272, + "step": 1737 + }, + { + "epoch": 1.201936376210235, + "grad_norm": 5.485548973083496, + "learning_rate": 4.8878131243276475e-05, + "log_odds_chosen": 2.264752149581909, + "log_odds_ratio": -0.496864378452301, + "logits/chosen": -0.3320154547691345, + "logits/rejected": -0.31849053502082825, + "logps/chosen": -0.13289959728717804, + "logps/rejected": -0.7237868905067444, + "loss": 2.6569, + "nll_loss": 0.6145286560058594, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013289961032569408, + "rewards/margins": 0.059088729321956635, + "rewards/rejected": -0.07237869501113892, + "step": 1738 + }, + { + "epoch": 1.202627939142462, + "grad_norm": 6.142898082733154, + "learning_rate": 4.887428922698633e-05, + "log_odds_chosen": 3.9606635570526123, + "log_odds_ratio": -0.31309106945991516, + "logits/chosen": -0.5523556470870972, + "logits/rejected": -0.635855495929718, + "logps/chosen": -0.08076249063014984, + "logps/rejected": -0.8161361217498779, + "loss": 3.7155, + "nll_loss": 0.8975592255592346, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008076248690485954, + "rewards/margins": 0.07353736460208893, + "rewards/rejected": -0.08161361515522003, + "step": 1739 + }, + { + "epoch": 1.2033195020746887, + "grad_norm": 5.83258056640625, + "learning_rate": 4.8870447210696173e-05, + "log_odds_chosen": 2.0771408081054688, + "log_odds_ratio": -0.33731359243392944, + "logits/chosen": -0.679516077041626, + "logits/rejected": -0.6932682991027832, + "logps/chosen": -0.11285239458084106, + "logps/rejected": -0.41165733337402344, + "loss": 3.7447, + "nll_loss": 0.902442455291748, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011285239830613136, + "rewards/margins": 0.029880493879318237, + "rewards/rejected": -0.041165731847286224, + "step": 1740 + }, + { + "epoch": 1.2040110650069156, + "grad_norm": 5.038331508636475, + "learning_rate": 4.8866605194406026e-05, + "log_odds_chosen": 6.890632629394531, + "log_odds_ratio": -0.0032947673462331295, + "logits/chosen": -0.5037215948104858, + "logits/rejected": -0.526785135269165, + "logps/chosen": -0.007296360097825527, + "logps/rejected": -1.105158805847168, + "loss": 3.131, + "nll_loss": 0.7824151515960693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007296359981410205, + "rewards/margins": 0.10978624224662781, + "rewards/rejected": -0.11051587760448456, + "step": 1741 + }, + { + "epoch": 1.2047026279391424, + "grad_norm": 5.662741184234619, + "learning_rate": 4.886276317811588e-05, + "log_odds_chosen": 3.73443603515625, + "log_odds_ratio": -0.10789938271045685, + "logits/chosen": -0.44654685258865356, + "logits/rejected": -0.4576447606086731, + "logps/chosen": -0.07846559584140778, + "logps/rejected": -0.8716351389884949, + "loss": 2.8468, + "nll_loss": 0.7009090185165405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007846559397876263, + "rewards/margins": 0.07931695878505707, + "rewards/rejected": -0.0871635228395462, + "step": 1742 + }, + { + "epoch": 1.2053941908713692, + "grad_norm": 5.150761127471924, + "learning_rate": 4.8858921161825724e-05, + "log_odds_chosen": 2.4600987434387207, + "log_odds_ratio": -0.3202195167541504, + "logits/chosen": -0.2952830493450165, + "logits/rejected": -0.27832943201065063, + "logps/chosen": -0.10139881074428558, + "logps/rejected": -0.6176132559776306, + "loss": 3.2098, + "nll_loss": 0.7704358100891113, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010139882564544678, + "rewards/margins": 0.0516214445233345, + "rewards/rejected": -0.06176132708787918, + "step": 1743 + }, + { + "epoch": 1.206085753803596, + "grad_norm": 6.006669044494629, + "learning_rate": 4.885507914553558e-05, + "log_odds_chosen": 7.206621170043945, + "log_odds_ratio": -0.11751189827919006, + "logits/chosen": -0.11540503799915314, + "logits/rejected": -0.22454169392585754, + "logps/chosen": -0.04032893851399422, + "logps/rejected": -1.2910562753677368, + "loss": 4.2113, + "nll_loss": 1.04107666015625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004032894037663937, + "rewards/margins": 0.12507273256778717, + "rewards/rejected": -0.12910562753677368, + "step": 1744 + }, + { + "epoch": 1.206777316735823, + "grad_norm": 4.959017276763916, + "learning_rate": 4.885123712924543e-05, + "log_odds_chosen": 1.8773472309112549, + "log_odds_ratio": -0.47992217540740967, + "logits/chosen": -0.6515315175056458, + "logits/rejected": -0.6257120966911316, + "logps/chosen": -0.17926810681819916, + "logps/rejected": -0.661084771156311, + "loss": 3.5644, + "nll_loss": 0.8431035280227661, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017926812171936035, + "rewards/margins": 0.04818166047334671, + "rewards/rejected": -0.06610847264528275, + "step": 1745 + }, + { + "epoch": 1.2074688796680497, + "grad_norm": 8.701241493225098, + "learning_rate": 4.884739511295528e-05, + "log_odds_chosen": 4.010853290557861, + "log_odds_ratio": -0.11723016947507858, + "logits/chosen": -0.568061888217926, + "logits/rejected": -0.6578323841094971, + "logps/chosen": -0.07474225759506226, + "logps/rejected": -0.9790551662445068, + "loss": 4.6718, + "nll_loss": 1.1562221050262451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007474225480109453, + "rewards/margins": 0.09043128788471222, + "rewards/rejected": -0.09790551662445068, + "step": 1746 + }, + { + "epoch": 1.2081604426002766, + "grad_norm": 5.493067264556885, + "learning_rate": 4.8843553096665134e-05, + "log_odds_chosen": 4.669642925262451, + "log_odds_ratio": -0.23646828532218933, + "logits/chosen": -0.3074951171875, + "logits/rejected": -0.3432926833629608, + "logps/chosen": -0.06846563518047333, + "logps/rejected": -0.9051128625869751, + "loss": 3.3922, + "nll_loss": 0.824414074420929, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006846563890576363, + "rewards/margins": 0.08366473019123077, + "rewards/rejected": -0.09051129221916199, + "step": 1747 + }, + { + "epoch": 1.2088520055325034, + "grad_norm": 8.25934886932373, + "learning_rate": 4.8839711080374986e-05, + "log_odds_chosen": 3.223919630050659, + "log_odds_ratio": -0.21303078532218933, + "logits/chosen": -0.42224496603012085, + "logits/rejected": -0.43888431787490845, + "logps/chosen": -0.08300723135471344, + "logps/rejected": -0.7980213165283203, + "loss": 3.9078, + "nll_loss": 0.9556349515914917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0083007225766778, + "rewards/margins": 0.07150140404701233, + "rewards/rejected": -0.07980212569236755, + "step": 1748 + }, + { + "epoch": 1.2095435684647302, + "grad_norm": 8.729022979736328, + "learning_rate": 4.883586906408483e-05, + "log_odds_chosen": 4.861759185791016, + "log_odds_ratio": -0.23737916350364685, + "logits/chosen": -0.32016968727111816, + "logits/rejected": -0.3375993072986603, + "logps/chosen": -0.14943954348564148, + "logps/rejected": -0.7811824679374695, + "loss": 3.8816, + "nll_loss": 0.9466730952262878, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014943954534828663, + "rewards/margins": 0.0631742924451828, + "rewards/rejected": -0.07811824232339859, + "step": 1749 + }, + { + "epoch": 1.210235131396957, + "grad_norm": 5.598297595977783, + "learning_rate": 4.8832027047794684e-05, + "log_odds_chosen": 3.8768386840820312, + "log_odds_ratio": -0.2456800788640976, + "logits/chosen": -0.5290508270263672, + "logits/rejected": -0.5527206659317017, + "logps/chosen": -0.10265764594078064, + "logps/rejected": -0.6469843983650208, + "loss": 3.3323, + "nll_loss": 0.8085078001022339, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010265765711665154, + "rewards/margins": 0.054432667791843414, + "rewards/rejected": -0.06469843536615372, + "step": 1750 + }, + { + "epoch": 1.2109266943291839, + "grad_norm": 6.770428657531738, + "learning_rate": 4.882818503150454e-05, + "log_odds_chosen": 2.9814016819000244, + "log_odds_ratio": -0.2961695194244385, + "logits/chosen": -0.690618634223938, + "logits/rejected": -0.6883652806282043, + "logps/chosen": -0.1316520869731903, + "logps/rejected": -0.7826559543609619, + "loss": 4.3335, + "nll_loss": 1.0537523031234741, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01316520944237709, + "rewards/margins": 0.06510038673877716, + "rewards/rejected": -0.07826559990644455, + "step": 1751 + }, + { + "epoch": 1.2116182572614107, + "grad_norm": 6.940248966217041, + "learning_rate": 4.882434301521438e-05, + "log_odds_chosen": 0.9520793557167053, + "log_odds_ratio": -0.5022197961807251, + "logits/chosen": -0.45700353384017944, + "logits/rejected": -0.41526639461517334, + "logps/chosen": -0.19162046909332275, + "logps/rejected": -0.46913477778434753, + "loss": 4.8986, + "nll_loss": 1.174436330795288, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019162047654390335, + "rewards/margins": 0.027751432731747627, + "rewards/rejected": -0.04691348224878311, + "step": 1752 + }, + { + "epoch": 1.2123098201936375, + "grad_norm": 12.57465648651123, + "learning_rate": 4.882050099892424e-05, + "log_odds_chosen": 1.6119085550308228, + "log_odds_ratio": -1.0399413108825684, + "logits/chosen": -0.6990538835525513, + "logits/rejected": -0.6973117589950562, + "logps/chosen": -0.2371227741241455, + "logps/rejected": -0.3703542649745941, + "loss": 4.8505, + "nll_loss": 1.1086349487304688, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02371227741241455, + "rewards/margins": 0.013323148712515831, + "rewards/rejected": -0.03703542798757553, + "step": 1753 + }, + { + "epoch": 1.2130013831258644, + "grad_norm": 6.856281757354736, + "learning_rate": 4.881665898263409e-05, + "log_odds_chosen": 3.3835794925689697, + "log_odds_ratio": -0.37793684005737305, + "logits/chosen": -0.6199455261230469, + "logits/rejected": -0.6514655351638794, + "logps/chosen": -0.12874896824359894, + "logps/rejected": -0.5390415191650391, + "loss": 3.995, + "nll_loss": 0.9609636664390564, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012874897569417953, + "rewards/margins": 0.04102925583720207, + "rewards/rejected": -0.053904157131910324, + "step": 1754 + }, + { + "epoch": 1.2136929460580912, + "grad_norm": 5.524672508239746, + "learning_rate": 4.881281696634394e-05, + "log_odds_chosen": 5.13658332824707, + "log_odds_ratio": -0.03698335960507393, + "logits/chosen": -0.44332507252693176, + "logits/rejected": -0.4719582200050354, + "logps/chosen": -0.053218670189380646, + "logps/rejected": -0.8304323554039001, + "loss": 2.7617, + "nll_loss": 0.6867244243621826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005321866367012262, + "rewards/margins": 0.07772136479616165, + "rewards/rejected": -0.08304324001073837, + "step": 1755 + }, + { + "epoch": 1.214384508990318, + "grad_norm": 12.355244636535645, + "learning_rate": 4.880897495005379e-05, + "log_odds_chosen": 3.635563850402832, + "log_odds_ratio": -0.7260986566543579, + "logits/chosen": -0.4599088430404663, + "logits/rejected": -0.42827102541923523, + "logps/chosen": -0.18452264368534088, + "logps/rejected": -0.5236896276473999, + "loss": 3.6846, + "nll_loss": 0.8485289812088013, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018452264368534088, + "rewards/margins": 0.03391670063138008, + "rewards/rejected": -0.05236896127462387, + "step": 1756 + }, + { + "epoch": 1.215076071922545, + "grad_norm": 4.876830101013184, + "learning_rate": 4.8805132933763645e-05, + "log_odds_chosen": 5.384311676025391, + "log_odds_ratio": -0.04986800625920296, + "logits/chosen": -0.28661617636680603, + "logits/rejected": -0.3225519061088562, + "logps/chosen": -0.026972772553563118, + "logps/rejected": -0.7848159670829773, + "loss": 3.338, + "nll_loss": 0.8295071125030518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026972773484885693, + "rewards/margins": 0.07578432559967041, + "rewards/rejected": -0.07848159968852997, + "step": 1757 + }, + { + "epoch": 1.215767634854772, + "grad_norm": 3.9064624309539795, + "learning_rate": 4.880129091747349e-05, + "log_odds_chosen": 4.191158294677734, + "log_odds_ratio": -0.12977999448776245, + "logits/chosen": -0.5362319946289062, + "logits/rejected": -0.5842596292495728, + "logps/chosen": -0.062384773045778275, + "logps/rejected": -0.7664477229118347, + "loss": 2.7305, + "nll_loss": 0.6696509718894958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0062384773045778275, + "rewards/margins": 0.07040630280971527, + "rewards/rejected": -0.07664477080106735, + "step": 1758 + }, + { + "epoch": 1.2164591977869987, + "grad_norm": 6.504387855529785, + "learning_rate": 4.879744890118334e-05, + "log_odds_chosen": 1.8020520210266113, + "log_odds_ratio": -0.5221161842346191, + "logits/chosen": -0.6099242568016052, + "logits/rejected": -0.5896930694580078, + "logps/chosen": -0.11337540298700333, + "logps/rejected": -0.4469042420387268, + "loss": 3.8603, + "nll_loss": 0.9128572344779968, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011337541043758392, + "rewards/margins": 0.03335288166999817, + "rewards/rejected": -0.04469042271375656, + "step": 1759 + }, + { + "epoch": 1.2171507607192256, + "grad_norm": 4.438575267791748, + "learning_rate": 4.8793606884893195e-05, + "log_odds_chosen": 4.415384292602539, + "log_odds_ratio": -0.12809635698795319, + "logits/chosen": -1.0808501243591309, + "logits/rejected": -1.1437695026397705, + "logps/chosen": -0.07756942510604858, + "logps/rejected": -0.9279264211654663, + "loss": 4.8248, + "nll_loss": 1.1933966875076294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007756942883133888, + "rewards/margins": 0.08503570407629013, + "rewards/rejected": -0.09279264509677887, + "step": 1760 + }, + { + "epoch": 1.2178423236514524, + "grad_norm": 3.9479455947875977, + "learning_rate": 4.878976486860304e-05, + "log_odds_chosen": 5.265148162841797, + "log_odds_ratio": -0.12130524963140488, + "logits/chosen": -0.12485721707344055, + "logits/rejected": -0.14175529778003693, + "logps/chosen": -0.05179094150662422, + "logps/rejected": -1.0051532983779907, + "loss": 2.4347, + "nll_loss": 0.596540629863739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005179094150662422, + "rewards/margins": 0.09533624351024628, + "rewards/rejected": -0.10051532834768295, + "step": 1761 + }, + { + "epoch": 1.2185338865836792, + "grad_norm": 7.4558515548706055, + "learning_rate": 4.87859228523129e-05, + "log_odds_chosen": 3.7101030349731445, + "log_odds_ratio": -0.14278443157672882, + "logits/chosen": -0.36934995651245117, + "logits/rejected": -0.403659850358963, + "logps/chosen": -0.09488293528556824, + "logps/rejected": -0.6350594162940979, + "loss": 5.1023, + "nll_loss": 1.2613072395324707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009488292969763279, + "rewards/margins": 0.054017651826143265, + "rewards/rejected": -0.06350594758987427, + "step": 1762 + }, + { + "epoch": 1.219225449515906, + "grad_norm": 5.117076873779297, + "learning_rate": 4.8782080836022746e-05, + "log_odds_chosen": 1.7559460401535034, + "log_odds_ratio": -0.2781575322151184, + "logits/chosen": -0.5458097457885742, + "logits/rejected": -0.5692986845970154, + "logps/chosen": -0.14074856042861938, + "logps/rejected": -0.42338860034942627, + "loss": 3.6414, + "nll_loss": 0.8825327157974243, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014074856415390968, + "rewards/margins": 0.0282640028744936, + "rewards/rejected": -0.042338863015174866, + "step": 1763 + }, + { + "epoch": 1.2199170124481329, + "grad_norm": 5.407169818878174, + "learning_rate": 4.87782388197326e-05, + "log_odds_chosen": 4.143532752990723, + "log_odds_ratio": -0.15496212244033813, + "logits/chosen": -0.6489083766937256, + "logits/rejected": -0.6936314702033997, + "logps/chosen": -0.034726690500974655, + "logps/rejected": -0.7174232602119446, + "loss": 3.3003, + "nll_loss": 0.8095715045928955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034726690500974655, + "rewards/margins": 0.06826966255903244, + "rewards/rejected": -0.07174232602119446, + "step": 1764 + }, + { + "epoch": 1.2206085753803597, + "grad_norm": 6.849701881408691, + "learning_rate": 4.877439680344245e-05, + "log_odds_chosen": 4.303737163543701, + "log_odds_ratio": -0.28363704681396484, + "logits/chosen": -0.6390360593795776, + "logits/rejected": -0.6330482959747314, + "logps/chosen": -0.09742604196071625, + "logps/rejected": -0.8334695100784302, + "loss": 3.9848, + "nll_loss": 0.9678481221199036, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009742604568600655, + "rewards/margins": 0.07360435277223587, + "rewards/rejected": -0.08334695547819138, + "step": 1765 + }, + { + "epoch": 1.2213001383125865, + "grad_norm": 4.578864574432373, + "learning_rate": 4.87705547871523e-05, + "log_odds_chosen": 4.171421527862549, + "log_odds_ratio": -0.32398200035095215, + "logits/chosen": -0.3163169324398041, + "logits/rejected": -0.2699214816093445, + "logps/chosen": -0.07687410712242126, + "logps/rejected": -0.562875509262085, + "loss": 2.4355, + "nll_loss": 0.5764786005020142, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007687410339713097, + "rewards/margins": 0.04860014095902443, + "rewards/rejected": -0.05628754943609238, + "step": 1766 + }, + { + "epoch": 1.2219917012448134, + "grad_norm": 6.775142192840576, + "learning_rate": 4.876671277086215e-05, + "log_odds_chosen": 3.3114845752716064, + "log_odds_ratio": -0.30798664689064026, + "logits/chosen": -0.8572962284088135, + "logits/rejected": -0.8945976495742798, + "logps/chosen": -0.14023393392562866, + "logps/rejected": -0.7111852169036865, + "loss": 4.3299, + "nll_loss": 1.0516669750213623, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014023393392562866, + "rewards/margins": 0.057095129042863846, + "rewards/rejected": -0.07111851871013641, + "step": 1767 + }, + { + "epoch": 1.2226832641770402, + "grad_norm": 3.9804606437683105, + "learning_rate": 4.8762870754572e-05, + "log_odds_chosen": 4.615160942077637, + "log_odds_ratio": -0.18223318457603455, + "logits/chosen": -0.6927958726882935, + "logits/rejected": -0.6910476088523865, + "logps/chosen": -0.029494691640138626, + "logps/rejected": -0.6091811656951904, + "loss": 2.7294, + "nll_loss": 0.6641297340393066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029494690243154764, + "rewards/margins": 0.05796864628791809, + "rewards/rejected": -0.06091811880469322, + "step": 1768 + }, + { + "epoch": 1.223374827109267, + "grad_norm": 7.519423484802246, + "learning_rate": 4.8759028738281854e-05, + "log_odds_chosen": 2.6739237308502197, + "log_odds_ratio": -0.5759358406066895, + "logits/chosen": -0.7101910710334778, + "logits/rejected": -0.735167384147644, + "logps/chosen": -0.17419900000095367, + "logps/rejected": -0.5741370916366577, + "loss": 3.8378, + "nll_loss": 0.9018557667732239, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017419900745153427, + "rewards/margins": 0.039993807673454285, + "rewards/rejected": -0.05741371214389801, + "step": 1769 + }, + { + "epoch": 1.2240663900414939, + "grad_norm": 5.838580131530762, + "learning_rate": 4.87551867219917e-05, + "log_odds_chosen": 4.783184051513672, + "log_odds_ratio": -0.05730760842561722, + "logits/chosen": -0.3828684687614441, + "logits/rejected": -0.4162397086620331, + "logps/chosen": -0.024466292932629585, + "logps/rejected": -0.8661034107208252, + "loss": 3.8114, + "nll_loss": 0.947107195854187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002446629572659731, + "rewards/margins": 0.08416370302438736, + "rewards/rejected": -0.0866103321313858, + "step": 1770 + }, + { + "epoch": 1.2247579529737207, + "grad_norm": 4.113922595977783, + "learning_rate": 4.875134470570156e-05, + "log_odds_chosen": 6.979005336761475, + "log_odds_ratio": -0.019842159003019333, + "logits/chosen": -0.5436484813690186, + "logits/rejected": -0.585045576095581, + "logps/chosen": -0.025577958673238754, + "logps/rejected": -0.8694262504577637, + "loss": 2.8166, + "nll_loss": 0.7021672129631042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002557795960456133, + "rewards/margins": 0.08438482880592346, + "rewards/rejected": -0.0869426280260086, + "step": 1771 + }, + { + "epoch": 1.2254495159059475, + "grad_norm": 5.46701717376709, + "learning_rate": 4.8747502689411404e-05, + "log_odds_chosen": 3.2045738697052, + "log_odds_ratio": -0.4168136715888977, + "logits/chosen": -0.47816532850265503, + "logits/rejected": -0.5259116888046265, + "logps/chosen": -0.08361619710922241, + "logps/rejected": -0.41244637966156006, + "loss": 2.8807, + "nll_loss": 0.678496241569519, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00836162082850933, + "rewards/margins": 0.03288302198052406, + "rewards/rejected": -0.041244640946388245, + "step": 1772 + }, + { + "epoch": 1.2261410788381744, + "grad_norm": 3.5881946086883545, + "learning_rate": 4.874366067312126e-05, + "log_odds_chosen": 4.681888103485107, + "log_odds_ratio": -0.23527833819389343, + "logits/chosen": -0.4189876317977905, + "logits/rejected": -0.43972110748291016, + "logps/chosen": -0.09223958849906921, + "logps/rejected": -0.6895633935928345, + "loss": 2.6699, + "nll_loss": 0.6439555883407593, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009223959408700466, + "rewards/margins": 0.05973239243030548, + "rewards/rejected": -0.06895634531974792, + "step": 1773 + }, + { + "epoch": 1.2268326417704012, + "grad_norm": 5.046339988708496, + "learning_rate": 4.873981865683111e-05, + "log_odds_chosen": 2.407923936843872, + "log_odds_ratio": -0.16615843772888184, + "logits/chosen": -0.2516796588897705, + "logits/rejected": -0.2353476881980896, + "logps/chosen": -0.07308045029640198, + "logps/rejected": -0.49336302280426025, + "loss": 2.8996, + "nll_loss": 0.7082836627960205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007308045402169228, + "rewards/margins": 0.04202825948596001, + "rewards/rejected": -0.04933629930019379, + "step": 1774 + }, + { + "epoch": 1.227524204702628, + "grad_norm": 6.417335033416748, + "learning_rate": 4.873597664054096e-05, + "log_odds_chosen": 3.896390199661255, + "log_odds_ratio": -0.1744166910648346, + "logits/chosen": -0.3504815101623535, + "logits/rejected": -0.3109341263771057, + "logps/chosen": -0.06069917231798172, + "logps/rejected": -0.7529281377792358, + "loss": 3.9176, + "nll_loss": 0.9619663953781128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006069916766136885, + "rewards/margins": 0.06922289729118347, + "rewards/rejected": -0.07529281079769135, + "step": 1775 + }, + { + "epoch": 1.2282157676348548, + "grad_norm": 7.379581928253174, + "learning_rate": 4.873213462425081e-05, + "log_odds_chosen": 3.4607551097869873, + "log_odds_ratio": -0.37025904655456543, + "logits/chosen": -0.5527825355529785, + "logits/rejected": -0.5550543665885925, + "logps/chosen": -0.10252739489078522, + "logps/rejected": -0.5675938725471497, + "loss": 3.6683, + "nll_loss": 0.8800457715988159, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010252740234136581, + "rewards/margins": 0.046506647020578384, + "rewards/rejected": -0.056759387254714966, + "step": 1776 + }, + { + "epoch": 1.2289073305670817, + "grad_norm": 9.051420211791992, + "learning_rate": 4.872829260796066e-05, + "log_odds_chosen": 1.5939397811889648, + "log_odds_ratio": -0.34218525886535645, + "logits/chosen": -0.6052234172821045, + "logits/rejected": -0.6254400610923767, + "logps/chosen": -0.16898562014102936, + "logps/rejected": -0.6538894176483154, + "loss": 3.4297, + "nll_loss": 0.8231991529464722, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016898561269044876, + "rewards/margins": 0.04849037900567055, + "rewards/rejected": -0.06538893282413483, + "step": 1777 + }, + { + "epoch": 1.2295988934993085, + "grad_norm": 11.628338813781738, + "learning_rate": 4.872445059167051e-05, + "log_odds_chosen": 3.865147590637207, + "log_odds_ratio": -0.27473387122154236, + "logits/chosen": -0.183550626039505, + "logits/rejected": -0.26342085003852844, + "logps/chosen": -0.03889723867177963, + "logps/rejected": -0.5629591345787048, + "loss": 3.0423, + "nll_loss": 0.7331140637397766, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038897236809134483, + "rewards/margins": 0.052406199276447296, + "rewards/rejected": -0.05629592016339302, + "step": 1778 + }, + { + "epoch": 1.2302904564315353, + "grad_norm": 5.52664852142334, + "learning_rate": 4.872060857538036e-05, + "log_odds_chosen": 6.228176116943359, + "log_odds_ratio": -0.03211307153105736, + "logits/chosen": -0.31341448426246643, + "logits/rejected": -0.3368358016014099, + "logps/chosen": -0.030239183455705643, + "logps/rejected": -0.7390041351318359, + "loss": 3.3062, + "nll_loss": 0.8233508467674255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030239184852689505, + "rewards/margins": 0.07087649405002594, + "rewards/rejected": -0.07390041649341583, + "step": 1779 + }, + { + "epoch": 1.2309820193637622, + "grad_norm": 6.157802104949951, + "learning_rate": 4.871676655909022e-05, + "log_odds_chosen": 4.707504749298096, + "log_odds_ratio": -0.13479316234588623, + "logits/chosen": -0.5304163694381714, + "logits/rejected": -0.5511650443077087, + "logps/chosen": -0.052768275141716, + "logps/rejected": -0.7715237140655518, + "loss": 4.0619, + "nll_loss": 1.0019972324371338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005276828072965145, + "rewards/margins": 0.07187553495168686, + "rewards/rejected": -0.07715237140655518, + "step": 1780 + }, + { + "epoch": 1.231673582295989, + "grad_norm": 8.27289867401123, + "learning_rate": 4.871292454280006e-05, + "log_odds_chosen": 4.5420122146606445, + "log_odds_ratio": -0.2521488070487976, + "logits/chosen": -0.5395856499671936, + "logits/rejected": -0.5455723404884338, + "logps/chosen": -0.1704384982585907, + "logps/rejected": -0.8473212718963623, + "loss": 4.0229, + "nll_loss": 0.980500340461731, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01704385131597519, + "rewards/margins": 0.06768827140331268, + "rewards/rejected": -0.08473212271928787, + "step": 1781 + }, + { + "epoch": 1.2323651452282158, + "grad_norm": 6.026027679443359, + "learning_rate": 4.8709082526509915e-05, + "log_odds_chosen": 2.95859694480896, + "log_odds_ratio": -0.18901872634887695, + "logits/chosen": -0.6355127096176147, + "logits/rejected": -0.6376572251319885, + "logps/chosen": -0.08060871064662933, + "logps/rejected": -0.5930425524711609, + "loss": 3.294, + "nll_loss": 0.8046071529388428, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008060871623456478, + "rewards/margins": 0.051243383437395096, + "rewards/rejected": -0.05930424854159355, + "step": 1782 + }, + { + "epoch": 1.2330567081604427, + "grad_norm": 5.428910732269287, + "learning_rate": 4.870524051021977e-05, + "log_odds_chosen": 7.058526515960693, + "log_odds_ratio": -0.03021731786429882, + "logits/chosen": -0.30677375197410583, + "logits/rejected": -0.4313288927078247, + "logps/chosen": -0.011306393891572952, + "logps/rejected": -1.24741530418396, + "loss": 3.2365, + "nll_loss": 0.806109607219696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011306394590064883, + "rewards/margins": 0.12361088395118713, + "rewards/rejected": -0.12474152445793152, + "step": 1783 + }, + { + "epoch": 1.2337482710926695, + "grad_norm": 5.7618231773376465, + "learning_rate": 4.870139849392962e-05, + "log_odds_chosen": 6.493813991546631, + "log_odds_ratio": -0.09601682424545288, + "logits/chosen": -0.435533344745636, + "logits/rejected": -0.4357895851135254, + "logps/chosen": -0.03549773246049881, + "logps/rejected": -1.1804301738739014, + "loss": 2.9047, + "nll_loss": 0.7165642380714417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035497734788805246, + "rewards/margins": 0.1144932359457016, + "rewards/rejected": -0.11804300546646118, + "step": 1784 + }, + { + "epoch": 1.2344398340248963, + "grad_norm": 9.983572959899902, + "learning_rate": 4.8697556477639466e-05, + "log_odds_chosen": 3.681022882461548, + "log_odds_ratio": -0.39471232891082764, + "logits/chosen": -0.20369184017181396, + "logits/rejected": -0.2624896764755249, + "logps/chosen": -0.10708246380090714, + "logps/rejected": -1.0745576620101929, + "loss": 4.8753, + "nll_loss": 1.1793495416641235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010708245448768139, + "rewards/margins": 0.0967475175857544, + "rewards/rejected": -0.1074557676911354, + "step": 1785 + }, + { + "epoch": 1.2351313969571232, + "grad_norm": 3.3492255210876465, + "learning_rate": 4.869371446134932e-05, + "log_odds_chosen": 3.9649410247802734, + "log_odds_ratio": -0.11080026626586914, + "logits/chosen": -0.7051090002059937, + "logits/rejected": -0.7223587036132812, + "logps/chosen": -0.053031086921691895, + "logps/rejected": -0.6676794290542603, + "loss": 2.287, + "nll_loss": 0.5606632828712463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005303108599036932, + "rewards/margins": 0.061464838683605194, + "rewards/rejected": -0.06676793843507767, + "step": 1786 + }, + { + "epoch": 1.23582295988935, + "grad_norm": 6.671707630157471, + "learning_rate": 4.868987244505917e-05, + "log_odds_chosen": 4.573537349700928, + "log_odds_ratio": -0.16145706176757812, + "logits/chosen": -0.5118668079376221, + "logits/rejected": -0.568949818611145, + "logps/chosen": -0.06815500557422638, + "logps/rejected": -0.9939965009689331, + "loss": 3.7013, + "nll_loss": 0.9091796278953552, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006815500557422638, + "rewards/margins": 0.09258415549993515, + "rewards/rejected": -0.09939965605735779, + "step": 1787 + }, + { + "epoch": 1.2365145228215768, + "grad_norm": 4.77935791015625, + "learning_rate": 4.8686030428769016e-05, + "log_odds_chosen": 4.834593296051025, + "log_odds_ratio": -0.20173893868923187, + "logits/chosen": -0.41683146357536316, + "logits/rejected": -0.3714733421802521, + "logps/chosen": -0.17759756743907928, + "logps/rejected": -0.8496260046958923, + "loss": 3.5081, + "nll_loss": 0.8568394184112549, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01775975711643696, + "rewards/margins": 0.0672028437256813, + "rewards/rejected": -0.08496260643005371, + "step": 1788 + }, + { + "epoch": 1.2372060857538036, + "grad_norm": 7.869295597076416, + "learning_rate": 4.868218841247887e-05, + "log_odds_chosen": 2.6747584342956543, + "log_odds_ratio": -0.38161760568618774, + "logits/chosen": -0.6707320213317871, + "logits/rejected": -0.7061043977737427, + "logps/chosen": -0.13883009552955627, + "logps/rejected": -0.6048102974891663, + "loss": 4.4771, + "nll_loss": 1.081102967262268, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013883009552955627, + "rewards/margins": 0.04659801721572876, + "rewards/rejected": -0.06048102676868439, + "step": 1789 + }, + { + "epoch": 1.2378976486860305, + "grad_norm": 6.983148574829102, + "learning_rate": 4.867834639618872e-05, + "log_odds_chosen": 5.052033424377441, + "log_odds_ratio": -0.3466581404209137, + "logits/chosen": -0.541464626789093, + "logits/rejected": -0.6000796556472778, + "logps/chosen": -0.0706629678606987, + "logps/rejected": -0.9605896472930908, + "loss": 3.2206, + "nll_loss": 0.7704898118972778, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007066297344863415, + "rewards/margins": 0.08899267017841339, + "rewards/rejected": -0.09605896472930908, + "step": 1790 + }, + { + "epoch": 1.2385892116182573, + "grad_norm": 5.895198822021484, + "learning_rate": 4.8674504379898574e-05, + "log_odds_chosen": 3.636683940887451, + "log_odds_ratio": -0.4684436023235321, + "logits/chosen": -0.18043142557144165, + "logits/rejected": -0.1791577935218811, + "logps/chosen": -0.10141946375370026, + "logps/rejected": -0.514195442199707, + "loss": 2.7713, + "nll_loss": 0.6459746956825256, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010141946375370026, + "rewards/margins": 0.04127759858965874, + "rewards/rejected": -0.05141954496502876, + "step": 1791 + }, + { + "epoch": 1.2392807745504841, + "grad_norm": 15.659523963928223, + "learning_rate": 4.867066236360842e-05, + "log_odds_chosen": 1.765541672706604, + "log_odds_ratio": -0.8988336324691772, + "logits/chosen": -0.36447399854660034, + "logits/rejected": -0.4492056369781494, + "logps/chosen": -0.14289280772209167, + "logps/rejected": -0.5673074722290039, + "loss": 4.6843, + "nll_loss": 1.0811951160430908, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014289281331002712, + "rewards/margins": 0.042441464960575104, + "rewards/rejected": -0.05673075094819069, + "step": 1792 + }, + { + "epoch": 1.239972337482711, + "grad_norm": 8.014297485351562, + "learning_rate": 4.866682034731828e-05, + "log_odds_chosen": 6.788247108459473, + "log_odds_ratio": -0.04455922171473503, + "logits/chosen": -0.45799165964126587, + "logits/rejected": -0.5114096403121948, + "logps/chosen": -0.026142219081521034, + "logps/rejected": -1.3048663139343262, + "loss": 4.0886, + "nll_loss": 1.017690658569336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026142222341150045, + "rewards/margins": 0.12787240743637085, + "rewards/rejected": -0.1304866373538971, + "step": 1793 + }, + { + "epoch": 1.2406639004149378, + "grad_norm": 7.309274196624756, + "learning_rate": 4.8662978331028124e-05, + "log_odds_chosen": 6.315692901611328, + "log_odds_ratio": -0.16049347817897797, + "logits/chosen": -0.41751378774642944, + "logits/rejected": -0.4461780786514282, + "logps/chosen": -0.06197963282465935, + "logps/rejected": -0.7669271230697632, + "loss": 3.3334, + "nll_loss": 0.817298948764801, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0061979638412594795, + "rewards/margins": 0.07049474865198135, + "rewards/rejected": -0.07669270783662796, + "step": 1794 + }, + { + "epoch": 1.2413554633471646, + "grad_norm": 8.051568984985352, + "learning_rate": 4.8659136314737977e-05, + "log_odds_chosen": 7.974938869476318, + "log_odds_ratio": -0.0025558762717992067, + "logits/chosen": -0.21984058618545532, + "logits/rejected": -0.3406951427459717, + "logps/chosen": -0.003969233483076096, + "logps/rejected": -1.5013253688812256, + "loss": 4.0633, + "nll_loss": 1.0155622959136963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039692336576990783, + "rewards/margins": 0.14973559975624084, + "rewards/rejected": -0.15013253688812256, + "step": 1795 + }, + { + "epoch": 1.2420470262793915, + "grad_norm": 5.935072422027588, + "learning_rate": 4.865529429844783e-05, + "log_odds_chosen": 2.892843246459961, + "log_odds_ratio": -0.9609073400497437, + "logits/chosen": -0.4119155704975128, + "logits/rejected": -0.38800469040870667, + "logps/chosen": -0.07128679752349854, + "logps/rejected": -0.5854979753494263, + "loss": 3.2578, + "nll_loss": 0.7183555364608765, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007128680124878883, + "rewards/margins": 0.051421117037534714, + "rewards/rejected": -0.05854979529976845, + "step": 1796 + }, + { + "epoch": 1.2427385892116183, + "grad_norm": 3.907433032989502, + "learning_rate": 4.8651452282157675e-05, + "log_odds_chosen": 4.29548454284668, + "log_odds_ratio": -0.18526923656463623, + "logits/chosen": -0.3604997992515564, + "logits/rejected": -0.3359731435775757, + "logps/chosen": -0.05916924402117729, + "logps/rejected": -0.606911838054657, + "loss": 1.9766, + "nll_loss": 0.47562935948371887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005916924215853214, + "rewards/margins": 0.05477425456047058, + "rewards/rejected": -0.06069118157029152, + "step": 1797 + }, + { + "epoch": 1.2434301521438451, + "grad_norm": 7.241527557373047, + "learning_rate": 4.864761026586753e-05, + "log_odds_chosen": 2.8537378311157227, + "log_odds_ratio": -0.36928698420524597, + "logits/chosen": -0.4368304908275604, + "logits/rejected": -0.49222734570503235, + "logps/chosen": -0.09692305326461792, + "logps/rejected": -0.6475295424461365, + "loss": 4.222, + "nll_loss": 1.0185637474060059, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009692303836345673, + "rewards/margins": 0.055060647428035736, + "rewards/rejected": -0.06475295126438141, + "step": 1798 + }, + { + "epoch": 1.244121715076072, + "grad_norm": 6.7715535163879395, + "learning_rate": 4.864376824957738e-05, + "log_odds_chosen": 4.448674201965332, + "log_odds_ratio": -0.12964072823524475, + "logits/chosen": -0.43957293033599854, + "logits/rejected": -0.5094834566116333, + "logps/chosen": -0.057148225605487823, + "logps/rejected": -1.0359687805175781, + "loss": 3.5742, + "nll_loss": 0.8805873394012451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005714822560548782, + "rewards/margins": 0.09788206219673157, + "rewards/rejected": -0.10359688103199005, + "step": 1799 + }, + { + "epoch": 1.2448132780082988, + "grad_norm": 5.604511737823486, + "learning_rate": 4.863992623328723e-05, + "log_odds_chosen": 4.957757949829102, + "log_odds_ratio": -0.0533502958714962, + "logits/chosen": -0.6584824323654175, + "logits/rejected": -0.7121330499649048, + "logps/chosen": -0.0186811201274395, + "logps/rejected": -0.6562873125076294, + "loss": 3.541, + "nll_loss": 0.8799247741699219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018681121291592717, + "rewards/margins": 0.06376062333583832, + "rewards/rejected": -0.06562872976064682, + "step": 1800 + }, + { + "epoch": 1.2455048409405256, + "grad_norm": 4.875192165374756, + "learning_rate": 4.863608421699708e-05, + "log_odds_chosen": 1.3748055696487427, + "log_odds_ratio": -0.40649357438087463, + "logits/chosen": -0.37381306290626526, + "logits/rejected": -0.3776434063911438, + "logps/chosen": -0.1655338704586029, + "logps/rejected": -0.6377518177032471, + "loss": 3.6855, + "nll_loss": 0.8807240128517151, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01655338704586029, + "rewards/margins": 0.047221794724464417, + "rewards/rejected": -0.06377518177032471, + "step": 1801 + }, + { + "epoch": 1.2461964038727524, + "grad_norm": 7.296558380126953, + "learning_rate": 4.863224220070694e-05, + "log_odds_chosen": 1.5173479318618774, + "log_odds_ratio": -0.5433456897735596, + "logits/chosen": -0.6616544127464294, + "logits/rejected": -0.6958113312721252, + "logps/chosen": -0.16676326096057892, + "logps/rejected": -0.5516203045845032, + "loss": 4.5563, + "nll_loss": 1.0847339630126953, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016676325350999832, + "rewards/margins": 0.03848570957779884, + "rewards/rejected": -0.055162034928798676, + "step": 1802 + }, + { + "epoch": 1.2468879668049793, + "grad_norm": 3.2413599491119385, + "learning_rate": 4.862840018441678e-05, + "log_odds_chosen": 5.263888835906982, + "log_odds_ratio": -0.1274007260799408, + "logits/chosen": -0.5223049521446228, + "logits/rejected": -0.5819611549377441, + "logps/chosen": -0.05425209179520607, + "logps/rejected": -0.583556056022644, + "loss": 3.2491, + "nll_loss": 0.7995354533195496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005425209179520607, + "rewards/margins": 0.05293039232492447, + "rewards/rejected": -0.05835559964179993, + "step": 1803 + }, + { + "epoch": 1.247579529737206, + "grad_norm": 22.35890007019043, + "learning_rate": 4.8624558168126635e-05, + "log_odds_chosen": 2.573143720626831, + "log_odds_ratio": -0.26976823806762695, + "logits/chosen": -0.45450615882873535, + "logits/rejected": -0.5272830128669739, + "logps/chosen": -0.11467941105365753, + "logps/rejected": -0.5504158139228821, + "loss": 3.3287, + "nll_loss": 0.8052026629447937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011467942036688328, + "rewards/margins": 0.043573640286922455, + "rewards/rejected": -0.05504158139228821, + "step": 1804 + }, + { + "epoch": 1.248271092669433, + "grad_norm": 8.21379566192627, + "learning_rate": 4.862071615183649e-05, + "log_odds_chosen": 4.7264862060546875, + "log_odds_ratio": -0.3322974145412445, + "logits/chosen": -0.6622597575187683, + "logits/rejected": -0.6728526949882507, + "logps/chosen": -0.08343342691659927, + "logps/rejected": -0.7681043148040771, + "loss": 3.1076, + "nll_loss": 0.7436782121658325, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008343343622982502, + "rewards/margins": 0.06846708059310913, + "rewards/rejected": -0.07681042701005936, + "step": 1805 + }, + { + "epoch": 1.2489626556016598, + "grad_norm": 6.535581588745117, + "learning_rate": 4.861687413554633e-05, + "log_odds_chosen": 1.8638652563095093, + "log_odds_ratio": -0.7721409201622009, + "logits/chosen": -0.4947623312473297, + "logits/rejected": -0.4675024151802063, + "logps/chosen": -0.16926667094230652, + "logps/rejected": -0.5563034415245056, + "loss": 3.2474, + "nll_loss": 0.7346447110176086, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01692666858434677, + "rewards/margins": 0.03870367258787155, + "rewards/rejected": -0.05563034117221832, + "step": 1806 + }, + { + "epoch": 1.2496542185338866, + "grad_norm": 4.28892183303833, + "learning_rate": 4.8613032119256186e-05, + "log_odds_chosen": 5.750433444976807, + "log_odds_ratio": -0.047676898539066315, + "logits/chosen": -0.4715877175331116, + "logits/rejected": -0.4328291416168213, + "logps/chosen": -0.0543404258787632, + "logps/rejected": -1.378570556640625, + "loss": 2.9613, + "nll_loss": 0.7355595231056213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005434042774140835, + "rewards/margins": 0.1324230134487152, + "rewards/rejected": -0.13785704970359802, + "step": 1807 + }, + { + "epoch": 1.2503457814661134, + "grad_norm": 7.0970892906188965, + "learning_rate": 4.860919010296604e-05, + "log_odds_chosen": 3.2057337760925293, + "log_odds_ratio": -0.27527907490730286, + "logits/chosen": -0.5142180919647217, + "logits/rejected": -0.5255178213119507, + "logps/chosen": -0.08345197141170502, + "logps/rejected": -0.6663510799407959, + "loss": 4.0499, + "nll_loss": 0.984959602355957, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008345197886228561, + "rewards/margins": 0.058289919048547745, + "rewards/rejected": -0.06663510948419571, + "step": 1808 + }, + { + "epoch": 1.2510373443983402, + "grad_norm": 5.712761878967285, + "learning_rate": 4.860534808667589e-05, + "log_odds_chosen": 2.3765594959259033, + "log_odds_ratio": -0.23137031495571136, + "logits/chosen": -0.6019795536994934, + "logits/rejected": -0.582493007183075, + "logps/chosen": -0.0825355052947998, + "logps/rejected": -0.5512726306915283, + "loss": 4.1693, + "nll_loss": 1.0191932916641235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00825355015695095, + "rewards/margins": 0.04687371850013733, + "rewards/rejected": -0.05512726306915283, + "step": 1809 + }, + { + "epoch": 1.251728907330567, + "grad_norm": 7.043604373931885, + "learning_rate": 4.8601506070385736e-05, + "log_odds_chosen": 2.8349339962005615, + "log_odds_ratio": -0.37025701999664307, + "logits/chosen": -0.6216073036193848, + "logits/rejected": -0.6692014932632446, + "logps/chosen": -0.12638868391513824, + "logps/rejected": -0.4497242867946625, + "loss": 4.7696, + "nll_loss": 1.15536630153656, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012638869695365429, + "rewards/margins": 0.03233356401324272, + "rewards/rejected": -0.04497242718935013, + "step": 1810 + }, + { + "epoch": 1.252420470262794, + "grad_norm": 5.4507880210876465, + "learning_rate": 4.8597664054095595e-05, + "log_odds_chosen": 4.4462409019470215, + "log_odds_ratio": -0.18066637217998505, + "logits/chosen": -0.3726177215576172, + "logits/rejected": -0.4263952672481537, + "logps/chosen": -0.06515246629714966, + "logps/rejected": -0.7420536875724792, + "loss": 2.7647, + "nll_loss": 0.6730960607528687, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006515247281640768, + "rewards/margins": 0.06769011914730072, + "rewards/rejected": -0.07420536875724792, + "step": 1811 + }, + { + "epoch": 1.2531120331950207, + "grad_norm": 8.727974891662598, + "learning_rate": 4.859382203780544e-05, + "log_odds_chosen": 3.2646877765655518, + "log_odds_ratio": -0.3166475296020508, + "logits/chosen": -0.5484322309494019, + "logits/rejected": -0.5809099674224854, + "logps/chosen": -0.07628434896469116, + "logps/rejected": -0.6679997444152832, + "loss": 3.6801, + "nll_loss": 0.8883615732192993, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0076284343376755714, + "rewards/margins": 0.05917154252529144, + "rewards/rejected": -0.06679997593164444, + "step": 1812 + }, + { + "epoch": 1.2538035961272476, + "grad_norm": 5.453410625457764, + "learning_rate": 4.8589980021515293e-05, + "log_odds_chosen": 4.002201557159424, + "log_odds_ratio": -0.1269581913948059, + "logits/chosen": -0.2462340146303177, + "logits/rejected": -0.24367199838161469, + "logps/chosen": -0.10391269624233246, + "logps/rejected": -0.9489084482192993, + "loss": 3.1281, + "nll_loss": 0.7693310379981995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010391268879175186, + "rewards/margins": 0.08449958264827728, + "rewards/rejected": -0.09489084780216217, + "step": 1813 + }, + { + "epoch": 1.2544951590594744, + "grad_norm": 6.228405475616455, + "learning_rate": 4.8586138005225146e-05, + "log_odds_chosen": 3.509197950363159, + "log_odds_ratio": -0.11182574927806854, + "logits/chosen": 0.05738585814833641, + "logits/rejected": -0.041131533682346344, + "logps/chosen": -0.05739348381757736, + "logps/rejected": -0.6539136171340942, + "loss": 2.5143, + "nll_loss": 0.6173902153968811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005739348940551281, + "rewards/margins": 0.05965201556682587, + "rewards/rejected": -0.06539136916399002, + "step": 1814 + }, + { + "epoch": 1.2551867219917012, + "grad_norm": 4.185488700866699, + "learning_rate": 4.858229598893499e-05, + "log_odds_chosen": 3.572788715362549, + "log_odds_ratio": -0.15489646792411804, + "logits/chosen": -0.3606988191604614, + "logits/rejected": -0.391099750995636, + "logps/chosen": -0.07718568295240402, + "logps/rejected": -1.0063767433166504, + "loss": 2.7932, + "nll_loss": 0.6828101277351379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0077185677364468575, + "rewards/margins": 0.09291911125183105, + "rewards/rejected": -0.10063768178224564, + "step": 1815 + }, + { + "epoch": 1.255878284923928, + "grad_norm": 6.819009304046631, + "learning_rate": 4.8578453972644844e-05, + "log_odds_chosen": 2.5417683124542236, + "log_odds_ratio": -0.6759305596351624, + "logits/chosen": -0.1891404539346695, + "logits/rejected": -0.16132640838623047, + "logps/chosen": -0.13642950356006622, + "logps/rejected": -0.48567137122154236, + "loss": 3.4566, + "nll_loss": 0.7965582013130188, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013642950914800167, + "rewards/margins": 0.03492419049143791, + "rewards/rejected": -0.048567142337560654, + "step": 1816 + }, + { + "epoch": 1.2565698478561549, + "grad_norm": 4.596541404724121, + "learning_rate": 4.8574611956354696e-05, + "log_odds_chosen": 2.963163137435913, + "log_odds_ratio": -0.4418284595012665, + "logits/chosen": -0.227559432387352, + "logits/rejected": -0.27367666363716125, + "logps/chosen": -0.11844546347856522, + "logps/rejected": -0.6489789485931396, + "loss": 2.3538, + "nll_loss": 0.5442634224891663, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011844546534121037, + "rewards/margins": 0.0530533492565155, + "rewards/rejected": -0.06489789485931396, + "step": 1817 + }, + { + "epoch": 1.2572614107883817, + "grad_norm": 5.669462203979492, + "learning_rate": 4.857076994006455e-05, + "log_odds_chosen": 1.8501408100128174, + "log_odds_ratio": -0.5410861968994141, + "logits/chosen": -0.10962569713592529, + "logits/rejected": -0.12081693857908249, + "logps/chosen": -0.15979516506195068, + "logps/rejected": -0.41644734144210815, + "loss": 2.8134, + "nll_loss": 0.6492462158203125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015979517251253128, + "rewards/margins": 0.025665219873189926, + "rewards/rejected": -0.041644733399152756, + "step": 1818 + }, + { + "epoch": 1.2579529737206085, + "grad_norm": 7.734716415405273, + "learning_rate": 4.8566927923774395e-05, + "log_odds_chosen": 5.519069671630859, + "log_odds_ratio": -0.021997055038809776, + "logits/chosen": -0.11598198860883713, + "logits/rejected": -0.18683940172195435, + "logps/chosen": -0.022436058148741722, + "logps/rejected": -1.1067173480987549, + "loss": 3.2506, + "nll_loss": 0.810446560382843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002243605675175786, + "rewards/margins": 0.10842813551425934, + "rewards/rejected": -0.1106717437505722, + "step": 1819 + }, + { + "epoch": 1.2586445366528354, + "grad_norm": 6.248927593231201, + "learning_rate": 4.8563085907484254e-05, + "log_odds_chosen": 2.9996016025543213, + "log_odds_ratio": -0.2759028375148773, + "logits/chosen": -0.38097551465034485, + "logits/rejected": -0.3946007490158081, + "logps/chosen": -0.07410791516304016, + "logps/rejected": -0.7756040096282959, + "loss": 3.6204, + "nll_loss": 0.8775010108947754, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007410791236907244, + "rewards/margins": 0.07014961540699005, + "rewards/rejected": -0.0775604099035263, + "step": 1820 + }, + { + "epoch": 1.2593360995850622, + "grad_norm": 8.13671588897705, + "learning_rate": 4.85592438911941e-05, + "log_odds_chosen": 4.534608840942383, + "log_odds_ratio": -0.20198392868041992, + "logits/chosen": -0.511385977268219, + "logits/rejected": -0.5996202826499939, + "logps/chosen": -0.05032597482204437, + "logps/rejected": -0.923053503036499, + "loss": 4.5111, + "nll_loss": 1.1075801849365234, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005032597575336695, + "rewards/margins": 0.0872727483510971, + "rewards/rejected": -0.09230534732341766, + "step": 1821 + }, + { + "epoch": 1.260027662517289, + "grad_norm": 7.075880527496338, + "learning_rate": 4.855540187490395e-05, + "log_odds_chosen": 4.356635093688965, + "log_odds_ratio": -0.11832673847675323, + "logits/chosen": -0.2284286618232727, + "logits/rejected": -0.2228046953678131, + "logps/chosen": -0.0332304872572422, + "logps/rejected": -0.4895108938217163, + "loss": 3.2858, + "nll_loss": 0.8096163272857666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033230483531951904, + "rewards/margins": 0.04562804475426674, + "rewards/rejected": -0.04895108938217163, + "step": 1822 + }, + { + "epoch": 1.2607192254495159, + "grad_norm": 4.531290054321289, + "learning_rate": 4.8551559858613804e-05, + "log_odds_chosen": 5.777563571929932, + "log_odds_ratio": -0.20365619659423828, + "logits/chosen": -0.27427423000335693, + "logits/rejected": -0.3515477478504181, + "logps/chosen": -0.047163330018520355, + "logps/rejected": -0.9215035438537598, + "loss": 2.7918, + "nll_loss": 0.6775762438774109, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0047163330018520355, + "rewards/margins": 0.08743403106927872, + "rewards/rejected": -0.09215036034584045, + "step": 1823 + }, + { + "epoch": 1.2614107883817427, + "grad_norm": 9.5455322265625, + "learning_rate": 4.854771784232366e-05, + "log_odds_chosen": 1.7303236722946167, + "log_odds_ratio": -0.5304129719734192, + "logits/chosen": -0.7086005806922913, + "logits/rejected": -0.6558952927589417, + "logps/chosen": -0.14678457379341125, + "logps/rejected": -0.598397970199585, + "loss": 5.3619, + "nll_loss": 1.2874255180358887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01467845868319273, + "rewards/margins": 0.04516134038567543, + "rewards/rejected": -0.059839800000190735, + "step": 1824 + }, + { + "epoch": 1.2621023513139695, + "grad_norm": 4.559263706207275, + "learning_rate": 4.85438758260335e-05, + "log_odds_chosen": 2.271963596343994, + "log_odds_ratio": -0.1910509467124939, + "logits/chosen": -0.3403293490409851, + "logits/rejected": -0.3471444249153137, + "logps/chosen": -0.08710083365440369, + "logps/rejected": -0.5700175762176514, + "loss": 3.4875, + "nll_loss": 0.8527782559394836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008710084483027458, + "rewards/margins": 0.04829167574644089, + "rewards/rejected": -0.057001762092113495, + "step": 1825 + }, + { + "epoch": 1.2627939142461964, + "grad_norm": 7.239500522613525, + "learning_rate": 4.8540033809743355e-05, + "log_odds_chosen": 3.428022623062134, + "log_odds_ratio": -0.2764246463775635, + "logits/chosen": -0.808087944984436, + "logits/rejected": -0.8824774026870728, + "logps/chosen": -0.05949515849351883, + "logps/rejected": -0.5695070028305054, + "loss": 4.5764, + "nll_loss": 1.1164684295654297, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005949515849351883, + "rewards/margins": 0.051001183688640594, + "rewards/rejected": -0.05695069581270218, + "step": 1826 + }, + { + "epoch": 1.2634854771784232, + "grad_norm": 4.512261390686035, + "learning_rate": 4.853619179345321e-05, + "log_odds_chosen": 4.599119186401367, + "log_odds_ratio": -0.25986239314079285, + "logits/chosen": -0.5794503092765808, + "logits/rejected": -0.5732518434524536, + "logps/chosen": -0.10974626243114471, + "logps/rejected": -0.6613384485244751, + "loss": 2.8689, + "nll_loss": 0.6912313103675842, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010974626056849957, + "rewards/margins": 0.055159226059913635, + "rewards/rejected": -0.06613385677337646, + "step": 1827 + }, + { + "epoch": 1.26417704011065, + "grad_norm": 3.2224671840667725, + "learning_rate": 4.853234977716305e-05, + "log_odds_chosen": 3.540921211242676, + "log_odds_ratio": -0.31274497509002686, + "logits/chosen": -0.36539560556411743, + "logits/rejected": -0.37292957305908203, + "logps/chosen": -0.12696358561515808, + "logps/rejected": -0.7113257050514221, + "loss": 2.2019, + "nll_loss": 0.519207775592804, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012696359306573868, + "rewards/margins": 0.058436211198568344, + "rewards/rejected": -0.07113257050514221, + "step": 1828 + }, + { + "epoch": 1.2648686030428768, + "grad_norm": 8.992777824401855, + "learning_rate": 4.852850776087291e-05, + "log_odds_chosen": 3.9111456871032715, + "log_odds_ratio": -0.403731107711792, + "logits/chosen": -0.2728814482688904, + "logits/rejected": -0.3519219160079956, + "logps/chosen": -0.1583581566810608, + "logps/rejected": -0.9107358455657959, + "loss": 3.9931, + "nll_loss": 0.9579125642776489, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01583581417798996, + "rewards/margins": 0.07523778080940247, + "rewards/rejected": -0.09107358753681183, + "step": 1829 + }, + { + "epoch": 1.2655601659751037, + "grad_norm": 6.241001605987549, + "learning_rate": 4.852466574458276e-05, + "log_odds_chosen": 3.549448013305664, + "log_odds_ratio": -0.22406116127967834, + "logits/chosen": -0.5081853866577148, + "logits/rejected": -0.5429813265800476, + "logps/chosen": -0.06931378692388535, + "logps/rejected": -0.6130677461624146, + "loss": 3.3564, + "nll_loss": 0.8167012929916382, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00693137850612402, + "rewards/margins": 0.05437539890408516, + "rewards/rejected": -0.06130677089095116, + "step": 1830 + }, + { + "epoch": 1.2662517289073305, + "grad_norm": 7.095801830291748, + "learning_rate": 4.852082372829261e-05, + "log_odds_chosen": 3.620205879211426, + "log_odds_ratio": -0.22003838419914246, + "logits/chosen": -0.5255930423736572, + "logits/rejected": -0.5722360610961914, + "logps/chosen": -0.05655446648597717, + "logps/rejected": -0.4140484929084778, + "loss": 3.5879, + "nll_loss": 0.874961256980896, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005655447021126747, + "rewards/margins": 0.035749401897192, + "rewards/rejected": -0.0414048507809639, + "step": 1831 + }, + { + "epoch": 1.2669432918395573, + "grad_norm": 7.453549861907959, + "learning_rate": 4.851698171200246e-05, + "log_odds_chosen": 2.598477363586426, + "log_odds_ratio": -0.3083294928073883, + "logits/chosen": -0.345392644405365, + "logits/rejected": -0.43535852432250977, + "logps/chosen": -0.06634186208248138, + "logps/rejected": -0.6782900094985962, + "loss": 3.3553, + "nll_loss": 0.8080006241798401, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006634186953306198, + "rewards/margins": 0.06119481474161148, + "rewards/rejected": -0.06782899796962738, + "step": 1832 + }, + { + "epoch": 1.2676348547717842, + "grad_norm": 4.456362724304199, + "learning_rate": 4.8513139695712315e-05, + "log_odds_chosen": 5.967951774597168, + "log_odds_ratio": -0.03453276678919792, + "logits/chosen": -0.30861398577690125, + "logits/rejected": -0.31359925866127014, + "logps/chosen": -0.04853855073451996, + "logps/rejected": -0.9749094247817993, + "loss": 2.8744, + "nll_loss": 0.7151439785957336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004853855352848768, + "rewards/margins": 0.0926370918750763, + "rewards/rejected": -0.09749095141887665, + "step": 1833 + }, + { + "epoch": 1.268326417704011, + "grad_norm": 4.406580448150635, + "learning_rate": 4.850929767942216e-05, + "log_odds_chosen": 4.313794136047363, + "log_odds_ratio": -0.24935269355773926, + "logits/chosen": -0.13199511170387268, + "logits/rejected": -0.11371532827615738, + "logps/chosen": -0.10822973400354385, + "logps/rejected": -0.9197275638580322, + "loss": 2.1964, + "nll_loss": 0.5241571664810181, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01082297321408987, + "rewards/margins": 0.08114977926015854, + "rewards/rejected": -0.09197275340557098, + "step": 1834 + }, + { + "epoch": 1.2690179806362378, + "grad_norm": 7.473618984222412, + "learning_rate": 4.850545566313201e-05, + "log_odds_chosen": 5.008003234863281, + "log_odds_ratio": -0.17252132296562195, + "logits/chosen": -0.5653342604637146, + "logits/rejected": -0.5684071779251099, + "logps/chosen": -0.04174775630235672, + "logps/rejected": -0.6983299255371094, + "loss": 3.6188, + "nll_loss": 0.8874568939208984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004174775909632444, + "rewards/margins": 0.06565822660923004, + "rewards/rejected": -0.06983299553394318, + "step": 1835 + }, + { + "epoch": 1.2697095435684647, + "grad_norm": 6.988341331481934, + "learning_rate": 4.8501613646841866e-05, + "log_odds_chosen": 2.817474603652954, + "log_odds_ratio": -0.5724210739135742, + "logits/chosen": -0.5476275682449341, + "logits/rejected": -0.6104252338409424, + "logps/chosen": -0.16373126208782196, + "logps/rejected": -0.46579307317733765, + "loss": 3.4597, + "nll_loss": 0.8076732158660889, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016373127698898315, + "rewards/margins": 0.03020618110895157, + "rewards/rejected": -0.046579305082559586, + "step": 1836 + }, + { + "epoch": 1.2704011065006915, + "grad_norm": 6.129537105560303, + "learning_rate": 4.849777163055171e-05, + "log_odds_chosen": 4.696710586547852, + "log_odds_ratio": -0.20053833723068237, + "logits/chosen": -0.329264372587204, + "logits/rejected": -0.3493046164512634, + "logps/chosen": -0.06220349669456482, + "logps/rejected": -0.9139543771743774, + "loss": 2.9743, + "nll_loss": 0.7235198020935059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006220349110662937, + "rewards/margins": 0.08517508953809738, + "rewards/rejected": -0.09139543771743774, + "step": 1837 + }, + { + "epoch": 1.2710926694329183, + "grad_norm": 6.702199459075928, + "learning_rate": 4.849392961426157e-05, + "log_odds_chosen": 3.0366287231445312, + "log_odds_ratio": -0.2572011649608612, + "logits/chosen": -0.28276461362838745, + "logits/rejected": -0.3073059022426605, + "logps/chosen": -0.0854082852602005, + "logps/rejected": -0.6870397329330444, + "loss": 3.8337, + "nll_loss": 0.9326946139335632, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008540828712284565, + "rewards/margins": 0.060163144022226334, + "rewards/rejected": -0.06870397925376892, + "step": 1838 + }, + { + "epoch": 1.2717842323651452, + "grad_norm": 5.057860851287842, + "learning_rate": 4.8490087597971416e-05, + "log_odds_chosen": 3.52040958404541, + "log_odds_ratio": -0.35257863998413086, + "logits/chosen": -0.436309278011322, + "logits/rejected": -0.4628809690475464, + "logps/chosen": -0.07719769328832626, + "logps/rejected": -0.5573811531066895, + "loss": 4.0301, + "nll_loss": 0.9722760915756226, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007719769608229399, + "rewards/margins": 0.04801835119724274, + "rewards/rejected": -0.05573812127113342, + "step": 1839 + }, + { + "epoch": 1.272475795297372, + "grad_norm": 5.871735095977783, + "learning_rate": 4.848624558168127e-05, + "log_odds_chosen": 5.993706226348877, + "log_odds_ratio": -0.018410231918096542, + "logits/chosen": -0.5008187890052795, + "logits/rejected": -0.5109366178512573, + "logps/chosen": -0.02858724817633629, + "logps/rejected": -0.9414122104644775, + "loss": 3.1941, + "nll_loss": 0.7966926097869873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028587249107658863, + "rewards/margins": 0.09128250181674957, + "rewards/rejected": -0.09414122253656387, + "step": 1840 + }, + { + "epoch": 1.2731673582295988, + "grad_norm": 5.2710700035095215, + "learning_rate": 4.848240356539112e-05, + "log_odds_chosen": 2.1282846927642822, + "log_odds_ratio": -0.19243940711021423, + "logits/chosen": -0.5377815961837769, + "logits/rejected": -0.5604823231697083, + "logps/chosen": -0.08244717121124268, + "logps/rejected": -0.44714000821113586, + "loss": 3.6366, + "nll_loss": 0.8899109363555908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008244717493653297, + "rewards/margins": 0.03646928444504738, + "rewards/rejected": -0.044714003801345825, + "step": 1841 + }, + { + "epoch": 1.2738589211618256, + "grad_norm": 6.019529342651367, + "learning_rate": 4.8478561549100974e-05, + "log_odds_chosen": 2.516636610031128, + "log_odds_ratio": -0.38365089893341064, + "logits/chosen": -0.40743488073349, + "logits/rejected": -0.4360080361366272, + "logps/chosen": -0.06433766335248947, + "logps/rejected": -0.4514871835708618, + "loss": 3.3455, + "nll_loss": 0.7980217933654785, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006433766335248947, + "rewards/margins": 0.038714952766895294, + "rewards/rejected": -0.04514871910214424, + "step": 1842 + }, + { + "epoch": 1.2745504840940525, + "grad_norm": 7.783799171447754, + "learning_rate": 4.847471953281082e-05, + "log_odds_chosen": 1.5541954040527344, + "log_odds_ratio": -0.5911738872528076, + "logits/chosen": -0.7046796083450317, + "logits/rejected": -0.6738811731338501, + "logps/chosen": -0.1610485166311264, + "logps/rejected": -0.28007322549819946, + "loss": 4.0777, + "nll_loss": 0.9603129625320435, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01610485091805458, + "rewards/margins": 0.011902473866939545, + "rewards/rejected": -0.028007326647639275, + "step": 1843 + }, + { + "epoch": 1.2752420470262793, + "grad_norm": 6.018929958343506, + "learning_rate": 4.847087751652067e-05, + "log_odds_chosen": 5.784402847290039, + "log_odds_ratio": -0.048644986003637314, + "logits/chosen": -0.3585667610168457, + "logits/rejected": -0.4079504609107971, + "logps/chosen": -0.02275342121720314, + "logps/rejected": -0.9510570764541626, + "loss": 3.2988, + "nll_loss": 0.8198418021202087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002275342121720314, + "rewards/margins": 0.0928303673863411, + "rewards/rejected": -0.09510570764541626, + "step": 1844 + }, + { + "epoch": 1.2759336099585061, + "grad_norm": 7.915690898895264, + "learning_rate": 4.8467035500230524e-05, + "log_odds_chosen": 5.180848121643066, + "log_odds_ratio": -0.19197486340999603, + "logits/chosen": -0.48925912380218506, + "logits/rejected": -0.5417444705963135, + "logps/chosen": -0.06355451047420502, + "logps/rejected": -0.9609445333480835, + "loss": 3.5779, + "nll_loss": 0.8752825260162354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0063554514199495316, + "rewards/margins": 0.08973899483680725, + "rewards/rejected": -0.09609444439411163, + "step": 1845 + }, + { + "epoch": 1.276625172890733, + "grad_norm": 6.901232719421387, + "learning_rate": 4.846319348394037e-05, + "log_odds_chosen": 3.1123545169830322, + "log_odds_ratio": -0.323904424905777, + "logits/chosen": -0.568681538105011, + "logits/rejected": -0.5214330554008484, + "logps/chosen": -0.11305805295705795, + "logps/rejected": -0.6645975112915039, + "loss": 2.6877, + "nll_loss": 0.6395328044891357, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011305805295705795, + "rewards/margins": 0.055153943598270416, + "rewards/rejected": -0.06645975261926651, + "step": 1846 + }, + { + "epoch": 1.2773167358229598, + "grad_norm": 5.688883304595947, + "learning_rate": 4.845935146765023e-05, + "log_odds_chosen": 5.134402751922607, + "log_odds_ratio": -0.07821536064147949, + "logits/chosen": -0.6485186815261841, + "logits/rejected": -0.6384646892547607, + "logps/chosen": -0.06612791121006012, + "logps/rejected": -0.8336848020553589, + "loss": 3.464, + "nll_loss": 0.8581699728965759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006612791679799557, + "rewards/margins": 0.07675568759441376, + "rewards/rejected": -0.08336848020553589, + "step": 1847 + }, + { + "epoch": 1.2780082987551866, + "grad_norm": 5.677089691162109, + "learning_rate": 4.8455509451360075e-05, + "log_odds_chosen": 5.470160484313965, + "log_odds_ratio": -0.05512235313653946, + "logits/chosen": -0.6574974656105042, + "logits/rejected": -0.6757139563560486, + "logps/chosen": -0.07178202271461487, + "logps/rejected": -1.0133131742477417, + "loss": 2.8147, + "nll_loss": 0.698168933391571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007178202271461487, + "rewards/margins": 0.09415312111377716, + "rewards/rejected": -0.10133132338523865, + "step": 1848 + }, + { + "epoch": 1.2786998616874135, + "grad_norm": 7.259307384490967, + "learning_rate": 4.845166743506993e-05, + "log_odds_chosen": 1.1947388648986816, + "log_odds_ratio": -0.4756326377391815, + "logits/chosen": -0.7476555705070496, + "logits/rejected": -0.7715795040130615, + "logps/chosen": -0.13770818710327148, + "logps/rejected": -0.2466597706079483, + "loss": 3.4717, + "nll_loss": 0.8203725218772888, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013770818710327148, + "rewards/margins": 0.010895160026848316, + "rewards/rejected": -0.02466597780585289, + "step": 1849 + }, + { + "epoch": 1.2793914246196403, + "grad_norm": 7.345088005065918, + "learning_rate": 4.844782541877978e-05, + "log_odds_chosen": 2.8660197257995605, + "log_odds_ratio": -0.25970110297203064, + "logits/chosen": -0.5066275000572205, + "logits/rejected": -0.5241155028343201, + "logps/chosen": -0.08532549440860748, + "logps/rejected": -0.7479270696640015, + "loss": 4.1521, + "nll_loss": 1.012049913406372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008532550185918808, + "rewards/margins": 0.06626015901565552, + "rewards/rejected": -0.07479271292686462, + "step": 1850 + }, + { + "epoch": 1.2800829875518671, + "grad_norm": 6.5352463722229, + "learning_rate": 4.844398340248963e-05, + "log_odds_chosen": 5.387038230895996, + "log_odds_ratio": -0.08680490404367447, + "logits/chosen": -0.38126322627067566, + "logits/rejected": -0.4586794674396515, + "logps/chosen": -0.025043586269021034, + "logps/rejected": -0.6016091704368591, + "loss": 3.5159, + "nll_loss": 0.8702915906906128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025043589994311333, + "rewards/margins": 0.057656560093164444, + "rewards/rejected": -0.06016091629862785, + "step": 1851 + }, + { + "epoch": 1.280774550484094, + "grad_norm": 7.052436828613281, + "learning_rate": 4.844014138619948e-05, + "log_odds_chosen": 4.889477729797363, + "log_odds_ratio": -0.26520976424217224, + "logits/chosen": -0.6732861399650574, + "logits/rejected": -0.6910228729248047, + "logps/chosen": -0.058603618294000626, + "logps/rejected": -0.8419814109802246, + "loss": 2.993, + "nll_loss": 0.7217181921005249, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005860361270606518, + "rewards/margins": 0.07833777368068695, + "rewards/rejected": -0.08419813960790634, + "step": 1852 + }, + { + "epoch": 1.2814661134163208, + "grad_norm": 7.370616436004639, + "learning_rate": 4.843629936990933e-05, + "log_odds_chosen": 1.2901815176010132, + "log_odds_ratio": -0.5043874979019165, + "logits/chosen": -0.7290447950363159, + "logits/rejected": -0.7297863960266113, + "logps/chosen": -0.10335765779018402, + "logps/rejected": -0.5296757817268372, + "loss": 3.9113, + "nll_loss": 0.9273877143859863, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010335765779018402, + "rewards/margins": 0.04263181611895561, + "rewards/rejected": -0.052967578172683716, + "step": 1853 + }, + { + "epoch": 1.2821576763485476, + "grad_norm": 9.12453556060791, + "learning_rate": 4.843245735361918e-05, + "log_odds_chosen": 2.5721120834350586, + "log_odds_ratio": -0.4275425970554352, + "logits/chosen": -0.45946818590164185, + "logits/rejected": -0.5032299757003784, + "logps/chosen": -0.15334449708461761, + "logps/rejected": -0.688929557800293, + "loss": 2.9555, + "nll_loss": 0.6961199045181274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015334450639784336, + "rewards/margins": 0.053558506071567535, + "rewards/rejected": -0.0688929557800293, + "step": 1854 + }, + { + "epoch": 1.2828492392807744, + "grad_norm": 5.654973030090332, + "learning_rate": 4.842861533732903e-05, + "log_odds_chosen": 4.989964485168457, + "log_odds_ratio": -0.12226064503192902, + "logits/chosen": -0.6214022636413574, + "logits/rejected": -0.7165380120277405, + "logps/chosen": -0.06140881031751633, + "logps/rejected": -0.8350120186805725, + "loss": 3.106, + "nll_loss": 0.7642849683761597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00614088075235486, + "rewards/margins": 0.07736032456159592, + "rewards/rejected": -0.08350120484828949, + "step": 1855 + }, + { + "epoch": 1.2835408022130013, + "grad_norm": 6.4906206130981445, + "learning_rate": 4.842477332103889e-05, + "log_odds_chosen": 2.749014139175415, + "log_odds_ratio": -0.22980672121047974, + "logits/chosen": -0.42675426602363586, + "logits/rejected": -0.4076400399208069, + "logps/chosen": -0.09638235718011856, + "logps/rejected": -0.7707810401916504, + "loss": 3.0649, + "nll_loss": 0.743255615234375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009638235904276371, + "rewards/margins": 0.06743986904621124, + "rewards/rejected": -0.07707811146974564, + "step": 1856 + }, + { + "epoch": 1.284232365145228, + "grad_norm": 8.228878021240234, + "learning_rate": 4.842093130474873e-05, + "log_odds_chosen": 2.587768316268921, + "log_odds_ratio": -0.5744894742965698, + "logits/chosen": -0.6129688620567322, + "logits/rejected": -0.5657984018325806, + "logps/chosen": -0.1795559525489807, + "logps/rejected": -0.7216471433639526, + "loss": 3.8535, + "nll_loss": 0.9059350490570068, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0179555956274271, + "rewards/margins": 0.05420912057161331, + "rewards/rejected": -0.07216471433639526, + "step": 1857 + }, + { + "epoch": 1.284923928077455, + "grad_norm": 7.017327785491943, + "learning_rate": 4.8417089288458586e-05, + "log_odds_chosen": 4.121495246887207, + "log_odds_ratio": -0.34169018268585205, + "logits/chosen": -0.6251221895217896, + "logits/rejected": -0.5807482600212097, + "logps/chosen": -0.0728740394115448, + "logps/rejected": -0.8007559776306152, + "loss": 3.7888, + "nll_loss": 0.9130185842514038, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007287404499948025, + "rewards/margins": 0.07278818637132645, + "rewards/rejected": -0.08007559180259705, + "step": 1858 + }, + { + "epoch": 1.2856154910096818, + "grad_norm": 6.424606800079346, + "learning_rate": 4.841324727216844e-05, + "log_odds_chosen": 5.630163192749023, + "log_odds_ratio": -0.11693432927131653, + "logits/chosen": -0.6970394849777222, + "logits/rejected": -0.7171042561531067, + "logps/chosen": -0.08394990861415863, + "logps/rejected": -0.9980266094207764, + "loss": 3.6016, + "nll_loss": 0.8887089490890503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008394991047680378, + "rewards/margins": 0.0914076715707779, + "rewards/rejected": -0.0998026579618454, + "step": 1859 + }, + { + "epoch": 1.2863070539419086, + "grad_norm": 7.732998371124268, + "learning_rate": 4.840940525587829e-05, + "log_odds_chosen": 5.277063369750977, + "log_odds_ratio": -0.08803144097328186, + "logits/chosen": -0.44812747836112976, + "logits/rejected": -0.43661120533943176, + "logps/chosen": -0.06136512756347656, + "logps/rejected": -1.2950425148010254, + "loss": 3.5298, + "nll_loss": 0.8736498951911926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006136512849479914, + "rewards/margins": 0.12336773425340652, + "rewards/rejected": -0.1295042335987091, + "step": 1860 + }, + { + "epoch": 1.2869986168741354, + "grad_norm": 5.051394939422607, + "learning_rate": 4.8405563239588136e-05, + "log_odds_chosen": 3.0801963806152344, + "log_odds_ratio": -0.43634703755378723, + "logits/chosen": -0.4509056508541107, + "logits/rejected": -0.4946168065071106, + "logps/chosen": -0.09263553470373154, + "logps/rejected": -0.7222429513931274, + "loss": 3.4704, + "nll_loss": 0.8239755034446716, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009263553656637669, + "rewards/margins": 0.06296074390411377, + "rewards/rejected": -0.07222429662942886, + "step": 1861 + }, + { + "epoch": 1.2876901798063622, + "grad_norm": 6.35806131362915, + "learning_rate": 4.840172122329799e-05, + "log_odds_chosen": 3.0220894813537598, + "log_odds_ratio": -0.1876365840435028, + "logits/chosen": -0.5654817819595337, + "logits/rejected": -0.613560676574707, + "logps/chosen": -0.14155136048793793, + "logps/rejected": -0.7994102239608765, + "loss": 3.0146, + "nll_loss": 0.7348905801773071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014155135490000248, + "rewards/margins": 0.06578588485717773, + "rewards/rejected": -0.079941026866436, + "step": 1862 + }, + { + "epoch": 1.288381742738589, + "grad_norm": 6.703084468841553, + "learning_rate": 4.839787920700784e-05, + "log_odds_chosen": 2.794166326522827, + "log_odds_ratio": -0.45009666681289673, + "logits/chosen": -0.5885961055755615, + "logits/rejected": -0.6103361248970032, + "logps/chosen": -0.08698848634958267, + "logps/rejected": -0.5582041144371033, + "loss": 3.3271, + "nll_loss": 0.7867544293403625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008698849007487297, + "rewards/margins": 0.047121562063694, + "rewards/rejected": -0.05582040920853615, + "step": 1863 + }, + { + "epoch": 1.2890733056708161, + "grad_norm": 5.702164173126221, + "learning_rate": 4.839403719071769e-05, + "log_odds_chosen": 5.4588541984558105, + "log_odds_ratio": -0.14614000916481018, + "logits/chosen": -0.606258749961853, + "logits/rejected": -0.6877145767211914, + "logps/chosen": -0.04850241541862488, + "logps/rejected": -1.0530086755752563, + "loss": 4.0411, + "nll_loss": 0.9956707954406738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00485024182125926, + "rewards/margins": 0.10045063495635986, + "rewards/rejected": -0.10530087351799011, + "step": 1864 + }, + { + "epoch": 1.289764868603043, + "grad_norm": 7.933743476867676, + "learning_rate": 4.8390195174427546e-05, + "log_odds_chosen": 3.8146302700042725, + "log_odds_ratio": -0.34971341490745544, + "logits/chosen": -0.48540371656417847, + "logits/rejected": -0.5037742853164673, + "logps/chosen": -0.06129493564367294, + "logps/rejected": -0.8910010457038879, + "loss": 4.3695, + "nll_loss": 1.0573959350585938, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006129493936896324, + "rewards/margins": 0.08297061175107956, + "rewards/rejected": -0.08910011500120163, + "step": 1865 + }, + { + "epoch": 1.2904564315352698, + "grad_norm": 7.737270832061768, + "learning_rate": 4.838635315813739e-05, + "log_odds_chosen": 3.4090940952301025, + "log_odds_ratio": -0.29580122232437134, + "logits/chosen": -0.642238438129425, + "logits/rejected": -0.6717078685760498, + "logps/chosen": -0.10193420946598053, + "logps/rejected": -0.6569085717201233, + "loss": 3.8894, + "nll_loss": 0.9427617192268372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010193421505391598, + "rewards/margins": 0.055497437715530396, + "rewards/rejected": -0.06569086015224457, + "step": 1866 + }, + { + "epoch": 1.2911479944674966, + "grad_norm": 8.35338020324707, + "learning_rate": 4.8382511141847244e-05, + "log_odds_chosen": 5.628767967224121, + "log_odds_ratio": -0.34031566977500916, + "logits/chosen": -0.37301352620124817, + "logits/rejected": -0.4034350514411926, + "logps/chosen": -0.08956724405288696, + "logps/rejected": -0.8568572998046875, + "loss": 2.7278, + "nll_loss": 0.647929847240448, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008956724777817726, + "rewards/margins": 0.07672901451587677, + "rewards/rejected": -0.08568572998046875, + "step": 1867 + }, + { + "epoch": 1.2918395573997234, + "grad_norm": 9.274822235107422, + "learning_rate": 4.8378669125557096e-05, + "log_odds_chosen": 5.655916213989258, + "log_odds_ratio": -0.22805175185203552, + "logits/chosen": -0.5578227043151855, + "logits/rejected": -0.6247016191482544, + "logps/chosen": -0.07201507687568665, + "logps/rejected": -0.9687396883964539, + "loss": 4.2172, + "nll_loss": 1.0314915180206299, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0072015076875686646, + "rewards/margins": 0.08967246115207672, + "rewards/rejected": -0.09687396883964539, + "step": 1868 + }, + { + "epoch": 1.2925311203319503, + "grad_norm": 4.5447893142700195, + "learning_rate": 4.837482710926695e-05, + "log_odds_chosen": 7.457864761352539, + "log_odds_ratio": -0.08831396698951721, + "logits/chosen": -0.15770329535007477, + "logits/rejected": -0.22396370768547058, + "logps/chosen": -0.024904444813728333, + "logps/rejected": -0.9831055402755737, + "loss": 2.4908, + "nll_loss": 0.6138787865638733, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024904448073357344, + "rewards/margins": 0.0958201140165329, + "rewards/rejected": -0.09831055998802185, + "step": 1869 + }, + { + "epoch": 1.293222683264177, + "grad_norm": 9.271495819091797, + "learning_rate": 4.8370985092976795e-05, + "log_odds_chosen": 3.8442087173461914, + "log_odds_ratio": -0.3285159468650818, + "logits/chosen": -0.7342332005500793, + "logits/rejected": -0.7820785045623779, + "logps/chosen": -0.12376535683870316, + "logps/rejected": -0.7330570816993713, + "loss": 3.2292, + "nll_loss": 0.7744507193565369, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012376535683870316, + "rewards/margins": 0.06092917546629906, + "rewards/rejected": -0.07330571115016937, + "step": 1870 + }, + { + "epoch": 1.293914246196404, + "grad_norm": 6.048008441925049, + "learning_rate": 4.836714307668665e-05, + "log_odds_chosen": 3.2552452087402344, + "log_odds_ratio": -0.06862203031778336, + "logits/chosen": -0.375715970993042, + "logits/rejected": -0.44336646795272827, + "logps/chosen": -0.07369048148393631, + "logps/rejected": -0.7449591159820557, + "loss": 4.4007, + "nll_loss": 1.093301773071289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007369048427790403, + "rewards/margins": 0.06712686270475388, + "rewards/rejected": -0.07449591159820557, + "step": 1871 + }, + { + "epoch": 1.2946058091286308, + "grad_norm": 6.476336479187012, + "learning_rate": 4.83633010603965e-05, + "log_odds_chosen": 3.0051374435424805, + "log_odds_ratio": -0.17865484952926636, + "logits/chosen": -0.5499475002288818, + "logits/rejected": -0.5804236531257629, + "logps/chosen": -0.06498004496097565, + "logps/rejected": -0.4653778374195099, + "loss": 3.7613, + "nll_loss": 0.922465443611145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006498004775494337, + "rewards/margins": 0.040039777755737305, + "rewards/rejected": -0.04653778672218323, + "step": 1872 + }, + { + "epoch": 1.2952973720608576, + "grad_norm": 8.106934547424316, + "learning_rate": 4.8359459044106345e-05, + "log_odds_chosen": 3.545710802078247, + "log_odds_ratio": -0.22031505405902863, + "logits/chosen": -0.7222949266433716, + "logits/rejected": -0.7418359518051147, + "logps/chosen": -0.060237836092710495, + "logps/rejected": -0.6780364513397217, + "loss": 4.4143, + "nll_loss": 1.0815520286560059, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006023783702403307, + "rewards/margins": 0.06177985668182373, + "rewards/rejected": -0.06780364364385605, + "step": 1873 + }, + { + "epoch": 1.2959889349930844, + "grad_norm": 3.658154010772705, + "learning_rate": 4.8355617027816204e-05, + "log_odds_chosen": 4.990939140319824, + "log_odds_ratio": -0.10858413577079773, + "logits/chosen": -0.43856334686279297, + "logits/rejected": -0.5317746996879578, + "logps/chosen": -0.028155002743005753, + "logps/rejected": -0.6593915224075317, + "loss": 2.7739, + "nll_loss": 0.6826105117797852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028155003674328327, + "rewards/margins": 0.0631236582994461, + "rewards/rejected": -0.06593915820121765, + "step": 1874 + }, + { + "epoch": 1.2966804979253113, + "grad_norm": 6.410544395446777, + "learning_rate": 4.835177501152605e-05, + "log_odds_chosen": 4.552112579345703, + "log_odds_ratio": -0.42514729499816895, + "logits/chosen": -0.6232977509498596, + "logits/rejected": -0.6269210577011108, + "logps/chosen": -0.10090231895446777, + "logps/rejected": -0.7650644183158875, + "loss": 2.7052, + "nll_loss": 0.6337778568267822, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010090231895446777, + "rewards/margins": 0.06641621887683868, + "rewards/rejected": -0.07650645077228546, + "step": 1875 + }, + { + "epoch": 1.297372060857538, + "grad_norm": 6.081912517547607, + "learning_rate": 4.83479329952359e-05, + "log_odds_chosen": 4.995113372802734, + "log_odds_ratio": -0.13507652282714844, + "logits/chosen": -0.0397970974445343, + "logits/rejected": -0.11730852723121643, + "logps/chosen": -0.06897362321615219, + "logps/rejected": -0.7518665194511414, + "loss": 3.0435, + "nll_loss": 0.7473784685134888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006897362880408764, + "rewards/margins": 0.06828928738832474, + "rewards/rejected": -0.07518665492534637, + "step": 1876 + }, + { + "epoch": 1.298063623789765, + "grad_norm": 8.268874168395996, + "learning_rate": 4.8344090978945755e-05, + "log_odds_chosen": 3.9267804622650146, + "log_odds_ratio": -0.2662210464477539, + "logits/chosen": -0.6300618052482605, + "logits/rejected": -0.6393392086029053, + "logps/chosen": -0.11472286283969879, + "logps/rejected": -0.5772973299026489, + "loss": 4.4849, + "nll_loss": 1.0945922136306763, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011472285725176334, + "rewards/margins": 0.04625745117664337, + "rewards/rejected": -0.05772973597049713, + "step": 1877 + }, + { + "epoch": 1.2987551867219918, + "grad_norm": 6.93869161605835, + "learning_rate": 4.834024896265561e-05, + "log_odds_chosen": 3.730520248413086, + "log_odds_ratio": -0.2211248278617859, + "logits/chosen": -0.16885051131248474, + "logits/rejected": -0.20659935474395752, + "logps/chosen": -0.09142343699932098, + "logps/rejected": -0.5973372459411621, + "loss": 4.1071, + "nll_loss": 1.0046672821044922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009142343886196613, + "rewards/margins": 0.05059138312935829, + "rewards/rejected": -0.05973372235894203, + "step": 1878 + }, + { + "epoch": 1.2994467496542186, + "grad_norm": 4.864905834197998, + "learning_rate": 4.833640694636545e-05, + "log_odds_chosen": 5.531225204467773, + "log_odds_ratio": -0.028478192165493965, + "logits/chosen": -0.6834012866020203, + "logits/rejected": -0.7350859045982361, + "logps/chosen": -0.05556423217058182, + "logps/rejected": -1.2433018684387207, + "loss": 3.2099, + "nll_loss": 0.7996299862861633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005556423682719469, + "rewards/margins": 0.11877376586198807, + "rewards/rejected": -0.12433018535375595, + "step": 1879 + }, + { + "epoch": 1.3001383125864454, + "grad_norm": 9.283565521240234, + "learning_rate": 4.8332564930075305e-05, + "log_odds_chosen": 4.565658092498779, + "log_odds_ratio": -0.5067183375358582, + "logits/chosen": -0.5782751441001892, + "logits/rejected": -0.5901899933815002, + "logps/chosen": -0.08600565791130066, + "logps/rejected": -0.9412083029747009, + "loss": 3.1232, + "nll_loss": 0.7301177382469177, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008600565604865551, + "rewards/margins": 0.08552026748657227, + "rewards/rejected": -0.09412083774805069, + "step": 1880 + }, + { + "epoch": 1.3008298755186722, + "grad_norm": 4.7859086990356445, + "learning_rate": 4.832872291378516e-05, + "log_odds_chosen": 6.456204414367676, + "log_odds_ratio": -0.10142803192138672, + "logits/chosen": -0.24098005890846252, + "logits/rejected": -0.26826679706573486, + "logps/chosen": -0.04199819266796112, + "logps/rejected": -0.7708888053894043, + "loss": 2.5463, + "nll_loss": 0.6264212131500244, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004199819173663855, + "rewards/margins": 0.0728890597820282, + "rewards/rejected": -0.07708887755870819, + "step": 1881 + }, + { + "epoch": 1.301521438450899, + "grad_norm": 8.697461128234863, + "learning_rate": 4.8324880897495004e-05, + "log_odds_chosen": 3.373399257659912, + "log_odds_ratio": -0.2699851095676422, + "logits/chosen": -0.5844525694847107, + "logits/rejected": -0.5994455218315125, + "logps/chosen": -0.10936583578586578, + "logps/rejected": -0.8947189450263977, + "loss": 3.9083, + "nll_loss": 0.9500669836997986, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010936584323644638, + "rewards/margins": 0.07853531837463379, + "rewards/rejected": -0.08947189897298813, + "step": 1882 + }, + { + "epoch": 1.302213001383126, + "grad_norm": 12.149765968322754, + "learning_rate": 4.832103888120486e-05, + "log_odds_chosen": 3.4742417335510254, + "log_odds_ratio": -0.2230614721775055, + "logits/chosen": -0.5748246908187866, + "logits/rejected": -0.6016756296157837, + "logps/chosen": -0.18775507807731628, + "logps/rejected": -0.7884979248046875, + "loss": 2.7704, + "nll_loss": 0.6702914237976074, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01877550780773163, + "rewards/margins": 0.06007428467273712, + "rewards/rejected": -0.07884979248046875, + "step": 1883 + }, + { + "epoch": 1.3029045643153527, + "grad_norm": 7.945075988769531, + "learning_rate": 4.831719686491471e-05, + "log_odds_chosen": 3.9197511672973633, + "log_odds_ratio": -0.43578043580055237, + "logits/chosen": -0.7064417600631714, + "logits/rejected": -0.7176086902618408, + "logps/chosen": -0.20374563336372375, + "logps/rejected": -0.7481101751327515, + "loss": 3.5857, + "nll_loss": 0.8528434634208679, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.020374562591314316, + "rewards/margins": 0.05443645641207695, + "rewards/rejected": -0.07481101900339127, + "step": 1884 + }, + { + "epoch": 1.3035961272475796, + "grad_norm": 6.092682361602783, + "learning_rate": 4.831335484862456e-05, + "log_odds_chosen": 4.127374649047852, + "log_odds_ratio": -0.10231603682041168, + "logits/chosen": -0.6021102070808411, + "logits/rejected": -0.5776104927062988, + "logps/chosen": -0.07319527119398117, + "logps/rejected": -0.9779950976371765, + "loss": 2.8118, + "nll_loss": 0.6927098035812378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0073195272125303745, + "rewards/margins": 0.09047998487949371, + "rewards/rejected": -0.09779950976371765, + "step": 1885 + }, + { + "epoch": 1.3042876901798064, + "grad_norm": 5.868535041809082, + "learning_rate": 4.830951283233441e-05, + "log_odds_chosen": 5.035343170166016, + "log_odds_ratio": -0.11897246539592743, + "logits/chosen": -0.5982178449630737, + "logits/rejected": -0.6445438861846924, + "logps/chosen": -0.033809542655944824, + "logps/rejected": -0.8231789469718933, + "loss": 3.7005, + "nll_loss": 0.9132217764854431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033809540327638388, + "rewards/margins": 0.07893693447113037, + "rewards/rejected": -0.08231788873672485, + "step": 1886 + }, + { + "epoch": 1.3049792531120332, + "grad_norm": 7.5153937339782715, + "learning_rate": 4.8305670816044266e-05, + "log_odds_chosen": 2.3621668815612793, + "log_odds_ratio": -0.3876497149467468, + "logits/chosen": -0.6375263929367065, + "logits/rejected": -0.6277173757553101, + "logps/chosen": -0.20085851848125458, + "logps/rejected": -0.7376888990402222, + "loss": 3.3031, + "nll_loss": 0.7870079874992371, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020085850730538368, + "rewards/margins": 0.05368303507566452, + "rewards/rejected": -0.07376889139413834, + "step": 1887 + }, + { + "epoch": 1.30567081604426, + "grad_norm": 10.764470100402832, + "learning_rate": 4.830182879975411e-05, + "log_odds_chosen": 5.82547664642334, + "log_odds_ratio": -0.808645486831665, + "logits/chosen": -0.6361026763916016, + "logits/rejected": -0.7336512207984924, + "logps/chosen": -0.1351955085992813, + "logps/rejected": -0.7599672079086304, + "loss": 3.3539, + "nll_loss": 0.7576218843460083, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013519550673663616, + "rewards/margins": 0.062477171421051025, + "rewards/rejected": -0.07599671930074692, + "step": 1888 + }, + { + "epoch": 1.3063623789764869, + "grad_norm": 6.360213279724121, + "learning_rate": 4.8297986783463964e-05, + "log_odds_chosen": 7.0350661277771, + "log_odds_ratio": -0.06198444962501526, + "logits/chosen": -0.46543243527412415, + "logits/rejected": -0.48943889141082764, + "logps/chosen": -0.03179304301738739, + "logps/rejected": -1.2069227695465088, + "loss": 3.0007, + "nll_loss": 0.7439780831336975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003179304301738739, + "rewards/margins": 0.11751297861337662, + "rewards/rejected": -0.12069229781627655, + "step": 1889 + }, + { + "epoch": 1.3070539419087137, + "grad_norm": 5.6595916748046875, + "learning_rate": 4.8294144767173816e-05, + "log_odds_chosen": 6.70374870300293, + "log_odds_ratio": -0.014150972478091717, + "logits/chosen": -0.33366501331329346, + "logits/rejected": -0.31003445386886597, + "logps/chosen": -0.018083132803440094, + "logps/rejected": -0.8532133102416992, + "loss": 3.0836, + "nll_loss": 0.7694973349571228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018083134200423956, + "rewards/margins": 0.08351302146911621, + "rewards/rejected": -0.0853213369846344, + "step": 1890 + }, + { + "epoch": 1.3077455048409405, + "grad_norm": 6.861349105834961, + "learning_rate": 4.829030275088366e-05, + "log_odds_chosen": 4.63370418548584, + "log_odds_ratio": -0.1361251324415207, + "logits/chosen": -0.48294711112976074, + "logits/rejected": -0.5271444916725159, + "logps/chosen": -0.057334210723638535, + "logps/rejected": -0.8996442556381226, + "loss": 3.5532, + "nll_loss": 0.8746891021728516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0057334210723638535, + "rewards/margins": 0.08423101156949997, + "rewards/rejected": -0.08996443450450897, + "step": 1891 + }, + { + "epoch": 1.3084370677731674, + "grad_norm": 7.4157233238220215, + "learning_rate": 4.828646073459352e-05, + "log_odds_chosen": 4.082646369934082, + "log_odds_ratio": -0.3548939824104309, + "logits/chosen": -0.597272515296936, + "logits/rejected": -0.6197874546051025, + "logps/chosen": -0.07361356914043427, + "logps/rejected": -0.907717764377594, + "loss": 3.7981, + "nll_loss": 0.9140282273292542, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007361356168985367, + "rewards/margins": 0.08341042697429657, + "rewards/rejected": -0.09077177941799164, + "step": 1892 + }, + { + "epoch": 1.3091286307053942, + "grad_norm": 8.492520332336426, + "learning_rate": 4.828261871830337e-05, + "log_odds_chosen": 2.9942915439605713, + "log_odds_ratio": -0.36342692375183105, + "logits/chosen": -0.4822395145893097, + "logits/rejected": -0.49275386333465576, + "logps/chosen": -0.08205610513687134, + "logps/rejected": -0.566351056098938, + "loss": 4.007, + "nll_loss": 0.9654159545898438, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008205609396100044, + "rewards/margins": 0.048429492861032486, + "rewards/rejected": -0.05663510411977768, + "step": 1893 + }, + { + "epoch": 1.309820193637621, + "grad_norm": 9.320199012756348, + "learning_rate": 4.827877670201322e-05, + "log_odds_chosen": 3.0118792057037354, + "log_odds_ratio": -0.7188223600387573, + "logits/chosen": -0.4053908586502075, + "logits/rejected": -0.39137327671051025, + "logps/chosen": -0.13147065043449402, + "logps/rejected": -0.8272587656974792, + "loss": 4.1265, + "nll_loss": 0.9597398042678833, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013147065415978432, + "rewards/margins": 0.06957881152629852, + "rewards/rejected": -0.0827258750796318, + "step": 1894 + }, + { + "epoch": 1.3105117565698479, + "grad_norm": 5.111316204071045, + "learning_rate": 4.827493468572307e-05, + "log_odds_chosen": 3.6450846195220947, + "log_odds_ratio": -0.08115525543689728, + "logits/chosen": -0.47379356622695923, + "logits/rejected": -0.43740054965019226, + "logps/chosen": -0.07818682491779327, + "logps/rejected": -0.911526083946228, + "loss": 3.0668, + "nll_loss": 0.7585898041725159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007818683050572872, + "rewards/margins": 0.08333393186330795, + "rewards/rejected": -0.0911526158452034, + "step": 1895 + }, + { + "epoch": 1.3112033195020747, + "grad_norm": 5.797129154205322, + "learning_rate": 4.8271092669432924e-05, + "log_odds_chosen": 4.451534271240234, + "log_odds_ratio": -0.13431881368160248, + "logits/chosen": -0.5808414816856384, + "logits/rejected": -0.5981189608573914, + "logps/chosen": -0.11908942461013794, + "logps/rejected": -1.117770791053772, + "loss": 3.3404, + "nll_loss": 0.8216636776924133, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011908942833542824, + "rewards/margins": 0.09986813366413116, + "rewards/rejected": -0.11177708208560944, + "step": 1896 + }, + { + "epoch": 1.3118948824343015, + "grad_norm": 6.0447001457214355, + "learning_rate": 4.826725065314277e-05, + "log_odds_chosen": 5.6041412353515625, + "log_odds_ratio": -0.05469472333788872, + "logits/chosen": -0.6320992112159729, + "logits/rejected": -0.7025479078292847, + "logps/chosen": -0.03088865801692009, + "logps/rejected": -1.0616106986999512, + "loss": 3.0946, + "nll_loss": 0.7681707143783569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030888658948242664, + "rewards/margins": 0.10307221114635468, + "rewards/rejected": -0.10616108030080795, + "step": 1897 + }, + { + "epoch": 1.3125864453665284, + "grad_norm": 7.3333916664123535, + "learning_rate": 4.826340863685262e-05, + "log_odds_chosen": 3.174428939819336, + "log_odds_ratio": -0.1787259876728058, + "logits/chosen": -0.46415650844573975, + "logits/rejected": -0.4987673759460449, + "logps/chosen": -0.05549796670675278, + "logps/rejected": -0.41620373725891113, + "loss": 4.3112, + "nll_loss": 1.0599231719970703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005549796391278505, + "rewards/margins": 0.036070577800273895, + "rewards/rejected": -0.04162037372589111, + "step": 1898 + }, + { + "epoch": 1.3132780082987552, + "grad_norm": 6.713944435119629, + "learning_rate": 4.8259566620562475e-05, + "log_odds_chosen": 5.584816932678223, + "log_odds_ratio": -0.3194558322429657, + "logits/chosen": -0.17112912237644196, + "logits/rejected": -0.19902299344539642, + "logps/chosen": -0.07132703810930252, + "logps/rejected": -0.9684167504310608, + "loss": 2.6484, + "nll_loss": 0.6301434636116028, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007132704369723797, + "rewards/margins": 0.08970896899700165, + "rewards/rejected": -0.09684167802333832, + "step": 1899 + }, + { + "epoch": 1.313969571230982, + "grad_norm": 4.772759437561035, + "learning_rate": 4.825572460427232e-05, + "log_odds_chosen": 4.3274641036987305, + "log_odds_ratio": -0.17329102754592896, + "logits/chosen": -0.5920778512954712, + "logits/rejected": -0.5221872329711914, + "logps/chosen": -0.06731683015823364, + "logps/rejected": -0.6521579623222351, + "loss": 2.7345, + "nll_loss": 0.6662896871566772, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006731683388352394, + "rewards/margins": 0.058484114706516266, + "rewards/rejected": -0.06521579623222351, + "step": 1900 + }, + { + "epoch": 1.3146611341632088, + "grad_norm": 5.9790472984313965, + "learning_rate": 4.825188258798218e-05, + "log_odds_chosen": 6.322959899902344, + "log_odds_ratio": -0.09761762619018555, + "logits/chosen": -0.34470367431640625, + "logits/rejected": -0.3735116720199585, + "logps/chosen": -0.040303681045770645, + "logps/rejected": -1.1409962177276611, + "loss": 3.2684, + "nll_loss": 0.8073413372039795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0040303682908415794, + "rewards/margins": 0.11006926000118256, + "rewards/rejected": -0.11409962922334671, + "step": 1901 + }, + { + "epoch": 1.3153526970954357, + "grad_norm": 8.579450607299805, + "learning_rate": 4.8248040571692025e-05, + "log_odds_chosen": 2.434113025665283, + "log_odds_ratio": -0.39737313985824585, + "logits/chosen": -0.7401032447814941, + "logits/rejected": -0.7689638137817383, + "logps/chosen": -0.06367967277765274, + "logps/rejected": -0.37234988808631897, + "loss": 4.2298, + "nll_loss": 1.0177040100097656, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006367966998368502, + "rewards/margins": 0.030867021530866623, + "rewards/rejected": -0.037234991788864136, + "step": 1902 + }, + { + "epoch": 1.3160442600276625, + "grad_norm": 8.755030632019043, + "learning_rate": 4.824419855540188e-05, + "log_odds_chosen": 2.1251778602600098, + "log_odds_ratio": -0.5365231037139893, + "logits/chosen": -0.5605593919754028, + "logits/rejected": -0.5998620986938477, + "logps/chosen": -0.18675807118415833, + "logps/rejected": -0.45498570799827576, + "loss": 4.2634, + "nll_loss": 1.0122004747390747, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018675807863473892, + "rewards/margins": 0.026822764426469803, + "rewards/rejected": -0.045498572289943695, + "step": 1903 + }, + { + "epoch": 1.3167358229598893, + "grad_norm": 5.525919437408447, + "learning_rate": 4.824035653911173e-05, + "log_odds_chosen": 3.190709352493286, + "log_odds_ratio": -0.4506668448448181, + "logits/chosen": -0.524328351020813, + "logits/rejected": -0.5008019804954529, + "logps/chosen": -0.12892203032970428, + "logps/rejected": -0.6358974575996399, + "loss": 2.624, + "nll_loss": 0.6109344959259033, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012892204336822033, + "rewards/margins": 0.05069754272699356, + "rewards/rejected": -0.06358975172042847, + "step": 1904 + }, + { + "epoch": 1.3174273858921162, + "grad_norm": 3.6526026725769043, + "learning_rate": 4.823651452282158e-05, + "log_odds_chosen": 3.5043106079101562, + "log_odds_ratio": -0.11030441522598267, + "logits/chosen": -0.2913362383842468, + "logits/rejected": -0.3034833073616028, + "logps/chosen": -0.05897592753171921, + "logps/rejected": -0.6736852526664734, + "loss": 2.6101, + "nll_loss": 0.6414985656738281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0058975922875106335, + "rewards/margins": 0.06147093325853348, + "rewards/rejected": -0.0673685297369957, + "step": 1905 + }, + { + "epoch": 1.318118948824343, + "grad_norm": 4.910558700561523, + "learning_rate": 4.823267250653143e-05, + "log_odds_chosen": 3.7399656772613525, + "log_odds_ratio": -0.19141636788845062, + "logits/chosen": -0.7501358985900879, + "logits/rejected": -0.774748682975769, + "logps/chosen": -0.06764288246631622, + "logps/rejected": -0.5902732610702515, + "loss": 2.9807, + "nll_loss": 0.7260439395904541, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006764288060367107, + "rewards/margins": 0.0522630400955677, + "rewards/rejected": -0.059027329087257385, + "step": 1906 + }, + { + "epoch": 1.3188105117565698, + "grad_norm": 6.183116912841797, + "learning_rate": 4.822883049024128e-05, + "log_odds_chosen": 1.4731816053390503, + "log_odds_ratio": -0.42161351442337036, + "logits/chosen": -0.7233215570449829, + "logits/rejected": -0.6984033584594727, + "logps/chosen": -0.09123566746711731, + "logps/rejected": -0.382061243057251, + "loss": 4.6328, + "nll_loss": 1.1160461902618408, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009123566560447216, + "rewards/margins": 0.029082562774419785, + "rewards/rejected": -0.038206130266189575, + "step": 1907 + }, + { + "epoch": 1.3195020746887967, + "grad_norm": 8.602644920349121, + "learning_rate": 4.822498847395113e-05, + "log_odds_chosen": 5.085760593414307, + "log_odds_ratio": -0.21069833636283875, + "logits/chosen": -0.24763265252113342, + "logits/rejected": -0.3364519476890564, + "logps/chosen": -0.15351608395576477, + "logps/rejected": -1.0235706567764282, + "loss": 3.9976, + "nll_loss": 0.97832852602005, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015351608395576477, + "rewards/margins": 0.08700545877218246, + "rewards/rejected": -0.10235706716775894, + "step": 1908 + }, + { + "epoch": 1.3201936376210235, + "grad_norm": 9.28389835357666, + "learning_rate": 4.822114645766098e-05, + "log_odds_chosen": 4.172635078430176, + "log_odds_ratio": -0.24250559508800507, + "logits/chosen": -0.46899259090423584, + "logits/rejected": -0.5167728066444397, + "logps/chosen": -0.0976497232913971, + "logps/rejected": -0.5878530740737915, + "loss": 5.0903, + "nll_loss": 1.2483203411102295, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00976497307419777, + "rewards/margins": 0.04902033507823944, + "rewards/rejected": -0.05878530442714691, + "step": 1909 + }, + { + "epoch": 1.3208852005532503, + "grad_norm": 6.55712366104126, + "learning_rate": 4.821730444137084e-05, + "log_odds_chosen": 6.226669788360596, + "log_odds_ratio": -0.12685224413871765, + "logits/chosen": -0.3187330961227417, + "logits/rejected": -0.39868754148483276, + "logps/chosen": -0.050216492265462875, + "logps/rejected": -1.1293668746948242, + "loss": 3.7417, + "nll_loss": 0.9227307438850403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0050216494128108025, + "rewards/margins": 0.10791504383087158, + "rewards/rejected": -0.11293669044971466, + "step": 1910 + }, + { + "epoch": 1.3215767634854771, + "grad_norm": 5.095497131347656, + "learning_rate": 4.8213462425080684e-05, + "log_odds_chosen": 3.7288761138916016, + "log_odds_ratio": -0.18773989379405975, + "logits/chosen": -0.6164065003395081, + "logits/rejected": -0.6328135132789612, + "logps/chosen": -0.10513586550951004, + "logps/rejected": -0.9961248636245728, + "loss": 2.8386, + "nll_loss": 0.690880298614502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010513586923480034, + "rewards/margins": 0.08909890055656433, + "rewards/rejected": -0.09961248934268951, + "step": 1911 + }, + { + "epoch": 1.322268326417704, + "grad_norm": 10.32607650756836, + "learning_rate": 4.8209620408790536e-05, + "log_odds_chosen": 2.625572919845581, + "log_odds_ratio": -0.4675644636154175, + "logits/chosen": -0.5549513101577759, + "logits/rejected": -0.5509620904922485, + "logps/chosen": -0.10303942859172821, + "logps/rejected": -0.5751489400863647, + "loss": 3.7475, + "nll_loss": 0.8901306986808777, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010303942486643791, + "rewards/margins": 0.04721095412969589, + "rewards/rejected": -0.057514894753694534, + "step": 1912 + }, + { + "epoch": 1.3229598893499308, + "grad_norm": 7.665313243865967, + "learning_rate": 4.820577839250039e-05, + "log_odds_chosen": 2.6405282020568848, + "log_odds_ratio": -0.33495649695396423, + "logits/chosen": -0.31856656074523926, + "logits/rejected": -0.37194597721099854, + "logps/chosen": -0.0918373167514801, + "logps/rejected": -0.6856303215026855, + "loss": 3.5346, + "nll_loss": 0.8501495122909546, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009183731861412525, + "rewards/margins": 0.059379301965236664, + "rewards/rejected": -0.06856303662061691, + "step": 1913 + }, + { + "epoch": 1.3236514522821576, + "grad_norm": 4.601027965545654, + "learning_rate": 4.820193637621024e-05, + "log_odds_chosen": 4.731778144836426, + "log_odds_ratio": -0.31632643938064575, + "logits/chosen": -0.053993016481399536, + "logits/rejected": -0.0613970011472702, + "logps/chosen": -0.12295106053352356, + "logps/rejected": -0.9370225667953491, + "loss": 3.0581, + "nll_loss": 0.7328994274139404, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012295106425881386, + "rewards/margins": 0.08140715211629868, + "rewards/rejected": -0.0937022715806961, + "step": 1914 + }, + { + "epoch": 1.3243430152143845, + "grad_norm": 6.187616348266602, + "learning_rate": 4.819809435992009e-05, + "log_odds_chosen": 5.130054473876953, + "log_odds_ratio": -0.1377926617860794, + "logits/chosen": -0.49733227491378784, + "logits/rejected": -0.4652095139026642, + "logps/chosen": -0.031285833567380905, + "logps/rejected": -0.6109557747840881, + "loss": 2.7409, + "nll_loss": 0.6714452505111694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003128583310171962, + "rewards/margins": 0.05796699598431587, + "rewards/rejected": -0.06109558045864105, + "step": 1915 + }, + { + "epoch": 1.3250345781466113, + "grad_norm": 7.493089199066162, + "learning_rate": 4.819425234362994e-05, + "log_odds_chosen": 2.8747878074645996, + "log_odds_ratio": -0.2400168925523758, + "logits/chosen": -0.2644622325897217, + "logits/rejected": -0.3293954133987427, + "logps/chosen": -0.1101066917181015, + "logps/rejected": -0.7629117369651794, + "loss": 3.3065, + "nll_loss": 0.802635669708252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01101066917181015, + "rewards/margins": 0.0652805045247078, + "rewards/rejected": -0.07629118114709854, + "step": 1916 + }, + { + "epoch": 1.3257261410788381, + "grad_norm": 6.159051418304443, + "learning_rate": 4.819041032733979e-05, + "log_odds_chosen": 4.431463241577148, + "log_odds_ratio": -0.15916067361831665, + "logits/chosen": -0.364975243806839, + "logits/rejected": -0.3880729079246521, + "logps/chosen": -0.13159070909023285, + "logps/rejected": -0.9299683570861816, + "loss": 3.9075, + "nll_loss": 0.960968017578125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013159072026610374, + "rewards/margins": 0.07983776926994324, + "rewards/rejected": -0.09299683570861816, + "step": 1917 + }, + { + "epoch": 1.326417704011065, + "grad_norm": 5.809264659881592, + "learning_rate": 4.818656831104964e-05, + "log_odds_chosen": 2.516145706176758, + "log_odds_ratio": -0.42379873991012573, + "logits/chosen": -0.6518577337265015, + "logits/rejected": -0.6944831013679504, + "logps/chosen": -0.0951496809720993, + "logps/rejected": -0.3088337182998657, + "loss": 4.5632, + "nll_loss": 1.0984179973602295, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009514967910945415, + "rewards/margins": 0.021368402987718582, + "rewards/rejected": -0.030883371829986572, + "step": 1918 + }, + { + "epoch": 1.3271092669432918, + "grad_norm": 4.586781024932861, + "learning_rate": 4.8182726294759497e-05, + "log_odds_chosen": 2.9109044075012207, + "log_odds_ratio": -0.16025152802467346, + "logits/chosen": -0.4489715099334717, + "logits/rejected": -0.42398160696029663, + "logps/chosen": -0.05299271643161774, + "logps/rejected": -0.5875262022018433, + "loss": 2.9342, + "nll_loss": 0.7175151109695435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005299271084368229, + "rewards/margins": 0.053453344851732254, + "rewards/rejected": -0.05875261873006821, + "step": 1919 + }, + { + "epoch": 1.3278008298755186, + "grad_norm": 3.521937608718872, + "learning_rate": 4.817888427846934e-05, + "log_odds_chosen": 5.2917985916137695, + "log_odds_ratio": -0.03396681323647499, + "logits/chosen": -0.4610143303871155, + "logits/rejected": -0.47300106287002563, + "logps/chosen": -0.018367256969213486, + "logps/rejected": -0.6851637363433838, + "loss": 2.4834, + "nll_loss": 0.6174432039260864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018367258599027991, + "rewards/margins": 0.06667964160442352, + "rewards/rejected": -0.06851637363433838, + "step": 1920 + }, + { + "epoch": 1.3284923928077454, + "grad_norm": 6.283596038818359, + "learning_rate": 4.8175042262179195e-05, + "log_odds_chosen": 3.88523530960083, + "log_odds_ratio": -0.1463552713394165, + "logits/chosen": -0.4549104571342468, + "logits/rejected": -0.4896395206451416, + "logps/chosen": -0.07398514449596405, + "logps/rejected": -1.0360349416732788, + "loss": 4.0476, + "nll_loss": 0.9972621202468872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007398514077067375, + "rewards/margins": 0.0962049812078476, + "rewards/rejected": -0.10360349714756012, + "step": 1921 + }, + { + "epoch": 1.3291839557399723, + "grad_norm": 13.937512397766113, + "learning_rate": 4.817120024588904e-05, + "log_odds_chosen": 3.372614860534668, + "log_odds_ratio": -0.6902897357940674, + "logits/chosen": -0.3406500816345215, + "logits/rejected": -0.35912925004959106, + "logps/chosen": -0.14384174346923828, + "logps/rejected": -0.9012109041213989, + "loss": 3.3868, + "nll_loss": 0.777681291103363, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014384175650775433, + "rewards/margins": 0.07573691010475159, + "rewards/rejected": -0.0901210829615593, + "step": 1922 + }, + { + "epoch": 1.329875518672199, + "grad_norm": 3.9251039028167725, + "learning_rate": 4.81673582295989e-05, + "log_odds_chosen": 3.5236854553222656, + "log_odds_ratio": -0.08903735131025314, + "logits/chosen": -0.3721243739128113, + "logits/rejected": -0.39660120010375977, + "logps/chosen": -0.05323631688952446, + "logps/rejected": -0.5726852416992188, + "loss": 3.2612, + "nll_loss": 0.8063850402832031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005323631688952446, + "rewards/margins": 0.05194488912820816, + "rewards/rejected": -0.057268522679805756, + "step": 1923 + }, + { + "epoch": 1.330567081604426, + "grad_norm": 6.169641494750977, + "learning_rate": 4.8163516213308745e-05, + "log_odds_chosen": 4.691677093505859, + "log_odds_ratio": -0.2546631693840027, + "logits/chosen": -0.6151852011680603, + "logits/rejected": -0.6133842468261719, + "logps/chosen": -0.08545532077550888, + "logps/rejected": -1.0763016939163208, + "loss": 3.4508, + "nll_loss": 0.8372362852096558, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008545532822608948, + "rewards/margins": 0.09908463805913925, + "rewards/rejected": -0.1076301708817482, + "step": 1924 + }, + { + "epoch": 1.3312586445366528, + "grad_norm": 6.655059337615967, + "learning_rate": 4.81596741970186e-05, + "log_odds_chosen": 4.733453750610352, + "log_odds_ratio": -0.13630807399749756, + "logits/chosen": -0.7335551977157593, + "logits/rejected": -0.7183327674865723, + "logps/chosen": -0.06154067814350128, + "logps/rejected": -0.9583780765533447, + "loss": 3.6682, + "nll_loss": 0.9034278392791748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006154067814350128, + "rewards/margins": 0.08968374133110046, + "rewards/rejected": -0.09583780914545059, + "step": 1925 + }, + { + "epoch": 1.3319502074688796, + "grad_norm": 6.028042316436768, + "learning_rate": 4.815583218072845e-05, + "log_odds_chosen": 1.7379109859466553, + "log_odds_ratio": -0.703913152217865, + "logits/chosen": -0.7907073497772217, + "logits/rejected": -0.7652462720870972, + "logps/chosen": -0.27896490693092346, + "logps/rejected": -0.42122477293014526, + "loss": 4.0062, + "nll_loss": 0.9311593770980835, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.027896491810679436, + "rewards/margins": 0.014225986786186695, + "rewards/rejected": -0.042122479528188705, + "step": 1926 + }, + { + "epoch": 1.3326417704011064, + "grad_norm": 5.958827972412109, + "learning_rate": 4.8151990164438296e-05, + "log_odds_chosen": 4.64818000793457, + "log_odds_ratio": -0.080184206366539, + "logits/chosen": -0.43817225098609924, + "logits/rejected": -0.5326857566833496, + "logps/chosen": -0.03991539031267166, + "logps/rejected": -0.8206549882888794, + "loss": 3.7101, + "nll_loss": 0.9195180535316467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003991539124399424, + "rewards/margins": 0.07807396352291107, + "rewards/rejected": -0.08206550031900406, + "step": 1927 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 7.063464641571045, + "learning_rate": 4.814814814814815e-05, + "log_odds_chosen": 4.256350517272949, + "log_odds_ratio": -0.03917912021279335, + "logits/chosen": -0.6081252098083496, + "logits/rejected": -0.6156570911407471, + "logps/chosen": -0.05544862151145935, + "logps/rejected": -1.2835543155670166, + "loss": 4.0754, + "nll_loss": 1.0149255990982056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0055448622442781925, + "rewards/margins": 0.12281057238578796, + "rewards/rejected": -0.12835542857646942, + "step": 1928 + }, + { + "epoch": 1.33402489626556, + "grad_norm": 5.007072448730469, + "learning_rate": 4.8144306131858e-05, + "log_odds_chosen": 4.11650276184082, + "log_odds_ratio": -0.08610834181308746, + "logits/chosen": -0.6029285788536072, + "logits/rejected": -0.7104057669639587, + "logps/chosen": -0.07007205486297607, + "logps/rejected": -0.8489803671836853, + "loss": 3.7165, + "nll_loss": 0.9205197095870972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007007205858826637, + "rewards/margins": 0.07789083570241928, + "rewards/rejected": -0.08489803969860077, + "step": 1929 + }, + { + "epoch": 1.334716459197787, + "grad_norm": 5.1880998611450195, + "learning_rate": 4.814046411556785e-05, + "log_odds_chosen": 5.354378700256348, + "log_odds_ratio": -0.20343045890331268, + "logits/chosen": -0.621367871761322, + "logits/rejected": -0.6300415992736816, + "logps/chosen": -0.05588001012802124, + "logps/rejected": -0.9308750629425049, + "loss": 3.9913, + "nll_loss": 0.9774820804595947, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005588000640273094, + "rewards/margins": 0.08749950677156448, + "rewards/rejected": -0.09308750927448273, + "step": 1930 + }, + { + "epoch": 1.3354080221300137, + "grad_norm": 5.457958221435547, + "learning_rate": 4.81366220992777e-05, + "log_odds_chosen": 4.170896530151367, + "log_odds_ratio": -0.4549473524093628, + "logits/chosen": -0.7111985683441162, + "logits/rejected": -0.7823254466056824, + "logps/chosen": -0.1055913120508194, + "logps/rejected": -0.8930991888046265, + "loss": 3.1366, + "nll_loss": 0.7386659383773804, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010559131391346455, + "rewards/margins": 0.07875078916549683, + "rewards/rejected": -0.08930991590023041, + "step": 1931 + }, + { + "epoch": 1.3360995850622408, + "grad_norm": 6.582731246948242, + "learning_rate": 4.813278008298756e-05, + "log_odds_chosen": 1.9933853149414062, + "log_odds_ratio": -0.3615785241127014, + "logits/chosen": -0.2690390348434448, + "logits/rejected": -0.3522653579711914, + "logps/chosen": -0.08049627393484116, + "logps/rejected": -0.4732947051525116, + "loss": 3.7376, + "nll_loss": 0.8982344269752502, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008049627766013145, + "rewards/margins": 0.039279840886592865, + "rewards/rejected": -0.04732947051525116, + "step": 1932 + }, + { + "epoch": 1.3367911479944676, + "grad_norm": 7.321381568908691, + "learning_rate": 4.8128938066697404e-05, + "log_odds_chosen": 3.5911612510681152, + "log_odds_ratio": -0.18059605360031128, + "logits/chosen": -0.468522310256958, + "logits/rejected": -0.5160965919494629, + "logps/chosen": -0.07987986505031586, + "logps/rejected": -0.7631416320800781, + "loss": 4.4929, + "nll_loss": 1.1051725149154663, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00798798631876707, + "rewards/margins": 0.0683261826634407, + "rewards/rejected": -0.07631416618824005, + "step": 1933 + }, + { + "epoch": 1.3374827109266945, + "grad_norm": 7.451639652252197, + "learning_rate": 4.8125096050407256e-05, + "log_odds_chosen": 3.3757247924804688, + "log_odds_ratio": -0.5779038667678833, + "logits/chosen": -0.6634786128997803, + "logits/rejected": -0.7254400849342346, + "logps/chosen": -0.1831435114145279, + "logps/rejected": -0.7591239213943481, + "loss": 3.3445, + "nll_loss": 0.778323769569397, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01831435039639473, + "rewards/margins": 0.0575980469584465, + "rewards/rejected": -0.07591239362955093, + "step": 1934 + }, + { + "epoch": 1.3381742738589213, + "grad_norm": 6.1455230712890625, + "learning_rate": 4.812125403411711e-05, + "log_odds_chosen": 3.1927881240844727, + "log_odds_ratio": -0.21947617828845978, + "logits/chosen": -0.6109656095504761, + "logits/rejected": -0.5959261059761047, + "logps/chosen": -0.08558580279350281, + "logps/rejected": -0.5466667413711548, + "loss": 3.9627, + "nll_loss": 0.9687193036079407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008558579720556736, + "rewards/margins": 0.04610808938741684, + "rewards/rejected": -0.054666668176651, + "step": 1935 + }, + { + "epoch": 1.3388658367911481, + "grad_norm": 5.804905891418457, + "learning_rate": 4.8117412017826954e-05, + "log_odds_chosen": 6.877157211303711, + "log_odds_ratio": -0.00704343942925334, + "logits/chosen": -0.6004572510719299, + "logits/rejected": -0.6538717150688171, + "logps/chosen": -0.004800926893949509, + "logps/rejected": -1.1645859479904175, + "loss": 3.132, + "nll_loss": 0.7823060750961304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004800926835741848, + "rewards/margins": 0.11597850918769836, + "rewards/rejected": -0.11645859479904175, + "step": 1936 + }, + { + "epoch": 1.339557399723375, + "grad_norm": 5.952755451202393, + "learning_rate": 4.811357000153681e-05, + "log_odds_chosen": 2.8181982040405273, + "log_odds_ratio": -0.20215541124343872, + "logits/chosen": -0.39430737495422363, + "logits/rejected": -0.427776575088501, + "logps/chosen": -0.04715566337108612, + "logps/rejected": -0.5383660793304443, + "loss": 4.7374, + "nll_loss": 1.1641255617141724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004715566523373127, + "rewards/margins": 0.04912103712558746, + "rewards/rejected": -0.053836606442928314, + "step": 1937 + }, + { + "epoch": 1.3402489626556018, + "grad_norm": 6.488382339477539, + "learning_rate": 4.810972798524666e-05, + "log_odds_chosen": 3.856659412384033, + "log_odds_ratio": -0.333176851272583, + "logits/chosen": -0.7704707384109497, + "logits/rejected": -0.869666576385498, + "logps/chosen": -0.15772998332977295, + "logps/rejected": -0.9237390756607056, + "loss": 3.4646, + "nll_loss": 0.8328379392623901, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015772996470332146, + "rewards/margins": 0.07660090923309326, + "rewards/rejected": -0.09237390756607056, + "step": 1938 + }, + { + "epoch": 1.3409405255878286, + "grad_norm": 8.147760391235352, + "learning_rate": 4.810588596895651e-05, + "log_odds_chosen": 2.7923974990844727, + "log_odds_ratio": -0.9370527267456055, + "logits/chosen": -0.9870648980140686, + "logits/rejected": -1.0097748041152954, + "logps/chosen": -0.16981056332588196, + "logps/rejected": -0.6250455379486084, + "loss": 4.2305, + "nll_loss": 0.9639307856559753, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016981055960059166, + "rewards/margins": 0.045523501932621, + "rewards/rejected": -0.06250455975532532, + "step": 1939 + }, + { + "epoch": 1.3416320885200554, + "grad_norm": 6.496330738067627, + "learning_rate": 4.810204395266636e-05, + "log_odds_chosen": 2.225219249725342, + "log_odds_ratio": -0.4056919515132904, + "logits/chosen": -0.6805392503738403, + "logits/rejected": -0.6900457143783569, + "logps/chosen": -0.08912613242864609, + "logps/rejected": -0.5723999738693237, + "loss": 3.8992, + "nll_loss": 0.9342321157455444, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008912613615393639, + "rewards/margins": 0.04832738637924194, + "rewards/rejected": -0.05724000185728073, + "step": 1940 + }, + { + "epoch": 1.3423236514522823, + "grad_norm": 4.509915828704834, + "learning_rate": 4.8098201936376216e-05, + "log_odds_chosen": 4.331239700317383, + "log_odds_ratio": -0.370481938123703, + "logits/chosen": -0.3268811106681824, + "logits/rejected": -0.35085660219192505, + "logps/chosen": -0.12016618996858597, + "logps/rejected": -0.8190144896507263, + "loss": 2.3706, + "nll_loss": 0.5555914044380188, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012016619555652142, + "rewards/margins": 0.06988482177257538, + "rewards/rejected": -0.0819014459848404, + "step": 1941 + }, + { + "epoch": 1.343015214384509, + "grad_norm": 7.133805274963379, + "learning_rate": 4.809435992008606e-05, + "log_odds_chosen": 4.085857391357422, + "log_odds_ratio": -0.2963946461677551, + "logits/chosen": -0.45591726899147034, + "logits/rejected": -0.49084967374801636, + "logps/chosen": -0.10536396503448486, + "logps/rejected": -1.0363645553588867, + "loss": 3.7455, + "nll_loss": 0.9067329168319702, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010536396875977516, + "rewards/margins": 0.09310007095336914, + "rewards/rejected": -0.10363646596670151, + "step": 1942 + }, + { + "epoch": 1.343706777316736, + "grad_norm": 7.4849443435668945, + "learning_rate": 4.8090517903795915e-05, + "log_odds_chosen": 3.4074530601501465, + "log_odds_ratio": -0.46883493661880493, + "logits/chosen": -0.6866205334663391, + "logits/rejected": -0.7802227139472961, + "logps/chosen": -0.08575089275836945, + "logps/rejected": -0.746304988861084, + "loss": 3.0132, + "nll_loss": 0.7064082622528076, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008575089275836945, + "rewards/margins": 0.06605540961027145, + "rewards/rejected": -0.0746304988861084, + "step": 1943 + }, + { + "epoch": 1.3443983402489628, + "grad_norm": 9.744033813476562, + "learning_rate": 4.808667588750577e-05, + "log_odds_chosen": 5.1300787925720215, + "log_odds_ratio": -0.28685262799263, + "logits/chosen": -0.8690510392189026, + "logits/rejected": -0.9068571329116821, + "logps/chosen": -0.126407653093338, + "logps/rejected": -0.8747024536132812, + "loss": 5.1364, + "nll_loss": 1.2554024457931519, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012640764936804771, + "rewards/margins": 0.07482947409152985, + "rewards/rejected": -0.08747023344039917, + "step": 1944 + }, + { + "epoch": 1.3450899031811896, + "grad_norm": 5.100958824157715, + "learning_rate": 4.808283387121561e-05, + "log_odds_chosen": 5.217370986938477, + "log_odds_ratio": -0.24462248384952545, + "logits/chosen": -0.7892632484436035, + "logits/rejected": -0.849675714969635, + "logps/chosen": -0.09817850589752197, + "logps/rejected": -0.787107527256012, + "loss": 2.6284, + "nll_loss": 0.6326299905776978, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009817851707339287, + "rewards/margins": 0.06889290362596512, + "rewards/rejected": -0.07871074974536896, + "step": 1945 + }, + { + "epoch": 1.3457814661134164, + "grad_norm": 6.703667163848877, + "learning_rate": 4.8078991854925465e-05, + "log_odds_chosen": 6.110369682312012, + "log_odds_ratio": -0.03723360225558281, + "logits/chosen": -0.49276337027549744, + "logits/rejected": -0.5956921577453613, + "logps/chosen": -0.039471082389354706, + "logps/rejected": -1.272836685180664, + "loss": 3.4174, + "nll_loss": 0.850635290145874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003947108052670956, + "rewards/margins": 0.12333656847476959, + "rewards/rejected": -0.12728366255760193, + "step": 1946 + }, + { + "epoch": 1.3464730290456433, + "grad_norm": 5.189427852630615, + "learning_rate": 4.807514983863532e-05, + "log_odds_chosen": 3.109792947769165, + "log_odds_ratio": -0.244966059923172, + "logits/chosen": -0.540881335735321, + "logits/rejected": -0.5172264575958252, + "logps/chosen": -0.05473095923662186, + "logps/rejected": -0.4827151298522949, + "loss": 2.852, + "nll_loss": 0.6885116696357727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005473096389323473, + "rewards/margins": 0.04279841482639313, + "rewards/rejected": -0.04827151447534561, + "step": 1947 + }, + { + "epoch": 1.34716459197787, + "grad_norm": 2.6815555095672607, + "learning_rate": 4.807130782234517e-05, + "log_odds_chosen": 2.74289870262146, + "log_odds_ratio": -0.2474522888660431, + "logits/chosen": -0.16342592239379883, + "logits/rejected": -0.1653198003768921, + "logps/chosen": -0.14200901985168457, + "logps/rejected": -0.9104002118110657, + "loss": 2.6106, + "nll_loss": 0.6278988122940063, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014200902543962002, + "rewards/margins": 0.0768391340970993, + "rewards/rejected": -0.09104002267122269, + "step": 1948 + }, + { + "epoch": 1.347856154910097, + "grad_norm": 6.47789192199707, + "learning_rate": 4.8067465806055016e-05, + "log_odds_chosen": 2.62200927734375, + "log_odds_ratio": -0.6260537505149841, + "logits/chosen": -0.6399807929992676, + "logits/rejected": -0.6780401468276978, + "logps/chosen": -0.11815258115530014, + "logps/rejected": -0.8514223098754883, + "loss": 3.2289, + "nll_loss": 0.7446192502975464, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011815258301794529, + "rewards/margins": 0.073326975107193, + "rewards/rejected": -0.08514222502708435, + "step": 1949 + }, + { + "epoch": 1.3485477178423237, + "grad_norm": 6.061791896820068, + "learning_rate": 4.8063623789764875e-05, + "log_odds_chosen": 3.674816131591797, + "log_odds_ratio": -0.19583740830421448, + "logits/chosen": -0.6116560101509094, + "logits/rejected": -0.6176925897598267, + "logps/chosen": -0.07769626379013062, + "logps/rejected": -0.6334196925163269, + "loss": 4.2678, + "nll_loss": 1.0473612546920776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0077696265652775764, + "rewards/margins": 0.055572349578142166, + "rewards/rejected": -0.06334197521209717, + "step": 1950 + }, + { + "epoch": 1.3492392807745506, + "grad_norm": 5.788738250732422, + "learning_rate": 4.805978177347472e-05, + "log_odds_chosen": 3.3159537315368652, + "log_odds_ratio": -0.346087247133255, + "logits/chosen": -0.11763886362314224, + "logits/rejected": -0.19081079959869385, + "logps/chosen": -0.0851515457034111, + "logps/rejected": -0.575129508972168, + "loss": 2.8582, + "nll_loss": 0.6799299120903015, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00851515494287014, + "rewards/margins": 0.04899780452251434, + "rewards/rejected": -0.057512953877449036, + "step": 1951 + }, + { + "epoch": 1.3499308437067774, + "grad_norm": 4.8835954666137695, + "learning_rate": 4.805593975718457e-05, + "log_odds_chosen": 3.579317808151245, + "log_odds_ratio": -0.3033023476600647, + "logits/chosen": -0.5329922437667847, + "logits/rejected": -0.5394185185432434, + "logps/chosen": -0.11767933517694473, + "logps/rejected": -0.7724454998970032, + "loss": 3.3101, + "nll_loss": 0.7971964478492737, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011767934076488018, + "rewards/margins": 0.06547661870718002, + "rewards/rejected": -0.07724454998970032, + "step": 1952 + }, + { + "epoch": 1.3506224066390042, + "grad_norm": 8.65053939819336, + "learning_rate": 4.8052097740894425e-05, + "log_odds_chosen": 1.601304054260254, + "log_odds_ratio": -0.6165583729743958, + "logits/chosen": -0.7664597034454346, + "logits/rejected": -0.766176700592041, + "logps/chosen": -0.12955564260482788, + "logps/rejected": -0.4429931640625, + "loss": 5.0357, + "nll_loss": 1.1972750425338745, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012955564074218273, + "rewards/margins": 0.03134375810623169, + "rewards/rejected": -0.04429932311177254, + "step": 1953 + }, + { + "epoch": 1.351313969571231, + "grad_norm": 7.064445972442627, + "learning_rate": 4.804825572460427e-05, + "log_odds_chosen": 5.0720367431640625, + "log_odds_ratio": -0.2898871898651123, + "logits/chosen": -0.46167534589767456, + "logits/rejected": -0.45588383078575134, + "logps/chosen": -0.050215303897857666, + "logps/rejected": -0.6823631525039673, + "loss": 3.7218, + "nll_loss": 0.901453971862793, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005021530669182539, + "rewards/margins": 0.06321477890014648, + "rewards/rejected": -0.06823631376028061, + "step": 1954 + }, + { + "epoch": 1.352005532503458, + "grad_norm": 6.010137557983398, + "learning_rate": 4.8044413708314124e-05, + "log_odds_chosen": 1.5840513706207275, + "log_odds_ratio": -0.4234069883823395, + "logits/chosen": -0.42211514711380005, + "logits/rejected": -0.4028991162776947, + "logps/chosen": -0.11243654787540436, + "logps/rejected": -0.46887820959091187, + "loss": 2.9777, + "nll_loss": 0.7020907402038574, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011243656277656555, + "rewards/margins": 0.03564416617155075, + "rewards/rejected": -0.046887822449207306, + "step": 1955 + }, + { + "epoch": 1.3526970954356847, + "grad_norm": 8.500036239624023, + "learning_rate": 4.8040571692023976e-05, + "log_odds_chosen": 4.498102188110352, + "log_odds_ratio": -0.18591952323913574, + "logits/chosen": -0.4714905619621277, + "logits/rejected": -0.5314135551452637, + "logps/chosen": -0.13488513231277466, + "logps/rejected": -0.9827030301094055, + "loss": 3.4283, + "nll_loss": 0.8384861350059509, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013488514348864555, + "rewards/margins": 0.08478179574012756, + "rewards/rejected": -0.09827030450105667, + "step": 1956 + }, + { + "epoch": 1.3533886583679116, + "grad_norm": 5.889149188995361, + "learning_rate": 4.803672967573383e-05, + "log_odds_chosen": 1.6420878171920776, + "log_odds_ratio": -1.187038540840149, + "logits/chosen": -0.7033360600471497, + "logits/rejected": -0.6936848163604736, + "logps/chosen": -0.12075044214725494, + "logps/rejected": -0.6052550673484802, + "loss": 3.5898, + "nll_loss": 0.7787531614303589, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012075044214725494, + "rewards/margins": 0.04845046624541283, + "rewards/rejected": -0.06052550673484802, + "step": 1957 + }, + { + "epoch": 1.3540802213001384, + "grad_norm": 5.764094829559326, + "learning_rate": 4.8032887659443674e-05, + "log_odds_chosen": 4.8118696212768555, + "log_odds_ratio": -0.2559741735458374, + "logits/chosen": -0.2301330417394638, + "logits/rejected": -0.2537592649459839, + "logps/chosen": -0.051100168377161026, + "logps/rejected": -0.9901271462440491, + "loss": 2.8586, + "nll_loss": 0.6890623569488525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0051100170239806175, + "rewards/margins": 0.09390270709991455, + "rewards/rejected": -0.09901271760463715, + "step": 1958 + }, + { + "epoch": 1.3547717842323652, + "grad_norm": 6.235367774963379, + "learning_rate": 4.802904564315353e-05, + "log_odds_chosen": 4.211156368255615, + "log_odds_ratio": -0.19045324623584747, + "logits/chosen": -0.5956144332885742, + "logits/rejected": -0.6682155132293701, + "logps/chosen": -0.07565949112176895, + "logps/rejected": -0.7925881743431091, + "loss": 3.4797, + "nll_loss": 0.8508702516555786, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007565949112176895, + "rewards/margins": 0.07169287651777267, + "rewards/rejected": -0.07925882190465927, + "step": 1959 + }, + { + "epoch": 1.355463347164592, + "grad_norm": 6.1317338943481445, + "learning_rate": 4.802520362686338e-05, + "log_odds_chosen": 5.707240104675293, + "log_odds_ratio": -0.27061474323272705, + "logits/chosen": -0.36521780490875244, + "logits/rejected": -0.38194626569747925, + "logps/chosen": -0.037421174347400665, + "logps/rejected": -1.2415099143981934, + "loss": 2.8538, + "nll_loss": 0.6863940954208374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0037421174347400665, + "rewards/margins": 0.12040887773036957, + "rewards/rejected": -0.12415099143981934, + "step": 1960 + }, + { + "epoch": 1.3561549100968189, + "grad_norm": 3.9908347129821777, + "learning_rate": 4.802136161057323e-05, + "log_odds_chosen": 2.9918770790100098, + "log_odds_ratio": -0.44292670488357544, + "logits/chosen": -0.6748017072677612, + "logits/rejected": -0.7269718647003174, + "logps/chosen": -0.14064937829971313, + "logps/rejected": -0.4798199534416199, + "loss": 3.3409, + "nll_loss": 0.7909257411956787, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014064937829971313, + "rewards/margins": 0.033917058259248734, + "rewards/rejected": -0.04798199608922005, + "step": 1961 + }, + { + "epoch": 1.3568464730290457, + "grad_norm": 6.479868412017822, + "learning_rate": 4.8017519594283084e-05, + "log_odds_chosen": 4.106939315795898, + "log_odds_ratio": -0.2835253179073334, + "logits/chosen": -0.6259688138961792, + "logits/rejected": -0.7092024683952332, + "logps/chosen": -0.03148230165243149, + "logps/rejected": -0.7635524272918701, + "loss": 3.48, + "nll_loss": 0.8416469097137451, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031482302583754063, + "rewards/margins": 0.07320702075958252, + "rewards/rejected": -0.07635524868965149, + "step": 1962 + }, + { + "epoch": 1.3575380359612725, + "grad_norm": 9.798500061035156, + "learning_rate": 4.801367757799293e-05, + "log_odds_chosen": 4.252465724945068, + "log_odds_ratio": -0.25726497173309326, + "logits/chosen": -0.5425630211830139, + "logits/rejected": -0.6961837410926819, + "logps/chosen": -0.04355762153863907, + "logps/rejected": -0.8535860776901245, + "loss": 3.9693, + "nll_loss": 0.9666072130203247, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004355762153863907, + "rewards/margins": 0.08100283890962601, + "rewards/rejected": -0.08535861223936081, + "step": 1963 + }, + { + "epoch": 1.3582295988934994, + "grad_norm": 7.3136305809021, + "learning_rate": 4.800983556170278e-05, + "log_odds_chosen": 4.736594200134277, + "log_odds_ratio": -0.18831676244735718, + "logits/chosen": -0.36243361234664917, + "logits/rejected": -0.37697988748550415, + "logps/chosen": -0.05069248378276825, + "logps/rejected": -0.8661303520202637, + "loss": 3.7201, + "nll_loss": 0.9111894369125366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005069248378276825, + "rewards/margins": 0.08154379576444626, + "rewards/rejected": -0.08661304414272308, + "step": 1964 + }, + { + "epoch": 1.3589211618257262, + "grad_norm": 5.368043422698975, + "learning_rate": 4.8005993545412634e-05, + "log_odds_chosen": 2.5243210792541504, + "log_odds_ratio": -0.40685027837753296, + "logits/chosen": -0.40497708320617676, + "logits/rejected": -0.3547658324241638, + "logps/chosen": -0.06767724454402924, + "logps/rejected": -0.5502372980117798, + "loss": 2.753, + "nll_loss": 0.6475711464881897, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006767723709344864, + "rewards/margins": 0.048256002366542816, + "rewards/rejected": -0.05502372980117798, + "step": 1965 + }, + { + "epoch": 1.359612724757953, + "grad_norm": 6.931267738342285, + "learning_rate": 4.800215152912249e-05, + "log_odds_chosen": 5.26509952545166, + "log_odds_ratio": -0.15575455129146576, + "logits/chosen": -0.48398256301879883, + "logits/rejected": -0.5567474961280823, + "logps/chosen": -0.06316303461790085, + "logps/rejected": -0.8755611181259155, + "loss": 3.2898, + "nll_loss": 0.8068767189979553, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00631630327552557, + "rewards/margins": 0.08123980462551117, + "rewards/rejected": -0.08755610883235931, + "step": 1966 + }, + { + "epoch": 1.3603042876901799, + "grad_norm": 4.129261016845703, + "learning_rate": 4.799830951283233e-05, + "log_odds_chosen": 4.578372001647949, + "log_odds_ratio": -0.0830615758895874, + "logits/chosen": -0.4261839985847473, + "logits/rejected": -0.46446114778518677, + "logps/chosen": -0.0838017538189888, + "logps/rejected": -0.7984384298324585, + "loss": 2.2135, + "nll_loss": 0.5450767874717712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008380175568163395, + "rewards/margins": 0.07146366685628891, + "rewards/rejected": -0.07984384894371033, + "step": 1967 + }, + { + "epoch": 1.3609958506224067, + "grad_norm": 7.837843894958496, + "learning_rate": 4.799446749654219e-05, + "log_odds_chosen": 1.4155436754226685, + "log_odds_ratio": -0.6807968616485596, + "logits/chosen": -0.6102103590965271, + "logits/rejected": -0.6006621718406677, + "logps/chosen": -0.18789350986480713, + "logps/rejected": -0.41446274518966675, + "loss": 3.5981, + "nll_loss": 0.8314481377601624, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018789350986480713, + "rewards/margins": 0.022656923159956932, + "rewards/rejected": -0.041446272283792496, + "step": 1968 + }, + { + "epoch": 1.3616874135546335, + "grad_norm": 5.931718349456787, + "learning_rate": 4.799062548025204e-05, + "log_odds_chosen": 4.071783542633057, + "log_odds_ratio": -0.22267219424247742, + "logits/chosen": -0.836118221282959, + "logits/rejected": -0.8715649247169495, + "logps/chosen": -0.04238056018948555, + "logps/rejected": -0.7004839181900024, + "loss": 3.8172, + "nll_loss": 0.9320341944694519, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004238056018948555, + "rewards/margins": 0.06581033766269684, + "rewards/rejected": -0.07004839926958084, + "step": 1969 + }, + { + "epoch": 1.3623789764868603, + "grad_norm": 6.518864631652832, + "learning_rate": 4.798678346396189e-05, + "log_odds_chosen": 4.345464706420898, + "log_odds_ratio": -0.31315863132476807, + "logits/chosen": -0.6527379751205444, + "logits/rejected": -0.6448567509651184, + "logps/chosen": -0.11621613055467606, + "logps/rejected": -0.5999836921691895, + "loss": 4.2446, + "nll_loss": 1.0298457145690918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011621613055467606, + "rewards/margins": 0.04837675765156746, + "rewards/rejected": -0.059998370707035065, + "step": 1970 + }, + { + "epoch": 1.3630705394190872, + "grad_norm": 6.676383972167969, + "learning_rate": 4.798294144767174e-05, + "log_odds_chosen": 5.199181079864502, + "log_odds_ratio": -0.26002073287963867, + "logits/chosen": -0.46755251288414, + "logits/rejected": -0.5575705766677856, + "logps/chosen": -0.06826357543468475, + "logps/rejected": -0.875418484210968, + "loss": 2.9096, + "nll_loss": 0.7013946771621704, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006826357915997505, + "rewards/margins": 0.08071549236774445, + "rewards/rejected": -0.0875418558716774, + "step": 1971 + }, + { + "epoch": 1.363762102351314, + "grad_norm": 4.894266605377197, + "learning_rate": 4.797909943138159e-05, + "log_odds_chosen": 2.65413761138916, + "log_odds_ratio": -0.24586760997772217, + "logits/chosen": -0.6672540903091431, + "logits/rejected": -0.6396835446357727, + "logps/chosen": -0.0630340427160263, + "logps/rejected": -0.6643854379653931, + "loss": 2.3291, + "nll_loss": 0.5576860308647156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0063034044578671455, + "rewards/margins": 0.06013514846563339, + "rewards/rejected": -0.06643854826688766, + "step": 1972 + }, + { + "epoch": 1.3644536652835408, + "grad_norm": 4.742177963256836, + "learning_rate": 4.797525741509144e-05, + "log_odds_chosen": 2.2191128730773926, + "log_odds_ratio": -0.4817036986351013, + "logits/chosen": -0.4750691056251526, + "logits/rejected": -0.5359756350517273, + "logps/chosen": -0.10734833031892776, + "logps/rejected": -0.42450714111328125, + "loss": 3.7213, + "nll_loss": 0.8821476697921753, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010734833776950836, + "rewards/margins": 0.03171588107943535, + "rewards/rejected": -0.04245071858167648, + "step": 1973 + }, + { + "epoch": 1.3651452282157677, + "grad_norm": 3.849248170852661, + "learning_rate": 4.797141539880129e-05, + "log_odds_chosen": 2.6243133544921875, + "log_odds_ratio": -0.48991310596466064, + "logits/chosen": -0.3240754008293152, + "logits/rejected": -0.3246554136276245, + "logps/chosen": -0.1363077461719513, + "logps/rejected": -0.4788323938846588, + "loss": 2.3044, + "nll_loss": 0.527114748954773, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01363077387213707, + "rewards/margins": 0.03425246477127075, + "rewards/rejected": -0.04788323864340782, + "step": 1974 + }, + { + "epoch": 1.3658367911479945, + "grad_norm": 7.029124736785889, + "learning_rate": 4.7967573382511145e-05, + "log_odds_chosen": 3.2474732398986816, + "log_odds_ratio": -0.22893860936164856, + "logits/chosen": -0.678703784942627, + "logits/rejected": -0.6811733841896057, + "logps/chosen": -0.06615074723958969, + "logps/rejected": -0.5361521244049072, + "loss": 3.8263, + "nll_loss": 0.9336775541305542, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006615075282752514, + "rewards/margins": 0.047000136226415634, + "rewards/rejected": -0.05361521244049072, + "step": 1975 + }, + { + "epoch": 1.3665283540802213, + "grad_norm": 4.65273904800415, + "learning_rate": 4.796373136622099e-05, + "log_odds_chosen": 3.6754541397094727, + "log_odds_ratio": -0.12889450788497925, + "logits/chosen": -0.7940229177474976, + "logits/rejected": -0.7994104623794556, + "logps/chosen": -0.05574585497379303, + "logps/rejected": -0.6791449785232544, + "loss": 3.0074, + "nll_loss": 0.7389633655548096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0055745854042470455, + "rewards/margins": 0.062339916825294495, + "rewards/rejected": -0.06791450083255768, + "step": 1976 + }, + { + "epoch": 1.3672199170124482, + "grad_norm": 6.906028747558594, + "learning_rate": 4.795988934993085e-05, + "log_odds_chosen": 4.955214500427246, + "log_odds_ratio": -0.2700616717338562, + "logits/chosen": -0.33470577001571655, + "logits/rejected": -0.41006895899772644, + "logps/chosen": -0.060129314661026, + "logps/rejected": -1.0193614959716797, + "loss": 2.8268, + "nll_loss": 0.679686427116394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0060129314661026, + "rewards/margins": 0.09592323005199432, + "rewards/rejected": -0.10193616151809692, + "step": 1977 + }, + { + "epoch": 1.367911479944675, + "grad_norm": 5.417724132537842, + "learning_rate": 4.7956047333640696e-05, + "log_odds_chosen": 3.265223979949951, + "log_odds_ratio": -0.1482883095741272, + "logits/chosen": -0.47248220443725586, + "logits/rejected": -0.47120919823646545, + "logps/chosen": -0.0854690670967102, + "logps/rejected": -0.8021750450134277, + "loss": 3.5534, + "nll_loss": 0.873532235622406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008546906523406506, + "rewards/margins": 0.07167059183120728, + "rewards/rejected": -0.08021750301122665, + "step": 1978 + }, + { + "epoch": 1.3686030428769018, + "grad_norm": 5.667328834533691, + "learning_rate": 4.795220531735055e-05, + "log_odds_chosen": 2.9932069778442383, + "log_odds_ratio": -0.2437632977962494, + "logits/chosen": -0.6204601526260376, + "logits/rejected": -0.6771796941757202, + "logps/chosen": -0.09919442236423492, + "logps/rejected": -0.6160801649093628, + "loss": 2.8913, + "nll_loss": 0.6984534859657288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009919442236423492, + "rewards/margins": 0.05168858543038368, + "rewards/rejected": -0.061608027666807175, + "step": 1979 + }, + { + "epoch": 1.3692946058091287, + "grad_norm": 5.357570648193359, + "learning_rate": 4.79483633010604e-05, + "log_odds_chosen": 2.7944960594177246, + "log_odds_ratio": -0.25465598702430725, + "logits/chosen": -0.6888865828514099, + "logits/rejected": -0.7295427918434143, + "logps/chosen": -0.13671831786632538, + "logps/rejected": -0.6448003649711609, + "loss": 3.5759, + "nll_loss": 0.8685050010681152, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013671832159161568, + "rewards/margins": 0.05080820620059967, + "rewards/rejected": -0.06448003649711609, + "step": 1980 + }, + { + "epoch": 1.3699861687413555, + "grad_norm": 7.210214614868164, + "learning_rate": 4.7944521284770246e-05, + "log_odds_chosen": 3.735138177871704, + "log_odds_ratio": -0.24184566736221313, + "logits/chosen": -0.759207546710968, + "logits/rejected": -0.773901641368866, + "logps/chosen": -0.04999281466007233, + "logps/rejected": -0.5645262002944946, + "loss": 3.4228, + "nll_loss": 0.8315247297286987, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004999281372874975, + "rewards/margins": 0.05145333334803581, + "rewards/rejected": -0.056452613323926926, + "step": 1981 + }, + { + "epoch": 1.3706777316735823, + "grad_norm": 5.053602695465088, + "learning_rate": 4.79406792684801e-05, + "log_odds_chosen": 3.7224245071411133, + "log_odds_ratio": -0.2283429205417633, + "logits/chosen": -0.6872579455375671, + "logits/rejected": -0.7188456654548645, + "logps/chosen": -0.09103509783744812, + "logps/rejected": -0.6439297199249268, + "loss": 2.5801, + "nll_loss": 0.6221836805343628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009103509597480297, + "rewards/margins": 0.055289462208747864, + "rewards/rejected": -0.06439296901226044, + "step": 1982 + }, + { + "epoch": 1.3713692946058091, + "grad_norm": 4.521610736846924, + "learning_rate": 4.793683725218995e-05, + "log_odds_chosen": 3.951543092727661, + "log_odds_ratio": -0.3210464119911194, + "logits/chosen": -0.4027020335197449, + "logits/rejected": -0.448123037815094, + "logps/chosen": -0.0801689624786377, + "logps/rejected": -0.743116557598114, + "loss": 2.79, + "nll_loss": 0.6653933525085449, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00801689550280571, + "rewards/margins": 0.06629475951194763, + "rewards/rejected": -0.07431165128946304, + "step": 1983 + }, + { + "epoch": 1.372060857538036, + "grad_norm": 6.1983723640441895, + "learning_rate": 4.7932995235899804e-05, + "log_odds_chosen": 5.588142395019531, + "log_odds_ratio": -0.04881987348198891, + "logits/chosen": -0.22947348654270172, + "logits/rejected": -0.18946635723114014, + "logps/chosen": -0.04573337733745575, + "logps/rejected": -1.1007776260375977, + "loss": 2.6265, + "nll_loss": 0.6517492532730103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004573337268084288, + "rewards/margins": 0.10550442337989807, + "rewards/rejected": -0.11007775366306305, + "step": 1984 + }, + { + "epoch": 1.3727524204702628, + "grad_norm": 8.349906921386719, + "learning_rate": 4.792915321960965e-05, + "log_odds_chosen": 3.1251940727233887, + "log_odds_ratio": -0.6032139658927917, + "logits/chosen": -0.514311671257019, + "logits/rejected": -0.5597679615020752, + "logps/chosen": -0.182631254196167, + "logps/rejected": -0.6719191074371338, + "loss": 3.0019, + "nll_loss": 0.6901443004608154, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01826312765479088, + "rewards/margins": 0.04892878234386444, + "rewards/rejected": -0.06719191372394562, + "step": 1985 + }, + { + "epoch": 1.3734439834024896, + "grad_norm": 7.041315078735352, + "learning_rate": 4.792531120331951e-05, + "log_odds_chosen": 4.945158958435059, + "log_odds_ratio": -0.2092055380344391, + "logits/chosen": -0.2713004946708679, + "logits/rejected": -0.2928984761238098, + "logps/chosen": -0.06935533881187439, + "logps/rejected": -1.159374713897705, + "loss": 2.9727, + "nll_loss": 0.7222519516944885, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006935533601790667, + "rewards/margins": 0.10900193452835083, + "rewards/rejected": -0.11593747138977051, + "step": 1986 + }, + { + "epoch": 1.3741355463347165, + "grad_norm": 5.349747180938721, + "learning_rate": 4.7921469187029354e-05, + "log_odds_chosen": 1.9877170324325562, + "log_odds_ratio": -0.2859208583831787, + "logits/chosen": -0.3041594326496124, + "logits/rejected": -0.32395341992378235, + "logps/chosen": -0.10919293761253357, + "logps/rejected": -0.41289645433425903, + "loss": 4.014, + "nll_loss": 0.9749143123626709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010919294320046902, + "rewards/margins": 0.030370350927114487, + "rewards/rejected": -0.041289642453193665, + "step": 1987 + }, + { + "epoch": 1.3748271092669433, + "grad_norm": 5.41785192489624, + "learning_rate": 4.791762717073921e-05, + "log_odds_chosen": 5.79414176940918, + "log_odds_ratio": -0.16315719485282898, + "logits/chosen": -0.2273552268743515, + "logits/rejected": -0.25023555755615234, + "logps/chosen": -0.031983837485313416, + "logps/rejected": -0.6889051198959351, + "loss": 3.0326, + "nll_loss": 0.7418302297592163, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003198383841663599, + "rewards/margins": 0.06569212675094604, + "rewards/rejected": -0.0688905119895935, + "step": 1988 + }, + { + "epoch": 1.3755186721991701, + "grad_norm": 7.066270351409912, + "learning_rate": 4.791378515444906e-05, + "log_odds_chosen": 3.625401735305786, + "log_odds_ratio": -0.13533686101436615, + "logits/chosen": -0.33719366788864136, + "logits/rejected": -0.37789204716682434, + "logps/chosen": -0.05393574386835098, + "logps/rejected": -0.6988438367843628, + "loss": 4.1022, + "nll_loss": 1.0120058059692383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005393574479967356, + "rewards/margins": 0.06449081003665924, + "rewards/rejected": -0.06988438963890076, + "step": 1989 + }, + { + "epoch": 1.376210235131397, + "grad_norm": 7.164843559265137, + "learning_rate": 4.7909943138158905e-05, + "log_odds_chosen": 5.247487545013428, + "log_odds_ratio": -0.1880350410938263, + "logits/chosen": -0.6329569816589355, + "logits/rejected": -0.5379258990287781, + "logps/chosen": -0.029778484255075455, + "logps/rejected": -0.6082863211631775, + "loss": 3.8988, + "nll_loss": 0.955906093120575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002977848518639803, + "rewards/margins": 0.05785078555345535, + "rewards/rejected": -0.06082863733172417, + "step": 1990 + }, + { + "epoch": 1.3769017980636238, + "grad_norm": 5.449748516082764, + "learning_rate": 4.790610112186876e-05, + "log_odds_chosen": 3.5334930419921875, + "log_odds_ratio": -0.21102845668792725, + "logits/chosen": -0.41154390573501587, + "logits/rejected": -0.38675880432128906, + "logps/chosen": -0.08894149214029312, + "logps/rejected": -0.6938140392303467, + "loss": 3.6819, + "nll_loss": 0.8993774652481079, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008894150145351887, + "rewards/margins": 0.060487259179353714, + "rewards/rejected": -0.06938140839338303, + "step": 1991 + }, + { + "epoch": 1.3775933609958506, + "grad_norm": 5.393478870391846, + "learning_rate": 4.790225910557861e-05, + "log_odds_chosen": 3.826383590698242, + "log_odds_ratio": -0.36511868238449097, + "logits/chosen": -0.29448530077934265, + "logits/rejected": -0.2909614145755768, + "logps/chosen": -0.13047395646572113, + "logps/rejected": -0.8310095071792603, + "loss": 2.8987, + "nll_loss": 0.6881729364395142, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013047396205365658, + "rewards/margins": 0.07005355507135391, + "rewards/rejected": -0.08310095220804214, + "step": 1992 + }, + { + "epoch": 1.3782849239280774, + "grad_norm": 6.349294662475586, + "learning_rate": 4.789841708928846e-05, + "log_odds_chosen": 4.048874855041504, + "log_odds_ratio": -0.2582094371318817, + "logits/chosen": -0.5665625333786011, + "logits/rejected": -0.6103119850158691, + "logps/chosen": -0.09019026905298233, + "logps/rejected": -0.5480784177780151, + "loss": 3.733, + "nll_loss": 0.907441258430481, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009019027464091778, + "rewards/margins": 0.04578881710767746, + "rewards/rejected": -0.054807838052511215, + "step": 1993 + }, + { + "epoch": 1.3789764868603043, + "grad_norm": 6.726129055023193, + "learning_rate": 4.789457507299831e-05, + "log_odds_chosen": 3.7346792221069336, + "log_odds_ratio": -0.3640691339969635, + "logits/chosen": -0.3173547685146332, + "logits/rejected": -0.3681148588657379, + "logps/chosen": -0.14127859473228455, + "logps/rejected": -0.597007155418396, + "loss": 2.7151, + "nll_loss": 0.642366349697113, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01412785891443491, + "rewards/margins": 0.045572854578495026, + "rewards/rejected": -0.05970071256160736, + "step": 1994 + }, + { + "epoch": 1.379668049792531, + "grad_norm": 6.565385341644287, + "learning_rate": 4.789073305670817e-05, + "log_odds_chosen": 4.064783573150635, + "log_odds_ratio": -0.21562957763671875, + "logits/chosen": -0.5860500335693359, + "logits/rejected": -0.688904881477356, + "logps/chosen": -0.060132626444101334, + "logps/rejected": -1.0031733512878418, + "loss": 3.8129, + "nll_loss": 0.9316558837890625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006013263016939163, + "rewards/margins": 0.09430407732725143, + "rewards/rejected": -0.1003173366189003, + "step": 1995 + }, + { + "epoch": 1.380359612724758, + "grad_norm": 5.72561502456665, + "learning_rate": 4.788689104041801e-05, + "log_odds_chosen": 4.816940784454346, + "log_odds_ratio": -0.2247188836336136, + "logits/chosen": -0.30259183049201965, + "logits/rejected": -0.2999754548072815, + "logps/chosen": -0.0902637168765068, + "logps/rejected": -1.0141100883483887, + "loss": 2.7829, + "nll_loss": 0.6732596158981323, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009026371873915195, + "rewards/margins": 0.09238463640213013, + "rewards/rejected": -0.10141100734472275, + "step": 1996 + }, + { + "epoch": 1.3810511756569848, + "grad_norm": 5.127386093139648, + "learning_rate": 4.7883049024127865e-05, + "log_odds_chosen": 4.620459079742432, + "log_odds_ratio": -0.16839353740215302, + "logits/chosen": -0.416474312543869, + "logits/rejected": -0.4174883961677551, + "logps/chosen": -0.09087523818016052, + "logps/rejected": -0.835807204246521, + "loss": 3.4169, + "nll_loss": 0.8373754620552063, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009087524376809597, + "rewards/margins": 0.07449319213628769, + "rewards/rejected": -0.08358071744441986, + "step": 1997 + }, + { + "epoch": 1.3817427385892116, + "grad_norm": 6.234887599945068, + "learning_rate": 4.787920700783772e-05, + "log_odds_chosen": 4.776619911193848, + "log_odds_ratio": -0.18312208354473114, + "logits/chosen": -0.635537326335907, + "logits/rejected": -0.6250307559967041, + "logps/chosen": -0.043525900691747665, + "logps/rejected": -0.9725822806358337, + "loss": 2.6875, + "nll_loss": 0.6535642147064209, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0043525900691747665, + "rewards/margins": 0.09290564060211182, + "rewards/rejected": -0.09725822508335114, + "step": 1998 + }, + { + "epoch": 1.3824343015214384, + "grad_norm": 5.526193618774414, + "learning_rate": 4.787536499154756e-05, + "log_odds_chosen": 4.099743843078613, + "log_odds_ratio": -0.3171989321708679, + "logits/chosen": -0.8929038047790527, + "logits/rejected": -0.8908524513244629, + "logps/chosen": -0.09101560711860657, + "logps/rejected": -0.821946382522583, + "loss": 3.2194, + "nll_loss": 0.7731227874755859, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009101560339331627, + "rewards/margins": 0.07309307903051376, + "rewards/rejected": -0.08219464123249054, + "step": 1999 + }, + { + "epoch": 1.3831258644536653, + "grad_norm": 7.315108299255371, + "learning_rate": 4.7871522975257416e-05, + "log_odds_chosen": 4.748763084411621, + "log_odds_ratio": -0.15661533176898956, + "logits/chosen": -0.8928630948066711, + "logits/rejected": -0.9290038347244263, + "logps/chosen": -0.05364002287387848, + "logps/rejected": -1.0081446170806885, + "loss": 3.0047, + "nll_loss": 0.735511064529419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0053640021942555904, + "rewards/margins": 0.09545045346021652, + "rewards/rejected": -0.10081446170806885, + "step": 2000 + }, + { + "epoch": 1.383817427385892, + "grad_norm": 7.51612663269043, + "learning_rate": 4.786768095896727e-05, + "log_odds_chosen": 6.139799118041992, + "log_odds_ratio": -0.22766713798046112, + "logits/chosen": -0.242156982421875, + "logits/rejected": -0.3143760561943054, + "logps/chosen": -0.018331004306674004, + "logps/rejected": -0.9860979318618774, + "loss": 3.02, + "nll_loss": 0.7322361469268799, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018331005703657866, + "rewards/margins": 0.09677669405937195, + "rewards/rejected": -0.0986097976565361, + "step": 2001 + }, + { + "epoch": 1.384508990318119, + "grad_norm": 6.356954574584961, + "learning_rate": 4.786383894267712e-05, + "log_odds_chosen": 3.1617636680603027, + "log_odds_ratio": -0.3131043016910553, + "logits/chosen": -0.7802184820175171, + "logits/rejected": -0.7662279009819031, + "logps/chosen": -0.09586849063634872, + "logps/rejected": -0.48525965213775635, + "loss": 3.7224, + "nll_loss": 0.89928138256073, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009586849249899387, + "rewards/margins": 0.03893912211060524, + "rewards/rejected": -0.04852597042918205, + "step": 2002 + }, + { + "epoch": 1.3852005532503457, + "grad_norm": 7.20344877243042, + "learning_rate": 4.7859996926386966e-05, + "log_odds_chosen": 3.3825900554656982, + "log_odds_ratio": -0.33774444460868835, + "logits/chosen": -0.7070195078849792, + "logits/rejected": -0.7348042726516724, + "logps/chosen": -0.08501673489809036, + "logps/rejected": -0.8383290767669678, + "loss": 3.6556, + "nll_loss": 0.8801342844963074, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008501673117280006, + "rewards/margins": 0.07533122599124908, + "rewards/rejected": -0.08383290469646454, + "step": 2003 + }, + { + "epoch": 1.3858921161825726, + "grad_norm": 11.765769958496094, + "learning_rate": 4.7856154910096825e-05, + "log_odds_chosen": 2.907918930053711, + "log_odds_ratio": -0.6590770483016968, + "logits/chosen": -0.4293488562107086, + "logits/rejected": -0.47192126512527466, + "logps/chosen": -0.2054803967475891, + "logps/rejected": -0.6298776268959045, + "loss": 5.2493, + "nll_loss": 1.24642813205719, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02054804004728794, + "rewards/margins": 0.04243971407413483, + "rewards/rejected": -0.06298775970935822, + "step": 2004 + }, + { + "epoch": 1.3865836791147994, + "grad_norm": 3.930716037750244, + "learning_rate": 4.785231289380667e-05, + "log_odds_chosen": 4.3756890296936035, + "log_odds_ratio": -0.13180823624134064, + "logits/chosen": -0.6425517201423645, + "logits/rejected": -0.6409659385681152, + "logps/chosen": -0.05571923032402992, + "logps/rejected": -0.7726579904556274, + "loss": 2.7233, + "nll_loss": 0.6676549911499023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00557192275300622, + "rewards/margins": 0.07169388234615326, + "rewards/rejected": -0.07726580649614334, + "step": 2005 + }, + { + "epoch": 1.3872752420470262, + "grad_norm": 5.899771690368652, + "learning_rate": 4.7848470877516524e-05, + "log_odds_chosen": 4.994879722595215, + "log_odds_ratio": -0.46346139907836914, + "logits/chosen": -0.7579612135887146, + "logits/rejected": -0.7698273658752441, + "logps/chosen": -0.11928918212652206, + "logps/rejected": -1.1728246212005615, + "loss": 2.927, + "nll_loss": 0.6854143142700195, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011928917840123177, + "rewards/margins": 0.10535356402397156, + "rewards/rejected": -0.11728248000144958, + "step": 2006 + }, + { + "epoch": 1.387966804979253, + "grad_norm": 7.911510467529297, + "learning_rate": 4.7844628861226376e-05, + "log_odds_chosen": 2.2403409481048584, + "log_odds_ratio": -0.45800328254699707, + "logits/chosen": -0.7115573883056641, + "logits/rejected": -0.736803412437439, + "logps/chosen": -0.1595693677663803, + "logps/rejected": -0.5836013555526733, + "loss": 3.9246, + "nll_loss": 0.9353399872779846, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015956936404109, + "rewards/margins": 0.042403195053339005, + "rewards/rejected": -0.058360133320093155, + "step": 2007 + }, + { + "epoch": 1.38865836791148, + "grad_norm": 5.979852199554443, + "learning_rate": 4.784078684493623e-05, + "log_odds_chosen": 5.933134078979492, + "log_odds_ratio": -0.018309567123651505, + "logits/chosen": -0.5352558493614197, + "logits/rejected": -0.5490735769271851, + "logps/chosen": -0.024755114689469337, + "logps/rejected": -0.852277398109436, + "loss": 3.4945, + "nll_loss": 0.8717933893203735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002475511282682419, + "rewards/margins": 0.08275222778320312, + "rewards/rejected": -0.08522774279117584, + "step": 2008 + }, + { + "epoch": 1.3893499308437067, + "grad_norm": 6.766330718994141, + "learning_rate": 4.7836944828646074e-05, + "log_odds_chosen": 3.526118278503418, + "log_odds_ratio": -0.25739622116088867, + "logits/chosen": -0.6153137683868408, + "logits/rejected": -0.6079054474830627, + "logps/chosen": -0.06622539460659027, + "logps/rejected": -0.657639741897583, + "loss": 3.1783, + "nll_loss": 0.7688472270965576, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00662253936752677, + "rewards/margins": 0.05914144217967987, + "rewards/rejected": -0.06576397269964218, + "step": 2009 + }, + { + "epoch": 1.3900414937759336, + "grad_norm": 4.855165481567383, + "learning_rate": 4.7833102812355927e-05, + "log_odds_chosen": 4.4697265625, + "log_odds_ratio": -0.046659186482429504, + "logits/chosen": -0.36698010563850403, + "logits/rejected": -0.4080663323402405, + "logps/chosen": -0.03516608104109764, + "logps/rejected": -0.8239533305168152, + "loss": 3.2684, + "nll_loss": 0.8124281764030457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003516607917845249, + "rewards/margins": 0.0788787230849266, + "rewards/rejected": -0.08239533007144928, + "step": 2010 + }, + { + "epoch": 1.3907330567081604, + "grad_norm": 11.912924766540527, + "learning_rate": 4.782926079606578e-05, + "log_odds_chosen": 2.89589262008667, + "log_odds_ratio": -0.2935729920864105, + "logits/chosen": -0.29961317777633667, + "logits/rejected": -0.36233705282211304, + "logps/chosen": -0.09414010494947433, + "logps/rejected": -0.6591374278068542, + "loss": 4.819, + "nll_loss": 1.1753848791122437, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009414010681211948, + "rewards/margins": 0.05649973452091217, + "rewards/rejected": -0.06591374427080154, + "step": 2011 + }, + { + "epoch": 1.3914246196403872, + "grad_norm": 8.001628875732422, + "learning_rate": 4.7825418779775625e-05, + "log_odds_chosen": 4.689050674438477, + "log_odds_ratio": -0.18099285662174225, + "logits/chosen": -0.28261780738830566, + "logits/rejected": -0.29922106862068176, + "logps/chosen": -0.0453483983874321, + "logps/rejected": -0.8597908616065979, + "loss": 4.1907, + "nll_loss": 1.0295741558074951, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004534840118139982, + "rewards/margins": 0.08144424110651016, + "rewards/rejected": -0.08597908914089203, + "step": 2012 + }, + { + "epoch": 1.392116182572614, + "grad_norm": 7.775978088378906, + "learning_rate": 4.7821576763485484e-05, + "log_odds_chosen": 5.596604347229004, + "log_odds_ratio": -0.16852453351020813, + "logits/chosen": -0.5239167809486389, + "logits/rejected": -0.4925086498260498, + "logps/chosen": -0.045228488743305206, + "logps/rejected": -0.7164972424507141, + "loss": 4.5063, + "nll_loss": 1.1097182035446167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004522849339991808, + "rewards/margins": 0.06712687015533447, + "rewards/rejected": -0.07164972275495529, + "step": 2013 + }, + { + "epoch": 1.3928077455048409, + "grad_norm": 6.019063949584961, + "learning_rate": 4.781773474719533e-05, + "log_odds_chosen": 3.483452320098877, + "log_odds_ratio": -0.1692686378955841, + "logits/chosen": -0.4919688403606415, + "logits/rejected": -0.5416184663772583, + "logps/chosen": -0.07065532356500626, + "logps/rejected": -0.6099762916564941, + "loss": 2.8116, + "nll_loss": 0.685977578163147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007065531797707081, + "rewards/margins": 0.05393209308385849, + "rewards/rejected": -0.060997627675533295, + "step": 2014 + }, + { + "epoch": 1.3934993084370677, + "grad_norm": 4.556207656860352, + "learning_rate": 4.781389273090518e-05, + "log_odds_chosen": 5.876942157745361, + "log_odds_ratio": -0.11567544937133789, + "logits/chosen": -0.8399274349212646, + "logits/rejected": -0.8773524761199951, + "logps/chosen": -0.029017897322773933, + "logps/rejected": -0.9330223202705383, + "loss": 3.1699, + "nll_loss": 0.7809014916419983, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029017897322773933, + "rewards/margins": 0.09040044993162155, + "rewards/rejected": -0.09330223500728607, + "step": 2015 + }, + { + "epoch": 1.3941908713692945, + "grad_norm": 4.728296279907227, + "learning_rate": 4.7810050714615034e-05, + "log_odds_chosen": 7.070993423461914, + "log_odds_ratio": -0.006337965838611126, + "logits/chosen": -0.5788023471832275, + "logits/rejected": -0.6077452898025513, + "logps/chosen": -0.0022974973544478416, + "logps/rejected": -0.7637322545051575, + "loss": 2.648, + "nll_loss": 0.6613626480102539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022974974126555026, + "rewards/margins": 0.07614347338676453, + "rewards/rejected": -0.07637323439121246, + "step": 2016 + }, + { + "epoch": 1.3948824343015214, + "grad_norm": 5.20559024810791, + "learning_rate": 4.780620869832489e-05, + "log_odds_chosen": 6.8388824462890625, + "log_odds_ratio": -0.03591344505548477, + "logits/chosen": -0.33628082275390625, + "logits/rejected": -0.35934945940971375, + "logps/chosen": -0.008574387989938259, + "logps/rejected": -0.8044378757476807, + "loss": 3.558, + "nll_loss": 0.8859061002731323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008574387757107615, + "rewards/margins": 0.07958635687828064, + "rewards/rejected": -0.08044379204511642, + "step": 2017 + }, + { + "epoch": 1.3955739972337482, + "grad_norm": 7.665406703948975, + "learning_rate": 4.780236668203473e-05, + "log_odds_chosen": 6.559147834777832, + "log_odds_ratio": -0.058827269822359085, + "logits/chosen": -0.6809530854225159, + "logits/rejected": -0.709974467754364, + "logps/chosen": -0.0708513855934143, + "logps/rejected": -1.248304843902588, + "loss": 4.2583, + "nll_loss": 1.0586938858032227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007085138466209173, + "rewards/margins": 0.11774535477161407, + "rewards/rejected": -0.12483049929141998, + "step": 2018 + }, + { + "epoch": 1.396265560165975, + "grad_norm": 5.273054599761963, + "learning_rate": 4.7798524665744585e-05, + "log_odds_chosen": 5.406514644622803, + "log_odds_ratio": -0.049261778593063354, + "logits/chosen": -0.6238724589347839, + "logits/rejected": -0.616300106048584, + "logps/chosen": -0.014879455789923668, + "logps/rejected": -0.5857996344566345, + "loss": 3.1058, + "nll_loss": 0.7715163826942444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014879456721246243, + "rewards/margins": 0.05709202215075493, + "rewards/rejected": -0.05857996270060539, + "step": 2019 + }, + { + "epoch": 1.3969571230982019, + "grad_norm": 3.7898244857788086, + "learning_rate": 4.779468264945444e-05, + "log_odds_chosen": 7.883171081542969, + "log_odds_ratio": -0.0038941281381994486, + "logits/chosen": -0.43187326192855835, + "logits/rejected": -0.44849109649658203, + "logps/chosen": -0.007364689838141203, + "logps/rejected": -1.025825023651123, + "loss": 2.5187, + "nll_loss": 0.6292957067489624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007364689372479916, + "rewards/margins": 0.10184603929519653, + "rewards/rejected": -0.10258250683546066, + "step": 2020 + }, + { + "epoch": 1.3976486860304287, + "grad_norm": 7.709190368652344, + "learning_rate": 4.779084063316428e-05, + "log_odds_chosen": 4.407033443450928, + "log_odds_ratio": -0.10557064414024353, + "logits/chosen": -0.6062334775924683, + "logits/rejected": -0.5914596915245056, + "logps/chosen": -0.053902555257081985, + "logps/rejected": -0.7024961709976196, + "loss": 4.9311, + "nll_loss": 1.2222228050231934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005390255246311426, + "rewards/margins": 0.06485936045646667, + "rewards/rejected": -0.07024961709976196, + "step": 2021 + }, + { + "epoch": 1.3983402489626555, + "grad_norm": 5.200230121612549, + "learning_rate": 4.778699861687414e-05, + "log_odds_chosen": 5.399484634399414, + "log_odds_ratio": -0.21866726875305176, + "logits/chosen": -0.4181535542011261, + "logits/rejected": -0.4063791036605835, + "logps/chosen": -0.060105275362730026, + "logps/rejected": -1.2082123756408691, + "loss": 3.6573, + "nll_loss": 0.8924470543861389, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00601052725687623, + "rewards/margins": 0.11481072008609772, + "rewards/rejected": -0.12082124501466751, + "step": 2022 + }, + { + "epoch": 1.3990318118948823, + "grad_norm": 6.319047451019287, + "learning_rate": 4.778315660058399e-05, + "log_odds_chosen": 5.292322158813477, + "log_odds_ratio": -0.09680824726819992, + "logits/chosen": -0.5350341796875, + "logits/rejected": -0.5699993371963501, + "logps/chosen": -0.04791120067238808, + "logps/rejected": -0.6731879115104675, + "loss": 2.9202, + "nll_loss": 0.7203590869903564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004791120067238808, + "rewards/margins": 0.06252767145633698, + "rewards/rejected": -0.06731878966093063, + "step": 2023 + }, + { + "epoch": 1.3997233748271092, + "grad_norm": 8.506767272949219, + "learning_rate": 4.777931458429384e-05, + "log_odds_chosen": 5.026066303253174, + "log_odds_ratio": -0.35519641637802124, + "logits/chosen": -0.3691147565841675, + "logits/rejected": -0.40069153904914856, + "logps/chosen": -0.08445140719413757, + "logps/rejected": -1.0374783277511597, + "loss": 4.6222, + "nll_loss": 1.1200363636016846, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008445140905678272, + "rewards/margins": 0.09530268609523773, + "rewards/rejected": -0.10374782979488373, + "step": 2024 + }, + { + "epoch": 1.400414937759336, + "grad_norm": 3.4464073181152344, + "learning_rate": 4.777547256800369e-05, + "log_odds_chosen": 5.708957195281982, + "log_odds_ratio": -0.05988878756761551, + "logits/chosen": -0.6721937656402588, + "logits/rejected": -0.635280966758728, + "logps/chosen": -0.048227760940790176, + "logps/rejected": -1.0062072277069092, + "loss": 3.2867, + "nll_loss": 0.815693199634552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004822776187211275, + "rewards/margins": 0.09579794853925705, + "rewards/rejected": -0.10062072426080704, + "step": 2025 + }, + { + "epoch": 1.4011065006915628, + "grad_norm": 5.286276817321777, + "learning_rate": 4.7771630551713545e-05, + "log_odds_chosen": 5.2256927490234375, + "log_odds_ratio": -0.1297086924314499, + "logits/chosen": -0.5953899621963501, + "logits/rejected": -0.6047387719154358, + "logps/chosen": -0.05348531901836395, + "logps/rejected": -1.1254825592041016, + "loss": 2.8011, + "nll_loss": 0.6873102188110352, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005348531529307365, + "rewards/margins": 0.10719972103834152, + "rewards/rejected": -0.11254825443029404, + "step": 2026 + }, + { + "epoch": 1.4017980636237897, + "grad_norm": 10.278658866882324, + "learning_rate": 4.776778853542339e-05, + "log_odds_chosen": 4.066064834594727, + "log_odds_ratio": -0.500348687171936, + "logits/chosen": -0.755004346370697, + "logits/rejected": -0.8277742266654968, + "logps/chosen": -0.06693331897258759, + "logps/rejected": -1.083938717842102, + "loss": 3.9488, + "nll_loss": 0.9371687769889832, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006693332456052303, + "rewards/margins": 0.10170053690671921, + "rewards/rejected": -0.10839387029409409, + "step": 2027 + }, + { + "epoch": 1.4024896265560165, + "grad_norm": 6.364788055419922, + "learning_rate": 4.7763946519133243e-05, + "log_odds_chosen": 3.97965669631958, + "log_odds_ratio": -0.28485307097435, + "logits/chosen": -0.5287357568740845, + "logits/rejected": -0.5590074062347412, + "logps/chosen": -0.04807409271597862, + "logps/rejected": -0.5102754831314087, + "loss": 3.0035, + "nll_loss": 0.7223997712135315, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00480740936473012, + "rewards/margins": 0.046220146119594574, + "rewards/rejected": -0.05102755129337311, + "step": 2028 + }, + { + "epoch": 1.4031811894882433, + "grad_norm": 5.334066390991211, + "learning_rate": 4.7760104502843096e-05, + "log_odds_chosen": 3.6514530181884766, + "log_odds_ratio": -0.18523135781288147, + "logits/chosen": -0.31928396224975586, + "logits/rejected": -0.31118130683898926, + "logps/chosen": -0.047676000744104385, + "logps/rejected": -0.6466583609580994, + "loss": 2.2694, + "nll_loss": 0.5488160848617554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004767600446939468, + "rewards/margins": 0.05989823490381241, + "rewards/rejected": -0.06466583907604218, + "step": 2029 + }, + { + "epoch": 1.4038727524204702, + "grad_norm": 11.846880912780762, + "learning_rate": 4.775626248655294e-05, + "log_odds_chosen": 2.908698081970215, + "log_odds_ratio": -0.4558899700641632, + "logits/chosen": -0.41499459743499756, + "logits/rejected": -0.4122236371040344, + "logps/chosen": -0.10598790645599365, + "logps/rejected": -0.6007672548294067, + "loss": 3.3705, + "nll_loss": 0.7970328330993652, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01059879083186388, + "rewards/margins": 0.049477942287921906, + "rewards/rejected": -0.06007672846317291, + "step": 2030 + }, + { + "epoch": 1.404564315352697, + "grad_norm": 10.183661460876465, + "learning_rate": 4.77524204702628e-05, + "log_odds_chosen": 3.7660956382751465, + "log_odds_ratio": -0.3516588807106018, + "logits/chosen": -0.11453324556350708, + "logits/rejected": -0.17370480298995972, + "logps/chosen": -0.1562454104423523, + "logps/rejected": -0.9619159698486328, + "loss": 4.5479, + "nll_loss": 1.1018214225769043, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01562454178929329, + "rewards/margins": 0.08056706190109253, + "rewards/rejected": -0.09619159996509552, + "step": 2031 + }, + { + "epoch": 1.4052558782849238, + "grad_norm": 4.392728805541992, + "learning_rate": 4.7748578453972646e-05, + "log_odds_chosen": 6.245469570159912, + "log_odds_ratio": -0.18948374688625336, + "logits/chosen": -0.18185730278491974, + "logits/rejected": -0.19521722197532654, + "logps/chosen": -0.038732998073101044, + "logps/rejected": -1.112351894378662, + "loss": 3.3824, + "nll_loss": 0.8266395926475525, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038732998073101044, + "rewards/margins": 0.10736190527677536, + "rewards/rejected": -0.11123519390821457, + "step": 2032 + }, + { + "epoch": 1.4059474412171507, + "grad_norm": 8.62684440612793, + "learning_rate": 4.77447364376825e-05, + "log_odds_chosen": 3.7161660194396973, + "log_odds_ratio": -0.49389269948005676, + "logits/chosen": -0.5872402191162109, + "logits/rejected": -0.5600318312644958, + "logps/chosen": -0.07983855903148651, + "logps/rejected": -0.6433324813842773, + "loss": 3.2825, + "nll_loss": 0.7712404131889343, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007983855903148651, + "rewards/margins": 0.056349389255046844, + "rewards/rejected": -0.0643332451581955, + "step": 2033 + }, + { + "epoch": 1.4066390041493775, + "grad_norm": 4.754065990447998, + "learning_rate": 4.774089442139235e-05, + "log_odds_chosen": 4.764361381530762, + "log_odds_ratio": -0.05447795242071152, + "logits/chosen": -0.47819578647613525, + "logits/rejected": -0.4906609058380127, + "logps/chosen": -0.09325191378593445, + "logps/rejected": -1.2406823635101318, + "loss": 3.6243, + "nll_loss": 0.9006204009056091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009325191378593445, + "rewards/margins": 0.11474305391311646, + "rewards/rejected": -0.1240682452917099, + "step": 2034 + }, + { + "epoch": 1.4073305670816043, + "grad_norm": 8.18529224395752, + "learning_rate": 4.7737052405102204e-05, + "log_odds_chosen": 4.307243824005127, + "log_odds_ratio": -0.12494970858097076, + "logits/chosen": -0.3749274015426636, + "logits/rejected": -0.4090096056461334, + "logps/chosen": -0.03940832242369652, + "logps/rejected": -0.7432606220245361, + "loss": 2.9987, + "nll_loss": 0.7371869087219238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0039408328011631966, + "rewards/margins": 0.07038523256778717, + "rewards/rejected": -0.0743260532617569, + "step": 2035 + }, + { + "epoch": 1.4080221300138311, + "grad_norm": 4.085348606109619, + "learning_rate": 4.773321038881205e-05, + "log_odds_chosen": 6.709726810455322, + "log_odds_ratio": -0.08220961689949036, + "logits/chosen": -0.4231798052787781, + "logits/rejected": -0.488666296005249, + "logps/chosen": -0.029946403577923775, + "logps/rejected": -1.1328915357589722, + "loss": 2.6045, + "nll_loss": 0.6429080367088318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029946400318294764, + "rewards/margins": 0.11029452085494995, + "rewards/rejected": -0.11328916251659393, + "step": 2036 + }, + { + "epoch": 1.408713692946058, + "grad_norm": 3.2187740802764893, + "learning_rate": 4.77293683725219e-05, + "log_odds_chosen": 6.3264288902282715, + "log_odds_ratio": -0.04832165315747261, + "logits/chosen": -0.22015348076820374, + "logits/rejected": -0.24953240156173706, + "logps/chosen": -0.013802926056087017, + "logps/rejected": -0.8438926935195923, + "loss": 2.8291, + "nll_loss": 0.7024345993995667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013802925823256373, + "rewards/margins": 0.08300897479057312, + "rewards/rejected": -0.08438926935195923, + "step": 2037 + }, + { + "epoch": 1.4094052558782848, + "grad_norm": 4.500889301300049, + "learning_rate": 4.7725526356231754e-05, + "log_odds_chosen": 6.405463218688965, + "log_odds_ratio": -0.009915841743350029, + "logits/chosen": -0.5794069766998291, + "logits/rejected": -0.6441061496734619, + "logps/chosen": -0.014284246601164341, + "logps/rejected": -1.012584924697876, + "loss": 3.3985, + "nll_loss": 0.8486447334289551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014284246135503054, + "rewards/margins": 0.09983007609844208, + "rewards/rejected": -0.10125849395990372, + "step": 2038 + }, + { + "epoch": 1.4100968188105116, + "grad_norm": 4.588141918182373, + "learning_rate": 4.77216843399416e-05, + "log_odds_chosen": 4.53325891494751, + "log_odds_ratio": -0.21827146410942078, + "logits/chosen": -0.5001474022865295, + "logits/rejected": -0.5334861874580383, + "logps/chosen": -0.1058742105960846, + "logps/rejected": -0.67145836353302, + "loss": 2.818, + "nll_loss": 0.6826778054237366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010587421245872974, + "rewards/margins": 0.05655841529369354, + "rewards/rejected": -0.06714583933353424, + "step": 2039 + }, + { + "epoch": 1.4107883817427385, + "grad_norm": 9.857651710510254, + "learning_rate": 4.771784232365146e-05, + "log_odds_chosen": 3.348238945007324, + "log_odds_ratio": -0.43746525049209595, + "logits/chosen": -0.4963114857673645, + "logits/rejected": -0.4507068395614624, + "logps/chosen": -0.060887180268764496, + "logps/rejected": -0.6050597429275513, + "loss": 3.2402, + "nll_loss": 0.7663080096244812, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006088717840611935, + "rewards/margins": 0.05441725254058838, + "rewards/rejected": -0.06050597131252289, + "step": 2040 + }, + { + "epoch": 1.4114799446749653, + "grad_norm": 4.385861396789551, + "learning_rate": 4.7714000307361305e-05, + "log_odds_chosen": 4.662498474121094, + "log_odds_ratio": -0.209502175450325, + "logits/chosen": -0.4768851399421692, + "logits/rejected": -0.5662028789520264, + "logps/chosen": -0.054230280220508575, + "logps/rejected": -0.6234518885612488, + "loss": 2.458, + "nll_loss": 0.5935608744621277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0054230280220508575, + "rewards/margins": 0.05692216008901596, + "rewards/rejected": -0.06234519183635712, + "step": 2041 + }, + { + "epoch": 1.4121715076071921, + "grad_norm": 6.921299934387207, + "learning_rate": 4.771015829107116e-05, + "log_odds_chosen": 4.092554092407227, + "log_odds_ratio": -0.21367663145065308, + "logits/chosen": -0.1800042986869812, + "logits/rejected": -0.22171702980995178, + "logps/chosen": -0.0716947615146637, + "logps/rejected": -0.443551242351532, + "loss": 2.6438, + "nll_loss": 0.639578104019165, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007169476244598627, + "rewards/margins": 0.037185654044151306, + "rewards/rejected": -0.0443551279604435, + "step": 2042 + }, + { + "epoch": 1.412863070539419, + "grad_norm": 6.809596538543701, + "learning_rate": 4.770631627478101e-05, + "log_odds_chosen": 4.3092546463012695, + "log_odds_ratio": -0.18392224609851837, + "logits/chosen": -0.8782827258110046, + "logits/rejected": -0.8806307911872864, + "logps/chosen": -0.0561148002743721, + "logps/rejected": -0.8389385938644409, + "loss": 3.899, + "nll_loss": 0.9563499093055725, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005611480679363012, + "rewards/margins": 0.07828238606452942, + "rewards/rejected": -0.08389386534690857, + "step": 2043 + }, + { + "epoch": 1.4135546334716458, + "grad_norm": 5.72255277633667, + "learning_rate": 4.770247425849086e-05, + "log_odds_chosen": 3.307846784591675, + "log_odds_ratio": -0.20712444186210632, + "logits/chosen": -0.3684505224227905, + "logits/rejected": -0.3365062475204468, + "logps/chosen": -0.08130621910095215, + "logps/rejected": -0.6315487623214722, + "loss": 3.3224, + "nll_loss": 0.8098927736282349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00813062209635973, + "rewards/margins": 0.05502425879240036, + "rewards/rejected": -0.06315487623214722, + "step": 2044 + }, + { + "epoch": 1.4142461964038728, + "grad_norm": 4.594570636749268, + "learning_rate": 4.769863224220071e-05, + "log_odds_chosen": 4.4372711181640625, + "log_odds_ratio": -0.09585855901241302, + "logits/chosen": -0.2599387764930725, + "logits/rejected": -0.2439916431903839, + "logps/chosen": -0.04680861905217171, + "logps/rejected": -0.9597396850585938, + "loss": 3.1439, + "nll_loss": 0.7763959765434265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004680861718952656, + "rewards/margins": 0.091293103992939, + "rewards/rejected": -0.09597396850585938, + "step": 2045 + }, + { + "epoch": 1.4149377593360997, + "grad_norm": 9.47522258758545, + "learning_rate": 4.769479022591056e-05, + "log_odds_chosen": 4.923094749450684, + "log_odds_ratio": -0.6394574046134949, + "logits/chosen": -0.4518549144268036, + "logits/rejected": -0.48012202978134155, + "logps/chosen": -0.23291736841201782, + "logps/rejected": -1.0669751167297363, + "loss": 3.2946, + "nll_loss": 0.7597114443778992, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.023291736841201782, + "rewards/margins": 0.08340578526258469, + "rewards/rejected": -0.10669751465320587, + "step": 2046 + }, + { + "epoch": 1.4156293222683265, + "grad_norm": 6.063464641571045, + "learning_rate": 4.769094820962041e-05, + "log_odds_chosen": 3.17647385597229, + "log_odds_ratio": -0.532335638999939, + "logits/chosen": -0.29802849888801575, + "logits/rejected": -0.2881256937980652, + "logps/chosen": -0.17807450890541077, + "logps/rejected": -0.4909724295139313, + "loss": 3.0705, + "nll_loss": 0.714398205280304, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017807450145483017, + "rewards/margins": 0.03128979355096817, + "rewards/rejected": -0.04909724369645119, + "step": 2047 + }, + { + "epoch": 1.4163208852005533, + "grad_norm": 11.671127319335938, + "learning_rate": 4.768710619333026e-05, + "log_odds_chosen": 5.549150466918945, + "log_odds_ratio": -0.19529500603675842, + "logits/chosen": -0.340154230594635, + "logits/rejected": -0.4136425852775574, + "logps/chosen": -0.08357815444469452, + "logps/rejected": -1.157057523727417, + "loss": 3.2631, + "nll_loss": 0.7962380647659302, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008357815444469452, + "rewards/margins": 0.10734793543815613, + "rewards/rejected": -0.11570574343204498, + "step": 2048 + }, + { + "epoch": 1.4170124481327802, + "grad_norm": 6.643354415893555, + "learning_rate": 4.768326417704012e-05, + "log_odds_chosen": 5.000125408172607, + "log_odds_ratio": -0.1833888292312622, + "logits/chosen": -0.34129559993743896, + "logits/rejected": -0.3918303847312927, + "logps/chosen": -0.05015400052070618, + "logps/rejected": -0.8530675172805786, + "loss": 2.9937, + "nll_loss": 0.7300856709480286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005015400238335133, + "rewards/margins": 0.08029135316610336, + "rewards/rejected": -0.08530674874782562, + "step": 2049 + }, + { + "epoch": 1.417704011065007, + "grad_norm": 4.964437961578369, + "learning_rate": 4.767942216074996e-05, + "log_odds_chosen": 6.179078102111816, + "log_odds_ratio": -0.30751490592956543, + "logits/chosen": -0.5061019062995911, + "logits/rejected": -0.4916246831417084, + "logps/chosen": -0.0694524496793747, + "logps/rejected": -0.9263420104980469, + "loss": 2.6063, + "nll_loss": 0.6208357810974121, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006945244502276182, + "rewards/margins": 0.08568896353244781, + "rewards/rejected": -0.09263420104980469, + "step": 2050 + }, + { + "epoch": 1.4183955739972338, + "grad_norm": 6.969486713409424, + "learning_rate": 4.7675580144459816e-05, + "log_odds_chosen": 4.743740558624268, + "log_odds_ratio": -0.24729038774967194, + "logits/chosen": -0.7440781593322754, + "logits/rejected": -0.7566511631011963, + "logps/chosen": -0.11772959679365158, + "logps/rejected": -0.8475928902626038, + "loss": 3.8343, + "nll_loss": 0.9338378310203552, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011772960424423218, + "rewards/margins": 0.07298632711172104, + "rewards/rejected": -0.08475928753614426, + "step": 2051 + }, + { + "epoch": 1.4190871369294606, + "grad_norm": 7.391602039337158, + "learning_rate": 4.767173812816967e-05, + "log_odds_chosen": 3.4940693378448486, + "log_odds_ratio": -0.1731761395931244, + "logits/chosen": -0.5935790538787842, + "logits/rejected": -0.6371148228645325, + "logps/chosen": -0.09643512964248657, + "logps/rejected": -0.6096627116203308, + "loss": 4.3386, + "nll_loss": 1.067338228225708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009643513709306717, + "rewards/margins": 0.051322758197784424, + "rewards/rejected": -0.06096626818180084, + "step": 2052 + }, + { + "epoch": 1.4197786998616875, + "grad_norm": 6.038008689880371, + "learning_rate": 4.766789611187952e-05, + "log_odds_chosen": 5.126229763031006, + "log_odds_ratio": -0.046565812081098557, + "logits/chosen": -0.2556982934474945, + "logits/rejected": -0.2397790253162384, + "logps/chosen": -0.08712028712034225, + "logps/rejected": -1.0902677774429321, + "loss": 2.8909, + "nll_loss": 0.7180594205856323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008712029084563255, + "rewards/margins": 0.10031475126743317, + "rewards/rejected": -0.10902677476406097, + "step": 2053 + }, + { + "epoch": 1.4204702627939143, + "grad_norm": 10.155299186706543, + "learning_rate": 4.7664054095589366e-05, + "log_odds_chosen": 1.5348985195159912, + "log_odds_ratio": -1.1214851140975952, + "logits/chosen": -0.3333180546760559, + "logits/rejected": -0.31991809606552124, + "logps/chosen": -0.21411769092082977, + "logps/rejected": -0.6242636442184448, + "loss": 3.7383, + "nll_loss": 0.8224247694015503, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.021411770954728127, + "rewards/margins": 0.041014596819877625, + "rewards/rejected": -0.0624263659119606, + "step": 2054 + }, + { + "epoch": 1.4211618257261411, + "grad_norm": 9.23679256439209, + "learning_rate": 4.766021207929922e-05, + "log_odds_chosen": 5.134243965148926, + "log_odds_ratio": -0.06426586210727692, + "logits/chosen": -0.14200043678283691, + "logits/rejected": -0.2121468186378479, + "logps/chosen": -0.0550372339785099, + "logps/rejected": -1.017485499382019, + "loss": 3.8874, + "nll_loss": 0.9654339551925659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00550372339785099, + "rewards/margins": 0.09624484181404114, + "rewards/rejected": -0.10174856334924698, + "step": 2055 + }, + { + "epoch": 1.421853388658368, + "grad_norm": 5.483739376068115, + "learning_rate": 4.765637006300907e-05, + "log_odds_chosen": 3.1780428886413574, + "log_odds_ratio": -0.33298707008361816, + "logits/chosen": -0.5277194976806641, + "logits/rejected": -0.5595696568489075, + "logps/chosen": -0.08533762395381927, + "logps/rejected": -0.6815667152404785, + "loss": 3.5452, + "nll_loss": 0.8530020713806152, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008533762767910957, + "rewards/margins": 0.05962291359901428, + "rewards/rejected": -0.06815667450428009, + "step": 2056 + }, + { + "epoch": 1.4225449515905948, + "grad_norm": 4.766363143920898, + "learning_rate": 4.765252804671892e-05, + "log_odds_chosen": 4.3871612548828125, + "log_odds_ratio": -0.11903732270002365, + "logits/chosen": -0.3458870053291321, + "logits/rejected": -0.33200258016586304, + "logps/chosen": -0.06314408779144287, + "logps/rejected": -0.5942891836166382, + "loss": 2.9096, + "nll_loss": 0.7155048251152039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006314409431070089, + "rewards/margins": 0.05311451479792595, + "rewards/rejected": -0.059428922832012177, + "step": 2057 + }, + { + "epoch": 1.4232365145228216, + "grad_norm": 7.043921947479248, + "learning_rate": 4.764868603042877e-05, + "log_odds_chosen": 2.770045280456543, + "log_odds_ratio": -0.3470918536186218, + "logits/chosen": -0.5503393411636353, + "logits/rejected": -0.5623016357421875, + "logps/chosen": -0.10145130008459091, + "logps/rejected": -0.6719057559967041, + "loss": 3.0892, + "nll_loss": 0.7375876903533936, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010145129635930061, + "rewards/margins": 0.05704544484615326, + "rewards/rejected": -0.06719058007001877, + "step": 2058 + }, + { + "epoch": 1.4239280774550485, + "grad_norm": 9.261284828186035, + "learning_rate": 4.764484401413862e-05, + "log_odds_chosen": 3.0731759071350098, + "log_odds_ratio": -0.43416914343833923, + "logits/chosen": -0.798588752746582, + "logits/rejected": -0.853974461555481, + "logps/chosen": -0.07625366747379303, + "logps/rejected": -0.7762160897254944, + "loss": 2.7815, + "nll_loss": 0.6519516706466675, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007625367492437363, + "rewards/margins": 0.06999623775482178, + "rewards/rejected": -0.07762160897254944, + "step": 2059 + }, + { + "epoch": 1.4246196403872753, + "grad_norm": 6.812342166900635, + "learning_rate": 4.7641001997848474e-05, + "log_odds_chosen": 5.612292289733887, + "log_odds_ratio": -0.177658811211586, + "logits/chosen": -0.6069943308830261, + "logits/rejected": -0.5836856365203857, + "logps/chosen": -0.055021896958351135, + "logps/rejected": -0.8681304454803467, + "loss": 3.2148, + "nll_loss": 0.7859339714050293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005502189975231886, + "rewards/margins": 0.08131085336208344, + "rewards/rejected": -0.0868130475282669, + "step": 2060 + }, + { + "epoch": 1.4253112033195021, + "grad_norm": 6.473294258117676, + "learning_rate": 4.763715998155832e-05, + "log_odds_chosen": 4.578250408172607, + "log_odds_ratio": -0.38147497177124023, + "logits/chosen": -0.39818042516708374, + "logits/rejected": -0.4307783246040344, + "logps/chosen": -0.02809958904981613, + "logps/rejected": -0.9484551548957825, + "loss": 2.2748, + "nll_loss": 0.5305440425872803, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028099589981138706, + "rewards/margins": 0.09203556180000305, + "rewards/rejected": -0.09484551846981049, + "step": 2061 + }, + { + "epoch": 1.426002766251729, + "grad_norm": 4.594994068145752, + "learning_rate": 4.763331796526818e-05, + "log_odds_chosen": 4.217331886291504, + "log_odds_ratio": -0.08356684446334839, + "logits/chosen": -0.5503053665161133, + "logits/rejected": -0.5877286195755005, + "logps/chosen": -0.028734585270285606, + "logps/rejected": -0.7267761826515198, + "loss": 3.5088, + "nll_loss": 0.8688546419143677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028734588995575905, + "rewards/margins": 0.06980416178703308, + "rewards/rejected": -0.0726776197552681, + "step": 2062 + }, + { + "epoch": 1.4266943291839558, + "grad_norm": 3.986945867538452, + "learning_rate": 4.7629475948978025e-05, + "log_odds_chosen": 4.139495372772217, + "log_odds_ratio": -0.15999802947044373, + "logits/chosen": -0.6195761561393738, + "logits/rejected": -0.668656051158905, + "logps/chosen": -0.06690604239702225, + "logps/rejected": -0.8718291521072388, + "loss": 2.6388, + "nll_loss": 0.6437124013900757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006690604612231255, + "rewards/margins": 0.08049231767654419, + "rewards/rejected": -0.08718291670084, + "step": 2063 + }, + { + "epoch": 1.4273858921161826, + "grad_norm": 11.108855247497559, + "learning_rate": 4.762563393268788e-05, + "log_odds_chosen": 2.0826430320739746, + "log_odds_ratio": -1.056549310684204, + "logits/chosen": -0.7052680253982544, + "logits/rejected": -0.6390185356140137, + "logps/chosen": -0.22649815678596497, + "logps/rejected": -0.6641699075698853, + "loss": 3.9387, + "nll_loss": 0.8790209293365479, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022649817168712616, + "rewards/margins": 0.04376717284321785, + "rewards/rejected": -0.06641699373722076, + "step": 2064 + }, + { + "epoch": 1.4280774550484094, + "grad_norm": 7.645991325378418, + "learning_rate": 4.762179191639773e-05, + "log_odds_chosen": 5.085951328277588, + "log_odds_ratio": -0.10783078521490097, + "logits/chosen": -0.549422562122345, + "logits/rejected": -0.5912079811096191, + "logps/chosen": -0.10167013108730316, + "logps/rejected": -0.9657047390937805, + "loss": 3.9768, + "nll_loss": 0.9834229946136475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010167012922465801, + "rewards/margins": 0.08640345931053162, + "rewards/rejected": -0.09657047688961029, + "step": 2065 + }, + { + "epoch": 1.4287690179806363, + "grad_norm": 5.260788440704346, + "learning_rate": 4.7617949900107575e-05, + "log_odds_chosen": 4.431698322296143, + "log_odds_ratio": -0.13982881605625153, + "logits/chosen": -0.816025972366333, + "logits/rejected": -0.8234196305274963, + "logps/chosen": -0.05856183171272278, + "logps/rejected": -0.8903920650482178, + "loss": 2.9261, + "nll_loss": 0.7175315618515015, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005856183357536793, + "rewards/margins": 0.08318303525447845, + "rewards/rejected": -0.08903920650482178, + "step": 2066 + }, + { + "epoch": 1.429460580912863, + "grad_norm": 6.308394908905029, + "learning_rate": 4.761410788381743e-05, + "log_odds_chosen": 3.1791226863861084, + "log_odds_ratio": -0.13766688108444214, + "logits/chosen": -0.8693885207176208, + "logits/rejected": -0.896051287651062, + "logps/chosen": -0.0903547927737236, + "logps/rejected": -0.5084475874900818, + "loss": 3.1956, + "nll_loss": 0.7851389050483704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00903547927737236, + "rewards/margins": 0.04180927574634552, + "rewards/rejected": -0.05084475874900818, + "step": 2067 + }, + { + "epoch": 1.43015214384509, + "grad_norm": 11.030671119689941, + "learning_rate": 4.761026586752728e-05, + "log_odds_chosen": 4.220271110534668, + "log_odds_ratio": -0.23262549936771393, + "logits/chosen": -0.6884947419166565, + "logits/rejected": -0.7322020530700684, + "logps/chosen": -0.09444166719913483, + "logps/rejected": -0.5991060733795166, + "loss": 3.6798, + "nll_loss": 0.8966806530952454, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009444165974855423, + "rewards/margins": 0.050466448068618774, + "rewards/rejected": -0.0599106140434742, + "step": 2068 + }, + { + "epoch": 1.4308437067773168, + "grad_norm": 9.617368698120117, + "learning_rate": 4.760642385123713e-05, + "log_odds_chosen": 3.991337299346924, + "log_odds_ratio": -0.21459956467151642, + "logits/chosen": -0.7082586288452148, + "logits/rejected": -0.7063589096069336, + "logps/chosen": -0.10309841483831406, + "logps/rejected": -1.088301420211792, + "loss": 3.6348, + "nll_loss": 0.887231171131134, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010309841483831406, + "rewards/margins": 0.09852030128240585, + "rewards/rejected": -0.10883013904094696, + "step": 2069 + }, + { + "epoch": 1.4315352697095436, + "grad_norm": 5.760544300079346, + "learning_rate": 4.760258183494698e-05, + "log_odds_chosen": 4.451226234436035, + "log_odds_ratio": -0.08724942803382874, + "logits/chosen": -0.6014732122421265, + "logits/rejected": -0.6634864807128906, + "logps/chosen": -0.05170953646302223, + "logps/rejected": -0.8413094878196716, + "loss": 2.9167, + "nll_loss": 0.7204617261886597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005170953925698996, + "rewards/margins": 0.07895999401807785, + "rewards/rejected": -0.08413094282150269, + "step": 2070 + }, + { + "epoch": 1.4322268326417704, + "grad_norm": 5.429257869720459, + "learning_rate": 4.759873981865684e-05, + "log_odds_chosen": 6.547586441040039, + "log_odds_ratio": -0.009889435023069382, + "logits/chosen": -0.38951486349105835, + "logits/rejected": -0.4980710744857788, + "logps/chosen": -0.008761152625083923, + "logps/rejected": -1.0059410333633423, + "loss": 2.8611, + "nll_loss": 0.7142948508262634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008761152857914567, + "rewards/margins": 0.09971798956394196, + "rewards/rejected": -0.10059410333633423, + "step": 2071 + }, + { + "epoch": 1.4329183955739973, + "grad_norm": 5.300156593322754, + "learning_rate": 4.759489780236668e-05, + "log_odds_chosen": 4.442890167236328, + "log_odds_ratio": -0.10472977161407471, + "logits/chosen": -0.5378776788711548, + "logits/rejected": -0.5973137617111206, + "logps/chosen": -0.02917708456516266, + "logps/rejected": -0.5998899936676025, + "loss": 2.832, + "nll_loss": 0.6975303292274475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002917708596214652, + "rewards/margins": 0.057071294635534286, + "rewards/rejected": -0.05998900532722473, + "step": 2072 + }, + { + "epoch": 1.433609958506224, + "grad_norm": 8.273154258728027, + "learning_rate": 4.7591055786076536e-05, + "log_odds_chosen": 2.775758743286133, + "log_odds_ratio": -0.4968562722206116, + "logits/chosen": -0.5443849563598633, + "logits/rejected": -0.5393614172935486, + "logps/chosen": -0.08266573399305344, + "logps/rejected": -0.4756109416484833, + "loss": 3.1284, + "nll_loss": 0.732426643371582, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008266573771834373, + "rewards/margins": 0.0392945222556591, + "rewards/rejected": -0.04756109416484833, + "step": 2073 + }, + { + "epoch": 1.434301521438451, + "grad_norm": 5.922261714935303, + "learning_rate": 4.758721376978639e-05, + "log_odds_chosen": 5.002694129943848, + "log_odds_ratio": -0.12283527851104736, + "logits/chosen": -0.6495686769485474, + "logits/rejected": -0.6880848407745361, + "logps/chosen": -0.036076322197914124, + "logps/rejected": -0.7622798681259155, + "loss": 3.6584, + "nll_loss": 0.9023165702819824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036076316609978676, + "rewards/margins": 0.07262035459280014, + "rewards/rejected": -0.07622798532247543, + "step": 2074 + }, + { + "epoch": 1.4349930843706777, + "grad_norm": 8.912012100219727, + "learning_rate": 4.7583371753496234e-05, + "log_odds_chosen": 3.8500843048095703, + "log_odds_ratio": -0.18038401007652283, + "logits/chosen": -0.7040153741836548, + "logits/rejected": -0.696866512298584, + "logps/chosen": -0.08037686347961426, + "logps/rejected": -0.6211951971054077, + "loss": 3.3774, + "nll_loss": 0.8263095617294312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008037686347961426, + "rewards/margins": 0.05408182740211487, + "rewards/rejected": -0.062119513750076294, + "step": 2075 + }, + { + "epoch": 1.4356846473029046, + "grad_norm": 5.237964630126953, + "learning_rate": 4.7579529737206086e-05, + "log_odds_chosen": 4.842121124267578, + "log_odds_ratio": -0.18194277584552765, + "logits/chosen": -0.3374127149581909, + "logits/rejected": -0.2697219252586365, + "logps/chosen": -0.07888604700565338, + "logps/rejected": -0.5460504293441772, + "loss": 3.0352, + "nll_loss": 0.7406092286109924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007888603955507278, + "rewards/margins": 0.04671643674373627, + "rewards/rejected": -0.054605040699243546, + "step": 2076 + }, + { + "epoch": 1.4363762102351314, + "grad_norm": 4.641595363616943, + "learning_rate": 4.757568772091594e-05, + "log_odds_chosen": 4.9412760734558105, + "log_odds_ratio": -0.014735918492078781, + "logits/chosen": -0.6826890707015991, + "logits/rejected": -0.7102535367012024, + "logps/chosen": -0.00749985920265317, + "logps/rejected": -0.7612767219543457, + "loss": 2.7544, + "nll_loss": 0.6871151924133301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007499859784729779, + "rewards/margins": 0.0753776878118515, + "rewards/rejected": -0.07612767070531845, + "step": 2077 + }, + { + "epoch": 1.4370677731673582, + "grad_norm": 7.271586894989014, + "learning_rate": 4.757184570462579e-05, + "log_odds_chosen": 3.560127019882202, + "log_odds_ratio": -0.22132518887519836, + "logits/chosen": -0.6114071011543274, + "logits/rejected": -0.6493667960166931, + "logps/chosen": -0.06852074712514877, + "logps/rejected": -0.5435910224914551, + "loss": 4.1174, + "nll_loss": 1.007214069366455, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00685207499191165, + "rewards/margins": 0.04750702530145645, + "rewards/rejected": -0.05435910075902939, + "step": 2078 + }, + { + "epoch": 1.437759336099585, + "grad_norm": 6.2681965827941895, + "learning_rate": 4.756800368833564e-05, + "log_odds_chosen": 5.811890602111816, + "log_odds_ratio": -0.11716453731060028, + "logits/chosen": -0.12001199275255203, + "logits/rejected": -0.1396545171737671, + "logps/chosen": -0.01839967630803585, + "logps/rejected": -0.8142108917236328, + "loss": 3.3498, + "nll_loss": 0.8257352709770203, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018399673281237483, + "rewards/margins": 0.0795811265707016, + "rewards/rejected": -0.08142108470201492, + "step": 2079 + }, + { + "epoch": 1.438450899031812, + "grad_norm": 7.204479694366455, + "learning_rate": 4.7564161672045496e-05, + "log_odds_chosen": 5.542275905609131, + "log_odds_ratio": -0.05477634072303772, + "logits/chosen": -0.43915674090385437, + "logits/rejected": -0.5252007246017456, + "logps/chosen": -0.02469804137945175, + "logps/rejected": -0.8203492760658264, + "loss": 3.6115, + "nll_loss": 0.8973989486694336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002469804137945175, + "rewards/margins": 0.0795651227235794, + "rewards/rejected": -0.08203493058681488, + "step": 2080 + }, + { + "epoch": 1.4391424619640387, + "grad_norm": 8.111823081970215, + "learning_rate": 4.756031965575534e-05, + "log_odds_chosen": 2.5070009231567383, + "log_odds_ratio": -0.33566251397132874, + "logits/chosen": -0.36609354615211487, + "logits/rejected": -0.3842601478099823, + "logps/chosen": -0.11886178702116013, + "logps/rejected": -0.5776019096374512, + "loss": 3.8067, + "nll_loss": 0.9181172847747803, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011886179447174072, + "rewards/margins": 0.045874010771512985, + "rewards/rejected": -0.05776019021868706, + "step": 2081 + }, + { + "epoch": 1.4398340248962656, + "grad_norm": 4.4983978271484375, + "learning_rate": 4.7556477639465194e-05, + "log_odds_chosen": 3.637857437133789, + "log_odds_ratio": -0.1540573537349701, + "logits/chosen": -0.6648321151733398, + "logits/rejected": -0.6483675241470337, + "logps/chosen": -0.06123726814985275, + "logps/rejected": -0.7261663675308228, + "loss": 2.5024, + "nll_loss": 0.6101846098899841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0061237262561917305, + "rewards/margins": 0.06649291515350342, + "rewards/rejected": -0.07261664420366287, + "step": 2082 + }, + { + "epoch": 1.4405255878284924, + "grad_norm": 6.488410472869873, + "learning_rate": 4.7552635623175047e-05, + "log_odds_chosen": 4.784399032592773, + "log_odds_ratio": -0.3786088526248932, + "logits/chosen": -1.0810126066207886, + "logits/rejected": -1.0608916282653809, + "logps/chosen": -0.10673705488443375, + "logps/rejected": -0.8546772003173828, + "loss": 4.396, + "nll_loss": 1.0611448287963867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010673705488443375, + "rewards/margins": 0.07479400932788849, + "rewards/rejected": -0.08546771109104156, + "step": 2083 + }, + { + "epoch": 1.4412171507607192, + "grad_norm": 6.633011341094971, + "learning_rate": 4.754879360688489e-05, + "log_odds_chosen": 4.465466499328613, + "log_odds_ratio": -0.21018919348716736, + "logits/chosen": -0.3606579899787903, + "logits/rejected": -0.3915744423866272, + "logps/chosen": -0.05982568487524986, + "logps/rejected": -0.8488081097602844, + "loss": 3.7668, + "nll_loss": 0.9206801056861877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005982568487524986, + "rewards/margins": 0.07889824360609055, + "rewards/rejected": -0.08488080650568008, + "step": 2084 + }, + { + "epoch": 1.441908713692946, + "grad_norm": 4.620772838592529, + "learning_rate": 4.7544951590594745e-05, + "log_odds_chosen": 5.470822811126709, + "log_odds_ratio": -0.1020824983716011, + "logits/chosen": -0.6941245794296265, + "logits/rejected": -0.6865053176879883, + "logps/chosen": -0.049653299152851105, + "logps/rejected": -0.7177039384841919, + "loss": 2.4043, + "nll_loss": 0.5908569097518921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004965329542756081, + "rewards/margins": 0.06680506467819214, + "rewards/rejected": -0.07177039235830307, + "step": 2085 + }, + { + "epoch": 1.4426002766251729, + "grad_norm": 9.680559158325195, + "learning_rate": 4.75411095743046e-05, + "log_odds_chosen": 1.9712796211242676, + "log_odds_ratio": -0.47640758752822876, + "logits/chosen": -0.6261113882064819, + "logits/rejected": -0.6207519769668579, + "logps/chosen": -0.11739076673984528, + "logps/rejected": -0.43056824803352356, + "loss": 2.9493, + "nll_loss": 0.6896948218345642, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011739077046513557, + "rewards/margins": 0.03131775185465813, + "rewards/rejected": -0.043056827038526535, + "step": 2086 + }, + { + "epoch": 1.4432918395573997, + "grad_norm": 5.5366926193237305, + "learning_rate": 4.753726755801445e-05, + "log_odds_chosen": 4.977890968322754, + "log_odds_ratio": -0.17959055304527283, + "logits/chosen": -0.6620508432388306, + "logits/rejected": -0.7662349343299866, + "logps/chosen": -0.056352924555540085, + "logps/rejected": -0.7703478336334229, + "loss": 3.3832, + "nll_loss": 0.8278340697288513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005635292734950781, + "rewards/margins": 0.07139948755502701, + "rewards/rejected": -0.07703477144241333, + "step": 2087 + }, + { + "epoch": 1.4439834024896265, + "grad_norm": 6.524678707122803, + "learning_rate": 4.7533425541724295e-05, + "log_odds_chosen": 4.2204790115356445, + "log_odds_ratio": -0.0700051486492157, + "logits/chosen": -0.7024485468864441, + "logits/rejected": -0.730099081993103, + "logps/chosen": -0.049229852855205536, + "logps/rejected": -0.8601389527320862, + "loss": 3.5406, + "nll_loss": 0.8781381249427795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004922985564917326, + "rewards/margins": 0.08109091222286224, + "rewards/rejected": -0.08601390570402145, + "step": 2088 + }, + { + "epoch": 1.4446749654218534, + "grad_norm": 7.487344741821289, + "learning_rate": 4.7529583525434154e-05, + "log_odds_chosen": 5.620451927185059, + "log_odds_ratio": -0.0797644704580307, + "logits/chosen": -0.5892861485481262, + "logits/rejected": -0.6945428252220154, + "logps/chosen": -0.03896079212427139, + "logps/rejected": -0.809598445892334, + "loss": 3.1271, + "nll_loss": 0.7737871408462524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038960790261626244, + "rewards/margins": 0.07706376165151596, + "rewards/rejected": -0.08095984905958176, + "step": 2089 + }, + { + "epoch": 1.4453665283540802, + "grad_norm": 4.206676483154297, + "learning_rate": 4.7525741509144e-05, + "log_odds_chosen": 4.1212263107299805, + "log_odds_ratio": -0.2708800137042999, + "logits/chosen": -0.1651289314031601, + "logits/rejected": -0.12203869968652725, + "logps/chosen": -0.05846680328249931, + "logps/rejected": -0.6591073274612427, + "loss": 2.7409, + "nll_loss": 0.658129096031189, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005846680607646704, + "rewards/margins": 0.060064058750867844, + "rewards/rejected": -0.06591074168682098, + "step": 2090 + }, + { + "epoch": 1.446058091286307, + "grad_norm": 8.854192733764648, + "learning_rate": 4.752189949285385e-05, + "log_odds_chosen": 5.342554569244385, + "log_odds_ratio": -0.11022274941205978, + "logits/chosen": -0.5099636912345886, + "logits/rejected": -0.6596127152442932, + "logps/chosen": -0.03835906460881233, + "logps/rejected": -1.042710781097412, + "loss": 3.9023, + "nll_loss": 0.9645450711250305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038359067402780056, + "rewards/margins": 0.10043518245220184, + "rewards/rejected": -0.10427108407020569, + "step": 2091 + }, + { + "epoch": 1.4467496542185339, + "grad_norm": 6.701385974884033, + "learning_rate": 4.7518057476563705e-05, + "log_odds_chosen": 2.987711191177368, + "log_odds_ratio": -0.3907737731933594, + "logits/chosen": -0.5788431167602539, + "logits/rejected": -0.5822548866271973, + "logps/chosen": -0.12158074975013733, + "logps/rejected": -0.725666880607605, + "loss": 3.1725, + "nll_loss": 0.7540526390075684, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012158075347542763, + "rewards/margins": 0.06040861830115318, + "rewards/rejected": -0.0725666880607605, + "step": 2092 + }, + { + "epoch": 1.4474412171507607, + "grad_norm": 8.469315528869629, + "learning_rate": 4.751421546027355e-05, + "log_odds_chosen": 6.235541343688965, + "log_odds_ratio": -0.08917492628097534, + "logits/chosen": -0.40043991804122925, + "logits/rejected": -0.4467763304710388, + "logps/chosen": -0.011788062751293182, + "logps/rejected": -1.1630051136016846, + "loss": 3.4903, + "nll_loss": 0.8636683821678162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011788064148277044, + "rewards/margins": 0.11512169986963272, + "rewards/rejected": -0.11630050837993622, + "step": 2093 + }, + { + "epoch": 1.4481327800829875, + "grad_norm": 5.140117168426514, + "learning_rate": 4.75103734439834e-05, + "log_odds_chosen": 4.032228469848633, + "log_odds_ratio": -0.20453763008117676, + "logits/chosen": -0.6595421433448792, + "logits/rejected": -0.6425448656082153, + "logps/chosen": -0.07961926609277725, + "logps/rejected": -0.7643399238586426, + "loss": 2.5293, + "nll_loss": 0.6118590831756592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007961926981806755, + "rewards/margins": 0.06847207248210907, + "rewards/rejected": -0.07643399387598038, + "step": 2094 + }, + { + "epoch": 1.4488243430152143, + "grad_norm": 6.220903396606445, + "learning_rate": 4.7506531427693255e-05, + "log_odds_chosen": 4.783239364624023, + "log_odds_ratio": -0.35463640093803406, + "logits/chosen": -0.7399685382843018, + "logits/rejected": -0.7145621180534363, + "logps/chosen": -0.11588143557310104, + "logps/rejected": -1.126645803451538, + "loss": 3.1774, + "nll_loss": 0.7588942050933838, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011588143184781075, + "rewards/margins": 0.10107642412185669, + "rewards/rejected": -0.11266457289457321, + "step": 2095 + }, + { + "epoch": 1.4495159059474412, + "grad_norm": 7.440333366394043, + "learning_rate": 4.750268941140311e-05, + "log_odds_chosen": 4.993106842041016, + "log_odds_ratio": -0.2743750512599945, + "logits/chosen": -0.7840232253074646, + "logits/rejected": -0.7973366975784302, + "logps/chosen": -0.13951562345027924, + "logps/rejected": -0.9469488859176636, + "loss": 4.7916, + "nll_loss": 1.1704589128494263, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013951562345027924, + "rewards/margins": 0.08074332028627396, + "rewards/rejected": -0.09469488263130188, + "step": 2096 + }, + { + "epoch": 1.450207468879668, + "grad_norm": 7.338708400726318, + "learning_rate": 4.7498847395112954e-05, + "log_odds_chosen": 4.159909248352051, + "log_odds_ratio": -0.07225679606199265, + "logits/chosen": -0.9430498480796814, + "logits/rejected": -0.9889695644378662, + "logps/chosen": -0.053130898624658585, + "logps/rejected": -0.639094889163971, + "loss": 4.3992, + "nll_loss": 1.0925672054290771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005313089583069086, + "rewards/margins": 0.058596402406692505, + "rewards/rejected": -0.06390949338674545, + "step": 2097 + }, + { + "epoch": 1.4508990318118948, + "grad_norm": 6.4432597160339355, + "learning_rate": 4.749500537882281e-05, + "log_odds_chosen": 3.2537670135498047, + "log_odds_ratio": -0.263638436794281, + "logits/chosen": -0.6450674533843994, + "logits/rejected": -0.6910425424575806, + "logps/chosen": -0.1120055690407753, + "logps/rejected": -0.598624587059021, + "loss": 2.9496, + "nll_loss": 0.7110449075698853, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01120055839419365, + "rewards/margins": 0.04866189882159233, + "rewards/rejected": -0.05986246094107628, + "step": 2098 + }, + { + "epoch": 1.4515905947441217, + "grad_norm": 7.961607456207275, + "learning_rate": 4.749116336253266e-05, + "log_odds_chosen": 4.801853656768799, + "log_odds_ratio": -0.104258693754673, + "logits/chosen": -0.6197474598884583, + "logits/rejected": -0.6892693638801575, + "logps/chosen": -0.05515953525900841, + "logps/rejected": -1.0898054838180542, + "loss": 2.591, + "nll_loss": 0.637330949306488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005515953525900841, + "rewards/margins": 0.10346459597349167, + "rewards/rejected": -0.10898055136203766, + "step": 2099 + }, + { + "epoch": 1.4522821576763485, + "grad_norm": 6.290823936462402, + "learning_rate": 4.748732134624251e-05, + "log_odds_chosen": 3.377328872680664, + "log_odds_ratio": -0.12939488887786865, + "logits/chosen": -0.3903263211250305, + "logits/rejected": -0.3945927619934082, + "logps/chosen": -0.07465854287147522, + "logps/rejected": -0.6714155673980713, + "loss": 3.2051, + "nll_loss": 0.7883424758911133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007465854287147522, + "rewards/margins": 0.05967570096254349, + "rewards/rejected": -0.06714155524969101, + "step": 2100 + }, + { + "epoch": 1.4529737206085753, + "grad_norm": 3.876042127609253, + "learning_rate": 4.748347932995236e-05, + "log_odds_chosen": 6.402103900909424, + "log_odds_ratio": -0.11234617233276367, + "logits/chosen": -0.3926086723804474, + "logits/rejected": -0.4278343915939331, + "logps/chosen": -0.04511556401848793, + "logps/rejected": -1.008373737335205, + "loss": 2.4422, + "nll_loss": 0.5993175506591797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004511556122452021, + "rewards/margins": 0.0963258147239685, + "rewards/rejected": -0.10083737224340439, + "step": 2101 + }, + { + "epoch": 1.4536652835408022, + "grad_norm": 9.223143577575684, + "learning_rate": 4.747963731366221e-05, + "log_odds_chosen": 3.694127082824707, + "log_odds_ratio": -0.21813002228736877, + "logits/chosen": -0.935250997543335, + "logits/rejected": -0.9457248449325562, + "logps/chosen": -0.053896158933639526, + "logps/rejected": -0.49949681758880615, + "loss": 3.6024, + "nll_loss": 0.878780722618103, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00538961635902524, + "rewards/margins": 0.044560063630342484, + "rewards/rejected": -0.04994967579841614, + "step": 2102 + }, + { + "epoch": 1.454356846473029, + "grad_norm": 4.475998878479004, + "learning_rate": 4.747579529737206e-05, + "log_odds_chosen": 4.9824724197387695, + "log_odds_ratio": -0.3273460566997528, + "logits/chosen": -0.16247451305389404, + "logits/rejected": -0.21642492711544037, + "logps/chosen": -0.13452918827533722, + "logps/rejected": -1.0846654176712036, + "loss": 2.8663, + "nll_loss": 0.6838335394859314, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013452919200062752, + "rewards/margins": 0.09501362591981888, + "rewards/rejected": -0.10846654325723648, + "step": 2103 + }, + { + "epoch": 1.4550484094052558, + "grad_norm": 7.579476356506348, + "learning_rate": 4.7471953281081914e-05, + "log_odds_chosen": 4.738933563232422, + "log_odds_ratio": -0.04160004109144211, + "logits/chosen": -0.44023242592811584, + "logits/rejected": -0.43835222721099854, + "logps/chosen": -0.07230572402477264, + "logps/rejected": -1.0475863218307495, + "loss": 3.3754, + "nll_loss": 0.8396894335746765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007230572868138552, + "rewards/margins": 0.09752806276082993, + "rewards/rejected": -0.10475863516330719, + "step": 2104 + }, + { + "epoch": 1.4557399723374826, + "grad_norm": 6.231069564819336, + "learning_rate": 4.7468111264791766e-05, + "log_odds_chosen": 4.9739909172058105, + "log_odds_ratio": -0.0823458880186081, + "logits/chosen": -0.6578803658485413, + "logits/rejected": -0.6871607303619385, + "logps/chosen": -0.04676012694835663, + "logps/rejected": -0.8974286317825317, + "loss": 3.3229, + "nll_loss": 0.8224886655807495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004676013253629208, + "rewards/margins": 0.08506684750318527, + "rewards/rejected": -0.08974287658929825, + "step": 2105 + }, + { + "epoch": 1.4564315352697095, + "grad_norm": 6.654862403869629, + "learning_rate": 4.746426924850161e-05, + "log_odds_chosen": 3.6491219997406006, + "log_odds_ratio": -0.4037840962409973, + "logits/chosen": -0.5042887330055237, + "logits/rejected": -0.5388541221618652, + "logps/chosen": -0.1399620920419693, + "logps/rejected": -0.6792500019073486, + "loss": 2.5631, + "nll_loss": 0.6003901958465576, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013996210880577564, + "rewards/margins": 0.05392879247665405, + "rewards/rejected": -0.06792499870061874, + "step": 2106 + }, + { + "epoch": 1.4571230982019363, + "grad_norm": 6.00809907913208, + "learning_rate": 4.746042723221147e-05, + "log_odds_chosen": 3.6155893802642822, + "log_odds_ratio": -0.2792835831642151, + "logits/chosen": -0.6958773732185364, + "logits/rejected": -0.673318088054657, + "logps/chosen": -0.04103608429431915, + "logps/rejected": -0.6202775239944458, + "loss": 2.6871, + "nll_loss": 0.6438344717025757, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004103608429431915, + "rewards/margins": 0.057924140244722366, + "rewards/rejected": -0.06202774867415428, + "step": 2107 + }, + { + "epoch": 1.4578146611341631, + "grad_norm": 7.503900527954102, + "learning_rate": 4.745658521592132e-05, + "log_odds_chosen": 5.063538074493408, + "log_odds_ratio": -0.482469767332077, + "logits/chosen": -0.6590060591697693, + "logits/rejected": -0.6995262503623962, + "logps/chosen": -0.1371098756790161, + "logps/rejected": -0.7465744614601135, + "loss": 3.0332, + "nll_loss": 0.710058331489563, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013710987754166126, + "rewards/margins": 0.06094646081328392, + "rewards/rejected": -0.07465744763612747, + "step": 2108 + }, + { + "epoch": 1.45850622406639, + "grad_norm": 4.681285858154297, + "learning_rate": 4.745274319963117e-05, + "log_odds_chosen": 4.333364963531494, + "log_odds_ratio": -0.1369195282459259, + "logits/chosen": -0.47746366262435913, + "logits/rejected": -0.5346704125404358, + "logps/chosen": -0.052240658551454544, + "logps/rejected": -0.602260947227478, + "loss": 2.7166, + "nll_loss": 0.6654695868492126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005224065855145454, + "rewards/margins": 0.05500202625989914, + "rewards/rejected": -0.060226090252399445, + "step": 2109 + }, + { + "epoch": 1.4591977869986168, + "grad_norm": 6.215029716491699, + "learning_rate": 4.744890118334102e-05, + "log_odds_chosen": 4.594152927398682, + "log_odds_ratio": -0.2969439625740051, + "logits/chosen": -0.4114540219306946, + "logits/rejected": -0.4823339581489563, + "logps/chosen": -0.11988166719675064, + "logps/rejected": -0.5423327684402466, + "loss": 2.5059, + "nll_loss": 0.5967724323272705, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011988166719675064, + "rewards/margins": 0.04224511235952377, + "rewards/rejected": -0.05423327907919884, + "step": 2110 + }, + { + "epoch": 1.4598893499308436, + "grad_norm": 11.740045547485352, + "learning_rate": 4.744505916705087e-05, + "log_odds_chosen": 2.193021774291992, + "log_odds_ratio": -0.4456307291984558, + "logits/chosen": -0.4481427073478699, + "logits/rejected": -0.5567407608032227, + "logps/chosen": -0.2550775408744812, + "logps/rejected": -0.9214454293251038, + "loss": 3.7604, + "nll_loss": 0.8955416679382324, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.02550775557756424, + "rewards/margins": 0.06663678586483002, + "rewards/rejected": -0.09214454889297485, + "step": 2111 + }, + { + "epoch": 1.4605809128630705, + "grad_norm": 5.726841926574707, + "learning_rate": 4.744121715076072e-05, + "log_odds_chosen": 4.480843544006348, + "log_odds_ratio": -0.12533259391784668, + "logits/chosen": -0.44382768869400024, + "logits/rejected": -0.473913311958313, + "logps/chosen": -0.04405241459608078, + "logps/rejected": -0.5421817898750305, + "loss": 3.7368, + "nll_loss": 0.9216721057891846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004405241459608078, + "rewards/margins": 0.04981293901801109, + "rewards/rejected": -0.05421818047761917, + "step": 2112 + }, + { + "epoch": 1.4612724757952975, + "grad_norm": 6.996542930603027, + "learning_rate": 4.743737513447057e-05, + "log_odds_chosen": 4.351029396057129, + "log_odds_ratio": -0.1016198918223381, + "logits/chosen": -0.48450225591659546, + "logits/rejected": -0.5053142309188843, + "logps/chosen": -0.08305220305919647, + "logps/rejected": -1.1227951049804688, + "loss": 3.0431, + "nll_loss": 0.7506143450737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008305220864713192, + "rewards/margins": 0.10397429764270782, + "rewards/rejected": -0.11227951943874359, + "step": 2113 + }, + { + "epoch": 1.4619640387275243, + "grad_norm": 8.82361125946045, + "learning_rate": 4.7433533118180425e-05, + "log_odds_chosen": 2.8261706829071045, + "log_odds_ratio": -0.5516917109489441, + "logits/chosen": -0.47138428688049316, + "logits/rejected": -0.46295493841171265, + "logps/chosen": -0.130302295088768, + "logps/rejected": -0.4903410077095032, + "loss": 4.3231, + "nll_loss": 1.0256012678146362, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013030230067670345, + "rewards/margins": 0.036003872752189636, + "rewards/rejected": -0.049034103751182556, + "step": 2114 + }, + { + "epoch": 1.4626556016597512, + "grad_norm": 7.945962905883789, + "learning_rate": 4.742969110189027e-05, + "log_odds_chosen": 3.673635244369507, + "log_odds_ratio": -0.10550344735383987, + "logits/chosen": -0.6925126314163208, + "logits/rejected": -0.6828139424324036, + "logps/chosen": -0.03606286272406578, + "logps/rejected": -0.6843661665916443, + "loss": 3.5708, + "nll_loss": 0.8821461200714111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003606286598369479, + "rewards/margins": 0.06483032554388046, + "rewards/rejected": -0.06843661516904831, + "step": 2115 + }, + { + "epoch": 1.463347164591978, + "grad_norm": 6.662603378295898, + "learning_rate": 4.742584908560013e-05, + "log_odds_chosen": 4.126121520996094, + "log_odds_ratio": -0.24083402752876282, + "logits/chosen": -0.4301590621471405, + "logits/rejected": -0.5066604614257812, + "logps/chosen": -0.05969817191362381, + "logps/rejected": -0.7975174784660339, + "loss": 3.5244, + "nll_loss": 0.8570234775543213, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005969816818833351, + "rewards/margins": 0.07378192991018295, + "rewards/rejected": -0.07975174486637115, + "step": 2116 + }, + { + "epoch": 1.4640387275242048, + "grad_norm": 6.692433834075928, + "learning_rate": 4.7422007069309975e-05, + "log_odds_chosen": 5.185555934906006, + "log_odds_ratio": -0.06206965073943138, + "logits/chosen": -0.6158180236816406, + "logits/rejected": -0.6842352747917175, + "logps/chosen": -0.05131285637617111, + "logps/rejected": -1.0540692806243896, + "loss": 4.124, + "nll_loss": 1.0247859954833984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005131285637617111, + "rewards/margins": 0.10027563571929932, + "rewards/rejected": -0.10540692508220673, + "step": 2117 + }, + { + "epoch": 1.4647302904564317, + "grad_norm": 7.510965347290039, + "learning_rate": 4.741816505301983e-05, + "log_odds_chosen": 3.0582268238067627, + "log_odds_ratio": -0.28565216064453125, + "logits/chosen": -0.2934304177761078, + "logits/rejected": -0.29644107818603516, + "logps/chosen": -0.054563820362091064, + "logps/rejected": -0.5622140169143677, + "loss": 2.8653, + "nll_loss": 0.6877476572990417, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005456382408738136, + "rewards/margins": 0.0507650226354599, + "rewards/rejected": -0.056221406906843185, + "step": 2118 + }, + { + "epoch": 1.4654218533886585, + "grad_norm": 9.008056640625, + "learning_rate": 4.741432303672968e-05, + "log_odds_chosen": 3.7784793376922607, + "log_odds_ratio": -0.28400006890296936, + "logits/chosen": -0.5542048811912537, + "logits/rejected": -0.6849331855773926, + "logps/chosen": -0.15926668047904968, + "logps/rejected": -0.9513943195343018, + "loss": 3.2885, + "nll_loss": 0.7937213778495789, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015926668420433998, + "rewards/margins": 0.07921276241540909, + "rewards/rejected": -0.09513943642377853, + "step": 2119 + }, + { + "epoch": 1.4661134163208853, + "grad_norm": 7.5393500328063965, + "learning_rate": 4.7410481020439526e-05, + "log_odds_chosen": 2.863239288330078, + "log_odds_ratio": -0.4638535976409912, + "logits/chosen": -0.30791130661964417, + "logits/rejected": -0.32997122406959534, + "logps/chosen": -0.11125410348176956, + "logps/rejected": -0.7325018644332886, + "loss": 3.2332, + "nll_loss": 0.7619196176528931, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011125410906970501, + "rewards/margins": 0.06212478503584862, + "rewards/rejected": -0.0732501968741417, + "step": 2120 + }, + { + "epoch": 1.4668049792531122, + "grad_norm": 4.835433006286621, + "learning_rate": 4.740663900414938e-05, + "log_odds_chosen": 4.400109767913818, + "log_odds_ratio": -0.2901593744754791, + "logits/chosen": -0.3349055051803589, + "logits/rejected": -0.2996591627597809, + "logps/chosen": -0.07037417590618134, + "logps/rejected": -0.7848444581031799, + "loss": 2.8301, + "nll_loss": 0.6785147190093994, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007037417963147163, + "rewards/margins": 0.07144702970981598, + "rewards/rejected": -0.078484445810318, + "step": 2121 + }, + { + "epoch": 1.467496542185339, + "grad_norm": 6.1850666999816895, + "learning_rate": 4.740279698785923e-05, + "log_odds_chosen": 4.053633689880371, + "log_odds_ratio": -0.5489174723625183, + "logits/chosen": -0.5120274424552917, + "logits/rejected": -0.5325107574462891, + "logps/chosen": -0.09262340515851974, + "logps/rejected": -0.5417929887771606, + "loss": 2.7099, + "nll_loss": 0.6225718855857849, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009262342005968094, + "rewards/margins": 0.04491695761680603, + "rewards/rejected": -0.05417929217219353, + "step": 2122 + }, + { + "epoch": 1.4681881051175658, + "grad_norm": 4.262668132781982, + "learning_rate": 4.739895497156908e-05, + "log_odds_chosen": 6.103684425354004, + "log_odds_ratio": -0.007512577343732119, + "logits/chosen": -0.5071601867675781, + "logits/rejected": -0.50539231300354, + "logps/chosen": -0.02052057720720768, + "logps/rejected": -0.9985079169273376, + "loss": 3.4081, + "nll_loss": 0.8512747287750244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002052057534456253, + "rewards/margins": 0.09779874235391617, + "rewards/rejected": -0.09985079616308212, + "step": 2123 + }, + { + "epoch": 1.4688796680497926, + "grad_norm": 4.665466785430908, + "learning_rate": 4.739511295527893e-05, + "log_odds_chosen": 4.051023960113525, + "log_odds_ratio": -0.09487346559762955, + "logits/chosen": -0.6261661052703857, + "logits/rejected": -0.6074262261390686, + "logps/chosen": -0.060799550265073776, + "logps/rejected": -0.6109548211097717, + "loss": 3.5126, + "nll_loss": 0.8686507940292358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006079955492168665, + "rewards/margins": 0.055015530437231064, + "rewards/rejected": -0.06109548732638359, + "step": 2124 + }, + { + "epoch": 1.4695712309820195, + "grad_norm": 10.238999366760254, + "learning_rate": 4.739127093898879e-05, + "log_odds_chosen": 4.448538303375244, + "log_odds_ratio": -0.4580115079879761, + "logits/chosen": -0.12140993028879166, + "logits/rejected": -0.12025927007198334, + "logps/chosen": -0.09281541407108307, + "logps/rejected": -0.7629176378250122, + "loss": 2.5392, + "nll_loss": 0.5889951586723328, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009281541220843792, + "rewards/margins": 0.06701022386550903, + "rewards/rejected": -0.0762917622923851, + "step": 2125 + }, + { + "epoch": 1.4702627939142463, + "grad_norm": 7.920474529266357, + "learning_rate": 4.7387428922698634e-05, + "log_odds_chosen": 2.344700336456299, + "log_odds_ratio": -0.37472227215766907, + "logits/chosen": -0.7578220367431641, + "logits/rejected": -0.775606095790863, + "logps/chosen": -0.20015893876552582, + "logps/rejected": -0.5982450246810913, + "loss": 3.5284, + "nll_loss": 0.8446251153945923, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0200158953666687, + "rewards/margins": 0.03980860486626625, + "rewards/rejected": -0.05982450023293495, + "step": 2126 + }, + { + "epoch": 1.4709543568464731, + "grad_norm": 5.36145544052124, + "learning_rate": 4.7383586906408486e-05, + "log_odds_chosen": 2.4950942993164062, + "log_odds_ratio": -0.49963515996932983, + "logits/chosen": -0.4623447060585022, + "logits/rejected": -0.4579474925994873, + "logps/chosen": -0.17350442707538605, + "logps/rejected": -0.5114143490791321, + "loss": 2.9987, + "nll_loss": 0.699701726436615, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017350442707538605, + "rewards/margins": 0.033790990710258484, + "rewards/rejected": -0.05114143341779709, + "step": 2127 + }, + { + "epoch": 1.4716459197787, + "grad_norm": 6.284639358520508, + "learning_rate": 4.737974489011834e-05, + "log_odds_chosen": 6.780862808227539, + "log_odds_ratio": -0.015053209848701954, + "logits/chosen": -0.267768919467926, + "logits/rejected": -0.24615205824375153, + "logps/chosen": -0.0044655827805399895, + "logps/rejected": -0.8356389999389648, + "loss": 3.29, + "nll_loss": 0.8209894895553589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044655834790319204, + "rewards/margins": 0.08311734348535538, + "rewards/rejected": -0.083563894033432, + "step": 2128 + }, + { + "epoch": 1.4723374827109268, + "grad_norm": 6.118336200714111, + "learning_rate": 4.7375902873828184e-05, + "log_odds_chosen": 5.7112226486206055, + "log_odds_ratio": -0.13390396535396576, + "logits/chosen": -0.5156606435775757, + "logits/rejected": -0.5889492630958557, + "logps/chosen": -0.04604191705584526, + "logps/rejected": -1.1898932456970215, + "loss": 3.6421, + "nll_loss": 0.8971379995346069, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004604191519320011, + "rewards/margins": 0.11438513547182083, + "rewards/rejected": -0.11898931860923767, + "step": 2129 + }, + { + "epoch": 1.4730290456431536, + "grad_norm": 4.7239837646484375, + "learning_rate": 4.737206085753804e-05, + "log_odds_chosen": 3.1016533374786377, + "log_odds_ratio": -0.2537999749183655, + "logits/chosen": -0.40021154284477234, + "logits/rejected": -0.4068134129047394, + "logps/chosen": -0.09348339587450027, + "logps/rejected": -0.9883545637130737, + "loss": 2.8394, + "nll_loss": 0.6844592094421387, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009348340332508087, + "rewards/margins": 0.08948711305856705, + "rewards/rejected": -0.09883546084165573, + "step": 2130 + }, + { + "epoch": 1.4737206085753805, + "grad_norm": 10.042348861694336, + "learning_rate": 4.736821884124789e-05, + "log_odds_chosen": 4.484597682952881, + "log_odds_ratio": -0.07834921777248383, + "logits/chosen": -0.2142612487077713, + "logits/rejected": -0.2503935396671295, + "logps/chosen": -0.03164057806134224, + "logps/rejected": -0.49265021085739136, + "loss": 3.7392, + "nll_loss": 0.9269661903381348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031640580855309963, + "rewards/margins": 0.04610096290707588, + "rewards/rejected": -0.049265019595623016, + "step": 2131 + }, + { + "epoch": 1.4744121715076073, + "grad_norm": 6.352854251861572, + "learning_rate": 4.736437682495774e-05, + "log_odds_chosen": 6.055075645446777, + "log_odds_ratio": -0.07979589700698853, + "logits/chosen": -0.4343861937522888, + "logits/rejected": -0.49018430709838867, + "logps/chosen": -0.020101509988307953, + "logps/rejected": -0.8233213424682617, + "loss": 3.0005, + "nll_loss": 0.7421414852142334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002010151045396924, + "rewards/margins": 0.08032198995351791, + "rewards/rejected": -0.08233214169740677, + "step": 2132 + }, + { + "epoch": 1.4751037344398341, + "grad_norm": 8.404098510742188, + "learning_rate": 4.736053480866759e-05, + "log_odds_chosen": 3.932952404022217, + "log_odds_ratio": -0.065504290163517, + "logits/chosen": -0.5252431631088257, + "logits/rejected": -0.5888339281082153, + "logps/chosen": -0.0439700186252594, + "logps/rejected": -0.7456878423690796, + "loss": 4.4127, + "nll_loss": 1.0966308116912842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004397002514451742, + "rewards/margins": 0.0701717883348465, + "rewards/rejected": -0.07456878572702408, + "step": 2133 + }, + { + "epoch": 1.475795297372061, + "grad_norm": 11.165557861328125, + "learning_rate": 4.7356692792377447e-05, + "log_odds_chosen": 5.398857116699219, + "log_odds_ratio": -0.466510146856308, + "logits/chosen": -0.056302133947610855, + "logits/rejected": -0.11562386155128479, + "logps/chosen": -0.09982578456401825, + "logps/rejected": -1.0831027030944824, + "loss": 3.7737, + "nll_loss": 0.8967711925506592, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00998257752507925, + "rewards/margins": 0.09832769632339478, + "rewards/rejected": -0.1083102822303772, + "step": 2134 + }, + { + "epoch": 1.4764868603042878, + "grad_norm": 7.29428243637085, + "learning_rate": 4.735285077608729e-05, + "log_odds_chosen": 1.775020718574524, + "log_odds_ratio": -0.6045640707015991, + "logits/chosen": -0.4116702675819397, + "logits/rejected": -0.45533379912376404, + "logps/chosen": -0.10833375155925751, + "logps/rejected": -0.5464069843292236, + "loss": 3.363, + "nll_loss": 0.7803056240081787, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01083337515592575, + "rewards/margins": 0.04380732774734497, + "rewards/rejected": -0.054640695452690125, + "step": 2135 + }, + { + "epoch": 1.4771784232365146, + "grad_norm": 6.622182369232178, + "learning_rate": 4.7349008759797145e-05, + "log_odds_chosen": 6.178796768188477, + "log_odds_ratio": -0.07372809946537018, + "logits/chosen": -0.34616512060165405, + "logits/rejected": -0.4092048108577728, + "logps/chosen": -0.05432863533496857, + "logps/rejected": -1.0202291011810303, + "loss": 3.2895, + "nll_loss": 0.8149974942207336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005432863254100084, + "rewards/margins": 0.096590057015419, + "rewards/rejected": -0.10202290862798691, + "step": 2136 + }, + { + "epoch": 1.4778699861687414, + "grad_norm": 5.9264235496521, + "learning_rate": 4.7345166743507e-05, + "log_odds_chosen": 3.3092384338378906, + "log_odds_ratio": -0.18755535781383514, + "logits/chosen": -0.47509706020355225, + "logits/rejected": -0.4891270101070404, + "logps/chosen": -0.05454757437109947, + "logps/rejected": -0.5544096231460571, + "loss": 4.0193, + "nll_loss": 0.9860591888427734, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005454757250845432, + "rewards/margins": 0.049986205995082855, + "rewards/rejected": -0.05544096231460571, + "step": 2137 + }, + { + "epoch": 1.4785615491009683, + "grad_norm": 13.524920463562012, + "learning_rate": 4.734132472721684e-05, + "log_odds_chosen": 1.8216850757598877, + "log_odds_ratio": -0.45454758405685425, + "logits/chosen": -0.4366537034511566, + "logits/rejected": -0.460040807723999, + "logps/chosen": -0.2892919182777405, + "logps/rejected": -0.5869209170341492, + "loss": 3.0996, + "nll_loss": 0.7294558882713318, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.028929192572832108, + "rewards/margins": 0.02976289950311184, + "rewards/rejected": -0.0586920902132988, + "step": 2138 + }, + { + "epoch": 1.479253112033195, + "grad_norm": 6.360928535461426, + "learning_rate": 4.7337482710926695e-05, + "log_odds_chosen": 6.825347423553467, + "log_odds_ratio": -0.009228329174220562, + "logits/chosen": -0.5052814483642578, + "logits/rejected": -0.6166610717773438, + "logps/chosen": -0.017878873273730278, + "logps/rejected": -1.3889663219451904, + "loss": 3.8464, + "nll_loss": 0.9606884717941284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017878874205052853, + "rewards/margins": 0.1371087282896042, + "rewards/rejected": -0.1388966143131256, + "step": 2139 + }, + { + "epoch": 1.479944674965422, + "grad_norm": 8.675497055053711, + "learning_rate": 4.733364069463655e-05, + "log_odds_chosen": 5.791857719421387, + "log_odds_ratio": -0.03341325372457504, + "logits/chosen": -0.3276616930961609, + "logits/rejected": -0.41704148054122925, + "logps/chosen": -0.01462834607809782, + "logps/rejected": -1.239469051361084, + "loss": 3.6756, + "nll_loss": 0.9155594706535339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001462834537960589, + "rewards/margins": 0.12248405814170837, + "rewards/rejected": -0.1239469051361084, + "step": 2140 + }, + { + "epoch": 1.4806362378976488, + "grad_norm": 6.080367565155029, + "learning_rate": 4.73297986783464e-05, + "log_odds_chosen": 5.032208442687988, + "log_odds_ratio": -0.07722554355859756, + "logits/chosen": -0.8022492527961731, + "logits/rejected": -0.8383902311325073, + "logps/chosen": -0.022705290466547012, + "logps/rejected": -0.8100517988204956, + "loss": 2.9716, + "nll_loss": 0.7351704835891724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022705290466547012, + "rewards/margins": 0.07873465120792389, + "rewards/rejected": -0.08100517839193344, + "step": 2141 + }, + { + "epoch": 1.4813278008298756, + "grad_norm": 6.03106164932251, + "learning_rate": 4.7325956662056246e-05, + "log_odds_chosen": 7.702787399291992, + "log_odds_ratio": -0.04957111179828644, + "logits/chosen": -0.39969608187675476, + "logits/rejected": -0.4934896230697632, + "logps/chosen": -0.0072068748995661736, + "logps/rejected": -1.4062868356704712, + "loss": 2.4801, + "nll_loss": 0.6150761246681213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007206875015981495, + "rewards/margins": 0.13990800082683563, + "rewards/rejected": -0.14062868058681488, + "step": 2142 + }, + { + "epoch": 1.4820193637621024, + "grad_norm": 6.338611125946045, + "learning_rate": 4.7322114645766105e-05, + "log_odds_chosen": 3.901794672012329, + "log_odds_ratio": -0.1494646668434143, + "logits/chosen": -0.9588245153427124, + "logits/rejected": -1.0375213623046875, + "logps/chosen": -0.032890114933252335, + "logps/rejected": -0.5975766181945801, + "loss": 3.4648, + "nll_loss": 0.851256251335144, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003289011772722006, + "rewards/margins": 0.056468650698661804, + "rewards/rejected": -0.05975766479969025, + "step": 2143 + }, + { + "epoch": 1.4827109266943292, + "grad_norm": 7.798945903778076, + "learning_rate": 4.731827262947595e-05, + "log_odds_chosen": 4.657999038696289, + "log_odds_ratio": -0.22223106026649475, + "logits/chosen": 0.04468072950839996, + "logits/rejected": 0.035873137414455414, + "logps/chosen": -0.08787171542644501, + "logps/rejected": -1.0934221744537354, + "loss": 4.3384, + "nll_loss": 1.0623890161514282, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00878717191517353, + "rewards/margins": 0.10055506229400635, + "rewards/rejected": -0.10934222489595413, + "step": 2144 + }, + { + "epoch": 1.483402489626556, + "grad_norm": 9.234012603759766, + "learning_rate": 4.73144306131858e-05, + "log_odds_chosen": 5.35285758972168, + "log_odds_ratio": -0.1352691948413849, + "logits/chosen": -0.47749924659729004, + "logits/rejected": -0.5015690326690674, + "logps/chosen": -0.04192415624856949, + "logps/rejected": -0.7736974954605103, + "loss": 5.0982, + "nll_loss": 1.2610275745391846, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004192416090518236, + "rewards/margins": 0.07317733764648438, + "rewards/rejected": -0.07736974954605103, + "step": 2145 + }, + { + "epoch": 1.484094052558783, + "grad_norm": 8.922550201416016, + "learning_rate": 4.7310588596895656e-05, + "log_odds_chosen": 2.317765712738037, + "log_odds_ratio": -0.4278677701950073, + "logits/chosen": -0.5566809177398682, + "logits/rejected": -0.5125826001167297, + "logps/chosen": -0.0754639282822609, + "logps/rejected": -0.27378353476524353, + "loss": 3.6639, + "nll_loss": 0.8731777667999268, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007546393200755119, + "rewards/margins": 0.019831961020827293, + "rewards/rejected": -0.027378354221582413, + "step": 2146 + }, + { + "epoch": 1.4847856154910097, + "grad_norm": 7.1663689613342285, + "learning_rate": 4.73067465806055e-05, + "log_odds_chosen": 2.725557327270508, + "log_odds_ratio": -0.2395949810743332, + "logits/chosen": -0.6971051096916199, + "logits/rejected": -0.721907913684845, + "logps/chosen": -0.12180915474891663, + "logps/rejected": -0.7929058074951172, + "loss": 4.1105, + "nll_loss": 1.0036766529083252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012180916965007782, + "rewards/margins": 0.06710965931415558, + "rewards/rejected": -0.07929057627916336, + "step": 2147 + }, + { + "epoch": 1.4854771784232366, + "grad_norm": 7.183311939239502, + "learning_rate": 4.7302904564315354e-05, + "log_odds_chosen": 5.562891960144043, + "log_odds_ratio": -0.05773504078388214, + "logits/chosen": -0.45431026816368103, + "logits/rejected": -0.514234185218811, + "logps/chosen": -0.03870779275894165, + "logps/rejected": -0.9699372053146362, + "loss": 4.4011, + "nll_loss": 1.0945072174072266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038707794155925512, + "rewards/margins": 0.0931229442358017, + "rewards/rejected": -0.09699372202157974, + "step": 2148 + }, + { + "epoch": 1.4861687413554634, + "grad_norm": 5.313976764678955, + "learning_rate": 4.7299062548025206e-05, + "log_odds_chosen": 5.13576602935791, + "log_odds_ratio": -0.11388306319713593, + "logits/chosen": -0.188670352101326, + "logits/rejected": -0.2855151891708374, + "logps/chosen": -0.09410841763019562, + "logps/rejected": -0.9148901104927063, + "loss": 2.5396, + "nll_loss": 0.6235028505325317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009410843253135681, + "rewards/margins": 0.08207817375659943, + "rewards/rejected": -0.09148901700973511, + "step": 2149 + }, + { + "epoch": 1.4868603042876902, + "grad_norm": 7.928635597229004, + "learning_rate": 4.729522053173506e-05, + "log_odds_chosen": 3.8607382774353027, + "log_odds_ratio": -0.5775615572929382, + "logits/chosen": -0.21268567442893982, + "logits/rejected": -0.2625402510166168, + "logps/chosen": -0.13489803671836853, + "logps/rejected": -0.9022601842880249, + "loss": 2.8881, + "nll_loss": 0.6642749309539795, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013489805161952972, + "rewards/margins": 0.076736219227314, + "rewards/rejected": -0.09022602438926697, + "step": 2150 + }, + { + "epoch": 1.487551867219917, + "grad_norm": 7.397334575653076, + "learning_rate": 4.7291378515444904e-05, + "log_odds_chosen": 6.1395344734191895, + "log_odds_ratio": -0.06367681920528412, + "logits/chosen": -0.08033701777458191, + "logits/rejected": -0.15627387166023254, + "logps/chosen": -0.07491898536682129, + "logps/rejected": -1.1956180334091187, + "loss": 2.5914, + "nll_loss": 0.6414797306060791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007491898722946644, + "rewards/margins": 0.11206988990306854, + "rewards/rejected": -0.11956179141998291, + "step": 2151 + }, + { + "epoch": 1.4882434301521439, + "grad_norm": 9.43634033203125, + "learning_rate": 4.7287536499154763e-05, + "log_odds_chosen": 2.093677282333374, + "log_odds_ratio": -0.6924388408660889, + "logits/chosen": -0.32470396161079407, + "logits/rejected": -0.3753596544265747, + "logps/chosen": -0.13023850321769714, + "logps/rejected": -0.6583462357521057, + "loss": 3.179, + "nll_loss": 0.7255163788795471, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01302385050803423, + "rewards/margins": 0.052810780704021454, + "rewards/rejected": -0.06583462655544281, + "step": 2152 + }, + { + "epoch": 1.4889349930843707, + "grad_norm": 7.570080757141113, + "learning_rate": 4.728369448286461e-05, + "log_odds_chosen": 4.581209182739258, + "log_odds_ratio": -0.19417330622673035, + "logits/chosen": -0.5809056162834167, + "logits/rejected": -0.6013231873512268, + "logps/chosen": -0.07302338629961014, + "logps/rejected": -0.6953396797180176, + "loss": 3.8762, + "nll_loss": 0.9496421217918396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007302338723093271, + "rewards/margins": 0.0622316375374794, + "rewards/rejected": -0.06953397393226624, + "step": 2153 + }, + { + "epoch": 1.4896265560165975, + "grad_norm": 4.859841346740723, + "learning_rate": 4.727985246657446e-05, + "log_odds_chosen": 5.577225685119629, + "log_odds_ratio": -0.014450366608798504, + "logits/chosen": -0.4863715171813965, + "logits/rejected": -0.4975162148475647, + "logps/chosen": -0.008166614919900894, + "logps/rejected": -0.7243944406509399, + "loss": 2.3394, + "nll_loss": 0.5834062695503235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008166615734808147, + "rewards/margins": 0.07162278145551682, + "rewards/rejected": -0.07243944704532623, + "step": 2154 + }, + { + "epoch": 1.4903181189488244, + "grad_norm": 7.439135551452637, + "learning_rate": 4.7276010450284314e-05, + "log_odds_chosen": 3.001149892807007, + "log_odds_ratio": -0.32452264428138733, + "logits/chosen": -0.40237510204315186, + "logits/rejected": -0.4026588201522827, + "logps/chosen": -0.09805717319250107, + "logps/rejected": -0.6367148160934448, + "loss": 3.4933, + "nll_loss": 0.8408713340759277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009805718436837196, + "rewards/margins": 0.053865764290094376, + "rewards/rejected": -0.06367147713899612, + "step": 2155 + }, + { + "epoch": 1.4910096818810512, + "grad_norm": 7.253205299377441, + "learning_rate": 4.727216843399416e-05, + "log_odds_chosen": 5.118253231048584, + "log_odds_ratio": -0.24667249619960785, + "logits/chosen": -0.8493658900260925, + "logits/rejected": -0.9143842458724976, + "logps/chosen": -0.07504816353321075, + "logps/rejected": -1.4404281377792358, + "loss": 2.998, + "nll_loss": 0.7248427867889404, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007504816632717848, + "rewards/margins": 0.13653799891471863, + "rewards/rejected": -0.14404281973838806, + "step": 2156 + }, + { + "epoch": 1.491701244813278, + "grad_norm": 8.593515396118164, + "learning_rate": 4.726832641770401e-05, + "log_odds_chosen": 4.080145835876465, + "log_odds_ratio": -0.08691144734621048, + "logits/chosen": -0.7267591953277588, + "logits/rejected": -0.7008885145187378, + "logps/chosen": -0.03745944797992706, + "logps/rejected": -0.5800659656524658, + "loss": 3.0962, + "nll_loss": 0.7653520107269287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037459449376910925, + "rewards/margins": 0.05426064878702164, + "rewards/rejected": -0.058006592094898224, + "step": 2157 + }, + { + "epoch": 1.4923928077455049, + "grad_norm": 9.480552673339844, + "learning_rate": 4.7264484401413865e-05, + "log_odds_chosen": 4.114963054656982, + "log_odds_ratio": -0.20959897339344025, + "logits/chosen": -0.6540583372116089, + "logits/rejected": -0.7013518810272217, + "logps/chosen": -0.10385959595441818, + "logps/rejected": -0.9378431439399719, + "loss": 3.4575, + "nll_loss": 0.8434049487113953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010385959409177303, + "rewards/margins": 0.08339834958314896, + "rewards/rejected": -0.09378431737422943, + "step": 2158 + }, + { + "epoch": 1.4930843706777317, + "grad_norm": 5.0781474113464355, + "learning_rate": 4.726064238512372e-05, + "log_odds_chosen": 2.2874131202697754, + "log_odds_ratio": -0.5506502389907837, + "logits/chosen": -0.6341639161109924, + "logits/rejected": -0.6848548650741577, + "logps/chosen": -0.17891795933246613, + "logps/rejected": -0.5480682849884033, + "loss": 3.3959, + "nll_loss": 0.7939110994338989, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017891794443130493, + "rewards/margins": 0.03691503778100014, + "rewards/rejected": -0.05480683222413063, + "step": 2159 + }, + { + "epoch": 1.4937759336099585, + "grad_norm": 11.54198169708252, + "learning_rate": 4.725680036883356e-05, + "log_odds_chosen": 5.798181056976318, + "log_odds_ratio": -0.6406501531600952, + "logits/chosen": -0.4734801948070526, + "logits/rejected": -0.5142822265625, + "logps/chosen": -0.07387179136276245, + "logps/rejected": -1.086634635925293, + "loss": 3.5472, + "nll_loss": 0.8227443695068359, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007387179881334305, + "rewards/margins": 0.10127627849578857, + "rewards/rejected": -0.10866345465183258, + "step": 2160 + }, + { + "epoch": 1.4944674965421854, + "grad_norm": 7.3922834396362305, + "learning_rate": 4.725295835254342e-05, + "log_odds_chosen": 4.19404411315918, + "log_odds_ratio": -0.1303994059562683, + "logits/chosen": -0.45763909816741943, + "logits/rejected": -0.5041736364364624, + "logps/chosen": -0.07846425473690033, + "logps/rejected": -0.8622405529022217, + "loss": 3.7875, + "nll_loss": 0.9338254332542419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007846426218748093, + "rewards/margins": 0.0783776268362999, + "rewards/rejected": -0.08622404932975769, + "step": 2161 + }, + { + "epoch": 1.4951590594744122, + "grad_norm": 6.090280532836914, + "learning_rate": 4.724911633625327e-05, + "log_odds_chosen": 1.9667760133743286, + "log_odds_ratio": -0.20600448548793793, + "logits/chosen": -0.729042649269104, + "logits/rejected": -0.7240199446678162, + "logps/chosen": -0.1258997917175293, + "logps/rejected": -0.6925151944160461, + "loss": 4.4843, + "nll_loss": 1.1004818677902222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012589978985488415, + "rewards/margins": 0.056661538779735565, + "rewards/rejected": -0.06925151497125626, + "step": 2162 + }, + { + "epoch": 1.495850622406639, + "grad_norm": 8.1521577835083, + "learning_rate": 4.724527431996312e-05, + "log_odds_chosen": 4.232202529907227, + "log_odds_ratio": -0.05224863812327385, + "logits/chosen": -0.5873209238052368, + "logits/rejected": -0.6442223191261292, + "logps/chosen": -0.037870604544878006, + "logps/rejected": -0.7845534086227417, + "loss": 5.4093, + "nll_loss": 1.347105622291565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037870605010539293, + "rewards/margins": 0.07466829568147659, + "rewards/rejected": -0.078455351293087, + "step": 2163 + }, + { + "epoch": 1.4965421853388658, + "grad_norm": 5.145297527313232, + "learning_rate": 4.724143230367297e-05, + "log_odds_chosen": 6.329265117645264, + "log_odds_ratio": -0.11377011239528656, + "logits/chosen": -0.6110424995422363, + "logits/rejected": -0.6548846960067749, + "logps/chosen": -0.03288734704256058, + "logps/rejected": -0.841201901435852, + "loss": 2.3195, + "nll_loss": 0.5684930086135864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032887347042560577, + "rewards/margins": 0.08083146065473557, + "rewards/rejected": -0.08412019163370132, + "step": 2164 + }, + { + "epoch": 1.4972337482710927, + "grad_norm": 8.501917839050293, + "learning_rate": 4.723759028738282e-05, + "log_odds_chosen": 5.699545383453369, + "log_odds_ratio": -0.06798206269741058, + "logits/chosen": -0.06234194338321686, + "logits/rejected": -0.06303275376558304, + "logps/chosen": -0.03642402961850166, + "logps/rejected": -0.678272008895874, + "loss": 2.7021, + "nll_loss": 0.6687160134315491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036424030549824238, + "rewards/margins": 0.06418479233980179, + "rewards/rejected": -0.06782719492912292, + "step": 2165 + }, + { + "epoch": 1.4979253112033195, + "grad_norm": 12.868005752563477, + "learning_rate": 4.723374827109267e-05, + "log_odds_chosen": 2.8281095027923584, + "log_odds_ratio": -0.680151104927063, + "logits/chosen": -0.6259391903877258, + "logits/rejected": -0.6474286317825317, + "logps/chosen": -0.12104423344135284, + "logps/rejected": -0.4440361261367798, + "loss": 3.9843, + "nll_loss": 0.9280720949172974, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01210442278534174, + "rewards/margins": 0.032299187034368515, + "rewards/rejected": -0.04440361261367798, + "step": 2166 + }, + { + "epoch": 1.4986168741355463, + "grad_norm": 7.35582160949707, + "learning_rate": 4.722990625480252e-05, + "log_odds_chosen": 7.120567321777344, + "log_odds_ratio": -0.011260163970291615, + "logits/chosen": -0.42110222578048706, + "logits/rejected": -0.474237859249115, + "logps/chosen": -0.007903838530182838, + "logps/rejected": -1.129345178604126, + "loss": 3.206, + "nll_loss": 0.8003849387168884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007903838413767517, + "rewards/margins": 0.11214414238929749, + "rewards/rejected": -0.11293452978134155, + "step": 2167 + }, + { + "epoch": 1.4993084370677732, + "grad_norm": 7.784075736999512, + "learning_rate": 4.7226064238512375e-05, + "log_odds_chosen": 5.417253494262695, + "log_odds_ratio": -0.026224004104733467, + "logits/chosen": -0.7177451252937317, + "logits/rejected": -0.7622504234313965, + "logps/chosen": -0.03399862349033356, + "logps/rejected": -0.8395058512687683, + "loss": 3.8119, + "nll_loss": 0.9503412246704102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033998622093349695, + "rewards/margins": 0.08055072277784348, + "rewards/rejected": -0.08395059406757355, + "step": 2168 + }, + { + "epoch": 1.5, + "grad_norm": 9.223710060119629, + "learning_rate": 4.722222222222222e-05, + "log_odds_chosen": 6.69984245300293, + "log_odds_ratio": -0.0645102858543396, + "logits/chosen": -0.47331032156944275, + "logits/rejected": -0.547443151473999, + "logps/chosen": -0.04146613925695419, + "logps/rejected": -1.09946870803833, + "loss": 4.3556, + "nll_loss": 1.0824520587921143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004146614111959934, + "rewards/margins": 0.10580027103424072, + "rewards/rejected": -0.10994688421487808, + "step": 2169 + }, + { + "epoch": 1.5006915629322268, + "grad_norm": 5.514622688293457, + "learning_rate": 4.721838020593208e-05, + "log_odds_chosen": 3.6207275390625, + "log_odds_ratio": -0.20964643359184265, + "logits/chosen": -0.41321468353271484, + "logits/rejected": -0.44356417655944824, + "logps/chosen": -0.08171873539686203, + "logps/rejected": -0.7008731365203857, + "loss": 3.3274, + "nll_loss": 0.8108948469161987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008171873167157173, + "rewards/margins": 0.06191544234752655, + "rewards/rejected": -0.07008731365203857, + "step": 2170 + }, + { + "epoch": 1.5013831258644537, + "grad_norm": 6.830862045288086, + "learning_rate": 4.7214538189641926e-05, + "log_odds_chosen": 2.9175610542297363, + "log_odds_ratio": -0.34225520491600037, + "logits/chosen": -0.5801506042480469, + "logits/rejected": -0.6245124340057373, + "logps/chosen": -0.19777889549732208, + "logps/rejected": -0.8225585222244263, + "loss": 3.689, + "nll_loss": 0.8880286812782288, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019777892157435417, + "rewards/margins": 0.062477968633174896, + "rewards/rejected": -0.08225585520267487, + "step": 2171 + }, + { + "epoch": 1.5020746887966805, + "grad_norm": 9.817378044128418, + "learning_rate": 4.721069617335178e-05, + "log_odds_chosen": 2.637803792953491, + "log_odds_ratio": -0.326236367225647, + "logits/chosen": -0.3923531770706177, + "logits/rejected": -0.4752095937728882, + "logps/chosen": -0.10277456045150757, + "logps/rejected": -0.7350403666496277, + "loss": 4.3623, + "nll_loss": 1.0579389333724976, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010277455672621727, + "rewards/margins": 0.06322658061981201, + "rewards/rejected": -0.07350403815507889, + "step": 2172 + }, + { + "epoch": 1.5027662517289073, + "grad_norm": 7.807641983032227, + "learning_rate": 4.720685415706163e-05, + "log_odds_chosen": 1.7504419088363647, + "log_odds_ratio": -0.5149462223052979, + "logits/chosen": -0.5191226005554199, + "logits/rejected": -0.514976441860199, + "logps/chosen": -0.1291445940732956, + "logps/rejected": -0.38086992502212524, + "loss": 3.6293, + "nll_loss": 0.8558423519134521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012914460152387619, + "rewards/margins": 0.025172535330057144, + "rewards/rejected": -0.03808699548244476, + "step": 2173 + }, + { + "epoch": 1.5034578146611342, + "grad_norm": 5.703883171081543, + "learning_rate": 4.7203012140771477e-05, + "log_odds_chosen": 4.115744113922119, + "log_odds_ratio": -0.2530246376991272, + "logits/chosen": -0.39776185154914856, + "logits/rejected": -0.42042019963264465, + "logps/chosen": -0.11606866866350174, + "logps/rejected": -0.5591020584106445, + "loss": 2.8344, + "nll_loss": 0.6832969188690186, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011606866493821144, + "rewards/margins": 0.04430334270000458, + "rewards/rejected": -0.05591020733118057, + "step": 2174 + }, + { + "epoch": 1.504149377593361, + "grad_norm": 5.126632213592529, + "learning_rate": 4.719917012448133e-05, + "log_odds_chosen": 3.8321008682250977, + "log_odds_ratio": -0.20930258929729462, + "logits/chosen": -0.413907915353775, + "logits/rejected": -0.4011528491973877, + "logps/chosen": -0.05856021121144295, + "logps/rejected": -0.8056188821792603, + "loss": 3.0414, + "nll_loss": 0.7394230365753174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00585602130740881, + "rewards/margins": 0.07470586895942688, + "rewards/rejected": -0.08056189119815826, + "step": 2175 + }, + { + "epoch": 1.5048409405255878, + "grad_norm": 6.326693058013916, + "learning_rate": 4.719532810819118e-05, + "log_odds_chosen": 4.230525970458984, + "log_odds_ratio": -0.14581666886806488, + "logits/chosen": -0.486900269985199, + "logits/rejected": -0.4845389723777771, + "logps/chosen": -0.0919831246137619, + "logps/rejected": -0.8042812347412109, + "loss": 3.837, + "nll_loss": 0.9446582794189453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009198312647640705, + "rewards/margins": 0.07122981548309326, + "rewards/rejected": -0.0804281234741211, + "step": 2176 + }, + { + "epoch": 1.5055325034578146, + "grad_norm": 6.409120559692383, + "learning_rate": 4.7191486091901034e-05, + "log_odds_chosen": 2.6936960220336914, + "log_odds_ratio": -0.239236980676651, + "logits/chosen": -0.33274608850479126, + "logits/rejected": -0.38033154606819153, + "logps/chosen": -0.11048668622970581, + "logps/rejected": -0.6125718355178833, + "loss": 3.0156, + "nll_loss": 0.7299783229827881, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011048668995499611, + "rewards/margins": 0.05020851641893387, + "rewards/rejected": -0.06125718355178833, + "step": 2177 + }, + { + "epoch": 1.5062240663900415, + "grad_norm": 7.672756195068359, + "learning_rate": 4.718764407561088e-05, + "log_odds_chosen": 3.801034688949585, + "log_odds_ratio": -0.316747784614563, + "logits/chosen": -0.19822092354297638, + "logits/rejected": -0.2509034276008606, + "logps/chosen": -0.1539057195186615, + "logps/rejected": -1.0572311878204346, + "loss": 3.6783, + "nll_loss": 0.8879072070121765, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01539057306945324, + "rewards/margins": 0.09033254534006119, + "rewards/rejected": -0.10572311282157898, + "step": 2178 + }, + { + "epoch": 1.5069156293222683, + "grad_norm": 5.304668426513672, + "learning_rate": 4.718380205932074e-05, + "log_odds_chosen": 4.217876434326172, + "log_odds_ratio": -0.23010079562664032, + "logits/chosen": -0.6556233167648315, + "logits/rejected": -0.6610323786735535, + "logps/chosen": -0.05022624507546425, + "logps/rejected": -0.6683301329612732, + "loss": 3.1224, + "nll_loss": 0.7575937509536743, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005022624507546425, + "rewards/margins": 0.061810389161109924, + "rewards/rejected": -0.0668330118060112, + "step": 2179 + }, + { + "epoch": 1.5076071922544951, + "grad_norm": 3.787766933441162, + "learning_rate": 4.7179960043030584e-05, + "log_odds_chosen": 3.8163716793060303, + "log_odds_ratio": -0.09614754468202591, + "logits/chosen": -0.3028152287006378, + "logits/rejected": -0.3119809329509735, + "logps/chosen": -0.0463411808013916, + "logps/rejected": -0.7167572379112244, + "loss": 2.5819, + "nll_loss": 0.6358667016029358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004634118173271418, + "rewards/margins": 0.06704160571098328, + "rewards/rejected": -0.07167572528123856, + "step": 2180 + }, + { + "epoch": 1.508298755186722, + "grad_norm": 8.500920295715332, + "learning_rate": 4.717611802674044e-05, + "log_odds_chosen": 4.703157424926758, + "log_odds_ratio": -0.09611813724040985, + "logits/chosen": -0.5946884751319885, + "logits/rejected": -0.5608884692192078, + "logps/chosen": -0.047219306230545044, + "logps/rejected": -0.5458288192749023, + "loss": 4.019, + "nll_loss": 0.9951435327529907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0047219302505254745, + "rewards/margins": 0.04986095428466797, + "rewards/rejected": -0.054582882672548294, + "step": 2181 + }, + { + "epoch": 1.5089903181189488, + "grad_norm": 7.796213626861572, + "learning_rate": 4.717227601045029e-05, + "log_odds_chosen": 5.931513786315918, + "log_odds_ratio": -0.12241919338703156, + "logits/chosen": -0.402628093957901, + "logits/rejected": -0.45937222242355347, + "logps/chosen": -0.06165814772248268, + "logps/rejected": -1.0699610710144043, + "loss": 3.4985, + "nll_loss": 0.8623833060264587, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006165814585983753, + "rewards/margins": 0.10083030164241791, + "rewards/rejected": -0.10699611157178879, + "step": 2182 + }, + { + "epoch": 1.5096818810511756, + "grad_norm": 4.598722457885742, + "learning_rate": 4.7168433994160135e-05, + "log_odds_chosen": 4.163899898529053, + "log_odds_ratio": -0.14226964116096497, + "logits/chosen": -0.4701404571533203, + "logits/rejected": -0.49784785509109497, + "logps/chosen": -0.07165002077817917, + "logps/rejected": -0.796354353427887, + "loss": 2.391, + "nll_loss": 0.5835211277008057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007165002636611462, + "rewards/margins": 0.07247043401002884, + "rewards/rejected": -0.07963543385267258, + "step": 2183 + }, + { + "epoch": 1.5103734439834025, + "grad_norm": 4.878766059875488, + "learning_rate": 4.716459197786999e-05, + "log_odds_chosen": 3.9604339599609375, + "log_odds_ratio": -0.14563514292240143, + "logits/chosen": -0.42983126640319824, + "logits/rejected": -0.48781245946884155, + "logps/chosen": -0.12423344701528549, + "logps/rejected": -0.8960301280021667, + "loss": 3.1638, + "nll_loss": 0.7763780355453491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012423344887793064, + "rewards/margins": 0.07717966288328171, + "rewards/rejected": -0.0896030068397522, + "step": 2184 + }, + { + "epoch": 1.5110650069156293, + "grad_norm": 4.061890602111816, + "learning_rate": 4.716074996157984e-05, + "log_odds_chosen": 4.62667989730835, + "log_odds_ratio": -0.09810079634189606, + "logits/chosen": -0.42752885818481445, + "logits/rejected": -0.45172789692878723, + "logps/chosen": -0.03561442345380783, + "logps/rejected": -0.48452576994895935, + "loss": 2.199, + "nll_loss": 0.5399402379989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003561442717909813, + "rewards/margins": 0.044891130179166794, + "rewards/rejected": -0.048452578485012054, + "step": 2185 + }, + { + "epoch": 1.5117565698478561, + "grad_norm": 8.15449333190918, + "learning_rate": 4.715690794528969e-05, + "log_odds_chosen": 3.6245779991149902, + "log_odds_ratio": -0.15182821452617645, + "logits/chosen": -0.4021381139755249, + "logits/rejected": -0.42084044218063354, + "logps/chosen": -0.0873645544052124, + "logps/rejected": -0.9084411859512329, + "loss": 3.3647, + "nll_loss": 0.8259831666946411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00873645581305027, + "rewards/margins": 0.08210767060518265, + "rewards/rejected": -0.09084412455558777, + "step": 2186 + }, + { + "epoch": 1.512448132780083, + "grad_norm": 7.220978736877441, + "learning_rate": 4.715306592899954e-05, + "log_odds_chosen": 1.8586406707763672, + "log_odds_ratio": -0.3653057813644409, + "logits/chosen": -0.5158290863037109, + "logits/rejected": -0.5505003333091736, + "logps/chosen": -0.09770803898572922, + "logps/rejected": -0.3956950902938843, + "loss": 3.5248, + "nll_loss": 0.8446773290634155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009770805016160011, + "rewards/margins": 0.029798705130815506, + "rewards/rejected": -0.03956950828433037, + "step": 2187 + }, + { + "epoch": 1.5131396957123098, + "grad_norm": 9.282061576843262, + "learning_rate": 4.71492239127094e-05, + "log_odds_chosen": 5.534390449523926, + "log_odds_ratio": -0.2007375955581665, + "logits/chosen": -0.6496320962905884, + "logits/rejected": -0.6339712142944336, + "logps/chosen": -0.05207693949341774, + "logps/rejected": -0.7376291751861572, + "loss": 3.0871, + "nll_loss": 0.7517117261886597, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005207694135606289, + "rewards/margins": 0.06855522096157074, + "rewards/rejected": -0.0737629160284996, + "step": 2188 + }, + { + "epoch": 1.5138312586445366, + "grad_norm": 13.90990924835205, + "learning_rate": 4.714538189641924e-05, + "log_odds_chosen": 3.7275521755218506, + "log_odds_ratio": -0.5319461226463318, + "logits/chosen": -0.6782576441764832, + "logits/rejected": -0.7164244651794434, + "logps/chosen": -0.07063320279121399, + "logps/rejected": -0.7580413818359375, + "loss": 3.8182, + "nll_loss": 0.9013439416885376, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007063319906592369, + "rewards/margins": 0.0687408298254013, + "rewards/rejected": -0.07580414414405823, + "step": 2189 + }, + { + "epoch": 1.5145228215767634, + "grad_norm": 11.419933319091797, + "learning_rate": 4.7141539880129095e-05, + "log_odds_chosen": 3.8566625118255615, + "log_odds_ratio": -0.6142464876174927, + "logits/chosen": -0.8340749740600586, + "logits/rejected": -0.8271781206130981, + "logps/chosen": -0.11666074395179749, + "logps/rejected": -0.5378887057304382, + "loss": 2.7655, + "nll_loss": 0.6299543976783752, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011666074395179749, + "rewards/margins": 0.042122796177864075, + "rewards/rejected": -0.05378887057304382, + "step": 2190 + }, + { + "epoch": 1.5152143845089903, + "grad_norm": 6.894917011260986, + "learning_rate": 4.713769786383895e-05, + "log_odds_chosen": 5.651021480560303, + "log_odds_ratio": -0.1871916502714157, + "logits/chosen": -0.5743628740310669, + "logits/rejected": -0.6309788823127747, + "logps/chosen": -0.06064632534980774, + "logps/rejected": -1.5780786275863647, + "loss": 3.1899, + "nll_loss": 0.7787548899650574, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006064632907509804, + "rewards/margins": 0.15174323320388794, + "rewards/rejected": -0.157807856798172, + "step": 2191 + }, + { + "epoch": 1.515905947441217, + "grad_norm": 12.359020233154297, + "learning_rate": 4.7133855847548793e-05, + "log_odds_chosen": 3.298121213912964, + "log_odds_ratio": -0.49267399311065674, + "logits/chosen": -0.6871169805526733, + "logits/rejected": -0.6752989888191223, + "logps/chosen": -0.1996937096118927, + "logps/rejected": -0.5604193210601807, + "loss": 3.2892, + "nll_loss": 0.7730243802070618, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01996937021613121, + "rewards/margins": 0.036072563380002975, + "rewards/rejected": -0.056041933596134186, + "step": 2192 + }, + { + "epoch": 1.516597510373444, + "grad_norm": 10.040817260742188, + "learning_rate": 4.7130013831258646e-05, + "log_odds_chosen": 5.390373229980469, + "log_odds_ratio": -0.04475773125886917, + "logits/chosen": -0.41156676411628723, + "logits/rejected": -0.5033215284347534, + "logps/chosen": -0.046836577355861664, + "logps/rejected": -1.0671862363815308, + "loss": 4.4292, + "nll_loss": 1.1028175354003906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046836575493216515, + "rewards/margins": 0.10203496366739273, + "rewards/rejected": -0.10671862214803696, + "step": 2193 + }, + { + "epoch": 1.5172890733056708, + "grad_norm": 3.939854621887207, + "learning_rate": 4.71261718149685e-05, + "log_odds_chosen": 5.454831123352051, + "log_odds_ratio": -0.08274343609809875, + "logits/chosen": -0.4129883646965027, + "logits/rejected": -0.46223029494285583, + "logps/chosen": -0.051142267882823944, + "logps/rejected": -1.1733758449554443, + "loss": 2.4585, + "nll_loss": 0.6063456535339355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005114227067679167, + "rewards/margins": 0.11222335696220398, + "rewards/rejected": -0.11733758449554443, + "step": 2194 + }, + { + "epoch": 1.5179806362378976, + "grad_norm": 8.463907241821289, + "learning_rate": 4.712232979867835e-05, + "log_odds_chosen": 4.573187828063965, + "log_odds_ratio": -0.14323106408119202, + "logits/chosen": -0.7672544121742249, + "logits/rejected": -0.8273261785507202, + "logps/chosen": -0.07819414883852005, + "logps/rejected": -0.8404219746589661, + "loss": 3.8202, + "nll_loss": 0.9407300353050232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00781941507011652, + "rewards/margins": 0.07622279226779938, + "rewards/rejected": -0.08404220640659332, + "step": 2195 + }, + { + "epoch": 1.5186721991701244, + "grad_norm": 5.669083118438721, + "learning_rate": 4.7118487782388196e-05, + "log_odds_chosen": 4.349452018737793, + "log_odds_ratio": -0.23575344681739807, + "logits/chosen": -0.345994234085083, + "logits/rejected": -0.38687825202941895, + "logps/chosen": -0.08902490884065628, + "logps/rejected": -0.8375565409660339, + "loss": 3.4363, + "nll_loss": 0.8354873657226562, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008902492001652718, + "rewards/margins": 0.07485316693782806, + "rewards/rejected": -0.08375565707683563, + "step": 2196 + }, + { + "epoch": 1.5193637621023512, + "grad_norm": 6.776609897613525, + "learning_rate": 4.711464576609805e-05, + "log_odds_chosen": 7.661093711853027, + "log_odds_ratio": -0.00390626722946763, + "logits/chosen": -0.5424741506576538, + "logits/rejected": -0.66068434715271, + "logps/chosen": -0.0027085847686976194, + "logps/rejected": -1.221256971359253, + "loss": 2.6495, + "nll_loss": 0.6619873046875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002708584943320602, + "rewards/margins": 0.12185483425855637, + "rewards/rejected": -0.12212569266557693, + "step": 2197 + }, + { + "epoch": 1.520055325034578, + "grad_norm": 6.6352458000183105, + "learning_rate": 4.71108037498079e-05, + "log_odds_chosen": 3.3492088317871094, + "log_odds_ratio": -0.3416593372821808, + "logits/chosen": -0.6170538663864136, + "logits/rejected": -0.6046271324157715, + "logps/chosen": -0.09723344445228577, + "logps/rejected": -0.8013380169868469, + "loss": 3.7498, + "nll_loss": 0.9032930135726929, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009723344817757607, + "rewards/margins": 0.07041046023368835, + "rewards/rejected": -0.08013381063938141, + "step": 2198 + }, + { + "epoch": 1.520746887966805, + "grad_norm": 5.197224140167236, + "learning_rate": 4.7106961733517754e-05, + "log_odds_chosen": 3.2548828125, + "log_odds_ratio": -0.12569035589694977, + "logits/chosen": -0.8019800186157227, + "logits/rejected": -0.7516960501670837, + "logps/chosen": -0.10095830261707306, + "logps/rejected": -0.8367016315460205, + "loss": 3.6117, + "nll_loss": 0.8903552889823914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010095831006765366, + "rewards/margins": 0.07357433438301086, + "rewards/rejected": -0.08367016911506653, + "step": 2199 + }, + { + "epoch": 1.5214384508990317, + "grad_norm": 6.180981636047363, + "learning_rate": 4.71031197172276e-05, + "log_odds_chosen": 7.052360534667969, + "log_odds_ratio": -0.011196529492735863, + "logits/chosen": -0.6240170001983643, + "logits/rejected": -0.7029715776443481, + "logps/chosen": -0.006017737090587616, + "logps/rejected": -1.1081998348236084, + "loss": 3.5874, + "nll_loss": 0.8957244753837585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000601773732341826, + "rewards/margins": 0.11021822690963745, + "rewards/rejected": -0.1108199954032898, + "step": 2200 + }, + { + "epoch": 1.5221300138312586, + "grad_norm": 6.7219109535217285, + "learning_rate": 4.709927770093746e-05, + "log_odds_chosen": 2.2531862258911133, + "log_odds_ratio": -0.41288065910339355, + "logits/chosen": -0.3794539272785187, + "logits/rejected": -0.3585187494754791, + "logps/chosen": -0.18572141230106354, + "logps/rejected": -0.6649050712585449, + "loss": 3.0795, + "nll_loss": 0.7285885810852051, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018572140485048294, + "rewards/margins": 0.04791836813092232, + "rewards/rejected": -0.06649051606655121, + "step": 2201 + }, + { + "epoch": 1.5228215767634854, + "grad_norm": 6.930861949920654, + "learning_rate": 4.7095435684647304e-05, + "log_odds_chosen": 3.2896299362182617, + "log_odds_ratio": -0.2985961437225342, + "logits/chosen": -0.536874532699585, + "logits/rejected": -0.55685955286026, + "logps/chosen": -0.09223245084285736, + "logps/rejected": -0.9117711186408997, + "loss": 3.1251, + "nll_loss": 0.7514032125473022, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009223245084285736, + "rewards/margins": 0.08195386826992035, + "rewards/rejected": -0.09117711335420609, + "step": 2202 + }, + { + "epoch": 1.5235131396957122, + "grad_norm": 6.204575538635254, + "learning_rate": 4.709159366835716e-05, + "log_odds_chosen": 6.052004814147949, + "log_odds_ratio": -0.183577299118042, + "logits/chosen": -0.5787208080291748, + "logits/rejected": -0.6404365301132202, + "logps/chosen": -0.04411087930202484, + "logps/rejected": -1.0143474340438843, + "loss": 2.8211, + "nll_loss": 0.6869177222251892, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004411087837070227, + "rewards/margins": 0.09702365100383759, + "rewards/rejected": -0.10143474489450455, + "step": 2203 + }, + { + "epoch": 1.524204702627939, + "grad_norm": 7.466019153594971, + "learning_rate": 4.708775165206701e-05, + "log_odds_chosen": 5.644599437713623, + "log_odds_ratio": -0.32767152786254883, + "logits/chosen": -0.37275075912475586, + "logits/rejected": -0.37881413102149963, + "logps/chosen": -0.07302402704954147, + "logps/rejected": -0.7124531269073486, + "loss": 2.5691, + "nll_loss": 0.6095045804977417, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007302402053028345, + "rewards/margins": 0.06394290924072266, + "rewards/rejected": -0.07124531269073486, + "step": 2204 + }, + { + "epoch": 1.5248962655601659, + "grad_norm": 5.963090419769287, + "learning_rate": 4.7083909635776855e-05, + "log_odds_chosen": 4.4939069747924805, + "log_odds_ratio": -0.03454150632023811, + "logits/chosen": -0.6710375547409058, + "logits/rejected": -0.6639552116394043, + "logps/chosen": -0.03183533623814583, + "logps/rejected": -0.6352487206459045, + "loss": 3.6995, + "nll_loss": 0.9214182496070862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031835336703807116, + "rewards/margins": 0.06034134328365326, + "rewards/rejected": -0.06352487206459045, + "step": 2205 + }, + { + "epoch": 1.5255878284923927, + "grad_norm": 9.382392883300781, + "learning_rate": 4.708006761948671e-05, + "log_odds_chosen": 4.671770095825195, + "log_odds_ratio": -0.13547496497631073, + "logits/chosen": -0.3547920286655426, + "logits/rejected": -0.42458677291870117, + "logps/chosen": -0.06325706094503403, + "logps/rejected": -0.9535627365112305, + "loss": 4.7383, + "nll_loss": 1.171016812324524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006325706373900175, + "rewards/margins": 0.08903056383132935, + "rewards/rejected": -0.09535627067089081, + "step": 2206 + }, + { + "epoch": 1.5262793914246195, + "grad_norm": 8.332213401794434, + "learning_rate": 4.707622560319656e-05, + "log_odds_chosen": 2.178969383239746, + "log_odds_ratio": -0.4346066117286682, + "logits/chosen": -0.049312885850667953, + "logits/rejected": 0.04795428365468979, + "logps/chosen": -0.11472927033901215, + "logps/rejected": -0.44776538014411926, + "loss": 3.148, + "nll_loss": 0.7435441017150879, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011472927406430244, + "rewards/margins": 0.033303603529930115, + "rewards/rejected": -0.04477653279900551, + "step": 2207 + }, + { + "epoch": 1.5269709543568464, + "grad_norm": 5.1370720863342285, + "learning_rate": 4.707238358690641e-05, + "log_odds_chosen": 6.408810615539551, + "log_odds_ratio": -0.0063616689294576645, + "logits/chosen": -0.4974561333656311, + "logits/rejected": -0.5476498603820801, + "logps/chosen": -0.012093435041606426, + "logps/rejected": -1.1643551588058472, + "loss": 2.863, + "nll_loss": 0.7151111364364624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012093434343114495, + "rewards/margins": 0.11522617936134338, + "rewards/rejected": -0.11643552035093307, + "step": 2208 + }, + { + "epoch": 1.5276625172890732, + "grad_norm": 9.080206871032715, + "learning_rate": 4.706854157061626e-05, + "log_odds_chosen": 5.32785701751709, + "log_odds_ratio": -0.6000714898109436, + "logits/chosen": -0.34474441409111023, + "logits/rejected": -0.39225518703460693, + "logps/chosen": -0.05898641422390938, + "logps/rejected": -0.8116523623466492, + "loss": 3.4182, + "nll_loss": 0.7945421934127808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005898641422390938, + "rewards/margins": 0.07526659220457077, + "rewards/rejected": -0.08116523176431656, + "step": 2209 + }, + { + "epoch": 1.5283540802213, + "grad_norm": 8.669232368469238, + "learning_rate": 4.706469955432612e-05, + "log_odds_chosen": 6.044839859008789, + "log_odds_ratio": -0.020968372002243996, + "logits/chosen": -0.28658148646354675, + "logits/rejected": -0.33178937435150146, + "logps/chosen": -0.02577211521565914, + "logps/rejected": -1.0604619979858398, + "loss": 4.4212, + "nll_loss": 1.1032041311264038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025772114749997854, + "rewards/margins": 0.10346899181604385, + "rewards/rejected": -0.10604619979858398, + "step": 2210 + }, + { + "epoch": 1.5290456431535269, + "grad_norm": 4.403314113616943, + "learning_rate": 4.706085753803596e-05, + "log_odds_chosen": 5.636608600616455, + "log_odds_ratio": -0.12528975307941437, + "logits/chosen": -0.18785572052001953, + "logits/rejected": -0.254210501909256, + "logps/chosen": -0.042339250445365906, + "logps/rejected": -0.874763548374176, + "loss": 2.4197, + "nll_loss": 0.5923962593078613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004233924672007561, + "rewards/margins": 0.08324243128299713, + "rewards/rejected": -0.08747635781764984, + "step": 2211 + }, + { + "epoch": 1.5297372060857537, + "grad_norm": 10.492977142333984, + "learning_rate": 4.7057015521745815e-05, + "log_odds_chosen": 2.836245536804199, + "log_odds_ratio": -0.3267596662044525, + "logits/chosen": -0.410195916891098, + "logits/rejected": -0.4073330760002136, + "logps/chosen": -0.1376115381717682, + "logps/rejected": -0.7667669057846069, + "loss": 3.9706, + "nll_loss": 0.9599714875221252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013761154375970364, + "rewards/margins": 0.06291554123163223, + "rewards/rejected": -0.07667669653892517, + "step": 2212 + }, + { + "epoch": 1.5304287690179805, + "grad_norm": 5.397129058837891, + "learning_rate": 4.705317350545567e-05, + "log_odds_chosen": 4.809292793273926, + "log_odds_ratio": -0.054968055337667465, + "logits/chosen": -0.5614354610443115, + "logits/rejected": -0.5658167004585266, + "logps/chosen": -0.06616536527872086, + "logps/rejected": -1.0150160789489746, + "loss": 2.9256, + "nll_loss": 0.725896954536438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006616536527872086, + "rewards/margins": 0.09488508105278015, + "rewards/rejected": -0.10150161385536194, + "step": 2213 + }, + { + "epoch": 1.5311203319502074, + "grad_norm": 6.102870464324951, + "learning_rate": 4.704933148916551e-05, + "log_odds_chosen": 3.3828892707824707, + "log_odds_ratio": -0.2942639887332916, + "logits/chosen": -0.39553898572921753, + "logits/rejected": -0.3925166130065918, + "logps/chosen": -0.07648057490587234, + "logps/rejected": -0.4567372798919678, + "loss": 3.152, + "nll_loss": 0.758583664894104, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007648056838661432, + "rewards/margins": 0.03802567347884178, + "rewards/rejected": -0.045673731714487076, + "step": 2214 + }, + { + "epoch": 1.5318118948824342, + "grad_norm": 11.949549674987793, + "learning_rate": 4.7045489472875366e-05, + "log_odds_chosen": 3.903663396835327, + "log_odds_ratio": -0.39459455013275146, + "logits/chosen": -0.2356366515159607, + "logits/rejected": -0.2835361659526825, + "logps/chosen": -0.06455142050981522, + "logps/rejected": -0.6523666977882385, + "loss": 3.3108, + "nll_loss": 0.7882421612739563, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006455142050981522, + "rewards/margins": 0.05878153070807457, + "rewards/rejected": -0.06523667275905609, + "step": 2215 + }, + { + "epoch": 1.532503457814661, + "grad_norm": 4.589229106903076, + "learning_rate": 4.704164745658522e-05, + "log_odds_chosen": 2.781095504760742, + "log_odds_ratio": -0.1778787225484848, + "logits/chosen": -0.510430634021759, + "logits/rejected": -0.5029317140579224, + "logps/chosen": -0.11355285346508026, + "logps/rejected": -0.7437509894371033, + "loss": 3.0605, + "nll_loss": 0.7473265528678894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011355285532772541, + "rewards/margins": 0.06301981210708618, + "rewards/rejected": -0.07437510043382645, + "step": 2216 + }, + { + "epoch": 1.5331950207468878, + "grad_norm": 8.02696418762207, + "learning_rate": 4.703780544029507e-05, + "log_odds_chosen": 3.008775472640991, + "log_odds_ratio": -0.3435032367706299, + "logits/chosen": -0.4846343398094177, + "logits/rejected": -0.4518267810344696, + "logps/chosen": -0.10005879402160645, + "logps/rejected": -0.5169304013252258, + "loss": 4.5717, + "nll_loss": 1.1085805892944336, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010005880147218704, + "rewards/margins": 0.04168716073036194, + "rewards/rejected": -0.05169304460287094, + "step": 2217 + }, + { + "epoch": 1.5338865836791147, + "grad_norm": 4.993509769439697, + "learning_rate": 4.7033963424004916e-05, + "log_odds_chosen": 6.149620532989502, + "log_odds_ratio": -0.07025769352912903, + "logits/chosen": -0.36979010701179504, + "logits/rejected": -0.36081594228744507, + "logps/chosen": -0.03063029982149601, + "logps/rejected": -0.859117329120636, + "loss": 2.776, + "nll_loss": 0.686970591545105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030630300752818584, + "rewards/margins": 0.08284871280193329, + "rewards/rejected": -0.08591174334287643, + "step": 2218 + }, + { + "epoch": 1.5345781466113415, + "grad_norm": 7.524913311004639, + "learning_rate": 4.7030121407714775e-05, + "log_odds_chosen": 4.3976359367370605, + "log_odds_ratio": -0.19181588292121887, + "logits/chosen": -0.6076658368110657, + "logits/rejected": -0.6003731489181519, + "logps/chosen": -0.05698583647608757, + "logps/rejected": -0.697370707988739, + "loss": 4.0902, + "nll_loss": 1.003367304801941, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005698584020137787, + "rewards/margins": 0.06403848528862, + "rewards/rejected": -0.06973707675933838, + "step": 2219 + }, + { + "epoch": 1.5352697095435683, + "grad_norm": 9.015628814697266, + "learning_rate": 4.702627939142462e-05, + "log_odds_chosen": 4.4959282875061035, + "log_odds_ratio": -0.49220168590545654, + "logits/chosen": -0.47423118352890015, + "logits/rejected": -0.489341676235199, + "logps/chosen": -0.09900905936956406, + "logps/rejected": -0.9558752775192261, + "loss": 3.5996, + "nll_loss": 0.8506906628608704, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009900907054543495, + "rewards/margins": 0.08568662405014038, + "rewards/rejected": -0.09558752924203873, + "step": 2220 + }, + { + "epoch": 1.5359612724757952, + "grad_norm": 8.050044059753418, + "learning_rate": 4.7022437375134474e-05, + "log_odds_chosen": 2.9283671379089355, + "log_odds_ratio": -0.2631620168685913, + "logits/chosen": -0.7292971611022949, + "logits/rejected": -0.7532137632369995, + "logps/chosen": -0.07009609788656235, + "logps/rejected": -0.6604401469230652, + "loss": 3.9517, + "nll_loss": 0.9616029262542725, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00700960960239172, + "rewards/margins": 0.05903441086411476, + "rewards/rejected": -0.06604401767253876, + "step": 2221 + }, + { + "epoch": 1.536652835408022, + "grad_norm": 8.144369125366211, + "learning_rate": 4.7018595358844326e-05, + "log_odds_chosen": 6.381158828735352, + "log_odds_ratio": -0.033019233494997025, + "logits/chosen": -0.3614676594734192, + "logits/rejected": -0.3959410786628723, + "logps/chosen": -0.02348705753684044, + "logps/rejected": -1.3655200004577637, + "loss": 3.0952, + "nll_loss": 0.7705005407333374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00234870589338243, + "rewards/margins": 0.13420329988002777, + "rewards/rejected": -0.13655200600624084, + "step": 2222 + }, + { + "epoch": 1.5373443983402488, + "grad_norm": 3.36130952835083, + "learning_rate": 4.701475334255417e-05, + "log_odds_chosen": 6.352807521820068, + "log_odds_ratio": -0.04064463824033737, + "logits/chosen": -0.0948086827993393, + "logits/rejected": -0.13905943930149078, + "logps/chosen": -0.018351389095187187, + "logps/rejected": -0.7243081331253052, + "loss": 2.6742, + "nll_loss": 0.6644976735115051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018351390026509762, + "rewards/margins": 0.07059568166732788, + "rewards/rejected": -0.0724308118224144, + "step": 2223 + }, + { + "epoch": 1.5380359612724757, + "grad_norm": 5.5334954261779785, + "learning_rate": 4.7010911326264024e-05, + "log_odds_chosen": 4.103457450866699, + "log_odds_ratio": -0.4907708168029785, + "logits/chosen": -0.39433753490448, + "logits/rejected": -0.41758227348327637, + "logps/chosen": -0.1360062211751938, + "logps/rejected": -0.6773937344551086, + "loss": 3.0142, + "nll_loss": 0.7044817209243774, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013600623235106468, + "rewards/margins": 0.054138749837875366, + "rewards/rejected": -0.06773936748504639, + "step": 2224 + }, + { + "epoch": 1.5387275242047025, + "grad_norm": 8.210365295410156, + "learning_rate": 4.7007069309973877e-05, + "log_odds_chosen": 4.578831672668457, + "log_odds_ratio": -0.11062193661928177, + "logits/chosen": -0.503457248210907, + "logits/rejected": -0.5944218635559082, + "logps/chosen": -0.060358330607414246, + "logps/rejected": -1.2142866849899292, + "loss": 4.0134, + "nll_loss": 0.9922851324081421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006035833153873682, + "rewards/margins": 0.11539284139871597, + "rewards/rejected": -0.12142866849899292, + "step": 2225 + }, + { + "epoch": 1.5394190871369293, + "grad_norm": 7.67247200012207, + "learning_rate": 4.700322729368373e-05, + "log_odds_chosen": 4.713946342468262, + "log_odds_ratio": -0.17651237547397614, + "logits/chosen": -0.8597179055213928, + "logits/rejected": -0.8643999099731445, + "logps/chosen": -0.04120595008134842, + "logps/rejected": -0.8932948112487793, + "loss": 3.6999, + "nll_loss": 0.9073218703269958, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004120595287531614, + "rewards/margins": 0.08520889282226562, + "rewards/rejected": -0.08932948112487793, + "step": 2226 + }, + { + "epoch": 1.5401106500691562, + "grad_norm": 10.076916694641113, + "learning_rate": 4.6999385277393575e-05, + "log_odds_chosen": 4.503382205963135, + "log_odds_ratio": -0.19504140317440033, + "logits/chosen": -0.4432103931903839, + "logits/rejected": -0.4810800850391388, + "logps/chosen": -0.05120420455932617, + "logps/rejected": -0.8100253343582153, + "loss": 4.4369, + "nll_loss": 1.0897185802459717, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005120420828461647, + "rewards/margins": 0.07588210701942444, + "rewards/rejected": -0.08100253343582153, + "step": 2227 + }, + { + "epoch": 1.540802213001383, + "grad_norm": 13.191364288330078, + "learning_rate": 4.6995543261103434e-05, + "log_odds_chosen": 3.591403007507324, + "log_odds_ratio": -0.5504745841026306, + "logits/chosen": -0.6312179565429688, + "logits/rejected": -0.7169389724731445, + "logps/chosen": -0.06760822981595993, + "logps/rejected": -0.6767516732215881, + "loss": 3.1659, + "nll_loss": 0.7364269495010376, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0067608230747282505, + "rewards/margins": 0.06091434881091118, + "rewards/rejected": -0.06767517328262329, + "step": 2228 + }, + { + "epoch": 1.5414937759336098, + "grad_norm": 5.499429702758789, + "learning_rate": 4.699170124481328e-05, + "log_odds_chosen": 4.832643508911133, + "log_odds_ratio": -0.1555217206478119, + "logits/chosen": -0.2926243841648102, + "logits/rejected": -0.3150829076766968, + "logps/chosen": -0.04423796385526657, + "logps/rejected": -0.8635392189025879, + "loss": 2.8915, + "nll_loss": 0.7073196172714233, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0044237966649234295, + "rewards/margins": 0.08193013072013855, + "rewards/rejected": -0.08635392785072327, + "step": 2229 + }, + { + "epoch": 1.5421853388658366, + "grad_norm": 4.754825115203857, + "learning_rate": 4.698785922852313e-05, + "log_odds_chosen": 5.817052364349365, + "log_odds_ratio": -0.2055891752243042, + "logits/chosen": -0.33507710695266724, + "logits/rejected": -0.433362752199173, + "logps/chosen": -0.049009814858436584, + "logps/rejected": -0.8625462055206299, + "loss": 2.7819, + "nll_loss": 0.674911618232727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004900981672108173, + "rewards/margins": 0.08135364204645157, + "rewards/rejected": -0.08625461906194687, + "step": 2230 + }, + { + "epoch": 1.5428769017980635, + "grad_norm": 7.7743635177612305, + "learning_rate": 4.6984017212232984e-05, + "log_odds_chosen": 3.0345685482025146, + "log_odds_ratio": -0.2999267578125, + "logits/chosen": -0.336641788482666, + "logits/rejected": -0.3972005844116211, + "logps/chosen": -0.11239126324653625, + "logps/rejected": -0.6859149932861328, + "loss": 3.0472, + "nll_loss": 0.7318093180656433, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011239126324653625, + "rewards/margins": 0.057352371513843536, + "rewards/rejected": -0.06859149783849716, + "step": 2231 + }, + { + "epoch": 1.5435684647302903, + "grad_norm": 5.8012166023254395, + "learning_rate": 4.698017519594283e-05, + "log_odds_chosen": 6.982174396514893, + "log_odds_ratio": -0.018287427723407745, + "logits/chosen": -0.5732105374336243, + "logits/rejected": -0.6470195651054382, + "logps/chosen": -0.00966467335820198, + "logps/rejected": -1.0621178150177002, + "loss": 2.9736, + "nll_loss": 0.7415661811828613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009664673125371337, + "rewards/margins": 0.10524532198905945, + "rewards/rejected": -0.10621179640293121, + "step": 2232 + }, + { + "epoch": 1.5442600276625171, + "grad_norm": 10.100383758544922, + "learning_rate": 4.697633317965268e-05, + "log_odds_chosen": 2.6996121406555176, + "log_odds_ratio": -0.5876289010047913, + "logits/chosen": -0.5158724784851074, + "logits/rejected": -0.5607286691665649, + "logps/chosen": -0.13475391268730164, + "logps/rejected": -0.5057365894317627, + "loss": 4.4236, + "nll_loss": 1.0471446514129639, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013475392945110798, + "rewards/margins": 0.03709826618432999, + "rewards/rejected": -0.05057365447282791, + "step": 2233 + }, + { + "epoch": 1.544951590594744, + "grad_norm": 7.3931145668029785, + "learning_rate": 4.6972491163362535e-05, + "log_odds_chosen": 3.28309965133667, + "log_odds_ratio": -0.41309264302253723, + "logits/chosen": 0.1753653883934021, + "logits/rejected": 0.1266123354434967, + "logps/chosen": -0.1575171798467636, + "logps/rejected": -0.6481037735939026, + "loss": 3.2442, + "nll_loss": 0.7697430849075317, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01575171761214733, + "rewards/margins": 0.04905865713953972, + "rewards/rejected": -0.0648103803396225, + "step": 2234 + }, + { + "epoch": 1.5456431535269708, + "grad_norm": 5.558627605438232, + "learning_rate": 4.696864914707239e-05, + "log_odds_chosen": 3.975231647491455, + "log_odds_ratio": -0.2093396931886673, + "logits/chosen": -0.7380461096763611, + "logits/rejected": -0.8254467248916626, + "logps/chosen": -0.04940512031316757, + "logps/rejected": -0.6910010576248169, + "loss": 4.2055, + "nll_loss": 1.0304415225982666, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004940511658787727, + "rewards/margins": 0.06415959447622299, + "rewards/rejected": -0.06910011172294617, + "step": 2235 + }, + { + "epoch": 1.5463347164591976, + "grad_norm": 6.294323444366455, + "learning_rate": 4.696480713078223e-05, + "log_odds_chosen": 4.579098224639893, + "log_odds_ratio": -0.08531267940998077, + "logits/chosen": -0.6607158780097961, + "logits/rejected": -0.6623681783676147, + "logps/chosen": -0.04320439323782921, + "logps/rejected": -0.6710415482521057, + "loss": 4.064, + "nll_loss": 1.0074676275253296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004320439882576466, + "rewards/margins": 0.06278371810913086, + "rewards/rejected": -0.06710416078567505, + "step": 2236 + }, + { + "epoch": 1.5470262793914247, + "grad_norm": 4.83128023147583, + "learning_rate": 4.696096511449209e-05, + "log_odds_chosen": 5.596061706542969, + "log_odds_ratio": -0.024458257481455803, + "logits/chosen": -0.2902466952800751, + "logits/rejected": -0.30807405710220337, + "logps/chosen": -0.017890680581331253, + "logps/rejected": -1.3433257341384888, + "loss": 2.3647, + "nll_loss": 0.5887378454208374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017890682211145759, + "rewards/margins": 0.13254351913928986, + "rewards/rejected": -0.1343325823545456, + "step": 2237 + }, + { + "epoch": 1.5477178423236515, + "grad_norm": 9.162208557128906, + "learning_rate": 4.695712309820194e-05, + "log_odds_chosen": 5.807769775390625, + "log_odds_ratio": -0.08700526505708694, + "logits/chosen": -0.13000746071338654, + "logits/rejected": -0.22356395423412323, + "logps/chosen": -0.03778745234012604, + "logps/rejected": -1.1309075355529785, + "loss": 2.6584, + "nll_loss": 0.6559075713157654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037787449546158314, + "rewards/margins": 0.1093120127916336, + "rewards/rejected": -0.11309076845645905, + "step": 2238 + }, + { + "epoch": 1.5484094052558783, + "grad_norm": 7.159018516540527, + "learning_rate": 4.695328108191179e-05, + "log_odds_chosen": 3.921074867248535, + "log_odds_ratio": -0.20838870108127594, + "logits/chosen": -0.5181608200073242, + "logits/rejected": -0.5305622816085815, + "logps/chosen": -0.05944810062646866, + "logps/rejected": -0.8501400947570801, + "loss": 3.7351, + "nll_loss": 0.9129410982131958, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005944809876382351, + "rewards/margins": 0.07906919717788696, + "rewards/rejected": -0.08501400798559189, + "step": 2239 + }, + { + "epoch": 1.5491009681881052, + "grad_norm": 9.407584190368652, + "learning_rate": 4.694943906562164e-05, + "log_odds_chosen": 4.420240879058838, + "log_odds_ratio": -0.27922773361206055, + "logits/chosen": -0.7489995360374451, + "logits/rejected": -0.7329412698745728, + "logps/chosen": -0.07872271537780762, + "logps/rejected": -0.8607683777809143, + "loss": 3.4904, + "nll_loss": 0.844673752784729, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007872272282838821, + "rewards/margins": 0.07820457220077515, + "rewards/rejected": -0.08607684075832367, + "step": 2240 + }, + { + "epoch": 1.549792531120332, + "grad_norm": 6.541446208953857, + "learning_rate": 4.694559704933149e-05, + "log_odds_chosen": 4.660301208496094, + "log_odds_ratio": -0.12407185137271881, + "logits/chosen": -0.4145255982875824, + "logits/rejected": -0.48058953881263733, + "logps/chosen": -0.059069301933050156, + "logps/rejected": -1.0553184747695923, + "loss": 2.7047, + "nll_loss": 0.6637730598449707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059069301933050156, + "rewards/margins": 0.09962491691112518, + "rewards/rejected": -0.10553184151649475, + "step": 2241 + }, + { + "epoch": 1.5504840940525588, + "grad_norm": 4.595657825469971, + "learning_rate": 4.694175503304134e-05, + "log_odds_chosen": 5.710814476013184, + "log_odds_ratio": -0.24719639122486115, + "logits/chosen": -0.5707492232322693, + "logits/rejected": -0.5625244379043579, + "logps/chosen": -0.07158362865447998, + "logps/rejected": -1.2810132503509521, + "loss": 2.4542, + "nll_loss": 0.5888248682022095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007158362772315741, + "rewards/margins": 0.12094297260046005, + "rewards/rejected": -0.12810133397579193, + "step": 2242 + }, + { + "epoch": 1.5511756569847857, + "grad_norm": 6.285073280334473, + "learning_rate": 4.6937913016751193e-05, + "log_odds_chosen": 4.71954870223999, + "log_odds_ratio": -0.260731041431427, + "logits/chosen": 0.10269571840763092, + "logits/rejected": 0.06153454631567001, + "logps/chosen": -0.09316494315862656, + "logps/rejected": -0.9280557632446289, + "loss": 2.9387, + "nll_loss": 0.7085932493209839, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009316494688391685, + "rewards/margins": 0.0834890753030777, + "rewards/rejected": -0.09280557930469513, + "step": 2243 + }, + { + "epoch": 1.5518672199170125, + "grad_norm": 6.047837734222412, + "learning_rate": 4.6934071000461046e-05, + "log_odds_chosen": 3.6565470695495605, + "log_odds_ratio": -0.3122093975543976, + "logits/chosen": -0.3903008997440338, + "logits/rejected": -0.41671690344810486, + "logps/chosen": -0.08219773322343826, + "logps/rejected": -0.8850083351135254, + "loss": 2.5731, + "nll_loss": 0.6120545864105225, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008219772949814796, + "rewards/margins": 0.08028105646371841, + "rewards/rejected": -0.08850082755088806, + "step": 2244 + }, + { + "epoch": 1.5525587828492393, + "grad_norm": 5.648904800415039, + "learning_rate": 4.693022898417089e-05, + "log_odds_chosen": 5.111184120178223, + "log_odds_ratio": -0.11708255112171173, + "logits/chosen": -0.4507385790348053, + "logits/rejected": -0.4581332206726074, + "logps/chosen": -0.02783753164112568, + "logps/rejected": -0.6649580001831055, + "loss": 2.5968, + "nll_loss": 0.6374881863594055, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027837532106786966, + "rewards/margins": 0.06371204555034637, + "rewards/rejected": -0.06649579852819443, + "step": 2245 + }, + { + "epoch": 1.5532503457814661, + "grad_norm": 4.566701889038086, + "learning_rate": 4.692638696788075e-05, + "log_odds_chosen": 4.072457313537598, + "log_odds_ratio": -0.13861241936683655, + "logits/chosen": -0.4009241461753845, + "logits/rejected": -0.48581573367118835, + "logps/chosen": -0.06524769216775894, + "logps/rejected": -0.5617847442626953, + "loss": 2.1005, + "nll_loss": 0.5112607479095459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006524769123643637, + "rewards/margins": 0.04965370520949364, + "rewards/rejected": -0.05617847293615341, + "step": 2246 + }, + { + "epoch": 1.553941908713693, + "grad_norm": 4.161489963531494, + "learning_rate": 4.6922544951590596e-05, + "log_odds_chosen": 5.0455217361450195, + "log_odds_ratio": -0.08254259079694748, + "logits/chosen": -0.08890029788017273, + "logits/rejected": -0.14021196961402893, + "logps/chosen": -0.04011262208223343, + "logps/rejected": -0.823492169380188, + "loss": 2.216, + "nll_loss": 0.5457525849342346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00401126267388463, + "rewards/margins": 0.07833795249462128, + "rewards/rejected": -0.08234922587871552, + "step": 2247 + }, + { + "epoch": 1.5546334716459198, + "grad_norm": 12.599466323852539, + "learning_rate": 4.691870293530045e-05, + "log_odds_chosen": 3.1988582611083984, + "log_odds_ratio": -0.2710178792476654, + "logits/chosen": -0.22087787091732025, + "logits/rejected": -0.2567731440067291, + "logps/chosen": -0.04471834376454353, + "logps/rejected": -0.5315315127372742, + "loss": 2.9405, + "nll_loss": 0.7080118060112, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004471834283322096, + "rewards/margins": 0.04868132248520851, + "rewards/rejected": -0.053153157234191895, + "step": 2248 + }, + { + "epoch": 1.5553250345781466, + "grad_norm": 7.255458354949951, + "learning_rate": 4.69148609190103e-05, + "log_odds_chosen": 3.571174383163452, + "log_odds_ratio": -0.19597908854484558, + "logits/chosen": -0.3806026875972748, + "logits/rejected": -0.4358273148536682, + "logps/chosen": -0.0519745796918869, + "logps/rejected": -0.746728241443634, + "loss": 3.1931, + "nll_loss": 0.7786867022514343, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00519745796918869, + "rewards/margins": 0.06947536766529083, + "rewards/rejected": -0.07467282563447952, + "step": 2249 + }, + { + "epoch": 1.5560165975103735, + "grad_norm": 8.847131729125977, + "learning_rate": 4.691101890272015e-05, + "log_odds_chosen": 4.572246074676514, + "log_odds_ratio": -0.1461586356163025, + "logits/chosen": -0.5918107032775879, + "logits/rejected": -0.5609433650970459, + "logps/chosen": -0.036586739122867584, + "logps/rejected": -0.8704870343208313, + "loss": 4.0928, + "nll_loss": 1.008584976196289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003658674191683531, + "rewards/margins": 0.08339002728462219, + "rewards/rejected": -0.08704870939254761, + "step": 2250 + }, + { + "epoch": 1.5567081604426003, + "grad_norm": 3.6108243465423584, + "learning_rate": 4.690717688643e-05, + "log_odds_chosen": 3.400420665740967, + "log_odds_ratio": -0.20974057912826538, + "logits/chosen": -0.38296398520469666, + "logits/rejected": -0.3265213966369629, + "logps/chosen": -0.1113806664943695, + "logps/rejected": -0.82120680809021, + "loss": 1.8716, + "nll_loss": 0.4469362795352936, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011138067580759525, + "rewards/margins": 0.07098261266946793, + "rewards/rejected": -0.08212068676948547, + "step": 2251 + }, + { + "epoch": 1.5573997233748271, + "grad_norm": 7.981571674346924, + "learning_rate": 4.690333487013985e-05, + "log_odds_chosen": 2.3555095195770264, + "log_odds_ratio": -0.49662184715270996, + "logits/chosen": -0.593062698841095, + "logits/rejected": -0.615126371383667, + "logps/chosen": -0.16694380342960358, + "logps/rejected": -0.4941992163658142, + "loss": 2.9893, + "nll_loss": 0.6976538896560669, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016694379970431328, + "rewards/margins": 0.03272554278373718, + "rewards/rejected": -0.04941992461681366, + "step": 2252 + }, + { + "epoch": 1.558091286307054, + "grad_norm": 7.54820442199707, + "learning_rate": 4.6899492853849704e-05, + "log_odds_chosen": 4.2390642166137695, + "log_odds_ratio": -0.3768894374370575, + "logits/chosen": -0.6133686304092407, + "logits/rejected": -0.5917006731033325, + "logps/chosen": -0.08966968953609467, + "logps/rejected": -0.7241112589836121, + "loss": 3.8115, + "nll_loss": 0.9151946902275085, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008966969326138496, + "rewards/margins": 0.06344415992498398, + "rewards/rejected": -0.07241112738847733, + "step": 2253 + }, + { + "epoch": 1.5587828492392808, + "grad_norm": 7.562520980834961, + "learning_rate": 4.689565083755955e-05, + "log_odds_chosen": 2.634389877319336, + "log_odds_ratio": -0.3599710464477539, + "logits/chosen": -0.5475929975509644, + "logits/rejected": -0.5768888592720032, + "logps/chosen": -0.0821683332324028, + "logps/rejected": -0.5093742609024048, + "loss": 4.1712, + "nll_loss": 1.006812572479248, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008216832764446735, + "rewards/margins": 0.04272059351205826, + "rewards/rejected": -0.05093742161989212, + "step": 2254 + }, + { + "epoch": 1.5594744121715076, + "grad_norm": 6.5873003005981445, + "learning_rate": 4.689180882126941e-05, + "log_odds_chosen": 5.166384220123291, + "log_odds_ratio": -0.2231721132993698, + "logits/chosen": -0.40660107135772705, + "logits/rejected": -0.4643666446208954, + "logps/chosen": -0.07692534476518631, + "logps/rejected": -1.0464847087860107, + "loss": 2.1066, + "nll_loss": 0.5043294429779053, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007692534010857344, + "rewards/margins": 0.09695594012737274, + "rewards/rejected": -0.10464847087860107, + "step": 2255 + }, + { + "epoch": 1.5601659751037344, + "grad_norm": 8.40011215209961, + "learning_rate": 4.6887966804979255e-05, + "log_odds_chosen": 4.982305526733398, + "log_odds_ratio": -0.21497850120067596, + "logits/chosen": -0.5718773603439331, + "logits/rejected": -0.5998449325561523, + "logps/chosen": -0.095095694065094, + "logps/rejected": -0.9810383319854736, + "loss": 3.0402, + "nll_loss": 0.7385454773902893, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009509569965302944, + "rewards/margins": 0.08859425783157349, + "rewards/rejected": -0.09810382127761841, + "step": 2256 + }, + { + "epoch": 1.5608575380359613, + "grad_norm": 5.619307041168213, + "learning_rate": 4.688412478868911e-05, + "log_odds_chosen": 6.083016872406006, + "log_odds_ratio": -0.03372356668114662, + "logits/chosen": -0.2072625458240509, + "logits/rejected": -0.2259514033794403, + "logps/chosen": -0.013912231661379337, + "logps/rejected": -0.925947904586792, + "loss": 3.2673, + "nll_loss": 0.8134469985961914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013912231661379337, + "rewards/margins": 0.09120357036590576, + "rewards/rejected": -0.09259478747844696, + "step": 2257 + }, + { + "epoch": 1.561549100968188, + "grad_norm": 12.73154354095459, + "learning_rate": 4.688028277239896e-05, + "log_odds_chosen": 4.015732765197754, + "log_odds_ratio": -0.1854289174079895, + "logits/chosen": -0.4852401316165924, + "logits/rejected": -0.5350650548934937, + "logps/chosen": -0.04746335744857788, + "logps/rejected": -0.6718555688858032, + "loss": 3.5894, + "nll_loss": 0.8787986636161804, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004746335558593273, + "rewards/margins": 0.06243922561407089, + "rewards/rejected": -0.06718556582927704, + "step": 2258 + }, + { + "epoch": 1.562240663900415, + "grad_norm": 7.783176898956299, + "learning_rate": 4.6876440756108805e-05, + "log_odds_chosen": 3.907989501953125, + "log_odds_ratio": -0.11575151979923248, + "logits/chosen": -0.3810523748397827, + "logits/rejected": -0.380711168050766, + "logps/chosen": -0.07309068739414215, + "logps/rejected": -0.7313439846038818, + "loss": 3.4366, + "nll_loss": 0.84757000207901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00730906892567873, + "rewards/margins": 0.06582533568143845, + "rewards/rejected": -0.0731343999505043, + "step": 2259 + }, + { + "epoch": 1.5629322268326418, + "grad_norm": 7.931395053863525, + "learning_rate": 4.687259873981866e-05, + "log_odds_chosen": 6.3931474685668945, + "log_odds_ratio": -0.00962926261126995, + "logits/chosen": -0.6360489130020142, + "logits/rejected": -0.6455775499343872, + "logps/chosen": -0.014708174392580986, + "logps/rejected": -1.1883326768875122, + "loss": 3.0027, + "nll_loss": 0.7497127652168274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014708174858242273, + "rewards/margins": 0.11736244708299637, + "rewards/rejected": -0.1188332661986351, + "step": 2260 + }, + { + "epoch": 1.5636237897648686, + "grad_norm": 4.673415660858154, + "learning_rate": 4.686875672352851e-05, + "log_odds_chosen": 4.533776760101318, + "log_odds_ratio": -0.1115853562951088, + "logits/chosen": -0.534148633480072, + "logits/rejected": -0.5244668126106262, + "logps/chosen": -0.08479005843400955, + "logps/rejected": -0.6998138427734375, + "loss": 2.764, + "nll_loss": 0.6798312664031982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00847900565713644, + "rewards/margins": 0.06150238215923309, + "rewards/rejected": -0.06998138874769211, + "step": 2261 + }, + { + "epoch": 1.5643153526970954, + "grad_norm": 6.692610263824463, + "learning_rate": 4.686491470723836e-05, + "log_odds_chosen": 7.074902057647705, + "log_odds_ratio": -0.006812370382249355, + "logits/chosen": -0.2236250340938568, + "logits/rejected": -0.2226749062538147, + "logps/chosen": -0.030843688175082207, + "logps/rejected": -1.46689772605896, + "loss": 3.1026, + "nll_loss": 0.7749695181846619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030843692366033792, + "rewards/margins": 0.14360541105270386, + "rewards/rejected": -0.146689772605896, + "step": 2262 + }, + { + "epoch": 1.5650069156293223, + "grad_norm": 9.14884090423584, + "learning_rate": 4.686107269094821e-05, + "log_odds_chosen": 3.6463592052459717, + "log_odds_ratio": -0.248472660779953, + "logits/chosen": 0.002660594880580902, + "logits/rejected": 0.011733196675777435, + "logps/chosen": -0.06953177601099014, + "logps/rejected": -0.6269186735153198, + "loss": 2.9179, + "nll_loss": 0.7046196460723877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006953177973628044, + "rewards/margins": 0.05573869124054909, + "rewards/rejected": -0.06269187480211258, + "step": 2263 + }, + { + "epoch": 1.565698478561549, + "grad_norm": 7.125910758972168, + "learning_rate": 4.685723067465807e-05, + "log_odds_chosen": 2.580953598022461, + "log_odds_ratio": -0.24101954698562622, + "logits/chosen": -0.57945716381073, + "logits/rejected": -0.5280551910400391, + "logps/chosen": -0.07494837790727615, + "logps/rejected": -0.6202612519264221, + "loss": 2.7215, + "nll_loss": 0.6562801599502563, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007494837511330843, + "rewards/margins": 0.054531291127204895, + "rewards/rejected": -0.06202612444758415, + "step": 2264 + }, + { + "epoch": 1.566390041493776, + "grad_norm": 6.528625965118408, + "learning_rate": 4.685338865836791e-05, + "log_odds_chosen": 5.087283134460449, + "log_odds_ratio": -0.3491363823413849, + "logits/chosen": -0.4980352520942688, + "logits/rejected": -0.48793551325798035, + "logps/chosen": -0.1593426764011383, + "logps/rejected": -1.1793937683105469, + "loss": 3.2584, + "nll_loss": 0.7796763181686401, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01593426614999771, + "rewards/margins": 0.10200510919094086, + "rewards/rejected": -0.11793938279151917, + "step": 2265 + }, + { + "epoch": 1.5670816044260027, + "grad_norm": 13.136091232299805, + "learning_rate": 4.6849546642077766e-05, + "log_odds_chosen": 3.9400973320007324, + "log_odds_ratio": -1.1063835620880127, + "logits/chosen": -0.5145586729049683, + "logits/rejected": -0.543152391910553, + "logps/chosen": -0.09819920361042023, + "logps/rejected": -0.9681227207183838, + "loss": 2.7526, + "nll_loss": 0.5775208473205566, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009819920174777508, + "rewards/margins": 0.08699235320091248, + "rewards/rejected": -0.09681227803230286, + "step": 2266 + }, + { + "epoch": 1.5677731673582296, + "grad_norm": 14.50251579284668, + "learning_rate": 4.684570462578762e-05, + "log_odds_chosen": 4.714078426361084, + "log_odds_ratio": -0.19142206013202667, + "logits/chosen": -0.5533724427223206, + "logits/rejected": -0.5080875158309937, + "logps/chosen": -0.03418285399675369, + "logps/rejected": -0.9308934211730957, + "loss": 3.6667, + "nll_loss": 0.8975303173065186, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034182853996753693, + "rewards/margins": 0.0896710455417633, + "rewards/rejected": -0.09308933466672897, + "step": 2267 + }, + { + "epoch": 1.5684647302904564, + "grad_norm": 6.3615570068359375, + "learning_rate": 4.6841862609497464e-05, + "log_odds_chosen": 4.702467918395996, + "log_odds_ratio": -0.1508854329586029, + "logits/chosen": -0.5412316918373108, + "logits/rejected": -0.5616676807403564, + "logps/chosen": -0.12236365675926208, + "logps/rejected": -1.1580426692962646, + "loss": 3.3303, + "nll_loss": 0.8174852728843689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012236366048455238, + "rewards/margins": 0.10356790572404861, + "rewards/rejected": -0.1158042699098587, + "step": 2268 + }, + { + "epoch": 1.5691562932226832, + "grad_norm": 11.172139167785645, + "learning_rate": 4.6838020593207316e-05, + "log_odds_chosen": 3.8827757835388184, + "log_odds_ratio": -0.2262200564146042, + "logits/chosen": -0.481758177280426, + "logits/rejected": -0.5334774255752563, + "logps/chosen": -0.07622340321540833, + "logps/rejected": -0.8563274145126343, + "loss": 3.4932, + "nll_loss": 0.8506813049316406, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007622339762747288, + "rewards/margins": 0.07801041007041931, + "rewards/rejected": -0.08563274890184402, + "step": 2269 + }, + { + "epoch": 1.56984785615491, + "grad_norm": 7.487488746643066, + "learning_rate": 4.683417857691717e-05, + "log_odds_chosen": 5.401069164276123, + "log_odds_ratio": -0.1360565572977066, + "logits/chosen": -0.5984183549880981, + "logits/rejected": -0.7007421255111694, + "logps/chosen": -0.04290872812271118, + "logps/rejected": -0.9781840443611145, + "loss": 2.8651, + "nll_loss": 0.702671229839325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004290873650461435, + "rewards/margins": 0.09352753311395645, + "rewards/rejected": -0.09781840443611145, + "step": 2270 + }, + { + "epoch": 1.570539419087137, + "grad_norm": 7.04751443862915, + "learning_rate": 4.683033656062702e-05, + "log_odds_chosen": 4.005356311798096, + "log_odds_ratio": -0.3126870393753052, + "logits/chosen": -0.23318475484848022, + "logits/rejected": -0.2612619698047638, + "logps/chosen": -0.07785270363092422, + "logps/rejected": -0.8214019536972046, + "loss": 3.8368, + "nll_loss": 0.9279318451881409, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007785269990563393, + "rewards/margins": 0.07435491681098938, + "rewards/rejected": -0.08214019238948822, + "step": 2271 + }, + { + "epoch": 1.5712309820193637, + "grad_norm": 6.216674327850342, + "learning_rate": 4.682649454433687e-05, + "log_odds_chosen": 3.7621045112609863, + "log_odds_ratio": -0.5083651542663574, + "logits/chosen": -0.07921777665615082, + "logits/rejected": -0.09831476211547852, + "logps/chosen": -0.09837433695793152, + "logps/rejected": -0.5076334476470947, + "loss": 2.1151, + "nll_loss": 0.477946013212204, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009837434627115726, + "rewards/margins": 0.04092591255903244, + "rewards/rejected": -0.05076334625482559, + "step": 2272 + }, + { + "epoch": 1.5719225449515906, + "grad_norm": 4.1684250831604, + "learning_rate": 4.6822652528046726e-05, + "log_odds_chosen": 4.168286323547363, + "log_odds_ratio": -0.09342752397060394, + "logits/chosen": -0.48797476291656494, + "logits/rejected": -0.4549727439880371, + "logps/chosen": -0.059577472507953644, + "logps/rejected": -0.8933494091033936, + "loss": 2.6642, + "nll_loss": 0.656704306602478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005957747809588909, + "rewards/margins": 0.0833771824836731, + "rewards/rejected": -0.08933493494987488, + "step": 2273 + }, + { + "epoch": 1.5726141078838174, + "grad_norm": 6.8194098472595215, + "learning_rate": 4.681881051175657e-05, + "log_odds_chosen": 4.301138877868652, + "log_odds_ratio": -0.2094355970621109, + "logits/chosen": -0.023733407258987427, + "logits/rejected": -0.0873059630393982, + "logps/chosen": -0.026450948789715767, + "logps/rejected": -0.46369093656539917, + "loss": 2.8734, + "nll_loss": 0.6974120140075684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002645095344632864, + "rewards/margins": 0.043724000453948975, + "rewards/rejected": -0.04636909440159798, + "step": 2274 + }, + { + "epoch": 1.5733056708160442, + "grad_norm": 3.638047218322754, + "learning_rate": 4.6814968495466424e-05, + "log_odds_chosen": 6.883190631866455, + "log_odds_ratio": -0.01691877841949463, + "logits/chosen": -0.4181588888168335, + "logits/rejected": -0.4647769629955292, + "logps/chosen": -0.03221792355179787, + "logps/rejected": -0.7626996040344238, + "loss": 2.0761, + "nll_loss": 0.5173306465148926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003221792634576559, + "rewards/margins": 0.0730481669306755, + "rewards/rejected": -0.0762699544429779, + "step": 2275 + }, + { + "epoch": 1.573997233748271, + "grad_norm": 6.572237491607666, + "learning_rate": 4.681112647917628e-05, + "log_odds_chosen": 4.171795845031738, + "log_odds_ratio": -0.33008846640586853, + "logits/chosen": -0.4735686779022217, + "logits/rejected": -0.5132468938827515, + "logps/chosen": -0.09502127021551132, + "logps/rejected": -0.559738278388977, + "loss": 2.9807, + "nll_loss": 0.7121750116348267, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009502128697931767, + "rewards/margins": 0.04647170007228851, + "rewards/rejected": -0.055973831564188004, + "step": 2276 + }, + { + "epoch": 1.5746887966804979, + "grad_norm": 4.681520462036133, + "learning_rate": 4.680728446288612e-05, + "log_odds_chosen": 3.857510566711426, + "log_odds_ratio": -0.20329353213310242, + "logits/chosen": -0.5046722888946533, + "logits/rejected": -0.4650399088859558, + "logps/chosen": -0.0971132218837738, + "logps/rejected": -0.9814940690994263, + "loss": 3.3064, + "nll_loss": 0.8062769174575806, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009711322374641895, + "rewards/margins": 0.08843808621168137, + "rewards/rejected": -0.09814940392971039, + "step": 2277 + }, + { + "epoch": 1.5753803596127247, + "grad_norm": 7.279231548309326, + "learning_rate": 4.6803442446595975e-05, + "log_odds_chosen": 4.661543369293213, + "log_odds_ratio": -0.19676269590854645, + "logits/chosen": -0.6336410045623779, + "logits/rejected": -0.561577320098877, + "logps/chosen": -0.07620273530483246, + "logps/rejected": -0.923383355140686, + "loss": 3.0587, + "nll_loss": 0.7449985146522522, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007620273623615503, + "rewards/margins": 0.08471806347370148, + "rewards/rejected": -0.09233833849430084, + "step": 2278 + }, + { + "epoch": 1.5760719225449515, + "grad_norm": 4.930444240570068, + "learning_rate": 4.679960043030583e-05, + "log_odds_chosen": 4.155850887298584, + "log_odds_ratio": -0.13013622164726257, + "logits/chosen": -0.41056662797927856, + "logits/rejected": -0.49715933203697205, + "logps/chosen": -0.05985238403081894, + "logps/rejected": -0.5921831727027893, + "loss": 2.6141, + "nll_loss": 0.6405088901519775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005985238589346409, + "rewards/margins": 0.053233079612255096, + "rewards/rejected": -0.05921831727027893, + "step": 2279 + }, + { + "epoch": 1.5767634854771784, + "grad_norm": 5.073685169219971, + "learning_rate": 4.679575841401568e-05, + "log_odds_chosen": 3.090080738067627, + "log_odds_ratio": -0.3903737962245941, + "logits/chosen": -0.5625483989715576, + "logits/rejected": -0.5984906554222107, + "logps/chosen": -0.08926475048065186, + "logps/rejected": -0.48649924993515015, + "loss": 3.0696, + "nll_loss": 0.7283635139465332, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00892647448927164, + "rewards/margins": 0.03972344845533371, + "rewards/rejected": -0.048649922013282776, + "step": 2280 + }, + { + "epoch": 1.5774550484094052, + "grad_norm": 6.59467887878418, + "learning_rate": 4.6791916397725525e-05, + "log_odds_chosen": 3.5226287841796875, + "log_odds_ratio": -0.24100548028945923, + "logits/chosen": -0.5984765887260437, + "logits/rejected": -0.6621988415718079, + "logps/chosen": -0.07654125243425369, + "logps/rejected": -0.7357521653175354, + "loss": 3.7518, + "nll_loss": 0.913856565952301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007654125336557627, + "rewards/margins": 0.06592109799385071, + "rewards/rejected": -0.0735752284526825, + "step": 2281 + }, + { + "epoch": 1.5781466113416323, + "grad_norm": 15.683188438415527, + "learning_rate": 4.6788074381435385e-05, + "log_odds_chosen": 3.7196710109710693, + "log_odds_ratio": -0.34569528698921204, + "logits/chosen": -0.7387279272079468, + "logits/rejected": -0.6877920031547546, + "logps/chosen": -0.05489436909556389, + "logps/rejected": -0.8038865327835083, + "loss": 3.8624, + "nll_loss": 0.9310262799263, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005489437375217676, + "rewards/margins": 0.07489921152591705, + "rewards/rejected": -0.08038865029811859, + "step": 2282 + }, + { + "epoch": 1.578838174273859, + "grad_norm": 10.021100997924805, + "learning_rate": 4.678423236514523e-05, + "log_odds_chosen": 4.2374372482299805, + "log_odds_ratio": -0.04714512825012207, + "logits/chosen": -0.716540515422821, + "logits/rejected": -0.7443221211433411, + "logps/chosen": -0.055723875761032104, + "logps/rejected": -0.974632203578949, + "loss": 4.4345, + "nll_loss": 1.103899359703064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00557238794863224, + "rewards/margins": 0.09189082682132721, + "rewards/rejected": -0.0974632203578949, + "step": 2283 + }, + { + "epoch": 1.579529737206086, + "grad_norm": 7.538951396942139, + "learning_rate": 4.678039034885508e-05, + "log_odds_chosen": 3.4724903106689453, + "log_odds_ratio": -0.37358352541923523, + "logits/chosen": -0.46461501717567444, + "logits/rejected": -0.5749452114105225, + "logps/chosen": -0.07608627527952194, + "logps/rejected": -0.5288589000701904, + "loss": 2.807, + "nll_loss": 0.6643953323364258, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007608628366142511, + "rewards/margins": 0.04527726769447327, + "rewards/rejected": -0.05288589745759964, + "step": 2284 + }, + { + "epoch": 1.5802213001383127, + "grad_norm": 7.384200096130371, + "learning_rate": 4.6776548332564935e-05, + "log_odds_chosen": 5.402228355407715, + "log_odds_ratio": -0.07402972877025604, + "logits/chosen": -0.5802706480026245, + "logits/rejected": -0.5512796640396118, + "logps/chosen": -0.042227327823638916, + "logps/rejected": -0.8186689019203186, + "loss": 4.1053, + "nll_loss": 1.0189257860183716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0042227329686284065, + "rewards/margins": 0.07764415442943573, + "rewards/rejected": -0.08186689019203186, + "step": 2285 + }, + { + "epoch": 1.5809128630705396, + "grad_norm": 7.9935712814331055, + "learning_rate": 4.677270631627478e-05, + "log_odds_chosen": 3.9247207641601562, + "log_odds_ratio": -0.19959238171577454, + "logits/chosen": -0.4443342089653015, + "logits/rejected": -0.44178879261016846, + "logps/chosen": -0.08705995976924896, + "logps/rejected": -0.6575961112976074, + "loss": 3.2841, + "nll_loss": 0.8010765314102173, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008705995976924896, + "rewards/margins": 0.057053614407777786, + "rewards/rejected": -0.06575960665941238, + "step": 2286 + }, + { + "epoch": 1.5816044260027664, + "grad_norm": 6.8036909103393555, + "learning_rate": 4.676886429998463e-05, + "log_odds_chosen": 3.094165086746216, + "log_odds_ratio": -0.21915318071842194, + "logits/chosen": -0.650163471698761, + "logits/rejected": -0.6991207599639893, + "logps/chosen": -0.10170422494411469, + "logps/rejected": -0.7241445779800415, + "loss": 2.8511, + "nll_loss": 0.6908689141273499, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010170423425734043, + "rewards/margins": 0.06224404275417328, + "rewards/rejected": -0.07241446524858475, + "step": 2287 + }, + { + "epoch": 1.5822959889349932, + "grad_norm": 8.86994743347168, + "learning_rate": 4.6765022283694486e-05, + "log_odds_chosen": 4.749606132507324, + "log_odds_ratio": -0.1850530505180359, + "logits/chosen": -0.42290088534355164, + "logits/rejected": -0.47745946049690247, + "logps/chosen": -0.06721153110265732, + "logps/rejected": -0.6770196557044983, + "loss": 3.5533, + "nll_loss": 0.8698145151138306, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006721153389662504, + "rewards/margins": 0.06098081171512604, + "rewards/rejected": -0.06770196557044983, + "step": 2288 + }, + { + "epoch": 1.58298755186722, + "grad_norm": 6.188156604766846, + "learning_rate": 4.676118026740434e-05, + "log_odds_chosen": 4.213592052459717, + "log_odds_ratio": -0.20819568634033203, + "logits/chosen": -0.5084786415100098, + "logits/rejected": -0.5529496669769287, + "logps/chosen": -0.030551385134458542, + "logps/rejected": -0.5414082407951355, + "loss": 3.5748, + "nll_loss": 0.8728883266448975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003055138746276498, + "rewards/margins": 0.051085688173770905, + "rewards/rejected": -0.05414082854986191, + "step": 2289 + }, + { + "epoch": 1.583679114799447, + "grad_norm": 5.629952907562256, + "learning_rate": 4.6757338251114184e-05, + "log_odds_chosen": 3.0484161376953125, + "log_odds_ratio": -0.34847933053970337, + "logits/chosen": -0.23855555057525635, + "logits/rejected": -0.34040266275405884, + "logps/chosen": -0.123260498046875, + "logps/rejected": -0.5389373898506165, + "loss": 2.3774, + "nll_loss": 0.5595142841339111, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012326049618422985, + "rewards/margins": 0.04156769439578056, + "rewards/rejected": -0.053893741220235825, + "step": 2290 + }, + { + "epoch": 1.5843706777316737, + "grad_norm": 20.07173728942871, + "learning_rate": 4.675349623482404e-05, + "log_odds_chosen": 3.135038375854492, + "log_odds_ratio": -0.6684633493423462, + "logits/chosen": -0.6842124462127686, + "logits/rejected": -0.744235634803772, + "logps/chosen": -0.19508275389671326, + "logps/rejected": -0.7924327850341797, + "loss": 4.8983, + "nll_loss": 1.157721996307373, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019508276134729385, + "rewards/margins": 0.0597350038588047, + "rewards/rejected": -0.07924328744411469, + "step": 2291 + }, + { + "epoch": 1.5850622406639006, + "grad_norm": 4.18657922744751, + "learning_rate": 4.674965421853389e-05, + "log_odds_chosen": 6.433959484100342, + "log_odds_ratio": -0.09131443500518799, + "logits/chosen": -0.3304455876350403, + "logits/rejected": -0.361205518245697, + "logps/chosen": -0.03453647345304489, + "logps/rejected": -0.9038757085800171, + "loss": 2.1664, + "nll_loss": 0.5324781537055969, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003453647717833519, + "rewards/margins": 0.0869339257478714, + "rewards/rejected": -0.09038757532835007, + "step": 2292 + }, + { + "epoch": 1.5857538035961274, + "grad_norm": 4.888633728027344, + "learning_rate": 4.674581220224374e-05, + "log_odds_chosen": 4.468114852905273, + "log_odds_ratio": -0.25625795125961304, + "logits/chosen": -0.31070369482040405, + "logits/rejected": -0.33406126499176025, + "logps/chosen": -0.08972156047821045, + "logps/rejected": -0.7813112139701843, + "loss": 2.4483, + "nll_loss": 0.586453378200531, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008972156792879105, + "rewards/margins": 0.06915897130966187, + "rewards/rejected": -0.07813112437725067, + "step": 2293 + }, + { + "epoch": 1.5864453665283542, + "grad_norm": 6.429629802703857, + "learning_rate": 4.6741970185953594e-05, + "log_odds_chosen": 5.878769874572754, + "log_odds_ratio": -0.021112609654664993, + "logits/chosen": -0.40205931663513184, + "logits/rejected": -0.4707787036895752, + "logps/chosen": -0.03028869815170765, + "logps/rejected": -1.0147864818572998, + "loss": 2.9816, + "nll_loss": 0.7433009743690491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00302886962890625, + "rewards/margins": 0.09844978153705597, + "rewards/rejected": -0.10147865861654282, + "step": 2294 + }, + { + "epoch": 1.587136929460581, + "grad_norm": 6.87356424331665, + "learning_rate": 4.673812816966344e-05, + "log_odds_chosen": 4.551303386688232, + "log_odds_ratio": -0.3128006160259247, + "logits/chosen": -0.6874199509620667, + "logits/rejected": -0.7475636005401611, + "logps/chosen": -0.0662873387336731, + "logps/rejected": -0.806043803691864, + "loss": 3.8277, + "nll_loss": 0.9256444573402405, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0066287340596318245, + "rewards/margins": 0.07397565245628357, + "rewards/rejected": -0.08060438185930252, + "step": 2295 + }, + { + "epoch": 1.5878284923928079, + "grad_norm": 3.4723312854766846, + "learning_rate": 4.673428615337329e-05, + "log_odds_chosen": 5.179519176483154, + "log_odds_ratio": -0.09129927307367325, + "logits/chosen": -0.6070810556411743, + "logits/rejected": -0.5578267574310303, + "logps/chosen": -0.03832171857357025, + "logps/rejected": -1.030213713645935, + "loss": 2.8462, + "nll_loss": 0.7024317383766174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038321721367537975, + "rewards/margins": 0.09918919950723648, + "rewards/rejected": -0.10302136838436127, + "step": 2296 + }, + { + "epoch": 1.5885200553250347, + "grad_norm": 5.925631046295166, + "learning_rate": 4.6730444137083144e-05, + "log_odds_chosen": 6.14210319519043, + "log_odds_ratio": -0.0657062903046608, + "logits/chosen": -0.20594017207622528, + "logits/rejected": -0.23784777522087097, + "logps/chosen": -0.031046025454998016, + "logps/rejected": -1.0381896495819092, + "loss": 2.6941, + "nll_loss": 0.6669539213180542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031046029180288315, + "rewards/margins": 0.10071436315774918, + "rewards/rejected": -0.10381896048784256, + "step": 2297 + }, + { + "epoch": 1.5892116182572615, + "grad_norm": 3.7132484912872314, + "learning_rate": 4.6726602120792997e-05, + "log_odds_chosen": 4.760432720184326, + "log_odds_ratio": -0.13704749941825867, + "logits/chosen": 0.025912266224622726, + "logits/rejected": 0.02450394444167614, + "logps/chosen": -0.03368768095970154, + "logps/rejected": -0.5429489612579346, + "loss": 2.7401, + "nll_loss": 0.6713322997093201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003368767909705639, + "rewards/margins": 0.050926122814416885, + "rewards/rejected": -0.054294899106025696, + "step": 2298 + }, + { + "epoch": 1.5899031811894884, + "grad_norm": 5.432823181152344, + "learning_rate": 4.672276010450284e-05, + "log_odds_chosen": 3.6638736724853516, + "log_odds_ratio": -0.1098797619342804, + "logits/chosen": -0.9263089299201965, + "logits/rejected": -0.9255947470664978, + "logps/chosen": -0.07125405222177505, + "logps/rejected": -0.6752550601959229, + "loss": 3.6093, + "nll_loss": 0.891340970993042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007125406060367823, + "rewards/margins": 0.0604000985622406, + "rewards/rejected": -0.06752550601959229, + "step": 2299 + }, + { + "epoch": 1.5905947441217152, + "grad_norm": 4.5839080810546875, + "learning_rate": 4.67189180882127e-05, + "log_odds_chosen": 5.678994178771973, + "log_odds_ratio": -0.06031135842204094, + "logits/chosen": -0.4893847107887268, + "logits/rejected": -0.445559024810791, + "logps/chosen": -0.04330340772867203, + "logps/rejected": -1.07259202003479, + "loss": 2.6092, + "nll_loss": 0.6462651491165161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00433034123852849, + "rewards/margins": 0.10292886942625046, + "rewards/rejected": -0.10725921392440796, + "step": 2300 + }, + { + "epoch": 1.591286307053942, + "grad_norm": 6.613040924072266, + "learning_rate": 4.671507607192255e-05, + "log_odds_chosen": 4.988267421722412, + "log_odds_ratio": -0.11602865904569626, + "logits/chosen": -0.5056090354919434, + "logits/rejected": -0.4993463158607483, + "logps/chosen": -0.0756821483373642, + "logps/rejected": -1.1408820152282715, + "loss": 3.2067, + "nll_loss": 0.790062427520752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007568215020000935, + "rewards/margins": 0.10651998221874237, + "rewards/rejected": -0.11408819258213043, + "step": 2301 + }, + { + "epoch": 1.5919778699861689, + "grad_norm": 9.389286041259766, + "learning_rate": 4.67112340556324e-05, + "log_odds_chosen": 3.63946533203125, + "log_odds_ratio": -0.3243602514266968, + "logits/chosen": -0.49547529220581055, + "logits/rejected": -0.5523756742477417, + "logps/chosen": -0.06218154355883598, + "logps/rejected": -0.6321157813072205, + "loss": 3.8846, + "nll_loss": 0.9387215375900269, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006218154914677143, + "rewards/margins": 0.05699342489242554, + "rewards/rejected": -0.06321157515048981, + "step": 2302 + }, + { + "epoch": 1.5926694329183957, + "grad_norm": 11.416284561157227, + "learning_rate": 4.670739203934225e-05, + "log_odds_chosen": 5.384055137634277, + "log_odds_ratio": -0.1621214896440506, + "logits/chosen": -0.5782039165496826, + "logits/rejected": -0.6556646823883057, + "logps/chosen": -0.0784316435456276, + "logps/rejected": -0.9207970499992371, + "loss": 3.4856, + "nll_loss": 0.8551783561706543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007843165658414364, + "rewards/margins": 0.08423653244972229, + "rewards/rejected": -0.09207969903945923, + "step": 2303 + }, + { + "epoch": 1.5933609958506225, + "grad_norm": 7.5037841796875, + "learning_rate": 4.67035500230521e-05, + "log_odds_chosen": 4.605857849121094, + "log_odds_ratio": -0.03979405388236046, + "logits/chosen": -0.4579419791698456, + "logits/rejected": -0.5394278764724731, + "logps/chosen": -0.04164456948637962, + "logps/rejected": -0.9381356835365295, + "loss": 2.7509, + "nll_loss": 0.6837377548217773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004164457321166992, + "rewards/margins": 0.08964911848306656, + "rewards/rejected": -0.09381356835365295, + "step": 2304 + }, + { + "epoch": 1.5940525587828493, + "grad_norm": 3.53702712059021, + "learning_rate": 4.669970800676195e-05, + "log_odds_chosen": 5.398779392242432, + "log_odds_ratio": -0.17257985472679138, + "logits/chosen": -0.26726865768432617, + "logits/rejected": -0.27700862288475037, + "logps/chosen": -0.05751689895987511, + "logps/rejected": -0.9009071588516235, + "loss": 2.3245, + "nll_loss": 0.5638747811317444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005751689895987511, + "rewards/margins": 0.08433903008699417, + "rewards/rejected": -0.09009072184562683, + "step": 2305 + }, + { + "epoch": 1.5947441217150762, + "grad_norm": 4.662716865539551, + "learning_rate": 4.66958659904718e-05, + "log_odds_chosen": 6.068386077880859, + "log_odds_ratio": -0.05336064100265503, + "logits/chosen": -0.10764148831367493, + "logits/rejected": -0.13395418226718903, + "logps/chosen": -0.013181259855628014, + "logps/rejected": -1.0082037448883057, + "loss": 2.8182, + "nll_loss": 0.6992227435112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013181259855628014, + "rewards/margins": 0.09950225800275803, + "rewards/rejected": -0.100820392370224, + "step": 2306 + }, + { + "epoch": 1.595435684647303, + "grad_norm": 6.559289455413818, + "learning_rate": 4.6692023974181655e-05, + "log_odds_chosen": 3.6565051078796387, + "log_odds_ratio": -0.20271995663642883, + "logits/chosen": -0.8742144107818604, + "logits/rejected": -0.8906134963035583, + "logps/chosen": -0.09067210555076599, + "logps/rejected": -0.8885717988014221, + "loss": 4.2556, + "nll_loss": 1.043622374534607, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009067210368812084, + "rewards/margins": 0.07978996634483337, + "rewards/rejected": -0.08885718882083893, + "step": 2307 + }, + { + "epoch": 1.5961272475795298, + "grad_norm": 8.234404563903809, + "learning_rate": 4.66881819578915e-05, + "log_odds_chosen": 5.145881652832031, + "log_odds_ratio": -0.3637697100639343, + "logits/chosen": -0.661266565322876, + "logits/rejected": -0.7174186706542969, + "logps/chosen": -0.04645030200481415, + "logps/rejected": -0.9846563339233398, + "loss": 3.6622, + "nll_loss": 0.8791638016700745, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004645030479878187, + "rewards/margins": 0.09382060915231705, + "rewards/rejected": -0.09846563637256622, + "step": 2308 + }, + { + "epoch": 1.5968188105117567, + "grad_norm": 9.499542236328125, + "learning_rate": 4.668433994160136e-05, + "log_odds_chosen": 4.3986029624938965, + "log_odds_ratio": -0.15495246648788452, + "logits/chosen": -0.6596752405166626, + "logits/rejected": -0.667199969291687, + "logps/chosen": -0.05631183832883835, + "logps/rejected": -0.6967073082923889, + "loss": 4.3779, + "nll_loss": 1.0789897441864014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005631184205412865, + "rewards/margins": 0.06403955072164536, + "rewards/rejected": -0.06967072933912277, + "step": 2309 + }, + { + "epoch": 1.5975103734439835, + "grad_norm": 4.345082759857178, + "learning_rate": 4.6680497925311206e-05, + "log_odds_chosen": 6.556073188781738, + "log_odds_ratio": -0.12552745640277863, + "logits/chosen": -0.3318266272544861, + "logits/rejected": -0.33238643407821655, + "logps/chosen": -0.019965466111898422, + "logps/rejected": -0.7021166086196899, + "loss": 2.5127, + "nll_loss": 0.6156325340270996, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019965465180575848, + "rewards/margins": 0.06821511685848236, + "rewards/rejected": -0.07021167129278183, + "step": 2310 + }, + { + "epoch": 1.5982019363762103, + "grad_norm": 8.282541275024414, + "learning_rate": 4.667665590902106e-05, + "log_odds_chosen": 7.438543319702148, + "log_odds_ratio": -0.25406116247177124, + "logits/chosen": -0.45235535502433777, + "logits/rejected": -0.5772778391838074, + "logps/chosen": -0.05688957870006561, + "logps/rejected": -1.2933604717254639, + "loss": 3.1258, + "nll_loss": 0.7560532093048096, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005688957870006561, + "rewards/margins": 0.12364709377288818, + "rewards/rejected": -0.12933605909347534, + "step": 2311 + }, + { + "epoch": 1.5988934993084372, + "grad_norm": 11.441560745239258, + "learning_rate": 4.667281389273091e-05, + "log_odds_chosen": 5.331631660461426, + "log_odds_ratio": -0.3978029191493988, + "logits/chosen": -0.4594797194004059, + "logits/rejected": -0.5356467962265015, + "logps/chosen": -0.06144750118255615, + "logps/rejected": -0.762560248374939, + "loss": 2.632, + "nll_loss": 0.6182161569595337, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0061447499319911, + "rewards/margins": 0.07011127471923828, + "rewards/rejected": -0.07625602185726166, + "step": 2312 + }, + { + "epoch": 1.599585062240664, + "grad_norm": 7.113100051879883, + "learning_rate": 4.6668971876440756e-05, + "log_odds_chosen": 5.57823371887207, + "log_odds_ratio": -0.024948718026280403, + "logits/chosen": -0.4611778259277344, + "logits/rejected": -0.5264756679534912, + "logps/chosen": -0.016496511176228523, + "logps/rejected": -0.930469274520874, + "loss": 3.3915, + "nll_loss": 0.8453721404075623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016496512107551098, + "rewards/margins": 0.09139727056026459, + "rewards/rejected": -0.09304693341255188, + "step": 2313 + }, + { + "epoch": 1.6002766251728908, + "grad_norm": 8.255138397216797, + "learning_rate": 4.666512986015061e-05, + "log_odds_chosen": 5.406302452087402, + "log_odds_ratio": -0.13809970021247864, + "logits/chosen": -0.5205468535423279, + "logits/rejected": -0.5812825560569763, + "logps/chosen": -0.09680378437042236, + "logps/rejected": -0.9270695447921753, + "loss": 3.6517, + "nll_loss": 0.8991074562072754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009680378250777721, + "rewards/margins": 0.08302658051252365, + "rewards/rejected": -0.09270695596933365, + "step": 2314 + }, + { + "epoch": 1.6009681881051177, + "grad_norm": 8.078280448913574, + "learning_rate": 4.666128784386046e-05, + "log_odds_chosen": 5.976874828338623, + "log_odds_ratio": -0.13946348428726196, + "logits/chosen": -0.45572811365127563, + "logits/rejected": -0.4910028576850891, + "logps/chosen": -0.0380314439535141, + "logps/rejected": -1.0345638990402222, + "loss": 4.0855, + "nll_loss": 1.0074275732040405, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038031444419175386, + "rewards/margins": 0.09965324401855469, + "rewards/rejected": -0.10345638543367386, + "step": 2315 + }, + { + "epoch": 1.6016597510373445, + "grad_norm": 9.3878173828125, + "learning_rate": 4.6657445827570313e-05, + "log_odds_chosen": 6.526431083679199, + "log_odds_ratio": -0.07762724161148071, + "logits/chosen": -0.20644918084144592, + "logits/rejected": -0.2773859202861786, + "logps/chosen": -0.046094466000795364, + "logps/rejected": -1.1674156188964844, + "loss": 3.4912, + "nll_loss": 0.8650420308113098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004609446506947279, + "rewards/margins": 0.11213213205337524, + "rewards/rejected": -0.11674156785011292, + "step": 2316 + }, + { + "epoch": 1.6023513139695713, + "grad_norm": 7.8561577796936035, + "learning_rate": 4.665360381128016e-05, + "log_odds_chosen": 2.670375347137451, + "log_odds_ratio": -0.47158828377723694, + "logits/chosen": -0.4281069040298462, + "logits/rejected": -0.46524304151535034, + "logps/chosen": -0.1585179567337036, + "logps/rejected": -0.8629575967788696, + "loss": 3.2523, + "nll_loss": 0.7659047245979309, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01585179753601551, + "rewards/margins": 0.07044396549463272, + "rewards/rejected": -0.08629576116800308, + "step": 2317 + }, + { + "epoch": 1.6030428769017981, + "grad_norm": 5.324893474578857, + "learning_rate": 4.664976179499002e-05, + "log_odds_chosen": 5.277317047119141, + "log_odds_ratio": -0.0963197872042656, + "logits/chosen": -0.3024168908596039, + "logits/rejected": -0.28410249948501587, + "logps/chosen": -0.09467468410730362, + "logps/rejected": -0.673798680305481, + "loss": 2.8702, + "nll_loss": 0.7079252004623413, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009467468596994877, + "rewards/margins": 0.05791240185499191, + "rewards/rejected": -0.06737986952066422, + "step": 2318 + }, + { + "epoch": 1.603734439834025, + "grad_norm": 19.815616607666016, + "learning_rate": 4.6645919778699864e-05, + "log_odds_chosen": 3.646332025527954, + "log_odds_ratio": -0.9083267450332642, + "logits/chosen": -0.5500541925430298, + "logits/rejected": -0.5560811161994934, + "logps/chosen": -0.16285112500190735, + "logps/rejected": -0.5417895317077637, + "loss": 3.3589, + "nll_loss": 0.748899519443512, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016285112127661705, + "rewards/margins": 0.03789384663105011, + "rewards/rejected": -0.05417895317077637, + "step": 2319 + }, + { + "epoch": 1.6044260027662518, + "grad_norm": 6.896365642547607, + "learning_rate": 4.6642077762409716e-05, + "log_odds_chosen": 6.688531875610352, + "log_odds_ratio": -0.03402579948306084, + "logits/chosen": -0.4911445379257202, + "logits/rejected": -0.5525773763656616, + "logps/chosen": -0.057118237018585205, + "logps/rejected": -1.2721749544143677, + "loss": 2.672, + "nll_loss": 0.6646054983139038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005711824167519808, + "rewards/margins": 0.12150569260120392, + "rewards/rejected": -0.12721750140190125, + "step": 2320 + }, + { + "epoch": 1.6051175656984786, + "grad_norm": 9.04888916015625, + "learning_rate": 4.663823574611957e-05, + "log_odds_chosen": 2.932542085647583, + "log_odds_ratio": -0.4230884313583374, + "logits/chosen": -0.16413822770118713, + "logits/rejected": -0.09653112292289734, + "logps/chosen": -0.0899929478764534, + "logps/rejected": -0.42458221316337585, + "loss": 2.9287, + "nll_loss": 0.6898687481880188, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008999294601380825, + "rewards/margins": 0.033458929508924484, + "rewards/rejected": -0.042458221316337585, + "step": 2321 + }, + { + "epoch": 1.6058091286307055, + "grad_norm": 10.678813934326172, + "learning_rate": 4.6634393729829415e-05, + "log_odds_chosen": 3.87385630607605, + "log_odds_ratio": -0.39904534816741943, + "logits/chosen": -0.43692782521247864, + "logits/rejected": -0.4507974088191986, + "logps/chosen": -0.20758147537708282, + "logps/rejected": -0.8612111210823059, + "loss": 3.9424, + "nll_loss": 0.9456848502159119, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020758148282766342, + "rewards/margins": 0.06536296010017395, + "rewards/rejected": -0.08612111210823059, + "step": 2322 + }, + { + "epoch": 1.6065006915629323, + "grad_norm": 11.248788833618164, + "learning_rate": 4.663055171353927e-05, + "log_odds_chosen": 6.452774524688721, + "log_odds_ratio": -0.08504676818847656, + "logits/chosen": -0.31893569231033325, + "logits/rejected": -0.40124383568763733, + "logps/chosen": -0.1601179838180542, + "logps/rejected": -1.3543026447296143, + "loss": 4.2856, + "nll_loss": 1.0629040002822876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01601179875433445, + "rewards/margins": 0.11941846460103989, + "rewards/rejected": -0.1354302614927292, + "step": 2323 + }, + { + "epoch": 1.6071922544951591, + "grad_norm": 7.978420734405518, + "learning_rate": 4.662670969724912e-05, + "log_odds_chosen": 4.781014919281006, + "log_odds_ratio": -0.07580827176570892, + "logits/chosen": -0.46464425325393677, + "logits/rejected": -0.5390160083770752, + "logps/chosen": -0.010720476508140564, + "logps/rejected": -0.6336989402770996, + "loss": 2.9299, + "nll_loss": 0.7249056100845337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001072047627530992, + "rewards/margins": 0.06229785829782486, + "rewards/rejected": -0.06336989998817444, + "step": 2324 + }, + { + "epoch": 1.607883817427386, + "grad_norm": 6.0682501792907715, + "learning_rate": 4.662286768095897e-05, + "log_odds_chosen": 7.181793212890625, + "log_odds_ratio": -0.013707602396607399, + "logits/chosen": -0.3983333110809326, + "logits/rejected": -0.40123891830444336, + "logps/chosen": -0.004066762514412403, + "logps/rejected": -0.80652916431427, + "loss": 3.2499, + "nll_loss": 0.8111165165901184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004066762630827725, + "rewards/margins": 0.08024624735116959, + "rewards/rejected": -0.08065292239189148, + "step": 2325 + }, + { + "epoch": 1.6085753803596128, + "grad_norm": 9.307788848876953, + "learning_rate": 4.661902566466882e-05, + "log_odds_chosen": 3.275400161743164, + "log_odds_ratio": -0.3779076933860779, + "logits/chosen": -0.293110728263855, + "logits/rejected": -0.3226720690727234, + "logps/chosen": -0.11518510431051254, + "logps/rejected": -0.6717798709869385, + "loss": 3.6997, + "nll_loss": 0.8871325254440308, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011518510058522224, + "rewards/margins": 0.05565947666764259, + "rewards/rejected": -0.06717798113822937, + "step": 2326 + }, + { + "epoch": 1.6092669432918396, + "grad_norm": 5.229033470153809, + "learning_rate": 4.661518364837868e-05, + "log_odds_chosen": 5.4648237228393555, + "log_odds_ratio": -0.09957113116979599, + "logits/chosen": -0.5804209113121033, + "logits/rejected": -0.5788403749465942, + "logps/chosen": -0.05049334466457367, + "logps/rejected": -0.8705024719238281, + "loss": 2.1554, + "nll_loss": 0.5288969278335571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005049334838986397, + "rewards/margins": 0.08200091123580933, + "rewards/rejected": -0.08705024421215057, + "step": 2327 + }, + { + "epoch": 1.6099585062240664, + "grad_norm": 6.291632652282715, + "learning_rate": 4.661134163208852e-05, + "log_odds_chosen": 4.770295143127441, + "log_odds_ratio": -0.3147009313106537, + "logits/chosen": -0.4132739305496216, + "logits/rejected": -0.4148447811603546, + "logps/chosen": -0.061191972345113754, + "logps/rejected": -1.0149085521697998, + "loss": 2.1178, + "nll_loss": 0.4979802966117859, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006119197234511375, + "rewards/margins": 0.09537166357040405, + "rewards/rejected": -0.10149086266756058, + "step": 2328 + }, + { + "epoch": 1.6106500691562933, + "grad_norm": 7.1284918785095215, + "learning_rate": 4.6607499615798375e-05, + "log_odds_chosen": 4.904767990112305, + "log_odds_ratio": -0.08909415453672409, + "logits/chosen": -0.25764161348342896, + "logits/rejected": -0.3145712912082672, + "logps/chosen": -0.03883281722664833, + "logps/rejected": -0.816331148147583, + "loss": 3.3219, + "nll_loss": 0.821575403213501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003883281722664833, + "rewards/margins": 0.0777498334646225, + "rewards/rejected": -0.08163312077522278, + "step": 2329 + }, + { + "epoch": 1.61134163208852, + "grad_norm": 6.315418720245361, + "learning_rate": 4.660365759950822e-05, + "log_odds_chosen": 5.398120403289795, + "log_odds_ratio": -0.2213883101940155, + "logits/chosen": -0.631862998008728, + "logits/rejected": -0.6522068977355957, + "logps/chosen": -0.08457255363464355, + "logps/rejected": -0.718346118927002, + "loss": 2.8186, + "nll_loss": 0.6825187802314758, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00845725554972887, + "rewards/margins": 0.06337735801935196, + "rewards/rejected": -0.07183460891246796, + "step": 2330 + }, + { + "epoch": 1.612033195020747, + "grad_norm": 5.7349324226379395, + "learning_rate": 4.659981558321807e-05, + "log_odds_chosen": 6.114752769470215, + "log_odds_ratio": -0.02867019921541214, + "logits/chosen": -0.3078418970108032, + "logits/rejected": -0.30085307359695435, + "logps/chosen": -0.04859218746423721, + "logps/rejected": -1.2826318740844727, + "loss": 2.4187, + "nll_loss": 0.6017983555793762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0048592183738946915, + "rewards/margins": 0.12340398877859116, + "rewards/rejected": -0.1282632052898407, + "step": 2331 + }, + { + "epoch": 1.6127247579529738, + "grad_norm": 17.164234161376953, + "learning_rate": 4.6595973566927925e-05, + "log_odds_chosen": 4.351862907409668, + "log_odds_ratio": -0.16593973338603973, + "logits/chosen": -0.6303444504737854, + "logits/rejected": -0.6840345859527588, + "logps/chosen": -0.0819045901298523, + "logps/rejected": -0.631934404373169, + "loss": 3.2901, + "nll_loss": 0.8059207201004028, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008190459571778774, + "rewards/margins": 0.055002983659505844, + "rewards/rejected": -0.0631934404373169, + "step": 2332 + }, + { + "epoch": 1.6134163208852006, + "grad_norm": 8.640056610107422, + "learning_rate": 4.659213155063777e-05, + "log_odds_chosen": 5.334671974182129, + "log_odds_ratio": -0.06108527630567551, + "logits/chosen": -0.41245636343955994, + "logits/rejected": -0.4462578296661377, + "logps/chosen": -0.02963314950466156, + "logps/rejected": -0.7621734738349915, + "loss": 3.773, + "nll_loss": 0.9371405839920044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002963314764201641, + "rewards/margins": 0.07325402647256851, + "rewards/rejected": -0.07621734589338303, + "step": 2333 + }, + { + "epoch": 1.6141078838174274, + "grad_norm": 6.361103534698486, + "learning_rate": 4.658828953434763e-05, + "log_odds_chosen": 4.108833312988281, + "log_odds_ratio": -0.08453751355409622, + "logits/chosen": -0.4442722201347351, + "logits/rejected": -0.45774152874946594, + "logps/chosen": -0.04168268293142319, + "logps/rejected": -0.7075502872467041, + "loss": 3.2087, + "nll_loss": 0.7937202453613281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004168268758803606, + "rewards/margins": 0.06658677011728287, + "rewards/rejected": -0.07075503468513489, + "step": 2334 + }, + { + "epoch": 1.6147994467496543, + "grad_norm": 8.147212028503418, + "learning_rate": 4.6584447518057476e-05, + "log_odds_chosen": 5.207988739013672, + "log_odds_ratio": -0.06420108675956726, + "logits/chosen": -0.5827217698097229, + "logits/rejected": -0.6342862844467163, + "logps/chosen": -0.02741215191781521, + "logps/rejected": -0.8105305433273315, + "loss": 3.8817, + "nll_loss": 0.9639945030212402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027412152849137783, + "rewards/margins": 0.07831183820962906, + "rewards/rejected": -0.08105304837226868, + "step": 2335 + }, + { + "epoch": 1.615491009681881, + "grad_norm": 9.331297874450684, + "learning_rate": 4.658060550176733e-05, + "log_odds_chosen": 5.364526271820068, + "log_odds_ratio": -0.6092586517333984, + "logits/chosen": -0.430431991815567, + "logits/rejected": -0.43262892961502075, + "logps/chosen": -0.0933023989200592, + "logps/rejected": -0.8510630130767822, + "loss": 4.0586, + "nll_loss": 0.9537221193313599, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009330240078270435, + "rewards/margins": 0.07577606290578842, + "rewards/rejected": -0.08510630577802658, + "step": 2336 + }, + { + "epoch": 1.616182572614108, + "grad_norm": 4.386343955993652, + "learning_rate": 4.657676348547718e-05, + "log_odds_chosen": 4.233821868896484, + "log_odds_ratio": -0.27742135524749756, + "logits/chosen": -0.6338566541671753, + "logits/rejected": -0.6705052852630615, + "logps/chosen": -0.08768070489168167, + "logps/rejected": -0.7955862879753113, + "loss": 2.8503, + "nll_loss": 0.6848416924476624, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008768070489168167, + "rewards/margins": 0.07079055905342102, + "rewards/rejected": -0.07955863326787949, + "step": 2337 + }, + { + "epoch": 1.6168741355463347, + "grad_norm": 6.712360858917236, + "learning_rate": 4.657292146918703e-05, + "log_odds_chosen": 4.783242225646973, + "log_odds_ratio": -0.3336433172225952, + "logits/chosen": -0.41855043172836304, + "logits/rejected": -0.474947988986969, + "logps/chosen": -0.0697491243481636, + "logps/rejected": -0.8092215657234192, + "loss": 3.5056, + "nll_loss": 0.8430478572845459, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00697491317987442, + "rewards/margins": 0.0739472508430481, + "rewards/rejected": -0.08092215657234192, + "step": 2338 + }, + { + "epoch": 1.6175656984785616, + "grad_norm": 6.100496292114258, + "learning_rate": 4.656907945289688e-05, + "log_odds_chosen": 6.144561767578125, + "log_odds_ratio": -0.15024614334106445, + "logits/chosen": -0.35080528259277344, + "logits/rejected": -0.36896559596061707, + "logps/chosen": -0.05333162844181061, + "logps/rejected": -1.3443855047225952, + "loss": 3.5222, + "nll_loss": 0.8655195236206055, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005333162844181061, + "rewards/margins": 0.12910538911819458, + "rewards/rejected": -0.13443854451179504, + "step": 2339 + }, + { + "epoch": 1.6182572614107884, + "grad_norm": 14.227270126342773, + "learning_rate": 4.656523743660673e-05, + "log_odds_chosen": 3.9271492958068848, + "log_odds_ratio": -0.31890344619750977, + "logits/chosen": -0.368779718875885, + "logits/rejected": -0.41777634620666504, + "logps/chosen": -0.07359308004379272, + "logps/rejected": -0.626854658126831, + "loss": 4.2403, + "nll_loss": 1.028173804283142, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007359308190643787, + "rewards/margins": 0.055326156318187714, + "rewards/rejected": -0.06268545985221863, + "step": 2340 + }, + { + "epoch": 1.6189488243430152, + "grad_norm": 7.9860005378723145, + "learning_rate": 4.6561395420316584e-05, + "log_odds_chosen": 4.5483598709106445, + "log_odds_ratio": -0.15386539697647095, + "logits/chosen": -0.4116964042186737, + "logits/rejected": -0.47240209579467773, + "logps/chosen": -0.052656978368759155, + "logps/rejected": -0.7900266051292419, + "loss": 3.0385, + "nll_loss": 0.7442415952682495, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0052656978368759155, + "rewards/margins": 0.07373696565628052, + "rewards/rejected": -0.07900266349315643, + "step": 2341 + }, + { + "epoch": 1.619640387275242, + "grad_norm": 3.6560215950012207, + "learning_rate": 4.655755340402643e-05, + "log_odds_chosen": 5.275965690612793, + "log_odds_ratio": -0.13455995917320251, + "logits/chosen": -0.23229531943798065, + "logits/rejected": -0.17382174730300903, + "logps/chosen": -0.050933949649333954, + "logps/rejected": -1.087402582168579, + "loss": 2.7595, + "nll_loss": 0.6764070391654968, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0050933947786688805, + "rewards/margins": 0.10364687442779541, + "rewards/rejected": -0.10874027013778687, + "step": 2342 + }, + { + "epoch": 1.620331950207469, + "grad_norm": 6.554387092590332, + "learning_rate": 4.655371138773629e-05, + "log_odds_chosen": 5.708447456359863, + "log_odds_ratio": -0.032198466360569, + "logits/chosen": -0.35663536190986633, + "logits/rejected": -0.4237971305847168, + "logps/chosen": -0.0359690859913826, + "logps/rejected": -0.9435381889343262, + "loss": 2.962, + "nll_loss": 0.7372850179672241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003596908412873745, + "rewards/margins": 0.09075691550970078, + "rewards/rejected": -0.0943538174033165, + "step": 2343 + }, + { + "epoch": 1.6210235131396957, + "grad_norm": 9.658689498901367, + "learning_rate": 4.6549869371446134e-05, + "log_odds_chosen": 5.208951950073242, + "log_odds_ratio": -0.1817733198404312, + "logits/chosen": -0.14689387381076813, + "logits/rejected": -0.22184035181999207, + "logps/chosen": -0.05039520561695099, + "logps/rejected": -1.0678006410598755, + "loss": 4.0494, + "nll_loss": 0.9941627979278564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005039520561695099, + "rewards/margins": 0.10174053907394409, + "rewards/rejected": -0.10678005963563919, + "step": 2344 + }, + { + "epoch": 1.6217150760719226, + "grad_norm": 8.730910301208496, + "learning_rate": 4.654602735515599e-05, + "log_odds_chosen": 4.14716911315918, + "log_odds_ratio": -0.2981174886226654, + "logits/chosen": -0.8712558746337891, + "logits/rejected": -0.8630931377410889, + "logps/chosen": -0.07158464193344116, + "logps/rejected": -0.6259889006614685, + "loss": 4.7996, + "nll_loss": 1.1700934171676636, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007158464286476374, + "rewards/margins": 0.055440425872802734, + "rewards/rejected": -0.06259889155626297, + "step": 2345 + }, + { + "epoch": 1.6224066390041494, + "grad_norm": 5.973324775695801, + "learning_rate": 4.654218533886584e-05, + "log_odds_chosen": 5.6539106369018555, + "log_odds_ratio": -0.31066593527793884, + "logits/chosen": -0.2996593415737152, + "logits/rejected": -0.3607049584388733, + "logps/chosen": -0.1110413447022438, + "logps/rejected": -0.9045712947845459, + "loss": 2.8255, + "nll_loss": 0.6753115057945251, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01110413484275341, + "rewards/margins": 0.07935299724340439, + "rewards/rejected": -0.09045712649822235, + "step": 2346 + }, + { + "epoch": 1.6230982019363762, + "grad_norm": 7.056889533996582, + "learning_rate": 4.653834332257569e-05, + "log_odds_chosen": 4.830997943878174, + "log_odds_ratio": -0.04605808109045029, + "logits/chosen": -0.4083777666091919, + "logits/rejected": -0.4561671316623688, + "logps/chosen": -0.04745990410447121, + "logps/rejected": -0.9793378114700317, + "loss": 3.1702, + "nll_loss": 0.7879566550254822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004745990503579378, + "rewards/margins": 0.09318779408931732, + "rewards/rejected": -0.09793378412723541, + "step": 2347 + }, + { + "epoch": 1.623789764868603, + "grad_norm": 8.9386625289917, + "learning_rate": 4.653450130628554e-05, + "log_odds_chosen": 3.8049674034118652, + "log_odds_ratio": -0.24610795080661774, + "logits/chosen": -0.6960456371307373, + "logits/rejected": -0.6541339159011841, + "logps/chosen": -0.05932285264134407, + "logps/rejected": -0.9324048757553101, + "loss": 2.891, + "nll_loss": 0.6981493234634399, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005932285450398922, + "rewards/margins": 0.08730820566415787, + "rewards/rejected": -0.09324049204587936, + "step": 2348 + }, + { + "epoch": 1.6244813278008299, + "grad_norm": 8.542035102844238, + "learning_rate": 4.653065928999539e-05, + "log_odds_chosen": 3.459298610687256, + "log_odds_ratio": -0.5191382169723511, + "logits/chosen": -0.5135226845741272, + "logits/rejected": -0.4982113242149353, + "logps/chosen": -0.11870171129703522, + "logps/rejected": -0.684704601764679, + "loss": 2.5091, + "nll_loss": 0.5753598213195801, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011870170943439007, + "rewards/margins": 0.05660029500722885, + "rewards/rejected": -0.06847047060728073, + "step": 2349 + }, + { + "epoch": 1.6251728907330567, + "grad_norm": 7.4994025230407715, + "learning_rate": 4.652681727370524e-05, + "log_odds_chosen": 3.385531187057495, + "log_odds_ratio": -0.28039830923080444, + "logits/chosen": -0.47290781140327454, + "logits/rejected": -0.5128310918807983, + "logps/chosen": -0.07095737010240555, + "logps/rejected": -0.6914070844650269, + "loss": 3.6671, + "nll_loss": 0.8887372016906738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007095737382769585, + "rewards/margins": 0.06204497814178467, + "rewards/rejected": -0.0691407173871994, + "step": 2350 + }, + { + "epoch": 1.6258644536652835, + "grad_norm": 5.9747538566589355, + "learning_rate": 4.652297525741509e-05, + "log_odds_chosen": 4.028547286987305, + "log_odds_ratio": -0.167395681142807, + "logits/chosen": -0.8783102035522461, + "logits/rejected": -0.9642181396484375, + "logps/chosen": -0.04337505251169205, + "logps/rejected": -0.6610270142555237, + "loss": 3.3736, + "nll_loss": 0.8266501426696777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00433750543743372, + "rewards/margins": 0.06176519766449928, + "rewards/rejected": -0.06610269844532013, + "step": 2351 + }, + { + "epoch": 1.6265560165975104, + "grad_norm": 6.233641147613525, + "learning_rate": 4.651913324112495e-05, + "log_odds_chosen": 4.822110176086426, + "log_odds_ratio": -0.1332874894142151, + "logits/chosen": -0.3468000292778015, + "logits/rejected": -0.3059294819831848, + "logps/chosen": -0.048264387995004654, + "logps/rejected": -0.8635885715484619, + "loss": 2.5327, + "nll_loss": 0.6198562979698181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0048264386132359505, + "rewards/margins": 0.08153241872787476, + "rewards/rejected": -0.08635886013507843, + "step": 2352 + }, + { + "epoch": 1.6272475795297372, + "grad_norm": 7.80859375, + "learning_rate": 4.651529122483479e-05, + "log_odds_chosen": 6.187796115875244, + "log_odds_ratio": -0.13487344980239868, + "logits/chosen": -0.15797224640846252, + "logits/rejected": -0.1921594738960266, + "logps/chosen": -0.033555012196302414, + "logps/rejected": -0.8472077250480652, + "loss": 3.7709, + "nll_loss": 0.9292386770248413, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033555012196302414, + "rewards/margins": 0.08136527240276337, + "rewards/rejected": -0.08472077548503876, + "step": 2353 + }, + { + "epoch": 1.627939142461964, + "grad_norm": 7.190445423126221, + "learning_rate": 4.6511449208544645e-05, + "log_odds_chosen": 3.892343521118164, + "log_odds_ratio": -0.6625378131866455, + "logits/chosen": -0.41133368015289307, + "logits/rejected": -0.43225544691085815, + "logps/chosen": -0.0741141065955162, + "logps/rejected": -0.48772764205932617, + "loss": 4.0325, + "nll_loss": 0.9418785572052002, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00741141103208065, + "rewards/margins": 0.041361354291439056, + "rewards/rejected": -0.048772767186164856, + "step": 2354 + }, + { + "epoch": 1.6286307053941909, + "grad_norm": 11.111103057861328, + "learning_rate": 4.65076071922545e-05, + "log_odds_chosen": 3.0443787574768066, + "log_odds_ratio": -0.6435021162033081, + "logits/chosen": -0.6717063188552856, + "logits/rejected": -0.6969423294067383, + "logps/chosen": -0.1275130808353424, + "logps/rejected": -0.6429640054702759, + "loss": 3.7216, + "nll_loss": 0.8660593628883362, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01275130920112133, + "rewards/margins": 0.05154508352279663, + "rewards/rejected": -0.06429639458656311, + "step": 2355 + }, + { + "epoch": 1.6293222683264177, + "grad_norm": 7.198049545288086, + "learning_rate": 4.650376517596435e-05, + "log_odds_chosen": 8.301595687866211, + "log_odds_ratio": -0.0018735526828095317, + "logits/chosen": -0.18375827372074127, + "logits/rejected": -0.2660870850086212, + "logps/chosen": -0.002222995739430189, + "logps/rejected": -0.9437817931175232, + "loss": 2.0797, + "nll_loss": 0.5197421312332153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022229959722608328, + "rewards/margins": 0.09415587782859802, + "rewards/rejected": -0.09437818080186844, + "step": 2356 + }, + { + "epoch": 1.6300138312586445, + "grad_norm": 4.328792572021484, + "learning_rate": 4.6499923159674196e-05, + "log_odds_chosen": 4.548550605773926, + "log_odds_ratio": -0.16368317604064941, + "logits/chosen": -0.19425427913665771, + "logits/rejected": -0.26107314229011536, + "logps/chosen": -0.04027582332491875, + "logps/rejected": -0.6658027172088623, + "loss": 2.533, + "nll_loss": 0.6168872714042664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004027582239359617, + "rewards/margins": 0.06255269795656204, + "rewards/rejected": -0.06658028066158295, + "step": 2357 + }, + { + "epoch": 1.6307053941908713, + "grad_norm": 8.348052024841309, + "learning_rate": 4.649608114338405e-05, + "log_odds_chosen": 4.052003383636475, + "log_odds_ratio": -0.42345088720321655, + "logits/chosen": -0.5440030694007874, + "logits/rejected": -0.5415800213813782, + "logps/chosen": -0.10518966615200043, + "logps/rejected": -0.7477426528930664, + "loss": 3.023, + "nll_loss": 0.7134120464324951, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010518967173993587, + "rewards/margins": 0.06425529718399048, + "rewards/rejected": -0.07477426528930664, + "step": 2358 + }, + { + "epoch": 1.6313969571230982, + "grad_norm": 4.577966213226318, + "learning_rate": 4.64922391270939e-05, + "log_odds_chosen": 4.806175708770752, + "log_odds_ratio": -0.07519068568944931, + "logits/chosen": -0.40390050411224365, + "logits/rejected": -0.44469988346099854, + "logps/chosen": -0.08716096729040146, + "logps/rejected": -0.8027600646018982, + "loss": 2.4672, + "nll_loss": 0.6092922687530518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008716096170246601, + "rewards/margins": 0.07155991345643997, + "rewards/rejected": -0.0802760124206543, + "step": 2359 + }, + { + "epoch": 1.632088520055325, + "grad_norm": 6.8378705978393555, + "learning_rate": 4.6488397110803746e-05, + "log_odds_chosen": 5.506162643432617, + "log_odds_ratio": -0.03466683626174927, + "logits/chosen": -0.32365161180496216, + "logits/rejected": -0.2994399964809418, + "logps/chosen": -0.01583227887749672, + "logps/rejected": -0.6344969272613525, + "loss": 2.9468, + "nll_loss": 0.7332226037979126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001583227887749672, + "rewards/margins": 0.06186646223068237, + "rewards/rejected": -0.0634496882557869, + "step": 2360 + }, + { + "epoch": 1.6327800829875518, + "grad_norm": 6.263278007507324, + "learning_rate": 4.6484555094513606e-05, + "log_odds_chosen": 3.8691322803497314, + "log_odds_ratio": -0.23683586716651917, + "logits/chosen": -0.5043981075286865, + "logits/rejected": -0.5182955265045166, + "logps/chosen": -0.07492068409919739, + "logps/rejected": -0.6660391092300415, + "loss": 3.5599, + "nll_loss": 0.8662796020507812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007492068689316511, + "rewards/margins": 0.05911184102296829, + "rewards/rejected": -0.06660391390323639, + "step": 2361 + }, + { + "epoch": 1.6334716459197787, + "grad_norm": 7.12549352645874, + "learning_rate": 4.648071307822345e-05, + "log_odds_chosen": 2.8235957622528076, + "log_odds_ratio": -0.1785672903060913, + "logits/chosen": -0.43651118874549866, + "logits/rejected": -0.45994648337364197, + "logps/chosen": -0.04840053990483284, + "logps/rejected": -0.43135061860084534, + "loss": 3.0901, + "nll_loss": 0.7546652555465698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004840054549276829, + "rewards/margins": 0.03829500824213028, + "rewards/rejected": -0.043135061860084534, + "step": 2362 + }, + { + "epoch": 1.6341632088520055, + "grad_norm": 7.425775051116943, + "learning_rate": 4.6476871061933304e-05, + "log_odds_chosen": 4.484004020690918, + "log_odds_ratio": -0.08991587907075882, + "logits/chosen": -0.5775189399719238, + "logits/rejected": -0.6644449234008789, + "logps/chosen": -0.035481616854667664, + "logps/rejected": -0.8728645443916321, + "loss": 3.218, + "nll_loss": 0.7955026030540466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035481618251651525, + "rewards/margins": 0.0837383046746254, + "rewards/rejected": -0.08728646486997604, + "step": 2363 + }, + { + "epoch": 1.6348547717842323, + "grad_norm": 9.061689376831055, + "learning_rate": 4.6473029045643156e-05, + "log_odds_chosen": 2.770498752593994, + "log_odds_ratio": -0.5221116542816162, + "logits/chosen": -0.9692656993865967, + "logits/rejected": -0.9988617897033691, + "logps/chosen": -0.13199469447135925, + "logps/rejected": -0.6178169250488281, + "loss": 4.2676, + "nll_loss": 1.0146853923797607, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013199469074606895, + "rewards/margins": 0.04858222231268883, + "rewards/rejected": -0.06178169324994087, + "step": 2364 + }, + { + "epoch": 1.6355463347164592, + "grad_norm": 8.996692657470703, + "learning_rate": 4.646918702935301e-05, + "log_odds_chosen": 6.133260250091553, + "log_odds_ratio": -0.009358121082186699, + "logits/chosen": -0.6405230760574341, + "logits/rejected": -0.7012723088264465, + "logps/chosen": -0.005448098760098219, + "logps/rejected": -1.0479671955108643, + "loss": 4.8104, + "nll_loss": 1.2016545534133911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005448098527267575, + "rewards/margins": 0.1042519062757492, + "rewards/rejected": -0.10479672253131866, + "step": 2365 + }, + { + "epoch": 1.636237897648686, + "grad_norm": 9.420392990112305, + "learning_rate": 4.6465345013062854e-05, + "log_odds_chosen": 6.216562271118164, + "log_odds_ratio": -0.010765348561108112, + "logits/chosen": -0.2744053602218628, + "logits/rejected": -0.40222588181495667, + "logps/chosen": -0.009755797684192657, + "logps/rejected": -1.333965539932251, + "loss": 3.2282, + "nll_loss": 0.8059628009796143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009755798382684588, + "rewards/margins": 0.13242097198963165, + "rewards/rejected": -0.13339656591415405, + "step": 2366 + }, + { + "epoch": 1.6369294605809128, + "grad_norm": 7.960176467895508, + "learning_rate": 4.646150299677271e-05, + "log_odds_chosen": 3.522726535797119, + "log_odds_ratio": -0.3352411389350891, + "logits/chosen": -0.41156286001205444, + "logits/rejected": -0.4562080204486847, + "logps/chosen": -0.09292282164096832, + "logps/rejected": -0.55586838722229, + "loss": 2.4371, + "nll_loss": 0.5757578611373901, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009292282164096832, + "rewards/margins": 0.04629455506801605, + "rewards/rejected": -0.055586837232112885, + "step": 2367 + }, + { + "epoch": 1.6376210235131397, + "grad_norm": 8.390948295593262, + "learning_rate": 4.645766098048256e-05, + "log_odds_chosen": 5.556153774261475, + "log_odds_ratio": -0.1825868785381317, + "logits/chosen": -0.26770249009132385, + "logits/rejected": -0.32716527581214905, + "logps/chosen": -0.024412261322140694, + "logps/rejected": -0.647440493106842, + "loss": 2.921, + "nll_loss": 0.7120010256767273, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002441226039081812, + "rewards/margins": 0.06230282783508301, + "rewards/rejected": -0.06474405527114868, + "step": 2368 + }, + { + "epoch": 1.6383125864453665, + "grad_norm": 4.715832710266113, + "learning_rate": 4.6453818964192405e-05, + "log_odds_chosen": 4.0443806648254395, + "log_odds_ratio": -0.20312117040157318, + "logits/chosen": -0.45583438873291016, + "logits/rejected": -0.4988771677017212, + "logps/chosen": -0.06384460628032684, + "logps/rejected": -0.4571217894554138, + "loss": 2.9992, + "nll_loss": 0.7294943332672119, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0063844602555036545, + "rewards/margins": 0.039327722042798996, + "rewards/rejected": -0.0457121841609478, + "step": 2369 + }, + { + "epoch": 1.6390041493775933, + "grad_norm": 13.881675720214844, + "learning_rate": 4.6449976947902264e-05, + "log_odds_chosen": 2.983891725540161, + "log_odds_ratio": -0.3598352372646332, + "logits/chosen": -0.08851074427366257, + "logits/rejected": -0.08484543859958649, + "logps/chosen": -0.11364905536174774, + "logps/rejected": -0.7294566631317139, + "loss": 3.5079, + "nll_loss": 0.8409815430641174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011364906094968319, + "rewards/margins": 0.06158076599240303, + "rewards/rejected": -0.07294566929340363, + "step": 2370 + }, + { + "epoch": 1.6396957123098201, + "grad_norm": 8.289203643798828, + "learning_rate": 4.644613493161211e-05, + "log_odds_chosen": 4.033492088317871, + "log_odds_ratio": -0.14057360589504242, + "logits/chosen": -0.9791640043258667, + "logits/rejected": -0.9675908088684082, + "logps/chosen": -0.03342659771442413, + "logps/rejected": -0.8204505443572998, + "loss": 4.3131, + "nll_loss": 1.0642057657241821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003342659678310156, + "rewards/margins": 0.0787023976445198, + "rewards/rejected": -0.0820450633764267, + "step": 2371 + }, + { + "epoch": 1.640387275242047, + "grad_norm": 8.510683059692383, + "learning_rate": 4.644229291532196e-05, + "log_odds_chosen": 2.96242094039917, + "log_odds_ratio": -0.2382379025220871, + "logits/chosen": -0.44780367612838745, + "logits/rejected": -0.5181176066398621, + "logps/chosen": -0.09145855158567429, + "logps/rejected": -0.7005022764205933, + "loss": 3.3642, + "nll_loss": 0.8172252178192139, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009145855903625488, + "rewards/margins": 0.060904376208782196, + "rewards/rejected": -0.07005023211240768, + "step": 2372 + }, + { + "epoch": 1.6410788381742738, + "grad_norm": 9.975122451782227, + "learning_rate": 4.6438450899031815e-05, + "log_odds_chosen": 2.5919246673583984, + "log_odds_ratio": -0.26950156688690186, + "logits/chosen": -0.5262844562530518, + "logits/rejected": -0.5486704707145691, + "logps/chosen": -0.11428187787532806, + "logps/rejected": -0.9702275991439819, + "loss": 2.8376, + "nll_loss": 0.6824502944946289, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011428188532590866, + "rewards/margins": 0.08559457957744598, + "rewards/rejected": -0.09702275693416595, + "step": 2373 + }, + { + "epoch": 1.6417704011065006, + "grad_norm": 12.058677673339844, + "learning_rate": 4.643460888274167e-05, + "log_odds_chosen": 3.0631299018859863, + "log_odds_ratio": -0.7720636129379272, + "logits/chosen": -0.3485577702522278, + "logits/rejected": -0.4091474413871765, + "logps/chosen": -0.19718411564826965, + "logps/rejected": -0.6004868149757385, + "loss": 4.0899, + "nll_loss": 0.9452710151672363, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019718410447239876, + "rewards/margins": 0.040330275893211365, + "rewards/rejected": -0.06004868820309639, + "step": 2374 + }, + { + "epoch": 1.6424619640387275, + "grad_norm": 8.720122337341309, + "learning_rate": 4.643076686645151e-05, + "log_odds_chosen": 3.9990530014038086, + "log_odds_ratio": -0.13114123046398163, + "logits/chosen": -0.5957059860229492, + "logits/rejected": -0.5671588778495789, + "logps/chosen": -0.06041772663593292, + "logps/rejected": -0.6443151831626892, + "loss": 3.8435, + "nll_loss": 0.9477718472480774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006041772663593292, + "rewards/margins": 0.05838974937796593, + "rewards/rejected": -0.06443151831626892, + "step": 2375 + }, + { + "epoch": 1.6431535269709543, + "grad_norm": 6.34602689743042, + "learning_rate": 4.6426924850161365e-05, + "log_odds_chosen": 4.483710765838623, + "log_odds_ratio": -0.13568690419197083, + "logits/chosen": -0.19696210324764252, + "logits/rejected": -0.2683354914188385, + "logps/chosen": -0.06465273350477219, + "logps/rejected": -0.5963294506072998, + "loss": 2.5422, + "nll_loss": 0.6219762563705444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006465273909270763, + "rewards/margins": 0.053167674690485, + "rewards/rejected": -0.05963294953107834, + "step": 2376 + }, + { + "epoch": 1.6438450899031811, + "grad_norm": 10.477910041809082, + "learning_rate": 4.642308283387122e-05, + "log_odds_chosen": 4.577691555023193, + "log_odds_ratio": -0.2504514455795288, + "logits/chosen": -0.5443359613418579, + "logits/rejected": -0.6230031251907349, + "logps/chosen": -0.03581617400050163, + "logps/rejected": -0.7296348810195923, + "loss": 3.2231, + "nll_loss": 0.7807391285896301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003581617260351777, + "rewards/margins": 0.06938187777996063, + "rewards/rejected": -0.07296349108219147, + "step": 2377 + }, + { + "epoch": 1.644536652835408, + "grad_norm": 9.255958557128906, + "learning_rate": 4.641924081758107e-05, + "log_odds_chosen": 5.29841423034668, + "log_odds_ratio": -0.05628347396850586, + "logits/chosen": -0.2169831395149231, + "logits/rejected": -0.35057875514030457, + "logps/chosen": -0.025036348029971123, + "logps/rejected": -0.8059083819389343, + "loss": 4.3766, + "nll_loss": 1.0885179042816162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025036348961293697, + "rewards/margins": 0.0780872032046318, + "rewards/rejected": -0.08059084415435791, + "step": 2378 + }, + { + "epoch": 1.6452282157676348, + "grad_norm": 6.931178569793701, + "learning_rate": 4.641539880129092e-05, + "log_odds_chosen": 3.6957497596740723, + "log_odds_ratio": -0.12863591313362122, + "logits/chosen": -0.245091512799263, + "logits/rejected": -0.3163626790046692, + "logps/chosen": -0.07319440692663193, + "logps/rejected": -0.725760817527771, + "loss": 3.671, + "nll_loss": 0.9048863053321838, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007319441065192223, + "rewards/margins": 0.06525664031505585, + "rewards/rejected": -0.07257607579231262, + "step": 2379 + }, + { + "epoch": 1.6459197786998616, + "grad_norm": 5.41664981842041, + "learning_rate": 4.641155678500077e-05, + "log_odds_chosen": 4.072056770324707, + "log_odds_ratio": -0.12067017704248428, + "logits/chosen": -0.7107937335968018, + "logits/rejected": -0.6876804828643799, + "logps/chosen": -0.05523668974637985, + "logps/rejected": -0.7824287414550781, + "loss": 2.8099, + "nll_loss": 0.6904194951057434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005523669067770243, + "rewards/margins": 0.07271920889616013, + "rewards/rejected": -0.07824286818504333, + "step": 2380 + }, + { + "epoch": 1.6466113416320884, + "grad_norm": 7.0871052742004395, + "learning_rate": 4.640771476871062e-05, + "log_odds_chosen": 1.587499737739563, + "log_odds_ratio": -0.522819995880127, + "logits/chosen": -0.5985342860221863, + "logits/rejected": -0.6404790282249451, + "logps/chosen": -0.13571304082870483, + "logps/rejected": -0.3021208941936493, + "loss": 5.1378, + "nll_loss": 1.232169270515442, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013571303337812424, + "rewards/margins": 0.016640786081552505, + "rewards/rejected": -0.03021209128201008, + "step": 2381 + }, + { + "epoch": 1.6473029045643153, + "grad_norm": 15.613216400146484, + "learning_rate": 4.640387275242047e-05, + "log_odds_chosen": 4.4091796875, + "log_odds_ratio": -0.2573304772377014, + "logits/chosen": -0.7599989175796509, + "logits/rejected": -0.7812910079956055, + "logps/chosen": -0.07085003703832626, + "logps/rejected": -0.9684122204780579, + "loss": 3.8714, + "nll_loss": 0.9421118497848511, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007085003890097141, + "rewards/margins": 0.08975622057914734, + "rewards/rejected": -0.0968412309885025, + "step": 2382 + }, + { + "epoch": 1.647994467496542, + "grad_norm": 7.1068434715271, + "learning_rate": 4.6400030736130325e-05, + "log_odds_chosen": 4.6426897048950195, + "log_odds_ratio": -0.13260595500469208, + "logits/chosen": -0.4048321843147278, + "logits/rejected": -0.4985610842704773, + "logps/chosen": -0.021196218207478523, + "logps/rejected": -0.729151725769043, + "loss": 3.4597, + "nll_loss": 0.8516725301742554, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002119621727615595, + "rewards/margins": 0.07079555094242096, + "rewards/rejected": -0.07291518151760101, + "step": 2383 + }, + { + "epoch": 1.648686030428769, + "grad_norm": 7.182337760925293, + "learning_rate": 4.639618871984017e-05, + "log_odds_chosen": 5.572072982788086, + "log_odds_ratio": -0.028026271611452103, + "logits/chosen": -0.43479007482528687, + "logits/rejected": -0.5537311434745789, + "logps/chosen": -0.03721091151237488, + "logps/rejected": -1.0600721836090088, + "loss": 2.9149, + "nll_loss": 0.7259154319763184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00372109143063426, + "rewards/margins": 0.10228613018989563, + "rewards/rejected": -0.10600721836090088, + "step": 2384 + }, + { + "epoch": 1.6493775933609958, + "grad_norm": 7.0679545402526855, + "learning_rate": 4.639234670355003e-05, + "log_odds_chosen": 5.097892761230469, + "log_odds_ratio": -0.05553697049617767, + "logits/chosen": -0.6244937777519226, + "logits/rejected": -0.6847025156021118, + "logps/chosen": -0.03268459439277649, + "logps/rejected": -1.051744818687439, + "loss": 3.2715, + "nll_loss": 0.812329113483429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003268459578976035, + "rewards/margins": 0.10190602391958237, + "rewards/rejected": -0.1051744893193245, + "step": 2385 + }, + { + "epoch": 1.6500691562932226, + "grad_norm": 5.613284111022949, + "learning_rate": 4.6388504687259876e-05, + "log_odds_chosen": 4.971386909484863, + "log_odds_ratio": -0.11330129951238632, + "logits/chosen": -0.5442340970039368, + "logits/rejected": -0.5847311615943909, + "logps/chosen": -0.04058442637324333, + "logps/rejected": -0.7056328654289246, + "loss": 2.706, + "nll_loss": 0.665163516998291, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004058443009853363, + "rewards/margins": 0.06650485098361969, + "rewards/rejected": -0.07056329399347305, + "step": 2386 + }, + { + "epoch": 1.6507607192254494, + "grad_norm": 5.791971683502197, + "learning_rate": 4.638466267096973e-05, + "log_odds_chosen": 5.391927242279053, + "log_odds_ratio": -0.04727129638195038, + "logits/chosen": -0.3271331191062927, + "logits/rejected": -0.35083311796188354, + "logps/chosen": -0.04603290557861328, + "logps/rejected": -1.006185531616211, + "loss": 2.1011, + "nll_loss": 0.5205403566360474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004603290930390358, + "rewards/margins": 0.09601525217294693, + "rewards/rejected": -0.10061854869127274, + "step": 2387 + }, + { + "epoch": 1.6514522821576763, + "grad_norm": 4.411108493804932, + "learning_rate": 4.638082065467958e-05, + "log_odds_chosen": 6.642820358276367, + "log_odds_ratio": -0.024102747440338135, + "logits/chosen": -0.26315993070602417, + "logits/rejected": -0.2784760892391205, + "logps/chosen": -0.014562083408236504, + "logps/rejected": -0.8451967239379883, + "loss": 2.0728, + "nll_loss": 0.5157792568206787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014562084106728435, + "rewards/margins": 0.08306346833705902, + "rewards/rejected": -0.08451966941356659, + "step": 2388 + }, + { + "epoch": 1.652143845089903, + "grad_norm": 9.01975154876709, + "learning_rate": 4.6376978638389427e-05, + "log_odds_chosen": 3.984631061553955, + "log_odds_ratio": -0.06261247396469116, + "logits/chosen": -0.7324444055557251, + "logits/rejected": -0.7960898280143738, + "logps/chosen": -0.013113527558743954, + "logps/rejected": -0.5568873286247253, + "loss": 4.219, + "nll_loss": 1.0484963655471802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013113527093082666, + "rewards/margins": 0.05437737703323364, + "rewards/rejected": -0.055688731372356415, + "step": 2389 + }, + { + "epoch": 1.65283540802213, + "grad_norm": 11.267913818359375, + "learning_rate": 4.637313662209928e-05, + "log_odds_chosen": 1.683915376663208, + "log_odds_ratio": -0.8660053014755249, + "logits/chosen": -0.41948461532592773, + "logits/rejected": -0.4554569125175476, + "logps/chosen": -0.1385391354560852, + "logps/rejected": -0.43766355514526367, + "loss": 3.0138, + "nll_loss": 0.6668479442596436, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01385391503572464, + "rewards/margins": 0.029912445694208145, + "rewards/rejected": -0.04376635700464249, + "step": 2390 + }, + { + "epoch": 1.6535269709543567, + "grad_norm": 7.12009334564209, + "learning_rate": 4.636929460580913e-05, + "log_odds_chosen": 5.1805267333984375, + "log_odds_ratio": -0.04070788249373436, + "logits/chosen": -0.5637659430503845, + "logits/rejected": -0.6139363050460815, + "logps/chosen": -0.020192958414554596, + "logps/rejected": -0.8956127166748047, + "loss": 2.5298, + "nll_loss": 0.6283823251724243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020192954689264297, + "rewards/margins": 0.08754197508096695, + "rewards/rejected": -0.08956127613782883, + "step": 2391 + }, + { + "epoch": 1.6542185338865836, + "grad_norm": 9.247042655944824, + "learning_rate": 4.6365452589518984e-05, + "log_odds_chosen": 5.03125, + "log_odds_ratio": -0.3684726357460022, + "logits/chosen": -0.8912017345428467, + "logits/rejected": -0.8966810703277588, + "logps/chosen": -0.04350098595023155, + "logps/rejected": -0.5883774757385254, + "loss": 2.8245, + "nll_loss": 0.6692676544189453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00435009878128767, + "rewards/margins": 0.05448765307664871, + "rewards/rejected": -0.05883774906396866, + "step": 2392 + }, + { + "epoch": 1.6549100968188104, + "grad_norm": 9.383896827697754, + "learning_rate": 4.636161057322883e-05, + "log_odds_chosen": 1.8873748779296875, + "log_odds_ratio": -0.37359458208084106, + "logits/chosen": -0.7230343222618103, + "logits/rejected": -0.683919370174408, + "logps/chosen": -0.09096837043762207, + "logps/rejected": -0.39955103397369385, + "loss": 3.809, + "nll_loss": 0.9149001240730286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009096836671233177, + "rewards/margins": 0.030858265236020088, + "rewards/rejected": -0.039955101907253265, + "step": 2393 + }, + { + "epoch": 1.6556016597510372, + "grad_norm": 7.632327079772949, + "learning_rate": 4.635776855693869e-05, + "log_odds_chosen": 5.199143409729004, + "log_odds_ratio": -0.06454131752252579, + "logits/chosen": -0.39764100313186646, + "logits/rejected": -0.48015543818473816, + "logps/chosen": -0.02446744777262211, + "logps/rejected": -0.7504343390464783, + "loss": 3.2063, + "nll_loss": 0.7951316833496094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024467448238283396, + "rewards/margins": 0.07259668409824371, + "rewards/rejected": -0.07504343241453171, + "step": 2394 + }, + { + "epoch": 1.656293222683264, + "grad_norm": 8.815381050109863, + "learning_rate": 4.6353926540648534e-05, + "log_odds_chosen": 1.673638939857483, + "log_odds_ratio": -0.5779725313186646, + "logits/chosen": -0.6094763875007629, + "logits/rejected": -0.6504048109054565, + "logps/chosen": -0.16714927554130554, + "logps/rejected": -0.5325227975845337, + "loss": 3.5577, + "nll_loss": 0.8316306471824646, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016714926809072495, + "rewards/margins": 0.036537349224090576, + "rewards/rejected": -0.05325227975845337, + "step": 2395 + }, + { + "epoch": 1.656984785615491, + "grad_norm": 9.138748168945312, + "learning_rate": 4.635008452435839e-05, + "log_odds_chosen": 4.83054256439209, + "log_odds_ratio": -0.1090451255440712, + "logits/chosen": -0.5976523160934448, + "logits/rejected": -0.6270238161087036, + "logps/chosen": -0.04145745560526848, + "logps/rejected": -0.7733452320098877, + "loss": 3.669, + "nll_loss": 0.9063493013381958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004145745653659105, + "rewards/margins": 0.07318878173828125, + "rewards/rejected": -0.07733452320098877, + "step": 2396 + }, + { + "epoch": 1.6576763485477177, + "grad_norm": 5.9371113777160645, + "learning_rate": 4.634624250806824e-05, + "log_odds_chosen": 6.901837348937988, + "log_odds_ratio": -0.0913933515548706, + "logits/chosen": -0.42724931240081787, + "logits/rejected": -0.4567227065563202, + "logps/chosen": -0.033262595534324646, + "logps/rejected": -1.2060177326202393, + "loss": 2.511, + "nll_loss": 0.6186057925224304, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033262595534324646, + "rewards/margins": 0.11727550625801086, + "rewards/rejected": -0.12060176581144333, + "step": 2397 + }, + { + "epoch": 1.6583679114799446, + "grad_norm": 7.880588054656982, + "learning_rate": 4.6342400491778085e-05, + "log_odds_chosen": 5.725263595581055, + "log_odds_ratio": -0.059090036898851395, + "logits/chosen": -0.5653278231620789, + "logits/rejected": -0.6397727727890015, + "logps/chosen": -0.027447620406746864, + "logps/rejected": -1.0369272232055664, + "loss": 2.8997, + "nll_loss": 0.719023585319519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027447622269392014, + "rewards/margins": 0.10094796866178513, + "rewards/rejected": -0.10369272530078888, + "step": 2398 + }, + { + "epoch": 1.6590594744121714, + "grad_norm": 8.58713150024414, + "learning_rate": 4.633855847548794e-05, + "log_odds_chosen": 2.910616874694824, + "log_odds_ratio": -0.32300323247909546, + "logits/chosen": -0.6054437160491943, + "logits/rejected": -0.6225858330726624, + "logps/chosen": -0.09555420279502869, + "logps/rejected": -0.6819297075271606, + "loss": 4.6391, + "nll_loss": 1.127477765083313, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009555420838296413, + "rewards/margins": 0.058637551963329315, + "rewards/rejected": -0.0681929737329483, + "step": 2399 + }, + { + "epoch": 1.6597510373443982, + "grad_norm": 8.066679954528809, + "learning_rate": 4.633471645919779e-05, + "log_odds_chosen": 3.3613734245300293, + "log_odds_ratio": -0.4150410592556, + "logits/chosen": -0.6153652667999268, + "logits/rejected": -0.6782889366149902, + "logps/chosen": -0.10744252055883408, + "logps/rejected": -0.7856386303901672, + "loss": 2.9875, + "nll_loss": 0.7053636312484741, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010744252242147923, + "rewards/margins": 0.06781961023807526, + "rewards/rejected": -0.0785638615489006, + "step": 2400 + }, + { + "epoch": 1.660442600276625, + "grad_norm": 6.530764102935791, + "learning_rate": 4.633087444290764e-05, + "log_odds_chosen": 3.1723811626434326, + "log_odds_ratio": -0.26908573508262634, + "logits/chosen": -0.5162174701690674, + "logits/rejected": -0.5230663418769836, + "logps/chosen": -0.07934844493865967, + "logps/rejected": -0.7269495725631714, + "loss": 3.0732, + "nll_loss": 0.741379976272583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007934845052659512, + "rewards/margins": 0.06476011872291565, + "rewards/rejected": -0.07269495725631714, + "step": 2401 + }, + { + "epoch": 1.6611341632088519, + "grad_norm": 9.620217323303223, + "learning_rate": 4.632703242661749e-05, + "log_odds_chosen": 1.2391670942306519, + "log_odds_ratio": -0.5796663165092468, + "logits/chosen": -0.7345383167266846, + "logits/rejected": -0.8077020049095154, + "logps/chosen": -0.23129664361476898, + "logps/rejected": -0.3796681761741638, + "loss": 4.2852, + "nll_loss": 1.013323426246643, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023129666224122047, + "rewards/margins": 0.014837154187262058, + "rewards/rejected": -0.03796681761741638, + "step": 2402 + }, + { + "epoch": 1.6618257261410787, + "grad_norm": 7.168506145477295, + "learning_rate": 4.632319041032735e-05, + "log_odds_chosen": 4.937325477600098, + "log_odds_ratio": -0.08248946070671082, + "logits/chosen": -0.7550560235977173, + "logits/rejected": -0.7097613215446472, + "logps/chosen": -0.02854396589100361, + "logps/rejected": -0.46548527479171753, + "loss": 3.3639, + "nll_loss": 0.8327199220657349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002854396589100361, + "rewards/margins": 0.04369413107633591, + "rewards/rejected": -0.04654853045940399, + "step": 2403 + }, + { + "epoch": 1.6625172890733055, + "grad_norm": 5.750540733337402, + "learning_rate": 4.631934839403719e-05, + "log_odds_chosen": 3.0646469593048096, + "log_odds_ratio": -0.09252282977104187, + "logits/chosen": -0.2593063712120056, + "logits/rejected": -0.2871415317058563, + "logps/chosen": -0.0420021153986454, + "logps/rejected": -0.5545371174812317, + "loss": 2.751, + "nll_loss": 0.6785101890563965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004200211726129055, + "rewards/margins": 0.05125350132584572, + "rewards/rejected": -0.055453717708587646, + "step": 2404 + }, + { + "epoch": 1.6632088520055324, + "grad_norm": 7.526987552642822, + "learning_rate": 4.6315506377747045e-05, + "log_odds_chosen": 5.977910995483398, + "log_odds_ratio": -0.01979386806488037, + "logits/chosen": -0.4784207344055176, + "logits/rejected": -0.6178550720214844, + "logps/chosen": -0.0163608156144619, + "logps/rejected": -1.1045422554016113, + "loss": 2.767, + "nll_loss": 0.6897709369659424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016360816080123186, + "rewards/margins": 0.1088181585073471, + "rewards/rejected": -0.11045423895120621, + "step": 2405 + }, + { + "epoch": 1.6639004149377592, + "grad_norm": 9.162591934204102, + "learning_rate": 4.63116643614569e-05, + "log_odds_chosen": 7.101935386657715, + "log_odds_ratio": -0.01733310893177986, + "logits/chosen": -0.4071503281593323, + "logits/rejected": -0.44532567262649536, + "logps/chosen": -0.013491926714777946, + "logps/rejected": -1.055969476699829, + "loss": 3.5565, + "nll_loss": 0.887387752532959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013491928111761808, + "rewards/margins": 0.10424774885177612, + "rewards/rejected": -0.10559694468975067, + "step": 2406 + }, + { + "epoch": 1.664591977869986, + "grad_norm": 4.412231922149658, + "learning_rate": 4.6307822345166743e-05, + "log_odds_chosen": 2.818786859512329, + "log_odds_ratio": -0.29969677329063416, + "logits/chosen": -0.3059843182563782, + "logits/rejected": -0.3522808849811554, + "logps/chosen": -0.08872860670089722, + "logps/rejected": -0.8007669448852539, + "loss": 2.4794, + "nll_loss": 0.5898890495300293, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008872861042618752, + "rewards/margins": 0.07120384275913239, + "rewards/rejected": -0.08007669448852539, + "step": 2407 + }, + { + "epoch": 1.6652835408022129, + "grad_norm": 8.510326385498047, + "learning_rate": 4.6303980328876596e-05, + "log_odds_chosen": 2.5440433025360107, + "log_odds_ratio": -0.28360381722450256, + "logits/chosen": -0.4862896800041199, + "logits/rejected": -0.501765251159668, + "logps/chosen": -0.10348738729953766, + "logps/rejected": -0.6458582878112793, + "loss": 4.2624, + "nll_loss": 1.0372350215911865, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010348739102482796, + "rewards/margins": 0.054237090051174164, + "rewards/rejected": -0.06458583474159241, + "step": 2408 + }, + { + "epoch": 1.6659751037344397, + "grad_norm": 17.897363662719727, + "learning_rate": 4.630013831258645e-05, + "log_odds_chosen": 0.06756290793418884, + "log_odds_ratio": -0.9511553049087524, + "logits/chosen": -0.23593562841415405, + "logits/rejected": -0.2747032940387726, + "logps/chosen": -0.26998746395111084, + "logps/rejected": -0.21665412187576294, + "loss": 2.909, + "nll_loss": 0.6321403980255127, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.026998747140169144, + "rewards/margins": -0.005333336070179939, + "rewards/rejected": -0.021665412932634354, + "step": 2409 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 10.7543306350708, + "learning_rate": 4.62962962962963e-05, + "log_odds_chosen": 5.174543380737305, + "log_odds_ratio": -0.14928941428661346, + "logits/chosen": -0.45020610094070435, + "logits/rejected": -0.44557055830955505, + "logps/chosen": -0.12285329401493073, + "logps/rejected": -1.2492525577545166, + "loss": 2.964, + "nll_loss": 0.7260616421699524, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012285329401493073, + "rewards/margins": 0.11263993382453918, + "rewards/rejected": -0.12492527067661285, + "step": 2410 + }, + { + "epoch": 1.6673582295988933, + "grad_norm": 6.037133693695068, + "learning_rate": 4.6292454280006146e-05, + "log_odds_chosen": 3.9803225994110107, + "log_odds_ratio": -0.06571348756551743, + "logits/chosen": -0.21116182208061218, + "logits/rejected": -0.24481798708438873, + "logps/chosen": -0.037533070892095566, + "logps/rejected": -0.748488187789917, + "loss": 2.9994, + "nll_loss": 0.7432898879051208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037533072754740715, + "rewards/margins": 0.07109551131725311, + "rewards/rejected": -0.07484881579875946, + "step": 2411 + }, + { + "epoch": 1.6680497925311202, + "grad_norm": 6.9043498039245605, + "learning_rate": 4.6288612263716006e-05, + "log_odds_chosen": 6.687690734863281, + "log_odds_ratio": -0.03177565708756447, + "logits/chosen": -0.5768988728523254, + "logits/rejected": -0.6625084280967712, + "logps/chosen": -0.010495376773178577, + "logps/rejected": -1.171600341796875, + "loss": 2.7179, + "nll_loss": 0.6762967705726624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010495376773178577, + "rewards/margins": 0.11611049622297287, + "rewards/rejected": -0.11716003715991974, + "step": 2412 + }, + { + "epoch": 1.668741355463347, + "grad_norm": 7.08948278427124, + "learning_rate": 4.628477024742585e-05, + "log_odds_chosen": 3.832082748413086, + "log_odds_ratio": -0.317456990480423, + "logits/chosen": -0.5864173769950867, + "logits/rejected": -0.5810104012489319, + "logps/chosen": -0.08993137627840042, + "logps/rejected": -0.7754663825035095, + "loss": 2.4216, + "nll_loss": 0.5736429691314697, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008993137627840042, + "rewards/margins": 0.06855350732803345, + "rewards/rejected": -0.07754664123058319, + "step": 2413 + }, + { + "epoch": 1.6694329183955738, + "grad_norm": 6.609368801116943, + "learning_rate": 4.6280928231135704e-05, + "log_odds_chosen": 4.299071311950684, + "log_odds_ratio": -0.1827426552772522, + "logits/chosen": -0.7438517212867737, + "logits/rejected": -0.7562617063522339, + "logps/chosen": -0.08332640677690506, + "logps/rejected": -0.8302958607673645, + "loss": 3.964, + "nll_loss": 0.9727151393890381, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008332639932632446, + "rewards/margins": 0.07469694316387177, + "rewards/rejected": -0.08302959054708481, + "step": 2414 + }, + { + "epoch": 1.6701244813278007, + "grad_norm": 8.660046577453613, + "learning_rate": 4.6277086214845556e-05, + "log_odds_chosen": 2.9852561950683594, + "log_odds_ratio": -0.3320246934890747, + "logits/chosen": -0.5786329507827759, + "logits/rejected": -0.629688560962677, + "logps/chosen": -0.11012428253889084, + "logps/rejected": -0.7393942475318909, + "loss": 3.0382, + "nll_loss": 0.7263439297676086, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011012429371476173, + "rewards/margins": 0.0629269927740097, + "rewards/rejected": -0.07393942773342133, + "step": 2415 + }, + { + "epoch": 1.6708160442600275, + "grad_norm": 6.166026592254639, + "learning_rate": 4.62732441985554e-05, + "log_odds_chosen": 5.47209358215332, + "log_odds_ratio": -0.047553643584251404, + "logits/chosen": -0.7303832173347473, + "logits/rejected": -0.7512546181678772, + "logps/chosen": -0.052451252937316895, + "logps/rejected": -1.2305645942687988, + "loss": 3.5086, + "nll_loss": 0.8724021911621094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005245125386863947, + "rewards/margins": 0.11781134456396103, + "rewards/rejected": -0.12305645644664764, + "step": 2416 + }, + { + "epoch": 1.6715076071922543, + "grad_norm": 13.398856163024902, + "learning_rate": 4.6269402182265254e-05, + "log_odds_chosen": 3.6622700691223145, + "log_odds_ratio": -0.12322408705949783, + "logits/chosen": -0.7097039222717285, + "logits/rejected": -0.7724823355674744, + "logps/chosen": -0.058933380991220474, + "logps/rejected": -0.8197494149208069, + "loss": 4.7999, + "nll_loss": 1.1876574754714966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00589333800598979, + "rewards/margins": 0.07608160376548767, + "rewards/rejected": -0.08197493851184845, + "step": 2417 + }, + { + "epoch": 1.6721991701244814, + "grad_norm": 7.694314956665039, + "learning_rate": 4.626556016597511e-05, + "log_odds_chosen": 3.396709442138672, + "log_odds_ratio": -0.126684308052063, + "logits/chosen": -0.5527836680412292, + "logits/rejected": -0.635554313659668, + "logps/chosen": -0.041724156588315964, + "logps/rejected": -0.5722861289978027, + "loss": 2.9894, + "nll_loss": 0.7346909046173096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004172415938228369, + "rewards/margins": 0.05305619537830353, + "rewards/rejected": -0.057228609919548035, + "step": 2418 + }, + { + "epoch": 1.6728907330567082, + "grad_norm": 8.397724151611328, + "learning_rate": 4.626171814968496e-05, + "log_odds_chosen": 5.756424903869629, + "log_odds_ratio": -0.25328224897384644, + "logits/chosen": -0.27321773767471313, + "logits/rejected": -0.2953091263771057, + "logps/chosen": -0.013859656639397144, + "logps/rejected": -1.0872235298156738, + "loss": 2.2151, + "nll_loss": 0.5284467339515686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0013859656173735857, + "rewards/margins": 0.1073363870382309, + "rewards/rejected": -0.10872235894203186, + "step": 2419 + }, + { + "epoch": 1.673582295988935, + "grad_norm": 7.1727681159973145, + "learning_rate": 4.6257876133394805e-05, + "log_odds_chosen": 4.892050266265869, + "log_odds_ratio": -0.08339997380971909, + "logits/chosen": -0.4896528124809265, + "logits/rejected": -0.5178585052490234, + "logps/chosen": -0.03309309482574463, + "logps/rejected": -0.8350263833999634, + "loss": 4.1185, + "nll_loss": 1.0212879180908203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003309309482574463, + "rewards/margins": 0.08019333332777023, + "rewards/rejected": -0.0835026428103447, + "step": 2420 + }, + { + "epoch": 1.6742738589211619, + "grad_norm": 6.350358009338379, + "learning_rate": 4.6254034117104664e-05, + "log_odds_chosen": 4.816648483276367, + "log_odds_ratio": -0.04416489228606224, + "logits/chosen": -0.6494845151901245, + "logits/rejected": -0.7247365117073059, + "logps/chosen": -0.03898897394537926, + "logps/rejected": -0.8093506097793579, + "loss": 2.7141, + "nll_loss": 0.6741119027137756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003898897208273411, + "rewards/margins": 0.07703615725040436, + "rewards/rejected": -0.08093506097793579, + "step": 2421 + }, + { + "epoch": 1.6749654218533887, + "grad_norm": 6.809445858001709, + "learning_rate": 4.625019210081451e-05, + "log_odds_chosen": 6.252318382263184, + "log_odds_ratio": -0.02218172326683998, + "logits/chosen": -0.20815208554267883, + "logits/rejected": -0.22089838981628418, + "logps/chosen": -0.03607087954878807, + "logps/rejected": -0.9978293180465698, + "loss": 3.4035, + "nll_loss": 0.8486486077308655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036070882342755795, + "rewards/margins": 0.09617584943771362, + "rewards/rejected": -0.09978292882442474, + "step": 2422 + }, + { + "epoch": 1.6756569847856155, + "grad_norm": 8.937105178833008, + "learning_rate": 4.624635008452436e-05, + "log_odds_chosen": 4.806909561157227, + "log_odds_ratio": -0.358302503824234, + "logits/chosen": -0.48498135805130005, + "logits/rejected": -0.5562124252319336, + "logps/chosen": -0.15368816256523132, + "logps/rejected": -1.0874075889587402, + "loss": 2.9095, + "nll_loss": 0.6915530562400818, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015368817374110222, + "rewards/margins": 0.09337194263935089, + "rewards/rejected": -0.10874076187610626, + "step": 2423 + }, + { + "epoch": 1.6763485477178424, + "grad_norm": 11.30183219909668, + "learning_rate": 4.6242508068234215e-05, + "log_odds_chosen": 5.33615779876709, + "log_odds_ratio": -0.16340480744838715, + "logits/chosen": -0.8020769357681274, + "logits/rejected": -0.8382692337036133, + "logps/chosen": -0.08057919889688492, + "logps/rejected": -0.9394962191581726, + "loss": 3.2915, + "nll_loss": 0.8065407276153564, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008057920262217522, + "rewards/margins": 0.0858917087316513, + "rewards/rejected": -0.09394963085651398, + "step": 2424 + }, + { + "epoch": 1.6770401106500692, + "grad_norm": 7.548745632171631, + "learning_rate": 4.623866605194406e-05, + "log_odds_chosen": 5.641702651977539, + "log_odds_ratio": -0.1668786108493805, + "logits/chosen": -0.6750974059104919, + "logits/rejected": -0.6827152967453003, + "logps/chosen": -0.057045768946409225, + "logps/rejected": -1.1087830066680908, + "loss": 3.5121, + "nll_loss": 0.8613306283950806, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0057045770809054375, + "rewards/margins": 0.1051737368106842, + "rewards/rejected": -0.11087830364704132, + "step": 2425 + }, + { + "epoch": 1.677731673582296, + "grad_norm": 10.312768936157227, + "learning_rate": 4.623482403565391e-05, + "log_odds_chosen": 5.615530967712402, + "log_odds_ratio": -0.10865119099617004, + "logits/chosen": -0.7960000038146973, + "logits/rejected": -0.8431391716003418, + "logps/chosen": -0.042798854410648346, + "logps/rejected": -0.9962965846061707, + "loss": 3.816, + "nll_loss": 0.9431357979774475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004279885906726122, + "rewards/margins": 0.09534977376461029, + "rewards/rejected": -0.09962965548038483, + "step": 2426 + }, + { + "epoch": 1.6784232365145229, + "grad_norm": 5.688529014587402, + "learning_rate": 4.6230982019363765e-05, + "log_odds_chosen": 6.757950782775879, + "log_odds_ratio": -0.027478059753775597, + "logits/chosen": -0.5317009687423706, + "logits/rejected": -0.5833687782287598, + "logps/chosen": -0.013652315363287926, + "logps/rejected": -1.1175298690795898, + "loss": 1.7655, + "nll_loss": 0.4386347234249115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013652315828949213, + "rewards/margins": 0.11038775742053986, + "rewards/rejected": -0.11175297945737839, + "step": 2427 + }, + { + "epoch": 1.6791147994467497, + "grad_norm": 7.741587162017822, + "learning_rate": 4.622714000307362e-05, + "log_odds_chosen": 3.574104070663452, + "log_odds_ratio": -0.1316196471452713, + "logits/chosen": -0.7742865085601807, + "logits/rejected": -0.7557849287986755, + "logps/chosen": -0.03781423345208168, + "logps/rejected": -0.6451917290687561, + "loss": 3.8846, + "nll_loss": 0.9579886198043823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037814234383404255, + "rewards/margins": 0.06073775142431259, + "rewards/rejected": -0.06451917439699173, + "step": 2428 + }, + { + "epoch": 1.6798063623789765, + "grad_norm": 18.183177947998047, + "learning_rate": 4.622329798678346e-05, + "log_odds_chosen": 3.759333610534668, + "log_odds_ratio": -0.7283949255943298, + "logits/chosen": -0.6545952558517456, + "logits/rejected": -0.6655271053314209, + "logps/chosen": -0.10793383419513702, + "logps/rejected": -0.7000494599342346, + "loss": 3.6952, + "nll_loss": 0.8509517908096313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010793383233249187, + "rewards/margins": 0.05921155586838722, + "rewards/rejected": -0.07000494748353958, + "step": 2429 + }, + { + "epoch": 1.6804979253112033, + "grad_norm": 9.159822463989258, + "learning_rate": 4.621945597049332e-05, + "log_odds_chosen": 3.761561870574951, + "log_odds_ratio": -0.26112616062164307, + "logits/chosen": -0.45438137650489807, + "logits/rejected": -0.5548147559165955, + "logps/chosen": -0.029453417286276817, + "logps/rejected": -0.7845432162284851, + "loss": 2.6587, + "nll_loss": 0.6385586261749268, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029453416354954243, + "rewards/margins": 0.0755089819431305, + "rewards/rejected": -0.07845432311296463, + "step": 2430 + }, + { + "epoch": 1.6811894882434302, + "grad_norm": 10.218520164489746, + "learning_rate": 4.621561395420317e-05, + "log_odds_chosen": 5.348668098449707, + "log_odds_ratio": -0.05290871858596802, + "logits/chosen": -0.37899768352508545, + "logits/rejected": -0.4649916887283325, + "logps/chosen": -0.023256096988916397, + "logps/rejected": -0.9848682284355164, + "loss": 3.9494, + "nll_loss": 0.9820523262023926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002325609792023897, + "rewards/margins": 0.09616121649742126, + "rewards/rejected": -0.09848682582378387, + "step": 2431 + }, + { + "epoch": 1.681881051175657, + "grad_norm": 8.257439613342285, + "learning_rate": 4.621177193791302e-05, + "log_odds_chosen": 3.942685127258301, + "log_odds_ratio": -0.16259554028511047, + "logits/chosen": -0.5742173194885254, + "logits/rejected": -0.6128363609313965, + "logps/chosen": -0.03621543198823929, + "logps/rejected": -0.5569218993186951, + "loss": 3.7442, + "nll_loss": 0.9197925329208374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036215432919561863, + "rewards/margins": 0.052070651203393936, + "rewards/rejected": -0.055692195892333984, + "step": 2432 + }, + { + "epoch": 1.6825726141078838, + "grad_norm": 7.5304460525512695, + "learning_rate": 4.620792992162287e-05, + "log_odds_chosen": 5.471726894378662, + "log_odds_ratio": -0.12540628015995026, + "logits/chosen": -0.29969680309295654, + "logits/rejected": -0.35970044136047363, + "logps/chosen": -0.07335058599710464, + "logps/rejected": -0.9492495059967041, + "loss": 2.1545, + "nll_loss": 0.5260959267616272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007335058879107237, + "rewards/margins": 0.08758989721536636, + "rewards/rejected": -0.09492494910955429, + "step": 2433 + }, + { + "epoch": 1.6832641770401107, + "grad_norm": 7.556035041809082, + "learning_rate": 4.620408790533272e-05, + "log_odds_chosen": 4.8540778160095215, + "log_odds_ratio": -0.09760741144418716, + "logits/chosen": -0.7015647292137146, + "logits/rejected": -0.7208361625671387, + "logps/chosen": -0.019324539229273796, + "logps/rejected": -0.9123736619949341, + "loss": 2.3607, + "nll_loss": 0.5804073214530945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019324538297951221, + "rewards/margins": 0.08930491656064987, + "rewards/rejected": -0.091237373650074, + "step": 2434 + }, + { + "epoch": 1.6839557399723375, + "grad_norm": 13.672472953796387, + "learning_rate": 4.620024588904257e-05, + "log_odds_chosen": 4.359309196472168, + "log_odds_ratio": -0.4574434757232666, + "logits/chosen": -0.6222982406616211, + "logits/rejected": -0.6353976726531982, + "logps/chosen": -0.08387883752584457, + "logps/rejected": -0.6800822019577026, + "loss": 3.1069, + "nll_loss": 0.7309712171554565, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008387884125113487, + "rewards/margins": 0.059620339423418045, + "rewards/rejected": -0.06800822913646698, + "step": 2435 + }, + { + "epoch": 1.6846473029045643, + "grad_norm": 10.09824275970459, + "learning_rate": 4.6196403872752424e-05, + "log_odds_chosen": 5.2099432945251465, + "log_odds_ratio": -0.1256905496120453, + "logits/chosen": -0.7678384184837341, + "logits/rejected": -0.8608188033103943, + "logps/chosen": -0.09678126126527786, + "logps/rejected": -1.3110018968582153, + "loss": 3.2771, + "nll_loss": 0.806714653968811, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009678126312792301, + "rewards/margins": 0.12142205238342285, + "rewards/rejected": -0.13110017776489258, + "step": 2436 + }, + { + "epoch": 1.6853388658367912, + "grad_norm": 5.360626697540283, + "learning_rate": 4.6192561856462276e-05, + "log_odds_chosen": 3.714616060256958, + "log_odds_ratio": -0.08257103711366653, + "logits/chosen": -0.642780065536499, + "logits/rejected": -0.7198878526687622, + "logps/chosen": -0.07956398278474808, + "logps/rejected": -0.8394503593444824, + "loss": 2.3946, + "nll_loss": 0.5903887152671814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007956398651003838, + "rewards/margins": 0.07598863542079926, + "rewards/rejected": -0.08394503593444824, + "step": 2437 + }, + { + "epoch": 1.686030428769018, + "grad_norm": 5.9998860359191895, + "learning_rate": 4.618871984017212e-05, + "log_odds_chosen": 4.092153549194336, + "log_odds_ratio": -0.14713715016841888, + "logits/chosen": -0.4301367700099945, + "logits/rejected": -0.46247437596321106, + "logps/chosen": -0.05428977310657501, + "logps/rejected": -0.6135324835777283, + "loss": 3.679, + "nll_loss": 0.9050412178039551, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005428977310657501, + "rewards/margins": 0.055924274027347565, + "rewards/rejected": -0.061353251338005066, + "step": 2438 + }, + { + "epoch": 1.6867219917012448, + "grad_norm": 7.3191609382629395, + "learning_rate": 4.618487782388198e-05, + "log_odds_chosen": 5.921332359313965, + "log_odds_ratio": -0.03245503827929497, + "logits/chosen": -0.4242483675479889, + "logits/rejected": -0.4684517979621887, + "logps/chosen": -0.010253867134451866, + "logps/rejected": -0.8007473349571228, + "loss": 2.8552, + "nll_loss": 0.7105446457862854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010253868531435728, + "rewards/margins": 0.07904934883117676, + "rewards/rejected": -0.0800747349858284, + "step": 2439 + }, + { + "epoch": 1.6874135546334716, + "grad_norm": 9.23619270324707, + "learning_rate": 4.618103580759183e-05, + "log_odds_chosen": 5.425571441650391, + "log_odds_ratio": -0.24814097583293915, + "logits/chosen": -0.44754737615585327, + "logits/rejected": -0.5392141938209534, + "logps/chosen": -0.052859995514154434, + "logps/rejected": -1.1107946634292603, + "loss": 3.7001, + "nll_loss": 0.9002161026000977, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005285999272018671, + "rewards/margins": 0.10579346120357513, + "rewards/rejected": -0.11107945442199707, + "step": 2440 + }, + { + "epoch": 1.6881051175656985, + "grad_norm": 6.046574115753174, + "learning_rate": 4.617719379130168e-05, + "log_odds_chosen": 4.892881393432617, + "log_odds_ratio": -0.0677889883518219, + "logits/chosen": -0.29898586869239807, + "logits/rejected": -0.3219645321369171, + "logps/chosen": -0.05868750810623169, + "logps/rejected": -1.0086970329284668, + "loss": 2.9673, + "nll_loss": 0.7350428700447083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005868750624358654, + "rewards/margins": 0.09500095993280411, + "rewards/rejected": -0.10086971521377563, + "step": 2441 + }, + { + "epoch": 1.6887966804979253, + "grad_norm": 5.991647720336914, + "learning_rate": 4.617335177501153e-05, + "log_odds_chosen": 3.6196436882019043, + "log_odds_ratio": -0.32258835434913635, + "logits/chosen": -0.5833138823509216, + "logits/rejected": -0.6411206126213074, + "logps/chosen": -0.12731850147247314, + "logps/rejected": -0.4984019994735718, + "loss": 2.7453, + "nll_loss": 0.6540665626525879, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012731850147247314, + "rewards/margins": 0.03710835054516792, + "rewards/rejected": -0.04984020069241524, + "step": 2442 + }, + { + "epoch": 1.6894882434301521, + "grad_norm": 9.416582107543945, + "learning_rate": 4.616950975872138e-05, + "log_odds_chosen": 2.8410861492156982, + "log_odds_ratio": -0.3418985903263092, + "logits/chosen": -0.447319358587265, + "logits/rejected": -0.49639326333999634, + "logps/chosen": -0.1420106589794159, + "logps/rejected": -0.5109445452690125, + "loss": 2.9645, + "nll_loss": 0.7069295644760132, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014201066456735134, + "rewards/margins": 0.036893390119075775, + "rewards/rejected": -0.051094453781843185, + "step": 2443 + }, + { + "epoch": 1.690179806362379, + "grad_norm": 6.755992412567139, + "learning_rate": 4.616566774243123e-05, + "log_odds_chosen": 5.548319339752197, + "log_odds_ratio": -0.2713317573070526, + "logits/chosen": -0.22183842957019806, + "logits/rejected": -0.1871333122253418, + "logps/chosen": -0.032315757125616074, + "logps/rejected": -0.8459647297859192, + "loss": 2.0004, + "nll_loss": 0.47296953201293945, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0032315754797309637, + "rewards/margins": 0.08136489987373352, + "rewards/rejected": -0.08459647744894028, + "step": 2444 + }, + { + "epoch": 1.6908713692946058, + "grad_norm": 8.874152183532715, + "learning_rate": 4.616182572614108e-05, + "log_odds_chosen": 3.405881881713867, + "log_odds_ratio": -0.4702404737472534, + "logits/chosen": -0.41794833540916443, + "logits/rejected": -0.4229547679424286, + "logps/chosen": -0.0915827602148056, + "logps/rejected": -0.6083066463470459, + "loss": 2.9032, + "nll_loss": 0.678780198097229, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00915827602148056, + "rewards/margins": 0.05167239159345627, + "rewards/rejected": -0.06083066761493683, + "step": 2445 + }, + { + "epoch": 1.6915629322268326, + "grad_norm": 5.671358585357666, + "learning_rate": 4.6157983709850935e-05, + "log_odds_chosen": 4.28201961517334, + "log_odds_ratio": -0.14261570572853088, + "logits/chosen": -0.6459827423095703, + "logits/rejected": -0.5838777422904968, + "logps/chosen": -0.1007598340511322, + "logps/rejected": -1.0819426774978638, + "loss": 2.9352, + "nll_loss": 0.7195260524749756, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010075983591377735, + "rewards/margins": 0.09811828285455704, + "rewards/rejected": -0.1081942766904831, + "step": 2446 + }, + { + "epoch": 1.6922544951590595, + "grad_norm": 7.028148651123047, + "learning_rate": 4.615414169356078e-05, + "log_odds_chosen": 5.338274002075195, + "log_odds_ratio": -0.31613755226135254, + "logits/chosen": -0.38394150137901306, + "logits/rejected": -0.4851318597793579, + "logps/chosen": -0.04378465563058853, + "logps/rejected": -0.7531493902206421, + "loss": 1.9853, + "nll_loss": 0.4647136628627777, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004378465469926596, + "rewards/margins": 0.07093648612499237, + "rewards/rejected": -0.0753149464726448, + "step": 2447 + }, + { + "epoch": 1.6929460580912863, + "grad_norm": 6.10004186630249, + "learning_rate": 4.615029967727064e-05, + "log_odds_chosen": 5.716916084289551, + "log_odds_ratio": -0.22165924310684204, + "logits/chosen": -0.6277919411659241, + "logits/rejected": -0.7794336080551147, + "logps/chosen": -0.09859595447778702, + "logps/rejected": -0.8822535276412964, + "loss": 2.3748, + "nll_loss": 0.5715370774269104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009859594516456127, + "rewards/margins": 0.078365758061409, + "rewards/rejected": -0.088225357234478, + "step": 2448 + }, + { + "epoch": 1.6936376210235131, + "grad_norm": 7.531747341156006, + "learning_rate": 4.6146457660980485e-05, + "log_odds_chosen": 5.302791595458984, + "log_odds_ratio": -0.1415976583957672, + "logits/chosen": -0.510055422782898, + "logits/rejected": -0.5041730403900146, + "logps/chosen": -0.06312351673841476, + "logps/rejected": -1.0165313482284546, + "loss": 2.7517, + "nll_loss": 0.6737696528434753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0063123516738414764, + "rewards/margins": 0.0953407809138298, + "rewards/rejected": -0.10165313631296158, + "step": 2449 + }, + { + "epoch": 1.69432918395574, + "grad_norm": 9.723373413085938, + "learning_rate": 4.614261564469034e-05, + "log_odds_chosen": 4.904651165008545, + "log_odds_ratio": -0.5076399445533752, + "logits/chosen": -0.44497087597846985, + "logits/rejected": -0.4860677719116211, + "logps/chosen": -0.07396794855594635, + "logps/rejected": -0.9484530687332153, + "loss": 4.3168, + "nll_loss": 1.0284472703933716, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007396795321255922, + "rewards/margins": 0.08744850754737854, + "rewards/rejected": -0.09484530985355377, + "step": 2450 + }, + { + "epoch": 1.6950207468879668, + "grad_norm": 9.681573867797852, + "learning_rate": 4.613877362840019e-05, + "log_odds_chosen": 3.0576236248016357, + "log_odds_ratio": -0.08652878552675247, + "logits/chosen": -0.8884795904159546, + "logits/rejected": -0.9039291143417358, + "logps/chosen": -0.09103421866893768, + "logps/rejected": -0.9812101721763611, + "loss": 3.8463, + "nll_loss": 0.9529250860214233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009103422984480858, + "rewards/margins": 0.0890175923705101, + "rewards/rejected": -0.09812101721763611, + "step": 2451 + }, + { + "epoch": 1.6957123098201936, + "grad_norm": 4.27262020111084, + "learning_rate": 4.6134931612110036e-05, + "log_odds_chosen": 4.734068393707275, + "log_odds_ratio": -0.29768267273902893, + "logits/chosen": -0.5565181970596313, + "logits/rejected": -0.5211176872253418, + "logps/chosen": -0.05631481856107712, + "logps/rejected": -0.7082623839378357, + "loss": 2.351, + "nll_loss": 0.5579749345779419, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005631481762975454, + "rewards/margins": 0.0651947632431984, + "rewards/rejected": -0.07082624733448029, + "step": 2452 + }, + { + "epoch": 1.6964038727524204, + "grad_norm": 10.028276443481445, + "learning_rate": 4.613108959581989e-05, + "log_odds_chosen": 2.7507269382476807, + "log_odds_ratio": -0.4191727638244629, + "logits/chosen": -0.4020436406135559, + "logits/rejected": -0.4461291432380676, + "logps/chosen": -0.10786256194114685, + "logps/rejected": -0.7390084862709045, + "loss": 3.349, + "nll_loss": 0.7953333258628845, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01078625675290823, + "rewards/margins": 0.06311459839344025, + "rewards/rejected": -0.07390085607767105, + "step": 2453 + }, + { + "epoch": 1.6970954356846473, + "grad_norm": 6.700299263000488, + "learning_rate": 4.612724757952974e-05, + "log_odds_chosen": 4.201428413391113, + "log_odds_ratio": -0.072795569896698, + "logits/chosen": -0.6352698802947998, + "logits/rejected": -0.6624408960342407, + "logps/chosen": -0.029072128236293793, + "logps/rejected": -0.6273342370986938, + "loss": 2.9228, + "nll_loss": 0.7234126329421997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029072128236293793, + "rewards/margins": 0.05982620269060135, + "rewards/rejected": -0.06273341923952103, + "step": 2454 + }, + { + "epoch": 1.697786998616874, + "grad_norm": 6.6374664306640625, + "learning_rate": 4.612340556323959e-05, + "log_odds_chosen": 4.870717525482178, + "log_odds_ratio": -0.17836816608905792, + "logits/chosen": -0.7230358123779297, + "logits/rejected": -0.7389208078384399, + "logps/chosen": -0.058723971247673035, + "logps/rejected": -0.7045331001281738, + "loss": 2.9063, + "nll_loss": 0.7087265253067017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005872397683560848, + "rewards/margins": 0.06458091735839844, + "rewards/rejected": -0.07045331597328186, + "step": 2455 + }, + { + "epoch": 1.698478561549101, + "grad_norm": 7.5409979820251465, + "learning_rate": 4.611956354694944e-05, + "log_odds_chosen": 2.510221242904663, + "log_odds_ratio": -0.3023854196071625, + "logits/chosen": -0.7858247756958008, + "logits/rejected": -0.8371924161911011, + "logps/chosen": -0.11306092888116837, + "logps/rejected": -0.5818637609481812, + "loss": 3.1074, + "nll_loss": 0.7466117143630981, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011306094005703926, + "rewards/margins": 0.04688028246164322, + "rewards/rejected": -0.058186378329992294, + "step": 2456 + }, + { + "epoch": 1.6991701244813278, + "grad_norm": 4.992187976837158, + "learning_rate": 4.61157215306593e-05, + "log_odds_chosen": 3.4826698303222656, + "log_odds_ratio": -0.15915066003799438, + "logits/chosen": -0.5362116098403931, + "logits/rejected": -0.6215125918388367, + "logps/chosen": -0.08510999381542206, + "logps/rejected": -0.7648427486419678, + "loss": 2.3932, + "nll_loss": 0.5823809504508972, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008510999381542206, + "rewards/margins": 0.06797328591346741, + "rewards/rejected": -0.07648427784442902, + "step": 2457 + }, + { + "epoch": 1.6998616874135546, + "grad_norm": 5.758655071258545, + "learning_rate": 4.6111879514369144e-05, + "log_odds_chosen": 3.8834996223449707, + "log_odds_ratio": -0.20648881793022156, + "logits/chosen": -0.38048383593559265, + "logits/rejected": -0.33826085925102234, + "logps/chosen": -0.07839153707027435, + "logps/rejected": -0.5018714666366577, + "loss": 2.7401, + "nll_loss": 0.6643880605697632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007839154452085495, + "rewards/margins": 0.0423479899764061, + "rewards/rejected": -0.05018714442849159, + "step": 2458 + }, + { + "epoch": 1.7005532503457814, + "grad_norm": 6.271485805511475, + "learning_rate": 4.6108037498078996e-05, + "log_odds_chosen": 3.3183982372283936, + "log_odds_ratio": -0.2391716092824936, + "logits/chosen": -0.807701826095581, + "logits/rejected": -0.8435157537460327, + "logps/chosen": -0.08216782659292221, + "logps/rejected": -0.6454704403877258, + "loss": 3.6022, + "nll_loss": 0.8766371011734009, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00821678340435028, + "rewards/margins": 0.05633026361465454, + "rewards/rejected": -0.06454704701900482, + "step": 2459 + }, + { + "epoch": 1.7012448132780082, + "grad_norm": 12.49203109741211, + "learning_rate": 4.610419548178885e-05, + "log_odds_chosen": 3.415001392364502, + "log_odds_ratio": -0.1704428493976593, + "logits/chosen": -0.6266674995422363, + "logits/rejected": -0.6652182936668396, + "logps/chosen": -0.06423190236091614, + "logps/rejected": -0.42358794808387756, + "loss": 3.8353, + "nll_loss": 0.9417771100997925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006423190236091614, + "rewards/margins": 0.03593560308218002, + "rewards/rejected": -0.04235879331827164, + "step": 2460 + }, + { + "epoch": 1.701936376210235, + "grad_norm": 5.712089538574219, + "learning_rate": 4.6100353465498694e-05, + "log_odds_chosen": 5.9961442947387695, + "log_odds_ratio": -0.027979286387562752, + "logits/chosen": -0.5057214498519897, + "logits/rejected": -0.500369131565094, + "logps/chosen": -0.014197605662047863, + "logps/rejected": -0.7986212372779846, + "loss": 2.0362, + "nll_loss": 0.5062602758407593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014197605196386576, + "rewards/margins": 0.07844236493110657, + "rewards/rejected": -0.07986212521791458, + "step": 2461 + }, + { + "epoch": 1.702627939142462, + "grad_norm": 5.646170139312744, + "learning_rate": 4.6096511449208547e-05, + "log_odds_chosen": 5.234273910522461, + "log_odds_ratio": -0.07074658572673798, + "logits/chosen": -0.5358408093452454, + "logits/rejected": -0.5804621577262878, + "logps/chosen": -0.04008009284734726, + "logps/rejected": -0.725631833076477, + "loss": 2.6012, + "nll_loss": 0.6432194709777832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004008009098470211, + "rewards/margins": 0.06855517625808716, + "rewards/rejected": -0.07256318628787994, + "step": 2462 + }, + { + "epoch": 1.703319502074689, + "grad_norm": 7.1241841316223145, + "learning_rate": 4.609266943291839e-05, + "log_odds_chosen": 4.2716546058654785, + "log_odds_ratio": -0.23627769947052002, + "logits/chosen": -0.49713873863220215, + "logits/rejected": -0.47726917266845703, + "logps/chosen": -0.046655893325805664, + "logps/rejected": -0.7681934833526611, + "loss": 2.7274, + "nll_loss": 0.6582244038581848, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004665589425712824, + "rewards/margins": 0.07215375453233719, + "rewards/rejected": -0.07681934535503387, + "step": 2463 + }, + { + "epoch": 1.7040110650069158, + "grad_norm": 12.598523139953613, + "learning_rate": 4.608882741662825e-05, + "log_odds_chosen": 4.887299060821533, + "log_odds_ratio": -0.09854143112897873, + "logits/chosen": -0.8739576935768127, + "logits/rejected": -0.9403781294822693, + "logps/chosen": -0.04767553135752678, + "logps/rejected": -0.9661577343940735, + "loss": 4.9117, + "nll_loss": 1.2180765867233276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00476755341514945, + "rewards/margins": 0.091848224401474, + "rewards/rejected": -0.09661578387022018, + "step": 2464 + }, + { + "epoch": 1.7047026279391426, + "grad_norm": 7.02893590927124, + "learning_rate": 4.60849854003381e-05, + "log_odds_chosen": 3.973839282989502, + "log_odds_ratio": -0.24144837260246277, + "logits/chosen": -0.5743393898010254, + "logits/rejected": -0.5642153024673462, + "logps/chosen": -0.06009901687502861, + "logps/rejected": -0.8592863082885742, + "loss": 2.643, + "nll_loss": 0.6366161108016968, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006009901873767376, + "rewards/margins": 0.0799187421798706, + "rewards/rejected": -0.08592863380908966, + "step": 2465 + }, + { + "epoch": 1.7053941908713695, + "grad_norm": 11.561562538146973, + "learning_rate": 4.608114338404795e-05, + "log_odds_chosen": 5.92291259765625, + "log_odds_ratio": -0.30987101793289185, + "logits/chosen": -0.4808652997016907, + "logits/rejected": -0.5432968139648438, + "logps/chosen": -0.07293874770402908, + "logps/rejected": -1.199968934059143, + "loss": 3.8248, + "nll_loss": 0.9252121448516846, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007293875329196453, + "rewards/margins": 0.11270301789045334, + "rewards/rejected": -0.11999689042568207, + "step": 2466 + }, + { + "epoch": 1.7060857538035963, + "grad_norm": 7.464529991149902, + "learning_rate": 4.60773013677578e-05, + "log_odds_chosen": 4.637115478515625, + "log_odds_ratio": -0.1439353972673416, + "logits/chosen": -0.7529462575912476, + "logits/rejected": -0.6442465782165527, + "logps/chosen": -0.05325663462281227, + "logps/rejected": -0.60707026720047, + "loss": 2.232, + "nll_loss": 0.5436094403266907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005325663834810257, + "rewards/margins": 0.05538136512041092, + "rewards/rejected": -0.06070702522993088, + "step": 2467 + }, + { + "epoch": 1.7067773167358231, + "grad_norm": 6.781388759613037, + "learning_rate": 4.6073459351467654e-05, + "log_odds_chosen": 3.841700553894043, + "log_odds_ratio": -0.341701865196228, + "logits/chosen": -0.6883922815322876, + "logits/rejected": -0.7041289806365967, + "logps/chosen": -0.08620139211416245, + "logps/rejected": -0.857895016670227, + "loss": 2.8108, + "nll_loss": 0.6685272455215454, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008620140142738819, + "rewards/margins": 0.07716936618089676, + "rewards/rejected": -0.0857895016670227, + "step": 2468 + }, + { + "epoch": 1.70746887966805, + "grad_norm": 7.313510894775391, + "learning_rate": 4.60696173351775e-05, + "log_odds_chosen": 5.199853897094727, + "log_odds_ratio": -0.05814574658870697, + "logits/chosen": -0.7749062776565552, + "logits/rejected": -0.892104983329773, + "logps/chosen": -0.05403900891542435, + "logps/rejected": -0.8282055258750916, + "loss": 3.1464, + "nll_loss": 0.7807949781417847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005403900984674692, + "rewards/margins": 0.07741665095090866, + "rewards/rejected": -0.08282054960727692, + "step": 2469 + }, + { + "epoch": 1.7081604426002768, + "grad_norm": 6.603610038757324, + "learning_rate": 4.606577531888735e-05, + "log_odds_chosen": 4.6768012046813965, + "log_odds_ratio": -0.18649247288703918, + "logits/chosen": -0.4904458522796631, + "logits/rejected": -0.5735772848129272, + "logps/chosen": -0.04555944353342056, + "logps/rejected": -0.6273662447929382, + "loss": 2.7468, + "nll_loss": 0.6680549383163452, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004555944819003344, + "rewards/margins": 0.058180682361125946, + "rewards/rejected": -0.0627366229891777, + "step": 2470 + }, + { + "epoch": 1.7088520055325036, + "grad_norm": 6.438045024871826, + "learning_rate": 4.6061933302597205e-05, + "log_odds_chosen": 2.645942211151123, + "log_odds_ratio": -0.261520653963089, + "logits/chosen": -0.6434420347213745, + "logits/rejected": -0.6276592016220093, + "logps/chosen": -0.08992427587509155, + "logps/rejected": -0.4632926881313324, + "loss": 3.6067, + "nll_loss": 0.8755119442939758, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008992427960038185, + "rewards/margins": 0.037336841225624084, + "rewards/rejected": -0.04632926732301712, + "step": 2471 + }, + { + "epoch": 1.7095435684647304, + "grad_norm": 5.089877128601074, + "learning_rate": 4.605809128630705e-05, + "log_odds_chosen": 4.637329578399658, + "log_odds_ratio": -0.20717129111289978, + "logits/chosen": -0.3958089351654053, + "logits/rejected": -0.5050152540206909, + "logps/chosen": -0.06903784722089767, + "logps/rejected": -0.763074517250061, + "loss": 2.5772, + "nll_loss": 0.62359219789505, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006903784349560738, + "rewards/margins": 0.06940367817878723, + "rewards/rejected": -0.07630746066570282, + "step": 2472 + }, + { + "epoch": 1.7102351313969573, + "grad_norm": 6.6487531661987305, + "learning_rate": 4.605424927001691e-05, + "log_odds_chosen": 4.568889617919922, + "log_odds_ratio": -0.28559839725494385, + "logits/chosen": -0.49498260021209717, + "logits/rejected": -0.5599215626716614, + "logps/chosen": -0.0664115771651268, + "logps/rejected": -0.8344486951828003, + "loss": 2.6958, + "nll_loss": 0.6453921794891357, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00664115697145462, + "rewards/margins": 0.07680370658636093, + "rewards/rejected": -0.08344486355781555, + "step": 2473 + }, + { + "epoch": 1.710926694329184, + "grad_norm": 7.506117343902588, + "learning_rate": 4.6050407253726755e-05, + "log_odds_chosen": 7.32316780090332, + "log_odds_ratio": -0.016720792278647423, + "logits/chosen": -0.5493389964103699, + "logits/rejected": -0.5543407797813416, + "logps/chosen": -0.020256822928786278, + "logps/rejected": -1.3681458234786987, + "loss": 2.9035, + "nll_loss": 0.7241946458816528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020256820134818554, + "rewards/margins": 0.1347889006137848, + "rewards/rejected": -0.13681459426879883, + "step": 2474 + }, + { + "epoch": 1.711618257261411, + "grad_norm": 8.684357643127441, + "learning_rate": 4.604656523743661e-05, + "log_odds_chosen": 1.567682147026062, + "log_odds_ratio": -0.7690153121948242, + "logits/chosen": -0.7396680116653442, + "logits/rejected": -0.7365007400512695, + "logps/chosen": -0.164411261677742, + "logps/rejected": -0.343228280544281, + "loss": 2.7866, + "nll_loss": 0.6197413206100464, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01644112728536129, + "rewards/margins": 0.01788170263171196, + "rewards/rejected": -0.0343228280544281, + "step": 2475 + }, + { + "epoch": 1.7123098201936378, + "grad_norm": 4.919681549072266, + "learning_rate": 4.604272322114646e-05, + "log_odds_chosen": 6.278683662414551, + "log_odds_ratio": -0.04371640086174011, + "logits/chosen": -0.3746539354324341, + "logits/rejected": -0.35592973232269287, + "logps/chosen": -0.03592051565647125, + "logps/rejected": -0.9769902229309082, + "loss": 2.255, + "nll_loss": 0.5593817830085754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035920515656471252, + "rewards/margins": 0.09410697221755981, + "rewards/rejected": -0.09769902378320694, + "step": 2476 + }, + { + "epoch": 1.7130013831258646, + "grad_norm": 4.298426628112793, + "learning_rate": 4.603888120485631e-05, + "log_odds_chosen": 5.387601852416992, + "log_odds_ratio": -0.05379874259233475, + "logits/chosen": -0.4532133936882019, + "logits/rejected": -0.4699724316596985, + "logps/chosen": -0.028361182659864426, + "logps/rejected": -0.6521701216697693, + "loss": 2.3214, + "nll_loss": 0.5749749541282654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002836118219420314, + "rewards/margins": 0.06238089129328728, + "rewards/rejected": -0.06521701067686081, + "step": 2477 + }, + { + "epoch": 1.7136929460580914, + "grad_norm": 6.177600860595703, + "learning_rate": 4.603503918856616e-05, + "log_odds_chosen": 5.090878486633301, + "log_odds_ratio": -0.05117940902709961, + "logits/chosen": -0.3519092798233032, + "logits/rejected": -0.41055262088775635, + "logps/chosen": -0.04080752655863762, + "logps/rejected": -0.8973877429962158, + "loss": 2.7697, + "nll_loss": 0.6873171329498291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0040807523764669895, + "rewards/margins": 0.0856580138206482, + "rewards/rejected": -0.08973877131938934, + "step": 2478 + }, + { + "epoch": 1.7143845089903182, + "grad_norm": 12.071842193603516, + "learning_rate": 4.603119717227601e-05, + "log_odds_chosen": 2.429943323135376, + "log_odds_ratio": -0.5938160419464111, + "logits/chosen": -0.25275617837905884, + "logits/rejected": -0.28012439608573914, + "logps/chosen": -0.057525694370269775, + "logps/rejected": -0.5072909593582153, + "loss": 2.3098, + "nll_loss": 0.5180577635765076, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005752569530159235, + "rewards/margins": 0.044976525008678436, + "rewards/rejected": -0.05072909966111183, + "step": 2479 + }, + { + "epoch": 1.715076071922545, + "grad_norm": 11.65674114227295, + "learning_rate": 4.602735515598586e-05, + "log_odds_chosen": 3.821964979171753, + "log_odds_ratio": -0.3769320249557495, + "logits/chosen": -0.4692278802394867, + "logits/rejected": -0.5543645024299622, + "logps/chosen": -0.1797221601009369, + "logps/rejected": -0.7264648675918579, + "loss": 4.9171, + "nll_loss": 1.1915725469589233, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01797221601009369, + "rewards/margins": 0.05467427149415016, + "rewards/rejected": -0.07264649122953415, + "step": 2480 + }, + { + "epoch": 1.715767634854772, + "grad_norm": 5.816194534301758, + "learning_rate": 4.602351313969571e-05, + "log_odds_chosen": 4.72694206237793, + "log_odds_ratio": -0.08599922060966492, + "logits/chosen": -0.4541250765323639, + "logits/rejected": -0.48050498962402344, + "logps/chosen": -0.034682586789131165, + "logps/rejected": -0.8450120687484741, + "loss": 2.6775, + "nll_loss": 0.6607639789581299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034682590048760176, + "rewards/margins": 0.08103295415639877, + "rewards/rejected": -0.0845012217760086, + "step": 2481 + }, + { + "epoch": 1.7164591977869987, + "grad_norm": 8.660858154296875, + "learning_rate": 4.601967112340557e-05, + "log_odds_chosen": 5.18958044052124, + "log_odds_ratio": -0.10727565735578537, + "logits/chosen": -0.44695162773132324, + "logits/rejected": -0.5264561176300049, + "logps/chosen": -0.05404742807149887, + "logps/rejected": -1.119019865989685, + "loss": 3.3442, + "nll_loss": 0.825323760509491, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005404743365943432, + "rewards/margins": 0.10649725049734116, + "rewards/rejected": -0.11190199106931686, + "step": 2482 + }, + { + "epoch": 1.7171507607192256, + "grad_norm": 3.831432342529297, + "learning_rate": 4.6015829107115414e-05, + "log_odds_chosen": 6.9305739402771, + "log_odds_ratio": -0.011790118180215359, + "logits/chosen": -0.4339551627635956, + "logits/rejected": -0.4620319604873657, + "logps/chosen": -0.03041076473891735, + "logps/rejected": -1.066805124282837, + "loss": 2.0754, + "nll_loss": 0.5176640152931213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003041076473891735, + "rewards/margins": 0.10363944619894028, + "rewards/rejected": -0.10668051987886429, + "step": 2483 + }, + { + "epoch": 1.7178423236514524, + "grad_norm": 8.102904319763184, + "learning_rate": 4.6011987090825266e-05, + "log_odds_chosen": 2.083221435546875, + "log_odds_ratio": -0.5790278911590576, + "logits/chosen": -0.4810032248497009, + "logits/rejected": -0.5122133493423462, + "logps/chosen": -0.12283414602279663, + "logps/rejected": -0.42418205738067627, + "loss": 3.5874, + "nll_loss": 0.838951826095581, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012283414602279663, + "rewards/margins": 0.030134791508316994, + "rewards/rejected": -0.04241820424795151, + "step": 2484 + }, + { + "epoch": 1.7185338865836792, + "grad_norm": 8.226753234863281, + "learning_rate": 4.600814507453512e-05, + "log_odds_chosen": 5.8126068115234375, + "log_odds_ratio": -0.24563762545585632, + "logits/chosen": -0.5683649182319641, + "logits/rejected": -0.6123848557472229, + "logps/chosen": -0.06213032454252243, + "logps/rejected": -0.975424587726593, + "loss": 2.6634, + "nll_loss": 0.6412944197654724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006213032640516758, + "rewards/margins": 0.0913294330239296, + "rewards/rejected": -0.09754246473312378, + "step": 2485 + }, + { + "epoch": 1.719225449515906, + "grad_norm": 7.954338550567627, + "learning_rate": 4.600430305824497e-05, + "log_odds_chosen": 6.833853721618652, + "log_odds_ratio": -0.2492285668849945, + "logits/chosen": -0.7041445374488831, + "logits/rejected": -0.7608736753463745, + "logps/chosen": -0.04681987315416336, + "logps/rejected": -1.284895658493042, + "loss": 3.1293, + "nll_loss": 0.7574116587638855, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004681987222284079, + "rewards/margins": 0.12380757927894592, + "rewards/rejected": -0.12848955392837524, + "step": 2486 + }, + { + "epoch": 1.7199170124481329, + "grad_norm": 8.005608558654785, + "learning_rate": 4.600046104195482e-05, + "log_odds_chosen": 2.9669289588928223, + "log_odds_ratio": -0.6262913942337036, + "logits/chosen": -0.3596298396587372, + "logits/rejected": -0.36272570490837097, + "logps/chosen": -0.08726787567138672, + "logps/rejected": -0.5723020434379578, + "loss": 3.5328, + "nll_loss": 0.8205739259719849, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008726788684725761, + "rewards/margins": 0.048503417521715164, + "rewards/rejected": -0.057230208069086075, + "step": 2487 + }, + { + "epoch": 1.7206085753803597, + "grad_norm": 5.509665012359619, + "learning_rate": 4.599661902566467e-05, + "log_odds_chosen": 7.698426723480225, + "log_odds_ratio": -0.005875317845493555, + "logits/chosen": -0.17930477857589722, + "logits/rejected": -0.21657007932662964, + "logps/chosen": -0.004427487496286631, + "logps/rejected": -1.1619611978530884, + "loss": 2.2304, + "nll_loss": 0.5570050477981567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004427487147040665, + "rewards/margins": 0.11575337499380112, + "rewards/rejected": -0.11619612574577332, + "step": 2488 + }, + { + "epoch": 1.7213001383125865, + "grad_norm": 8.408278465270996, + "learning_rate": 4.599277700937452e-05, + "log_odds_chosen": 8.361098289489746, + "log_odds_ratio": -0.0008353714365512133, + "logits/chosen": -0.3038009703159332, + "logits/rejected": -0.31657013297080994, + "logps/chosen": -0.0015146147925406694, + "logps/rejected": -1.2001842260360718, + "loss": 2.9343, + "nll_loss": 0.7334992289543152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015146148507483304, + "rewards/margins": 0.11986696720123291, + "rewards/rejected": -0.12001842260360718, + "step": 2489 + }, + { + "epoch": 1.7219917012448134, + "grad_norm": 10.702224731445312, + "learning_rate": 4.598893499308437e-05, + "log_odds_chosen": 3.5512514114379883, + "log_odds_ratio": -0.2796310782432556, + "logits/chosen": -0.8009122610092163, + "logits/rejected": -0.7680351734161377, + "logps/chosen": -0.05254250392317772, + "logps/rejected": -0.5955202579498291, + "loss": 3.6625, + "nll_loss": 0.8876738548278809, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005254250951111317, + "rewards/margins": 0.054297782480716705, + "rewards/rejected": -0.05955202877521515, + "step": 2490 + }, + { + "epoch": 1.7226832641770402, + "grad_norm": 11.43749713897705, + "learning_rate": 4.598509297679423e-05, + "log_odds_chosen": 3.9742114543914795, + "log_odds_ratio": -0.27042850852012634, + "logits/chosen": -0.34033113718032837, + "logits/rejected": -0.3668830990791321, + "logps/chosen": -0.19744163751602173, + "logps/rejected": -1.366867184638977, + "loss": 4.315, + "nll_loss": 1.0516953468322754, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019744165241718292, + "rewards/margins": 0.11694254726171494, + "rewards/rejected": -0.13668671250343323, + "step": 2491 + }, + { + "epoch": 1.723374827109267, + "grad_norm": 7.133487224578857, + "learning_rate": 4.598125096050407e-05, + "log_odds_chosen": 6.378708839416504, + "log_odds_ratio": -0.04591769725084305, + "logits/chosen": 0.07625691592693329, + "logits/rejected": 0.002835869789123535, + "logps/chosen": -0.01164622139185667, + "logps/rejected": -0.8535522222518921, + "loss": 2.6138, + "nll_loss": 0.6488522291183472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001164622139185667, + "rewards/margins": 0.08419059962034225, + "rewards/rejected": -0.0853552296757698, + "step": 2492 + }, + { + "epoch": 1.7240663900414939, + "grad_norm": 10.00739860534668, + "learning_rate": 4.5977408944213925e-05, + "log_odds_chosen": 3.1018543243408203, + "log_odds_ratio": -0.6838514804840088, + "logits/chosen": -0.28175613284111023, + "logits/rejected": -0.3513212502002716, + "logps/chosen": -0.12369559705257416, + "logps/rejected": -0.625062108039856, + "loss": 2.9139, + "nll_loss": 0.6600803732872009, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012369560077786446, + "rewards/margins": 0.05013664811849594, + "rewards/rejected": -0.06250621378421783, + "step": 2493 + }, + { + "epoch": 1.7247579529737207, + "grad_norm": 5.779770851135254, + "learning_rate": 4.597356692792378e-05, + "log_odds_chosen": 4.564273834228516, + "log_odds_ratio": -0.040960900485515594, + "logits/chosen": -0.4975500702857971, + "logits/rejected": -0.49774548411369324, + "logps/chosen": -0.01713930070400238, + "logps/rejected": -0.5751754641532898, + "loss": 3.0896, + "nll_loss": 0.7683084011077881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017139300471171737, + "rewards/margins": 0.05580361932516098, + "rewards/rejected": -0.05751754716038704, + "step": 2494 + }, + { + "epoch": 1.7254495159059475, + "grad_norm": 7.042632579803467, + "learning_rate": 4.596972491163363e-05, + "log_odds_chosen": 5.019662857055664, + "log_odds_ratio": -0.14574462175369263, + "logits/chosen": -0.11871343851089478, + "logits/rejected": -0.1903182566165924, + "logps/chosen": -0.037783216685056686, + "logps/rejected": -0.5918722152709961, + "loss": 2.5264, + "nll_loss": 0.617037296295166, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003778322134166956, + "rewards/margins": 0.05540889501571655, + "rewards/rejected": -0.05918722227215767, + "step": 2495 + }, + { + "epoch": 1.7261410788381744, + "grad_norm": 4.622135639190674, + "learning_rate": 4.5965882895343475e-05, + "log_odds_chosen": 4.588207721710205, + "log_odds_ratio": -0.06134911999106407, + "logits/chosen": -0.5902333855628967, + "logits/rejected": -0.637090265750885, + "logps/chosen": -0.03286483883857727, + "logps/rejected": -0.8268821835517883, + "loss": 3.1512, + "nll_loss": 0.7816723585128784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032864839304238558, + "rewards/margins": 0.07940173894166946, + "rewards/rejected": -0.08268822729587555, + "step": 2496 + }, + { + "epoch": 1.7268326417704012, + "grad_norm": 9.341205596923828, + "learning_rate": 4.596204087905333e-05, + "log_odds_chosen": 4.570016384124756, + "log_odds_ratio": -0.15347853302955627, + "logits/chosen": -0.23321914672851562, + "logits/rejected": -0.3474777340888977, + "logps/chosen": -0.043549127876758575, + "logps/rejected": -0.8679786920547485, + "loss": 3.9859, + "nll_loss": 0.9811323285102844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0043549127876758575, + "rewards/margins": 0.08244295418262482, + "rewards/rejected": -0.08679787069559097, + "step": 2497 + }, + { + "epoch": 1.727524204702628, + "grad_norm": 7.280107021331787, + "learning_rate": 4.595819886276318e-05, + "log_odds_chosen": 3.2914509773254395, + "log_odds_ratio": -0.07923942804336548, + "logits/chosen": -0.5238845348358154, + "logits/rejected": -0.6023181080818176, + "logps/chosen": -0.06284703314304352, + "logps/rejected": -0.7727693915367126, + "loss": 2.7732, + "nll_loss": 0.6853671669960022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006284703500568867, + "rewards/margins": 0.07099223881959915, + "rewards/rejected": -0.07727694511413574, + "step": 2498 + }, + { + "epoch": 1.7282157676348548, + "grad_norm": 9.751381874084473, + "learning_rate": 4.5954356846473026e-05, + "log_odds_chosen": 3.872626304626465, + "log_odds_ratio": -0.16218523681163788, + "logits/chosen": -0.5514325499534607, + "logits/rejected": -0.5251030921936035, + "logps/chosen": -0.07056479901075363, + "logps/rejected": -0.63563072681427, + "loss": 4.1649, + "nll_loss": 1.0250036716461182, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007056479807943106, + "rewards/margins": 0.05650658905506134, + "rewards/rejected": -0.06356307119131088, + "step": 2499 + }, + { + "epoch": 1.7289073305670817, + "grad_norm": 7.159670829772949, + "learning_rate": 4.5950514830182885e-05, + "log_odds_chosen": 5.2221269607543945, + "log_odds_ratio": -0.12021451443433762, + "logits/chosen": -0.3670402765274048, + "logits/rejected": -0.470436155796051, + "logps/chosen": -0.05295195057988167, + "logps/rejected": -0.8274115324020386, + "loss": 2.5272, + "nll_loss": 0.6197685599327087, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005295194685459137, + "rewards/margins": 0.07744596153497696, + "rewards/rejected": -0.0827411562204361, + "step": 2500 + }, + { + "epoch": 1.7295988934993085, + "grad_norm": 7.748027801513672, + "learning_rate": 4.594667281389273e-05, + "log_odds_chosen": 5.541880130767822, + "log_odds_ratio": -0.20395153760910034, + "logits/chosen": -0.43168240785598755, + "logits/rejected": -0.49034783244132996, + "logps/chosen": -0.05815047025680542, + "logps/rejected": -1.1151851415634155, + "loss": 2.8836, + "nll_loss": 0.7005079388618469, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005815047305077314, + "rewards/margins": 0.10570347309112549, + "rewards/rejected": -0.11151853203773499, + "step": 2501 + }, + { + "epoch": 1.7302904564315353, + "grad_norm": 7.447516918182373, + "learning_rate": 4.594283079760258e-05, + "log_odds_chosen": 4.158929824829102, + "log_odds_ratio": -0.6623351573944092, + "logits/chosen": -0.12878236174583435, + "logits/rejected": -0.11322636157274246, + "logps/chosen": -0.12923133373260498, + "logps/rejected": -0.6508136987686157, + "loss": 3.5702, + "nll_loss": 0.8263086676597595, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012923132628202438, + "rewards/margins": 0.05215824022889137, + "rewards/rejected": -0.06508137285709381, + "step": 2502 + }, + { + "epoch": 1.7309820193637622, + "grad_norm": 5.580257892608643, + "learning_rate": 4.5938988781312436e-05, + "log_odds_chosen": 5.174313068389893, + "log_odds_ratio": -0.04148080572485924, + "logits/chosen": -0.5327701568603516, + "logits/rejected": -0.5527817606925964, + "logps/chosen": -0.024058351293206215, + "logps/rejected": -0.8554421663284302, + "loss": 2.5435, + "nll_loss": 0.6317334175109863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024058353155851364, + "rewards/margins": 0.08313838392496109, + "rewards/rejected": -0.08554422110319138, + "step": 2503 + }, + { + "epoch": 1.731673582295989, + "grad_norm": 7.316047668457031, + "learning_rate": 4.593514676502229e-05, + "log_odds_chosen": 7.4302825927734375, + "log_odds_ratio": -0.09521831572055817, + "logits/chosen": -0.22343260049819946, + "logits/rejected": -0.3161851167678833, + "logps/chosen": -0.025817379355430603, + "logps/rejected": -1.350869059562683, + "loss": 2.7678, + "nll_loss": 0.6824326515197754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025817379355430603, + "rewards/margins": 0.1325051635503769, + "rewards/rejected": -0.13508689403533936, + "step": 2504 + }, + { + "epoch": 1.7323651452282158, + "grad_norm": 7.182354927062988, + "learning_rate": 4.5931304748732134e-05, + "log_odds_chosen": 5.076048851013184, + "log_odds_ratio": -0.07221361249685287, + "logits/chosen": -0.5134769678115845, + "logits/rejected": -0.5864946246147156, + "logps/chosen": -0.05353981629014015, + "logps/rejected": -1.074465036392212, + "loss": 3.9926, + "nll_loss": 0.9909363985061646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005353981629014015, + "rewards/margins": 0.10209251940250397, + "rewards/rejected": -0.10744651407003403, + "step": 2505 + }, + { + "epoch": 1.7330567081604427, + "grad_norm": 11.882872581481934, + "learning_rate": 4.5927462732441986e-05, + "log_odds_chosen": 2.1057169437408447, + "log_odds_ratio": -0.47601622343063354, + "logits/chosen": -0.45321619510650635, + "logits/rejected": -0.42948225140571594, + "logps/chosen": -0.18323317170143127, + "logps/rejected": -0.32210710644721985, + "loss": 3.7983, + "nll_loss": 0.9019668698310852, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.018323317170143127, + "rewards/margins": 0.013887394219636917, + "rewards/rejected": -0.032210711389780045, + "step": 2506 + }, + { + "epoch": 1.7337482710926695, + "grad_norm": 8.515796661376953, + "learning_rate": 4.592362071615184e-05, + "log_odds_chosen": 6.995229721069336, + "log_odds_ratio": -0.05257797986268997, + "logits/chosen": -0.40070289373397827, + "logits/rejected": -0.44888609647750854, + "logps/chosen": -0.015650387853384018, + "logps/rejected": -1.2345138788223267, + "loss": 2.4482, + "nll_loss": 0.6067959666252136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015650388086214662, + "rewards/margins": 0.12188635021448135, + "rewards/rejected": -0.12345139682292938, + "step": 2507 + }, + { + "epoch": 1.7344398340248963, + "grad_norm": 5.455097198486328, + "learning_rate": 4.5919778699861684e-05, + "log_odds_chosen": 5.574578285217285, + "log_odds_ratio": -0.07917968928813934, + "logits/chosen": -0.4635438323020935, + "logits/rejected": -0.4907468855381012, + "logps/chosen": -0.027772696688771248, + "logps/rejected": -0.7599036693572998, + "loss": 2.8449, + "nll_loss": 0.7033053636550903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002777269808575511, + "rewards/margins": 0.07321310043334961, + "rewards/rejected": -0.07599036395549774, + "step": 2508 + }, + { + "epoch": 1.7351313969571232, + "grad_norm": 7.941450119018555, + "learning_rate": 4.5915936683571544e-05, + "log_odds_chosen": 5.4366536140441895, + "log_odds_ratio": -0.15930068492889404, + "logits/chosen": -0.21428066492080688, + "logits/rejected": -0.3483664393424988, + "logps/chosen": -0.03960276022553444, + "logps/rejected": -1.0487326383590698, + "loss": 3.0972, + "nll_loss": 0.7583820223808289, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003960276022553444, + "rewards/margins": 0.10091298818588257, + "rewards/rejected": -0.10487326234579086, + "step": 2509 + }, + { + "epoch": 1.73582295988935, + "grad_norm": 8.1426362991333, + "learning_rate": 4.591209466728139e-05, + "log_odds_chosen": 4.624601364135742, + "log_odds_ratio": -0.059192001819610596, + "logits/chosen": -0.5676755309104919, + "logits/rejected": -0.576343834400177, + "logps/chosen": -0.03207191824913025, + "logps/rejected": -1.0185704231262207, + "loss": 3.9412, + "nll_loss": 0.9793733358383179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003207192290574312, + "rewards/margins": 0.09864984452724457, + "rewards/rejected": -0.10185703635215759, + "step": 2510 + }, + { + "epoch": 1.7365145228215768, + "grad_norm": 5.790066719055176, + "learning_rate": 4.590825265099124e-05, + "log_odds_chosen": 7.578642845153809, + "log_odds_ratio": -0.057620033621788025, + "logits/chosen": -0.5006792545318604, + "logits/rejected": -0.5755428671836853, + "logps/chosen": -0.013461096212267876, + "logps/rejected": -1.1927460432052612, + "loss": 2.1783, + "nll_loss": 0.5388160943984985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013461096677929163, + "rewards/margins": 0.11792849004268646, + "rewards/rejected": -0.11927460134029388, + "step": 2511 + }, + { + "epoch": 1.7372060857538036, + "grad_norm": 8.553199768066406, + "learning_rate": 4.5904410634701094e-05, + "log_odds_chosen": 5.309554100036621, + "log_odds_ratio": -0.22256025671958923, + "logits/chosen": -0.4385831356048584, + "logits/rejected": -0.5174022316932678, + "logps/chosen": -0.05707190930843353, + "logps/rejected": -0.7726631164550781, + "loss": 3.2445, + "nll_loss": 0.7888723611831665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005707190837711096, + "rewards/margins": 0.0715591236948967, + "rewards/rejected": -0.07726631313562393, + "step": 2512 + }, + { + "epoch": 1.7378976486860305, + "grad_norm": 9.486678123474121, + "learning_rate": 4.5900568618410947e-05, + "log_odds_chosen": 3.2149507999420166, + "log_odds_ratio": -0.8374648094177246, + "logits/chosen": -0.6298545598983765, + "logits/rejected": -0.64149010181427, + "logps/chosen": -0.07850378006696701, + "logps/rejected": -0.8460255861282349, + "loss": 3.993, + "nll_loss": 0.9144986867904663, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007850377820432186, + "rewards/margins": 0.0767521783709526, + "rewards/rejected": -0.08460256457328796, + "step": 2513 + }, + { + "epoch": 1.7385892116182573, + "grad_norm": 8.289212226867676, + "learning_rate": 4.589672660212079e-05, + "log_odds_chosen": 4.248414039611816, + "log_odds_ratio": -0.08790218830108643, + "logits/chosen": -0.052254606038331985, + "logits/rejected": -0.05429219827055931, + "logps/chosen": -0.09396925568580627, + "logps/rejected": -0.9986541867256165, + "loss": 2.8122, + "nll_loss": 0.6942670941352844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009396926499903202, + "rewards/margins": 0.09046850353479385, + "rewards/rejected": -0.09986542910337448, + "step": 2514 + }, + { + "epoch": 1.7392807745504841, + "grad_norm": 12.42770767211914, + "learning_rate": 4.5892884585830645e-05, + "log_odds_chosen": 1.7273861169815063, + "log_odds_ratio": -0.34406787157058716, + "logits/chosen": -0.6730232238769531, + "logits/rejected": -0.6579403877258301, + "logps/chosen": -0.07420868426561356, + "logps/rejected": -0.4276873767375946, + "loss": 5.5056, + "nll_loss": 1.341995120048523, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0074208686128258705, + "rewards/margins": 0.035347871482372284, + "rewards/rejected": -0.04276873916387558, + "step": 2515 + }, + { + "epoch": 1.739972337482711, + "grad_norm": 16.12079429626465, + "learning_rate": 4.58890425695405e-05, + "log_odds_chosen": 2.7042465209960938, + "log_odds_ratio": -0.30173051357269287, + "logits/chosen": -0.5809437036514282, + "logits/rejected": -0.6290631890296936, + "logps/chosen": -0.14772537350654602, + "logps/rejected": -0.7955739498138428, + "loss": 3.8072, + "nll_loss": 0.9216184616088867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014772538095712662, + "rewards/margins": 0.06478486210107803, + "rewards/rejected": -0.0795573964715004, + "step": 2516 + }, + { + "epoch": 1.7406639004149378, + "grad_norm": 11.333333015441895, + "learning_rate": 4.588520055325034e-05, + "log_odds_chosen": 4.032566547393799, + "log_odds_ratio": -0.5613663196563721, + "logits/chosen": -0.30087190866470337, + "logits/rejected": -0.3377087116241455, + "logps/chosen": -0.09402960538864136, + "logps/rejected": -1.0039438009262085, + "loss": 3.3515, + "nll_loss": 0.7817503213882446, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009402960538864136, + "rewards/margins": 0.09099142253398895, + "rewards/rejected": -0.10039438307285309, + "step": 2517 + }, + { + "epoch": 1.7413554633471646, + "grad_norm": 8.307899475097656, + "learning_rate": 4.58813585369602e-05, + "log_odds_chosen": 5.074389934539795, + "log_odds_ratio": -0.31445711851119995, + "logits/chosen": -0.6631494760513306, + "logits/rejected": -0.6819051504135132, + "logps/chosen": -0.0452355220913887, + "logps/rejected": -0.805597186088562, + "loss": 3.2989, + "nll_loss": 0.7932692766189575, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004523552488535643, + "rewards/margins": 0.07603617012500763, + "rewards/rejected": -0.08055973052978516, + "step": 2518 + }, + { + "epoch": 1.7420470262793915, + "grad_norm": 8.067875862121582, + "learning_rate": 4.587751652067005e-05, + "log_odds_chosen": 5.05952787399292, + "log_odds_ratio": -0.0641229972243309, + "logits/chosen": -0.5828802585601807, + "logits/rejected": -0.6675068140029907, + "logps/chosen": -0.041139908134937286, + "logps/rejected": -0.7485602498054504, + "loss": 3.6923, + "nll_loss": 0.9166650772094727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004113990347832441, + "rewards/margins": 0.07074204087257385, + "rewards/rejected": -0.07485602796077728, + "step": 2519 + }, + { + "epoch": 1.7427385892116183, + "grad_norm": 7.006168842315674, + "learning_rate": 4.58736745043799e-05, + "log_odds_chosen": 5.627865791320801, + "log_odds_ratio": -0.30353936553001404, + "logits/chosen": -0.42846840620040894, + "logits/rejected": -0.4686059355735779, + "logps/chosen": -0.07304301857948303, + "logps/rejected": -1.1873046159744263, + "loss": 3.7581, + "nll_loss": 0.9091646075248718, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007304301485419273, + "rewards/margins": 0.11142615973949432, + "rewards/rejected": -0.11873047053813934, + "step": 2520 + }, + { + "epoch": 1.7434301521438451, + "grad_norm": 7.399003982543945, + "learning_rate": 4.586983248808975e-05, + "log_odds_chosen": 5.614686012268066, + "log_odds_ratio": -0.025744276121258736, + "logits/chosen": -0.40345659852027893, + "logits/rejected": -0.4784991145133972, + "logps/chosen": -0.033800702542066574, + "logps/rejected": -0.9102199077606201, + "loss": 2.9014, + "nll_loss": 0.7227640151977539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003380069974809885, + "rewards/margins": 0.08764191716909409, + "rewards/rejected": -0.09102199226617813, + "step": 2521 + }, + { + "epoch": 1.744121715076072, + "grad_norm": 6.353837490081787, + "learning_rate": 4.5865990471799605e-05, + "log_odds_chosen": 5.061413764953613, + "log_odds_ratio": -0.17814365029335022, + "logits/chosen": -0.30245551466941833, + "logits/rejected": -0.31398245692253113, + "logps/chosen": -0.08056647330522537, + "logps/rejected": -0.700476348400116, + "loss": 2.9955, + "nll_loss": 0.7310599684715271, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008056647144258022, + "rewards/margins": 0.061990994960069656, + "rewards/rejected": -0.07004763931035995, + "step": 2522 + }, + { + "epoch": 1.7448132780082988, + "grad_norm": 5.448588848114014, + "learning_rate": 4.586214845550945e-05, + "log_odds_chosen": 4.95775842666626, + "log_odds_ratio": -0.16451671719551086, + "logits/chosen": -0.613614559173584, + "logits/rejected": -0.6826783418655396, + "logps/chosen": -0.0742364451289177, + "logps/rejected": -0.9489127397537231, + "loss": 2.8164, + "nll_loss": 0.6876364946365356, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007423644419759512, + "rewards/margins": 0.08746762573719025, + "rewards/rejected": -0.0948912650346756, + "step": 2523 + }, + { + "epoch": 1.7455048409405256, + "grad_norm": 9.476053237915039, + "learning_rate": 4.58583064392193e-05, + "log_odds_chosen": 3.0854625701904297, + "log_odds_ratio": -0.46415698528289795, + "logits/chosen": -0.7995980978012085, + "logits/rejected": -0.78324955701828, + "logps/chosen": -0.057111553847789764, + "logps/rejected": -0.5348659753799438, + "loss": 3.6078, + "nll_loss": 0.8555430769920349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005711155943572521, + "rewards/margins": 0.047775447368621826, + "rewards/rejected": -0.053486600518226624, + "step": 2524 + }, + { + "epoch": 1.7461964038727524, + "grad_norm": 6.046580791473389, + "learning_rate": 4.5854464422929156e-05, + "log_odds_chosen": 5.830511569976807, + "log_odds_ratio": -0.14367660880088806, + "logits/chosen": -0.31898587942123413, + "logits/rejected": -0.33586829900741577, + "logps/chosen": -0.07724668085575104, + "logps/rejected": -0.6755509376525879, + "loss": 2.7089, + "nll_loss": 0.6628577709197998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007724667899310589, + "rewards/margins": 0.059830423444509506, + "rewards/rejected": -0.06755509227514267, + "step": 2525 + }, + { + "epoch": 1.7468879668049793, + "grad_norm": 6.565101146697998, + "learning_rate": 4.5850622406639e-05, + "log_odds_chosen": 5.3831467628479, + "log_odds_ratio": -0.08913850039243698, + "logits/chosen": -0.5596224069595337, + "logits/rejected": -0.6060592532157898, + "logps/chosen": -0.054114218801259995, + "logps/rejected": -0.911675751209259, + "loss": 3.3401, + "nll_loss": 0.8261107802391052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054114218801259995, + "rewards/margins": 0.08575616031885147, + "rewards/rejected": -0.09116758406162262, + "step": 2526 + }, + { + "epoch": 1.747579529737206, + "grad_norm": 10.230347633361816, + "learning_rate": 4.584678039034886e-05, + "log_odds_chosen": 5.845252990722656, + "log_odds_ratio": -0.1744435578584671, + "logits/chosen": -0.4496798515319824, + "logits/rejected": -0.5602766275405884, + "logps/chosen": -0.27355867624282837, + "logps/rejected": -1.4243483543395996, + "loss": 2.8279, + "nll_loss": 0.6895267963409424, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.027355868369340897, + "rewards/margins": 0.11507895588874817, + "rewards/rejected": -0.14243483543395996, + "step": 2527 + }, + { + "epoch": 1.748271092669433, + "grad_norm": 7.345275402069092, + "learning_rate": 4.5842938374058706e-05, + "log_odds_chosen": 4.958152770996094, + "log_odds_ratio": -0.09105677157640457, + "logits/chosen": -0.2806224822998047, + "logits/rejected": -0.31392592191696167, + "logps/chosen": -0.014766628853976727, + "logps/rejected": -0.577477753162384, + "loss": 3.9083, + "nll_loss": 0.9679797887802124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014766629319638014, + "rewards/margins": 0.05627111345529556, + "rewards/rejected": -0.057747773826122284, + "step": 2528 + }, + { + "epoch": 1.7489626556016598, + "grad_norm": 4.975615978240967, + "learning_rate": 4.583909635776856e-05, + "log_odds_chosen": 6.15440559387207, + "log_odds_ratio": -0.06456664204597473, + "logits/chosen": -0.2691290080547333, + "logits/rejected": -0.3199772238731384, + "logps/chosen": -0.040304284542798996, + "logps/rejected": -0.9820000529289246, + "loss": 2.0233, + "nll_loss": 0.4993680417537689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004030428361147642, + "rewards/margins": 0.09416957199573517, + "rewards/rejected": -0.0982000008225441, + "step": 2529 + }, + { + "epoch": 1.7496542185338866, + "grad_norm": 9.393540382385254, + "learning_rate": 4.583525434147841e-05, + "log_odds_chosen": 1.6259431838989258, + "log_odds_ratio": -0.40542522072792053, + "logits/chosen": -0.5042514801025391, + "logits/rejected": -0.48449617624282837, + "logps/chosen": -0.12315410375595093, + "logps/rejected": -0.5425556302070618, + "loss": 3.9706, + "nll_loss": 0.9521090984344482, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012315411120653152, + "rewards/margins": 0.041940152645111084, + "rewards/rejected": -0.054255563765764236, + "step": 2530 + }, + { + "epoch": 1.7503457814661134, + "grad_norm": 5.123023509979248, + "learning_rate": 4.5831412325188263e-05, + "log_odds_chosen": 3.471602439880371, + "log_odds_ratio": -0.13706140220165253, + "logits/chosen": -0.40993914008140564, + "logits/rejected": -0.44549131393432617, + "logps/chosen": -0.058320820331573486, + "logps/rejected": -0.594856858253479, + "loss": 2.147, + "nll_loss": 0.5230435729026794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005832082126289606, + "rewards/margins": 0.05365360528230667, + "rewards/rejected": -0.05948568880558014, + "step": 2531 + }, + { + "epoch": 1.7510373443983402, + "grad_norm": 3.4983346462249756, + "learning_rate": 4.582757030889811e-05, + "log_odds_chosen": 5.438455581665039, + "log_odds_ratio": -0.012369800359010696, + "logits/chosen": -0.14914925396442413, + "logits/rejected": -0.17695298790931702, + "logps/chosen": -0.03334447368979454, + "logps/rejected": -0.9888067841529846, + "loss": 2.7649, + "nll_loss": 0.6899773478507996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033344475086778402, + "rewards/margins": 0.09554623067378998, + "rewards/rejected": -0.09888067841529846, + "step": 2532 + }, + { + "epoch": 1.751728907330567, + "grad_norm": 6.337129592895508, + "learning_rate": 4.582372829260796e-05, + "log_odds_chosen": 5.5760579109191895, + "log_odds_ratio": -0.06427756696939468, + "logits/chosen": -0.48593372106552124, + "logits/rejected": -0.5026462078094482, + "logps/chosen": -0.06390149146318436, + "logps/rejected": -1.2482969760894775, + "loss": 3.1869, + "nll_loss": 0.7902973890304565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0063901497051119804, + "rewards/margins": 0.11843954771757126, + "rewards/rejected": -0.12482969462871552, + "step": 2533 + }, + { + "epoch": 1.752420470262794, + "grad_norm": 6.590423107147217, + "learning_rate": 4.5819886276317814e-05, + "log_odds_chosen": 3.642829656600952, + "log_odds_ratio": -0.17655974626541138, + "logits/chosen": -0.39275485277175903, + "logits/rejected": -0.4510759115219116, + "logps/chosen": -0.0891493633389473, + "logps/rejected": -0.8172488212585449, + "loss": 3.1116, + "nll_loss": 0.7602559924125671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00891493633389473, + "rewards/margins": 0.07280994206666946, + "rewards/rejected": -0.08172488212585449, + "step": 2534 + }, + { + "epoch": 1.7531120331950207, + "grad_norm": 4.0732340812683105, + "learning_rate": 4.581604426002766e-05, + "log_odds_chosen": 4.978409290313721, + "log_odds_ratio": -0.1693851202726364, + "logits/chosen": -0.43896347284317017, + "logits/rejected": -0.5271863341331482, + "logps/chosen": -0.040723707526922226, + "logps/rejected": -0.7022296190261841, + "loss": 2.0599, + "nll_loss": 0.49802640080451965, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00407237047329545, + "rewards/margins": 0.06615059077739716, + "rewards/rejected": -0.07022295892238617, + "step": 2535 + }, + { + "epoch": 1.7538035961272476, + "grad_norm": 6.722652435302734, + "learning_rate": 4.581220224373752e-05, + "log_odds_chosen": 6.762175559997559, + "log_odds_ratio": -0.013489147648215294, + "logits/chosen": -0.6883155703544617, + "logits/rejected": -0.6428356766700745, + "logps/chosen": -0.009356812573969364, + "logps/rejected": -0.9700776934623718, + "loss": 2.998, + "nll_loss": 0.7481532692909241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009356812224723399, + "rewards/margins": 0.09607208520174026, + "rewards/rejected": -0.09700776636600494, + "step": 2536 + }, + { + "epoch": 1.7544951590594744, + "grad_norm": 4.753267765045166, + "learning_rate": 4.5808360227447365e-05, + "log_odds_chosen": 4.396501064300537, + "log_odds_ratio": -0.0500371977686882, + "logits/chosen": -0.6106992959976196, + "logits/rejected": -0.6349448561668396, + "logps/chosen": -0.04509191960096359, + "logps/rejected": -0.767433226108551, + "loss": 2.9094, + "nll_loss": 0.7223502993583679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004509191960096359, + "rewards/margins": 0.0722341313958168, + "rewards/rejected": -0.07674331963062286, + "step": 2537 + }, + { + "epoch": 1.7551867219917012, + "grad_norm": 9.169681549072266, + "learning_rate": 4.580451821115722e-05, + "log_odds_chosen": 6.110998630523682, + "log_odds_ratio": -0.03990146890282631, + "logits/chosen": -0.3362918794155121, + "logits/rejected": -0.43705520033836365, + "logps/chosen": -0.014098139479756355, + "logps/rejected": -1.3306076526641846, + "loss": 3.4644, + "nll_loss": 0.8621118068695068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014098139945417643, + "rewards/margins": 0.13165093958377838, + "rewards/rejected": -0.1330607533454895, + "step": 2538 + }, + { + "epoch": 1.755878284923928, + "grad_norm": 7.079245567321777, + "learning_rate": 4.580067619486707e-05, + "log_odds_chosen": 5.740993499755859, + "log_odds_ratio": -0.05639209598302841, + "logits/chosen": -0.6401171684265137, + "logits/rejected": -0.6803783178329468, + "logps/chosen": -0.026444023475050926, + "logps/rejected": -1.019196629524231, + "loss": 2.425, + "nll_loss": 0.6005994081497192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00264440244063735, + "rewards/margins": 0.09927526116371155, + "rewards/rejected": -0.10191966593265533, + "step": 2539 + }, + { + "epoch": 1.7565698478561549, + "grad_norm": 9.348685264587402, + "learning_rate": 4.579683417857692e-05, + "log_odds_chosen": 5.047703742980957, + "log_odds_ratio": -0.12630712985992432, + "logits/chosen": -0.5589942336082458, + "logits/rejected": -0.6259468197822571, + "logps/chosen": -0.06140504777431488, + "logps/rejected": -0.9298676252365112, + "loss": 3.1253, + "nll_loss": 0.7687047123908997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006140504498034716, + "rewards/margins": 0.0868462473154068, + "rewards/rejected": -0.09298676252365112, + "step": 2540 + }, + { + "epoch": 1.7572614107883817, + "grad_norm": 7.808840751647949, + "learning_rate": 4.579299216228677e-05, + "log_odds_chosen": 6.712955474853516, + "log_odds_ratio": -0.028888585045933723, + "logits/chosen": -0.5721656680107117, + "logits/rejected": -0.6266698837280273, + "logps/chosen": -0.01065114140510559, + "logps/rejected": -0.8054331541061401, + "loss": 4.1854, + "nll_loss": 1.043459415435791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010651140473783016, + "rewards/margins": 0.07947821170091629, + "rewards/rejected": -0.08054331690073013, + "step": 2541 + }, + { + "epoch": 1.7579529737206085, + "grad_norm": 5.14798641204834, + "learning_rate": 4.578915014599662e-05, + "log_odds_chosen": 7.6750383377075195, + "log_odds_ratio": -0.011463024653494358, + "logits/chosen": -0.6050993204116821, + "logits/rejected": -0.5772296786308289, + "logps/chosen": -0.009099138900637627, + "logps/rejected": -0.947097659111023, + "loss": 2.5864, + "nll_loss": 0.6454551219940186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009099137969315052, + "rewards/margins": 0.09379984438419342, + "rewards/rejected": -0.09470976144075394, + "step": 2542 + }, + { + "epoch": 1.7586445366528354, + "grad_norm": 6.0344367027282715, + "learning_rate": 4.578530812970647e-05, + "log_odds_chosen": 3.80947208404541, + "log_odds_ratio": -0.2249820977449417, + "logits/chosen": -0.905397891998291, + "logits/rejected": -0.8978962898254395, + "logps/chosen": -0.06969407200813293, + "logps/rejected": -0.701169490814209, + "loss": 3.0263, + "nll_loss": 0.7340745329856873, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0069694072008132935, + "rewards/margins": 0.06314754486083984, + "rewards/rejected": -0.07011695206165314, + "step": 2543 + }, + { + "epoch": 1.7593360995850622, + "grad_norm": 8.386651992797852, + "learning_rate": 4.578146611341632e-05, + "log_odds_chosen": 5.489584445953369, + "log_odds_ratio": -0.10720404982566833, + "logits/chosen": -0.622559666633606, + "logits/rejected": -0.6387258172035217, + "logps/chosen": -0.04162842780351639, + "logps/rejected": -0.9262433052062988, + "loss": 3.3995, + "nll_loss": 0.8391590118408203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004162842407822609, + "rewards/margins": 0.0884614959359169, + "rewards/rejected": -0.09262434393167496, + "step": 2544 + }, + { + "epoch": 1.760027662517289, + "grad_norm": 10.090507507324219, + "learning_rate": 4.577762409712618e-05, + "log_odds_chosen": 4.450800895690918, + "log_odds_ratio": -0.10913616418838501, + "logits/chosen": -0.5928056240081787, + "logits/rejected": -0.5783717632293701, + "logps/chosen": -0.079688660800457, + "logps/rejected": -1.0320922136306763, + "loss": 3.6144, + "nll_loss": 0.8926970362663269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00796886719763279, + "rewards/margins": 0.09524035453796387, + "rewards/rejected": -0.10320921242237091, + "step": 2545 + }, + { + "epoch": 1.7607192254495159, + "grad_norm": 5.601189613342285, + "learning_rate": 4.577378208083602e-05, + "log_odds_chosen": 6.303149223327637, + "log_odds_ratio": -0.05890952795743942, + "logits/chosen": -0.41871675848960876, + "logits/rejected": -0.41857463121414185, + "logps/chosen": -0.049030303955078125, + "logps/rejected": -1.0410717725753784, + "loss": 3.3528, + "nll_loss": 0.8323108553886414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004903030581772327, + "rewards/margins": 0.0992041528224945, + "rewards/rejected": -0.10410717874765396, + "step": 2546 + }, + { + "epoch": 1.7614107883817427, + "grad_norm": 5.82822322845459, + "learning_rate": 4.5769940064545875e-05, + "log_odds_chosen": 8.01188850402832, + "log_odds_ratio": -0.0016340998699888587, + "logits/chosen": -0.23192644119262695, + "logits/rejected": -0.22401806712150574, + "logps/chosen": -0.002971423789858818, + "logps/rejected": -1.180034875869751, + "loss": 2.8009, + "nll_loss": 0.7000521421432495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002971423964481801, + "rewards/margins": 0.11770634353160858, + "rewards/rejected": -0.1180034801363945, + "step": 2547 + }, + { + "epoch": 1.7621023513139695, + "grad_norm": 4.945810794830322, + "learning_rate": 4.576609804825573e-05, + "log_odds_chosen": 4.382489204406738, + "log_odds_ratio": -0.14788955450057983, + "logits/chosen": -0.3996325731277466, + "logits/rejected": -0.4017907381057739, + "logps/chosen": -0.04582387953996658, + "logps/rejected": -0.8064547181129456, + "loss": 2.4554, + "nll_loss": 0.5990639328956604, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004582387860864401, + "rewards/margins": 0.07606308907270432, + "rewards/rejected": -0.08064547926187515, + "step": 2548 + }, + { + "epoch": 1.7627939142461964, + "grad_norm": 3.50301456451416, + "learning_rate": 4.576225603196558e-05, + "log_odds_chosen": 5.088458061218262, + "log_odds_ratio": -0.2111215889453888, + "logits/chosen": -0.3392675518989563, + "logits/rejected": -0.3433917164802551, + "logps/chosen": -0.06524307280778885, + "logps/rejected": -0.6364519596099854, + "loss": 1.7787, + "nll_loss": 0.42355847358703613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006524307653307915, + "rewards/margins": 0.05712088569998741, + "rewards/rejected": -0.06364519149065018, + "step": 2549 + }, + { + "epoch": 1.7634854771784232, + "grad_norm": 8.223228454589844, + "learning_rate": 4.5758414015675426e-05, + "log_odds_chosen": 3.8364288806915283, + "log_odds_ratio": -0.1705276072025299, + "logits/chosen": -0.4964444637298584, + "logits/rejected": -0.5628746151924133, + "logps/chosen": -0.05881989747285843, + "logps/rejected": -0.6852390766143799, + "loss": 1.9634, + "nll_loss": 0.47379088401794434, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0058819898404181, + "rewards/margins": 0.06264191120862961, + "rewards/rejected": -0.06852389872074127, + "step": 2550 + }, + { + "epoch": 1.76417704011065, + "grad_norm": 5.2325215339660645, + "learning_rate": 4.575457199938528e-05, + "log_odds_chosen": 7.47840690612793, + "log_odds_ratio": -0.005898052826523781, + "logits/chosen": -0.521270215511322, + "logits/rejected": -0.58015376329422, + "logps/chosen": -0.002618763130158186, + "logps/rejected": -0.877556324005127, + "loss": 2.8498, + "nll_loss": 0.7118585109710693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002618762955535203, + "rewards/margins": 0.08749376237392426, + "rewards/rejected": -0.08775563538074493, + "step": 2551 + }, + { + "epoch": 1.7648686030428768, + "grad_norm": 8.80147933959961, + "learning_rate": 4.575072998309513e-05, + "log_odds_chosen": 4.112476348876953, + "log_odds_ratio": -0.14841890335083008, + "logits/chosen": -0.16200856864452362, + "logits/rejected": -0.18064048886299133, + "logps/chosen": -0.10202540457248688, + "logps/rejected": -1.109443187713623, + "loss": 3.2059, + "nll_loss": 0.7866219282150269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010202541016042233, + "rewards/margins": 0.10074177384376526, + "rewards/rejected": -0.11094431579113007, + "step": 2552 + }, + { + "epoch": 1.7655601659751037, + "grad_norm": 8.807273864746094, + "learning_rate": 4.5746887966804977e-05, + "log_odds_chosen": 4.388718605041504, + "log_odds_ratio": -0.15505658090114594, + "logits/chosen": -0.5256962776184082, + "logits/rejected": -0.5840314030647278, + "logps/chosen": -0.056036874651908875, + "logps/rejected": -1.0513105392456055, + "loss": 3.4775, + "nll_loss": 0.8538665771484375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00560368737205863, + "rewards/margins": 0.09952735900878906, + "rewards/rejected": -0.10513104498386383, + "step": 2553 + }, + { + "epoch": 1.7662517289073305, + "grad_norm": 8.24011516571045, + "learning_rate": 4.5743045950514836e-05, + "log_odds_chosen": 4.820762634277344, + "log_odds_ratio": -0.1034751906991005, + "logits/chosen": -0.4720611870288849, + "logits/rejected": -0.4762382209300995, + "logps/chosen": -0.043905775994062424, + "logps/rejected": -0.965567946434021, + "loss": 3.8933, + "nll_loss": 0.9629859328269958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004390577785670757, + "rewards/margins": 0.0921662226319313, + "rewards/rejected": -0.09655679017305374, + "step": 2554 + }, + { + "epoch": 1.7669432918395573, + "grad_norm": 7.611776351928711, + "learning_rate": 4.573920393422468e-05, + "log_odds_chosen": 3.958975315093994, + "log_odds_ratio": -0.13509047031402588, + "logits/chosen": -0.35202181339263916, + "logits/rejected": -0.38554438948631287, + "logps/chosen": -0.06110473349690437, + "logps/rejected": -0.9542187452316284, + "loss": 2.7015, + "nll_loss": 0.6618557572364807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006110473535954952, + "rewards/margins": 0.0893113911151886, + "rewards/rejected": -0.09542188048362732, + "step": 2555 + }, + { + "epoch": 1.7676348547717842, + "grad_norm": 8.181086540222168, + "learning_rate": 4.5735361917934534e-05, + "log_odds_chosen": 3.602811813354492, + "log_odds_ratio": -0.40293049812316895, + "logits/chosen": -0.6601470112800598, + "logits/rejected": -0.6676779389381409, + "logps/chosen": -0.11564220488071442, + "logps/rejected": -1.2787232398986816, + "loss": 3.4561, + "nll_loss": 0.8237354755401611, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011564221233129501, + "rewards/margins": 0.11630810797214508, + "rewards/rejected": -0.12787233293056488, + "step": 2556 + }, + { + "epoch": 1.768326417704011, + "grad_norm": 8.936256408691406, + "learning_rate": 4.5731519901644386e-05, + "log_odds_chosen": 2.3760743141174316, + "log_odds_ratio": -0.2754179537296295, + "logits/chosen": -0.9153653979301453, + "logits/rejected": -0.9587001800537109, + "logps/chosen": -0.16239596903324127, + "logps/rejected": -0.5563870072364807, + "loss": 4.001, + "nll_loss": 0.9727064371109009, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016239596530795097, + "rewards/margins": 0.039399102330207825, + "rewards/rejected": -0.05563870072364807, + "step": 2557 + }, + { + "epoch": 1.7690179806362378, + "grad_norm": 8.981245994567871, + "learning_rate": 4.572767788535424e-05, + "log_odds_chosen": 3.714677095413208, + "log_odds_ratio": -0.2708074450492859, + "logits/chosen": -0.3442336618900299, + "logits/rejected": -0.4027746021747589, + "logps/chosen": -0.065810926258564, + "logps/rejected": -0.5971598029136658, + "loss": 3.5891, + "nll_loss": 0.8701846599578857, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006581092718988657, + "rewards/margins": 0.05313488841056824, + "rewards/rejected": -0.059715982526540756, + "step": 2558 + }, + { + "epoch": 1.7697095435684647, + "grad_norm": 6.440550327301025, + "learning_rate": 4.5723835869064084e-05, + "log_odds_chosen": 4.327930450439453, + "log_odds_ratio": -0.08816304057836533, + "logits/chosen": -0.5006344318389893, + "logits/rejected": -0.5131029486656189, + "logps/chosen": -0.06322955340147018, + "logps/rejected": -0.884222149848938, + "loss": 3.8265, + "nll_loss": 0.9477996826171875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006322954781353474, + "rewards/margins": 0.08209925144910812, + "rewards/rejected": -0.08842220902442932, + "step": 2559 + }, + { + "epoch": 1.7704011065006915, + "grad_norm": 13.864747047424316, + "learning_rate": 4.571999385277394e-05, + "log_odds_chosen": 4.08857536315918, + "log_odds_ratio": -0.33679530024528503, + "logits/chosen": -0.5055022239685059, + "logits/rejected": -0.5199083089828491, + "logps/chosen": -0.045048221945762634, + "logps/rejected": -0.4534846246242523, + "loss": 3.9027, + "nll_loss": 0.9419905543327332, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004504822660237551, + "rewards/margins": 0.04084364324808121, + "rewards/rejected": -0.04534846544265747, + "step": 2560 + }, + { + "epoch": 1.7710926694329183, + "grad_norm": 8.000943183898926, + "learning_rate": 4.571615183648379e-05, + "log_odds_chosen": 3.79542875289917, + "log_odds_ratio": -0.6328214406967163, + "logits/chosen": -0.49562186002731323, + "logits/rejected": -0.5720283389091492, + "logps/chosen": -0.22840096056461334, + "logps/rejected": -0.8777717351913452, + "loss": 2.6406, + "nll_loss": 0.5968554019927979, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.022840095683932304, + "rewards/margins": 0.06493708491325378, + "rewards/rejected": -0.08777718245983124, + "step": 2561 + }, + { + "epoch": 1.7717842323651452, + "grad_norm": 4.928500175476074, + "learning_rate": 4.571230982019364e-05, + "log_odds_chosen": 6.205010890960693, + "log_odds_ratio": -0.03770516440272331, + "logits/chosen": -0.5165535807609558, + "logits/rejected": -0.5722185969352722, + "logps/chosen": -0.04437845200300217, + "logps/rejected": -1.2871813774108887, + "loss": 2.2019, + "nll_loss": 0.5466949343681335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004437845200300217, + "rewards/margins": 0.12428028881549835, + "rewards/rejected": -0.12871812283992767, + "step": 2562 + }, + { + "epoch": 1.772475795297372, + "grad_norm": 6.937126159667969, + "learning_rate": 4.5708467803903494e-05, + "log_odds_chosen": 5.473960876464844, + "log_odds_ratio": -0.07916474342346191, + "logits/chosen": -0.4659070372581482, + "logits/rejected": -0.5119041800498962, + "logps/chosen": -0.01686052419245243, + "logps/rejected": -0.6619855165481567, + "loss": 3.0094, + "nll_loss": 0.7444390654563904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001686052419245243, + "rewards/margins": 0.06451250612735748, + "rewards/rejected": -0.06619855761528015, + "step": 2563 + }, + { + "epoch": 1.7731673582295988, + "grad_norm": 5.78624963760376, + "learning_rate": 4.570462578761334e-05, + "log_odds_chosen": 5.182653903961182, + "log_odds_ratio": -0.04307662695646286, + "logits/chosen": -0.3608715832233429, + "logits/rejected": -0.4103313386440277, + "logps/chosen": -0.05554118752479553, + "logps/rejected": -1.0058197975158691, + "loss": 2.8295, + "nll_loss": 0.7030580043792725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005554119125008583, + "rewards/margins": 0.0950278639793396, + "rewards/rejected": -0.10058198869228363, + "step": 2564 + }, + { + "epoch": 1.7738589211618256, + "grad_norm": 6.86865234375, + "learning_rate": 4.570078377132319e-05, + "log_odds_chosen": 6.726229667663574, + "log_odds_ratio": -0.0850033387541771, + "logits/chosen": -0.3425213694572449, + "logits/rejected": -0.43949681520462036, + "logps/chosen": -0.03279908001422882, + "logps/rejected": -1.065782070159912, + "loss": 2.7858, + "nll_loss": 0.6879481077194214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003279907861724496, + "rewards/margins": 0.10329829156398773, + "rewards/rejected": -0.10657820105552673, + "step": 2565 + }, + { + "epoch": 1.7745504840940525, + "grad_norm": 9.900083541870117, + "learning_rate": 4.5696941755033045e-05, + "log_odds_chosen": 3.831148147583008, + "log_odds_ratio": -0.3260344862937927, + "logits/chosen": -0.5216836333274841, + "logits/rejected": -0.5302917957305908, + "logps/chosen": -0.05193112790584564, + "logps/rejected": -0.44250690937042236, + "loss": 4.0738, + "nll_loss": 0.9858537912368774, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005193112883716822, + "rewards/margins": 0.03905757516622543, + "rewards/rejected": -0.04425068944692612, + "step": 2566 + }, + { + "epoch": 1.7752420470262793, + "grad_norm": 10.836536407470703, + "learning_rate": 4.56930997387429e-05, + "log_odds_chosen": 5.086477279663086, + "log_odds_ratio": -0.22728855907917023, + "logits/chosen": -0.6295913457870483, + "logits/rejected": -0.681605339050293, + "logps/chosen": -0.05664917454123497, + "logps/rejected": -1.0559172630310059, + "loss": 3.795, + "nll_loss": 0.9260324835777283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005664917174726725, + "rewards/margins": 0.09992681443691254, + "rewards/rejected": -0.10559172928333282, + "step": 2567 + }, + { + "epoch": 1.7759336099585061, + "grad_norm": 11.188941955566406, + "learning_rate": 4.568925772245274e-05, + "log_odds_chosen": 5.1092705726623535, + "log_odds_ratio": -0.08259230107069016, + "logits/chosen": -0.6408547759056091, + "logits/rejected": -0.7252624034881592, + "logps/chosen": -0.01996024139225483, + "logps/rejected": -0.7125002145767212, + "loss": 4.0462, + "nll_loss": 1.0032902956008911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019960240460932255, + "rewards/margins": 0.06925399601459503, + "rewards/rejected": -0.07125002145767212, + "step": 2568 + }, + { + "epoch": 1.776625172890733, + "grad_norm": 11.991272926330566, + "learning_rate": 4.56854157061626e-05, + "log_odds_chosen": 4.901045322418213, + "log_odds_ratio": -0.9436802268028259, + "logits/chosen": -0.5167893767356873, + "logits/rejected": -0.5799105763435364, + "logps/chosen": -0.11814716458320618, + "logps/rejected": -0.9997060298919678, + "loss": 3.4344, + "nll_loss": 0.7642271518707275, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011814717203378677, + "rewards/margins": 0.08815588057041168, + "rewards/rejected": -0.09997060894966125, + "step": 2569 + }, + { + "epoch": 1.7773167358229598, + "grad_norm": 7.425483226776123, + "learning_rate": 4.568157368987245e-05, + "log_odds_chosen": 4.753199577331543, + "log_odds_ratio": -0.31641826033592224, + "logits/chosen": -0.5371108651161194, + "logits/rejected": -0.6080737113952637, + "logps/chosen": -0.045531876385211945, + "logps/rejected": -0.6791171431541443, + "loss": 3.0047, + "nll_loss": 0.7195222973823547, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0045531876385211945, + "rewards/margins": 0.06335853040218353, + "rewards/rejected": -0.06791172176599503, + "step": 2570 + }, + { + "epoch": 1.7780082987551866, + "grad_norm": 6.604394435882568, + "learning_rate": 4.56777316735823e-05, + "log_odds_chosen": 6.296212196350098, + "log_odds_ratio": -0.0063947951421141624, + "logits/chosen": -0.5465483665466309, + "logits/rejected": -0.5125059485435486, + "logps/chosen": -0.012749740853905678, + "logps/rejected": -1.0247169733047485, + "loss": 2.7648, + "nll_loss": 0.6905537843704224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001274973968975246, + "rewards/margins": 0.10119672119617462, + "rewards/rejected": -0.10247169435024261, + "step": 2571 + }, + { + "epoch": 1.7786998616874135, + "grad_norm": 11.984283447265625, + "learning_rate": 4.567388965729215e-05, + "log_odds_chosen": 2.5593433380126953, + "log_odds_ratio": -0.5915682911872864, + "logits/chosen": -0.699419379234314, + "logits/rejected": -0.7382344603538513, + "logps/chosen": -0.18622469902038574, + "logps/rejected": -0.6699910759925842, + "loss": 3.5449, + "nll_loss": 0.8270754218101501, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.018622469156980515, + "rewards/margins": 0.04837663844227791, + "rewards/rejected": -0.06699910759925842, + "step": 2572 + }, + { + "epoch": 1.7793914246196403, + "grad_norm": 6.196664810180664, + "learning_rate": 4.5670047641002e-05, + "log_odds_chosen": 5.564531326293945, + "log_odds_ratio": -0.047365687787532806, + "logits/chosen": -0.6117345094680786, + "logits/rejected": -0.6705157160758972, + "logps/chosen": -0.03350626677274704, + "logps/rejected": -0.5655863881111145, + "loss": 2.5519, + "nll_loss": 0.6332501173019409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033506269101053476, + "rewards/margins": 0.053208015859127045, + "rewards/rejected": -0.05655864253640175, + "step": 2573 + }, + { + "epoch": 1.7800829875518671, + "grad_norm": 6.181553840637207, + "learning_rate": 4.566620562471185e-05, + "log_odds_chosen": 3.522923469543457, + "log_odds_ratio": -0.24771559238433838, + "logits/chosen": -0.7219829559326172, + "logits/rejected": -0.7598026990890503, + "logps/chosen": -0.10694906860589981, + "logps/rejected": -0.6799654364585876, + "loss": 3.5333, + "nll_loss": 0.8585590720176697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010694906115531921, + "rewards/margins": 0.05730164051055908, + "rewards/rejected": -0.067996546626091, + "step": 2574 + }, + { + "epoch": 1.780774550484094, + "grad_norm": 5.776702880859375, + "learning_rate": 4.56623636084217e-05, + "log_odds_chosen": 5.801150321960449, + "log_odds_ratio": -0.13522619009017944, + "logits/chosen": -0.7149621248245239, + "logits/rejected": -0.7345128655433655, + "logps/chosen": -0.04546257480978966, + "logps/rejected": -0.755382776260376, + "loss": 1.9775, + "nll_loss": 0.4808577001094818, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004546257667243481, + "rewards/margins": 0.07099202275276184, + "rewards/rejected": -0.0755382776260376, + "step": 2575 + }, + { + "epoch": 1.7814661134163208, + "grad_norm": 6.1580095291137695, + "learning_rate": 4.5658521592131556e-05, + "log_odds_chosen": 4.461126327514648, + "log_odds_ratio": -0.05306124687194824, + "logits/chosen": -0.3161161243915558, + "logits/rejected": -0.3657586872577667, + "logps/chosen": -0.07128717005252838, + "logps/rejected": -1.515031099319458, + "loss": 2.9739, + "nll_loss": 0.7381733059883118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007128716912120581, + "rewards/margins": 0.14437440037727356, + "rewards/rejected": -0.15150313079357147, + "step": 2576 + }, + { + "epoch": 1.7821576763485476, + "grad_norm": 12.750073432922363, + "learning_rate": 4.56546795758414e-05, + "log_odds_chosen": 3.2818448543548584, + "log_odds_ratio": -1.0118776559829712, + "logits/chosen": -0.3662753105163574, + "logits/rejected": -0.402810662984848, + "logps/chosen": -0.09948496520519257, + "logps/rejected": -0.7612845301628113, + "loss": 4.6259, + "nll_loss": 1.0552799701690674, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009948497638106346, + "rewards/margins": 0.06617995351552963, + "rewards/rejected": -0.07612845301628113, + "step": 2577 + }, + { + "epoch": 1.7828492392807744, + "grad_norm": 9.642328262329102, + "learning_rate": 4.565083755955126e-05, + "log_odds_chosen": 3.5306921005249023, + "log_odds_ratio": -0.5706806182861328, + "logits/chosen": -0.44251155853271484, + "logits/rejected": -0.45110124349594116, + "logps/chosen": -0.10406813770532608, + "logps/rejected": -0.5283291339874268, + "loss": 3.3329, + "nll_loss": 0.7761471271514893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010406811721622944, + "rewards/margins": 0.04242610186338425, + "rewards/rejected": -0.052832912653684616, + "step": 2578 + }, + { + "epoch": 1.7835408022130013, + "grad_norm": 6.969174861907959, + "learning_rate": 4.5646995543261106e-05, + "log_odds_chosen": 6.242532730102539, + "log_odds_ratio": -0.018553482368588448, + "logits/chosen": -0.6219586133956909, + "logits/rejected": -0.6382424235343933, + "logps/chosen": -0.01259948592633009, + "logps/rejected": -0.997509241104126, + "loss": 3.8471, + "nll_loss": 0.9599129557609558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012599484762176871, + "rewards/margins": 0.09849098324775696, + "rewards/rejected": -0.09975093603134155, + "step": 2579 + }, + { + "epoch": 1.784232365145228, + "grad_norm": 7.377046585083008, + "learning_rate": 4.564315352697096e-05, + "log_odds_chosen": 3.4929206371307373, + "log_odds_ratio": -0.19127880036830902, + "logits/chosen": -0.5476193428039551, + "logits/rejected": -0.6145222187042236, + "logps/chosen": -0.03862868994474411, + "logps/rejected": -0.6367706656455994, + "loss": 3.1703, + "nll_loss": 0.7734379768371582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038628692273050547, + "rewards/margins": 0.0598142072558403, + "rewards/rejected": -0.06367707252502441, + "step": 2580 + }, + { + "epoch": 1.784923928077455, + "grad_norm": 7.009395599365234, + "learning_rate": 4.563931151068081e-05, + "log_odds_chosen": 4.45806884765625, + "log_odds_ratio": -0.17331717908382416, + "logits/chosen": -0.2205784022808075, + "logits/rejected": -0.26497140526771545, + "logps/chosen": -0.06806333363056183, + "logps/rejected": -0.7032272815704346, + "loss": 2.761, + "nll_loss": 0.6729127168655396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0068063330836594105, + "rewards/margins": 0.06351640075445175, + "rewards/rejected": -0.07032272964715958, + "step": 2581 + }, + { + "epoch": 1.7856154910096818, + "grad_norm": 8.358550071716309, + "learning_rate": 4.563546949439066e-05, + "log_odds_chosen": 5.872114181518555, + "log_odds_ratio": -0.12831641733646393, + "logits/chosen": -0.7278775572776794, + "logits/rejected": -0.754865288734436, + "logps/chosen": -0.017764536663889885, + "logps/rejected": -0.8691182136535645, + "loss": 2.4239, + "nll_loss": 0.5931454300880432, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017764536896720529, + "rewards/margins": 0.08513537049293518, + "rewards/rejected": -0.08691181242465973, + "step": 2582 + }, + { + "epoch": 1.7863070539419086, + "grad_norm": 6.859455585479736, + "learning_rate": 4.563162747810051e-05, + "log_odds_chosen": 7.026611804962158, + "log_odds_ratio": -0.007522970903664827, + "logits/chosen": -0.47302526235580444, + "logits/rejected": -0.5076656341552734, + "logps/chosen": -0.003296129172667861, + "logps/rejected": -1.0726574659347534, + "loss": 2.5881, + "nll_loss": 0.6462792158126831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003296128998044878, + "rewards/margins": 0.10693613439798355, + "rewards/rejected": -0.10726574808359146, + "step": 2583 + }, + { + "epoch": 1.7869986168741354, + "grad_norm": 8.412336349487305, + "learning_rate": 4.562778546181036e-05, + "log_odds_chosen": 3.1393537521362305, + "log_odds_ratio": -0.3619435727596283, + "logits/chosen": -0.24041230976581573, + "logits/rejected": -0.2845999002456665, + "logps/chosen": -0.09177468717098236, + "logps/rejected": -0.8623040914535522, + "loss": 3.0168, + "nll_loss": 0.7179951071739197, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009177468717098236, + "rewards/margins": 0.07705294340848923, + "rewards/rejected": -0.08623041212558746, + "step": 2584 + }, + { + "epoch": 1.7876901798063622, + "grad_norm": 7.934391498565674, + "learning_rate": 4.5623943445520214e-05, + "log_odds_chosen": 5.535144805908203, + "log_odds_ratio": -0.0946061760187149, + "logits/chosen": -0.6780644655227661, + "logits/rejected": -0.7226251363754272, + "logps/chosen": -0.05594250559806824, + "logps/rejected": -0.9195252656936646, + "loss": 3.5998, + "nll_loss": 0.8904974460601807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005594250746071339, + "rewards/margins": 0.08635827153921127, + "rewards/rejected": -0.09195252507925034, + "step": 2585 + }, + { + "epoch": 1.788381742738589, + "grad_norm": 9.528312683105469, + "learning_rate": 4.562010142923006e-05, + "log_odds_chosen": 4.408294677734375, + "log_odds_ratio": -0.3562813699245453, + "logits/chosen": -0.6378925442695618, + "logits/rejected": -0.6505054235458374, + "logps/chosen": -0.054091863334178925, + "logps/rejected": -0.7761125564575195, + "loss": 3.5691, + "nll_loss": 0.856635332107544, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005409186240285635, + "rewards/margins": 0.07220207899808884, + "rewards/rejected": -0.07761126756668091, + "step": 2586 + }, + { + "epoch": 1.789073305670816, + "grad_norm": 5.693426609039307, + "learning_rate": 4.561625941293992e-05, + "log_odds_chosen": 4.144665718078613, + "log_odds_ratio": -0.28512609004974365, + "logits/chosen": -0.5980684161186218, + "logits/rejected": -0.5759909152984619, + "logps/chosen": -0.08227177709341049, + "logps/rejected": -0.6942697763442993, + "loss": 3.113, + "nll_loss": 0.7497309446334839, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008227178826928139, + "rewards/margins": 0.06119980663061142, + "rewards/rejected": -0.06942698359489441, + "step": 2587 + }, + { + "epoch": 1.7897648686030427, + "grad_norm": 6.498386859893799, + "learning_rate": 4.5612417396649765e-05, + "log_odds_chosen": 7.378063678741455, + "log_odds_ratio": -0.007801724597811699, + "logits/chosen": -0.24729809165000916, + "logits/rejected": -0.284311443567276, + "logps/chosen": -0.01493473257869482, + "logps/rejected": -1.2252931594848633, + "loss": 3.0915, + "nll_loss": 0.7720844149589539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001493473188020289, + "rewards/margins": 0.12103584408760071, + "rewards/rejected": -0.12252932041883469, + "step": 2588 + }, + { + "epoch": 1.7904564315352696, + "grad_norm": 6.341252326965332, + "learning_rate": 4.560857538035962e-05, + "log_odds_chosen": 2.9661362171173096, + "log_odds_ratio": -0.21067170798778534, + "logits/chosen": -0.1589818298816681, + "logits/rejected": -0.20216339826583862, + "logps/chosen": -0.10224173963069916, + "logps/rejected": -0.7577835321426392, + "loss": 2.759, + "nll_loss": 0.6686722040176392, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010224174708127975, + "rewards/margins": 0.065554179251194, + "rewards/rejected": -0.07577835023403168, + "step": 2589 + }, + { + "epoch": 1.7911479944674964, + "grad_norm": 8.582254409790039, + "learning_rate": 4.560473336406947e-05, + "log_odds_chosen": 5.644009590148926, + "log_odds_ratio": -0.09102729707956314, + "logits/chosen": -0.6598323583602905, + "logits/rejected": -0.6340920329093933, + "logps/chosen": -0.03555937111377716, + "logps/rejected": -1.0442126989364624, + "loss": 3.2956, + "nll_loss": 0.8148072361946106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035559372045099735, + "rewards/margins": 0.10086533427238464, + "rewards/rejected": -0.10442127287387848, + "step": 2590 + }, + { + "epoch": 1.7918395573997232, + "grad_norm": 5.829075813293457, + "learning_rate": 4.5600891347779315e-05, + "log_odds_chosen": 5.671568393707275, + "log_odds_ratio": -0.032739464193582535, + "logits/chosen": -0.697761595249176, + "logits/rejected": -0.686554491519928, + "logps/chosen": -0.02588575892150402, + "logps/rejected": -0.8335800170898438, + "loss": 2.9027, + "nll_loss": 0.7223943471908569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025885761715471745, + "rewards/margins": 0.08076941967010498, + "rewards/rejected": -0.08335800468921661, + "step": 2591 + }, + { + "epoch": 1.79253112033195, + "grad_norm": 9.570256233215332, + "learning_rate": 4.559704933148917e-05, + "log_odds_chosen": 4.774772644042969, + "log_odds_ratio": -0.18960583209991455, + "logits/chosen": -0.5086647868156433, + "logits/rejected": -0.5806989669799805, + "logps/chosen": -0.08618862181901932, + "logps/rejected": -0.7194696068763733, + "loss": 3.5935, + "nll_loss": 0.8794207572937012, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008618861436843872, + "rewards/margins": 0.0633281022310257, + "rewards/rejected": -0.07194696366786957, + "step": 2592 + }, + { + "epoch": 1.7932226832641769, + "grad_norm": 8.571864128112793, + "learning_rate": 4.559320731519902e-05, + "log_odds_chosen": 5.25043249130249, + "log_odds_ratio": -0.21316511929035187, + "logits/chosen": -0.5006336569786072, + "logits/rejected": -0.5622016787528992, + "logps/chosen": -0.08730829507112503, + "logps/rejected": -0.9564471244812012, + "loss": 3.3834, + "nll_loss": 0.8245400190353394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008730829693377018, + "rewards/margins": 0.08691388368606567, + "rewards/rejected": -0.09564471244812012, + "step": 2593 + }, + { + "epoch": 1.7939142461964037, + "grad_norm": 10.458921432495117, + "learning_rate": 4.558936529890887e-05, + "log_odds_chosen": 3.889603614807129, + "log_odds_ratio": -0.3547723889350891, + "logits/chosen": -0.38273003697395325, + "logits/rejected": -0.37535360455513, + "logps/chosen": -0.08629313856363297, + "logps/rejected": -0.7442159056663513, + "loss": 2.6653, + "nll_loss": 0.6308448910713196, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008629313670098782, + "rewards/margins": 0.0657922774553299, + "rewards/rejected": -0.07442159950733185, + "step": 2594 + }, + { + "epoch": 1.7946058091286305, + "grad_norm": 6.538029670715332, + "learning_rate": 4.558552328261872e-05, + "log_odds_chosen": 3.9426791667938232, + "log_odds_ratio": -0.1984841227531433, + "logits/chosen": -0.4637698531150818, + "logits/rejected": -0.4950888752937317, + "logps/chosen": -0.05847723037004471, + "logps/rejected": -0.655007004737854, + "loss": 3.5605, + "nll_loss": 0.8702831864356995, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005847723223268986, + "rewards/margins": 0.05965298414230347, + "rewards/rejected": -0.06550070643424988, + "step": 2595 + }, + { + "epoch": 1.7952973720608574, + "grad_norm": 10.25632095336914, + "learning_rate": 4.558168126632858e-05, + "log_odds_chosen": 3.3027095794677734, + "log_odds_ratio": -0.3819888234138489, + "logits/chosen": -0.5645632743835449, + "logits/rejected": -0.6149783134460449, + "logps/chosen": -0.1231047660112381, + "logps/rejected": -0.7725575566291809, + "loss": 4.4363, + "nll_loss": 1.0708774328231812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01231047697365284, + "rewards/margins": 0.0649452805519104, + "rewards/rejected": -0.07725575566291809, + "step": 2596 + }, + { + "epoch": 1.7959889349930842, + "grad_norm": 7.665077209472656, + "learning_rate": 4.557783925003842e-05, + "log_odds_chosen": 4.87660026550293, + "log_odds_ratio": -0.040577709674835205, + "logits/chosen": -0.4291841387748718, + "logits/rejected": -0.47871482372283936, + "logps/chosen": -0.02387285605072975, + "logps/rejected": -0.7996243238449097, + "loss": 3.0375, + "nll_loss": 0.7553067207336426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023872856982052326, + "rewards/margins": 0.07757514715194702, + "rewards/rejected": -0.07996243238449097, + "step": 2597 + }, + { + "epoch": 1.796680497925311, + "grad_norm": 5.8194193840026855, + "learning_rate": 4.5573997233748275e-05, + "log_odds_chosen": 5.245506286621094, + "log_odds_ratio": -0.1459067016839981, + "logits/chosen": -0.34469327330589294, + "logits/rejected": -0.39062270522117615, + "logps/chosen": -0.033551059663295746, + "logps/rejected": -0.7837735414505005, + "loss": 2.7199, + "nll_loss": 0.6653863787651062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033551061060279608, + "rewards/margins": 0.07502225041389465, + "rewards/rejected": -0.07837735116481781, + "step": 2598 + }, + { + "epoch": 1.797372060857538, + "grad_norm": 6.646912097930908, + "learning_rate": 4.557015521745812e-05, + "log_odds_chosen": 3.947014093399048, + "log_odds_ratio": -0.13185060024261475, + "logits/chosen": -0.5804282426834106, + "logits/rejected": -0.6859478950500488, + "logps/chosen": -0.0472014881670475, + "logps/rejected": -0.7761104106903076, + "loss": 3.5381, + "nll_loss": 0.8713419437408447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0047201490961015224, + "rewards/margins": 0.07289090007543564, + "rewards/rejected": -0.077611044049263, + "step": 2599 + }, + { + "epoch": 1.798063623789765, + "grad_norm": 7.322403907775879, + "learning_rate": 4.5566313201167974e-05, + "log_odds_chosen": 4.652568340301514, + "log_odds_ratio": -0.37490737438201904, + "logits/chosen": -0.12980686128139496, + "logits/rejected": -0.11926015466451645, + "logps/chosen": -0.07453082501888275, + "logps/rejected": -0.7865791320800781, + "loss": 3.4297, + "nll_loss": 0.8199406862258911, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00745308306068182, + "rewards/margins": 0.07120483368635178, + "rewards/rejected": -0.07865791022777557, + "step": 2600 + }, + { + "epoch": 1.7987551867219918, + "grad_norm": 5.136075496673584, + "learning_rate": 4.5562471184877826e-05, + "log_odds_chosen": 6.08044958114624, + "log_odds_ratio": -0.036412280052900314, + "logits/chosen": -0.3810223937034607, + "logits/rejected": -0.3522016108036041, + "logps/chosen": -0.01769629679620266, + "logps/rejected": -0.8769605159759521, + "loss": 2.1277, + "nll_loss": 0.5282766222953796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017696297727525234, + "rewards/margins": 0.08592642843723297, + "rewards/rejected": -0.08769606053829193, + "step": 2601 + }, + { + "epoch": 1.7994467496542186, + "grad_norm": 8.091934204101562, + "learning_rate": 4.555862916858767e-05, + "log_odds_chosen": 3.523444652557373, + "log_odds_ratio": -0.23734408617019653, + "logits/chosen": -0.45225799083709717, + "logits/rejected": -0.5152193307876587, + "logps/chosen": -0.08465392887592316, + "logps/rejected": -0.6641150712966919, + "loss": 3.319, + "nll_loss": 0.8060159087181091, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008465392515063286, + "rewards/margins": 0.05794611573219299, + "rewards/rejected": -0.06641151010990143, + "step": 2602 + }, + { + "epoch": 1.8001383125864454, + "grad_norm": 6.36898946762085, + "learning_rate": 4.555478715229753e-05, + "log_odds_chosen": 5.446345329284668, + "log_odds_ratio": -0.07899149507284164, + "logits/chosen": -0.2378370761871338, + "logits/rejected": -0.2884705662727356, + "logps/chosen": -0.02247701585292816, + "logps/rejected": -0.6792370080947876, + "loss": 2.2205, + "nll_loss": 0.5472333431243896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022477013990283012, + "rewards/margins": 0.0656760036945343, + "rewards/rejected": -0.06792370229959488, + "step": 2603 + }, + { + "epoch": 1.8008298755186722, + "grad_norm": 9.214363098144531, + "learning_rate": 4.5550945136007377e-05, + "log_odds_chosen": 4.689992904663086, + "log_odds_ratio": -0.08061084896326065, + "logits/chosen": -0.5148239731788635, + "logits/rejected": -0.6048566102981567, + "logps/chosen": -0.03498592600226402, + "logps/rejected": -0.7926340103149414, + "loss": 3.5354, + "nll_loss": 0.875788688659668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034985924139618874, + "rewards/margins": 0.07576481252908707, + "rewards/rejected": -0.07926340401172638, + "step": 2604 + }, + { + "epoch": 1.801521438450899, + "grad_norm": 8.10094928741455, + "learning_rate": 4.554710311971723e-05, + "log_odds_chosen": 5.41352653503418, + "log_odds_ratio": -0.05584706366062164, + "logits/chosen": -0.5215616226196289, + "logits/rejected": -0.5591295957565308, + "logps/chosen": -0.037766214460134506, + "logps/rejected": -0.655017077922821, + "loss": 3.1323, + "nll_loss": 0.7774906158447266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037766220048069954, + "rewards/margins": 0.06172508746385574, + "rewards/rejected": -0.06550170481204987, + "step": 2605 + }, + { + "epoch": 1.802213001383126, + "grad_norm": 5.646283149719238, + "learning_rate": 4.554326110342708e-05, + "log_odds_chosen": 5.976848602294922, + "log_odds_ratio": -0.21028871834278107, + "logits/chosen": -0.4315609037876129, + "logits/rejected": -0.42234447598457336, + "logps/chosen": -0.023608697578310966, + "logps/rejected": -0.6077236533164978, + "loss": 3.1639, + "nll_loss": 0.7699489593505859, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002360869897529483, + "rewards/margins": 0.05841149389743805, + "rewards/rejected": -0.0607723630964756, + "step": 2606 + }, + { + "epoch": 1.8029045643153527, + "grad_norm": 5.159456729888916, + "learning_rate": 4.5539419087136934e-05, + "log_odds_chosen": 4.167616844177246, + "log_odds_ratio": -0.12459512799978256, + "logits/chosen": -0.12597954273223877, + "logits/rejected": -0.18310877680778503, + "logps/chosen": -0.042437825351953506, + "logps/rejected": -0.6159353256225586, + "loss": 2.3385, + "nll_loss": 0.5721673369407654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004243782255798578, + "rewards/margins": 0.05734974890947342, + "rewards/rejected": -0.06159352511167526, + "step": 2607 + }, + { + "epoch": 1.8035961272475796, + "grad_norm": 6.154688358306885, + "learning_rate": 4.553557707084678e-05, + "log_odds_chosen": 5.0011677742004395, + "log_odds_ratio": -0.0715303048491478, + "logits/chosen": -0.651474118232727, + "logits/rejected": -0.668293297290802, + "logps/chosen": -0.04794564098119736, + "logps/rejected": -1.0686466693878174, + "loss": 2.4345, + "nll_loss": 0.6014776229858398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004794564098119736, + "rewards/margins": 0.10207010805606842, + "rewards/rejected": -0.10686466097831726, + "step": 2608 + }, + { + "epoch": 1.8042876901798064, + "grad_norm": 7.125792026519775, + "learning_rate": 4.553173505455663e-05, + "log_odds_chosen": 5.126342296600342, + "log_odds_ratio": -0.13379913568496704, + "logits/chosen": -0.4011853039264679, + "logits/rejected": -0.42175742983818054, + "logps/chosen": -0.05009883642196655, + "logps/rejected": -0.7972033619880676, + "loss": 3.8232, + "nll_loss": 0.9424182772636414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005009883549064398, + "rewards/margins": 0.07471044361591339, + "rewards/rejected": -0.07972033321857452, + "step": 2609 + }, + { + "epoch": 1.8049792531120332, + "grad_norm": 5.347149848937988, + "learning_rate": 4.5527893038266484e-05, + "log_odds_chosen": 5.037969589233398, + "log_odds_ratio": -0.07694417238235474, + "logits/chosen": -0.10675536841154099, + "logits/rejected": -0.15652278065681458, + "logps/chosen": -0.0804632306098938, + "logps/rejected": -1.161550521850586, + "loss": 2.5682, + "nll_loss": 0.634350597858429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00804632343351841, + "rewards/margins": 0.10810872912406921, + "rewards/rejected": -0.11615505814552307, + "step": 2610 + }, + { + "epoch": 1.80567081604426, + "grad_norm": 6.408705234527588, + "learning_rate": 4.552405102197633e-05, + "log_odds_chosen": 7.264980316162109, + "log_odds_ratio": -0.113655224442482, + "logits/chosen": -0.20847178995609283, + "logits/rejected": -0.1949325054883957, + "logps/chosen": -0.022643744945526123, + "logps/rejected": -0.7103455066680908, + "loss": 3.0926, + "nll_loss": 0.7617882490158081, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002264374401420355, + "rewards/margins": 0.06877018511295319, + "rewards/rejected": -0.07103455066680908, + "step": 2611 + }, + { + "epoch": 1.8063623789764869, + "grad_norm": 8.062432289123535, + "learning_rate": 4.552020900568619e-05, + "log_odds_chosen": 5.82761287689209, + "log_odds_ratio": -0.04699864983558655, + "logits/chosen": -0.6141172051429749, + "logits/rejected": -0.7337712049484253, + "logps/chosen": -0.011600498110055923, + "logps/rejected": -0.7071603536605835, + "loss": 2.9428, + "nll_loss": 0.7309926152229309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011600498110055923, + "rewards/margins": 0.06955599039793015, + "rewards/rejected": -0.07071603834629059, + "step": 2612 + }, + { + "epoch": 1.8070539419087137, + "grad_norm": 7.179983615875244, + "learning_rate": 4.5516366989396035e-05, + "log_odds_chosen": 5.070010662078857, + "log_odds_ratio": -0.0862480029463768, + "logits/chosen": -0.6351104974746704, + "logits/rejected": -0.6772160530090332, + "logps/chosen": -0.055110447108745575, + "logps/rejected": -0.751242995262146, + "loss": 3.1957, + "nll_loss": 0.7902911901473999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005511044524610043, + "rewards/margins": 0.0696132481098175, + "rewards/rejected": -0.07512429356575012, + "step": 2613 + }, + { + "epoch": 1.8077455048409405, + "grad_norm": 7.030640602111816, + "learning_rate": 4.551252497310589e-05, + "log_odds_chosen": 4.596918106079102, + "log_odds_ratio": -0.047915950417518616, + "logits/chosen": -0.49445128440856934, + "logits/rejected": -0.5078890323638916, + "logps/chosen": -0.018410056829452515, + "logps/rejected": -0.6411164999008179, + "loss": 3.5244, + "nll_loss": 0.8763055801391602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018410057527944446, + "rewards/margins": 0.06227065250277519, + "rewards/rejected": -0.06411165744066238, + "step": 2614 + }, + { + "epoch": 1.8084370677731674, + "grad_norm": 11.022759437561035, + "learning_rate": 4.550868295681574e-05, + "log_odds_chosen": 4.967000961303711, + "log_odds_ratio": -0.027516499161720276, + "logits/chosen": -0.6782200336456299, + "logits/rejected": -0.6999487280845642, + "logps/chosen": -0.014316966757178307, + "logps/rejected": -0.7635257244110107, + "loss": 4.7923, + "nll_loss": 1.1953269243240356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014316968154162169, + "rewards/margins": 0.07492087781429291, + "rewards/rejected": -0.0763525739312172, + "step": 2615 + }, + { + "epoch": 1.8091286307053942, + "grad_norm": 6.712900638580322, + "learning_rate": 4.550484094052559e-05, + "log_odds_chosen": 7.674071788787842, + "log_odds_ratio": -0.011318670585751534, + "logits/chosen": -0.1972891092300415, + "logits/rejected": -0.25815290212631226, + "logps/chosen": -0.010169260203838348, + "logps/rejected": -1.0386838912963867, + "loss": 2.8093, + "nll_loss": 0.701184868812561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001016925903968513, + "rewards/margins": 0.10285145789384842, + "rewards/rejected": -0.10386838763952255, + "step": 2616 + }, + { + "epoch": 1.809820193637621, + "grad_norm": 7.950763702392578, + "learning_rate": 4.550099892423544e-05, + "log_odds_chosen": 5.966031551361084, + "log_odds_ratio": -0.013020548038184643, + "logits/chosen": -0.31445324420928955, + "logits/rejected": -0.3987734317779541, + "logps/chosen": -0.03026457317173481, + "logps/rejected": -1.1416288614273071, + "loss": 2.969, + "nll_loss": 0.7409405708312988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003026457503437996, + "rewards/margins": 0.11113642156124115, + "rewards/rejected": -0.11416289210319519, + "step": 2617 + }, + { + "epoch": 1.8105117565698479, + "grad_norm": 31.848552703857422, + "learning_rate": 4.549715690794529e-05, + "log_odds_chosen": 3.156466007232666, + "log_odds_ratio": -0.8378832936286926, + "logits/chosen": -0.22038498520851135, + "logits/rejected": -0.26401734352111816, + "logps/chosen": -0.12877285480499268, + "logps/rejected": -0.6650040149688721, + "loss": 3.0298, + "nll_loss": 0.6736541986465454, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012877286411821842, + "rewards/margins": 0.05362311750650406, + "rewards/rejected": -0.06650040298700333, + "step": 2618 + }, + { + "epoch": 1.8112033195020747, + "grad_norm": 4.113903045654297, + "learning_rate": 4.549331489165514e-05, + "log_odds_chosen": 5.889928340911865, + "log_odds_ratio": -0.03983638808131218, + "logits/chosen": -0.24106872081756592, + "logits/rejected": -0.18745410442352295, + "logps/chosen": -0.025771846994757652, + "logps/rejected": -0.7980707883834839, + "loss": 1.828, + "nll_loss": 0.4530182182788849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002577184932306409, + "rewards/margins": 0.07722988724708557, + "rewards/rejected": -0.07980707287788391, + "step": 2619 + }, + { + "epoch": 1.8118948824343015, + "grad_norm": 7.535008430480957, + "learning_rate": 4.548947287536499e-05, + "log_odds_chosen": 6.170376300811768, + "log_odds_ratio": -0.02469835989177227, + "logits/chosen": -0.5700592994689941, + "logits/rejected": -0.6413227915763855, + "logps/chosen": -0.04254474118351936, + "logps/rejected": -1.1719743013381958, + "loss": 2.9024, + "nll_loss": 0.7231208086013794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004254474304616451, + "rewards/margins": 0.11294296383857727, + "rewards/rejected": -0.1171974241733551, + "step": 2620 + }, + { + "epoch": 1.8125864453665284, + "grad_norm": 8.301804542541504, + "learning_rate": 4.548563085907485e-05, + "log_odds_chosen": 6.983060836791992, + "log_odds_ratio": -0.027500227093696594, + "logits/chosen": -0.7125990986824036, + "logits/rejected": -0.7472147941589355, + "logps/chosen": -0.007630965206772089, + "logps/rejected": -1.0622081756591797, + "loss": 3.0055, + "nll_loss": 0.7486361265182495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007630965556018054, + "rewards/margins": 0.10545771569013596, + "rewards/rejected": -0.10622081160545349, + "step": 2621 + }, + { + "epoch": 1.8132780082987552, + "grad_norm": 6.867591857910156, + "learning_rate": 4.5481788842784693e-05, + "log_odds_chosen": 4.466533660888672, + "log_odds_ratio": -0.2696218490600586, + "logits/chosen": -0.5059961676597595, + "logits/rejected": -0.5039629936218262, + "logps/chosen": -0.0487339124083519, + "logps/rejected": -0.6949042081832886, + "loss": 2.632, + "nll_loss": 0.6310458183288574, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00487339124083519, + "rewards/margins": 0.06461702287197113, + "rewards/rejected": -0.06949041783809662, + "step": 2622 + }, + { + "epoch": 1.813969571230982, + "grad_norm": 6.75360107421875, + "learning_rate": 4.5477946826494546e-05, + "log_odds_chosen": 5.620083808898926, + "log_odds_ratio": -0.22650887072086334, + "logits/chosen": -0.34317028522491455, + "logits/rejected": -0.3361210525035858, + "logps/chosen": -0.08229245245456696, + "logps/rejected": -1.3436719179153442, + "loss": 2.4746, + "nll_loss": 0.595992386341095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00822924543172121, + "rewards/margins": 0.12613794207572937, + "rewards/rejected": -0.1343671977519989, + "step": 2623 + }, + { + "epoch": 1.8146611341632088, + "grad_norm": 5.871620178222656, + "learning_rate": 4.54741048102044e-05, + "log_odds_chosen": 3.1263251304626465, + "log_odds_ratio": -0.2592034637928009, + "logits/chosen": -0.34768468141555786, + "logits/rejected": -0.3263643980026245, + "logps/chosen": -0.08932524174451828, + "logps/rejected": -0.9391407370567322, + "loss": 2.4419, + "nll_loss": 0.5845474004745483, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008932523429393768, + "rewards/margins": 0.08498155325651169, + "rewards/rejected": -0.09391407668590546, + "step": 2624 + }, + { + "epoch": 1.8153526970954357, + "grad_norm": 4.6835618019104, + "learning_rate": 4.547026279391425e-05, + "log_odds_chosen": 6.344972610473633, + "log_odds_ratio": -0.06391476094722748, + "logits/chosen": -0.3003113269805908, + "logits/rejected": -0.34467703104019165, + "logps/chosen": -0.04334036260843277, + "logps/rejected": -1.4139169454574585, + "loss": 2.7204, + "nll_loss": 0.6737198233604431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004334036260843277, + "rewards/margins": 0.13705766201019287, + "rewards/rejected": -0.14139169454574585, + "step": 2625 + }, + { + "epoch": 1.8160442600276625, + "grad_norm": 6.126811981201172, + "learning_rate": 4.5466420777624096e-05, + "log_odds_chosen": 4.0297441482543945, + "log_odds_ratio": -0.32939809560775757, + "logits/chosen": -0.5913423895835876, + "logits/rejected": -0.57082200050354, + "logps/chosen": -0.13563945889472961, + "logps/rejected": -0.8001002073287964, + "loss": 3.0862, + "nll_loss": 0.7386195659637451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013563944958150387, + "rewards/margins": 0.06644607335329056, + "rewards/rejected": -0.08001002669334412, + "step": 2626 + }, + { + "epoch": 1.8167358229598893, + "grad_norm": 7.478198051452637, + "learning_rate": 4.546257876133395e-05, + "log_odds_chosen": 7.136330604553223, + "log_odds_ratio": -0.007372260093688965, + "logits/chosen": -0.2368028610944748, + "logits/rejected": -0.2646666467189789, + "logps/chosen": -0.008087377063930035, + "logps/rejected": -0.9763768911361694, + "loss": 3.9637, + "nll_loss": 0.9901875853538513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008087377063930035, + "rewards/margins": 0.09682895243167877, + "rewards/rejected": -0.09763769060373306, + "step": 2627 + }, + { + "epoch": 1.8174273858921162, + "grad_norm": 6.5756378173828125, + "learning_rate": 4.54587367450438e-05, + "log_odds_chosen": 6.8269548416137695, + "log_odds_ratio": -0.04414502903819084, + "logits/chosen": -0.35990023612976074, + "logits/rejected": -0.45491692423820496, + "logps/chosen": -0.007723034359514713, + "logps/rejected": -0.8144005537033081, + "loss": 2.5238, + "nll_loss": 0.6265450119972229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007723035523667932, + "rewards/margins": 0.08066774904727936, + "rewards/rejected": -0.08144006133079529, + "step": 2628 + }, + { + "epoch": 1.818118948824343, + "grad_norm": 8.939872741699219, + "learning_rate": 4.545489472875365e-05, + "log_odds_chosen": 6.031547546386719, + "log_odds_ratio": -0.1246354877948761, + "logits/chosen": -0.3047257959842682, + "logits/rejected": -0.3927888870239258, + "logps/chosen": -0.04965946078300476, + "logps/rejected": -1.3520649671554565, + "loss": 2.9212, + "nll_loss": 0.7178254127502441, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004965946078300476, + "rewards/margins": 0.1302405595779419, + "rewards/rejected": -0.13520650565624237, + "step": 2629 + }, + { + "epoch": 1.8188105117565698, + "grad_norm": 5.710219860076904, + "learning_rate": 4.5451052712463506e-05, + "log_odds_chosen": 4.805438041687012, + "log_odds_ratio": -0.20486658811569214, + "logits/chosen": -0.15800583362579346, + "logits/rejected": -0.24480903148651123, + "logps/chosen": -0.09874942898750305, + "logps/rejected": -1.33525550365448, + "loss": 1.5654, + "nll_loss": 0.37085121870040894, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00987494271248579, + "rewards/margins": 0.12365061044692993, + "rewards/rejected": -0.133525550365448, + "step": 2630 + }, + { + "epoch": 1.8195020746887967, + "grad_norm": 8.432652473449707, + "learning_rate": 4.544721069617335e-05, + "log_odds_chosen": 6.5983076095581055, + "log_odds_ratio": -0.16553828120231628, + "logits/chosen": -0.528571367263794, + "logits/rejected": -0.5659384727478027, + "logps/chosen": -0.03578614443540573, + "logps/rejected": -0.9695991277694702, + "loss": 3.2697, + "nll_loss": 0.8008802533149719, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003578614443540573, + "rewards/margins": 0.09338130801916122, + "rewards/rejected": -0.0969599187374115, + "step": 2631 + }, + { + "epoch": 1.8201936376210235, + "grad_norm": 5.630443572998047, + "learning_rate": 4.5443368679883204e-05, + "log_odds_chosen": 6.879264831542969, + "log_odds_ratio": -0.007554001174867153, + "logits/chosen": -0.5525587797164917, + "logits/rejected": -0.527340292930603, + "logps/chosen": -0.014577634632587433, + "logps/rejected": -1.4096137285232544, + "loss": 2.5911, + "nll_loss": 0.6470277309417725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014577636029571295, + "rewards/margins": 0.139503613114357, + "rewards/rejected": -0.14096137881278992, + "step": 2632 + }, + { + "epoch": 1.8208852005532503, + "grad_norm": 5.754599571228027, + "learning_rate": 4.543952666359306e-05, + "log_odds_chosen": 2.836488962173462, + "log_odds_ratio": -0.3192460834980011, + "logits/chosen": -0.20284788310527802, + "logits/rejected": -0.2106815129518509, + "logps/chosen": -0.0716264545917511, + "logps/rejected": -0.42863890528678894, + "loss": 2.5742, + "nll_loss": 0.6116150617599487, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00716264545917511, + "rewards/margins": 0.035701245069503784, + "rewards/rejected": -0.042863890528678894, + "step": 2633 + }, + { + "epoch": 1.8215767634854771, + "grad_norm": 5.475660800933838, + "learning_rate": 4.543568464730291e-05, + "log_odds_chosen": 5.2552595138549805, + "log_odds_ratio": -0.20006102323532104, + "logits/chosen": -0.5913251638412476, + "logits/rejected": -0.6479060649871826, + "logps/chosen": -0.04129403084516525, + "logps/rejected": -1.0466065406799316, + "loss": 2.4332, + "nll_loss": 0.588290810585022, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004129402805119753, + "rewards/margins": 0.10053126513957977, + "rewards/rejected": -0.10466066002845764, + "step": 2634 + }, + { + "epoch": 1.822268326417704, + "grad_norm": 8.928821563720703, + "learning_rate": 4.5431842631012755e-05, + "log_odds_chosen": 3.049790859222412, + "log_odds_ratio": -0.3694280683994293, + "logits/chosen": -0.30583369731903076, + "logits/rejected": -0.3571898341178894, + "logps/chosen": -0.08246318250894547, + "logps/rejected": -0.2945728898048401, + "loss": 3.2304, + "nll_loss": 0.7706577181816101, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008246318437159061, + "rewards/margins": 0.02121097221970558, + "rewards/rejected": -0.029457291588187218, + "step": 2635 + }, + { + "epoch": 1.8229598893499308, + "grad_norm": 5.167267322540283, + "learning_rate": 4.542800061472261e-05, + "log_odds_chosen": 7.10857629776001, + "log_odds_ratio": -0.00941796600818634, + "logits/chosen": -0.27473002672195435, + "logits/rejected": -0.3904823660850525, + "logps/chosen": -0.03157273679971695, + "logps/rejected": -1.428621768951416, + "loss": 2.5949, + "nll_loss": 0.647790253162384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00315727386623621, + "rewards/margins": 0.1397048979997635, + "rewards/rejected": -0.14286217093467712, + "step": 2636 + }, + { + "epoch": 1.8236514522821576, + "grad_norm": 6.512024879455566, + "learning_rate": 4.542415859843246e-05, + "log_odds_chosen": 6.002936840057373, + "log_odds_ratio": -0.030928250402212143, + "logits/chosen": -0.5706111788749695, + "logits/rejected": -0.6251883506774902, + "logps/chosen": -0.036800943315029144, + "logps/rejected": -1.3069647550582886, + "loss": 2.7111, + "nll_loss": 0.6746917366981506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036800941452383995, + "rewards/margins": 0.12701638042926788, + "rewards/rejected": -0.13069647550582886, + "step": 2637 + }, + { + "epoch": 1.8243430152143845, + "grad_norm": 8.315937042236328, + "learning_rate": 4.5420316582142305e-05, + "log_odds_chosen": 3.425870656967163, + "log_odds_ratio": -0.7788676023483276, + "logits/chosen": -0.43286874890327454, + "logits/rejected": -0.48489269614219666, + "logps/chosen": -0.05835752934217453, + "logps/rejected": -0.8002941608428955, + "loss": 3.263, + "nll_loss": 0.7378556728363037, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00583575339987874, + "rewards/margins": 0.07419366389513016, + "rewards/rejected": -0.08002942055463791, + "step": 2638 + }, + { + "epoch": 1.8250345781466113, + "grad_norm": 9.208503723144531, + "learning_rate": 4.5416474565852165e-05, + "log_odds_chosen": 5.260612964630127, + "log_odds_ratio": -0.086390919983387, + "logits/chosen": -0.7150118350982666, + "logits/rejected": -0.7044198513031006, + "logps/chosen": -0.03383970633149147, + "logps/rejected": -0.8515818119049072, + "loss": 3.5761, + "nll_loss": 0.8853759169578552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033839705865830183, + "rewards/margins": 0.08177421241998672, + "rewards/rejected": -0.08515818417072296, + "step": 2639 + }, + { + "epoch": 1.8257261410788381, + "grad_norm": 10.198955535888672, + "learning_rate": 4.541263254956201e-05, + "log_odds_chosen": 9.71574878692627, + "log_odds_ratio": -0.00011423486284911633, + "logits/chosen": -0.32049620151519775, + "logits/rejected": -0.41254082322120667, + "logps/chosen": -0.00040840901783667505, + "logps/rejected": -1.8789923191070557, + "loss": 2.8609, + "nll_loss": 0.7152112722396851, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.084090323885903e-05, + "rewards/margins": 0.18785840272903442, + "rewards/rejected": -0.18789923191070557, + "step": 2640 + }, + { + "epoch": 1.826417704011065, + "grad_norm": 5.202672481536865, + "learning_rate": 4.540879053327186e-05, + "log_odds_chosen": 6.840071201324463, + "log_odds_ratio": -0.02362118288874626, + "logits/chosen": -0.5052455067634583, + "logits/rejected": -0.588655948638916, + "logps/chosen": -0.03541530296206474, + "logps/rejected": -1.1660518646240234, + "loss": 2.545, + "nll_loss": 0.6338983774185181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003541530342772603, + "rewards/margins": 0.11306366324424744, + "rewards/rejected": -0.11660519242286682, + "step": 2641 + }, + { + "epoch": 1.8271092669432918, + "grad_norm": 6.811013221740723, + "learning_rate": 4.5404948516981715e-05, + "log_odds_chosen": 6.056200981140137, + "log_odds_ratio": -0.16985799372196198, + "logits/chosen": -0.2569509744644165, + "logits/rejected": -0.31730279326438904, + "logps/chosen": -0.04153449460864067, + "logps/rejected": -1.42983877658844, + "loss": 2.6367, + "nll_loss": 0.6422007083892822, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0041534495539963245, + "rewards/margins": 0.13883042335510254, + "rewards/rejected": -0.14298386871814728, + "step": 2642 + }, + { + "epoch": 1.8278008298755186, + "grad_norm": 7.3355183601379395, + "learning_rate": 4.540110650069157e-05, + "log_odds_chosen": 5.2649455070495605, + "log_odds_ratio": -0.04633000120520592, + "logits/chosen": -0.46298545598983765, + "logits/rejected": -0.5828496217727661, + "logps/chosen": -0.04721803590655327, + "logps/rejected": -1.0684432983398438, + "loss": 3.6065, + "nll_loss": 0.8969941735267639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004721803590655327, + "rewards/margins": 0.10212253034114838, + "rewards/rejected": -0.10684433579444885, + "step": 2643 + }, + { + "epoch": 1.8284923928077457, + "grad_norm": 5.123359203338623, + "learning_rate": 4.539726448440141e-05, + "log_odds_chosen": 7.063897609710693, + "log_odds_ratio": -0.01669420301914215, + "logits/chosen": -0.5784233212471008, + "logits/rejected": -0.5812735557556152, + "logps/chosen": -0.012818637304008007, + "logps/rejected": -1.1337049007415771, + "loss": 2.7851, + "nll_loss": 0.6945989727973938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012818636605516076, + "rewards/margins": 0.1120886355638504, + "rewards/rejected": -0.11337050795555115, + "step": 2644 + }, + { + "epoch": 1.8291839557399725, + "grad_norm": 6.171611785888672, + "learning_rate": 4.5393422468111266e-05, + "log_odds_chosen": 2.9856228828430176, + "log_odds_ratio": -0.1682049185037613, + "logits/chosen": 0.5519628524780273, + "logits/rejected": 0.5516259670257568, + "logps/chosen": -0.08549317717552185, + "logps/rejected": -0.6905692219734192, + "loss": 2.8657, + "nll_loss": 0.6996016502380371, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008549317717552185, + "rewards/margins": 0.060507599264383316, + "rewards/rejected": -0.0690569207072258, + "step": 2645 + }, + { + "epoch": 1.8298755186721993, + "grad_norm": 14.625659942626953, + "learning_rate": 4.538958045182112e-05, + "log_odds_chosen": 4.857564926147461, + "log_odds_ratio": -0.1095946878194809, + "logits/chosen": -0.5669194459915161, + "logits/rejected": -0.6688473224639893, + "logps/chosen": -0.03788517788052559, + "logps/rejected": -0.8945537805557251, + "loss": 3.2053, + "nll_loss": 0.7903628945350647, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003788517788052559, + "rewards/margins": 0.08566686511039734, + "rewards/rejected": -0.08945538103580475, + "step": 2646 + }, + { + "epoch": 1.8305670816044262, + "grad_norm": 8.835652351379395, + "learning_rate": 4.5385738435530964e-05, + "log_odds_chosen": 4.281753063201904, + "log_odds_ratio": -0.10200349986553192, + "logits/chosen": -0.613962709903717, + "logits/rejected": -0.6460983157157898, + "logps/chosen": -0.03600337356328964, + "logps/rejected": -0.9600581526756287, + "loss": 3.2928, + "nll_loss": 0.8130101561546326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036003373097628355, + "rewards/margins": 0.09240548312664032, + "rewards/rejected": -0.09600581973791122, + "step": 2647 + }, + { + "epoch": 1.831258644536653, + "grad_norm": 12.954113006591797, + "learning_rate": 4.538189641924082e-05, + "log_odds_chosen": 5.192785739898682, + "log_odds_ratio": -0.35543930530548096, + "logits/chosen": -0.04655447229743004, + "logits/rejected": -0.07551144063472748, + "logps/chosen": -0.07405739277601242, + "logps/rejected": -1.0524441003799438, + "loss": 2.6527, + "nll_loss": 0.6276319026947021, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007405739277601242, + "rewards/margins": 0.09783867746591568, + "rewards/rejected": -0.10524441301822662, + "step": 2648 + }, + { + "epoch": 1.8319502074688798, + "grad_norm": 10.448448181152344, + "learning_rate": 4.537805440295067e-05, + "log_odds_chosen": 4.813277244567871, + "log_odds_ratio": -0.366511732339859, + "logits/chosen": -0.32178401947021484, + "logits/rejected": -0.36728590726852417, + "logps/chosen": -0.08522398769855499, + "logps/rejected": -0.9796061515808105, + "loss": 3.6681, + "nll_loss": 0.8803846836090088, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008522399701178074, + "rewards/margins": 0.08943821489810944, + "rewards/rejected": -0.09796061366796494, + "step": 2649 + }, + { + "epoch": 1.8326417704011067, + "grad_norm": 13.359732627868652, + "learning_rate": 4.537421238666052e-05, + "log_odds_chosen": 3.3614258766174316, + "log_odds_ratio": -0.47019559144973755, + "logits/chosen": -0.3268583416938782, + "logits/rejected": -0.2812211811542511, + "logps/chosen": -0.12640827894210815, + "logps/rejected": -0.7674390077590942, + "loss": 3.1545, + "nll_loss": 0.7416092157363892, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01264082733541727, + "rewards/margins": 0.06410308182239532, + "rewards/rejected": -0.07674390077590942, + "step": 2650 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 6.244953155517578, + "learning_rate": 4.5370370370370374e-05, + "log_odds_chosen": 6.26675271987915, + "log_odds_ratio": -0.0935230702161789, + "logits/chosen": -0.4478660225868225, + "logits/rejected": -0.4610947072505951, + "logps/chosen": -0.054639168083667755, + "logps/rejected": -1.0677608251571655, + "loss": 3.3856, + "nll_loss": 0.8370494842529297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054639168083667755, + "rewards/margins": 0.10131216049194336, + "rewards/rejected": -0.10677608847618103, + "step": 2651 + }, + { + "epoch": 1.8340248962655603, + "grad_norm": 13.164037704467773, + "learning_rate": 4.5366528354080226e-05, + "log_odds_chosen": 5.182484149932861, + "log_odds_ratio": -0.1720438152551651, + "logits/chosen": -0.2972196042537689, + "logits/rejected": -0.3702899217605591, + "logps/chosen": -0.031756024807691574, + "logps/rejected": -0.9779322743415833, + "loss": 3.9197, + "nll_loss": 0.9627140760421753, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031756022945046425, + "rewards/margins": 0.09461762011051178, + "rewards/rejected": -0.09779322892427444, + "step": 2652 + }, + { + "epoch": 1.8347164591977871, + "grad_norm": 4.6243977546691895, + "learning_rate": 4.536268633779007e-05, + "log_odds_chosen": 7.156553745269775, + "log_odds_ratio": -0.0021701371297240257, + "logits/chosen": -0.4487355947494507, + "logits/rejected": -0.41442787647247314, + "logps/chosen": -0.01149112731218338, + "logps/rejected": -1.3405351638793945, + "loss": 3.0436, + "nll_loss": 0.7606900334358215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011491125915199518, + "rewards/margins": 0.13290441036224365, + "rewards/rejected": -0.13405351340770721, + "step": 2653 + }, + { + "epoch": 1.835408022130014, + "grad_norm": 8.237411499023438, + "learning_rate": 4.5358844321499924e-05, + "log_odds_chosen": 3.6853272914886475, + "log_odds_ratio": -0.20125997066497803, + "logits/chosen": -0.3884178698062897, + "logits/rejected": -0.36602675914764404, + "logps/chosen": -0.14039038121700287, + "logps/rejected": -0.892852246761322, + "loss": 3.6356, + "nll_loss": 0.8887854218482971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014039037749171257, + "rewards/margins": 0.0752461850643158, + "rewards/rejected": -0.0892852246761322, + "step": 2654 + }, + { + "epoch": 1.8360995850622408, + "grad_norm": 10.218503952026367, + "learning_rate": 4.535500230520978e-05, + "log_odds_chosen": 2.992558717727661, + "log_odds_ratio": -0.509008526802063, + "logits/chosen": -0.6339020133018494, + "logits/rejected": -0.6814983487129211, + "logps/chosen": -0.16128496825695038, + "logps/rejected": -0.9309603571891785, + "loss": 3.3514, + "nll_loss": 0.7869572639465332, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.016128497198224068, + "rewards/margins": 0.07696753740310669, + "rewards/rejected": -0.09309603273868561, + "step": 2655 + }, + { + "epoch": 1.8367911479944676, + "grad_norm": 10.936944007873535, + "learning_rate": 4.535116028891962e-05, + "log_odds_chosen": 5.422421932220459, + "log_odds_ratio": -0.5106878876686096, + "logits/chosen": -0.40086135268211365, + "logits/rejected": -0.37600135803222656, + "logps/chosen": -0.036261092871427536, + "logps/rejected": -0.7765040397644043, + "loss": 2.378, + "nll_loss": 0.543430745601654, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003626109566539526, + "rewards/margins": 0.07402429729700089, + "rewards/rejected": -0.07765040546655655, + "step": 2656 + }, + { + "epoch": 1.8374827109266945, + "grad_norm": 5.705078125, + "learning_rate": 4.534731827262948e-05, + "log_odds_chosen": 5.254233360290527, + "log_odds_ratio": -0.10906915366649628, + "logits/chosen": -0.6265988349914551, + "logits/rejected": -0.5938752889633179, + "logps/chosen": -0.04390117898583412, + "logps/rejected": -0.9048900604248047, + "loss": 2.7357, + "nll_loss": 0.6730280518531799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0043901181779801846, + "rewards/margins": 0.08609890192747116, + "rewards/rejected": -0.09048901498317719, + "step": 2657 + }, + { + "epoch": 1.8381742738589213, + "grad_norm": 10.237077713012695, + "learning_rate": 4.534347625633933e-05, + "log_odds_chosen": 5.0289106369018555, + "log_odds_ratio": -0.6908766627311707, + "logits/chosen": -0.41462084650993347, + "logits/rejected": -0.44479289650917053, + "logps/chosen": -0.1477389633655548, + "logps/rejected": -0.9825767278671265, + "loss": 2.718, + "nll_loss": 0.6104055643081665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014773895964026451, + "rewards/margins": 0.08348377794027328, + "rewards/rejected": -0.09825767576694489, + "step": 2658 + }, + { + "epoch": 1.8388658367911481, + "grad_norm": 11.822378158569336, + "learning_rate": 4.533963424004918e-05, + "log_odds_chosen": 3.216097831726074, + "log_odds_ratio": -0.7136033177375793, + "logits/chosen": -0.7236344814300537, + "logits/rejected": -0.7421097755432129, + "logps/chosen": -0.15638959407806396, + "logps/rejected": -0.9404903054237366, + "loss": 3.3851, + "nll_loss": 0.7749228477478027, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.015638958662748337, + "rewards/margins": 0.0784100666642189, + "rewards/rejected": -0.09404902905225754, + "step": 2659 + }, + { + "epoch": 1.839557399723375, + "grad_norm": 7.603041648864746, + "learning_rate": 4.533579222375903e-05, + "log_odds_chosen": 5.358530044555664, + "log_odds_ratio": -0.1660924255847931, + "logits/chosen": -0.4428621530532837, + "logits/rejected": -0.44063064455986023, + "logps/chosen": -0.028198856860399246, + "logps/rejected": -1.0400341749191284, + "loss": 2.7174, + "nll_loss": 0.6627500057220459, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028198854997754097, + "rewards/margins": 0.10118352621793747, + "rewards/rejected": -0.1040034145116806, + "step": 2660 + }, + { + "epoch": 1.8402489626556018, + "grad_norm": 5.782895565032959, + "learning_rate": 4.5331950207468885e-05, + "log_odds_chosen": 3.479762077331543, + "log_odds_ratio": -0.47276097536087036, + "logits/chosen": -0.4127695560455322, + "logits/rejected": -0.46083658933639526, + "logps/chosen": -0.1779659390449524, + "logps/rejected": -0.8920935392379761, + "loss": 2.6048, + "nll_loss": 0.6039206981658936, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01779659278690815, + "rewards/margins": 0.07141275703907013, + "rewards/rejected": -0.08920934796333313, + "step": 2661 + }, + { + "epoch": 1.8409405255878286, + "grad_norm": 12.407095909118652, + "learning_rate": 4.532810819117873e-05, + "log_odds_chosen": 2.9915771484375, + "log_odds_ratio": -0.8544542789459229, + "logits/chosen": -0.21043117344379425, + "logits/rejected": -0.21721884608268738, + "logps/chosen": -0.07011416554450989, + "logps/rejected": -0.6056758165359497, + "loss": 4.0644, + "nll_loss": 0.9306578636169434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007011416368186474, + "rewards/margins": 0.0535561665892601, + "rewards/rejected": -0.06056758388876915, + "step": 2662 + }, + { + "epoch": 1.8416320885200554, + "grad_norm": 8.926106452941895, + "learning_rate": 4.532426617488858e-05, + "log_odds_chosen": 5.799577713012695, + "log_odds_ratio": -0.032613616436719894, + "logits/chosen": -0.6197344064712524, + "logits/rejected": -0.6875401735305786, + "logps/chosen": -0.022176966071128845, + "logps/rejected": -0.9513285160064697, + "loss": 2.8049, + "nll_loss": 0.6979745626449585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002217696513980627, + "rewards/margins": 0.09291516244411469, + "rewards/rejected": -0.09513285756111145, + "step": 2663 + }, + { + "epoch": 1.8423236514522823, + "grad_norm": 8.99937629699707, + "learning_rate": 4.5320424158598435e-05, + "log_odds_chosen": 5.625021934509277, + "log_odds_ratio": -0.24571014940738678, + "logits/chosen": -0.10770852863788605, + "logits/rejected": -0.18785730004310608, + "logps/chosen": -0.06675803661346436, + "logps/rejected": -1.043717861175537, + "loss": 3.7772, + "nll_loss": 0.9197167754173279, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006675804033875465, + "rewards/margins": 0.0976959839463234, + "rewards/rejected": -0.10437178611755371, + "step": 2664 + }, + { + "epoch": 1.843015214384509, + "grad_norm": 8.712796211242676, + "learning_rate": 4.531658214230828e-05, + "log_odds_chosen": 4.1482439041137695, + "log_odds_ratio": -0.194962278008461, + "logits/chosen": -0.21983416378498077, + "logits/rejected": -0.23658618330955505, + "logps/chosen": -0.03656216338276863, + "logps/rejected": -0.6330381631851196, + "loss": 3.3435, + "nll_loss": 0.816391110420227, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003656216664239764, + "rewards/margins": 0.05964760482311249, + "rewards/rejected": -0.06330382078886032, + "step": 2665 + }, + { + "epoch": 1.843706777316736, + "grad_norm": 7.394927501678467, + "learning_rate": 4.531274012601814e-05, + "log_odds_chosen": 3.453693151473999, + "log_odds_ratio": -0.24411636590957642, + "logits/chosen": -0.3464747369289398, + "logits/rejected": -0.4191704988479614, + "logps/chosen": -0.10869094729423523, + "logps/rejected": -1.0189597606658936, + "loss": 2.4536, + "nll_loss": 0.5889769196510315, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010869094170629978, + "rewards/margins": 0.09102687984704971, + "rewards/rejected": -0.10189597308635712, + "step": 2666 + }, + { + "epoch": 1.8443983402489628, + "grad_norm": 4.406402587890625, + "learning_rate": 4.5308898109727986e-05, + "log_odds_chosen": 3.872445583343506, + "log_odds_ratio": -0.24112261831760406, + "logits/chosen": -0.3680941164493561, + "logits/rejected": -0.3593483567237854, + "logps/chosen": -0.08817479014396667, + "logps/rejected": -0.6911728978157043, + "loss": 3.0122, + "nll_loss": 0.7289446592330933, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008817479014396667, + "rewards/margins": 0.06029981002211571, + "rewards/rejected": -0.06911729276180267, + "step": 2667 + }, + { + "epoch": 1.8450899031811896, + "grad_norm": 6.851699352264404, + "learning_rate": 4.530505609343784e-05, + "log_odds_chosen": 4.621401309967041, + "log_odds_ratio": -0.12672731280326843, + "logits/chosen": -0.005862422287464142, + "logits/rejected": -0.0212232768535614, + "logps/chosen": -0.13591837882995605, + "logps/rejected": -1.1643211841583252, + "loss": 3.3122, + "nll_loss": 0.8153849840164185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013591839000582695, + "rewards/margins": 0.10284027457237244, + "rewards/rejected": -0.11643211543560028, + "step": 2668 + }, + { + "epoch": 1.8457814661134164, + "grad_norm": 4.551556587219238, + "learning_rate": 4.530121407714769e-05, + "log_odds_chosen": 7.317740440368652, + "log_odds_ratio": -0.017265843227505684, + "logits/chosen": -0.6083251237869263, + "logits/rejected": -0.5865840315818787, + "logps/chosen": -0.02332671359181404, + "logps/rejected": -1.1472117900848389, + "loss": 3.5558, + "nll_loss": 0.8872328996658325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002332671545445919, + "rewards/margins": 0.11238852143287659, + "rewards/rejected": -0.11472119390964508, + "step": 2669 + }, + { + "epoch": 1.8464730290456433, + "grad_norm": 11.547624588012695, + "learning_rate": 4.529737206085754e-05, + "log_odds_chosen": 2.723951816558838, + "log_odds_ratio": -0.5727928876876831, + "logits/chosen": -0.4375419318675995, + "logits/rejected": -0.5310130715370178, + "logps/chosen": -0.1666826605796814, + "logps/rejected": -0.9059403538703918, + "loss": 4.0289, + "nll_loss": 0.9499408006668091, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01666826754808426, + "rewards/margins": 0.07392577081918716, + "rewards/rejected": -0.09059403836727142, + "step": 2670 + }, + { + "epoch": 1.84716459197787, + "grad_norm": 5.87103271484375, + "learning_rate": 4.529353004456739e-05, + "log_odds_chosen": 3.784372568130493, + "log_odds_ratio": -0.09613749384880066, + "logits/chosen": -0.45019257068634033, + "logits/rejected": -0.4648653268814087, + "logps/chosen": -0.09225141257047653, + "logps/rejected": -1.0031421184539795, + "loss": 2.7772, + "nll_loss": 0.684694766998291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009225141257047653, + "rewards/margins": 0.09108906984329224, + "rewards/rejected": -0.10031421482563019, + "step": 2671 + }, + { + "epoch": 1.847856154910097, + "grad_norm": 8.487081527709961, + "learning_rate": 4.528968802827724e-05, + "log_odds_chosen": 3.1780614852905273, + "log_odds_ratio": -0.18778465688228607, + "logits/chosen": -0.3719137907028198, + "logits/rejected": -0.35996168851852417, + "logps/chosen": -0.08501023054122925, + "logps/rejected": -0.5380250215530396, + "loss": 3.2701, + "nll_loss": 0.7987452149391174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008501023054122925, + "rewards/margins": 0.04530148208141327, + "rewards/rejected": -0.053802505135536194, + "step": 2672 + }, + { + "epoch": 1.8485477178423237, + "grad_norm": 7.312365531921387, + "learning_rate": 4.5285846011987094e-05, + "log_odds_chosen": 4.829049587249756, + "log_odds_ratio": -0.06223570927977562, + "logits/chosen": -0.1999543011188507, + "logits/rejected": -0.266481876373291, + "logps/chosen": -0.030306056141853333, + "logps/rejected": -0.9615808725357056, + "loss": 3.1486, + "nll_loss": 0.7809286117553711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003030605847015977, + "rewards/margins": 0.09312748908996582, + "rewards/rejected": -0.09615809470415115, + "step": 2673 + }, + { + "epoch": 1.8492392807745506, + "grad_norm": 7.50120210647583, + "learning_rate": 4.528200399569694e-05, + "log_odds_chosen": 3.278000593185425, + "log_odds_ratio": -0.16125383973121643, + "logits/chosen": 0.01486283540725708, + "logits/rejected": -0.009005546569824219, + "logps/chosen": -0.06412653625011444, + "logps/rejected": -0.784261167049408, + "loss": 2.7824, + "nll_loss": 0.6794755458831787, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0064126537181437016, + "rewards/margins": 0.07201346755027771, + "rewards/rejected": -0.07842611521482468, + "step": 2674 + }, + { + "epoch": 1.8499308437067774, + "grad_norm": 6.451777935028076, + "learning_rate": 4.52781619794068e-05, + "log_odds_chosen": 4.411476135253906, + "log_odds_ratio": -0.10007528215646744, + "logits/chosen": -0.1524033546447754, + "logits/rejected": -0.1610099822282791, + "logps/chosen": -0.02451336942613125, + "logps/rejected": -0.7017672061920166, + "loss": 2.7949, + "nll_loss": 0.6887252330780029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002451336942613125, + "rewards/margins": 0.06772539019584656, + "rewards/rejected": -0.07017672061920166, + "step": 2675 + }, + { + "epoch": 1.8506224066390042, + "grad_norm": 7.678047180175781, + "learning_rate": 4.5274319963116644e-05, + "log_odds_chosen": 5.969219207763672, + "log_odds_ratio": -0.016140181571245193, + "logits/chosen": -0.49597981572151184, + "logits/rejected": -0.5496968030929565, + "logps/chosen": -0.0319584384560585, + "logps/rejected": -1.0520793199539185, + "loss": 3.7072, + "nll_loss": 0.9251886606216431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031958436593413353, + "rewards/margins": 0.10201209783554077, + "rewards/rejected": -0.10520793497562408, + "step": 2676 + }, + { + "epoch": 1.851313969571231, + "grad_norm": 9.525699615478516, + "learning_rate": 4.5270477946826497e-05, + "log_odds_chosen": 4.6154656410217285, + "log_odds_ratio": -0.23475691676139832, + "logits/chosen": -0.7846601009368896, + "logits/rejected": -0.7942330241203308, + "logps/chosen": -0.0763910710811615, + "logps/rejected": -0.8175442218780518, + "loss": 3.1672, + "nll_loss": 0.768312394618988, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007639107294380665, + "rewards/margins": 0.0741153210401535, + "rewards/rejected": -0.0817544236779213, + "step": 2677 + }, + { + "epoch": 1.852005532503458, + "grad_norm": 6.886621475219727, + "learning_rate": 4.526663593053635e-05, + "log_odds_chosen": 3.9328413009643555, + "log_odds_ratio": -0.31209760904312134, + "logits/chosen": -0.5099629163742065, + "logits/rejected": -0.518309473991394, + "logps/chosen": -0.11288018524646759, + "logps/rejected": -0.6504040360450745, + "loss": 3.7584, + "nll_loss": 0.9083778262138367, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011288018897175789, + "rewards/margins": 0.053752392530441284, + "rewards/rejected": -0.06504040956497192, + "step": 2678 + }, + { + "epoch": 1.8526970954356847, + "grad_norm": 8.468035697937012, + "learning_rate": 4.52627939142462e-05, + "log_odds_chosen": 3.892465353012085, + "log_odds_ratio": -0.24962744116783142, + "logits/chosen": -0.44910019636154175, + "logits/rejected": -0.4465304911136627, + "logps/chosen": -0.09325937926769257, + "logps/rejected": -0.5119410157203674, + "loss": 3.5815, + "nll_loss": 0.8704084753990173, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009325938299298286, + "rewards/margins": 0.041868165135383606, + "rewards/rejected": -0.05119410157203674, + "step": 2679 + }, + { + "epoch": 1.8533886583679116, + "grad_norm": 7.894447326660156, + "learning_rate": 4.525895189795605e-05, + "log_odds_chosen": 5.502786636352539, + "log_odds_ratio": -0.026815906167030334, + "logits/chosen": -0.44511091709136963, + "logits/rejected": -0.5357626676559448, + "logps/chosen": -0.02842571772634983, + "logps/rejected": -0.8142504096031189, + "loss": 4.3125, + "nll_loss": 1.0754499435424805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002842571819201112, + "rewards/margins": 0.07858247309923172, + "rewards/rejected": -0.08142504841089249, + "step": 2680 + }, + { + "epoch": 1.8540802213001384, + "grad_norm": 11.441069602966309, + "learning_rate": 4.52551098816659e-05, + "log_odds_chosen": 3.513641357421875, + "log_odds_ratio": -0.535304605960846, + "logits/chosen": -0.5716827511787415, + "logits/rejected": -0.6022769808769226, + "logps/chosen": -0.07979574799537659, + "logps/rejected": -0.7356309294700623, + "loss": 3.3414, + "nll_loss": 0.7818148136138916, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007979575544595718, + "rewards/margins": 0.06558351963758469, + "rewards/rejected": -0.0735630989074707, + "step": 2681 + }, + { + "epoch": 1.8547717842323652, + "grad_norm": 6.630998611450195, + "learning_rate": 4.525126786537575e-05, + "log_odds_chosen": 6.117326736450195, + "log_odds_ratio": -0.03048916533589363, + "logits/chosen": -0.736469030380249, + "logits/rejected": -0.76263427734375, + "logps/chosen": -0.024096038192510605, + "logps/rejected": -1.0225584506988525, + "loss": 2.7666, + "nll_loss": 0.6885951161384583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024096036795526743, + "rewards/margins": 0.0998462438583374, + "rewards/rejected": -0.10225585103034973, + "step": 2682 + }, + { + "epoch": 1.855463347164592, + "grad_norm": 7.200371742248535, + "learning_rate": 4.52474258490856e-05, + "log_odds_chosen": 5.509161949157715, + "log_odds_ratio": -0.02707480452954769, + "logits/chosen": -0.5912322402000427, + "logits/rejected": -0.6191037893295288, + "logps/chosen": -0.02199496328830719, + "logps/rejected": -1.120031714439392, + "loss": 3.2204, + "nll_loss": 0.8023803234100342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021994966082274914, + "rewards/margins": 0.10980367660522461, + "rewards/rejected": -0.11200316995382309, + "step": 2683 + }, + { + "epoch": 1.8561549100968189, + "grad_norm": 7.9807939529418945, + "learning_rate": 4.524358383279546e-05, + "log_odds_chosen": 4.948763370513916, + "log_odds_ratio": -0.03615286201238632, + "logits/chosen": -0.10776793956756592, + "logits/rejected": -0.14994102716445923, + "logps/chosen": -0.05639262869954109, + "logps/rejected": -1.0054923295974731, + "loss": 3.5715, + "nll_loss": 0.8892561197280884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005639263428747654, + "rewards/margins": 0.09490996599197388, + "rewards/rejected": -0.10054922848939896, + "step": 2684 + }, + { + "epoch": 1.8568464730290457, + "grad_norm": 10.224892616271973, + "learning_rate": 4.52397418165053e-05, + "log_odds_chosen": 7.033175468444824, + "log_odds_ratio": -0.005409691948443651, + "logits/chosen": -0.5313670039176941, + "logits/rejected": -0.5757410526275635, + "logps/chosen": -0.01673893816769123, + "logps/rejected": -1.2961117029190063, + "loss": 4.4357, + "nll_loss": 1.108377456665039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016738938866183162, + "rewards/margins": 0.12793727219104767, + "rewards/rejected": -0.12961116433143616, + "step": 2685 + }, + { + "epoch": 1.8575380359612725, + "grad_norm": 4.859482765197754, + "learning_rate": 4.5235899800215155e-05, + "log_odds_chosen": 3.9110684394836426, + "log_odds_ratio": -0.108709916472435, + "logits/chosen": -0.7197506427764893, + "logits/rejected": -0.7499377727508545, + "logps/chosen": -0.039227042347192764, + "logps/rejected": -0.5742328763008118, + "loss": 4.3951, + "nll_loss": 1.0879111289978027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003922704607248306, + "rewards/margins": 0.05350058153271675, + "rewards/rejected": -0.05742328613996506, + "step": 2686 + }, + { + "epoch": 1.8582295988934994, + "grad_norm": 5.944377422332764, + "learning_rate": 4.523205778392501e-05, + "log_odds_chosen": 5.475446701049805, + "log_odds_ratio": -0.0907863900065422, + "logits/chosen": -0.10576437413692474, + "logits/rejected": -0.1509627252817154, + "logps/chosen": -0.04343018680810928, + "logps/rejected": -0.7776180505752563, + "loss": 2.6183, + "nll_loss": 0.6454888582229614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004343018867075443, + "rewards/margins": 0.07341878116130829, + "rewards/rejected": -0.07776179909706116, + "step": 2687 + }, + { + "epoch": 1.8589211618257262, + "grad_norm": 7.269049644470215, + "learning_rate": 4.522821576763486e-05, + "log_odds_chosen": 5.872060775756836, + "log_odds_ratio": -0.04043078050017357, + "logits/chosen": -0.6755918860435486, + "logits/rejected": -0.7131515741348267, + "logps/chosen": -0.03322390094399452, + "logps/rejected": -0.8359825015068054, + "loss": 2.8901, + "nll_loss": 0.7184814214706421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033223903737962246, + "rewards/margins": 0.08027586340904236, + "rewards/rejected": -0.08359825611114502, + "step": 2688 + }, + { + "epoch": 1.859612724757953, + "grad_norm": 11.413594245910645, + "learning_rate": 4.5224373751344706e-05, + "log_odds_chosen": 3.7279882431030273, + "log_odds_ratio": -0.544560432434082, + "logits/chosen": -0.646683931350708, + "logits/rejected": -0.6721376180648804, + "logps/chosen": -0.10949568450450897, + "logps/rejected": -0.7356846332550049, + "loss": 3.597, + "nll_loss": 0.8447965979576111, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010949568822979927, + "rewards/margins": 0.06261889636516571, + "rewards/rejected": -0.07356846332550049, + "step": 2689 + }, + { + "epoch": 1.8603042876901799, + "grad_norm": 11.228747367858887, + "learning_rate": 4.522053173505456e-05, + "log_odds_chosen": 4.0871171951293945, + "log_odds_ratio": -0.22871485352516174, + "logits/chosen": -0.6205450296401978, + "logits/rejected": -0.6625868082046509, + "logps/chosen": -0.04733623191714287, + "logps/rejected": -0.8056012988090515, + "loss": 3.3682, + "nll_loss": 0.8191684484481812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004733623005449772, + "rewards/margins": 0.07582651823759079, + "rewards/rejected": -0.08056013286113739, + "step": 2690 + }, + { + "epoch": 1.8609958506224067, + "grad_norm": 6.608537197113037, + "learning_rate": 4.521668971876441e-05, + "log_odds_chosen": 5.136848449707031, + "log_odds_ratio": -0.12134000658988953, + "logits/chosen": -0.5894830822944641, + "logits/rejected": -0.5903124809265137, + "logps/chosen": -0.03736359626054764, + "logps/rejected": -0.9877703785896301, + "loss": 3.2075, + "nll_loss": 0.7897449135780334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037363599985837936, + "rewards/margins": 0.09504067152738571, + "rewards/rejected": -0.09877702593803406, + "step": 2691 + }, + { + "epoch": 1.8616874135546335, + "grad_norm": 9.172867774963379, + "learning_rate": 4.5212847702474256e-05, + "log_odds_chosen": 4.03933048248291, + "log_odds_ratio": -0.07970944046974182, + "logits/chosen": -0.6346632242202759, + "logits/rejected": -0.6845720410346985, + "logps/chosen": -0.05896829068660736, + "logps/rejected": -0.7044997811317444, + "loss": 4.1825, + "nll_loss": 1.0376558303833008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005896829068660736, + "rewards/margins": 0.0645531564950943, + "rewards/rejected": -0.07044997811317444, + "step": 2692 + }, + { + "epoch": 1.8623789764868603, + "grad_norm": 10.028759002685547, + "learning_rate": 4.5209005686184115e-05, + "log_odds_chosen": 5.194214344024658, + "log_odds_ratio": -0.17836083471775055, + "logits/chosen": -0.29558470845222473, + "logits/rejected": -0.3286222815513611, + "logps/chosen": -0.04205503687262535, + "logps/rejected": -1.0484198331832886, + "loss": 3.2651, + "nll_loss": 0.7984365820884705, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00420550350099802, + "rewards/margins": 0.10063648223876953, + "rewards/rejected": -0.10484198480844498, + "step": 2693 + }, + { + "epoch": 1.8630705394190872, + "grad_norm": 7.484879970550537, + "learning_rate": 4.520516366989396e-05, + "log_odds_chosen": 5.171629905700684, + "log_odds_ratio": -0.17990508675575256, + "logits/chosen": -0.5625219941139221, + "logits/rejected": -0.5617840886116028, + "logps/chosen": -0.1852400302886963, + "logps/rejected": -0.7624889016151428, + "loss": 2.7943, + "nll_loss": 0.680594265460968, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01852400228381157, + "rewards/margins": 0.05772489309310913, + "rewards/rejected": -0.076248899102211, + "step": 2694 + }, + { + "epoch": 1.863762102351314, + "grad_norm": 6.669751167297363, + "learning_rate": 4.5201321653603813e-05, + "log_odds_chosen": 4.229768753051758, + "log_odds_ratio": -0.16075065732002258, + "logits/chosen": -0.8317698240280151, + "logits/rejected": -0.863510012626648, + "logps/chosen": -0.0615994967520237, + "logps/rejected": -0.6593600511550903, + "loss": 2.3906, + "nll_loss": 0.5815827250480652, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0061599500477313995, + "rewards/margins": 0.05977606028318405, + "rewards/rejected": -0.06593600660562515, + "step": 2695 + }, + { + "epoch": 1.8644536652835408, + "grad_norm": 7.034972667694092, + "learning_rate": 4.5197479637313666e-05, + "log_odds_chosen": 3.354255199432373, + "log_odds_ratio": -0.3019982576370239, + "logits/chosen": -0.7325828075408936, + "logits/rejected": -0.7738179564476013, + "logps/chosen": -0.07212116569280624, + "logps/rejected": -0.6469085812568665, + "loss": 2.8238, + "nll_loss": 0.6757556200027466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0072121163830161095, + "rewards/margins": 0.05747874453663826, + "rewards/rejected": -0.06469085812568665, + "step": 2696 + }, + { + "epoch": 1.8651452282157677, + "grad_norm": 7.403872966766357, + "learning_rate": 4.519363762102352e-05, + "log_odds_chosen": 2.895770788192749, + "log_odds_ratio": -0.36901670694351196, + "logits/chosen": -0.5260494947433472, + "logits/rejected": -0.5574390292167664, + "logps/chosen": -0.07369774580001831, + "logps/rejected": -0.5763075351715088, + "loss": 3.0012, + "nll_loss": 0.7134076356887817, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007369774393737316, + "rewards/margins": 0.050260983407497406, + "rewards/rejected": -0.0576307512819767, + "step": 2697 + }, + { + "epoch": 1.8658367911479945, + "grad_norm": 6.005950450897217, + "learning_rate": 4.5189795604733364e-05, + "log_odds_chosen": 5.62225341796875, + "log_odds_ratio": -0.02279970608651638, + "logits/chosen": -0.6721312403678894, + "logits/rejected": -0.6759477853775024, + "logps/chosen": -0.012295754626393318, + "logps/rejected": -0.8416109085083008, + "loss": 3.2932, + "nll_loss": 0.821026623249054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001229575602337718, + "rewards/margins": 0.0829315185546875, + "rewards/rejected": -0.08416110277175903, + "step": 2698 + }, + { + "epoch": 1.8665283540802213, + "grad_norm": 6.691189765930176, + "learning_rate": 4.5185953588443216e-05, + "log_odds_chosen": 5.296922206878662, + "log_odds_ratio": -0.056247636675834656, + "logits/chosen": -0.7565345168113708, + "logits/rejected": -0.8415186405181885, + "logps/chosen": -0.04300527274608612, + "logps/rejected": -1.137931227684021, + "loss": 2.1701, + "nll_loss": 0.5368894338607788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004300527274608612, + "rewards/margins": 0.10949259996414185, + "rewards/rejected": -0.11379312723875046, + "step": 2699 + }, + { + "epoch": 1.8672199170124482, + "grad_norm": 6.369565010070801, + "learning_rate": 4.518211157215307e-05, + "log_odds_chosen": 4.468364715576172, + "log_odds_ratio": -0.17497983574867249, + "logits/chosen": -0.8825744390487671, + "logits/rejected": -0.9054487943649292, + "logps/chosen": -0.06848480552434921, + "logps/rejected": -0.7379632592201233, + "loss": 4.083, + "nll_loss": 1.0032566785812378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006848481018096209, + "rewards/margins": 0.06694784760475159, + "rewards/rejected": -0.07379632443189621, + "step": 2700 + }, + { + "epoch": 1.867911479944675, + "grad_norm": 6.025998592376709, + "learning_rate": 4.5178269555862915e-05, + "log_odds_chosen": 4.842930793762207, + "log_odds_ratio": -0.2740570306777954, + "logits/chosen": -0.7599248290061951, + "logits/rejected": -0.802500307559967, + "logps/chosen": -0.04903910309076309, + "logps/rejected": -0.7304023504257202, + "loss": 2.8573, + "nll_loss": 0.6869195699691772, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0049039097502827644, + "rewards/margins": 0.06813632696866989, + "rewards/rejected": -0.07304023206233978, + "step": 2701 + }, + { + "epoch": 1.8686030428769018, + "grad_norm": 5.929429054260254, + "learning_rate": 4.5174427539572774e-05, + "log_odds_chosen": 5.073709487915039, + "log_odds_ratio": -0.3314833641052246, + "logits/chosen": -0.43065565824508667, + "logits/rejected": -0.45779237151145935, + "logps/chosen": -0.0539717972278595, + "logps/rejected": -0.6632078886032104, + "loss": 2.8182, + "nll_loss": 0.6714061498641968, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0053971800953149796, + "rewards/margins": 0.060923609882593155, + "rewards/rejected": -0.06632079184055328, + "step": 2702 + }, + { + "epoch": 1.8692946058091287, + "grad_norm": 6.13959264755249, + "learning_rate": 4.517058552328262e-05, + "log_odds_chosen": 3.4868664741516113, + "log_odds_ratio": -0.09228986501693726, + "logits/chosen": -0.3578067421913147, + "logits/rejected": -0.41644516587257385, + "logps/chosen": -0.033023469150066376, + "logps/rejected": -0.4936770796775818, + "loss": 2.4884, + "nll_loss": 0.6128824353218079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033023469150066376, + "rewards/margins": 0.04606536030769348, + "rewards/rejected": -0.04936770722270012, + "step": 2703 + }, + { + "epoch": 1.8699861687413555, + "grad_norm": 13.459339141845703, + "learning_rate": 4.516674350699247e-05, + "log_odds_chosen": 3.097822666168213, + "log_odds_ratio": -0.5419131517410278, + "logits/chosen": -0.8968651294708252, + "logits/rejected": -0.9812121987342834, + "logps/chosen": -0.12201026827096939, + "logps/rejected": -0.9365118145942688, + "loss": 3.0374, + "nll_loss": 0.705159068107605, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012201027013361454, + "rewards/margins": 0.08145016431808472, + "rewards/rejected": -0.0936511904001236, + "step": 2704 + }, + { + "epoch": 1.8706777316735823, + "grad_norm": 8.480863571166992, + "learning_rate": 4.5162901490702324e-05, + "log_odds_chosen": 4.1900553703308105, + "log_odds_ratio": -0.059285108000040054, + "logits/chosen": -0.4107376039028168, + "logits/rejected": -0.4237530827522278, + "logps/chosen": -0.03784055635333061, + "logps/rejected": -0.7297183275222778, + "loss": 3.4364, + "nll_loss": 0.8531801104545593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003784055821597576, + "rewards/margins": 0.06918777525424957, + "rewards/rejected": -0.07297183573246002, + "step": 2705 + }, + { + "epoch": 1.8713692946058091, + "grad_norm": 6.559045791625977, + "learning_rate": 4.515905947441218e-05, + "log_odds_chosen": 4.857430458068848, + "log_odds_ratio": -0.05420558527112007, + "logits/chosen": -0.28830158710479736, + "logits/rejected": -0.29584336280822754, + "logps/chosen": -0.07617554068565369, + "logps/rejected": -0.926051139831543, + "loss": 2.2317, + "nll_loss": 0.552515983581543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007617554161697626, + "rewards/margins": 0.08498755842447281, + "rewards/rejected": -0.0926051139831543, + "step": 2706 + }, + { + "epoch": 1.872060857538036, + "grad_norm": 5.751916885375977, + "learning_rate": 4.515521745812202e-05, + "log_odds_chosen": 3.4366352558135986, + "log_odds_ratio": -0.453977108001709, + "logits/chosen": -0.28560858964920044, + "logits/rejected": -0.27950069308280945, + "logps/chosen": -0.07851672917604446, + "logps/rejected": -0.6064249873161316, + "loss": 2.5579, + "nll_loss": 0.5940800905227661, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.007851672358810902, + "rewards/margins": 0.05279082432389259, + "rewards/rejected": -0.06064249947667122, + "step": 2707 + }, + { + "epoch": 1.8727524204702628, + "grad_norm": 8.47996997833252, + "learning_rate": 4.5151375441831875e-05, + "log_odds_chosen": 3.9178547859191895, + "log_odds_ratio": -0.09079738706350327, + "logits/chosen": -0.5651100873947144, + "logits/rejected": -0.6518626809120178, + "logps/chosen": -0.03847382590174675, + "logps/rejected": -0.7573758363723755, + "loss": 3.5533, + "nll_loss": 0.8792436122894287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038473824970424175, + "rewards/margins": 0.0718902051448822, + "rewards/rejected": -0.07573758810758591, + "step": 2708 + }, + { + "epoch": 1.8734439834024896, + "grad_norm": 18.4063663482666, + "learning_rate": 4.514753342554173e-05, + "log_odds_chosen": 3.6568000316619873, + "log_odds_ratio": -0.2200811207294464, + "logits/chosen": -0.38470497727394104, + "logits/rejected": -0.34699082374572754, + "logps/chosen": -0.04677264019846916, + "logps/rejected": -0.6297637820243835, + "loss": 2.9132, + "nll_loss": 0.7062985897064209, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004677264019846916, + "rewards/margins": 0.05829911679029465, + "rewards/rejected": -0.06297638267278671, + "step": 2709 + }, + { + "epoch": 1.8741355463347165, + "grad_norm": 19.251474380493164, + "learning_rate": 4.514369140925157e-05, + "log_odds_chosen": 2.494581460952759, + "log_odds_ratio": -0.42333984375, + "logits/chosen": -0.7448416948318481, + "logits/rejected": -0.7299750447273254, + "logps/chosen": -0.08245445042848587, + "logps/rejected": -0.4006720781326294, + "loss": 2.5965, + "nll_loss": 0.6068035364151001, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008245445787906647, + "rewards/margins": 0.03182176128029823, + "rewards/rejected": -0.04006720334291458, + "step": 2710 + }, + { + "epoch": 1.8748271092669433, + "grad_norm": 4.293595314025879, + "learning_rate": 4.513984939296143e-05, + "log_odds_chosen": 6.068905830383301, + "log_odds_ratio": -0.04996497184038162, + "logits/chosen": -0.5838699340820312, + "logits/rejected": -0.6190752983093262, + "logps/chosen": -0.00511554628610611, + "logps/rejected": -0.6605459451675415, + "loss": 2.7534, + "nll_loss": 0.6833570003509521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005115546518936753, + "rewards/margins": 0.0655430406332016, + "rewards/rejected": -0.06605459749698639, + "step": 2711 + }, + { + "epoch": 1.8755186721991701, + "grad_norm": 8.421035766601562, + "learning_rate": 4.513600737667128e-05, + "log_odds_chosen": 4.47832727432251, + "log_odds_ratio": -0.16189125180244446, + "logits/chosen": -0.5043261051177979, + "logits/rejected": -0.5526118278503418, + "logps/chosen": -0.07159212976694107, + "logps/rejected": -1.062654733657837, + "loss": 3.0325, + "nll_loss": 0.7419383525848389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007159212604165077, + "rewards/margins": 0.09910625964403152, + "rewards/rejected": -0.10626547038555145, + "step": 2712 + }, + { + "epoch": 1.876210235131397, + "grad_norm": 12.252899169921875, + "learning_rate": 4.513216536038113e-05, + "log_odds_chosen": 2.638622999191284, + "log_odds_ratio": -0.30449140071868896, + "logits/chosen": -0.6102049350738525, + "logits/rejected": -0.5791282057762146, + "logps/chosen": -0.11507735401391983, + "logps/rejected": -0.7121509909629822, + "loss": 3.5219, + "nll_loss": 0.8500288724899292, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011507734656333923, + "rewards/margins": 0.059707362204790115, + "rewards/rejected": -0.07121509313583374, + "step": 2713 + }, + { + "epoch": 1.8769017980636238, + "grad_norm": 5.714953422546387, + "learning_rate": 4.512832334409098e-05, + "log_odds_chosen": 4.1826934814453125, + "log_odds_ratio": -0.1705974042415619, + "logits/chosen": -0.7607474327087402, + "logits/rejected": -0.7724503874778748, + "logps/chosen": -0.04644298925995827, + "logps/rejected": -0.7300114631652832, + "loss": 3.2861, + "nll_loss": 0.8044597506523132, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004644298925995827, + "rewards/margins": 0.06835684180259705, + "rewards/rejected": -0.07300114631652832, + "step": 2714 + }, + { + "epoch": 1.8775933609958506, + "grad_norm": 8.1884126663208, + "learning_rate": 4.5124481327800835e-05, + "log_odds_chosen": 5.894617080688477, + "log_odds_ratio": -0.07726238667964935, + "logits/chosen": -0.43670010566711426, + "logits/rejected": -0.5112190246582031, + "logps/chosen": -0.03426039591431618, + "logps/rejected": -0.8254495859146118, + "loss": 3.5076, + "nll_loss": 0.8691853284835815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034260395914316177, + "rewards/margins": 0.07911892235279083, + "rewards/rejected": -0.0825449675321579, + "step": 2715 + }, + { + "epoch": 1.8782849239280774, + "grad_norm": 7.814009189605713, + "learning_rate": 4.512063931151068e-05, + "log_odds_chosen": 4.36599063873291, + "log_odds_ratio": -0.14571961760520935, + "logits/chosen": -0.588714063167572, + "logits/rejected": -0.7005572319030762, + "logps/chosen": -0.032498858869075775, + "logps/rejected": -0.8260935544967651, + "loss": 3.3757, + "nll_loss": 0.829341471195221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003249885980039835, + "rewards/margins": 0.07935947179794312, + "rewards/rejected": -0.08260935544967651, + "step": 2716 + }, + { + "epoch": 1.8789764868603043, + "grad_norm": 6.06086540222168, + "learning_rate": 4.511679729522053e-05, + "log_odds_chosen": 3.8938732147216797, + "log_odds_ratio": -0.41503429412841797, + "logits/chosen": -0.5160313844680786, + "logits/rejected": -0.4538733959197998, + "logps/chosen": -0.041960708796978, + "logps/rejected": -0.4900236129760742, + "loss": 2.833, + "nll_loss": 0.6667478680610657, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004196071065962315, + "rewards/margins": 0.04480629041790962, + "rewards/rejected": -0.04900236055254936, + "step": 2717 + }, + { + "epoch": 1.879668049792531, + "grad_norm": 13.422004699707031, + "learning_rate": 4.5112955278930386e-05, + "log_odds_chosen": 6.625287055969238, + "log_odds_ratio": -0.11494994163513184, + "logits/chosen": -0.5009042024612427, + "logits/rejected": -0.589263916015625, + "logps/chosen": -0.02857610397040844, + "logps/rejected": -1.2235627174377441, + "loss": 2.9235, + "nll_loss": 0.7193735837936401, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028576103504747152, + "rewards/margins": 0.11949865520000458, + "rewards/rejected": -0.12235626578330994, + "step": 2718 + }, + { + "epoch": 1.880359612724758, + "grad_norm": 9.228880882263184, + "learning_rate": 4.510911326264023e-05, + "log_odds_chosen": 4.821526050567627, + "log_odds_ratio": -0.06934637576341629, + "logits/chosen": -0.5990431308746338, + "logits/rejected": -0.5863394141197205, + "logps/chosen": -0.04004587605595589, + "logps/rejected": -0.8961687684059143, + "loss": 3.7117, + "nll_loss": 0.9209888577461243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004004587419331074, + "rewards/margins": 0.08561229705810547, + "rewards/rejected": -0.08961687982082367, + "step": 2719 + }, + { + "epoch": 1.8810511756569848, + "grad_norm": 5.676540851593018, + "learning_rate": 4.510527124635009e-05, + "log_odds_chosen": 4.036774158477783, + "log_odds_ratio": -0.12386953830718994, + "logits/chosen": -0.6862848401069641, + "logits/rejected": -0.6691758632659912, + "logps/chosen": -0.052078358829021454, + "logps/rejected": -0.5784726142883301, + "loss": 2.4663, + "nll_loss": 0.6041802167892456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0052078356966376305, + "rewards/margins": 0.0526394248008728, + "rewards/rejected": -0.05784726142883301, + "step": 2720 + }, + { + "epoch": 1.8817427385892116, + "grad_norm": 6.777297496795654, + "learning_rate": 4.5101429230059936e-05, + "log_odds_chosen": 5.018651485443115, + "log_odds_ratio": -0.13236510753631592, + "logits/chosen": -0.8772146701812744, + "logits/rejected": -0.8254865407943726, + "logps/chosen": -0.04593784362077713, + "logps/rejected": -0.8731970191001892, + "loss": 2.6719, + "nll_loss": 0.6547348499298096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004593783989548683, + "rewards/margins": 0.08272591233253479, + "rewards/rejected": -0.08731970191001892, + "step": 2721 + }, + { + "epoch": 1.8824343015214384, + "grad_norm": 8.613203048706055, + "learning_rate": 4.509758721376979e-05, + "log_odds_chosen": 5.046001434326172, + "log_odds_ratio": -0.12102051824331284, + "logits/chosen": -0.33736640214920044, + "logits/rejected": -0.3499341607093811, + "logps/chosen": -0.02708081528544426, + "logps/rejected": -0.5669259428977966, + "loss": 2.5795, + "nll_loss": 0.632770836353302, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027080816216766834, + "rewards/margins": 0.053984515368938446, + "rewards/rejected": -0.05669259652495384, + "step": 2722 + }, + { + "epoch": 1.8831258644536653, + "grad_norm": 6.157186985015869, + "learning_rate": 4.509374519747964e-05, + "log_odds_chosen": 5.869537353515625, + "log_odds_ratio": -0.01888483390212059, + "logits/chosen": -0.37536218762397766, + "logits/rejected": -0.4354729950428009, + "logps/chosen": -0.05232808366417885, + "logps/rejected": -1.428209900856018, + "loss": 2.4493, + "nll_loss": 0.6104321479797363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0052328091114759445, + "rewards/margins": 0.13758818805217743, + "rewards/rejected": -0.14282099902629852, + "step": 2723 + }, + { + "epoch": 1.883817427385892, + "grad_norm": 8.024432182312012, + "learning_rate": 4.5089903181189494e-05, + "log_odds_chosen": 7.1319169998168945, + "log_odds_ratio": -0.12523026764392853, + "logits/chosen": -0.280782014131546, + "logits/rejected": -0.322892963886261, + "logps/chosen": -0.020273447036743164, + "logps/rejected": -0.9794613718986511, + "loss": 3.4069, + "nll_loss": 0.8392095565795898, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020273446571081877, + "rewards/margins": 0.09591878950595856, + "rewards/rejected": -0.09794613718986511, + "step": 2724 + }, + { + "epoch": 1.884508990318119, + "grad_norm": 13.520628929138184, + "learning_rate": 4.508606116489934e-05, + "log_odds_chosen": 2.8214855194091797, + "log_odds_ratio": -0.5782947540283203, + "logits/chosen": -0.6274783611297607, + "logits/rejected": -0.6184959411621094, + "logps/chosen": -0.13344895839691162, + "logps/rejected": -0.893020749092102, + "loss": 2.5629, + "nll_loss": 0.5828902721405029, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013344896957278252, + "rewards/margins": 0.07595717906951904, + "rewards/rejected": -0.08930207788944244, + "step": 2725 + }, + { + "epoch": 1.8852005532503457, + "grad_norm": 8.038005828857422, + "learning_rate": 4.508221914860919e-05, + "log_odds_chosen": 3.998943328857422, + "log_odds_ratio": -0.2384883165359497, + "logits/chosen": -0.6419277787208557, + "logits/rejected": -0.6372604966163635, + "logps/chosen": -0.04527169466018677, + "logps/rejected": -0.5038492679595947, + "loss": 1.9882, + "nll_loss": 0.47319209575653076, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004527169745415449, + "rewards/margins": 0.0458577536046505, + "rewards/rejected": -0.050384923815727234, + "step": 2726 + }, + { + "epoch": 1.8858921161825726, + "grad_norm": 9.993119239807129, + "learning_rate": 4.5078377132319044e-05, + "log_odds_chosen": 3.0417394638061523, + "log_odds_ratio": -0.3634532690048218, + "logits/chosen": -0.789708137512207, + "logits/rejected": -0.7860262989997864, + "logps/chosen": -0.08906921744346619, + "logps/rejected": -0.4780641496181488, + "loss": 3.6667, + "nll_loss": 0.8803341388702393, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008906921371817589, + "rewards/margins": 0.0388994924724102, + "rewards/rejected": -0.04780641570687294, + "step": 2727 + }, + { + "epoch": 1.8865836791147994, + "grad_norm": 5.467214584350586, + "learning_rate": 4.507453511602889e-05, + "log_odds_chosen": 5.641713619232178, + "log_odds_ratio": -0.1424766629934311, + "logits/chosen": -0.5579952001571655, + "logits/rejected": -0.6143260598182678, + "logps/chosen": -0.04905056208372116, + "logps/rejected": -0.918487012386322, + "loss": 3.252, + "nll_loss": 0.7987476587295532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004905056208372116, + "rewards/margins": 0.08694364130496979, + "rewards/rejected": -0.0918486937880516, + "step": 2728 + }, + { + "epoch": 1.8872752420470262, + "grad_norm": 6.023781776428223, + "learning_rate": 4.507069309973875e-05, + "log_odds_chosen": 5.422904014587402, + "log_odds_ratio": -0.18042154610157013, + "logits/chosen": -0.21186286211013794, + "logits/rejected": -0.2458893209695816, + "logps/chosen": -0.042429495602846146, + "logps/rejected": -0.9531780481338501, + "loss": 3.054, + "nll_loss": 0.7454517483711243, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004242949653416872, + "rewards/margins": 0.0910748615860939, + "rewards/rejected": -0.09531781077384949, + "step": 2729 + }, + { + "epoch": 1.887966804979253, + "grad_norm": 6.529669761657715, + "learning_rate": 4.5066851083448595e-05, + "log_odds_chosen": 3.4735217094421387, + "log_odds_ratio": -0.13795514404773712, + "logits/chosen": -0.6479552388191223, + "logits/rejected": -0.664212703704834, + "logps/chosen": -0.08479224890470505, + "logps/rejected": -0.5873520374298096, + "loss": 3.1475, + "nll_loss": 0.7730732560157776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00847922544926405, + "rewards/margins": 0.05025597661733627, + "rewards/rejected": -0.0587352029979229, + "step": 2730 + }, + { + "epoch": 1.88865836791148, + "grad_norm": 5.234907150268555, + "learning_rate": 4.506300906715845e-05, + "log_odds_chosen": 5.450425624847412, + "log_odds_ratio": -0.023839298635721207, + "logits/chosen": -0.4833140969276428, + "logits/rejected": -0.5143532752990723, + "logps/chosen": -0.03760465234518051, + "logps/rejected": -1.2618212699890137, + "loss": 2.6293, + "nll_loss": 0.6549372673034668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003760465420782566, + "rewards/margins": 0.12242165207862854, + "rewards/rejected": -0.12618212401866913, + "step": 2731 + }, + { + "epoch": 1.8893499308437067, + "grad_norm": 9.136272430419922, + "learning_rate": 4.50591670508683e-05, + "log_odds_chosen": 4.215113639831543, + "log_odds_ratio": -0.39656686782836914, + "logits/chosen": -0.5351483821868896, + "logits/rejected": -0.628555417060852, + "logps/chosen": -0.0797351598739624, + "logps/rejected": -0.9139702320098877, + "loss": 3.6774, + "nll_loss": 0.8796899318695068, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00797351635992527, + "rewards/margins": 0.08342351019382477, + "rewards/rejected": -0.09139702469110489, + "step": 2732 + }, + { + "epoch": 1.8900414937759336, + "grad_norm": 5.817863464355469, + "learning_rate": 4.505532503457815e-05, + "log_odds_chosen": 5.436681747436523, + "log_odds_ratio": -0.08999508619308472, + "logits/chosen": -0.3045744299888611, + "logits/rejected": -0.3256331980228424, + "logps/chosen": -0.024274304509162903, + "logps/rejected": -0.8689183592796326, + "loss": 2.1392, + "nll_loss": 0.525797963142395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00242743082344532, + "rewards/margins": 0.0844644159078598, + "rewards/rejected": -0.08689183741807938, + "step": 2733 + }, + { + "epoch": 1.8907330567081604, + "grad_norm": 12.771986961364746, + "learning_rate": 4.5051483018288e-05, + "log_odds_chosen": 2.040485382080078, + "log_odds_ratio": -0.7644972801208496, + "logits/chosen": -0.6130284070968628, + "logits/rejected": -0.6401146650314331, + "logps/chosen": -0.10524089634418488, + "logps/rejected": -0.43873098492622375, + "loss": 3.3721, + "nll_loss": 0.7665740251541138, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010524090379476547, + "rewards/margins": 0.03334901109337807, + "rewards/rejected": -0.043873101472854614, + "step": 2734 + }, + { + "epoch": 1.8914246196403872, + "grad_norm": 5.783315658569336, + "learning_rate": 4.504764100199785e-05, + "log_odds_chosen": 4.502688407897949, + "log_odds_ratio": -0.18156832456588745, + "logits/chosen": -0.8265919089317322, + "logits/rejected": -0.8804236650466919, + "logps/chosen": -0.06405682116746902, + "logps/rejected": -0.7433871030807495, + "loss": 2.4883, + "nll_loss": 0.6039232611656189, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00640568183735013, + "rewards/margins": 0.06793303042650223, + "rewards/rejected": -0.07433871179819107, + "step": 2735 + }, + { + "epoch": 1.892116182572614, + "grad_norm": 7.092947959899902, + "learning_rate": 4.50437989857077e-05, + "log_odds_chosen": 6.711772918701172, + "log_odds_ratio": -0.028261862695217133, + "logits/chosen": -0.35366034507751465, + "logits/rejected": -0.4563252031803131, + "logps/chosen": -0.015546809881925583, + "logps/rejected": -0.8382112383842468, + "loss": 2.5491, + "nll_loss": 0.634446918964386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015546809881925583, + "rewards/margins": 0.08226644992828369, + "rewards/rejected": -0.0838211253285408, + "step": 2736 + }, + { + "epoch": 1.8928077455048409, + "grad_norm": 8.738595962524414, + "learning_rate": 4.503995696941755e-05, + "log_odds_chosen": 4.996340751647949, + "log_odds_ratio": -0.047110073268413544, + "logits/chosen": -0.6247482299804688, + "logits/rejected": -0.6630555391311646, + "logps/chosen": -0.036776408553123474, + "logps/rejected": -1.061787486076355, + "loss": 3.7177, + "nll_loss": 0.92470383644104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003677641274407506, + "rewards/margins": 0.10250110179185867, + "rewards/rejected": -0.10617874562740326, + "step": 2737 + }, + { + "epoch": 1.8934993084370677, + "grad_norm": 9.703561782836914, + "learning_rate": 4.50361149531274e-05, + "log_odds_chosen": 5.678030967712402, + "log_odds_ratio": -0.2548525929450989, + "logits/chosen": -1.067615270614624, + "logits/rejected": -1.1188896894454956, + "logps/chosen": -0.05637247487902641, + "logps/rejected": -0.8493156433105469, + "loss": 4.2344, + "nll_loss": 1.0331244468688965, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005637247581034899, + "rewards/margins": 0.07929432392120361, + "rewards/rejected": -0.08493156731128693, + "step": 2738 + }, + { + "epoch": 1.8941908713692945, + "grad_norm": 6.3866753578186035, + "learning_rate": 4.503227293683725e-05, + "log_odds_chosen": 5.3257646560668945, + "log_odds_ratio": -0.03938756138086319, + "logits/chosen": -0.6852152347564697, + "logits/rejected": -0.6986730694770813, + "logps/chosen": -0.0277650635689497, + "logps/rejected": -0.7782572507858276, + "loss": 3.0831, + "nll_loss": 0.7668415307998657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00277650635689497, + "rewards/margins": 0.07504921406507492, + "rewards/rejected": -0.07782572507858276, + "step": 2739 + }, + { + "epoch": 1.8948824343015214, + "grad_norm": 8.502422332763672, + "learning_rate": 4.5028430920547106e-05, + "log_odds_chosen": 4.738999843597412, + "log_odds_ratio": -0.1449851542711258, + "logits/chosen": -0.9557698965072632, + "logits/rejected": -1.0144156217575073, + "logps/chosen": -0.022453149780631065, + "logps/rejected": -0.8287165760993958, + "loss": 2.469, + "nll_loss": 0.602742612361908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002245314884930849, + "rewards/margins": 0.08062634617090225, + "rewards/rejected": -0.08287165313959122, + "step": 2740 + }, + { + "epoch": 1.8955739972337482, + "grad_norm": 9.78346061706543, + "learning_rate": 4.502458890425695e-05, + "log_odds_chosen": 7.0313920974731445, + "log_odds_ratio": -0.04768810421228409, + "logits/chosen": -0.36658820509910583, + "logits/rejected": -0.4856250584125519, + "logps/chosen": -0.01616629771888256, + "logps/rejected": -1.0696743726730347, + "loss": 3.9104, + "nll_loss": 0.9728347659111023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001616629771888256, + "rewards/margins": 0.10535082221031189, + "rewards/rejected": -0.10696744918823242, + "step": 2741 + }, + { + "epoch": 1.896265560165975, + "grad_norm": 9.929014205932617, + "learning_rate": 4.502074688796681e-05, + "log_odds_chosen": 5.32511043548584, + "log_odds_ratio": -0.17078785598278046, + "logits/chosen": -0.284779816865921, + "logits/rejected": -0.40232762694358826, + "logps/chosen": -0.04031224176287651, + "logps/rejected": -1.167478084564209, + "loss": 3.9454, + "nll_loss": 0.9692734479904175, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004031224176287651, + "rewards/margins": 0.11271660029888153, + "rewards/rejected": -0.11674781143665314, + "step": 2742 + }, + { + "epoch": 1.8969571230982019, + "grad_norm": 8.456881523132324, + "learning_rate": 4.5016904871676656e-05, + "log_odds_chosen": 5.320615291595459, + "log_odds_ratio": -0.06412041187286377, + "logits/chosen": -0.5623228549957275, + "logits/rejected": -0.6121507287025452, + "logps/chosen": -0.028262116014957428, + "logps/rejected": -0.6847209930419922, + "loss": 2.7537, + "nll_loss": 0.6820018887519836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028262115083634853, + "rewards/margins": 0.06564588844776154, + "rewards/rejected": -0.06847210228443146, + "step": 2743 + }, + { + "epoch": 1.8976486860304287, + "grad_norm": 9.6913480758667, + "learning_rate": 4.501306285538651e-05, + "log_odds_chosen": 4.994526386260986, + "log_odds_ratio": -0.14609560370445251, + "logits/chosen": -0.602185070514679, + "logits/rejected": -0.6847248673439026, + "logps/chosen": -0.09153126180171967, + "logps/rejected": -1.0046964883804321, + "loss": 2.9175, + "nll_loss": 0.7147679328918457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009153125807642937, + "rewards/margins": 0.09131652861833572, + "rewards/rejected": -0.10046965628862381, + "step": 2744 + }, + { + "epoch": 1.8983402489626555, + "grad_norm": 4.877399921417236, + "learning_rate": 4.500922083909636e-05, + "log_odds_chosen": 5.853466987609863, + "log_odds_ratio": -0.06557883322238922, + "logits/chosen": -0.3120821714401245, + "logits/rejected": -0.3295907974243164, + "logps/chosen": -0.030897993594408035, + "logps/rejected": -1.2178821563720703, + "loss": 2.5115, + "nll_loss": 0.6213077902793884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003089799080044031, + "rewards/margins": 0.11869841814041138, + "rewards/rejected": -0.12178821861743927, + "step": 2745 + }, + { + "epoch": 1.8990318118948823, + "grad_norm": 9.56112289428711, + "learning_rate": 4.5005378822806213e-05, + "log_odds_chosen": 5.372619152069092, + "log_odds_ratio": -0.2080005705356598, + "logits/chosen": -0.4588029086589813, + "logits/rejected": -0.5576678514480591, + "logps/chosen": -0.035982996225357056, + "logps/rejected": -0.8040719032287598, + "loss": 2.6873, + "nll_loss": 0.6510368585586548, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003598299575969577, + "rewards/margins": 0.07680889219045639, + "rewards/rejected": -0.08040718734264374, + "step": 2746 + }, + { + "epoch": 1.8997233748271092, + "grad_norm": 7.170275688171387, + "learning_rate": 4.500153680651606e-05, + "log_odds_chosen": 5.3207597732543945, + "log_odds_ratio": -0.05614379793405533, + "logits/chosen": -0.22629055380821228, + "logits/rejected": -0.2640830874443054, + "logps/chosen": -0.02286584861576557, + "logps/rejected": -0.8797377347946167, + "loss": 2.8085, + "nll_loss": 0.6965088844299316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022865845821797848, + "rewards/margins": 0.08568719029426575, + "rewards/rejected": -0.08797377347946167, + "step": 2747 + }, + { + "epoch": 1.900414937759336, + "grad_norm": 7.8816447257995605, + "learning_rate": 4.499769479022591e-05, + "log_odds_chosen": 7.205872535705566, + "log_odds_ratio": -0.2536963224411011, + "logits/chosen": -0.333590030670166, + "logits/rejected": -0.39955171942710876, + "logps/chosen": -0.0653318241238594, + "logps/rejected": -1.3166921138763428, + "loss": 2.4678, + "nll_loss": 0.5915802717208862, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006533183157444, + "rewards/margins": 0.12513601779937744, + "rewards/rejected": -0.13166922330856323, + "step": 2748 + }, + { + "epoch": 1.9011065006915628, + "grad_norm": 10.401483535766602, + "learning_rate": 4.4993852773935764e-05, + "log_odds_chosen": 3.9236044883728027, + "log_odds_ratio": -0.4264339804649353, + "logits/chosen": -0.6277395486831665, + "logits/rejected": -0.5958032608032227, + "logps/chosen": -0.15293143689632416, + "logps/rejected": -0.730241060256958, + "loss": 4.6277, + "nll_loss": 1.1142935752868652, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015293142758309841, + "rewards/margins": 0.057730965316295624, + "rewards/rejected": -0.07302410900592804, + "step": 2749 + }, + { + "epoch": 1.9017980636237897, + "grad_norm": 8.748340606689453, + "learning_rate": 4.499001075764561e-05, + "log_odds_chosen": 6.704047203063965, + "log_odds_ratio": -0.06408537179231644, + "logits/chosen": -0.3179192543029785, + "logits/rejected": -0.4334818124771118, + "logps/chosen": -0.06114349886775017, + "logps/rejected": -1.5072216987609863, + "loss": 2.3598, + "nll_loss": 0.5835462808609009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006114350166171789, + "rewards/margins": 0.144607812166214, + "rewards/rejected": -0.1507221758365631, + "step": 2750 + }, + { + "epoch": 1.9024896265560165, + "grad_norm": 9.328351974487305, + "learning_rate": 4.498616874135547e-05, + "log_odds_chosen": 3.970728874206543, + "log_odds_ratio": -0.11508572101593018, + "logits/chosen": -0.657474935054779, + "logits/rejected": -0.6981061100959778, + "logps/chosen": -0.05146617442369461, + "logps/rejected": -0.6390859484672546, + "loss": 3.8184, + "nll_loss": 0.9430928230285645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005146617069840431, + "rewards/margins": 0.05876198038458824, + "rewards/rejected": -0.06390859931707382, + "step": 2751 + }, + { + "epoch": 1.9031811894882433, + "grad_norm": 4.731758117675781, + "learning_rate": 4.4982326725065315e-05, + "log_odds_chosen": 4.844266891479492, + "log_odds_ratio": -0.4429481625556946, + "logits/chosen": -0.22525936365127563, + "logits/rejected": -0.18493236601352692, + "logps/chosen": -0.1286071240901947, + "logps/rejected": -0.9103085398674011, + "loss": 2.6905, + "nll_loss": 0.6283376216888428, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01286071166396141, + "rewards/margins": 0.07817014306783676, + "rewards/rejected": -0.09103085100650787, + "step": 2752 + }, + { + "epoch": 1.9038727524204702, + "grad_norm": 7.517221927642822, + "learning_rate": 4.497848470877517e-05, + "log_odds_chosen": 7.291058540344238, + "log_odds_ratio": -0.01622098684310913, + "logits/chosen": -0.7878361940383911, + "logits/rejected": -0.8620985150337219, + "logps/chosen": -0.008632284589111805, + "logps/rejected": -1.2846453189849854, + "loss": 3.7779, + "nll_loss": 0.9428427219390869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008632285171188414, + "rewards/margins": 0.12760131061077118, + "rewards/rejected": -0.12846453487873077, + "step": 2753 + }, + { + "epoch": 1.904564315352697, + "grad_norm": 5.184489727020264, + "learning_rate": 4.497464269248502e-05, + "log_odds_chosen": 5.2329020500183105, + "log_odds_ratio": -0.12184281647205353, + "logits/chosen": -0.34280192852020264, + "logits/rejected": -0.42546650767326355, + "logps/chosen": -0.05324007198214531, + "logps/rejected": -0.8889765739440918, + "loss": 2.5912, + "nll_loss": 0.6356096863746643, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005324007477611303, + "rewards/margins": 0.08357363939285278, + "rewards/rejected": -0.08889764547348022, + "step": 2754 + }, + { + "epoch": 1.9052558782849238, + "grad_norm": 6.808511257171631, + "learning_rate": 4.497080067619487e-05, + "log_odds_chosen": 5.882111549377441, + "log_odds_ratio": -0.008104901760816574, + "logits/chosen": -0.4373306632041931, + "logits/rejected": -0.4659738838672638, + "logps/chosen": -0.017158400267362595, + "logps/rejected": -1.082463264465332, + "loss": 2.5273, + "nll_loss": 0.6310088634490967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017158400733023882, + "rewards/margins": 0.10653049498796463, + "rewards/rejected": -0.1082463338971138, + "step": 2755 + }, + { + "epoch": 1.9059474412171507, + "grad_norm": 4.522149085998535, + "learning_rate": 4.496695865990472e-05, + "log_odds_chosen": 6.7430243492126465, + "log_odds_ratio": -0.009294845163822174, + "logits/chosen": -0.5068034529685974, + "logits/rejected": -0.5801413059234619, + "logps/chosen": -0.02989388071000576, + "logps/rejected": -1.3248211145401, + "loss": 2.9167, + "nll_loss": 0.728242814540863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029893883038312197, + "rewards/margins": 0.12949271500110626, + "rewards/rejected": -0.13248211145401, + "step": 2756 + }, + { + "epoch": 1.9066390041493775, + "grad_norm": 12.909613609313965, + "learning_rate": 4.496311664361457e-05, + "log_odds_chosen": 5.796242713928223, + "log_odds_ratio": -0.4076513648033142, + "logits/chosen": -0.40431565046310425, + "logits/rejected": -0.4224545359611511, + "logps/chosen": -0.048604223877191544, + "logps/rejected": -0.843641996383667, + "loss": 3.2017, + "nll_loss": 0.7596707344055176, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004860422573983669, + "rewards/margins": 0.07950377464294434, + "rewards/rejected": -0.08436420559883118, + "step": 2757 + }, + { + "epoch": 1.9073305670816043, + "grad_norm": 4.586458683013916, + "learning_rate": 4.495927462732442e-05, + "log_odds_chosen": 4.613317012786865, + "log_odds_ratio": -0.13756367564201355, + "logits/chosen": -0.06619075685739517, + "logits/rejected": -0.08160945028066635, + "logps/chosen": -0.04854509234428406, + "logps/rejected": -0.4217030107975006, + "loss": 2.2961, + "nll_loss": 0.5602596402168274, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004854509606957436, + "rewards/margins": 0.037315793335437775, + "rewards/rejected": -0.04217030107975006, + "step": 2758 + }, + { + "epoch": 1.9080221300138311, + "grad_norm": 4.820953845977783, + "learning_rate": 4.495543261103427e-05, + "log_odds_chosen": 4.910735607147217, + "log_odds_ratio": -0.09200502187013626, + "logits/chosen": -0.5208714008331299, + "logits/rejected": -0.5347784757614136, + "logps/chosen": -0.04374665021896362, + "logps/rejected": -0.9003453254699707, + "loss": 2.9172, + "nll_loss": 0.7201057076454163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004374665208160877, + "rewards/margins": 0.08565986901521683, + "rewards/rejected": -0.09003454446792603, + "step": 2759 + }, + { + "epoch": 1.908713692946058, + "grad_norm": 4.447933673858643, + "learning_rate": 4.495159059474413e-05, + "log_odds_chosen": 5.480074882507324, + "log_odds_ratio": -0.02876094914972782, + "logits/chosen": -0.3962971568107605, + "logits/rejected": -0.4510257840156555, + "logps/chosen": -0.015622947365045547, + "logps/rejected": -0.7562682032585144, + "loss": 2.493, + "nll_loss": 0.6203768253326416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015622947830706835, + "rewards/margins": 0.07406453043222427, + "rewards/rejected": -0.07562682032585144, + "step": 2760 + }, + { + "epoch": 1.9094052558782848, + "grad_norm": 7.416686058044434, + "learning_rate": 4.494774857845397e-05, + "log_odds_chosen": 4.5373969078063965, + "log_odds_ratio": -0.0510084293782711, + "logits/chosen": -0.6713343262672424, + "logits/rejected": -0.6580287218093872, + "logps/chosen": -0.02951555699110031, + "logps/rejected": -0.8437892198562622, + "loss": 3.0495, + "nll_loss": 0.7572864294052124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029515554197132587, + "rewards/margins": 0.08142737299203873, + "rewards/rejected": -0.0843789279460907, + "step": 2761 + }, + { + "epoch": 1.9100968188105116, + "grad_norm": 10.607845306396484, + "learning_rate": 4.4943906562163825e-05, + "log_odds_chosen": 2.8581295013427734, + "log_odds_ratio": -0.6374597549438477, + "logits/chosen": -0.5547253489494324, + "logits/rejected": -0.5826330780982971, + "logps/chosen": -0.09037734568119049, + "logps/rejected": -0.7486076354980469, + "loss": 3.3589, + "nll_loss": 0.7759672403335571, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009037734940648079, + "rewards/margins": 0.065823033452034, + "rewards/rejected": -0.07486076653003693, + "step": 2762 + }, + { + "epoch": 1.9107883817427385, + "grad_norm": 3.6539220809936523, + "learning_rate": 4.494006454587368e-05, + "log_odds_chosen": 7.207740783691406, + "log_odds_ratio": -0.004129151813685894, + "logits/chosen": -0.27698051929473877, + "logits/rejected": -0.32458335161209106, + "logps/chosen": -0.0025564860552549362, + "logps/rejected": -0.8559420108795166, + "loss": 2.4033, + "nll_loss": 0.6004086136817932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000255648628808558, + "rewards/margins": 0.08533856272697449, + "rewards/rejected": -0.08559420704841614, + "step": 2763 + }, + { + "epoch": 1.9114799446749653, + "grad_norm": 8.975886344909668, + "learning_rate": 4.493622252958353e-05, + "log_odds_chosen": 4.372048377990723, + "log_odds_ratio": -0.060153741389513016, + "logits/chosen": -0.40431973338127136, + "logits/rejected": -0.4270198941230774, + "logps/chosen": -0.05016879737377167, + "logps/rejected": -0.8772868514060974, + "loss": 3.4937, + "nll_loss": 0.8674010038375854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005016880109906197, + "rewards/margins": 0.08271180838346481, + "rewards/rejected": -0.08772867918014526, + "step": 2764 + }, + { + "epoch": 1.9121715076071921, + "grad_norm": 7.6837382316589355, + "learning_rate": 4.4932380513293376e-05, + "log_odds_chosen": 3.9830479621887207, + "log_odds_ratio": -0.1646263599395752, + "logits/chosen": -0.6387200355529785, + "logits/rejected": -0.6545137763023376, + "logps/chosen": -0.03771314024925232, + "logps/rejected": -0.5626676678657532, + "loss": 2.6703, + "nll_loss": 0.6511242985725403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037713139317929745, + "rewards/margins": 0.052495453506708145, + "rewards/rejected": -0.05626676604151726, + "step": 2765 + }, + { + "epoch": 1.912863070539419, + "grad_norm": 9.061310768127441, + "learning_rate": 4.492853849700323e-05, + "log_odds_chosen": 5.80774450302124, + "log_odds_ratio": -0.3405974209308624, + "logits/chosen": -0.2839244604110718, + "logits/rejected": -0.29523801803588867, + "logps/chosen": -0.052610985934734344, + "logps/rejected": -1.217494010925293, + "loss": 2.6866, + "nll_loss": 0.6375950574874878, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005261098966002464, + "rewards/margins": 0.11648830771446228, + "rewards/rejected": -0.1217494010925293, + "step": 2766 + }, + { + "epoch": 1.9135546334716458, + "grad_norm": 7.12164306640625, + "learning_rate": 4.492469648071308e-05, + "log_odds_chosen": 5.14296817779541, + "log_odds_ratio": -0.17003518342971802, + "logits/chosen": -0.15876546502113342, + "logits/rejected": -0.1746135652065277, + "logps/chosen": -0.035621292889118195, + "logps/rejected": -0.6235146522521973, + "loss": 3.1877, + "nll_loss": 0.7799243927001953, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035621291026473045, + "rewards/margins": 0.058789342641830444, + "rewards/rejected": -0.062351472675800323, + "step": 2767 + }, + { + "epoch": 1.9142461964038726, + "grad_norm": 19.558931350708008, + "learning_rate": 4.4920854464422927e-05, + "log_odds_chosen": 1.8244339227676392, + "log_odds_ratio": -0.6450532078742981, + "logits/chosen": -0.20103971660137177, + "logits/rejected": -0.17665702104568481, + "logps/chosen": -0.12792760133743286, + "logps/rejected": -0.4163038730621338, + "loss": 3.523, + "nll_loss": 0.8162442445755005, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01279276143759489, + "rewards/margins": 0.028837626799941063, + "rewards/rejected": -0.04163038730621338, + "step": 2768 + }, + { + "epoch": 1.9149377593360994, + "grad_norm": 5.6358962059021, + "learning_rate": 4.4917012448132786e-05, + "log_odds_chosen": 2.602464199066162, + "log_odds_ratio": -0.23151105642318726, + "logits/chosen": -0.6144603490829468, + "logits/rejected": -0.6773332357406616, + "logps/chosen": -0.09697036445140839, + "logps/rejected": -0.673506498336792, + "loss": 2.4692, + "nll_loss": 0.5941553115844727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009697036817669868, + "rewards/margins": 0.05765360966324806, + "rewards/rejected": -0.06735064834356308, + "step": 2769 + }, + { + "epoch": 1.9156293222683263, + "grad_norm": 10.034943580627441, + "learning_rate": 4.491317043184263e-05, + "log_odds_chosen": 3.2985281944274902, + "log_odds_ratio": -0.25245070457458496, + "logits/chosen": -0.6084011197090149, + "logits/rejected": -0.6075975298881531, + "logps/chosen": -0.07471877336502075, + "logps/rejected": -0.7278150320053101, + "loss": 3.1846, + "nll_loss": 0.7709120512008667, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007471877615898848, + "rewards/margins": 0.06530961394309998, + "rewards/rejected": -0.07278149574995041, + "step": 2770 + }, + { + "epoch": 1.916320885200553, + "grad_norm": 7.76045036315918, + "learning_rate": 4.4909328415552484e-05, + "log_odds_chosen": 4.8657941818237305, + "log_odds_ratio": -0.16628439724445343, + "logits/chosen": -0.5928363800048828, + "logits/rejected": -0.620124340057373, + "logps/chosen": -0.06428052484989166, + "logps/rejected": -0.8373928070068359, + "loss": 2.705, + "nll_loss": 0.6596198081970215, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0064280531369149685, + "rewards/margins": 0.07731122523546219, + "rewards/rejected": -0.0837392807006836, + "step": 2771 + }, + { + "epoch": 1.91701244813278, + "grad_norm": 6.2884955406188965, + "learning_rate": 4.4905486399262336e-05, + "log_odds_chosen": 5.502465724945068, + "log_odds_ratio": -0.1859067976474762, + "logits/chosen": -0.588131308555603, + "logits/rejected": -0.6590185165405273, + "logps/chosen": -0.043672025203704834, + "logps/rejected": -0.8826485872268677, + "loss": 2.5943, + "nll_loss": 0.6299718618392944, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004367202520370483, + "rewards/margins": 0.083897665143013, + "rewards/rejected": -0.08826486766338348, + "step": 2772 + }, + { + "epoch": 1.9177040110650068, + "grad_norm": 5.941636562347412, + "learning_rate": 4.490164438297219e-05, + "log_odds_chosen": 4.905325412750244, + "log_odds_ratio": -0.022108597680926323, + "logits/chosen": -0.37843644618988037, + "logits/rejected": -0.43814554810523987, + "logps/chosen": -0.03972003236413002, + "logps/rejected": -1.0460373163223267, + "loss": 2.7761, + "nll_loss": 0.6918151378631592, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003972003236413002, + "rewards/margins": 0.1006317287683487, + "rewards/rejected": -0.10460373759269714, + "step": 2773 + }, + { + "epoch": 1.9183955739972336, + "grad_norm": 14.511189460754395, + "learning_rate": 4.4897802366682034e-05, + "log_odds_chosen": 2.286804676055908, + "log_odds_ratio": -0.4922664761543274, + "logits/chosen": -0.5373677015304565, + "logits/rejected": -0.5104278326034546, + "logps/chosen": -0.06641744822263718, + "logps/rejected": -0.5359044075012207, + "loss": 3.1558, + "nll_loss": 0.7397192716598511, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00664174510166049, + "rewards/margins": 0.046948693692684174, + "rewards/rejected": -0.05359043926000595, + "step": 2774 + }, + { + "epoch": 1.9190871369294604, + "grad_norm": 8.476889610290527, + "learning_rate": 4.489396035039189e-05, + "log_odds_chosen": 5.486364364624023, + "log_odds_ratio": -0.040458664298057556, + "logits/chosen": -0.39343497157096863, + "logits/rejected": -0.4808794856071472, + "logps/chosen": -0.029638398438692093, + "logps/rejected": -0.981798529624939, + "loss": 3.5648, + "nll_loss": 0.8871630430221558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029638397973030806, + "rewards/margins": 0.09521600604057312, + "rewards/rejected": -0.09817984700202942, + "step": 2775 + }, + { + "epoch": 1.9197786998616873, + "grad_norm": 4.909126281738281, + "learning_rate": 4.489011833410174e-05, + "log_odds_chosen": 5.147785186767578, + "log_odds_ratio": -0.115696981549263, + "logits/chosen": -0.7142544388771057, + "logits/rejected": -0.7357014417648315, + "logps/chosen": -0.03013111650943756, + "logps/rejected": -0.8791296482086182, + "loss": 3.1168, + "nll_loss": 0.7676218748092651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003013111650943756, + "rewards/margins": 0.08489985764026642, + "rewards/rejected": -0.08791296929121017, + "step": 2776 + }, + { + "epoch": 1.920470262793914, + "grad_norm": 3.1792287826538086, + "learning_rate": 4.4886276317811585e-05, + "log_odds_chosen": 2.745725393295288, + "log_odds_ratio": -0.17853854596614838, + "logits/chosen": -0.5553758144378662, + "logits/rejected": -0.5793541669845581, + "logps/chosen": -0.08622580021619797, + "logps/rejected": -0.5508575439453125, + "loss": 1.9488, + "nll_loss": 0.46934592723846436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008622580207884312, + "rewards/margins": 0.046463172882795334, + "rewards/rejected": -0.05508575215935707, + "step": 2777 + }, + { + "epoch": 1.921161825726141, + "grad_norm": 8.015707015991211, + "learning_rate": 4.4882434301521444e-05, + "log_odds_chosen": 3.6785333156585693, + "log_odds_ratio": -0.19257181882858276, + "logits/chosen": -0.2816515862941742, + "logits/rejected": -0.30312466621398926, + "logps/chosen": -0.04474589228630066, + "logps/rejected": -0.4880346655845642, + "loss": 3.1441, + "nll_loss": 0.7667652368545532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004474589601159096, + "rewards/margins": 0.044328875839710236, + "rewards/rejected": -0.04880346730351448, + "step": 2778 + }, + { + "epoch": 1.9218533886583677, + "grad_norm": 10.081058502197266, + "learning_rate": 4.487859228523129e-05, + "log_odds_chosen": 3.226313829421997, + "log_odds_ratio": -0.3414326608181, + "logits/chosen": -0.47777923941612244, + "logits/rejected": -0.521342396736145, + "logps/chosen": -0.08785060048103333, + "logps/rejected": -0.7554973363876343, + "loss": 2.8479, + "nll_loss": 0.6778291463851929, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008785059675574303, + "rewards/margins": 0.06676468253135681, + "rewards/rejected": -0.07554973661899567, + "step": 2779 + }, + { + "epoch": 1.9225449515905948, + "grad_norm": 6.341608047485352, + "learning_rate": 4.487475026894114e-05, + "log_odds_chosen": 4.8398966789245605, + "log_odds_ratio": -0.04216049984097481, + "logits/chosen": -0.4660530686378479, + "logits/rejected": -0.49698352813720703, + "logps/chosen": -0.019141169264912605, + "logps/rejected": -0.7642238736152649, + "loss": 2.6743, + "nll_loss": 0.6643602848052979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019141167867928743, + "rewards/margins": 0.07450827956199646, + "rewards/rejected": -0.07642239332199097, + "step": 2780 + }, + { + "epoch": 1.9232365145228216, + "grad_norm": 13.689393043518066, + "learning_rate": 4.4870908252650995e-05, + "log_odds_chosen": 3.5703673362731934, + "log_odds_ratio": -0.25399351119995117, + "logits/chosen": -0.32772380113601685, + "logits/rejected": -0.4199979603290558, + "logps/chosen": -0.049331896007061005, + "logps/rejected": -0.6844727993011475, + "loss": 3.7072, + "nll_loss": 0.901390552520752, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0049331896007061005, + "rewards/margins": 0.0635140985250473, + "rewards/rejected": -0.0684472918510437, + "step": 2781 + }, + { + "epoch": 1.9239280774550485, + "grad_norm": 16.07459259033203, + "learning_rate": 4.486706623636085e-05, + "log_odds_chosen": 1.7226035594940186, + "log_odds_ratio": -1.1212856769561768, + "logits/chosen": -0.5351619124412537, + "logits/rejected": -0.6109406352043152, + "logps/chosen": -0.19895480573177338, + "logps/rejected": -0.5249719023704529, + "loss": 4.4027, + "nll_loss": 0.9885578155517578, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019895480945706367, + "rewards/margins": 0.03260171413421631, + "rewards/rejected": -0.05249718949198723, + "step": 2782 + }, + { + "epoch": 1.9246196403872753, + "grad_norm": 18.613061904907227, + "learning_rate": 4.486322422007069e-05, + "log_odds_chosen": 4.626638889312744, + "log_odds_ratio": -0.2957945168018341, + "logits/chosen": -0.607136607170105, + "logits/rejected": -0.6890389323234558, + "logps/chosen": -0.044126056134700775, + "logps/rejected": -0.6813442707061768, + "loss": 2.5626, + "nll_loss": 0.6110590100288391, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004412606358528137, + "rewards/margins": 0.06372182816267014, + "rewards/rejected": -0.06813443452119827, + "step": 2783 + }, + { + "epoch": 1.9253112033195021, + "grad_norm": 7.884077072143555, + "learning_rate": 4.4859382203780545e-05, + "log_odds_chosen": 5.40144157409668, + "log_odds_ratio": -0.016243906691670418, + "logits/chosen": -0.4148845970630646, + "logits/rejected": -0.4984428882598877, + "logps/chosen": -0.019348058849573135, + "logps/rejected": -1.0130583047866821, + "loss": 2.9824, + "nll_loss": 0.743984580039978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019348057685419917, + "rewards/margins": 0.0993710309267044, + "rewards/rejected": -0.10130582749843597, + "step": 2784 + }, + { + "epoch": 1.926002766251729, + "grad_norm": 9.836116790771484, + "learning_rate": 4.48555401874904e-05, + "log_odds_chosen": 4.070340633392334, + "log_odds_ratio": -0.5315168499946594, + "logits/chosen": -0.34487390518188477, + "logits/rejected": -0.3882456421852112, + "logps/chosen": -0.13489031791687012, + "logps/rejected": -0.7666717171669006, + "loss": 3.333, + "nll_loss": 0.7800894379615784, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013489031232893467, + "rewards/margins": 0.06317814439535141, + "rewards/rejected": -0.0766671746969223, + "step": 2785 + }, + { + "epoch": 1.9266943291839558, + "grad_norm": 9.22327995300293, + "learning_rate": 4.4851698171200243e-05, + "log_odds_chosen": 6.947870254516602, + "log_odds_ratio": -0.006460743024945259, + "logits/chosen": -0.16602101922035217, + "logits/rejected": -0.21374446153640747, + "logps/chosen": -0.01367330364882946, + "logps/rejected": -1.1837010383605957, + "loss": 3.0261, + "nll_loss": 0.7558853626251221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013673304347321391, + "rewards/margins": 0.11700277030467987, + "rewards/rejected": -0.11837010085582733, + "step": 2786 + }, + { + "epoch": 1.9273858921161826, + "grad_norm": 10.728938102722168, + "learning_rate": 4.48478561549101e-05, + "log_odds_chosen": 4.983241558074951, + "log_odds_ratio": -0.04518432915210724, + "logits/chosen": -0.7207584381103516, + "logits/rejected": -0.7739526629447937, + "logps/chosen": -0.03859318047761917, + "logps/rejected": -0.9132634401321411, + "loss": 4.3894, + "nll_loss": 1.0928202867507935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038593183271586895, + "rewards/margins": 0.0874670222401619, + "rewards/rejected": -0.09132634103298187, + "step": 2787 + }, + { + "epoch": 1.9280774550484094, + "grad_norm": 5.109100818634033, + "learning_rate": 4.484401413861995e-05, + "log_odds_chosen": 2.9227466583251953, + "log_odds_ratio": -0.10756994783878326, + "logits/chosen": -0.6498531103134155, + "logits/rejected": -0.7055495381355286, + "logps/chosen": -0.06583870947360992, + "logps/rejected": -0.7072476148605347, + "loss": 2.6467, + "nll_loss": 0.6509165167808533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006583871319890022, + "rewards/margins": 0.06414090096950531, + "rewards/rejected": -0.07072477042675018, + "step": 2788 + }, + { + "epoch": 1.9287690179806363, + "grad_norm": 5.672557830810547, + "learning_rate": 4.48401721223298e-05, + "log_odds_chosen": 3.876668691635132, + "log_odds_ratio": -0.19308790564537048, + "logits/chosen": -0.4066796600818634, + "logits/rejected": -0.40035372972488403, + "logps/chosen": -0.059991996735334396, + "logps/rejected": -0.8340362310409546, + "loss": 2.1607, + "nll_loss": 0.5208736062049866, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005999199580401182, + "rewards/margins": 0.07740442454814911, + "rewards/rejected": -0.08340362459421158, + "step": 2789 + }, + { + "epoch": 1.929460580912863, + "grad_norm": 5.521088600158691, + "learning_rate": 4.483633010603965e-05, + "log_odds_chosen": 4.913776874542236, + "log_odds_ratio": -0.0815575122833252, + "logits/chosen": -0.40390971302986145, + "logits/rejected": -0.3940833508968353, + "logps/chosen": -0.04151315614581108, + "logps/rejected": -0.8694093227386475, + "loss": 2.8957, + "nll_loss": 0.7157632112503052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0041513158939778805, + "rewards/margins": 0.08278961479663849, + "rewards/rejected": -0.0869409367442131, + "step": 2790 + }, + { + "epoch": 1.93015214384509, + "grad_norm": 11.908353805541992, + "learning_rate": 4.4832488089749506e-05, + "log_odds_chosen": 3.066312074661255, + "log_odds_ratio": -0.3459596633911133, + "logits/chosen": -0.5527938008308411, + "logits/rejected": -0.5647131204605103, + "logps/chosen": -0.18038421869277954, + "logps/rejected": -0.8058091998100281, + "loss": 2.8924, + "nll_loss": 0.6885152459144592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018038421869277954, + "rewards/margins": 0.06254249811172485, + "rewards/rejected": -0.08058091998100281, + "step": 2791 + }, + { + "epoch": 1.9308437067773168, + "grad_norm": 10.135628700256348, + "learning_rate": 4.482864607345935e-05, + "log_odds_chosen": 3.6124460697174072, + "log_odds_ratio": -0.32742828130722046, + "logits/chosen": -0.38191068172454834, + "logits/rejected": -0.43758589029312134, + "logps/chosen": -0.11376246809959412, + "logps/rejected": -1.071624755859375, + "loss": 3.1569, + "nll_loss": 0.756482720375061, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011376245878636837, + "rewards/margins": 0.09578622877597809, + "rewards/rejected": -0.1071624681353569, + "step": 2792 + }, + { + "epoch": 1.9315352697095436, + "grad_norm": 8.463871002197266, + "learning_rate": 4.4824804057169204e-05, + "log_odds_chosen": 2.9357008934020996, + "log_odds_ratio": -0.27367833256721497, + "logits/chosen": -0.5386611819267273, + "logits/rejected": -0.5462771654129028, + "logps/chosen": -0.08388536423444748, + "logps/rejected": -0.6182925701141357, + "loss": 3.1276, + "nll_loss": 0.7545251846313477, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008388536050915718, + "rewards/margins": 0.05344071984291077, + "rewards/rejected": -0.061829254031181335, + "step": 2793 + }, + { + "epoch": 1.9322268326417704, + "grad_norm": 5.5890421867370605, + "learning_rate": 4.4820962040879056e-05, + "log_odds_chosen": 3.97865629196167, + "log_odds_ratio": -0.06815844029188156, + "logits/chosen": -1.0436514616012573, + "logits/rejected": -1.0876836776733398, + "logps/chosen": -0.02249925397336483, + "logps/rejected": -0.4423280954360962, + "loss": 3.9761, + "nll_loss": 0.9872040748596191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022499256301671267, + "rewards/margins": 0.04198288917541504, + "rewards/rejected": -0.0442328155040741, + "step": 2794 + }, + { + "epoch": 1.9329183955739973, + "grad_norm": 8.73758602142334, + "learning_rate": 4.48171200245889e-05, + "log_odds_chosen": 3.9699978828430176, + "log_odds_ratio": -0.24298183619976044, + "logits/chosen": -0.7401033639907837, + "logits/rejected": -0.7354604005813599, + "logps/chosen": -0.05277275666594505, + "logps/rejected": -0.5997828245162964, + "loss": 3.7816, + "nll_loss": 0.9210982322692871, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005277275573462248, + "rewards/margins": 0.054701004177331924, + "rewards/rejected": -0.05997828394174576, + "step": 2795 + }, + { + "epoch": 1.933609958506224, + "grad_norm": 7.679773330688477, + "learning_rate": 4.481327800829876e-05, + "log_odds_chosen": 5.230242729187012, + "log_odds_ratio": -0.027260133996605873, + "logits/chosen": -0.928388237953186, + "logits/rejected": -0.9680888652801514, + "logps/chosen": -0.025700656697154045, + "logps/rejected": -1.1047217845916748, + "loss": 3.5528, + "nll_loss": 0.885481595993042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025700656697154045, + "rewards/margins": 0.10790210962295532, + "rewards/rejected": -0.110472172498703, + "step": 2796 + }, + { + "epoch": 1.934301521438451, + "grad_norm": 10.935383796691895, + "learning_rate": 4.480943599200861e-05, + "log_odds_chosen": 5.8976030349731445, + "log_odds_ratio": -0.19832640886306763, + "logits/chosen": -0.550868570804596, + "logits/rejected": -0.6094168424606323, + "logps/chosen": -0.02254554256796837, + "logps/rejected": -0.7352597117424011, + "loss": 3.0141, + "nll_loss": 0.7336862683296204, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022545545361936092, + "rewards/margins": 0.07127141952514648, + "rewards/rejected": -0.07352596521377563, + "step": 2797 + }, + { + "epoch": 1.9349930843706777, + "grad_norm": 8.525611877441406, + "learning_rate": 4.480559397571846e-05, + "log_odds_chosen": 4.466034412384033, + "log_odds_ratio": -0.09100319445133209, + "logits/chosen": -0.5881481170654297, + "logits/rejected": -0.5896488428115845, + "logps/chosen": -0.040058400481939316, + "logps/rejected": -0.8737333416938782, + "loss": 2.5324, + "nll_loss": 0.6240019798278809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004005840048193932, + "rewards/margins": 0.0833674967288971, + "rewards/rejected": -0.08737333118915558, + "step": 2798 + }, + { + "epoch": 1.9356846473029046, + "grad_norm": 6.885192394256592, + "learning_rate": 4.480175195942831e-05, + "log_odds_chosen": 3.733163356781006, + "log_odds_ratio": -0.11658845096826553, + "logits/chosen": -0.5719266533851624, + "logits/rejected": -0.595264732837677, + "logps/chosen": -0.056412748992443085, + "logps/rejected": -1.1346626281738281, + "loss": 2.8544, + "nll_loss": 0.7019389271736145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005641274619847536, + "rewards/margins": 0.10782498866319656, + "rewards/rejected": -0.11346626281738281, + "step": 2799 + }, + { + "epoch": 1.9363762102351314, + "grad_norm": 8.165283203125, + "learning_rate": 4.4797909943138164e-05, + "log_odds_chosen": 3.9729034900665283, + "log_odds_ratio": -0.15297673642635345, + "logits/chosen": -0.927630603313446, + "logits/rejected": -0.9283353090286255, + "logps/chosen": -0.05737540125846863, + "logps/rejected": -0.726395845413208, + "loss": 3.6105, + "nll_loss": 0.887338399887085, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005737540312111378, + "rewards/margins": 0.0669020488858223, + "rewards/rejected": -0.0726395919919014, + "step": 2800 + }, + { + "epoch": 1.9370677731673582, + "grad_norm": 8.619080543518066, + "learning_rate": 4.479406792684801e-05, + "log_odds_chosen": 5.21150016784668, + "log_odds_ratio": -0.023713568225502968, + "logits/chosen": -0.5210046768188477, + "logits/rejected": -0.5669018030166626, + "logps/chosen": -0.04111271724104881, + "logps/rejected": -1.0991835594177246, + "loss": 2.6817, + "nll_loss": 0.6680553555488586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004111272282898426, + "rewards/margins": 0.1058070957660675, + "rewards/rejected": -0.10991836339235306, + "step": 2801 + }, + { + "epoch": 1.937759336099585, + "grad_norm": 8.75975513458252, + "learning_rate": 4.479022591055786e-05, + "log_odds_chosen": 6.741459846496582, + "log_odds_ratio": -0.010524172335863113, + "logits/chosen": -0.4283122420310974, + "logits/rejected": -0.45947590470314026, + "logps/chosen": -0.006821490824222565, + "logps/rejected": -1.2476394176483154, + "loss": 2.4996, + "nll_loss": 0.6238444447517395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006821490824222565, + "rewards/margins": 0.1240818053483963, + "rewards/rejected": -0.12476395070552826, + "step": 2802 + }, + { + "epoch": 1.938450899031812, + "grad_norm": 6.204493999481201, + "learning_rate": 4.4786383894267715e-05, + "log_odds_chosen": 6.076966285705566, + "log_odds_ratio": -0.10558860003948212, + "logits/chosen": -0.5615969300270081, + "logits/rejected": -0.5650418400764465, + "logps/chosen": -0.03752349689602852, + "logps/rejected": -1.1340097188949585, + "loss": 2.4582, + "nll_loss": 0.603987455368042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003752349875867367, + "rewards/margins": 0.10964862257242203, + "rewards/rejected": -0.11340098083019257, + "step": 2803 + }, + { + "epoch": 1.9391424619640387, + "grad_norm": 4.198095321655273, + "learning_rate": 4.478254187797756e-05, + "log_odds_chosen": 6.323598861694336, + "log_odds_ratio": -0.00858142040669918, + "logits/chosen": -0.3735266923904419, + "logits/rejected": -0.4268512725830078, + "logps/chosen": -0.009092532098293304, + "logps/rejected": -1.0283095836639404, + "loss": 2.2402, + "nll_loss": 0.5591831207275391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009092532563954592, + "rewards/margins": 0.10192171484231949, + "rewards/rejected": -0.10283096134662628, + "step": 2804 + }, + { + "epoch": 1.9398340248962656, + "grad_norm": 7.274281978607178, + "learning_rate": 4.477869986168742e-05, + "log_odds_chosen": 6.051234245300293, + "log_odds_ratio": -0.022627316415309906, + "logits/chosen": -0.6564967036247253, + "logits/rejected": -0.7128115892410278, + "logps/chosen": -0.015038514509797096, + "logps/rejected": -0.9913001656532288, + "loss": 2.6809, + "nll_loss": 0.6679600477218628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015038514975458384, + "rewards/margins": 0.09762617200613022, + "rewards/rejected": -0.09913001954555511, + "step": 2805 + }, + { + "epoch": 1.9405255878284924, + "grad_norm": 8.436661720275879, + "learning_rate": 4.4774857845397265e-05, + "log_odds_chosen": 5.065516948699951, + "log_odds_ratio": -0.21357516944408417, + "logits/chosen": -0.514738142490387, + "logits/rejected": -0.5642529726028442, + "logps/chosen": -0.04693892225623131, + "logps/rejected": -0.9647188782691956, + "loss": 2.5694, + "nll_loss": 0.620995283126831, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004693892784416676, + "rewards/margins": 0.0917779952287674, + "rewards/rejected": -0.0964718908071518, + "step": 2806 + }, + { + "epoch": 1.9412171507607192, + "grad_norm": 9.0737886428833, + "learning_rate": 4.477101582910712e-05, + "log_odds_chosen": 6.683316707611084, + "log_odds_ratio": -0.03487955406308174, + "logits/chosen": -0.4747570753097534, + "logits/rejected": -0.4955167770385742, + "logps/chosen": -0.011380588635802269, + "logps/rejected": -0.9118317365646362, + "loss": 2.7271, + "nll_loss": 0.6782808899879456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011380589567124844, + "rewards/margins": 0.09004510939121246, + "rewards/rejected": -0.09118317067623138, + "step": 2807 + }, + { + "epoch": 1.941908713692946, + "grad_norm": 5.534692287445068, + "learning_rate": 4.476717381281697e-05, + "log_odds_chosen": 7.164931774139404, + "log_odds_ratio": -0.009051812812685966, + "logits/chosen": -0.5594828128814697, + "logits/rejected": -0.5811551809310913, + "logps/chosen": -0.005597162526100874, + "logps/rejected": -0.8697156310081482, + "loss": 2.5355, + "nll_loss": 0.6329704523086548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005597162526100874, + "rewards/margins": 0.08641184866428375, + "rewards/rejected": -0.08697156608104706, + "step": 2808 + }, + { + "epoch": 1.9426002766251729, + "grad_norm": 8.621345520019531, + "learning_rate": 4.476333179652682e-05, + "log_odds_chosen": 4.592233657836914, + "log_odds_ratio": -0.15282797813415527, + "logits/chosen": -0.3370625376701355, + "logits/rejected": -0.389223575592041, + "logps/chosen": -0.07554537057876587, + "logps/rejected": -1.1386663913726807, + "loss": 3.5834, + "nll_loss": 0.8805585503578186, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007554537151008844, + "rewards/margins": 0.1063121035695076, + "rewards/rejected": -0.11386663466691971, + "step": 2809 + }, + { + "epoch": 1.9432918395573997, + "grad_norm": 11.15650463104248, + "learning_rate": 4.475948978023667e-05, + "log_odds_chosen": 5.505188465118408, + "log_odds_ratio": -0.10849100351333618, + "logits/chosen": -0.38597604632377625, + "logits/rejected": -0.4197837710380554, + "logps/chosen": -0.03493032231926918, + "logps/rejected": -1.2237557172775269, + "loss": 4.1632, + "nll_loss": 1.0299582481384277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034930319525301456, + "rewards/margins": 0.11888253688812256, + "rewards/rejected": -0.12237557768821716, + "step": 2810 + }, + { + "epoch": 1.9439834024896265, + "grad_norm": 6.0465922355651855, + "learning_rate": 4.475564776394652e-05, + "log_odds_chosen": 7.783335208892822, + "log_odds_ratio": -0.0031097978353500366, + "logits/chosen": -0.17682109773159027, + "logits/rejected": -0.22807860374450684, + "logps/chosen": -0.001854907488450408, + "logps/rejected": -1.069655418395996, + "loss": 2.3694, + "nll_loss": 0.5920292139053345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018549074593465775, + "rewards/margins": 0.1067800521850586, + "rewards/rejected": -0.10696554183959961, + "step": 2811 + }, + { + "epoch": 1.9446749654218534, + "grad_norm": 5.259706020355225, + "learning_rate": 4.475180574765637e-05, + "log_odds_chosen": 5.722519397735596, + "log_odds_ratio": -0.015401207841932774, + "logits/chosen": -0.28977158665657043, + "logits/rejected": -0.3090120553970337, + "logps/chosen": -0.0432988703250885, + "logps/rejected": -0.9075368642807007, + "loss": 2.691, + "nll_loss": 0.6712194681167603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004329887684434652, + "rewards/margins": 0.08642380684614182, + "rewards/rejected": -0.0907536968588829, + "step": 2812 + }, + { + "epoch": 1.9453665283540802, + "grad_norm": 4.855476379394531, + "learning_rate": 4.474796373136622e-05, + "log_odds_chosen": 5.955103397369385, + "log_odds_ratio": -0.02168934792280197, + "logits/chosen": -0.311231791973114, + "logits/rejected": -0.38722100853919983, + "logps/chosen": -0.015130783431231976, + "logps/rejected": -0.5940335988998413, + "loss": 2.3921, + "nll_loss": 0.5958442091941833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015130782267078757, + "rewards/margins": 0.057890281081199646, + "rewards/rejected": -0.05940335988998413, + "step": 2813 + }, + { + "epoch": 1.946058091286307, + "grad_norm": 5.340009689331055, + "learning_rate": 4.474412171507608e-05, + "log_odds_chosen": 6.406757354736328, + "log_odds_ratio": -0.009401117451488972, + "logits/chosen": -0.700808584690094, + "logits/rejected": -0.7515424489974976, + "logps/chosen": -0.020106710493564606, + "logps/rejected": -1.5756630897521973, + "loss": 2.7495, + "nll_loss": 0.6864414811134338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002010670956224203, + "rewards/margins": 0.15555565059185028, + "rewards/rejected": -0.15756630897521973, + "step": 2814 + }, + { + "epoch": 1.9467496542185339, + "grad_norm": 7.415836334228516, + "learning_rate": 4.4740279698785924e-05, + "log_odds_chosen": 3.2339749336242676, + "log_odds_ratio": -0.16565166413784027, + "logits/chosen": -0.265865683555603, + "logits/rejected": -0.2878696918487549, + "logps/chosen": -0.05613917112350464, + "logps/rejected": -0.42157119512557983, + "loss": 3.0349, + "nll_loss": 0.7421600818634033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005613917484879494, + "rewards/margins": 0.03654320538043976, + "rewards/rejected": -0.0421571210026741, + "step": 2815 + }, + { + "epoch": 1.9474412171507607, + "grad_norm": 5.433392524719238, + "learning_rate": 4.4736437682495776e-05, + "log_odds_chosen": 4.738263130187988, + "log_odds_ratio": -0.29604262113571167, + "logits/chosen": -0.4162006974220276, + "logits/rejected": -0.43315163254737854, + "logps/chosen": -0.08273541927337646, + "logps/rejected": -1.1874265670776367, + "loss": 2.7732, + "nll_loss": 0.6636911034584045, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008273541927337646, + "rewards/margins": 0.11046911776065826, + "rewards/rejected": -0.11874265968799591, + "step": 2816 + }, + { + "epoch": 1.9481327800829875, + "grad_norm": 18.027544021606445, + "learning_rate": 4.473259566620563e-05, + "log_odds_chosen": 2.0296730995178223, + "log_odds_ratio": -1.2344518899917603, + "logits/chosen": -0.6975828409194946, + "logits/rejected": -0.582709550857544, + "logps/chosen": -0.16763977706432343, + "logps/rejected": -0.5025269389152527, + "loss": 4.5973, + "nll_loss": 1.025890588760376, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.016763977706432343, + "rewards/margins": 0.03348871320486069, + "rewards/rejected": -0.05025269091129303, + "step": 2817 + }, + { + "epoch": 1.9488243430152143, + "grad_norm": 8.576213836669922, + "learning_rate": 4.472875364991548e-05, + "log_odds_chosen": 8.456327438354492, + "log_odds_ratio": -0.00033374037593603134, + "logits/chosen": 0.07133971899747849, + "logits/rejected": 0.011340849101543427, + "logps/chosen": -0.000913599447812885, + "logps/rejected": -1.2824742794036865, + "loss": 3.5978, + "nll_loss": 0.8994190692901611, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.135994332609698e-05, + "rewards/margins": 0.12815608084201813, + "rewards/rejected": -0.1282474398612976, + "step": 2818 + }, + { + "epoch": 1.9495159059474412, + "grad_norm": 12.02246379852295, + "learning_rate": 4.472491163362533e-05, + "log_odds_chosen": 6.741928577423096, + "log_odds_ratio": -0.006194033194333315, + "logits/chosen": -0.3347575068473816, + "logits/rejected": -0.43152832984924316, + "logps/chosen": -0.005848361644893885, + "logps/rejected": -1.3612442016601562, + "loss": 4.2105, + "nll_loss": 1.0520122051239014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005848361761309206, + "rewards/margins": 0.13553959131240845, + "rewards/rejected": -0.13612443208694458, + "step": 2819 + }, + { + "epoch": 1.950207468879668, + "grad_norm": 6.350009441375732, + "learning_rate": 4.472106961733518e-05, + "log_odds_chosen": 6.244994163513184, + "log_odds_ratio": -0.02147325873374939, + "logits/chosen": -0.4485066533088684, + "logits/rejected": -0.4813869297504425, + "logps/chosen": -0.028115058317780495, + "logps/rejected": -1.2422107458114624, + "loss": 2.8348, + "nll_loss": 0.7065523266792297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028115056920796633, + "rewards/margins": 0.12140956521034241, + "rewards/rejected": -0.1242210865020752, + "step": 2820 + }, + { + "epoch": 1.9508990318118948, + "grad_norm": 16.922008514404297, + "learning_rate": 4.471722760104503e-05, + "log_odds_chosen": 4.798357009887695, + "log_odds_ratio": -0.4773716330528259, + "logits/chosen": -0.6690393686294556, + "logits/rejected": -0.714600145816803, + "logps/chosen": -0.08855325728654861, + "logps/rejected": -0.8425127267837524, + "loss": 3.0157, + "nll_loss": 0.7061989307403564, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008855325169861317, + "rewards/margins": 0.07539595663547516, + "rewards/rejected": -0.0842512845993042, + "step": 2821 + }, + { + "epoch": 1.9515905947441217, + "grad_norm": 6.992642402648926, + "learning_rate": 4.471338558475488e-05, + "log_odds_chosen": 4.1904497146606445, + "log_odds_ratio": -0.4717223644256592, + "logits/chosen": -0.44422727823257446, + "logits/rejected": -0.4630334973335266, + "logps/chosen": -0.08977383375167847, + "logps/rejected": -1.0111335515975952, + "loss": 3.1006, + "nll_loss": 0.7279874682426453, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008977384306490421, + "rewards/margins": 0.0921359732747078, + "rewards/rejected": -0.10111334919929504, + "step": 2822 + }, + { + "epoch": 1.9522821576763485, + "grad_norm": 10.742497444152832, + "learning_rate": 4.4709543568464736e-05, + "log_odds_chosen": 3.4371228218078613, + "log_odds_ratio": -0.44192981719970703, + "logits/chosen": -0.6044036746025085, + "logits/rejected": -0.5718508958816528, + "logps/chosen": -0.0498533770442009, + "logps/rejected": -0.4369697570800781, + "loss": 2.8645, + "nll_loss": 0.6719335317611694, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004985337611287832, + "rewards/margins": 0.03871164098381996, + "rewards/rejected": -0.04369697719812393, + "step": 2823 + }, + { + "epoch": 1.9529737206085753, + "grad_norm": 7.843807220458984, + "learning_rate": 4.470570155217458e-05, + "log_odds_chosen": 4.21707820892334, + "log_odds_ratio": -0.17529143393039703, + "logits/chosen": -0.5330499410629272, + "logits/rejected": -0.5987671613693237, + "logps/chosen": -0.05731034651398659, + "logps/rejected": -0.6517893075942993, + "loss": 3.3395, + "nll_loss": 0.8173424005508423, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005731034558266401, + "rewards/margins": 0.059447892010211945, + "rewards/rejected": -0.06517893075942993, + "step": 2824 + }, + { + "epoch": 1.9536652835408024, + "grad_norm": 7.9400739669799805, + "learning_rate": 4.4701859535884435e-05, + "log_odds_chosen": 6.131689071655273, + "log_odds_ratio": -0.041180334985256195, + "logits/chosen": -0.3131115734577179, + "logits/rejected": -0.4246234595775604, + "logps/chosen": -0.03072253428399563, + "logps/rejected": -1.005305528640747, + "loss": 2.8949, + "nll_loss": 0.7196011543273926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003072253428399563, + "rewards/margins": 0.09745831787586212, + "rewards/rejected": -0.10053056478500366, + "step": 2825 + }, + { + "epoch": 1.9543568464730292, + "grad_norm": 8.393575668334961, + "learning_rate": 4.469801751959429e-05, + "log_odds_chosen": 5.061415195465088, + "log_odds_ratio": -0.032158877700567245, + "logits/chosen": -0.6719424724578857, + "logits/rejected": -0.6707208156585693, + "logps/chosen": -0.03238382935523987, + "logps/rejected": -0.8522112369537354, + "loss": 4.3141, + "nll_loss": 1.075303077697754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032383829820901155, + "rewards/margins": 0.08198274672031403, + "rewards/rejected": -0.08522112667560577, + "step": 2826 + }, + { + "epoch": 1.955048409405256, + "grad_norm": 5.415279865264893, + "learning_rate": 4.469417550330414e-05, + "log_odds_chosen": 4.250751972198486, + "log_odds_ratio": -0.11042823642492294, + "logits/chosen": -0.5419958233833313, + "logits/rejected": -0.5159631967544556, + "logps/chosen": -0.08708186447620392, + "logps/rejected": -1.1190032958984375, + "loss": 2.5718, + "nll_loss": 0.6319047808647156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008708186447620392, + "rewards/margins": 0.10319213569164276, + "rewards/rejected": -0.11190032958984375, + "step": 2827 + }, + { + "epoch": 1.9557399723374829, + "grad_norm": 6.0877838134765625, + "learning_rate": 4.4690333487013985e-05, + "log_odds_chosen": 5.14921760559082, + "log_odds_ratio": -0.09397347271442413, + "logits/chosen": -0.05813811719417572, + "logits/rejected": -0.11568836122751236, + "logps/chosen": -0.05785089358687401, + "logps/rejected": -0.926654040813446, + "loss": 2.6938, + "nll_loss": 0.66404128074646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005785089451819658, + "rewards/margins": 0.08688031882047653, + "rewards/rejected": -0.0926654040813446, + "step": 2828 + }, + { + "epoch": 1.9564315352697097, + "grad_norm": 9.632216453552246, + "learning_rate": 4.468649147072384e-05, + "log_odds_chosen": 6.0009660720825195, + "log_odds_ratio": -0.36622923612594604, + "logits/chosen": -0.5198516845703125, + "logits/rejected": -0.6222413182258606, + "logps/chosen": -0.027646349743008614, + "logps/rejected": -0.8318744897842407, + "loss": 3.4529, + "nll_loss": 0.8265994787216187, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027646352536976337, + "rewards/margins": 0.08042281866073608, + "rewards/rejected": -0.08318745344877243, + "step": 2829 + }, + { + "epoch": 1.9571230982019365, + "grad_norm": 4.735780715942383, + "learning_rate": 4.468264945443369e-05, + "log_odds_chosen": 5.2941789627075195, + "log_odds_ratio": -0.14041371643543243, + "logits/chosen": -0.14039883017539978, + "logits/rejected": -0.15818633139133453, + "logps/chosen": -0.03495385870337486, + "logps/rejected": -0.6268692016601562, + "loss": 2.6105, + "nll_loss": 0.6385937333106995, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034953858703374863, + "rewards/margins": 0.05919152498245239, + "rewards/rejected": -0.06268692016601562, + "step": 2830 + }, + { + "epoch": 1.9578146611341634, + "grad_norm": 14.99392032623291, + "learning_rate": 4.4678807438143536e-05, + "log_odds_chosen": 5.380325794219971, + "log_odds_ratio": -0.08194153010845184, + "logits/chosen": -0.45967763662338257, + "logits/rejected": -0.5550145506858826, + "logps/chosen": -0.050651248544454575, + "logps/rejected": -1.1018568277359009, + "loss": 4.1651, + "nll_loss": 1.0330907106399536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005065124947577715, + "rewards/margins": 0.10512056201696396, + "rewards/rejected": -0.11018568277359009, + "step": 2831 + }, + { + "epoch": 1.9585062240663902, + "grad_norm": 4.958186626434326, + "learning_rate": 4.4674965421853395e-05, + "log_odds_chosen": 6.433233261108398, + "log_odds_ratio": -0.04056533798575401, + "logits/chosen": -0.49270355701446533, + "logits/rejected": -0.5152170658111572, + "logps/chosen": -0.03315674886107445, + "logps/rejected": -1.1400666236877441, + "loss": 2.5399, + "nll_loss": 0.6309077739715576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033156750723719597, + "rewards/margins": 0.11069098860025406, + "rewards/rejected": -0.11400666832923889, + "step": 2832 + }, + { + "epoch": 1.959197786998617, + "grad_norm": 6.792165756225586, + "learning_rate": 4.467112340556324e-05, + "log_odds_chosen": 5.845515251159668, + "log_odds_ratio": -0.07941299676895142, + "logits/chosen": -0.24126091599464417, + "logits/rejected": -0.28594672679901123, + "logps/chosen": -0.016813665628433228, + "logps/rejected": -0.8013637661933899, + "loss": 2.7251, + "nll_loss": 0.6733429431915283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016813663532957435, + "rewards/margins": 0.07845500856637955, + "rewards/rejected": -0.08013637363910675, + "step": 2833 + }, + { + "epoch": 1.9598893499308438, + "grad_norm": 5.078237533569336, + "learning_rate": 4.466728138927309e-05, + "log_odds_chosen": 2.8996164798736572, + "log_odds_ratio": -0.2707454264163971, + "logits/chosen": -0.2614973783493042, + "logits/rejected": -0.291960746049881, + "logps/chosen": -0.09858616441488266, + "logps/rejected": -0.6153691411018372, + "loss": 2.5298, + "nll_loss": 0.6053740978240967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009858615696430206, + "rewards/margins": 0.05167829990386963, + "rewards/rejected": -0.06153691187500954, + "step": 2834 + }, + { + "epoch": 1.9605809128630707, + "grad_norm": 6.119528293609619, + "learning_rate": 4.4663439372982945e-05, + "log_odds_chosen": 3.4581170082092285, + "log_odds_ratio": -0.26008594036102295, + "logits/chosen": -0.49485692381858826, + "logits/rejected": -0.48848897218704224, + "logps/chosen": -0.07028748095035553, + "logps/rejected": -0.5969229340553284, + "loss": 3.2002, + "nll_loss": 0.7740364074707031, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007028748281300068, + "rewards/margins": 0.052663546055555344, + "rewards/rejected": -0.05969228968024254, + "step": 2835 + }, + { + "epoch": 1.9612724757952975, + "grad_norm": 7.903665542602539, + "learning_rate": 4.46595973566928e-05, + "log_odds_chosen": 5.2713799476623535, + "log_odds_ratio": -0.165093794465065, + "logits/chosen": -0.338885098695755, + "logits/rejected": -0.3735104203224182, + "logps/chosen": -0.06486129760742188, + "logps/rejected": -0.9292372465133667, + "loss": 3.3155, + "nll_loss": 0.8123664855957031, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006486129947006702, + "rewards/margins": 0.08643759787082672, + "rewards/rejected": -0.09292373061180115, + "step": 2836 + }, + { + "epoch": 1.9619640387275243, + "grad_norm": 9.878509521484375, + "learning_rate": 4.4655755340402644e-05, + "log_odds_chosen": 5.400105953216553, + "log_odds_ratio": -0.7512086033821106, + "logits/chosen": -0.558991551399231, + "logits/rejected": -0.550513744354248, + "logps/chosen": -0.11486005038022995, + "logps/rejected": -1.0291719436645508, + "loss": 2.7288, + "nll_loss": 0.6070804595947266, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01148600596934557, + "rewards/margins": 0.09143118560314178, + "rewards/rejected": -0.10291719436645508, + "step": 2837 + }, + { + "epoch": 1.9626556016597512, + "grad_norm": 21.60213279724121, + "learning_rate": 4.4651913324112496e-05, + "log_odds_chosen": 3.2322263717651367, + "log_odds_ratio": -1.070180058479309, + "logits/chosen": -0.4052848815917969, + "logits/rejected": -0.49535343050956726, + "logps/chosen": -0.12277340888977051, + "logps/rejected": -0.6704703569412231, + "loss": 3.4206, + "nll_loss": 0.748128354549408, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012277341447770596, + "rewards/margins": 0.054769691079854965, + "rewards/rejected": -0.06704703718423843, + "step": 2838 + }, + { + "epoch": 1.963347164591978, + "grad_norm": 8.996484756469727, + "learning_rate": 4.464807130782235e-05, + "log_odds_chosen": 5.494966506958008, + "log_odds_ratio": -0.14851421117782593, + "logits/chosen": -0.5047599077224731, + "logits/rejected": -0.4936036467552185, + "logps/chosen": -0.03942044824361801, + "logps/rejected": -0.8090762495994568, + "loss": 2.9505, + "nll_loss": 0.7227743864059448, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003942045383155346, + "rewards/margins": 0.0769655853509903, + "rewards/rejected": -0.08090762794017792, + "step": 2839 + }, + { + "epoch": 1.9640387275242048, + "grad_norm": 6.9417243003845215, + "learning_rate": 4.4644229291532194e-05, + "log_odds_chosen": 5.448168754577637, + "log_odds_ratio": -0.04826320335268974, + "logits/chosen": -0.6604098081588745, + "logits/rejected": -0.6737585067749023, + "logps/chosen": -0.03252304345369339, + "logps/rejected": -0.96895432472229, + "loss": 4.2249, + "nll_loss": 1.0514090061187744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032523043919354677, + "rewards/margins": 0.09364312887191772, + "rewards/rejected": -0.09689544141292572, + "step": 2840 + }, + { + "epoch": 1.9647302904564317, + "grad_norm": 8.161296844482422, + "learning_rate": 4.464038727524205e-05, + "log_odds_chosen": 4.60945987701416, + "log_odds_ratio": -0.13581793010234833, + "logits/chosen": -0.5580368041992188, + "logits/rejected": -0.5402668714523315, + "logps/chosen": -0.05534191057085991, + "logps/rejected": -0.8401125073432922, + "loss": 2.6204, + "nll_loss": 0.6415296196937561, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005534191615879536, + "rewards/margins": 0.07847706973552704, + "rewards/rejected": -0.0840112566947937, + "step": 2841 + }, + { + "epoch": 1.9654218533886585, + "grad_norm": 5.922436714172363, + "learning_rate": 4.46365452589519e-05, + "log_odds_chosen": 6.903104305267334, + "log_odds_ratio": -0.17947496473789215, + "logits/chosen": 0.05723372474312782, + "logits/rejected": 0.07201800495386124, + "logps/chosen": -0.03924143314361572, + "logps/rejected": -0.9726995229721069, + "loss": 2.2232, + "nll_loss": 0.5378566980361938, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003924143500626087, + "rewards/margins": 0.09334582090377808, + "rewards/rejected": -0.09726995974779129, + "step": 2842 + }, + { + "epoch": 1.9661134163208853, + "grad_norm": 10.01150131225586, + "learning_rate": 4.463270324266175e-05, + "log_odds_chosen": 4.400213718414307, + "log_odds_ratio": -0.35011982917785645, + "logits/chosen": -0.46392229199409485, + "logits/rejected": -0.44696903228759766, + "logps/chosen": -0.028617514297366142, + "logps/rejected": -0.830445408821106, + "loss": 3.6599, + "nll_loss": 0.8799704313278198, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0028617512434720993, + "rewards/margins": 0.08018279075622559, + "rewards/rejected": -0.08304454386234283, + "step": 2843 + }, + { + "epoch": 1.9668049792531122, + "grad_norm": 5.51869010925293, + "learning_rate": 4.4628861226371604e-05, + "log_odds_chosen": 4.802142143249512, + "log_odds_ratio": -0.03285137936472893, + "logits/chosen": -0.004739582538604736, + "logits/rejected": -0.07433182001113892, + "logps/chosen": -0.021929241716861725, + "logps/rejected": -0.8128564357757568, + "loss": 2.5213, + "nll_loss": 0.6270398497581482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00219292426481843, + "rewards/margins": 0.07909271866083145, + "rewards/rejected": -0.08128564804792404, + "step": 2844 + }, + { + "epoch": 1.967496542185339, + "grad_norm": 6.1920976638793945, + "learning_rate": 4.4625019210081456e-05, + "log_odds_chosen": 4.352306842803955, + "log_odds_ratio": -0.19127348065376282, + "logits/chosen": -0.4722940921783447, + "logits/rejected": -0.5073493123054504, + "logps/chosen": -0.06751982867717743, + "logps/rejected": -1.009918451309204, + "loss": 2.608, + "nll_loss": 0.6328845024108887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006751983426511288, + "rewards/margins": 0.09423987567424774, + "rewards/rejected": -0.10099184513092041, + "step": 2845 + }, + { + "epoch": 1.9681881051175658, + "grad_norm": 11.177550315856934, + "learning_rate": 4.46211771937913e-05, + "log_odds_chosen": 3.4274983406066895, + "log_odds_ratio": -0.48501288890838623, + "logits/chosen": -0.7478051781654358, + "logits/rejected": -0.7823911309242249, + "logps/chosen": -0.0978512093424797, + "logps/rejected": -0.8678635358810425, + "loss": 3.7819, + "nll_loss": 0.8969804048538208, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009785121306777, + "rewards/margins": 0.07700122892856598, + "rewards/rejected": -0.08678635209798813, + "step": 2846 + }, + { + "epoch": 1.9688796680497926, + "grad_norm": 7.19655704498291, + "learning_rate": 4.4617335177501154e-05, + "log_odds_chosen": 3.085355520248413, + "log_odds_ratio": -0.2254747599363327, + "logits/chosen": -0.6109316945075989, + "logits/rejected": -0.6778422594070435, + "logps/chosen": -0.14739884436130524, + "logps/rejected": -0.8291506767272949, + "loss": 3.3712, + "nll_loss": 0.8202444911003113, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014739884994924068, + "rewards/margins": 0.06817518174648285, + "rewards/rejected": -0.08291506767272949, + "step": 2847 + }, + { + "epoch": 1.9695712309820195, + "grad_norm": 6.391579627990723, + "learning_rate": 4.461349316121101e-05, + "log_odds_chosen": 5.636587142944336, + "log_odds_ratio": -0.05788606405258179, + "logits/chosen": -0.44978466629981995, + "logits/rejected": -0.4869334399700165, + "logps/chosen": -0.037404656410217285, + "logps/rejected": -1.0483611822128296, + "loss": 2.4287, + "nll_loss": 0.6013800501823425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003740465734153986, + "rewards/margins": 0.10109566152095795, + "rewards/rejected": -0.1048361212015152, + "step": 2848 + }, + { + "epoch": 1.9702627939142463, + "grad_norm": 7.4656548500061035, + "learning_rate": 4.460965114492085e-05, + "log_odds_chosen": 4.645669937133789, + "log_odds_ratio": -0.22266227006912231, + "logits/chosen": -0.49918222427368164, + "logits/rejected": -0.6102890968322754, + "logps/chosen": -0.06597128510475159, + "logps/rejected": -0.750339150428772, + "loss": 3.5255, + "nll_loss": 0.8591197729110718, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006597128696739674, + "rewards/margins": 0.06843678653240204, + "rewards/rejected": -0.07503391802310944, + "step": 2849 + }, + { + "epoch": 1.9709543568464731, + "grad_norm": 5.622032165527344, + "learning_rate": 4.460580912863071e-05, + "log_odds_chosen": 3.8457815647125244, + "log_odds_ratio": -0.17395474016666412, + "logits/chosen": -0.5099331140518188, + "logits/rejected": -0.5281578898429871, + "logps/chosen": -0.062186792492866516, + "logps/rejected": -0.627037525177002, + "loss": 2.807, + "nll_loss": 0.6843648552894592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006218679249286652, + "rewards/margins": 0.056485071778297424, + "rewards/rejected": -0.06270375102758408, + "step": 2850 + }, + { + "epoch": 1.9716459197787, + "grad_norm": 4.130119323730469, + "learning_rate": 4.460196711234056e-05, + "log_odds_chosen": 8.39101505279541, + "log_odds_ratio": -0.0006186411483213305, + "logits/chosen": -0.17172306776046753, + "logits/rejected": -0.1964360475540161, + "logps/chosen": -0.0007562484825029969, + "logps/rejected": -1.0518345832824707, + "loss": 2.7238, + "nll_loss": 0.6808990240097046, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.562484825029969e-05, + "rewards/margins": 0.10510782897472382, + "rewards/rejected": -0.105183444917202, + "step": 2851 + }, + { + "epoch": 1.9723374827109268, + "grad_norm": 10.317930221557617, + "learning_rate": 4.459812509605041e-05, + "log_odds_chosen": 6.341769218444824, + "log_odds_ratio": -0.04393875226378441, + "logits/chosen": -0.27549225091934204, + "logits/rejected": -0.3653901219367981, + "logps/chosen": -0.019962439313530922, + "logps/rejected": -0.8477230072021484, + "loss": 3.417, + "nll_loss": 0.8498455882072449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019962440710514784, + "rewards/margins": 0.08277606219053268, + "rewards/rejected": -0.08477230370044708, + "step": 2852 + }, + { + "epoch": 1.9730290456431536, + "grad_norm": 8.948822021484375, + "learning_rate": 4.459428307976026e-05, + "log_odds_chosen": 2.1581223011016846, + "log_odds_ratio": -0.3025994896888733, + "logits/chosen": -0.6348332166671753, + "logits/rejected": -0.5737580060958862, + "logps/chosen": -0.1140579879283905, + "logps/rejected": -0.44779258966445923, + "loss": 3.7934, + "nll_loss": 0.9181008338928223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011405798606574535, + "rewards/margins": 0.03337346017360687, + "rewards/rejected": -0.04477925971150398, + "step": 2853 + }, + { + "epoch": 1.9737206085753805, + "grad_norm": 7.595555305480957, + "learning_rate": 4.4590441063470115e-05, + "log_odds_chosen": 5.8971028327941895, + "log_odds_ratio": -0.1407633274793625, + "logits/chosen": -0.4646201729774475, + "logits/rejected": -0.5450406074523926, + "logps/chosen": -0.07197277247905731, + "logps/rejected": -1.3927878141403198, + "loss": 2.8903, + "nll_loss": 0.7084888219833374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007197277620434761, + "rewards/margins": 0.1320815086364746, + "rewards/rejected": -0.13927876949310303, + "step": 2854 + }, + { + "epoch": 1.9744121715076073, + "grad_norm": 8.590985298156738, + "learning_rate": 4.458659904717996e-05, + "log_odds_chosen": 5.99652099609375, + "log_odds_ratio": -0.06401261687278748, + "logits/chosen": -0.3496834635734558, + "logits/rejected": -0.4178650379180908, + "logps/chosen": -0.023707207292318344, + "logps/rejected": -0.9959208369255066, + "loss": 3.0665, + "nll_loss": 0.7602324485778809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023707207292318344, + "rewards/margins": 0.09722136706113815, + "rewards/rejected": -0.09959208220243454, + "step": 2855 + }, + { + "epoch": 1.9751037344398341, + "grad_norm": 8.18436336517334, + "learning_rate": 4.458275703088981e-05, + "log_odds_chosen": 5.816441059112549, + "log_odds_ratio": -0.1955913007259369, + "logits/chosen": -0.29734522104263306, + "logits/rejected": -0.3642163872718811, + "logps/chosen": -0.050296586006879807, + "logps/rejected": -0.8719285726547241, + "loss": 3.4962, + "nll_loss": 0.8544830679893494, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005029658786952496, + "rewards/margins": 0.08216319978237152, + "rewards/rejected": -0.08719285577535629, + "step": 2856 + }, + { + "epoch": 1.975795297372061, + "grad_norm": 6.27744197845459, + "learning_rate": 4.4578915014599665e-05, + "log_odds_chosen": 8.145572662353516, + "log_odds_ratio": -0.0036865519359707832, + "logits/chosen": -0.3854920566082001, + "logits/rejected": -0.48750007152557373, + "logps/chosen": -0.013279465027153492, + "logps/rejected": -1.5056675672531128, + "loss": 2.6731, + "nll_loss": 0.6679179668426514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013279466656967998, + "rewards/margins": 0.14923880994319916, + "rewards/rejected": -0.15056675672531128, + "step": 2857 + }, + { + "epoch": 1.9764868603042878, + "grad_norm": 8.208527565002441, + "learning_rate": 4.457507299830951e-05, + "log_odds_chosen": 6.305216312408447, + "log_odds_ratio": -0.020527303218841553, + "logits/chosen": -0.17919857800006866, + "logits/rejected": -0.2800734043121338, + "logps/chosen": -0.02931048907339573, + "logps/rejected": -1.0246630907058716, + "loss": 4.0488, + "nll_loss": 1.0101357698440552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002931049093604088, + "rewards/margins": 0.0995352566242218, + "rewards/rejected": -0.10246631503105164, + "step": 2858 + }, + { + "epoch": 1.9771784232365146, + "grad_norm": 6.1647844314575195, + "learning_rate": 4.457123098201937e-05, + "log_odds_chosen": 5.047560214996338, + "log_odds_ratio": -0.14424802362918854, + "logits/chosen": -0.6080353260040283, + "logits/rejected": -0.7011988759040833, + "logps/chosen": -0.06074054166674614, + "logps/rejected": -1.0708612203598022, + "loss": 2.1104, + "nll_loss": 0.5131810307502747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006074054166674614, + "rewards/margins": 0.10101206600666046, + "rewards/rejected": -0.10708612203598022, + "step": 2859 + }, + { + "epoch": 1.9778699861687414, + "grad_norm": 6.896218776702881, + "learning_rate": 4.4567388965729216e-05, + "log_odds_chosen": 4.218295574188232, + "log_odds_ratio": -0.23091116547584534, + "logits/chosen": -0.2940022945404053, + "logits/rejected": -0.3114502429962158, + "logps/chosen": -0.06656209379434586, + "logps/rejected": -0.5783684253692627, + "loss": 2.5845, + "nll_loss": 0.6230310797691345, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00665620993822813, + "rewards/margins": 0.0511806383728981, + "rewards/rejected": -0.05783684551715851, + "step": 2860 + }, + { + "epoch": 1.9785615491009683, + "grad_norm": 6.326053619384766, + "learning_rate": 4.456354694943907e-05, + "log_odds_chosen": 5.043857097625732, + "log_odds_ratio": -0.07648416608572006, + "logits/chosen": -0.7028237581253052, + "logits/rejected": -0.7462775707244873, + "logps/chosen": -0.04524366185069084, + "logps/rejected": -0.6462970972061157, + "loss": 2.5193, + "nll_loss": 0.622175395488739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004524365998804569, + "rewards/margins": 0.0601053424179554, + "rewards/rejected": -0.0646297037601471, + "step": 2861 + }, + { + "epoch": 1.979253112033195, + "grad_norm": 8.298463821411133, + "learning_rate": 4.455970493314892e-05, + "log_odds_chosen": 4.941799163818359, + "log_odds_ratio": -0.2625234127044678, + "logits/chosen": -0.6671578884124756, + "logits/rejected": -0.6977245211601257, + "logps/chosen": -0.05639420822262764, + "logps/rejected": -0.5281553864479065, + "loss": 3.7381, + "nll_loss": 0.9082846641540527, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005639421287924051, + "rewards/margins": 0.047176118940114975, + "rewards/rejected": -0.05281553789973259, + "step": 2862 + }, + { + "epoch": 1.979944674965422, + "grad_norm": 5.313868522644043, + "learning_rate": 4.455586291685877e-05, + "log_odds_chosen": 4.31884241104126, + "log_odds_ratio": -0.13699235022068024, + "logits/chosen": -0.7914988398551941, + "logits/rejected": -0.8392302989959717, + "logps/chosen": -0.08075849711894989, + "logps/rejected": -0.9601966142654419, + "loss": 3.7196, + "nll_loss": 0.916207492351532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008075850084424019, + "rewards/margins": 0.08794382214546204, + "rewards/rejected": -0.0960196703672409, + "step": 2863 + }, + { + "epoch": 1.9806362378976488, + "grad_norm": 4.304253101348877, + "learning_rate": 4.455202090056862e-05, + "log_odds_chosen": 5.359724044799805, + "log_odds_ratio": -0.07342061400413513, + "logits/chosen": -0.6200317144393921, + "logits/rejected": -0.6785701513290405, + "logps/chosen": -0.03738350793719292, + "logps/rejected": -0.8588244318962097, + "loss": 2.112, + "nll_loss": 0.5206697583198547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003738350933417678, + "rewards/margins": 0.08214408159255981, + "rewards/rejected": -0.08588244020938873, + "step": 2864 + }, + { + "epoch": 1.9813278008298756, + "grad_norm": 4.600732803344727, + "learning_rate": 4.454817888427847e-05, + "log_odds_chosen": 5.447720527648926, + "log_odds_ratio": -0.0366494357585907, + "logits/chosen": -0.2615331709384918, + "logits/rejected": -0.18944472074508667, + "logps/chosen": -0.03816407918930054, + "logps/rejected": -1.2567387819290161, + "loss": 3.6339, + "nll_loss": 0.9048019051551819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038164081051945686, + "rewards/margins": 0.12185746431350708, + "rewards/rejected": -0.12567389011383057, + "step": 2865 + }, + { + "epoch": 1.9820193637621024, + "grad_norm": 8.667045593261719, + "learning_rate": 4.4544336867988324e-05, + "log_odds_chosen": 5.813375473022461, + "log_odds_ratio": -0.20894940197467804, + "logits/chosen": -0.22451533377170563, + "logits/rejected": -0.24346986413002014, + "logps/chosen": -0.038732875138521194, + "logps/rejected": -0.8478078842163086, + "loss": 2.7926, + "nll_loss": 0.6772516965866089, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0038732877001166344, + "rewards/margins": 0.08090750873088837, + "rewards/rejected": -0.08478079736232758, + "step": 2866 + }, + { + "epoch": 1.9827109266943292, + "grad_norm": 10.199563980102539, + "learning_rate": 4.454049485169817e-05, + "log_odds_chosen": 4.983705997467041, + "log_odds_ratio": -0.24623095989227295, + "logits/chosen": -0.5467318892478943, + "logits/rejected": -0.5973883271217346, + "logps/chosen": -0.06223399564623833, + "logps/rejected": -1.1259304285049438, + "loss": 3.4424, + "nll_loss": 0.8359812498092651, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00622339965775609, + "rewards/margins": 0.10636964440345764, + "rewards/rejected": -0.11259303987026215, + "step": 2867 + }, + { + "epoch": 1.983402489626556, + "grad_norm": 8.570449829101562, + "learning_rate": 4.453665283540803e-05, + "log_odds_chosen": 6.765602111816406, + "log_odds_ratio": -0.03605763614177704, + "logits/chosen": -0.22504515945911407, + "logits/rejected": -0.267051637172699, + "logps/chosen": -0.0312674380838871, + "logps/rejected": -1.280580997467041, + "loss": 2.6459, + "nll_loss": 0.6578635573387146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003126743948087096, + "rewards/margins": 0.12493135035037994, + "rewards/rejected": -0.12805810570716858, + "step": 2868 + }, + { + "epoch": 1.984094052558783, + "grad_norm": 11.911445617675781, + "learning_rate": 4.4532810819117874e-05, + "log_odds_chosen": 4.111666202545166, + "log_odds_ratio": -0.16683000326156616, + "logits/chosen": -0.38888388872146606, + "logits/rejected": -0.4704105854034424, + "logps/chosen": -0.061243414878845215, + "logps/rejected": -0.829252302646637, + "loss": 3.105, + "nll_loss": 0.7595645189285278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006124341860413551, + "rewards/margins": 0.0768008902668953, + "rewards/rejected": -0.0829252377152443, + "step": 2869 + }, + { + "epoch": 1.9847856154910097, + "grad_norm": 7.618926048278809, + "learning_rate": 4.452896880282773e-05, + "log_odds_chosen": 6.063479423522949, + "log_odds_ratio": -0.06398558616638184, + "logits/chosen": -0.807996928691864, + "logits/rejected": -0.8398293852806091, + "logps/chosen": -0.022773319855332375, + "logps/rejected": -0.9269109964370728, + "loss": 2.6953, + "nll_loss": 0.6674139499664307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00227733189240098, + "rewards/margins": 0.09041377902030945, + "rewards/rejected": -0.0926911011338234, + "step": 2870 + }, + { + "epoch": 1.9854771784232366, + "grad_norm": 7.750586032867432, + "learning_rate": 4.452512678653757e-05, + "log_odds_chosen": 4.687612056732178, + "log_odds_ratio": -0.32614877820014954, + "logits/chosen": -0.3697401285171509, + "logits/rejected": -0.43365412950515747, + "logps/chosen": -0.05747876688838005, + "logps/rejected": -0.8642250299453735, + "loss": 2.7228, + "nll_loss": 0.6480889320373535, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005747877061367035, + "rewards/margins": 0.08067463338375092, + "rewards/rejected": -0.08642250299453735, + "step": 2871 + }, + { + "epoch": 1.9861687413554634, + "grad_norm": 6.014692306518555, + "learning_rate": 4.452128477024743e-05, + "log_odds_chosen": 3.9112367630004883, + "log_odds_ratio": -0.13464248180389404, + "logits/chosen": -0.8118262887001038, + "logits/rejected": -0.8243058323860168, + "logps/chosen": -0.029838457703590393, + "logps/rejected": -0.5436880588531494, + "loss": 3.0423, + "nll_loss": 0.7471101880073547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029838457703590393, + "rewards/margins": 0.05138496309518814, + "rewards/rejected": -0.05436880886554718, + "step": 2872 + }, + { + "epoch": 1.9868603042876902, + "grad_norm": 3.0963494777679443, + "learning_rate": 4.451744275395728e-05, + "log_odds_chosen": 6.304614067077637, + "log_odds_ratio": -0.021528642624616623, + "logits/chosen": -0.40912091732025146, + "logits/rejected": -0.42527052760124207, + "logps/chosen": -0.018766680732369423, + "logps/rejected": -0.8206733465194702, + "loss": 2.6346, + "nll_loss": 0.6564869284629822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018766680732369423, + "rewards/margins": 0.08019067347049713, + "rewards/rejected": -0.0820673406124115, + "step": 2873 + }, + { + "epoch": 1.987551867219917, + "grad_norm": 7.448980808258057, + "learning_rate": 4.451360073766713e-05, + "log_odds_chosen": 4.426904678344727, + "log_odds_ratio": -0.3069632649421692, + "logits/chosen": -0.7302984595298767, + "logits/rejected": -0.7961975336074829, + "logps/chosen": -0.06175190210342407, + "logps/rejected": -0.7879979610443115, + "loss": 2.9033, + "nll_loss": 0.6951185464859009, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0061751906760036945, + "rewards/margins": 0.07262460887432098, + "rewards/rejected": -0.07879979908466339, + "step": 2874 + }, + { + "epoch": 1.9882434301521439, + "grad_norm": 8.628366470336914, + "learning_rate": 4.450975872137698e-05, + "log_odds_chosen": 6.7540740966796875, + "log_odds_ratio": -0.06287705898284912, + "logits/chosen": -0.4166528582572937, + "logits/rejected": -0.4531726539134979, + "logps/chosen": -0.029345639050006866, + "logps/rejected": -1.141418695449829, + "loss": 2.5816, + "nll_loss": 0.6391215324401855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029345639050006866, + "rewards/margins": 0.11120730638504028, + "rewards/rejected": -0.11414187401533127, + "step": 2875 + }, + { + "epoch": 1.9889349930843707, + "grad_norm": 7.2847137451171875, + "learning_rate": 4.450591670508683e-05, + "log_odds_chosen": 5.661350727081299, + "log_odds_ratio": -0.01193216722458601, + "logits/chosen": -0.9701260328292847, + "logits/rejected": -1.0334163904190063, + "logps/chosen": -0.029496140778064728, + "logps/rejected": -1.305067539215088, + "loss": 3.591, + "nll_loss": 0.8965597152709961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029496143106371164, + "rewards/margins": 0.12755712866783142, + "rewards/rejected": -0.1305067539215088, + "step": 2876 + }, + { + "epoch": 1.9896265560165975, + "grad_norm": 9.756966590881348, + "learning_rate": 4.450207468879668e-05, + "log_odds_chosen": 6.483921527862549, + "log_odds_ratio": -0.011629382148385048, + "logits/chosen": -0.3076292872428894, + "logits/rejected": -0.39227768778800964, + "logps/chosen": -0.03447824716567993, + "logps/rejected": -1.3238623142242432, + "loss": 2.9971, + "nll_loss": 0.7481008768081665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034478246234357357, + "rewards/margins": 0.12893840670585632, + "rewards/rejected": -0.1323862224817276, + "step": 2877 + }, + { + "epoch": 1.9903181189488244, + "grad_norm": 6.210879325866699, + "learning_rate": 4.449823267250653e-05, + "log_odds_chosen": 3.429314136505127, + "log_odds_ratio": -0.16521140933036804, + "logits/chosen": -0.35070574283599854, + "logits/rejected": -0.2949613332748413, + "logps/chosen": -0.0705675259232521, + "logps/rejected": -0.8604551553726196, + "loss": 2.848, + "nll_loss": 0.6954702138900757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007056752685457468, + "rewards/margins": 0.07898876816034317, + "rewards/rejected": -0.0860455185174942, + "step": 2878 + }, + { + "epoch": 1.9910096818810512, + "grad_norm": 6.387576580047607, + "learning_rate": 4.4494390656216385e-05, + "log_odds_chosen": 6.673967361450195, + "log_odds_ratio": -0.09716249257326126, + "logits/chosen": -0.4616803526878357, + "logits/rejected": -0.5385611057281494, + "logps/chosen": -0.026279207319021225, + "logps/rejected": -1.023545742034912, + "loss": 2.9748, + "nll_loss": 0.7339800596237183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00262792082503438, + "rewards/margins": 0.09972664713859558, + "rewards/rejected": -0.10235457122325897, + "step": 2879 + }, + { + "epoch": 1.991701244813278, + "grad_norm": 9.775636672973633, + "learning_rate": 4.449054863992623e-05, + "log_odds_chosen": 3.4497570991516113, + "log_odds_ratio": -0.7726833820343018, + "logits/chosen": -0.5093398690223694, + "logits/rejected": -0.5379137992858887, + "logps/chosen": -0.12793438136577606, + "logps/rejected": -0.9378427267074585, + "loss": 3.4558, + "nll_loss": 0.7866874933242798, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.012793438509106636, + "rewards/margins": 0.08099082857370377, + "rewards/rejected": -0.09378427267074585, + "step": 2880 + }, + { + "epoch": 1.9923928077455049, + "grad_norm": 3.651247262954712, + "learning_rate": 4.448670662363609e-05, + "log_odds_chosen": 5.562066555023193, + "log_odds_ratio": -0.03729052469134331, + "logits/chosen": -0.5071220993995667, + "logits/rejected": -0.6439098119735718, + "logps/chosen": -0.03577201068401337, + "logps/rejected": -1.1652424335479736, + "loss": 1.8377, + "nll_loss": 0.4556844234466553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003577201394364238, + "rewards/margins": 0.11294703930616379, + "rewards/rejected": -0.11652424931526184, + "step": 2881 + }, + { + "epoch": 1.9930843706777317, + "grad_norm": 4.731902599334717, + "learning_rate": 4.4482864607345936e-05, + "log_odds_chosen": 6.594812393188477, + "log_odds_ratio": -0.0028282294515520334, + "logits/chosen": -0.341488778591156, + "logits/rejected": -0.3427751362323761, + "logps/chosen": -0.03473107889294624, + "logps/rejected": -1.6951305866241455, + "loss": 2.6016, + "nll_loss": 0.6501142382621765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034731074701994658, + "rewards/margins": 0.16603994369506836, + "rewards/rejected": -0.1695130616426468, + "step": 2882 + }, + { + "epoch": 1.9937759336099585, + "grad_norm": 7.184812545776367, + "learning_rate": 4.447902259105579e-05, + "log_odds_chosen": 5.885580062866211, + "log_odds_ratio": -0.13160696625709534, + "logits/chosen": -0.42819827795028687, + "logits/rejected": -0.45641326904296875, + "logps/chosen": -0.1743771880865097, + "logps/rejected": -0.9931154251098633, + "loss": 2.0322, + "nll_loss": 0.4948911666870117, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.017437715083360672, + "rewards/margins": 0.0818738341331482, + "rewards/rejected": -0.09931155294179916, + "step": 2883 + }, + { + "epoch": 1.9944674965421854, + "grad_norm": 7.710697650909424, + "learning_rate": 4.447518057476564e-05, + "log_odds_chosen": 5.005013465881348, + "log_odds_ratio": -0.09908229857683182, + "logits/chosen": -0.8851842284202576, + "logits/rejected": -0.960582971572876, + "logps/chosen": -0.031020062044262886, + "logps/rejected": -0.5218628644943237, + "loss": 3.4158, + "nll_loss": 0.8440319299697876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00310200615786016, + "rewards/margins": 0.04908428341150284, + "rewards/rejected": -0.05218628793954849, + "step": 2884 + }, + { + "epoch": 1.9951590594744122, + "grad_norm": 6.744011878967285, + "learning_rate": 4.4471338558475486e-05, + "log_odds_chosen": 6.812548637390137, + "log_odds_ratio": -0.012456808239221573, + "logits/chosen": -0.9489343762397766, + "logits/rejected": -0.8417586088180542, + "logps/chosen": -0.0055020651780068874, + "logps/rejected": -1.1910438537597656, + "loss": 2.7535, + "nll_loss": 0.6871209144592285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005502065760083497, + "rewards/margins": 0.11855418235063553, + "rewards/rejected": -0.11910439282655716, + "step": 2885 + }, + { + "epoch": 1.995850622406639, + "grad_norm": 9.45841121673584, + "learning_rate": 4.446749654218534e-05, + "log_odds_chosen": 5.1840009689331055, + "log_odds_ratio": -0.05699038878083229, + "logits/chosen": -0.7790420055389404, + "logits/rejected": -0.8795222043991089, + "logps/chosen": -0.055796995759010315, + "logps/rejected": -1.1143743991851807, + "loss": 3.517, + "nll_loss": 0.8735427260398865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005579699762165546, + "rewards/margins": 0.1058577448129654, + "rewards/rejected": -0.11143743991851807, + "step": 2886 + }, + { + "epoch": 1.9965421853388658, + "grad_norm": 7.960890293121338, + "learning_rate": 4.446365452589519e-05, + "log_odds_chosen": 5.083607196807861, + "log_odds_ratio": -0.02815091609954834, + "logits/chosen": -0.7035555243492126, + "logits/rejected": -0.7305249571800232, + "logps/chosen": -0.026452593505382538, + "logps/rejected": -1.0385524034500122, + "loss": 3.0024, + "nll_loss": 0.7477902770042419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026452592574059963, + "rewards/margins": 0.10120998322963715, + "rewards/rejected": -0.10385524481534958, + "step": 2887 + }, + { + "epoch": 1.9972337482710927, + "grad_norm": 8.027430534362793, + "learning_rate": 4.4459812509605044e-05, + "log_odds_chosen": 7.032285213470459, + "log_odds_ratio": -0.01024580467492342, + "logits/chosen": -0.7812651991844177, + "logits/rejected": -0.8479325771331787, + "logps/chosen": -0.018492156639695168, + "logps/rejected": -1.0106534957885742, + "loss": 2.4929, + "nll_loss": 0.6222111582756042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018492157105356455, + "rewards/margins": 0.09921613335609436, + "rewards/rejected": -0.10106535255908966, + "step": 2888 + }, + { + "epoch": 1.9979253112033195, + "grad_norm": 8.134820938110352, + "learning_rate": 4.445597049331489e-05, + "log_odds_chosen": 4.335086345672607, + "log_odds_ratio": -0.3089689314365387, + "logits/chosen": -0.7933206558227539, + "logits/rejected": -0.7895016074180603, + "logps/chosen": -0.07977023720741272, + "logps/rejected": -0.6772675514221191, + "loss": 2.6851, + "nll_loss": 0.6403782367706299, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007977023720741272, + "rewards/margins": 0.05974973365664482, + "rewards/rejected": -0.0677267536520958, + "step": 2889 + }, + { + "epoch": 1.9986168741355463, + "grad_norm": 9.249191284179688, + "learning_rate": 4.445212847702475e-05, + "log_odds_chosen": 4.665498733520508, + "log_odds_ratio": -0.2790174186229706, + "logits/chosen": -0.8788178563117981, + "logits/rejected": -0.9095785021781921, + "logps/chosen": -0.053846150636672974, + "logps/rejected": -1.1634886264801025, + "loss": 2.9982, + "nll_loss": 0.7216591835021973, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005384615156799555, + "rewards/margins": 0.11096425354480743, + "rewards/rejected": -0.11634886264801025, + "step": 2890 + }, + { + "epoch": 1.9993084370677732, + "grad_norm": 11.595385551452637, + "learning_rate": 4.4448286460734594e-05, + "log_odds_chosen": 4.811706066131592, + "log_odds_ratio": -0.3877679705619812, + "logits/chosen": -0.6049620509147644, + "logits/rejected": -0.6957690715789795, + "logps/chosen": -0.07414204627275467, + "logps/rejected": -1.0122088193893433, + "loss": 2.8433, + "nll_loss": 0.6720539927482605, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007414204999804497, + "rewards/margins": 0.0938066840171814, + "rewards/rejected": -0.10122088342905045, + "step": 2891 + }, + { + "epoch": 2.0, + "grad_norm": 12.436097145080566, + "learning_rate": 4.4444444444444447e-05, + "log_odds_chosen": 6.016121864318848, + "log_odds_ratio": -0.031811460852622986, + "logits/chosen": -0.6459561586380005, + "logits/rejected": -0.6764639616012573, + "logps/chosen": -0.021977191790938377, + "logps/rejected": -0.9659074544906616, + "loss": 4.1095, + "nll_loss": 1.024193286895752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021977191790938377, + "rewards/margins": 0.09439302980899811, + "rewards/rejected": -0.09659074991941452, + "step": 2892 + }, + { + "epoch": 2.000691562932227, + "grad_norm": 7.805417537689209, + "learning_rate": 4.44406024281543e-05, + "log_odds_chosen": 6.162633895874023, + "log_odds_ratio": -0.01756115071475506, + "logits/chosen": -0.8467515110969543, + "logits/rejected": -0.9036287665367126, + "logps/chosen": -0.02380123734474182, + "logps/rejected": -1.1052594184875488, + "loss": 2.7652, + "nll_loss": 0.6895546913146973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023801238276064396, + "rewards/margins": 0.1081458106637001, + "rewards/rejected": -0.1105259358882904, + "step": 2893 + }, + { + "epoch": 2.0013831258644537, + "grad_norm": 7.53794527053833, + "learning_rate": 4.4436760411864145e-05, + "log_odds_chosen": 3.3358545303344727, + "log_odds_ratio": -0.15635338425636292, + "logits/chosen": -0.5617286562919617, + "logits/rejected": -0.569664716720581, + "logps/chosen": -0.07381202280521393, + "logps/rejected": -0.4878903031349182, + "loss": 2.5615, + "nll_loss": 0.6247285604476929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007381202653050423, + "rewards/margins": 0.04140783101320267, + "rewards/rejected": -0.04878903180360794, + "step": 2894 + }, + { + "epoch": 2.0020746887966805, + "grad_norm": 4.7608232498168945, + "learning_rate": 4.4432918395574e-05, + "log_odds_chosen": 5.928215026855469, + "log_odds_ratio": -0.2225520759820938, + "logits/chosen": -0.4803347587585449, + "logits/rejected": -0.4999226927757263, + "logps/chosen": -0.03862200677394867, + "logps/rejected": -0.9687985777854919, + "loss": 2.1594, + "nll_loss": 0.517595648765564, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038622005376964808, + "rewards/margins": 0.09301765263080597, + "rewards/rejected": -0.09687986224889755, + "step": 2895 + }, + { + "epoch": 2.0027662517289073, + "grad_norm": 12.858072280883789, + "learning_rate": 4.442907637928385e-05, + "log_odds_chosen": 5.557333469390869, + "log_odds_ratio": -0.21996933221817017, + "logits/chosen": -0.7167026996612549, + "logits/rejected": -0.7696047425270081, + "logps/chosen": -0.04461139813065529, + "logps/rejected": -0.7744177579879761, + "loss": 3.0493, + "nll_loss": 0.7403295040130615, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004461139906197786, + "rewards/margins": 0.07298063486814499, + "rewards/rejected": -0.07744176685810089, + "step": 2896 + }, + { + "epoch": 2.003457814661134, + "grad_norm": 3.1132564544677734, + "learning_rate": 4.44252343629937e-05, + "log_odds_chosen": 5.70494270324707, + "log_odds_ratio": -0.023692548274993896, + "logits/chosen": -0.707499623298645, + "logits/rejected": -0.7106534242630005, + "logps/chosen": -0.02470366843044758, + "logps/rejected": -0.9739349484443665, + "loss": 2.3563, + "nll_loss": 0.5866976976394653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024703668896108866, + "rewards/margins": 0.09492312371730804, + "rewards/rejected": -0.09739349037408829, + "step": 2897 + }, + { + "epoch": 2.004149377593361, + "grad_norm": 6.3205718994140625, + "learning_rate": 4.442139234670355e-05, + "log_odds_chosen": 5.740413188934326, + "log_odds_ratio": -0.0508279912173748, + "logits/chosen": -0.6263430714607239, + "logits/rejected": -0.6554606556892395, + "logps/chosen": -0.04459373652935028, + "logps/rejected": -1.0038213729858398, + "loss": 3.1485, + "nll_loss": 0.7820312976837158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004459373652935028, + "rewards/margins": 0.09592276811599731, + "rewards/rejected": -0.10038213431835175, + "step": 2898 + }, + { + "epoch": 2.004840940525588, + "grad_norm": 8.48741340637207, + "learning_rate": 4.441755033041341e-05, + "log_odds_chosen": 5.935898780822754, + "log_odds_ratio": -0.04259810596704483, + "logits/chosen": -0.6093939542770386, + "logits/rejected": -0.6718311309814453, + "logps/chosen": -0.02856658585369587, + "logps/rejected": -1.0036630630493164, + "loss": 2.3201, + "nll_loss": 0.5757532119750977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002856658538803458, + "rewards/margins": 0.09750963747501373, + "rewards/rejected": -0.10036630183458328, + "step": 2899 + }, + { + "epoch": 2.0055325034578146, + "grad_norm": 6.241588115692139, + "learning_rate": 4.441370831412325e-05, + "log_odds_chosen": 6.6179304122924805, + "log_odds_ratio": -0.06559796631336212, + "logits/chosen": -0.494695246219635, + "logits/rejected": -0.5800277590751648, + "logps/chosen": -0.04796488955616951, + "logps/rejected": -1.256560206413269, + "loss": 2.1233, + "nll_loss": 0.5242593288421631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004796489141881466, + "rewards/margins": 0.1208595335483551, + "rewards/rejected": -0.12565602362155914, + "step": 2900 + }, + { + "epoch": 2.0062240663900415, + "grad_norm": 9.595657348632812, + "learning_rate": 4.4409866297833105e-05, + "log_odds_chosen": 6.480893135070801, + "log_odds_ratio": -0.05753350630402565, + "logits/chosen": -0.3866846561431885, + "logits/rejected": -0.5218240022659302, + "logps/chosen": -0.014257104136049747, + "logps/rejected": -1.110721230506897, + "loss": 2.3186, + "nll_loss": 0.5738852024078369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014257101574912667, + "rewards/margins": 0.10964640974998474, + "rewards/rejected": -0.1110721156001091, + "step": 2901 + }, + { + "epoch": 2.0069156293222683, + "grad_norm": 5.194662570953369, + "learning_rate": 4.440602428154296e-05, + "log_odds_chosen": 4.608372211456299, + "log_odds_ratio": -0.07409907132387161, + "logits/chosen": -0.4550461173057556, + "logits/rejected": -0.5282151699066162, + "logps/chosen": -0.05694880336523056, + "logps/rejected": -0.940022349357605, + "loss": 2.1178, + "nll_loss": 0.5220479965209961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005694880615919828, + "rewards/margins": 0.08830735832452774, + "rewards/rejected": -0.09400224685668945, + "step": 2902 + }, + { + "epoch": 2.007607192254495, + "grad_norm": 4.882895469665527, + "learning_rate": 4.44021822652528e-05, + "log_odds_chosen": 6.601312637329102, + "log_odds_ratio": -0.11335127055644989, + "logits/chosen": -0.38558921217918396, + "logits/rejected": -0.40306785702705383, + "logps/chosen": -0.05045357346534729, + "logps/rejected": -1.3800384998321533, + "loss": 2.2444, + "nll_loss": 0.5497696399688721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005045357625931501, + "rewards/margins": 0.13295850157737732, + "rewards/rejected": -0.1380038559436798, + "step": 2903 + }, + { + "epoch": 2.008298755186722, + "grad_norm": 4.80790901184082, + "learning_rate": 4.4398340248962656e-05, + "log_odds_chosen": 5.533296585083008, + "log_odds_ratio": -0.053497496992349625, + "logits/chosen": -0.570841908454895, + "logits/rejected": -0.5967553853988647, + "logps/chosen": -0.021107885986566544, + "logps/rejected": -0.9879939556121826, + "loss": 1.6265, + "nll_loss": 0.40128064155578613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021107885986566544, + "rewards/margins": 0.09668861329555511, + "rewards/rejected": -0.09879939258098602, + "step": 2904 + }, + { + "epoch": 2.008990318118949, + "grad_norm": 9.519055366516113, + "learning_rate": 4.439449823267251e-05, + "log_odds_chosen": 7.151142120361328, + "log_odds_ratio": -0.010571147315204144, + "logits/chosen": -0.8588684797286987, + "logits/rejected": -0.9364704489707947, + "logps/chosen": -0.017046969383955002, + "logps/rejected": -1.5156244039535522, + "loss": 4.0667, + "nll_loss": 1.0156068801879883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017046969151124358, + "rewards/margins": 0.14985774457454681, + "rewards/rejected": -0.15156245231628418, + "step": 2905 + }, + { + "epoch": 2.0096818810511756, + "grad_norm": 11.029942512512207, + "learning_rate": 4.439065621638236e-05, + "log_odds_chosen": 6.913255214691162, + "log_odds_ratio": -0.0055812327191233635, + "logits/chosen": -0.7851934432983398, + "logits/rejected": -0.8364708423614502, + "logps/chosen": -0.01619294472038746, + "logps/rejected": -1.1548116207122803, + "loss": 3.5259, + "nll_loss": 0.8809195756912231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016192945186048746, + "rewards/margins": 0.11386187374591827, + "rewards/rejected": -0.1154811680316925, + "step": 2906 + }, + { + "epoch": 2.0103734439834025, + "grad_norm": 9.97729778289795, + "learning_rate": 4.4386814200092206e-05, + "log_odds_chosen": 5.829092979431152, + "log_odds_ratio": -0.023012571036815643, + "logits/chosen": -0.983071506023407, + "logits/rejected": -1.1034585237503052, + "logps/chosen": -0.014948589727282524, + "logps/rejected": -1.1066315174102783, + "loss": 4.3768, + "nll_loss": 1.0918893814086914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014948589960113168, + "rewards/margins": 0.10916829109191895, + "rewards/rejected": -0.11066314578056335, + "step": 2907 + }, + { + "epoch": 2.0110650069156293, + "grad_norm": 10.478568077087402, + "learning_rate": 4.4382972183802065e-05, + "log_odds_chosen": 6.799100875854492, + "log_odds_ratio": -0.09093787521123886, + "logits/chosen": -0.7140034437179565, + "logits/rejected": -0.7645697593688965, + "logps/chosen": -0.07351753115653992, + "logps/rejected": -1.2636497020721436, + "loss": 3.1697, + "nll_loss": 0.7833304405212402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007351753301918507, + "rewards/margins": 0.1190132349729538, + "rewards/rejected": -0.12636497616767883, + "step": 2908 + }, + { + "epoch": 2.011756569847856, + "grad_norm": 9.869280815124512, + "learning_rate": 4.437913016751191e-05, + "log_odds_chosen": 7.119072914123535, + "log_odds_ratio": -0.004749711137264967, + "logits/chosen": -0.5111950039863586, + "logits/rejected": -0.6186168193817139, + "logps/chosen": -0.013184929266571999, + "logps/rejected": -1.3745231628417969, + "loss": 2.8304, + "nll_loss": 0.7071370482444763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013184929266571999, + "rewards/margins": 0.1361338198184967, + "rewards/rejected": -0.13745230436325073, + "step": 2909 + }, + { + "epoch": 2.012448132780083, + "grad_norm": 10.091440200805664, + "learning_rate": 4.4375288151221763e-05, + "log_odds_chosen": 4.885725021362305, + "log_odds_ratio": -0.4392738938331604, + "logits/chosen": -0.5816129446029663, + "logits/rejected": -0.6979354619979858, + "logps/chosen": -0.1722060739994049, + "logps/rejected": -0.7426496744155884, + "loss": 3.1504, + "nll_loss": 0.7436795830726624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01722060702741146, + "rewards/margins": 0.0570443719625473, + "rewards/rejected": -0.07426498085260391, + "step": 2910 + }, + { + "epoch": 2.0131396957123098, + "grad_norm": 6.289185047149658, + "learning_rate": 4.4371446134931616e-05, + "log_odds_chosen": 6.991199016571045, + "log_odds_ratio": -0.2730942964553833, + "logits/chosen": -0.4855737090110779, + "logits/rejected": -0.5304062366485596, + "logps/chosen": -0.036621369421482086, + "logps/rejected": -1.2104086875915527, + "loss": 1.8411, + "nll_loss": 0.4329620599746704, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003662137081846595, + "rewards/margins": 0.11737874150276184, + "rewards/rejected": -0.12104088068008423, + "step": 2911 + }, + { + "epoch": 2.0138312586445366, + "grad_norm": 12.436087608337402, + "learning_rate": 4.436760411864146e-05, + "log_odds_chosen": 6.602833271026611, + "log_odds_ratio": -0.05370119586586952, + "logits/chosen": -0.8386690616607666, + "logits/rejected": -0.9121941328048706, + "logps/chosen": -0.17093002796173096, + "logps/rejected": -1.591941475868225, + "loss": 2.9952, + "nll_loss": 0.7434421181678772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017093002796173096, + "rewards/margins": 0.14210115373134613, + "rewards/rejected": -0.15919415652751923, + "step": 2912 + }, + { + "epoch": 2.0145228215767634, + "grad_norm": 5.664691925048828, + "learning_rate": 4.4363762102351314e-05, + "log_odds_chosen": 7.527497291564941, + "log_odds_ratio": -0.02149026468396187, + "logits/chosen": -0.5523714423179626, + "logits/rejected": -0.5327743291854858, + "logps/chosen": -0.006375204771757126, + "logps/rejected": -1.0944676399230957, + "loss": 2.1397, + "nll_loss": 0.5327867269515991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006375204538926482, + "rewards/margins": 0.10880924761295319, + "rewards/rejected": -0.10944677144289017, + "step": 2913 + }, + { + "epoch": 2.0152143845089903, + "grad_norm": 6.964792728424072, + "learning_rate": 4.4359920086061166e-05, + "log_odds_chosen": 7.369643211364746, + "log_odds_ratio": -0.0036067054606974125, + "logits/chosen": -1.052027940750122, + "logits/rejected": -1.034885048866272, + "logps/chosen": -0.003161386586725712, + "logps/rejected": -1.0445643663406372, + "loss": 2.7793, + "nll_loss": 0.6944639682769775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003161386412102729, + "rewards/margins": 0.10414029657840729, + "rewards/rejected": -0.10445643216371536, + "step": 2914 + }, + { + "epoch": 2.015905947441217, + "grad_norm": 7.805403232574463, + "learning_rate": 4.435607806977102e-05, + "log_odds_chosen": 7.5195770263671875, + "log_odds_ratio": -0.01669224351644516, + "logits/chosen": -0.5986104011535645, + "logits/rejected": -0.6391288042068481, + "logps/chosen": -0.009707896038889885, + "logps/rejected": -1.4273556470870972, + "loss": 2.5347, + "nll_loss": 0.6320062875747681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000970789638813585, + "rewards/margins": 0.1417647898197174, + "rewards/rejected": -0.1427355706691742, + "step": 2915 + }, + { + "epoch": 2.016597510373444, + "grad_norm": 9.950371742248535, + "learning_rate": 4.4352236053480865e-05, + "log_odds_chosen": 4.98217248916626, + "log_odds_ratio": -0.5378924608230591, + "logits/chosen": -0.4886634349822998, + "logits/rejected": -0.5422289371490479, + "logps/chosen": -0.25005918741226196, + "logps/rejected": -0.954350471496582, + "loss": 2.733, + "nll_loss": 0.6294622421264648, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.025005917996168137, + "rewards/margins": 0.07042913138866425, + "rewards/rejected": -0.09543504565954208, + "step": 2916 + }, + { + "epoch": 2.0172890733056708, + "grad_norm": 6.791604995727539, + "learning_rate": 4.4348394037190724e-05, + "log_odds_chosen": 6.250939846038818, + "log_odds_ratio": -0.01960081048309803, + "logits/chosen": -0.5769975185394287, + "logits/rejected": -0.6699945330619812, + "logps/chosen": -0.04944942891597748, + "logps/rejected": -1.7604033946990967, + "loss": 2.1644, + "nll_loss": 0.5391305685043335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0049449424259364605, + "rewards/margins": 0.17109540104866028, + "rewards/rejected": -0.17604035139083862, + "step": 2917 + }, + { + "epoch": 2.0179806362378976, + "grad_norm": 6.165281772613525, + "learning_rate": 4.434455202090057e-05, + "log_odds_chosen": 4.771698951721191, + "log_odds_ratio": -0.539745032787323, + "logits/chosen": -0.3371831178665161, + "logits/rejected": -0.29655882716178894, + "logps/chosen": -0.2672892212867737, + "logps/rejected": -0.9545475244522095, + "loss": 2.1533, + "nll_loss": 0.48434799909591675, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.026728922501206398, + "rewards/margins": 0.0687258318066597, + "rewards/rejected": -0.09545475244522095, + "step": 2918 + }, + { + "epoch": 2.0186721991701244, + "grad_norm": 8.98730182647705, + "learning_rate": 4.434071000461042e-05, + "log_odds_chosen": 7.4459333419799805, + "log_odds_ratio": -0.01685933582484722, + "logits/chosen": -0.782569408416748, + "logits/rejected": -0.8473621010780334, + "logps/chosen": -0.018038183450698853, + "logps/rejected": -1.451753854751587, + "loss": 2.1227, + "nll_loss": 0.5289920568466187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001803818391636014, + "rewards/margins": 0.14337158203125, + "rewards/rejected": -0.14517538249492645, + "step": 2919 + }, + { + "epoch": 2.0193637621023512, + "grad_norm": 6.880577087402344, + "learning_rate": 4.4336867988320274e-05, + "log_odds_chosen": 7.7974348068237305, + "log_odds_ratio": -0.004169912077486515, + "logits/chosen": -0.5809128284454346, + "logits/rejected": -0.5881280303001404, + "logps/chosen": -0.007793866563588381, + "logps/rejected": -1.1953469514846802, + "loss": 2.6271, + "nll_loss": 0.6563675999641418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007793866680003703, + "rewards/margins": 0.11875531077384949, + "rewards/rejected": -0.1195346936583519, + "step": 2920 + }, + { + "epoch": 2.020055325034578, + "grad_norm": 8.577593803405762, + "learning_rate": 4.433302597203012e-05, + "log_odds_chosen": 6.973368167877197, + "log_odds_ratio": -0.0022561801597476006, + "logits/chosen": -0.49948495626449585, + "logits/rejected": -0.5469887852668762, + "logps/chosen": -0.00309023167937994, + "logps/rejected": -0.9119285345077515, + "loss": 2.5148, + "nll_loss": 0.6284716129302979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003090231621172279, + "rewards/margins": 0.09088382124900818, + "rewards/rejected": -0.09119285643100739, + "step": 2921 + }, + { + "epoch": 2.020746887966805, + "grad_norm": 7.7070722579956055, + "learning_rate": 4.432918395573997e-05, + "log_odds_chosen": 5.305390357971191, + "log_odds_ratio": -0.12080781906843185, + "logits/chosen": -0.8089467883110046, + "logits/rejected": -0.8239259719848633, + "logps/chosen": -0.02970268949866295, + "logps/rejected": -0.7682185769081116, + "loss": 2.6354, + "nll_loss": 0.6467616558074951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029702691826969385, + "rewards/margins": 0.07385159283876419, + "rewards/rejected": -0.07682186365127563, + "step": 2922 + }, + { + "epoch": 2.0214384508990317, + "grad_norm": 9.248912811279297, + "learning_rate": 4.4325341939449825e-05, + "log_odds_chosen": 4.687819004058838, + "log_odds_ratio": -0.03429641202092171, + "logits/chosen": -0.49940192699432373, + "logits/rejected": -0.5568559169769287, + "logps/chosen": -0.07113655656576157, + "logps/rejected": -0.9501205086708069, + "loss": 2.7174, + "nll_loss": 0.6759278178215027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007113655563443899, + "rewards/margins": 0.087898388504982, + "rewards/rejected": -0.09501205384731293, + "step": 2923 + }, + { + "epoch": 2.0221300138312586, + "grad_norm": 11.791963577270508, + "learning_rate": 4.432149992315968e-05, + "log_odds_chosen": 3.852811098098755, + "log_odds_ratio": -0.19490672647953033, + "logits/chosen": -0.4790309965610504, + "logits/rejected": -0.5375632643699646, + "logps/chosen": -0.18811428546905518, + "logps/rejected": -0.8418582081794739, + "loss": 3.0405, + "nll_loss": 0.7406342625617981, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.018811428919434547, + "rewards/margins": 0.06537439674139023, + "rewards/rejected": -0.08418582379817963, + "step": 2924 + }, + { + "epoch": 2.0228215767634854, + "grad_norm": 7.580800533294678, + "learning_rate": 4.431765790686952e-05, + "log_odds_chosen": 5.117754936218262, + "log_odds_ratio": -0.01622222363948822, + "logits/chosen": -0.7211370468139648, + "logits/rejected": -0.7603697776794434, + "logps/chosen": -0.018140438944101334, + "logps/rejected": -0.9220280051231384, + "loss": 2.7227, + "nll_loss": 0.6790465712547302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018140438478440046, + "rewards/margins": 0.09038875252008438, + "rewards/rejected": -0.0922027975320816, + "step": 2925 + }, + { + "epoch": 2.0235131396957122, + "grad_norm": 7.705148220062256, + "learning_rate": 4.431381589057938e-05, + "log_odds_chosen": 6.213085651397705, + "log_odds_ratio": -0.033481746912002563, + "logits/chosen": -0.9892230033874512, + "logits/rejected": -1.0106509923934937, + "logps/chosen": -0.03902255743741989, + "logps/rejected": -1.2663230895996094, + "loss": 3.1659, + "nll_loss": 0.7881351113319397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003902255790308118, + "rewards/margins": 0.1227300614118576, + "rewards/rejected": -0.12663230299949646, + "step": 2926 + }, + { + "epoch": 2.024204702627939, + "grad_norm": 6.366061210632324, + "learning_rate": 4.430997387428923e-05, + "log_odds_chosen": 7.32904577255249, + "log_odds_ratio": -0.006772264838218689, + "logits/chosen": -0.19414487481117249, + "logits/rejected": -0.20445206761360168, + "logps/chosen": -0.004846815951168537, + "logps/rejected": -1.157535195350647, + "loss": 3.0902, + "nll_loss": 0.771882176399231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048468157183378935, + "rewards/margins": 0.11526884138584137, + "rewards/rejected": -0.11575351655483246, + "step": 2927 + }, + { + "epoch": 2.024896265560166, + "grad_norm": 6.209042549133301, + "learning_rate": 4.430613185799908e-05, + "log_odds_chosen": 5.014216899871826, + "log_odds_ratio": -0.05102992802858353, + "logits/chosen": -0.8243149518966675, + "logits/rejected": -0.8707647323608398, + "logps/chosen": -0.04010523855686188, + "logps/rejected": -0.9312438368797302, + "loss": 2.5693, + "nll_loss": 0.6372247934341431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004010523669421673, + "rewards/margins": 0.08911386132240295, + "rewards/rejected": -0.0931243821978569, + "step": 2928 + }, + { + "epoch": 2.0255878284923927, + "grad_norm": 8.702845573425293, + "learning_rate": 4.430228984170893e-05, + "log_odds_chosen": 5.299887180328369, + "log_odds_ratio": -0.1380869746208191, + "logits/chosen": -0.4525139331817627, + "logits/rejected": -0.48863327503204346, + "logps/chosen": -0.07026822865009308, + "logps/rejected": -1.1539911031723022, + "loss": 2.3965, + "nll_loss": 0.5853177309036255, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00702682277187705, + "rewards/margins": 0.1083722934126854, + "rewards/rejected": -0.11539912223815918, + "step": 2929 + }, + { + "epoch": 2.0262793914246195, + "grad_norm": 6.655099868774414, + "learning_rate": 4.429844782541878e-05, + "log_odds_chosen": 6.435043811798096, + "log_odds_ratio": -0.039562076330184937, + "logits/chosen": -0.563301146030426, + "logits/rejected": -0.6276003122329712, + "logps/chosen": -0.03223853558301926, + "logps/rejected": -1.3018076419830322, + "loss": 2.116, + "nll_loss": 0.5250539183616638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032238538842648268, + "rewards/margins": 0.12695690989494324, + "rewards/rejected": -0.13018076121807098, + "step": 2930 + }, + { + "epoch": 2.0269709543568464, + "grad_norm": 8.373746871948242, + "learning_rate": 4.429460580912863e-05, + "log_odds_chosen": 6.052464485168457, + "log_odds_ratio": -0.06993289291858673, + "logits/chosen": -0.48515585064888, + "logits/rejected": -0.5455655455589294, + "logps/chosen": -0.03491077572107315, + "logps/rejected": -1.339547038078308, + "loss": 2.9546, + "nll_loss": 0.7316581010818481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003491077572107315, + "rewards/margins": 0.1304636299610138, + "rewards/rejected": -0.1339547038078308, + "step": 2931 + }, + { + "epoch": 2.027662517289073, + "grad_norm": 4.205761432647705, + "learning_rate": 4.429076379283848e-05, + "log_odds_chosen": 6.216808319091797, + "log_odds_ratio": -0.05835818499326706, + "logits/chosen": -0.7330908179283142, + "logits/rejected": -0.7898491024971008, + "logps/chosen": -0.030182205140590668, + "logps/rejected": -1.4196422100067139, + "loss": 3.3178, + "nll_loss": 0.8236026763916016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003018220653757453, + "rewards/margins": 0.13894601166248322, + "rewards/rejected": -0.14196424186229706, + "step": 2932 + }, + { + "epoch": 2.0283540802213, + "grad_norm": 7.031354904174805, + "learning_rate": 4.4286921776548336e-05, + "log_odds_chosen": 6.888822555541992, + "log_odds_ratio": -0.05103730410337448, + "logits/chosen": -0.4072624146938324, + "logits/rejected": -0.41956576704978943, + "logps/chosen": -0.011635039933025837, + "logps/rejected": -0.923719584941864, + "loss": 2.3398, + "nll_loss": 0.579850435256958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011635040864348412, + "rewards/margins": 0.09120845049619675, + "rewards/rejected": -0.09237195551395416, + "step": 2933 + }, + { + "epoch": 2.029045643153527, + "grad_norm": 8.670273780822754, + "learning_rate": 4.428307976025818e-05, + "log_odds_chosen": 8.007255554199219, + "log_odds_ratio": -0.004472412634640932, + "logits/chosen": -0.5988377332687378, + "logits/rejected": -0.6525205969810486, + "logps/chosen": -0.0038377277087420225, + "logps/rejected": -0.9639263153076172, + "loss": 2.4431, + "nll_loss": 0.6103239059448242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038377277087420225, + "rewards/margins": 0.09600885212421417, + "rewards/rejected": -0.09639262408018112, + "step": 2934 + }, + { + "epoch": 2.0297372060857537, + "grad_norm": 5.9609551429748535, + "learning_rate": 4.427923774396804e-05, + "log_odds_chosen": 5.167479515075684, + "log_odds_ratio": -0.0625988095998764, + "logits/chosen": -0.29511865973472595, + "logits/rejected": -0.3535918891429901, + "logps/chosen": -0.023435872048139572, + "logps/rejected": -0.6343368291854858, + "loss": 2.2397, + "nll_loss": 0.5536573529243469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023435871116816998, + "rewards/margins": 0.061090096831321716, + "rewards/rejected": -0.0634336918592453, + "step": 2935 + }, + { + "epoch": 2.0304287690179805, + "grad_norm": 12.973045349121094, + "learning_rate": 4.4275395727677886e-05, + "log_odds_chosen": 5.455376148223877, + "log_odds_ratio": -0.13886310160160065, + "logits/chosen": -0.41711336374282837, + "logits/rejected": -0.4392683207988739, + "logps/chosen": -0.013393568806350231, + "logps/rejected": -0.8761259317398071, + "loss": 3.4505, + "nll_loss": 0.8487340211868286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0013393567642197013, + "rewards/margins": 0.08627323806285858, + "rewards/rejected": -0.0876125916838646, + "step": 2936 + }, + { + "epoch": 2.0311203319502074, + "grad_norm": 5.99356746673584, + "learning_rate": 4.427155371138774e-05, + "log_odds_chosen": 5.800067901611328, + "log_odds_ratio": -0.02246464043855667, + "logits/chosen": -0.39779436588287354, + "logits/rejected": -0.4212380051612854, + "logps/chosen": -0.03113883174955845, + "logps/rejected": -0.7767848372459412, + "loss": 2.3202, + "nll_loss": 0.5778111815452576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031138835474848747, + "rewards/margins": 0.0745646059513092, + "rewards/rejected": -0.07767849415540695, + "step": 2937 + }, + { + "epoch": 2.031811894882434, + "grad_norm": 8.833569526672363, + "learning_rate": 4.426771169509759e-05, + "log_odds_chosen": 8.39259147644043, + "log_odds_ratio": -0.00498524634167552, + "logits/chosen": -0.5085591673851013, + "logits/rejected": -0.6033196449279785, + "logps/chosen": -0.0022648456506431103, + "logps/rejected": -1.2173972129821777, + "loss": 3.0348, + "nll_loss": 0.7582062482833862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022648456797469407, + "rewards/margins": 0.1215132474899292, + "rewards/rejected": -0.12173973768949509, + "step": 2938 + }, + { + "epoch": 2.032503457814661, + "grad_norm": 6.527597904205322, + "learning_rate": 4.4263869678807444e-05, + "log_odds_chosen": 6.323877334594727, + "log_odds_ratio": -0.012947482988238335, + "logits/chosen": -0.5131242871284485, + "logits/rejected": -0.5534065961837769, + "logps/chosen": -0.02248767577111721, + "logps/rejected": -0.9359084367752075, + "loss": 3.1013, + "nll_loss": 0.7740212082862854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002248767763376236, + "rewards/margins": 0.09134207665920258, + "rewards/rejected": -0.09359084069728851, + "step": 2939 + }, + { + "epoch": 2.033195020746888, + "grad_norm": 5.964481353759766, + "learning_rate": 4.426002766251729e-05, + "log_odds_chosen": 6.230679512023926, + "log_odds_ratio": -0.0629938542842865, + "logits/chosen": -0.7436071634292603, + "logits/rejected": -0.7987266778945923, + "logps/chosen": -0.02044205367565155, + "logps/rejected": -0.8582735061645508, + "loss": 2.7018, + "nll_loss": 0.6691381931304932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020442053209990263, + "rewards/margins": 0.08378314971923828, + "rewards/rejected": -0.08582735806703568, + "step": 2940 + }, + { + "epoch": 2.0338865836791147, + "grad_norm": 8.997940063476562, + "learning_rate": 4.425618564622714e-05, + "log_odds_chosen": 6.974672794342041, + "log_odds_ratio": -0.006081899628043175, + "logits/chosen": -0.6376204490661621, + "logits/rejected": -0.6679280400276184, + "logps/chosen": -0.03785526379942894, + "logps/rejected": -1.6801406145095825, + "loss": 2.733, + "nll_loss": 0.6826443672180176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003785526379942894, + "rewards/margins": 0.16422852873802185, + "rewards/rejected": -0.16801407933235168, + "step": 2941 + }, + { + "epoch": 2.0345781466113415, + "grad_norm": 9.944624900817871, + "learning_rate": 4.4252343629936994e-05, + "log_odds_chosen": 7.224353790283203, + "log_odds_ratio": -0.00259740324690938, + "logits/chosen": -0.7675692439079285, + "logits/rejected": -0.840691089630127, + "logps/chosen": -0.001974244136363268, + "logps/rejected": -1.0744328498840332, + "loss": 3.8919, + "nll_loss": 0.9727070331573486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019742443691939116, + "rewards/margins": 0.10724586248397827, + "rewards/rejected": -0.10744328051805496, + "step": 2942 + }, + { + "epoch": 2.0352697095435683, + "grad_norm": 8.882083892822266, + "learning_rate": 4.424850161364684e-05, + "log_odds_chosen": 6.137880325317383, + "log_odds_ratio": -0.009068233892321587, + "logits/chosen": -0.5883402228355408, + "logits/rejected": -0.5579231381416321, + "logps/chosen": -0.05238615721464157, + "logps/rejected": -1.8226776123046875, + "loss": 2.9676, + "nll_loss": 0.741002082824707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005238616373389959, + "rewards/margins": 0.17702913284301758, + "rewards/rejected": -0.18226775527000427, + "step": 2943 + }, + { + "epoch": 2.035961272475795, + "grad_norm": 7.656317710876465, + "learning_rate": 4.42446595973567e-05, + "log_odds_chosen": 7.767665863037109, + "log_odds_ratio": -0.05336516350507736, + "logits/chosen": -0.5125993490219116, + "logits/rejected": -0.5610659718513489, + "logps/chosen": -0.005388497840613127, + "logps/rejected": -1.1172276735305786, + "loss": 2.6554, + "nll_loss": 0.6585062742233276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005388497374951839, + "rewards/margins": 0.11118391901254654, + "rewards/rejected": -0.11172277480363846, + "step": 2944 + }, + { + "epoch": 2.036652835408022, + "grad_norm": 5.670093536376953, + "learning_rate": 4.4240817581066545e-05, + "log_odds_chosen": 5.91354513168335, + "log_odds_ratio": -0.0248207226395607, + "logits/chosen": -0.49000585079193115, + "logits/rejected": -0.515639066696167, + "logps/chosen": -0.012547427788376808, + "logps/rejected": -0.6958021521568298, + "loss": 2.2802, + "nll_loss": 0.5675714015960693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001254742732271552, + "rewards/margins": 0.068325474858284, + "rewards/rejected": -0.06958021968603134, + "step": 2945 + }, + { + "epoch": 2.037344398340249, + "grad_norm": 7.737858772277832, + "learning_rate": 4.42369755647764e-05, + "log_odds_chosen": 7.393893241882324, + "log_odds_ratio": -0.018279001116752625, + "logits/chosen": -0.422544002532959, + "logits/rejected": -0.47324442863464355, + "logps/chosen": -0.011747865006327629, + "logps/rejected": -1.5627875328063965, + "loss": 2.8876, + "nll_loss": 0.7200790643692017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011747865937650204, + "rewards/margins": 0.15510396659374237, + "rewards/rejected": -0.15627875924110413, + "step": 2946 + }, + { + "epoch": 2.0380359612724757, + "grad_norm": 9.531057357788086, + "learning_rate": 4.423313354848625e-05, + "log_odds_chosen": 6.561200141906738, + "log_odds_ratio": -0.04572358354926109, + "logits/chosen": -0.40563100576400757, + "logits/rejected": -0.4432332515716553, + "logps/chosen": -0.023076066747307777, + "logps/rejected": -0.9452800750732422, + "loss": 3.9257, + "nll_loss": 0.976864218711853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023076068609952927, + "rewards/margins": 0.09222040325403214, + "rewards/rejected": -0.09452801197767258, + "step": 2947 + }, + { + "epoch": 2.0387275242047025, + "grad_norm": 7.236406326293945, + "learning_rate": 4.42292915321961e-05, + "log_odds_chosen": 7.733157157897949, + "log_odds_ratio": -0.0020131170749664307, + "logits/chosen": -0.5212618708610535, + "logits/rejected": -0.511024534702301, + "logps/chosen": -0.001999392407014966, + "logps/rejected": -0.9011251926422119, + "loss": 1.9601, + "nll_loss": 0.4898112416267395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001999392407014966, + "rewards/margins": 0.08991258591413498, + "rewards/rejected": -0.09011252224445343, + "step": 2948 + }, + { + "epoch": 2.0394190871369293, + "grad_norm": 7.766987323760986, + "learning_rate": 4.422544951590595e-05, + "log_odds_chosen": 8.511279106140137, + "log_odds_ratio": -0.02912795916199684, + "logits/chosen": -0.27267301082611084, + "logits/rejected": -0.3947067856788635, + "logps/chosen": -0.012451526708900928, + "logps/rejected": -2.0573956966400146, + "loss": 1.7155, + "nll_loss": 0.42596328258514404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012451526708900928, + "rewards/margins": 0.2044944167137146, + "rewards/rejected": -0.2057395577430725, + "step": 2949 + }, + { + "epoch": 2.040110650069156, + "grad_norm": 8.35403060913086, + "learning_rate": 4.42216074996158e-05, + "log_odds_chosen": 5.819576263427734, + "log_odds_ratio": -0.15582507848739624, + "logits/chosen": -0.5156236886978149, + "logits/rejected": -0.5940700173377991, + "logps/chosen": -0.045006606727838516, + "logps/rejected": -1.1011898517608643, + "loss": 2.0249, + "nll_loss": 0.4906438887119293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004500660579651594, + "rewards/margins": 0.10561832785606384, + "rewards/rejected": -0.11011898517608643, + "step": 2950 + }, + { + "epoch": 2.040802213001383, + "grad_norm": 4.178206443786621, + "learning_rate": 4.421776548332565e-05, + "log_odds_chosen": 5.757562637329102, + "log_odds_ratio": -0.013421890325844288, + "logits/chosen": -0.43508225679397583, + "logits/rejected": -0.4227558374404907, + "logps/chosen": -0.050725605338811874, + "logps/rejected": -1.2691094875335693, + "loss": 1.6484, + "nll_loss": 0.4107661843299866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005072561092674732, + "rewards/margins": 0.12183839082717896, + "rewards/rejected": -0.1269109547138214, + "step": 2951 + }, + { + "epoch": 2.04149377593361, + "grad_norm": 8.70975112915039, + "learning_rate": 4.42139234670355e-05, + "log_odds_chosen": 8.446166038513184, + "log_odds_ratio": -0.0025777772534638643, + "logits/chosen": -0.6618889570236206, + "logits/rejected": -0.7927266359329224, + "logps/chosen": -0.0014678852166980505, + "logps/rejected": -1.5695796012878418, + "loss": 2.0736, + "nll_loss": 0.5181523561477661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014678851584903896, + "rewards/margins": 0.1568111628293991, + "rewards/rejected": -0.1569579690694809, + "step": 2952 + }, + { + "epoch": 2.0421853388658366, + "grad_norm": 9.313553810119629, + "learning_rate": 4.421008145074536e-05, + "log_odds_chosen": 7.796962261199951, + "log_odds_ratio": -0.0014820595970377326, + "logits/chosen": -0.8346244096755981, + "logits/rejected": -0.8700048923492432, + "logps/chosen": -0.0053040627390146255, + "logps/rejected": -1.5605573654174805, + "loss": 3.1736, + "nll_loss": 0.7932461500167847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005304062506183982, + "rewards/margins": 0.1555253267288208, + "rewards/rejected": -0.1560557335615158, + "step": 2953 + }, + { + "epoch": 2.0428769017980635, + "grad_norm": 9.841303825378418, + "learning_rate": 4.42062394344552e-05, + "log_odds_chosen": 9.34194564819336, + "log_odds_ratio": -0.0003495306591503322, + "logits/chosen": -0.23278909921646118, + "logits/rejected": -0.24929757416248322, + "logps/chosen": -0.0005623494507744908, + "logps/rejected": -1.7412278652191162, + "loss": 2.8978, + "nll_loss": 0.7244049310684204, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6234941439470276e-05, + "rewards/margins": 0.17406655848026276, + "rewards/rejected": -0.17412279546260834, + "step": 2954 + }, + { + "epoch": 2.0435684647302903, + "grad_norm": 5.997377395629883, + "learning_rate": 4.4202397418165056e-05, + "log_odds_chosen": 6.477039337158203, + "log_odds_ratio": -0.025202833116054535, + "logits/chosen": -0.9479755163192749, + "logits/rejected": -0.9774594306945801, + "logps/chosen": -0.044182758778333664, + "logps/rejected": -1.0433984994888306, + "loss": 2.3557, + "nll_loss": 0.5864126682281494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004418275784701109, + "rewards/margins": 0.0999215766787529, + "rewards/rejected": -0.1043398529291153, + "step": 2955 + }, + { + "epoch": 2.044260027662517, + "grad_norm": 7.110849857330322, + "learning_rate": 4.419855540187491e-05, + "log_odds_chosen": 6.030847072601318, + "log_odds_ratio": -0.037585724145174026, + "logits/chosen": -0.6416101455688477, + "logits/rejected": -0.6577980518341064, + "logps/chosen": -0.06431546062231064, + "logps/rejected": -1.2889031171798706, + "loss": 3.1393, + "nll_loss": 0.7810727953910828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0064315455965697765, + "rewards/margins": 0.12245876342058182, + "rewards/rejected": -0.12889030575752258, + "step": 2956 + }, + { + "epoch": 2.044951590594744, + "grad_norm": 5.512500286102295, + "learning_rate": 4.419471338558476e-05, + "log_odds_chosen": 7.396589756011963, + "log_odds_ratio": -0.013286417350172997, + "logits/chosen": -0.47500455379486084, + "logits/rejected": -0.5753237009048462, + "logps/chosen": -0.04434991627931595, + "logps/rejected": -1.3787543773651123, + "loss": 2.2678, + "nll_loss": 0.5656318068504333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004434991627931595, + "rewards/margins": 0.13344044983386993, + "rewards/rejected": -0.13787545263767242, + "step": 2957 + }, + { + "epoch": 2.045643153526971, + "grad_norm": 5.134779930114746, + "learning_rate": 4.4190871369294606e-05, + "log_odds_chosen": 6.285913944244385, + "log_odds_ratio": -0.14252759516239166, + "logits/chosen": -0.45119357109069824, + "logits/rejected": -0.5126819014549255, + "logps/chosen": -0.03729405999183655, + "logps/rejected": -1.1003504991531372, + "loss": 1.8283, + "nll_loss": 0.44282904267311096, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003729406511411071, + "rewards/margins": 0.10630562901496887, + "rewards/rejected": -0.11003503203392029, + "step": 2958 + }, + { + "epoch": 2.0463347164591976, + "grad_norm": 7.935320854187012, + "learning_rate": 4.418702935300446e-05, + "log_odds_chosen": 7.760144233703613, + "log_odds_ratio": -0.005842843558639288, + "logits/chosen": -0.8411372900009155, + "logits/rejected": -0.9680566191673279, + "logps/chosen": -0.017029302194714546, + "logps/rejected": -1.6215450763702393, + "loss": 3.1712, + "nll_loss": 0.7922165393829346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017029301961883903, + "rewards/margins": 0.16045159101486206, + "rewards/rejected": -0.16215452551841736, + "step": 2959 + }, + { + "epoch": 2.0470262793914245, + "grad_norm": 6.970455646514893, + "learning_rate": 4.418318733671431e-05, + "log_odds_chosen": 6.338698387145996, + "log_odds_ratio": -0.03856229782104492, + "logits/chosen": -0.4592578709125519, + "logits/rejected": -0.5230832099914551, + "logps/chosen": -0.02689545601606369, + "logps/rejected": -0.9274243116378784, + "loss": 2.8688, + "nll_loss": 0.713355541229248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026895455084741116, + "rewards/margins": 0.09005288779735565, + "rewards/rejected": -0.0927424281835556, + "step": 2960 + }, + { + "epoch": 2.0477178423236513, + "grad_norm": 6.369350910186768, + "learning_rate": 4.417934532042416e-05, + "log_odds_chosen": 7.189737796783447, + "log_odds_ratio": -0.12329629063606262, + "logits/chosen": -0.6698348522186279, + "logits/rejected": -0.7852696180343628, + "logps/chosen": -0.028546592220664024, + "logps/rejected": -1.4175446033477783, + "loss": 1.6068, + "nll_loss": 0.38936716318130493, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028546589892357588, + "rewards/margins": 0.1388998031616211, + "rewards/rejected": -0.14175444841384888, + "step": 2961 + }, + { + "epoch": 2.048409405255878, + "grad_norm": 8.8323392868042, + "learning_rate": 4.4175503304134016e-05, + "log_odds_chosen": 4.420435905456543, + "log_odds_ratio": -0.3130551874637604, + "logits/chosen": -0.4343242347240448, + "logits/rejected": -0.4906140863895416, + "logps/chosen": -0.07207857817411423, + "logps/rejected": -0.7014517784118652, + "loss": 2.3546, + "nll_loss": 0.5573325753211975, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0072078583762049675, + "rewards/margins": 0.06293731927871704, + "rewards/rejected": -0.07014517486095428, + "step": 2962 + }, + { + "epoch": 2.049100968188105, + "grad_norm": 7.881039142608643, + "learning_rate": 4.417166128784386e-05, + "log_odds_chosen": 7.461102485656738, + "log_odds_ratio": -0.05037471279501915, + "logits/chosen": -0.6518298387527466, + "logits/rejected": -0.6640526652336121, + "logps/chosen": -0.015996096655726433, + "logps/rejected": -1.207945466041565, + "loss": 3.1082, + "nll_loss": 0.7720248103141785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015996096190065145, + "rewards/margins": 0.11919493228197098, + "rewards/rejected": -0.12079453468322754, + "step": 2963 + }, + { + "epoch": 2.0497925311203318, + "grad_norm": 8.567526817321777, + "learning_rate": 4.4167819271553714e-05, + "log_odds_chosen": 7.342993259429932, + "log_odds_ratio": -0.04448270797729492, + "logits/chosen": -0.610135555267334, + "logits/rejected": -0.6820641160011292, + "logps/chosen": -0.10857971012592316, + "logps/rejected": -1.3978261947631836, + "loss": 2.6709, + "nll_loss": 0.6632830500602722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01085797231644392, + "rewards/margins": 0.1289246529340744, + "rewards/rejected": -0.1397826224565506, + "step": 2964 + }, + { + "epoch": 2.0504840940525586, + "grad_norm": 11.050207138061523, + "learning_rate": 4.4163977255263567e-05, + "log_odds_chosen": 5.7598795890808105, + "log_odds_ratio": -0.3220196068286896, + "logits/chosen": -0.7325015068054199, + "logits/rejected": -0.8497984409332275, + "logps/chosen": -0.12142018973827362, + "logps/rejected": -1.4437401294708252, + "loss": 3.1083, + "nll_loss": 0.744879424571991, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012142017483711243, + "rewards/margins": 0.13223199546337128, + "rewards/rejected": -0.14437401294708252, + "step": 2965 + }, + { + "epoch": 2.0511756569847854, + "grad_norm": 6.123977184295654, + "learning_rate": 4.416013523897342e-05, + "log_odds_chosen": 8.347021102905273, + "log_odds_ratio": -0.0005506742745637894, + "logits/chosen": -0.625741720199585, + "logits/rejected": -0.6533139944076538, + "logps/chosen": -0.0005300822667777538, + "logps/rejected": -0.7603899240493774, + "loss": 3.1846, + "nll_loss": 0.7960931062698364, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.300822886056267e-05, + "rewards/margins": 0.07598598301410675, + "rewards/rejected": -0.07603899389505386, + "step": 2966 + }, + { + "epoch": 2.0518672199170123, + "grad_norm": 7.458584308624268, + "learning_rate": 4.4156293222683265e-05, + "log_odds_chosen": 5.091979026794434, + "log_odds_ratio": -0.9392639398574829, + "logits/chosen": -0.37719377875328064, + "logits/rejected": -0.4038720428943634, + "logps/chosen": -0.17346209287643433, + "logps/rejected": -0.9408200979232788, + "loss": 2.9051, + "nll_loss": 0.6323404908180237, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017346208915114403, + "rewards/margins": 0.07673580199480057, + "rewards/rejected": -0.09408202022314072, + "step": 2967 + }, + { + "epoch": 2.052558782849239, + "grad_norm": 5.1775665283203125, + "learning_rate": 4.415245120639312e-05, + "log_odds_chosen": 7.053496837615967, + "log_odds_ratio": -0.07024918496608734, + "logits/chosen": -0.5811555981636047, + "logits/rejected": -0.5938743948936462, + "logps/chosen": -0.015003536827862263, + "logps/rejected": -0.9554708003997803, + "loss": 2.0713, + "nll_loss": 0.5108081698417664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015003536827862263, + "rewards/margins": 0.09404672682285309, + "rewards/rejected": -0.09554708003997803, + "step": 2968 + }, + { + "epoch": 2.053250345781466, + "grad_norm": 12.167163848876953, + "learning_rate": 4.414860919010297e-05, + "log_odds_chosen": 6.978175640106201, + "log_odds_ratio": -0.0033219067845493555, + "logits/chosen": -0.8989130258560181, + "logits/rejected": -0.8963654041290283, + "logps/chosen": -0.0031070064287632704, + "logps/rejected": -0.9147448539733887, + "loss": 3.3015, + "nll_loss": 0.8250552415847778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003107006778009236, + "rewards/margins": 0.09116378426551819, + "rewards/rejected": -0.0914744883775711, + "step": 2969 + }, + { + "epoch": 2.0539419087136928, + "grad_norm": 6.647165298461914, + "learning_rate": 4.4144767173812815e-05, + "log_odds_chosen": 8.044532775878906, + "log_odds_ratio": -0.02352350763976574, + "logits/chosen": -0.7308659553527832, + "logits/rejected": -0.7580546140670776, + "logps/chosen": -0.008968767710030079, + "logps/rejected": -1.5896055698394775, + "loss": 2.3966, + "nll_loss": 0.5967886447906494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008968767942860723, + "rewards/margins": 0.15806369483470917, + "rewards/rejected": -0.15896056592464447, + "step": 2970 + }, + { + "epoch": 2.0546334716459196, + "grad_norm": 4.377399921417236, + "learning_rate": 4.4140925157522674e-05, + "log_odds_chosen": 8.596623420715332, + "log_odds_ratio": -0.0039055885281413794, + "logits/chosen": -0.5529035329818726, + "logits/rejected": -0.5938980579376221, + "logps/chosen": -0.01188119500875473, + "logps/rejected": -1.5860004425048828, + "loss": 1.7488, + "nll_loss": 0.43680238723754883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011881195241585374, + "rewards/margins": 0.15741194784641266, + "rewards/rejected": -0.15860004723072052, + "step": 2971 + }, + { + "epoch": 2.0553250345781464, + "grad_norm": 5.296402454376221, + "learning_rate": 4.413708314123252e-05, + "log_odds_chosen": 7.072319030761719, + "log_odds_ratio": -0.008747157640755177, + "logits/chosen": -0.9367965459823608, + "logits/rejected": -0.9441543817520142, + "logps/chosen": -0.01353341992944479, + "logps/rejected": -1.0854418277740479, + "loss": 3.3855, + "nll_loss": 0.8455039858818054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013533420860767365, + "rewards/margins": 0.10719083249568939, + "rewards/rejected": -0.10854417830705643, + "step": 2972 + }, + { + "epoch": 2.0560165975103732, + "grad_norm": 9.635480880737305, + "learning_rate": 4.413324112494237e-05, + "log_odds_chosen": 5.131409645080566, + "log_odds_ratio": -0.15523457527160645, + "logits/chosen": -0.8282453417778015, + "logits/rejected": -0.8770512938499451, + "logps/chosen": -0.06053111329674721, + "logps/rejected": -1.2064192295074463, + "loss": 2.7522, + "nll_loss": 0.6725161075592041, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006053111050277948, + "rewards/margins": 0.11458881199359894, + "rewards/rejected": -0.12064193189144135, + "step": 2973 + }, + { + "epoch": 2.0567081604426, + "grad_norm": 8.57661247253418, + "learning_rate": 4.4129399108652225e-05, + "log_odds_chosen": 4.66064453125, + "log_odds_ratio": -0.11506712436676025, + "logits/chosen": -0.6556863188743591, + "logits/rejected": -0.6985541582107544, + "logps/chosen": -0.03705421835184097, + "logps/rejected": -0.6838787794113159, + "loss": 2.855, + "nll_loss": 0.7022481560707092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037054221611469984, + "rewards/margins": 0.06468245387077332, + "rewards/rejected": -0.06838788092136383, + "step": 2974 + }, + { + "epoch": 2.057399723374827, + "grad_norm": 3.915243148803711, + "learning_rate": 4.412555709236208e-05, + "log_odds_chosen": 6.537201404571533, + "log_odds_ratio": -0.006676637101918459, + "logits/chosen": -0.4104037880897522, + "logits/rejected": -0.4930589199066162, + "logps/chosen": -0.036588139832019806, + "logps/rejected": -1.1748409271240234, + "loss": 1.939, + "nll_loss": 0.4840763211250305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036588143557310104, + "rewards/margins": 0.11382529139518738, + "rewards/rejected": -0.11748410016298294, + "step": 2975 + }, + { + "epoch": 2.0580912863070537, + "grad_norm": 4.104969501495361, + "learning_rate": 4.412171507607192e-05, + "log_odds_chosen": 7.7819671630859375, + "log_odds_ratio": -0.00965783093124628, + "logits/chosen": -0.36368995904922485, + "logits/rejected": -0.40193721652030945, + "logps/chosen": -0.008413492701947689, + "logps/rejected": -1.0480165481567383, + "loss": 1.8093, + "nll_loss": 0.45135873556137085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008413493051193655, + "rewards/margins": 0.1039603054523468, + "rewards/rejected": -0.10480165481567383, + "step": 2976 + }, + { + "epoch": 2.0587828492392806, + "grad_norm": 5.845517635345459, + "learning_rate": 4.4117873059781775e-05, + "log_odds_chosen": 6.87288761138916, + "log_odds_ratio": -0.02248889021575451, + "logits/chosen": -0.6869525909423828, + "logits/rejected": -0.761170506477356, + "logps/chosen": -0.08527921140193939, + "logps/rejected": -1.6256699562072754, + "loss": 2.9241, + "nll_loss": 0.7287745475769043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008527921512722969, + "rewards/margins": 0.15403907001018524, + "rewards/rejected": -0.16256700456142426, + "step": 2977 + }, + { + "epoch": 2.0594744121715074, + "grad_norm": 8.611862182617188, + "learning_rate": 4.411403104349163e-05, + "log_odds_chosen": 6.731040954589844, + "log_odds_ratio": -0.0147066880017519, + "logits/chosen": -0.5096957087516785, + "logits/rejected": -0.6066713929176331, + "logps/chosen": -0.025971077382564545, + "logps/rejected": -1.575836181640625, + "loss": 2.8274, + "nll_loss": 0.70537930727005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002597107784822583, + "rewards/margins": 0.15498653054237366, + "rewards/rejected": -0.15758362412452698, + "step": 2978 + }, + { + "epoch": 2.0601659751037342, + "grad_norm": 5.3427863121032715, + "learning_rate": 4.4110189027201474e-05, + "log_odds_chosen": 4.761931419372559, + "log_odds_ratio": -0.08862251043319702, + "logits/chosen": -0.3233840763568878, + "logits/rejected": -0.41529786586761475, + "logps/chosen": -0.052310310304164886, + "logps/rejected": -1.16804039478302, + "loss": 2.4812, + "nll_loss": 0.6114499568939209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005231031216681004, + "rewards/margins": 0.11157301068305969, + "rewards/rejected": -0.11680404841899872, + "step": 2979 + }, + { + "epoch": 2.060857538035961, + "grad_norm": 4.2136125564575195, + "learning_rate": 4.410634701091133e-05, + "log_odds_chosen": 6.927209854125977, + "log_odds_ratio": -0.01120884157717228, + "logits/chosen": -0.4469285011291504, + "logits/rejected": -0.5098540782928467, + "logps/chosen": -0.0214360561221838, + "logps/rejected": -1.3919093608856201, + "loss": 2.3812, + "nll_loss": 0.5941844582557678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00214360561221838, + "rewards/margins": 0.13704732060432434, + "rewards/rejected": -0.1391909271478653, + "step": 2980 + }, + { + "epoch": 2.061549100968188, + "grad_norm": 10.168533325195312, + "learning_rate": 4.410250499462118e-05, + "log_odds_chosen": 5.533295154571533, + "log_odds_ratio": -0.11421045660972595, + "logits/chosen": -0.6414197683334351, + "logits/rejected": -0.6973565816879272, + "logps/chosen": -0.030152834951877594, + "logps/rejected": -0.9524630308151245, + "loss": 3.0553, + "nll_loss": 0.7524070143699646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030152834951877594, + "rewards/margins": 0.09223102033138275, + "rewards/rejected": -0.09524630755186081, + "step": 2981 + }, + { + "epoch": 2.0622406639004147, + "grad_norm": 8.804760932922363, + "learning_rate": 4.409866297833103e-05, + "log_odds_chosen": 7.190434455871582, + "log_odds_ratio": -0.01756965182721615, + "logits/chosen": -0.7176415324211121, + "logits/rejected": -0.7547638416290283, + "logps/chosen": -0.02191227488219738, + "logps/rejected": -1.1347317695617676, + "loss": 2.602, + "nll_loss": 0.6487484574317932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002191227627918124, + "rewards/margins": 0.11128196120262146, + "rewards/rejected": -0.11347319185733795, + "step": 2982 + }, + { + "epoch": 2.0629322268326415, + "grad_norm": 6.621882438659668, + "learning_rate": 4.409482096204088e-05, + "log_odds_chosen": 8.03929615020752, + "log_odds_ratio": -0.003591003129258752, + "logits/chosen": -0.6009180545806885, + "logits/rejected": -0.6958531737327576, + "logps/chosen": -0.003013760782778263, + "logps/rejected": -1.226391315460205, + "loss": 2.0684, + "nll_loss": 0.5167450904846191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003013760724570602, + "rewards/margins": 0.12233775854110718, + "rewards/rejected": -0.12263913452625275, + "step": 2983 + }, + { + "epoch": 2.0636237897648684, + "grad_norm": 10.417600631713867, + "learning_rate": 4.4090978945750736e-05, + "log_odds_chosen": 6.877012729644775, + "log_odds_ratio": -0.10523069649934769, + "logits/chosen": -0.5266302227973938, + "logits/rejected": -0.6316937804222107, + "logps/chosen": -0.034719862043857574, + "logps/rejected": -1.2791069746017456, + "loss": 1.802, + "nll_loss": 0.439973920583725, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034719863906502724, + "rewards/margins": 0.12443870306015015, + "rewards/rejected": -0.12791068851947784, + "step": 2984 + }, + { + "epoch": 2.064315352697095, + "grad_norm": 7.508418560028076, + "learning_rate": 4.408713692946058e-05, + "log_odds_chosen": 7.389313697814941, + "log_odds_ratio": -0.0030453759245574474, + "logits/chosen": -0.6497970223426819, + "logits/rejected": -0.7042725682258606, + "logps/chosen": -0.002656069817021489, + "logps/rejected": -1.0196506977081299, + "loss": 2.5648, + "nll_loss": 0.6408883929252625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026560697006061673, + "rewards/margins": 0.10169944912195206, + "rewards/rejected": -0.10196506232023239, + "step": 2985 + }, + { + "epoch": 2.0650069156293225, + "grad_norm": 9.812541007995605, + "learning_rate": 4.4083294913170434e-05, + "log_odds_chosen": 7.382270336151123, + "log_odds_ratio": -0.036466456949710846, + "logits/chosen": -0.7856600284576416, + "logits/rejected": -0.8382467031478882, + "logps/chosen": -0.020869005471467972, + "logps/rejected": -1.2076259851455688, + "loss": 3.1957, + "nll_loss": 0.7952706813812256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020869006402790546, + "rewards/margins": 0.11867569386959076, + "rewards/rejected": -0.12076259404420853, + "step": 2986 + }, + { + "epoch": 2.0656984785615493, + "grad_norm": 5.368399143218994, + "learning_rate": 4.4079452896880286e-05, + "log_odds_chosen": 7.302699089050293, + "log_odds_ratio": -0.0016621847171336412, + "logits/chosen": -0.5824560523033142, + "logits/rejected": -0.632736086845398, + "logps/chosen": -0.0023292091209441423, + "logps/rejected": -1.1314865350723267, + "loss": 1.9341, + "nll_loss": 0.4833478331565857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023292092373594642, + "rewards/margins": 0.11291573196649551, + "rewards/rejected": -0.11314865946769714, + "step": 2987 + }, + { + "epoch": 2.066390041493776, + "grad_norm": 6.796201705932617, + "learning_rate": 4.407561088059013e-05, + "log_odds_chosen": 7.102804660797119, + "log_odds_ratio": -0.02306043915450573, + "logits/chosen": -0.41754207015037537, + "logits/rejected": -0.51899653673172, + "logps/chosen": -0.028687093406915665, + "logps/rejected": -1.1891157627105713, + "loss": 2.5866, + "nll_loss": 0.644343376159668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002868709387257695, + "rewards/margins": 0.11604287475347519, + "rewards/rejected": -0.11891157925128937, + "step": 2988 + }, + { + "epoch": 2.067081604426003, + "grad_norm": 9.230788230895996, + "learning_rate": 4.407176886429999e-05, + "log_odds_chosen": 5.161102294921875, + "log_odds_ratio": -0.2189641296863556, + "logits/chosen": -0.5901562571525574, + "logits/rejected": -0.6268595457077026, + "logps/chosen": -0.10483792424201965, + "logps/rejected": -1.382750153541565, + "loss": 2.6292, + "nll_loss": 0.6354115605354309, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01048379298299551, + "rewards/margins": 0.12779122591018677, + "rewards/rejected": -0.13827502727508545, + "step": 2989 + }, + { + "epoch": 2.06777316735823, + "grad_norm": 4.151891708374023, + "learning_rate": 4.406792684800984e-05, + "log_odds_chosen": 5.546518325805664, + "log_odds_ratio": -0.1175304651260376, + "logits/chosen": -0.41287970542907715, + "logits/rejected": -0.4530215859413147, + "logps/chosen": -0.025290869176387787, + "logps/rejected": -0.804233193397522, + "loss": 2.3301, + "nll_loss": 0.5707738995552063, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002529087010771036, + "rewards/margins": 0.07789423316717148, + "rewards/rejected": -0.08042332530021667, + "step": 2990 + }, + { + "epoch": 2.0684647302904566, + "grad_norm": 7.8981757164001465, + "learning_rate": 4.406408483171969e-05, + "log_odds_chosen": 8.113343238830566, + "log_odds_ratio": -0.005146768409758806, + "logits/chosen": -0.7099666595458984, + "logits/rejected": -0.8495659232139587, + "logps/chosen": -0.024176517501473427, + "logps/rejected": -1.4046846628189087, + "loss": 2.4403, + "nll_loss": 0.609563946723938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024176519364118576, + "rewards/margins": 0.13805082440376282, + "rewards/rejected": -0.14046847820281982, + "step": 2991 + }, + { + "epoch": 2.0691562932226835, + "grad_norm": 8.642290115356445, + "learning_rate": 4.406024281542954e-05, + "log_odds_chosen": 7.045675754547119, + "log_odds_ratio": -0.01596110127866268, + "logits/chosen": -0.534246027469635, + "logits/rejected": -0.5317660570144653, + "logps/chosen": -0.040739212185144424, + "logps/rejected": -1.5891478061676025, + "loss": 2.1967, + "nll_loss": 0.5475837588310242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004073921591043472, + "rewards/margins": 0.1548408567905426, + "rewards/rejected": -0.15891478955745697, + "step": 2992 + }, + { + "epoch": 2.0698478561549103, + "grad_norm": 7.404383659362793, + "learning_rate": 4.4056400799139394e-05, + "log_odds_chosen": 5.8501434326171875, + "log_odds_ratio": -0.00894884578883648, + "logits/chosen": -0.43841439485549927, + "logits/rejected": -0.47529149055480957, + "logps/chosen": -0.015168728306889534, + "logps/rejected": -0.8849344849586487, + "loss": 2.0077, + "nll_loss": 0.5010417699813843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015168729005381465, + "rewards/margins": 0.08697657287120819, + "rewards/rejected": -0.08849343657493591, + "step": 2993 + }, + { + "epoch": 2.070539419087137, + "grad_norm": 7.639882564544678, + "learning_rate": 4.405255878284924e-05, + "log_odds_chosen": 9.027555465698242, + "log_odds_ratio": -0.0001843296631705016, + "logits/chosen": -0.3901001811027527, + "logits/rejected": -0.3636050820350647, + "logps/chosen": -0.0006443070597015321, + "logps/rejected": -1.427654504776001, + "loss": 2.051, + "nll_loss": 0.5127426981925964, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.443070742534474e-05, + "rewards/margins": 0.14270102977752686, + "rewards/rejected": -0.14276546239852905, + "step": 2994 + }, + { + "epoch": 2.071230982019364, + "grad_norm": 14.118929862976074, + "learning_rate": 4.404871676655909e-05, + "log_odds_chosen": 6.0568623542785645, + "log_odds_ratio": -0.5541839599609375, + "logits/chosen": -0.5042297840118408, + "logits/rejected": -0.5497844219207764, + "logps/chosen": -0.07083805650472641, + "logps/rejected": -1.3127648830413818, + "loss": 3.5312, + "nll_loss": 0.8273938894271851, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007083804812282324, + "rewards/margins": 0.12419269233942032, + "rewards/rejected": -0.13127650320529938, + "step": 2995 + }, + { + "epoch": 2.071922544951591, + "grad_norm": 5.028932571411133, + "learning_rate": 4.4044874750268945e-05, + "log_odds_chosen": 6.680631637573242, + "log_odds_ratio": -0.13834092020988464, + "logits/chosen": -0.1440374255180359, + "logits/rejected": -0.12740664184093475, + "logps/chosen": -0.09567593038082123, + "logps/rejected": -1.1710020303726196, + "loss": 2.5045, + "nll_loss": 0.6122902035713196, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009567593224346638, + "rewards/margins": 0.10753262042999268, + "rewards/rejected": -0.11710020899772644, + "step": 2996 + }, + { + "epoch": 2.0726141078838176, + "grad_norm": 6.292712211608887, + "learning_rate": 4.404103273397879e-05, + "log_odds_chosen": 9.41784954071045, + "log_odds_ratio": -0.0002688511158339679, + "logits/chosen": -0.6236108541488647, + "logits/rejected": -0.7072266340255737, + "logps/chosen": -0.0007091419538483024, + "logps/rejected": -1.6054515838623047, + "loss": 2.2208, + "nll_loss": 0.5551624894142151, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.091420411597937e-05, + "rewards/margins": 0.1604742556810379, + "rewards/rejected": -0.16054517030715942, + "step": 2997 + }, + { + "epoch": 2.0733056708160444, + "grad_norm": 5.728390216827393, + "learning_rate": 4.403719071768865e-05, + "log_odds_chosen": 5.915194988250732, + "log_odds_ratio": -0.00786438025534153, + "logits/chosen": -0.6329678893089294, + "logits/rejected": -0.6185204982757568, + "logps/chosen": -0.010668737813830376, + "logps/rejected": -1.0165667533874512, + "loss": 2.9553, + "nll_loss": 0.7380290627479553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010668738977983594, + "rewards/margins": 0.1005897969007492, + "rewards/rejected": -0.10165666788816452, + "step": 2998 + }, + { + "epoch": 2.0739972337482713, + "grad_norm": 8.11888599395752, + "learning_rate": 4.4033348701398495e-05, + "log_odds_chosen": 7.804757118225098, + "log_odds_ratio": -0.0029276611749082804, + "logits/chosen": -0.8999743461608887, + "logits/rejected": -0.8988440036773682, + "logps/chosen": -0.0017784256488084793, + "logps/rejected": -1.1857937574386597, + "loss": 2.9097, + "nll_loss": 0.727120578289032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017784256488084793, + "rewards/margins": 0.11840153485536575, + "rewards/rejected": -0.11857938021421432, + "step": 2999 + }, + { + "epoch": 2.074688796680498, + "grad_norm": 7.151800155639648, + "learning_rate": 4.402950668510835e-05, + "log_odds_chosen": 4.465782165527344, + "log_odds_ratio": -0.09260845929384232, + "logits/chosen": -0.7498218417167664, + "logits/rejected": -0.7401602864265442, + "logps/chosen": -0.08501623570919037, + "logps/rejected": -1.4172019958496094, + "loss": 3.1935, + "nll_loss": 0.7891181111335754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008501622825860977, + "rewards/margins": 0.13321858644485474, + "rewards/rejected": -0.14172020554542542, + "step": 3000 + }, + { + "epoch": 2.075380359612725, + "grad_norm": 6.480521202087402, + "learning_rate": 4.40256646688182e-05, + "log_odds_chosen": 7.112648010253906, + "log_odds_ratio": -0.03398454934358597, + "logits/chosen": -0.7977691888809204, + "logits/rejected": -0.8164126873016357, + "logps/chosen": -0.02818971686065197, + "logps/rejected": -1.5443800687789917, + "loss": 2.9939, + "nll_loss": 0.7450749278068542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002818971872329712, + "rewards/margins": 0.1516190469264984, + "rewards/rejected": -0.15443801879882812, + "step": 3001 + }, + { + "epoch": 2.0760719225449518, + "grad_norm": 7.503549575805664, + "learning_rate": 4.402182265252805e-05, + "log_odds_chosen": 7.291573524475098, + "log_odds_ratio": -0.0050234016962349415, + "logits/chosen": -0.8397652506828308, + "logits/rejected": -0.9112769961357117, + "logps/chosen": -0.011641757562756538, + "logps/rejected": -1.3105417490005493, + "loss": 2.9127, + "nll_loss": 0.7276700735092163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011641758028417826, + "rewards/margins": 0.12989000976085663, + "rewards/rejected": -0.13105419278144836, + "step": 3002 + }, + { + "epoch": 2.0767634854771786, + "grad_norm": 4.157247543334961, + "learning_rate": 4.40179806362379e-05, + "log_odds_chosen": 6.718463897705078, + "log_odds_ratio": -0.05722730606794357, + "logits/chosen": -0.07942191511392593, + "logits/rejected": -0.15350845456123352, + "logps/chosen": -0.0334065780043602, + "logps/rejected": -0.8967019319534302, + "loss": 2.0644, + "nll_loss": 0.5103872418403625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00334065780043602, + "rewards/margins": 0.08632953464984894, + "rewards/rejected": -0.08967019617557526, + "step": 3003 + }, + { + "epoch": 2.0774550484094054, + "grad_norm": 5.905579090118408, + "learning_rate": 4.401413861994775e-05, + "log_odds_chosen": 5.6199469566345215, + "log_odds_ratio": -0.07426194846630096, + "logits/chosen": -0.4605045020580292, + "logits/rejected": -0.5288490653038025, + "logps/chosen": -0.03111214004456997, + "logps/rejected": -0.9659241437911987, + "loss": 1.8886, + "nll_loss": 0.46473127603530884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031112139113247395, + "rewards/margins": 0.09348119795322418, + "rewards/rejected": -0.09659241884946823, + "step": 3004 + }, + { + "epoch": 2.0781466113416323, + "grad_norm": 6.317102432250977, + "learning_rate": 4.40102966036576e-05, + "log_odds_chosen": 7.723196029663086, + "log_odds_ratio": -0.002818305743858218, + "logits/chosen": -0.5551360845565796, + "logits/rejected": -0.6236424446105957, + "logps/chosen": -0.0045334878377616405, + "logps/rejected": -1.4982926845550537, + "loss": 2.474, + "nll_loss": 0.6182283163070679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004533488245215267, + "rewards/margins": 0.14937594532966614, + "rewards/rejected": -0.14982928335666656, + "step": 3005 + }, + { + "epoch": 2.078838174273859, + "grad_norm": 6.065270900726318, + "learning_rate": 4.400645458736745e-05, + "log_odds_chosen": 7.245368003845215, + "log_odds_ratio": -0.004873716738075018, + "logits/chosen": -0.5570109486579895, + "logits/rejected": -0.5704992413520813, + "logps/chosen": -0.0028182133100926876, + "logps/rejected": -0.9843683242797852, + "loss": 2.09, + "nll_loss": 0.5220032930374146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002818213542923331, + "rewards/margins": 0.09815500676631927, + "rewards/rejected": -0.09843683242797852, + "step": 3006 + }, + { + "epoch": 2.079529737206086, + "grad_norm": 6.175274848937988, + "learning_rate": 4.40026125710773e-05, + "log_odds_chosen": 7.454465866088867, + "log_odds_ratio": -0.008208566345274448, + "logits/chosen": -0.45918309688568115, + "logits/rejected": -0.4840087890625, + "logps/chosen": -0.005108486860990524, + "logps/rejected": -1.2046600580215454, + "loss": 1.7959, + "nll_loss": 0.4481605887413025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005108487675897777, + "rewards/margins": 0.1199551522731781, + "rewards/rejected": -0.12046600878238678, + "step": 3007 + }, + { + "epoch": 2.0802213001383127, + "grad_norm": 7.142673492431641, + "learning_rate": 4.3998770554787154e-05, + "log_odds_chosen": 4.702856540679932, + "log_odds_ratio": -0.34338873624801636, + "logits/chosen": -0.3096299171447754, + "logits/rejected": -0.3472004532814026, + "logps/chosen": -0.10073808580636978, + "logps/rejected": -1.0663481950759888, + "loss": 1.6838, + "nll_loss": 0.38661855459213257, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010073808953166008, + "rewards/margins": 0.0965610146522522, + "rewards/rejected": -0.10663482546806335, + "step": 3008 + }, + { + "epoch": 2.0809128630705396, + "grad_norm": 9.609098434448242, + "learning_rate": 4.3994928538497006e-05, + "log_odds_chosen": 7.715497016906738, + "log_odds_ratio": -0.003823342267423868, + "logits/chosen": -0.43299996852874756, + "logits/rejected": -0.49476325511932373, + "logps/chosen": -0.007047053426504135, + "logps/rejected": -1.4771684408187866, + "loss": 3.3901, + "nll_loss": 0.8471373319625854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007047054241411388, + "rewards/margins": 0.1470121443271637, + "rewards/rejected": -0.14771683514118195, + "step": 3009 + }, + { + "epoch": 2.0816044260027664, + "grad_norm": 9.416679382324219, + "learning_rate": 4.399108652220685e-05, + "log_odds_chosen": 7.42830753326416, + "log_odds_ratio": -0.022011570632457733, + "logits/chosen": -0.3753679692745209, + "logits/rejected": -0.47189861536026, + "logps/chosen": -0.016750670969486237, + "logps/rejected": -0.9741970300674438, + "loss": 2.1602, + "nll_loss": 0.5378579497337341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001675067120231688, + "rewards/margins": 0.09574463218450546, + "rewards/rejected": -0.09741970151662827, + "step": 3010 + }, + { + "epoch": 2.0822959889349932, + "grad_norm": 11.991842269897461, + "learning_rate": 4.398724450591671e-05, + "log_odds_chosen": 5.96860933303833, + "log_odds_ratio": -0.2523881793022156, + "logits/chosen": -0.1894284188747406, + "logits/rejected": -0.2235884815454483, + "logps/chosen": -0.036848284304142, + "logps/rejected": -0.9814808368682861, + "loss": 2.2428, + "nll_loss": 0.5354623198509216, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0036848282907158136, + "rewards/margins": 0.09446325898170471, + "rewards/rejected": -0.09814808517694473, + "step": 3011 + }, + { + "epoch": 2.08298755186722, + "grad_norm": 5.714529514312744, + "learning_rate": 4.398340248962656e-05, + "log_odds_chosen": 5.944969177246094, + "log_odds_ratio": -0.19645489752292633, + "logits/chosen": -0.07002492249011993, + "logits/rejected": -0.11378204077482224, + "logps/chosen": -0.06899984925985336, + "logps/rejected": -1.3910918235778809, + "loss": 2.6087, + "nll_loss": 0.632527768611908, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0068999845534563065, + "rewards/margins": 0.1322091966867447, + "rewards/rejected": -0.13910917937755585, + "step": 3012 + }, + { + "epoch": 2.083679114799447, + "grad_norm": 7.844751358032227, + "learning_rate": 4.397956047333641e-05, + "log_odds_chosen": 3.936131715774536, + "log_odds_ratio": -0.30492010712623596, + "logits/chosen": -0.40647411346435547, + "logits/rejected": -0.41465991735458374, + "logps/chosen": -0.0794655904173851, + "logps/rejected": -0.8100540637969971, + "loss": 3.0597, + "nll_loss": 0.7344415783882141, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007946559228003025, + "rewards/margins": 0.0730588510632515, + "rewards/rejected": -0.08100540935993195, + "step": 3013 + }, + { + "epoch": 2.0843706777316737, + "grad_norm": 8.143545150756836, + "learning_rate": 4.397571845704626e-05, + "log_odds_chosen": 6.678995132446289, + "log_odds_ratio": -0.020952634513378143, + "logits/chosen": -0.488328754901886, + "logits/rejected": -0.5505259037017822, + "logps/chosen": -0.03460100293159485, + "logps/rejected": -1.2816894054412842, + "loss": 2.2789, + "nll_loss": 0.5676225423812866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034601001534610987, + "rewards/margins": 0.12470885366201401, + "rewards/rejected": -0.1281689554452896, + "step": 3014 + }, + { + "epoch": 2.0850622406639006, + "grad_norm": 7.773388385772705, + "learning_rate": 4.397187644075611e-05, + "log_odds_chosen": 7.691577434539795, + "log_odds_ratio": -0.0022774143144488335, + "logits/chosen": -0.5185723304748535, + "logits/rejected": -0.5329963564872742, + "logps/chosen": -0.0031612124294042587, + "logps/rejected": -1.4423391819000244, + "loss": 2.1191, + "nll_loss": 0.529556393623352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031612126622349024, + "rewards/margins": 0.14391779899597168, + "rewards/rejected": -0.14423391222953796, + "step": 3015 + }, + { + "epoch": 2.0857538035961274, + "grad_norm": 6.006858825683594, + "learning_rate": 4.396803442446596e-05, + "log_odds_chosen": 7.957676887512207, + "log_odds_ratio": -0.04760212451219559, + "logits/chosen": 0.1509585678577423, + "logits/rejected": 0.08457720279693604, + "logps/chosen": -0.014640584588050842, + "logps/rejected": -1.7319622039794922, + "loss": 1.9171, + "nll_loss": 0.4745044708251953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014640585286542773, + "rewards/margins": 0.17173215746879578, + "rewards/rejected": -0.1731962263584137, + "step": 3016 + }, + { + "epoch": 2.086445366528354, + "grad_norm": 84.41448211669922, + "learning_rate": 4.396419240817581e-05, + "log_odds_chosen": 8.378778457641602, + "log_odds_ratio": -0.29373475909233093, + "logits/chosen": -0.27526938915252686, + "logits/rejected": -0.3401549160480499, + "logps/chosen": -0.013904515653848648, + "logps/rejected": -1.756449818611145, + "loss": 3.8393, + "nll_loss": 0.9304454326629639, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001390451448969543, + "rewards/margins": 0.17425453662872314, + "rewards/rejected": -0.17564497888088226, + "step": 3017 + }, + { + "epoch": 2.087136929460581, + "grad_norm": 8.896300315856934, + "learning_rate": 4.3960350391885665e-05, + "log_odds_chosen": 5.943868160247803, + "log_odds_ratio": -0.09057852625846863, + "logits/chosen": -0.29931822419166565, + "logits/rejected": -0.28687506914138794, + "logps/chosen": -0.018265876919031143, + "logps/rejected": -0.9328300952911377, + "loss": 2.5869, + "nll_loss": 0.6376628279685974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018265878316015005, + "rewards/margins": 0.09145642817020416, + "rewards/rejected": -0.09328301250934601, + "step": 3018 + }, + { + "epoch": 2.087828492392808, + "grad_norm": 9.654539108276367, + "learning_rate": 4.395650837559551e-05, + "log_odds_chosen": 6.187833786010742, + "log_odds_ratio": -0.17915089428424835, + "logits/chosen": -0.13763611018657684, + "logits/rejected": -0.1429610550403595, + "logps/chosen": -0.10990156978368759, + "logps/rejected": -2.195013999938965, + "loss": 2.9133, + "nll_loss": 0.7104144096374512, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010990156792104244, + "rewards/margins": 0.20851124823093414, + "rewards/rejected": -0.21950142085552216, + "step": 3019 + }, + { + "epoch": 2.0885200553250347, + "grad_norm": 5.395124435424805, + "learning_rate": 4.395266635930537e-05, + "log_odds_chosen": 5.991332054138184, + "log_odds_ratio": -0.06997621804475784, + "logits/chosen": -0.45609647035598755, + "logits/rejected": -0.4964160919189453, + "logps/chosen": -0.03144243732094765, + "logps/rejected": -1.150253176689148, + "loss": 2.0452, + "nll_loss": 0.5043097734451294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031442439649254084, + "rewards/margins": 0.1118810772895813, + "rewards/rejected": -0.11502531915903091, + "step": 3020 + }, + { + "epoch": 2.0892116182572615, + "grad_norm": 9.584518432617188, + "learning_rate": 4.3948824343015215e-05, + "log_odds_chosen": 5.3715128898620605, + "log_odds_ratio": -0.03806905820965767, + "logits/chosen": -0.29475364089012146, + "logits/rejected": -0.3395359218120575, + "logps/chosen": -0.08184289932250977, + "logps/rejected": -1.1609790325164795, + "loss": 2.3293, + "nll_loss": 0.5785099864006042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008184290491044521, + "rewards/margins": 0.10791360586881638, + "rewards/rejected": -0.11609790474176407, + "step": 3021 + }, + { + "epoch": 2.0899031811894884, + "grad_norm": 8.217158317565918, + "learning_rate": 4.394498232672507e-05, + "log_odds_chosen": 6.2889580726623535, + "log_odds_ratio": -0.016910960897803307, + "logits/chosen": -0.6262481808662415, + "logits/rejected": -0.6448217630386353, + "logps/chosen": -0.024054640904068947, + "logps/rejected": -1.1732820272445679, + "loss": 2.7227, + "nll_loss": 0.6789869070053101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024054639507085085, + "rewards/margins": 0.11492274701595306, + "rewards/rejected": -0.11732819676399231, + "step": 3022 + }, + { + "epoch": 2.090594744121715, + "grad_norm": 9.62985897064209, + "learning_rate": 4.394114031043492e-05, + "log_odds_chosen": 5.910818099975586, + "log_odds_ratio": -0.05642978847026825, + "logits/chosen": -0.41197532415390015, + "logits/rejected": -0.4623297452926636, + "logps/chosen": -0.025548553094267845, + "logps/rejected": -1.2217142581939697, + "loss": 3.3591, + "nll_loss": 0.8341245055198669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002554855542257428, + "rewards/margins": 0.1196165680885315, + "rewards/rejected": -0.12217142432928085, + "step": 3023 + }, + { + "epoch": 2.091286307053942, + "grad_norm": 6.7654709815979, + "learning_rate": 4.3937298294144766e-05, + "log_odds_chosen": 6.483285427093506, + "log_odds_ratio": -0.045767199248075485, + "logits/chosen": -0.5368460416793823, + "logits/rejected": -0.5440640449523926, + "logps/chosen": -0.05254742503166199, + "logps/rejected": -1.434328317642212, + "loss": 2.4722, + "nll_loss": 0.6134682893753052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005254742689430714, + "rewards/margins": 0.13817809522151947, + "rewards/rejected": -0.1434328258037567, + "step": 3024 + }, + { + "epoch": 2.091977869986169, + "grad_norm": 7.35762357711792, + "learning_rate": 4.393345627785462e-05, + "log_odds_chosen": 6.339735507965088, + "log_odds_ratio": -0.044218193739652634, + "logits/chosen": -0.454628050327301, + "logits/rejected": -0.5306775569915771, + "logps/chosen": -0.0601518377661705, + "logps/rejected": -1.0033906698226929, + "loss": 2.0879, + "nll_loss": 0.5175583958625793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006015183869749308, + "rewards/margins": 0.09432388097047806, + "rewards/rejected": -0.10033906996250153, + "step": 3025 + }, + { + "epoch": 2.0926694329183957, + "grad_norm": 11.857747077941895, + "learning_rate": 4.392961426156447e-05, + "log_odds_chosen": 6.992668628692627, + "log_odds_ratio": -0.021204454824328423, + "logits/chosen": -0.3737304210662842, + "logits/rejected": -0.436320960521698, + "logps/chosen": -0.06880363821983337, + "logps/rejected": -1.2175607681274414, + "loss": 2.5603, + "nll_loss": 0.6379530429840088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006880364380776882, + "rewards/margins": 0.11487573385238647, + "rewards/rejected": -0.12175609171390533, + "step": 3026 + }, + { + "epoch": 2.0933609958506225, + "grad_norm": 10.206303596496582, + "learning_rate": 4.392577224527432e-05, + "log_odds_chosen": 8.618885040283203, + "log_odds_ratio": -0.023046551272273064, + "logits/chosen": -0.2979205846786499, + "logits/rejected": -0.39356330037117004, + "logps/chosen": -0.022700179368257523, + "logps/rejected": -2.0342507362365723, + "loss": 1.8279, + "nll_loss": 0.4546803832054138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022700177505612373, + "rewards/margins": 0.20115506649017334, + "rewards/rejected": -0.2034250795841217, + "step": 3027 + }, + { + "epoch": 2.0940525587828493, + "grad_norm": 6.319029808044434, + "learning_rate": 4.392193022898417e-05, + "log_odds_chosen": 5.630053520202637, + "log_odds_ratio": -0.29856622219085693, + "logits/chosen": -0.4813880920410156, + "logits/rejected": -0.5345386266708374, + "logps/chosen": -0.05630096048116684, + "logps/rejected": -0.8578755855560303, + "loss": 1.5996, + "nll_loss": 0.37004461884498596, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005630096886307001, + "rewards/margins": 0.08015745878219604, + "rewards/rejected": -0.08578755706548691, + "step": 3028 + }, + { + "epoch": 2.094744121715076, + "grad_norm": 7.860509872436523, + "learning_rate": 4.391808821269403e-05, + "log_odds_chosen": 7.750638008117676, + "log_odds_ratio": -0.0035974266938865185, + "logits/chosen": -0.4576967656612396, + "logits/rejected": -0.5285370945930481, + "logps/chosen": -0.018102500587701797, + "logps/rejected": -1.5745806694030762, + "loss": 2.7199, + "nll_loss": 0.6796204447746277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018102501053363085, + "rewards/margins": 0.15564781427383423, + "rewards/rejected": -0.15745806694030762, + "step": 3029 + }, + { + "epoch": 2.095435684647303, + "grad_norm": 7.408651828765869, + "learning_rate": 4.3914246196403874e-05, + "log_odds_chosen": 6.946645736694336, + "log_odds_ratio": -0.02821875549852848, + "logits/chosen": -0.214560866355896, + "logits/rejected": -0.2366018295288086, + "logps/chosen": -0.012655358761548996, + "logps/rejected": -1.053880214691162, + "loss": 1.9313, + "nll_loss": 0.48000746965408325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012655358295887709, + "rewards/margins": 0.10412249714136124, + "rewards/rejected": -0.10538802295923233, + "step": 3030 + }, + { + "epoch": 2.09612724757953, + "grad_norm": 7.017563819885254, + "learning_rate": 4.3910404180113726e-05, + "log_odds_chosen": 8.451176643371582, + "log_odds_ratio": -0.0035266894847154617, + "logits/chosen": -0.2520374059677124, + "logits/rejected": -0.32749876379966736, + "logps/chosen": -0.007094074506312609, + "logps/rejected": -1.1015607118606567, + "loss": 2.426, + "nll_loss": 0.6061570644378662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007094074971973896, + "rewards/margins": 0.10944667458534241, + "rewards/rejected": -0.11015607416629791, + "step": 3031 + }, + { + "epoch": 2.0968188105117567, + "grad_norm": 5.60612154006958, + "learning_rate": 4.390656216382358e-05, + "log_odds_chosen": 6.388954162597656, + "log_odds_ratio": -0.03232260048389435, + "logits/chosen": -0.354230135679245, + "logits/rejected": -0.5040380954742432, + "logps/chosen": -0.043253205716609955, + "logps/rejected": -0.8980000019073486, + "loss": 1.9146, + "nll_loss": 0.47541293501853943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004325320012867451, + "rewards/margins": 0.08547468483448029, + "rewards/rejected": -0.08980000019073486, + "step": 3032 + }, + { + "epoch": 2.0975103734439835, + "grad_norm": 9.741111755371094, + "learning_rate": 4.3902720147533424e-05, + "log_odds_chosen": 5.344499588012695, + "log_odds_ratio": -0.20701748132705688, + "logits/chosen": -0.30035096406936646, + "logits/rejected": -0.2930985689163208, + "logps/chosen": -0.0543060302734375, + "logps/rejected": -1.173159122467041, + "loss": 3.3527, + "nll_loss": 0.8174670934677124, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0054306029342114925, + "rewards/margins": 0.11188530176877975, + "rewards/rejected": -0.11731590330600739, + "step": 3033 + }, + { + "epoch": 2.0982019363762103, + "grad_norm": 8.195241928100586, + "learning_rate": 4.389887813124328e-05, + "log_odds_chosen": 5.892043590545654, + "log_odds_ratio": -0.021361295133829117, + "logits/chosen": -0.43137770891189575, + "logits/rejected": -0.49864283204078674, + "logps/chosen": -0.049551934003829956, + "logps/rejected": -1.3703354597091675, + "loss": 2.82, + "nll_loss": 0.7028669714927673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004955193493515253, + "rewards/margins": 0.1320783495903015, + "rewards/rejected": -0.13703353703022003, + "step": 3034 + }, + { + "epoch": 2.098893499308437, + "grad_norm": 10.782358169555664, + "learning_rate": 4.389503611495313e-05, + "log_odds_chosen": 6.741562843322754, + "log_odds_ratio": -0.02253532223403454, + "logits/chosen": -0.5028831362724304, + "logits/rejected": -0.5559301376342773, + "logps/chosen": -0.008717098273336887, + "logps/rejected": -0.9764584898948669, + "loss": 2.835, + "nll_loss": 0.7064973711967468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008717098389752209, + "rewards/margins": 0.0967741385102272, + "rewards/rejected": -0.0976458415389061, + "step": 3035 + }, + { + "epoch": 2.099585062240664, + "grad_norm": 8.789740562438965, + "learning_rate": 4.389119409866298e-05, + "log_odds_chosen": 7.403459548950195, + "log_odds_ratio": -0.022690054029226303, + "logits/chosen": -0.4163126051425934, + "logits/rejected": -0.45792490243911743, + "logps/chosen": -0.014534620568156242, + "logps/rejected": -1.4380933046340942, + "loss": 2.1172, + "nll_loss": 0.5270276665687561, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014534619404003024, + "rewards/margins": 0.14235585927963257, + "rewards/rejected": -0.14380933344364166, + "step": 3036 + }, + { + "epoch": 2.100276625172891, + "grad_norm": 12.233290672302246, + "learning_rate": 4.388735208237283e-05, + "log_odds_chosen": 3.0194010734558105, + "log_odds_ratio": -0.5815798044204712, + "logits/chosen": -0.6315550804138184, + "logits/rejected": -0.6456758975982666, + "logps/chosen": -0.10260287672281265, + "logps/rejected": -0.5657974481582642, + "loss": 2.8741, + "nll_loss": 0.6603554487228394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01026028860360384, + "rewards/margins": 0.04631945490837097, + "rewards/rejected": -0.05657974258065224, + "step": 3037 + }, + { + "epoch": 2.1009681881051177, + "grad_norm": 3.961130142211914, + "learning_rate": 4.3883510066082686e-05, + "log_odds_chosen": 6.792495250701904, + "log_odds_ratio": -0.010656296275556087, + "logits/chosen": -0.26495587825775146, + "logits/rejected": -0.4072640836238861, + "logps/chosen": -0.005022024270147085, + "logps/rejected": -0.6567428112030029, + "loss": 1.9578, + "nll_loss": 0.4883909225463867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005022024270147085, + "rewards/margins": 0.06517207622528076, + "rewards/rejected": -0.06567428261041641, + "step": 3038 + }, + { + "epoch": 2.1016597510373445, + "grad_norm": 9.593063354492188, + "learning_rate": 4.387966804979253e-05, + "log_odds_chosen": 7.462541580200195, + "log_odds_ratio": -0.0023812439758330584, + "logits/chosen": -0.4061529040336609, + "logits/rejected": -0.5284491777420044, + "logps/chosen": -0.0027313565369695425, + "logps/rejected": -1.1370166540145874, + "loss": 2.463, + "nll_loss": 0.615522563457489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027313566533848643, + "rewards/margins": 0.11342853307723999, + "rewards/rejected": -0.11370167136192322, + "step": 3039 + }, + { + "epoch": 2.1023513139695713, + "grad_norm": 3.8307673931121826, + "learning_rate": 4.3875826033502385e-05, + "log_odds_chosen": 7.653198719024658, + "log_odds_ratio": -0.03865279629826546, + "logits/chosen": -0.058998480439186096, + "logits/rejected": -0.08295667916536331, + "logps/chosen": -0.013944515958428383, + "logps/rejected": -1.039737582206726, + "loss": 2.3333, + "nll_loss": 0.5794506072998047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013944517122581601, + "rewards/margins": 0.10257931053638458, + "rewards/rejected": -0.10397376120090485, + "step": 3040 + }, + { + "epoch": 2.103042876901798, + "grad_norm": 11.61815071105957, + "learning_rate": 4.387198401721224e-05, + "log_odds_chosen": 7.312662124633789, + "log_odds_ratio": -0.004587090574204922, + "logits/chosen": -0.631591796875, + "logits/rejected": -0.6536903381347656, + "logps/chosen": -0.02399228885769844, + "logps/rejected": -1.5654449462890625, + "loss": 3.4732, + "nll_loss": 0.8678387403488159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002399228746071458, + "rewards/margins": 0.1541452705860138, + "rewards/rejected": -0.156544491648674, + "step": 3041 + }, + { + "epoch": 2.103734439834025, + "grad_norm": 5.323784351348877, + "learning_rate": 4.386814200092208e-05, + "log_odds_chosen": 6.599758148193359, + "log_odds_ratio": -0.11133424937725067, + "logits/chosen": -0.5072517991065979, + "logits/rejected": -0.5091153383255005, + "logps/chosen": -0.0216111671179533, + "logps/rejected": -1.1363182067871094, + "loss": 2.4531, + "nll_loss": 0.6021370887756348, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021611163392663, + "rewards/margins": 0.11147069931030273, + "rewards/rejected": -0.11363181471824646, + "step": 3042 + }, + { + "epoch": 2.104426002766252, + "grad_norm": 6.221068382263184, + "learning_rate": 4.3864299984631935e-05, + "log_odds_chosen": 7.854094505310059, + "log_odds_ratio": -0.0015729665756225586, + "logits/chosen": -0.6950536370277405, + "logits/rejected": -0.7626223564147949, + "logps/chosen": -0.001281562028452754, + "logps/rejected": -1.0853941440582275, + "loss": 2.3986, + "nll_loss": 0.5995044708251953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012815619993489236, + "rewards/margins": 0.10841125249862671, + "rewards/rejected": -0.10853941738605499, + "step": 3043 + }, + { + "epoch": 2.1051175656984786, + "grad_norm": 9.613995552062988, + "learning_rate": 4.386045796834179e-05, + "log_odds_chosen": 5.732051849365234, + "log_odds_ratio": -0.03842860832810402, + "logits/chosen": -0.36531874537467957, + "logits/rejected": -0.38595065474510193, + "logps/chosen": -0.03054799698293209, + "logps/rejected": -1.0037133693695068, + "loss": 2.823, + "nll_loss": 0.7019094824790955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003054799512028694, + "rewards/margins": 0.09731653332710266, + "rewards/rejected": -0.1003713309764862, + "step": 3044 + }, + { + "epoch": 2.1058091286307055, + "grad_norm": 7.104485511779785, + "learning_rate": 4.385661595205164e-05, + "log_odds_chosen": 7.818532943725586, + "log_odds_ratio": -0.0011651016538962722, + "logits/chosen": -0.6283485889434814, + "logits/rejected": -0.6749677658081055, + "logps/chosen": -0.0066756028681993484, + "logps/rejected": -1.2415318489074707, + "loss": 2.2926, + "nll_loss": 0.5730224847793579, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006675602635368705, + "rewards/margins": 0.12348562479019165, + "rewards/rejected": -0.12415318191051483, + "step": 3045 + }, + { + "epoch": 2.1065006915629323, + "grad_norm": 8.063847541809082, + "learning_rate": 4.3852773935761486e-05, + "log_odds_chosen": 7.581356048583984, + "log_odds_ratio": -0.0035385629162192345, + "logits/chosen": -0.46461209654808044, + "logits/rejected": -0.5649842023849487, + "logps/chosen": -0.01552728284150362, + "logps/rejected": -1.3513633012771606, + "loss": 2.725, + "nll_loss": 0.6808865070343018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015527281211689115, + "rewards/margins": 0.13358360528945923, + "rewards/rejected": -0.13513632118701935, + "step": 3046 + }, + { + "epoch": 2.107192254495159, + "grad_norm": 10.078218460083008, + "learning_rate": 4.3848931919471345e-05, + "log_odds_chosen": 6.767643928527832, + "log_odds_ratio": -0.0026630214415490627, + "logits/chosen": -0.5481566786766052, + "logits/rejected": -0.6565913558006287, + "logps/chosen": -0.004768464248627424, + "logps/rejected": -1.2390985488891602, + "loss": 3.2571, + "nll_loss": 0.8140066862106323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047684641322121024, + "rewards/margins": 0.12343300879001617, + "rewards/rejected": -0.1239098608493805, + "step": 3047 + }, + { + "epoch": 2.107883817427386, + "grad_norm": 12.023402214050293, + "learning_rate": 4.384508990318119e-05, + "log_odds_chosen": 4.712347984313965, + "log_odds_ratio": -0.2186054289340973, + "logits/chosen": -0.5743224024772644, + "logits/rejected": -0.5981598496437073, + "logps/chosen": -0.04524346441030502, + "logps/rejected": -0.4811258614063263, + "loss": 2.2951, + "nll_loss": 0.5519220232963562, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004524346441030502, + "rewards/margins": 0.04358823969960213, + "rewards/rejected": -0.04811258241534233, + "step": 3048 + }, + { + "epoch": 2.108575380359613, + "grad_norm": 6.6201677322387695, + "learning_rate": 4.384124788689104e-05, + "log_odds_chosen": 8.445602416992188, + "log_odds_ratio": -0.00592130608856678, + "logits/chosen": -0.5682473182678223, + "logits/rejected": -0.5630465149879456, + "logps/chosen": -0.0022103991359472275, + "logps/rejected": -1.4159787893295288, + "loss": 2.651, + "nll_loss": 0.6621494889259338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022103989613242447, + "rewards/margins": 0.14137683808803558, + "rewards/rejected": -0.14159788191318512, + "step": 3049 + }, + { + "epoch": 2.1092669432918396, + "grad_norm": 10.28715991973877, + "learning_rate": 4.3837405870600895e-05, + "log_odds_chosen": 6.215579032897949, + "log_odds_ratio": -0.005182541906833649, + "logits/chosen": -0.6934223771095276, + "logits/rejected": -0.7401363849639893, + "logps/chosen": -0.006744416896253824, + "logps/rejected": -0.9159350991249084, + "loss": 3.3619, + "nll_loss": 0.8399689197540283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006744416896253824, + "rewards/margins": 0.09091907739639282, + "rewards/rejected": -0.09159351885318756, + "step": 3050 + }, + { + "epoch": 2.1099585062240664, + "grad_norm": 6.814821243286133, + "learning_rate": 4.383356385431074e-05, + "log_odds_chosen": 7.128850936889648, + "log_odds_ratio": -0.09054242074489594, + "logits/chosen": -0.1573888510465622, + "logits/rejected": -0.19654060900211334, + "logps/chosen": -0.028513900935649872, + "logps/rejected": -1.0100467205047607, + "loss": 1.8336, + "nll_loss": 0.4493419826030731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002851390279829502, + "rewards/margins": 0.09815328568220139, + "rewards/rejected": -0.10100467503070831, + "step": 3051 + }, + { + "epoch": 2.1106500691562933, + "grad_norm": 4.588749408721924, + "learning_rate": 4.3829721838020594e-05, + "log_odds_chosen": 6.3818359375, + "log_odds_ratio": -0.09973902255296707, + "logits/chosen": -0.4939768314361572, + "logits/rejected": -0.46895715594291687, + "logps/chosen": -0.0433577224612236, + "logps/rejected": -1.110426664352417, + "loss": 1.7762, + "nll_loss": 0.43407464027404785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0043357727117836475, + "rewards/margins": 0.106706902384758, + "rewards/rejected": -0.11104267090559006, + "step": 3052 + }, + { + "epoch": 2.11134163208852, + "grad_norm": 10.691590309143066, + "learning_rate": 4.3825879821730446e-05, + "log_odds_chosen": 8.708065032958984, + "log_odds_ratio": -0.003415337298065424, + "logits/chosen": -0.9307934641838074, + "logits/rejected": -0.9371286034584045, + "logps/chosen": -0.0015783170238137245, + "logps/rejected": -1.419217586517334, + "loss": 2.508, + "nll_loss": 0.6266547441482544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015783170238137245, + "rewards/margins": 0.14176392555236816, + "rewards/rejected": -0.1419217586517334, + "step": 3053 + }, + { + "epoch": 2.112033195020747, + "grad_norm": 14.266855239868164, + "learning_rate": 4.38220378054403e-05, + "log_odds_chosen": 8.21017837524414, + "log_odds_ratio": -0.010857914574444294, + "logits/chosen": -0.5948159694671631, + "logits/rejected": -0.7014087438583374, + "logps/chosen": -0.005287535954266787, + "logps/rejected": -1.3881491422653198, + "loss": 2.6336, + "nll_loss": 0.6573028564453125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000528753618709743, + "rewards/margins": 0.13828615844249725, + "rewards/rejected": -0.13881491124629974, + "step": 3054 + }, + { + "epoch": 2.1127247579529738, + "grad_norm": 11.680485725402832, + "learning_rate": 4.3818195789150144e-05, + "log_odds_chosen": 6.707440376281738, + "log_odds_ratio": -0.6255326867103577, + "logits/chosen": -0.5511981844902039, + "logits/rejected": -0.5761828422546387, + "logps/chosen": -0.016642756760120392, + "logps/rejected": -1.5115129947662354, + "loss": 3.3974, + "nll_loss": 0.7867913246154785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0016642757691442966, + "rewards/margins": 0.14948701858520508, + "rewards/rejected": -0.15115129947662354, + "step": 3055 + }, + { + "epoch": 2.1134163208852006, + "grad_norm": 7.237359523773193, + "learning_rate": 4.381435377286e-05, + "log_odds_chosen": 7.038687705993652, + "log_odds_ratio": -0.004269938915967941, + "logits/chosen": -0.7024118900299072, + "logits/rejected": -0.7122198343276978, + "logps/chosen": -0.01284304354339838, + "logps/rejected": -0.9574891328811646, + "loss": 2.6331, + "nll_loss": 0.6578375101089478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012843044241890311, + "rewards/margins": 0.09446461498737335, + "rewards/rejected": -0.0957489162683487, + "step": 3056 + }, + { + "epoch": 2.1141078838174274, + "grad_norm": 4.956737041473389, + "learning_rate": 4.381051175656985e-05, + "log_odds_chosen": 7.141617774963379, + "log_odds_ratio": -0.12717103958129883, + "logits/chosen": -0.32736361026763916, + "logits/rejected": -0.3319385051727295, + "logps/chosen": -0.036458250135183334, + "logps/rejected": -1.1596159934997559, + "loss": 1.3879, + "nll_loss": 0.33425214886665344, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003645824734121561, + "rewards/margins": 0.11231578886508942, + "rewards/rejected": -0.11596160382032394, + "step": 3057 + }, + { + "epoch": 2.1147994467496543, + "grad_norm": 7.007951736450195, + "learning_rate": 4.38066697402797e-05, + "log_odds_chosen": 6.820135116577148, + "log_odds_ratio": -0.06915004551410675, + "logits/chosen": -0.6815324425697327, + "logits/rejected": -0.7533388733863831, + "logps/chosen": -0.02807777002453804, + "logps/rejected": -1.120797872543335, + "loss": 2.661, + "nll_loss": 0.6583237051963806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028077769093215466, + "rewards/margins": 0.10927200317382812, + "rewards/rejected": -0.11207978427410126, + "step": 3058 + }, + { + "epoch": 2.115491009681881, + "grad_norm": 10.030107498168945, + "learning_rate": 4.3802827723989554e-05, + "log_odds_chosen": 6.217621803283691, + "log_odds_ratio": -0.040066175162792206, + "logits/chosen": -0.6635290384292603, + "logits/rejected": -0.7061796188354492, + "logps/chosen": -0.024360649287700653, + "logps/rejected": -1.495863914489746, + "loss": 3.3484, + "nll_loss": 0.8330943584442139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024360651150345802, + "rewards/margins": 0.14715032279491425, + "rewards/rejected": -0.14958639442920685, + "step": 3059 + }, + { + "epoch": 2.116182572614108, + "grad_norm": 8.80070686340332, + "learning_rate": 4.37989857076994e-05, + "log_odds_chosen": 6.714267730712891, + "log_odds_ratio": -0.007191003765910864, + "logits/chosen": -0.5821347832679749, + "logits/rejected": -0.619955837726593, + "logps/chosen": -0.019215121865272522, + "logps/rejected": -1.294236660003662, + "loss": 2.5476, + "nll_loss": 0.636172354221344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001921512302942574, + "rewards/margins": 0.12750215828418732, + "rewards/rejected": -0.12942366302013397, + "step": 3060 + }, + { + "epoch": 2.1168741355463347, + "grad_norm": 7.5158514976501465, + "learning_rate": 4.379514369140925e-05, + "log_odds_chosen": 7.826415538787842, + "log_odds_ratio": -0.02342759445309639, + "logits/chosen": -0.2783409059047699, + "logits/rejected": -0.3393867015838623, + "logps/chosen": -0.01329967100173235, + "logps/rejected": -2.1055209636688232, + "loss": 1.9379, + "nll_loss": 0.4821318984031677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013299670536071062, + "rewards/margins": 0.20922213792800903, + "rewards/rejected": -0.21055209636688232, + "step": 3061 + }, + { + "epoch": 2.1175656984785616, + "grad_norm": 4.418914318084717, + "learning_rate": 4.3791301675119104e-05, + "log_odds_chosen": 6.063641548156738, + "log_odds_ratio": -0.015754833817481995, + "logits/chosen": -0.22157390415668488, + "logits/rejected": -0.30585265159606934, + "logps/chosen": -0.058802567422389984, + "logps/rejected": -1.6046185493469238, + "loss": 2.4442, + "nll_loss": 0.6094802618026733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005880257114768028, + "rewards/margins": 0.15458157658576965, + "rewards/rejected": -0.16046184301376343, + "step": 3062 + }, + { + "epoch": 2.1182572614107884, + "grad_norm": 9.49417781829834, + "learning_rate": 4.378745965882896e-05, + "log_odds_chosen": 6.562379360198975, + "log_odds_ratio": -0.07851752638816833, + "logits/chosen": -0.41256842017173767, + "logits/rejected": -0.4614778161048889, + "logps/chosen": -0.018696602433919907, + "logps/rejected": -1.1838551759719849, + "loss": 3.4774, + "nll_loss": 0.8615073561668396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018696601036936045, + "rewards/margins": 0.1165158599615097, + "rewards/rejected": -0.11838552355766296, + "step": 3063 + }, + { + "epoch": 2.1189488243430152, + "grad_norm": 6.9558515548706055, + "learning_rate": 4.37836176425388e-05, + "log_odds_chosen": 7.499703884124756, + "log_odds_ratio": -0.004096648655831814, + "logits/chosen": -0.224429190158844, + "logits/rejected": -0.3123741149902344, + "logps/chosen": -0.012242062948644161, + "logps/rejected": -1.5422005653381348, + "loss": 2.1784, + "nll_loss": 0.5441972017288208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012242061784490943, + "rewards/margins": 0.15299585461616516, + "rewards/rejected": -0.15422005951404572, + "step": 3064 + }, + { + "epoch": 2.119640387275242, + "grad_norm": 6.659056186676025, + "learning_rate": 4.377977562624866e-05, + "log_odds_chosen": 6.294110298156738, + "log_odds_ratio": -0.1503944993019104, + "logits/chosen": -0.1056843176484108, + "logits/rejected": -0.10444828867912292, + "logps/chosen": -0.037452977150678635, + "logps/rejected": -1.0243463516235352, + "loss": 2.2648, + "nll_loss": 0.5511507391929626, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0037452979013323784, + "rewards/margins": 0.09868934750556946, + "rewards/rejected": -0.10243464261293411, + "step": 3065 + }, + { + "epoch": 2.120331950207469, + "grad_norm": 6.566095352172852, + "learning_rate": 4.377593360995851e-05, + "log_odds_chosen": 4.200222969055176, + "log_odds_ratio": -0.04109200835227966, + "logits/chosen": -0.42699283361434937, + "logits/rejected": -0.46811172366142273, + "logps/chosen": -0.04961252957582474, + "logps/rejected": -1.147521734237671, + "loss": 3.3842, + "nll_loss": 0.8419307470321655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004961253143846989, + "rewards/margins": 0.10979092121124268, + "rewards/rejected": -0.11475217342376709, + "step": 3066 + }, + { + "epoch": 2.1210235131396957, + "grad_norm": 7.800382137298584, + "learning_rate": 4.377209159366836e-05, + "log_odds_chosen": 3.6615090370178223, + "log_odds_ratio": -0.11077920347452164, + "logits/chosen": -0.49535033106803894, + "logits/rejected": -0.5360032320022583, + "logps/chosen": -0.09559698402881622, + "logps/rejected": -1.1526672840118408, + "loss": 1.8968, + "nll_loss": 0.46311768889427185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009559698402881622, + "rewards/margins": 0.10570703446865082, + "rewards/rejected": -0.11526672542095184, + "step": 3067 + }, + { + "epoch": 2.1217150760719226, + "grad_norm": 13.618182182312012, + "learning_rate": 4.376824957737821e-05, + "log_odds_chosen": 7.400300979614258, + "log_odds_ratio": -0.23561367392539978, + "logits/chosen": -0.46377870440483093, + "logits/rejected": -0.48604080080986023, + "logps/chosen": -0.03631383925676346, + "logps/rejected": -1.319928526878357, + "loss": 3.283, + "nll_loss": 0.7972000241279602, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003631383879110217, + "rewards/margins": 0.12836146354675293, + "rewards/rejected": -0.13199284672737122, + "step": 3068 + }, + { + "epoch": 2.1224066390041494, + "grad_norm": 8.135883331298828, + "learning_rate": 4.376440756108806e-05, + "log_odds_chosen": 6.368939399719238, + "log_odds_ratio": -0.22905480861663818, + "logits/chosen": -0.6161007881164551, + "logits/rejected": -0.6187179088592529, + "logps/chosen": -0.035729553550481796, + "logps/rejected": -0.9610961079597473, + "loss": 1.7314, + "nll_loss": 0.4099564850330353, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003572955960407853, + "rewards/margins": 0.09253665804862976, + "rewards/rejected": -0.09610961377620697, + "step": 3069 + }, + { + "epoch": 2.123098201936376, + "grad_norm": 9.88579273223877, + "learning_rate": 4.376056554479791e-05, + "log_odds_chosen": 6.737878799438477, + "log_odds_ratio": -0.027707181870937347, + "logits/chosen": -0.20080123841762543, + "logits/rejected": -0.25162220001220703, + "logps/chosen": -0.014487730339169502, + "logps/rejected": -1.53798246383667, + "loss": 2.5172, + "nll_loss": 0.6265227794647217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014487729640677571, + "rewards/margins": 0.15234947204589844, + "rewards/rejected": -0.15379825234413147, + "step": 3070 + }, + { + "epoch": 2.123789764868603, + "grad_norm": 7.084987640380859, + "learning_rate": 4.375672352850776e-05, + "log_odds_chosen": 6.6261749267578125, + "log_odds_ratio": -0.028139235451817513, + "logits/chosen": -0.6384437084197998, + "logits/rejected": -0.6909653544425964, + "logps/chosen": -0.021311424672603607, + "logps/rejected": -1.0407835245132446, + "loss": 2.2195, + "nll_loss": 0.5520578622817993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021311421878635883, + "rewards/margins": 0.10194721817970276, + "rewards/rejected": -0.10407835245132446, + "step": 3071 + }, + { + "epoch": 2.12448132780083, + "grad_norm": 10.54240894317627, + "learning_rate": 4.3752881512217615e-05, + "log_odds_chosen": 4.758946418762207, + "log_odds_ratio": -0.662519633769989, + "logits/chosen": -0.6168556809425354, + "logits/rejected": -0.7287572622299194, + "logps/chosen": -0.06607799232006073, + "logps/rejected": -0.7929989099502563, + "loss": 2.9218, + "nll_loss": 0.6641974449157715, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.006607798859477043, + "rewards/margins": 0.07269209623336792, + "rewards/rejected": -0.07929988205432892, + "step": 3072 + }, + { + "epoch": 2.1251728907330567, + "grad_norm": 12.808792114257812, + "learning_rate": 4.374903949592746e-05, + "log_odds_chosen": 3.1861891746520996, + "log_odds_ratio": -0.6998291015625, + "logits/chosen": -0.7169103026390076, + "logits/rejected": -0.7344023585319519, + "logps/chosen": -0.1427215337753296, + "logps/rejected": -0.6728063821792603, + "loss": 3.1134, + "nll_loss": 0.7083545923233032, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.014272153377532959, + "rewards/margins": 0.05300847813487053, + "rewards/rejected": -0.06728063523769379, + "step": 3073 + }, + { + "epoch": 2.1258644536652835, + "grad_norm": 6.509385585784912, + "learning_rate": 4.374519747963732e-05, + "log_odds_chosen": 7.967581748962402, + "log_odds_ratio": -0.017956677824258804, + "logits/chosen": -0.6940062046051025, + "logits/rejected": -0.840277910232544, + "logps/chosen": -0.06229571998119354, + "logps/rejected": -1.6665977239608765, + "loss": 2.5872, + "nll_loss": 0.645008385181427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0062295724637806416, + "rewards/margins": 0.1604302078485489, + "rewards/rejected": -0.16665977239608765, + "step": 3074 + }, + { + "epoch": 2.1265560165975104, + "grad_norm": 7.396805763244629, + "learning_rate": 4.3741355463347166e-05, + "log_odds_chosen": 6.3950324058532715, + "log_odds_ratio": -0.05477265268564224, + "logits/chosen": -0.3604324162006378, + "logits/rejected": -0.4033384323120117, + "logps/chosen": -0.018422357738018036, + "logps/rejected": -0.9343924522399902, + "loss": 2.7091, + "nll_loss": 0.671809196472168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018422356806695461, + "rewards/margins": 0.09159701317548752, + "rewards/rejected": -0.09343923628330231, + "step": 3075 + }, + { + "epoch": 2.127247579529737, + "grad_norm": 7.560407638549805, + "learning_rate": 4.373751344705702e-05, + "log_odds_chosen": 7.058584690093994, + "log_odds_ratio": -0.06325285881757736, + "logits/chosen": -0.8072975277900696, + "logits/rejected": -0.8635538816452026, + "logps/chosen": -0.024707302451133728, + "logps/rejected": -1.4035589694976807, + "loss": 2.4432, + "nll_loss": 0.6044746041297913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024707301054149866, + "rewards/margins": 0.1378851681947708, + "rewards/rejected": -0.1403558999300003, + "step": 3076 + }, + { + "epoch": 2.127939142461964, + "grad_norm": 5.632557392120361, + "learning_rate": 4.373367143076687e-05, + "log_odds_chosen": 6.698379993438721, + "log_odds_ratio": -0.05778937041759491, + "logits/chosen": -0.690493106842041, + "logits/rejected": -0.6445252895355225, + "logps/chosen": -0.016724741086363792, + "logps/rejected": -1.3040034770965576, + "loss": 2.5376, + "nll_loss": 0.6286225318908691, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016724740853533149, + "rewards/margins": 0.12872786819934845, + "rewards/rejected": -0.13040034472942352, + "step": 3077 + }, + { + "epoch": 2.128630705394191, + "grad_norm": 68.70565795898438, + "learning_rate": 4.3729829414476716e-05, + "log_odds_chosen": 5.167508602142334, + "log_odds_ratio": -0.539081871509552, + "logits/chosen": -0.37192028760910034, + "logits/rejected": -0.38595011830329895, + "logps/chosen": -0.0928657129406929, + "logps/rejected": -1.187703251838684, + "loss": 2.6988, + "nll_loss": 0.6207873821258545, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00928657315671444, + "rewards/margins": 0.1094837561249733, + "rewards/rejected": -0.11877032369375229, + "step": 3078 + }, + { + "epoch": 2.1293222683264177, + "grad_norm": 4.5042219161987305, + "learning_rate": 4.372598739818657e-05, + "log_odds_chosen": 6.700753211975098, + "log_odds_ratio": -0.0060334340669214725, + "logits/chosen": -0.6424789428710938, + "logits/rejected": -0.6247678995132446, + "logps/chosen": -0.009268735535442829, + "logps/rejected": -0.9170104265213013, + "loss": 1.6578, + "nll_loss": 0.41384565830230713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009268735884688795, + "rewards/margins": 0.09077417105436325, + "rewards/rejected": -0.09170104563236237, + "step": 3079 + }, + { + "epoch": 2.1300138312586445, + "grad_norm": 8.544678688049316, + "learning_rate": 4.372214538189642e-05, + "log_odds_chosen": 6.743369102478027, + "log_odds_ratio": -0.025003895163536072, + "logits/chosen": -0.7757008075714111, + "logits/rejected": -0.8247724771499634, + "logps/chosen": -0.027958286926150322, + "logps/rejected": -0.9012587070465088, + "loss": 3.0383, + "nll_loss": 0.757074236869812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002795828739181161, + "rewards/margins": 0.08733003586530685, + "rewards/rejected": -0.09012586623430252, + "step": 3080 + }, + { + "epoch": 2.1307053941908713, + "grad_norm": 7.25593376159668, + "learning_rate": 4.3718303365606274e-05, + "log_odds_chosen": 6.274052619934082, + "log_odds_ratio": -0.05131087079644203, + "logits/chosen": -0.46268534660339355, + "logits/rejected": -0.5040990114212036, + "logps/chosen": -0.020884795114398003, + "logps/rejected": -1.282250165939331, + "loss": 2.0169, + "nll_loss": 0.49910321831703186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020884794648736715, + "rewards/margins": 0.12613654136657715, + "rewards/rejected": -0.12822501361370087, + "step": 3081 + }, + { + "epoch": 2.131396957123098, + "grad_norm": 8.337603569030762, + "learning_rate": 4.371446134931612e-05, + "log_odds_chosen": 8.045146942138672, + "log_odds_ratio": -0.008465875871479511, + "logits/chosen": -0.40172910690307617, + "logits/rejected": -0.48722216486930847, + "logps/chosen": -0.0039009880274534225, + "logps/rejected": -1.5526306629180908, + "loss": 2.1899, + "nll_loss": 0.5466317534446716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039009880856610835, + "rewards/margins": 0.15487296879291534, + "rewards/rejected": -0.15526306629180908, + "step": 3082 + }, + { + "epoch": 2.132088520055325, + "grad_norm": 14.335528373718262, + "learning_rate": 4.371061933302598e-05, + "log_odds_chosen": 7.333463668823242, + "log_odds_ratio": -0.25078287720680237, + "logits/chosen": -0.5369386076927185, + "logits/rejected": -0.5791423916816711, + "logps/chosen": -0.04182959720492363, + "logps/rejected": -1.1157071590423584, + "loss": 3.0666, + "nll_loss": 0.7415834069252014, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004182959441095591, + "rewards/margins": 0.10738775134086609, + "rewards/rejected": -0.11157071590423584, + "step": 3083 + }, + { + "epoch": 2.132780082987552, + "grad_norm": 8.54443359375, + "learning_rate": 4.3706777316735824e-05, + "log_odds_chosen": 7.05941104888916, + "log_odds_ratio": -0.0036911824718117714, + "logits/chosen": -1.0558934211730957, + "logits/rejected": -1.1036031246185303, + "logps/chosen": -0.003986523952335119, + "logps/rejected": -1.0695551633834839, + "loss": 3.1301, + "nll_loss": 0.7821628451347351, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003986524243373424, + "rewards/margins": 0.10655686259269714, + "rewards/rejected": -0.10695552080869675, + "step": 3084 + }, + { + "epoch": 2.1334716459197787, + "grad_norm": 10.675700187683105, + "learning_rate": 4.370293530044568e-05, + "log_odds_chosen": 7.191103935241699, + "log_odds_ratio": -0.05402178317308426, + "logits/chosen": -0.6177210211753845, + "logits/rejected": -0.7148678302764893, + "logps/chosen": -0.01665673218667507, + "logps/rejected": -1.175557255744934, + "loss": 2.7801, + "nll_loss": 0.6896116733551025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016656732186675072, + "rewards/margins": 0.11589005589485168, + "rewards/rejected": -0.11755572259426117, + "step": 3085 + }, + { + "epoch": 2.1341632088520055, + "grad_norm": 12.264751434326172, + "learning_rate": 4.369909328415553e-05, + "log_odds_chosen": 7.793338298797607, + "log_odds_ratio": -0.003364799777045846, + "logits/chosen": -0.5212658643722534, + "logits/rejected": -0.6233722567558289, + "logps/chosen": -0.00235918746329844, + "logps/rejected": -1.2549175024032593, + "loss": 3.3469, + "nll_loss": 0.8363955616950989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002359187783440575, + "rewards/margins": 0.12525583803653717, + "rewards/rejected": -0.12549175322055817, + "step": 3086 + }, + { + "epoch": 2.1348547717842323, + "grad_norm": 5.636877059936523, + "learning_rate": 4.3695251267865375e-05, + "log_odds_chosen": 5.239736080169678, + "log_odds_ratio": -0.030729567632079124, + "logits/chosen": -0.47833728790283203, + "logits/rejected": -0.5737687945365906, + "logps/chosen": -0.050199344754219055, + "logps/rejected": -1.2572249174118042, + "loss": 2.0981, + "nll_loss": 0.5214625597000122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005019934847950935, + "rewards/margins": 0.12070255726575851, + "rewards/rejected": -0.1257224977016449, + "step": 3087 + }, + { + "epoch": 2.135546334716459, + "grad_norm": 8.218557357788086, + "learning_rate": 4.369140925157523e-05, + "log_odds_chosen": 6.637486457824707, + "log_odds_ratio": -0.033020954579114914, + "logits/chosen": -0.6227655410766602, + "logits/rejected": -0.6597899794578552, + "logps/chosen": -0.019715534523129463, + "logps/rejected": -1.0135293006896973, + "loss": 3.0103, + "nll_loss": 0.7492808103561401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00197155331261456, + "rewards/margins": 0.09938137233257294, + "rewards/rejected": -0.10135292261838913, + "step": 3088 + }, + { + "epoch": 2.136237897648686, + "grad_norm": 10.687223434448242, + "learning_rate": 4.368756723528508e-05, + "log_odds_chosen": 5.657548904418945, + "log_odds_ratio": -0.20293311774730682, + "logits/chosen": -0.6749211549758911, + "logits/rejected": -0.7769599556922913, + "logps/chosen": -0.03227122128009796, + "logps/rejected": -1.0073554515838623, + "loss": 3.2338, + "nll_loss": 0.788148045539856, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003227122128009796, + "rewards/margins": 0.09750843048095703, + "rewards/rejected": -0.10073556005954742, + "step": 3089 + }, + { + "epoch": 2.136929460580913, + "grad_norm": 9.447967529296875, + "learning_rate": 4.368372521899493e-05, + "log_odds_chosen": 5.053914546966553, + "log_odds_ratio": -0.16671393811702728, + "logits/chosen": -0.41087472438812256, + "logits/rejected": -0.4866830110549927, + "logps/chosen": -0.055432647466659546, + "logps/rejected": -1.1628947257995605, + "loss": 2.8171, + "nll_loss": 0.687608540058136, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00554326456040144, + "rewards/margins": 0.11074619740247726, + "rewards/rejected": -0.11628946661949158, + "step": 3090 + }, + { + "epoch": 2.1376210235131397, + "grad_norm": 6.8454203605651855, + "learning_rate": 4.367988320270478e-05, + "log_odds_chosen": 7.090332508087158, + "log_odds_ratio": -0.013906768523156643, + "logits/chosen": -0.6725280284881592, + "logits/rejected": -0.7864277362823486, + "logps/chosen": -0.012035176157951355, + "logps/rejected": -0.9701966047286987, + "loss": 2.2742, + "nll_loss": 0.5671637058258057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012035175459459424, + "rewards/margins": 0.09581615030765533, + "rewards/rejected": -0.09701967239379883, + "step": 3091 + }, + { + "epoch": 2.1383125864453665, + "grad_norm": 5.8847975730896, + "learning_rate": 4.367604118641464e-05, + "log_odds_chosen": 5.034954071044922, + "log_odds_ratio": -0.1259640008211136, + "logits/chosen": -0.524029552936554, + "logits/rejected": -0.5001509189605713, + "logps/chosen": -0.10554247349500656, + "logps/rejected": -0.838758647441864, + "loss": 2.4572, + "nll_loss": 0.6016969680786133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010554247535765171, + "rewards/margins": 0.0733216181397438, + "rewards/rejected": -0.0838758647441864, + "step": 3092 + }, + { + "epoch": 2.1390041493775933, + "grad_norm": 4.628413677215576, + "learning_rate": 4.367219917012448e-05, + "log_odds_chosen": 4.764793395996094, + "log_odds_ratio": -0.023185372352600098, + "logits/chosen": -0.4627071022987366, + "logits/rejected": -0.5550016760826111, + "logps/chosen": -0.03563341498374939, + "logps/rejected": -0.6548776626586914, + "loss": 2.7578, + "nll_loss": 0.6871291399002075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035633419174700975, + "rewards/margins": 0.061924420297145844, + "rewards/rejected": -0.06548776477575302, + "step": 3093 + }, + { + "epoch": 2.13969571230982, + "grad_norm": 10.076739311218262, + "learning_rate": 4.3668357153834335e-05, + "log_odds_chosen": 5.545009613037109, + "log_odds_ratio": -0.26811662316322327, + "logits/chosen": -0.7443239688873291, + "logits/rejected": -0.7627904415130615, + "logps/chosen": -0.041051093488931656, + "logps/rejected": -1.1456853151321411, + "loss": 4.2332, + "nll_loss": 1.031481146812439, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0041051097214221954, + "rewards/margins": 0.11046342551708221, + "rewards/rejected": -0.11456853896379471, + "step": 3094 + }, + { + "epoch": 2.140387275242047, + "grad_norm": 8.096652030944824, + "learning_rate": 4.366451513754419e-05, + "log_odds_chosen": 8.124271392822266, + "log_odds_ratio": -0.012365620583295822, + "logits/chosen": -0.7031798958778381, + "logits/rejected": -0.833611011505127, + "logps/chosen": -0.0064608375541865826, + "logps/rejected": -1.580578327178955, + "loss": 3.3684, + "nll_loss": 0.8408694267272949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006460837321355939, + "rewards/margins": 0.15741175413131714, + "rewards/rejected": -0.15805783867835999, + "step": 3095 + }, + { + "epoch": 2.141078838174274, + "grad_norm": 8.342835426330566, + "learning_rate": 4.366067312125403e-05, + "log_odds_chosen": 4.668115139007568, + "log_odds_ratio": -0.02511441893875599, + "logits/chosen": -0.5143285393714905, + "logits/rejected": -0.5451970100402832, + "logps/chosen": -0.03286014497280121, + "logps/rejected": -0.9364689588546753, + "loss": 2.5867, + "nll_loss": 0.6441757678985596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032860147766768932, + "rewards/margins": 0.09036087989807129, + "rewards/rejected": -0.09364689141511917, + "step": 3096 + }, + { + "epoch": 2.1417704011065006, + "grad_norm": 8.438102722167969, + "learning_rate": 4.3656831104963886e-05, + "log_odds_chosen": 5.5746002197265625, + "log_odds_ratio": -0.021498534828424454, + "logits/chosen": -0.3380069136619568, + "logits/rejected": -0.358273983001709, + "logps/chosen": -0.020813195034861565, + "logps/rejected": -0.9423534274101257, + "loss": 2.385, + "nll_loss": 0.5941092371940613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020813194569200277, + "rewards/margins": 0.09215402603149414, + "rewards/rejected": -0.09423534572124481, + "step": 3097 + }, + { + "epoch": 2.1424619640387275, + "grad_norm": 5.3521037101745605, + "learning_rate": 4.365298908867374e-05, + "log_odds_chosen": 6.82499885559082, + "log_odds_ratio": -0.004994858056306839, + "logits/chosen": -0.49015307426452637, + "logits/rejected": -0.530591607093811, + "logps/chosen": -0.0181460902094841, + "logps/rejected": -1.093785047531128, + "loss": 2.2152, + "nll_loss": 0.5533087253570557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018146091606467962, + "rewards/margins": 0.10756390541791916, + "rewards/rejected": -0.10937851667404175, + "step": 3098 + }, + { + "epoch": 2.1431535269709543, + "grad_norm": 5.270591735839844, + "learning_rate": 4.364914707238359e-05, + "log_odds_chosen": 6.464059829711914, + "log_odds_ratio": -0.039618462324142456, + "logits/chosen": -0.5720619559288025, + "logits/rejected": -0.5675368309020996, + "logps/chosen": -0.027832329273223877, + "logps/rejected": -1.39834725856781, + "loss": 2.5972, + "nll_loss": 0.6453447937965393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002783233067020774, + "rewards/margins": 0.13705149292945862, + "rewards/rejected": -0.13983473181724548, + "step": 3099 + }, + { + "epoch": 2.143845089903181, + "grad_norm": 9.967931747436523, + "learning_rate": 4.3645305056093436e-05, + "log_odds_chosen": 5.426502704620361, + "log_odds_ratio": -0.5206568241119385, + "logits/chosen": -0.6620529890060425, + "logits/rejected": -0.6964467763900757, + "logps/chosen": -0.03018881566822529, + "logps/rejected": -0.8304793238639832, + "loss": 3.2589, + "nll_loss": 0.7626627683639526, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0030188814271241426, + "rewards/margins": 0.08002904802560806, + "rewards/rejected": -0.08304793387651443, + "step": 3100 + }, + { + "epoch": 2.144536652835408, + "grad_norm": 5.0591020584106445, + "learning_rate": 4.3641463039803296e-05, + "log_odds_chosen": 6.7463483810424805, + "log_odds_ratio": -0.010486850515007973, + "logits/chosen": -0.3661188781261444, + "logits/rejected": -0.32782259583473206, + "logps/chosen": -0.02775876224040985, + "logps/rejected": -1.2433809041976929, + "loss": 1.8088, + "nll_loss": 0.4511429965496063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002775876084342599, + "rewards/margins": 0.12156221270561218, + "rewards/rejected": -0.12433809041976929, + "step": 3101 + }, + { + "epoch": 2.145228215767635, + "grad_norm": 6.19116735458374, + "learning_rate": 4.363762102351314e-05, + "log_odds_chosen": 6.955341815948486, + "log_odds_ratio": -0.01190902478992939, + "logits/chosen": -0.6434040069580078, + "logits/rejected": -0.6938708424568176, + "logps/chosen": -0.016568060964345932, + "logps/rejected": -1.0684852600097656, + "loss": 2.4648, + "nll_loss": 0.6150209903717041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016568060964345932, + "rewards/margins": 0.10519173741340637, + "rewards/rejected": -0.10684853047132492, + "step": 3102 + }, + { + "epoch": 2.1459197786998616, + "grad_norm": 6.833184242248535, + "learning_rate": 4.3633779007222994e-05, + "log_odds_chosen": 5.31828498840332, + "log_odds_ratio": -0.09289534389972687, + "logits/chosen": -0.6373336315155029, + "logits/rejected": -0.6737222075462341, + "logps/chosen": -0.03717003017663956, + "logps/rejected": -0.8076385855674744, + "loss": 2.3527, + "nll_loss": 0.5788832306861877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037170026917010546, + "rewards/margins": 0.07704685628414154, + "rewards/rejected": -0.08076386153697968, + "step": 3103 + }, + { + "epoch": 2.1466113416320884, + "grad_norm": 5.522740364074707, + "learning_rate": 4.3629936990932846e-05, + "log_odds_chosen": 8.604452133178711, + "log_odds_ratio": -0.0005866825231350958, + "logits/chosen": -0.1628008484840393, + "logits/rejected": -0.1629152148962021, + "logps/chosen": -0.0003812982467934489, + "logps/rejected": -0.9394527673721313, + "loss": 2.0881, + "nll_loss": 0.5219618678092957, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.812982322415337e-05, + "rewards/margins": 0.09390714764595032, + "rewards/rejected": -0.09394527971744537, + "step": 3104 + }, + { + "epoch": 2.1473029045643153, + "grad_norm": 8.878650665283203, + "learning_rate": 4.362609497464269e-05, + "log_odds_chosen": 7.132782459259033, + "log_odds_ratio": -0.0020438514184206724, + "logits/chosen": -0.4118482768535614, + "logits/rejected": -0.41403189301490784, + "logps/chosen": -0.0040047611109912395, + "logps/rejected": -0.7762624025344849, + "loss": 2.7574, + "nll_loss": 0.6891574859619141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040047609945759177, + "rewards/margins": 0.07722576707601547, + "rewards/rejected": -0.07762623578310013, + "step": 3105 + }, + { + "epoch": 2.147994467496542, + "grad_norm": 7.991227626800537, + "learning_rate": 4.3622252958352544e-05, + "log_odds_chosen": 7.156825065612793, + "log_odds_ratio": -0.030668115243315697, + "logits/chosen": -0.40880826115608215, + "logits/rejected": -0.4779745936393738, + "logps/chosen": -0.01195601373910904, + "logps/rejected": -1.2787102460861206, + "loss": 2.6504, + "nll_loss": 0.6595234274864197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011956014204770327, + "rewards/margins": 0.12667542695999146, + "rewards/rejected": -0.12787103652954102, + "step": 3106 + }, + { + "epoch": 2.148686030428769, + "grad_norm": 6.472316265106201, + "learning_rate": 4.3618410942062397e-05, + "log_odds_chosen": 7.069546699523926, + "log_odds_ratio": -0.16113081574440002, + "logits/chosen": -0.6364884972572327, + "logits/rejected": -0.6532797813415527, + "logps/chosen": -0.029024068266153336, + "logps/rejected": -1.284508228302002, + "loss": 2.3296, + "nll_loss": 0.5662857890129089, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002902406733483076, + "rewards/margins": 0.12554842233657837, + "rewards/rejected": -0.12845084071159363, + "step": 3107 + }, + { + "epoch": 2.1493775933609958, + "grad_norm": 10.088801383972168, + "learning_rate": 4.361456892577225e-05, + "log_odds_chosen": 6.0846967697143555, + "log_odds_ratio": -0.06640740483999252, + "logits/chosen": -0.5729522705078125, + "logits/rejected": -0.6283227205276489, + "logps/chosen": -0.029298869892954826, + "logps/rejected": -1.473647117614746, + "loss": 2.8737, + "nll_loss": 0.7117915153503418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002929887268692255, + "rewards/margins": 0.1444348245859146, + "rewards/rejected": -0.14736472070217133, + "step": 3108 + }, + { + "epoch": 2.1500691562932226, + "grad_norm": 9.722908973693848, + "learning_rate": 4.3610726909482095e-05, + "log_odds_chosen": 5.92418098449707, + "log_odds_ratio": -0.29958927631378174, + "logits/chosen": -0.45902007818222046, + "logits/rejected": -0.5431629419326782, + "logps/chosen": -0.03596596419811249, + "logps/rejected": -1.0880738496780396, + "loss": 2.997, + "nll_loss": 0.7192915678024292, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003596596186980605, + "rewards/margins": 0.1052107885479927, + "rewards/rejected": -0.10880738496780396, + "step": 3109 + }, + { + "epoch": 2.1507607192254494, + "grad_norm": 5.922492980957031, + "learning_rate": 4.3606884893191954e-05, + "log_odds_chosen": 6.920762538909912, + "log_odds_ratio": -0.004645219072699547, + "logits/chosen": -0.6970812082290649, + "logits/rejected": -0.7362354397773743, + "logps/chosen": -0.01976136490702629, + "logps/rejected": -1.1406362056732178, + "loss": 3.0133, + "nll_loss": 0.7528709173202515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019761365838348866, + "rewards/margins": 0.11208748817443848, + "rewards/rejected": -0.11406362056732178, + "step": 3110 + }, + { + "epoch": 2.1514522821576763, + "grad_norm": 5.159032821655273, + "learning_rate": 4.36030428769018e-05, + "log_odds_chosen": 3.858637809753418, + "log_odds_ratio": -0.22229480743408203, + "logits/chosen": -0.6306631565093994, + "logits/rejected": -0.6449373960494995, + "logps/chosen": -0.10265744477510452, + "logps/rejected": -1.142212986946106, + "loss": 2.3788, + "nll_loss": 0.5724680423736572, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010265744291245937, + "rewards/margins": 0.10395555198192596, + "rewards/rejected": -0.11422129720449448, + "step": 3111 + }, + { + "epoch": 2.152143845089903, + "grad_norm": 7.787853717803955, + "learning_rate": 4.359920086061165e-05, + "log_odds_chosen": 6.308670997619629, + "log_odds_ratio": -0.008247793652117252, + "logits/chosen": -0.37978595495224, + "logits/rejected": -0.432533860206604, + "logps/chosen": -0.012554554268717766, + "logps/rejected": -1.0429201126098633, + "loss": 2.0206, + "nll_loss": 0.5043319463729858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012554554268717766, + "rewards/margins": 0.1030365526676178, + "rewards/rejected": -0.10429200530052185, + "step": 3112 + }, + { + "epoch": 2.15283540802213, + "grad_norm": 8.554698944091797, + "learning_rate": 4.3595358844321504e-05, + "log_odds_chosen": 6.984411716461182, + "log_odds_ratio": -0.005794777534902096, + "logits/chosen": -0.4820101261138916, + "logits/rejected": -0.6359446048736572, + "logps/chosen": -0.013413993641734123, + "logps/rejected": -1.2459449768066406, + "loss": 2.9193, + "nll_loss": 0.7292553186416626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013413995038717985, + "rewards/margins": 0.12325310707092285, + "rewards/rejected": -0.12459450215101242, + "step": 3113 + }, + { + "epoch": 2.1535269709543567, + "grad_norm": 12.569887161254883, + "learning_rate": 4.359151682803135e-05, + "log_odds_chosen": 6.896792411804199, + "log_odds_ratio": -0.009464538656175137, + "logits/chosen": -0.2823646068572998, + "logits/rejected": -0.3563285768032074, + "logps/chosen": -0.028261443600058556, + "logps/rejected": -1.547232985496521, + "loss": 2.4172, + "nll_loss": 0.6033486723899841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028261442203074694, + "rewards/margins": 0.15189716219902039, + "rewards/rejected": -0.15472330152988434, + "step": 3114 + }, + { + "epoch": 2.1542185338865836, + "grad_norm": 12.023890495300293, + "learning_rate": 4.35876748117412e-05, + "log_odds_chosen": 7.172085285186768, + "log_odds_ratio": -0.012960941530764103, + "logits/chosen": -0.6027337312698364, + "logits/rejected": -0.6425750851631165, + "logps/chosen": -0.01669890806078911, + "logps/rejected": -1.2620288133621216, + "loss": 3.2111, + "nll_loss": 0.8014745712280273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016698910621926188, + "rewards/margins": 0.12453299760818481, + "rewards/rejected": -0.12620288133621216, + "step": 3115 + }, + { + "epoch": 2.1549100968188104, + "grad_norm": 12.55388069152832, + "learning_rate": 4.3583832795451055e-05, + "log_odds_chosen": 7.494009971618652, + "log_odds_ratio": -0.02409595623612404, + "logits/chosen": -0.3516850769519806, + "logits/rejected": -0.4178076386451721, + "logps/chosen": -0.009373624809086323, + "logps/rejected": -1.325321078300476, + "loss": 2.9038, + "nll_loss": 0.7235294580459595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009373623761348426, + "rewards/margins": 0.13159474730491638, + "rewards/rejected": -0.13253211975097656, + "step": 3116 + }, + { + "epoch": 2.1556016597510372, + "grad_norm": 6.692192554473877, + "learning_rate": 4.357999077916091e-05, + "log_odds_chosen": 8.052202224731445, + "log_odds_ratio": -0.08095759153366089, + "logits/chosen": -0.4351692795753479, + "logits/rejected": -0.5287215113639832, + "logps/chosen": -0.036751508712768555, + "logps/rejected": -1.66717529296875, + "loss": 2.1873, + "nll_loss": 0.5387200117111206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003675150917842984, + "rewards/margins": 0.16304238140583038, + "rewards/rejected": -0.166717529296875, + "step": 3117 + }, + { + "epoch": 2.156293222683264, + "grad_norm": 5.936384677886963, + "learning_rate": 4.357614876287075e-05, + "log_odds_chosen": 6.1470746994018555, + "log_odds_ratio": -0.04051423817873001, + "logits/chosen": -0.3913887143135071, + "logits/rejected": -0.4102795720100403, + "logps/chosen": -0.033366814255714417, + "logps/rejected": -1.026632308959961, + "loss": 2.2855, + "nll_loss": 0.5673317909240723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003336681518703699, + "rewards/margins": 0.09932655096054077, + "rewards/rejected": -0.10266323387622833, + "step": 3118 + }, + { + "epoch": 2.156984785615491, + "grad_norm": 11.439972877502441, + "learning_rate": 4.357230674658061e-05, + "log_odds_chosen": 7.716203689575195, + "log_odds_ratio": -0.002041627187281847, + "logits/chosen": -0.4915839433670044, + "logits/rejected": -0.5531714558601379, + "logps/chosen": -0.0034083856735378504, + "logps/rejected": -1.5144751071929932, + "loss": 2.4189, + "nll_loss": 0.6045123934745789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034083856735378504, + "rewards/margins": 0.15110668540000916, + "rewards/rejected": -0.15144753456115723, + "step": 3119 + }, + { + "epoch": 2.1576763485477177, + "grad_norm": 7.074250221252441, + "learning_rate": 4.356846473029046e-05, + "log_odds_chosen": 4.405117034912109, + "log_odds_ratio": -0.1953820288181305, + "logits/chosen": -0.43208175897598267, + "logits/rejected": -0.4175960123538971, + "logps/chosen": -0.13776032626628876, + "logps/rejected": -1.0861680507659912, + "loss": 1.9592, + "nll_loss": 0.4702651798725128, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013776032254099846, + "rewards/margins": 0.09484077990055084, + "rewards/rejected": -0.10861679911613464, + "step": 3120 + }, + { + "epoch": 2.1583679114799446, + "grad_norm": 5.285325527191162, + "learning_rate": 4.356462271400031e-05, + "log_odds_chosen": 7.057203769683838, + "log_odds_ratio": -0.005945372395217419, + "logits/chosen": -0.3348833918571472, + "logits/rejected": -0.41188618540763855, + "logps/chosen": -0.012273477390408516, + "logps/rejected": -1.1557308435440063, + "loss": 2.3991, + "nll_loss": 0.599189817905426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012273476459085941, + "rewards/margins": 0.1143457442522049, + "rewards/rejected": -0.11557309329509735, + "step": 3121 + }, + { + "epoch": 2.1590594744121714, + "grad_norm": 7.700890064239502, + "learning_rate": 4.356078069771016e-05, + "log_odds_chosen": 6.288533687591553, + "log_odds_ratio": -0.031821660697460175, + "logits/chosen": -0.9171697497367859, + "logits/rejected": -0.9709770679473877, + "logps/chosen": -0.018146559596061707, + "logps/rejected": -1.7577550411224365, + "loss": 2.243, + "nll_loss": 0.5575743913650513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018146559596061707, + "rewards/margins": 0.1739608645439148, + "rewards/rejected": -0.17577551305294037, + "step": 3122 + }, + { + "epoch": 2.159751037344398, + "grad_norm": 8.697759628295898, + "learning_rate": 4.3556938681420015e-05, + "log_odds_chosen": 8.340778350830078, + "log_odds_ratio": -0.003053261898458004, + "logits/chosen": -0.5757359266281128, + "logits/rejected": -0.6766760349273682, + "logps/chosen": -0.013043480925261974, + "logps/rejected": -1.9186817407608032, + "loss": 2.648, + "nll_loss": 0.6617018580436707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013043482322245836, + "rewards/margins": 0.19056381285190582, + "rewards/rejected": -0.19186817109584808, + "step": 3123 + }, + { + "epoch": 2.160442600276625, + "grad_norm": 6.629286289215088, + "learning_rate": 4.355309666512986e-05, + "log_odds_chosen": 7.6335368156433105, + "log_odds_ratio": -0.11332618445158005, + "logits/chosen": -0.316942036151886, + "logits/rejected": -0.31863781809806824, + "logps/chosen": -0.021302711218595505, + "logps/rejected": -0.7485167980194092, + "loss": 2.1456, + "nll_loss": 0.5250740647315979, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002130271168425679, + "rewards/margins": 0.07272141426801682, + "rewards/rejected": -0.07485168427228928, + "step": 3124 + }, + { + "epoch": 2.161134163208852, + "grad_norm": 4.9336323738098145, + "learning_rate": 4.3549254648839713e-05, + "log_odds_chosen": 5.858908176422119, + "log_odds_ratio": -0.13920627534389496, + "logits/chosen": -0.09984740614891052, + "logits/rejected": -0.13050922751426697, + "logps/chosen": -0.1269267499446869, + "logps/rejected": -1.3792228698730469, + "loss": 2.5293, + "nll_loss": 0.6184045672416687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012692674063146114, + "rewards/margins": 0.125229611992836, + "rewards/rejected": -0.1379222869873047, + "step": 3125 + }, + { + "epoch": 2.1618257261410787, + "grad_norm": 7.739724636077881, + "learning_rate": 4.3545412632549566e-05, + "log_odds_chosen": 6.311422348022461, + "log_odds_ratio": -0.011560730636119843, + "logits/chosen": -0.6573699116706848, + "logits/rejected": -0.6942727565765381, + "logps/chosen": -0.027487125247716904, + "logps/rejected": -1.0137187242507935, + "loss": 3.0823, + "nll_loss": 0.7694215178489685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00274871289730072, + "rewards/margins": 0.09862315654754639, + "rewards/rejected": -0.1013718843460083, + "step": 3126 + }, + { + "epoch": 2.1625172890733055, + "grad_norm": 5.675347328186035, + "learning_rate": 4.354157061625941e-05, + "log_odds_chosen": 6.896705627441406, + "log_odds_ratio": -0.012512540444731712, + "logits/chosen": -0.49224478006362915, + "logits/rejected": -0.6597970128059387, + "logps/chosen": -0.014489945955574512, + "logps/rejected": -1.1900012493133545, + "loss": 1.9826, + "nll_loss": 0.4943990111351013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014489946188405156, + "rewards/margins": 0.11755111813545227, + "rewards/rejected": -0.11900011450052261, + "step": 3127 + }, + { + "epoch": 2.1632088520055324, + "grad_norm": 7.413966655731201, + "learning_rate": 4.353772859996927e-05, + "log_odds_chosen": 6.049656867980957, + "log_odds_ratio": -0.06955083459615707, + "logits/chosen": -0.37585240602493286, + "logits/rejected": -0.3701566457748413, + "logps/chosen": -0.034673117101192474, + "logps/rejected": -1.2663806676864624, + "loss": 2.9125, + "nll_loss": 0.721172571182251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00346731161698699, + "rewards/margins": 0.12317074835300446, + "rewards/rejected": -0.12663805484771729, + "step": 3128 + }, + { + "epoch": 2.163900414937759, + "grad_norm": 4.2426042556762695, + "learning_rate": 4.3533886583679116e-05, + "log_odds_chosen": 6.2769880294799805, + "log_odds_ratio": -0.04037241265177727, + "logits/chosen": -0.38149911165237427, + "logits/rejected": -0.46072056889533997, + "logps/chosen": -0.030505575239658356, + "logps/rejected": -1.0659232139587402, + "loss": 2.2097, + "nll_loss": 0.5483903884887695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003050557803362608, + "rewards/margins": 0.10354176163673401, + "rewards/rejected": -0.1065923273563385, + "step": 3129 + }, + { + "epoch": 2.164591977869986, + "grad_norm": 5.698359489440918, + "learning_rate": 4.353004456738897e-05, + "log_odds_chosen": 6.9791436195373535, + "log_odds_ratio": -0.0023404499515891075, + "logits/chosen": -0.4339294731616974, + "logits/rejected": -0.439035564661026, + "logps/chosen": -0.007990580052137375, + "logps/rejected": -1.0161094665527344, + "loss": 2.305, + "nll_loss": 0.5760195255279541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007990580052137375, + "rewards/margins": 0.1008118987083435, + "rewards/rejected": -0.1016109511256218, + "step": 3130 + }, + { + "epoch": 2.165283540802213, + "grad_norm": 8.642672538757324, + "learning_rate": 4.352620255109882e-05, + "log_odds_chosen": 7.645012378692627, + "log_odds_ratio": -0.0010081573855131865, + "logits/chosen": -0.40549156069755554, + "logits/rejected": -0.5037175416946411, + "logps/chosen": -0.002098255092278123, + "logps/rejected": -1.2280879020690918, + "loss": 2.5388, + "nll_loss": 0.6345977187156677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020982550631742924, + "rewards/margins": 0.12259896099567413, + "rewards/rejected": -0.1228087916970253, + "step": 3131 + }, + { + "epoch": 2.1659751037344397, + "grad_norm": 8.14933967590332, + "learning_rate": 4.3522360534808674e-05, + "log_odds_chosen": 8.192554473876953, + "log_odds_ratio": -0.0013432309497147799, + "logits/chosen": -0.6731882691383362, + "logits/rejected": -0.6550188064575195, + "logps/chosen": -0.00395369715988636, + "logps/rejected": -1.4148451089859009, + "loss": 1.9726, + "nll_loss": 0.49301496148109436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000395369715988636, + "rewards/margins": 0.14108915627002716, + "rewards/rejected": -0.14148451387882233, + "step": 3132 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 11.413957595825195, + "learning_rate": 4.351851851851852e-05, + "log_odds_chosen": 5.338525772094727, + "log_odds_ratio": -0.5185012221336365, + "logits/chosen": -0.24774277210235596, + "logits/rejected": -0.31962743401527405, + "logps/chosen": -0.05943586304783821, + "logps/rejected": -1.091323971748352, + "loss": 2.7867, + "nll_loss": 0.6448211669921875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005943586118519306, + "rewards/margins": 0.10318881273269653, + "rewards/rejected": -0.10913239419460297, + "step": 3133 + }, + { + "epoch": 2.1673582295988933, + "grad_norm": 7.512145519256592, + "learning_rate": 4.351467650222837e-05, + "log_odds_chosen": 6.613034725189209, + "log_odds_ratio": -0.16497647762298584, + "logits/chosen": -0.5425326824188232, + "logits/rejected": -0.55716472864151, + "logps/chosen": -0.03287721425294876, + "logps/rejected": -1.1185500621795654, + "loss": 2.4417, + "nll_loss": 0.5939371585845947, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032877211924642324, + "rewards/margins": 0.10856729000806808, + "rewards/rejected": -0.11185500025749207, + "step": 3134 + }, + { + "epoch": 2.16804979253112, + "grad_norm": 9.989569664001465, + "learning_rate": 4.3510834485938224e-05, + "log_odds_chosen": 5.303834915161133, + "log_odds_ratio": -0.14418858289718628, + "logits/chosen": -0.38814419507980347, + "logits/rejected": -0.42610323429107666, + "logps/chosen": -0.06320082396268845, + "logps/rejected": -1.138200283050537, + "loss": 3.2482, + "nll_loss": 0.7976294159889221, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006320081651210785, + "rewards/margins": 0.10749995708465576, + "rewards/rejected": -0.11382003128528595, + "step": 3135 + }, + { + "epoch": 2.168741355463347, + "grad_norm": 14.738361358642578, + "learning_rate": 4.350699246964807e-05, + "log_odds_chosen": 5.855432987213135, + "log_odds_ratio": -0.2356831133365631, + "logits/chosen": -0.7010008692741394, + "logits/rejected": -0.7187799215316772, + "logps/chosen": -0.030093370005488396, + "logps/rejected": -1.0057425498962402, + "loss": 3.5095, + "nll_loss": 0.853812575340271, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0030093372333794832, + "rewards/margins": 0.0975649282336235, + "rewards/rejected": -0.10057426989078522, + "step": 3136 + }, + { + "epoch": 2.169432918395574, + "grad_norm": 8.910038948059082, + "learning_rate": 4.350315045335793e-05, + "log_odds_chosen": 7.2788405418396, + "log_odds_ratio": -0.002646538894623518, + "logits/chosen": -0.22019320726394653, + "logits/rejected": -0.28553158044815063, + "logps/chosen": -0.0055422307923436165, + "logps/rejected": -1.1429450511932373, + "loss": 2.859, + "nll_loss": 0.7144944667816162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005542230792343616, + "rewards/margins": 0.11374028027057648, + "rewards/rejected": -0.11429451406002045, + "step": 3137 + }, + { + "epoch": 2.1701244813278007, + "grad_norm": 8.61752700805664, + "learning_rate": 4.3499308437067775e-05, + "log_odds_chosen": 7.681922912597656, + "log_odds_ratio": -0.003510521724820137, + "logits/chosen": -0.4615509510040283, + "logits/rejected": -0.5451355576515198, + "logps/chosen": -0.009908279404044151, + "logps/rejected": -1.4224928617477417, + "loss": 2.6812, + "nll_loss": 0.6699454188346863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009908280335366726, + "rewards/margins": 0.14125844836235046, + "rewards/rejected": -0.14224928617477417, + "step": 3138 + }, + { + "epoch": 2.1708160442600275, + "grad_norm": 5.192074775695801, + "learning_rate": 4.349546642077763e-05, + "log_odds_chosen": 8.567987442016602, + "log_odds_ratio": -0.005496464669704437, + "logits/chosen": -0.4235052466392517, + "logits/rejected": -0.49524080753326416, + "logps/chosen": -0.019714895635843277, + "logps/rejected": -1.7913901805877686, + "loss": 2.0488, + "nll_loss": 0.5116626024246216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019714897498488426, + "rewards/margins": 0.17716753482818604, + "rewards/rejected": -0.17913901805877686, + "step": 3139 + }, + { + "epoch": 2.1715076071922543, + "grad_norm": 8.280911445617676, + "learning_rate": 4.349162440448748e-05, + "log_odds_chosen": 5.522249221801758, + "log_odds_ratio": -0.041618652641773224, + "logits/chosen": -0.22748862206935883, + "logits/rejected": -0.18105798959732056, + "logps/chosen": -0.028005346655845642, + "logps/rejected": -1.168099045753479, + "loss": 2.8935, + "nll_loss": 0.7192178964614868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028005349449813366, + "rewards/margins": 0.11400936543941498, + "rewards/rejected": -0.1168099045753479, + "step": 3140 + }, + { + "epoch": 2.172199170124481, + "grad_norm": 9.586698532104492, + "learning_rate": 4.348778238819733e-05, + "log_odds_chosen": 5.8105058670043945, + "log_odds_ratio": -0.25834015011787415, + "logits/chosen": -0.427727073431015, + "logits/rejected": -0.4316749572753906, + "logps/chosen": -0.05228612199425697, + "logps/rejected": -1.0495209693908691, + "loss": 1.6904, + "nll_loss": 0.39677131175994873, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00522861210629344, + "rewards/margins": 0.09972350299358368, + "rewards/rejected": -0.10495211184024811, + "step": 3141 + }, + { + "epoch": 2.172890733056708, + "grad_norm": 9.963493347167969, + "learning_rate": 4.348394037190718e-05, + "log_odds_chosen": 5.8443074226379395, + "log_odds_ratio": -0.05553425848484039, + "logits/chosen": -0.5284481048583984, + "logits/rejected": -0.5299968719482422, + "logps/chosen": -0.021340377628803253, + "logps/rejected": -0.9841907024383545, + "loss": 3.4941, + "nll_loss": 0.8679712414741516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021340379025787115, + "rewards/margins": 0.09628503769636154, + "rewards/rejected": -0.09841907024383545, + "step": 3142 + }, + { + "epoch": 2.173582295988935, + "grad_norm": 7.235234260559082, + "learning_rate": 4.348009835561703e-05, + "log_odds_chosen": 5.568575382232666, + "log_odds_ratio": -0.010355843231081963, + "logits/chosen": -0.43284177780151367, + "logits/rejected": -0.4781281650066376, + "logps/chosen": -0.014164619147777557, + "logps/rejected": -0.929655909538269, + "loss": 1.9955, + "nll_loss": 0.49784544110298157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014164620079100132, + "rewards/margins": 0.09154912829399109, + "rewards/rejected": -0.09296559542417526, + "step": 3143 + }, + { + "epoch": 2.1742738589211617, + "grad_norm": 6.723057746887207, + "learning_rate": 4.347625633932688e-05, + "log_odds_chosen": 6.059225559234619, + "log_odds_ratio": -0.050255246460437775, + "logits/chosen": -0.4582567811012268, + "logits/rejected": -0.539722204208374, + "logps/chosen": -0.031468067318201065, + "logps/rejected": -1.1873829364776611, + "loss": 2.8291, + "nll_loss": 0.7022486925125122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031468067318201065, + "rewards/margins": 0.11559149622917175, + "rewards/rejected": -0.11873829364776611, + "step": 3144 + }, + { + "epoch": 2.1749654218533885, + "grad_norm": 4.7158203125, + "learning_rate": 4.347241432303673e-05, + "log_odds_chosen": 4.731280326843262, + "log_odds_ratio": -0.03173203766345978, + "logits/chosen": -0.48660892248153687, + "logits/rejected": -0.515285074710846, + "logps/chosen": -0.056296419352293015, + "logps/rejected": -0.9932827949523926, + "loss": 2.2232, + "nll_loss": 0.5526218414306641, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0056296419352293015, + "rewards/margins": 0.093698650598526, + "rewards/rejected": -0.09932827949523926, + "step": 3145 + }, + { + "epoch": 2.1756569847856153, + "grad_norm": 5.949375629425049, + "learning_rate": 4.346857230674658e-05, + "log_odds_chosen": 6.329002380371094, + "log_odds_ratio": -0.013269197195768356, + "logits/chosen": -0.4359148442745209, + "logits/rejected": -0.42959028482437134, + "logps/chosen": -0.028586188331246376, + "logps/rejected": -1.123631238937378, + "loss": 3.5716, + "nll_loss": 0.8915849924087524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002858618972823024, + "rewards/margins": 0.10950449109077454, + "rewards/rejected": -0.11236311495304108, + "step": 3146 + }, + { + "epoch": 2.176348547717842, + "grad_norm": 7.0173845291137695, + "learning_rate": 4.346473029045643e-05, + "log_odds_chosen": 7.977410316467285, + "log_odds_ratio": -0.00586349843069911, + "logits/chosen": 0.18461117148399353, + "logits/rejected": 0.1510668843984604, + "logps/chosen": -0.0582503117620945, + "logps/rejected": -2.1898932456970215, + "loss": 2.341, + "nll_loss": 0.5846550464630127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00582503154873848, + "rewards/margins": 0.2131642997264862, + "rewards/rejected": -0.2189893275499344, + "step": 3147 + }, + { + "epoch": 2.177040110650069, + "grad_norm": 7.535158157348633, + "learning_rate": 4.3460888274166286e-05, + "log_odds_chosen": 6.55058479309082, + "log_odds_ratio": -0.009487954899668694, + "logits/chosen": -0.4937528967857361, + "logits/rejected": -0.5068577527999878, + "logps/chosen": -0.03335797041654587, + "logps/rejected": -1.729652762413025, + "loss": 2.6839, + "nll_loss": 0.670030415058136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033357972279191017, + "rewards/margins": 0.16962948441505432, + "rewards/rejected": -0.17296528816223145, + "step": 3148 + }, + { + "epoch": 2.177731673582296, + "grad_norm": 4.840470790863037, + "learning_rate": 4.345704625787613e-05, + "log_odds_chosen": 5.8305511474609375, + "log_odds_ratio": -0.0832158774137497, + "logits/chosen": -0.1915377974510193, + "logits/rejected": -0.14588585495948792, + "logps/chosen": -0.017783522605895996, + "logps/rejected": -0.8130568265914917, + "loss": 1.8575, + "nll_loss": 0.4560588598251343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017783523071557283, + "rewards/margins": 0.07952733337879181, + "rewards/rejected": -0.08130568265914917, + "step": 3149 + }, + { + "epoch": 2.1784232365145226, + "grad_norm": 6.872931003570557, + "learning_rate": 4.345320424158599e-05, + "log_odds_chosen": 4.3958892822265625, + "log_odds_ratio": -0.44033169746398926, + "logits/chosen": -0.5839468240737915, + "logits/rejected": -0.6039784550666809, + "logps/chosen": -0.09881820529699326, + "logps/rejected": -0.9070853590965271, + "loss": 2.3977, + "nll_loss": 0.555400550365448, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009881820529699326, + "rewards/margins": 0.08082671463489532, + "rewards/rejected": -0.09070853888988495, + "step": 3150 + }, + { + "epoch": 2.1791147994467495, + "grad_norm": 7.127779006958008, + "learning_rate": 4.3449362225295836e-05, + "log_odds_chosen": 5.093412399291992, + "log_odds_ratio": -0.0317099504172802, + "logits/chosen": -0.38323113322257996, + "logits/rejected": -0.4130544662475586, + "logps/chosen": -0.028745543211698532, + "logps/rejected": -1.1016664505004883, + "loss": 2.6644, + "nll_loss": 0.6629339456558228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028745546005666256, + "rewards/margins": 0.10729208588600159, + "rewards/rejected": -0.11016664654016495, + "step": 3151 + }, + { + "epoch": 2.1798063623789763, + "grad_norm": 10.824358940124512, + "learning_rate": 4.344552020900569e-05, + "log_odds_chosen": 5.998074531555176, + "log_odds_ratio": -0.14802958071231842, + "logits/chosen": -0.4905872344970703, + "logits/rejected": -0.5300999283790588, + "logps/chosen": -0.046079330146312714, + "logps/rejected": -1.1805709600448608, + "loss": 3.2875, + "nll_loss": 0.8070785999298096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004607933573424816, + "rewards/margins": 0.11344917863607407, + "rewards/rejected": -0.11805711686611176, + "step": 3152 + }, + { + "epoch": 2.180497925311203, + "grad_norm": 6.493251323699951, + "learning_rate": 4.344167819271554e-05, + "log_odds_chosen": 7.639892101287842, + "log_odds_ratio": -0.05055145174264908, + "logits/chosen": -0.6294256448745728, + "logits/rejected": -0.7311701774597168, + "logps/chosen": -0.01981090009212494, + "logps/rejected": -1.5551573038101196, + "loss": 2.365, + "nll_loss": 0.5861850380897522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019810900557786226, + "rewards/margins": 0.1535346359014511, + "rewards/rejected": -0.15551573038101196, + "step": 3153 + }, + { + "epoch": 2.18118948824343, + "grad_norm": 8.375, + "learning_rate": 4.343783617642539e-05, + "log_odds_chosen": 6.690349578857422, + "log_odds_ratio": -0.008571420796215534, + "logits/chosen": -0.6027698516845703, + "logits/rejected": -0.6378378868103027, + "logps/chosen": -0.005035060923546553, + "logps/rejected": -0.8536246418952942, + "loss": 2.751, + "nll_loss": 0.6869030594825745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005035061039961874, + "rewards/margins": 0.0848589539527893, + "rewards/rejected": -0.08536246418952942, + "step": 3154 + }, + { + "epoch": 2.181881051175657, + "grad_norm": 5.154770851135254, + "learning_rate": 4.343399416013524e-05, + "log_odds_chosen": 5.609927177429199, + "log_odds_ratio": -0.0658092275261879, + "logits/chosen": -0.2716918885707855, + "logits/rejected": -0.33563169836997986, + "logps/chosen": -0.027060627937316895, + "logps/rejected": -0.7668888568878174, + "loss": 2.1289, + "nll_loss": 0.5256554484367371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027060629799962044, + "rewards/margins": 0.07398282736539841, + "rewards/rejected": -0.07668889313936234, + "step": 3155 + }, + { + "epoch": 2.1825726141078836, + "grad_norm": 9.937250137329102, + "learning_rate": 4.343015214384509e-05, + "log_odds_chosen": 7.801394462585449, + "log_odds_ratio": -0.004217131529003382, + "logits/chosen": -0.5749065279960632, + "logits/rejected": -0.6880556344985962, + "logps/chosen": -0.005276396404951811, + "logps/rejected": -1.3525454998016357, + "loss": 3.1324, + "nll_loss": 0.7826860547065735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005276396404951811, + "rewards/margins": 0.13472691178321838, + "rewards/rejected": -0.13525454699993134, + "step": 3156 + }, + { + "epoch": 2.1832641770401104, + "grad_norm": 5.070712566375732, + "learning_rate": 4.3426310127554944e-05, + "log_odds_chosen": 6.111451625823975, + "log_odds_ratio": -0.1284855157136917, + "logits/chosen": -0.7218311429023743, + "logits/rejected": -0.7740025520324707, + "logps/chosen": -0.03459743782877922, + "logps/rejected": -1.094872236251831, + "loss": 2.0958, + "nll_loss": 0.5111120939254761, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034597436897456646, + "rewards/margins": 0.10602748394012451, + "rewards/rejected": -0.10948723554611206, + "step": 3157 + }, + { + "epoch": 2.1839557399723377, + "grad_norm": 10.03350830078125, + "learning_rate": 4.342246811126479e-05, + "log_odds_chosen": 5.510814189910889, + "log_odds_ratio": -0.17792275547981262, + "logits/chosen": -0.505163311958313, + "logits/rejected": -0.5847234129905701, + "logps/chosen": -0.040842242538928986, + "logps/rejected": -1.2754498720169067, + "loss": 3.4138, + "nll_loss": 0.8356497883796692, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004084224347025156, + "rewards/margins": 0.12346076965332031, + "rewards/rejected": -0.12754499912261963, + "step": 3158 + }, + { + "epoch": 2.1846473029045645, + "grad_norm": 4.863821983337402, + "learning_rate": 4.341862609497465e-05, + "log_odds_chosen": 6.666403293609619, + "log_odds_ratio": -0.012772869318723679, + "logits/chosen": -0.48330166935920715, + "logits/rejected": -0.5371727347373962, + "logps/chosen": -0.033257968723773956, + "logps/rejected": -1.3910984992980957, + "loss": 1.819, + "nll_loss": 0.4534626603126526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033257966861128807, + "rewards/margins": 0.1357840597629547, + "rewards/rejected": -0.13910984992980957, + "step": 3159 + }, + { + "epoch": 2.1853388658367914, + "grad_norm": 6.057682037353516, + "learning_rate": 4.3414784078684495e-05, + "log_odds_chosen": 6.663447380065918, + "log_odds_ratio": -0.05722741037607193, + "logits/chosen": -0.7273718118667603, + "logits/rejected": -0.7586309313774109, + "logps/chosen": -0.03430384770035744, + "logps/rejected": -1.0461488962173462, + "loss": 2.3148, + "nll_loss": 0.5729818940162659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034303846769034863, + "rewards/margins": 0.10118450969457626, + "rewards/rejected": -0.10461489111185074, + "step": 3160 + }, + { + "epoch": 2.186030428769018, + "grad_norm": 11.682771682739258, + "learning_rate": 4.341094206239435e-05, + "log_odds_chosen": 8.383731842041016, + "log_odds_ratio": -0.0027355810161679983, + "logits/chosen": -0.29401373863220215, + "logits/rejected": -0.40509486198425293, + "logps/chosen": -0.0017849082360044122, + "logps/rejected": -1.3605860471725464, + "loss": 2.7221, + "nll_loss": 0.6802555322647095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017849083815235645, + "rewards/margins": 0.13588011264801025, + "rewards/rejected": -0.13605859875679016, + "step": 3161 + }, + { + "epoch": 2.186721991701245, + "grad_norm": 11.995657920837402, + "learning_rate": 4.34071000461042e-05, + "log_odds_chosen": 5.557642936706543, + "log_odds_ratio": -0.14004188776016235, + "logits/chosen": -0.2498472034931183, + "logits/rejected": -0.259319543838501, + "logps/chosen": -0.07155990600585938, + "logps/rejected": -1.4202849864959717, + "loss": 2.6449, + "nll_loss": 0.6472086906433105, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007155990693718195, + "rewards/margins": 0.13487249612808228, + "rewards/rejected": -0.14202848076820374, + "step": 3162 + }, + { + "epoch": 2.187413554633472, + "grad_norm": 9.4674654006958, + "learning_rate": 4.3403258029814045e-05, + "log_odds_chosen": 6.2325825691223145, + "log_odds_ratio": -0.1332755982875824, + "logits/chosen": -0.5602701306343079, + "logits/rejected": -0.6629458665847778, + "logps/chosen": -0.03466428816318512, + "logps/rejected": -1.2542963027954102, + "loss": 1.4881, + "nll_loss": 0.35869354009628296, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034664287231862545, + "rewards/margins": 0.12196320295333862, + "rewards/rejected": -0.12542963027954102, + "step": 3163 + }, + { + "epoch": 2.1881051175656987, + "grad_norm": 10.65891170501709, + "learning_rate": 4.33994160135239e-05, + "log_odds_chosen": 7.232670783996582, + "log_odds_ratio": -0.021584775298833847, + "logits/chosen": -0.6576371788978577, + "logits/rejected": -0.6923035979270935, + "logps/chosen": -0.01624915562570095, + "logps/rejected": -1.8470706939697266, + "loss": 3.5342, + "nll_loss": 0.8813962936401367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016249155160039663, + "rewards/margins": 0.18308216333389282, + "rewards/rejected": -0.18470707535743713, + "step": 3164 + }, + { + "epoch": 2.1887966804979255, + "grad_norm": 4.825099945068359, + "learning_rate": 4.339557399723375e-05, + "log_odds_chosen": 5.801044940948486, + "log_odds_ratio": -0.16033858060836792, + "logits/chosen": -0.3030264377593994, + "logits/rejected": -0.3055965304374695, + "logps/chosen": -0.05437358096241951, + "logps/rejected": -1.6448538303375244, + "loss": 3.0495, + "nll_loss": 0.7463371753692627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005437357816845179, + "rewards/margins": 0.15904802083969116, + "rewards/rejected": -0.1644853949546814, + "step": 3165 + }, + { + "epoch": 2.1894882434301524, + "grad_norm": 6.770928382873535, + "learning_rate": 4.33917319809436e-05, + "log_odds_chosen": 6.155390739440918, + "log_odds_ratio": -0.030024850741028786, + "logits/chosen": -0.48152872920036316, + "logits/rejected": -0.4823768436908722, + "logps/chosen": -0.020001396536827087, + "logps/rejected": -0.9744084477424622, + "loss": 2.3497, + "nll_loss": 0.5844292640686035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002000139793381095, + "rewards/margins": 0.09544070065021515, + "rewards/rejected": -0.09744083881378174, + "step": 3166 + }, + { + "epoch": 2.190179806362379, + "grad_norm": 7.0886640548706055, + "learning_rate": 4.338788996465345e-05, + "log_odds_chosen": 6.047344207763672, + "log_odds_ratio": -0.231770858168602, + "logits/chosen": -0.5151282548904419, + "logits/rejected": -0.5332531929016113, + "logps/chosen": -0.06000122055411339, + "logps/rejected": -0.7603382468223572, + "loss": 2.0776, + "nll_loss": 0.4962257444858551, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006000122055411339, + "rewards/margins": 0.07003369927406311, + "rewards/rejected": -0.0760338231921196, + "step": 3167 + }, + { + "epoch": 2.190871369294606, + "grad_norm": 6.543388366699219, + "learning_rate": 4.338404794836331e-05, + "log_odds_chosen": 7.2738542556762695, + "log_odds_ratio": -0.04685303941369057, + "logits/chosen": -0.5018479228019714, + "logits/rejected": -0.5226489305496216, + "logps/chosen": -0.02508268505334854, + "logps/rejected": -1.4995683431625366, + "loss": 2.4453, + "nll_loss": 0.6066345572471619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025082684587687254, + "rewards/margins": 0.1474485844373703, + "rewards/rejected": -0.1499568521976471, + "step": 3168 + }, + { + "epoch": 2.191562932226833, + "grad_norm": 11.554959297180176, + "learning_rate": 4.338020593207315e-05, + "log_odds_chosen": 7.814593315124512, + "log_odds_ratio": -0.0014177625998854637, + "logits/chosen": -0.47919517755508423, + "logits/rejected": -0.5403696894645691, + "logps/chosen": -0.004644293338060379, + "logps/rejected": -1.5748608112335205, + "loss": 3.3544, + "nll_loss": 0.8384552001953125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004644293221645057, + "rewards/margins": 0.1570216715335846, + "rewards/rejected": -0.15748609602451324, + "step": 3169 + }, + { + "epoch": 2.1922544951590597, + "grad_norm": 5.401383876800537, + "learning_rate": 4.3376363915783006e-05, + "log_odds_chosen": 4.879332542419434, + "log_odds_ratio": -0.09349296241998672, + "logits/chosen": -0.6353992819786072, + "logits/rejected": -0.6436095833778381, + "logps/chosen": -0.03518253192305565, + "logps/rejected": -0.8030743598937988, + "loss": 2.365, + "nll_loss": 0.5818997621536255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003518253332003951, + "rewards/margins": 0.07678918540477753, + "rewards/rejected": -0.08030743151903152, + "step": 3170 + }, + { + "epoch": 2.1929460580912865, + "grad_norm": 14.059950828552246, + "learning_rate": 4.337252189949286e-05, + "log_odds_chosen": 6.713525295257568, + "log_odds_ratio": -0.08972524106502533, + "logits/chosen": -0.9425017833709717, + "logits/rejected": -0.9960139393806458, + "logps/chosen": -0.03543572127819061, + "logps/rejected": -1.2302966117858887, + "loss": 3.1483, + "nll_loss": 0.7781030535697937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003543572500348091, + "rewards/margins": 0.11948608607053757, + "rewards/rejected": -0.1230296641588211, + "step": 3171 + }, + { + "epoch": 2.1936376210235133, + "grad_norm": 8.6504545211792, + "learning_rate": 4.3368679883202704e-05, + "log_odds_chosen": 7.600931167602539, + "log_odds_ratio": -0.0028964250814169645, + "logits/chosen": -0.5214453935623169, + "logits/rejected": -0.6939715147018433, + "logps/chosen": -0.014445780776441097, + "logps/rejected": -1.601884126663208, + "loss": 2.5539, + "nll_loss": 0.63817298412323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014445781707763672, + "rewards/margins": 0.15874382853507996, + "rewards/rejected": -0.16018840670585632, + "step": 3172 + }, + { + "epoch": 2.19432918395574, + "grad_norm": 11.834299087524414, + "learning_rate": 4.3364837866912556e-05, + "log_odds_chosen": 7.545048713684082, + "log_odds_ratio": -0.004208111669868231, + "logits/chosen": -0.34441643953323364, + "logits/rejected": -0.4657328128814697, + "logps/chosen": -0.010565445758402348, + "logps/rejected": -1.5098049640655518, + "loss": 2.9481, + "nll_loss": 0.7365975379943848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010565445991232991, + "rewards/margins": 0.14992395043373108, + "rewards/rejected": -0.15098050236701965, + "step": 3173 + }, + { + "epoch": 2.195020746887967, + "grad_norm": 5.100581169128418, + "learning_rate": 4.336099585062241e-05, + "log_odds_chosen": 6.659306526184082, + "log_odds_ratio": -0.045205675065517426, + "logits/chosen": -0.47770747542381287, + "logits/rejected": -0.43313068151474, + "logps/chosen": -0.015689756721258163, + "logps/rejected": -0.8002600073814392, + "loss": 1.6891, + "nll_loss": 0.41774749755859375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015689757419750094, + "rewards/margins": 0.07845702022314072, + "rewards/rejected": -0.08002599328756332, + "step": 3174 + }, + { + "epoch": 2.195712309820194, + "grad_norm": 5.566988468170166, + "learning_rate": 4.335715383433226e-05, + "log_odds_chosen": 7.460770130157471, + "log_odds_ratio": -0.07459472864866257, + "logits/chosen": -0.44164443016052246, + "logits/rejected": -0.47057870030403137, + "logps/chosen": -0.02058670111000538, + "logps/rejected": -1.0957387685775757, + "loss": 2.013, + "nll_loss": 0.495780348777771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020586701575666666, + "rewards/margins": 0.10751520097255707, + "rewards/rejected": -0.10957387089729309, + "step": 3175 + }, + { + "epoch": 2.1964038727524207, + "grad_norm": 8.06294059753418, + "learning_rate": 4.335331181804211e-05, + "log_odds_chosen": 7.625150203704834, + "log_odds_ratio": -0.005710828583687544, + "logits/chosen": -0.643086314201355, + "logits/rejected": -0.6431131958961487, + "logps/chosen": -0.016301624476909637, + "logps/rejected": -1.1473497152328491, + "loss": 2.9075, + "nll_loss": 0.7262943983078003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016301622381433845, + "rewards/margins": 0.11310481280088425, + "rewards/rejected": -0.11473497003316879, + "step": 3176 + }, + { + "epoch": 2.1970954356846475, + "grad_norm": 7.399056434631348, + "learning_rate": 4.3349469801751966e-05, + "log_odds_chosen": 6.73783540725708, + "log_odds_ratio": -0.18007498979568481, + "logits/chosen": -0.7180381417274475, + "logits/rejected": -0.7221200466156006, + "logps/chosen": -0.028597483411431313, + "logps/rejected": -0.8597887754440308, + "loss": 2.8968, + "nll_loss": 0.7061822414398193, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002859748201444745, + "rewards/margins": 0.08311913162469864, + "rewards/rejected": -0.08597888052463531, + "step": 3177 + }, + { + "epoch": 2.1977869986168743, + "grad_norm": 12.78672981262207, + "learning_rate": 4.334562778546181e-05, + "log_odds_chosen": 8.832401275634766, + "log_odds_ratio": -0.00019618018995970488, + "logits/chosen": -0.592092752456665, + "logits/rejected": -0.6911430954933167, + "logps/chosen": -0.0007649950566701591, + "logps/rejected": -1.6313860416412354, + "loss": 2.6642, + "nll_loss": 0.6660318970680237, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649950566701591e-05, + "rewards/margins": 0.16306212544441223, + "rewards/rejected": -0.16313862800598145, + "step": 3178 + }, + { + "epoch": 2.198478561549101, + "grad_norm": 9.519674301147461, + "learning_rate": 4.3341785769171664e-05, + "log_odds_chosen": 8.840486526489258, + "log_odds_ratio": -0.0005940008559264243, + "logits/chosen": -0.784791886806488, + "logits/rejected": -0.7828980684280396, + "logps/chosen": -0.0008778494084253907, + "logps/rejected": -1.399017572402954, + "loss": 2.7933, + "nll_loss": 0.698258101940155, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.778493793215603e-05, + "rewards/margins": 0.13981398940086365, + "rewards/rejected": -0.1399017721414566, + "step": 3179 + }, + { + "epoch": 2.199170124481328, + "grad_norm": 7.758335113525391, + "learning_rate": 4.3337943752881517e-05, + "log_odds_chosen": 8.568967819213867, + "log_odds_ratio": -0.0028527555987238884, + "logits/chosen": -0.9076350927352905, + "logits/rejected": -0.968063473701477, + "logps/chosen": -0.005145165137946606, + "logps/rejected": -1.6919078826904297, + "loss": 2.2237, + "nll_loss": 0.5556411147117615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005145165487192571, + "rewards/margins": 0.16867627203464508, + "rewards/rejected": -0.16919079422950745, + "step": 3180 + }, + { + "epoch": 2.199861687413555, + "grad_norm": 6.658428192138672, + "learning_rate": 4.333410173659136e-05, + "log_odds_chosen": 6.849039077758789, + "log_odds_ratio": -0.1127195730805397, + "logits/chosen": -0.6284754276275635, + "logits/rejected": -0.653616726398468, + "logps/chosen": -0.02700984664261341, + "logps/rejected": -1.5343716144561768, + "loss": 2.2461, + "nll_loss": 0.5502545833587646, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00270098471082747, + "rewards/margins": 0.15073618292808533, + "rewards/rejected": -0.15343716740608215, + "step": 3181 + }, + { + "epoch": 2.2005532503457816, + "grad_norm": 6.70884895324707, + "learning_rate": 4.3330259720301215e-05, + "log_odds_chosen": 7.267721652984619, + "log_odds_ratio": -0.00911356508731842, + "logits/chosen": -0.41547131538391113, + "logits/rejected": -0.4822637736797333, + "logps/chosen": -0.02417285554111004, + "logps/rejected": -1.272136926651001, + "loss": 2.368, + "nll_loss": 0.5911010503768921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024172854609787464, + "rewards/margins": 0.12479639798402786, + "rewards/rejected": -0.12721368670463562, + "step": 3182 + }, + { + "epoch": 2.2012448132780085, + "grad_norm": 7.333751201629639, + "learning_rate": 4.332641770401107e-05, + "log_odds_chosen": 5.247653007507324, + "log_odds_ratio": -0.06348458677530289, + "logits/chosen": -0.7271361351013184, + "logits/rejected": -0.7556310892105103, + "logps/chosen": -0.17119011282920837, + "logps/rejected": -1.9269461631774902, + "loss": 2.6175, + "nll_loss": 0.6480197310447693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017119012773036957, + "rewards/margins": 0.1755756139755249, + "rewards/rejected": -0.19269460439682007, + "step": 3183 + }, + { + "epoch": 2.2019363762102353, + "grad_norm": 6.730661392211914, + "learning_rate": 4.332257568772092e-05, + "log_odds_chosen": 7.7098894119262695, + "log_odds_ratio": -0.17441943287849426, + "logits/chosen": -0.4983978867530823, + "logits/rejected": -0.5662646293640137, + "logps/chosen": -0.024879546836018562, + "logps/rejected": -0.975651741027832, + "loss": 1.9166, + "nll_loss": 0.46170371770858765, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024879546836018562, + "rewards/margins": 0.09507721662521362, + "rewards/rejected": -0.0975651741027832, + "step": 3184 + }, + { + "epoch": 2.202627939142462, + "grad_norm": 8.689640045166016, + "learning_rate": 4.3318733671430765e-05, + "log_odds_chosen": 5.190764904022217, + "log_odds_ratio": -0.04406759887933731, + "logits/chosen": -0.7414398193359375, + "logits/rejected": -0.7421854734420776, + "logps/chosen": -0.07408274710178375, + "logps/rejected": -1.9030416011810303, + "loss": 2.7025, + "nll_loss": 0.6712265014648438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007408274337649345, + "rewards/margins": 0.18289589881896973, + "rewards/rejected": -0.19030416011810303, + "step": 3185 + }, + { + "epoch": 2.203319502074689, + "grad_norm": 10.43053150177002, + "learning_rate": 4.3314891655140624e-05, + "log_odds_chosen": 8.137721061706543, + "log_odds_ratio": -0.007064457517117262, + "logits/chosen": -0.40223050117492676, + "logits/rejected": -0.49093905091285706, + "logps/chosen": -0.001537282601930201, + "logps/rejected": -1.422028660774231, + "loss": 2.1063, + "nll_loss": 0.5258598327636719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015372825146187097, + "rewards/margins": 0.1420491486787796, + "rewards/rejected": -0.14220286905765533, + "step": 3186 + }, + { + "epoch": 2.204011065006916, + "grad_norm": 9.425034523010254, + "learning_rate": 4.331104963885047e-05, + "log_odds_chosen": 9.267492294311523, + "log_odds_ratio": -0.002203315496444702, + "logits/chosen": -0.7236309051513672, + "logits/rejected": -0.8363832235336304, + "logps/chosen": -0.008194814436137676, + "logps/rejected": -1.8639302253723145, + "loss": 2.013, + "nll_loss": 0.5030335187911987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008194815600290895, + "rewards/margins": 0.1855735182762146, + "rewards/rejected": -0.18639302253723145, + "step": 3187 + }, + { + "epoch": 2.2047026279391426, + "grad_norm": 6.292108058929443, + "learning_rate": 4.330720762256032e-05, + "log_odds_chosen": 5.8964643478393555, + "log_odds_ratio": -0.04489860683679581, + "logits/chosen": -0.4235934019088745, + "logits/rejected": -0.48958835005760193, + "logps/chosen": -0.018647415563464165, + "logps/rejected": -1.075378179550171, + "loss": 2.4478, + "nll_loss": 0.6074610948562622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018647415563464165, + "rewards/margins": 0.10567308962345123, + "rewards/rejected": -0.10753783583641052, + "step": 3188 + }, + { + "epoch": 2.2053941908713695, + "grad_norm": 8.567946434020996, + "learning_rate": 4.3303365606270175e-05, + "log_odds_chosen": 8.469558715820312, + "log_odds_ratio": -0.0012380550615489483, + "logits/chosen": -0.5258292555809021, + "logits/rejected": -0.5719144344329834, + "logps/chosen": -0.004365398548543453, + "logps/rejected": -1.2858614921569824, + "loss": 2.6561, + "nll_loss": 0.6639001369476318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004365398781374097, + "rewards/margins": 0.12814961373806, + "rewards/rejected": -0.12858614325523376, + "step": 3189 + }, + { + "epoch": 2.2060857538035963, + "grad_norm": 7.210387706756592, + "learning_rate": 4.329952358998002e-05, + "log_odds_chosen": 6.826498985290527, + "log_odds_ratio": -0.052564047276973724, + "logits/chosen": -0.3505515456199646, + "logits/rejected": -0.37268006801605225, + "logps/chosen": -0.05025108531117439, + "logps/rejected": -1.5903440713882446, + "loss": 2.7754, + "nll_loss": 0.6885868310928345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005025108344852924, + "rewards/margins": 0.15400929749011993, + "rewards/rejected": -0.15903441607952118, + "step": 3190 + }, + { + "epoch": 2.206777316735823, + "grad_norm": 6.022358417510986, + "learning_rate": 4.329568157368987e-05, + "log_odds_chosen": 5.189307689666748, + "log_odds_ratio": -0.2198401540517807, + "logits/chosen": -0.7311754822731018, + "logits/rejected": -0.8276402354240417, + "logps/chosen": -0.051636867225170135, + "logps/rejected": -0.7299488186836243, + "loss": 3.4755, + "nll_loss": 0.8468843102455139, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005163686349987984, + "rewards/margins": 0.06783119589090347, + "rewards/rejected": -0.07299488037824631, + "step": 3191 + }, + { + "epoch": 2.20746887966805, + "grad_norm": 5.002099514007568, + "learning_rate": 4.3291839557399726e-05, + "log_odds_chosen": 5.58121395111084, + "log_odds_ratio": -0.09256982058286667, + "logits/chosen": -0.7110233306884766, + "logits/rejected": -0.6769241094589233, + "logps/chosen": -0.04848054423928261, + "logps/rejected": -0.9648277163505554, + "loss": 2.7033, + "nll_loss": 0.6665750741958618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004848054610192776, + "rewards/margins": 0.09163472056388855, + "rewards/rejected": -0.0964827761054039, + "step": 3192 + }, + { + "epoch": 2.2081604426002768, + "grad_norm": 5.96661376953125, + "learning_rate": 4.328799754110958e-05, + "log_odds_chosen": 3.965442657470703, + "log_odds_ratio": -0.117483951151371, + "logits/chosen": -0.41260385513305664, + "logits/rejected": -0.45036524534225464, + "logps/chosen": -0.07093721628189087, + "logps/rejected": -1.108747959136963, + "loss": 1.993, + "nll_loss": 0.4865078926086426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007093721069395542, + "rewards/margins": 0.1037810817360878, + "rewards/rejected": -0.11087480187416077, + "step": 3193 + }, + { + "epoch": 2.2088520055325036, + "grad_norm": 8.659708976745605, + "learning_rate": 4.3284155524819424e-05, + "log_odds_chosen": 7.016357421875, + "log_odds_ratio": -0.006338158156722784, + "logits/chosen": -0.5837326049804688, + "logits/rejected": -0.6578401923179626, + "logps/chosen": -0.003191157942637801, + "logps/rejected": -1.1636102199554443, + "loss": 2.2925, + "nll_loss": 0.5724791288375854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003191157302353531, + "rewards/margins": 0.11604189872741699, + "rewards/rejected": -0.11636102199554443, + "step": 3194 + }, + { + "epoch": 2.2095435684647304, + "grad_norm": 9.927667617797852, + "learning_rate": 4.328031350852928e-05, + "log_odds_chosen": 8.560630798339844, + "log_odds_ratio": -0.0008007477736100554, + "logits/chosen": -0.42888209223747253, + "logits/rejected": -0.4656681418418884, + "logps/chosen": -0.0037196280900388956, + "logps/rejected": -1.6876860857009888, + "loss": 2.4, + "nll_loss": 0.5999287962913513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003719627857208252, + "rewards/margins": 0.16839665174484253, + "rewards/rejected": -0.16876859962940216, + "step": 3195 + }, + { + "epoch": 2.2102351313969573, + "grad_norm": 6.38437557220459, + "learning_rate": 4.327647149223913e-05, + "log_odds_chosen": 7.007352352142334, + "log_odds_ratio": -0.009551974013447762, + "logits/chosen": -0.7193029522895813, + "logits/rejected": -0.759920597076416, + "logps/chosen": -0.03922717645764351, + "logps/rejected": -1.365323543548584, + "loss": 2.1614, + "nll_loss": 0.5394060611724854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003922717645764351, + "rewards/margins": 0.13260963559150696, + "rewards/rejected": -0.13653233647346497, + "step": 3196 + }, + { + "epoch": 2.210926694329184, + "grad_norm": 6.613427639007568, + "learning_rate": 4.327262947594898e-05, + "log_odds_chosen": 7.841188430786133, + "log_odds_ratio": -0.0046505313366651535, + "logits/chosen": -0.6819300651550293, + "logits/rejected": -0.7347622513771057, + "logps/chosen": -0.0033948104828596115, + "logps/rejected": -1.0823031663894653, + "loss": 2.5261, + "nll_loss": 0.6310635209083557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033948104828596115, + "rewards/margins": 0.10789082944393158, + "rewards/rejected": -0.10823030769824982, + "step": 3197 + }, + { + "epoch": 2.211618257261411, + "grad_norm": 7.287887096405029, + "learning_rate": 4.3268787459658833e-05, + "log_odds_chosen": 8.129772186279297, + "log_odds_ratio": -0.002522420370951295, + "logits/chosen": -0.7787665724754333, + "logits/rejected": -0.8593880534172058, + "logps/chosen": -0.0031739026308059692, + "logps/rejected": -1.6081467866897583, + "loss": 1.8657, + "nll_loss": 0.4661679267883301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031739025143906474, + "rewards/margins": 0.1604972928762436, + "rewards/rejected": -0.16081468760967255, + "step": 3198 + }, + { + "epoch": 2.2123098201936378, + "grad_norm": 5.78539514541626, + "learning_rate": 4.326494544336868e-05, + "log_odds_chosen": 8.055619239807129, + "log_odds_ratio": -0.0013233129866421223, + "logits/chosen": -0.6449908018112183, + "logits/rejected": -0.7909893989562988, + "logps/chosen": -0.00542853306978941, + "logps/rejected": -1.635412335395813, + "loss": 1.5937, + "nll_loss": 0.39829838275909424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005428533768281341, + "rewards/margins": 0.16299840807914734, + "rewards/rejected": -0.1635412573814392, + "step": 3199 + }, + { + "epoch": 2.2130013831258646, + "grad_norm": 8.962825775146484, + "learning_rate": 4.326110342707853e-05, + "log_odds_chosen": 6.840261459350586, + "log_odds_ratio": -0.08905114978551865, + "logits/chosen": -0.623429000377655, + "logits/rejected": -0.6256603598594666, + "logps/chosen": -0.021636225283145905, + "logps/rejected": -1.1779842376708984, + "loss": 1.8908, + "nll_loss": 0.4637908339500427, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021636225283145905, + "rewards/margins": 0.11563479900360107, + "rewards/rejected": -0.11779841780662537, + "step": 3200 + }, + { + "epoch": 2.2136929460580914, + "grad_norm": 6.071709156036377, + "learning_rate": 4.3257261410788384e-05, + "log_odds_chosen": 6.589384078979492, + "log_odds_ratio": -0.06430207192897797, + "logits/chosen": -0.5658938884735107, + "logits/rejected": -0.5847985744476318, + "logps/chosen": -0.02275737375020981, + "logps/rejected": -1.0458507537841797, + "loss": 2.4343, + "nll_loss": 0.6021410226821899, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022757374681532383, + "rewards/margins": 0.10230933874845505, + "rewards/rejected": -0.10458506643772125, + "step": 3201 + }, + { + "epoch": 2.2143845089903182, + "grad_norm": 9.392425537109375, + "learning_rate": 4.3253419394498236e-05, + "log_odds_chosen": 8.317364692687988, + "log_odds_ratio": -0.0019505569944158196, + "logits/chosen": -0.3586891293525696, + "logits/rejected": -0.41375863552093506, + "logps/chosen": -0.013265244662761688, + "logps/rejected": -1.7743667364120483, + "loss": 2.3781, + "nll_loss": 0.5943280458450317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013265246525406837, + "rewards/margins": 0.1761101484298706, + "rewards/rejected": -0.1774366796016693, + "step": 3202 + }, + { + "epoch": 2.215076071922545, + "grad_norm": 11.797636032104492, + "learning_rate": 4.324957737820808e-05, + "log_odds_chosen": 7.6043806076049805, + "log_odds_ratio": -0.04728805273771286, + "logits/chosen": -0.5296475291252136, + "logits/rejected": -0.6080471277236938, + "logps/chosen": -0.010652073659002781, + "logps/rejected": -1.2598812580108643, + "loss": 2.3432, + "nll_loss": 0.5810590982437134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010652071796357632, + "rewards/margins": 0.12492291629314423, + "rewards/rejected": -0.12598812580108643, + "step": 3203 + }, + { + "epoch": 2.215767634854772, + "grad_norm": 7.488215923309326, + "learning_rate": 4.324573536191794e-05, + "log_odds_chosen": 7.223307132720947, + "log_odds_ratio": -0.0028760689310729504, + "logits/chosen": -0.8060204982757568, + "logits/rejected": -0.8052228689193726, + "logps/chosen": -0.0104671660810709, + "logps/rejected": -1.5643423795700073, + "loss": 3.5774, + "nll_loss": 0.8940551280975342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010467165848240256, + "rewards/margins": 0.1553875207901001, + "rewards/rejected": -0.15643423795700073, + "step": 3204 + }, + { + "epoch": 2.2164591977869987, + "grad_norm": 7.110921859741211, + "learning_rate": 4.324189334562779e-05, + "log_odds_chosen": 7.80830717086792, + "log_odds_ratio": -0.018234528601169586, + "logits/chosen": -0.8252729773521423, + "logits/rejected": -0.8693796396255493, + "logps/chosen": -0.02617500349879265, + "logps/rejected": -1.7985427379608154, + "loss": 2.2648, + "nll_loss": 0.5643655061721802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026175002567470074, + "rewards/margins": 0.17723676562309265, + "rewards/rejected": -0.17985425889492035, + "step": 3205 + }, + { + "epoch": 2.2171507607192256, + "grad_norm": 5.993846893310547, + "learning_rate": 4.323805132933764e-05, + "log_odds_chosen": 7.54710578918457, + "log_odds_ratio": -0.19196613132953644, + "logits/chosen": -0.11485698819160461, + "logits/rejected": -0.1784641146659851, + "logps/chosen": -0.026814231649041176, + "logps/rejected": -1.0498394966125488, + "loss": 2.0394, + "nll_loss": 0.4906499981880188, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0026814232114702463, + "rewards/margins": 0.10230252146720886, + "rewards/rejected": -0.10498394072055817, + "step": 3206 + }, + { + "epoch": 2.2178423236514524, + "grad_norm": 10.639652252197266, + "learning_rate": 4.323420931304749e-05, + "log_odds_chosen": 8.695178031921387, + "log_odds_ratio": -0.00283455359749496, + "logits/chosen": -0.5208930969238281, + "logits/rejected": -0.571614146232605, + "logps/chosen": -0.00851532258093357, + "logps/rejected": -1.6688697338104248, + "loss": 2.6946, + "nll_loss": 0.6733658909797668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008515321533195674, + "rewards/margins": 0.16603544354438782, + "rewards/rejected": -0.16688698530197144, + "step": 3207 + }, + { + "epoch": 2.2185338865836792, + "grad_norm": 7.5925798416137695, + "learning_rate": 4.323036729675734e-05, + "log_odds_chosen": 8.466772079467773, + "log_odds_ratio": -0.0017478003865107894, + "logits/chosen": -0.5638378262519836, + "logits/rejected": -0.6701875925064087, + "logps/chosen": -0.002004731446504593, + "logps/rejected": -1.594688892364502, + "loss": 2.7888, + "nll_loss": 0.697022020816803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020047312136739492, + "rewards/margins": 0.15926840901374817, + "rewards/rejected": -0.1594688892364502, + "step": 3208 + }, + { + "epoch": 2.219225449515906, + "grad_norm": 6.606655597686768, + "learning_rate": 4.322652528046719e-05, + "log_odds_chosen": 8.772965431213379, + "log_odds_ratio": -0.0006073166732676327, + "logits/chosen": -0.5933998823165894, + "logits/rejected": -0.616485595703125, + "logps/chosen": -0.0005924435099586844, + "logps/rejected": -1.153617262840271, + "loss": 1.9326, + "nll_loss": 0.4830939769744873, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9244346630293876e-05, + "rewards/margins": 0.11530248820781708, + "rewards/rejected": -0.11536173522472382, + "step": 3209 + }, + { + "epoch": 2.219917012448133, + "grad_norm": 7.080139636993408, + "learning_rate": 4.322268326417704e-05, + "log_odds_chosen": 7.402169227600098, + "log_odds_ratio": -0.04810430854558945, + "logits/chosen": -0.6404480338096619, + "logits/rejected": -0.699675440788269, + "logps/chosen": -0.021948248147964478, + "logps/rejected": -1.3484892845153809, + "loss": 2.6392, + "nll_loss": 0.6549916863441467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021948248613625765, + "rewards/margins": 0.1326541006565094, + "rewards/rejected": -0.1348489373922348, + "step": 3210 + }, + { + "epoch": 2.2206085753803597, + "grad_norm": 8.870227813720703, + "learning_rate": 4.3218841247886895e-05, + "log_odds_chosen": 6.025036334991455, + "log_odds_ratio": -0.06660658121109009, + "logits/chosen": -0.6402832865715027, + "logits/rejected": -0.6744290590286255, + "logps/chosen": -0.03994206339120865, + "logps/rejected": -1.503450870513916, + "loss": 2.9903, + "nll_loss": 0.7409057021141052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00399420689791441, + "rewards/margins": 0.1463508903980255, + "rewards/rejected": -0.1503450721502304, + "step": 3211 + }, + { + "epoch": 2.2213001383125865, + "grad_norm": 8.091143608093262, + "learning_rate": 4.321499923159674e-05, + "log_odds_chosen": 6.035751819610596, + "log_odds_ratio": -0.014101025648415089, + "logits/chosen": -0.5790939927101135, + "logits/rejected": -0.6447158455848694, + "logps/chosen": -0.15419796109199524, + "logps/rejected": -1.8588969707489014, + "loss": 2.8139, + "nll_loss": 0.7020571231842041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015419796109199524, + "rewards/margins": 0.17046990990638733, + "rewards/rejected": -0.18588969111442566, + "step": 3212 + }, + { + "epoch": 2.2219917012448134, + "grad_norm": 9.204658508300781, + "learning_rate": 4.32111572153066e-05, + "log_odds_chosen": 8.189249038696289, + "log_odds_ratio": -0.0007342756725847721, + "logits/chosen": -0.5341463685035706, + "logits/rejected": -0.541786789894104, + "logps/chosen": -0.0009174979059025645, + "logps/rejected": -1.1954847574234009, + "loss": 2.9619, + "nll_loss": 0.7404070496559143, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.174978913506493e-05, + "rewards/margins": 0.11945672333240509, + "rewards/rejected": -0.11954847723245621, + "step": 3213 + }, + { + "epoch": 2.22268326417704, + "grad_norm": 6.102440357208252, + "learning_rate": 4.3207315199016445e-05, + "log_odds_chosen": 8.242511749267578, + "log_odds_ratio": -0.0031344012822955847, + "logits/chosen": -0.5629912614822388, + "logits/rejected": -0.6202033758163452, + "logps/chosen": -0.010664651170372963, + "logps/rejected": -1.4259583950042725, + "loss": 2.9102, + "nll_loss": 0.727225661277771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001066465163603425, + "rewards/margins": 0.141529381275177, + "rewards/rejected": -0.14259584248065948, + "step": 3214 + }, + { + "epoch": 2.223374827109267, + "grad_norm": 10.480937957763672, + "learning_rate": 4.32034731827263e-05, + "log_odds_chosen": 7.406135559082031, + "log_odds_ratio": -0.09118548780679703, + "logits/chosen": -0.3470768332481384, + "logits/rejected": -0.44605544209480286, + "logps/chosen": -0.06514297425746918, + "logps/rejected": -1.6992648839950562, + "loss": 2.9795, + "nll_loss": 0.7357611060142517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00651429733261466, + "rewards/margins": 0.1634121984243393, + "rewards/rejected": -0.1699264943599701, + "step": 3215 + }, + { + "epoch": 2.224066390041494, + "grad_norm": 10.333035469055176, + "learning_rate": 4.319963116643615e-05, + "log_odds_chosen": 7.455963134765625, + "log_odds_ratio": -0.016867658123373985, + "logits/chosen": -0.5451837182044983, + "logits/rejected": -0.5722004771232605, + "logps/chosen": -0.007615362759679556, + "logps/rejected": -1.1253387928009033, + "loss": 2.4731, + "nll_loss": 0.6165924668312073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007615363574586809, + "rewards/margins": 0.11177234351634979, + "rewards/rejected": -0.11253388226032257, + "step": 3216 + }, + { + "epoch": 2.2247579529737207, + "grad_norm": 7.6712517738342285, + "learning_rate": 4.3195789150145996e-05, + "log_odds_chosen": 7.022882461547852, + "log_odds_ratio": -0.06982903182506561, + "logits/chosen": -0.428468257188797, + "logits/rejected": -0.46561765670776367, + "logps/chosen": -0.046739161014556885, + "logps/rejected": -1.5080595016479492, + "loss": 2.43, + "nll_loss": 0.6005263328552246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046739159151911736, + "rewards/margins": 0.14613203704357147, + "rewards/rejected": -0.15080595016479492, + "step": 3217 + }, + { + "epoch": 2.2254495159059475, + "grad_norm": 6.225061416625977, + "learning_rate": 4.319194713385585e-05, + "log_odds_chosen": 6.340688705444336, + "log_odds_ratio": -0.019213810563087463, + "logits/chosen": -0.4877764880657196, + "logits/rejected": -0.5567202568054199, + "logps/chosen": -0.06046876683831215, + "logps/rejected": -1.2595614194869995, + "loss": 2.7736, + "nll_loss": 0.6914803385734558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006046876776963472, + "rewards/margins": 0.11990926414728165, + "rewards/rejected": -0.12595614790916443, + "step": 3218 + }, + { + "epoch": 2.2261410788381744, + "grad_norm": 11.778791427612305, + "learning_rate": 4.31881051175657e-05, + "log_odds_chosen": 8.48779296875, + "log_odds_ratio": -0.0030446574091911316, + "logits/chosen": -0.41129714250564575, + "logits/rejected": -0.42171400785446167, + "logps/chosen": -0.009282330051064491, + "logps/rejected": -1.4498590230941772, + "loss": 2.7957, + "nll_loss": 0.6986181735992432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009282330865971744, + "rewards/margins": 0.14405766129493713, + "rewards/rejected": -0.1449858844280243, + "step": 3219 + }, + { + "epoch": 2.226832641770401, + "grad_norm": 8.739641189575195, + "learning_rate": 4.318426310127555e-05, + "log_odds_chosen": 6.724615097045898, + "log_odds_ratio": -0.20382250845432281, + "logits/chosen": -0.6981180906295776, + "logits/rejected": -0.7419140338897705, + "logps/chosen": -0.025747288018465042, + "logps/rejected": -1.162453532218933, + "loss": 2.2124, + "nll_loss": 0.5327064394950867, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002574728801846504, + "rewards/margins": 0.11367063224315643, + "rewards/rejected": -0.11624535918235779, + "step": 3220 + }, + { + "epoch": 2.227524204702628, + "grad_norm": 10.79511833190918, + "learning_rate": 4.31804210849854e-05, + "log_odds_chosen": 6.931116580963135, + "log_odds_ratio": -0.022382281720638275, + "logits/chosen": -0.3587380051612854, + "logits/rejected": -0.408026784658432, + "logps/chosen": -0.03245670348405838, + "logps/rejected": -1.308230996131897, + "loss": 2.5902, + "nll_loss": 0.6453030109405518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032456705812364817, + "rewards/margins": 0.12757742404937744, + "rewards/rejected": -0.13082310557365417, + "step": 3221 + }, + { + "epoch": 2.228215767634855, + "grad_norm": 11.780632019042969, + "learning_rate": 4.317657906869526e-05, + "log_odds_chosen": 9.010337829589844, + "log_odds_ratio": -0.0002952085924334824, + "logits/chosen": -0.4327678382396698, + "logits/rejected": -0.5492873191833496, + "logps/chosen": -0.0007466014940291643, + "logps/rejected": -1.6293330192565918, + "loss": 3.5053, + "nll_loss": 0.8762847185134888, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.466015813406557e-05, + "rewards/margins": 0.16285865008831024, + "rewards/rejected": -0.16293330490589142, + "step": 3222 + }, + { + "epoch": 2.2289073305670817, + "grad_norm": 7.237020492553711, + "learning_rate": 4.3172737052405104e-05, + "log_odds_chosen": 7.4671525955200195, + "log_odds_ratio": -0.01417066901922226, + "logits/chosen": -0.6515873670578003, + "logits/rejected": -0.7530137896537781, + "logps/chosen": -0.1246192455291748, + "logps/rejected": -1.8631937503814697, + "loss": 2.5728, + "nll_loss": 0.6417914628982544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012461923994123936, + "rewards/margins": 0.17385748028755188, + "rewards/rejected": -0.18631939589977264, + "step": 3223 + }, + { + "epoch": 2.2295988934993085, + "grad_norm": 7.527968883514404, + "learning_rate": 4.3168895036114956e-05, + "log_odds_chosen": 6.418063640594482, + "log_odds_ratio": -0.1917363405227661, + "logits/chosen": -0.35648202896118164, + "logits/rejected": -0.47666841745376587, + "logps/chosen": -0.09514914453029633, + "logps/rejected": -1.2580252885818481, + "loss": 2.0882, + "nll_loss": 0.5028823018074036, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009514914825558662, + "rewards/margins": 0.11628760397434235, + "rewards/rejected": -0.12580251693725586, + "step": 3224 + }, + { + "epoch": 2.2302904564315353, + "grad_norm": 8.227404594421387, + "learning_rate": 4.316505301982481e-05, + "log_odds_chosen": 8.37899398803711, + "log_odds_ratio": -0.0015768279554322362, + "logits/chosen": -0.7162021398544312, + "logits/rejected": -0.7547247409820557, + "logps/chosen": -0.0065619018860161304, + "logps/rejected": -1.9897502660751343, + "loss": 2.6541, + "nll_loss": 0.6633625030517578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006561902118846774, + "rewards/margins": 0.19831883907318115, + "rewards/rejected": -0.19897502660751343, + "step": 3225 + }, + { + "epoch": 2.230982019363762, + "grad_norm": 8.16875171661377, + "learning_rate": 4.3161211003534654e-05, + "log_odds_chosen": 6.907095909118652, + "log_odds_ratio": -0.03357435390353203, + "logits/chosen": -0.6958314776420593, + "logits/rejected": -0.7136378288269043, + "logps/chosen": -0.016598278656601906, + "logps/rejected": -1.4084336757659912, + "loss": 2.3213, + "nll_loss": 0.5769670605659485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016598280053585768, + "rewards/margins": 0.13918353617191315, + "rewards/rejected": -0.14084336161613464, + "step": 3226 + }, + { + "epoch": 2.231673582295989, + "grad_norm": 12.13620662689209, + "learning_rate": 4.315736898724451e-05, + "log_odds_chosen": 7.031831741333008, + "log_odds_ratio": -0.21214856207370758, + "logits/chosen": -0.37228450179100037, + "logits/rejected": -0.4435897469520569, + "logps/chosen": -0.06253484636545181, + "logps/rejected": -1.210754632949829, + "loss": 2.4154, + "nll_loss": 0.5826359391212463, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006253485102206469, + "rewards/margins": 0.11482198536396027, + "rewards/rejected": -0.12107546627521515, + "step": 3227 + }, + { + "epoch": 2.232365145228216, + "grad_norm": 8.838326454162598, + "learning_rate": 4.315352697095436e-05, + "log_odds_chosen": 6.4838547706604, + "log_odds_ratio": -0.07985038310289383, + "logits/chosen": -0.7065879106521606, + "logits/rejected": -0.7691741585731506, + "logps/chosen": -0.01937960647046566, + "logps/rejected": -0.8377599716186523, + "loss": 2.8604, + "nll_loss": 0.7071273326873779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019379606237635016, + "rewards/margins": 0.08183804154396057, + "rewards/rejected": -0.08377599716186523, + "step": 3228 + }, + { + "epoch": 2.2330567081604427, + "grad_norm": 10.37695026397705, + "learning_rate": 4.314968495466421e-05, + "log_odds_chosen": 7.060420513153076, + "log_odds_ratio": -0.003771477611735463, + "logits/chosen": -0.5641513466835022, + "logits/rejected": -0.7125513553619385, + "logps/chosen": -0.10711301118135452, + "logps/rejected": -1.5135568380355835, + "loss": 2.4957, + "nll_loss": 0.6235363483428955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010711301118135452, + "rewards/margins": 0.140644371509552, + "rewards/rejected": -0.15135568380355835, + "step": 3229 + }, + { + "epoch": 2.2337482710926695, + "grad_norm": 6.700646877288818, + "learning_rate": 4.314584293837406e-05, + "log_odds_chosen": 5.052937030792236, + "log_odds_ratio": -0.2864532768726349, + "logits/chosen": -0.41295093297958374, + "logits/rejected": -0.538131594657898, + "logps/chosen": -0.05101025477051735, + "logps/rejected": -1.1042132377624512, + "loss": 2.5972, + "nll_loss": 0.6206562519073486, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005101025104522705, + "rewards/margins": 0.10532030463218689, + "rewards/rejected": -0.1104213297367096, + "step": 3230 + }, + { + "epoch": 2.2344398340248963, + "grad_norm": 12.453567504882812, + "learning_rate": 4.3142000922083917e-05, + "log_odds_chosen": 6.392004013061523, + "log_odds_ratio": -0.27353009581565857, + "logits/chosen": -0.4739932417869568, + "logits/rejected": -0.5234993696212769, + "logps/chosen": -0.03688199445605278, + "logps/rejected": -1.5051337480545044, + "loss": 2.8271, + "nll_loss": 0.6794204711914062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036881992127746344, + "rewards/margins": 0.1468251794576645, + "rewards/rejected": -0.15051338076591492, + "step": 3231 + }, + { + "epoch": 2.235131396957123, + "grad_norm": 11.653568267822266, + "learning_rate": 4.313815890579376e-05, + "log_odds_chosen": 7.194883346557617, + "log_odds_ratio": -0.005872397683560848, + "logits/chosen": -0.10072185844182968, + "logits/rejected": -0.15041132271289825, + "logps/chosen": -0.017534758895635605, + "logps/rejected": -1.875885248184204, + "loss": 3.2902, + "nll_loss": 0.8219622373580933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017534758662804961, + "rewards/margins": 0.18583504855632782, + "rewards/rejected": -0.18758851289749146, + "step": 3232 + }, + { + "epoch": 2.23582295988935, + "grad_norm": 4.405982494354248, + "learning_rate": 4.3134316889503615e-05, + "log_odds_chosen": 9.255558013916016, + "log_odds_ratio": -0.00452328659594059, + "logits/chosen": -0.003129318356513977, + "logits/rejected": 0.01258845254778862, + "logps/chosen": -0.010522023774683475, + "logps/rejected": -1.2999018430709839, + "loss": 2.0627, + "nll_loss": 0.5152261257171631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010522024240344763, + "rewards/margins": 0.1289379894733429, + "rewards/rejected": -0.12999019026756287, + "step": 3233 + }, + { + "epoch": 2.236514522821577, + "grad_norm": 5.9882941246032715, + "learning_rate": 4.313047487321347e-05, + "log_odds_chosen": 7.687836647033691, + "log_odds_ratio": -0.0037897920701652765, + "logits/chosen": -0.625095009803772, + "logits/rejected": -0.5738711953163147, + "logps/chosen": -0.005181067157536745, + "logps/rejected": -1.1728625297546387, + "loss": 2.7346, + "nll_loss": 0.6832716464996338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005181067390367389, + "rewards/margins": 0.11676815152168274, + "rewards/rejected": -0.11728626489639282, + "step": 3234 + }, + { + "epoch": 2.2372060857538036, + "grad_norm": 9.690523147583008, + "learning_rate": 4.312663285692331e-05, + "log_odds_chosen": 6.730501174926758, + "log_odds_ratio": -0.04687266796827316, + "logits/chosen": -0.4622930884361267, + "logits/rejected": -0.4392380714416504, + "logps/chosen": -0.003654046915471554, + "logps/rejected": -0.8899112343788147, + "loss": 3.1119, + "nll_loss": 0.7732935547828674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036540470318868756, + "rewards/margins": 0.0886257141828537, + "rewards/rejected": -0.08899112045764923, + "step": 3235 + }, + { + "epoch": 2.2378976486860305, + "grad_norm": 12.868103981018066, + "learning_rate": 4.3122790840633165e-05, + "log_odds_chosen": 7.857724189758301, + "log_odds_ratio": -0.08621303737163544, + "logits/chosen": -0.08840826898813248, + "logits/rejected": -0.20376858115196228, + "logps/chosen": -0.017540447413921356, + "logps/rejected": -1.723393201828003, + "loss": 3.3885, + "nll_loss": 0.8385149240493774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017540447879582644, + "rewards/margins": 0.1705852895975113, + "rewards/rejected": -0.1723393201828003, + "step": 3236 + }, + { + "epoch": 2.2385892116182573, + "grad_norm": 8.59496784210205, + "learning_rate": 4.311894882434302e-05, + "log_odds_chosen": 7.627242088317871, + "log_odds_ratio": -0.003344690427184105, + "logits/chosen": -0.5222266912460327, + "logits/rejected": -0.507487952709198, + "logps/chosen": -0.0037036265712231398, + "logps/rejected": -1.4348680973052979, + "loss": 1.8954, + "nll_loss": 0.47351786494255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000370362657122314, + "rewards/margins": 0.14311645925045013, + "rewards/rejected": -0.14348681271076202, + "step": 3237 + }, + { + "epoch": 2.239280774550484, + "grad_norm": 6.6844072341918945, + "learning_rate": 4.311510680805287e-05, + "log_odds_chosen": 6.948589324951172, + "log_odds_ratio": -0.007444444112479687, + "logits/chosen": -0.5402169227600098, + "logits/rejected": -0.5486041307449341, + "logps/chosen": -0.01570907235145569, + "logps/rejected": -1.117315411567688, + "loss": 2.0259, + "nll_loss": 0.5057216882705688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015709070721641183, + "rewards/margins": 0.11016062647104263, + "rewards/rejected": -0.11173154413700104, + "step": 3238 + }, + { + "epoch": 2.239972337482711, + "grad_norm": 10.699847221374512, + "learning_rate": 4.3111264791762716e-05, + "log_odds_chosen": 7.9485344886779785, + "log_odds_ratio": -0.001679889508523047, + "logits/chosen": -0.6571987867355347, + "logits/rejected": -0.7422472238540649, + "logps/chosen": -0.004468269646167755, + "logps/rejected": -1.2286795377731323, + "loss": 2.6932, + "nll_loss": 0.6731306910514832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044682700536213815, + "rewards/margins": 0.12242113053798676, + "rewards/rejected": -0.12286796420812607, + "step": 3239 + }, + { + "epoch": 2.240663900414938, + "grad_norm": 10.12278938293457, + "learning_rate": 4.3107422775472575e-05, + "log_odds_chosen": 7.134811878204346, + "log_odds_ratio": -0.009079055860638618, + "logits/chosen": -0.6396535634994507, + "logits/rejected": -0.7066131234169006, + "logps/chosen": -0.008538391441106796, + "logps/rejected": -1.2243645191192627, + "loss": 2.8026, + "nll_loss": 0.6997353434562683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008538390975445509, + "rewards/margins": 0.12158262729644775, + "rewards/rejected": -0.12243646383285522, + "step": 3240 + }, + { + "epoch": 2.2413554633471646, + "grad_norm": 8.627345085144043, + "learning_rate": 4.310358075918242e-05, + "log_odds_chosen": 5.974154472351074, + "log_odds_ratio": -0.21858333051204681, + "logits/chosen": -0.43080878257751465, + "logits/rejected": -0.45782575011253357, + "logps/chosen": -0.04417749494314194, + "logps/rejected": -1.6393053531646729, + "loss": 2.274, + "nll_loss": 0.5466482639312744, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004417749587446451, + "rewards/margins": 0.15951277315616608, + "rewards/rejected": -0.16393053531646729, + "step": 3241 + }, + { + "epoch": 2.2420470262793915, + "grad_norm": 9.293315887451172, + "learning_rate": 4.309973874289227e-05, + "log_odds_chosen": 6.996009826660156, + "log_odds_ratio": -0.05223194509744644, + "logits/chosen": -0.5507470369338989, + "logits/rejected": -0.6064082980155945, + "logps/chosen": -0.024966636672616005, + "logps/rejected": -1.1820029020309448, + "loss": 3.2471, + "nll_loss": 0.8065450191497803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024966637138277292, + "rewards/margins": 0.11570362746715546, + "rewards/rejected": -0.11820029467344284, + "step": 3242 + }, + { + "epoch": 2.2427385892116183, + "grad_norm": 8.495476722717285, + "learning_rate": 4.3095896726602126e-05, + "log_odds_chosen": 7.876456260681152, + "log_odds_ratio": -0.0026620151475071907, + "logits/chosen": -0.5941371917724609, + "logits/rejected": -0.7753130793571472, + "logps/chosen": -0.013255937024950981, + "logps/rejected": -1.70023512840271, + "loss": 2.64, + "nll_loss": 0.6597373485565186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013255936792120337, + "rewards/margins": 0.16869792342185974, + "rewards/rejected": -0.17002353072166443, + "step": 3243 + }, + { + "epoch": 2.243430152143845, + "grad_norm": 3.8010642528533936, + "learning_rate": 4.309205471031197e-05, + "log_odds_chosen": 5.32291841506958, + "log_odds_ratio": -0.3219887912273407, + "logits/chosen": -0.490628182888031, + "logits/rejected": -0.5665719509124756, + "logps/chosen": -0.09193715453147888, + "logps/rejected": -0.8743967413902283, + "loss": 2.4086, + "nll_loss": 0.5699490308761597, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009193716570734978, + "rewards/margins": 0.07824596017599106, + "rewards/rejected": -0.08743968605995178, + "step": 3244 + }, + { + "epoch": 2.244121715076072, + "grad_norm": 6.200137138366699, + "learning_rate": 4.3088212694021824e-05, + "log_odds_chosen": 7.523862361907959, + "log_odds_ratio": -0.02580084837973118, + "logits/chosen": -0.7987110614776611, + "logits/rejected": -0.8353661894798279, + "logps/chosen": -0.03375190123915672, + "logps/rejected": -1.976253628730774, + "loss": 2.1664, + "nll_loss": 0.539008617401123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033751900773495436, + "rewards/margins": 0.1942501664161682, + "rewards/rejected": -0.19762536883354187, + "step": 3245 + }, + { + "epoch": 2.2448132780082988, + "grad_norm": 8.950324058532715, + "learning_rate": 4.3084370677731676e-05, + "log_odds_chosen": 8.77133560180664, + "log_odds_ratio": -0.0008398180943913758, + "logits/chosen": -0.801625669002533, + "logits/rejected": -0.8420397043228149, + "logps/chosen": -0.0011170408688485622, + "logps/rejected": -1.5125846862792969, + "loss": 2.4786, + "nll_loss": 0.6195661425590515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011170408106409013, + "rewards/margins": 0.1511467546224594, + "rewards/rejected": -0.1512584686279297, + "step": 3246 + }, + { + "epoch": 2.2455048409405256, + "grad_norm": 11.53207015991211, + "learning_rate": 4.308052866144153e-05, + "log_odds_chosen": 7.653658866882324, + "log_odds_ratio": -0.05250634253025055, + "logits/chosen": -0.7041653394699097, + "logits/rejected": -0.8292118906974792, + "logps/chosen": -0.028626399114727974, + "logps/rejected": -1.410700798034668, + "loss": 2.655, + "nll_loss": 0.6584897637367249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028626397252082825, + "rewards/margins": 0.13820745050907135, + "rewards/rejected": -0.14107009768486023, + "step": 3247 + }, + { + "epoch": 2.2461964038727524, + "grad_norm": 7.0916829109191895, + "learning_rate": 4.3076686645151374e-05, + "log_odds_chosen": 7.575862407684326, + "log_odds_ratio": -0.0012360899709165096, + "logits/chosen": -0.537795901298523, + "logits/rejected": -0.5750592947006226, + "logps/chosen": -0.009833377785980701, + "logps/rejected": -1.325676441192627, + "loss": 2.8621, + "nll_loss": 0.7153981924057007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009833378717303276, + "rewards/margins": 0.1315843164920807, + "rewards/rejected": -0.1325676441192627, + "step": 3248 + }, + { + "epoch": 2.2468879668049793, + "grad_norm": 7.13214111328125, + "learning_rate": 4.3072844628861233e-05, + "log_odds_chosen": 7.926778793334961, + "log_odds_ratio": -0.02117532305419445, + "logits/chosen": -0.764047384262085, + "logits/rejected": -0.8071606755256653, + "logps/chosen": -0.016845818608999252, + "logps/rejected": -1.3062502145767212, + "loss": 2.505, + "nll_loss": 0.6241413950920105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016845817444846034, + "rewards/margins": 0.1289404332637787, + "rewards/rejected": -0.13062502443790436, + "step": 3249 + }, + { + "epoch": 2.247579529737206, + "grad_norm": 10.385427474975586, + "learning_rate": 4.306900261257108e-05, + "log_odds_chosen": 7.289114952087402, + "log_odds_ratio": -0.011439472436904907, + "logits/chosen": -0.7682449817657471, + "logits/rejected": -0.7987627983093262, + "logps/chosen": -0.04962502047419548, + "logps/rejected": -1.5088868141174316, + "loss": 2.6635, + "nll_loss": 0.6647320985794067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004962501581758261, + "rewards/margins": 0.14592617750167847, + "rewards/rejected": -0.15088868141174316, + "step": 3250 + }, + { + "epoch": 2.248271092669433, + "grad_norm": 9.709715843200684, + "learning_rate": 4.306516059628093e-05, + "log_odds_chosen": 5.992304801940918, + "log_odds_ratio": -0.07425712794065475, + "logits/chosen": -0.8729088306427002, + "logits/rejected": -0.9067466259002686, + "logps/chosen": -0.023998547345399857, + "logps/rejected": -1.020616054534912, + "loss": 2.7299, + "nll_loss": 0.6750485897064209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002399854827672243, + "rewards/margins": 0.09966175258159637, + "rewards/rejected": -0.10206159949302673, + "step": 3251 + }, + { + "epoch": 2.2489626556016598, + "grad_norm": 8.44819450378418, + "learning_rate": 4.3061318579990784e-05, + "log_odds_chosen": 9.362162590026855, + "log_odds_ratio": -0.000212931539863348, + "logits/chosen": -0.7932662963867188, + "logits/rejected": -0.8243728876113892, + "logps/chosen": -0.0005738566978834569, + "logps/rejected": -1.580110788345337, + "loss": 2.2292, + "nll_loss": 0.5572723150253296, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7385670515941456e-05, + "rewards/margins": 0.15795369446277618, + "rewards/rejected": -0.1580110788345337, + "step": 3252 + }, + { + "epoch": 2.2496542185338866, + "grad_norm": 6.70921516418457, + "learning_rate": 4.305747656370063e-05, + "log_odds_chosen": 8.296453475952148, + "log_odds_ratio": -0.0009130655089393258, + "logits/chosen": -0.5469620823860168, + "logits/rejected": -0.7344156503677368, + "logps/chosen": -0.0072532035410404205, + "logps/rejected": -1.434815526008606, + "loss": 1.8735, + "nll_loss": 0.4682943820953369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007253203075379133, + "rewards/margins": 0.14275622367858887, + "rewards/rejected": -0.1434815526008606, + "step": 3253 + }, + { + "epoch": 2.2503457814661134, + "grad_norm": 10.811256408691406, + "learning_rate": 4.305363454741048e-05, + "log_odds_chosen": 7.021681785583496, + "log_odds_ratio": -0.006112986709922552, + "logits/chosen": -0.5054683685302734, + "logits/rejected": -0.5628206133842468, + "logps/chosen": -0.013626248575747013, + "logps/rejected": -0.9956163167953491, + "loss": 2.7577, + "nll_loss": 0.688805103302002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013626248110085726, + "rewards/margins": 0.09819900244474411, + "rewards/rejected": -0.09956163913011551, + "step": 3254 + }, + { + "epoch": 2.2510373443983402, + "grad_norm": 10.7681884765625, + "learning_rate": 4.3049792531120335e-05, + "log_odds_chosen": 6.661191940307617, + "log_odds_ratio": -0.055751726031303406, + "logits/chosen": -0.8494280576705933, + "logits/rejected": -0.9281111359596252, + "logps/chosen": -0.025588368996977806, + "logps/rejected": -1.560171127319336, + "loss": 3.1838, + "nll_loss": 0.7903729677200317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002558837179094553, + "rewards/margins": 0.15345828235149384, + "rewards/rejected": -0.15601710975170135, + "step": 3255 + }, + { + "epoch": 2.251728907330567, + "grad_norm": 11.42106819152832, + "learning_rate": 4.304595051483019e-05, + "log_odds_chosen": 6.612816333770752, + "log_odds_ratio": -0.2027987539768219, + "logits/chosen": -0.3843982517719269, + "logits/rejected": -0.5736691951751709, + "logps/chosen": -0.13335944712162018, + "logps/rejected": -1.2244727611541748, + "loss": 2.5032, + "nll_loss": 0.6055225133895874, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.013335946016013622, + "rewards/margins": 0.10911132395267487, + "rewards/rejected": -0.12244727462530136, + "step": 3256 + }, + { + "epoch": 2.252420470262794, + "grad_norm": 8.008171081542969, + "learning_rate": 4.304210849854003e-05, + "log_odds_chosen": 6.746915817260742, + "log_odds_ratio": -0.010379820130765438, + "logits/chosen": -0.6378681063652039, + "logits/rejected": -0.6194570064544678, + "logps/chosen": -0.037365447729825974, + "logps/rejected": -1.6308256387710571, + "loss": 2.3197, + "nll_loss": 0.5788797736167908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037365450989454985, + "rewards/margins": 0.15934602916240692, + "rewards/rejected": -0.1630825698375702, + "step": 3257 + }, + { + "epoch": 2.2531120331950207, + "grad_norm": 7.668332099914551, + "learning_rate": 4.303826648224989e-05, + "log_odds_chosen": 7.569102764129639, + "log_odds_ratio": -0.025676576420664787, + "logits/chosen": -0.661882758140564, + "logits/rejected": -0.6706777811050415, + "logps/chosen": -0.030474940314888954, + "logps/rejected": -1.866816759109497, + "loss": 1.7545, + "nll_loss": 0.436055988073349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030474942177534103, + "rewards/margins": 0.1836341917514801, + "rewards/rejected": -0.18668167293071747, + "step": 3258 + }, + { + "epoch": 2.2538035961272476, + "grad_norm": 7.231141090393066, + "learning_rate": 4.303442446595974e-05, + "log_odds_chosen": 9.443159103393555, + "log_odds_ratio": -0.0002675468276720494, + "logits/chosen": -0.6568310856819153, + "logits/rejected": -0.6061131954193115, + "logps/chosen": -0.00038531774771399796, + "logps/rejected": -1.2876076698303223, + "loss": 1.9718, + "nll_loss": 0.4929143190383911, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.853177622659132e-05, + "rewards/margins": 0.12872223556041718, + "rewards/rejected": -0.12876076996326447, + "step": 3259 + }, + { + "epoch": 2.2544951590594744, + "grad_norm": 11.41521167755127, + "learning_rate": 4.303058244966959e-05, + "log_odds_chosen": 7.185343265533447, + "log_odds_ratio": -0.009479331783950329, + "logits/chosen": -0.7706685662269592, + "logits/rejected": -0.9025553464889526, + "logps/chosen": -0.024524778127670288, + "logps/rejected": -1.3457211256027222, + "loss": 2.4802, + "nll_loss": 0.6191108226776123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002452477812767029, + "rewards/margins": 0.13211962580680847, + "rewards/rejected": -0.1345721185207367, + "step": 3260 + }, + { + "epoch": 2.2551867219917012, + "grad_norm": 5.6372456550598145, + "learning_rate": 4.302674043337944e-05, + "log_odds_chosen": 6.075026035308838, + "log_odds_ratio": -0.04334619268774986, + "logits/chosen": -0.6701595783233643, + "logits/rejected": -0.5777078866958618, + "logps/chosen": -0.10955867916345596, + "logps/rejected": -1.360260248184204, + "loss": 2.4343, + "nll_loss": 0.6042494177818298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010955867357552052, + "rewards/margins": 0.12507015466690063, + "rewards/rejected": -0.1360260248184204, + "step": 3261 + }, + { + "epoch": 2.255878284923928, + "grad_norm": 5.7197723388671875, + "learning_rate": 4.302289841708929e-05, + "log_odds_chosen": 4.844676971435547, + "log_odds_ratio": -0.0366857573390007, + "logits/chosen": -0.4729902446269989, + "logits/rejected": -0.44772809743881226, + "logps/chosen": -0.03843897208571434, + "logps/rejected": -0.9760329723358154, + "loss": 2.5159, + "nll_loss": 0.6253054738044739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003843897022306919, + "rewards/margins": 0.09375940263271332, + "rewards/rejected": -0.09760329872369766, + "step": 3262 + }, + { + "epoch": 2.256569847856155, + "grad_norm": 5.996796131134033, + "learning_rate": 4.301905640079914e-05, + "log_odds_chosen": 6.512656211853027, + "log_odds_ratio": -0.018681341782212257, + "logits/chosen": 0.005796484649181366, + "logits/rejected": -0.04244758188724518, + "logps/chosen": -0.033937402069568634, + "logps/rejected": -1.1949613094329834, + "loss": 2.3876, + "nll_loss": 0.5950331687927246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033937403932213783, + "rewards/margins": 0.11610238999128342, + "rewards/rejected": -0.11949612945318222, + "step": 3263 + }, + { + "epoch": 2.2572614107883817, + "grad_norm": 9.408733367919922, + "learning_rate": 4.301521438450899e-05, + "log_odds_chosen": 7.253141403198242, + "log_odds_ratio": -0.11470719426870346, + "logits/chosen": -0.7124958634376526, + "logits/rejected": -0.7846969962120056, + "logps/chosen": -0.04086989909410477, + "logps/rejected": -1.3902311325073242, + "loss": 3.3567, + "nll_loss": 0.8277014493942261, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004086989909410477, + "rewards/margins": 0.1349361389875412, + "rewards/rejected": -0.13902312517166138, + "step": 3264 + }, + { + "epoch": 2.2579529737206085, + "grad_norm": 4.989302635192871, + "learning_rate": 4.3011372368218845e-05, + "log_odds_chosen": 6.661134243011475, + "log_odds_ratio": -0.055529430508613586, + "logits/chosen": -0.5685741901397705, + "logits/rejected": -0.6001918315887451, + "logps/chosen": -0.04951227083802223, + "logps/rejected": -1.53061842918396, + "loss": 2.2204, + "nll_loss": 0.5495560765266418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004951227456331253, + "rewards/margins": 0.14811062812805176, + "rewards/rejected": -0.15306183695793152, + "step": 3265 + }, + { + "epoch": 2.2586445366528354, + "grad_norm": 5.90012264251709, + "learning_rate": 4.300753035192869e-05, + "log_odds_chosen": 7.330995082855225, + "log_odds_ratio": -0.004380673170089722, + "logits/chosen": -0.5569080710411072, + "logits/rejected": -0.5699841976165771, + "logps/chosen": -0.0508880577981472, + "logps/rejected": -2.003079891204834, + "loss": 3.2612, + "nll_loss": 0.8148605823516846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00508880615234375, + "rewards/margins": 0.19521918892860413, + "rewards/rejected": -0.20030798017978668, + "step": 3266 + }, + { + "epoch": 2.259336099585062, + "grad_norm": 11.648260116577148, + "learning_rate": 4.300368833563855e-05, + "log_odds_chosen": 7.323399543762207, + "log_odds_ratio": -0.030352065339684486, + "logits/chosen": -0.6574798226356506, + "logits/rejected": -0.7213975787162781, + "logps/chosen": -0.05200228840112686, + "logps/rejected": -1.6685948371887207, + "loss": 3.7964, + "nll_loss": 0.9460740089416504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005200228653848171, + "rewards/margins": 0.16165924072265625, + "rewards/rejected": -0.1668594777584076, + "step": 3267 + }, + { + "epoch": 2.260027662517289, + "grad_norm": 12.301355361938477, + "learning_rate": 4.2999846319348396e-05, + "log_odds_chosen": 9.659789085388184, + "log_odds_ratio": -0.013645894825458527, + "logits/chosen": -0.4796256422996521, + "logits/rejected": -0.5424432158470154, + "logps/chosen": -0.022504033520817757, + "logps/rejected": -2.019725799560547, + "loss": 3.1548, + "nll_loss": 0.7873326539993286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022504031658172607, + "rewards/margins": 0.19972217082977295, + "rewards/rejected": -0.2019725739955902, + "step": 3268 + }, + { + "epoch": 2.260719225449516, + "grad_norm": 10.616440773010254, + "learning_rate": 4.299600430305825e-05, + "log_odds_chosen": 9.40095329284668, + "log_odds_ratio": -0.0002967410546261817, + "logits/chosen": -0.5349110960960388, + "logits/rejected": -0.6757592558860779, + "logps/chosen": -0.01788119412958622, + "logps/rejected": -2.1804044246673584, + "loss": 3.1055, + "nll_loss": 0.7763397097587585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017881194362416863, + "rewards/margins": 0.21625235676765442, + "rewards/rejected": -0.21804045140743256, + "step": 3269 + }, + { + "epoch": 2.2614107883817427, + "grad_norm": 6.400076389312744, + "learning_rate": 4.29921622867681e-05, + "log_odds_chosen": 6.685382843017578, + "log_odds_ratio": -0.15850648283958435, + "logits/chosen": -0.45440176129341125, + "logits/rejected": -0.4439389109611511, + "logps/chosen": -0.025936201214790344, + "logps/rejected": -0.8457040786743164, + "loss": 2.7164, + "nll_loss": 0.6632498502731323, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002593620214611292, + "rewards/margins": 0.08197679370641708, + "rewards/rejected": -0.08457040786743164, + "step": 3270 + }, + { + "epoch": 2.2621023513139695, + "grad_norm": 6.308727264404297, + "learning_rate": 4.2988320270477947e-05, + "log_odds_chosen": 6.813342571258545, + "log_odds_ratio": -0.07909463346004486, + "logits/chosen": -0.6073681116104126, + "logits/rejected": -0.6416606903076172, + "logps/chosen": -0.01722707785665989, + "logps/rejected": -1.173327922821045, + "loss": 1.9741, + "nll_loss": 0.4856259226799011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017227077623829246, + "rewards/margins": 0.11561009287834167, + "rewards/rejected": -0.1173328086733818, + "step": 3271 + }, + { + "epoch": 2.2627939142461964, + "grad_norm": 7.209175109863281, + "learning_rate": 4.29844782541878e-05, + "log_odds_chosen": 9.962041854858398, + "log_odds_ratio": -0.0001319254224654287, + "logits/chosen": -0.6178686618804932, + "logits/rejected": -0.792114794254303, + "logps/chosen": -0.0007171865436248481, + "logps/rejected": -1.918930172920227, + "loss": 1.7354, + "nll_loss": 0.43383434414863586, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.171865581767634e-05, + "rewards/margins": 0.19182130694389343, + "rewards/rejected": -0.19189301133155823, + "step": 3272 + }, + { + "epoch": 2.263485477178423, + "grad_norm": 5.474750995635986, + "learning_rate": 4.298063623789765e-05, + "log_odds_chosen": 7.85467529296875, + "log_odds_ratio": -0.0014620490837842226, + "logits/chosen": -0.4752342998981476, + "logits/rejected": -0.4551814794540405, + "logps/chosen": -0.004117067903280258, + "logps/rejected": -1.1596763134002686, + "loss": 2.1883, + "nll_loss": 0.5469228625297546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000411706801969558, + "rewards/margins": 0.11555592715740204, + "rewards/rejected": -0.11596763134002686, + "step": 3273 + }, + { + "epoch": 2.26417704011065, + "grad_norm": 7.661037445068359, + "learning_rate": 4.2976794221607504e-05, + "log_odds_chosen": 7.732230186462402, + "log_odds_ratio": -0.018154030665755272, + "logits/chosen": -0.6091871857643127, + "logits/rejected": -0.7187412977218628, + "logps/chosen": -0.008824712596833706, + "logps/rejected": -1.1421221494674683, + "loss": 2.5441, + "nll_loss": 0.6342145204544067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008824712713249028, + "rewards/margins": 0.11332975327968597, + "rewards/rejected": -0.11421222984790802, + "step": 3274 + }, + { + "epoch": 2.264868603042877, + "grad_norm": 8.103368759155273, + "learning_rate": 4.297295220531735e-05, + "log_odds_chosen": 7.086808204650879, + "log_odds_ratio": -0.00975433737039566, + "logits/chosen": -0.7722989916801453, + "logits/rejected": -0.7403442859649658, + "logps/chosen": -0.028213070705533028, + "logps/rejected": -1.6213114261627197, + "loss": 3.1533, + "nll_loss": 0.7873413562774658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028213071636855602, + "rewards/margins": 0.15930984914302826, + "rewards/rejected": -0.1621311455965042, + "step": 3275 + }, + { + "epoch": 2.2655601659751037, + "grad_norm": 11.01082992553711, + "learning_rate": 4.296911018902721e-05, + "log_odds_chosen": 6.675724983215332, + "log_odds_ratio": -0.3206350803375244, + "logits/chosen": -0.5880841016769409, + "logits/rejected": -0.6000953316688538, + "logps/chosen": -0.05225434899330139, + "logps/rejected": -1.3412305116653442, + "loss": 2.7484, + "nll_loss": 0.6550301909446716, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005225434899330139, + "rewards/margins": 0.12889762222766876, + "rewards/rejected": -0.1341230571269989, + "step": 3276 + }, + { + "epoch": 2.2662517289073305, + "grad_norm": 10.304984092712402, + "learning_rate": 4.2965268172737054e-05, + "log_odds_chosen": 4.339324951171875, + "log_odds_ratio": -0.11746401339769363, + "logits/chosen": -0.5929984450340271, + "logits/rejected": -0.5827913284301758, + "logps/chosen": -0.07616761326789856, + "logps/rejected": -0.944993793964386, + "loss": 2.7198, + "nll_loss": 0.6682088375091553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007616761140525341, + "rewards/margins": 0.08688262850046158, + "rewards/rejected": -0.0944993868470192, + "step": 3277 + }, + { + "epoch": 2.2669432918395573, + "grad_norm": 8.508423805236816, + "learning_rate": 4.296142615644691e-05, + "log_odds_chosen": 7.036334037780762, + "log_odds_ratio": -0.026559626683592796, + "logits/chosen": -0.6788300275802612, + "logits/rejected": -0.6941066384315491, + "logps/chosen": -0.08082263916730881, + "logps/rejected": -1.114711880683899, + "loss": 2.1955, + "nll_loss": 0.5462226271629333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008082263171672821, + "rewards/margins": 0.10338892042636871, + "rewards/rejected": -0.11147119104862213, + "step": 3278 + }, + { + "epoch": 2.267634854771784, + "grad_norm": 14.72610855102539, + "learning_rate": 4.295758414015675e-05, + "log_odds_chosen": 8.000221252441406, + "log_odds_ratio": -0.11321330070495605, + "logits/chosen": -0.512886106967926, + "logits/rejected": -0.5639876127243042, + "logps/chosen": -0.02386327274143696, + "logps/rejected": -1.4123374223709106, + "loss": 2.587, + "nll_loss": 0.6354241371154785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0023863273672759533, + "rewards/margins": 0.1388474404811859, + "rewards/rejected": -0.14123375713825226, + "step": 3279 + }, + { + "epoch": 2.268326417704011, + "grad_norm": 9.623932838439941, + "learning_rate": 4.2953742123866605e-05, + "log_odds_chosen": 8.210575103759766, + "log_odds_ratio": -0.001431704848073423, + "logits/chosen": -0.9136925935745239, + "logits/rejected": -0.9186261296272278, + "logps/chosen": -0.0012273931642994285, + "logps/rejected": -1.3958138227462769, + "loss": 3.3816, + "nll_loss": 0.845266580581665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012273932225070894, + "rewards/margins": 0.13945864140987396, + "rewards/rejected": -0.13958138227462769, + "step": 3280 + }, + { + "epoch": 2.269017980636238, + "grad_norm": 18.497825622558594, + "learning_rate": 4.294990010757646e-05, + "log_odds_chosen": 5.381030082702637, + "log_odds_ratio": -0.511152982711792, + "logits/chosen": -0.505366861820221, + "logits/rejected": -0.5537108778953552, + "logps/chosen": -0.06397618353366852, + "logps/rejected": -0.9307079911231995, + "loss": 1.8231, + "nll_loss": 0.40466052293777466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006397617980837822, + "rewards/margins": 0.08667318522930145, + "rewards/rejected": -0.09307080507278442, + "step": 3281 + }, + { + "epoch": 2.2697095435684647, + "grad_norm": 7.17557954788208, + "learning_rate": 4.29460580912863e-05, + "log_odds_chosen": 8.549341201782227, + "log_odds_ratio": -0.0057712700217962265, + "logits/chosen": -0.8990902304649353, + "logits/rejected": -0.9997949600219727, + "logps/chosen": -0.001989273354411125, + "logps/rejected": -1.7681491374969482, + "loss": 2.0357, + "nll_loss": 0.5083510875701904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019892734417226166, + "rewards/margins": 0.17661598324775696, + "rewards/rejected": -0.17681491374969482, + "step": 3282 + }, + { + "epoch": 2.2704011065006915, + "grad_norm": 6.074166774749756, + "learning_rate": 4.294221607499616e-05, + "log_odds_chosen": 8.438508033752441, + "log_odds_ratio": -0.00044998922385275364, + "logits/chosen": -0.5825424790382385, + "logits/rejected": -0.5944786071777344, + "logps/chosen": -0.00998244434595108, + "logps/rejected": -1.5468660593032837, + "loss": 1.8963, + "nll_loss": 0.47402337193489075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009982445044443011, + "rewards/margins": 0.15368834137916565, + "rewards/rejected": -0.1546865999698639, + "step": 3283 + }, + { + "epoch": 2.2710926694329183, + "grad_norm": 13.932870864868164, + "learning_rate": 4.293837405870601e-05, + "log_odds_chosen": 7.090470790863037, + "log_odds_ratio": -0.0042477683164179325, + "logits/chosen": -0.8459784388542175, + "logits/rejected": -0.8491066694259644, + "logps/chosen": -0.018670564517378807, + "logps/rejected": -1.428104043006897, + "loss": 3.267, + "nll_loss": 0.8163129091262817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001867056475020945, + "rewards/margins": 0.14094334840774536, + "rewards/rejected": -0.1428104043006897, + "step": 3284 + }, + { + "epoch": 2.271784232365145, + "grad_norm": 7.576010704040527, + "learning_rate": 4.293453204241586e-05, + "log_odds_chosen": 6.226518630981445, + "log_odds_ratio": -0.0069326963275671005, + "logits/chosen": -0.7165563106536865, + "logits/rejected": -0.7860947847366333, + "logps/chosen": -0.021648811176419258, + "logps/rejected": -1.109487533569336, + "loss": 2.6907, + "nll_loss": 0.6719880104064941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002164881443604827, + "rewards/margins": 0.10878387093544006, + "rewards/rejected": -0.11094875633716583, + "step": 3285 + }, + { + "epoch": 2.272475795297372, + "grad_norm": 10.582438468933105, + "learning_rate": 4.293069002612571e-05, + "log_odds_chosen": 7.673453330993652, + "log_odds_ratio": -0.00417360058054328, + "logits/chosen": -0.9730564951896667, + "logits/rejected": -1.0138814449310303, + "logps/chosen": -0.004357243422418833, + "logps/rejected": -1.4962083101272583, + "loss": 2.5051, + "nll_loss": 0.625868558883667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043572435970418155, + "rewards/margins": 0.14918510615825653, + "rewards/rejected": -0.14962083101272583, + "step": 3286 + }, + { + "epoch": 2.273167358229599, + "grad_norm": 12.714532852172852, + "learning_rate": 4.2926848009835565e-05, + "log_odds_chosen": 8.081611633300781, + "log_odds_ratio": -0.0064338194206357, + "logits/chosen": -0.6164727807044983, + "logits/rejected": -0.6589667797088623, + "logps/chosen": -0.004069966729730368, + "logps/rejected": -1.4070924520492554, + "loss": 2.7513, + "nll_loss": 0.6871762275695801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004069966671522707, + "rewards/margins": 0.14030225574970245, + "rewards/rejected": -0.14070925116539001, + "step": 3287 + }, + { + "epoch": 2.2738589211618256, + "grad_norm": 8.455578804016113, + "learning_rate": 4.292300599354541e-05, + "log_odds_chosen": 8.7753267288208, + "log_odds_ratio": -0.0006104600615799427, + "logits/chosen": -0.547274649143219, + "logits/rejected": -0.6455528140068054, + "logps/chosen": -0.0025969611015170813, + "logps/rejected": -1.232614517211914, + "loss": 2.6083, + "nll_loss": 0.6520036458969116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025969609851017594, + "rewards/margins": 0.12300176173448563, + "rewards/rejected": -0.1232614517211914, + "step": 3288 + }, + { + "epoch": 2.2745504840940525, + "grad_norm": 16.860441207885742, + "learning_rate": 4.2919163977255263e-05, + "log_odds_chosen": 7.584886074066162, + "log_odds_ratio": -0.05619276314973831, + "logits/chosen": -0.2892361283302307, + "logits/rejected": -0.36306679248809814, + "logps/chosen": -0.010725020430982113, + "logps/rejected": -1.1932777166366577, + "loss": 3.1154, + "nll_loss": 0.7732195854187012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010725021129474044, + "rewards/margins": 0.11825526505708694, + "rewards/rejected": -0.11932776868343353, + "step": 3289 + }, + { + "epoch": 2.2752420470262793, + "grad_norm": 16.079126358032227, + "learning_rate": 4.2915321960965116e-05, + "log_odds_chosen": 7.958521366119385, + "log_odds_ratio": -0.0023251264356076717, + "logits/chosen": -0.20205731689929962, + "logits/rejected": -0.3348516523838043, + "logps/chosen": -0.0021186298690736294, + "logps/rejected": -1.3939123153686523, + "loss": 3.0566, + "nll_loss": 0.7639187574386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021186300728004426, + "rewards/margins": 0.13917936384677887, + "rewards/rejected": -0.139391228556633, + "step": 3290 + }, + { + "epoch": 2.275933609958506, + "grad_norm": 8.559741973876953, + "learning_rate": 4.291147994467496e-05, + "log_odds_chosen": 7.290081024169922, + "log_odds_ratio": -0.003927894867956638, + "logits/chosen": -0.7065256237983704, + "logits/rejected": -0.6933072209358215, + "logps/chosen": -0.01851980946958065, + "logps/rejected": -1.1994181871414185, + "loss": 2.6089, + "nll_loss": 0.6518220901489258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018519810400903225, + "rewards/margins": 0.11808983981609344, + "rewards/rejected": -0.1199418231844902, + "step": 3291 + }, + { + "epoch": 2.276625172890733, + "grad_norm": 6.953580379486084, + "learning_rate": 4.290763792838482e-05, + "log_odds_chosen": 5.8773016929626465, + "log_odds_ratio": -0.015241350047290325, + "logits/chosen": -0.986510157585144, + "logits/rejected": -0.9865601062774658, + "logps/chosen": -0.03149587661027908, + "logps/rejected": -1.2142492532730103, + "loss": 3.2376, + "nll_loss": 0.8078746795654297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003149587893858552, + "rewards/margins": 0.11827534437179565, + "rewards/rejected": -0.12142492830753326, + "step": 3292 + }, + { + "epoch": 2.27731673582296, + "grad_norm": 6.562819480895996, + "learning_rate": 4.2903795912094666e-05, + "log_odds_chosen": 6.810787200927734, + "log_odds_ratio": -0.005457735620439053, + "logits/chosen": -0.3480616807937622, + "logits/rejected": -0.31973057985305786, + "logps/chosen": -0.0210685096681118, + "logps/rejected": -1.0529942512512207, + "loss": 2.3147, + "nll_loss": 0.578117847442627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00210685096681118, + "rewards/margins": 0.10319257527589798, + "rewards/rejected": -0.10529942810535431, + "step": 3293 + }, + { + "epoch": 2.2780082987551866, + "grad_norm": 6.798682689666748, + "learning_rate": 4.289995389580452e-05, + "log_odds_chosen": 8.399381637573242, + "log_odds_ratio": -0.008013543672859669, + "logits/chosen": -0.9271516799926758, + "logits/rejected": -0.969430685043335, + "logps/chosen": -0.02214404195547104, + "logps/rejected": -1.5581920146942139, + "loss": 1.5441, + "nll_loss": 0.3852202892303467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002214404521510005, + "rewards/margins": 0.15360480546951294, + "rewards/rejected": -0.15581920742988586, + "step": 3294 + }, + { + "epoch": 2.2786998616874135, + "grad_norm": 36.96128845214844, + "learning_rate": 4.289611187951437e-05, + "log_odds_chosen": 6.635312557220459, + "log_odds_ratio": -0.13570347428321838, + "logits/chosen": -0.6005826592445374, + "logits/rejected": -0.6524233818054199, + "logps/chosen": -0.04530520737171173, + "logps/rejected": -1.0357275009155273, + "loss": 2.2325, + "nll_loss": 0.5445461869239807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004530521109700203, + "rewards/margins": 0.09904223680496216, + "rewards/rejected": -0.10357275605201721, + "step": 3295 + }, + { + "epoch": 2.2793914246196403, + "grad_norm": 10.540643692016602, + "learning_rate": 4.2892269863224224e-05, + "log_odds_chosen": 6.827335357666016, + "log_odds_ratio": -0.002269915770739317, + "logits/chosen": -0.46423041820526123, + "logits/rejected": -0.5512615442276001, + "logps/chosen": -0.008189619518816471, + "logps/rejected": -1.168349266052246, + "loss": 2.3288, + "nll_loss": 0.5819670557975769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008189620566554368, + "rewards/margins": 0.1160159707069397, + "rewards/rejected": -0.11683492362499237, + "step": 3296 + }, + { + "epoch": 2.280082987551867, + "grad_norm": 15.574295043945312, + "learning_rate": 4.288842784693407e-05, + "log_odds_chosen": 7.315655708312988, + "log_odds_ratio": -0.30651018023490906, + "logits/chosen": -0.4777139723300934, + "logits/rejected": -0.4968945384025574, + "logps/chosen": -0.1229388415813446, + "logps/rejected": -1.1089379787445068, + "loss": 1.7984, + "nll_loss": 0.41893768310546875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01229388453066349, + "rewards/margins": 0.09859991073608398, + "rewards/rejected": -0.11089379340410233, + "step": 3297 + }, + { + "epoch": 2.280774550484094, + "grad_norm": 8.10746955871582, + "learning_rate": 4.288458583064392e-05, + "log_odds_chosen": 8.925813674926758, + "log_odds_ratio": -0.0036792377941310406, + "logits/chosen": -0.5411398410797119, + "logits/rejected": -0.567682147026062, + "logps/chosen": -0.03685717657208443, + "logps/rejected": -1.8378243446350098, + "loss": 1.8082, + "nll_loss": 0.4516713619232178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036857177037745714, + "rewards/margins": 0.18009671568870544, + "rewards/rejected": -0.1837824285030365, + "step": 3298 + }, + { + "epoch": 2.2814661134163208, + "grad_norm": 12.693647384643555, + "learning_rate": 4.2880743814353774e-05, + "log_odds_chosen": 7.668078422546387, + "log_odds_ratio": -0.06813301891088486, + "logits/chosen": -0.4694977402687073, + "logits/rejected": -0.5269255638122559, + "logps/chosen": -0.02033541537821293, + "logps/rejected": -1.7283085584640503, + "loss": 2.4857, + "nll_loss": 0.6146007776260376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002033541677519679, + "rewards/margins": 0.17079731822013855, + "rewards/rejected": -0.17283086478710175, + "step": 3299 + }, + { + "epoch": 2.2821576763485476, + "grad_norm": 6.430253982543945, + "learning_rate": 4.287690179806363e-05, + "log_odds_chosen": 7.170631408691406, + "log_odds_ratio": -0.012828205712139606, + "logits/chosen": -0.4530397653579712, + "logits/rejected": -0.49351730942726135, + "logps/chosen": -0.026906395331025124, + "logps/rejected": -1.5824973583221436, + "loss": 2.0449, + "nll_loss": 0.5099424719810486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026906393468379974, + "rewards/margins": 0.15555909276008606, + "rewards/rejected": -0.15824973583221436, + "step": 3300 + }, + { + "epoch": 2.2828492392807744, + "grad_norm": 7.007138729095459, + "learning_rate": 4.287305978177348e-05, + "log_odds_chosen": 9.538039207458496, + "log_odds_ratio": -0.00013177035725675523, + "logits/chosen": -0.6366586089134216, + "logits/rejected": -0.5977941751480103, + "logps/chosen": -0.0002648232621140778, + "logps/rejected": -1.5456188917160034, + "loss": 2.3247, + "nll_loss": 0.5811559557914734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6482328394195065e-05, + "rewards/margins": 0.1545354127883911, + "rewards/rejected": -0.15456190705299377, + "step": 3301 + }, + { + "epoch": 2.2835408022130013, + "grad_norm": 10.941584587097168, + "learning_rate": 4.2869217765483325e-05, + "log_odds_chosen": 7.814338207244873, + "log_odds_ratio": -0.12133978307247162, + "logits/chosen": -0.515864372253418, + "logits/rejected": -0.5393328070640564, + "logps/chosen": -0.03073921799659729, + "logps/rejected": -1.3862650394439697, + "loss": 3.2848, + "nll_loss": 0.8090637922286987, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003073921659961343, + "rewards/margins": 0.13555258512496948, + "rewards/rejected": -0.13862650096416473, + "step": 3302 + }, + { + "epoch": 2.284232365145228, + "grad_norm": 11.696934700012207, + "learning_rate": 4.286537574919318e-05, + "log_odds_chosen": 7.683901309967041, + "log_odds_ratio": -0.013870958238840103, + "logits/chosen": -0.7030923962593079, + "logits/rejected": -0.7184832692146301, + "logps/chosen": -0.027227070182561874, + "logps/rejected": -1.4573798179626465, + "loss": 3.7154, + "nll_loss": 0.927453875541687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002722707111388445, + "rewards/margins": 0.1430152803659439, + "rewards/rejected": -0.14573800563812256, + "step": 3303 + }, + { + "epoch": 2.284923928077455, + "grad_norm": 7.419434070587158, + "learning_rate": 4.286153373290303e-05, + "log_odds_chosen": 8.207198143005371, + "log_odds_ratio": -0.017640601843595505, + "logits/chosen": -0.6689510941505432, + "logits/rejected": -0.6708952188491821, + "logps/chosen": -0.014345421455800533, + "logps/rejected": -1.242602825164795, + "loss": 2.4851, + "nll_loss": 0.6195045709609985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014345422387123108, + "rewards/margins": 0.1228257417678833, + "rewards/rejected": -0.12426027655601501, + "step": 3304 + }, + { + "epoch": 2.2856154910096818, + "grad_norm": 7.105348587036133, + "learning_rate": 4.285769171661288e-05, + "log_odds_chosen": 7.531919002532959, + "log_odds_ratio": -0.07848531007766724, + "logits/chosen": -0.8401054739952087, + "logits/rejected": -0.9255817532539368, + "logps/chosen": -0.014528581872582436, + "logps/rejected": -0.8990166783332825, + "loss": 3.1558, + "nll_loss": 0.7811073064804077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001452858094125986, + "rewards/margins": 0.08844882249832153, + "rewards/rejected": -0.08990167081356049, + "step": 3305 + }, + { + "epoch": 2.2863070539419086, + "grad_norm": 6.265262603759766, + "learning_rate": 4.285384970032273e-05, + "log_odds_chosen": 8.80881118774414, + "log_odds_ratio": -0.00043272125185467303, + "logits/chosen": -0.4315575957298279, + "logits/rejected": -0.5561200976371765, + "logps/chosen": -0.0004981325473636389, + "logps/rejected": -1.1576604843139648, + "loss": 2.6085, + "nll_loss": 0.6520813703536987, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9813254008768126e-05, + "rewards/margins": 0.11571623384952545, + "rewards/rejected": -0.11576604843139648, + "step": 3306 + }, + { + "epoch": 2.2869986168741354, + "grad_norm": 6.479380130767822, + "learning_rate": 4.285000768403258e-05, + "log_odds_chosen": 6.527117729187012, + "log_odds_ratio": -0.058632515370845795, + "logits/chosen": -0.519963800907135, + "logits/rejected": -0.5593026280403137, + "logps/chosen": -0.042398929595947266, + "logps/rejected": -1.2286722660064697, + "loss": 1.8583, + "nll_loss": 0.458715558052063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004239893052726984, + "rewards/margins": 0.11862733215093613, + "rewards/rejected": -0.12286723405122757, + "step": 3307 + }, + { + "epoch": 2.2876901798063622, + "grad_norm": 8.754281044006348, + "learning_rate": 4.284616566774243e-05, + "log_odds_chosen": 6.8843255043029785, + "log_odds_ratio": -0.02043495699763298, + "logits/chosen": -0.46738386154174805, + "logits/rejected": -0.4485289454460144, + "logps/chosen": -0.04600503295660019, + "logps/rejected": -1.3759292364120483, + "loss": 2.411, + "nll_loss": 0.600706934928894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0046005030162632465, + "rewards/margins": 0.13299241662025452, + "rewards/rejected": -0.13759291172027588, + "step": 3308 + }, + { + "epoch": 2.288381742738589, + "grad_norm": 7.990466594696045, + "learning_rate": 4.2842323651452285e-05, + "log_odds_chosen": 6.636929512023926, + "log_odds_ratio": -0.009947605431079865, + "logits/chosen": -0.5649703741073608, + "logits/rejected": -0.6036151051521301, + "logps/chosen": -0.019645478576421738, + "logps/rejected": -1.340593695640564, + "loss": 2.5821, + "nll_loss": 0.6445379853248596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001964548137038946, + "rewards/margins": 0.13209481537342072, + "rewards/rejected": -0.1340593695640564, + "step": 3309 + }, + { + "epoch": 2.289073305670816, + "grad_norm": 5.59704065322876, + "learning_rate": 4.283848163516214e-05, + "log_odds_chosen": 7.1342620849609375, + "log_odds_ratio": -0.025569546967744827, + "logits/chosen": -0.5936905145645142, + "logits/rejected": -0.5649482011795044, + "logps/chosen": -0.024222206324338913, + "logps/rejected": -1.1674721240997314, + "loss": 2.0807, + "nll_loss": 0.5176059007644653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002422221004962921, + "rewards/margins": 0.11432498693466187, + "rewards/rejected": -0.11674721539020538, + "step": 3310 + }, + { + "epoch": 2.2897648686030427, + "grad_norm": 9.29557991027832, + "learning_rate": 4.283463961887198e-05, + "log_odds_chosen": 7.460236072540283, + "log_odds_ratio": -0.009771870449185371, + "logits/chosen": -0.8589206337928772, + "logits/rejected": -0.8911477327346802, + "logps/chosen": -0.021632924675941467, + "logps/rejected": -1.742672324180603, + "loss": 2.977, + "nll_loss": 0.7432758212089539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002163292607292533, + "rewards/margins": 0.17210394144058228, + "rewards/rejected": -0.1742672175168991, + "step": 3311 + }, + { + "epoch": 2.2904564315352696, + "grad_norm": 7.1764092445373535, + "learning_rate": 4.2830797602581836e-05, + "log_odds_chosen": 6.939169883728027, + "log_odds_ratio": -0.018409615382552147, + "logits/chosen": -0.23254843056201935, + "logits/rejected": -0.3019201159477234, + "logps/chosen": -0.05554074048995972, + "logps/rejected": -1.724541425704956, + "loss": 2.4416, + "nll_loss": 0.6085711717605591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0055540744215250015, + "rewards/margins": 0.16690006852149963, + "rewards/rejected": -0.17245414853096008, + "step": 3312 + }, + { + "epoch": 2.2911479944674964, + "grad_norm": 8.69416618347168, + "learning_rate": 4.282695558629169e-05, + "log_odds_chosen": 8.617881774902344, + "log_odds_ratio": -0.004176803398877382, + "logits/chosen": -0.6528822779655457, + "logits/rejected": -0.7093425989151001, + "logps/chosen": -0.04139639809727669, + "logps/rejected": -1.784732699394226, + "loss": 2.0386, + "nll_loss": 0.5092415809631348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004139639437198639, + "rewards/margins": 0.1743336319923401, + "rewards/rejected": -0.17847327888011932, + "step": 3313 + }, + { + "epoch": 2.2918395573997232, + "grad_norm": 9.222074508666992, + "learning_rate": 4.282311357000154e-05, + "log_odds_chosen": 6.117391109466553, + "log_odds_ratio": -0.3925205171108246, + "logits/chosen": -0.5966262817382812, + "logits/rejected": -0.63462895154953, + "logps/chosen": -0.07529162615537643, + "logps/rejected": -1.2157065868377686, + "loss": 2.2904, + "nll_loss": 0.5333395004272461, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007529162336140871, + "rewards/margins": 0.11404149234294891, + "rewards/rejected": -0.1215706616640091, + "step": 3314 + }, + { + "epoch": 2.29253112033195, + "grad_norm": 5.440239429473877, + "learning_rate": 4.2819271553711386e-05, + "log_odds_chosen": 5.083359718322754, + "log_odds_ratio": -0.09348893165588379, + "logits/chosen": -0.5668821930885315, + "logits/rejected": -0.6814877986907959, + "logps/chosen": -0.06102047860622406, + "logps/rejected": -1.2425282001495361, + "loss": 2.8531, + "nll_loss": 0.7039321660995483, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006102047860622406, + "rewards/margins": 0.11815077811479568, + "rewards/rejected": -0.12425282597541809, + "step": 3315 + }, + { + "epoch": 2.293222683264177, + "grad_norm": 9.030952453613281, + "learning_rate": 4.2815429537421246e-05, + "log_odds_chosen": 6.790431499481201, + "log_odds_ratio": -0.16002391278743744, + "logits/chosen": -0.5022440552711487, + "logits/rejected": -0.5329065322875977, + "logps/chosen": -0.0351906344294548, + "logps/rejected": -1.013128399848938, + "loss": 2.4143, + "nll_loss": 0.587579071521759, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035190628841519356, + "rewards/margins": 0.09779377281665802, + "rewards/rejected": -0.10131284594535828, + "step": 3316 + }, + { + "epoch": 2.2939142461964037, + "grad_norm": 7.451211929321289, + "learning_rate": 4.281158752113109e-05, + "log_odds_chosen": 9.210694313049316, + "log_odds_ratio": -0.00048021538532339036, + "logits/chosen": -0.33806172013282776, + "logits/rejected": -0.36309614777565, + "logps/chosen": -0.0005009582964703441, + "logps/rejected": -1.570671558380127, + "loss": 1.9924, + "nll_loss": 0.4980572760105133, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009582673665136e-05, + "rewards/margins": 0.1570170819759369, + "rewards/rejected": -0.1570671796798706, + "step": 3317 + }, + { + "epoch": 2.2946058091286305, + "grad_norm": 5.859487533569336, + "learning_rate": 4.2807745504840944e-05, + "log_odds_chosen": 8.068329811096191, + "log_odds_ratio": -0.001363530638627708, + "logits/chosen": -1.121833086013794, + "logits/rejected": -1.131767749786377, + "logps/chosen": -0.0072917938232421875, + "logps/rejected": -1.3566069602966309, + "loss": 2.5084, + "nll_loss": 0.6269651055335999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007291793590411544, + "rewards/margins": 0.1349315196275711, + "rewards/rejected": -0.13566069304943085, + "step": 3318 + }, + { + "epoch": 2.2952973720608574, + "grad_norm": 10.225635528564453, + "learning_rate": 4.2803903488550796e-05, + "log_odds_chosen": 8.81074047088623, + "log_odds_ratio": -0.0011985604651272297, + "logits/chosen": -0.9156633615493774, + "logits/rejected": -1.0778508186340332, + "logps/chosen": -0.0008523757569491863, + "logps/rejected": -1.6153143644332886, + "loss": 2.0688, + "nll_loss": 0.5170796513557434, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.523757423972711e-05, + "rewards/margins": 0.16144618391990662, + "rewards/rejected": -0.16153143346309662, + "step": 3319 + }, + { + "epoch": 2.295988934993084, + "grad_norm": 7.461869716644287, + "learning_rate": 4.280006147226064e-05, + "log_odds_chosen": 6.207457542419434, + "log_odds_ratio": -0.020755963400006294, + "logits/chosen": -0.6278154850006104, + "logits/rejected": -0.6054253578186035, + "logps/chosen": -0.01028019841760397, + "logps/rejected": -1.0288918018341064, + "loss": 2.6409, + "nll_loss": 0.6581598520278931, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010280199348926544, + "rewards/margins": 0.10186116397380829, + "rewards/rejected": -0.10288918763399124, + "step": 3320 + }, + { + "epoch": 2.296680497925311, + "grad_norm": 9.767592430114746, + "learning_rate": 4.2796219455970494e-05, + "log_odds_chosen": 6.164911270141602, + "log_odds_ratio": -0.19356755912303925, + "logits/chosen": -1.0346119403839111, + "logits/rejected": -1.0217839479446411, + "logps/chosen": -0.06772316247224808, + "logps/rejected": -1.1891506910324097, + "loss": 2.9357, + "nll_loss": 0.7145782709121704, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006772316992282867, + "rewards/margins": 0.11214275658130646, + "rewards/rejected": -0.11891507357358932, + "step": 3321 + }, + { + "epoch": 2.297372060857538, + "grad_norm": 3.739311456680298, + "learning_rate": 4.279237743968035e-05, + "log_odds_chosen": 5.006280422210693, + "log_odds_ratio": -0.11216719448566437, + "logits/chosen": -0.5570608973503113, + "logits/rejected": -0.5480868816375732, + "logps/chosen": -0.05582691729068756, + "logps/rejected": -1.1256422996520996, + "loss": 2.4771, + "nll_loss": 0.6080511212348938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005582691170275211, + "rewards/margins": 0.10698153078556061, + "rewards/rejected": -0.11256422102451324, + "step": 3322 + }, + { + "epoch": 2.2980636237897647, + "grad_norm": 4.676652908325195, + "learning_rate": 4.27885354233902e-05, + "log_odds_chosen": 5.39848518371582, + "log_odds_ratio": -0.1628534346818924, + "logits/chosen": -0.2611388564109802, + "logits/rejected": -0.3353046774864197, + "logps/chosen": -0.052117325365543365, + "logps/rejected": -1.071663737297058, + "loss": 1.9026, + "nll_loss": 0.4593610465526581, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005211732815951109, + "rewards/margins": 0.1019546389579773, + "rewards/rejected": -0.10716637223958969, + "step": 3323 + }, + { + "epoch": 2.2987551867219915, + "grad_norm": 9.791510581970215, + "learning_rate": 4.2784693407100045e-05, + "log_odds_chosen": 5.421011447906494, + "log_odds_ratio": -0.15795129537582397, + "logits/chosen": -0.5889769792556763, + "logits/rejected": -0.6601840257644653, + "logps/chosen": -0.05094009265303612, + "logps/rejected": -0.8651940226554871, + "loss": 3.0226, + "nll_loss": 0.7398436665534973, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005094009451568127, + "rewards/margins": 0.08142539113759995, + "rewards/rejected": -0.08651940524578094, + "step": 3324 + }, + { + "epoch": 2.2994467496542184, + "grad_norm": 5.987621784210205, + "learning_rate": 4.2780851390809904e-05, + "log_odds_chosen": 6.50314474105835, + "log_odds_ratio": -0.015520547516644001, + "logits/chosen": -0.582254946231842, + "logits/rejected": -0.6059767007827759, + "logps/chosen": -0.0028326380997896194, + "logps/rejected": -0.8199695348739624, + "loss": 1.4449, + "nll_loss": 0.35967934131622314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028326379833742976, + "rewards/margins": 0.08171369135379791, + "rewards/rejected": -0.08199695497751236, + "step": 3325 + }, + { + "epoch": 2.300138312586445, + "grad_norm": 9.296241760253906, + "learning_rate": 4.277700937451975e-05, + "log_odds_chosen": 9.585782051086426, + "log_odds_ratio": -0.000565931259188801, + "logits/chosen": -0.5659375190734863, + "logits/rejected": -0.6192294359207153, + "logps/chosen": -0.0011265147477388382, + "logps/rejected": -1.658210277557373, + "loss": 2.6348, + "nll_loss": 0.6586532592773438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011265148350503296, + "rewards/margins": 0.16570837795734406, + "rewards/rejected": -0.16582103073596954, + "step": 3326 + }, + { + "epoch": 2.300829875518672, + "grad_norm": 5.303914546966553, + "learning_rate": 4.27731673582296e-05, + "log_odds_chosen": 6.517918109893799, + "log_odds_ratio": -0.12337314337491989, + "logits/chosen": -0.6629981398582458, + "logits/rejected": -0.6698999404907227, + "logps/chosen": -0.10352815687656403, + "logps/rejected": -0.9165140390396118, + "loss": 2.3389, + "nll_loss": 0.5723835229873657, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010352815501391888, + "rewards/margins": 0.0812985822558403, + "rewards/rejected": -0.09165140241384506, + "step": 3327 + }, + { + "epoch": 2.301521438450899, + "grad_norm": 11.455277442932129, + "learning_rate": 4.2769325341939455e-05, + "log_odds_chosen": 8.0078706741333, + "log_odds_ratio": -0.12266886234283447, + "logits/chosen": -0.8499240875244141, + "logits/rejected": -0.8820198774337769, + "logps/chosen": -0.02486070990562439, + "logps/rejected": -1.4918012619018555, + "loss": 2.8902, + "nll_loss": 0.7102892994880676, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024860710836946964, + "rewards/margins": 0.14669406414031982, + "rewards/rejected": -0.14918012917041779, + "step": 3328 + }, + { + "epoch": 2.3022130013831257, + "grad_norm": 9.202096939086914, + "learning_rate": 4.27654833256493e-05, + "log_odds_chosen": 7.53227424621582, + "log_odds_ratio": -0.08717505633831024, + "logits/chosen": -0.7595465779304504, + "logits/rejected": -0.8716880083084106, + "logps/chosen": -0.019876958802342415, + "logps/rejected": -1.5796029567718506, + "loss": 2.1861, + "nll_loss": 0.5378171801567078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00198769592680037, + "rewards/margins": 0.15597259998321533, + "rewards/rejected": -0.15796029567718506, + "step": 3329 + }, + { + "epoch": 2.3029045643153525, + "grad_norm": 8.945493698120117, + "learning_rate": 4.276164130935915e-05, + "log_odds_chosen": 6.195101261138916, + "log_odds_ratio": -0.008044025860726833, + "logits/chosen": -0.3661419153213501, + "logits/rejected": -0.40794235467910767, + "logps/chosen": -0.029904384166002274, + "logps/rejected": -1.367875099182129, + "loss": 2.7449, + "nll_loss": 0.6854293346405029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002990438835695386, + "rewards/margins": 0.1337970793247223, + "rewards/rejected": -0.1367875039577484, + "step": 3330 + }, + { + "epoch": 2.3035961272475793, + "grad_norm": 8.282926559448242, + "learning_rate": 4.2757799293069005e-05, + "log_odds_chosen": 8.08902359008789, + "log_odds_ratio": -0.012745675630867481, + "logits/chosen": -0.3566465377807617, + "logits/rejected": -0.3846319317817688, + "logps/chosen": -0.04099631682038307, + "logps/rejected": -1.4674735069274902, + "loss": 2.3052, + "nll_loss": 0.5750317573547363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004099631682038307, + "rewards/margins": 0.14264771342277527, + "rewards/rejected": -0.14674735069274902, + "step": 3331 + }, + { + "epoch": 2.304287690179806, + "grad_norm": 11.868744850158691, + "learning_rate": 4.275395727677886e-05, + "log_odds_chosen": 7.529213905334473, + "log_odds_ratio": -0.005522570572793484, + "logits/chosen": -0.42124423384666443, + "logits/rejected": -0.5007656812667847, + "logps/chosen": -0.020202763378620148, + "logps/rejected": -1.7953150272369385, + "loss": 2.9072, + "nll_loss": 0.7262435555458069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020202761515975, + "rewards/margins": 0.17751124501228333, + "rewards/rejected": -0.1795315146446228, + "step": 3332 + }, + { + "epoch": 2.304979253112033, + "grad_norm": 13.092118263244629, + "learning_rate": 4.27501152604887e-05, + "log_odds_chosen": 7.509737014770508, + "log_odds_ratio": -0.03149031475186348, + "logits/chosen": -0.6241152286529541, + "logits/rejected": -0.6165431141853333, + "logps/chosen": -0.06661160290241241, + "logps/rejected": -1.4761927127838135, + "loss": 3.2141, + "nll_loss": 0.8003746271133423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006661160849034786, + "rewards/margins": 0.14095811545848846, + "rewards/rejected": -0.14761927723884583, + "step": 3333 + }, + { + "epoch": 2.30567081604426, + "grad_norm": 7.950167179107666, + "learning_rate": 4.274627324419856e-05, + "log_odds_chosen": 7.4247660636901855, + "log_odds_ratio": -0.015696687623858452, + "logits/chosen": -0.5261014699935913, + "logits/rejected": -0.6262849569320679, + "logps/chosen": -0.01380687952041626, + "logps/rejected": -1.6431702375411987, + "loss": 2.1106, + "nll_loss": 0.5260758399963379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013806880451738834, + "rewards/margins": 0.16293632984161377, + "rewards/rejected": -0.1643170267343521, + "step": 3334 + }, + { + "epoch": 2.3063623789764867, + "grad_norm": 3.703700542449951, + "learning_rate": 4.274243122790841e-05, + "log_odds_chosen": 6.943711280822754, + "log_odds_ratio": -0.03206576034426689, + "logits/chosen": -0.31782206892967224, + "logits/rejected": -0.30885663628578186, + "logps/chosen": -0.037259530276060104, + "logps/rejected": -1.4616565704345703, + "loss": 1.9969, + "nll_loss": 0.4960177540779114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037259531673043966, + "rewards/margins": 0.14243970811367035, + "rewards/rejected": -0.1461656540632248, + "step": 3335 + }, + { + "epoch": 2.3070539419087135, + "grad_norm": 5.592972755432129, + "learning_rate": 4.273858921161826e-05, + "log_odds_chosen": 6.800052642822266, + "log_odds_ratio": -0.0781567394733429, + "logits/chosen": -0.18893122673034668, + "logits/rejected": -0.23857125639915466, + "logps/chosen": -0.0250327717512846, + "logps/rejected": -1.233428716659546, + "loss": 2.063, + "nll_loss": 0.5079275965690613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002503277501091361, + "rewards/margins": 0.12083958834409714, + "rewards/rejected": -0.12334287166595459, + "step": 3336 + }, + { + "epoch": 2.3077455048409403, + "grad_norm": 9.436056137084961, + "learning_rate": 4.273474719532811e-05, + "log_odds_chosen": 4.679429054260254, + "log_odds_ratio": -0.05722453072667122, + "logits/chosen": -0.5767453908920288, + "logits/rejected": -0.6028765439987183, + "logps/chosen": -0.024937059730291367, + "logps/rejected": -0.6621779799461365, + "loss": 2.9017, + "nll_loss": 0.7196928262710571, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002493706066161394, + "rewards/margins": 0.0637240931391716, + "rewards/rejected": -0.06621779501438141, + "step": 3337 + }, + { + "epoch": 2.308437067773167, + "grad_norm": 8.038519859313965, + "learning_rate": 4.273090517903796e-05, + "log_odds_chosen": 7.326345920562744, + "log_odds_ratio": -0.0033848145976662636, + "logits/chosen": -0.5330811142921448, + "logits/rejected": -0.5206597447395325, + "logps/chosen": -0.005207228474318981, + "logps/rejected": -0.8569300174713135, + "loss": 2.671, + "nll_loss": 0.6674127578735352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005207228241488338, + "rewards/margins": 0.08517228811979294, + "rewards/rejected": -0.08569300919771194, + "step": 3338 + }, + { + "epoch": 2.309128630705394, + "grad_norm": 5.706264972686768, + "learning_rate": 4.272706316274781e-05, + "log_odds_chosen": 4.299847602844238, + "log_odds_ratio": -0.45109936594963074, + "logits/chosen": -0.4020465910434723, + "logits/rejected": -0.45422112941741943, + "logps/chosen": -0.20991401374340057, + "logps/rejected": -1.2692890167236328, + "loss": 2.4653, + "nll_loss": 0.5712153911590576, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020991403609514236, + "rewards/margins": 0.10593751072883606, + "rewards/rejected": -0.12692891061306, + "step": 3339 + }, + { + "epoch": 2.309820193637621, + "grad_norm": 11.377758979797363, + "learning_rate": 4.2723221146457664e-05, + "log_odds_chosen": 7.025807857513428, + "log_odds_ratio": -0.06139063090085983, + "logits/chosen": -0.5835516452789307, + "logits/rejected": -0.6491535902023315, + "logps/chosen": -0.059739407151937485, + "logps/rejected": -1.3370709419250488, + "loss": 2.5576, + "nll_loss": 0.6332700848579407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059739407151937485, + "rewards/margins": 0.12773315608501434, + "rewards/rejected": -0.13370709121227264, + "step": 3340 + }, + { + "epoch": 2.3105117565698476, + "grad_norm": 9.424481391906738, + "learning_rate": 4.2719379130167516e-05, + "log_odds_chosen": 6.853867053985596, + "log_odds_ratio": -0.01839285157620907, + "logits/chosen": -0.599707841873169, + "logits/rejected": -0.6209062337875366, + "logps/chosen": -0.0179190244525671, + "logps/rejected": -0.9835525751113892, + "loss": 2.3245, + "nll_loss": 0.5792940258979797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017919024685397744, + "rewards/margins": 0.09656335413455963, + "rewards/rejected": -0.0983552485704422, + "step": 3341 + }, + { + "epoch": 2.3112033195020745, + "grad_norm": 8.705497741699219, + "learning_rate": 4.271553711387736e-05, + "log_odds_chosen": 8.515935897827148, + "log_odds_ratio": -0.002595825819298625, + "logits/chosen": -0.5556164979934692, + "logits/rejected": -0.5878057479858398, + "logps/chosen": -0.013318197801709175, + "logps/rejected": -1.6672247648239136, + "loss": 2.3244, + "nll_loss": 0.5808513164520264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013318199198693037, + "rewards/margins": 0.16539067029953003, + "rewards/rejected": -0.16672247648239136, + "step": 3342 + }, + { + "epoch": 2.3118948824343013, + "grad_norm": 11.22912311553955, + "learning_rate": 4.271169509758722e-05, + "log_odds_chosen": 4.5884246826171875, + "log_odds_ratio": -0.17706415057182312, + "logits/chosen": -0.3864899277687073, + "logits/rejected": -0.4442172050476074, + "logps/chosen": -0.09313705563545227, + "logps/rejected": -0.860073447227478, + "loss": 2.2343, + "nll_loss": 0.5408572554588318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009313706308603287, + "rewards/margins": 0.07669363915920258, + "rewards/rejected": -0.08600734919309616, + "step": 3343 + }, + { + "epoch": 2.312586445366528, + "grad_norm": 7.984221458435059, + "learning_rate": 4.2707853081297067e-05, + "log_odds_chosen": 7.36676025390625, + "log_odds_ratio": -0.1042494997382164, + "logits/chosen": -0.49889159202575684, + "logits/rejected": -0.6104129552841187, + "logps/chosen": -0.02517927810549736, + "logps/rejected": -1.2142199277877808, + "loss": 1.9779, + "nll_loss": 0.4840526580810547, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025179279036819935, + "rewards/margins": 0.11890406906604767, + "rewards/rejected": -0.12142199277877808, + "step": 3344 + }, + { + "epoch": 2.313278008298755, + "grad_norm": 9.529037475585938, + "learning_rate": 4.270401106500692e-05, + "log_odds_chosen": 8.3910551071167, + "log_odds_ratio": -0.0006391934584826231, + "logits/chosen": -0.32860779762268066, + "logits/rejected": -0.4095700979232788, + "logps/chosen": -0.008856563828885555, + "logps/rejected": -1.5362614393234253, + "loss": 2.9869, + "nll_loss": 0.7466493844985962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008856563363224268, + "rewards/margins": 0.1527405083179474, + "rewards/rejected": -0.15362614393234253, + "step": 3345 + }, + { + "epoch": 2.313969571230982, + "grad_norm": 9.688497543334961, + "learning_rate": 4.270016904871677e-05, + "log_odds_chosen": 6.905933856964111, + "log_odds_ratio": -0.052650876343250275, + "logits/chosen": -0.6683744788169861, + "logits/rejected": -0.7596943378448486, + "logps/chosen": -0.035936955362558365, + "logps/rejected": -1.0634468793869019, + "loss": 3.1083, + "nll_loss": 0.7718011140823364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035936955828219652, + "rewards/margins": 0.1027509942650795, + "rewards/rejected": -0.10634469240903854, + "step": 3346 + }, + { + "epoch": 2.3146611341632086, + "grad_norm": 7.8678507804870605, + "learning_rate": 4.269632703242662e-05, + "log_odds_chosen": 6.931734085083008, + "log_odds_ratio": -0.015753693878650665, + "logits/chosen": -0.5395713448524475, + "logits/rejected": -0.6051682233810425, + "logps/chosen": -0.013178205117583275, + "logps/rejected": -0.8928380012512207, + "loss": 2.3201, + "nll_loss": 0.5784523487091064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013178205117583275, + "rewards/margins": 0.08796598762273788, + "rewards/rejected": -0.08928380161523819, + "step": 3347 + }, + { + "epoch": 2.3153526970954355, + "grad_norm": 8.95217514038086, + "learning_rate": 4.269248501613647e-05, + "log_odds_chosen": 8.627083778381348, + "log_odds_ratio": -0.0007175215287134051, + "logits/chosen": -0.5515803098678589, + "logits/rejected": -0.6366013884544373, + "logps/chosen": -0.009073970839381218, + "logps/rejected": -1.704122543334961, + "loss": 2.1583, + "nll_loss": 0.5395137667655945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009073971305042505, + "rewards/margins": 0.16950486600399017, + "rewards/rejected": -0.17041227221488953, + "step": 3348 + }, + { + "epoch": 2.3160442600276623, + "grad_norm": 5.506462574005127, + "learning_rate": 4.268864299984632e-05, + "log_odds_chosen": 8.146820068359375, + "log_odds_ratio": -0.0020080609247088432, + "logits/chosen": -0.4577869176864624, + "logits/rejected": -0.5449948906898499, + "logps/chosen": -0.004896472208201885, + "logps/rejected": -1.3375948667526245, + "loss": 1.7863, + "nll_loss": 0.44636422395706177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004896472091786563, + "rewards/margins": 0.13326984643936157, + "rewards/rejected": -0.1337594985961914, + "step": 3349 + }, + { + "epoch": 2.316735822959889, + "grad_norm": 7.025913715362549, + "learning_rate": 4.2684800983556174e-05, + "log_odds_chosen": 9.591825485229492, + "log_odds_ratio": -0.0010005512740463018, + "logits/chosen": -0.7512106895446777, + "logits/rejected": -0.8099105358123779, + "logps/chosen": -0.010130836628377438, + "logps/rejected": -2.046741008758545, + "loss": 1.7075, + "nll_loss": 0.42677560448646545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010130837326869369, + "rewards/margins": 0.2036610245704651, + "rewards/rejected": -0.20467409491539001, + "step": 3350 + }, + { + "epoch": 2.317427385892116, + "grad_norm": 9.430314064025879, + "learning_rate": 4.268095896726602e-05, + "log_odds_chosen": 8.802265167236328, + "log_odds_ratio": -0.00432342104613781, + "logits/chosen": -0.6346014142036438, + "logits/rejected": -0.6634232401847839, + "logps/chosen": -0.010224283672869205, + "logps/rejected": -1.2433936595916748, + "loss": 2.379, + "nll_loss": 0.5943161249160767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010224285069853067, + "rewards/margins": 0.1233169287443161, + "rewards/rejected": -0.12433935701847076, + "step": 3351 + }, + { + "epoch": 2.3181189488243428, + "grad_norm": 7.456939697265625, + "learning_rate": 4.267711695097588e-05, + "log_odds_chosen": 7.387084007263184, + "log_odds_ratio": -0.014544477686285973, + "logits/chosen": -0.7580875158309937, + "logits/rejected": -0.7801265716552734, + "logps/chosen": -0.008447481319308281, + "logps/rejected": -1.2176425457000732, + "loss": 2.5504, + "nll_loss": 0.6361559629440308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008447481086477637, + "rewards/margins": 0.12091951072216034, + "rewards/rejected": -0.12176425755023956, + "step": 3352 + }, + { + "epoch": 2.3188105117565696, + "grad_norm": 9.283626556396484, + "learning_rate": 4.2673274934685725e-05, + "log_odds_chosen": 7.965545654296875, + "log_odds_ratio": -0.0008849737932905555, + "logits/chosen": -0.2704313397407532, + "logits/rejected": -0.2850708067417145, + "logps/chosen": -0.002565020928159356, + "logps/rejected": -1.3778682947158813, + "loss": 2.3377, + "nll_loss": 0.5843430161476135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025650212774053216, + "rewards/margins": 0.13753032684326172, + "rewards/rejected": -0.1377868354320526, + "step": 3353 + }, + { + "epoch": 2.3195020746887964, + "grad_norm": 11.757554054260254, + "learning_rate": 4.266943291839558e-05, + "log_odds_chosen": 7.557385444641113, + "log_odds_ratio": -0.05157068371772766, + "logits/chosen": -0.40662682056427, + "logits/rejected": -0.4159397780895233, + "logps/chosen": -0.01183061208575964, + "logps/rejected": -1.2338945865631104, + "loss": 3.4943, + "nll_loss": 0.868411660194397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011830611620098352, + "rewards/margins": 0.12220640480518341, + "rewards/rejected": -0.12338946759700775, + "step": 3354 + }, + { + "epoch": 2.3201936376210233, + "grad_norm": 8.072080612182617, + "learning_rate": 4.266559090210543e-05, + "log_odds_chosen": 8.094703674316406, + "log_odds_ratio": -0.002076277043670416, + "logits/chosen": -0.5662438869476318, + "logits/rejected": -0.6120657920837402, + "logps/chosen": -0.01660882495343685, + "logps/rejected": -1.4540126323699951, + "loss": 2.4049, + "nll_loss": 0.6010183095932007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016608824953436852, + "rewards/margins": 0.14374037086963654, + "rewards/rejected": -0.1454012542963028, + "step": 3355 + }, + { + "epoch": 2.3208852005532505, + "grad_norm": 12.50888729095459, + "learning_rate": 4.2661748885815276e-05, + "log_odds_chosen": 6.450561046600342, + "log_odds_ratio": -0.26917147636413574, + "logits/chosen": -0.5345016717910767, + "logits/rejected": -0.5320403575897217, + "logps/chosen": -0.007884496822953224, + "logps/rejected": -0.9485344290733337, + "loss": 2.1652, + "nll_loss": 0.5143883228302002, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.000788449659012258, + "rewards/margins": 0.09406498819589615, + "rewards/rejected": -0.09485343843698502, + "step": 3356 + }, + { + "epoch": 2.3215767634854774, + "grad_norm": 8.556360244750977, + "learning_rate": 4.265790686952513e-05, + "log_odds_chosen": 6.606521129608154, + "log_odds_ratio": -0.016001202166080475, + "logits/chosen": -0.8234816789627075, + "logits/rejected": -0.8304534554481506, + "logps/chosen": -0.03415491804480553, + "logps/rejected": -1.4321098327636719, + "loss": 3.307, + "nll_loss": 0.8251463174819946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00341549189761281, + "rewards/margins": 0.1397954821586609, + "rewards/rejected": -0.1432109773159027, + "step": 3357 + }, + { + "epoch": 2.322268326417704, + "grad_norm": 9.19092082977295, + "learning_rate": 4.265406485323498e-05, + "log_odds_chosen": 4.379786968231201, + "log_odds_ratio": -0.23112638294696808, + "logits/chosen": -0.5057024359703064, + "logits/rejected": -0.5283100008964539, + "logps/chosen": -0.09777574241161346, + "logps/rejected": -1.1245331764221191, + "loss": 2.7133, + "nll_loss": 0.6552160978317261, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009777573868632317, + "rewards/margins": 0.10267573595046997, + "rewards/rejected": -0.11245331168174744, + "step": 3358 + }, + { + "epoch": 2.322959889349931, + "grad_norm": 10.96688175201416, + "learning_rate": 4.265022283694483e-05, + "log_odds_chosen": 7.878183364868164, + "log_odds_ratio": -0.005137327127158642, + "logits/chosen": -0.4825310707092285, + "logits/rejected": -0.5595512390136719, + "logps/chosen": -0.007061135023832321, + "logps/rejected": -1.8817293643951416, + "loss": 2.8067, + "nll_loss": 0.7011556625366211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007061135256662965, + "rewards/margins": 0.1874668300151825, + "rewards/rejected": -0.18817293643951416, + "step": 3359 + }, + { + "epoch": 2.323651452282158, + "grad_norm": 9.870452880859375, + "learning_rate": 4.264638082065468e-05, + "log_odds_chosen": 9.459579467773438, + "log_odds_ratio": -0.00026476330822333694, + "logits/chosen": -0.5286309719085693, + "logits/rejected": -0.6351138949394226, + "logps/chosen": -0.000664436724036932, + "logps/rejected": -1.7894325256347656, + "loss": 3.2136, + "nll_loss": 0.8033857345581055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.644366658292711e-05, + "rewards/margins": 0.17887680232524872, + "rewards/rejected": -0.17894324660301208, + "step": 3360 + }, + { + "epoch": 2.3243430152143847, + "grad_norm": 5.560983180999756, + "learning_rate": 4.264253880436454e-05, + "log_odds_chosen": 8.930059432983398, + "log_odds_ratio": -0.0022857023868709803, + "logits/chosen": -0.9240604043006897, + "logits/rejected": -0.8908398747444153, + "logps/chosen": -0.01126360148191452, + "logps/rejected": -1.5385799407958984, + "loss": 1.9848, + "nll_loss": 0.49597764015197754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011263600317761302, + "rewards/margins": 0.15273164212703705, + "rewards/rejected": -0.1538580060005188, + "step": 3361 + }, + { + "epoch": 2.3250345781466115, + "grad_norm": 7.829687595367432, + "learning_rate": 4.263869678807438e-05, + "log_odds_chosen": 9.804492950439453, + "log_odds_ratio": -0.00015975243877619505, + "logits/chosen": -0.2846134901046753, + "logits/rejected": -0.36845675110816956, + "logps/chosen": -0.0004424336366355419, + "logps/rejected": -1.7838191986083984, + "loss": 1.8686, + "nll_loss": 0.4671386778354645, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.424336657393724e-05, + "rewards/margins": 0.1783376783132553, + "rewards/rejected": -0.17838191986083984, + "step": 3362 + }, + { + "epoch": 2.3257261410788383, + "grad_norm": 13.060300827026367, + "learning_rate": 4.2634854771784236e-05, + "log_odds_chosen": 9.596633911132812, + "log_odds_ratio": -0.0004242879222147167, + "logits/chosen": -0.2015463262796402, + "logits/rejected": -0.468197762966156, + "logps/chosen": -0.0012442361330613494, + "logps/rejected": -2.1529362201690674, + "loss": 3.0141, + "nll_loss": 0.7534917593002319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012442361912690103, + "rewards/margins": 0.21516920626163483, + "rewards/rejected": -0.21529361605644226, + "step": 3363 + }, + { + "epoch": 2.326417704011065, + "grad_norm": 5.3858232498168945, + "learning_rate": 4.263101275549409e-05, + "log_odds_chosen": 6.65142297744751, + "log_odds_ratio": -0.11172687262296677, + "logits/chosen": -0.5483109951019287, + "logits/rejected": -0.5711467266082764, + "logps/chosen": -0.03660149127244949, + "logps/rejected": -0.868246853351593, + "loss": 1.6949, + "nll_loss": 0.4125487804412842, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036601494066417217, + "rewards/margins": 0.08316454291343689, + "rewards/rejected": -0.0868246853351593, + "step": 3364 + }, + { + "epoch": 2.327109266943292, + "grad_norm": 9.577594757080078, + "learning_rate": 4.2627170739203934e-05, + "log_odds_chosen": 6.5316572189331055, + "log_odds_ratio": -0.023967696353793144, + "logits/chosen": -0.6139042377471924, + "logits/rejected": -0.7178165316581726, + "logps/chosen": -0.010692340321838856, + "logps/rejected": -0.900058388710022, + "loss": 2.6979, + "nll_loss": 0.6720776557922363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010692339856177568, + "rewards/margins": 0.08893661201000214, + "rewards/rejected": -0.09000584483146667, + "step": 3365 + }, + { + "epoch": 2.327800829875519, + "grad_norm": 11.827892303466797, + "learning_rate": 4.2623328722913786e-05, + "log_odds_chosen": 7.095213890075684, + "log_odds_ratio": -0.007561472710222006, + "logits/chosen": -0.701814591884613, + "logits/rejected": -0.7703323364257812, + "logps/chosen": -0.005921615287661552, + "logps/rejected": -1.0419270992279053, + "loss": 3.8682, + "nll_loss": 0.966285765171051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005921615520492196, + "rewards/margins": 0.10360054671764374, + "rewards/rejected": -0.10419271886348724, + "step": 3366 + }, + { + "epoch": 2.3284923928077457, + "grad_norm": 8.764759063720703, + "learning_rate": 4.261948670662364e-05, + "log_odds_chosen": 6.575815200805664, + "log_odds_ratio": -0.09238407015800476, + "logits/chosen": -0.5462766885757446, + "logits/rejected": -0.4617829918861389, + "logps/chosen": -0.026065904647111893, + "logps/rejected": -1.1544698476791382, + "loss": 2.4102, + "nll_loss": 0.5933218002319336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026065902784466743, + "rewards/margins": 0.11284039914608002, + "rewards/rejected": -0.11544698476791382, + "step": 3367 + }, + { + "epoch": 2.3291839557399725, + "grad_norm": 8.00680160522461, + "learning_rate": 4.261564469033349e-05, + "log_odds_chosen": 6.982217311859131, + "log_odds_ratio": -0.007230084389448166, + "logits/chosen": -0.35788673162460327, + "logits/rejected": -0.35731175541877747, + "logps/chosen": -0.017076268792152405, + "logps/rejected": -1.1515657901763916, + "loss": 2.0273, + "nll_loss": 0.5060920715332031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001707626972347498, + "rewards/margins": 0.11344894766807556, + "rewards/rejected": -0.11515657603740692, + "step": 3368 + }, + { + "epoch": 2.3298755186721993, + "grad_norm": 8.321171760559082, + "learning_rate": 4.261180267404334e-05, + "log_odds_chosen": 7.154203414916992, + "log_odds_ratio": -0.15566202998161316, + "logits/chosen": -0.24052327871322632, + "logits/rejected": -0.26545450091362, + "logps/chosen": -0.04068966582417488, + "logps/rejected": -1.7842777967453003, + "loss": 2.2312, + "nll_loss": 0.542233407497406, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004068966023623943, + "rewards/margins": 0.1743588149547577, + "rewards/rejected": -0.1784277707338333, + "step": 3369 + }, + { + "epoch": 2.330567081604426, + "grad_norm": 8.19678783416748, + "learning_rate": 4.2607960657753196e-05, + "log_odds_chosen": 7.805452823638916, + "log_odds_ratio": -0.07030778378248215, + "logits/chosen": -0.2236385941505432, + "logits/rejected": -0.3405549228191376, + "logps/chosen": -0.029319610446691513, + "logps/rejected": -1.758199691772461, + "loss": 2.6445, + "nll_loss": 0.6540853381156921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029319613240659237, + "rewards/margins": 0.17288801074028015, + "rewards/rejected": -0.17581996321678162, + "step": 3370 + }, + { + "epoch": 2.331258644536653, + "grad_norm": 6.791223049163818, + "learning_rate": 4.260411864146304e-05, + "log_odds_chosen": 9.60167407989502, + "log_odds_ratio": -0.0004270426870789379, + "logits/chosen": -0.7064838409423828, + "logits/rejected": -0.7003648281097412, + "logps/chosen": -0.015431979671120644, + "logps/rejected": -2.0963101387023926, + "loss": 2.5563, + "nll_loss": 0.6390376091003418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015431978972628713, + "rewards/margins": 0.20808780193328857, + "rewards/rejected": -0.2096310257911682, + "step": 3371 + }, + { + "epoch": 2.33195020746888, + "grad_norm": 11.994937896728516, + "learning_rate": 4.2600276625172894e-05, + "log_odds_chosen": 8.312555313110352, + "log_odds_ratio": -0.0009366670856252313, + "logits/chosen": -0.4604548513889313, + "logits/rejected": -0.5908196568489075, + "logps/chosen": -0.020090028643608093, + "logps/rejected": -1.9958922863006592, + "loss": 2.9871, + "nll_loss": 0.7466820478439331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002009002724662423, + "rewards/margins": 0.1975802183151245, + "rewards/rejected": -0.19958922266960144, + "step": 3372 + }, + { + "epoch": 2.3326417704011067, + "grad_norm": 10.920364379882812, + "learning_rate": 4.259643460888275e-05, + "log_odds_chosen": 7.596035003662109, + "log_odds_ratio": -0.005939878057688475, + "logits/chosen": -0.49765828251838684, + "logits/rejected": -0.6091522574424744, + "logps/chosen": -0.017343439161777496, + "logps/rejected": -1.555943250656128, + "loss": 3.4278, + "nll_loss": 0.8563669919967651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017343438230454922, + "rewards/margins": 0.15386000275611877, + "rewards/rejected": -0.1555943489074707, + "step": 3373 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 53.26963806152344, + "learning_rate": 4.259259259259259e-05, + "log_odds_chosen": 6.723474025726318, + "log_odds_ratio": -0.45630714297294617, + "logits/chosen": -0.5959466695785522, + "logits/rejected": -0.6924944519996643, + "logps/chosen": -0.09198511391878128, + "logps/rejected": -1.44384765625, + "loss": 2.6244, + "nll_loss": 0.6104597449302673, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009198511950671673, + "rewards/margins": 0.13518625497817993, + "rewards/rejected": -0.14438477158546448, + "step": 3374 + }, + { + "epoch": 2.3340248962655603, + "grad_norm": 7.871276378631592, + "learning_rate": 4.2588750576302445e-05, + "log_odds_chosen": 6.338235855102539, + "log_odds_ratio": -0.1509992927312851, + "logits/chosen": -0.4784778952598572, + "logits/rejected": -0.48658180236816406, + "logps/chosen": -0.03482593595981598, + "logps/rejected": -1.3916345834732056, + "loss": 3.3691, + "nll_loss": 0.8271795511245728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034825934562832117, + "rewards/margins": 0.13568086922168732, + "rewards/rejected": -0.13916344940662384, + "step": 3375 + }, + { + "epoch": 2.334716459197787, + "grad_norm": 6.738348007202148, + "learning_rate": 4.25849085600123e-05, + "log_odds_chosen": 6.788658142089844, + "log_odds_ratio": -0.042201511561870575, + "logits/chosen": -0.833795428276062, + "logits/rejected": -0.8814170360565186, + "logps/chosen": -0.028427157551050186, + "logps/rejected": -1.651319980621338, + "loss": 3.5604, + "nll_loss": 0.8858755826950073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00284271570853889, + "rewards/margins": 0.16228929162025452, + "rewards/rejected": -0.16513200104236603, + "step": 3376 + }, + { + "epoch": 2.335408022130014, + "grad_norm": 10.640534400939941, + "learning_rate": 4.258106654372215e-05, + "log_odds_chosen": 8.235334396362305, + "log_odds_ratio": -0.003731532720848918, + "logits/chosen": -0.8289970755577087, + "logits/rejected": -0.9308689832687378, + "logps/chosen": -0.017879465594887733, + "logps/rejected": -1.359239101409912, + "loss": 2.9657, + "nll_loss": 0.7410598993301392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017879465594887733, + "rewards/margins": 0.13413597643375397, + "rewards/rejected": -0.13592392206192017, + "step": 3377 + }, + { + "epoch": 2.336099585062241, + "grad_norm": 7.650116443634033, + "learning_rate": 4.2577224527431995e-05, + "log_odds_chosen": 7.192252159118652, + "log_odds_ratio": -0.012806126847863197, + "logits/chosen": -0.7580456137657166, + "logits/rejected": -0.7242326736450195, + "logps/chosen": -0.03407813236117363, + "logps/rejected": -1.6755526065826416, + "loss": 2.846, + "nll_loss": 0.7102081179618835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003407812910154462, + "rewards/margins": 0.16414742171764374, + "rewards/rejected": -0.16755524277687073, + "step": 3378 + }, + { + "epoch": 2.3367911479944676, + "grad_norm": 9.497821807861328, + "learning_rate": 4.2573382511141855e-05, + "log_odds_chosen": 7.8366498947143555, + "log_odds_ratio": -0.0061141084879636765, + "logits/chosen": -0.8389533758163452, + "logits/rejected": -0.8879673480987549, + "logps/chosen": -0.024966338649392128, + "logps/rejected": -1.4060308933258057, + "loss": 2.5736, + "nll_loss": 0.6428003907203674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024966339115053415, + "rewards/margins": 0.13810646533966064, + "rewards/rejected": -0.14060309529304504, + "step": 3379 + }, + { + "epoch": 2.3374827109266945, + "grad_norm": 5.044812202453613, + "learning_rate": 4.25695404948517e-05, + "log_odds_chosen": 5.910860061645508, + "log_odds_ratio": -0.1787855178117752, + "logits/chosen": -0.4667434096336365, + "logits/rejected": -0.4716196656227112, + "logps/chosen": -0.043792542070150375, + "logps/rejected": -1.2373631000518799, + "loss": 2.0394, + "nll_loss": 0.491960346698761, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004379254300147295, + "rewards/margins": 0.11935704946517944, + "rewards/rejected": -0.12373629957437515, + "step": 3380 + }, + { + "epoch": 2.3381742738589213, + "grad_norm": 8.184005737304688, + "learning_rate": 4.256569847856155e-05, + "log_odds_chosen": 7.020155429840088, + "log_odds_ratio": -0.15846848487854004, + "logits/chosen": -0.07825784385204315, + "logits/rejected": -0.1667136251926422, + "logps/chosen": -0.030404016375541687, + "logps/rejected": -1.6403634548187256, + "loss": 2.0926, + "nll_loss": 0.5073078870773315, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003040401265025139, + "rewards/margins": 0.16099593043327332, + "rewards/rejected": -0.1640363484621048, + "step": 3381 + }, + { + "epoch": 2.338865836791148, + "grad_norm": 5.734543323516846, + "learning_rate": 4.2561856462271405e-05, + "log_odds_chosen": 7.248272895812988, + "log_odds_ratio": -0.060873474925756454, + "logits/chosen": -0.5805363655090332, + "logits/rejected": -0.6600069999694824, + "logps/chosen": -0.05188342556357384, + "logps/rejected": -1.8321489095687866, + "loss": 2.1616, + "nll_loss": 0.5343201756477356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005188342649489641, + "rewards/margins": 0.17802655696868896, + "rewards/rejected": -0.18321490287780762, + "step": 3382 + }, + { + "epoch": 2.339557399723375, + "grad_norm": 5.963815212249756, + "learning_rate": 4.255801444598125e-05, + "log_odds_chosen": 7.02641487121582, + "log_odds_ratio": -0.10145469754934311, + "logits/chosen": -0.6014857292175293, + "logits/rejected": -0.5714874267578125, + "logps/chosen": -0.05983032286167145, + "logps/rejected": -1.6911689043045044, + "loss": 2.3081, + "nll_loss": 0.5668916702270508, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005983032286167145, + "rewards/margins": 0.16313385963439941, + "rewards/rejected": -0.16911689937114716, + "step": 3383 + }, + { + "epoch": 2.340248962655602, + "grad_norm": 98.84029388427734, + "learning_rate": 4.25541724296911e-05, + "log_odds_chosen": 6.901886940002441, + "log_odds_ratio": -0.3027273416519165, + "logits/chosen": -0.7416228652000427, + "logits/rejected": -0.7577385902404785, + "logps/chosen": -0.01666867919266224, + "logps/rejected": -0.819347620010376, + "loss": 2.099, + "nll_loss": 0.4944872260093689, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001666867989115417, + "rewards/margins": 0.08026789873838425, + "rewards/rejected": -0.08193476498126984, + "step": 3384 + }, + { + "epoch": 2.3409405255878286, + "grad_norm": 7.437020778656006, + "learning_rate": 4.2550330413400956e-05, + "log_odds_chosen": 7.493680000305176, + "log_odds_ratio": -0.04263937845826149, + "logits/chosen": -0.6599336862564087, + "logits/rejected": -0.7314375638961792, + "logps/chosen": -0.011237611062824726, + "logps/rejected": -1.0297126770019531, + "loss": 2.617, + "nll_loss": 0.6499974131584167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011237610597163439, + "rewards/margins": 0.10184749215841293, + "rewards/rejected": -0.10297125577926636, + "step": 3385 + }, + { + "epoch": 2.3416320885200554, + "grad_norm": 8.780964851379395, + "learning_rate": 4.254648839711081e-05, + "log_odds_chosen": 7.680317401885986, + "log_odds_ratio": -0.062225475907325745, + "logits/chosen": -0.587841808795929, + "logits/rejected": -0.6148817539215088, + "logps/chosen": -0.0225834921002388, + "logps/rejected": -1.806999921798706, + "loss": 1.9738, + "nll_loss": 0.48721805214881897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022583494428545237, + "rewards/margins": 0.17844164371490479, + "rewards/rejected": -0.18070000410079956, + "step": 3386 + }, + { + "epoch": 2.3423236514522823, + "grad_norm": 9.550161361694336, + "learning_rate": 4.2542646380820654e-05, + "log_odds_chosen": 7.595914363861084, + "log_odds_ratio": -0.0024608231615275145, + "logits/chosen": -0.7636619806289673, + "logits/rejected": -0.7690795063972473, + "logps/chosen": -0.002205885713919997, + "logps/rejected": -1.0934929847717285, + "loss": 2.2587, + "nll_loss": 0.5644301176071167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022058856848161668, + "rewards/margins": 0.10912871360778809, + "rewards/rejected": -0.10934930294752121, + "step": 3387 + }, + { + "epoch": 2.343015214384509, + "grad_norm": 10.059521675109863, + "learning_rate": 4.253880436453051e-05, + "log_odds_chosen": 9.377660751342773, + "log_odds_ratio": -0.00010987836867570877, + "logits/chosen": -0.5637887716293335, + "logits/rejected": -0.6180630922317505, + "logps/chosen": -0.0005490887560881674, + "logps/rejected": -1.5444753170013428, + "loss": 2.1725, + "nll_loss": 0.5431035161018372, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.49088726984337e-05, + "rewards/margins": 0.15439262986183167, + "rewards/rejected": -0.1544475257396698, + "step": 3388 + }, + { + "epoch": 2.343706777316736, + "grad_norm": 7.881161212921143, + "learning_rate": 4.253496234824036e-05, + "log_odds_chosen": 5.156569957733154, + "log_odds_ratio": -0.16542679071426392, + "logits/chosen": -0.14296261966228485, + "logits/rejected": -0.2547076344490051, + "logps/chosen": -0.07005628198385239, + "logps/rejected": -0.8354336619377136, + "loss": 2.1545, + "nll_loss": 0.5220921635627747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007005628198385239, + "rewards/margins": 0.07653774321079254, + "rewards/rejected": -0.08354337513446808, + "step": 3389 + }, + { + "epoch": 2.3443983402489628, + "grad_norm": 9.925989151000977, + "learning_rate": 4.253112033195021e-05, + "log_odds_chosen": 7.2002058029174805, + "log_odds_ratio": -0.2609242796897888, + "logits/chosen": -0.3674536347389221, + "logits/rejected": -0.41105300188064575, + "logps/chosen": -0.07734145224094391, + "logps/rejected": -1.5246528387069702, + "loss": 2.3385, + "nll_loss": 0.5585339069366455, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007734145503491163, + "rewards/margins": 0.14473114907741547, + "rewards/rejected": -0.15246528387069702, + "step": 3390 + }, + { + "epoch": 2.3450899031811896, + "grad_norm": 7.965763092041016, + "learning_rate": 4.2527278315660064e-05, + "log_odds_chosen": 6.444962501525879, + "log_odds_ratio": -0.21917779743671417, + "logits/chosen": -0.5414268970489502, + "logits/rejected": -0.603859007358551, + "logps/chosen": -0.06609084457159042, + "logps/rejected": -1.0937172174453735, + "loss": 2.0623, + "nll_loss": 0.4936527609825134, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0066090840846300125, + "rewards/margins": 0.10276263952255249, + "rewards/rejected": -0.10937172919511795, + "step": 3391 + }, + { + "epoch": 2.3457814661134164, + "grad_norm": 21.98008918762207, + "learning_rate": 4.252343629936991e-05, + "log_odds_chosen": 5.816531658172607, + "log_odds_ratio": -0.6510119438171387, + "logits/chosen": -0.5702582001686096, + "logits/rejected": -0.6187250018119812, + "logps/chosen": -0.11002440005540848, + "logps/rejected": -1.22270929813385, + "loss": 3.7288, + "nll_loss": 0.8671107292175293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.011002440005540848, + "rewards/margins": 0.11126849800348282, + "rewards/rejected": -0.12227094173431396, + "step": 3392 + }, + { + "epoch": 2.3464730290456433, + "grad_norm": 9.141814231872559, + "learning_rate": 4.251959428307976e-05, + "log_odds_chosen": 7.8273210525512695, + "log_odds_ratio": -0.0032731585670262575, + "logits/chosen": -0.28313112258911133, + "logits/rejected": -0.34095481038093567, + "logps/chosen": -0.00457628583535552, + "logps/rejected": -1.3544855117797852, + "loss": 2.2227, + "nll_loss": 0.5553531646728516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004576286009978503, + "rewards/margins": 0.13499093055725098, + "rewards/rejected": -0.13544854521751404, + "step": 3393 + }, + { + "epoch": 2.34716459197787, + "grad_norm": 9.178505897521973, + "learning_rate": 4.2515752266789614e-05, + "log_odds_chosen": 8.25627613067627, + "log_odds_ratio": -0.001994677586480975, + "logits/chosen": -0.4252711534500122, + "logits/rejected": -0.44464176893234253, + "logps/chosen": -0.0024501513689756393, + "logps/rejected": -1.3708351850509644, + "loss": 2.0642, + "nll_loss": 0.5158490538597107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024501513689756393, + "rewards/margins": 0.13683849573135376, + "rewards/rejected": -0.1370835304260254, + "step": 3394 + }, + { + "epoch": 2.347856154910097, + "grad_norm": 4.448156356811523, + "learning_rate": 4.2511910250499467e-05, + "log_odds_chosen": 7.523759841918945, + "log_odds_ratio": -0.055687472224235535, + "logits/chosen": -0.6664091944694519, + "logits/rejected": -0.7099178433418274, + "logps/chosen": -0.02036425471305847, + "logps/rejected": -1.0317578315734863, + "loss": 1.8873, + "nll_loss": 0.46624624729156494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020364252850413322, + "rewards/margins": 0.10113934427499771, + "rewards/rejected": -0.10317577421665192, + "step": 3395 + }, + { + "epoch": 2.3485477178423237, + "grad_norm": 9.876389503479004, + "learning_rate": 4.250806823420931e-05, + "log_odds_chosen": 8.495573043823242, + "log_odds_ratio": -0.045671653002500534, + "logits/chosen": -0.7123823165893555, + "logits/rejected": -0.7269488573074341, + "logps/chosen": -0.013587714172899723, + "logps/rejected": -1.3124465942382812, + "loss": 2.8378, + "nll_loss": 0.7048929929733276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001358771463856101, + "rewards/margins": 0.12988589704036713, + "rewards/rejected": -0.13124465942382812, + "step": 3396 + }, + { + "epoch": 2.3492392807745506, + "grad_norm": 10.173689842224121, + "learning_rate": 4.250422621791917e-05, + "log_odds_chosen": 8.026592254638672, + "log_odds_ratio": -0.002193247899413109, + "logits/chosen": -0.9392632246017456, + "logits/rejected": -0.9983953833580017, + "logps/chosen": -0.011764070950448513, + "logps/rejected": -1.7628229856491089, + "loss": 2.3371, + "nll_loss": 0.5840455889701843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011764070950448513, + "rewards/margins": 0.17510589957237244, + "rewards/rejected": -0.17628228664398193, + "step": 3397 + }, + { + "epoch": 2.3499308437067774, + "grad_norm": 9.759671211242676, + "learning_rate": 4.250038420162902e-05, + "log_odds_chosen": 7.043476581573486, + "log_odds_ratio": -0.4816550314426422, + "logits/chosen": -0.6363857388496399, + "logits/rejected": -0.64066481590271, + "logps/chosen": -0.07748615741729736, + "logps/rejected": -0.8302817344665527, + "loss": 2.5485, + "nll_loss": 0.5889508724212646, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0077486163936555386, + "rewards/margins": 0.07527956366539001, + "rewards/rejected": -0.08302817493677139, + "step": 3398 + }, + { + "epoch": 2.3506224066390042, + "grad_norm": 7.589184284210205, + "learning_rate": 4.249654218533887e-05, + "log_odds_chosen": 7.6665849685668945, + "log_odds_ratio": -0.00181456352584064, + "logits/chosen": -0.5171942710876465, + "logits/rejected": -0.5481059551239014, + "logps/chosen": -0.0013496269239112735, + "logps/rejected": -0.8110032677650452, + "loss": 2.1699, + "nll_loss": 0.542304515838623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013496269821189344, + "rewards/margins": 0.08096536993980408, + "rewards/rejected": -0.08110032975673676, + "step": 3399 + }, + { + "epoch": 2.351313969571231, + "grad_norm": 10.063977241516113, + "learning_rate": 4.249270016904872e-05, + "log_odds_chosen": 6.615221977233887, + "log_odds_ratio": -0.10633889585733414, + "logits/chosen": -0.42302262783050537, + "logits/rejected": -0.48643821477890015, + "logps/chosen": -0.039021894335746765, + "logps/rejected": -1.403071403503418, + "loss": 1.9831, + "nll_loss": 0.48514240980148315, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003902189899235964, + "rewards/margins": 0.13640496134757996, + "rewards/rejected": -0.14030715823173523, + "step": 3400 + }, + { + "epoch": 2.352005532503458, + "grad_norm": 11.160541534423828, + "learning_rate": 4.248885815275857e-05, + "log_odds_chosen": 7.308645725250244, + "log_odds_ratio": -0.03402864187955856, + "logits/chosen": -0.5057591795921326, + "logits/rejected": -0.5204235911369324, + "logps/chosen": -0.007180843036621809, + "logps/rejected": -1.547482967376709, + "loss": 2.3467, + "nll_loss": 0.5832793712615967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007180843385867774, + "rewards/margins": 0.1540302038192749, + "rewards/rejected": -0.15474829077720642, + "step": 3401 + }, + { + "epoch": 2.3526970954356847, + "grad_norm": 11.588892936706543, + "learning_rate": 4.248501613646842e-05, + "log_odds_chosen": 6.584766387939453, + "log_odds_ratio": -0.03737543523311615, + "logits/chosen": -0.587253749370575, + "logits/rejected": -0.659322202205658, + "logps/chosen": -0.019411414861679077, + "logps/rejected": -1.6802135705947876, + "loss": 2.1524, + "nll_loss": 0.5343660712242126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001941141439601779, + "rewards/margins": 0.16608020663261414, + "rewards/rejected": -0.16802135109901428, + "step": 3402 + }, + { + "epoch": 2.3533886583679116, + "grad_norm": 9.891732215881348, + "learning_rate": 4.248117412017827e-05, + "log_odds_chosen": 6.61793851852417, + "log_odds_ratio": -0.17892657220363617, + "logits/chosen": -0.5233025550842285, + "logits/rejected": -0.5984399318695068, + "logps/chosen": -0.026846522465348244, + "logps/rejected": -1.040014386177063, + "loss": 2.8344, + "nll_loss": 0.690710723400116, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002684652339667082, + "rewards/margins": 0.10131677985191345, + "rewards/rejected": -0.10400144010782242, + "step": 3403 + }, + { + "epoch": 2.3540802213001384, + "grad_norm": 11.967804908752441, + "learning_rate": 4.2477332103888125e-05, + "log_odds_chosen": 7.058257102966309, + "log_odds_ratio": -0.05008646100759506, + "logits/chosen": -0.23758041858673096, + "logits/rejected": -0.26495781540870667, + "logps/chosen": -0.005382682662457228, + "logps/rejected": -0.8401252031326294, + "loss": 2.3625, + "nll_loss": 0.5856223106384277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005382683011703193, + "rewards/margins": 0.08347424864768982, + "rewards/rejected": -0.08401252329349518, + "step": 3404 + }, + { + "epoch": 2.354771784232365, + "grad_norm": 9.493339538574219, + "learning_rate": 4.247349008759797e-05, + "log_odds_chosen": 5.9872236251831055, + "log_odds_ratio": -0.04331246018409729, + "logits/chosen": -0.8071039319038391, + "logits/rejected": -0.851711094379425, + "logps/chosen": -0.018634863197803497, + "logps/rejected": -1.080324649810791, + "loss": 2.2317, + "nll_loss": 0.5535867810249329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018634865991771221, + "rewards/margins": 0.1061689704656601, + "rewards/rejected": -0.10803245007991791, + "step": 3405 + }, + { + "epoch": 2.355463347164592, + "grad_norm": 12.45865535736084, + "learning_rate": 4.246964807130783e-05, + "log_odds_chosen": 7.26013708114624, + "log_odds_ratio": -0.015082553029060364, + "logits/chosen": -0.2669753134250641, + "logits/rejected": -0.3914386034011841, + "logps/chosen": -0.02450769767165184, + "logps/rejected": -2.022952079772949, + "loss": 2.6149, + "nll_loss": 0.6522207260131836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002450769767165184, + "rewards/margins": 0.19984443485736847, + "rewards/rejected": -0.2022952139377594, + "step": 3406 + }, + { + "epoch": 2.356154910096819, + "grad_norm": 12.195465087890625, + "learning_rate": 4.2465806055017676e-05, + "log_odds_chosen": 9.071712493896484, + "log_odds_ratio": -0.000601945910602808, + "logits/chosen": -0.23555348813533783, + "logits/rejected": -0.28923851251602173, + "logps/chosen": -0.0022937441244721413, + "logps/rejected": -2.087567090988159, + "loss": 1.9265, + "nll_loss": 0.4815739691257477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022937438916414976, + "rewards/margins": 0.2085273265838623, + "rewards/rejected": -0.208756685256958, + "step": 3407 + }, + { + "epoch": 2.3568464730290457, + "grad_norm": 8.689661026000977, + "learning_rate": 4.246196403872753e-05, + "log_odds_chosen": 8.243035316467285, + "log_odds_ratio": -0.047890111804008484, + "logits/chosen": -0.37406691908836365, + "logits/rejected": -0.44183045625686646, + "logps/chosen": -0.012632353231310844, + "logps/rejected": -1.2202600240707397, + "loss": 1.9614, + "nll_loss": 0.48556771874427795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012632354628294706, + "rewards/margins": 0.12076276540756226, + "rewards/rejected": -0.1220259964466095, + "step": 3408 + }, + { + "epoch": 2.3575380359612725, + "grad_norm": 8.059005737304688, + "learning_rate": 4.245812202243738e-05, + "log_odds_chosen": 7.652141571044922, + "log_odds_ratio": -0.09811528772115707, + "logits/chosen": -0.6067800521850586, + "logits/rejected": -0.7010886073112488, + "logps/chosen": -0.019398357719182968, + "logps/rejected": -1.3128105401992798, + "loss": 1.9711, + "nll_loss": 0.4829697608947754, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019398359581828117, + "rewards/margins": 0.1293412148952484, + "rewards/rejected": -0.1312810480594635, + "step": 3409 + }, + { + "epoch": 2.3582295988934994, + "grad_norm": 8.750677108764648, + "learning_rate": 4.2454280006147226e-05, + "log_odds_chosen": 8.003756523132324, + "log_odds_ratio": -0.006866155192255974, + "logits/chosen": -0.5898510217666626, + "logits/rejected": -0.5858031511306763, + "logps/chosen": -0.01122718770056963, + "logps/rejected": -1.737359881401062, + "loss": 2.2807, + "nll_loss": 0.5694831013679504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011227187933400273, + "rewards/margins": 0.172613263130188, + "rewards/rejected": -0.17373599112033844, + "step": 3410 + }, + { + "epoch": 2.358921161825726, + "grad_norm": 11.310158729553223, + "learning_rate": 4.245043798985708e-05, + "log_odds_chosen": 7.99793004989624, + "log_odds_ratio": -0.0029664812609553337, + "logits/chosen": -0.3674885928630829, + "logits/rejected": -0.4538128972053528, + "logps/chosen": -0.011136957444250584, + "logps/rejected": -1.4392294883728027, + "loss": 2.2423, + "nll_loss": 0.5602694749832153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011136956745758653, + "rewards/margins": 0.14280925691127777, + "rewards/rejected": -0.14392295479774475, + "step": 3411 + }, + { + "epoch": 2.359612724757953, + "grad_norm": 6.208254337310791, + "learning_rate": 4.2446595973566924e-05, + "log_odds_chosen": 8.18211555480957, + "log_odds_ratio": -0.006016615778207779, + "logits/chosen": -0.4709635376930237, + "logits/rejected": -0.4899592101573944, + "logps/chosen": -0.01079797837883234, + "logps/rejected": -0.919940710067749, + "loss": 1.9933, + "nll_loss": 0.4977228343486786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010797978611662984, + "rewards/margins": 0.0909142792224884, + "rewards/rejected": -0.09199407696723938, + "step": 3412 + }, + { + "epoch": 2.36030428769018, + "grad_norm": 6.6337480545043945, + "learning_rate": 4.2442753957276783e-05, + "log_odds_chosen": 7.813111305236816, + "log_odds_ratio": -0.005881158635020256, + "logits/chosen": -0.67177414894104, + "logits/rejected": -0.7192281484603882, + "logps/chosen": -0.0034163526725023985, + "logps/rejected": -1.134279727935791, + "loss": 2.1709, + "nll_loss": 0.5421328544616699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003416352847125381, + "rewards/margins": 0.11308634281158447, + "rewards/rejected": -0.11342797428369522, + "step": 3413 + }, + { + "epoch": 2.3609958506224067, + "grad_norm": 12.27839469909668, + "learning_rate": 4.243891194098663e-05, + "log_odds_chosen": 8.821434020996094, + "log_odds_ratio": -0.0011850158916786313, + "logits/chosen": -0.6717657446861267, + "logits/rejected": -0.7319881916046143, + "logps/chosen": -0.0052223182283341885, + "logps/rejected": -1.7572263479232788, + "loss": 2.1305, + "nll_loss": 0.5324950814247131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005222317995503545, + "rewards/margins": 0.17520040273666382, + "rewards/rejected": -0.1757226288318634, + "step": 3414 + }, + { + "epoch": 2.3616874135546335, + "grad_norm": 7.977497100830078, + "learning_rate": 4.243506992469648e-05, + "log_odds_chosen": 7.292098045349121, + "log_odds_ratio": -0.004010710399597883, + "logits/chosen": -0.5028542876243591, + "logits/rejected": -0.53304123878479, + "logps/chosen": -0.01968352310359478, + "logps/rejected": -1.1983541250228882, + "loss": 2.8042, + "nll_loss": 0.7006375789642334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001968352124094963, + "rewards/margins": 0.11786707490682602, + "rewards/rejected": -0.11983540654182434, + "step": 3415 + }, + { + "epoch": 2.3623789764868603, + "grad_norm": 6.613243103027344, + "learning_rate": 4.2431227908406334e-05, + "log_odds_chosen": 8.827180862426758, + "log_odds_ratio": -0.019252749159932137, + "logits/chosen": -0.7933902144432068, + "logits/rejected": -0.9102729558944702, + "logps/chosen": -0.005132139194756746, + "logps/rejected": -1.7154276371002197, + "loss": 1.4716, + "nll_loss": 0.36596986651420593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005132139194756746, + "rewards/margins": 0.17102953791618347, + "rewards/rejected": -0.17154276371002197, + "step": 3416 + }, + { + "epoch": 2.363070539419087, + "grad_norm": 6.68892765045166, + "learning_rate": 4.2427385892116186e-05, + "log_odds_chosen": 5.407261848449707, + "log_odds_ratio": -0.06341204047203064, + "logits/chosen": -0.5899242758750916, + "logits/rejected": -0.6239021420478821, + "logps/chosen": -0.05954143777489662, + "logps/rejected": -1.1532893180847168, + "loss": 2.598, + "nll_loss": 0.6431571245193481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0059541440568864346, + "rewards/margins": 0.10937478393316269, + "rewards/rejected": -0.11532893031835556, + "step": 3417 + }, + { + "epoch": 2.363762102351314, + "grad_norm": 6.749809741973877, + "learning_rate": 4.242354387582603e-05, + "log_odds_chosen": 6.653587341308594, + "log_odds_ratio": -0.09195633977651596, + "logits/chosen": -0.6077258586883545, + "logits/rejected": -0.6534870862960815, + "logps/chosen": -0.029678650200366974, + "logps/rejected": -1.3541957139968872, + "loss": 2.3267, + "nll_loss": 0.5724772214889526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029678652063012123, + "rewards/margins": 0.13245171308517456, + "rewards/rejected": -0.1354195773601532, + "step": 3418 + }, + { + "epoch": 2.364453665283541, + "grad_norm": 7.185380458831787, + "learning_rate": 4.2419701859535885e-05, + "log_odds_chosen": 6.369234085083008, + "log_odds_ratio": -0.03255006670951843, + "logits/chosen": -0.7959968447685242, + "logits/rejected": -0.9139914512634277, + "logps/chosen": -0.02814808301627636, + "logps/rejected": -1.2766046524047852, + "loss": 2.2784, + "nll_loss": 0.5663396120071411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028148081619292498, + "rewards/margins": 0.12484566122293472, + "rewards/rejected": -0.12766046822071075, + "step": 3419 + }, + { + "epoch": 2.3651452282157677, + "grad_norm": 7.040897846221924, + "learning_rate": 4.241585984324574e-05, + "log_odds_chosen": 7.790557384490967, + "log_odds_ratio": -0.013517394661903381, + "logits/chosen": -0.758858859539032, + "logits/rejected": -0.7736672163009644, + "logps/chosen": -0.02057792991399765, + "logps/rejected": -1.3754760026931763, + "loss": 2.202, + "nll_loss": 0.5491434335708618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020577930845320225, + "rewards/margins": 0.135489821434021, + "rewards/rejected": -0.1375475972890854, + "step": 3420 + }, + { + "epoch": 2.3658367911479945, + "grad_norm": 5.4301252365112305, + "learning_rate": 4.241201782695558e-05, + "log_odds_chosen": 6.388769626617432, + "log_odds_ratio": -0.09578309953212738, + "logits/chosen": -0.2594180405139923, + "logits/rejected": -0.24878433346748352, + "logps/chosen": -0.02873176522552967, + "logps/rejected": -1.0656076669692993, + "loss": 2.0233, + "nll_loss": 0.49625498056411743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002873176708817482, + "rewards/margins": 0.103687584400177, + "rewards/rejected": -0.10656075924634933, + "step": 3421 + }, + { + "epoch": 2.3665283540802213, + "grad_norm": 14.583373069763184, + "learning_rate": 4.240817581066544e-05, + "log_odds_chosen": 6.067316055297852, + "log_odds_ratio": -0.5061472654342651, + "logits/chosen": -0.438271164894104, + "logits/rejected": -0.5065256357192993, + "logps/chosen": -0.0877622589468956, + "logps/rejected": -1.3191258907318115, + "loss": 2.9576, + "nll_loss": 0.6887795329093933, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008776226080954075, + "rewards/margins": 0.12313637137413025, + "rewards/rejected": -0.13191257417201996, + "step": 3422 + }, + { + "epoch": 2.367219917012448, + "grad_norm": 6.707622051239014, + "learning_rate": 4.240433379437529e-05, + "log_odds_chosen": 6.3979363441467285, + "log_odds_ratio": -0.11842715740203857, + "logits/chosen": -0.6803793907165527, + "logits/rejected": -0.6929797530174255, + "logps/chosen": -0.02465580217540264, + "logps/rejected": -1.0064455270767212, + "loss": 2.5078, + "nll_loss": 0.6151173114776611, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002465580590069294, + "rewards/margins": 0.09817897528409958, + "rewards/rejected": -0.1006445586681366, + "step": 3423 + }, + { + "epoch": 2.367911479944675, + "grad_norm": 8.288269996643066, + "learning_rate": 4.240049177808514e-05, + "log_odds_chosen": 7.2223219871521, + "log_odds_ratio": -0.015759773552417755, + "logits/chosen": -0.6505395174026489, + "logits/rejected": -0.6855196952819824, + "logps/chosen": -0.04627052694559097, + "logps/rejected": -1.183556318283081, + "loss": 2.4063, + "nll_loss": 0.6000023484230042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004627052694559097, + "rewards/margins": 0.11372856795787811, + "rewards/rejected": -0.11835562437772751, + "step": 3424 + }, + { + "epoch": 2.368603042876902, + "grad_norm": 6.999942302703857, + "learning_rate": 4.239664976179499e-05, + "log_odds_chosen": 8.207736015319824, + "log_odds_ratio": -0.008073610253632069, + "logits/chosen": -0.5741896629333496, + "logits/rejected": -0.6363852024078369, + "logps/chosen": -0.02019553631544113, + "logps/rejected": -1.5705045461654663, + "loss": 2.1859, + "nll_loss": 0.5456606149673462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002019553678110242, + "rewards/margins": 0.15503089129924774, + "rewards/rejected": -0.15705044567584991, + "step": 3425 + }, + { + "epoch": 2.3692946058091287, + "grad_norm": 7.205198287963867, + "learning_rate": 4.2392807745504845e-05, + "log_odds_chosen": 7.496864318847656, + "log_odds_ratio": -0.027593035250902176, + "logits/chosen": -0.0906713455915451, + "logits/rejected": -0.1560470461845398, + "logps/chosen": -0.011206595227122307, + "logps/rejected": -1.5430270433425903, + "loss": 1.8617, + "nll_loss": 0.4626578688621521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011206595227122307, + "rewards/margins": 0.1531820446252823, + "rewards/rejected": -0.154302716255188, + "step": 3426 + }, + { + "epoch": 2.3699861687413555, + "grad_norm": 9.317922592163086, + "learning_rate": 4.238896572921469e-05, + "log_odds_chosen": 8.463835716247559, + "log_odds_ratio": -0.002514339517802, + "logits/chosen": -0.8520632386207581, + "logits/rejected": -0.8247945308685303, + "logps/chosen": -0.0037631026934832335, + "logps/rejected": -2.1205391883850098, + "loss": 2.2781, + "nll_loss": 0.5692840218544006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003763103159144521, + "rewards/margins": 0.21167761087417603, + "rewards/rejected": -0.21205392479896545, + "step": 3427 + }, + { + "epoch": 2.3706777316735823, + "grad_norm": 12.661592483520508, + "learning_rate": 4.238512371292454e-05, + "log_odds_chosen": 7.507530212402344, + "log_odds_ratio": -0.002025590743869543, + "logits/chosen": -0.5045261383056641, + "logits/rejected": -0.5771041512489319, + "logps/chosen": -0.008932366967201233, + "logps/rejected": -1.6672617197036743, + "loss": 3.6596, + "nll_loss": 0.9147093892097473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008932367200031877, + "rewards/margins": 0.16583293676376343, + "rewards/rejected": -0.16672618687152863, + "step": 3428 + }, + { + "epoch": 2.371369294605809, + "grad_norm": 7.28396463394165, + "learning_rate": 4.2381281696634395e-05, + "log_odds_chosen": 5.50955867767334, + "log_odds_ratio": -0.18129563331604004, + "logits/chosen": -0.43971797823905945, + "logits/rejected": -0.42192983627319336, + "logps/chosen": -0.05782304331660271, + "logps/rejected": -1.093947410583496, + "loss": 2.5992, + "nll_loss": 0.6316773891448975, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005782304331660271, + "rewards/margins": 0.10361243784427643, + "rewards/rejected": -0.10939474403858185, + "step": 3429 + }, + { + "epoch": 2.372060857538036, + "grad_norm": 19.657962799072266, + "learning_rate": 4.237743968034424e-05, + "log_odds_chosen": 7.222588539123535, + "log_odds_ratio": -0.03204096108675003, + "logits/chosen": -0.7179023027420044, + "logits/rejected": -0.8604851365089417, + "logps/chosen": -0.012024748139083385, + "logps/rejected": -1.5378297567367554, + "loss": 2.6524, + "nll_loss": 0.6599072217941284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012024750467389822, + "rewards/margins": 0.15258049964904785, + "rewards/rejected": -0.15378296375274658, + "step": 3430 + }, + { + "epoch": 2.372752420470263, + "grad_norm": 9.396291732788086, + "learning_rate": 4.23735976640541e-05, + "log_odds_chosen": 6.863970756530762, + "log_odds_ratio": -0.008415119722485542, + "logits/chosen": -0.6852945685386658, + "logits/rejected": -0.7253008484840393, + "logps/chosen": -0.026889167726039886, + "logps/rejected": -1.6196262836456299, + "loss": 2.5258, + "nll_loss": 0.6306195259094238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002688916865736246, + "rewards/margins": 0.15927371382713318, + "rewards/rejected": -0.161962628364563, + "step": 3431 + }, + { + "epoch": 2.3734439834024896, + "grad_norm": 10.495866775512695, + "learning_rate": 4.2369755647763946e-05, + "log_odds_chosen": 6.5691633224487305, + "log_odds_ratio": -0.08497857302427292, + "logits/chosen": -0.30856412649154663, + "logits/rejected": -0.33345896005630493, + "logps/chosen": -0.028544174507260323, + "logps/rejected": -1.3216798305511475, + "loss": 2.7573, + "nll_loss": 0.6808239221572876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028544177766889334, + "rewards/margins": 0.12931357324123383, + "rewards/rejected": -0.1321679949760437, + "step": 3432 + }, + { + "epoch": 2.3741355463347165, + "grad_norm": 6.415916919708252, + "learning_rate": 4.23659136314738e-05, + "log_odds_chosen": 6.058993339538574, + "log_odds_ratio": -0.006088991649448872, + "logits/chosen": -0.6415539979934692, + "logits/rejected": -0.6294801235198975, + "logps/chosen": -0.025170328095555305, + "logps/rejected": -1.4109984636306763, + "loss": 1.6702, + "nll_loss": 0.41693025827407837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002517032902687788, + "rewards/margins": 0.13858281075954437, + "rewards/rejected": -0.14109984040260315, + "step": 3433 + }, + { + "epoch": 2.3748271092669433, + "grad_norm": 4.665531635284424, + "learning_rate": 4.236207161518365e-05, + "log_odds_chosen": 7.872640132904053, + "log_odds_ratio": -0.002072228817269206, + "logits/chosen": -0.19979572296142578, + "logits/rejected": -0.26814359426498413, + "logps/chosen": -0.024378551170229912, + "logps/rejected": -1.237983226776123, + "loss": 2.4034, + "nll_loss": 0.6006313562393188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002437855117022991, + "rewards/margins": 0.12136045098304749, + "rewards/rejected": -0.12379831075668335, + "step": 3434 + }, + { + "epoch": 2.37551867219917, + "grad_norm": 7.580257415771484, + "learning_rate": 4.23582295988935e-05, + "log_odds_chosen": 6.102107048034668, + "log_odds_ratio": -0.2961543798446655, + "logits/chosen": -0.10959547758102417, + "logits/rejected": -0.15983079373836517, + "logps/chosen": -0.07290703058242798, + "logps/rejected": -1.5514822006225586, + "loss": 2.3746, + "nll_loss": 0.5640414953231812, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007290703244507313, + "rewards/margins": 0.14785751700401306, + "rewards/rejected": -0.1551482081413269, + "step": 3435 + }, + { + "epoch": 2.376210235131397, + "grad_norm": 10.682846069335938, + "learning_rate": 4.235438758260335e-05, + "log_odds_chosen": 8.229127883911133, + "log_odds_ratio": -0.0005710788536816835, + "logits/chosen": -0.6116784811019897, + "logits/rejected": -0.6720350384712219, + "logps/chosen": -0.007531903684139252, + "logps/rejected": -1.4279307126998901, + "loss": 4.3613, + "nll_loss": 1.090259075164795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007531904266215861, + "rewards/margins": 0.14203989505767822, + "rewards/rejected": -0.14279307425022125, + "step": 3436 + }, + { + "epoch": 2.376901798063624, + "grad_norm": 4.78339958190918, + "learning_rate": 4.23505455663132e-05, + "log_odds_chosen": 6.552225112915039, + "log_odds_ratio": -0.1655147522687912, + "logits/chosen": -0.2748771905899048, + "logits/rejected": -0.28989773988723755, + "logps/chosen": -0.04578880965709686, + "logps/rejected": -1.4036614894866943, + "loss": 2.4327, + "nll_loss": 0.591616690158844, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004578881431370974, + "rewards/margins": 0.1357872635126114, + "rewards/rejected": -0.14036613702774048, + "step": 3437 + }, + { + "epoch": 2.3775933609958506, + "grad_norm": 5.989414691925049, + "learning_rate": 4.2346703550023054e-05, + "log_odds_chosen": 7.069077968597412, + "log_odds_ratio": -0.014767002314329147, + "logits/chosen": -0.3627806305885315, + "logits/rejected": -0.3392050564289093, + "logps/chosen": -0.02562572993338108, + "logps/rejected": -2.0423498153686523, + "loss": 2.2831, + "nll_loss": 0.5692924857139587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002562573179602623, + "rewards/margins": 0.20167241990566254, + "rewards/rejected": -0.2042349874973297, + "step": 3438 + }, + { + "epoch": 2.3782849239280774, + "grad_norm": 12.863221168518066, + "learning_rate": 4.23428615337329e-05, + "log_odds_chosen": 9.35482406616211, + "log_odds_ratio": -0.028638150542974472, + "logits/chosen": -0.42819708585739136, + "logits/rejected": -0.5285694003105164, + "logps/chosen": -0.00422546686604619, + "logps/rejected": -1.748453140258789, + "loss": 3.1993, + "nll_loss": 0.7969600558280945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004225466982461512, + "rewards/margins": 0.17442278563976288, + "rewards/rejected": -0.17484530806541443, + "step": 3439 + }, + { + "epoch": 2.3789764868603043, + "grad_norm": 7.027946949005127, + "learning_rate": 4.233901951744276e-05, + "log_odds_chosen": 7.778133869171143, + "log_odds_ratio": -0.0407455638051033, + "logits/chosen": -0.663629949092865, + "logits/rejected": -0.7393888235092163, + "logps/chosen": -0.012316934764385223, + "logps/rejected": -1.5184440612792969, + "loss": 2.6779, + "nll_loss": 0.6653945446014404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012316934298723936, + "rewards/margins": 0.1506127119064331, + "rewards/rejected": -0.15184439718723297, + "step": 3440 + }, + { + "epoch": 2.379668049792531, + "grad_norm": 13.124755859375, + "learning_rate": 4.2335177501152604e-05, + "log_odds_chosen": 5.84478759765625, + "log_odds_ratio": -0.586132287979126, + "logits/chosen": -0.6295636296272278, + "logits/rejected": -0.6671774387359619, + "logps/chosen": -0.1445447951555252, + "logps/rejected": -1.2849104404449463, + "loss": 2.3785, + "nll_loss": 0.5360167026519775, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.014454478397965431, + "rewards/margins": 0.11403656005859375, + "rewards/rejected": -0.12849104404449463, + "step": 3441 + }, + { + "epoch": 2.380359612724758, + "grad_norm": 8.439517974853516, + "learning_rate": 4.233133548486246e-05, + "log_odds_chosen": 6.201792240142822, + "log_odds_ratio": -0.06423471868038177, + "logits/chosen": -0.31344470381736755, + "logits/rejected": -0.3481372594833374, + "logps/chosen": -0.023115266114473343, + "logps/rejected": -1.3296178579330444, + "loss": 2.4696, + "nll_loss": 0.6109641790390015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002311526797711849, + "rewards/margins": 0.13065025210380554, + "rewards/rejected": -0.13296179473400116, + "step": 3442 + }, + { + "epoch": 2.3810511756569848, + "grad_norm": 5.6978020668029785, + "learning_rate": 4.232749346857231e-05, + "log_odds_chosen": 8.566123962402344, + "log_odds_ratio": -0.0043657380156219006, + "logits/chosen": -0.17997238039970398, + "logits/rejected": -0.23091138899326324, + "logps/chosen": -0.005184969864785671, + "logps/rejected": -1.4944618940353394, + "loss": 2.1032, + "nll_loss": 0.5253602266311646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005184969631955028, + "rewards/margins": 0.1489276885986328, + "rewards/rejected": -0.14944618940353394, + "step": 3443 + }, + { + "epoch": 2.3817427385892116, + "grad_norm": 5.119330406188965, + "learning_rate": 4.232365145228216e-05, + "log_odds_chosen": 5.514773845672607, + "log_odds_ratio": -0.09987065941095352, + "logits/chosen": -0.41642263531684875, + "logits/rejected": -0.4259873926639557, + "logps/chosen": -0.0746447816491127, + "logps/rejected": -0.8729536533355713, + "loss": 2.3954, + "nll_loss": 0.5888611674308777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00746447779238224, + "rewards/margins": 0.07983088493347168, + "rewards/rejected": -0.08729536831378937, + "step": 3444 + }, + { + "epoch": 2.3824343015214384, + "grad_norm": 6.080493927001953, + "learning_rate": 4.231980943599201e-05, + "log_odds_chosen": 6.873577117919922, + "log_odds_ratio": -0.08558716624975204, + "logits/chosen": -0.35207927227020264, + "logits/rejected": -0.4364143908023834, + "logps/chosen": -0.026298439130187035, + "logps/rejected": -2.3086209297180176, + "loss": 2.4776, + "nll_loss": 0.6108462810516357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026298437733203173, + "rewards/margins": 0.2282322347164154, + "rewards/rejected": -0.2308620810508728, + "step": 3445 + }, + { + "epoch": 2.3831258644536653, + "grad_norm": 12.112558364868164, + "learning_rate": 4.231596741970186e-05, + "log_odds_chosen": 9.93198013305664, + "log_odds_ratio": -0.006665684282779694, + "logits/chosen": -0.46666690707206726, + "logits/rejected": -0.5174428224563599, + "logps/chosen": -0.006805818993598223, + "logps/rejected": -2.516042470932007, + "loss": 2.0792, + "nll_loss": 0.519120991230011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006805819575674832, + "rewards/margins": 0.25092369318008423, + "rewards/rejected": -0.25160425901412964, + "step": 3446 + }, + { + "epoch": 2.383817427385892, + "grad_norm": 11.87564754486084, + "learning_rate": 4.231212540341171e-05, + "log_odds_chosen": 7.434047222137451, + "log_odds_ratio": -0.04765244945883751, + "logits/chosen": -0.36915624141693115, + "logits/rejected": -0.42544883489608765, + "logps/chosen": -0.008615016005933285, + "logps/rejected": -1.1400610208511353, + "loss": 3.0075, + "nll_loss": 0.7471170425415039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008615015540271997, + "rewards/margins": 0.11314460635185242, + "rewards/rejected": -0.11400610208511353, + "step": 3447 + }, + { + "epoch": 2.384508990318119, + "grad_norm": 9.698756217956543, + "learning_rate": 4.230828338712156e-05, + "log_odds_chosen": 7.496614456176758, + "log_odds_ratio": -0.00677049346268177, + "logits/chosen": -0.5656105875968933, + "logits/rejected": -0.6477078199386597, + "logps/chosen": -0.008299939334392548, + "logps/rejected": -1.4475886821746826, + "loss": 3.4122, + "nll_loss": 0.8523802757263184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008299939800053835, + "rewards/margins": 0.1439288854598999, + "rewards/rejected": -0.14475886523723602, + "step": 3448 + }, + { + "epoch": 2.3852005532503457, + "grad_norm": 10.093292236328125, + "learning_rate": 4.230444137083142e-05, + "log_odds_chosen": 4.253750324249268, + "log_odds_ratio": -0.2802105247974396, + "logits/chosen": -0.6729620099067688, + "logits/rejected": -0.7065836787223816, + "logps/chosen": -0.0628410279750824, + "logps/rejected": -0.6742812395095825, + "loss": 3.1062, + "nll_loss": 0.7485273480415344, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006284103263169527, + "rewards/margins": 0.06114402785897255, + "rewards/rejected": -0.06742812693119049, + "step": 3449 + }, + { + "epoch": 2.3858921161825726, + "grad_norm": 10.238203048706055, + "learning_rate": 4.230059935454126e-05, + "log_odds_chosen": 6.280107021331787, + "log_odds_ratio": -0.08056508749723434, + "logits/chosen": -0.7012012600898743, + "logits/rejected": -0.7339482307434082, + "logps/chosen": -0.041637249290943146, + "logps/rejected": -1.1578660011291504, + "loss": 3.0957, + "nll_loss": 0.7658792734146118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004163725301623344, + "rewards/margins": 0.1116228848695755, + "rewards/rejected": -0.1157865971326828, + "step": 3450 + }, + { + "epoch": 2.3865836791147994, + "grad_norm": 5.492684364318848, + "learning_rate": 4.2296757338251115e-05, + "log_odds_chosen": 7.788609981536865, + "log_odds_ratio": -0.005910110659897327, + "logits/chosen": -0.5513278245925903, + "logits/rejected": -0.6317500472068787, + "logps/chosen": -0.04046875983476639, + "logps/rejected": -1.576035737991333, + "loss": 2.3057, + "nll_loss": 0.5758423209190369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004046875983476639, + "rewards/margins": 0.1535567045211792, + "rewards/rejected": -0.15760357677936554, + "step": 3451 + }, + { + "epoch": 2.3872752420470262, + "grad_norm": 9.929596900939941, + "learning_rate": 4.229291532196097e-05, + "log_odds_chosen": 7.062018394470215, + "log_odds_ratio": -0.007457717787474394, + "logits/chosen": -0.6636491417884827, + "logits/rejected": -0.6853954792022705, + "logps/chosen": -0.028744446113705635, + "logps/rejected": -1.737699031829834, + "loss": 2.9979, + "nll_loss": 0.7487397789955139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028744444716721773, + "rewards/margins": 0.17089545726776123, + "rewards/rejected": -0.17376990616321564, + "step": 3452 + }, + { + "epoch": 2.387966804979253, + "grad_norm": 10.12405776977539, + "learning_rate": 4.228907330567082e-05, + "log_odds_chosen": 7.498879432678223, + "log_odds_ratio": -0.014232144691050053, + "logits/chosen": -0.513949453830719, + "logits/rejected": -0.5723739862442017, + "logps/chosen": -0.046775419265031815, + "logps/rejected": -1.9690231084823608, + "loss": 2.5009, + "nll_loss": 0.6237916946411133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004677542019635439, + "rewards/margins": 0.19222477078437805, + "rewards/rejected": -0.1969023048877716, + "step": 3453 + }, + { + "epoch": 2.38865836791148, + "grad_norm": 7.1505608558654785, + "learning_rate": 4.2285231289380666e-05, + "log_odds_chosen": 7.8512282371521, + "log_odds_ratio": -0.0028052683919668198, + "logits/chosen": -0.6594638824462891, + "logits/rejected": -0.6560357809066772, + "logps/chosen": -0.010988444089889526, + "logps/rejected": -1.8420581817626953, + "loss": 2.0855, + "nll_loss": 0.5210833549499512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010988445719704032, + "rewards/margins": 0.18310695886611938, + "rewards/rejected": -0.1842058002948761, + "step": 3454 + }, + { + "epoch": 2.3893499308437067, + "grad_norm": 10.518378257751465, + "learning_rate": 4.228138927309052e-05, + "log_odds_chosen": 7.85250186920166, + "log_odds_ratio": -0.059666577726602554, + "logits/chosen": -0.5748401880264282, + "logits/rejected": -0.6480482220649719, + "logps/chosen": -0.016306307166814804, + "logps/rejected": -1.607426404953003, + "loss": 3.279, + "nll_loss": 0.8137713670730591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001630630693398416, + "rewards/margins": 0.15911200642585754, + "rewards/rejected": -0.1607426404953003, + "step": 3455 + }, + { + "epoch": 2.3900414937759336, + "grad_norm": 9.05886459350586, + "learning_rate": 4.227754725680037e-05, + "log_odds_chosen": 8.22207260131836, + "log_odds_ratio": -0.002946071792393923, + "logits/chosen": -0.8173617124557495, + "logits/rejected": -0.9066953063011169, + "logps/chosen": -0.007910683751106262, + "logps/rejected": -1.566526174545288, + "loss": 2.5319, + "nll_loss": 0.6326807141304016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000791068421676755, + "rewards/margins": 0.15586155652999878, + "rewards/rejected": -0.15665262937545776, + "step": 3456 + }, + { + "epoch": 2.3907330567081604, + "grad_norm": 11.151406288146973, + "learning_rate": 4.2273705240510216e-05, + "log_odds_chosen": 6.041598320007324, + "log_odds_ratio": -0.27977150678634644, + "logits/chosen": -0.2281467467546463, + "logits/rejected": -0.3184647262096405, + "logps/chosen": -0.037950512021780014, + "logps/rejected": -1.3815393447875977, + "loss": 2.4032, + "nll_loss": 0.5728345513343811, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00379505124874413, + "rewards/margins": 0.13435888290405273, + "rewards/rejected": -0.13815394043922424, + "step": 3457 + }, + { + "epoch": 2.391424619640387, + "grad_norm": 5.956516265869141, + "learning_rate": 4.2269863224220076e-05, + "log_odds_chosen": 8.76341724395752, + "log_odds_ratio": -0.005829709582030773, + "logits/chosen": -0.8177924156188965, + "logits/rejected": -0.8164641857147217, + "logps/chosen": -0.007455950137227774, + "logps/rejected": -1.714500904083252, + "loss": 2.3196, + "nll_loss": 0.5793288946151733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007455950835719705, + "rewards/margins": 0.17070451378822327, + "rewards/rejected": -0.17145009338855743, + "step": 3458 + }, + { + "epoch": 2.392116182572614, + "grad_norm": 8.454880714416504, + "learning_rate": 4.226602120792992e-05, + "log_odds_chosen": 8.373088836669922, + "log_odds_ratio": -0.006798881571739912, + "logits/chosen": -0.7450951337814331, + "logits/rejected": -0.7184380888938904, + "logps/chosen": -0.0014081236440688372, + "logps/rejected": -0.927725613117218, + "loss": 2.5047, + "nll_loss": 0.625499427318573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014081236440688372, + "rewards/margins": 0.0926317572593689, + "rewards/rejected": -0.09277257323265076, + "step": 3459 + }, + { + "epoch": 2.392807745504841, + "grad_norm": 6.365279674530029, + "learning_rate": 4.2262179191639774e-05, + "log_odds_chosen": 7.5471720695495605, + "log_odds_ratio": -0.15343418717384338, + "logits/chosen": -0.733639657497406, + "logits/rejected": -0.7661193013191223, + "logps/chosen": -0.038739293813705444, + "logps/rejected": -1.47605562210083, + "loss": 2.1541, + "nll_loss": 0.5231756567955017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003873929614201188, + "rewards/margins": 0.14373163878917694, + "rewards/rejected": -0.14760556817054749, + "step": 3460 + }, + { + "epoch": 2.3934993084370677, + "grad_norm": 5.454896926879883, + "learning_rate": 4.2258337175349626e-05, + "log_odds_chosen": 8.599830627441406, + "log_odds_ratio": -0.000444973586127162, + "logits/chosen": -0.5653376579284668, + "logits/rejected": -0.5564531087875366, + "logps/chosen": -0.018016574904322624, + "logps/rejected": -1.6302697658538818, + "loss": 2.4157, + "nll_loss": 0.603872537612915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018016574904322624, + "rewards/margins": 0.1612253189086914, + "rewards/rejected": -0.16302695870399475, + "step": 3461 + }, + { + "epoch": 2.3941908713692945, + "grad_norm": 6.181629180908203, + "learning_rate": 4.225449515905948e-05, + "log_odds_chosen": 6.0642008781433105, + "log_odds_ratio": -0.38111788034439087, + "logits/chosen": -0.5761101841926575, + "logits/rejected": -0.6472548246383667, + "logps/chosen": -0.141601100564003, + "logps/rejected": -1.98850679397583, + "loss": 1.897, + "nll_loss": 0.43614256381988525, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01416010968387127, + "rewards/margins": 0.18469057977199554, + "rewards/rejected": -0.19885067641735077, + "step": 3462 + }, + { + "epoch": 2.3948824343015214, + "grad_norm": 9.098873138427734, + "learning_rate": 4.2250653142769324e-05, + "log_odds_chosen": 6.658843040466309, + "log_odds_ratio": -0.08088690787553787, + "logits/chosen": -0.36394232511520386, + "logits/rejected": -0.4369269609451294, + "logps/chosen": -0.019272904843091965, + "logps/rejected": -1.2084076404571533, + "loss": 2.163, + "nll_loss": 0.53264981508255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001927290461026132, + "rewards/margins": 0.11891347914934158, + "rewards/rejected": -0.12084076553583145, + "step": 3463 + }, + { + "epoch": 2.395573997233748, + "grad_norm": 9.867566108703613, + "learning_rate": 4.224681112647918e-05, + "log_odds_chosen": 5.931120872497559, + "log_odds_ratio": -0.029134787619113922, + "logits/chosen": -0.5077770948410034, + "logits/rejected": -0.5073307752609253, + "logps/chosen": -0.0129080293700099, + "logps/rejected": -1.1946752071380615, + "loss": 2.1006, + "nll_loss": 0.5222440958023071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012908029602840543, + "rewards/margins": 0.11817672848701477, + "rewards/rejected": -0.11946753412485123, + "step": 3464 + }, + { + "epoch": 2.396265560165975, + "grad_norm": 11.358476638793945, + "learning_rate": 4.224296911018903e-05, + "log_odds_chosen": 4.821341514587402, + "log_odds_ratio": -0.3646850287914276, + "logits/chosen": -0.519396185874939, + "logits/rejected": -0.5581240653991699, + "logps/chosen": -0.0811510905623436, + "logps/rejected": -0.7058289051055908, + "loss": 2.3962, + "nll_loss": 0.5625927448272705, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00811510905623436, + "rewards/margins": 0.0624677836894989, + "rewards/rejected": -0.07058288902044296, + "step": 3465 + }, + { + "epoch": 2.396957123098202, + "grad_norm": 7.898974895477295, + "learning_rate": 4.2239127093898875e-05, + "log_odds_chosen": 8.102975845336914, + "log_odds_ratio": -0.007673066109418869, + "logits/chosen": -0.388555109500885, + "logits/rejected": -0.4589465856552124, + "logps/chosen": -0.009603948332369328, + "logps/rejected": -1.1193115711212158, + "loss": 2.0014, + "nll_loss": 0.4995794892311096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009603948565199971, + "rewards/margins": 0.11097076535224915, + "rewards/rejected": -0.11193115264177322, + "step": 3466 + }, + { + "epoch": 2.3976486860304287, + "grad_norm": 9.023910522460938, + "learning_rate": 4.2235285077608734e-05, + "log_odds_chosen": 5.483177185058594, + "log_odds_ratio": -0.30498525500297546, + "logits/chosen": -0.40885457396507263, + "logits/rejected": -0.45225241780281067, + "logps/chosen": -0.05430954322218895, + "logps/rejected": -0.9977082014083862, + "loss": 2.5177, + "nll_loss": 0.5989183783531189, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00543095450848341, + "rewards/margins": 0.09433987736701965, + "rewards/rejected": -0.09977082908153534, + "step": 3467 + }, + { + "epoch": 2.3983402489626555, + "grad_norm": 10.120564460754395, + "learning_rate": 4.223144306131858e-05, + "log_odds_chosen": 6.483001708984375, + "log_odds_ratio": -0.017802314832806587, + "logits/chosen": -0.8806463479995728, + "logits/rejected": -0.8878389596939087, + "logps/chosen": -0.01730438694357872, + "logps/rejected": -1.4190657138824463, + "loss": 2.5606, + "nll_loss": 0.6383647918701172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017304387874901295, + "rewards/margins": 0.14017613232135773, + "rewards/rejected": -0.14190655946731567, + "step": 3468 + }, + { + "epoch": 2.3990318118948823, + "grad_norm": 82.23304748535156, + "learning_rate": 4.222760104502843e-05, + "log_odds_chosen": 5.96318244934082, + "log_odds_ratio": -0.2941014766693115, + "logits/chosen": -0.9196685552597046, + "logits/rejected": -0.9675211906433105, + "logps/chosen": -0.08452773094177246, + "logps/rejected": -1.7398574352264404, + "loss": 2.2949, + "nll_loss": 0.5443228483200073, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008452773094177246, + "rewards/margins": 0.16553297638893127, + "rewards/rejected": -0.17398573458194733, + "step": 3469 + }, + { + "epoch": 2.399723374827109, + "grad_norm": 8.60822582244873, + "learning_rate": 4.2223759028738285e-05, + "log_odds_chosen": 7.1528096199035645, + "log_odds_ratio": -0.01803523115813732, + "logits/chosen": -0.9783897995948792, + "logits/rejected": -1.034075379371643, + "logps/chosen": -0.023058656603097916, + "logps/rejected": -1.163728952407837, + "loss": 2.1729, + "nll_loss": 0.5414154529571533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002305865753442049, + "rewards/margins": 0.11406703293323517, + "rewards/rejected": -0.11637289822101593, + "step": 3470 + }, + { + "epoch": 2.400414937759336, + "grad_norm": 8.744670867919922, + "learning_rate": 4.221991701244814e-05, + "log_odds_chosen": 4.124874114990234, + "log_odds_ratio": -0.30007296800613403, + "logits/chosen": -0.20929817855358124, + "logits/rejected": -0.24133385717868805, + "logps/chosen": -0.0663536861538887, + "logps/rejected": -0.3404579758644104, + "loss": 3.041, + "nll_loss": 0.7302361726760864, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006635368801653385, + "rewards/margins": 0.02741042897105217, + "rewards/rejected": -0.03404579684138298, + "step": 3471 + }, + { + "epoch": 2.401106500691563, + "grad_norm": 11.96430778503418, + "learning_rate": 4.221607499615798e-05, + "log_odds_chosen": 9.046222686767578, + "log_odds_ratio": -0.001090765930712223, + "logits/chosen": -0.8712348937988281, + "logits/rejected": -1.0210875272750854, + "logps/chosen": -0.0011071586050093174, + "logps/rejected": -1.683934211730957, + "loss": 2.9123, + "nll_loss": 0.7279656529426575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011071586050093174, + "rewards/margins": 0.16828271746635437, + "rewards/rejected": -0.16839343309402466, + "step": 3472 + }, + { + "epoch": 2.4017980636237897, + "grad_norm": 7.895980358123779, + "learning_rate": 4.2212232979867835e-05, + "log_odds_chosen": 8.111176490783691, + "log_odds_ratio": -0.0013888756511732936, + "logits/chosen": -0.8593344688415527, + "logits/rejected": -0.7808473110198975, + "logps/chosen": -0.0021940222941339016, + "logps/rejected": -1.2674121856689453, + "loss": 2.1754, + "nll_loss": 0.5437151193618774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021940222359262407, + "rewards/margins": 0.12652181088924408, + "rewards/rejected": -0.1267412155866623, + "step": 3473 + }, + { + "epoch": 2.4024896265560165, + "grad_norm": 8.358758926391602, + "learning_rate": 4.220839096357769e-05, + "log_odds_chosen": 6.501728534698486, + "log_odds_ratio": -0.021476784721016884, + "logits/chosen": -1.0236040353775024, + "logits/rejected": -1.0076302289962769, + "logps/chosen": -0.023863408714532852, + "logps/rejected": -1.1087217330932617, + "loss": 2.3159, + "nll_loss": 0.5768356919288635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023863406386226416, + "rewards/margins": 0.10848583281040192, + "rewards/rejected": -0.11087217181921005, + "step": 3474 + }, + { + "epoch": 2.4031811894882433, + "grad_norm": 7.01558256149292, + "learning_rate": 4.220454894728753e-05, + "log_odds_chosen": 5.170758247375488, + "log_odds_ratio": -0.13841156661510468, + "logits/chosen": -0.5865446925163269, + "logits/rejected": -0.612939178943634, + "logps/chosen": -0.07330407202243805, + "logps/rejected": -1.681246042251587, + "loss": 2.7987, + "nll_loss": 0.685846209526062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00733040738850832, + "rewards/margins": 0.160794198513031, + "rewards/rejected": -0.16812460124492645, + "step": 3475 + }, + { + "epoch": 2.40387275242047, + "grad_norm": 7.598639965057373, + "learning_rate": 4.220070693099739e-05, + "log_odds_chosen": 7.575196743011475, + "log_odds_ratio": -0.0028736027888953686, + "logits/chosen": -0.8495464324951172, + "logits/rejected": -0.847161591053009, + "logps/chosen": -0.02533099614083767, + "logps/rejected": -1.672313928604126, + "loss": 2.3675, + "nll_loss": 0.5915927886962891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002533099614083767, + "rewards/margins": 0.1646983027458191, + "rewards/rejected": -0.16723139584064484, + "step": 3476 + }, + { + "epoch": 2.404564315352697, + "grad_norm": 7.659574031829834, + "learning_rate": 4.219686491470724e-05, + "log_odds_chosen": 5.771527290344238, + "log_odds_ratio": -0.05470234900712967, + "logits/chosen": -0.5478336215019226, + "logits/rejected": -0.634901762008667, + "logps/chosen": -0.02737801894545555, + "logps/rejected": -0.8024625778198242, + "loss": 2.5849, + "nll_loss": 0.6407510042190552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027378019876778126, + "rewards/margins": 0.07750844955444336, + "rewards/rejected": -0.08024625480175018, + "step": 3477 + }, + { + "epoch": 2.405255878284924, + "grad_norm": 10.186736106872559, + "learning_rate": 4.219302289841709e-05, + "log_odds_chosen": 5.86126708984375, + "log_odds_ratio": -0.1267208307981491, + "logits/chosen": -0.6385801434516907, + "logits/rejected": -0.6514959931373596, + "logps/chosen": -0.038592059165239334, + "logps/rejected": -0.8937729597091675, + "loss": 2.1297, + "nll_loss": 0.5197412371635437, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038592061027884483, + "rewards/margins": 0.08551809191703796, + "rewards/rejected": -0.08937729895114899, + "step": 3478 + }, + { + "epoch": 2.4059474412171507, + "grad_norm": 12.728795051574707, + "learning_rate": 4.218918088212694e-05, + "log_odds_chosen": 8.360170364379883, + "log_odds_ratio": -0.05475056543946266, + "logits/chosen": -0.445570170879364, + "logits/rejected": -0.5104779601097107, + "logps/chosen": -0.019982457160949707, + "logps/rejected": -1.60440993309021, + "loss": 1.9802, + "nll_loss": 0.4895625710487366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001998245483264327, + "rewards/margins": 0.15844275057315826, + "rewards/rejected": -0.16044099628925323, + "step": 3479 + }, + { + "epoch": 2.4066390041493775, + "grad_norm": 6.73025369644165, + "learning_rate": 4.2185338865836796e-05, + "log_odds_chosen": 7.225137710571289, + "log_odds_ratio": -0.025533132255077362, + "logits/chosen": -0.6241756677627563, + "logits/rejected": -0.6339388489723206, + "logps/chosen": -0.010463009588420391, + "logps/rejected": -1.1056491136550903, + "loss": 2.7345, + "nll_loss": 0.6810735464096069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010463010985404253, + "rewards/margins": 0.10951860249042511, + "rewards/rejected": -0.11056490242481232, + "step": 3480 + }, + { + "epoch": 2.4073305670816043, + "grad_norm": 6.189925670623779, + "learning_rate": 4.218149684954664e-05, + "log_odds_chosen": 6.061086177825928, + "log_odds_ratio": -0.16295485198497772, + "logits/chosen": -0.5748433470726013, + "logits/rejected": -0.5821690559387207, + "logps/chosen": -0.04194222018122673, + "logps/rejected": -0.7503185272216797, + "loss": 2.334, + "nll_loss": 0.5672000050544739, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004194222390651703, + "rewards/margins": 0.07083762437105179, + "rewards/rejected": -0.07503185421228409, + "step": 3481 + }, + { + "epoch": 2.408022130013831, + "grad_norm": 8.091987609863281, + "learning_rate": 4.2177654833256494e-05, + "log_odds_chosen": 6.556269645690918, + "log_odds_ratio": -0.05733267217874527, + "logits/chosen": -0.47325581312179565, + "logits/rejected": -0.45619022846221924, + "logps/chosen": -0.021132618188858032, + "logps/rejected": -1.0394395589828491, + "loss": 1.7748, + "nll_loss": 0.43796780705451965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021132619585841894, + "rewards/margins": 0.10183070600032806, + "rewards/rejected": -0.10394395887851715, + "step": 3482 + }, + { + "epoch": 2.408713692946058, + "grad_norm": 9.280055046081543, + "learning_rate": 4.2173812816966346e-05, + "log_odds_chosen": 8.679950714111328, + "log_odds_ratio": -0.0015581633197143674, + "logits/chosen": -0.7012354731559753, + "logits/rejected": -0.7508944272994995, + "logps/chosen": -0.013607164844870567, + "logps/rejected": -1.7001599073410034, + "loss": 2.3659, + "nll_loss": 0.5913158655166626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013607164146378636, + "rewards/margins": 0.16865527629852295, + "rewards/rejected": -0.17001599073410034, + "step": 3483 + }, + { + "epoch": 2.409405255878285, + "grad_norm": 13.053488731384277, + "learning_rate": 4.216997080067619e-05, + "log_odds_chosen": 7.968313217163086, + "log_odds_ratio": -0.06880037486553192, + "logits/chosen": -0.5693072080612183, + "logits/rejected": -0.6625873446464539, + "logps/chosen": -0.02932230569422245, + "logps/rejected": -1.8904154300689697, + "loss": 3.0299, + "nll_loss": 0.7505956888198853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029322307091206312, + "rewards/margins": 0.18610931932926178, + "rewards/rejected": -0.18904155492782593, + "step": 3484 + }, + { + "epoch": 2.4100968188105116, + "grad_norm": 7.838770389556885, + "learning_rate": 4.216612878438605e-05, + "log_odds_chosen": 7.503055572509766, + "log_odds_ratio": -0.07354681938886642, + "logits/chosen": -0.6398206949234009, + "logits/rejected": -0.5904887914657593, + "logps/chosen": -0.022758491337299347, + "logps/rejected": -1.350595235824585, + "loss": 2.1233, + "nll_loss": 0.5234798192977905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022758489940315485, + "rewards/margins": 0.1327836811542511, + "rewards/rejected": -0.13505952060222626, + "step": 3485 + }, + { + "epoch": 2.4107883817427385, + "grad_norm": 9.07406234741211, + "learning_rate": 4.2162286768095897e-05, + "log_odds_chosen": 6.529168128967285, + "log_odds_ratio": -0.09720098227262497, + "logits/chosen": -0.5522468090057373, + "logits/rejected": -0.5980552434921265, + "logps/chosen": -0.025193000212311745, + "logps/rejected": -1.235506534576416, + "loss": 2.4523, + "nll_loss": 0.6033427119255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025192999746650457, + "rewards/margins": 0.12103135138750076, + "rewards/rejected": -0.1235506534576416, + "step": 3486 + }, + { + "epoch": 2.4114799446749653, + "grad_norm": 7.101974010467529, + "learning_rate": 4.215844475180575e-05, + "log_odds_chosen": 6.016046524047852, + "log_odds_ratio": -0.020641181617975235, + "logits/chosen": -0.7464077472686768, + "logits/rejected": -0.7497326135635376, + "logps/chosen": -0.056564487516880035, + "logps/rejected": -1.5879918336868286, + "loss": 2.3461, + "nll_loss": 0.5844558477401733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005656449124217033, + "rewards/margins": 0.15314273536205292, + "rewards/rejected": -0.1587991863489151, + "step": 3487 + }, + { + "epoch": 2.412171507607192, + "grad_norm": 7.258253574371338, + "learning_rate": 4.21546027355156e-05, + "log_odds_chosen": 8.490350723266602, + "log_odds_ratio": -0.0014901505783200264, + "logits/chosen": -0.9239876866340637, + "logits/rejected": -0.9879686832427979, + "logps/chosen": -0.009935002774000168, + "logps/rejected": -1.9407963752746582, + "loss": 2.2881, + "nll_loss": 0.5718832015991211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009935003472492099, + "rewards/margins": 0.19308611750602722, + "rewards/rejected": -0.19407962262630463, + "step": 3488 + }, + { + "epoch": 2.412863070539419, + "grad_norm": 7.0164289474487305, + "learning_rate": 4.2150760719225454e-05, + "log_odds_chosen": 8.137097358703613, + "log_odds_ratio": -0.012940660119056702, + "logits/chosen": -0.7661728858947754, + "logits/rejected": -0.7889710664749146, + "logps/chosen": -0.014242495410144329, + "logps/rejected": -1.191624402999878, + "loss": 2.1544, + "nll_loss": 0.5372986197471619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001424249610863626, + "rewards/margins": 0.11773819476366043, + "rewards/rejected": -0.11916244775056839, + "step": 3489 + }, + { + "epoch": 2.413554633471646, + "grad_norm": 4.6547160148620605, + "learning_rate": 4.21469187029353e-05, + "log_odds_chosen": 7.072702407836914, + "log_odds_ratio": -0.027786776423454285, + "logits/chosen": -0.6046419143676758, + "logits/rejected": -0.6040664911270142, + "logps/chosen": -0.029178176075220108, + "logps/rejected": -1.7043691873550415, + "loss": 2.238, + "nll_loss": 0.5567184090614319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029178177937865257, + "rewards/margins": 0.16751909255981445, + "rewards/rejected": -0.17043691873550415, + "step": 3490 + }, + { + "epoch": 2.4142461964038726, + "grad_norm": 11.417491912841797, + "learning_rate": 4.214307668664515e-05, + "log_odds_chosen": 5.747222423553467, + "log_odds_ratio": -0.08429299294948578, + "logits/chosen": -0.7581441402435303, + "logits/rejected": -0.7898090481758118, + "logps/chosen": -0.0240895077586174, + "logps/rejected": -0.8935565948486328, + "loss": 2.6229, + "nll_loss": 0.6472893357276917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002408950822427869, + "rewards/margins": 0.08694671094417572, + "rewards/rejected": -0.08935566246509552, + "step": 3491 + }, + { + "epoch": 2.4149377593360994, + "grad_norm": 9.846121788024902, + "learning_rate": 4.2139234670355004e-05, + "log_odds_chosen": 7.643199443817139, + "log_odds_ratio": -0.1389947384595871, + "logits/chosen": -0.8550637364387512, + "logits/rejected": -0.9452530145645142, + "logps/chosen": -0.013365296646952629, + "logps/rejected": -1.3257532119750977, + "loss": 2.4252, + "nll_loss": 0.5924007892608643, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0013365296181291342, + "rewards/margins": 0.13123878836631775, + "rewards/rejected": -0.13257533311843872, + "step": 3492 + }, + { + "epoch": 2.4156293222683263, + "grad_norm": 4.824674606323242, + "learning_rate": 4.213539265406486e-05, + "log_odds_chosen": 5.845300674438477, + "log_odds_ratio": -0.03971134126186371, + "logits/chosen": -0.853974461555481, + "logits/rejected": -0.8486250042915344, + "logps/chosen": -0.023859363049268723, + "logps/rejected": -0.889519453048706, + "loss": 2.4991, + "nll_loss": 0.620795488357544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002385936211794615, + "rewards/margins": 0.0865660086274147, + "rewards/rejected": -0.0889519453048706, + "step": 3493 + }, + { + "epoch": 2.416320885200553, + "grad_norm": 6.13293981552124, + "learning_rate": 4.213155063777471e-05, + "log_odds_chosen": 8.28451919555664, + "log_odds_ratio": -0.00443453062325716, + "logits/chosen": -0.6030639410018921, + "logits/rejected": -0.6766177415847778, + "logps/chosen": -0.005843472667038441, + "logps/rejected": -1.6858534812927246, + "loss": 2.6302, + "nll_loss": 0.6570960283279419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005843472899869084, + "rewards/margins": 0.16800101101398468, + "rewards/rejected": -0.16858534514904022, + "step": 3494 + }, + { + "epoch": 2.41701244813278, + "grad_norm": 10.260222434997559, + "learning_rate": 4.2127708621484555e-05, + "log_odds_chosen": 7.628552436828613, + "log_odds_ratio": -0.005517259240150452, + "logits/chosen": -0.7729060649871826, + "logits/rejected": -0.7599183917045593, + "logps/chosen": -0.014096952974796295, + "logps/rejected": -1.4275188446044922, + "loss": 2.4221, + "nll_loss": 0.6049808859825134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014096952509135008, + "rewards/margins": 0.1413421928882599, + "rewards/rejected": -0.14275188744068146, + "step": 3495 + }, + { + "epoch": 2.4177040110650068, + "grad_norm": 10.477607727050781, + "learning_rate": 4.212386660519441e-05, + "log_odds_chosen": 8.311507225036621, + "log_odds_ratio": -0.0017723742639645934, + "logits/chosen": -0.7644646763801575, + "logits/rejected": -0.795981764793396, + "logps/chosen": -0.0015009690541774035, + "logps/rejected": -1.2311259508132935, + "loss": 2.3773, + "nll_loss": 0.5941388607025146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001500969083281234, + "rewards/margins": 0.12296249717473984, + "rewards/rejected": -0.12311260402202606, + "step": 3496 + }, + { + "epoch": 2.4183955739972336, + "grad_norm": 10.727534294128418, + "learning_rate": 4.212002458890426e-05, + "log_odds_chosen": 8.098236083984375, + "log_odds_ratio": -0.02444791980087757, + "logits/chosen": -0.6742796897888184, + "logits/rejected": -0.7470793128013611, + "logps/chosen": -0.02403697744011879, + "logps/rejected": -1.4629892110824585, + "loss": 3.3694, + "nll_loss": 0.8398998379707336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002403697930276394, + "rewards/margins": 0.143895223736763, + "rewards/rejected": -0.14629891514778137, + "step": 3497 + }, + { + "epoch": 2.4190871369294604, + "grad_norm": 11.279629707336426, + "learning_rate": 4.211618257261411e-05, + "log_odds_chosen": 7.625337600708008, + "log_odds_ratio": -0.005302184261381626, + "logits/chosen": -0.9389767050743103, + "logits/rejected": -1.0070160627365112, + "logps/chosen": -0.00925231259316206, + "logps/rejected": -1.2491271495819092, + "loss": 3.1125, + "nll_loss": 0.7776023745536804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009252313175238669, + "rewards/margins": 0.12398748099803925, + "rewards/rejected": -0.12491270899772644, + "step": 3498 + }, + { + "epoch": 2.4197786998616873, + "grad_norm": 6.170611381530762, + "learning_rate": 4.211234055632396e-05, + "log_odds_chosen": 6.709138870239258, + "log_odds_ratio": -0.046844013035297394, + "logits/chosen": -0.5986511707305908, + "logits/rejected": -0.5998558402061462, + "logps/chosen": -0.013723606243729591, + "logps/rejected": -1.2466332912445068, + "loss": 2.3662, + "nll_loss": 0.5868594646453857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001372360740788281, + "rewards/margins": 0.12329097092151642, + "rewards/rejected": -0.1246633380651474, + "step": 3499 + }, + { + "epoch": 2.420470262793914, + "grad_norm": 9.391353607177734, + "learning_rate": 4.210849854003382e-05, + "log_odds_chosen": 5.222013473510742, + "log_odds_ratio": -0.22473469376564026, + "logits/chosen": -0.480400025844574, + "logits/rejected": -0.5001802444458008, + "logps/chosen": -0.056873537600040436, + "logps/rejected": -0.8878986239433289, + "loss": 2.4718, + "nll_loss": 0.5954755544662476, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005687354132533073, + "rewards/margins": 0.0831025093793869, + "rewards/rejected": -0.08878986537456512, + "step": 3500 + }, + { + "epoch": 2.421161825726141, + "grad_norm": 11.657116889953613, + "learning_rate": 4.210465652374366e-05, + "log_odds_chosen": 8.136720657348633, + "log_odds_ratio": -0.0016932344296947122, + "logits/chosen": -0.6791071891784668, + "logits/rejected": -0.8140288591384888, + "logps/chosen": -0.005484652239829302, + "logps/rejected": -1.6490072011947632, + "loss": 2.8094, + "nll_loss": 0.7021803855895996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005484651774168015, + "rewards/margins": 0.16435226798057556, + "rewards/rejected": -0.16490072011947632, + "step": 3501 + }, + { + "epoch": 2.4218533886583677, + "grad_norm": 9.51025676727295, + "learning_rate": 4.2100814507453515e-05, + "log_odds_chosen": 8.480232238769531, + "log_odds_ratio": -0.04696385934948921, + "logits/chosen": -0.5885998010635376, + "logits/rejected": -0.6898617148399353, + "logps/chosen": -0.05135025084018707, + "logps/rejected": -2.130434989929199, + "loss": 2.084, + "nll_loss": 0.5163017511367798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005135024897754192, + "rewards/margins": 0.2079084813594818, + "rewards/rejected": -0.21304351091384888, + "step": 3502 + }, + { + "epoch": 2.4225449515905946, + "grad_norm": 8.47912883758545, + "learning_rate": 4.209697249116337e-05, + "log_odds_chosen": 8.443939208984375, + "log_odds_ratio": -0.0013600240927189589, + "logits/chosen": -0.7223008871078491, + "logits/rejected": -0.7537130117416382, + "logps/chosen": -0.0009003658778965473, + "logps/rejected": -1.134358525276184, + "loss": 2.8194, + "nll_loss": 0.7047219276428223, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.003658487927169e-05, + "rewards/margins": 0.11334581673145294, + "rewards/rejected": -0.11343584954738617, + "step": 3503 + }, + { + "epoch": 2.4232365145228214, + "grad_norm": 8.608758926391602, + "learning_rate": 4.2093130474873213e-05, + "log_odds_chosen": 7.91787576675415, + "log_odds_ratio": -0.08348464220762253, + "logits/chosen": -0.559022068977356, + "logits/rejected": -0.6558287143707275, + "logps/chosen": -0.013709326274693012, + "logps/rejected": -1.163172960281372, + "loss": 1.8885, + "nll_loss": 0.46378472447395325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00137093267403543, + "rewards/margins": 0.11494636535644531, + "rewards/rejected": -0.11631729453802109, + "step": 3504 + }, + { + "epoch": 2.4239280774550482, + "grad_norm": 12.582856178283691, + "learning_rate": 4.2089288458583066e-05, + "log_odds_chosen": 9.710137367248535, + "log_odds_ratio": -0.00018082182214129716, + "logits/chosen": -0.7905318737030029, + "logits/rejected": -0.8423488140106201, + "logps/chosen": -0.0003087608201894909, + "logps/rejected": -1.664839267730713, + "loss": 2.9246, + "nll_loss": 0.7311212420463562, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0876079108566046e-05, + "rewards/margins": 0.1664530634880066, + "rewards/rejected": -0.16648393869400024, + "step": 3505 + }, + { + "epoch": 2.424619640387275, + "grad_norm": 6.5926594734191895, + "learning_rate": 4.208544644229292e-05, + "log_odds_chosen": 7.102262496948242, + "log_odds_ratio": -0.005528897512704134, + "logits/chosen": -0.6839585304260254, + "logits/rejected": -0.5873481631278992, + "logps/chosen": -0.008574813604354858, + "logps/rejected": -0.9425464868545532, + "loss": 2.1099, + "nll_loss": 0.5269334316253662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008574813837185502, + "rewards/margins": 0.09339717775583267, + "rewards/rejected": -0.09425465762615204, + "step": 3506 + }, + { + "epoch": 2.425311203319502, + "grad_norm": 8.038623809814453, + "learning_rate": 4.208160442600277e-05, + "log_odds_chosen": 7.128605365753174, + "log_odds_ratio": -0.003702126909047365, + "logits/chosen": -0.7105140089988708, + "logits/rejected": -0.7407926917076111, + "logps/chosen": -0.010936792008578777, + "logps/rejected": -1.22905695438385, + "loss": 2.5495, + "nll_loss": 0.6369987726211548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001093679224140942, + "rewards/margins": 0.12181201577186584, + "rewards/rejected": -0.12290570139884949, + "step": 3507 + }, + { + "epoch": 2.4260027662517287, + "grad_norm": 9.31099796295166, + "learning_rate": 4.2077762409712616e-05, + "log_odds_chosen": 7.37070369720459, + "log_odds_ratio": -0.043804366141557693, + "logits/chosen": -0.39794284105300903, + "logits/rejected": -0.46229493618011475, + "logps/chosen": -0.022990640252828598, + "logps/rejected": -1.276123285293579, + "loss": 2.4207, + "nll_loss": 0.6007853150367737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022990640718489885, + "rewards/margins": 0.12531326711177826, + "rewards/rejected": -0.12761233747005463, + "step": 3508 + }, + { + "epoch": 2.4266943291839556, + "grad_norm": 6.147286415100098, + "learning_rate": 4.2073920393422476e-05, + "log_odds_chosen": 7.822290420532227, + "log_odds_ratio": -0.0024672728031873703, + "logits/chosen": -0.8302035927772522, + "logits/rejected": -0.9225561618804932, + "logps/chosen": -0.005152544938027859, + "logps/rejected": -1.4149426221847534, + "loss": 1.9211, + "nll_loss": 0.4800260365009308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005152545054443181, + "rewards/margins": 0.14097902178764343, + "rewards/rejected": -0.1414942592382431, + "step": 3509 + }, + { + "epoch": 2.4273858921161824, + "grad_norm": 4.319844722747803, + "learning_rate": 4.207007837713232e-05, + "log_odds_chosen": 7.981142044067383, + "log_odds_ratio": -0.0016685464652255177, + "logits/chosen": -0.8217602372169495, + "logits/rejected": -0.8055883049964905, + "logps/chosen": -0.00772473169490695, + "logps/rejected": -1.4985321760177612, + "loss": 1.509, + "nll_loss": 0.37708476185798645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000772473169490695, + "rewards/margins": 0.14908073842525482, + "rewards/rejected": -0.14985322952270508, + "step": 3510 + }, + { + "epoch": 2.428077455048409, + "grad_norm": 9.382851600646973, + "learning_rate": 4.2066236360842174e-05, + "log_odds_chosen": 7.408436298370361, + "log_odds_ratio": -0.004545638337731361, + "logits/chosen": -0.6942574381828308, + "logits/rejected": -0.6026928424835205, + "logps/chosen": -0.014898409135639668, + "logps/rejected": -1.4916660785675049, + "loss": 2.2225, + "nll_loss": 0.5551747679710388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001489840797148645, + "rewards/margins": 0.1476767659187317, + "rewards/rejected": -0.14916659891605377, + "step": 3511 + }, + { + "epoch": 2.428769017980636, + "grad_norm": 6.530588150024414, + "learning_rate": 4.2062394344552026e-05, + "log_odds_chosen": 7.621374130249023, + "log_odds_ratio": -0.020029377192258835, + "logits/chosen": -0.36985185742378235, + "logits/rejected": -0.4080933928489685, + "logps/chosen": -0.010694924741983414, + "logps/rejected": -1.1310560703277588, + "loss": 2.2096, + "nll_loss": 0.550404965877533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010694925440475345, + "rewards/margins": 0.11203610897064209, + "rewards/rejected": -0.11310561001300812, + "step": 3512 + }, + { + "epoch": 2.429460580912863, + "grad_norm": 9.551876068115234, + "learning_rate": 4.205855232826187e-05, + "log_odds_chosen": 6.510831356048584, + "log_odds_ratio": -0.10963248461484909, + "logits/chosen": -0.7897687554359436, + "logits/rejected": -0.8556662201881409, + "logps/chosen": -0.02455962263047695, + "logps/rejected": -1.2443952560424805, + "loss": 2.7122, + "nll_loss": 0.6670763492584229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002455962123349309, + "rewards/margins": 0.12198356539011002, + "rewards/rejected": -0.12443952262401581, + "step": 3513 + }, + { + "epoch": 2.43015214384509, + "grad_norm": 8.177549362182617, + "learning_rate": 4.2054710311971724e-05, + "log_odds_chosen": 7.105329513549805, + "log_odds_ratio": -0.0745643824338913, + "logits/chosen": -0.7847766876220703, + "logits/rejected": -0.8372522592544556, + "logps/chosen": -0.02346830442547798, + "logps/rejected": -1.133172631263733, + "loss": 2.8405, + "nll_loss": 0.702670156955719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023468302097171545, + "rewards/margins": 0.11097043752670288, + "rewards/rejected": -0.11331726610660553, + "step": 3514 + }, + { + "epoch": 2.430843706777317, + "grad_norm": 6.872097492218018, + "learning_rate": 4.205086829568158e-05, + "log_odds_chosen": 7.203080654144287, + "log_odds_ratio": -0.014032398350536823, + "logits/chosen": -0.7077941298484802, + "logits/rejected": -0.7775839567184448, + "logps/chosen": -0.010666534304618835, + "logps/rejected": -1.1262292861938477, + "loss": 2.1964, + "nll_loss": 0.5477020740509033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010666532907634974, + "rewards/margins": 0.1115562841296196, + "rewards/rejected": -0.112622931599617, + "step": 3515 + }, + { + "epoch": 2.431535269709544, + "grad_norm": 9.27294921875, + "learning_rate": 4.204702627939143e-05, + "log_odds_chosen": 9.278311729431152, + "log_odds_ratio": -0.014651588164269924, + "logits/chosen": -0.4278988242149353, + "logits/rejected": -0.505477786064148, + "logps/chosen": -0.0059097036719322205, + "logps/rejected": -1.815339207649231, + "loss": 2.4822, + "nll_loss": 0.6190872192382812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005909703322686255, + "rewards/margins": 0.18094295263290405, + "rewards/rejected": -0.18153391778469086, + "step": 3516 + }, + { + "epoch": 2.4322268326417706, + "grad_norm": 12.633123397827148, + "learning_rate": 4.2043184263101275e-05, + "log_odds_chosen": 7.5888190269470215, + "log_odds_ratio": -0.12230602651834488, + "logits/chosen": -0.7426217794418335, + "logits/rejected": -0.8118423223495483, + "logps/chosen": -0.02818923629820347, + "logps/rejected": -1.1682384014129639, + "loss": 3.2121, + "nll_loss": 0.7908050417900085, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028189236763864756, + "rewards/margins": 0.11400490999221802, + "rewards/rejected": -0.11682382971048355, + "step": 3517 + }, + { + "epoch": 2.4329183955739975, + "grad_norm": 8.882617950439453, + "learning_rate": 4.2039342246811134e-05, + "log_odds_chosen": 8.545615196228027, + "log_odds_ratio": -0.0015734564512968063, + "logits/chosen": -0.6913523077964783, + "logits/rejected": -0.6268677115440369, + "logps/chosen": -0.01377897709608078, + "logps/rejected": -2.0227184295654297, + "loss": 3.4862, + "nll_loss": 0.8713939189910889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013778976863250136, + "rewards/margins": 0.20089393854141235, + "rewards/rejected": -0.20227184891700745, + "step": 3518 + }, + { + "epoch": 2.4336099585062243, + "grad_norm": 12.85490894317627, + "learning_rate": 4.203550023052098e-05, + "log_odds_chosen": 6.209042549133301, + "log_odds_ratio": -0.16449643671512604, + "logits/chosen": -0.46550968289375305, + "logits/rejected": -0.47567084431648254, + "logps/chosen": -0.06882783770561218, + "logps/rejected": -0.9017906188964844, + "loss": 2.3628, + "nll_loss": 0.5742617845535278, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006882783956825733, + "rewards/margins": 0.0832962840795517, + "rewards/rejected": -0.09017906337976456, + "step": 3519 + }, + { + "epoch": 2.434301521438451, + "grad_norm": 9.354718208312988, + "learning_rate": 4.203165821423083e-05, + "log_odds_chosen": 7.354831695556641, + "log_odds_ratio": -0.0032965652644634247, + "logits/chosen": -0.7076842784881592, + "logits/rejected": -0.6892897486686707, + "logps/chosen": -0.02614540606737137, + "logps/rejected": -1.610516905784607, + "loss": 2.6776, + "nll_loss": 0.6690692901611328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026145405136048794, + "rewards/margins": 0.15843714773654938, + "rewards/rejected": -0.1610516905784607, + "step": 3520 + }, + { + "epoch": 2.434993084370678, + "grad_norm": 14.583526611328125, + "learning_rate": 4.2027816197940685e-05, + "log_odds_chosen": 7.048150539398193, + "log_odds_ratio": -0.45794180035591125, + "logits/chosen": -0.5279990434646606, + "logits/rejected": -0.551681637763977, + "logps/chosen": -0.06697956472635269, + "logps/rejected": -1.5029540061950684, + "loss": 2.3421, + "nll_loss": 0.5397307276725769, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006697956472635269, + "rewards/margins": 0.14359745383262634, + "rewards/rejected": -0.1502954065799713, + "step": 3521 + }, + { + "epoch": 2.435684647302905, + "grad_norm": 8.86139965057373, + "learning_rate": 4.202397418165053e-05, + "log_odds_chosen": 8.100759506225586, + "log_odds_ratio": -0.0019487441750243306, + "logits/chosen": -0.4604555070400238, + "logits/rejected": -0.5274443030357361, + "logps/chosen": -0.0014906483702361584, + "logps/rejected": -1.226670742034912, + "loss": 2.3142, + "nll_loss": 0.5783494114875793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014906484284438193, + "rewards/margins": 0.12251799553632736, + "rewards/rejected": -0.12266705930233002, + "step": 3522 + }, + { + "epoch": 2.4363762102351316, + "grad_norm": 5.555274486541748, + "learning_rate": 4.202013216536038e-05, + "log_odds_chosen": 6.237675666809082, + "log_odds_ratio": -0.018100908026099205, + "logits/chosen": -0.7315844893455505, + "logits/rejected": -0.810881495475769, + "logps/chosen": -0.03815982863306999, + "logps/rejected": -1.432349443435669, + "loss": 3.2658, + "nll_loss": 0.8146426677703857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038159831892699003, + "rewards/margins": 0.13941895961761475, + "rewards/rejected": -0.14323493838310242, + "step": 3523 + }, + { + "epoch": 2.4370677731673585, + "grad_norm": 3.357123851776123, + "learning_rate": 4.2016290149070235e-05, + "log_odds_chosen": 8.570686340332031, + "log_odds_ratio": -0.011956355534493923, + "logits/chosen": -0.3522316813468933, + "logits/rejected": -0.29090994596481323, + "logps/chosen": -0.006132098380476236, + "logps/rejected": -1.357385277748108, + "loss": 2.189, + "nll_loss": 0.5460516810417175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006132098496891558, + "rewards/margins": 0.1351253092288971, + "rewards/rejected": -0.1357385218143463, + "step": 3524 + }, + { + "epoch": 2.4377593360995853, + "grad_norm": 10.35288143157959, + "learning_rate": 4.201244813278009e-05, + "log_odds_chosen": 9.286419868469238, + "log_odds_ratio": -0.002155565656721592, + "logits/chosen": -0.15422987937927246, + "logits/rejected": -0.22997677326202393, + "logps/chosen": -0.000969383807387203, + "logps/rejected": -1.7188987731933594, + "loss": 2.9679, + "nll_loss": 0.7417480945587158, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.693838364910334e-05, + "rewards/margins": 0.17179293930530548, + "rewards/rejected": -0.17188987135887146, + "step": 3525 + }, + { + "epoch": 2.438450899031812, + "grad_norm": 5.98746919631958, + "learning_rate": 4.200860611648993e-05, + "log_odds_chosen": 7.209506988525391, + "log_odds_ratio": -0.01720517687499523, + "logits/chosen": -0.255402535200119, + "logits/rejected": -0.2656596004962921, + "logps/chosen": -0.019557084888219833, + "logps/rejected": -1.6097584962844849, + "loss": 2.8255, + "nll_loss": 0.7046565413475037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019557084888219833, + "rewards/margins": 0.15902015566825867, + "rewards/rejected": -0.1609758585691452, + "step": 3526 + }, + { + "epoch": 2.439142461964039, + "grad_norm": 6.258415222167969, + "learning_rate": 4.200476410019979e-05, + "log_odds_chosen": 6.269476890563965, + "log_odds_ratio": -0.038511913269758224, + "logits/chosen": -0.18216294050216675, + "logits/rejected": -0.20947065949440002, + "logps/chosen": -0.0269983671605587, + "logps/rejected": -1.0320063829421997, + "loss": 2.2651, + "nll_loss": 0.562411904335022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026998370885849, + "rewards/margins": 0.10050080716609955, + "rewards/rejected": -0.10320064425468445, + "step": 3527 + }, + { + "epoch": 2.4398340248962658, + "grad_norm": 6.181754112243652, + "learning_rate": 4.200092208390964e-05, + "log_odds_chosen": 7.249432563781738, + "log_odds_ratio": -0.002386817242950201, + "logits/chosen": -0.22067557275295258, + "logits/rejected": -0.26951614022254944, + "logps/chosen": -0.007044011726975441, + "logps/rejected": -1.532610535621643, + "loss": 1.9815, + "nll_loss": 0.49513524770736694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007044011726975441, + "rewards/margins": 0.1525566577911377, + "rewards/rejected": -0.15326105058193207, + "step": 3528 + }, + { + "epoch": 2.4405255878284926, + "grad_norm": 10.975595474243164, + "learning_rate": 4.199708006761949e-05, + "log_odds_chosen": 5.667407512664795, + "log_odds_ratio": -0.10486754029989243, + "logits/chosen": -0.21335190534591675, + "logits/rejected": -0.21384568512439728, + "logps/chosen": -0.018393559381365776, + "logps/rejected": -1.1246503591537476, + "loss": 2.6217, + "nll_loss": 0.6449326276779175, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001839356031268835, + "rewards/margins": 0.11062569171190262, + "rewards/rejected": -0.112465038895607, + "step": 3529 + }, + { + "epoch": 2.4412171507607194, + "grad_norm": 6.723755359649658, + "learning_rate": 4.199323805132934e-05, + "log_odds_chosen": 9.1740140914917, + "log_odds_ratio": -0.0013784350594505668, + "logits/chosen": -0.5831416845321655, + "logits/rejected": -0.6475759744644165, + "logps/chosen": -0.0018314392073079944, + "logps/rejected": -1.3935047388076782, + "loss": 2.3557, + "nll_loss": 0.5887914896011353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018314392946194857, + "rewards/margins": 0.13916733860969543, + "rewards/rejected": -0.13935047388076782, + "step": 3530 + }, + { + "epoch": 2.4419087136929463, + "grad_norm": 5.532959938049316, + "learning_rate": 4.198939603503919e-05, + "log_odds_chosen": 8.193620681762695, + "log_odds_ratio": -0.08815432339906693, + "logits/chosen": -0.32796207070350647, + "logits/rejected": -0.309958815574646, + "logps/chosen": -0.0200906153768301, + "logps/rejected": -1.7347533702850342, + "loss": 2.6446, + "nll_loss": 0.6523411870002747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020090616308152676, + "rewards/margins": 0.17146629095077515, + "rewards/rejected": -0.17347534000873566, + "step": 3531 + }, + { + "epoch": 2.442600276625173, + "grad_norm": 8.325709342956543, + "learning_rate": 4.198555401874904e-05, + "log_odds_chosen": 9.631030082702637, + "log_odds_ratio": -0.0007575347553938627, + "logits/chosen": 0.05261028930544853, + "logits/rejected": -0.10341690480709076, + "logps/chosen": -0.0013362450990825891, + "logps/rejected": -1.930006980895996, + "loss": 1.9346, + "nll_loss": 0.48357462882995605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013362453319132328, + "rewards/margins": 0.19286708533763885, + "rewards/rejected": -0.1930007040500641, + "step": 3532 + }, + { + "epoch": 2.4432918395574, + "grad_norm": 42.79154586791992, + "learning_rate": 4.1981712002458894e-05, + "log_odds_chosen": 7.646982192993164, + "log_odds_ratio": -0.18479464948177338, + "logits/chosen": -0.06726447492837906, + "logits/rejected": -0.2156417965888977, + "logps/chosen": -0.017851902171969414, + "logps/rejected": -1.2830493450164795, + "loss": 2.2222, + "nll_loss": 0.5370683670043945, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017851904267445207, + "rewards/margins": 0.12651973962783813, + "rewards/rejected": -0.12830494344234467, + "step": 3533 + }, + { + "epoch": 2.4439834024896268, + "grad_norm": 6.985574245452881, + "learning_rate": 4.1977869986168746e-05, + "log_odds_chosen": 8.568099975585938, + "log_odds_ratio": -0.009587760083377361, + "logits/chosen": -0.29672423005104065, + "logits/rejected": -0.30285751819610596, + "logps/chosen": -0.01729811169207096, + "logps/rejected": -1.8276588916778564, + "loss": 2.1486, + "nll_loss": 0.5361930727958679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017298111924901605, + "rewards/margins": 0.1810360699892044, + "rewards/rejected": -0.1827658861875534, + "step": 3534 + }, + { + "epoch": 2.4446749654218536, + "grad_norm": 5.899697780609131, + "learning_rate": 4.197402796987859e-05, + "log_odds_chosen": 7.62744140625, + "log_odds_ratio": -0.029710203409194946, + "logits/chosen": -0.30902692675590515, + "logits/rejected": -0.35745781660079956, + "logps/chosen": -0.019765477627515793, + "logps/rejected": -1.4128010272979736, + "loss": 1.9326, + "nll_loss": 0.4801683723926544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019765477627515793, + "rewards/margins": 0.1393035501241684, + "rewards/rejected": -0.14128009974956512, + "step": 3535 + }, + { + "epoch": 2.4453665283540804, + "grad_norm": 8.331310272216797, + "learning_rate": 4.197018595358845e-05, + "log_odds_chosen": 7.946462631225586, + "log_odds_ratio": -0.002366039901971817, + "logits/chosen": -0.227493554353714, + "logits/rejected": -0.2392466962337494, + "logps/chosen": -0.02204231731593609, + "logps/rejected": -1.4582772254943848, + "loss": 2.4526, + "nll_loss": 0.6129144430160522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022042319178581238, + "rewards/margins": 0.1436235010623932, + "rewards/rejected": -0.14582772552967072, + "step": 3536 + }, + { + "epoch": 2.4460580912863072, + "grad_norm": 10.663069725036621, + "learning_rate": 4.19663439372983e-05, + "log_odds_chosen": 8.45026969909668, + "log_odds_ratio": -0.0037501309998333454, + "logits/chosen": -0.5676348209381104, + "logits/rejected": -0.6324989795684814, + "logps/chosen": -0.003062083385884762, + "logps/rejected": -1.8823702335357666, + "loss": 2.9366, + "nll_loss": 0.7337688207626343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030620835605077446, + "rewards/margins": 0.18793082237243652, + "rewards/rejected": -0.1882370412349701, + "step": 3537 + }, + { + "epoch": 2.446749654218534, + "grad_norm": 6.520402431488037, + "learning_rate": 4.196250192100815e-05, + "log_odds_chosen": 6.4279375076293945, + "log_odds_ratio": -0.06751563400030136, + "logits/chosen": -0.5972424745559692, + "logits/rejected": -0.6959658861160278, + "logps/chosen": -0.026075618341565132, + "logps/rejected": -0.9391674995422363, + "loss": 2.8835, + "nll_loss": 0.714113712310791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026075621135532856, + "rewards/margins": 0.09130918234586716, + "rewards/rejected": -0.09391674399375916, + "step": 3538 + }, + { + "epoch": 2.447441217150761, + "grad_norm": 7.875150680541992, + "learning_rate": 4.1958659904718e-05, + "log_odds_chosen": 8.601234436035156, + "log_odds_ratio": -0.023501534014940262, + "logits/chosen": -0.28037527203559875, + "logits/rejected": -0.25492340326309204, + "logps/chosen": -0.024181626737117767, + "logps/rejected": -1.3170043230056763, + "loss": 2.0807, + "nll_loss": 0.5178147554397583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002418162766844034, + "rewards/margins": 0.12928228080272675, + "rewards/rejected": -0.13170044124126434, + "step": 3539 + }, + { + "epoch": 2.4481327800829877, + "grad_norm": 32.92366409301758, + "learning_rate": 4.195481788842785e-05, + "log_odds_chosen": 6.287031173706055, + "log_odds_ratio": -0.3081459701061249, + "logits/chosen": -0.6735745668411255, + "logits/rejected": -0.6229374408721924, + "logps/chosen": -0.10165555775165558, + "logps/rejected": -1.1201739311218262, + "loss": 2.6028, + "nll_loss": 0.6198921203613281, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010165555402636528, + "rewards/margins": 0.10185182839632034, + "rewards/rejected": -0.11201739311218262, + "step": 3540 + }, + { + "epoch": 2.4488243430152146, + "grad_norm": 28.747129440307617, + "learning_rate": 4.19509758721377e-05, + "log_odds_chosen": 6.925184726715088, + "log_odds_ratio": -0.44749465584754944, + "logits/chosen": -0.7733160257339478, + "logits/rejected": -0.6954240798950195, + "logps/chosen": -0.08167851716279984, + "logps/rejected": -1.769601583480835, + "loss": 2.2594, + "nll_loss": 0.5201063752174377, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008167851716279984, + "rewards/margins": 0.16879230737686157, + "rewards/rejected": -0.17696017026901245, + "step": 3541 + }, + { + "epoch": 2.4495159059474414, + "grad_norm": 10.873333930969238, + "learning_rate": 4.194713385584755e-05, + "log_odds_chosen": 8.917963981628418, + "log_odds_ratio": -0.0018444982124492526, + "logits/chosen": -0.2584773302078247, + "logits/rejected": -0.37781739234924316, + "logps/chosen": -0.001504393294453621, + "logps/rejected": -1.7596986293792725, + "loss": 2.4579, + "nll_loss": 0.6142921447753906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001504393294453621, + "rewards/margins": 0.17581942677497864, + "rewards/rejected": -0.17596986889839172, + "step": 3542 + }, + { + "epoch": 2.4502074688796682, + "grad_norm": 8.034497261047363, + "learning_rate": 4.1943291839557405e-05, + "log_odds_chosen": 8.216196060180664, + "log_odds_ratio": -0.07497686892747879, + "logits/chosen": -0.32882681488990784, + "logits/rejected": -0.3448234498500824, + "logps/chosen": -0.011722751893103123, + "logps/rejected": -1.3132429122924805, + "loss": 1.8573, + "nll_loss": 0.4568219780921936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001172275166027248, + "rewards/margins": 0.13015201687812805, + "rewards/rejected": -0.13132429122924805, + "step": 3543 + }, + { + "epoch": 2.450899031811895, + "grad_norm": 16.778364181518555, + "learning_rate": 4.193944982326725e-05, + "log_odds_chosen": 7.608541011810303, + "log_odds_ratio": -0.09194928407669067, + "logits/chosen": -0.5622957944869995, + "logits/rejected": -0.66969233751297, + "logps/chosen": -0.02298230305314064, + "logps/rejected": -1.5121748447418213, + "loss": 2.6808, + "nll_loss": 0.6610052585601807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002298230305314064, + "rewards/margins": 0.1489192545413971, + "rewards/rejected": -0.1512174904346466, + "step": 3544 + }, + { + "epoch": 2.451590594744122, + "grad_norm": 5.129400253295898, + "learning_rate": 4.193560780697711e-05, + "log_odds_chosen": 6.8556013107299805, + "log_odds_ratio": -0.06903890520334244, + "logits/chosen": -0.09445610642433167, + "logits/rejected": -0.1201615259051323, + "logps/chosen": -0.0447566993534565, + "logps/rejected": -1.1922276020050049, + "loss": 2.4929, + "nll_loss": 0.6163094639778137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00447566993534565, + "rewards/margins": 0.11474709212779999, + "rewards/rejected": -0.11922276020050049, + "step": 3545 + }, + { + "epoch": 2.4522821576763487, + "grad_norm": 19.428516387939453, + "learning_rate": 4.1931765790686955e-05, + "log_odds_chosen": 7.740385055541992, + "log_odds_ratio": -0.1259237378835678, + "logits/chosen": -0.6805443167686462, + "logits/rejected": -0.745235025882721, + "logps/chosen": -0.029267750680446625, + "logps/rejected": -1.4361294507980347, + "loss": 3.2265, + "nll_loss": 0.7940384149551392, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029267752543091774, + "rewards/margins": 0.14068616926670074, + "rewards/rejected": -0.14361293613910675, + "step": 3546 + }, + { + "epoch": 2.4529737206085755, + "grad_norm": 7.1293206214904785, + "learning_rate": 4.192792377439681e-05, + "log_odds_chosen": 8.598723411560059, + "log_odds_ratio": -0.002772655338048935, + "logits/chosen": -0.4891475439071655, + "logits/rejected": -0.5739641189575195, + "logps/chosen": -0.06681334227323532, + "logps/rejected": -1.9241998195648193, + "loss": 2.3965, + "nll_loss": 0.5988386869430542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006681334227323532, + "rewards/margins": 0.18573863804340363, + "rewards/rejected": -0.19241997599601746, + "step": 3547 + }, + { + "epoch": 2.4536652835408024, + "grad_norm": 7.23325777053833, + "learning_rate": 4.192408175810665e-05, + "log_odds_chosen": 7.28911828994751, + "log_odds_ratio": -0.011899770237505436, + "logits/chosen": -0.3867855668067932, + "logits/rejected": -0.42984023690223694, + "logps/chosen": -0.01793701946735382, + "logps/rejected": -1.2952455282211304, + "loss": 1.6492, + "nll_loss": 0.4111155569553375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017937019001692533, + "rewards/margins": 0.1277308613061905, + "rewards/rejected": -0.12952455878257751, + "step": 3548 + }, + { + "epoch": 2.454356846473029, + "grad_norm": 5.491495132446289, + "learning_rate": 4.1920239741816506e-05, + "log_odds_chosen": 9.01888656616211, + "log_odds_ratio": -0.0014132228679955006, + "logits/chosen": -0.6249552965164185, + "logits/rejected": -0.7304789423942566, + "logps/chosen": -0.0013792149256914854, + "logps/rejected": -1.6873981952667236, + "loss": 2.1958, + "nll_loss": 0.5488159656524658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013792149547953159, + "rewards/margins": 0.1686019003391266, + "rewards/rejected": -0.16873982548713684, + "step": 3549 + }, + { + "epoch": 2.455048409405256, + "grad_norm": 11.909871101379395, + "learning_rate": 4.191639772552636e-05, + "log_odds_chosen": 7.287874698638916, + "log_odds_ratio": -0.013897876255214214, + "logits/chosen": -0.5943728089332581, + "logits/rejected": -0.6236305832862854, + "logps/chosen": -0.08704106509685516, + "logps/rejected": -1.2860745191574097, + "loss": 2.9799, + "nll_loss": 0.7435942888259888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008704107254743576, + "rewards/margins": 0.11990335583686829, + "rewards/rejected": -0.12860745191574097, + "step": 3550 + }, + { + "epoch": 2.455739972337483, + "grad_norm": 6.244982719421387, + "learning_rate": 4.1912555709236204e-05, + "log_odds_chosen": 9.310818672180176, + "log_odds_ratio": -0.0022720363922417164, + "logits/chosen": -0.31200623512268066, + "logits/rejected": -0.3407549262046814, + "logps/chosen": -0.0016210743924602866, + "logps/rejected": -1.4185664653778076, + "loss": 2.4014, + "nll_loss": 0.6001317501068115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016210746252909303, + "rewards/margins": 0.14169453084468842, + "rewards/rejected": -0.14185664057731628, + "step": 3551 + }, + { + "epoch": 2.4564315352697097, + "grad_norm": 5.678574085235596, + "learning_rate": 4.190871369294606e-05, + "log_odds_chosen": 9.565153121948242, + "log_odds_ratio": -0.00012046610208926722, + "logits/chosen": -0.41298067569732666, + "logits/rejected": -0.3656235933303833, + "logps/chosen": -0.00019133117166347802, + "logps/rejected": -1.162304401397705, + "loss": 1.3996, + "nll_loss": 0.34988972544670105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.913311643875204e-05, + "rewards/margins": 0.11621131747961044, + "rewards/rejected": -0.11623044312000275, + "step": 3552 + }, + { + "epoch": 2.4571230982019365, + "grad_norm": 9.438420295715332, + "learning_rate": 4.190487167665591e-05, + "log_odds_chosen": 9.713960647583008, + "log_odds_ratio": -0.0001747915375744924, + "logits/chosen": -0.29356127977371216, + "logits/rejected": -0.35049235820770264, + "logps/chosen": -0.001048873527906835, + "logps/rejected": -1.8610334396362305, + "loss": 2.2306, + "nll_loss": 0.5576305389404297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010488735279068351, + "rewards/margins": 0.18599846959114075, + "rewards/rejected": -0.18610334396362305, + "step": 3553 + }, + { + "epoch": 2.4578146611341634, + "grad_norm": 8.732086181640625, + "learning_rate": 4.190102966036576e-05, + "log_odds_chosen": 8.62548828125, + "log_odds_ratio": -0.003063853830099106, + "logits/chosen": -0.4967626929283142, + "logits/rejected": -0.513431966304779, + "logps/chosen": -0.0021129597444087267, + "logps/rejected": -1.5165539979934692, + "loss": 2.2112, + "nll_loss": 0.5525052547454834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002112959627993405, + "rewards/margins": 0.15144410729408264, + "rewards/rejected": -0.1516554057598114, + "step": 3554 + }, + { + "epoch": 2.45850622406639, + "grad_norm": 4.8293890953063965, + "learning_rate": 4.1897187644075614e-05, + "log_odds_chosen": 7.02392578125, + "log_odds_ratio": -0.004108362831175327, + "logits/chosen": -0.42433008551597595, + "logits/rejected": -0.46287766098976135, + "logps/chosen": -0.005805987864732742, + "logps/rejected": -0.9526754021644592, + "loss": 1.7432, + "nll_loss": 0.4353874921798706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000580598774831742, + "rewards/margins": 0.09468695521354675, + "rewards/rejected": -0.09526754915714264, + "step": 3555 + }, + { + "epoch": 2.459197786998617, + "grad_norm": 8.237462043762207, + "learning_rate": 4.1893345627785466e-05, + "log_odds_chosen": 5.931075096130371, + "log_odds_ratio": -0.18810392916202545, + "logits/chosen": -0.5654093623161316, + "logits/rejected": -0.6350131034851074, + "logps/chosen": -0.12591242790222168, + "logps/rejected": -1.0404949188232422, + "loss": 2.4037, + "nll_loss": 0.5821194648742676, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012591241858899593, + "rewards/margins": 0.09145824611186981, + "rewards/rejected": -0.10404949635267258, + "step": 3556 + }, + { + "epoch": 2.459889349930844, + "grad_norm": 11.721480369567871, + "learning_rate": 4.188950361149531e-05, + "log_odds_chosen": 9.411944389343262, + "log_odds_ratio": -0.0009381014388054609, + "logits/chosen": -0.695732057094574, + "logits/rejected": -0.733596920967102, + "logps/chosen": -0.0009364191209897399, + "logps/rejected": -1.7959481477737427, + "loss": 2.5884, + "nll_loss": 0.6470180153846741, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.364191646454856e-05, + "rewards/margins": 0.17950116097927094, + "rewards/rejected": -0.17959479987621307, + "step": 3557 + }, + { + "epoch": 2.4605809128630707, + "grad_norm": 11.841344833374023, + "learning_rate": 4.1885661595205164e-05, + "log_odds_chosen": 5.187502861022949, + "log_odds_ratio": -0.566064715385437, + "logits/chosen": -0.46774718165397644, + "logits/rejected": -0.48095083236694336, + "logps/chosen": -0.0918060690164566, + "logps/rejected": -1.4666005373001099, + "loss": 3.5646, + "nll_loss": 0.834552526473999, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.00918060727417469, + "rewards/margins": 0.13747945427894592, + "rewards/rejected": -0.14666005969047546, + "step": 3558 + }, + { + "epoch": 2.4612724757952975, + "grad_norm": 9.315000534057617, + "learning_rate": 4.1881819578915017e-05, + "log_odds_chosen": 7.865085601806641, + "log_odds_ratio": -0.0006331136683002114, + "logits/chosen": -0.5515438914299011, + "logits/rejected": -0.6229417324066162, + "logps/chosen": -0.0010849256068468094, + "logps/rejected": -1.1749250888824463, + "loss": 2.4928, + "nll_loss": 0.6231462955474854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010849256068468094, + "rewards/margins": 0.11738403141498566, + "rewards/rejected": -0.11749253422021866, + "step": 3559 + }, + { + "epoch": 2.4619640387275243, + "grad_norm": 9.650796890258789, + "learning_rate": 4.187797756262486e-05, + "log_odds_chosen": 6.812906265258789, + "log_odds_ratio": -0.07138875871896744, + "logits/chosen": -0.46891918778419495, + "logits/rejected": -0.45208626985549927, + "logps/chosen": -0.011670373380184174, + "logps/rejected": -1.3647243976593018, + "loss": 2.8237, + "nll_loss": 0.698790431022644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011670372914522886, + "rewards/margins": 0.13530540466308594, + "rewards/rejected": -0.1364724338054657, + "step": 3560 + }, + { + "epoch": 2.462655601659751, + "grad_norm": 7.182980537414551, + "learning_rate": 4.187413554633472e-05, + "log_odds_chosen": 5.552410125732422, + "log_odds_ratio": -0.07222361117601395, + "logits/chosen": -0.4027874171733856, + "logits/rejected": -0.4223852753639221, + "logps/chosen": -0.03922223672270775, + "logps/rejected": -0.9920768141746521, + "loss": 2.3507, + "nll_loss": 0.580459713935852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003922224044799805, + "rewards/margins": 0.09528546035289764, + "rewards/rejected": -0.09920768439769745, + "step": 3561 + }, + { + "epoch": 2.463347164591978, + "grad_norm": 10.363471984863281, + "learning_rate": 4.187029353004457e-05, + "log_odds_chosen": 9.871475219726562, + "log_odds_ratio": -0.00011123805597890168, + "logits/chosen": -0.625693678855896, + "logits/rejected": -0.6865161061286926, + "logps/chosen": -0.00019468393293209374, + "logps/rejected": -1.5656342506408691, + "loss": 2.7754, + "nll_loss": 0.6938341856002808, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9468394384603016e-05, + "rewards/margins": 0.15654397010803223, + "rewards/rejected": -0.1565634310245514, + "step": 3562 + }, + { + "epoch": 2.464038727524205, + "grad_norm": 8.084590911865234, + "learning_rate": 4.186645151375442e-05, + "log_odds_chosen": 8.47359561920166, + "log_odds_ratio": -0.0016386422794312239, + "logits/chosen": -0.30302852392196655, + "logits/rejected": -0.3554832339286804, + "logps/chosen": -0.0012318093795329332, + "logps/rejected": -1.1712515354156494, + "loss": 2.147, + "nll_loss": 0.5365808606147766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012318094377405941, + "rewards/margins": 0.1170019656419754, + "rewards/rejected": -0.11712514609098434, + "step": 3563 + }, + { + "epoch": 2.4647302904564317, + "grad_norm": 9.11082649230957, + "learning_rate": 4.186260949746427e-05, + "log_odds_chosen": 5.911943435668945, + "log_odds_ratio": -0.5747730135917664, + "logits/chosen": -0.7678748369216919, + "logits/rejected": -0.7210307121276855, + "logps/chosen": -0.1222434714436531, + "logps/rejected": -0.7873757481575012, + "loss": 2.8083, + "nll_loss": 0.6446093916893005, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012224346399307251, + "rewards/margins": 0.06651322543621063, + "rewards/rejected": -0.07873757183551788, + "step": 3564 + }, + { + "epoch": 2.4654218533886585, + "grad_norm": 9.616727828979492, + "learning_rate": 4.1858767481174124e-05, + "log_odds_chosen": 7.258073329925537, + "log_odds_ratio": -0.04513192176818848, + "logits/chosen": 0.04711649566888809, + "logits/rejected": -0.013913527131080627, + "logps/chosen": -0.03416390344500542, + "logps/rejected": -0.891162633895874, + "loss": 2.1688, + "nll_loss": 0.5376936197280884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003416390623897314, + "rewards/margins": 0.08569987118244171, + "rewards/rejected": -0.08911626040935516, + "step": 3565 + }, + { + "epoch": 2.4661134163208853, + "grad_norm": 6.7814836502075195, + "learning_rate": 4.185492546488397e-05, + "log_odds_chosen": 7.401462554931641, + "log_odds_ratio": -0.07456041872501373, + "logits/chosen": -0.8305421471595764, + "logits/rejected": -0.8727108240127563, + "logps/chosen": -0.008470169268548489, + "logps/rejected": -1.1675711870193481, + "loss": 1.6602, + "nll_loss": 0.40759918093681335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008470169268548489, + "rewards/margins": 0.11591009795665741, + "rewards/rejected": -0.1167571172118187, + "step": 3566 + }, + { + "epoch": 2.466804979253112, + "grad_norm": 8.304616928100586, + "learning_rate": 4.185108344859382e-05, + "log_odds_chosen": 7.286868095397949, + "log_odds_ratio": -0.024223200976848602, + "logits/chosen": -0.8337805271148682, + "logits/rejected": -0.8922019600868225, + "logps/chosen": -0.03300413489341736, + "logps/rejected": -1.7029643058776855, + "loss": 2.0032, + "nll_loss": 0.49838027358055115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003300413955003023, + "rewards/margins": 0.16699601709842682, + "rewards/rejected": -0.17029643058776855, + "step": 3567 + }, + { + "epoch": 2.467496542185339, + "grad_norm": 7.218667984008789, + "learning_rate": 4.1847241432303675e-05, + "log_odds_chosen": 7.885411262512207, + "log_odds_ratio": -0.060036953538656235, + "logits/chosen": -0.47863897681236267, + "logits/rejected": -0.4377424716949463, + "logps/chosen": -0.019913654774427414, + "logps/rejected": -1.4336100816726685, + "loss": 1.865, + "nll_loss": 0.46025776863098145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001991365570574999, + "rewards/margins": 0.14136964082717896, + "rewards/rejected": -0.14336100220680237, + "step": 3568 + }, + { + "epoch": 2.468188105117566, + "grad_norm": 6.102696895599365, + "learning_rate": 4.184339941601352e-05, + "log_odds_chosen": 8.722169876098633, + "log_odds_ratio": -0.0007616700604557991, + "logits/chosen": -0.11960343271493912, + "logits/rejected": -0.15465594828128815, + "logps/chosen": -0.025594644248485565, + "logps/rejected": -1.8997677564620972, + "loss": 2.0682, + "nll_loss": 0.5169663429260254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025594644248485565, + "rewards/margins": 0.18741732835769653, + "rewards/rejected": -0.1899767965078354, + "step": 3569 + }, + { + "epoch": 2.4688796680497926, + "grad_norm": 11.712726593017578, + "learning_rate": 4.183955739972338e-05, + "log_odds_chosen": 8.480051040649414, + "log_odds_ratio": -0.003982068505138159, + "logits/chosen": -0.550246000289917, + "logits/rejected": -0.5758745074272156, + "logps/chosen": -0.0030394475907087326, + "logps/rejected": -1.7417186498641968, + "loss": 2.3966, + "nll_loss": 0.5987579822540283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003039447474293411, + "rewards/margins": 0.17386791110038757, + "rewards/rejected": -0.17417186498641968, + "step": 3570 + }, + { + "epoch": 2.4695712309820195, + "grad_norm": 11.296751022338867, + "learning_rate": 4.1835715383433226e-05, + "log_odds_chosen": 6.232704162597656, + "log_odds_ratio": -0.18135468661785126, + "logits/chosen": -0.5195183753967285, + "logits/rejected": -0.5521309971809387, + "logps/chosen": -0.03475148230791092, + "logps/rejected": -1.137387990951538, + "loss": 2.0156, + "nll_loss": 0.48577266931533813, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034751484636217356, + "rewards/margins": 0.1102636530995369, + "rewards/rejected": -0.11373881250619888, + "step": 3571 + }, + { + "epoch": 2.4702627939142463, + "grad_norm": 14.958809852600098, + "learning_rate": 4.183187336714308e-05, + "log_odds_chosen": 7.303866386413574, + "log_odds_ratio": -0.2045290172100067, + "logits/chosen": -0.3235791027545929, + "logits/rejected": -0.3822289705276489, + "logps/chosen": -0.043132223188877106, + "logps/rejected": -1.458284616470337, + "loss": 1.892, + "nll_loss": 0.45253604650497437, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004313222132623196, + "rewards/margins": 0.14151525497436523, + "rewards/rejected": -0.1458284705877304, + "step": 3572 + }, + { + "epoch": 2.470954356846473, + "grad_norm": 11.448431968688965, + "learning_rate": 4.182803135085293e-05, + "log_odds_chosen": 8.554526329040527, + "log_odds_ratio": -0.004633777309209108, + "logits/chosen": -0.35143518447875977, + "logits/rejected": -0.4527926445007324, + "logps/chosen": -0.0009376034140586853, + "logps/rejected": -1.0931717157363892, + "loss": 2.191, + "nll_loss": 0.5472741723060608, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.376034722663462e-05, + "rewards/margins": 0.10922341048717499, + "rewards/rejected": -0.10931716859340668, + "step": 3573 + }, + { + "epoch": 2.4716459197787, + "grad_norm": 9.666537284851074, + "learning_rate": 4.182418933456278e-05, + "log_odds_chosen": 7.457915782928467, + "log_odds_ratio": -0.11223464459180832, + "logits/chosen": -0.7632264494895935, + "logits/rejected": -0.7934819459915161, + "logps/chosen": -0.07048378139734268, + "logps/rejected": -1.9534372091293335, + "loss": 2.2209, + "nll_loss": 0.5439975261688232, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007048378232866526, + "rewards/margins": 0.1882953643798828, + "rewards/rejected": -0.1953437328338623, + "step": 3574 + }, + { + "epoch": 2.472337482710927, + "grad_norm": 9.697028160095215, + "learning_rate": 4.182034731827263e-05, + "log_odds_chosen": 8.214350700378418, + "log_odds_ratio": -0.0024849059991538525, + "logits/chosen": -0.23068474233150482, + "logits/rejected": -0.2544393837451935, + "logps/chosen": -0.0016188130248337984, + "logps/rejected": -1.2002190351486206, + "loss": 2.3922, + "nll_loss": 0.597804069519043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016188131121452898, + "rewards/margins": 0.11986003071069717, + "rewards/rejected": -0.12002190947532654, + "step": 3575 + }, + { + "epoch": 2.4730290456431536, + "grad_norm": 5.239798069000244, + "learning_rate": 4.181650530198248e-05, + "log_odds_chosen": 6.941335678100586, + "log_odds_ratio": -0.00915892980992794, + "logits/chosen": -0.027788877487182617, + "logits/rejected": -0.049438588321208954, + "logps/chosen": -0.0346873477101326, + "logps/rejected": -1.5081522464752197, + "loss": 1.4903, + "nll_loss": 0.37166211009025574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034687344450503588, + "rewards/margins": 0.14734649658203125, + "rewards/rejected": -0.1508152186870575, + "step": 3576 + }, + { + "epoch": 2.4737206085753805, + "grad_norm": 9.928712844848633, + "learning_rate": 4.1812663285692333e-05, + "log_odds_chosen": 5.2237091064453125, + "log_odds_ratio": -0.2726137042045593, + "logits/chosen": -0.5916513204574585, + "logits/rejected": -0.5914942026138306, + "logps/chosen": -0.20390404760837555, + "logps/rejected": -1.1321120262145996, + "loss": 3.9991, + "nll_loss": 0.9725254774093628, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020390406250953674, + "rewards/margins": 0.09282079339027405, + "rewards/rejected": -0.11321119964122772, + "step": 3577 + }, + { + "epoch": 2.4744121715076073, + "grad_norm": 5.80659818649292, + "learning_rate": 4.180882126940218e-05, + "log_odds_chosen": 5.961888313293457, + "log_odds_ratio": -0.009627663530409336, + "logits/chosen": -0.3582921326160431, + "logits/rejected": -0.36567357182502747, + "logps/chosen": -0.02949088253080845, + "logps/rejected": -1.1007611751556396, + "loss": 2.4407, + "nll_loss": 0.6092128753662109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029490883462131023, + "rewards/margins": 0.10712704807519913, + "rewards/rejected": -0.11007612943649292, + "step": 3578 + }, + { + "epoch": 2.475103734439834, + "grad_norm": 4.562489032745361, + "learning_rate": 4.180497925311204e-05, + "log_odds_chosen": 6.75181245803833, + "log_odds_ratio": -0.0999455451965332, + "logits/chosen": -0.38527029752731323, + "logits/rejected": -0.388009250164032, + "logps/chosen": -0.0627438947558403, + "logps/rejected": -1.3990108966827393, + "loss": 2.3529, + "nll_loss": 0.578228771686554, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006274389568716288, + "rewards/margins": 0.13362669944763184, + "rewards/rejected": -0.13990110158920288, + "step": 3579 + }, + { + "epoch": 2.475795297372061, + "grad_norm": 12.828277587890625, + "learning_rate": 4.1801137236821884e-05, + "log_odds_chosen": 9.04585075378418, + "log_odds_ratio": -0.00040327146416530013, + "logits/chosen": -0.14115282893180847, + "logits/rejected": -0.23341891169548035, + "logps/chosen": -0.0007533514872193336, + "logps/rejected": -1.5589354038238525, + "loss": 2.4261, + "nll_loss": 0.6064795851707458, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.533514872193336e-05, + "rewards/margins": 0.15581819415092468, + "rewards/rejected": -0.15589353442192078, + "step": 3580 + }, + { + "epoch": 2.4764868603042878, + "grad_norm": 9.210855484008789, + "learning_rate": 4.1797295220531736e-05, + "log_odds_chosen": 6.8370819091796875, + "log_odds_ratio": -0.12023650854825974, + "logits/chosen": -0.5468093752861023, + "logits/rejected": -0.5404381155967712, + "logps/chosen": -0.06064155325293541, + "logps/rejected": -1.6137638092041016, + "loss": 3.0733, + "nll_loss": 0.756309986114502, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006064155604690313, + "rewards/margins": 0.15531222522258759, + "rewards/rejected": -0.16137638688087463, + "step": 3581 + }, + { + "epoch": 2.4771784232365146, + "grad_norm": 6.40119743347168, + "learning_rate": 4.179345320424159e-05, + "log_odds_chosen": 7.077298641204834, + "log_odds_ratio": -0.010537205263972282, + "logits/chosen": -0.4514099359512329, + "logits/rejected": -0.4493291974067688, + "logps/chosen": -0.031477462500333786, + "logps/rejected": -1.365896463394165, + "loss": 2.946, + "nll_loss": 0.7354586124420166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031477464362978935, + "rewards/margins": 0.13344189524650574, + "rewards/rejected": -0.1365896463394165, + "step": 3582 + }, + { + "epoch": 2.4778699861687414, + "grad_norm": 8.285335540771484, + "learning_rate": 4.178961118795144e-05, + "log_odds_chosen": 7.359831809997559, + "log_odds_ratio": -0.03024285100400448, + "logits/chosen": -0.49037089943885803, + "logits/rejected": -0.45868930220603943, + "logps/chosen": -0.00882015936076641, + "logps/rejected": -0.8284816741943359, + "loss": 1.7684, + "nll_loss": 0.4390672445297241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008820158545859158, + "rewards/margins": 0.08196616172790527, + "rewards/rejected": -0.08284817636013031, + "step": 3583 + }, + { + "epoch": 2.4785615491009683, + "grad_norm": 9.757538795471191, + "learning_rate": 4.178576917166129e-05, + "log_odds_chosen": 6.399682998657227, + "log_odds_ratio": -0.10057688504457474, + "logits/chosen": -0.3511509895324707, + "logits/rejected": -0.35170474648475647, + "logps/chosen": -0.02771635912358761, + "logps/rejected": -1.0118992328643799, + "loss": 2.0764, + "nll_loss": 0.5090445280075073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002771636238321662, + "rewards/margins": 0.09841828048229218, + "rewards/rejected": -0.10118991881608963, + "step": 3584 + }, + { + "epoch": 2.479253112033195, + "grad_norm": 6.219103813171387, + "learning_rate": 4.178192715537114e-05, + "log_odds_chosen": 7.042239189147949, + "log_odds_ratio": -0.0017283523920923471, + "logits/chosen": -0.3467369079589844, + "logits/rejected": -0.33279949426651, + "logps/chosen": -0.009448968805372715, + "logps/rejected": -1.1239537000656128, + "loss": 2.2031, + "nll_loss": 0.550603985786438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009448969503864646, + "rewards/margins": 0.11145047843456268, + "rewards/rejected": -0.11239536851644516, + "step": 3585 + }, + { + "epoch": 2.479944674965422, + "grad_norm": 8.289338111877441, + "learning_rate": 4.177808513908099e-05, + "log_odds_chosen": 7.68574333190918, + "log_odds_ratio": -0.002449170919135213, + "logits/chosen": -0.4363767206668854, + "logits/rejected": -0.5026494264602661, + "logps/chosen": -0.013567497953772545, + "logps/rejected": -1.48887038230896, + "loss": 3.3617, + "nll_loss": 0.8401690721511841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013567497953772545, + "rewards/margins": 0.14753028750419617, + "rewards/rejected": -0.148887038230896, + "step": 3586 + }, + { + "epoch": 2.4806362378976488, + "grad_norm": 8.901142120361328, + "learning_rate": 4.177424312279084e-05, + "log_odds_chosen": 6.479077339172363, + "log_odds_ratio": -0.03187675029039383, + "logits/chosen": -0.4941876530647278, + "logits/rejected": -0.5805838704109192, + "logps/chosen": -0.007656537927687168, + "logps/rejected": -1.0773801803588867, + "loss": 1.8931, + "nll_loss": 0.4700874984264374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007656537345610559, + "rewards/margins": 0.10697236657142639, + "rewards/rejected": -0.10773801803588867, + "step": 3587 + }, + { + "epoch": 2.4813278008298756, + "grad_norm": 10.053321838378906, + "learning_rate": 4.17704011065007e-05, + "log_odds_chosen": 7.308525085449219, + "log_odds_ratio": -0.006363980006426573, + "logits/chosen": -0.9116455316543579, + "logits/rejected": -0.9546210169792175, + "logps/chosen": -0.018561337143182755, + "logps/rejected": -1.8791520595550537, + "loss": 2.5481, + "nll_loss": 0.6363804936408997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001856133807450533, + "rewards/margins": 0.18605907261371613, + "rewards/rejected": -0.18791520595550537, + "step": 3588 + }, + { + "epoch": 2.4820193637621024, + "grad_norm": 7.588259696960449, + "learning_rate": 4.176655909021054e-05, + "log_odds_chosen": 6.818621635437012, + "log_odds_ratio": -0.02121109515428543, + "logits/chosen": -0.714361310005188, + "logits/rejected": -0.7343268394470215, + "logps/chosen": -0.042965568602085114, + "logps/rejected": -1.5659581422805786, + "loss": 1.9111, + "nll_loss": 0.4756477475166321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004296557046473026, + "rewards/margins": 0.15229925513267517, + "rewards/rejected": -0.15659581124782562, + "step": 3589 + }, + { + "epoch": 2.4827109266943292, + "grad_norm": 6.362364768981934, + "learning_rate": 4.1762717073920395e-05, + "log_odds_chosen": 8.199535369873047, + "log_odds_ratio": -0.0014860157389193773, + "logits/chosen": -0.5204131603240967, + "logits/rejected": -0.5123761892318726, + "logps/chosen": -0.015633605420589447, + "logps/rejected": -1.625710129737854, + "loss": 1.7413, + "nll_loss": 0.43518710136413574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001563360681757331, + "rewards/margins": 0.16100767254829407, + "rewards/rejected": -0.1625710278749466, + "step": 3590 + }, + { + "epoch": 2.483402489626556, + "grad_norm": 10.019575119018555, + "learning_rate": 4.175887505763025e-05, + "log_odds_chosen": 7.701471328735352, + "log_odds_ratio": -0.09589733183383942, + "logits/chosen": -0.6089304685592651, + "logits/rejected": -0.6508186459541321, + "logps/chosen": -0.029153263196349144, + "logps/rejected": -1.3184267282485962, + "loss": 3.1084, + "nll_loss": 0.7675221562385559, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029153262730687857, + "rewards/margins": 0.1289273500442505, + "rewards/rejected": -0.13184267282485962, + "step": 3591 + }, + { + "epoch": 2.484094052558783, + "grad_norm": 11.550004005432129, + "learning_rate": 4.17550330413401e-05, + "log_odds_chosen": 7.115861892700195, + "log_odds_ratio": -0.07433371245861053, + "logits/chosen": -0.6918562054634094, + "logits/rejected": -0.7228385210037231, + "logps/chosen": -0.02212700992822647, + "logps/rejected": -1.7282590866088867, + "loss": 2.6529, + "nll_loss": 0.6557831168174744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022127011325210333, + "rewards/margins": 0.17061319947242737, + "rewards/rejected": -0.1728259027004242, + "step": 3592 + }, + { + "epoch": 2.4847856154910097, + "grad_norm": 9.391145706176758, + "learning_rate": 4.1751191025049945e-05, + "log_odds_chosen": 5.340620517730713, + "log_odds_ratio": -0.19077332317829132, + "logits/chosen": -0.24847255647182465, + "logits/rejected": -0.29707443714141846, + "logps/chosen": -0.07398758828639984, + "logps/rejected": -1.1074838638305664, + "loss": 2.5396, + "nll_loss": 0.6158105134963989, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007398759014904499, + "rewards/margins": 0.10334962606430054, + "rewards/rejected": -0.11074838787317276, + "step": 3593 + }, + { + "epoch": 2.4854771784232366, + "grad_norm": 11.480371475219727, + "learning_rate": 4.17473490087598e-05, + "log_odds_chosen": 7.875348091125488, + "log_odds_ratio": -0.001167620182968676, + "logits/chosen": -0.6042084097862244, + "logits/rejected": -0.6367383599281311, + "logps/chosen": -0.004173867404460907, + "logps/rejected": -1.4751344919204712, + "loss": 3.0375, + "nll_loss": 0.7592536211013794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00041738676372915506, + "rewards/margins": 0.14709606766700745, + "rewards/rejected": -0.1475134640932083, + "step": 3594 + }, + { + "epoch": 2.4861687413554634, + "grad_norm": 6.479579448699951, + "learning_rate": 4.174350699246965e-05, + "log_odds_chosen": 6.573331832885742, + "log_odds_ratio": -0.056094568222761154, + "logits/chosen": -0.6127046346664429, + "logits/rejected": -0.5991913676261902, + "logps/chosen": -0.028692560270428658, + "logps/rejected": -1.1372803449630737, + "loss": 2.9897, + "nll_loss": 0.741807222366333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028692560736089945, + "rewards/margins": 0.11085877567529678, + "rewards/rejected": -0.11372803151607513, + "step": 3595 + }, + { + "epoch": 2.4868603042876902, + "grad_norm": 9.766852378845215, + "learning_rate": 4.1739664976179496e-05, + "log_odds_chosen": 7.247687816619873, + "log_odds_ratio": -0.012085276655852795, + "logits/chosen": -0.5809255838394165, + "logits/rejected": -0.607845664024353, + "logps/chosen": -0.01595086231827736, + "logps/rejected": -1.7827832698822021, + "loss": 2.4924, + "nll_loss": 0.6218916177749634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001595086301676929, + "rewards/margins": 0.176683247089386, + "rewards/rejected": -0.17827832698822021, + "step": 3596 + }, + { + "epoch": 2.487551867219917, + "grad_norm": 7.794604301452637, + "learning_rate": 4.1735822959889355e-05, + "log_odds_chosen": 6.11072301864624, + "log_odds_ratio": -0.013152681291103363, + "logits/chosen": -0.7504695653915405, + "logits/rejected": -0.8083955645561218, + "logps/chosen": -0.01121857576072216, + "logps/rejected": -1.0678339004516602, + "loss": 3.1961, + "nll_loss": 0.7977035045623779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011218574363738298, + "rewards/margins": 0.1056615337729454, + "rewards/rejected": -0.10678339004516602, + "step": 3597 + }, + { + "epoch": 2.488243430152144, + "grad_norm": 10.555513381958008, + "learning_rate": 4.17319809435992e-05, + "log_odds_chosen": 5.213860511779785, + "log_odds_ratio": -0.4782610535621643, + "logits/chosen": -0.3444117307662964, + "logits/rejected": -0.32682672142982483, + "logps/chosen": -0.053420089185237885, + "logps/rejected": -0.8151041269302368, + "loss": 2.8929, + "nll_loss": 0.6753936409950256, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005342008545994759, + "rewards/margins": 0.07616840302944183, + "rewards/rejected": -0.08151040971279144, + "step": 3598 + }, + { + "epoch": 2.4889349930843707, + "grad_norm": 5.460208892822266, + "learning_rate": 4.172813892730905e-05, + "log_odds_chosen": 7.005800724029541, + "log_odds_ratio": -0.004268847871571779, + "logits/chosen": -0.388731986284256, + "logits/rejected": -0.36822617053985596, + "logps/chosen": -0.0035434365272521973, + "logps/rejected": -0.8111451268196106, + "loss": 1.8473, + "nll_loss": 0.4613950252532959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035434364690445364, + "rewards/margins": 0.0807601809501648, + "rewards/rejected": -0.0811145156621933, + "step": 3599 + }, + { + "epoch": 2.4896265560165975, + "grad_norm": 9.794899940490723, + "learning_rate": 4.1724296911018906e-05, + "log_odds_chosen": 6.337307453155518, + "log_odds_ratio": -0.060170188546180725, + "logits/chosen": -0.3038662075996399, + "logits/rejected": -0.3344561755657196, + "logps/chosen": -0.018794970586895943, + "logps/rejected": -0.899629533290863, + "loss": 2.4075, + "nll_loss": 0.5958477258682251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018794970819726586, + "rewards/margins": 0.08808346092700958, + "rewards/rejected": -0.08996295928955078, + "step": 3600 + }, + { + "epoch": 2.4903181189488244, + "grad_norm": 7.350867748260498, + "learning_rate": 4.172045489472876e-05, + "log_odds_chosen": 7.752102375030518, + "log_odds_ratio": -0.0015861605061218143, + "logits/chosen": -0.4283638596534729, + "logits/rejected": -0.4642946124076843, + "logps/chosen": -0.0022205570712685585, + "logps/rejected": -1.1298866271972656, + "loss": 2.6427, + "nll_loss": 0.6605045199394226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022205570712685585, + "rewards/margins": 0.11276662349700928, + "rewards/rejected": -0.11298868060112, + "step": 3601 + }, + { + "epoch": 2.491009681881051, + "grad_norm": 8.670126914978027, + "learning_rate": 4.1716612878438604e-05, + "log_odds_chosen": 8.038167953491211, + "log_odds_ratio": -0.0027964513283222914, + "logits/chosen": -0.3971180319786072, + "logits/rejected": -0.4683905839920044, + "logps/chosen": -0.010728216730058193, + "logps/rejected": -1.5991425514221191, + "loss": 1.7308, + "nll_loss": 0.43241745233535767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010728216730058193, + "rewards/margins": 0.15884143114089966, + "rewards/rejected": -0.15991425514221191, + "step": 3602 + }, + { + "epoch": 2.491701244813278, + "grad_norm": 11.873007774353027, + "learning_rate": 4.1712770862148456e-05, + "log_odds_chosen": 7.145326614379883, + "log_odds_ratio": -0.01985153742134571, + "logits/chosen": -0.5149418115615845, + "logits/rejected": -0.5132201910018921, + "logps/chosen": -0.027801502496004105, + "logps/rejected": -1.4563482999801636, + "loss": 2.5473, + "nll_loss": 0.6348486542701721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002780150156468153, + "rewards/margins": 0.1428546905517578, + "rewards/rejected": -0.14563484489917755, + "step": 3603 + }, + { + "epoch": 2.492392807745505, + "grad_norm": 6.793402194976807, + "learning_rate": 4.170892884585831e-05, + "log_odds_chosen": 6.222506046295166, + "log_odds_ratio": -0.09904603660106659, + "logits/chosen": -0.5928165912628174, + "logits/rejected": -0.5925490856170654, + "logps/chosen": -0.03379706293344498, + "logps/rejected": -1.099259853363037, + "loss": 3.1783, + "nll_loss": 0.7846640348434448, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033797065261751413, + "rewards/margins": 0.10654628276824951, + "rewards/rejected": -0.10992599278688431, + "step": 3604 + }, + { + "epoch": 2.4930843706777317, + "grad_norm": 8.131731033325195, + "learning_rate": 4.1705086829568154e-05, + "log_odds_chosen": 7.854827880859375, + "log_odds_ratio": -0.013267126865684986, + "logits/chosen": -0.8261618614196777, + "logits/rejected": -0.8810702562332153, + "logps/chosen": -0.004752838518470526, + "logps/rejected": -1.3137333393096924, + "loss": 2.0157, + "nll_loss": 0.5025960206985474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047528385766781867, + "rewards/margins": 0.13089805841445923, + "rewards/rejected": -0.131373330950737, + "step": 3605 + }, + { + "epoch": 2.4937759336099585, + "grad_norm": 8.815180778503418, + "learning_rate": 4.1701244813278014e-05, + "log_odds_chosen": 7.279289245605469, + "log_odds_ratio": -0.05281849950551987, + "logits/chosen": -0.7257159352302551, + "logits/rejected": -0.823196530342102, + "logps/chosen": -0.027641449123620987, + "logps/rejected": -1.5369056463241577, + "loss": 3.0359, + "nll_loss": 0.753699779510498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027641449123620987, + "rewards/margins": 0.150926411151886, + "rewards/rejected": -0.15369057655334473, + "step": 3606 + }, + { + "epoch": 2.4944674965421854, + "grad_norm": 9.75115966796875, + "learning_rate": 4.169740279698786e-05, + "log_odds_chosen": 6.750976085662842, + "log_odds_ratio": -0.052393123507499695, + "logits/chosen": -0.4319000244140625, + "logits/rejected": -0.4197804629802704, + "logps/chosen": -0.03174269199371338, + "logps/rejected": -1.9581066370010376, + "loss": 2.3748, + "nll_loss": 0.5884555578231812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003174269339069724, + "rewards/margins": 0.1926364153623581, + "rewards/rejected": -0.19581067562103271, + "step": 3607 + }, + { + "epoch": 2.495159059474412, + "grad_norm": 9.596985816955566, + "learning_rate": 4.169356078069771e-05, + "log_odds_chosen": 4.707325458526611, + "log_odds_ratio": -0.47428303956985474, + "logits/chosen": -0.7324930429458618, + "logits/rejected": -0.713641881942749, + "logps/chosen": -0.119391530752182, + "logps/rejected": -1.1009997129440308, + "loss": 2.5459, + "nll_loss": 0.5890485048294067, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0119391530752182, + "rewards/margins": 0.09816082566976547, + "rewards/rejected": -0.11009997874498367, + "step": 3608 + }, + { + "epoch": 2.495850622406639, + "grad_norm": 4.151665210723877, + "learning_rate": 4.1689718764407564e-05, + "log_odds_chosen": 7.3658833503723145, + "log_odds_ratio": -0.009625360369682312, + "logits/chosen": -0.3350619673728943, + "logits/rejected": -0.45576098561286926, + "logps/chosen": -0.003950429614633322, + "logps/rejected": -0.8261271715164185, + "loss": 1.8249, + "nll_loss": 0.4552645683288574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003950429381802678, + "rewards/margins": 0.08221767842769623, + "rewards/rejected": -0.08261272311210632, + "step": 3609 + }, + { + "epoch": 2.496542185338866, + "grad_norm": 6.8638997077941895, + "learning_rate": 4.1685876748117417e-05, + "log_odds_chosen": 7.412099838256836, + "log_odds_ratio": -0.0016858414746820927, + "logits/chosen": -0.4631730914115906, + "logits/rejected": -0.539579451084137, + "logps/chosen": -0.03612817823886871, + "logps/rejected": -1.7752039432525635, + "loss": 1.7417, + "nll_loss": 0.4352552592754364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036128174979239702, + "rewards/margins": 0.1739075779914856, + "rewards/rejected": -0.17752039432525635, + "step": 3610 + }, + { + "epoch": 2.4972337482710927, + "grad_norm": 9.153830528259277, + "learning_rate": 4.168203473182726e-05, + "log_odds_chosen": 8.275040626525879, + "log_odds_ratio": -0.0027329260483384132, + "logits/chosen": -0.6889567971229553, + "logits/rejected": -0.7413190007209778, + "logps/chosen": -0.005626749712973833, + "logps/rejected": -1.3126780986785889, + "loss": 2.9899, + "nll_loss": 0.7472079992294312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005626750062219799, + "rewards/margins": 0.13070513308048248, + "rewards/rejected": -0.13126781582832336, + "step": 3611 + }, + { + "epoch": 2.4979253112033195, + "grad_norm": 9.003024101257324, + "learning_rate": 4.1678192715537115e-05, + "log_odds_chosen": 8.323738098144531, + "log_odds_ratio": -0.012034624814987183, + "logits/chosen": -0.8074311017990112, + "logits/rejected": -0.8693141937255859, + "logps/chosen": -0.05786255747079849, + "logps/rejected": -1.7432314157485962, + "loss": 2.1424, + "nll_loss": 0.5343984961509705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0057862563990056515, + "rewards/margins": 0.16853688657283783, + "rewards/rejected": -0.17432314157485962, + "step": 3612 + }, + { + "epoch": 2.4986168741355463, + "grad_norm": 8.100503921508789, + "learning_rate": 4.167435069924697e-05, + "log_odds_chosen": 6.446010112762451, + "log_odds_ratio": -0.12480390071868896, + "logits/chosen": -0.4934987425804138, + "logits/rejected": -0.5425033569335938, + "logps/chosen": -0.045704033225774765, + "logps/rejected": -1.0807603597640991, + "loss": 2.082, + "nll_loss": 0.5080088973045349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004570403136312962, + "rewards/margins": 0.10350564122200012, + "rewards/rejected": -0.10807604342699051, + "step": 3613 + }, + { + "epoch": 2.499308437067773, + "grad_norm": 9.035955429077148, + "learning_rate": 4.167050868295681e-05, + "log_odds_chosen": 5.936398983001709, + "log_odds_ratio": -0.4109426736831665, + "logits/chosen": -0.48561152815818787, + "logits/rejected": -0.5519176721572876, + "logps/chosen": -0.058604124933481216, + "logps/rejected": -1.421567678451538, + "loss": 2.1311, + "nll_loss": 0.49168580770492554, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005860412493348122, + "rewards/margins": 0.1362963616847992, + "rewards/rejected": -0.14215677976608276, + "step": 3614 + }, + { + "epoch": 2.5, + "grad_norm": 9.259349822998047, + "learning_rate": 4.166666666666667e-05, + "log_odds_chosen": 4.857237815856934, + "log_odds_ratio": -0.47869187593460083, + "logits/chosen": -0.548782467842102, + "logits/rejected": -0.5626112818717957, + "logps/chosen": -0.06998100131750107, + "logps/rejected": -0.7982571125030518, + "loss": 2.3702, + "nll_loss": 0.5446842908859253, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006998100318014622, + "rewards/margins": 0.07282762229442596, + "rewards/rejected": -0.07982571423053741, + "step": 3615 + }, + { + "epoch": 2.500691562932227, + "grad_norm": 11.092930793762207, + "learning_rate": 4.166282465037652e-05, + "log_odds_chosen": 7.3806047439575195, + "log_odds_ratio": -0.007463864050805569, + "logits/chosen": -0.9096285104751587, + "logits/rejected": -0.9700690507888794, + "logps/chosen": -0.013421340845525265, + "logps/rejected": -1.608832597732544, + "loss": 2.8723, + "nll_loss": 0.7173298597335815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013421342009678483, + "rewards/margins": 0.15954113006591797, + "rewards/rejected": -0.16088326275348663, + "step": 3616 + }, + { + "epoch": 2.5013831258644537, + "grad_norm": 6.884032726287842, + "learning_rate": 4.165898263408637e-05, + "log_odds_chosen": 8.47869873046875, + "log_odds_ratio": -0.006416505668312311, + "logits/chosen": -0.5534612536430359, + "logits/rejected": -0.5722460150718689, + "logps/chosen": -0.003985046874731779, + "logps/rejected": -1.439831018447876, + "loss": 2.0179, + "nll_loss": 0.5038344860076904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000398504693293944, + "rewards/margins": 0.14358460903167725, + "rewards/rejected": -0.14398311078548431, + "step": 3617 + }, + { + "epoch": 2.5020746887966805, + "grad_norm": 6.384535789489746, + "learning_rate": 4.165514061779622e-05, + "log_odds_chosen": 8.993051528930664, + "log_odds_ratio": -0.0003727722796611488, + "logits/chosen": -0.7564731240272522, + "logits/rejected": -0.7366716265678406, + "logps/chosen": -0.0005397037602961063, + "logps/rejected": -1.3181127309799194, + "loss": 2.341, + "nll_loss": 0.5852185487747192, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3970376029610634e-05, + "rewards/margins": 0.13175730407238007, + "rewards/rejected": -0.13181129097938538, + "step": 3618 + }, + { + "epoch": 2.5027662517289073, + "grad_norm": 7.561714172363281, + "learning_rate": 4.1651298601506075e-05, + "log_odds_chosen": 7.267178058624268, + "log_odds_ratio": -0.003275883849710226, + "logits/chosen": -0.7403993010520935, + "logits/rejected": -0.7949719429016113, + "logps/chosen": -0.00556724751368165, + "logps/rejected": -1.3268835544586182, + "loss": 2.1509, + "nll_loss": 0.5374022722244263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005567247862927616, + "rewards/margins": 0.1321316510438919, + "rewards/rejected": -0.13268837332725525, + "step": 3619 + }, + { + "epoch": 2.503457814661134, + "grad_norm": 7.276869297027588, + "learning_rate": 4.164745658521592e-05, + "log_odds_chosen": 9.333890914916992, + "log_odds_ratio": -0.0008068106253631413, + "logits/chosen": -0.6412614583969116, + "logits/rejected": -0.6887497901916504, + "logps/chosen": -0.0008464233251288533, + "logps/rejected": -1.1573611497879028, + "loss": 1.6581, + "nll_loss": 0.41444694995880127, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.464233542326838e-05, + "rewards/margins": 0.11565147340297699, + "rewards/rejected": -0.11573611199855804, + "step": 3620 + }, + { + "epoch": 2.504149377593361, + "grad_norm": 6.960307598114014, + "learning_rate": 4.164361456892577e-05, + "log_odds_chosen": 6.703517913818359, + "log_odds_ratio": -0.022511044517159462, + "logits/chosen": -0.7467190623283386, + "logits/rejected": -0.7689237594604492, + "logps/chosen": -0.014704996719956398, + "logps/rejected": -1.5688300132751465, + "loss": 2.1581, + "nll_loss": 0.5372787714004517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001470499555580318, + "rewards/margins": 0.155412495136261, + "rewards/rejected": -0.15688300132751465, + "step": 3621 + }, + { + "epoch": 2.504840940525588, + "grad_norm": 8.569221496582031, + "learning_rate": 4.1639772552635626e-05, + "log_odds_chosen": 6.749528884887695, + "log_odds_ratio": -0.12193821370601654, + "logits/chosen": -0.2547239065170288, + "logits/rejected": -0.34872543811798096, + "logps/chosen": -0.04174911230802536, + "logps/rejected": -1.5677011013031006, + "loss": 2.2557, + "nll_loss": 0.5517250895500183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004174911882728338, + "rewards/margins": 0.15259522199630737, + "rewards/rejected": -0.15677013993263245, + "step": 3622 + }, + { + "epoch": 2.5055325034578146, + "grad_norm": 7.080369472503662, + "learning_rate": 4.163593053634547e-05, + "log_odds_chosen": 7.612618923187256, + "log_odds_ratio": -0.0030858798418194056, + "logits/chosen": -0.45897969603538513, + "logits/rejected": -0.5010754466056824, + "logps/chosen": -0.03506157547235489, + "logps/rejected": -1.5567389726638794, + "loss": 2.1821, + "nll_loss": 0.5452094078063965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003506158012896776, + "rewards/margins": 0.15216773748397827, + "rewards/rejected": -0.15567392110824585, + "step": 3623 + }, + { + "epoch": 2.5062240663900415, + "grad_norm": 14.771923065185547, + "learning_rate": 4.163208852005533e-05, + "log_odds_chosen": 7.967820167541504, + "log_odds_ratio": -0.002373651135712862, + "logits/chosen": -0.7408491373062134, + "logits/rejected": -0.8213186264038086, + "logps/chosen": -0.005303661338984966, + "logps/rejected": -1.6401475667953491, + "loss": 3.355, + "nll_loss": 0.8385175466537476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005303661455400288, + "rewards/margins": 0.1634843945503235, + "rewards/rejected": -0.1640147566795349, + "step": 3624 + }, + { + "epoch": 2.5069156293222683, + "grad_norm": 5.273161888122559, + "learning_rate": 4.1628246503765176e-05, + "log_odds_chosen": 6.562053680419922, + "log_odds_ratio": -0.06032273545861244, + "logits/chosen": -0.3231491446495056, + "logits/rejected": -0.3876674771308899, + "logps/chosen": -0.023305881768465042, + "logps/rejected": -0.8332839012145996, + "loss": 1.8313, + "nll_loss": 0.45178982615470886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023305879440158606, + "rewards/margins": 0.08099780976772308, + "rewards/rejected": -0.08332839608192444, + "step": 3625 + }, + { + "epoch": 2.507607192254495, + "grad_norm": 9.690799713134766, + "learning_rate": 4.162440448747503e-05, + "log_odds_chosen": 6.092679977416992, + "log_odds_ratio": -0.023633258417248726, + "logits/chosen": -0.6088852882385254, + "logits/rejected": -0.7097447514533997, + "logps/chosen": -0.05217176675796509, + "logps/rejected": -1.6721185445785522, + "loss": 2.3207, + "nll_loss": 0.5778143405914307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005217176862061024, + "rewards/margins": 0.16199468076229095, + "rewards/rejected": -0.1672118604183197, + "step": 3626 + }, + { + "epoch": 2.508298755186722, + "grad_norm": 9.050857543945312, + "learning_rate": 4.162056247118488e-05, + "log_odds_chosen": 8.486778259277344, + "log_odds_ratio": -0.001319223316386342, + "logits/chosen": -0.47137826681137085, + "logits/rejected": -0.5116744041442871, + "logps/chosen": -0.006257123313844204, + "logps/rejected": -1.4125785827636719, + "loss": 2.1905, + "nll_loss": 0.547484278678894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006257123895920813, + "rewards/margins": 0.14063213765621185, + "rewards/rejected": -0.1412578523159027, + "step": 3627 + }, + { + "epoch": 2.508990318118949, + "grad_norm": 7.497647285461426, + "learning_rate": 4.1616720454894733e-05, + "log_odds_chosen": 6.012910842895508, + "log_odds_ratio": -0.14136555790901184, + "logits/chosen": -0.2557965815067291, + "logits/rejected": -0.3421425521373749, + "logps/chosen": -0.026772573590278625, + "logps/rejected": -1.0495741367340088, + "loss": 2.4656, + "nll_loss": 0.6022545099258423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026772571727633476, + "rewards/margins": 0.1022801548242569, + "rewards/rejected": -0.10495741665363312, + "step": 3628 + }, + { + "epoch": 2.5096818810511756, + "grad_norm": 6.901090145111084, + "learning_rate": 4.161287843860458e-05, + "log_odds_chosen": 6.562984466552734, + "log_odds_ratio": -0.045680299401283264, + "logits/chosen": -0.601599931716919, + "logits/rejected": -0.5920424461364746, + "logps/chosen": -0.01585603505373001, + "logps/rejected": -1.0223904848098755, + "loss": 1.7887, + "nll_loss": 0.4426080584526062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015856034588068724, + "rewards/margins": 0.10065343976020813, + "rewards/rejected": -0.10223904997110367, + "step": 3629 + }, + { + "epoch": 2.5103734439834025, + "grad_norm": 5.661782741546631, + "learning_rate": 4.160903642231443e-05, + "log_odds_chosen": 7.5030975341796875, + "log_odds_ratio": -0.023007739335298538, + "logits/chosen": -0.5051695704460144, + "logits/rejected": -0.5527991652488708, + "logps/chosen": -0.01834404654800892, + "logps/rejected": -1.1878063678741455, + "loss": 1.4083, + "nll_loss": 0.3497661054134369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018344044219702482, + "rewards/margins": 0.11694624274969101, + "rewards/rejected": -0.11878064274787903, + "step": 3630 + }, + { + "epoch": 2.5110650069156293, + "grad_norm": 6.964871883392334, + "learning_rate": 4.1605194406024284e-05, + "log_odds_chosen": 7.265258312225342, + "log_odds_ratio": -0.052114978432655334, + "logits/chosen": -0.512820839881897, + "logits/rejected": -0.5915680527687073, + "logps/chosen": -0.03276847302913666, + "logps/rejected": -1.5541868209838867, + "loss": 1.5971, + "nll_loss": 0.3940580487251282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032768475357443094, + "rewards/margins": 0.15214183926582336, + "rewards/rejected": -0.15541867911815643, + "step": 3631 + }, + { + "epoch": 2.511756569847856, + "grad_norm": 6.527342796325684, + "learning_rate": 4.160135238973413e-05, + "log_odds_chosen": 8.185376167297363, + "log_odds_ratio": -0.0033926228061318398, + "logits/chosen": -0.7429603338241577, + "logits/rejected": -0.7161184549331665, + "logps/chosen": -0.01603500172495842, + "logps/rejected": -1.4270488023757935, + "loss": 1.7159, + "nll_loss": 0.4286254346370697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016035002190619707, + "rewards/margins": 0.1411013901233673, + "rewards/rejected": -0.14270488917827606, + "step": 3632 + }, + { + "epoch": 2.512448132780083, + "grad_norm": 10.357183456420898, + "learning_rate": 4.159751037344399e-05, + "log_odds_chosen": 8.630908966064453, + "log_odds_ratio": -0.000405243132263422, + "logits/chosen": -0.7257434725761414, + "logits/rejected": -0.8590246438980103, + "logps/chosen": -0.0009773027850314975, + "logps/rejected": -1.3986811637878418, + "loss": 2.2534, + "nll_loss": 0.5633000731468201, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.773028432391584e-05, + "rewards/margins": 0.13977038860321045, + "rewards/rejected": -0.1398681253194809, + "step": 3633 + }, + { + "epoch": 2.5131396957123098, + "grad_norm": 12.191122055053711, + "learning_rate": 4.1593668357153835e-05, + "log_odds_chosen": 6.413578987121582, + "log_odds_ratio": -0.006660694722086191, + "logits/chosen": -0.7182711958885193, + "logits/rejected": -0.7781530022621155, + "logps/chosen": -0.01736205443739891, + "logps/rejected": -1.2246630191802979, + "loss": 3.0462, + "nll_loss": 0.7608934640884399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017362055368721485, + "rewards/margins": 0.12073009461164474, + "rewards/rejected": -0.12246629595756531, + "step": 3634 + }, + { + "epoch": 2.5138312586445366, + "grad_norm": 6.257330417633057, + "learning_rate": 4.158982634086369e-05, + "log_odds_chosen": 5.112867832183838, + "log_odds_ratio": -0.13929276168346405, + "logits/chosen": -0.5829563140869141, + "logits/rejected": -0.5793777704238892, + "logps/chosen": -0.047521110624074936, + "logps/rejected": -0.9819206595420837, + "loss": 2.4791, + "nll_loss": 0.605844259262085, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004752111155539751, + "rewards/margins": 0.09343995153903961, + "rewards/rejected": -0.09819206595420837, + "step": 3635 + }, + { + "epoch": 2.5145228215767634, + "grad_norm": 19.461641311645508, + "learning_rate": 4.158598432457354e-05, + "log_odds_chosen": 7.858949184417725, + "log_odds_ratio": -0.027700483798980713, + "logits/chosen": -0.5611499547958374, + "logits/rejected": -0.6735534071922302, + "logps/chosen": -0.030950110405683517, + "logps/rejected": -1.3868426084518433, + "loss": 1.8722, + "nll_loss": 0.4652819037437439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003095010994002223, + "rewards/margins": 0.13558925688266754, + "rewards/rejected": -0.13868427276611328, + "step": 3636 + }, + { + "epoch": 2.5152143845089903, + "grad_norm": 4.125360488891602, + "learning_rate": 4.158214230828339e-05, + "log_odds_chosen": 8.624306678771973, + "log_odds_ratio": -0.0006575646111741662, + "logits/chosen": -0.2748103737831116, + "logits/rejected": -0.2165181040763855, + "logps/chosen": -0.0003640328941401094, + "logps/rejected": -0.9175252914428711, + "loss": 1.2471, + "nll_loss": 0.3117210865020752, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.640328941401094e-05, + "rewards/margins": 0.09171614050865173, + "rewards/rejected": -0.0917525440454483, + "step": 3637 + }, + { + "epoch": 2.515905947441217, + "grad_norm": 7.473677158355713, + "learning_rate": 4.157830029199324e-05, + "log_odds_chosen": 6.285887718200684, + "log_odds_ratio": -0.08532913774251938, + "logits/chosen": -0.5009844303131104, + "logits/rejected": -0.5384580492973328, + "logps/chosen": -0.02979310043156147, + "logps/rejected": -0.9416558742523193, + "loss": 2.2114, + "nll_loss": 0.5443115234375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029793099965900183, + "rewards/margins": 0.0911862775683403, + "rewards/rejected": -0.09416559338569641, + "step": 3638 + }, + { + "epoch": 2.516597510373444, + "grad_norm": 12.419747352600098, + "learning_rate": 4.157445827570309e-05, + "log_odds_chosen": 8.659000396728516, + "log_odds_ratio": -0.048959698528051376, + "logits/chosen": -0.46966612339019775, + "logits/rejected": -0.5531617403030396, + "logps/chosen": -0.008436794392764568, + "logps/rejected": -1.7925329208374023, + "loss": 2.0447, + "nll_loss": 0.5062693953514099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008436795906163752, + "rewards/margins": 0.1784096211194992, + "rewards/rejected": -0.17925329506397247, + "step": 3639 + }, + { + "epoch": 2.5172890733056708, + "grad_norm": 8.426016807556152, + "learning_rate": 4.157061625941294e-05, + "log_odds_chosen": 8.495464324951172, + "log_odds_ratio": -0.03560282662510872, + "logits/chosen": -0.8380284309387207, + "logits/rejected": -0.9043651819229126, + "logps/chosen": -0.010453960858285427, + "logps/rejected": -1.4786534309387207, + "loss": 1.5629, + "nll_loss": 0.38715261220932007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010453959694132209, + "rewards/margins": 0.14681994915008545, + "rewards/rejected": -0.14786535501480103, + "step": 3640 + }, + { + "epoch": 2.5179806362378976, + "grad_norm": 5.699162483215332, + "learning_rate": 4.156677424312279e-05, + "log_odds_chosen": 7.089378356933594, + "log_odds_ratio": -0.006637131329625845, + "logits/chosen": -0.15468713641166687, + "logits/rejected": -0.1885947287082672, + "logps/chosen": -0.06389021128416061, + "logps/rejected": -1.86360502243042, + "loss": 2.1492, + "nll_loss": 0.5366443991661072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006389021407812834, + "rewards/margins": 0.17997147142887115, + "rewards/rejected": -0.18636049330234528, + "step": 3641 + }, + { + "epoch": 2.5186721991701244, + "grad_norm": 8.707549095153809, + "learning_rate": 4.156293222683265e-05, + "log_odds_chosen": 7.686588287353516, + "log_odds_ratio": -0.003062628209590912, + "logits/chosen": -0.936684787273407, + "logits/rejected": -1.0674530267715454, + "logps/chosen": -0.007867519743740559, + "logps/rejected": -1.4413552284240723, + "loss": 2.7943, + "nll_loss": 0.6982684135437012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007867519743740559, + "rewards/margins": 0.14334876835346222, + "rewards/rejected": -0.14413553476333618, + "step": 3642 + }, + { + "epoch": 2.5193637621023512, + "grad_norm": 7.398531913757324, + "learning_rate": 4.155909021054249e-05, + "log_odds_chosen": 6.667263507843018, + "log_odds_ratio": -0.12674516439437866, + "logits/chosen": -0.4337894320487976, + "logits/rejected": -0.45456454157829285, + "logps/chosen": -0.04140767455101013, + "logps/rejected": -1.1496739387512207, + "loss": 2.1738, + "nll_loss": 0.5307748913764954, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004140767734497786, + "rewards/margins": 0.11082662642002106, + "rewards/rejected": -0.11496739089488983, + "step": 3643 + }, + { + "epoch": 2.520055325034578, + "grad_norm": 8.9595365524292, + "learning_rate": 4.1555248194252345e-05, + "log_odds_chosen": 8.366384506225586, + "log_odds_ratio": -0.007697094231843948, + "logits/chosen": -0.702767550945282, + "logits/rejected": -0.7329120635986328, + "logps/chosen": -0.007878115400671959, + "logps/rejected": -1.8763498067855835, + "loss": 1.952, + "nll_loss": 0.48721975088119507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007878115284256637, + "rewards/margins": 0.18684718012809753, + "rewards/rejected": -0.18763497471809387, + "step": 3644 + }, + { + "epoch": 2.520746887966805, + "grad_norm": 9.695239067077637, + "learning_rate": 4.15514061779622e-05, + "log_odds_chosen": 4.558919906616211, + "log_odds_ratio": -0.2631775140762329, + "logits/chosen": -0.6078516244888306, + "logits/rejected": -0.5811923742294312, + "logps/chosen": -0.047537729144096375, + "logps/rejected": -0.8359494805335999, + "loss": 2.5318, + "nll_loss": 0.6066234111785889, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00475377356633544, + "rewards/margins": 0.0788411796092987, + "rewards/rejected": -0.08359494805335999, + "step": 3645 + }, + { + "epoch": 2.5214384508990317, + "grad_norm": 8.223495483398438, + "learning_rate": 4.154756416167205e-05, + "log_odds_chosen": 9.086836814880371, + "log_odds_ratio": -0.0041122944094240665, + "logits/chosen": -0.5973942875862122, + "logits/rejected": -0.6405963897705078, + "logps/chosen": -0.008441880345344543, + "logps/rejected": -1.617875337600708, + "loss": 2.6734, + "nll_loss": 0.6679355502128601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008441880927421153, + "rewards/margins": 0.16094335913658142, + "rewards/rejected": -0.16178753972053528, + "step": 3646 + }, + { + "epoch": 2.5221300138312586, + "grad_norm": 4.186194896697998, + "learning_rate": 4.1543722145381896e-05, + "log_odds_chosen": 7.946590423583984, + "log_odds_ratio": -0.04126410558819771, + "logits/chosen": -0.6535416841506958, + "logits/rejected": -0.6662824153900146, + "logps/chosen": -0.010252664797008038, + "logps/rejected": -1.1775137186050415, + "loss": 2.225, + "nll_loss": 0.552130937576294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010252664797008038, + "rewards/margins": 0.1167261004447937, + "rewards/rejected": -0.11775137484073639, + "step": 3647 + }, + { + "epoch": 2.5228215767634854, + "grad_norm": 7.053506851196289, + "learning_rate": 4.153988012909175e-05, + "log_odds_chosen": 8.013751983642578, + "log_odds_ratio": -0.0022309324704110622, + "logits/chosen": -0.44436901807785034, + "logits/rejected": -0.5080931186676025, + "logps/chosen": -0.048563968390226364, + "logps/rejected": -1.8365061283111572, + "loss": 2.5409, + "nll_loss": 0.6350106596946716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004856396932154894, + "rewards/margins": 0.1787942349910736, + "rewards/rejected": -0.18365061283111572, + "step": 3648 + }, + { + "epoch": 2.5235131396957122, + "grad_norm": 8.54772663116455, + "learning_rate": 4.15360381128016e-05, + "log_odds_chosen": 8.504034996032715, + "log_odds_ratio": -0.04370781406760216, + "logits/chosen": -0.526552677154541, + "logits/rejected": -0.5632598400115967, + "logps/chosen": -0.014592758379876614, + "logps/rejected": -1.5742847919464111, + "loss": 2.3825, + "nll_loss": 0.5912644863128662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014592758379876614, + "rewards/margins": 0.15596920251846313, + "rewards/rejected": -0.15742847323417664, + "step": 3649 + }, + { + "epoch": 2.524204702627939, + "grad_norm": 7.6013593673706055, + "learning_rate": 4.1532196096511447e-05, + "log_odds_chosen": 7.224681854248047, + "log_odds_ratio": -0.053471360355615616, + "logits/chosen": -0.7720454335212708, + "logits/rejected": -0.8293158411979675, + "logps/chosen": -0.014203069731593132, + "logps/rejected": -1.1382806301116943, + "loss": 2.4617, + "nll_loss": 0.6100710034370422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014203068567439914, + "rewards/margins": 0.11240774393081665, + "rewards/rejected": -0.11382806301116943, + "step": 3650 + }, + { + "epoch": 2.524896265560166, + "grad_norm": 5.8260955810546875, + "learning_rate": 4.1528354080221306e-05, + "log_odds_chosen": 4.697497367858887, + "log_odds_ratio": -0.16557545959949493, + "logits/chosen": -0.7771276831626892, + "logits/rejected": -0.7690300941467285, + "logps/chosen": -0.05061003565788269, + "logps/rejected": -1.3822760581970215, + "loss": 2.6081, + "nll_loss": 0.6354620456695557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005061003845185041, + "rewards/margins": 0.1331666111946106, + "rewards/rejected": -0.13822761178016663, + "step": 3651 + }, + { + "epoch": 2.5255878284923927, + "grad_norm": 10.953791618347168, + "learning_rate": 4.152451206393115e-05, + "log_odds_chosen": 9.335792541503906, + "log_odds_ratio": -0.0005506311426870525, + "logits/chosen": -0.826223611831665, + "logits/rejected": -0.9384573101997375, + "logps/chosen": -0.000618268852122128, + "logps/rejected": -1.434199571609497, + "loss": 1.8864, + "nll_loss": 0.4715338349342346, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182688230182976e-05, + "rewards/margins": 0.14335814118385315, + "rewards/rejected": -0.14341996610164642, + "step": 3652 + }, + { + "epoch": 2.5262793914246195, + "grad_norm": 6.306936740875244, + "learning_rate": 4.1520670047641004e-05, + "log_odds_chosen": 6.237020969390869, + "log_odds_ratio": -0.03519082069396973, + "logits/chosen": -0.5907887816429138, + "logits/rejected": -0.6402274966239929, + "logps/chosen": -0.012569701299071312, + "logps/rejected": -1.149125337600708, + "loss": 2.4935, + "nll_loss": 0.6198525428771973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012569701066240668, + "rewards/margins": 0.11365556716918945, + "rewards/rejected": -0.11491253972053528, + "step": 3653 + }, + { + "epoch": 2.5269709543568464, + "grad_norm": 9.48086929321289, + "learning_rate": 4.1516828031350856e-05, + "log_odds_chosen": 9.682076454162598, + "log_odds_ratio": -0.00010647479211911559, + "logits/chosen": -0.76712965965271, + "logits/rejected": -0.744515597820282, + "logps/chosen": -0.00026008131681010127, + "logps/rejected": -1.4440491199493408, + "loss": 1.9111, + "nll_loss": 0.4777595102787018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.600813240860589e-05, + "rewards/margins": 0.1443789005279541, + "rewards/rejected": -0.14440491795539856, + "step": 3654 + }, + { + "epoch": 2.527662517289073, + "grad_norm": 5.191808223724365, + "learning_rate": 4.151298601506071e-05, + "log_odds_chosen": 6.734824180603027, + "log_odds_ratio": -0.12821802496910095, + "logits/chosen": -0.4947073459625244, + "logits/rejected": -0.5462866425514221, + "logps/chosen": -0.060167863965034485, + "logps/rejected": -1.258905291557312, + "loss": 1.9003, + "nll_loss": 0.4622592628002167, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0060167862102389336, + "rewards/margins": 0.11987375468015671, + "rewards/rejected": -0.12589053809642792, + "step": 3655 + }, + { + "epoch": 2.5283540802213, + "grad_norm": 14.005337715148926, + "learning_rate": 4.1509143998770554e-05, + "log_odds_chosen": 6.469050407409668, + "log_odds_ratio": -0.2855418622493744, + "logits/chosen": -0.3703860938549042, + "logits/rejected": -0.44764846563339233, + "logps/chosen": -0.027607586234807968, + "logps/rejected": -0.7636227607727051, + "loss": 3.0046, + "nll_loss": 0.7226047515869141, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.002760758623480797, + "rewards/margins": 0.07360151410102844, + "rewards/rejected": -0.07636226713657379, + "step": 3656 + }, + { + "epoch": 2.529045643153527, + "grad_norm": 10.278541564941406, + "learning_rate": 4.150530198248041e-05, + "log_odds_chosen": 7.559301853179932, + "log_odds_ratio": -0.005141490139067173, + "logits/chosen": -0.977824866771698, + "logits/rejected": -1.0137873888015747, + "logps/chosen": -0.044321730732917786, + "logps/rejected": -1.5173851251602173, + "loss": 3.0781, + "nll_loss": 0.7690147757530212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004432173445820808, + "rewards/margins": 0.14730635285377502, + "rewards/rejected": -0.1517385095357895, + "step": 3657 + }, + { + "epoch": 2.5297372060857537, + "grad_norm": 7.183173656463623, + "learning_rate": 4.150145996619026e-05, + "log_odds_chosen": 5.152936935424805, + "log_odds_ratio": -0.27972304821014404, + "logits/chosen": -0.7417833209037781, + "logits/rejected": -0.7697551250457764, + "logps/chosen": -0.04818735271692276, + "logps/rejected": -0.8900142908096313, + "loss": 2.265, + "nll_loss": 0.5382810831069946, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.004818734712898731, + "rewards/margins": 0.08418269455432892, + "rewards/rejected": -0.08900142461061478, + "step": 3658 + }, + { + "epoch": 2.5304287690179805, + "grad_norm": 6.066020965576172, + "learning_rate": 4.1497617949900105e-05, + "log_odds_chosen": 9.213735580444336, + "log_odds_ratio": -0.0029404088854789734, + "logits/chosen": -0.8174360394477844, + "logits/rejected": -0.8695136308670044, + "logps/chosen": -0.005035539623349905, + "logps/rejected": -1.9761308431625366, + "loss": 1.7436, + "nll_loss": 0.4355984330177307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005035539506934583, + "rewards/margins": 0.19710955023765564, + "rewards/rejected": -0.19761309027671814, + "step": 3659 + }, + { + "epoch": 2.5311203319502074, + "grad_norm": 8.873180389404297, + "learning_rate": 4.1493775933609964e-05, + "log_odds_chosen": 7.810351371765137, + "log_odds_ratio": -0.004365085158497095, + "logits/chosen": -0.9499499797821045, + "logits/rejected": -0.9649522304534912, + "logps/chosen": -0.0038736488204449415, + "logps/rejected": -1.2680219411849976, + "loss": 2.5343, + "nll_loss": 0.6331478357315063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038736488204449415, + "rewards/margins": 0.12641483545303345, + "rewards/rejected": -0.12680219113826752, + "step": 3660 + }, + { + "epoch": 2.531811894882434, + "grad_norm": 10.1052885055542, + "learning_rate": 4.148993391731981e-05, + "log_odds_chosen": 6.543613433837891, + "log_odds_ratio": -0.043987736105918884, + "logits/chosen": -0.9714975953102112, + "logits/rejected": -0.9878481030464172, + "logps/chosen": -0.024164706468582153, + "logps/rejected": -1.5076501369476318, + "loss": 2.6552, + "nll_loss": 0.6593921184539795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002416470320895314, + "rewards/margins": 0.14834854006767273, + "rewards/rejected": -0.15076500177383423, + "step": 3661 + }, + { + "epoch": 2.532503457814661, + "grad_norm": 8.464655876159668, + "learning_rate": 4.148609190102966e-05, + "log_odds_chosen": 7.593095302581787, + "log_odds_ratio": -0.012364407069981098, + "logits/chosen": -0.8906121253967285, + "logits/rejected": -0.9512820243835449, + "logps/chosen": -0.01501818560063839, + "logps/rejected": -1.4770665168762207, + "loss": 2.2662, + "nll_loss": 0.5653175711631775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015018185367807746, + "rewards/margins": 0.14620482921600342, + "rewards/rejected": -0.14770665764808655, + "step": 3662 + }, + { + "epoch": 2.533195020746888, + "grad_norm": 8.537209510803223, + "learning_rate": 4.1482249884739515e-05, + "log_odds_chosen": 7.7734456062316895, + "log_odds_ratio": -0.010539239272475243, + "logits/chosen": -0.37151771783828735, + "logits/rejected": -0.40873855352401733, + "logps/chosen": -0.026093240827322006, + "logps/rejected": -1.6680834293365479, + "loss": 2.0097, + "nll_loss": 0.5013793110847473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026093239430338144, + "rewards/margins": 0.16419902443885803, + "rewards/rejected": -0.1668083518743515, + "step": 3663 + }, + { + "epoch": 2.5338865836791147, + "grad_norm": 9.18942928314209, + "learning_rate": 4.147840786844937e-05, + "log_odds_chosen": 8.71163558959961, + "log_odds_ratio": -0.0007794310804456472, + "logits/chosen": -0.5640786290168762, + "logits/rejected": -0.6506339311599731, + "logps/chosen": -0.0010314120445400476, + "logps/rejected": -1.8243160247802734, + "loss": 2.0071, + "nll_loss": 0.5017048120498657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010314121027477086, + "rewards/margins": 0.182328462600708, + "rewards/rejected": -0.18243160843849182, + "step": 3664 + }, + { + "epoch": 2.5345781466113415, + "grad_norm": 6.1480584144592285, + "learning_rate": 4.147456585215921e-05, + "log_odds_chosen": 8.104879379272461, + "log_odds_ratio": -0.002435260685160756, + "logits/chosen": -0.5540738105773926, + "logits/rejected": -0.6117613911628723, + "logps/chosen": -0.0028951477725058794, + "logps/rejected": -1.5245311260223389, + "loss": 1.6881, + "nll_loss": 0.42178869247436523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002895148063544184, + "rewards/margins": 0.15216359496116638, + "rewards/rejected": -0.15245312452316284, + "step": 3665 + }, + { + "epoch": 2.5352697095435683, + "grad_norm": 14.560870170593262, + "learning_rate": 4.1470723835869065e-05, + "log_odds_chosen": 6.843084812164307, + "log_odds_ratio": -0.24995103478431702, + "logits/chosen": -0.35342735052108765, + "logits/rejected": -0.46956032514572144, + "logps/chosen": -0.032915253192186356, + "logps/rejected": -1.3369067907333374, + "loss": 3.4494, + "nll_loss": 0.8373644948005676, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003291525412350893, + "rewards/margins": 0.13039915263652802, + "rewards/rejected": -0.13369068503379822, + "step": 3666 + }, + { + "epoch": 2.535961272475795, + "grad_norm": 6.680951118469238, + "learning_rate": 4.146688181957892e-05, + "log_odds_chosen": 7.095094680786133, + "log_odds_ratio": -0.0850469321012497, + "logits/chosen": -0.43845391273498535, + "logits/rejected": -0.48212161660194397, + "logps/chosen": -0.024386655539274216, + "logps/rejected": -1.4623682498931885, + "loss": 2.0037, + "nll_loss": 0.4924285411834717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024386656004935503, + "rewards/margins": 0.14379817247390747, + "rewards/rejected": -0.1462368369102478, + "step": 3667 + }, + { + "epoch": 2.536652835408022, + "grad_norm": 9.7996187210083, + "learning_rate": 4.1463039803288763e-05, + "log_odds_chosen": 8.075040817260742, + "log_odds_ratio": -0.004537233617156744, + "logits/chosen": -0.26644569635391235, + "logits/rejected": -0.3150561451911926, + "logps/chosen": -0.0036520101130008698, + "logps/rejected": -1.3317700624465942, + "loss": 2.0921, + "nll_loss": 0.5225600600242615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003652009763754904, + "rewards/margins": 0.1328117996454239, + "rewards/rejected": -0.1331769973039627, + "step": 3668 + }, + { + "epoch": 2.537344398340249, + "grad_norm": 8.300936698913574, + "learning_rate": 4.145919778699862e-05, + "log_odds_chosen": 8.642348289489746, + "log_odds_ratio": -0.013115583918988705, + "logits/chosen": -0.6805007457733154, + "logits/rejected": -0.6964966058731079, + "logps/chosen": -0.005282273981720209, + "logps/rejected": -2.066359281539917, + "loss": 2.5309, + "nll_loss": 0.631418764591217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005282273632474244, + "rewards/margins": 0.2061077058315277, + "rewards/rejected": -0.2066359519958496, + "step": 3669 + }, + { + "epoch": 2.5380359612724757, + "grad_norm": 7.85097074508667, + "learning_rate": 4.145535577070847e-05, + "log_odds_chosen": 7.3196611404418945, + "log_odds_ratio": -0.0034911674447357655, + "logits/chosen": -0.6438291072845459, + "logits/rejected": -0.6845942139625549, + "logps/chosen": -0.025680480524897575, + "logps/rejected": -1.8952102661132812, + "loss": 2.6675, + "nll_loss": 0.6665199995040894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025680481921881437, + "rewards/margins": 0.18695297837257385, + "rewards/rejected": -0.18952102959156036, + "step": 3670 + }, + { + "epoch": 2.5387275242047025, + "grad_norm": 11.16006088256836, + "learning_rate": 4.145151375441832e-05, + "log_odds_chosen": 6.697037696838379, + "log_odds_ratio": -0.09762268513441086, + "logits/chosen": -0.33623313903808594, + "logits/rejected": -0.41128477454185486, + "logps/chosen": -0.04758386313915253, + "logps/rejected": -1.1416031122207642, + "loss": 2.5198, + "nll_loss": 0.6201927065849304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00475838640704751, + "rewards/margins": 0.10940193384885788, + "rewards/rejected": -0.11416031420230865, + "step": 3671 + }, + { + "epoch": 2.5394190871369293, + "grad_norm": 10.982725143432617, + "learning_rate": 4.144767173812817e-05, + "log_odds_chosen": 6.469160079956055, + "log_odds_ratio": -0.0469195693731308, + "logits/chosen": -0.312357634305954, + "logits/rejected": -0.38012784719467163, + "logps/chosen": -0.18392308056354523, + "logps/rejected": -1.900977373123169, + "loss": 2.5113, + "nll_loss": 0.6231358647346497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018392309546470642, + "rewards/margins": 0.171705424785614, + "rewards/rejected": -0.19009774923324585, + "step": 3672 + }, + { + "epoch": 2.540110650069156, + "grad_norm": 8.845681190490723, + "learning_rate": 4.1443829721838026e-05, + "log_odds_chosen": 10.247167587280273, + "log_odds_ratio": -0.00015303498366847634, + "logits/chosen": -0.7295843958854675, + "logits/rejected": -0.7623701095581055, + "logps/chosen": -0.00021613975695800036, + "logps/rejected": -1.8121672868728638, + "loss": 2.4817, + "nll_loss": 0.6204196214675903, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1613976059597917e-05, + "rewards/margins": 0.18119511008262634, + "rewards/rejected": -0.18121671676635742, + "step": 3673 + }, + { + "epoch": 2.540802213001383, + "grad_norm": 8.529345512390137, + "learning_rate": 4.143998770554787e-05, + "log_odds_chosen": 7.201008319854736, + "log_odds_ratio": -0.19989247620105743, + "logits/chosen": -0.26101046800613403, + "logits/rejected": -0.27898168563842773, + "logps/chosen": -0.03462667763233185, + "logps/rejected": -1.7183300256729126, + "loss": 2.1496, + "nll_loss": 0.5173985958099365, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003462668042629957, + "rewards/margins": 0.1683703362941742, + "rewards/rejected": -0.17183300852775574, + "step": 3674 + }, + { + "epoch": 2.54149377593361, + "grad_norm": 11.021639823913574, + "learning_rate": 4.1436145689257724e-05, + "log_odds_chosen": 8.368658065795898, + "log_odds_ratio": -0.0014843323733657598, + "logits/chosen": -0.3710412383079529, + "logits/rejected": -0.4252433776855469, + "logps/chosen": -0.002052636817097664, + "logps/rejected": -1.7162036895751953, + "loss": 2.2048, + "nll_loss": 0.5510503053665161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020526369917206466, + "rewards/margins": 0.17141512036323547, + "rewards/rejected": -0.17162038385868073, + "step": 3675 + }, + { + "epoch": 2.5421853388658366, + "grad_norm": 7.163419723510742, + "learning_rate": 4.1432303672967576e-05, + "log_odds_chosen": 7.673587799072266, + "log_odds_ratio": -0.13042639195919037, + "logits/chosen": -0.08417253941297531, + "logits/rejected": -0.20888949930667877, + "logps/chosen": -0.022530486807227135, + "logps/rejected": -1.327731728553772, + "loss": 2.3599, + "nll_loss": 0.5769286155700684, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022530488204210997, + "rewards/margins": 0.13052012026309967, + "rewards/rejected": -0.13277317583560944, + "step": 3676 + }, + { + "epoch": 2.5428769017980635, + "grad_norm": 6.752707481384277, + "learning_rate": 4.142846165667743e-05, + "log_odds_chosen": 8.717757225036621, + "log_odds_ratio": -0.0008739815093576908, + "logits/chosen": -0.856940507888794, + "logits/rejected": -0.9300462603569031, + "logps/chosen": -0.03579828515648842, + "logps/rejected": -2.1495282649993896, + "loss": 1.8846, + "nll_loss": 0.47106146812438965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035798284225165844, + "rewards/margins": 0.2113730013370514, + "rewards/rejected": -0.21495282649993896, + "step": 3677 + }, + { + "epoch": 2.5435684647302903, + "grad_norm": 6.426164627075195, + "learning_rate": 4.142461964038728e-05, + "log_odds_chosen": 9.750266075134277, + "log_odds_ratio": -0.0001426611270289868, + "logits/chosen": -0.3759447932243347, + "logits/rejected": -0.5120629072189331, + "logps/chosen": -0.0004961431259289384, + "logps/rejected": -1.8397492170333862, + "loss": 1.7415, + "nll_loss": 0.4353630244731903, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9614311137702316e-05, + "rewards/margins": 0.1839253008365631, + "rewards/rejected": -0.18397492170333862, + "step": 3678 + }, + { + "epoch": 2.544260027662517, + "grad_norm": 11.520225524902344, + "learning_rate": 4.142077762409713e-05, + "log_odds_chosen": 9.523664474487305, + "log_odds_ratio": -0.00046379820560105145, + "logits/chosen": -0.46259498596191406, + "logits/rejected": -0.5426309108734131, + "logps/chosen": -0.0007358100265264511, + "logps/rejected": -1.7401684522628784, + "loss": 3.2972, + "nll_loss": 0.8242548108100891, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.358100265264511e-05, + "rewards/margins": 0.17394328117370605, + "rewards/rejected": -0.17401686310768127, + "step": 3679 + }, + { + "epoch": 2.544951590594744, + "grad_norm": 6.897475719451904, + "learning_rate": 4.141693560780698e-05, + "log_odds_chosen": 8.941631317138672, + "log_odds_ratio": -0.0003182542568538338, + "logits/chosen": -0.5127269625663757, + "logits/rejected": -0.46197599172592163, + "logps/chosen": -0.0016620360547676682, + "logps/rejected": -1.2729542255401611, + "loss": 2.1741, + "nll_loss": 0.5434816479682922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016620359383523464, + "rewards/margins": 0.1271292269229889, + "rewards/rejected": -0.12729541957378387, + "step": 3680 + }, + { + "epoch": 2.545643153526971, + "grad_norm": 3.8709726333618164, + "learning_rate": 4.141309359151683e-05, + "log_odds_chosen": 8.852423667907715, + "log_odds_ratio": -0.0007429651450365782, + "logits/chosen": -0.33135154843330383, + "logits/rejected": -0.41605496406555176, + "logps/chosen": -0.006752867251634598, + "logps/rejected": -2.200617551803589, + "loss": 1.7138, + "nll_loss": 0.4283781051635742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006752866902388632, + "rewards/margins": 0.21938645839691162, + "rewards/rejected": -0.22006173431873322, + "step": 3681 + }, + { + "epoch": 2.5463347164591976, + "grad_norm": 10.108016014099121, + "learning_rate": 4.1409251575226684e-05, + "log_odds_chosen": 7.953339576721191, + "log_odds_ratio": -0.09382897615432739, + "logits/chosen": -0.15474338829517365, + "logits/rejected": -0.23979714512825012, + "logps/chosen": -0.03481658548116684, + "logps/rejected": -1.3647801876068115, + "loss": 2.6875, + "nll_loss": 0.6624833941459656, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034816591069102287, + "rewards/margins": 0.13299638032913208, + "rewards/rejected": -0.13647803664207458, + "step": 3682 + }, + { + "epoch": 2.5470262793914245, + "grad_norm": 7.43285608291626, + "learning_rate": 4.140540955893653e-05, + "log_odds_chosen": 8.776679992675781, + "log_odds_ratio": -0.0011820968938991427, + "logits/chosen": -0.5200778245925903, + "logits/rejected": -0.6396703720092773, + "logps/chosen": -0.002887851558625698, + "logps/rejected": -1.8419222831726074, + "loss": 2.5257, + "nll_loss": 0.6313114166259766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002887851442210376, + "rewards/margins": 0.18390345573425293, + "rewards/rejected": -0.1841922402381897, + "step": 3683 + }, + { + "epoch": 2.5477178423236513, + "grad_norm": 5.443092346191406, + "learning_rate": 4.140156754264638e-05, + "log_odds_chosen": 7.036106109619141, + "log_odds_ratio": -0.06073131412267685, + "logits/chosen": -0.4874728322029114, + "logits/rejected": -0.4316939115524292, + "logps/chosen": -0.04359099268913269, + "logps/rejected": -1.3618528842926025, + "loss": 1.7898, + "nll_loss": 0.44136685132980347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004359099082648754, + "rewards/margins": 0.13182619214057922, + "rewards/rejected": -0.13618530333042145, + "step": 3684 + }, + { + "epoch": 2.548409405255878, + "grad_norm": 6.47907829284668, + "learning_rate": 4.1397725526356235e-05, + "log_odds_chosen": 6.502830505371094, + "log_odds_ratio": -0.054894234985113144, + "logits/chosen": -0.24924349784851074, + "logits/rejected": -0.2819105386734009, + "logps/chosen": -0.025829501450061798, + "logps/rejected": -1.2309153079986572, + "loss": 2.0147, + "nll_loss": 0.49817532300949097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002582950284704566, + "rewards/margins": 0.1205085888504982, + "rewards/rejected": -0.12309154123067856, + "step": 3685 + }, + { + "epoch": 2.549100968188105, + "grad_norm": 9.771394729614258, + "learning_rate": 4.139388351006609e-05, + "log_odds_chosen": 8.620658874511719, + "log_odds_ratio": -0.0012988585513085127, + "logits/chosen": -0.5891374945640564, + "logits/rejected": -0.6297314763069153, + "logps/chosen": -0.00756434490904212, + "logps/rejected": -1.4627835750579834, + "loss": 2.5286, + "nll_loss": 0.6320264339447021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007564345141872764, + "rewards/margins": 0.14552193880081177, + "rewards/rejected": -0.14627835154533386, + "step": 3686 + }, + { + "epoch": 2.5497925311203318, + "grad_norm": 10.010108947753906, + "learning_rate": 4.139004149377593e-05, + "log_odds_chosen": 8.177577018737793, + "log_odds_ratio": -0.0021723266690969467, + "logits/chosen": -0.5750257968902588, + "logits/rejected": -0.5584626793861389, + "logps/chosen": -0.0018111247336491942, + "logps/rejected": -1.4841028451919556, + "loss": 2.0266, + "nll_loss": 0.5064324736595154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018111246754415333, + "rewards/margins": 0.14822916686534882, + "rewards/rejected": -0.14841027557849884, + "step": 3687 + }, + { + "epoch": 2.5504840940525586, + "grad_norm": 8.33242130279541, + "learning_rate": 4.1386199477485785e-05, + "log_odds_chosen": 7.380660533905029, + "log_odds_ratio": -0.07418529689311981, + "logits/chosen": -0.39534804224967957, + "logits/rejected": -0.3996621072292328, + "logps/chosen": -0.019277188926935196, + "logps/rejected": -1.3679413795471191, + "loss": 1.7864, + "nll_loss": 0.4391922354698181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001927718985825777, + "rewards/margins": 0.13486641645431519, + "rewards/rejected": -0.13679413497447968, + "step": 3688 + }, + { + "epoch": 2.5511756569847854, + "grad_norm": 35.614662170410156, + "learning_rate": 4.138235746119564e-05, + "log_odds_chosen": 6.996004104614258, + "log_odds_ratio": -0.24035514891147614, + "logits/chosen": -0.7962977886199951, + "logits/rejected": -0.8157656788825989, + "logps/chosen": -0.04621091112494469, + "logps/rejected": -0.9972507953643799, + "loss": 2.4286, + "nll_loss": 0.5831174254417419, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004621092230081558, + "rewards/margins": 0.09510399401187897, + "rewards/rejected": -0.09972508251667023, + "step": 3689 + }, + { + "epoch": 2.5518672199170123, + "grad_norm": 10.523552894592285, + "learning_rate": 4.137851544490548e-05, + "log_odds_chosen": 8.526422500610352, + "log_odds_ratio": -0.0025308893527835608, + "logits/chosen": -0.40642938017845154, + "logits/rejected": -0.49298715591430664, + "logps/chosen": -0.05294908583164215, + "logps/rejected": -1.5810902118682861, + "loss": 2.9231, + "nll_loss": 0.7305221557617188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005294908303767443, + "rewards/margins": 0.152814120054245, + "rewards/rejected": -0.15810903906822205, + "step": 3690 + }, + { + "epoch": 2.552558782849239, + "grad_norm": 9.078492164611816, + "learning_rate": 4.137467342861534e-05, + "log_odds_chosen": 7.632801055908203, + "log_odds_ratio": -0.03201550990343094, + "logits/chosen": -0.6973223686218262, + "logits/rejected": -0.7350947856903076, + "logps/chosen": -0.0043390472419559956, + "logps/rejected": -1.2494264841079712, + "loss": 2.6931, + "nll_loss": 0.6700709462165833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043390473001636565, + "rewards/margins": 0.12450873851776123, + "rewards/rejected": -0.12494263797998428, + "step": 3691 + }, + { + "epoch": 2.553250345781466, + "grad_norm": 7.067032337188721, + "learning_rate": 4.137083141232519e-05, + "log_odds_chosen": 8.267839431762695, + "log_odds_ratio": -0.002087076660245657, + "logits/chosen": -0.5631198883056641, + "logits/rejected": -0.5623632073402405, + "logps/chosen": -0.002745155245065689, + "logps/rejected": -1.3989791870117188, + "loss": 1.7896, + "nll_loss": 0.44719675183296204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002745155361481011, + "rewards/margins": 0.13962340354919434, + "rewards/rejected": -0.1398979127407074, + "step": 3692 + }, + { + "epoch": 2.5539419087136928, + "grad_norm": 5.6397576332092285, + "learning_rate": 4.136698939603504e-05, + "log_odds_chosen": 8.663679122924805, + "log_odds_ratio": -0.0006652399315498769, + "logits/chosen": -0.6536080241203308, + "logits/rejected": -0.6602993607521057, + "logps/chosen": -0.0023707440122962, + "logps/rejected": -1.1587908267974854, + "loss": 2.405, + "nll_loss": 0.6011757850646973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002370743895880878, + "rewards/margins": 0.1156420111656189, + "rewards/rejected": -0.11587909609079361, + "step": 3693 + }, + { + "epoch": 2.5546334716459196, + "grad_norm": 8.868212699890137, + "learning_rate": 4.136314737974489e-05, + "log_odds_chosen": 6.981932640075684, + "log_odds_ratio": -0.21796710789203644, + "logits/chosen": -0.44232919812202454, + "logits/rejected": -0.4955691993236542, + "logps/chosen": -0.02946803905069828, + "logps/rejected": -1.4040617942810059, + "loss": 1.9888, + "nll_loss": 0.47540146112442017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029468040447682142, + "rewards/margins": 0.13745936751365662, + "rewards/rejected": -0.14040617644786835, + "step": 3694 + }, + { + "epoch": 2.5553250345781464, + "grad_norm": 13.996137619018555, + "learning_rate": 4.1359305363454746e-05, + "log_odds_chosen": 9.852861404418945, + "log_odds_ratio": -0.00029456906486302614, + "logits/chosen": -0.46493977308273315, + "logits/rejected": -0.5792163610458374, + "logps/chosen": -0.004407463129609823, + "logps/rejected": -2.5801842212677, + "loss": 2.9734, + "nll_loss": 0.7433305382728577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004407463420648128, + "rewards/margins": 0.25757768750190735, + "rewards/rejected": -0.258018434047699, + "step": 3695 + }, + { + "epoch": 2.5560165975103732, + "grad_norm": 6.141745567321777, + "learning_rate": 4.135546334716459e-05, + "log_odds_chosen": 8.410773277282715, + "log_odds_ratio": -0.0020805567037314177, + "logits/chosen": -0.5083537697792053, + "logits/rejected": -0.5329819321632385, + "logps/chosen": -0.011984552256762981, + "logps/rejected": -1.7198209762573242, + "loss": 1.2896, + "nll_loss": 0.32219621539115906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00119845534209162, + "rewards/margins": 0.1707836538553238, + "rewards/rejected": -0.17198210954666138, + "step": 3696 + }, + { + "epoch": 2.5567081604426, + "grad_norm": 14.502395629882812, + "learning_rate": 4.1351621330874444e-05, + "log_odds_chosen": 6.762268543243408, + "log_odds_ratio": -0.03570711612701416, + "logits/chosen": -0.43851161003112793, + "logits/rejected": -0.4272356629371643, + "logps/chosen": -0.019428087398409843, + "logps/rejected": -1.2996336221694946, + "loss": 2.1978, + "nll_loss": 0.5458870530128479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019428087398409843, + "rewards/margins": 0.12802055478096008, + "rewards/rejected": -0.12996336817741394, + "step": 3697 + }, + { + "epoch": 2.557399723374827, + "grad_norm": 9.247694969177246, + "learning_rate": 4.1347779314584296e-05, + "log_odds_chosen": 8.048039436340332, + "log_odds_ratio": -0.011633592657744884, + "logits/chosen": -0.6219062805175781, + "logits/rejected": -0.7027066946029663, + "logps/chosen": -0.006910478230565786, + "logps/rejected": -1.1331398487091064, + "loss": 2.5269, + "nll_loss": 0.6305506229400635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006910478696227074, + "rewards/margins": 0.112622931599617, + "rewards/rejected": -0.11331398785114288, + "step": 3698 + }, + { + "epoch": 2.5580912863070537, + "grad_norm": 11.068485260009766, + "learning_rate": 4.134393729829414e-05, + "log_odds_chosen": 9.680505752563477, + "log_odds_ratio": -0.00019160093506798148, + "logits/chosen": -0.8199482560157776, + "logits/rejected": -0.8591936230659485, + "logps/chosen": -0.0002677099546417594, + "logps/rejected": -1.6102635860443115, + "loss": 2.397, + "nll_loss": 0.5992240905761719, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6770994736580178e-05, + "rewards/margins": 0.1609995812177658, + "rewards/rejected": -0.16102635860443115, + "step": 3699 + }, + { + "epoch": 2.5587828492392806, + "grad_norm": 9.24276065826416, + "learning_rate": 4.1340095282004e-05, + "log_odds_chosen": 9.128841400146484, + "log_odds_ratio": -0.0005217056022956967, + "logits/chosen": -0.8914197087287903, + "logits/rejected": -0.9727503061294556, + "logps/chosen": -0.001058247173205018, + "logps/rejected": -1.5193347930908203, + "loss": 3.1986, + "nll_loss": 0.7996020317077637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010582470713416114, + "rewards/margins": 0.15182766318321228, + "rewards/rejected": -0.151933491230011, + "step": 3700 + }, + { + "epoch": 2.5594744121715074, + "grad_norm": 17.474658966064453, + "learning_rate": 4.133625326571385e-05, + "log_odds_chosen": 9.318645477294922, + "log_odds_ratio": -0.0022993730381131172, + "logits/chosen": -0.8668950796127319, + "logits/rejected": -0.9719336628913879, + "logps/chosen": -0.009635216556489468, + "logps/rejected": -2.347177028656006, + "loss": 2.9675, + "nll_loss": 0.7416494488716125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009635217138566077, + "rewards/margins": 0.2337542176246643, + "rewards/rejected": -0.2347177267074585, + "step": 3701 + }, + { + "epoch": 2.5601659751037342, + "grad_norm": 9.051102638244629, + "learning_rate": 4.13324112494237e-05, + "log_odds_chosen": 8.766997337341309, + "log_odds_ratio": -0.002480762079358101, + "logits/chosen": -0.7824666500091553, + "logits/rejected": -0.8516795039176941, + "logps/chosen": -0.0019446569494903088, + "logps/rejected": -1.526063323020935, + "loss": 2.5402, + "nll_loss": 0.634802520275116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001944656833074987, + "rewards/margins": 0.15241187810897827, + "rewards/rejected": -0.15260633826255798, + "step": 3702 + }, + { + "epoch": 2.560857538035961, + "grad_norm": 7.871819972991943, + "learning_rate": 4.132856923313355e-05, + "log_odds_chosen": 7.462924480438232, + "log_odds_ratio": -0.014852583408355713, + "logits/chosen": -0.57256019115448, + "logits/rejected": -0.6143975257873535, + "logps/chosen": -0.014655756764113903, + "logps/rejected": -1.7680262327194214, + "loss": 2.694, + "nll_loss": 0.6720025539398193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001465575653128326, + "rewards/margins": 0.1753370612859726, + "rewards/rejected": -0.1768026351928711, + "step": 3703 + }, + { + "epoch": 2.561549100968188, + "grad_norm": 13.968018531799316, + "learning_rate": 4.1324727216843404e-05, + "log_odds_chosen": 6.530374050140381, + "log_odds_ratio": -0.1512891799211502, + "logits/chosen": -0.767929196357727, + "logits/rejected": -0.7999763488769531, + "logps/chosen": -0.20367056131362915, + "logps/rejected": -2.1344516277313232, + "loss": 3.0548, + "nll_loss": 0.7485730648040771, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020367056131362915, + "rewards/margins": 0.19307810068130493, + "rewards/rejected": -0.21344517171382904, + "step": 3704 + }, + { + "epoch": 2.5622406639004147, + "grad_norm": 5.46859073638916, + "learning_rate": 4.132088520055325e-05, + "log_odds_chosen": 8.344247817993164, + "log_odds_ratio": -0.014011223800480366, + "logits/chosen": -0.5818994045257568, + "logits/rejected": -0.5327481627464294, + "logps/chosen": -0.005584258586168289, + "logps/rejected": -1.0000637769699097, + "loss": 1.9534, + "nll_loss": 0.4869387149810791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005584259051829576, + "rewards/margins": 0.09944795817136765, + "rewards/rejected": -0.10000638663768768, + "step": 3705 + }, + { + "epoch": 2.5629322268326415, + "grad_norm": 10.0899019241333, + "learning_rate": 4.13170431842631e-05, + "log_odds_chosen": 9.544801712036133, + "log_odds_ratio": -0.00021236162865534425, + "logits/chosen": -0.7554320096969604, + "logits/rejected": -0.8102554082870483, + "logps/chosen": -0.0018119094893336296, + "logps/rejected": -2.1428074836730957, + "loss": 2.1968, + "nll_loss": 0.5491690635681152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018119096057489514, + "rewards/margins": 0.21409955620765686, + "rewards/rejected": -0.21428075432777405, + "step": 3706 + }, + { + "epoch": 2.5636237897648684, + "grad_norm": 10.008685111999512, + "learning_rate": 4.1313201167972955e-05, + "log_odds_chosen": 9.383994102478027, + "log_odds_ratio": -0.00018407402967568487, + "logits/chosen": -1.0799190998077393, + "logits/rejected": -1.0719953775405884, + "logps/chosen": -0.0004757646529469639, + "logps/rejected": -1.5452126264572144, + "loss": 2.2054, + "nll_loss": 0.5513247847557068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.757646092912182e-05, + "rewards/margins": 0.154473677277565, + "rewards/rejected": -0.15452125668525696, + "step": 3707 + }, + { + "epoch": 2.564315352697095, + "grad_norm": 8.341119766235352, + "learning_rate": 4.13093591516828e-05, + "log_odds_chosen": 6.45013952255249, + "log_odds_ratio": -0.07698262482881546, + "logits/chosen": -0.9068139791488647, + "logits/rejected": -0.8944410085678101, + "logps/chosen": -0.022624794393777847, + "logps/rejected": -1.1139686107635498, + "loss": 1.9664, + "nll_loss": 0.4838896691799164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022624791599810123, + "rewards/margins": 0.10913438349962234, + "rewards/rejected": -0.11139686405658722, + "step": 3708 + }, + { + "epoch": 2.565006915629322, + "grad_norm": 9.152691841125488, + "learning_rate": 4.130551713539266e-05, + "log_odds_chosen": 8.225065231323242, + "log_odds_ratio": -0.05828314274549484, + "logits/chosen": -0.5669647455215454, + "logits/rejected": -0.6710978150367737, + "logps/chosen": -0.02455291338264942, + "logps/rejected": -1.7932811975479126, + "loss": 2.4232, + "nll_loss": 0.5999712944030762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002455291338264942, + "rewards/margins": 0.1768728345632553, + "rewards/rejected": -0.17932814359664917, + "step": 3709 + }, + { + "epoch": 2.565698478561549, + "grad_norm": 11.202807426452637, + "learning_rate": 4.1301675119102505e-05, + "log_odds_chosen": 7.55279016494751, + "log_odds_ratio": -0.0012247057165950537, + "logits/chosen": -0.7727817893028259, + "logits/rejected": -0.7303429245948792, + "logps/chosen": -0.00481916731223464, + "logps/rejected": -1.6028831005096436, + "loss": 2.6498, + "nll_loss": 0.6623245477676392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004819167370442301, + "rewards/margins": 0.15980640053749084, + "rewards/rejected": -0.16028831899166107, + "step": 3710 + }, + { + "epoch": 2.5663900414937757, + "grad_norm": 5.204437255859375, + "learning_rate": 4.129783310281236e-05, + "log_odds_chosen": 6.436047077178955, + "log_odds_ratio": -0.12044772505760193, + "logits/chosen": -0.5464393496513367, + "logits/rejected": -0.5386316180229187, + "logps/chosen": -0.10069790482521057, + "logps/rejected": -1.1179600954055786, + "loss": 2.2288, + "nll_loss": 0.5451546907424927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010069791227579117, + "rewards/margins": 0.1017262265086174, + "rewards/rejected": -0.11179601401090622, + "step": 3711 + }, + { + "epoch": 2.5670816044260025, + "grad_norm": 7.855978012084961, + "learning_rate": 4.129399108652221e-05, + "log_odds_chosen": 7.216761589050293, + "log_odds_ratio": -0.008108437061309814, + "logits/chosen": -0.7366430163383484, + "logits/rejected": -0.742645263671875, + "logps/chosen": -0.033469267189502716, + "logps/rejected": -1.6177469491958618, + "loss": 2.6916, + "nll_loss": 0.6720876693725586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033469272311776876, + "rewards/margins": 0.15842777490615845, + "rewards/rejected": -0.16177469491958618, + "step": 3712 + }, + { + "epoch": 2.5677731673582294, + "grad_norm": 6.685059070587158, + "learning_rate": 4.129014907023206e-05, + "log_odds_chosen": 5.891242980957031, + "log_odds_ratio": -0.12909001111984253, + "logits/chosen": -0.5541249513626099, + "logits/rejected": -0.570735514163971, + "logps/chosen": -0.02706187777221203, + "logps/rejected": -0.9847200512886047, + "loss": 2.8047, + "nll_loss": 0.6882719993591309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002706187777221203, + "rewards/margins": 0.09576582908630371, + "rewards/rejected": -0.09847201406955719, + "step": 3713 + }, + { + "epoch": 2.568464730290456, + "grad_norm": 8.131349563598633, + "learning_rate": 4.128630705394191e-05, + "log_odds_chosen": 9.295339584350586, + "log_odds_ratio": -0.0011500322725623846, + "logits/chosen": -0.5707579255104065, + "logits/rejected": -0.6556651592254639, + "logps/chosen": -0.00318812089972198, + "logps/rejected": -1.8498668670654297, + "loss": 1.9003, + "nll_loss": 0.4749618172645569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000318812089972198, + "rewards/margins": 0.18466788530349731, + "rewards/rejected": -0.18498669564723969, + "step": 3714 + }, + { + "epoch": 2.569156293222683, + "grad_norm": 8.574792861938477, + "learning_rate": 4.128246503765176e-05, + "log_odds_chosen": 7.151443958282471, + "log_odds_ratio": -0.05823368579149246, + "logits/chosen": -0.7019017934799194, + "logits/rejected": -0.6479978561401367, + "logps/chosen": -0.08117261528968811, + "logps/rejected": -1.3640259504318237, + "loss": 2.1487, + "nll_loss": 0.5313621163368225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00811726227402687, + "rewards/margins": 0.12828533351421356, + "rewards/rejected": -0.13640260696411133, + "step": 3715 + }, + { + "epoch": 2.56984785615491, + "grad_norm": 12.357011795043945, + "learning_rate": 4.127862302136161e-05, + "log_odds_chosen": 7.658764839172363, + "log_odds_ratio": -0.002434109104797244, + "logits/chosen": -0.6412211656570435, + "logits/rejected": -0.6666697859764099, + "logps/chosen": -0.00591158214956522, + "logps/rejected": -1.2501399517059326, + "loss": 3.0724, + "nll_loss": 0.7678543329238892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005911581683903933, + "rewards/margins": 0.1244228333234787, + "rewards/rejected": -0.12501400709152222, + "step": 3716 + }, + { + "epoch": 2.5705394190871367, + "grad_norm": 8.453063011169434, + "learning_rate": 4.127478100507146e-05, + "log_odds_chosen": 9.030070304870605, + "log_odds_ratio": -0.001330976141616702, + "logits/chosen": -0.3844653367996216, + "logits/rejected": -0.46229255199432373, + "logps/chosen": -0.021825632080435753, + "logps/rejected": -2.265153408050537, + "loss": 1.9788, + "nll_loss": 0.4945632815361023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021825633011758327, + "rewards/margins": 0.2243327796459198, + "rewards/rejected": -0.22651533782482147, + "step": 3717 + }, + { + "epoch": 2.5712309820193635, + "grad_norm": 8.923296928405762, + "learning_rate": 4.127093898878132e-05, + "log_odds_chosen": 8.41724967956543, + "log_odds_ratio": -0.06643003225326538, + "logits/chosen": 0.05033461004495621, + "logits/rejected": -0.05653882771730423, + "logps/chosen": -0.013024937361478806, + "logps/rejected": -1.5312068462371826, + "loss": 2.0591, + "nll_loss": 0.5081378817558289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013024937361478806, + "rewards/margins": 0.1518182009458542, + "rewards/rejected": -0.15312069654464722, + "step": 3718 + }, + { + "epoch": 2.5719225449515903, + "grad_norm": 14.89078426361084, + "learning_rate": 4.1267096972491164e-05, + "log_odds_chosen": 6.805048942565918, + "log_odds_ratio": -0.16832545399665833, + "logits/chosen": -0.3612036406993866, + "logits/rejected": -0.38720014691352844, + "logps/chosen": -0.05015212297439575, + "logps/rejected": -1.658402681350708, + "loss": 2.9003, + "nll_loss": 0.7082515954971313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00501521211117506, + "rewards/margins": 0.16082505881786346, + "rewards/rejected": -0.165840283036232, + "step": 3719 + }, + { + "epoch": 2.572614107883817, + "grad_norm": 6.6490068435668945, + "learning_rate": 4.1263254956201016e-05, + "log_odds_chosen": 9.336997985839844, + "log_odds_ratio": -0.03805683180689812, + "logits/chosen": -0.35168910026550293, + "logits/rejected": -0.4202730357646942, + "logps/chosen": -0.02017226442694664, + "logps/rejected": -2.215473175048828, + "loss": 1.8718, + "nll_loss": 0.46415483951568604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020172265358269215, + "rewards/margins": 0.21953007578849792, + "rewards/rejected": -0.22154730558395386, + "step": 3720 + }, + { + "epoch": 2.573305670816044, + "grad_norm": 5.562412738800049, + "learning_rate": 4.125941293991087e-05, + "log_odds_chosen": 7.149702072143555, + "log_odds_ratio": -0.057801514863967896, + "logits/chosen": -0.4055611789226532, + "logits/rejected": -0.4318541884422302, + "logps/chosen": -0.02749335952103138, + "logps/rejected": -1.172360897064209, + "loss": 1.7467, + "nll_loss": 0.4308894872665405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027493361849337816, + "rewards/margins": 0.11448674649000168, + "rewards/rejected": -0.11723607778549194, + "step": 3721 + }, + { + "epoch": 2.573997233748271, + "grad_norm": 8.328250885009766, + "learning_rate": 4.125557092362072e-05, + "log_odds_chosen": 6.597143650054932, + "log_odds_ratio": -0.031210634857416153, + "logits/chosen": -0.7388463020324707, + "logits/rejected": -0.7064796090126038, + "logps/chosen": -0.020624712109565735, + "logps/rejected": -1.1217725276947021, + "loss": 2.4386, + "nll_loss": 0.606519877910614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020624713506549597, + "rewards/margins": 0.11011476814746857, + "rewards/rejected": -0.11217725276947021, + "step": 3722 + }, + { + "epoch": 2.5746887966804977, + "grad_norm": 4.782063961029053, + "learning_rate": 4.1251728907330567e-05, + "log_odds_chosen": 9.41702938079834, + "log_odds_ratio": -0.0007288760971277952, + "logits/chosen": -0.2897818386554718, + "logits/rejected": -0.30856797099113464, + "logps/chosen": -0.0008728159009478986, + "logps/rejected": -1.4619526863098145, + "loss": 1.7117, + "nll_loss": 0.4278546869754791, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.728158718440682e-05, + "rewards/margins": 0.1461080014705658, + "rewards/rejected": -0.14619527757167816, + "step": 3723 + }, + { + "epoch": 2.5753803596127245, + "grad_norm": 7.023058891296387, + "learning_rate": 4.124788689104042e-05, + "log_odds_chosen": 7.026298999786377, + "log_odds_ratio": -0.4848077595233917, + "logits/chosen": -0.5465977787971497, + "logits/rejected": -0.5585201978683472, + "logps/chosen": -0.16477973759174347, + "logps/rejected": -1.1323270797729492, + "loss": 2.2299, + "nll_loss": 0.5089831352233887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.016477974131703377, + "rewards/margins": 0.09675473719835281, + "rewards/rejected": -0.11323270946741104, + "step": 3724 + }, + { + "epoch": 2.5760719225449513, + "grad_norm": 41.20199966430664, + "learning_rate": 4.124404487475027e-05, + "log_odds_chosen": 4.795586585998535, + "log_odds_ratio": -0.8382541537284851, + "logits/chosen": -0.34821343421936035, + "logits/rejected": -0.3835321068763733, + "logps/chosen": -0.23601265251636505, + "logps/rejected": -0.6838950514793396, + "loss": 2.6539, + "nll_loss": 0.5796493291854858, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.023601265624165535, + "rewards/margins": 0.044788237661123276, + "rewards/rejected": -0.06838950514793396, + "step": 3725 + }, + { + "epoch": 2.576763485477178, + "grad_norm": 12.310609817504883, + "learning_rate": 4.124020285846012e-05, + "log_odds_chosen": 7.353997230529785, + "log_odds_ratio": -0.1631837785243988, + "logits/chosen": -0.6637724041938782, + "logits/rejected": -0.6767927408218384, + "logps/chosen": -0.04751261696219444, + "logps/rejected": -1.9424400329589844, + "loss": 2.6097, + "nll_loss": 0.6360946893692017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004751261789351702, + "rewards/margins": 0.18949274718761444, + "rewards/rejected": -0.19424399733543396, + "step": 3726 + }, + { + "epoch": 2.577455048409405, + "grad_norm": 5.297358989715576, + "learning_rate": 4.1236360842169976e-05, + "log_odds_chosen": 8.62894344329834, + "log_odds_ratio": -0.0006866774056106806, + "logits/chosen": -0.5644747018814087, + "logits/rejected": -0.646756649017334, + "logps/chosen": -0.0179891437292099, + "logps/rejected": -2.733907699584961, + "loss": 2.2156, + "nll_loss": 0.5538387298583984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017989143962040544, + "rewards/margins": 0.2715918719768524, + "rewards/rejected": -0.2733907699584961, + "step": 3727 + }, + { + "epoch": 2.5781466113416323, + "grad_norm": 7.3834099769592285, + "learning_rate": 4.123251882587982e-05, + "log_odds_chosen": 7.929017066955566, + "log_odds_ratio": -0.0026038330979645252, + "logits/chosen": -0.5381110906600952, + "logits/rejected": -0.5581732392311096, + "logps/chosen": -0.018378963693976402, + "logps/rejected": -1.692350149154663, + "loss": 2.5575, + "nll_loss": 0.6391133069992065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001837896415963769, + "rewards/margins": 0.16739711165428162, + "rewards/rejected": -0.1692350208759308, + "step": 3728 + }, + { + "epoch": 2.578838174273859, + "grad_norm": 6.349059104919434, + "learning_rate": 4.1228676809589674e-05, + "log_odds_chosen": 9.217550277709961, + "log_odds_ratio": -0.001908198813907802, + "logits/chosen": -0.7512813210487366, + "logits/rejected": -0.8352804780006409, + "logps/chosen": -0.0008954018703661859, + "logps/rejected": -1.6340348720550537, + "loss": 1.7692, + "nll_loss": 0.4421083927154541, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.954018994700164e-05, + "rewards/margins": 0.16331395506858826, + "rewards/rejected": -0.1634034961462021, + "step": 3729 + }, + { + "epoch": 2.579529737206086, + "grad_norm": 7.565160751342773, + "learning_rate": 4.122483479329953e-05, + "log_odds_chosen": 8.419466018676758, + "log_odds_ratio": -0.0009257863275706768, + "logits/chosen": -0.25553634762763977, + "logits/rejected": -0.31241804361343384, + "logps/chosen": -0.005397059954702854, + "logps/rejected": -1.4249114990234375, + "loss": 1.9198, + "nll_loss": 0.4798622727394104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000539705972187221, + "rewards/margins": 0.14195145666599274, + "rewards/rejected": -0.1424911618232727, + "step": 3730 + }, + { + "epoch": 2.5802213001383127, + "grad_norm": 11.41741943359375, + "learning_rate": 4.122099277700938e-05, + "log_odds_chosen": 7.808590412139893, + "log_odds_ratio": -0.004272694233804941, + "logits/chosen": -0.8148729801177979, + "logits/rejected": -0.8728553056716919, + "logps/chosen": -0.0217888280749321, + "logps/rejected": -1.8935446739196777, + "loss": 2.8953, + "nll_loss": 0.7234096527099609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021788827143609524, + "rewards/margins": 0.18717558681964874, + "rewards/rejected": -0.18935447931289673, + "step": 3731 + }, + { + "epoch": 2.5809128630705396, + "grad_norm": 11.43437671661377, + "learning_rate": 4.1217150760719225e-05, + "log_odds_chosen": 10.363927841186523, + "log_odds_ratio": -9.404075535712764e-05, + "logits/chosen": -0.7213290929794312, + "logits/rejected": -0.8471459746360779, + "logps/chosen": -0.00039993657264858484, + "logps/rejected": -2.210651397705078, + "loss": 2.5843, + "nll_loss": 0.6460575461387634, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.999365435447544e-05, + "rewards/margins": 0.22102515399456024, + "rewards/rejected": -0.22106513381004333, + "step": 3732 + }, + { + "epoch": 2.5816044260027664, + "grad_norm": 59.48982238769531, + "learning_rate": 4.121330874442908e-05, + "log_odds_chosen": 6.653110980987549, + "log_odds_ratio": -0.5694103837013245, + "logits/chosen": -0.6047723293304443, + "logits/rejected": -0.5345290303230286, + "logps/chosen": -0.07189460098743439, + "logps/rejected": -1.7830207347869873, + "loss": 2.3783, + "nll_loss": 0.5376428961753845, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007189460098743439, + "rewards/margins": 0.17111262679100037, + "rewards/rejected": -0.1783020794391632, + "step": 3733 + }, + { + "epoch": 2.5822959889349932, + "grad_norm": 67.61016082763672, + "learning_rate": 4.120946672813893e-05, + "log_odds_chosen": 7.578793048858643, + "log_odds_ratio": -0.19432534277439117, + "logits/chosen": -0.1921953558921814, + "logits/rejected": -0.2634783387184143, + "logps/chosen": -0.031715311110019684, + "logps/rejected": -1.1055599451065063, + "loss": 3.2861, + "nll_loss": 0.8021039962768555, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031715314835309982, + "rewards/margins": 0.10738445818424225, + "rewards/rejected": -0.1105559840798378, + "step": 3734 + }, + { + "epoch": 2.58298755186722, + "grad_norm": 7.091904640197754, + "learning_rate": 4.1205624711848776e-05, + "log_odds_chosen": 8.691332817077637, + "log_odds_ratio": -0.001732210977934301, + "logits/chosen": -0.7369958162307739, + "logits/rejected": -0.7992552518844604, + "logps/chosen": -0.0012605376541614532, + "logps/rejected": -1.5876713991165161, + "loss": 2.1241, + "nll_loss": 0.5308501124382019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012605376832652837, + "rewards/margins": 0.15864109992980957, + "rewards/rejected": -0.15876714885234833, + "step": 3735 + }, + { + "epoch": 2.583679114799447, + "grad_norm": 6.683202743530273, + "learning_rate": 4.1201782695558635e-05, + "log_odds_chosen": 8.278829574584961, + "log_odds_ratio": -0.005301903001964092, + "logits/chosen": -0.338886022567749, + "logits/rejected": -0.38707235455513, + "logps/chosen": -0.01110898144543171, + "logps/rejected": -1.345354676246643, + "loss": 2.1922, + "nll_loss": 0.547519326210022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011108980979770422, + "rewards/margins": 0.1334245651960373, + "rewards/rejected": -0.13453546166419983, + "step": 3736 + }, + { + "epoch": 2.5843706777316737, + "grad_norm": 6.4845685958862305, + "learning_rate": 4.119794067926848e-05, + "log_odds_chosen": 6.890257835388184, + "log_odds_ratio": -0.016935264691710472, + "logits/chosen": -0.611894428730011, + "logits/rejected": -0.6529337167739868, + "logps/chosen": -0.007628034334629774, + "logps/rejected": -0.7589771747589111, + "loss": 2.2051, + "nll_loss": 0.5495746731758118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007628033636137843, + "rewards/margins": 0.07513491064310074, + "rewards/rejected": -0.075897715985775, + "step": 3737 + }, + { + "epoch": 2.5850622406639006, + "grad_norm": 7.8248724937438965, + "learning_rate": 4.119409866297833e-05, + "log_odds_chosen": 6.516547679901123, + "log_odds_ratio": -0.011854683980345726, + "logits/chosen": -0.7901416420936584, + "logits/rejected": -0.7836036682128906, + "logps/chosen": -0.011734462343156338, + "logps/rejected": -1.2134819030761719, + "loss": 2.6598, + "nll_loss": 0.6637638807296753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011734463041648269, + "rewards/margins": 0.12017473578453064, + "rewards/rejected": -0.12134818732738495, + "step": 3738 + }, + { + "epoch": 2.5857538035961274, + "grad_norm": 8.53990364074707, + "learning_rate": 4.1190256646688185e-05, + "log_odds_chosen": 6.061685562133789, + "log_odds_ratio": -0.22549201548099518, + "logits/chosen": -0.6229150295257568, + "logits/rejected": -0.6731955409049988, + "logps/chosen": -0.036444906145334244, + "logps/rejected": -1.0788745880126953, + "loss": 2.3329, + "nll_loss": 0.5606649518013, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0036444906145334244, + "rewards/margins": 0.1042429655790329, + "rewards/rejected": -0.10788745433092117, + "step": 3739 + }, + { + "epoch": 2.586445366528354, + "grad_norm": 10.496400833129883, + "learning_rate": 4.118641463039804e-05, + "log_odds_chosen": 9.048690795898438, + "log_odds_ratio": -0.0010932005243375897, + "logits/chosen": -0.6942977905273438, + "logits/rejected": -0.746668815612793, + "logps/chosen": -0.003788003697991371, + "logps/rejected": -1.28829026222229, + "loss": 2.3722, + "nll_loss": 0.5929319858551025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003788003814406693, + "rewards/margins": 0.1284502148628235, + "rewards/rejected": -0.12882903218269348, + "step": 3740 + }, + { + "epoch": 2.587136929460581, + "grad_norm": 8.886248588562012, + "learning_rate": 4.118257261410788e-05, + "log_odds_chosen": 9.778158187866211, + "log_odds_ratio": -9.212135773850605e-05, + "logits/chosen": -0.6989057064056396, + "logits/rejected": -0.7910153865814209, + "logps/chosen": -0.0002986646140925586, + "logps/rejected": -1.6260040998458862, + "loss": 2.3061, + "nll_loss": 0.576522707939148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9866463592043146e-05, + "rewards/margins": 0.1625705361366272, + "rewards/rejected": -0.16260039806365967, + "step": 3741 + }, + { + "epoch": 2.587828492392808, + "grad_norm": 8.751694679260254, + "learning_rate": 4.1178730597817736e-05, + "log_odds_chosen": 7.6103715896606445, + "log_odds_ratio": -0.10901050269603729, + "logits/chosen": -0.5994385480880737, + "logits/rejected": -0.6329340934753418, + "logps/chosen": -0.027846332639455795, + "logps/rejected": -1.5407474040985107, + "loss": 3.1338, + "nll_loss": 0.7725571990013123, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027846333105117083, + "rewards/margins": 0.15129011869430542, + "rewards/rejected": -0.1540747433900833, + "step": 3742 + }, + { + "epoch": 2.5885200553250347, + "grad_norm": 11.04948902130127, + "learning_rate": 4.117488858152759e-05, + "log_odds_chosen": 8.791509628295898, + "log_odds_ratio": -0.0031256452202796936, + "logits/chosen": -0.50660240650177, + "logits/rejected": -0.6062948703765869, + "logps/chosen": -0.0033155661076307297, + "logps/rejected": -1.942363977432251, + "loss": 2.4201, + "nll_loss": 0.6047027111053467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033155662822537124, + "rewards/margins": 0.1939048320055008, + "rewards/rejected": -0.1942363828420639, + "step": 3743 + }, + { + "epoch": 2.5892116182572615, + "grad_norm": 9.975250244140625, + "learning_rate": 4.1171046565237434e-05, + "log_odds_chosen": 7.344701766967773, + "log_odds_ratio": -0.2183784544467926, + "logits/chosen": -0.41115838289260864, + "logits/rejected": -0.4515266418457031, + "logps/chosen": -0.07224129885435104, + "logps/rejected": -2.3858866691589355, + "loss": 2.3751, + "nll_loss": 0.5719287991523743, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007224130444228649, + "rewards/margins": 0.23136454820632935, + "rewards/rejected": -0.23858866095542908, + "step": 3744 + }, + { + "epoch": 2.5899031811894884, + "grad_norm": 4.491325855255127, + "learning_rate": 4.116720454894729e-05, + "log_odds_chosen": 9.122901916503906, + "log_odds_ratio": -0.00112934282515198, + "logits/chosen": -0.6243253350257874, + "logits/rejected": -0.639293372631073, + "logps/chosen": -0.0041105677373707294, + "logps/rejected": -1.7523343563079834, + "loss": 1.4115, + "nll_loss": 0.3527562618255615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00041105682612396777, + "rewards/margins": 0.1748223900794983, + "rewards/rejected": -0.17523345351219177, + "step": 3745 + }, + { + "epoch": 2.590594744121715, + "grad_norm": 7.6031341552734375, + "learning_rate": 4.116336253265714e-05, + "log_odds_chosen": 7.601007461547852, + "log_odds_ratio": -0.10867451876401901, + "logits/chosen": -0.421059787273407, + "logits/rejected": -0.5527886152267456, + "logps/chosen": -0.15169177949428558, + "logps/rejected": -1.3082287311553955, + "loss": 2.2726, + "nll_loss": 0.5572940111160278, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015169178135693073, + "rewards/margins": 0.11565369367599487, + "rewards/rejected": -0.13082286715507507, + "step": 3746 + }, + { + "epoch": 2.591286307053942, + "grad_norm": 11.434212684631348, + "learning_rate": 4.115952051636699e-05, + "log_odds_chosen": 7.910490989685059, + "log_odds_ratio": -0.007251637522131205, + "logits/chosen": -0.2362067997455597, + "logits/rejected": -0.4405251741409302, + "logps/chosen": -0.04665207117795944, + "logps/rejected": -1.3648693561553955, + "loss": 2.7146, + "nll_loss": 0.6779237985610962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004665207117795944, + "rewards/margins": 0.13182173669338226, + "rewards/rejected": -0.1364869475364685, + "step": 3747 + }, + { + "epoch": 2.591977869986169, + "grad_norm": 7.133166790008545, + "learning_rate": 4.1155678500076844e-05, + "log_odds_chosen": 7.42473030090332, + "log_odds_ratio": -0.0022472471464425325, + "logits/chosen": -0.6103567481040955, + "logits/rejected": -0.6208904981613159, + "logps/chosen": -0.003965867683291435, + "logps/rejected": -0.9936189651489258, + "loss": 1.4544, + "nll_loss": 0.3633824288845062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039658680907450616, + "rewards/margins": 0.09896530210971832, + "rewards/rejected": -0.09936189651489258, + "step": 3748 + }, + { + "epoch": 2.5926694329183957, + "grad_norm": 12.385791778564453, + "learning_rate": 4.1151836483786696e-05, + "log_odds_chosen": 7.634697914123535, + "log_odds_ratio": -0.05306196212768555, + "logits/chosen": -1.0478848218917847, + "logits/rejected": -1.1195652484893799, + "logps/chosen": -0.00799348670989275, + "logps/rejected": -1.5046417713165283, + "loss": 3.5996, + "nll_loss": 0.8945903778076172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007993485778570175, + "rewards/margins": 0.14966483414173126, + "rewards/rejected": -0.15046417713165283, + "step": 3749 + }, + { + "epoch": 2.5933609958506225, + "grad_norm": 12.1556396484375, + "learning_rate": 4.114799446749654e-05, + "log_odds_chosen": 7.831677436828613, + "log_odds_ratio": -0.04401255026459694, + "logits/chosen": -0.6939361095428467, + "logits/rejected": -0.8240491151809692, + "logps/chosen": -0.022363290190696716, + "logps/rejected": -1.603339433670044, + "loss": 2.7254, + "nll_loss": 0.6769535541534424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022363290190696716, + "rewards/margins": 0.15809760987758636, + "rewards/rejected": -0.16033394634723663, + "step": 3750 + }, + { + "epoch": 2.5940525587828493, + "grad_norm": 5.799905776977539, + "learning_rate": 4.1144152451206394e-05, + "log_odds_chosen": 8.367742538452148, + "log_odds_ratio": -0.0042973789386451244, + "logits/chosen": -0.5330762267112732, + "logits/rejected": -0.6044706702232361, + "logps/chosen": -0.0007183550042100251, + "logps/rejected": -0.879762589931488, + "loss": 1.5809, + "nll_loss": 0.39480525255203247, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.183550042100251e-05, + "rewards/margins": 0.0879044234752655, + "rewards/rejected": -0.08797626197338104, + "step": 3751 + }, + { + "epoch": 2.594744121715076, + "grad_norm": 11.173681259155273, + "learning_rate": 4.114031043491625e-05, + "log_odds_chosen": 7.132036209106445, + "log_odds_ratio": -0.06363911926746368, + "logits/chosen": -0.5374847054481506, + "logits/rejected": -0.5664747953414917, + "logps/chosen": -0.0732634961605072, + "logps/rejected": -1.957589864730835, + "loss": 3.0929, + "nll_loss": 0.7668578624725342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00732634961605072, + "rewards/margins": 0.18843263387680054, + "rewards/rejected": -0.19575899839401245, + "step": 3752 + }, + { + "epoch": 2.595435684647303, + "grad_norm": 8.798983573913574, + "learning_rate": 4.113646841862609e-05, + "log_odds_chosen": 7.174066066741943, + "log_odds_ratio": -0.0576028972864151, + "logits/chosen": -0.12469097971916199, + "logits/rejected": -0.19542089104652405, + "logps/chosen": -0.02913515642285347, + "logps/rejected": -1.2168099880218506, + "loss": 2.0781, + "nll_loss": 0.5137559771537781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002913515781983733, + "rewards/margins": 0.11876747757196426, + "rewards/rejected": -0.12168100476264954, + "step": 3753 + }, + { + "epoch": 2.59612724757953, + "grad_norm": 9.644112586975098, + "learning_rate": 4.113262640233595e-05, + "log_odds_chosen": 7.350411415100098, + "log_odds_ratio": -0.01159360259771347, + "logits/chosen": -0.4807983636856079, + "logits/rejected": -0.546984076499939, + "logps/chosen": -0.038234058767557144, + "logps/rejected": -1.8546245098114014, + "loss": 2.8755, + "nll_loss": 0.7177104949951172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038234058301895857, + "rewards/margins": 0.18163906037807465, + "rewards/rejected": -0.18546245992183685, + "step": 3754 + }, + { + "epoch": 2.5968188105117567, + "grad_norm": 7.758711814880371, + "learning_rate": 4.11287843860458e-05, + "log_odds_chosen": 7.503241062164307, + "log_odds_ratio": -0.08800943195819855, + "logits/chosen": -0.5578584671020508, + "logits/rejected": -0.6296103596687317, + "logps/chosen": -0.015839863568544388, + "logps/rejected": -1.226827621459961, + "loss": 1.8376, + "nll_loss": 0.4506067633628845, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015839864499866962, + "rewards/margins": 0.12109877914190292, + "rewards/rejected": -0.12268276512622833, + "step": 3755 + }, + { + "epoch": 2.5975103734439835, + "grad_norm": 13.332018852233887, + "learning_rate": 4.112494236975565e-05, + "log_odds_chosen": 9.398469924926758, + "log_odds_ratio": -0.0007393067935481668, + "logits/chosen": -0.7192118167877197, + "logits/rejected": -0.8135256171226501, + "logps/chosen": -0.0019481182098388672, + "logps/rejected": -1.9981613159179688, + "loss": 2.3335, + "nll_loss": 0.5832892656326294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019481181516312063, + "rewards/margins": 0.199621319770813, + "rewards/rejected": -0.19981613755226135, + "step": 3756 + }, + { + "epoch": 2.5982019363762103, + "grad_norm": 5.693976879119873, + "learning_rate": 4.11211003534655e-05, + "log_odds_chosen": 7.411104202270508, + "log_odds_ratio": -0.037521954625844955, + "logits/chosen": -0.6283434629440308, + "logits/rejected": -0.6703078150749207, + "logps/chosen": -0.02808951586484909, + "logps/rejected": -1.5351510047912598, + "loss": 1.8259, + "nll_loss": 0.4527303874492645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002808951772749424, + "rewards/margins": 0.15070615708827972, + "rewards/rejected": -0.15351510047912598, + "step": 3757 + }, + { + "epoch": 2.598893499308437, + "grad_norm": 6.475027561187744, + "learning_rate": 4.1117258337175355e-05, + "log_odds_chosen": 5.8653364181518555, + "log_odds_ratio": -0.1683236062526703, + "logits/chosen": -0.2808341085910797, + "logits/rejected": -0.25737473368644714, + "logps/chosen": -0.027778543531894684, + "logps/rejected": -0.9268168210983276, + "loss": 1.9337, + "nll_loss": 0.4665814936161041, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002777854213491082, + "rewards/margins": 0.0899038314819336, + "rewards/rejected": -0.09268169105052948, + "step": 3758 + }, + { + "epoch": 2.599585062240664, + "grad_norm": 10.565065383911133, + "learning_rate": 4.11134163208852e-05, + "log_odds_chosen": 8.24848461151123, + "log_odds_ratio": -0.0010619653621688485, + "logits/chosen": -0.6984177827835083, + "logits/rejected": -0.7020764350891113, + "logps/chosen": -0.0028242850676178932, + "logps/rejected": -1.5658907890319824, + "loss": 2.4563, + "nll_loss": 0.6139716506004333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002824285184033215, + "rewards/margins": 0.156306654214859, + "rewards/rejected": -0.1565890908241272, + "step": 3759 + }, + { + "epoch": 2.600276625172891, + "grad_norm": 11.125207901000977, + "learning_rate": 4.110957430459505e-05, + "log_odds_chosen": 8.153125762939453, + "log_odds_ratio": -0.008074373006820679, + "logits/chosen": -0.6523471474647522, + "logits/rejected": -0.7233648896217346, + "logps/chosen": -0.019731884822249413, + "logps/rejected": -1.3840183019638062, + "loss": 3.0923, + "nll_loss": 0.7722706198692322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019731884822249413, + "rewards/margins": 0.13642865419387817, + "rewards/rejected": -0.1384018361568451, + "step": 3760 + }, + { + "epoch": 2.6009681881051177, + "grad_norm": 8.181022644042969, + "learning_rate": 4.1105732288304905e-05, + "log_odds_chosen": 7.941451549530029, + "log_odds_ratio": -0.006261616013944149, + "logits/chosen": -0.8328216075897217, + "logits/rejected": -0.8856630325317383, + "logps/chosen": -0.012141270563006401, + "logps/rejected": -1.4298372268676758, + "loss": 3.0807, + "nll_loss": 0.769557774066925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00121412705630064, + "rewards/margins": 0.14176960289478302, + "rewards/rejected": -0.14298373460769653, + "step": 3761 + }, + { + "epoch": 2.6016597510373445, + "grad_norm": 8.711572647094727, + "learning_rate": 4.110189027201475e-05, + "log_odds_chosen": 8.191829681396484, + "log_odds_ratio": -0.003800910897552967, + "logits/chosen": -0.4973553717136383, + "logits/rejected": -0.512773871421814, + "logps/chosen": -0.015499060973525047, + "logps/rejected": -1.1160831451416016, + "loss": 2.1024, + "nll_loss": 0.5252153873443604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015499060973525047, + "rewards/margins": 0.11005841195583344, + "rewards/rejected": -0.11160831153392792, + "step": 3762 + }, + { + "epoch": 2.6023513139695713, + "grad_norm": 11.474364280700684, + "learning_rate": 4.109804825572461e-05, + "log_odds_chosen": 9.341354370117188, + "log_odds_ratio": -0.0021199476905167103, + "logits/chosen": -0.6627390384674072, + "logits/rejected": -0.7836405038833618, + "logps/chosen": -0.015581142157316208, + "logps/rejected": -1.9159284830093384, + "loss": 2.3043, + "nll_loss": 0.5758620500564575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015581144252792, + "rewards/margins": 0.19003473222255707, + "rewards/rejected": -0.19159284234046936, + "step": 3763 + }, + { + "epoch": 2.603042876901798, + "grad_norm": 12.52896785736084, + "learning_rate": 4.1094206239434456e-05, + "log_odds_chosen": 7.266579627990723, + "log_odds_ratio": -0.11897798627614975, + "logits/chosen": -0.5841730237007141, + "logits/rejected": -0.6162888407707214, + "logps/chosen": -0.02642166055738926, + "logps/rejected": -1.2500101327896118, + "loss": 2.8538, + "nll_loss": 0.7015467882156372, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002642165869474411, + "rewards/margins": 0.12235884368419647, + "rewards/rejected": -0.12500101327896118, + "step": 3764 + }, + { + "epoch": 2.603734439834025, + "grad_norm": 15.374418258666992, + "learning_rate": 4.109036422314431e-05, + "log_odds_chosen": 8.82072639465332, + "log_odds_ratio": -0.12637755274772644, + "logits/chosen": -0.657711923122406, + "logits/rejected": -0.7071999311447144, + "logps/chosen": -0.016477566212415695, + "logps/rejected": -1.6140774488449097, + "loss": 2.0818, + "nll_loss": 0.5078055262565613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0016477566678076982, + "rewards/margins": 0.159759983420372, + "rewards/rejected": -0.16140775382518768, + "step": 3765 + }, + { + "epoch": 2.604426002766252, + "grad_norm": 11.307380676269531, + "learning_rate": 4.108652220685416e-05, + "log_odds_chosen": 8.271891593933105, + "log_odds_ratio": -0.001959600020200014, + "logits/chosen": -0.8720443844795227, + "logits/rejected": -0.802766740322113, + "logps/chosen": -0.008896499872207642, + "logps/rejected": -1.7645208835601807, + "loss": 2.4307, + "nll_loss": 0.6074774265289307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008896499057300389, + "rewards/margins": 0.17556244134902954, + "rewards/rejected": -0.17645208537578583, + "step": 3766 + }, + { + "epoch": 2.6051175656984786, + "grad_norm": 5.210627555847168, + "learning_rate": 4.108268019056401e-05, + "log_odds_chosen": 8.590871810913086, + "log_odds_ratio": -0.023738976567983627, + "logits/chosen": -0.7618262767791748, + "logits/rejected": -0.7200245261192322, + "logps/chosen": -0.031090902164578438, + "logps/rejected": -1.953304409980774, + "loss": 2.4172, + "nll_loss": 0.6019155979156494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003109090495854616, + "rewards/margins": 0.19222135841846466, + "rewards/rejected": -0.1953304409980774, + "step": 3767 + }, + { + "epoch": 2.6058091286307055, + "grad_norm": 8.155974388122559, + "learning_rate": 4.107883817427386e-05, + "log_odds_chosen": 7.465442657470703, + "log_odds_ratio": -0.15218786895275116, + "logits/chosen": -0.6641751527786255, + "logits/rejected": -0.6453537344932556, + "logps/chosen": -0.03820411115884781, + "logps/rejected": -1.7105059623718262, + "loss": 2.7485, + "nll_loss": 0.6719143390655518, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038204113952815533, + "rewards/margins": 0.16723018884658813, + "rewards/rejected": -0.17105060815811157, + "step": 3768 + }, + { + "epoch": 2.6065006915629323, + "grad_norm": 10.886385917663574, + "learning_rate": 4.107499615798371e-05, + "log_odds_chosen": 9.198636054992676, + "log_odds_ratio": -0.0007475916645489633, + "logits/chosen": -0.5266662240028381, + "logits/rejected": -0.6063534617424011, + "logps/chosen": -0.03145657107234001, + "logps/rejected": -1.9173073768615723, + "loss": 2.6858, + "nll_loss": 0.6713849902153015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031456567812711, + "rewards/margins": 0.1885850876569748, + "rewards/rejected": -0.19173073768615723, + "step": 3769 + }, + { + "epoch": 2.607192254495159, + "grad_norm": 4.671172142028809, + "learning_rate": 4.1071154141693564e-05, + "log_odds_chosen": 8.649884223937988, + "log_odds_ratio": -0.09504207968711853, + "logits/chosen": -0.5261533260345459, + "logits/rejected": -0.5755932927131653, + "logps/chosen": -0.02501145377755165, + "logps/rejected": -1.5033838748931885, + "loss": 1.524, + "nll_loss": 0.37148401141166687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025011454708874226, + "rewards/margins": 0.14783723652362823, + "rewards/rejected": -0.15033838152885437, + "step": 3770 + }, + { + "epoch": 2.607883817427386, + "grad_norm": 12.868654251098633, + "learning_rate": 4.106731212540341e-05, + "log_odds_chosen": 8.409965515136719, + "log_odds_ratio": -0.004177105613052845, + "logits/chosen": -0.5481134653091431, + "logits/rejected": -0.6200520396232605, + "logps/chosen": -0.01093447208404541, + "logps/rejected": -1.4257891178131104, + "loss": 4.3731, + "nll_loss": 1.0928475856781006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001093447208404541, + "rewards/margins": 0.14148546755313873, + "rewards/rejected": -0.14257891476154327, + "step": 3771 + }, + { + "epoch": 2.608575380359613, + "grad_norm": 6.570640563964844, + "learning_rate": 4.106347010911327e-05, + "log_odds_chosen": 8.466964721679688, + "log_odds_ratio": -0.00046557781752198935, + "logits/chosen": -0.23388677835464478, + "logits/rejected": -0.23734617233276367, + "logps/chosen": -0.00039772834861651063, + "logps/rejected": -1.008994221687317, + "loss": 2.5733, + "nll_loss": 0.6432757377624512, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.977283267886378e-05, + "rewards/margins": 0.10085965692996979, + "rewards/rejected": -0.10089942812919617, + "step": 3772 + }, + { + "epoch": 2.6092669432918396, + "grad_norm": 10.865242958068848, + "learning_rate": 4.1059628092823114e-05, + "log_odds_chosen": 7.911555290222168, + "log_odds_ratio": -0.0076803830452263355, + "logits/chosen": -0.5928400754928589, + "logits/rejected": -0.6205337643623352, + "logps/chosen": -0.00917899701744318, + "logps/rejected": -1.467241883277893, + "loss": 3.2169, + "nll_loss": 0.8034663200378418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009178997133858502, + "rewards/margins": 0.14580628275871277, + "rewards/rejected": -0.14672419428825378, + "step": 3773 + }, + { + "epoch": 2.6099585062240664, + "grad_norm": 7.693874359130859, + "learning_rate": 4.1055786076532967e-05, + "log_odds_chosen": 7.510994911193848, + "log_odds_ratio": -0.011618074029684067, + "logits/chosen": -0.38228869438171387, + "logits/rejected": -0.44549834728240967, + "logps/chosen": -0.01588156260550022, + "logps/rejected": -1.182736873626709, + "loss": 2.1218, + "nll_loss": 0.5292880535125732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015881562139838934, + "rewards/margins": 0.11668553948402405, + "rewards/rejected": -0.11827369034290314, + "step": 3774 + }, + { + "epoch": 2.6106500691562933, + "grad_norm": 8.907024383544922, + "learning_rate": 4.105194406024282e-05, + "log_odds_chosen": 6.680961608886719, + "log_odds_ratio": -0.06957157701253891, + "logits/chosen": -0.8452804088592529, + "logits/rejected": -0.7844629883766174, + "logps/chosen": -0.07758989930152893, + "logps/rejected": -1.437814474105835, + "loss": 2.7614, + "nll_loss": 0.6833951473236084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007758989930152893, + "rewards/margins": 0.13602246344089508, + "rewards/rejected": -0.14378145337104797, + "step": 3775 + }, + { + "epoch": 2.61134163208852, + "grad_norm": 6.217175006866455, + "learning_rate": 4.104810204395267e-05, + "log_odds_chosen": 6.552158832550049, + "log_odds_ratio": -0.0551423579454422, + "logits/chosen": -0.4047723412513733, + "logits/rejected": -0.42235076427459717, + "logps/chosen": -0.036186181008815765, + "logps/rejected": -1.1089141368865967, + "loss": 2.6563, + "nll_loss": 0.6585571765899658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003618618007749319, + "rewards/margins": 0.10727280378341675, + "rewards/rejected": -0.1108914166688919, + "step": 3776 + }, + { + "epoch": 2.612033195020747, + "grad_norm": 7.649138450622559, + "learning_rate": 4.104426002766252e-05, + "log_odds_chosen": 7.4502434730529785, + "log_odds_ratio": -0.003743886947631836, + "logits/chosen": -0.5986257791519165, + "logits/rejected": -0.6282727122306824, + "logps/chosen": -0.007481364067643881, + "logps/rejected": -1.1748636960983276, + "loss": 2.1058, + "nll_loss": 0.5260686874389648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007481364300474524, + "rewards/margins": 0.11673823744058609, + "rewards/rejected": -0.117486372590065, + "step": 3777 + }, + { + "epoch": 2.6127247579529738, + "grad_norm": 16.62424659729004, + "learning_rate": 4.104041801137237e-05, + "log_odds_chosen": 6.623798370361328, + "log_odds_ratio": -0.41715821623802185, + "logits/chosen": -0.7055549025535583, + "logits/rejected": -0.7265179753303528, + "logps/chosen": -0.0954117402434349, + "logps/rejected": -1.7129487991333008, + "loss": 2.4636, + "nll_loss": 0.574172854423523, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009541173465549946, + "rewards/margins": 0.16175371408462524, + "rewards/rejected": -0.17129486799240112, + "step": 3778 + }, + { + "epoch": 2.6134163208852006, + "grad_norm": 14.161336898803711, + "learning_rate": 4.103657599508222e-05, + "log_odds_chosen": 5.261303901672363, + "log_odds_ratio": -0.38418567180633545, + "logits/chosen": -0.4809882640838623, + "logits/rejected": -0.5193066596984863, + "logps/chosen": -0.051292117685079575, + "logps/rejected": -1.037435531616211, + "loss": 2.9689, + "nll_loss": 0.7038084864616394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005129212513566017, + "rewards/margins": 0.09861434251070023, + "rewards/rejected": -0.1037435531616211, + "step": 3779 + }, + { + "epoch": 2.6141078838174274, + "grad_norm": 8.383172035217285, + "learning_rate": 4.103273397879207e-05, + "log_odds_chosen": 8.017049789428711, + "log_odds_ratio": -0.00684964656829834, + "logits/chosen": -0.5790307521820068, + "logits/rejected": -0.5661333203315735, + "logps/chosen": -0.004952224902808666, + "logps/rejected": -1.5408872365951538, + "loss": 2.4536, + "nll_loss": 0.6127271056175232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000495222513563931, + "rewards/margins": 0.153593510389328, + "rewards/rejected": -0.15408873558044434, + "step": 3780 + }, + { + "epoch": 2.6147994467496543, + "grad_norm": 9.299695014953613, + "learning_rate": 4.102889196250193e-05, + "log_odds_chosen": 8.528641700744629, + "log_odds_ratio": -0.0012356049846857786, + "logits/chosen": -0.8160710334777832, + "logits/rejected": -0.8102253079414368, + "logps/chosen": -0.006617030128836632, + "logps/rejected": -1.8163725137710571, + "loss": 2.2087, + "nll_loss": 0.5520486831665039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006617030594497919, + "rewards/margins": 0.1809755563735962, + "rewards/rejected": -0.1816372573375702, + "step": 3781 + }, + { + "epoch": 2.615491009681881, + "grad_norm": 8.188958168029785, + "learning_rate": 4.102504994621177e-05, + "log_odds_chosen": 10.0280122756958, + "log_odds_ratio": -9.502626198809594e-05, + "logits/chosen": -0.2838655710220337, + "logits/rejected": -0.40352708101272583, + "logps/chosen": -0.00035995981306768954, + "logps/rejected": -1.7294433116912842, + "loss": 1.5491, + "nll_loss": 0.38726910948753357, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.599598494474776e-05, + "rewards/margins": 0.17290833592414856, + "rewards/rejected": -0.1729443371295929, + "step": 3782 + }, + { + "epoch": 2.616182572614108, + "grad_norm": 6.363637447357178, + "learning_rate": 4.1021207929921625e-05, + "log_odds_chosen": 7.355223655700684, + "log_odds_ratio": -0.036441314965486526, + "logits/chosen": -0.5759349465370178, + "logits/rejected": -0.5798578262329102, + "logps/chosen": -0.027550656348466873, + "logps/rejected": -1.470800518989563, + "loss": 2.4145, + "nll_loss": 0.5999844074249268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002755065681412816, + "rewards/margins": 0.14432498812675476, + "rewards/rejected": -0.14708006381988525, + "step": 3783 + }, + { + "epoch": 2.6168741355463347, + "grad_norm": 7.999043941497803, + "learning_rate": 4.101736591363148e-05, + "log_odds_chosen": 7.062474727630615, + "log_odds_ratio": -0.05684699863195419, + "logits/chosen": -0.5237227082252502, + "logits/rejected": -0.5357232093811035, + "logps/chosen": -0.02586865797638893, + "logps/rejected": -1.7018102407455444, + "loss": 2.2973, + "nll_loss": 0.5686320066452026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002586865797638893, + "rewards/margins": 0.16759416460990906, + "rewards/rejected": -0.1701810210943222, + "step": 3784 + }, + { + "epoch": 2.6175656984785616, + "grad_norm": 7.679310321807861, + "learning_rate": 4.101352389734133e-05, + "log_odds_chosen": 7.4297637939453125, + "log_odds_ratio": -0.06839942932128906, + "logits/chosen": -0.2992628812789917, + "logits/rejected": -0.4127195179462433, + "logps/chosen": -0.041940055787563324, + "logps/rejected": -1.3728930950164795, + "loss": 2.1012, + "nll_loss": 0.5184489488601685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004194005858153105, + "rewards/margins": 0.13309532403945923, + "rewards/rejected": -0.13728931546211243, + "step": 3785 + }, + { + "epoch": 2.6182572614107884, + "grad_norm": 11.26512622833252, + "learning_rate": 4.1009681881051176e-05, + "log_odds_chosen": 5.08907413482666, + "log_odds_ratio": -0.2812805473804474, + "logits/chosen": -0.6196216344833374, + "logits/rejected": -0.5990339517593384, + "logps/chosen": -0.10777544230222702, + "logps/rejected": -0.9553592801094055, + "loss": 2.5721, + "nll_loss": 0.6148989200592041, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.010777544230222702, + "rewards/margins": 0.08475838601589203, + "rewards/rejected": -0.09553593397140503, + "step": 3786 + }, + { + "epoch": 2.6189488243430152, + "grad_norm": 9.276042938232422, + "learning_rate": 4.100583986476103e-05, + "log_odds_chosen": 7.981724739074707, + "log_odds_ratio": -0.006409569643437862, + "logits/chosen": -0.7504392266273499, + "logits/rejected": -0.8193677067756653, + "logps/chosen": -0.011914849281311035, + "logps/rejected": -1.3361014127731323, + "loss": 2.7555, + "nll_loss": 0.6882370114326477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011914849746972322, + "rewards/margins": 0.1324186474084854, + "rewards/rejected": -0.13361014425754547, + "step": 3787 + }, + { + "epoch": 2.619640387275242, + "grad_norm": 13.060140609741211, + "learning_rate": 4.100199784847088e-05, + "log_odds_chosen": 7.931905746459961, + "log_odds_ratio": -0.000717336602974683, + "logits/chosen": -0.7390726804733276, + "logits/rejected": -0.8203366994857788, + "logps/chosen": -0.0012997111771255732, + "logps/rejected": -1.4393033981323242, + "loss": 3.8084, + "nll_loss": 0.9520176649093628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012997111480217427, + "rewards/margins": 0.14380037784576416, + "rewards/rejected": -0.1439303457736969, + "step": 3788 + }, + { + "epoch": 2.620331950207469, + "grad_norm": 9.99527645111084, + "learning_rate": 4.0998155832180726e-05, + "log_odds_chosen": 7.0623884201049805, + "log_odds_ratio": -0.10466547310352325, + "logits/chosen": -0.6097970008850098, + "logits/rejected": -0.7266175746917725, + "logps/chosen": -0.020670359954237938, + "logps/rejected": -1.1227185726165771, + "loss": 3.1378, + "nll_loss": 0.7739837169647217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002067035995423794, + "rewards/margins": 0.11020482331514359, + "rewards/rejected": -0.11227186769247055, + "step": 3789 + }, + { + "epoch": 2.6210235131396957, + "grad_norm": 8.242027282714844, + "learning_rate": 4.0994313815890585e-05, + "log_odds_chosen": 9.106143951416016, + "log_odds_ratio": -0.00046437146374955773, + "logits/chosen": -0.7410778999328613, + "logits/rejected": -0.6651361584663391, + "logps/chosen": -0.001504677115008235, + "logps/rejected": -1.3949095010757446, + "loss": 2.6114, + "nll_loss": 0.6528007388114929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001504677056800574, + "rewards/margins": 0.13934049010276794, + "rewards/rejected": -0.13949096202850342, + "step": 3790 + }, + { + "epoch": 2.6217150760719226, + "grad_norm": 7.91328239440918, + "learning_rate": 4.099047179960043e-05, + "log_odds_chosen": 7.157289505004883, + "log_odds_ratio": -0.0055101243779063225, + "logits/chosen": -0.6908423900604248, + "logits/rejected": -0.6601514220237732, + "logps/chosen": -0.022872548550367355, + "logps/rejected": -1.2855727672576904, + "loss": 1.8462, + "nll_loss": 0.46100759506225586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002287255134433508, + "rewards/margins": 0.12627002596855164, + "rewards/rejected": -0.1285572648048401, + "step": 3791 + }, + { + "epoch": 2.6224066390041494, + "grad_norm": 6.737125873565674, + "learning_rate": 4.0986629783310283e-05, + "log_odds_chosen": 6.822482109069824, + "log_odds_ratio": -0.0716143399477005, + "logits/chosen": -0.4894212484359741, + "logits/rejected": -0.4532252550125122, + "logps/chosen": -0.04768814519047737, + "logps/rejected": -1.8026762008666992, + "loss": 2.5347, + "nll_loss": 0.6265243291854858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00476881442591548, + "rewards/margins": 0.17549879848957062, + "rewards/rejected": -0.18026763200759888, + "step": 3792 + }, + { + "epoch": 2.623098201936376, + "grad_norm": 4.337125301361084, + "learning_rate": 4.0982787767020136e-05, + "log_odds_chosen": 5.33636474609375, + "log_odds_ratio": -0.1461387425661087, + "logits/chosen": -0.3308134078979492, + "logits/rejected": -0.31176647543907166, + "logps/chosen": -0.062302809208631516, + "logps/rejected": -1.0903065204620361, + "loss": 1.4588, + "nll_loss": 0.3500952422618866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006230281665921211, + "rewards/margins": 0.10280036926269531, + "rewards/rejected": -0.10903064906597137, + "step": 3793 + }, + { + "epoch": 2.623789764868603, + "grad_norm": 6.027596950531006, + "learning_rate": 4.097894575072999e-05, + "log_odds_chosen": 7.09362268447876, + "log_odds_ratio": -0.06785794347524643, + "logits/chosen": -0.4358813464641571, + "logits/rejected": -0.48490625619888306, + "logps/chosen": -0.02412148006260395, + "logps/rejected": -1.2474931478500366, + "loss": 2.536, + "nll_loss": 0.6272022724151611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024121480528265238, + "rewards/margins": 0.12233716994524002, + "rewards/rejected": -0.1247493103146553, + "step": 3794 + }, + { + "epoch": 2.62448132780083, + "grad_norm": 10.282882690429688, + "learning_rate": 4.0975103734439834e-05, + "log_odds_chosen": 9.190343856811523, + "log_odds_ratio": -0.001859789015725255, + "logits/chosen": -0.63014817237854, + "logits/rejected": -0.6413495540618896, + "logps/chosen": -0.00032331724651157856, + "logps/rejected": -1.4479916095733643, + "loss": 3.6494, + "nll_loss": 0.9121723175048828, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.233172537875362e-05, + "rewards/margins": 0.14476682245731354, + "rewards/rejected": -0.1447991579771042, + "step": 3795 + }, + { + "epoch": 2.6251728907330567, + "grad_norm": 8.422372817993164, + "learning_rate": 4.0971261718149686e-05, + "log_odds_chosen": 7.013335227966309, + "log_odds_ratio": -0.0271234679967165, + "logits/chosen": -0.7074835300445557, + "logits/rejected": -0.7603086829185486, + "logps/chosen": -0.01090591587126255, + "logps/rejected": -1.0833988189697266, + "loss": 1.9539, + "nll_loss": 0.4857563078403473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010905916569754481, + "rewards/margins": 0.10724928230047226, + "rewards/rejected": -0.10833987593650818, + "step": 3796 + }, + { + "epoch": 2.6258644536652835, + "grad_norm": 8.807729721069336, + "learning_rate": 4.096741970185954e-05, + "log_odds_chosen": 7.642078399658203, + "log_odds_ratio": -0.00967707671225071, + "logits/chosen": -0.5156012773513794, + "logits/rejected": -0.5499816536903381, + "logps/chosen": -0.0036300821229815483, + "logps/rejected": -1.2207317352294922, + "loss": 2.6392, + "nll_loss": 0.6588394045829773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036300826468504965, + "rewards/margins": 0.1217101663351059, + "rewards/rejected": -0.12207317352294922, + "step": 3797 + }, + { + "epoch": 2.6265560165975104, + "grad_norm": 6.9938554763793945, + "learning_rate": 4.0963577685569385e-05, + "log_odds_chosen": 7.7642130851745605, + "log_odds_ratio": -0.006069661118090153, + "logits/chosen": -0.47169187664985657, + "logits/rejected": -0.5067437291145325, + "logps/chosen": -0.08318636566400528, + "logps/rejected": -2.1593117713928223, + "loss": 3.3399, + "nll_loss": 0.8343735933303833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008318635635077953, + "rewards/margins": 0.207612544298172, + "rewards/rejected": -0.21593117713928223, + "step": 3798 + }, + { + "epoch": 2.627247579529737, + "grad_norm": 8.0711030960083, + "learning_rate": 4.0959735669279244e-05, + "log_odds_chosen": 8.419703483581543, + "log_odds_ratio": -0.0006184577941894531, + "logits/chosen": -0.749565601348877, + "logits/rejected": -0.7853137254714966, + "logps/chosen": -0.0029448317363858223, + "logps/rejected": -1.5154144763946533, + "loss": 2.1765, + "nll_loss": 0.5440510511398315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002944831794593483, + "rewards/margins": 0.15124696493148804, + "rewards/rejected": -0.15154144167900085, + "step": 3799 + }, + { + "epoch": 2.627939142461964, + "grad_norm": 3.90470552444458, + "learning_rate": 4.095589365298909e-05, + "log_odds_chosen": 7.269073963165283, + "log_odds_ratio": -0.0662284791469574, + "logits/chosen": -0.3588363826274872, + "logits/rejected": -0.3765372037887573, + "logps/chosen": -0.03212092071771622, + "logps/rejected": -1.3606046438217163, + "loss": 2.0721, + "nll_loss": 0.5114089250564575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032120919786393642, + "rewards/margins": 0.13284838199615479, + "rewards/rejected": -0.1360604614019394, + "step": 3800 + }, + { + "epoch": 2.628630705394191, + "grad_norm": 10.0017728805542, + "learning_rate": 4.095205163669894e-05, + "log_odds_chosen": 8.136731147766113, + "log_odds_ratio": -0.018487481400370598, + "logits/chosen": -0.6621404886245728, + "logits/rejected": -0.6417994499206543, + "logps/chosen": -0.009381298907101154, + "logps/rejected": -1.1562975645065308, + "loss": 2.6296, + "nll_loss": 0.6555531024932861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009381298441439867, + "rewards/margins": 0.11469162255525589, + "rewards/rejected": -0.11562975496053696, + "step": 3801 + }, + { + "epoch": 2.6293222683264177, + "grad_norm": 8.75160026550293, + "learning_rate": 4.0948209620408794e-05, + "log_odds_chosen": 8.332706451416016, + "log_odds_ratio": -0.003808370791375637, + "logits/chosen": -0.5499763488769531, + "logits/rejected": -0.6190809607505798, + "logps/chosen": -0.015007829293608665, + "logps/rejected": -1.885420322418213, + "loss": 2.1238, + "nll_loss": 0.530563235282898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015007827896624804, + "rewards/margins": 0.1870412528514862, + "rewards/rejected": -0.18854205310344696, + "step": 3802 + }, + { + "epoch": 2.6300138312586445, + "grad_norm": 4.922801494598389, + "learning_rate": 4.094436760411865e-05, + "log_odds_chosen": 4.8717803955078125, + "log_odds_ratio": -0.04627423360943794, + "logits/chosen": -0.4469316005706787, + "logits/rejected": -0.44363802671432495, + "logps/chosen": -0.04971982538700104, + "logps/rejected": -1.0474414825439453, + "loss": 2.4512, + "nll_loss": 0.6081790328025818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004971982911229134, + "rewards/margins": 0.09977217018604279, + "rewards/rejected": -0.10474415123462677, + "step": 3803 + }, + { + "epoch": 2.6307053941908713, + "grad_norm": 7.153709411621094, + "learning_rate": 4.094052558782849e-05, + "log_odds_chosen": 6.108127117156982, + "log_odds_ratio": -0.015503794886171818, + "logits/chosen": -0.4905102252960205, + "logits/rejected": -0.48964786529541016, + "logps/chosen": -0.1222764328122139, + "logps/rejected": -1.8249282836914062, + "loss": 1.9354, + "nll_loss": 0.482305109500885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01222764328122139, + "rewards/margins": 0.17026519775390625, + "rewards/rejected": -0.18249283730983734, + "step": 3804 + }, + { + "epoch": 2.631396957123098, + "grad_norm": 8.944799423217773, + "learning_rate": 4.0936683571538345e-05, + "log_odds_chosen": 6.0882673263549805, + "log_odds_ratio": -0.16901487112045288, + "logits/chosen": -0.391895592212677, + "logits/rejected": -0.4109463095664978, + "logps/chosen": -0.04145354777574539, + "logps/rejected": -1.382396936416626, + "loss": 2.3299, + "nll_loss": 0.5655611157417297, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004145354963839054, + "rewards/margins": 0.13409434258937836, + "rewards/rejected": -0.13823971152305603, + "step": 3805 + }, + { + "epoch": 2.632088520055325, + "grad_norm": 13.911857604980469, + "learning_rate": 4.09328415552482e-05, + "log_odds_chosen": 6.242876052856445, + "log_odds_ratio": -0.21624933183193207, + "logits/chosen": -0.6284334063529968, + "logits/rejected": -0.6858136653900146, + "logps/chosen": -0.0389171838760376, + "logps/rejected": -0.7917266488075256, + "loss": 2.2325, + "nll_loss": 0.5364995002746582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003891718341037631, + "rewards/margins": 0.07528094947338104, + "rewards/rejected": -0.07917267084121704, + "step": 3806 + }, + { + "epoch": 2.632780082987552, + "grad_norm": 5.895772457122803, + "learning_rate": 4.092899953895804e-05, + "log_odds_chosen": 7.286813735961914, + "log_odds_ratio": -0.037271179258823395, + "logits/chosen": -0.357649028301239, + "logits/rejected": -0.4334043562412262, + "logps/chosen": -0.02683999016880989, + "logps/rejected": -1.6060643196105957, + "loss": 2.1091, + "nll_loss": 0.5235556960105896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002683999016880989, + "rewards/margins": 0.1579224318265915, + "rewards/rejected": -0.16060644388198853, + "step": 3807 + }, + { + "epoch": 2.6334716459197787, + "grad_norm": 6.329620838165283, + "learning_rate": 4.09251575226679e-05, + "log_odds_chosen": 7.340235710144043, + "log_odds_ratio": -0.040890198200941086, + "logits/chosen": -0.2552340030670166, + "logits/rejected": -0.29688987135887146, + "logps/chosen": -0.013605457730591297, + "logps/rejected": -1.0799440145492554, + "loss": 1.9992, + "nll_loss": 0.4957117438316345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013605458661913872, + "rewards/margins": 0.10663385689258575, + "rewards/rejected": -0.10799440741539001, + "step": 3808 + }, + { + "epoch": 2.6341632088520055, + "grad_norm": 6.554021835327148, + "learning_rate": 4.092131550637775e-05, + "log_odds_chosen": 8.336524963378906, + "log_odds_ratio": -0.0008704437641426921, + "logits/chosen": -0.2876972556114197, + "logits/rejected": -0.33872395753860474, + "logps/chosen": -0.010608218610286713, + "logps/rejected": -1.7114243507385254, + "loss": 2.0471, + "nll_loss": 0.5116779804229736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010608219308778644, + "rewards/margins": 0.17008161544799805, + "rewards/rejected": -0.17114242911338806, + "step": 3809 + }, + { + "epoch": 2.6348547717842323, + "grad_norm": 8.618371963500977, + "learning_rate": 4.09174734900876e-05, + "log_odds_chosen": 7.948423862457275, + "log_odds_ratio": -0.003505430184304714, + "logits/chosen": -0.5390564799308777, + "logits/rejected": -0.5939716100692749, + "logps/chosen": -0.0177299827337265, + "logps/rejected": -1.9059690237045288, + "loss": 2.2947, + "nll_loss": 0.5733277201652527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017729983665049076, + "rewards/margins": 0.1888239085674286, + "rewards/rejected": -0.19059689342975616, + "step": 3810 + }, + { + "epoch": 2.635546334716459, + "grad_norm": 9.77852725982666, + "learning_rate": 4.091363147379745e-05, + "log_odds_chosen": 9.04955005645752, + "log_odds_ratio": -0.007421260699629784, + "logits/chosen": -0.48122864961624146, + "logits/rejected": -0.5198429226875305, + "logps/chosen": -0.00442839739844203, + "logps/rejected": -1.4086993932724, + "loss": 1.6727, + "nll_loss": 0.4174409508705139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004428397514857352, + "rewards/margins": 0.1404271125793457, + "rewards/rejected": -0.14086996018886566, + "step": 3811 + }, + { + "epoch": 2.636237897648686, + "grad_norm": 12.669425964355469, + "learning_rate": 4.0909789457507305e-05, + "log_odds_chosen": 8.74130630493164, + "log_odds_ratio": -0.00046664898400194943, + "logits/chosen": -0.7802400588989258, + "logits/rejected": -0.7810622453689575, + "logps/chosen": -0.001165109919384122, + "logps/rejected": -1.3760836124420166, + "loss": 2.9398, + "nll_loss": 0.7349057197570801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001165109861176461, + "rewards/margins": 0.1374918520450592, + "rewards/rejected": -0.1376083493232727, + "step": 3812 + }, + { + "epoch": 2.636929460580913, + "grad_norm": 6.967644214630127, + "learning_rate": 4.090594744121715e-05, + "log_odds_chosen": 9.533140182495117, + "log_odds_ratio": -0.00017059470701497048, + "logits/chosen": -0.41741979122161865, + "logits/rejected": -0.5131128430366516, + "logps/chosen": -0.0004757500428240746, + "logps/rejected": -1.5622491836547852, + "loss": 1.7849, + "nll_loss": 0.4462040066719055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7575005737598985e-05, + "rewards/margins": 0.1561773270368576, + "rewards/rejected": -0.15622490644454956, + "step": 3813 + }, + { + "epoch": 2.6376210235131397, + "grad_norm": 5.830072402954102, + "learning_rate": 4.0902105424927e-05, + "log_odds_chosen": 7.749738693237305, + "log_odds_ratio": -0.003948946483433247, + "logits/chosen": -0.3899442255496979, + "logits/rejected": -0.42192280292510986, + "logps/chosen": -0.0035098264925181866, + "logps/rejected": -1.289759635925293, + "loss": 1.5506, + "nll_loss": 0.3872567117214203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035098264925181866, + "rewards/margins": 0.12862497568130493, + "rewards/rejected": -0.12897595763206482, + "step": 3814 + }, + { + "epoch": 2.6383125864453665, + "grad_norm": 7.257383823394775, + "learning_rate": 4.0898263408636856e-05, + "log_odds_chosen": 6.416209697723389, + "log_odds_ratio": -0.1501171886920929, + "logits/chosen": -0.5847111344337463, + "logits/rejected": -0.5577553510665894, + "logps/chosen": -0.03365294635295868, + "logps/rejected": -1.0939298868179321, + "loss": 1.979, + "nll_loss": 0.47974252700805664, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003365294774994254, + "rewards/margins": 0.10602769255638123, + "rewards/rejected": -0.10939298570156097, + "step": 3815 + }, + { + "epoch": 2.6390041493775933, + "grad_norm": 8.99246597290039, + "learning_rate": 4.08944213923467e-05, + "log_odds_chosen": 6.8492841720581055, + "log_odds_ratio": -0.004586468450725079, + "logits/chosen": -0.41203147172927856, + "logits/rejected": -0.46544843912124634, + "logps/chosen": -0.03983590006828308, + "logps/rejected": -1.4733881950378418, + "loss": 2.9193, + "nll_loss": 0.7293692827224731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003983589820563793, + "rewards/margins": 0.14335523545742035, + "rewards/rejected": -0.1473388373851776, + "step": 3816 + }, + { + "epoch": 2.63969571230982, + "grad_norm": 8.560349464416504, + "learning_rate": 4.089057937605656e-05, + "log_odds_chosen": 7.125585556030273, + "log_odds_ratio": -0.010650004260241985, + "logits/chosen": -0.5648536682128906, + "logits/rejected": -0.6092085242271423, + "logps/chosen": -0.009023960679769516, + "logps/rejected": -1.2239824533462524, + "loss": 2.6873, + "nll_loss": 0.6707549095153809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009023960446938872, + "rewards/margins": 0.12149585783481598, + "rewards/rejected": -0.122398242354393, + "step": 3817 + }, + { + "epoch": 2.640387275242047, + "grad_norm": 12.124699592590332, + "learning_rate": 4.0886737359766406e-05, + "log_odds_chosen": 7.615258693695068, + "log_odds_ratio": -0.0019200460519641638, + "logits/chosen": -0.48766252398490906, + "logits/rejected": -0.5352762937545776, + "logps/chosen": -0.04233637452125549, + "logps/rejected": -2.3085317611694336, + "loss": 2.8713, + "nll_loss": 0.717635989189148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004233637358993292, + "rewards/margins": 0.22661955654621124, + "rewards/rejected": -0.23085319995880127, + "step": 3818 + }, + { + "epoch": 2.641078838174274, + "grad_norm": 9.745429039001465, + "learning_rate": 4.088289534347626e-05, + "log_odds_chosen": 7.206563949584961, + "log_odds_ratio": -0.10519903898239136, + "logits/chosen": -0.33590954542160034, + "logits/rejected": -0.37854519486427307, + "logps/chosen": -0.19065965712070465, + "logps/rejected": -1.7531626224517822, + "loss": 1.8261, + "nll_loss": 0.4459928572177887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019065964967012405, + "rewards/margins": 0.15625028312206268, + "rewards/rejected": -0.1753162443637848, + "step": 3819 + }, + { + "epoch": 2.6417704011065006, + "grad_norm": 10.741318702697754, + "learning_rate": 4.0879053327186104e-05, + "log_odds_chosen": 8.253884315490723, + "log_odds_ratio": -0.050954267382621765, + "logits/chosen": -0.6642457842826843, + "logits/rejected": -0.6910403966903687, + "logps/chosen": -0.06695730984210968, + "logps/rejected": -1.5085256099700928, + "loss": 3.24, + "nll_loss": 0.804897665977478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0066957310773432255, + "rewards/margins": 0.14415684342384338, + "rewards/rejected": -0.15085257589817047, + "step": 3820 + }, + { + "epoch": 2.6424619640387275, + "grad_norm": 11.081913948059082, + "learning_rate": 4.0875211310895964e-05, + "log_odds_chosen": 6.918118476867676, + "log_odds_ratio": -0.24356284737586975, + "logits/chosen": -0.5838861465454102, + "logits/rejected": -0.6377038955688477, + "logps/chosen": -0.050251834094524384, + "logps/rejected": -1.658482313156128, + "loss": 2.5045, + "nll_loss": 0.601761519908905, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005025183781981468, + "rewards/margins": 0.1608230471611023, + "rewards/rejected": -0.1658482402563095, + "step": 3821 + }, + { + "epoch": 2.6431535269709543, + "grad_norm": 8.70766830444336, + "learning_rate": 4.087136929460581e-05, + "log_odds_chosen": 7.524777412414551, + "log_odds_ratio": -0.002118032891303301, + "logits/chosen": -0.003473300486803055, + "logits/rejected": -0.07419916987419128, + "logps/chosen": -0.039731841534376144, + "logps/rejected": -2.60288143157959, + "loss": 1.8429, + "nll_loss": 0.4605070948600769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0039731841534376144, + "rewards/margins": 0.2563149631023407, + "rewards/rejected": -0.26028814911842346, + "step": 3822 + }, + { + "epoch": 2.643845089903181, + "grad_norm": 11.58268928527832, + "learning_rate": 4.086752727831566e-05, + "log_odds_chosen": 5.5339131355285645, + "log_odds_ratio": -1.1879533529281616, + "logits/chosen": -0.6753538250923157, + "logits/rejected": -0.7414652705192566, + "logps/chosen": -0.3033027946949005, + "logps/rejected": -1.4092167615890503, + "loss": 2.8279, + "nll_loss": 0.5881817936897278, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03033027984201908, + "rewards/margins": 0.11059139668941498, + "rewards/rejected": -0.1409216821193695, + "step": 3823 + }, + { + "epoch": 2.644536652835408, + "grad_norm": 8.396631240844727, + "learning_rate": 4.0863685262025514e-05, + "log_odds_chosen": 6.820697784423828, + "log_odds_ratio": -0.06059110537171364, + "logits/chosen": -0.8757572174072266, + "logits/rejected": -0.9193231463432312, + "logps/chosen": -0.05826441943645477, + "logps/rejected": -1.401708960533142, + "loss": 2.2095, + "nll_loss": 0.546317994594574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005826442502439022, + "rewards/margins": 0.1343444585800171, + "rewards/rejected": -0.1401709020137787, + "step": 3824 + }, + { + "epoch": 2.645228215767635, + "grad_norm": 8.253293991088867, + "learning_rate": 4.085984324573536e-05, + "log_odds_chosen": 9.214951515197754, + "log_odds_ratio": -0.00024521065643057227, + "logits/chosen": -0.3974234163761139, + "logits/rejected": -0.40845921635627747, + "logps/chosen": -0.00035066824057139456, + "logps/rejected": -1.5347728729248047, + "loss": 1.7804, + "nll_loss": 0.4450783133506775, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5066823329543695e-05, + "rewards/margins": 0.15344221889972687, + "rewards/rejected": -0.153477281332016, + "step": 3825 + }, + { + "epoch": 2.6459197786998616, + "grad_norm": 9.070773124694824, + "learning_rate": 4.085600122944521e-05, + "log_odds_chosen": 8.925060272216797, + "log_odds_ratio": -0.000363756698789075, + "logits/chosen": -0.2657910883426666, + "logits/rejected": -0.286965012550354, + "logps/chosen": -0.0059166718274354935, + "logps/rejected": -1.8982032537460327, + "loss": 1.4624, + "nll_loss": 0.3655526041984558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005916672525927424, + "rewards/margins": 0.18922865390777588, + "rewards/rejected": -0.1898203343153, + "step": 3826 + }, + { + "epoch": 2.6466113416320884, + "grad_norm": 7.061948299407959, + "learning_rate": 4.0852159213155065e-05, + "log_odds_chosen": 6.75771951675415, + "log_odds_ratio": -0.022473342716693878, + "logits/chosen": -0.501249372959137, + "logits/rejected": -0.47875383496284485, + "logps/chosen": -0.04296121746301651, + "logps/rejected": -2.019442558288574, + "loss": 1.5044, + "nll_loss": 0.3738507330417633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004296122118830681, + "rewards/margins": 0.19764815270900726, + "rewards/rejected": -0.2019442617893219, + "step": 3827 + }, + { + "epoch": 2.6473029045643153, + "grad_norm": 8.723488807678223, + "learning_rate": 4.084831719686492e-05, + "log_odds_chosen": 7.7598161697387695, + "log_odds_ratio": -0.08781145513057709, + "logits/chosen": -0.5303641557693481, + "logits/rejected": -0.5795314311981201, + "logps/chosen": -0.017855996266007423, + "logps/rejected": -1.4885255098342896, + "loss": 2.612, + "nll_loss": 0.6442256569862366, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017855996266007423, + "rewards/margins": 0.14706696569919586, + "rewards/rejected": -0.14885255694389343, + "step": 3828 + }, + { + "epoch": 2.647994467496542, + "grad_norm": 6.054506301879883, + "learning_rate": 4.084447518057476e-05, + "log_odds_chosen": 5.622161865234375, + "log_odds_ratio": -0.16529729962348938, + "logits/chosen": -0.09599445015192032, + "logits/rejected": -0.1764889806509018, + "logps/chosen": -0.06396627426147461, + "logps/rejected": -1.3258779048919678, + "loss": 2.5946, + "nll_loss": 0.6321138739585876, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006396627984941006, + "rewards/margins": 0.1261911690235138, + "rewards/rejected": -0.13258779048919678, + "step": 3829 + }, + { + "epoch": 2.648686030428769, + "grad_norm": 9.629890441894531, + "learning_rate": 4.084063316428462e-05, + "log_odds_chosen": 7.408921718597412, + "log_odds_ratio": -0.009323184378445148, + "logits/chosen": -0.6157118082046509, + "logits/rejected": -0.6616868376731873, + "logps/chosen": -0.021634576842188835, + "logps/rejected": -1.3038392066955566, + "loss": 2.1013, + "nll_loss": 0.5243857502937317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021634576842188835, + "rewards/margins": 0.12822046875953674, + "rewards/rejected": -0.1303839236497879, + "step": 3830 + }, + { + "epoch": 2.6493775933609958, + "grad_norm": 7.59913969039917, + "learning_rate": 4.083679114799447e-05, + "log_odds_chosen": 8.643135070800781, + "log_odds_ratio": -0.002662697108462453, + "logits/chosen": -0.5360656976699829, + "logits/rejected": -0.5914124250411987, + "logps/chosen": -0.026968974620103836, + "logps/rejected": -1.923891305923462, + "loss": 2.1109, + "nll_loss": 0.5274561643600464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00269689760170877, + "rewards/margins": 0.18969221413135529, + "rewards/rejected": -0.1923891305923462, + "step": 3831 + }, + { + "epoch": 2.6500691562932226, + "grad_norm": 10.914976119995117, + "learning_rate": 4.083294913170432e-05, + "log_odds_chosen": 9.900793075561523, + "log_odds_ratio": -0.00014636406558565795, + "logits/chosen": -0.720471978187561, + "logits/rejected": -0.8152539730072021, + "logps/chosen": -0.00039817302604205906, + "logps/rejected": -1.9039825201034546, + "loss": 2.374, + "nll_loss": 0.5934768319129944, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.981730333180167e-05, + "rewards/margins": 0.19035843014717102, + "rewards/rejected": -0.19039824604988098, + "step": 3832 + }, + { + "epoch": 2.6507607192254494, + "grad_norm": 10.051604270935059, + "learning_rate": 4.082910711541417e-05, + "log_odds_chosen": 7.259871482849121, + "log_odds_ratio": -0.020360752940177917, + "logits/chosen": -0.5566485524177551, + "logits/rejected": -0.6278355717658997, + "logps/chosen": -0.04195665195584297, + "logps/rejected": -1.222951054573059, + "loss": 2.6429, + "nll_loss": 0.6586805582046509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004195665009319782, + "rewards/margins": 0.11809943616390228, + "rewards/rejected": -0.12229510396718979, + "step": 3833 + }, + { + "epoch": 2.6514522821576763, + "grad_norm": 22.69855499267578, + "learning_rate": 4.082526509912402e-05, + "log_odds_chosen": 7.608578205108643, + "log_odds_ratio": -0.2585284411907196, + "logits/chosen": -0.7369226813316345, + "logits/rejected": -0.8471648097038269, + "logps/chosen": -0.047729022800922394, + "logps/rejected": -1.2069224119186401, + "loss": 2.6804, + "nll_loss": 0.6442494988441467, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004772902466356754, + "rewards/margins": 0.1159193366765976, + "rewards/rejected": -0.12069223821163177, + "step": 3834 + }, + { + "epoch": 2.652143845089903, + "grad_norm": 16.364238739013672, + "learning_rate": 4.082142308283387e-05, + "log_odds_chosen": 7.655045032501221, + "log_odds_ratio": -0.38344433903694153, + "logits/chosen": -0.11542128026485443, + "logits/rejected": -0.21868827939033508, + "logps/chosen": -0.010792739689350128, + "logps/rejected": -2.05288028717041, + "loss": 2.0762, + "nll_loss": 0.4807088077068329, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0010792739922180772, + "rewards/margins": 0.20420874655246735, + "rewards/rejected": -0.20528802275657654, + "step": 3835 + }, + { + "epoch": 2.65283540802213, + "grad_norm": 12.420515060424805, + "learning_rate": 4.081758106654372e-05, + "log_odds_chosen": 7.426298141479492, + "log_odds_ratio": -0.22978830337524414, + "logits/chosen": -0.37689489126205444, + "logits/rejected": -0.39640137553215027, + "logps/chosen": -0.07515082508325577, + "logps/rejected": -1.773220419883728, + "loss": 2.6472, + "nll_loss": 0.6388266086578369, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007515083998441696, + "rewards/margins": 0.16980695724487305, + "rewards/rejected": -0.17732205986976624, + "step": 3836 + }, + { + "epoch": 2.6535269709543567, + "grad_norm": 6.804900169372559, + "learning_rate": 4.0813739050253576e-05, + "log_odds_chosen": 7.390723705291748, + "log_odds_ratio": -0.01640833169221878, + "logits/chosen": -0.37023866176605225, + "logits/rejected": -0.41221803426742554, + "logps/chosen": -0.04346206784248352, + "logps/rejected": -1.1033592224121094, + "loss": 1.6437, + "nll_loss": 0.4092819094657898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0043462058529257774, + "rewards/margins": 0.1059897169470787, + "rewards/rejected": -0.11033592373132706, + "step": 3837 + }, + { + "epoch": 2.6542185338865836, + "grad_norm": 6.01309871673584, + "learning_rate": 4.080989703396342e-05, + "log_odds_chosen": 6.006360054016113, + "log_odds_ratio": -0.042434729635715485, + "logits/chosen": -0.6176404356956482, + "logits/rejected": -0.6368340849876404, + "logps/chosen": -0.025102484971284866, + "logps/rejected": -1.3419160842895508, + "loss": 2.0207, + "nll_loss": 0.5009331107139587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025102486833930016, + "rewards/margins": 0.13168136775493622, + "rewards/rejected": -0.1341916024684906, + "step": 3838 + }, + { + "epoch": 2.6549100968188104, + "grad_norm": 10.797191619873047, + "learning_rate": 4.080605501767328e-05, + "log_odds_chosen": 7.917986869812012, + "log_odds_ratio": -0.001113984500989318, + "logits/chosen": -0.5247561931610107, + "logits/rejected": -0.5820844173431396, + "logps/chosen": -0.01609625667333603, + "logps/rejected": -1.6284340620040894, + "loss": 2.8802, + "nll_loss": 0.7199360132217407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016096257604658604, + "rewards/margins": 0.16123376786708832, + "rewards/rejected": -0.16284340620040894, + "step": 3839 + }, + { + "epoch": 2.6556016597510372, + "grad_norm": 7.469875812530518, + "learning_rate": 4.0802213001383126e-05, + "log_odds_chosen": 6.664145469665527, + "log_odds_ratio": -0.3955520987510681, + "logits/chosen": -0.5398687124252319, + "logits/rejected": -0.5271255970001221, + "logps/chosen": -0.07110904902219772, + "logps/rejected": -1.0651947259902954, + "loss": 2.023, + "nll_loss": 0.4662015438079834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007110904902219772, + "rewards/margins": 0.09940856695175171, + "rewards/rejected": -0.10651947557926178, + "step": 3840 + }, + { + "epoch": 2.656293222683264, + "grad_norm": 14.15065860748291, + "learning_rate": 4.079837098509298e-05, + "log_odds_chosen": 6.631532669067383, + "log_odds_ratio": -0.2880239188671112, + "logits/chosen": -0.23957356810569763, + "logits/rejected": -0.41902583837509155, + "logps/chosen": -0.10840874910354614, + "logps/rejected": -1.2948004007339478, + "loss": 2.6744, + "nll_loss": 0.6398000121116638, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010840874165296555, + "rewards/margins": 0.11863917112350464, + "rewards/rejected": -0.1294800490140915, + "step": 3841 + }, + { + "epoch": 2.656984785615491, + "grad_norm": 11.483959197998047, + "learning_rate": 4.079452896880283e-05, + "log_odds_chosen": 7.611615180969238, + "log_odds_ratio": -0.0306989885866642, + "logits/chosen": -0.1826712191104889, + "logits/rejected": -0.19769719243049622, + "logps/chosen": -0.008514686487615108, + "logps/rejected": -1.3686504364013672, + "loss": 2.3888, + "nll_loss": 0.594137966632843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008514686487615108, + "rewards/margins": 0.13601356744766235, + "rewards/rejected": -0.13686503469944, + "step": 3842 + }, + { + "epoch": 2.6576763485477177, + "grad_norm": 14.097295761108398, + "learning_rate": 4.079068695251268e-05, + "log_odds_chosen": 5.852741718292236, + "log_odds_ratio": -0.24867239594459534, + "logits/chosen": -0.47088390588760376, + "logits/rejected": -0.5414155125617981, + "logps/chosen": -0.12828010320663452, + "logps/rejected": -1.1394619941711426, + "loss": 3.247, + "nll_loss": 0.7868857383728027, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012828009203076363, + "rewards/margins": 0.10111817717552185, + "rewards/rejected": -0.11394619941711426, + "step": 3843 + }, + { + "epoch": 2.6583679114799446, + "grad_norm": 9.225852966308594, + "learning_rate": 4.078684493622253e-05, + "log_odds_chosen": 7.986328125, + "log_odds_ratio": -0.0008940557599999011, + "logits/chosen": -0.49089115858078003, + "logits/rejected": -0.519273042678833, + "logps/chosen": -0.01081137452274561, + "logps/rejected": -1.614487886428833, + "loss": 2.1834, + "nll_loss": 0.5457635521888733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001081137452274561, + "rewards/margins": 0.16036765277385712, + "rewards/rejected": -0.16144880652427673, + "step": 3844 + }, + { + "epoch": 2.6590594744121714, + "grad_norm": 6.242781639099121, + "learning_rate": 4.078300291993238e-05, + "log_odds_chosen": 7.671241760253906, + "log_odds_ratio": -0.0021602134220302105, + "logits/chosen": -0.2434796690940857, + "logits/rejected": -0.3377665877342224, + "logps/chosen": -0.0016284455778077245, + "logps/rejected": -1.052886962890625, + "loss": 2.3956, + "nll_loss": 0.5986765027046204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016284457524307072, + "rewards/margins": 0.10512584447860718, + "rewards/rejected": -0.10528869181871414, + "step": 3845 + }, + { + "epoch": 2.659751037344398, + "grad_norm": 11.046217918395996, + "learning_rate": 4.0779160903642234e-05, + "log_odds_chosen": 7.890171527862549, + "log_odds_ratio": -0.0014221521560102701, + "logits/chosen": -0.5744228363037109, + "logits/rejected": -0.625802218914032, + "logps/chosen": -0.014100349508225918, + "logps/rejected": -1.7941745519638062, + "loss": 2.8484, + "nll_loss": 0.7119573354721069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014100349508225918, + "rewards/margins": 0.17800742387771606, + "rewards/rejected": -0.1794174611568451, + "step": 3846 + }, + { + "epoch": 2.660442600276625, + "grad_norm": 9.494037628173828, + "learning_rate": 4.077531888735208e-05, + "log_odds_chosen": 6.027531147003174, + "log_odds_ratio": -0.03056892566382885, + "logits/chosen": -0.7430992722511292, + "logits/rejected": -0.7252732515335083, + "logps/chosen": -0.015541885048151016, + "logps/rejected": -1.1196444034576416, + "loss": 2.77, + "nll_loss": 0.6894551515579224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015541885513812304, + "rewards/margins": 0.11041024327278137, + "rewards/rejected": -0.11196442693471909, + "step": 3847 + }, + { + "epoch": 2.661134163208852, + "grad_norm": 7.332900524139404, + "learning_rate": 4.077147687106194e-05, + "log_odds_chosen": 7.575974464416504, + "log_odds_ratio": -0.00114710524212569, + "logits/chosen": -0.6454428434371948, + "logits/rejected": -0.6171896457672119, + "logps/chosen": -0.013584421947598457, + "logps/rejected": -1.3186981678009033, + "loss": 2.2862, + "nll_loss": 0.571441113948822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013584423577412963, + "rewards/margins": 0.13051137328147888, + "rewards/rejected": -0.13186980783939362, + "step": 3848 + }, + { + "epoch": 2.6618257261410787, + "grad_norm": 6.807016372680664, + "learning_rate": 4.0767634854771785e-05, + "log_odds_chosen": 6.989053726196289, + "log_odds_ratio": -0.00887046754360199, + "logits/chosen": -0.7269768714904785, + "logits/rejected": -0.800000011920929, + "logps/chosen": -0.015401525422930717, + "logps/rejected": -1.5553052425384521, + "loss": 2.6655, + "nll_loss": 0.6654878854751587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015401525888592005, + "rewards/margins": 0.15399038791656494, + "rewards/rejected": -0.15553054213523865, + "step": 3849 + }, + { + "epoch": 2.6625172890733055, + "grad_norm": 10.290566444396973, + "learning_rate": 4.076379283848164e-05, + "log_odds_chosen": 4.950558662414551, + "log_odds_ratio": -0.01827535592019558, + "logits/chosen": -0.8898689150810242, + "logits/rejected": -0.9134399890899658, + "logps/chosen": -0.012501144781708717, + "logps/rejected": -0.901066780090332, + "loss": 3.7583, + "nll_loss": 0.9377538561820984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012501144083216786, + "rewards/margins": 0.08885655552148819, + "rewards/rejected": -0.09010668098926544, + "step": 3850 + }, + { + "epoch": 2.6632088520055324, + "grad_norm": 9.970890045166016, + "learning_rate": 4.075995082219149e-05, + "log_odds_chosen": 6.204949378967285, + "log_odds_ratio": -0.05663444474339485, + "logits/chosen": -0.5326727628707886, + "logits/rejected": -0.6079345941543579, + "logps/chosen": -0.055087704211473465, + "logps/rejected": -1.7836432456970215, + "loss": 2.3693, + "nll_loss": 0.5866531729698181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005508770234882832, + "rewards/margins": 0.17285554111003876, + "rewards/rejected": -0.1783643215894699, + "step": 3851 + }, + { + "epoch": 2.663900414937759, + "grad_norm": 10.323980331420898, + "learning_rate": 4.0756108805901335e-05, + "log_odds_chosen": 7.44677734375, + "log_odds_ratio": -0.030988784506917, + "logits/chosen": -0.5944615602493286, + "logits/rejected": -0.6548985242843628, + "logps/chosen": -0.010647930204868317, + "logps/rejected": -1.1650986671447754, + "loss": 2.1231, + "nll_loss": 0.5276675820350647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001064792973920703, + "rewards/margins": 0.11544506996870041, + "rewards/rejected": -0.11650986969470978, + "step": 3852 + }, + { + "epoch": 2.664591977869986, + "grad_norm": 6.948586940765381, + "learning_rate": 4.075226678961119e-05, + "log_odds_chosen": 8.399258613586426, + "log_odds_ratio": -0.007890290580689907, + "logits/chosen": -0.6912119388580322, + "logits/rejected": -0.798456609249115, + "logps/chosen": -0.0031807105988264084, + "logps/rejected": -1.0791079998016357, + "loss": 1.4553, + "nll_loss": 0.36302649974823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031807104824110866, + "rewards/margins": 0.10759273171424866, + "rewards/rejected": -0.10791080445051193, + "step": 3853 + }, + { + "epoch": 2.665283540802213, + "grad_norm": 5.564873695373535, + "learning_rate": 4.074842477332104e-05, + "log_odds_chosen": 6.925118446350098, + "log_odds_ratio": -0.04115324467420578, + "logits/chosen": -0.24638259410858154, + "logits/rejected": -0.32120299339294434, + "logps/chosen": -0.036915652453899384, + "logps/rejected": -1.4263198375701904, + "loss": 2.0515, + "nll_loss": 0.5087476968765259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036915652453899384, + "rewards/margins": 0.13894042372703552, + "rewards/rejected": -0.14263199269771576, + "step": 3854 + }, + { + "epoch": 2.6659751037344397, + "grad_norm": 7.070037364959717, + "learning_rate": 4.074458275703089e-05, + "log_odds_chosen": 6.6316328048706055, + "log_odds_ratio": -0.04871571436524391, + "logits/chosen": -0.3488408625125885, + "logits/rejected": -0.46453577280044556, + "logps/chosen": -0.035265352576971054, + "logps/rejected": -1.430934190750122, + "loss": 2.7185, + "nll_loss": 0.6747473478317261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003526535350829363, + "rewards/margins": 0.13956689834594727, + "rewards/rejected": -0.14309342205524445, + "step": 3855 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 7.606586456298828, + "learning_rate": 4.074074074074074e-05, + "log_odds_chosen": 8.715312957763672, + "log_odds_ratio": -0.0006025927141308784, + "logits/chosen": -0.5068209767341614, + "logits/rejected": -0.613471508026123, + "logps/chosen": -0.0014307673554867506, + "logps/rejected": -1.4268276691436768, + "loss": 1.8998, + "nll_loss": 0.47490203380584717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014307672972790897, + "rewards/margins": 0.14253969490528107, + "rewards/rejected": -0.1426827758550644, + "step": 3856 + }, + { + "epoch": 2.6673582295988933, + "grad_norm": 6.091766357421875, + "learning_rate": 4.07368987244506e-05, + "log_odds_chosen": 6.20139217376709, + "log_odds_ratio": -0.07082384079694748, + "logits/chosen": -0.4589351415634155, + "logits/rejected": -0.5272402763366699, + "logps/chosen": -0.03111000917851925, + "logps/rejected": -1.0170793533325195, + "loss": 2.2244, + "nll_loss": 0.549013614654541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003111000871285796, + "rewards/margins": 0.09859693795442581, + "rewards/rejected": -0.10170793533325195, + "step": 3857 + }, + { + "epoch": 2.66804979253112, + "grad_norm": 14.512560844421387, + "learning_rate": 4.073305670816044e-05, + "log_odds_chosen": 9.04071044921875, + "log_odds_ratio": -0.00032202163129113615, + "logits/chosen": -0.7915189862251282, + "logits/rejected": -0.8212922215461731, + "logps/chosen": -0.002225534524768591, + "logps/rejected": -2.078573703765869, + "loss": 2.7696, + "nll_loss": 0.6923729777336121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022255346993915737, + "rewards/margins": 0.2076348066329956, + "rewards/rejected": -0.20785734057426453, + "step": 3858 + }, + { + "epoch": 2.668741355463347, + "grad_norm": 11.682271003723145, + "learning_rate": 4.0729214691870296e-05, + "log_odds_chosen": 8.06418514251709, + "log_odds_ratio": -0.0015061056474223733, + "logits/chosen": -0.5448694229125977, + "logits/rejected": -0.5539915561676025, + "logps/chosen": -0.001430652802810073, + "logps/rejected": -1.1192471981048584, + "loss": 2.0914, + "nll_loss": 0.5227075815200806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014306529192253947, + "rewards/margins": 0.11178165674209595, + "rewards/rejected": -0.11192472279071808, + "step": 3859 + }, + { + "epoch": 2.669432918395574, + "grad_norm": 13.919990539550781, + "learning_rate": 4.072537267558015e-05, + "log_odds_chosen": 7.997103214263916, + "log_odds_ratio": -0.001455902587622404, + "logits/chosen": -0.8294209241867065, + "logits/rejected": -0.881327748298645, + "logps/chosen": -0.010614650323987007, + "logps/rejected": -1.5739727020263672, + "loss": 2.1879, + "nll_loss": 0.5468218326568604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010614650091156363, + "rewards/margins": 0.15633580088615417, + "rewards/rejected": -0.15739727020263672, + "step": 3860 + }, + { + "epoch": 2.6701244813278007, + "grad_norm": 10.517012596130371, + "learning_rate": 4.0721530659289994e-05, + "log_odds_chosen": 6.757225513458252, + "log_odds_ratio": -0.0808834433555603, + "logits/chosen": -0.7147971391677856, + "logits/rejected": -0.7620722651481628, + "logps/chosen": -0.004747908562421799, + "logps/rejected": -1.114861249923706, + "loss": 2.979, + "nll_loss": 0.7366524934768677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004747908387798816, + "rewards/margins": 0.1110113263130188, + "rewards/rejected": -0.11148611456155777, + "step": 3861 + }, + { + "epoch": 2.6708160442600275, + "grad_norm": 9.34632396697998, + "learning_rate": 4.0717688642999846e-05, + "log_odds_chosen": 5.147512912750244, + "log_odds_ratio": -0.12234330177307129, + "logits/chosen": -0.4665258228778839, + "logits/rejected": -0.5213587284088135, + "logps/chosen": -0.023862307891249657, + "logps/rejected": -1.0018904209136963, + "loss": 3.204, + "nll_loss": 0.7887601256370544, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002386230742558837, + "rewards/margins": 0.09780281782150269, + "rewards/rejected": -0.10018904507160187, + "step": 3862 + }, + { + "epoch": 2.6715076071922543, + "grad_norm": 6.544745445251465, + "learning_rate": 4.07138466267097e-05, + "log_odds_chosen": 4.824288368225098, + "log_odds_ratio": -0.04939433932304382, + "logits/chosen": -0.4103182554244995, + "logits/rejected": -0.4253101050853729, + "logps/chosen": -0.042487192898988724, + "logps/rejected": -1.0595020055770874, + "loss": 2.1754, + "nll_loss": 0.5389158129692078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004248719196766615, + "rewards/margins": 0.10170148313045502, + "rewards/rejected": -0.10595019906759262, + "step": 3863 + }, + { + "epoch": 2.6721991701244816, + "grad_norm": 5.377713680267334, + "learning_rate": 4.071000461041955e-05, + "log_odds_chosen": 6.0147199630737305, + "log_odds_ratio": -0.17093312740325928, + "logits/chosen": -0.4358542859554291, + "logits/rejected": -0.5036448836326599, + "logps/chosen": -0.0451769195497036, + "logps/rejected": -1.2233073711395264, + "loss": 1.7589, + "nll_loss": 0.4226234257221222, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004517692141234875, + "rewards/margins": 0.11781305074691772, + "rewards/rejected": -0.12233074754476547, + "step": 3864 + }, + { + "epoch": 2.6728907330567084, + "grad_norm": 11.191709518432617, + "learning_rate": 4.07061625941294e-05, + "log_odds_chosen": 6.220905303955078, + "log_odds_ratio": -0.1648993343114853, + "logits/chosen": -0.7400772571563721, + "logits/rejected": -0.7881424427032471, + "logps/chosen": -0.0402042493224144, + "logps/rejected": -1.097238540649414, + "loss": 2.8797, + "nll_loss": 0.7034355401992798, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004020425025373697, + "rewards/margins": 0.1057034358382225, + "rewards/rejected": -0.10972385853528976, + "step": 3865 + }, + { + "epoch": 2.6735822959889353, + "grad_norm": 11.402688026428223, + "learning_rate": 4.0702320577839256e-05, + "log_odds_chosen": 7.402166366577148, + "log_odds_ratio": -0.003541384357959032, + "logits/chosen": -0.4315453767776489, + "logits/rejected": -0.48258814215660095, + "logps/chosen": -0.03788773715496063, + "logps/rejected": -2.194901466369629, + "loss": 1.6595, + "nll_loss": 0.4145263433456421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003788774134591222, + "rewards/margins": 0.2157014012336731, + "rewards/rejected": -0.2194901704788208, + "step": 3866 + }, + { + "epoch": 2.674273858921162, + "grad_norm": 9.743550300598145, + "learning_rate": 4.06984785615491e-05, + "log_odds_chosen": 8.811948776245117, + "log_odds_ratio": -0.0250435471534729, + "logits/chosen": -1.0121331214904785, + "logits/rejected": -1.1049795150756836, + "logps/chosen": -0.006895300932228565, + "logps/rejected": -1.593515396118164, + "loss": 2.7048, + "nll_loss": 0.6736976504325867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006895301048643887, + "rewards/margins": 0.15866202116012573, + "rewards/rejected": -0.15935155749320984, + "step": 3867 + }, + { + "epoch": 2.674965421853389, + "grad_norm": 14.078466415405273, + "learning_rate": 4.0694636545258954e-05, + "log_odds_chosen": 9.76037883758545, + "log_odds_ratio": -0.00011932184861507267, + "logits/chosen": -0.4967171847820282, + "logits/rejected": -0.585934042930603, + "logps/chosen": -0.0009728466393426061, + "logps/rejected": -2.253514528274536, + "loss": 3.3634, + "nll_loss": 0.8408321738243103, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.728466102387756e-05, + "rewards/margins": 0.22525416314601898, + "rewards/rejected": -0.2253514528274536, + "step": 3868 + }, + { + "epoch": 2.6756569847856158, + "grad_norm": 7.892293930053711, + "learning_rate": 4.0690794528968806e-05, + "log_odds_chosen": 7.16309928894043, + "log_odds_ratio": -0.0015303477412089705, + "logits/chosen": -0.6888867616653442, + "logits/rejected": -0.7380569577217102, + "logps/chosen": -0.005860478151589632, + "logps/rejected": -1.0863014459609985, + "loss": 2.0397, + "nll_loss": 0.5097621083259583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005860478268004954, + "rewards/margins": 0.1080440878868103, + "rewards/rejected": -0.10863013565540314, + "step": 3869 + }, + { + "epoch": 2.6763485477178426, + "grad_norm": 10.240550994873047, + "learning_rate": 4.068695251267866e-05, + "log_odds_chosen": 7.787205219268799, + "log_odds_ratio": -0.0014418363571166992, + "logits/chosen": -0.3999979794025421, + "logits/rejected": -0.3923502564430237, + "logps/chosen": -0.0008615354308858514, + "logps/rejected": -1.0863168239593506, + "loss": 3.3077, + "nll_loss": 0.826777458190918, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.615355181973428e-05, + "rewards/margins": 0.10854553431272507, + "rewards/rejected": -0.1086316853761673, + "step": 3870 + }, + { + "epoch": 2.6770401106500694, + "grad_norm": 6.977838039398193, + "learning_rate": 4.0683110496388504e-05, + "log_odds_chosen": 6.633417129516602, + "log_odds_ratio": -0.0035154526121914387, + "logits/chosen": -0.522433876991272, + "logits/rejected": -0.5463957190513611, + "logps/chosen": -0.02177012898027897, + "logps/rejected": -1.4686945676803589, + "loss": 2.0591, + "nll_loss": 0.5144321918487549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002177012851461768, + "rewards/margins": 0.14469242095947266, + "rewards/rejected": -0.1468694508075714, + "step": 3871 + }, + { + "epoch": 2.6777316735822962, + "grad_norm": 8.900273323059082, + "learning_rate": 4.067926848009836e-05, + "log_odds_chosen": 8.341584205627441, + "log_odds_ratio": -0.005001600366085768, + "logits/chosen": -0.33963701128959656, + "logits/rejected": -0.41181546449661255, + "logps/chosen": -0.08042304217815399, + "logps/rejected": -2.277837038040161, + "loss": 1.7396, + "nll_loss": 0.43440160155296326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008042304776608944, + "rewards/margins": 0.21974140405654907, + "rewards/rejected": -0.2277837097644806, + "step": 3872 + }, + { + "epoch": 2.678423236514523, + "grad_norm": 13.76147174835205, + "learning_rate": 4.067542646380821e-05, + "log_odds_chosen": 7.684887886047363, + "log_odds_ratio": -0.29360559582710266, + "logits/chosen": -0.41378188133239746, + "logits/rejected": -0.47570520639419556, + "logps/chosen": -0.03724703937768936, + "logps/rejected": -1.4443128108978271, + "loss": 2.0423, + "nll_loss": 0.48122406005859375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0037247042637318373, + "rewards/margins": 0.14070658385753632, + "rewards/rejected": -0.14443129301071167, + "step": 3873 + }, + { + "epoch": 2.67911479944675, + "grad_norm": 8.732939720153809, + "learning_rate": 4.0671584447518055e-05, + "log_odds_chosen": 7.691486835479736, + "log_odds_ratio": -0.015395074151456356, + "logits/chosen": -0.4411604404449463, + "logits/rejected": -0.5540481209754944, + "logps/chosen": -0.02716037817299366, + "logps/rejected": -1.8648601770401, + "loss": 2.5227, + "nll_loss": 0.6291290521621704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027160379104316235, + "rewards/margins": 0.18376997113227844, + "rewards/rejected": -0.18648600578308105, + "step": 3874 + }, + { + "epoch": 2.6798063623789767, + "grad_norm": 8.468565940856934, + "learning_rate": 4.0667742431227914e-05, + "log_odds_chosen": 7.416378498077393, + "log_odds_ratio": -0.08451016247272491, + "logits/chosen": -0.8846219182014465, + "logits/rejected": -0.8550897836685181, + "logps/chosen": -0.010380887426435947, + "logps/rejected": -1.1413564682006836, + "loss": 2.8681, + "nll_loss": 0.7085626721382141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001038088696077466, + "rewards/margins": 0.11309756338596344, + "rewards/rejected": -0.11413565278053284, + "step": 3875 + }, + { + "epoch": 2.6804979253112036, + "grad_norm": 14.807464599609375, + "learning_rate": 4.066390041493776e-05, + "log_odds_chosen": 8.475648880004883, + "log_odds_ratio": -0.06202422454953194, + "logits/chosen": -0.2031635195016861, + "logits/rejected": -0.30704939365386963, + "logps/chosen": -0.018131043761968613, + "logps/rejected": -1.6106603145599365, + "loss": 2.4935, + "nll_loss": 0.6171661615371704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018131043761968613, + "rewards/margins": 0.15925294160842896, + "rewards/rejected": -0.16106604039669037, + "step": 3876 + }, + { + "epoch": 2.6811894882434304, + "grad_norm": 6.985348224639893, + "learning_rate": 4.066005839864761e-05, + "log_odds_chosen": 5.815200328826904, + "log_odds_ratio": -0.10103943943977356, + "logits/chosen": -0.5342247486114502, + "logits/rejected": -0.49896302819252014, + "logps/chosen": -0.024796854704618454, + "logps/rejected": -0.9605525732040405, + "loss": 2.5107, + "nll_loss": 0.6175825595855713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024796854704618454, + "rewards/margins": 0.09357556700706482, + "rewards/rejected": -0.09605525434017181, + "step": 3877 + }, + { + "epoch": 2.6818810511756572, + "grad_norm": 6.360101222991943, + "learning_rate": 4.0656216382357465e-05, + "log_odds_chosen": 6.302757740020752, + "log_odds_ratio": -0.01789461448788643, + "logits/chosen": -0.4748285710811615, + "logits/rejected": -0.4775317907333374, + "logps/chosen": -0.02190949022769928, + "logps/rejected": -0.8361377716064453, + "loss": 2.0028, + "nll_loss": 0.49890488386154175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002190949162468314, + "rewards/margins": 0.0814228355884552, + "rewards/rejected": -0.0836137905716896, + "step": 3878 + }, + { + "epoch": 2.682572614107884, + "grad_norm": 7.889153957366943, + "learning_rate": 4.065237436606732e-05, + "log_odds_chosen": 8.003373146057129, + "log_odds_ratio": -0.006866448558866978, + "logits/chosen": -0.47109514474868774, + "logits/rejected": -0.4927683472633362, + "logps/chosen": -0.024222789332270622, + "logps/rejected": -1.558125376701355, + "loss": 1.7338, + "nll_loss": 0.4327537417411804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002422278979793191, + "rewards/margins": 0.15339027345180511, + "rewards/rejected": -0.15581254661083221, + "step": 3879 + }, + { + "epoch": 2.683264177040111, + "grad_norm": 7.911652088165283, + "learning_rate": 4.064853234977716e-05, + "log_odds_chosen": 5.731238842010498, + "log_odds_ratio": -0.2041652798652649, + "logits/chosen": -0.3681705594062805, + "logits/rejected": -0.3524719476699829, + "logps/chosen": -0.038544073700904846, + "logps/rejected": -0.7402932047843933, + "loss": 2.4387, + "nll_loss": 0.5892567038536072, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.003854407463222742, + "rewards/margins": 0.0701749175786972, + "rewards/rejected": -0.07402931898832321, + "step": 3880 + }, + { + "epoch": 2.6839557399723377, + "grad_norm": 7.045600414276123, + "learning_rate": 4.0644690333487015e-05, + "log_odds_chosen": 8.163865089416504, + "log_odds_ratio": -0.0022141621448099613, + "logits/chosen": -0.6396666169166565, + "logits/rejected": -0.6936120986938477, + "logps/chosen": -0.0017530673649162054, + "logps/rejected": -1.4846478700637817, + "loss": 1.7957, + "nll_loss": 0.4487159848213196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017530674813315272, + "rewards/margins": 0.1482894867658615, + "rewards/rejected": -0.14846479892730713, + "step": 3881 + }, + { + "epoch": 2.6846473029045645, + "grad_norm": 7.05449104309082, + "learning_rate": 4.064084831719687e-05, + "log_odds_chosen": 7.6457719802856445, + "log_odds_ratio": -0.0029674817342311144, + "logits/chosen": -0.5483105182647705, + "logits/rejected": -0.528850793838501, + "logps/chosen": -0.015136461704969406, + "logps/rejected": -1.5551209449768066, + "loss": 1.7261, + "nll_loss": 0.4312302768230438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015136462170630693, + "rewards/margins": 0.1539984494447708, + "rewards/rejected": -0.15551209449768066, + "step": 3882 + }, + { + "epoch": 2.6853388658367914, + "grad_norm": 9.146442413330078, + "learning_rate": 4.0637006300906713e-05, + "log_odds_chosen": 6.629992961883545, + "log_odds_ratio": -0.14234022796154022, + "logits/chosen": -0.4649689793586731, + "logits/rejected": -0.5079630613327026, + "logps/chosen": -0.028579382225871086, + "logps/rejected": -1.3636877536773682, + "loss": 2.2741, + "nll_loss": 0.5542930960655212, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028579384088516235, + "rewards/margins": 0.13351084291934967, + "rewards/rejected": -0.1363687813282013, + "step": 3883 + }, + { + "epoch": 2.686030428769018, + "grad_norm": 8.364623069763184, + "learning_rate": 4.063316428461657e-05, + "log_odds_chosen": 7.477568626403809, + "log_odds_ratio": -0.01047502364963293, + "logits/chosen": -0.16539695858955383, + "logits/rejected": -0.2058202177286148, + "logps/chosen": -0.00590811762958765, + "logps/rejected": -1.113823652267456, + "loss": 2.0598, + "nll_loss": 0.5138932466506958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005908117746002972, + "rewards/margins": 0.11079156398773193, + "rewards/rejected": -0.1113823801279068, + "step": 3884 + }, + { + "epoch": 2.686721991701245, + "grad_norm": 8.873336791992188, + "learning_rate": 4.062932226832642e-05, + "log_odds_chosen": 7.151572227478027, + "log_odds_ratio": -0.010587374679744244, + "logits/chosen": -0.7112501859664917, + "logits/rejected": -0.7304208278656006, + "logps/chosen": -0.008737252093851566, + "logps/rejected": -1.2269251346588135, + "loss": 1.7025, + "nll_loss": 0.4245630204677582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008737251628190279, + "rewards/margins": 0.12181878834962845, + "rewards/rejected": -0.12269251048564911, + "step": 3885 + }, + { + "epoch": 2.687413554633472, + "grad_norm": 13.44513988494873, + "learning_rate": 4.062548025203627e-05, + "log_odds_chosen": 8.571016311645508, + "log_odds_ratio": -0.0013945872196927667, + "logits/chosen": -0.5865503549575806, + "logits/rejected": -0.5623582601547241, + "logps/chosen": -0.0017820007633417845, + "logps/rejected": -1.2935950756072998, + "loss": 2.1752, + "nll_loss": 0.543658435344696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017820007633417845, + "rewards/margins": 0.12918131053447723, + "rewards/rejected": -0.12935952842235565, + "step": 3886 + }, + { + "epoch": 2.6881051175656987, + "grad_norm": 5.672190189361572, + "learning_rate": 4.062163823574612e-05, + "log_odds_chosen": 7.870635986328125, + "log_odds_ratio": -0.006466720253229141, + "logits/chosen": -0.4872884750366211, + "logits/rejected": -0.5083516240119934, + "logps/chosen": -0.01766045391559601, + "logps/rejected": -1.3608150482177734, + "loss": 2.0634, + "nll_loss": 0.5152048468589783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001766045344993472, + "rewards/margins": 0.13431546092033386, + "rewards/rejected": -0.1360815167427063, + "step": 3887 + }, + { + "epoch": 2.6887966804979255, + "grad_norm": 9.264443397521973, + "learning_rate": 4.0617796219455976e-05, + "log_odds_chosen": 7.730437755584717, + "log_odds_ratio": -0.0016871271654963493, + "logits/chosen": -0.5565177798271179, + "logits/rejected": -0.6040325164794922, + "logps/chosen": -0.0025901379995048046, + "logps/rejected": -1.3575786352157593, + "loss": 2.4853, + "nll_loss": 0.6211501955986023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002590137883089483, + "rewards/margins": 0.13549885153770447, + "rewards/rejected": -0.13575786352157593, + "step": 3888 + }, + { + "epoch": 2.6894882434301524, + "grad_norm": 7.969750881195068, + "learning_rate": 4.061395420316582e-05, + "log_odds_chosen": 9.363624572753906, + "log_odds_ratio": -0.0005542628350667655, + "logits/chosen": -0.7252931594848633, + "logits/rejected": -0.8185261487960815, + "logps/chosen": -0.016552282497286797, + "logps/rejected": -2.182529926300049, + "loss": 2.064, + "nll_loss": 0.5159405469894409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016552285524085164, + "rewards/margins": 0.2165977656841278, + "rewards/rejected": -0.2182529866695404, + "step": 3889 + }, + { + "epoch": 2.690179806362379, + "grad_norm": 8.628170013427734, + "learning_rate": 4.0610112186875674e-05, + "log_odds_chosen": 8.0739164352417, + "log_odds_ratio": -0.002225684467703104, + "logits/chosen": -0.747127115726471, + "logits/rejected": -0.8388808965682983, + "logps/chosen": -0.003269063076004386, + "logps/rejected": -1.6631715297698975, + "loss": 2.0785, + "nll_loss": 0.5193923115730286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032690633088350296, + "rewards/margins": 0.16599026322364807, + "rewards/rejected": -0.1663171648979187, + "step": 3890 + }, + { + "epoch": 2.690871369294606, + "grad_norm": 11.820703506469727, + "learning_rate": 4.0606270170585526e-05, + "log_odds_chosen": 7.5276336669921875, + "log_odds_ratio": -0.0058481087908148766, + "logits/chosen": -0.43524500727653503, + "logits/rejected": -0.48104870319366455, + "logps/chosen": -0.009421736001968384, + "logps/rejected": -1.2175965309143066, + "loss": 2.5074, + "nll_loss": 0.6262714266777039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000942173704970628, + "rewards/margins": 0.12081749737262726, + "rewards/rejected": -0.12175966054201126, + "step": 3891 + }, + { + "epoch": 2.691562932226833, + "grad_norm": 10.340312957763672, + "learning_rate": 4.060242815429537e-05, + "log_odds_chosen": 7.890967845916748, + "log_odds_ratio": -0.00823564175516367, + "logits/chosen": -0.7037793397903442, + "logits/rejected": -0.7014849781990051, + "logps/chosen": -0.011880909092724323, + "logps/rejected": -1.146535873413086, + "loss": 2.7982, + "nll_loss": 0.6987218260765076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011880907695740461, + "rewards/margins": 0.11346549540758133, + "rewards/rejected": -0.11465359479188919, + "step": 3892 + }, + { + "epoch": 2.6922544951590597, + "grad_norm": 10.122757911682129, + "learning_rate": 4.059858613800523e-05, + "log_odds_chosen": 5.4945068359375, + "log_odds_ratio": -0.09575443714857101, + "logits/chosen": -0.725883960723877, + "logits/rejected": -0.7544358968734741, + "logps/chosen": -0.022189276292920113, + "logps/rejected": -1.1797430515289307, + "loss": 2.3616, + "nll_loss": 0.5808249711990356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022189277224242687, + "rewards/margins": 0.11575537174940109, + "rewards/rejected": -0.11797429621219635, + "step": 3893 + }, + { + "epoch": 2.6929460580912865, + "grad_norm": 5.698261737823486, + "learning_rate": 4.059474412171508e-05, + "log_odds_chosen": 9.365954399108887, + "log_odds_ratio": -0.010592492297291756, + "logits/chosen": -0.4197603464126587, + "logits/rejected": -0.38374871015548706, + "logps/chosen": -0.0014273038832470775, + "logps/rejected": -1.4466173648834229, + "loss": 1.3858, + "nll_loss": 0.3453885614871979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001427303795935586, + "rewards/margins": 0.14451901614665985, + "rewards/rejected": -0.14466175436973572, + "step": 3894 + }, + { + "epoch": 2.6936376210235133, + "grad_norm": 15.388368606567383, + "learning_rate": 4.059090210542493e-05, + "log_odds_chosen": 7.5923991203308105, + "log_odds_ratio": -0.1653386652469635, + "logits/chosen": -0.33131247758865356, + "logits/rejected": -0.41436147689819336, + "logps/chosen": -0.0448850654065609, + "logps/rejected": -1.642098307609558, + "loss": 2.4806, + "nll_loss": 0.6036100387573242, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004488506354391575, + "rewards/margins": 0.15972132980823517, + "rewards/rejected": -0.16420982778072357, + "step": 3895 + }, + { + "epoch": 2.69432918395574, + "grad_norm": 9.265947341918945, + "learning_rate": 4.058706008913478e-05, + "log_odds_chosen": 7.897387981414795, + "log_odds_ratio": -0.02778870053589344, + "logits/chosen": -0.24363026022911072, + "logits/rejected": -0.2546027898788452, + "logps/chosen": -0.015297142788767815, + "logps/rejected": -1.6907174587249756, + "loss": 2.0793, + "nll_loss": 0.5170523524284363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015297143254429102, + "rewards/margins": 0.16754204034805298, + "rewards/rejected": -0.169071763753891, + "step": 3896 + }, + { + "epoch": 2.695020746887967, + "grad_norm": 9.980796813964844, + "learning_rate": 4.0583218072844634e-05, + "log_odds_chosen": 9.521817207336426, + "log_odds_ratio": -0.0002317545295227319, + "logits/chosen": -0.4232358932495117, + "logits/rejected": -0.582566499710083, + "logps/chosen": -0.0009553482523187995, + "logps/rejected": -1.9109771251678467, + "loss": 2.051, + "nll_loss": 0.5127320289611816, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.553483687341213e-05, + "rewards/margins": 0.19100217521190643, + "rewards/rejected": -0.1910977065563202, + "step": 3897 + }, + { + "epoch": 2.695712309820194, + "grad_norm": 4.509500503540039, + "learning_rate": 4.057937605655448e-05, + "log_odds_chosen": 8.891090393066406, + "log_odds_ratio": -0.031227584928274155, + "logits/chosen": -0.4662817120552063, + "logits/rejected": -0.5478078126907349, + "logps/chosen": -0.011539162136614323, + "logps/rejected": -1.3029841184616089, + "loss": 1.6549, + "nll_loss": 0.41060373187065125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011539161205291748, + "rewards/margins": 0.12914448976516724, + "rewards/rejected": -0.1302984207868576, + "step": 3898 + }, + { + "epoch": 2.6964038727524207, + "grad_norm": 9.72545337677002, + "learning_rate": 4.057553404026433e-05, + "log_odds_chosen": 7.56840705871582, + "log_odds_ratio": -0.00843381229788065, + "logits/chosen": -0.6165993213653564, + "logits/rejected": -0.6628961563110352, + "logps/chosen": -0.008199600502848625, + "logps/rejected": -1.769745111465454, + "loss": 2.7607, + "nll_loss": 0.6893199682235718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008199600852094591, + "rewards/margins": 0.17615455389022827, + "rewards/rejected": -0.17697452008724213, + "step": 3899 + }, + { + "epoch": 2.6970954356846475, + "grad_norm": 8.728021621704102, + "learning_rate": 4.0571692023974185e-05, + "log_odds_chosen": 6.957355976104736, + "log_odds_ratio": -0.0020753592252731323, + "logits/chosen": -0.6975337266921997, + "logits/rejected": -0.7607104778289795, + "logps/chosen": -0.024932991713285446, + "logps/rejected": -1.4897279739379883, + "loss": 2.086, + "nll_loss": 0.521297812461853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002493299311026931, + "rewards/margins": 0.1464795023202896, + "rewards/rejected": -0.1489727944135666, + "step": 3900 + }, + { + "epoch": 2.6977869986168743, + "grad_norm": 10.900203704833984, + "learning_rate": 4.056785000768403e-05, + "log_odds_chosen": 6.62492561340332, + "log_odds_ratio": -0.17824991047382355, + "logits/chosen": -0.7206655740737915, + "logits/rejected": -0.693558931350708, + "logps/chosen": -0.03227349370718002, + "logps/rejected": -1.4377551078796387, + "loss": 2.3092, + "nll_loss": 0.5594759583473206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032273493707180023, + "rewards/margins": 0.14054815471172333, + "rewards/rejected": -0.14377550780773163, + "step": 3901 + }, + { + "epoch": 2.698478561549101, + "grad_norm": 9.88683795928955, + "learning_rate": 4.056400799139389e-05, + "log_odds_chosen": 8.03813648223877, + "log_odds_ratio": -0.0048516602255403996, + "logits/chosen": -0.41523438692092896, + "logits/rejected": -0.502999484539032, + "logps/chosen": -0.013218377716839314, + "logps/rejected": -1.833310604095459, + "loss": 1.7961, + "nll_loss": 0.44853001832962036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013218377716839314, + "rewards/margins": 0.18200922012329102, + "rewards/rejected": -0.18333107233047485, + "step": 3902 + }, + { + "epoch": 2.699170124481328, + "grad_norm": 9.741769790649414, + "learning_rate": 4.0560165975103735e-05, + "log_odds_chosen": 7.460450172424316, + "log_odds_ratio": -0.0015208596596494317, + "logits/chosen": -0.9277177453041077, + "logits/rejected": -0.9233143329620361, + "logps/chosen": -0.002522763330489397, + "logps/rejected": -1.2246127128601074, + "loss": 3.0391, + "nll_loss": 0.7596240043640137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025227630976587534, + "rewards/margins": 0.12220901250839233, + "rewards/rejected": -0.12246128171682358, + "step": 3903 + }, + { + "epoch": 2.699861687413555, + "grad_norm": 8.596832275390625, + "learning_rate": 4.055632395881359e-05, + "log_odds_chosen": 8.594837188720703, + "log_odds_ratio": -0.0006484482437372208, + "logits/chosen": -0.6567248702049255, + "logits/rejected": -0.7241695523262024, + "logps/chosen": -0.0012694273609668016, + "logps/rejected": -1.3293309211730957, + "loss": 2.7539, + "nll_loss": 0.6884142160415649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012694273027591407, + "rewards/margins": 0.13280615210533142, + "rewards/rejected": -0.1329330950975418, + "step": 3904 + }, + { + "epoch": 2.7005532503457816, + "grad_norm": 11.800738334655762, + "learning_rate": 4.055248194252344e-05, + "log_odds_chosen": 7.378905296325684, + "log_odds_ratio": -0.009102431125938892, + "logits/chosen": -0.5646547079086304, + "logits/rejected": -0.5651717185974121, + "logps/chosen": -0.009457389824092388, + "logps/rejected": -1.2582478523254395, + "loss": 2.4284, + "nll_loss": 0.6061837673187256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009457390988245606, + "rewards/margins": 0.12487904727458954, + "rewards/rejected": -0.12582479417324066, + "step": 3905 + }, + { + "epoch": 2.7012448132780085, + "grad_norm": 10.2549467086792, + "learning_rate": 4.054863992623329e-05, + "log_odds_chosen": 8.616276741027832, + "log_odds_ratio": -0.0003504530468489975, + "logits/chosen": -0.49540776014328003, + "logits/rejected": -0.5995450019836426, + "logps/chosen": -0.0056231142953038216, + "logps/rejected": -1.4994674921035767, + "loss": 1.9879, + "nll_loss": 0.4969436228275299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005623113829642534, + "rewards/margins": 0.14938445389270782, + "rewards/rejected": -0.14994676411151886, + "step": 3906 + }, + { + "epoch": 2.7019363762102353, + "grad_norm": 13.21338176727295, + "learning_rate": 4.054479790994314e-05, + "log_odds_chosen": 8.875137329101562, + "log_odds_ratio": -0.0010612740879878402, + "logits/chosen": -0.5491659641265869, + "logits/rejected": -0.6577551364898682, + "logps/chosen": -0.014460853300988674, + "logps/rejected": -2.1844234466552734, + "loss": 2.2323, + "nll_loss": 0.5579643845558167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014460852835327387, + "rewards/margins": 0.21699626743793488, + "rewards/rejected": -0.21844235062599182, + "step": 3907 + }, + { + "epoch": 2.702627939142462, + "grad_norm": 12.623719215393066, + "learning_rate": 4.054095589365299e-05, + "log_odds_chosen": 8.661251068115234, + "log_odds_ratio": -0.0006629570852965117, + "logits/chosen": -0.6035693287849426, + "logits/rejected": -0.632934033870697, + "logps/chosen": -0.0006702827522531152, + "logps/rejected": -1.1736888885498047, + "loss": 2.1473, + "nll_loss": 0.5367664694786072, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.702827522531152e-05, + "rewards/margins": 0.11730185151100159, + "rewards/rejected": -0.11736888438463211, + "step": 3908 + }, + { + "epoch": 2.703319502074689, + "grad_norm": 8.1231050491333, + "learning_rate": 4.053711387736284e-05, + "log_odds_chosen": 8.479167938232422, + "log_odds_ratio": -0.000644492800347507, + "logits/chosen": -0.8432545065879822, + "logits/rejected": -0.8350276947021484, + "logps/chosen": -0.0008359847124665976, + "logps/rejected": -1.1647535562515259, + "loss": 2.1751, + "nll_loss": 0.5437168478965759, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.359846833627671e-05, + "rewards/margins": 0.11639176309108734, + "rewards/rejected": -0.11647535860538483, + "step": 3909 + }, + { + "epoch": 2.704011065006916, + "grad_norm": 12.870767593383789, + "learning_rate": 4.053327186107269e-05, + "log_odds_chosen": 8.805551528930664, + "log_odds_ratio": -0.0004201167030259967, + "logits/chosen": -0.4179730713367462, + "logits/rejected": -0.550365149974823, + "logps/chosen": -0.001364890718832612, + "logps/rejected": -1.2633905410766602, + "loss": 1.5198, + "nll_loss": 0.3799135088920593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001364890777040273, + "rewards/margins": 0.1262025684118271, + "rewards/rejected": -0.12633906304836273, + "step": 3910 + }, + { + "epoch": 2.7047026279391426, + "grad_norm": 12.722868919372559, + "learning_rate": 4.052942984478255e-05, + "log_odds_chosen": 7.820643901824951, + "log_odds_ratio": -0.007156676612794399, + "logits/chosen": -0.5849895477294922, + "logits/rejected": -0.6761038303375244, + "logps/chosen": -0.006085277535021305, + "logps/rejected": -1.2077401876449585, + "loss": 3.0812, + "nll_loss": 0.7695819735527039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006085278000682592, + "rewards/margins": 0.12016548216342926, + "rewards/rejected": -0.12077402323484421, + "step": 3911 + }, + { + "epoch": 2.7053941908713695, + "grad_norm": 5.718795299530029, + "learning_rate": 4.0525587828492394e-05, + "log_odds_chosen": 7.587345123291016, + "log_odds_ratio": -0.0025898406747728586, + "logits/chosen": -0.4436808228492737, + "logits/rejected": -0.5065404176712036, + "logps/chosen": -0.017170462757349014, + "logps/rejected": -1.5590879917144775, + "loss": 1.8046, + "nll_loss": 0.4508890211582184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001717046252451837, + "rewards/margins": 0.1541917622089386, + "rewards/rejected": -0.15590879321098328, + "step": 3912 + }, + { + "epoch": 2.7060857538035963, + "grad_norm": 8.069631576538086, + "learning_rate": 4.0521745812202246e-05, + "log_odds_chosen": 8.478148460388184, + "log_odds_ratio": -0.00025979289785027504, + "logits/chosen": -0.8399416208267212, + "logits/rejected": -0.9217376708984375, + "logps/chosen": -0.0006875486578792334, + "logps/rejected": -1.0694687366485596, + "loss": 3.5088, + "nll_loss": 0.8771728873252869, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.875485996715724e-05, + "rewards/margins": 0.1068781167268753, + "rewards/rejected": -0.10694687068462372, + "step": 3913 + }, + { + "epoch": 2.706777316735823, + "grad_norm": 10.46756362915039, + "learning_rate": 4.05179037959121e-05, + "log_odds_chosen": 8.568737983703613, + "log_odds_ratio": -0.046277035027742386, + "logits/chosen": -1.0236213207244873, + "logits/rejected": -1.0970962047576904, + "logps/chosen": -0.014784927479922771, + "logps/rejected": -1.8517358303070068, + "loss": 2.4229, + "nll_loss": 0.6010984182357788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001478492864407599, + "rewards/margins": 0.18369510769844055, + "rewards/rejected": -0.18517358601093292, + "step": 3914 + }, + { + "epoch": 2.70746887966805, + "grad_norm": 5.574758529663086, + "learning_rate": 4.051406177962195e-05, + "log_odds_chosen": 9.207605361938477, + "log_odds_ratio": -0.015368283726274967, + "logits/chosen": -0.5122767686843872, + "logits/rejected": -0.5474386811256409, + "logps/chosen": -0.00530365202575922, + "logps/rejected": -1.8214813470840454, + "loss": 1.4181, + "nll_loss": 0.3529995381832123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005303652142174542, + "rewards/margins": 0.18161778151988983, + "rewards/rejected": -0.18214815855026245, + "step": 3915 + }, + { + "epoch": 2.7081604426002768, + "grad_norm": 7.1005940437316895, + "learning_rate": 4.05102197633318e-05, + "log_odds_chosen": 7.656611919403076, + "log_odds_ratio": -0.05239873006939888, + "logits/chosen": -0.8261842131614685, + "logits/rejected": -0.8624880313873291, + "logps/chosen": -0.052322208881378174, + "logps/rejected": -1.5566890239715576, + "loss": 2.0033, + "nll_loss": 0.495580792427063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005232220981270075, + "rewards/margins": 0.15043668448925018, + "rewards/rejected": -0.15566891431808472, + "step": 3916 + }, + { + "epoch": 2.7088520055325036, + "grad_norm": 9.42386531829834, + "learning_rate": 4.050637774704165e-05, + "log_odds_chosen": 7.031638145446777, + "log_odds_ratio": -0.0677785649895668, + "logits/chosen": -0.798640251159668, + "logits/rejected": -0.8351523280143738, + "logps/chosen": -0.04338229447603226, + "logps/rejected": -1.8607432842254639, + "loss": 2.8671, + "nll_loss": 0.7099849581718445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004338229540735483, + "rewards/margins": 0.18173609673976898, + "rewards/rejected": -0.18607433140277863, + "step": 3917 + }, + { + "epoch": 2.7095435684647304, + "grad_norm": 6.6379008293151855, + "learning_rate": 4.05025357307515e-05, + "log_odds_chosen": 7.005310535430908, + "log_odds_ratio": -0.09212741255760193, + "logits/chosen": -0.563160240650177, + "logits/rejected": -0.5820728540420532, + "logps/chosen": -0.02856297791004181, + "logps/rejected": -1.6477155685424805, + "loss": 1.4233, + "nll_loss": 0.346622496843338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028562978841364384, + "rewards/margins": 0.16191525757312775, + "rewards/rejected": -0.16477157175540924, + "step": 3918 + }, + { + "epoch": 2.7102351313969573, + "grad_norm": 9.909972190856934, + "learning_rate": 4.049869371446135e-05, + "log_odds_chosen": 7.453892707824707, + "log_odds_ratio": -0.039632659405469894, + "logits/chosen": -0.8415449857711792, + "logits/rejected": -0.8661006689071655, + "logps/chosen": -0.022686485201120377, + "logps/rejected": -1.4245719909667969, + "loss": 2.5347, + "nll_loss": 0.629721999168396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022686487063765526, + "rewards/margins": 0.1401885449886322, + "rewards/rejected": -0.14245720207691193, + "step": 3919 + }, + { + "epoch": 2.710926694329184, + "grad_norm": 14.87858772277832, + "learning_rate": 4.0494851698171206e-05, + "log_odds_chosen": 6.864459991455078, + "log_odds_ratio": -0.20086896419525146, + "logits/chosen": -0.7582334280014038, + "logits/rejected": -0.7744381427764893, + "logps/chosen": -0.0741969421505928, + "logps/rejected": -1.1814193725585938, + "loss": 3.4825, + "nll_loss": 0.8505353927612305, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00741969421505928, + "rewards/margins": 0.11072224378585815, + "rewards/rejected": -0.11814194172620773, + "step": 3920 + }, + { + "epoch": 2.711618257261411, + "grad_norm": 7.602975845336914, + "learning_rate": 4.049100968188105e-05, + "log_odds_chosen": 9.197195053100586, + "log_odds_ratio": -0.0003866076876875013, + "logits/chosen": -0.5975632667541504, + "logits/rejected": -0.6337493658065796, + "logps/chosen": -0.003041280433535576, + "logps/rejected": -1.9324337244033813, + "loss": 1.2749, + "nll_loss": 0.31867676973342896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030412807245738804, + "rewards/margins": 0.19293925166130066, + "rewards/rejected": -0.1932433843612671, + "step": 3921 + }, + { + "epoch": 2.7123098201936378, + "grad_norm": 9.588038444519043, + "learning_rate": 4.0487167665590905e-05, + "log_odds_chosen": 8.537476539611816, + "log_odds_ratio": -0.22845543920993805, + "logits/chosen": -0.618996262550354, + "logits/rejected": -0.6214995384216309, + "logps/chosen": -0.03453611209988594, + "logps/rejected": -1.6125141382217407, + "loss": 1.9046, + "nll_loss": 0.45329976081848145, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003453611396253109, + "rewards/margins": 0.15779779851436615, + "rewards/rejected": -0.16125141084194183, + "step": 3922 + }, + { + "epoch": 2.7130013831258646, + "grad_norm": 10.34792423248291, + "learning_rate": 4.048332564930076e-05, + "log_odds_chosen": 8.786086082458496, + "log_odds_ratio": -0.035590723156929016, + "logits/chosen": -0.7593101263046265, + "logits/rejected": -0.7925082445144653, + "logps/chosen": -0.012174851261079311, + "logps/rejected": -2.1274166107177734, + "loss": 2.3948, + "nll_loss": 0.595142662525177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012174851726740599, + "rewards/margins": 0.21152418851852417, + "rewards/rejected": -0.2127416729927063, + "step": 3923 + }, + { + "epoch": 2.7136929460580914, + "grad_norm": 8.583378791809082, + "learning_rate": 4.047948363301061e-05, + "log_odds_chosen": 8.819097518920898, + "log_odds_ratio": -0.004179012030363083, + "logits/chosen": -0.5776971578598022, + "logits/rejected": -0.5667839050292969, + "logps/chosen": -0.003491302952170372, + "logps/rejected": -1.191100835800171, + "loss": 2.0645, + "nll_loss": 0.5156947374343872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003491303068585694, + "rewards/margins": 0.11876094341278076, + "rewards/rejected": -0.11911007761955261, + "step": 3924 + }, + { + "epoch": 2.7143845089903182, + "grad_norm": 13.72464656829834, + "learning_rate": 4.0475641616720455e-05, + "log_odds_chosen": 7.309238910675049, + "log_odds_ratio": -0.14363299310207367, + "logits/chosen": -0.5639284253120422, + "logits/rejected": -0.6585797667503357, + "logps/chosen": -0.05896759405732155, + "logps/rejected": -1.1728018522262573, + "loss": 2.576, + "nll_loss": 0.6296295523643494, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005896759685128927, + "rewards/margins": 0.11138343065977097, + "rewards/rejected": -0.11728018522262573, + "step": 3925 + }, + { + "epoch": 2.715076071922545, + "grad_norm": 8.657648086547852, + "learning_rate": 4.047179960043031e-05, + "log_odds_chosen": 7.906722545623779, + "log_odds_ratio": -0.03158888593316078, + "logits/chosen": -0.5023703575134277, + "logits/rejected": -0.5678955912590027, + "logps/chosen": -0.03969509154558182, + "logps/rejected": -1.6585890054702759, + "loss": 2.9702, + "nll_loss": 0.7393918633460999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003969509620219469, + "rewards/margins": 0.16188938915729523, + "rewards/rejected": -0.1658589094877243, + "step": 3926 + }, + { + "epoch": 2.715767634854772, + "grad_norm": 11.101727485656738, + "learning_rate": 4.046795758414016e-05, + "log_odds_chosen": 8.115633010864258, + "log_odds_ratio": -0.01802477240562439, + "logits/chosen": -0.6598634719848633, + "logits/rejected": -0.6970669627189636, + "logps/chosen": -0.029014674946665764, + "logps/rejected": -1.402409553527832, + "loss": 2.0918, + "nll_loss": 0.5211363434791565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00290146772749722, + "rewards/margins": 0.1373395025730133, + "rewards/rejected": -0.14024096727371216, + "step": 3927 + }, + { + "epoch": 2.7164591977869987, + "grad_norm": 5.158077239990234, + "learning_rate": 4.0464115567850006e-05, + "log_odds_chosen": 8.50020980834961, + "log_odds_ratio": -0.0006952831172384322, + "logits/chosen": -0.3887978792190552, + "logits/rejected": -0.3988155126571655, + "logps/chosen": -0.00446537509560585, + "logps/rejected": -1.4568560123443604, + "loss": 1.2471, + "nll_loss": 0.31169915199279785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000446537509560585, + "rewards/margins": 0.14523907005786896, + "rewards/rejected": -0.1456855982542038, + "step": 3928 + }, + { + "epoch": 2.7171507607192256, + "grad_norm": 10.49365234375, + "learning_rate": 4.0460273551559865e-05, + "log_odds_chosen": 7.3715620040893555, + "log_odds_ratio": -0.10738270729780197, + "logits/chosen": -0.6264104843139648, + "logits/rejected": -0.6489126086235046, + "logps/chosen": -0.03647862747311592, + "logps/rejected": -1.6284416913986206, + "loss": 2.7679, + "nll_loss": 0.6812424063682556, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036478627007454634, + "rewards/margins": 0.15919630229473114, + "rewards/rejected": -0.16284418106079102, + "step": 3929 + }, + { + "epoch": 2.7178423236514524, + "grad_norm": 3.889863967895508, + "learning_rate": 4.045643153526971e-05, + "log_odds_chosen": 6.721612930297852, + "log_odds_ratio": -0.034767501056194305, + "logits/chosen": -0.447994589805603, + "logits/rejected": -0.5033765435218811, + "logps/chosen": -0.05682168900966644, + "logps/rejected": -1.3589305877685547, + "loss": 2.0974, + "nll_loss": 0.5208672881126404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005682168994098902, + "rewards/margins": 0.13021089136600494, + "rewards/rejected": -0.1358930617570877, + "step": 3930 + }, + { + "epoch": 2.7185338865836792, + "grad_norm": 7.74894905090332, + "learning_rate": 4.045258951897956e-05, + "log_odds_chosen": 8.092951774597168, + "log_odds_ratio": -0.07686490565538406, + "logits/chosen": -0.49599015712738037, + "logits/rejected": -0.5249794721603394, + "logps/chosen": -0.01426799688488245, + "logps/rejected": -1.4215922355651855, + "loss": 1.975, + "nll_loss": 0.486062228679657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001426799688488245, + "rewards/margins": 0.14073242247104645, + "rewards/rejected": -0.14215922355651855, + "step": 3931 + }, + { + "epoch": 2.719225449515906, + "grad_norm": 6.715245246887207, + "learning_rate": 4.0448747502689415e-05, + "log_odds_chosen": 7.388888359069824, + "log_odds_ratio": -0.0016329422360286117, + "logits/chosen": -0.5692066550254822, + "logits/rejected": -0.6565683484077454, + "logps/chosen": -0.003036600537598133, + "logps/rejected": -1.260680079460144, + "loss": 2.2788, + "nll_loss": 0.569549024105072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003036600537598133, + "rewards/margins": 0.12576434016227722, + "rewards/rejected": -0.12606801092624664, + "step": 3932 + }, + { + "epoch": 2.719917012448133, + "grad_norm": 7.658824920654297, + "learning_rate": 4.044490548639927e-05, + "log_odds_chosen": 9.301992416381836, + "log_odds_ratio": -0.0002013940247707069, + "logits/chosen": -0.6492097973823547, + "logits/rejected": -0.7609802484512329, + "logps/chosen": -0.00040117939352057874, + "logps/rejected": -1.10262131690979, + "loss": 2.1947, + "nll_loss": 0.5486506223678589, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011793862446211e-05, + "rewards/margins": 0.11022201180458069, + "rewards/rejected": -0.11026212573051453, + "step": 3933 + }, + { + "epoch": 2.7206085753803597, + "grad_norm": 8.80551528930664, + "learning_rate": 4.0441063470109114e-05, + "log_odds_chosen": 7.008855819702148, + "log_odds_ratio": -0.08213210105895996, + "logits/chosen": -0.6002588868141174, + "logits/rejected": -0.6500236988067627, + "logps/chosen": -0.027482986450195312, + "logps/rejected": -1.412733554840088, + "loss": 2.0206, + "nll_loss": 0.49693989753723145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00274829869158566, + "rewards/margins": 0.13852505385875702, + "rewards/rejected": -0.1412733495235443, + "step": 3934 + }, + { + "epoch": 2.7213001383125865, + "grad_norm": 10.194206237792969, + "learning_rate": 4.0437221453818966e-05, + "log_odds_chosen": 7.7847771644592285, + "log_odds_ratio": -0.07691100239753723, + "logits/chosen": -0.17872712016105652, + "logits/rejected": -0.23985889554023743, + "logps/chosen": -0.01729346066713333, + "logps/rejected": -1.4045839309692383, + "loss": 2.0997, + "nll_loss": 0.5172348022460938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017293461132794619, + "rewards/margins": 0.1387290358543396, + "rewards/rejected": -0.1404583901166916, + "step": 3935 + }, + { + "epoch": 2.7219917012448134, + "grad_norm": 10.218342781066895, + "learning_rate": 4.043337943752882e-05, + "log_odds_chosen": 8.4993314743042, + "log_odds_ratio": -0.002254007151350379, + "logits/chosen": -0.8403540253639221, + "logits/rejected": -0.8589710593223572, + "logps/chosen": -0.004420984070748091, + "logps/rejected": -1.4414920806884766, + "loss": 2.1637, + "nll_loss": 0.5407010316848755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004420984478201717, + "rewards/margins": 0.14370711147785187, + "rewards/rejected": -0.14414921402931213, + "step": 3936 + }, + { + "epoch": 2.72268326417704, + "grad_norm": 6.502514362335205, + "learning_rate": 4.0429537421238664e-05, + "log_odds_chosen": 6.624269485473633, + "log_odds_ratio": -0.03526769578456879, + "logits/chosen": -0.283847838640213, + "logits/rejected": -0.3334774076938629, + "logps/chosen": -0.017359206452965736, + "logps/rejected": -1.1060389280319214, + "loss": 1.9313, + "nll_loss": 0.4792954921722412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017359207849949598, + "rewards/margins": 0.10886797308921814, + "rewards/rejected": -0.11060389131307602, + "step": 3937 + }, + { + "epoch": 2.723374827109267, + "grad_norm": 8.666592597961426, + "learning_rate": 4.042569540494852e-05, + "log_odds_chosen": 8.527840614318848, + "log_odds_ratio": -0.06644105166196823, + "logits/chosen": -0.4190026521682739, + "logits/rejected": -0.44489941000938416, + "logps/chosen": -0.012680643238127232, + "logps/rejected": -1.3973329067230225, + "loss": 2.6437, + "nll_loss": 0.6542931795120239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00126806425396353, + "rewards/margins": 0.13846522569656372, + "rewards/rejected": -0.13973329961299896, + "step": 3938 + }, + { + "epoch": 2.724066390041494, + "grad_norm": 5.463160991668701, + "learning_rate": 4.042185338865837e-05, + "log_odds_chosen": 7.737649917602539, + "log_odds_ratio": -0.08719510585069656, + "logits/chosen": -0.5029735565185547, + "logits/rejected": -0.5753784775733948, + "logps/chosen": -0.015753207728266716, + "logps/rejected": -1.1266005039215088, + "loss": 1.5785, + "nll_loss": 0.38590627908706665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015753208426758647, + "rewards/margins": 0.11108473688364029, + "rewards/rejected": -0.11266005784273148, + "step": 3939 + }, + { + "epoch": 2.7247579529737207, + "grad_norm": 12.32331657409668, + "learning_rate": 4.041801137236822e-05, + "log_odds_chosen": 6.248561382293701, + "log_odds_ratio": -0.05965609475970268, + "logits/chosen": -0.560837984085083, + "logits/rejected": -0.6607064008712769, + "logps/chosen": -0.04490054026246071, + "logps/rejected": -1.6605818271636963, + "loss": 2.3404, + "nll_loss": 0.5791374444961548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0044900537468492985, + "rewards/margins": 0.16156813502311707, + "rewards/rejected": -0.16605818271636963, + "step": 3940 + }, + { + "epoch": 2.7254495159059475, + "grad_norm": 11.115545272827148, + "learning_rate": 4.0414169356078074e-05, + "log_odds_chosen": 7.029140949249268, + "log_odds_ratio": -0.01237096730619669, + "logits/chosen": -0.44518017768859863, + "logits/rejected": -0.4358668327331543, + "logps/chosen": -0.018776437267661095, + "logps/rejected": -1.5602182149887085, + "loss": 2.9906, + "nll_loss": 0.7464084625244141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018776437500491738, + "rewards/margins": 0.15414418280124664, + "rewards/rejected": -0.1560218185186386, + "step": 3941 + }, + { + "epoch": 2.7261410788381744, + "grad_norm": 14.741984367370605, + "learning_rate": 4.0410327339787926e-05, + "log_odds_chosen": 9.35867691040039, + "log_odds_ratio": -0.0004632086493074894, + "logits/chosen": -0.8224536776542664, + "logits/rejected": -0.8809283971786499, + "logps/chosen": -0.0005872789770364761, + "logps/rejected": -1.5677838325500488, + "loss": 2.3178, + "nll_loss": 0.5793916583061218, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872789188288152e-05, + "rewards/margins": 0.1567196547985077, + "rewards/rejected": -0.15677838027477264, + "step": 3942 + }, + { + "epoch": 2.726832641770401, + "grad_norm": 9.652228355407715, + "learning_rate": 4.040648532349777e-05, + "log_odds_chosen": 7.889554500579834, + "log_odds_ratio": -0.018208064138889313, + "logits/chosen": -0.488161563873291, + "logits/rejected": -0.5011002421379089, + "logps/chosen": -0.06083019822835922, + "logps/rejected": -2.197711229324341, + "loss": 2.2119, + "nll_loss": 0.5511464476585388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006083020009100437, + "rewards/margins": 0.21368810534477234, + "rewards/rejected": -0.2197711169719696, + "step": 3943 + }, + { + "epoch": 2.727524204702628, + "grad_norm": 10.315166473388672, + "learning_rate": 4.0402643307207624e-05, + "log_odds_chosen": 6.549238204956055, + "log_odds_ratio": -0.12524710595607758, + "logits/chosen": -0.8253248929977417, + "logits/rejected": -0.8640207648277283, + "logps/chosen": -0.04595312848687172, + "logps/rejected": -1.4616085290908813, + "loss": 2.8316, + "nll_loss": 0.6953847408294678, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0045953127555549145, + "rewards/margins": 0.14156554639339447, + "rewards/rejected": -0.14616085588932037, + "step": 3944 + }, + { + "epoch": 2.728215767634855, + "grad_norm": 8.87439250946045, + "learning_rate": 4.039880129091748e-05, + "log_odds_chosen": 7.761504173278809, + "log_odds_ratio": -0.10865526646375656, + "logits/chosen": -0.005162131041288376, + "logits/rejected": -0.15493258833885193, + "logps/chosen": -0.08741172403097153, + "logps/rejected": -1.810775876045227, + "loss": 1.7261, + "nll_loss": 0.42064934968948364, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008741172961890697, + "rewards/margins": 0.1723364144563675, + "rewards/rejected": -0.18107758462429047, + "step": 3945 + }, + { + "epoch": 2.7289073305670817, + "grad_norm": 9.416640281677246, + "learning_rate": 4.039495927462732e-05, + "log_odds_chosen": 6.959840297698975, + "log_odds_ratio": -0.04584544152021408, + "logits/chosen": -0.35366320610046387, + "logits/rejected": -0.4630590081214905, + "logps/chosen": -0.03433135151863098, + "logps/rejected": -1.7933472394943237, + "loss": 1.7769, + "nll_loss": 0.4396374225616455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034331348724663258, + "rewards/margins": 0.17590157687664032, + "rewards/rejected": -0.17933472990989685, + "step": 3946 + }, + { + "epoch": 2.7295988934993085, + "grad_norm": 10.85071849822998, + "learning_rate": 4.039111725833718e-05, + "log_odds_chosen": 9.792091369628906, + "log_odds_ratio": -0.0015566610964015126, + "logits/chosen": -0.450967013835907, + "logits/rejected": -0.5745599269866943, + "logps/chosen": -0.006072147749364376, + "logps/rejected": -2.566922187805176, + "loss": 2.017, + "nll_loss": 0.5040937066078186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006072148098610342, + "rewards/margins": 0.25608503818511963, + "rewards/rejected": -0.25669223070144653, + "step": 3947 + }, + { + "epoch": 2.7302904564315353, + "grad_norm": 12.596390724182129, + "learning_rate": 4.038727524204703e-05, + "log_odds_chosen": 9.460590362548828, + "log_odds_ratio": -0.0002683461061678827, + "logits/chosen": -0.580082893371582, + "logits/rejected": -0.689507782459259, + "logps/chosen": -0.000847513903863728, + "logps/rejected": -1.7451519966125488, + "loss": 2.1106, + "nll_loss": 0.5276321172714233, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.475138747598976e-05, + "rewards/margins": 0.1744304597377777, + "rewards/rejected": -0.17451520264148712, + "step": 3948 + }, + { + "epoch": 2.730982019363762, + "grad_norm": 7.90508508682251, + "learning_rate": 4.038343322575688e-05, + "log_odds_chosen": 9.44764232635498, + "log_odds_ratio": -0.0003510116948746145, + "logits/chosen": -0.6924615502357483, + "logits/rejected": -0.6415228843688965, + "logps/chosen": -0.0007907212129794061, + "logps/rejected": -1.8190643787384033, + "loss": 2.1582, + "nll_loss": 0.5395174622535706, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.907212420832366e-05, + "rewards/margins": 0.1818273663520813, + "rewards/rejected": -0.18190644681453705, + "step": 3949 + }, + { + "epoch": 2.731673582295989, + "grad_norm": 8.298971176147461, + "learning_rate": 4.037959120946673e-05, + "log_odds_chosen": 7.152010440826416, + "log_odds_ratio": -0.0063316673040390015, + "logits/chosen": -0.6834003925323486, + "logits/rejected": -0.7061575055122375, + "logps/chosen": -0.012138995341956615, + "logps/rejected": -1.122275710105896, + "loss": 2.9132, + "nll_loss": 0.7276560664176941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012138995807617903, + "rewards/margins": 0.11101366579532623, + "rewards/rejected": -0.11222756654024124, + "step": 3950 + }, + { + "epoch": 2.732365145228216, + "grad_norm": 11.298041343688965, + "learning_rate": 4.0375749193176585e-05, + "log_odds_chosen": 7.214498519897461, + "log_odds_ratio": -0.10135520249605179, + "logits/chosen": -0.7198128700256348, + "logits/rejected": -0.7529308199882507, + "logps/chosen": -0.024711720645427704, + "logps/rejected": -1.5879918336868286, + "loss": 3.3971, + "nll_loss": 0.8391504287719727, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002471171785145998, + "rewards/margins": 0.156328022480011, + "rewards/rejected": -0.1587992012500763, + "step": 3951 + }, + { + "epoch": 2.7330567081604427, + "grad_norm": 5.922629356384277, + "learning_rate": 4.037190717688643e-05, + "log_odds_chosen": 8.451513290405273, + "log_odds_ratio": -0.06844214349985123, + "logits/chosen": -0.566558837890625, + "logits/rejected": -0.593731164932251, + "logps/chosen": -0.020725499838590622, + "logps/rejected": -1.4322352409362793, + "loss": 2.1264, + "nll_loss": 0.5247609615325928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020725501235574484, + "rewards/margins": 0.14115098118782043, + "rewards/rejected": -0.14322352409362793, + "step": 3952 + }, + { + "epoch": 2.7337482710926695, + "grad_norm": 8.351309776306152, + "learning_rate": 4.036806516059628e-05, + "log_odds_chosen": 6.58920955657959, + "log_odds_ratio": -0.1379947066307068, + "logits/chosen": -0.47179096937179565, + "logits/rejected": -0.447683185338974, + "logps/chosen": -0.039678920060396194, + "logps/rejected": -1.048729419708252, + "loss": 2.3215, + "nll_loss": 0.5665820837020874, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003967892378568649, + "rewards/margins": 0.10090505331754684, + "rewards/rejected": -0.1048729419708252, + "step": 3953 + }, + { + "epoch": 2.7344398340248963, + "grad_norm": 9.427661895751953, + "learning_rate": 4.0364223144306135e-05, + "log_odds_chosen": 9.888446807861328, + "log_odds_ratio": -7.997354987310246e-05, + "logits/chosen": -0.5060456991195679, + "logits/rejected": -0.583258867263794, + "logps/chosen": -0.00034985889215022326, + "logps/rejected": -1.884965419769287, + "loss": 1.8423, + "nll_loss": 0.46057385206222534, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.498589285300113e-05, + "rewards/margins": 0.1884615570306778, + "rewards/rejected": -0.18849653005599976, + "step": 3954 + }, + { + "epoch": 2.735131396957123, + "grad_norm": 10.099342346191406, + "learning_rate": 4.036038112801598e-05, + "log_odds_chosen": 5.796465873718262, + "log_odds_ratio": -0.08623596280813217, + "logits/chosen": -0.9473574161529541, + "logits/rejected": -0.9406151175498962, + "logps/chosen": -0.010863440111279488, + "logps/rejected": -0.6920241117477417, + "loss": 2.4653, + "nll_loss": 0.6077094078063965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010863440111279488, + "rewards/margins": 0.06811606884002686, + "rewards/rejected": -0.06920240819454193, + "step": 3955 + }, + { + "epoch": 2.73582295988935, + "grad_norm": 9.084993362426758, + "learning_rate": 4.0356539111725833e-05, + "log_odds_chosen": 8.409929275512695, + "log_odds_ratio": -0.05629992485046387, + "logits/chosen": -0.634174644947052, + "logits/rejected": -0.6410291194915771, + "logps/chosen": -0.009794793091714382, + "logps/rejected": -1.433899164199829, + "loss": 2.3453, + "nll_loss": 0.5806872248649597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009794794023036957, + "rewards/margins": 0.14241044223308563, + "rewards/rejected": -0.14338991045951843, + "step": 3956 + }, + { + "epoch": 2.736514522821577, + "grad_norm": 8.876943588256836, + "learning_rate": 4.0352697095435686e-05, + "log_odds_chosen": 8.132782936096191, + "log_odds_ratio": -0.0022850818932056427, + "logits/chosen": -0.9935863018035889, + "logits/rejected": -0.968889057636261, + "logps/chosen": -0.09043926745653152, + "logps/rejected": -1.6776096820831299, + "loss": 2.4984, + "nll_loss": 0.6243612766265869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009043926373124123, + "rewards/margins": 0.15871703624725342, + "rewards/rejected": -0.167760968208313, + "step": 3957 + }, + { + "epoch": 2.7372060857538036, + "grad_norm": 9.663110733032227, + "learning_rate": 4.034885507914554e-05, + "log_odds_chosen": 9.72215747833252, + "log_odds_ratio": -0.00014341410133056343, + "logits/chosen": -0.6471495628356934, + "logits/rejected": -0.607186496257782, + "logps/chosen": -0.0002497847599443048, + "logps/rejected": -1.2455099821090698, + "loss": 2.172, + "nll_loss": 0.5429768562316895, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.497847526683472e-05, + "rewards/margins": 0.1245260089635849, + "rewards/rejected": -0.12455099821090698, + "step": 3958 + }, + { + "epoch": 2.7378976486860305, + "grad_norm": 11.114601135253906, + "learning_rate": 4.0345013062855384e-05, + "log_odds_chosen": 9.111005783081055, + "log_odds_ratio": -0.0003828184853773564, + "logits/chosen": -0.6637808084487915, + "logits/rejected": -0.6742137670516968, + "logps/chosen": -0.01722045987844467, + "logps/rejected": -1.8206391334533691, + "loss": 2.439, + "nll_loss": 0.6097138524055481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017220460576936603, + "rewards/margins": 0.18034186959266663, + "rewards/rejected": -0.18206390738487244, + "step": 3959 + }, + { + "epoch": 2.7385892116182573, + "grad_norm": 6.329898357391357, + "learning_rate": 4.034117104656524e-05, + "log_odds_chosen": 6.555821418762207, + "log_odds_ratio": -0.016997672617435455, + "logits/chosen": -0.5201515555381775, + "logits/rejected": -0.6183047294616699, + "logps/chosen": -0.030940266326069832, + "logps/rejected": -1.0968849658966064, + "loss": 2.1955, + "nll_loss": 0.5471838116645813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003094026818871498, + "rewards/margins": 0.10659447312355042, + "rewards/rejected": -0.10968849062919617, + "step": 3960 + }, + { + "epoch": 2.739280774550484, + "grad_norm": 21.0051326751709, + "learning_rate": 4.033732903027509e-05, + "log_odds_chosen": 7.625939846038818, + "log_odds_ratio": -0.05772934854030609, + "logits/chosen": -0.830172598361969, + "logits/rejected": -0.8871779441833496, + "logps/chosen": -0.009763781912624836, + "logps/rejected": -1.484619379043579, + "loss": 2.1533, + "nll_loss": 0.5325421094894409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000976378214545548, + "rewards/margins": 0.14748555421829224, + "rewards/rejected": -0.1484619379043579, + "step": 3961 + }, + { + "epoch": 2.739972337482711, + "grad_norm": 4.871427536010742, + "learning_rate": 4.033348701398494e-05, + "log_odds_chosen": 8.589265823364258, + "log_odds_ratio": -0.025390522554516792, + "logits/chosen": -0.6745571494102478, + "logits/rejected": -0.836500883102417, + "logps/chosen": -0.015709497034549713, + "logps/rejected": -1.4090120792388916, + "loss": 1.9426, + "nll_loss": 0.48311781883239746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001570949680171907, + "rewards/margins": 0.13933026790618896, + "rewards/rejected": -0.14090119302272797, + "step": 3962 + }, + { + "epoch": 2.740663900414938, + "grad_norm": 9.784700393676758, + "learning_rate": 4.0329644997694794e-05, + "log_odds_chosen": 7.266717433929443, + "log_odds_ratio": -0.24722522497177124, + "logits/chosen": -0.6542754769325256, + "logits/rejected": -0.7426164150238037, + "logps/chosen": -0.050657421350479126, + "logps/rejected": -0.9146636128425598, + "loss": 2.0315, + "nll_loss": 0.48315340280532837, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005065742414444685, + "rewards/margins": 0.08640061318874359, + "rewards/rejected": -0.09146635979413986, + "step": 3963 + }, + { + "epoch": 2.7413554633471646, + "grad_norm": 6.475007057189941, + "learning_rate": 4.032580298140464e-05, + "log_odds_chosen": 7.788166046142578, + "log_odds_ratio": -0.054864123463630676, + "logits/chosen": -0.7246919870376587, + "logits/rejected": -0.7207714915275574, + "logps/chosen": -0.028761431574821472, + "logps/rejected": -1.7532126903533936, + "loss": 1.7134, + "nll_loss": 0.42286068201065063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002876143204048276, + "rewards/margins": 0.1724451184272766, + "rewards/rejected": -0.1753212809562683, + "step": 3964 + }, + { + "epoch": 2.7420470262793915, + "grad_norm": 12.352106094360352, + "learning_rate": 4.032196096511449e-05, + "log_odds_chosen": 6.385231971740723, + "log_odds_ratio": -0.1378117799758911, + "logits/chosen": -0.6535122990608215, + "logits/rejected": -0.6418668031692505, + "logps/chosen": -0.03800104185938835, + "logps/rejected": -0.8482541441917419, + "loss": 2.5641, + "nll_loss": 0.6272392272949219, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038001039065420628, + "rewards/margins": 0.08102530986070633, + "rewards/rejected": -0.08482541143894196, + "step": 3965 + }, + { + "epoch": 2.7427385892116183, + "grad_norm": 6.539783954620361, + "learning_rate": 4.0318118948824344e-05, + "log_odds_chosen": 9.533210754394531, + "log_odds_ratio": -0.0005043463315814734, + "logits/chosen": -0.9464341402053833, + "logits/rejected": -0.9169366955757141, + "logps/chosen": -0.00027665687957778573, + "logps/rejected": -1.6659789085388184, + "loss": 2.2158, + "nll_loss": 0.5538901090621948, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7665688321576454e-05, + "rewards/margins": 0.16657023131847382, + "rewards/rejected": -0.1665979027748108, + "step": 3966 + }, + { + "epoch": 2.743430152143845, + "grad_norm": 11.467350006103516, + "learning_rate": 4.03142769325342e-05, + "log_odds_chosen": 6.552248477935791, + "log_odds_ratio": -0.2534642815589905, + "logits/chosen": -0.7549129128456116, + "logits/rejected": -0.827406644821167, + "logps/chosen": -0.03583712875843048, + "logps/rejected": -1.2653224468231201, + "loss": 2.5423, + "nll_loss": 0.6102339625358582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035837129689753056, + "rewards/margins": 0.1229485347867012, + "rewards/rejected": -0.12653225660324097, + "step": 3967 + }, + { + "epoch": 2.744121715076072, + "grad_norm": 6.8532938957214355, + "learning_rate": 4.031043491624404e-05, + "log_odds_chosen": 8.665301322937012, + "log_odds_ratio": -0.0003807971370406449, + "logits/chosen": -0.9270603656768799, + "logits/rejected": -0.9942599534988403, + "logps/chosen": -0.016690397635102272, + "logps/rejected": -2.1862082481384277, + "loss": 2.1557, + "nll_loss": 0.5388835668563843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016690397169440985, + "rewards/margins": 0.21695180237293243, + "rewards/rejected": -0.21862083673477173, + "step": 3968 + }, + { + "epoch": 2.7448132780082988, + "grad_norm": 10.167153358459473, + "learning_rate": 4.03065928999539e-05, + "log_odds_chosen": 8.84040355682373, + "log_odds_ratio": -0.00025179842486977577, + "logits/chosen": -0.7059938907623291, + "logits/rejected": -0.9238421320915222, + "logps/chosen": -0.0011752690188586712, + "logps/rejected": -1.904888391494751, + "loss": 3.019, + "nll_loss": 0.7547341585159302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001175269135273993, + "rewards/margins": 0.1903713047504425, + "rewards/rejected": -0.19048884510993958, + "step": 3969 + }, + { + "epoch": 2.7455048409405256, + "grad_norm": 4.787475109100342, + "learning_rate": 4.030275088366375e-05, + "log_odds_chosen": 8.128543853759766, + "log_odds_ratio": -0.09393294155597687, + "logits/chosen": -0.33512839674949646, + "logits/rejected": -0.311124712228775, + "logps/chosen": -0.03485126420855522, + "logps/rejected": -1.4904800653457642, + "loss": 1.7159, + "nll_loss": 0.41957443952560425, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003485126420855522, + "rewards/margins": 0.14556287229061127, + "rewards/rejected": -0.14904801547527313, + "step": 3970 + }, + { + "epoch": 2.7461964038727524, + "grad_norm": 6.264207363128662, + "learning_rate": 4.02989088673736e-05, + "log_odds_chosen": 8.825662612915039, + "log_odds_ratio": -0.000746442936360836, + "logits/chosen": -0.7133558988571167, + "logits/rejected": -0.7757239937782288, + "logps/chosen": -0.004503064788877964, + "logps/rejected": -1.8890973329544067, + "loss": 2.119, + "nll_loss": 0.5296758413314819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045030651381239295, + "rewards/margins": 0.18845942616462708, + "rewards/rejected": -0.18890973925590515, + "step": 3971 + }, + { + "epoch": 2.7468879668049793, + "grad_norm": 7.407824516296387, + "learning_rate": 4.029506685108345e-05, + "log_odds_chosen": 7.501564979553223, + "log_odds_ratio": -0.0042665572836995125, + "logits/chosen": -0.771816611289978, + "logits/rejected": -0.8200007081031799, + "logps/chosen": -0.002163660479709506, + "logps/rejected": -1.0099034309387207, + "loss": 2.718, + "nll_loss": 0.6790682077407837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021636603923980147, + "rewards/margins": 0.10077398270368576, + "rewards/rejected": -0.10099035501480103, + "step": 3972 + }, + { + "epoch": 2.747579529737206, + "grad_norm": 7.049964904785156, + "learning_rate": 4.02912248347933e-05, + "log_odds_chosen": 8.63952922821045, + "log_odds_ratio": -0.000538341177161783, + "logits/chosen": -0.78837651014328, + "logits/rejected": -0.7955034971237183, + "logps/chosen": -0.000618205638602376, + "logps/rejected": -1.2546896934509277, + "loss": 2.262, + "nll_loss": 0.5654584765434265, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182056677062064e-05, + "rewards/margins": 0.1254071444272995, + "rewards/rejected": -0.12546896934509277, + "step": 3973 + }, + { + "epoch": 2.748271092669433, + "grad_norm": 10.377960205078125, + "learning_rate": 4.028738281850315e-05, + "log_odds_chosen": 8.370406150817871, + "log_odds_ratio": -0.0035593262873589993, + "logits/chosen": -0.9567809104919434, + "logits/rejected": -0.9531281590461731, + "logps/chosen": -0.01249743066728115, + "logps/rejected": -1.7988122701644897, + "loss": 2.3203, + "nll_loss": 0.5797082781791687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012497431598603725, + "rewards/margins": 0.1786315143108368, + "rewards/rejected": -0.1798812448978424, + "step": 3974 + }, + { + "epoch": 2.7489626556016598, + "grad_norm": 8.847599029541016, + "learning_rate": 4.0283540802213e-05, + "log_odds_chosen": 8.98076343536377, + "log_odds_ratio": -0.0664471983909607, + "logits/chosen": -0.6736356616020203, + "logits/rejected": -0.7417312264442444, + "logps/chosen": -0.012593384832143784, + "logps/rejected": -1.628709077835083, + "loss": 2.18, + "nll_loss": 0.538344144821167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012593385763466358, + "rewards/margins": 0.16161157190799713, + "rewards/rejected": -0.16287091374397278, + "step": 3975 + }, + { + "epoch": 2.7496542185338866, + "grad_norm": 8.94532299041748, + "learning_rate": 4.0279698785922855e-05, + "log_odds_chosen": 7.802983283996582, + "log_odds_ratio": -0.002552380319684744, + "logits/chosen": -0.6118870377540588, + "logits/rejected": -0.6478884220123291, + "logps/chosen": -0.014447561465203762, + "logps/rejected": -1.78226900100708, + "loss": 2.3816, + "nll_loss": 0.5951405167579651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014447560533881187, + "rewards/margins": 0.17678214609622955, + "rewards/rejected": -0.17822691798210144, + "step": 3976 + }, + { + "epoch": 2.7503457814661134, + "grad_norm": 7.9482011795043945, + "learning_rate": 4.02758567696327e-05, + "log_odds_chosen": 7.3630757331848145, + "log_odds_ratio": -0.029140042141079903, + "logits/chosen": -0.6187721490859985, + "logits/rejected": -0.6251906752586365, + "logps/chosen": -0.01222775038331747, + "logps/rejected": -1.3199526071548462, + "loss": 2.0326, + "nll_loss": 0.5052246451377869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001222774968482554, + "rewards/margins": 0.13077250123023987, + "rewards/rejected": -0.13199526071548462, + "step": 3977 + }, + { + "epoch": 2.7510373443983402, + "grad_norm": 13.724565505981445, + "learning_rate": 4.027201475334256e-05, + "log_odds_chosen": 7.626209259033203, + "log_odds_ratio": -0.019534602761268616, + "logits/chosen": -0.7065523862838745, + "logits/rejected": -0.7183932065963745, + "logps/chosen": -0.07203464210033417, + "logps/rejected": -1.4935412406921387, + "loss": 2.5822, + "nll_loss": 0.6436068415641785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007203464396297932, + "rewards/margins": 0.1421506404876709, + "rewards/rejected": -0.14935411512851715, + "step": 3978 + }, + { + "epoch": 2.751728907330567, + "grad_norm": 8.385231971740723, + "learning_rate": 4.0268172737052406e-05, + "log_odds_chosen": 8.239843368530273, + "log_odds_ratio": -0.006714486517012119, + "logits/chosen": -0.6160125136375427, + "logits/rejected": -0.6247880458831787, + "logps/chosen": -0.03856637701392174, + "logps/rejected": -1.549187421798706, + "loss": 1.8715, + "nll_loss": 0.46721044182777405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00385663821361959, + "rewards/margins": 0.15106210112571716, + "rewards/rejected": -0.15491873025894165, + "step": 3979 + }, + { + "epoch": 2.752420470262794, + "grad_norm": 4.186921119689941, + "learning_rate": 4.026433072076226e-05, + "log_odds_chosen": 7.790356636047363, + "log_odds_ratio": -0.018140949308872223, + "logits/chosen": -0.3370264172554016, + "logits/rejected": -0.31349921226501465, + "logps/chosen": -0.03148407116532326, + "logps/rejected": -1.4903897047042847, + "loss": 1.9644, + "nll_loss": 0.48929262161254883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003148407209664583, + "rewards/margins": 0.14589056372642517, + "rewards/rejected": -0.14903897047042847, + "step": 3980 + }, + { + "epoch": 2.7531120331950207, + "grad_norm": 16.19760513305664, + "learning_rate": 4.026048870447211e-05, + "log_odds_chosen": 7.4634857177734375, + "log_odds_ratio": -0.3400443494319916, + "logits/chosen": -0.22639168798923492, + "logits/rejected": -0.19484885036945343, + "logps/chosen": -0.03847894072532654, + "logps/rejected": -1.5501339435577393, + "loss": 1.7273, + "nll_loss": 0.39781975746154785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003847894025966525, + "rewards/margins": 0.15116551518440247, + "rewards/rejected": -0.15501339733600616, + "step": 3981 + }, + { + "epoch": 2.7538035961272476, + "grad_norm": 6.2771782875061035, + "learning_rate": 4.0256646688181956e-05, + "log_odds_chosen": 6.931901454925537, + "log_odds_ratio": -0.3148242235183716, + "logits/chosen": -0.6994768381118774, + "logits/rejected": -0.7499065399169922, + "logps/chosen": -0.06756354868412018, + "logps/rejected": -1.5410547256469727, + "loss": 2.0378, + "nll_loss": 0.47797098755836487, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006756355054676533, + "rewards/margins": 0.14734911918640137, + "rewards/rejected": -0.15410546958446503, + "step": 3982 + }, + { + "epoch": 2.7544951590594744, + "grad_norm": 5.253891468048096, + "learning_rate": 4.025280467189181e-05, + "log_odds_chosen": 8.897655487060547, + "log_odds_ratio": -0.0008970237104222178, + "logits/chosen": -0.45553719997406006, + "logits/rejected": -0.48855888843536377, + "logps/chosen": -0.019885070621967316, + "logps/rejected": -1.8306454420089722, + "loss": 1.6886, + "nll_loss": 0.42205068469047546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019885068759322166, + "rewards/margins": 0.1810760498046875, + "rewards/rejected": -0.1830645501613617, + "step": 3983 + }, + { + "epoch": 2.7551867219917012, + "grad_norm": 16.035680770874023, + "learning_rate": 4.024896265560166e-05, + "log_odds_chosen": 9.053376197814941, + "log_odds_ratio": -0.029256589710712433, + "logits/chosen": -0.5385884642601013, + "logits/rejected": -0.6471099257469177, + "logps/chosen": -0.004622517619282007, + "logps/rejected": -1.9592335224151611, + "loss": 3.2722, + "nll_loss": 0.8151220083236694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046225180267356336, + "rewards/margins": 0.19546110928058624, + "rewards/rejected": -0.19592337310314178, + "step": 3984 + }, + { + "epoch": 2.755878284923928, + "grad_norm": 7.4928483963012695, + "learning_rate": 4.0245120639311514e-05, + "log_odds_chosen": 8.065269470214844, + "log_odds_ratio": -0.0013664980651810765, + "logits/chosen": -0.3876135051250458, + "logits/rejected": -0.4293254315853119, + "logps/chosen": -0.030116790905594826, + "logps/rejected": -1.8832831382751465, + "loss": 1.8511, + "nll_loss": 0.4626496732234955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030116792768239975, + "rewards/margins": 0.18531666696071625, + "rewards/rejected": -0.1883283257484436, + "step": 3985 + }, + { + "epoch": 2.756569847856155, + "grad_norm": 14.424047470092773, + "learning_rate": 4.024127862302136e-05, + "log_odds_chosen": 9.725471496582031, + "log_odds_ratio": -0.000976808718405664, + "logits/chosen": -0.5688410401344299, + "logits/rejected": -0.7274327278137207, + "logps/chosen": -0.0016707740724086761, + "logps/rejected": -1.8488935232162476, + "loss": 2.6804, + "nll_loss": 0.6700056195259094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016707740724086761, + "rewards/margins": 0.18472227454185486, + "rewards/rejected": -0.18488934636116028, + "step": 3986 + }, + { + "epoch": 2.7572614107883817, + "grad_norm": 6.92812442779541, + "learning_rate": 4.023743660673122e-05, + "log_odds_chosen": 8.491493225097656, + "log_odds_ratio": -0.07710529118776321, + "logits/chosen": -0.5341342687606812, + "logits/rejected": -0.561559796333313, + "logps/chosen": -0.019994715228676796, + "logps/rejected": -1.5566470623016357, + "loss": 1.8402, + "nll_loss": 0.45234861969947815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019994715694338083, + "rewards/margins": 0.15366524457931519, + "rewards/rejected": -0.15566471219062805, + "step": 3987 + }, + { + "epoch": 2.7579529737206085, + "grad_norm": 8.029306411743164, + "learning_rate": 4.0233594590441064e-05, + "log_odds_chosen": 8.991018295288086, + "log_odds_ratio": -0.00014573686348740011, + "logits/chosen": -0.5196717381477356, + "logits/rejected": -0.5397700071334839, + "logps/chosen": -0.001510739792138338, + "logps/rejected": -1.6422498226165771, + "loss": 1.666, + "nll_loss": 0.41648074984550476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015107399667613208, + "rewards/margins": 0.1640739142894745, + "rewards/rejected": -0.16422498226165771, + "step": 3988 + }, + { + "epoch": 2.7586445366528354, + "grad_norm": 5.102666854858398, + "learning_rate": 4.022975257415092e-05, + "log_odds_chosen": 10.04948616027832, + "log_odds_ratio": -7.772055687382817e-05, + "logits/chosen": -0.6284279823303223, + "logits/rejected": -0.6805120706558228, + "logps/chosen": -0.0002829919976647943, + "logps/rejected": -1.904032588005066, + "loss": 1.6529, + "nll_loss": 0.4132192134857178, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8299202313064598e-05, + "rewards/margins": 0.1903749704360962, + "rewards/rejected": -0.1904032677412033, + "step": 3989 + }, + { + "epoch": 2.759336099585062, + "grad_norm": 9.161526679992676, + "learning_rate": 4.022591055786077e-05, + "log_odds_chosen": 7.404929161071777, + "log_odds_ratio": -0.0020130304619669914, + "logits/chosen": -0.5665072798728943, + "logits/rejected": -0.5944963097572327, + "logps/chosen": -0.012136176228523254, + "logps/rejected": -1.4921623468399048, + "loss": 2.8559, + "nll_loss": 0.7137806415557861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012136177392676473, + "rewards/margins": 0.14800262451171875, + "rewards/rejected": -0.14921623468399048, + "step": 3990 + }, + { + "epoch": 2.760027662517289, + "grad_norm": 12.188777923583984, + "learning_rate": 4.0222068541570615e-05, + "log_odds_chosen": 9.008995056152344, + "log_odds_ratio": -0.0009650088031776249, + "logits/chosen": -0.7447724342346191, + "logits/rejected": -0.8593225479125977, + "logps/chosen": -0.0012664712266996503, + "logps/rejected": -1.6476327180862427, + "loss": 2.5855, + "nll_loss": 0.6462736129760742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012664712266996503, + "rewards/margins": 0.16463662683963776, + "rewards/rejected": -0.16476327180862427, + "step": 3991 + }, + { + "epoch": 2.760719225449516, + "grad_norm": 12.439476013183594, + "learning_rate": 4.021822652528047e-05, + "log_odds_chosen": 8.762724876403809, + "log_odds_ratio": -0.07469207048416138, + "logits/chosen": -0.9009903073310852, + "logits/rejected": -0.9644653797149658, + "logps/chosen": -0.013878803700208664, + "logps/rejected": -1.6378298997879028, + "loss": 1.9406, + "nll_loss": 0.47767218947410583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013878804165869951, + "rewards/margins": 0.16239511966705322, + "rewards/rejected": -0.1637829840183258, + "step": 3992 + }, + { + "epoch": 2.7614107883817427, + "grad_norm": 13.11072063446045, + "learning_rate": 4.021438450899032e-05, + "log_odds_chosen": 6.637383460998535, + "log_odds_ratio": -0.4479435086250305, + "logits/chosen": -0.4912869930267334, + "logits/rejected": -0.5125991702079773, + "logps/chosen": -0.01698119565844536, + "logps/rejected": -1.0482051372528076, + "loss": 2.3407, + "nll_loss": 0.5403863191604614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0016981197986751795, + "rewards/margins": 0.10312239825725555, + "rewards/rejected": -0.10482051968574524, + "step": 3993 + }, + { + "epoch": 2.7621023513139695, + "grad_norm": 11.071776390075684, + "learning_rate": 4.021054249270017e-05, + "log_odds_chosen": 8.1903076171875, + "log_odds_ratio": -0.0026236893609166145, + "logits/chosen": -0.891800045967102, + "logits/rejected": -0.9731010794639587, + "logps/chosen": -0.01190229132771492, + "logps/rejected": -2.0794668197631836, + "loss": 1.7713, + "nll_loss": 0.4425641894340515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001190229202620685, + "rewards/margins": 0.20675644278526306, + "rewards/rejected": -0.20794667303562164, + "step": 3994 + }, + { + "epoch": 2.7627939142461964, + "grad_norm": 21.30668067932129, + "learning_rate": 4.020670047641002e-05, + "log_odds_chosen": 7.261108875274658, + "log_odds_ratio": -0.03292210027575493, + "logits/chosen": 0.15424926578998566, + "logits/rejected": 0.09129300713539124, + "logps/chosen": -0.08206956088542938, + "logps/rejected": -1.2352995872497559, + "loss": 1.7829, + "nll_loss": 0.4424290955066681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008206957019865513, + "rewards/margins": 0.11532299220561981, + "rewards/rejected": -0.12352995574474335, + "step": 3995 + }, + { + "epoch": 2.763485477178423, + "grad_norm": 6.121913909912109, + "learning_rate": 4.020285846011988e-05, + "log_odds_chosen": 7.284252166748047, + "log_odds_ratio": -0.10654313862323761, + "logits/chosen": -0.4905470609664917, + "logits/rejected": -0.5098867416381836, + "logps/chosen": -0.03048006258904934, + "logps/rejected": -1.1575863361358643, + "loss": 1.9294, + "nll_loss": 0.47170132398605347, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003048006445169449, + "rewards/margins": 0.1127106249332428, + "rewards/rejected": -0.11575863510370255, + "step": 3996 + }, + { + "epoch": 2.76417704011065, + "grad_norm": 9.75130558013916, + "learning_rate": 4.019901644382972e-05, + "log_odds_chosen": 7.484089374542236, + "log_odds_ratio": -0.06234271451830864, + "logits/chosen": -0.6909130215644836, + "logits/rejected": -0.7404910922050476, + "logps/chosen": -0.018211044371128082, + "logps/rejected": -1.218412160873413, + "loss": 2.6985, + "nll_loss": 0.6683934330940247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018211043206974864, + "rewards/margins": 0.12002012133598328, + "rewards/rejected": -0.12184122204780579, + "step": 3997 + }, + { + "epoch": 2.764868603042877, + "grad_norm": 10.215263366699219, + "learning_rate": 4.0195174427539575e-05, + "log_odds_chosen": 6.937007904052734, + "log_odds_ratio": -0.007254381664097309, + "logits/chosen": -0.6279563903808594, + "logits/rejected": -0.6680964827537537, + "logps/chosen": -0.01847420632839203, + "logps/rejected": -1.2389494180679321, + "loss": 1.7004, + "nll_loss": 0.4243742823600769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001847420702688396, + "rewards/margins": 0.1220475286245346, + "rewards/rejected": -0.12389494478702545, + "step": 3998 + }, + { + "epoch": 2.7655601659751037, + "grad_norm": 7.872920989990234, + "learning_rate": 4.019133241124943e-05, + "log_odds_chosen": 7.680271148681641, + "log_odds_ratio": -0.07638738304376602, + "logits/chosen": -0.5571773052215576, + "logits/rejected": -0.6206885576248169, + "logps/chosen": -0.038735780864953995, + "logps/rejected": -1.5839513540267944, + "loss": 2.0725, + "nll_loss": 0.5104899406433105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003873578505590558, + "rewards/margins": 0.15452156960964203, + "rewards/rejected": -0.15839514136314392, + "step": 3999 + }, + { + "epoch": 2.7662517289073305, + "grad_norm": 9.754056930541992, + "learning_rate": 4.018749039495927e-05, + "log_odds_chosen": 8.085151672363281, + "log_odds_ratio": -0.0051775057800114155, + "logits/chosen": -0.7449995875358582, + "logits/rejected": -0.8129273653030396, + "logps/chosen": -0.018867207691073418, + "logps/rejected": -2.167125701904297, + "loss": 1.6079, + "nll_loss": 0.4014506936073303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018867208855226636, + "rewards/margins": 0.2148258537054062, + "rewards/rejected": -0.2167125642299652, + "step": 4000 + }, + { + "epoch": 2.7669432918395573, + "grad_norm": 12.460733413696289, + "learning_rate": 4.0183648378669126e-05, + "log_odds_chosen": 8.746673583984375, + "log_odds_ratio": -0.0002168616047129035, + "logits/chosen": -0.6806103587150574, + "logits/rejected": -0.6511906385421753, + "logps/chosen": -0.005443001165986061, + "logps/rejected": -1.9330780506134033, + "loss": 2.4411, + "nll_loss": 0.610245406627655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005443001282401383, + "rewards/margins": 0.19276350736618042, + "rewards/rejected": -0.1933078020811081, + "step": 4001 + }, + { + "epoch": 2.767634854771784, + "grad_norm": 8.446491241455078, + "learning_rate": 4.017980636237898e-05, + "log_odds_chosen": 7.240784645080566, + "log_odds_ratio": -0.04213593900203705, + "logits/chosen": -0.8190625309944153, + "logits/rejected": -0.8051847219467163, + "logps/chosen": -0.010527187958359718, + "logps/rejected": -1.0580394268035889, + "loss": 2.336, + "nll_loss": 0.5797888040542603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010527188424021006, + "rewards/margins": 0.10475122928619385, + "rewards/rejected": -0.105803944170475, + "step": 4002 + }, + { + "epoch": 2.768326417704011, + "grad_norm": 9.982586860656738, + "learning_rate": 4.017596434608883e-05, + "log_odds_chosen": 8.95711612701416, + "log_odds_ratio": -0.004018096718937159, + "logits/chosen": -0.46991389989852905, + "logits/rejected": -0.5015337467193604, + "logps/chosen": -0.03133925050497055, + "logps/rejected": -2.5801265239715576, + "loss": 2.4328, + "nll_loss": 0.6077884435653687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003133924910798669, + "rewards/margins": 0.2548786997795105, + "rewards/rejected": -0.25801265239715576, + "step": 4003 + }, + { + "epoch": 2.769017980636238, + "grad_norm": 11.527600288391113, + "learning_rate": 4.0172122329798676e-05, + "log_odds_chosen": 6.015590667724609, + "log_odds_ratio": -0.04452924430370331, + "logits/chosen": -0.6099879741668701, + "logits/rejected": -0.6066796183586121, + "logps/chosen": -0.12464918941259384, + "logps/rejected": -2.0709896087646484, + "loss": 2.5879, + "nll_loss": 0.642532229423523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012464918196201324, + "rewards/margins": 0.19463402032852173, + "rewards/rejected": -0.20709894597530365, + "step": 4004 + }, + { + "epoch": 2.7697095435684647, + "grad_norm": 10.465564727783203, + "learning_rate": 4.0168280313508535e-05, + "log_odds_chosen": 8.651500701904297, + "log_odds_ratio": -0.00045137875713407993, + "logits/chosen": -0.6306678652763367, + "logits/rejected": -0.7083243131637573, + "logps/chosen": -0.0007956651970744133, + "logps/rejected": -1.4663710594177246, + "loss": 2.4454, + "nll_loss": 0.6112978458404541, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.956652552820742e-05, + "rewards/margins": 0.1465575248003006, + "rewards/rejected": -0.14663709700107574, + "step": 4005 + }, + { + "epoch": 2.7704011065006915, + "grad_norm": 7.135653495788574, + "learning_rate": 4.016443829721838e-05, + "log_odds_chosen": 6.230930328369141, + "log_odds_ratio": -0.06728748232126236, + "logits/chosen": -0.4055030643939972, + "logits/rejected": -0.4956081509590149, + "logps/chosen": -0.019831674173474312, + "logps/rejected": -1.0246037244796753, + "loss": 2.184, + "nll_loss": 0.5392595529556274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001983167137950659, + "rewards/margins": 0.1004772037267685, + "rewards/rejected": -0.10246037691831589, + "step": 4006 + }, + { + "epoch": 2.7710926694329183, + "grad_norm": 4.531956195831299, + "learning_rate": 4.0160596280928233e-05, + "log_odds_chosen": 8.730263710021973, + "log_odds_ratio": -0.0006904865731485188, + "logits/chosen": -0.45924514532089233, + "logits/rejected": -0.5823002457618713, + "logps/chosen": -0.002315716352313757, + "logps/rejected": -1.4111111164093018, + "loss": 2.0128, + "nll_loss": 0.503140926361084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023157161194831133, + "rewards/margins": 0.1408795416355133, + "rewards/rejected": -0.1411111056804657, + "step": 4007 + }, + { + "epoch": 2.771784232365145, + "grad_norm": 9.06564998626709, + "learning_rate": 4.0156754264638086e-05, + "log_odds_chosen": 6.933753967285156, + "log_odds_ratio": -0.1319449245929718, + "logits/chosen": -0.6110938191413879, + "logits/rejected": -0.6701584458351135, + "logps/chosen": -0.039627041667699814, + "logps/rejected": -2.0406789779663086, + "loss": 2.2408, + "nll_loss": 0.5470160245895386, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003962704446166754, + "rewards/margins": 0.200105220079422, + "rewards/rejected": -0.20406793057918549, + "step": 4008 + }, + { + "epoch": 2.772475795297372, + "grad_norm": 5.062579154968262, + "learning_rate": 4.015291224834793e-05, + "log_odds_chosen": 6.016531944274902, + "log_odds_ratio": -0.0551312081515789, + "logits/chosen": -0.5424445867538452, + "logits/rejected": -0.5113322734832764, + "logps/chosen": -0.06058872863650322, + "logps/rejected": -1.9856109619140625, + "loss": 1.9022, + "nll_loss": 0.47004297375679016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006058873143047094, + "rewards/margins": 0.1925022304058075, + "rewards/rejected": -0.19856110215187073, + "step": 4009 + }, + { + "epoch": 2.773167358229599, + "grad_norm": 6.759472846984863, + "learning_rate": 4.0149070232057784e-05, + "log_odds_chosen": 7.94062614440918, + "log_odds_ratio": -0.00318117905408144, + "logits/chosen": -0.6912134885787964, + "logits/rejected": -0.7385083436965942, + "logps/chosen": -0.03081917017698288, + "logps/rejected": -2.4372217655181885, + "loss": 2.304, + "nll_loss": 0.5756765604019165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030819172970950603, + "rewards/margins": 0.24064025282859802, + "rewards/rejected": -0.24372217059135437, + "step": 4010 + }, + { + "epoch": 2.7738589211618256, + "grad_norm": 12.081768035888672, + "learning_rate": 4.0145228215767636e-05, + "log_odds_chosen": 7.577772617340088, + "log_odds_ratio": -0.006525705568492413, + "logits/chosen": -0.5828157067298889, + "logits/rejected": -0.6487139463424683, + "logps/chosen": -0.01663350872695446, + "logps/rejected": -1.8727152347564697, + "loss": 2.6041, + "nll_loss": 0.6503660082817078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016633509658277035, + "rewards/margins": 0.1856081783771515, + "rewards/rejected": -0.18727153539657593, + "step": 4011 + }, + { + "epoch": 2.7745504840940525, + "grad_norm": 9.275781631469727, + "learning_rate": 4.014138619947749e-05, + "log_odds_chosen": 8.689956665039062, + "log_odds_ratio": -0.0027832777705043554, + "logits/chosen": -0.5445963144302368, + "logits/rejected": -0.5595325231552124, + "logps/chosen": -0.007963388226926327, + "logps/rejected": -1.6978943347930908, + "loss": 2.0967, + "nll_loss": 0.5239031910896301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007963387761265039, + "rewards/margins": 0.16899308562278748, + "rewards/rejected": -0.16978943347930908, + "step": 4012 + }, + { + "epoch": 2.7752420470262793, + "grad_norm": 4.7812604904174805, + "learning_rate": 4.0137544183187335e-05, + "log_odds_chosen": 6.512237548828125, + "log_odds_ratio": -0.04956220090389252, + "logits/chosen": -0.8668148517608643, + "logits/rejected": -0.8137930035591125, + "logps/chosen": -0.05137316510081291, + "logps/rejected": -1.837198257446289, + "loss": 2.3039, + "nll_loss": 0.5710086226463318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005137316882610321, + "rewards/margins": 0.17858250439167023, + "rewards/rejected": -0.18371984362602234, + "step": 4013 + }, + { + "epoch": 2.775933609958506, + "grad_norm": 46.199378967285156, + "learning_rate": 4.0133702166897194e-05, + "log_odds_chosen": 4.95249605178833, + "log_odds_ratio": -0.3153817653656006, + "logits/chosen": -0.49015292525291443, + "logits/rejected": -0.5078474879264832, + "logps/chosen": -0.05883823335170746, + "logps/rejected": -0.6610848307609558, + "loss": 2.4359, + "nll_loss": 0.5774248242378235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0058838240802288055, + "rewards/margins": 0.060224659740924835, + "rewards/rejected": -0.06610848009586334, + "step": 4014 + }, + { + "epoch": 2.776625172890733, + "grad_norm": 12.034295082092285, + "learning_rate": 4.012986015060704e-05, + "log_odds_chosen": 8.162162780761719, + "log_odds_ratio": -0.002041205298155546, + "logits/chosen": -0.4860447943210602, + "logits/rejected": -0.5647892951965332, + "logps/chosen": -0.006864494178444147, + "logps/rejected": -1.4859334230422974, + "loss": 1.6817, + "nll_loss": 0.42022138833999634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006864494062028825, + "rewards/margins": 0.14790688455104828, + "rewards/rejected": -0.14859335124492645, + "step": 4015 + }, + { + "epoch": 2.77731673582296, + "grad_norm": 11.894104957580566, + "learning_rate": 4.012601813431689e-05, + "log_odds_chosen": 9.395560264587402, + "log_odds_ratio": -0.0009875416290014982, + "logits/chosen": -1.0839825868606567, + "logits/rejected": -1.0906481742858887, + "logps/chosen": -0.005453157238662243, + "logps/rejected": -2.035409450531006, + "loss": 2.0012, + "nll_loss": 0.5002046227455139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005453157937154174, + "rewards/margins": 0.2029956430196762, + "rewards/rejected": -0.20354095101356506, + "step": 4016 + }, + { + "epoch": 2.7780082987551866, + "grad_norm": 7.765829563140869, + "learning_rate": 4.0122176118026744e-05, + "log_odds_chosen": 9.723349571228027, + "log_odds_ratio": -0.00022265892766881734, + "logits/chosen": -0.6836073398590088, + "logits/rejected": -0.8250362873077393, + "logps/chosen": -0.0006030694930814207, + "logps/rejected": -1.8063586950302124, + "loss": 1.6202, + "nll_loss": 0.4050217568874359, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0306949308142066e-05, + "rewards/margins": 0.18057554960250854, + "rewards/rejected": -0.18063588440418243, + "step": 4017 + }, + { + "epoch": 2.7786998616874135, + "grad_norm": 6.4268364906311035, + "learning_rate": 4.011833410173659e-05, + "log_odds_chosen": 7.016240119934082, + "log_odds_ratio": -0.007511706091463566, + "logits/chosen": -0.5878118276596069, + "logits/rejected": -0.6550705432891846, + "logps/chosen": -0.02737569808959961, + "logps/rejected": -1.6542975902557373, + "loss": 1.9515, + "nll_loss": 0.4871138334274292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002737569622695446, + "rewards/margins": 0.16269220411777496, + "rewards/rejected": -0.16542977094650269, + "step": 4018 + }, + { + "epoch": 2.7793914246196403, + "grad_norm": 31.795257568359375, + "learning_rate": 4.011449208544644e-05, + "log_odds_chosen": 7.1993408203125, + "log_odds_ratio": -0.17486952245235443, + "logits/chosen": -0.45069921016693115, + "logits/rejected": -0.5299843549728394, + "logps/chosen": -0.03464874252676964, + "logps/rejected": -1.7125872373580933, + "loss": 2.3585, + "nll_loss": 0.5721323490142822, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003464874578639865, + "rewards/margins": 0.1677938550710678, + "rewards/rejected": -0.17125873267650604, + "step": 4019 + }, + { + "epoch": 2.780082987551867, + "grad_norm": 5.901736736297607, + "learning_rate": 4.0110650069156295e-05, + "log_odds_chosen": 4.562118053436279, + "log_odds_ratio": -0.3361849784851074, + "logits/chosen": -0.3503913879394531, + "logits/rejected": -0.3848911225795746, + "logps/chosen": -0.14615672826766968, + "logps/rejected": -1.2584377527236938, + "loss": 2.3862, + "nll_loss": 0.5629367828369141, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.014615673571825027, + "rewards/margins": 0.1112281009554863, + "rewards/rejected": -0.12584376335144043, + "step": 4020 + }, + { + "epoch": 2.780774550484094, + "grad_norm": 11.129192352294922, + "learning_rate": 4.010680805286615e-05, + "log_odds_chosen": 8.119607925415039, + "log_odds_ratio": -0.031143292784690857, + "logits/chosen": -0.4110638201236725, + "logits/rejected": -0.42109963297843933, + "logps/chosen": -0.049136094748973846, + "logps/rejected": -1.557751178741455, + "loss": 2.3672, + "nll_loss": 0.58868008852005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004913609474897385, + "rewards/margins": 0.15086150169372559, + "rewards/rejected": -0.15577509999275208, + "step": 4021 + }, + { + "epoch": 2.7814661134163208, + "grad_norm": 8.29444694519043, + "learning_rate": 4.010296603657599e-05, + "log_odds_chosen": 6.8840227127075195, + "log_odds_ratio": -0.18162855505943298, + "logits/chosen": -0.45118263363838196, + "logits/rejected": -0.5519630908966064, + "logps/chosen": -0.04445667192339897, + "logps/rejected": -1.5489853620529175, + "loss": 1.8229, + "nll_loss": 0.4375506043434143, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004445666912943125, + "rewards/margins": 0.1504528671503067, + "rewards/rejected": -0.154898539185524, + "step": 4022 + }, + { + "epoch": 2.7821576763485476, + "grad_norm": 10.82302474975586, + "learning_rate": 4.009912402028585e-05, + "log_odds_chosen": 8.228324890136719, + "log_odds_ratio": -0.0011258398881182075, + "logits/chosen": -0.3915403485298157, + "logits/rejected": -0.4355335235595703, + "logps/chosen": -0.0018793190829455853, + "logps/rejected": -1.365816354751587, + "loss": 2.1308, + "nll_loss": 0.5325887799263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018793190247379243, + "rewards/margins": 0.1363936960697174, + "rewards/rejected": -0.1365816295146942, + "step": 4023 + }, + { + "epoch": 2.7828492392807744, + "grad_norm": 5.188867568969727, + "learning_rate": 4.00952820039957e-05, + "log_odds_chosen": 8.392681121826172, + "log_odds_ratio": -0.0071550956927239895, + "logits/chosen": -0.46807339787483215, + "logits/rejected": -0.46381571888923645, + "logps/chosen": -0.017753636464476585, + "logps/rejected": -1.238928198814392, + "loss": 1.9335, + "nll_loss": 0.482657790184021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017753635765984654, + "rewards/margins": 0.12211745232343674, + "rewards/rejected": -0.12389282882213593, + "step": 4024 + }, + { + "epoch": 2.7835408022130013, + "grad_norm": 5.690225601196289, + "learning_rate": 4.009143998770555e-05, + "log_odds_chosen": 5.526510715484619, + "log_odds_ratio": -0.1800631582736969, + "logits/chosen": -0.48184502124786377, + "logits/rejected": -0.5255023241043091, + "logps/chosen": -0.0705324187874794, + "logps/rejected": -1.812889814376831, + "loss": 1.9376, + "nll_loss": 0.46639716625213623, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007053242065012455, + "rewards/margins": 0.1742357611656189, + "rewards/rejected": -0.18128898739814758, + "step": 4025 + }, + { + "epoch": 2.784232365145228, + "grad_norm": 7.3954691886901855, + "learning_rate": 4.00875979714154e-05, + "log_odds_chosen": 7.558425426483154, + "log_odds_ratio": -0.01121465303003788, + "logits/chosen": -0.47046932578086853, + "logits/rejected": -0.46596261858940125, + "logps/chosen": -0.004000439308583736, + "logps/rejected": -0.7876605987548828, + "loss": 2.5458, + "nll_loss": 0.6353315114974976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004000439075753093, + "rewards/margins": 0.07836601883172989, + "rewards/rejected": -0.07876606285572052, + "step": 4026 + }, + { + "epoch": 2.784923928077455, + "grad_norm": 7.344405651092529, + "learning_rate": 4.008375595512525e-05, + "log_odds_chosen": 8.260010719299316, + "log_odds_ratio": -0.0010395023273304105, + "logits/chosen": -0.6027242541313171, + "logits/rejected": -0.5930612087249756, + "logps/chosen": -0.021254943683743477, + "logps/rejected": -1.3257710933685303, + "loss": 2.3931, + "nll_loss": 0.5981633067131042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00212549464777112, + "rewards/margins": 0.13045161962509155, + "rewards/rejected": -0.1325771063566208, + "step": 4027 + }, + { + "epoch": 2.7856154910096818, + "grad_norm": 7.669803142547607, + "learning_rate": 4.00799139388351e-05, + "log_odds_chosen": 7.015578746795654, + "log_odds_ratio": -0.10500874370336533, + "logits/chosen": -0.49662086367607117, + "logits/rejected": -0.5779070854187012, + "logps/chosen": -0.04561088979244232, + "logps/rejected": -1.9851887226104736, + "loss": 2.0359, + "nll_loss": 0.49846282601356506, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0045610894449055195, + "rewards/margins": 0.19395779073238373, + "rewards/rejected": -0.19851887226104736, + "step": 4028 + }, + { + "epoch": 2.7863070539419086, + "grad_norm": 10.287899017333984, + "learning_rate": 4.007607192254495e-05, + "log_odds_chosen": 8.59146499633789, + "log_odds_ratio": -0.0028011025860905647, + "logits/chosen": -0.5080961585044861, + "logits/rejected": -0.5439881086349487, + "logps/chosen": -0.002593546872958541, + "logps/rejected": -1.545117974281311, + "loss": 2.7331, + "nll_loss": 0.6829999685287476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000259354681475088, + "rewards/margins": 0.15425243973731995, + "rewards/rejected": -0.15451179444789886, + "step": 4029 + }, + { + "epoch": 2.7869986168741354, + "grad_norm": 20.256053924560547, + "learning_rate": 4.0072229906254806e-05, + "log_odds_chosen": 6.982294082641602, + "log_odds_ratio": -0.0798601508140564, + "logits/chosen": -0.4688485860824585, + "logits/rejected": -0.48391133546829224, + "logps/chosen": -0.03412385657429695, + "logps/rejected": -1.5629781484603882, + "loss": 2.6672, + "nll_loss": 0.6588075757026672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003412386169657111, + "rewards/margins": 0.15288543701171875, + "rewards/rejected": -0.15629780292510986, + "step": 4030 + }, + { + "epoch": 2.7876901798063622, + "grad_norm": 10.259025573730469, + "learning_rate": 4.006838788996465e-05, + "log_odds_chosen": 7.8801093101501465, + "log_odds_ratio": -0.09860547631978989, + "logits/chosen": -0.29706934094429016, + "logits/rejected": -0.32118216156959534, + "logps/chosen": -0.018375318497419357, + "logps/rejected": -1.4736485481262207, + "loss": 1.8238, + "nll_loss": 0.44608259201049805, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018375319195911288, + "rewards/margins": 0.14552733302116394, + "rewards/rejected": -0.14736486971378326, + "step": 4031 + }, + { + "epoch": 2.788381742738589, + "grad_norm": 19.465635299682617, + "learning_rate": 4.006454587367451e-05, + "log_odds_chosen": 9.73245620727539, + "log_odds_ratio": -0.08642785251140594, + "logits/chosen": -0.3006506562232971, + "logits/rejected": -0.38461631536483765, + "logps/chosen": -0.027534427121281624, + "logps/rejected": -2.6337976455688477, + "loss": 2.5214, + "nll_loss": 0.6217066049575806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027534423861652613, + "rewards/margins": 0.26062634587287903, + "rewards/rejected": -0.2633797824382782, + "step": 4032 + }, + { + "epoch": 2.789073305670816, + "grad_norm": 9.730466842651367, + "learning_rate": 4.0060703857384356e-05, + "log_odds_chosen": 7.375240802764893, + "log_odds_ratio": -0.042026542127132416, + "logits/chosen": -0.5182772874832153, + "logits/rejected": -0.549079179763794, + "logps/chosen": -0.033890802413225174, + "logps/rejected": -1.802767276763916, + "loss": 2.4504, + "nll_loss": 0.6084006428718567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033890805207192898, + "rewards/margins": 0.176887646317482, + "rewards/rejected": -0.18027672171592712, + "step": 4033 + }, + { + "epoch": 2.7897648686030427, + "grad_norm": 8.207728385925293, + "learning_rate": 4.005686184109421e-05, + "log_odds_chosen": 8.203607559204102, + "log_odds_ratio": -0.01775890588760376, + "logits/chosen": -0.5532872080802917, + "logits/rejected": -0.5614966750144958, + "logps/chosen": -0.008538950234651566, + "logps/rejected": -1.3925681114196777, + "loss": 1.7439, + "nll_loss": 0.43420541286468506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008538949186913669, + "rewards/margins": 0.13840290904045105, + "rewards/rejected": -0.1392568200826645, + "step": 4034 + }, + { + "epoch": 2.7904564315352696, + "grad_norm": 5.796356201171875, + "learning_rate": 4.005301982480406e-05, + "log_odds_chosen": 8.953681945800781, + "log_odds_ratio": -0.0008014945196919143, + "logits/chosen": -0.3912316560745239, + "logits/rejected": -0.37463435530662537, + "logps/chosen": -0.015697212889790535, + "logps/rejected": -2.049187660217285, + "loss": 2.3212, + "nll_loss": 0.5802172422409058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001569721382111311, + "rewards/margins": 0.2033490538597107, + "rewards/rejected": -0.204918771982193, + "step": 4035 + }, + { + "epoch": 2.7911479944674964, + "grad_norm": 7.292884826660156, + "learning_rate": 4.004917780851391e-05, + "log_odds_chosen": 6.377499103546143, + "log_odds_ratio": -0.09592024981975555, + "logits/chosen": -0.37000563740730286, + "logits/rejected": -0.39865243434906006, + "logps/chosen": -0.06645894795656204, + "logps/rejected": -1.4568859338760376, + "loss": 1.8935, + "nll_loss": 0.46377626061439514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006645894609391689, + "rewards/margins": 0.1390427052974701, + "rewards/rejected": -0.14568859338760376, + "step": 4036 + }, + { + "epoch": 2.7918395573997232, + "grad_norm": 9.267068862915039, + "learning_rate": 4.004533579222376e-05, + "log_odds_chosen": 8.6797513961792, + "log_odds_ratio": -0.00480996398255229, + "logits/chosen": -0.7288060784339905, + "logits/rejected": -0.6807577013969421, + "logps/chosen": -0.009048324078321457, + "logps/rejected": -1.9454820156097412, + "loss": 1.945, + "nll_loss": 0.48575958609580994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009048324427567422, + "rewards/margins": 0.19364337623119354, + "rewards/rejected": -0.19454818964004517, + "step": 4037 + }, + { + "epoch": 2.79253112033195, + "grad_norm": 7.5702080726623535, + "learning_rate": 4.004149377593361e-05, + "log_odds_chosen": 6.665687084197998, + "log_odds_ratio": -0.019693978130817413, + "logits/chosen": -0.9752466678619385, + "logits/rejected": -0.9107370972633362, + "logps/chosen": -0.012831311672925949, + "logps/rejected": -1.1290169954299927, + "loss": 2.1806, + "nll_loss": 0.5431694984436035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012831311905756593, + "rewards/margins": 0.11161856353282928, + "rewards/rejected": -0.11290168762207031, + "step": 4038 + }, + { + "epoch": 2.793222683264177, + "grad_norm": 5.317671775817871, + "learning_rate": 4.0037651759643464e-05, + "log_odds_chosen": 8.465705871582031, + "log_odds_ratio": -0.0017247693613171577, + "logits/chosen": -0.5362719297409058, + "logits/rejected": -0.583799421787262, + "logps/chosen": -0.006024193484336138, + "logps/rejected": -1.5577731132507324, + "loss": 1.6803, + "nll_loss": 0.4199100732803345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006024193135090172, + "rewards/margins": 0.1551748812198639, + "rewards/rejected": -0.15577730536460876, + "step": 4039 + }, + { + "epoch": 2.7939142461964037, + "grad_norm": 7.264804840087891, + "learning_rate": 4.003380974335331e-05, + "log_odds_chosen": 9.161325454711914, + "log_odds_ratio": -0.0010589503217488527, + "logits/chosen": -0.19057327508926392, + "logits/rejected": -0.3201407194137573, + "logps/chosen": -0.0007936095353215933, + "logps/rejected": -1.4208149909973145, + "loss": 1.5752, + "nll_loss": 0.39369529485702515, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.93609578977339e-05, + "rewards/margins": 0.142002135515213, + "rewards/rejected": -0.14208149909973145, + "step": 4040 + }, + { + "epoch": 2.7946058091286305, + "grad_norm": 11.013221740722656, + "learning_rate": 4.002996772706317e-05, + "log_odds_chosen": 8.914923667907715, + "log_odds_ratio": -0.0003905659541487694, + "logits/chosen": -0.5774435997009277, + "logits/rejected": -0.715229868888855, + "logps/chosen": -0.0004203822463750839, + "logps/rejected": -1.1688902378082275, + "loss": 2.4236, + "nll_loss": 0.6058591604232788, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.203822390991263e-05, + "rewards/margins": 0.1168469786643982, + "rewards/rejected": -0.11688902229070663, + "step": 4041 + }, + { + "epoch": 2.7952973720608574, + "grad_norm": 6.471141338348389, + "learning_rate": 4.0026125710773015e-05, + "log_odds_chosen": 5.749246597290039, + "log_odds_ratio": -0.014492766000330448, + "logits/chosen": -0.3788442313671112, + "logits/rejected": -0.41355931758880615, + "logps/chosen": -0.011785943061113358, + "logps/rejected": -0.8134269714355469, + "loss": 2.1996, + "nll_loss": 0.5484617948532104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011785943061113358, + "rewards/margins": 0.0801641047000885, + "rewards/rejected": -0.08134269714355469, + "step": 4042 + }, + { + "epoch": 2.795988934993084, + "grad_norm": 9.5997314453125, + "learning_rate": 4.002228369448287e-05, + "log_odds_chosen": 8.17568588256836, + "log_odds_ratio": -0.06700452417135239, + "logits/chosen": -0.5788341164588928, + "logits/rejected": -0.5489345788955688, + "logps/chosen": -0.0365753248333931, + "logps/rejected": -1.9677122831344604, + "loss": 2.4008, + "nll_loss": 0.5935037732124329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003657532623037696, + "rewards/margins": 0.19311368465423584, + "rewards/rejected": -0.19677123427391052, + "step": 4043 + }, + { + "epoch": 2.796680497925311, + "grad_norm": 9.586773872375488, + "learning_rate": 4.001844167819272e-05, + "log_odds_chosen": 6.191709518432617, + "log_odds_ratio": -0.024510130286216736, + "logits/chosen": -0.8226577043533325, + "logits/rejected": -0.8689040541648865, + "logps/chosen": -0.009145855903625488, + "logps/rejected": -1.0411524772644043, + "loss": 2.4241, + "nll_loss": 0.6035729050636292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009145855437964201, + "rewards/margins": 0.10320065915584564, + "rewards/rejected": -0.10411524772644043, + "step": 4044 + }, + { + "epoch": 2.797372060857538, + "grad_norm": 12.250510215759277, + "learning_rate": 4.0014599661902565e-05, + "log_odds_chosen": 9.6131591796875, + "log_odds_ratio": -0.00039203441701829433, + "logits/chosen": -0.606217622756958, + "logits/rejected": -0.6473501920700073, + "logps/chosen": -0.0008249045349657536, + "logps/rejected": -1.6661516427993774, + "loss": 2.7216, + "nll_loss": 0.6803591251373291, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.24904564069584e-05, + "rewards/margins": 0.1665326952934265, + "rewards/rejected": -0.16661517322063446, + "step": 4045 + }, + { + "epoch": 2.7980636237897647, + "grad_norm": 8.003177642822266, + "learning_rate": 4.001075764561242e-05, + "log_odds_chosen": 7.322998046875, + "log_odds_ratio": -0.10197833180427551, + "logits/chosen": -0.562049150466919, + "logits/rejected": -0.5889880657196045, + "logps/chosen": -0.030817590653896332, + "logps/rejected": -1.0370532274246216, + "loss": 2.5991, + "nll_loss": 0.6395775079727173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003081759437918663, + "rewards/margins": 0.10062356293201447, + "rewards/rejected": -0.10370532423257828, + "step": 4046 + }, + { + "epoch": 2.7987551867219915, + "grad_norm": 9.056816101074219, + "learning_rate": 4.000691562932227e-05, + "log_odds_chosen": 8.016447067260742, + "log_odds_ratio": -0.07796118408441544, + "logits/chosen": -0.7534922957420349, + "logits/rejected": -0.7713943719863892, + "logps/chosen": -0.03694911673665047, + "logps/rejected": -1.8605222702026367, + "loss": 2.1996, + "nll_loss": 0.5421122908592224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036949114874005318, + "rewards/margins": 0.1823573112487793, + "rewards/rejected": -0.18605221807956696, + "step": 4047 + }, + { + "epoch": 2.7994467496542184, + "grad_norm": 9.049676895141602, + "learning_rate": 4.000307361303212e-05, + "log_odds_chosen": 7.648162841796875, + "log_odds_ratio": -0.03125055879354477, + "logits/chosen": -0.5109508633613586, + "logits/rejected": -0.5957425832748413, + "logps/chosen": -0.013546126894652843, + "logps/rejected": -1.659212350845337, + "loss": 1.6936, + "nll_loss": 0.4202747941017151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013546126428991556, + "rewards/margins": 0.16456662118434906, + "rewards/rejected": -0.16592122614383698, + "step": 4048 + }, + { + "epoch": 2.800138312586445, + "grad_norm": 7.598301410675049, + "learning_rate": 3.999923159674197e-05, + "log_odds_chosen": 8.735095977783203, + "log_odds_ratio": -0.004727786872535944, + "logits/chosen": -0.5558849573135376, + "logits/rejected": -0.5349166989326477, + "logps/chosen": -0.009516008198261261, + "logps/rejected": -1.6679878234863281, + "loss": 1.7339, + "nll_loss": 0.433004766702652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009516008431091905, + "rewards/margins": 0.16584721207618713, + "rewards/rejected": -0.16679880023002625, + "step": 4049 + }, + { + "epoch": 2.800829875518672, + "grad_norm": 13.116601943969727, + "learning_rate": 3.999538958045183e-05, + "log_odds_chosen": 9.699845314025879, + "log_odds_ratio": -0.00045129720820114017, + "logits/chosen": -0.43724325299263, + "logits/rejected": -0.4891416132450104, + "logps/chosen": -0.0005087569006718695, + "logps/rejected": -1.6439473628997803, + "loss": 1.9135, + "nll_loss": 0.4783242344856262, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.087569297757e-05, + "rewards/margins": 0.16434386372566223, + "rewards/rejected": -0.16439473628997803, + "step": 4050 + }, + { + "epoch": 2.801521438450899, + "grad_norm": 7.343100547790527, + "learning_rate": 3.999154756416167e-05, + "log_odds_chosen": 6.8652567863464355, + "log_odds_ratio": -0.00484616169705987, + "logits/chosen": -0.5420911908149719, + "logits/rejected": -0.561378002166748, + "logps/chosen": -0.015762126073241234, + "logps/rejected": -1.9778697490692139, + "loss": 2.0224, + "nll_loss": 0.505126953125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015762128168717027, + "rewards/margins": 0.19621075689792633, + "rewards/rejected": -0.19778698682785034, + "step": 4051 + }, + { + "epoch": 2.8022130013831257, + "grad_norm": 9.008139610290527, + "learning_rate": 3.9987705547871526e-05, + "log_odds_chosen": 8.551125526428223, + "log_odds_ratio": -0.03587919473648071, + "logits/chosen": -0.5163147449493408, + "logits/rejected": -0.601231575012207, + "logps/chosen": -0.03649171441793442, + "logps/rejected": -2.1623549461364746, + "loss": 2.2451, + "nll_loss": 0.5576812624931335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036491714417934418, + "rewards/margins": 0.21258632838726044, + "rewards/rejected": -0.21623550355434418, + "step": 4052 + }, + { + "epoch": 2.8029045643153525, + "grad_norm": 13.98880386352539, + "learning_rate": 3.998386353158138e-05, + "log_odds_chosen": 7.360173225402832, + "log_odds_ratio": -0.06701034307479858, + "logits/chosen": -0.699334979057312, + "logits/rejected": -0.7111362218856812, + "logps/chosen": -0.046054355800151825, + "logps/rejected": -2.1368675231933594, + "loss": 2.3722, + "nll_loss": 0.5863499641418457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00460543530061841, + "rewards/margins": 0.20908132195472717, + "rewards/rejected": -0.2136867493391037, + "step": 4053 + }, + { + "epoch": 2.8035961272475793, + "grad_norm": 24.911413192749023, + "learning_rate": 3.998002151529123e-05, + "log_odds_chosen": 5.606881141662598, + "log_odds_ratio": -0.14306114614009857, + "logits/chosen": -0.6254528164863586, + "logits/rejected": -0.6368358135223389, + "logps/chosen": -0.0452696867287159, + "logps/rejected": -1.1443300247192383, + "loss": 2.1549, + "nll_loss": 0.5244289636611938, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0045269690454006195, + "rewards/margins": 0.10990603268146515, + "rewards/rejected": -0.11443300545215607, + "step": 4054 + }, + { + "epoch": 2.804287690179806, + "grad_norm": 6.053211212158203, + "learning_rate": 3.9976179499001076e-05, + "log_odds_chosen": 7.3733110427856445, + "log_odds_ratio": -0.002126081380993128, + "logits/chosen": -0.6541106700897217, + "logits/rejected": -0.6929240822792053, + "logps/chosen": -0.01228273380547762, + "logps/rejected": -1.2755669355392456, + "loss": 2.4835, + "nll_loss": 0.620650053024292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001228273380547762, + "rewards/margins": 0.12632840871810913, + "rewards/rejected": -0.1275566965341568, + "step": 4055 + }, + { + "epoch": 2.804979253112033, + "grad_norm": 11.584280967712402, + "learning_rate": 3.997233748271093e-05, + "log_odds_chosen": 8.602903366088867, + "log_odds_ratio": -0.02909720316529274, + "logits/chosen": -1.1846723556518555, + "logits/rejected": -1.2454025745391846, + "logps/chosen": -0.009676833637058735, + "logps/rejected": -1.965323805809021, + "loss": 3.4535, + "nll_loss": 0.8604767322540283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009676833869889379, + "rewards/margins": 0.19556470215320587, + "rewards/rejected": -0.19653236865997314, + "step": 4056 + }, + { + "epoch": 2.80567081604426, + "grad_norm": 8.060429573059082, + "learning_rate": 3.996849546642078e-05, + "log_odds_chosen": 7.116885185241699, + "log_odds_ratio": -0.005772262811660767, + "logits/chosen": -0.7592568397521973, + "logits/rejected": -0.7438384890556335, + "logps/chosen": -0.028484918177127838, + "logps/rejected": -1.9294735193252563, + "loss": 2.7605, + "nll_loss": 0.6895406246185303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028484920039772987, + "rewards/margins": 0.1900988668203354, + "rewards/rejected": -0.1929473578929901, + "step": 4057 + }, + { + "epoch": 2.8063623789764867, + "grad_norm": 5.724400997161865, + "learning_rate": 3.996465345013063e-05, + "log_odds_chosen": 8.871892929077148, + "log_odds_ratio": -0.0014322178903967142, + "logits/chosen": -0.28607577085494995, + "logits/rejected": -0.3320361375808716, + "logps/chosen": -0.017788385972380638, + "logps/rejected": -1.4142343997955322, + "loss": 2.0421, + "nll_loss": 0.5103698968887329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017788386903703213, + "rewards/margins": 0.13964460790157318, + "rewards/rejected": -0.14142344892024994, + "step": 4058 + }, + { + "epoch": 2.8070539419087135, + "grad_norm": 8.724485397338867, + "learning_rate": 3.9960811433840486e-05, + "log_odds_chosen": 8.082185745239258, + "log_odds_ratio": -0.018572242930531502, + "logits/chosen": -0.7844616174697876, + "logits/rejected": -0.8723915815353394, + "logps/chosen": -0.010151880793273449, + "logps/rejected": -1.1117634773254395, + "loss": 2.5914, + "nll_loss": 0.6459817290306091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010151881724596024, + "rewards/margins": 0.11016116291284561, + "rewards/rejected": -0.11117635667324066, + "step": 4059 + }, + { + "epoch": 2.8077455048409403, + "grad_norm": 8.289448738098145, + "learning_rate": 3.995696941755033e-05, + "log_odds_chosen": 7.2113566398620605, + "log_odds_ratio": -0.05613557994365692, + "logits/chosen": -0.7712424397468567, + "logits/rejected": -0.7228599786758423, + "logps/chosen": -0.010022724978625774, + "logps/rejected": -0.9686402082443237, + "loss": 2.3449, + "nll_loss": 0.58060222864151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001002272474579513, + "rewards/margins": 0.09586174786090851, + "rewards/rejected": -0.0968640148639679, + "step": 4060 + }, + { + "epoch": 2.808437067773167, + "grad_norm": 7.372903347015381, + "learning_rate": 3.9953127401260184e-05, + "log_odds_chosen": 6.312814235687256, + "log_odds_ratio": -0.024066109210252762, + "logits/chosen": -0.23818959295749664, + "logits/rejected": -0.38583752512931824, + "logps/chosen": -0.014680419117212296, + "logps/rejected": -1.2234750986099243, + "loss": 1.8019, + "nll_loss": 0.44808071851730347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014680419117212296, + "rewards/margins": 0.12087946385145187, + "rewards/rejected": -0.12234750390052795, + "step": 4061 + }, + { + "epoch": 2.809128630705394, + "grad_norm": 5.99877405166626, + "learning_rate": 3.9949285384970037e-05, + "log_odds_chosen": 7.9143571853637695, + "log_odds_ratio": -0.01069034356623888, + "logits/chosen": -0.4054313600063324, + "logits/rejected": -0.5019809007644653, + "logps/chosen": -0.011747448705136776, + "logps/rejected": -1.5104658603668213, + "loss": 2.6341, + "nll_loss": 0.6574532389640808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011747449170798063, + "rewards/margins": 0.1498718410730362, + "rewards/rejected": -0.15104657411575317, + "step": 4062 + }, + { + "epoch": 2.809820193637621, + "grad_norm": 9.521771430969238, + "learning_rate": 3.994544336867989e-05, + "log_odds_chosen": 8.424257278442383, + "log_odds_ratio": -0.0014310001861304045, + "logits/chosen": -0.7866235375404358, + "logits/rejected": -0.8643758296966553, + "logps/chosen": -0.002819900633767247, + "logps/rejected": -1.5088589191436768, + "loss": 2.7465, + "nll_loss": 0.686479389667511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002819900691974908, + "rewards/margins": 0.15060390532016754, + "rewards/rejected": -0.1508859097957611, + "step": 4063 + }, + { + "epoch": 2.8105117565698476, + "grad_norm": 7.929076194763184, + "learning_rate": 3.9941601352389735e-05, + "log_odds_chosen": 7.050183296203613, + "log_odds_ratio": -0.054438620805740356, + "logits/chosen": -0.5261591672897339, + "logits/rejected": -0.5558338761329651, + "logps/chosen": -0.01621352881193161, + "logps/rejected": -1.1250156164169312, + "loss": 2.1176, + "nll_loss": 0.5239666700363159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016213529743254185, + "rewards/margins": 0.11088021099567413, + "rewards/rejected": -0.11250156164169312, + "step": 4064 + }, + { + "epoch": 2.8112033195020745, + "grad_norm": 8.669575691223145, + "learning_rate": 3.993775933609959e-05, + "log_odds_chosen": 7.901257514953613, + "log_odds_ratio": -0.006656877230852842, + "logits/chosen": -0.7184497117996216, + "logits/rejected": -0.7672439813613892, + "logps/chosen": -0.01615772396326065, + "logps/rejected": -1.3511242866516113, + "loss": 2.0056, + "nll_loss": 0.5007306337356567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016157726058736444, + "rewards/margins": 0.13349665701389313, + "rewards/rejected": -0.1351124346256256, + "step": 4065 + }, + { + "epoch": 2.8118948824343013, + "grad_norm": 11.849681854248047, + "learning_rate": 3.993391731980944e-05, + "log_odds_chosen": 10.316424369812012, + "log_odds_ratio": -0.00012056290142936632, + "logits/chosen": -0.29598119854927063, + "logits/rejected": -0.3441726863384247, + "logps/chosen": -0.00017041430692188442, + "logps/rejected": -1.8917357921600342, + "loss": 2.6001, + "nll_loss": 0.6500047445297241, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7041431419784203e-05, + "rewards/margins": 0.18915654718875885, + "rewards/rejected": -0.18917357921600342, + "step": 4066 + }, + { + "epoch": 2.812586445366528, + "grad_norm": 22.96870231628418, + "learning_rate": 3.9930075303519285e-05, + "log_odds_chosen": 6.919788360595703, + "log_odds_ratio": -0.9415428638458252, + "logits/chosen": -0.5172785520553589, + "logits/rejected": -0.5798472166061401, + "logps/chosen": -0.16864734888076782, + "logps/rejected": -0.9507753849029541, + "loss": 2.3151, + "nll_loss": 0.4846179485321045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01686473749577999, + "rewards/margins": 0.07821279764175415, + "rewards/rejected": -0.09507754445075989, + "step": 4067 + }, + { + "epoch": 2.813278008298755, + "grad_norm": 9.234999656677246, + "learning_rate": 3.9926233287229144e-05, + "log_odds_chosen": 9.32768440246582, + "log_odds_ratio": -0.15762081742286682, + "logits/chosen": -0.7216007113456726, + "logits/rejected": -0.8246335983276367, + "logps/chosen": -0.02265734225511551, + "logps/rejected": -1.8226202726364136, + "loss": 1.8922, + "nll_loss": 0.4572892189025879, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022657345980405807, + "rewards/margins": 0.1799962967634201, + "rewards/rejected": -0.18226204812526703, + "step": 4068 + }, + { + "epoch": 2.813969571230982, + "grad_norm": 5.7767133712768555, + "learning_rate": 3.992239127093899e-05, + "log_odds_chosen": 7.759276866912842, + "log_odds_ratio": -0.0045270719565451145, + "logits/chosen": -0.47087520360946655, + "logits/rejected": -0.40145114064216614, + "logps/chosen": -0.007731554564088583, + "logps/rejected": -1.359923005104065, + "loss": 2.0149, + "nll_loss": 0.5032612085342407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007731555378995836, + "rewards/margins": 0.13521915674209595, + "rewards/rejected": -0.13599231839179993, + "step": 4069 + }, + { + "epoch": 2.8146611341632086, + "grad_norm": 5.5008440017700195, + "learning_rate": 3.991854925464884e-05, + "log_odds_chosen": 7.026598930358887, + "log_odds_ratio": -0.11519190669059753, + "logits/chosen": -0.7082157731056213, + "logits/rejected": -0.6972053647041321, + "logps/chosen": -0.02982058934867382, + "logps/rejected": -1.2710275650024414, + "loss": 1.9039, + "nll_loss": 0.4644562602043152, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002982059260830283, + "rewards/margins": 0.12412068992853165, + "rewards/rejected": -0.12710274755954742, + "step": 4070 + }, + { + "epoch": 2.8153526970954355, + "grad_norm": 9.34103775024414, + "learning_rate": 3.9914707238358695e-05, + "log_odds_chosen": 8.257344245910645, + "log_odds_ratio": -0.0006549620884470642, + "logits/chosen": -0.46251440048217773, + "logits/rejected": -0.4937194585800171, + "logps/chosen": -0.0014829322462901473, + "logps/rejected": -1.1529827117919922, + "loss": 2.7946, + "nll_loss": 0.6985812187194824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014829322753939778, + "rewards/margins": 0.11514997482299805, + "rewards/rejected": -0.11529827117919922, + "step": 4071 + }, + { + "epoch": 2.8160442600276623, + "grad_norm": 7.539175033569336, + "learning_rate": 3.991086522206855e-05, + "log_odds_chosen": 7.862712383270264, + "log_odds_ratio": -0.000995173119008541, + "logits/chosen": -0.8730528950691223, + "logits/rejected": -0.8860530853271484, + "logps/chosen": -0.0025655007921159267, + "logps/rejected": -1.3373017311096191, + "loss": 2.4647, + "nll_loss": 0.6160710453987122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025655009085312486, + "rewards/margins": 0.13347361981868744, + "rewards/rejected": -0.13373017311096191, + "step": 4072 + }, + { + "epoch": 2.816735822959889, + "grad_norm": 9.269205093383789, + "learning_rate": 3.990702320577839e-05, + "log_odds_chosen": 8.780440330505371, + "log_odds_ratio": -0.0008190101943910122, + "logits/chosen": -0.5245490074157715, + "logits/rejected": -0.6109641790390015, + "logps/chosen": -0.004009606782346964, + "logps/rejected": -1.9757808446884155, + "loss": 1.9942, + "nll_loss": 0.49846985936164856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040096070733852684, + "rewards/margins": 0.19717712700366974, + "rewards/rejected": -0.1975780874490738, + "step": 4073 + }, + { + "epoch": 2.817427385892116, + "grad_norm": 7.564273357391357, + "learning_rate": 3.9903181189488246e-05, + "log_odds_chosen": 8.549010276794434, + "log_odds_ratio": -0.0013980288058519363, + "logits/chosen": -0.347595751285553, + "logits/rejected": -0.43927520513534546, + "logps/chosen": -0.015520025976002216, + "logps/rejected": -1.797426700592041, + "loss": 1.9958, + "nll_loss": 0.4988030791282654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015520025044679642, + "rewards/margins": 0.17819064855575562, + "rewards/rejected": -0.17974264919757843, + "step": 4074 + }, + { + "epoch": 2.8181189488243428, + "grad_norm": 32.705650329589844, + "learning_rate": 3.98993391731981e-05, + "log_odds_chosen": 6.796420574188232, + "log_odds_ratio": -0.398301899433136, + "logits/chosen": -0.7179882526397705, + "logits/rejected": -0.7630666494369507, + "logps/chosen": -0.051055118441581726, + "logps/rejected": -1.2427799701690674, + "loss": 2.342, + "nll_loss": 0.5456675291061401, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0051055122166872025, + "rewards/margins": 0.11917249858379364, + "rewards/rejected": -0.1242780089378357, + "step": 4075 + }, + { + "epoch": 2.8188105117565696, + "grad_norm": 8.102416038513184, + "learning_rate": 3.9895497156907944e-05, + "log_odds_chosen": 6.3788862228393555, + "log_odds_ratio": -0.029690608382225037, + "logits/chosen": -0.5226523280143738, + "logits/rejected": -0.5438002347946167, + "logps/chosen": -0.014367911033332348, + "logps/rejected": -1.0449059009552002, + "loss": 1.9453, + "nll_loss": 0.4833501875400543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014367912663146853, + "rewards/margins": 0.10305380821228027, + "rewards/rejected": -0.10449059307575226, + "step": 4076 + }, + { + "epoch": 2.8195020746887964, + "grad_norm": 9.83740234375, + "learning_rate": 3.98916551406178e-05, + "log_odds_chosen": 8.079355239868164, + "log_odds_ratio": -0.05913592129945755, + "logits/chosen": -0.5341934561729431, + "logits/rejected": -0.6323039531707764, + "logps/chosen": -0.026006482541561127, + "logps/rejected": -1.7434455156326294, + "loss": 2.0879, + "nll_loss": 0.5160654783248901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026006484404206276, + "rewards/margins": 0.17174391448497772, + "rewards/rejected": -0.17434456944465637, + "step": 4077 + }, + { + "epoch": 2.8201936376210233, + "grad_norm": 16.19186019897461, + "learning_rate": 3.988781312432765e-05, + "log_odds_chosen": 6.893074035644531, + "log_odds_ratio": -0.011647537350654602, + "logits/chosen": -0.8889976739883423, + "logits/rejected": -0.9283890128135681, + "logps/chosen": -0.01870199292898178, + "logps/rejected": -1.302621603012085, + "loss": 2.6, + "nll_loss": 0.6488242149353027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018701993394643068, + "rewards/margins": 0.12839196622371674, + "rewards/rejected": -0.13026216626167297, + "step": 4078 + }, + { + "epoch": 2.82088520055325, + "grad_norm": 12.260211944580078, + "learning_rate": 3.98839711080375e-05, + "log_odds_chosen": 8.926525115966797, + "log_odds_ratio": -0.0010325999464839697, + "logits/chosen": -0.690007209777832, + "logits/rejected": -0.803962230682373, + "logps/chosen": -0.002110689412802458, + "logps/rejected": -1.6868860721588135, + "loss": 2.4598, + "nll_loss": 0.6148371696472168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002110689238179475, + "rewards/margins": 0.16847753524780273, + "rewards/rejected": -0.1686885952949524, + "step": 4079 + }, + { + "epoch": 2.821576763485477, + "grad_norm": 11.205087661743164, + "learning_rate": 3.9880129091747353e-05, + "log_odds_chosen": 8.465922355651855, + "log_odds_ratio": -0.004587572067975998, + "logits/chosen": -0.6520918607711792, + "logits/rejected": -0.7593033909797668, + "logps/chosen": -0.0034907313529402018, + "logps/rejected": -1.5698950290679932, + "loss": 2.1028, + "nll_loss": 0.5252323150634766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034907314693555236, + "rewards/margins": 0.15664042532444, + "rewards/rejected": -0.15698951482772827, + "step": 4080 + }, + { + "epoch": 2.8222683264177038, + "grad_norm": 7.319963455200195, + "learning_rate": 3.9876287075457206e-05, + "log_odds_chosen": 6.9736738204956055, + "log_odds_ratio": -0.1095675528049469, + "logits/chosen": -0.5960395336151123, + "logits/rejected": -0.6045196056365967, + "logps/chosen": -0.02621668018400669, + "logps/rejected": -1.096388578414917, + "loss": 1.7686, + "nll_loss": 0.4312053322792053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002621668390929699, + "rewards/margins": 0.10701719671487808, + "rewards/rejected": -0.10963885486125946, + "step": 4081 + }, + { + "epoch": 2.8229598893499306, + "grad_norm": 7.484302520751953, + "learning_rate": 3.987244505916705e-05, + "log_odds_chosen": 9.62667465209961, + "log_odds_ratio": -0.00041180552216246724, + "logits/chosen": -0.7579289674758911, + "logits/rejected": -0.8042924404144287, + "logps/chosen": -0.017796583473682404, + "logps/rejected": -1.883541226387024, + "loss": 1.8983, + "nll_loss": 0.4745240807533264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001779658254235983, + "rewards/margins": 0.18657445907592773, + "rewards/rejected": -0.18835411965847015, + "step": 4082 + }, + { + "epoch": 2.8236514522821574, + "grad_norm": 7.532115459442139, + "learning_rate": 3.9868603042876904e-05, + "log_odds_chosen": 6.588289260864258, + "log_odds_ratio": -0.2632429897785187, + "logits/chosen": -0.4547191560268402, + "logits/rejected": -0.4525451362133026, + "logps/chosen": -0.10993438214063644, + "logps/rejected": -1.7299728393554688, + "loss": 2.3395, + "nll_loss": 0.5585499405860901, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01099343877285719, + "rewards/margins": 0.16200384497642517, + "rewards/rejected": -0.17299726605415344, + "step": 4083 + }, + { + "epoch": 2.8243430152143842, + "grad_norm": 9.845251083374023, + "learning_rate": 3.9864761026586756e-05, + "log_odds_chosen": 7.189066410064697, + "log_odds_ratio": -0.004802032373845577, + "logits/chosen": -0.8864326477050781, + "logits/rejected": -0.8713440895080566, + "logps/chosen": -0.021907519549131393, + "logps/rejected": -1.5963313579559326, + "loss": 2.434, + "nll_loss": 0.608009397983551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002190752187743783, + "rewards/margins": 0.1574423909187317, + "rewards/rejected": -0.15963315963745117, + "step": 4084 + }, + { + "epoch": 2.825034578146611, + "grad_norm": 5.436544895172119, + "learning_rate": 3.98609190102966e-05, + "log_odds_chosen": 8.149063110351562, + "log_odds_ratio": -0.031937647610902786, + "logits/chosen": -0.3354184031486511, + "logits/rejected": -0.4097675681114197, + "logps/chosen": -0.023115739226341248, + "logps/rejected": -1.7368464469909668, + "loss": 2.3099, + "nll_loss": 0.574272096157074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023115738295018673, + "rewards/margins": 0.17137306928634644, + "rewards/rejected": -0.17368465662002563, + "step": 4085 + }, + { + "epoch": 2.825726141078838, + "grad_norm": 10.83985710144043, + "learning_rate": 3.985707699400646e-05, + "log_odds_chosen": 8.234752655029297, + "log_odds_ratio": -0.04173066467046738, + "logits/chosen": -0.9166703820228577, + "logits/rejected": -1.0253310203552246, + "logps/chosen": -0.009630718268454075, + "logps/rejected": -1.1056783199310303, + "loss": 2.5543, + "nll_loss": 0.6344038248062134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009630717686377466, + "rewards/margins": 0.10960475355386734, + "rewards/rejected": -0.11056783050298691, + "step": 4086 + }, + { + "epoch": 2.8264177040110647, + "grad_norm": 13.876534461975098, + "learning_rate": 3.985323497771631e-05, + "log_odds_chosen": 8.142451286315918, + "log_odds_ratio": -0.23297399282455444, + "logits/chosen": -0.8295532464981079, + "logits/rejected": -0.8863059282302856, + "logps/chosen": -0.02916792407631874, + "logps/rejected": -1.4469013214111328, + "loss": 2.3819, + "nll_loss": 0.5721670389175415, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002916792407631874, + "rewards/margins": 0.14177334308624268, + "rewards/rejected": -0.14469014108181, + "step": 4087 + }, + { + "epoch": 2.8271092669432916, + "grad_norm": 6.461430549621582, + "learning_rate": 3.984939296142616e-05, + "log_odds_chosen": 6.785519599914551, + "log_odds_ratio": -0.04183311015367508, + "logits/chosen": -0.7603569030761719, + "logits/rejected": -0.7885888814926147, + "logps/chosen": -0.0300129521638155, + "logps/rejected": -1.3980698585510254, + "loss": 1.657, + "nll_loss": 0.4100547432899475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030012952629476786, + "rewards/margins": 0.1368056982755661, + "rewards/rejected": -0.13980698585510254, + "step": 4088 + }, + { + "epoch": 2.8278008298755184, + "grad_norm": 6.225308418273926, + "learning_rate": 3.984555094513601e-05, + "log_odds_chosen": 9.266956329345703, + "log_odds_ratio": -0.002623880747705698, + "logits/chosen": -0.6244158744812012, + "logits/rejected": -0.597240149974823, + "logps/chosen": -0.0062347580678761005, + "logps/rejected": -1.559669017791748, + "loss": 1.5555, + "nll_loss": 0.3886123597621918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006234758184291422, + "rewards/margins": 0.1553434431552887, + "rewards/rejected": -0.15596690773963928, + "step": 4089 + }, + { + "epoch": 2.8284923928077457, + "grad_norm": 7.004493236541748, + "learning_rate": 3.9841708928845864e-05, + "log_odds_chosen": 7.017504692077637, + "log_odds_ratio": -0.045389194041490555, + "logits/chosen": -0.6390866041183472, + "logits/rejected": -0.6546069979667664, + "logps/chosen": -0.041990771889686584, + "logps/rejected": -1.8552742004394531, + "loss": 2.1232, + "nll_loss": 0.5262652635574341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004199077840894461, + "rewards/margins": 0.18132832646369934, + "rewards/rejected": -0.18552741408348083, + "step": 4090 + }, + { + "epoch": 2.8291839557399725, + "grad_norm": 8.383001327514648, + "learning_rate": 3.983786691255571e-05, + "log_odds_chosen": 8.123327255249023, + "log_odds_ratio": -0.013854804448783398, + "logits/chosen": -0.6715909242630005, + "logits/rejected": -0.6964853405952454, + "logps/chosen": -0.009874495677649975, + "logps/rejected": -1.4259437322616577, + "loss": 2.3549, + "nll_loss": 0.5873280167579651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009874494280666113, + "rewards/margins": 0.14160692691802979, + "rewards/rejected": -0.1425943672657013, + "step": 4091 + }, + { + "epoch": 2.8298755186721993, + "grad_norm": 8.464040756225586, + "learning_rate": 3.983402489626556e-05, + "log_odds_chosen": 8.556112289428711, + "log_odds_ratio": -0.0016304109012708068, + "logits/chosen": -0.5392479300498962, + "logits/rejected": -0.6534115076065063, + "logps/chosen": -0.027099961414933205, + "logps/rejected": -2.275935649871826, + "loss": 2.2959, + "nll_loss": 0.5738140344619751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002709996188059449, + "rewards/margins": 0.224883571267128, + "rewards/rejected": -0.2275935411453247, + "step": 4092 + }, + { + "epoch": 2.830567081604426, + "grad_norm": 7.396304130554199, + "learning_rate": 3.9830182879975415e-05, + "log_odds_chosen": 8.257478713989258, + "log_odds_ratio": -0.0026086117140948772, + "logits/chosen": -0.4450608491897583, + "logits/rejected": -0.46384957432746887, + "logps/chosen": -0.026241516694426537, + "logps/rejected": -2.154176950454712, + "loss": 2.0062, + "nll_loss": 0.5012954473495483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002624151762574911, + "rewards/margins": 0.21279355883598328, + "rewards/rejected": -0.21541771292686462, + "step": 4093 + }, + { + "epoch": 2.831258644536653, + "grad_norm": 10.601005554199219, + "learning_rate": 3.982634086368526e-05, + "log_odds_chosen": 5.183744430541992, + "log_odds_ratio": -0.2201242297887802, + "logits/chosen": -0.25432318449020386, + "logits/rejected": -0.29870888590812683, + "logps/chosen": -0.06550457328557968, + "logps/rejected": -1.0095858573913574, + "loss": 2.2327, + "nll_loss": 0.5361602306365967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006550457328557968, + "rewards/margins": 0.09440812468528748, + "rewards/rejected": -0.10095858573913574, + "step": 4094 + }, + { + "epoch": 2.83195020746888, + "grad_norm": 11.715688705444336, + "learning_rate": 3.982249884739511e-05, + "log_odds_chosen": 9.65654182434082, + "log_odds_ratio": -0.00367301725782454, + "logits/chosen": -0.3656267821788788, + "logits/rejected": -0.4155307412147522, + "logps/chosen": -0.0016836244612932205, + "logps/rejected": -2.357590675354004, + "loss": 3.5452, + "nll_loss": 0.8859277367591858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000168362443218939, + "rewards/margins": 0.23559071123600006, + "rewards/rejected": -0.23575909435749054, + "step": 4095 + }, + { + "epoch": 2.8326417704011067, + "grad_norm": 10.904032707214355, + "learning_rate": 3.9818656831104965e-05, + "log_odds_chosen": 8.239508628845215, + "log_odds_ratio": -0.0010725038591772318, + "logits/chosen": -0.660797119140625, + "logits/rejected": -0.666714072227478, + "logps/chosen": -0.013842078857123852, + "logps/rejected": -1.586050033569336, + "loss": 2.3764, + "nll_loss": 0.5939972400665283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013842078624293208, + "rewards/margins": 0.15722079575061798, + "rewards/rejected": -0.15860500931739807, + "step": 4096 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 7.934488296508789, + "learning_rate": 3.981481481481482e-05, + "log_odds_chosen": 8.910290718078613, + "log_odds_ratio": -0.0014349442208185792, + "logits/chosen": -0.5394958853721619, + "logits/rejected": -0.630646824836731, + "logps/chosen": -0.0013760102447122335, + "logps/rejected": -1.5402330160140991, + "loss": 2.5146, + "nll_loss": 0.628517746925354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013760101865045726, + "rewards/margins": 0.15388569235801697, + "rewards/rejected": -0.15402328968048096, + "step": 4097 + }, + { + "epoch": 2.8340248962655603, + "grad_norm": 5.750637531280518, + "learning_rate": 3.9810972798524664e-05, + "log_odds_chosen": 7.891044616699219, + "log_odds_ratio": -0.004763246979564428, + "logits/chosen": -0.578194797039032, + "logits/rejected": -0.5832671523094177, + "logps/chosen": -0.012611321173608303, + "logps/rejected": -1.1056181192398071, + "loss": 1.6103, + "nll_loss": 0.40209874510765076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012611323036253452, + "rewards/margins": 0.10930068790912628, + "rewards/rejected": -0.11056182533502579, + "step": 4098 + }, + { + "epoch": 2.834716459197787, + "grad_norm": 7.133883953094482, + "learning_rate": 3.980713078223452e-05, + "log_odds_chosen": 5.702484130859375, + "log_odds_ratio": -0.15084023773670197, + "logits/chosen": -0.6206138134002686, + "logits/rejected": -0.5826375484466553, + "logps/chosen": -0.053505752235651016, + "logps/rejected": -1.6038875579833984, + "loss": 1.9244, + "nll_loss": 0.4660155475139618, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005350574851036072, + "rewards/margins": 0.15503817796707153, + "rewards/rejected": -0.1603887677192688, + "step": 4099 + }, + { + "epoch": 2.835408022130014, + "grad_norm": 8.215850830078125, + "learning_rate": 3.980328876594437e-05, + "log_odds_chosen": 9.187931060791016, + "log_odds_ratio": -0.0014828175771981478, + "logits/chosen": -0.7627009153366089, + "logits/rejected": -0.7655040621757507, + "logps/chosen": -0.0007910731364972889, + "logps/rejected": -1.2454516887664795, + "loss": 2.5632, + "nll_loss": 0.6406489610671997, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.910731801530346e-05, + "rewards/margins": 0.12446606159210205, + "rewards/rejected": -0.12454516440629959, + "step": 4100 + }, + { + "epoch": 2.836099585062241, + "grad_norm": 7.439212799072266, + "learning_rate": 3.979944674965422e-05, + "log_odds_chosen": 7.659041881561279, + "log_odds_ratio": -0.004087934270501137, + "logits/chosen": -0.8255820870399475, + "logits/rejected": -0.879666268825531, + "logps/chosen": -0.006819070316851139, + "logps/rejected": -1.261864423751831, + "loss": 1.5073, + "nll_loss": 0.376420259475708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006819070549681783, + "rewards/margins": 0.1255045384168625, + "rewards/rejected": -0.12618646025657654, + "step": 4101 + }, + { + "epoch": 2.8367911479944676, + "grad_norm": 10.409981727600098, + "learning_rate": 3.979560473336407e-05, + "log_odds_chosen": 7.2086687088012695, + "log_odds_ratio": -0.09979799389839172, + "logits/chosen": -0.33021828532218933, + "logits/rejected": -0.4076710343360901, + "logps/chosen": -0.019604945555329323, + "logps/rejected": -0.9485692977905273, + "loss": 1.9901, + "nll_loss": 0.4875490367412567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019604945555329323, + "rewards/margins": 0.09289643168449402, + "rewards/rejected": -0.09485693275928497, + "step": 4102 + }, + { + "epoch": 2.8374827109266945, + "grad_norm": 11.524858474731445, + "learning_rate": 3.979176271707392e-05, + "log_odds_chosen": 9.426741600036621, + "log_odds_ratio": -0.000592447817325592, + "logits/chosen": -0.9023887515068054, + "logits/rejected": -0.9688795804977417, + "logps/chosen": -0.0008985823369584978, + "logps/rejected": -1.7494679689407349, + "loss": 2.6452, + "nll_loss": 0.6612343192100525, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.985823660623282e-05, + "rewards/margins": 0.17485694587230682, + "rewards/rejected": -0.17494678497314453, + "step": 4103 + }, + { + "epoch": 2.8381742738589213, + "grad_norm": 7.3802008628845215, + "learning_rate": 3.978792070078377e-05, + "log_odds_chosen": 8.576873779296875, + "log_odds_ratio": -0.09843210875988007, + "logits/chosen": -0.6974600553512573, + "logits/rejected": -0.7106385231018066, + "logps/chosen": -0.04491880536079407, + "logps/rejected": -1.5757704973220825, + "loss": 1.641, + "nll_loss": 0.4003986716270447, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004491880536079407, + "rewards/margins": 0.15308517217636108, + "rewards/rejected": -0.1575770378112793, + "step": 4104 + }, + { + "epoch": 2.838865836791148, + "grad_norm": 15.590590476989746, + "learning_rate": 3.9784078684493624e-05, + "log_odds_chosen": 7.23812198638916, + "log_odds_ratio": -0.07408453524112701, + "logits/chosen": -0.49816223978996277, + "logits/rejected": -0.5431675314903259, + "logps/chosen": -0.02068902552127838, + "logps/rejected": -1.185168743133545, + "loss": 1.8842, + "nll_loss": 0.46363386511802673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002068902365863323, + "rewards/margins": 0.11644796282052994, + "rewards/rejected": -0.11851686239242554, + "step": 4105 + }, + { + "epoch": 2.839557399723375, + "grad_norm": 6.0863165855407715, + "learning_rate": 3.9780236668203476e-05, + "log_odds_chosen": 7.257332801818848, + "log_odds_ratio": -0.07132648676633835, + "logits/chosen": -0.6692647933959961, + "logits/rejected": -0.7386534214019775, + "logps/chosen": -0.03094615414738655, + "logps/rejected": -0.9907289743423462, + "loss": 1.726, + "nll_loss": 0.424371600151062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030946151819080114, + "rewards/margins": 0.095978282392025, + "rewards/rejected": -0.0990729033946991, + "step": 4106 + }, + { + "epoch": 2.840248962655602, + "grad_norm": 9.847002983093262, + "learning_rate": 3.977639465191332e-05, + "log_odds_chosen": 5.666792869567871, + "log_odds_ratio": -0.20099371671676636, + "logits/chosen": -0.583608090877533, + "logits/rejected": -0.6237548589706421, + "logps/chosen": -0.06134883314371109, + "logps/rejected": -1.838850736618042, + "loss": 2.6003, + "nll_loss": 0.6299666166305542, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006134883500635624, + "rewards/margins": 0.17775020003318787, + "rewards/rejected": -0.18388508260250092, + "step": 4107 + }, + { + "epoch": 2.8409405255878286, + "grad_norm": 12.510313987731934, + "learning_rate": 3.977255263562318e-05, + "log_odds_chosen": 7.918940544128418, + "log_odds_ratio": -0.0010384717024862766, + "logits/chosen": -0.6723248958587646, + "logits/rejected": -0.7267094850540161, + "logps/chosen": -0.0015254435129463673, + "logps/rejected": -1.2971229553222656, + "loss": 2.1719, + "nll_loss": 0.5428681969642639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015254435129463673, + "rewards/margins": 0.12955975532531738, + "rewards/rejected": -0.1297122985124588, + "step": 4108 + }, + { + "epoch": 2.8416320885200554, + "grad_norm": 7.601841449737549, + "learning_rate": 3.976871061933303e-05, + "log_odds_chosen": 5.5808939933776855, + "log_odds_ratio": -0.04469112306833267, + "logits/chosen": -0.6102938652038574, + "logits/rejected": -0.634024977684021, + "logps/chosen": -0.04405169188976288, + "logps/rejected": -0.8939533233642578, + "loss": 2.4455, + "nll_loss": 0.6069144606590271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004405169747769833, + "rewards/margins": 0.08499015867710114, + "rewards/rejected": -0.08939532935619354, + "step": 4109 + }, + { + "epoch": 2.8423236514522823, + "grad_norm": 11.374734878540039, + "learning_rate": 3.976486860304288e-05, + "log_odds_chosen": 9.27154541015625, + "log_odds_ratio": -0.0003829200577456504, + "logits/chosen": -0.6784447431564331, + "logits/rejected": -0.7364102602005005, + "logps/chosen": -0.0008755293092690408, + "logps/rejected": -1.626358151435852, + "loss": 1.8203, + "nll_loss": 0.45503830909729004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.755293674767017e-05, + "rewards/margins": 0.1625482589006424, + "rewards/rejected": -0.16263581812381744, + "step": 4110 + }, + { + "epoch": 2.843015214384509, + "grad_norm": 8.393563270568848, + "learning_rate": 3.976102658675273e-05, + "log_odds_chosen": 8.176108360290527, + "log_odds_ratio": -0.0006821705610491335, + "logits/chosen": -0.8334037661552429, + "logits/rejected": -0.839069128036499, + "logps/chosen": -0.010099105536937714, + "logps/rejected": -1.0930083990097046, + "loss": 2.1009, + "nll_loss": 0.5251496434211731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010099106002599, + "rewards/margins": 0.10829093307256699, + "rewards/rejected": -0.10930084437131882, + "step": 4111 + }, + { + "epoch": 2.843706777316736, + "grad_norm": 9.47371768951416, + "learning_rate": 3.975718457046258e-05, + "log_odds_chosen": 6.1424174308776855, + "log_odds_ratio": -0.0985918939113617, + "logits/chosen": -0.5856118202209473, + "logits/rejected": -0.6207878589630127, + "logps/chosen": -0.04165790230035782, + "logps/rejected": -1.1315250396728516, + "loss": 2.456, + "nll_loss": 0.6041349768638611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004165790043771267, + "rewards/margins": 0.10898672044277191, + "rewards/rejected": -0.11315250396728516, + "step": 4112 + }, + { + "epoch": 2.8443983402489628, + "grad_norm": 30.912683486938477, + "learning_rate": 3.975334255417243e-05, + "log_odds_chosen": 7.40262508392334, + "log_odds_ratio": -0.27424168586730957, + "logits/chosen": -0.7680657505989075, + "logits/rejected": -0.7505401372909546, + "logps/chosen": -0.07187169790267944, + "logps/rejected": -1.3441351652145386, + "loss": 1.8906, + "nll_loss": 0.44523701071739197, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007187169045209885, + "rewards/margins": 0.1272263377904892, + "rewards/rejected": -0.13441351056098938, + "step": 4113 + }, + { + "epoch": 2.8450899031811896, + "grad_norm": 7.518012046813965, + "learning_rate": 3.974950053788228e-05, + "log_odds_chosen": 9.113598823547363, + "log_odds_ratio": -0.00020042213145643473, + "logits/chosen": -0.4726852774620056, + "logits/rejected": -0.49504923820495605, + "logps/chosen": -0.0003604450321290642, + "logps/rejected": -1.195584774017334, + "loss": 2.1996, + "nll_loss": 0.549877941608429, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.604450466809794e-05, + "rewards/margins": 0.11952243745326996, + "rewards/rejected": -0.11955846846103668, + "step": 4114 + }, + { + "epoch": 2.8457814661134164, + "grad_norm": 14.199585914611816, + "learning_rate": 3.9745658521592135e-05, + "log_odds_chosen": 7.985638618469238, + "log_odds_ratio": -0.008731107227504253, + "logits/chosen": -0.4885767698287964, + "logits/rejected": -0.45740532875061035, + "logps/chosen": -0.0026937490329146385, + "logps/rejected": -1.0088611841201782, + "loss": 2.509, + "nll_loss": 0.6263870000839233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002693749265745282, + "rewards/margins": 0.10061675310134888, + "rewards/rejected": -0.10088612139225006, + "step": 4115 + }, + { + "epoch": 2.8464730290456433, + "grad_norm": 6.5890889167785645, + "learning_rate": 3.974181650530198e-05, + "log_odds_chosen": 9.449125289916992, + "log_odds_ratio": -0.0001227892644237727, + "logits/chosen": -0.5998516082763672, + "logits/rejected": -0.6113935112953186, + "logps/chosen": -0.0005787935806438327, + "logps/rejected": -1.388109803199768, + "loss": 1.5608, + "nll_loss": 0.39019450545310974, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.787936242995784e-05, + "rewards/margins": 0.13875310122966766, + "rewards/rejected": -0.13881099224090576, + "step": 4116 + }, + { + "epoch": 2.84716459197787, + "grad_norm": 17.623966217041016, + "learning_rate": 3.973797448901184e-05, + "log_odds_chosen": 5.721372604370117, + "log_odds_ratio": -0.3729022145271301, + "logits/chosen": -0.5502513647079468, + "logits/rejected": -0.5644844770431519, + "logps/chosen": -0.06836064159870148, + "logps/rejected": -1.288745641708374, + "loss": 2.1235, + "nll_loss": 0.49359455704689026, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006836064159870148, + "rewards/margins": 0.12203850597143173, + "rewards/rejected": -0.12887457013130188, + "step": 4117 + }, + { + "epoch": 2.847856154910097, + "grad_norm": 5.089163780212402, + "learning_rate": 3.9734132472721685e-05, + "log_odds_chosen": 7.329201698303223, + "log_odds_ratio": -0.17281562089920044, + "logits/chosen": -0.6293739080429077, + "logits/rejected": -0.6185722947120667, + "logps/chosen": -0.04492910951375961, + "logps/rejected": -1.3377739191055298, + "loss": 2.0892, + "nll_loss": 0.5050212740898132, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004492911044508219, + "rewards/margins": 0.12928447127342224, + "rewards/rejected": -0.13377737998962402, + "step": 4118 + }, + { + "epoch": 2.8485477178423237, + "grad_norm": 7.862229347229004, + "learning_rate": 3.973029045643154e-05, + "log_odds_chosen": 5.797403335571289, + "log_odds_ratio": -0.11605511605739594, + "logits/chosen": -0.495116651058197, + "logits/rejected": -0.5606188774108887, + "logps/chosen": -0.03224784508347511, + "logps/rejected": -0.8752045631408691, + "loss": 2.3005, + "nll_loss": 0.5635241270065308, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032247842755168676, + "rewards/margins": 0.08429567515850067, + "rewards/rejected": -0.08752045035362244, + "step": 4119 + }, + { + "epoch": 2.8492392807745506, + "grad_norm": 7.012640953063965, + "learning_rate": 3.972644844014139e-05, + "log_odds_chosen": 7.387671947479248, + "log_odds_ratio": -0.2472531795501709, + "logits/chosen": -0.9369310140609741, + "logits/rejected": -1.023695707321167, + "logps/chosen": -0.051521994173526764, + "logps/rejected": -1.3849338293075562, + "loss": 2.4995, + "nll_loss": 0.6001489162445068, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005152199417352676, + "rewards/margins": 0.13334119319915771, + "rewards/rejected": -0.1384933888912201, + "step": 4120 + }, + { + "epoch": 2.8499308437067774, + "grad_norm": 8.53339958190918, + "learning_rate": 3.9722606423851236e-05, + "log_odds_chosen": 6.195735931396484, + "log_odds_ratio": -0.14083018898963928, + "logits/chosen": -0.48863381147384644, + "logits/rejected": -0.5069273710250854, + "logps/chosen": -0.041694194078445435, + "logps/rejected": -1.1637004613876343, + "loss": 1.8266, + "nll_loss": 0.4425726532936096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0041694194078445435, + "rewards/margins": 0.11220061779022217, + "rewards/rejected": -0.11637004464864731, + "step": 4121 + }, + { + "epoch": 2.8506224066390042, + "grad_norm": 9.292778015136719, + "learning_rate": 3.971876440756109e-05, + "log_odds_chosen": 8.705432891845703, + "log_odds_ratio": -0.0005094002117402852, + "logits/chosen": -0.24346569180488586, + "logits/rejected": -0.2411637306213379, + "logps/chosen": -0.000389696768252179, + "logps/rejected": -1.2692971229553223, + "loss": 1.9885, + "nll_loss": 0.49707961082458496, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.896967973560095e-05, + "rewards/margins": 0.12689074873924255, + "rewards/rejected": -0.12692971527576447, + "step": 4122 + }, + { + "epoch": 2.851313969571231, + "grad_norm": 8.638797760009766, + "learning_rate": 3.971492239127094e-05, + "log_odds_chosen": 8.19682788848877, + "log_odds_ratio": -0.24544239044189453, + "logits/chosen": -0.5123015642166138, + "logits/rejected": -0.5535436868667603, + "logps/chosen": -0.027725744992494583, + "logps/rejected": -1.5624759197235107, + "loss": 2.4238, + "nll_loss": 0.5813997983932495, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002772574545815587, + "rewards/margins": 0.15347503125667572, + "rewards/rejected": -0.1562476009130478, + "step": 4123 + }, + { + "epoch": 2.852005532503458, + "grad_norm": 7.665647506713867, + "learning_rate": 3.971108037498079e-05, + "log_odds_chosen": 7.87119722366333, + "log_odds_ratio": -0.09006493538618088, + "logits/chosen": -0.5190091133117676, + "logits/rejected": -0.6639402508735657, + "logps/chosen": -0.02196761779487133, + "logps/rejected": -1.3968288898468018, + "loss": 1.7224, + "nll_loss": 0.42158395051956177, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002196761779487133, + "rewards/margins": 0.13748612999916077, + "rewards/rejected": -0.13968288898468018, + "step": 4124 + }, + { + "epoch": 2.8526970954356847, + "grad_norm": 8.311286926269531, + "learning_rate": 3.970723835869064e-05, + "log_odds_chosen": 6.067910194396973, + "log_odds_ratio": -0.029917169362306595, + "logits/chosen": -0.5240775346755981, + "logits/rejected": -0.519280731678009, + "logps/chosen": -0.03226805478334427, + "logps/rejected": -1.527235507965088, + "loss": 2.5063, + "nll_loss": 0.6235796213150024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032268057111650705, + "rewards/margins": 0.14949676394462585, + "rewards/rejected": -0.1527235507965088, + "step": 4125 + }, + { + "epoch": 2.8533886583679116, + "grad_norm": 10.94139289855957, + "learning_rate": 3.97033963424005e-05, + "log_odds_chosen": 8.23917007446289, + "log_odds_ratio": -0.16562430560588837, + "logits/chosen": -0.6252275705337524, + "logits/rejected": -0.7134463787078857, + "logps/chosen": -0.07940398901700974, + "logps/rejected": -1.8555247783660889, + "loss": 2.4194, + "nll_loss": 0.5882754921913147, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007940399460494518, + "rewards/margins": 0.1776120811700821, + "rewards/rejected": -0.1855524778366089, + "step": 4126 + }, + { + "epoch": 2.8540802213001384, + "grad_norm": 9.70150089263916, + "learning_rate": 3.9699554326110344e-05, + "log_odds_chosen": 8.478819847106934, + "log_odds_ratio": -0.002084367675706744, + "logits/chosen": -0.7293291687965393, + "logits/rejected": -0.7733567953109741, + "logps/chosen": -0.002238813554868102, + "logps/rejected": -1.5626964569091797, + "loss": 2.2216, + "nll_loss": 0.5551963448524475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022388134675566107, + "rewards/margins": 0.15604576468467712, + "rewards/rejected": -0.1562696397304535, + "step": 4127 + }, + { + "epoch": 2.854771784232365, + "grad_norm": 7.927999019622803, + "learning_rate": 3.9695712309820196e-05, + "log_odds_chosen": 6.218683242797852, + "log_odds_ratio": -0.05425438657402992, + "logits/chosen": -0.599659264087677, + "logits/rejected": -0.6657902002334595, + "logps/chosen": -0.02499246969819069, + "logps/rejected": -1.1194629669189453, + "loss": 2.4865, + "nll_loss": 0.6162107586860657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002499246969819069, + "rewards/margins": 0.10944704711437225, + "rewards/rejected": -0.11194629967212677, + "step": 4128 + }, + { + "epoch": 2.855463347164592, + "grad_norm": 6.431820869445801, + "learning_rate": 3.969187029353005e-05, + "log_odds_chosen": 4.778810501098633, + "log_odds_ratio": -0.20529891550540924, + "logits/chosen": -0.6679726839065552, + "logits/rejected": -0.6747136116027832, + "logps/chosen": -0.042183686047792435, + "logps/rejected": -0.634368896484375, + "loss": 2.6459, + "nll_loss": 0.6409425735473633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004218368325382471, + "rewards/margins": 0.05921851843595505, + "rewards/rejected": -0.06343688815832138, + "step": 4129 + }, + { + "epoch": 2.856154910096819, + "grad_norm": 10.62767505645752, + "learning_rate": 3.9688028277239894e-05, + "log_odds_chosen": 8.825204849243164, + "log_odds_ratio": -0.0023956261575222015, + "logits/chosen": -0.37675511837005615, + "logits/rejected": -0.46603935956954956, + "logps/chosen": -0.023891257122159004, + "logps/rejected": -1.8063663244247437, + "loss": 2.1087, + "nll_loss": 0.5269260406494141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002389125758782029, + "rewards/margins": 0.17824749648571014, + "rewards/rejected": -0.18063662946224213, + "step": 4130 + }, + { + "epoch": 2.8568464730290457, + "grad_norm": 9.79223918914795, + "learning_rate": 3.968418626094975e-05, + "log_odds_chosen": 7.960743427276611, + "log_odds_ratio": -0.025452613830566406, + "logits/chosen": -0.4843645691871643, + "logits/rejected": -0.5887432098388672, + "logps/chosen": -0.015297478064894676, + "logps/rejected": -1.1496312618255615, + "loss": 2.0604, + "nll_loss": 0.512560248374939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015297478530555964, + "rewards/margins": 0.11343339085578918, + "rewards/rejected": -0.11496314406394958, + "step": 4131 + }, + { + "epoch": 2.8575380359612725, + "grad_norm": 13.040294647216797, + "learning_rate": 3.96803442446596e-05, + "log_odds_chosen": 7.492790222167969, + "log_odds_ratio": -0.09991626441478729, + "logits/chosen": -0.7050648331642151, + "logits/rejected": -0.7885043621063232, + "logps/chosen": -0.07697435468435287, + "logps/rejected": -1.779345989227295, + "loss": 1.6866, + "nll_loss": 0.411651611328125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007697435095906258, + "rewards/margins": 0.17023716866970062, + "rewards/rejected": -0.17793460190296173, + "step": 4132 + }, + { + "epoch": 2.8582295988934994, + "grad_norm": 12.954906463623047, + "learning_rate": 3.967650222836945e-05, + "log_odds_chosen": 8.792889595031738, + "log_odds_ratio": -0.0010062884539365768, + "logits/chosen": -0.7896513938903809, + "logits/rejected": -0.8716443181037903, + "logps/chosen": -0.00804637186229229, + "logps/rejected": -2.03944993019104, + "loss": 3.4081, + "nll_loss": 0.8519180417060852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008046371513046324, + "rewards/margins": 0.20314034819602966, + "rewards/rejected": -0.20394501090049744, + "step": 4133 + }, + { + "epoch": 2.858921161825726, + "grad_norm": 9.084781646728516, + "learning_rate": 3.96726602120793e-05, + "log_odds_chosen": 9.515411376953125, + "log_odds_ratio": -0.0002063115971395746, + "logits/chosen": -0.6839795112609863, + "logits/rejected": -0.7928391098976135, + "logps/chosen": -0.0004598861269187182, + "logps/rejected": -1.465610146522522, + "loss": 1.3488, + "nll_loss": 0.3371778130531311, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.598861050908454e-05, + "rewards/margins": 0.14651501178741455, + "rewards/rejected": -0.14656101167201996, + "step": 4134 + }, + { + "epoch": 2.859612724757953, + "grad_norm": 5.373547077178955, + "learning_rate": 3.9668818195789156e-05, + "log_odds_chosen": 6.309614658355713, + "log_odds_ratio": -0.1429893523454666, + "logits/chosen": -0.3133348524570465, + "logits/rejected": -0.2622450888156891, + "logps/chosen": -0.03732801601290703, + "logps/rejected": -1.3269708156585693, + "loss": 1.9574, + "nll_loss": 0.4750511348247528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037328016478568316, + "rewards/margins": 0.12896430492401123, + "rewards/rejected": -0.13269709050655365, + "step": 4135 + }, + { + "epoch": 2.86030428769018, + "grad_norm": 6.702598571777344, + "learning_rate": 3.9664976179499e-05, + "log_odds_chosen": 7.8618597984313965, + "log_odds_ratio": -0.0689602792263031, + "logits/chosen": -0.43502557277679443, + "logits/rejected": -0.5458186268806458, + "logps/chosen": -0.023434704169631004, + "logps/rejected": -1.301213026046753, + "loss": 1.5143, + "nll_loss": 0.37167826294898987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023434702306985855, + "rewards/margins": 0.1277778297662735, + "rewards/rejected": -0.13012130558490753, + "step": 4136 + }, + { + "epoch": 2.8609958506224067, + "grad_norm": 6.976266860961914, + "learning_rate": 3.9661134163208855e-05, + "log_odds_chosen": 8.573393821716309, + "log_odds_ratio": -0.002217457164078951, + "logits/chosen": -0.6940039396286011, + "logits/rejected": -0.6850975155830383, + "logps/chosen": -0.01544831320643425, + "logps/rejected": -1.618596076965332, + "loss": 2.9021, + "nll_loss": 0.725298285484314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001544831320643425, + "rewards/margins": 0.16031478345394135, + "rewards/rejected": -0.16185960173606873, + "step": 4137 + }, + { + "epoch": 2.8616874135546335, + "grad_norm": 10.130950927734375, + "learning_rate": 3.965729214691871e-05, + "log_odds_chosen": 8.983856201171875, + "log_odds_ratio": -0.0004064887179993093, + "logits/chosen": -0.7249870896339417, + "logits/rejected": -0.8000103235244751, + "logps/chosen": -0.0005151379154995084, + "logps/rejected": -1.072391152381897, + "loss": 3.4311, + "nll_loss": 0.8577353358268738, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.151379082235508e-05, + "rewards/margins": 0.10718759894371033, + "rewards/rejected": -0.10723911970853806, + "step": 4138 + }, + { + "epoch": 2.8623789764868603, + "grad_norm": 11.023736000061035, + "learning_rate": 3.965345013062855e-05, + "log_odds_chosen": 7.778514862060547, + "log_odds_ratio": -0.0062539223581552505, + "logits/chosen": -0.6994754076004028, + "logits/rejected": -0.727636992931366, + "logps/chosen": -0.013756345957517624, + "logps/rejected": -1.6888890266418457, + "loss": 2.6655, + "nll_loss": 0.6657446026802063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001375634572468698, + "rewards/margins": 0.16751326620578766, + "rewards/rejected": -0.1688888967037201, + "step": 4139 + }, + { + "epoch": 2.863070539419087, + "grad_norm": 10.296049118041992, + "learning_rate": 3.9649608114338405e-05, + "log_odds_chosen": 8.372491836547852, + "log_odds_ratio": -0.00872521847486496, + "logits/chosen": -0.6874377131462097, + "logits/rejected": -0.6679731607437134, + "logps/chosen": -0.01878192648291588, + "logps/rejected": -2.1108551025390625, + "loss": 1.8428, + "nll_loss": 0.45982909202575684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001878192531876266, + "rewards/margins": 0.20920731127262115, + "rewards/rejected": -0.2110855132341385, + "step": 4140 + }, + { + "epoch": 2.863762102351314, + "grad_norm": 8.425458908081055, + "learning_rate": 3.964576609804826e-05, + "log_odds_chosen": 5.280470371246338, + "log_odds_ratio": -0.17009109258651733, + "logits/chosen": -0.6054092645645142, + "logits/rejected": -0.6110110282897949, + "logps/chosen": -0.03894772753119469, + "logps/rejected": -0.6882196664810181, + "loss": 2.2599, + "nll_loss": 0.5479753017425537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003894772846251726, + "rewards/margins": 0.06492719054222107, + "rewards/rejected": -0.06882195919752121, + "step": 4141 + }, + { + "epoch": 2.864453665283541, + "grad_norm": 8.525484085083008, + "learning_rate": 3.964192408175811e-05, + "log_odds_chosen": 7.187023639678955, + "log_odds_ratio": -0.005043178331106901, + "logits/chosen": -0.5835676193237305, + "logits/rejected": -0.6493792533874512, + "logps/chosen": -0.009211929515004158, + "logps/rejected": -1.476091742515564, + "loss": 1.7105, + "nll_loss": 0.4271281957626343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009211929282173514, + "rewards/margins": 0.14668798446655273, + "rewards/rejected": -0.1476091593503952, + "step": 4142 + }, + { + "epoch": 2.8651452282157677, + "grad_norm": 10.203916549682617, + "learning_rate": 3.9638082065467956e-05, + "log_odds_chosen": 7.878159046173096, + "log_odds_ratio": -0.032731618732213974, + "logits/chosen": -0.5193063020706177, + "logits/rejected": -0.5699669122695923, + "logps/chosen": -0.013802244327962399, + "logps/rejected": -1.6927944421768188, + "loss": 1.8504, + "nll_loss": 0.45931798219680786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013802244793623686, + "rewards/margins": 0.1678992211818695, + "rewards/rejected": -0.16927944123744965, + "step": 4143 + }, + { + "epoch": 2.8658367911479945, + "grad_norm": 10.561854362487793, + "learning_rate": 3.9634240049177815e-05, + "log_odds_chosen": 8.101981163024902, + "log_odds_ratio": -0.049096763134002686, + "logits/chosen": -0.5377451777458191, + "logits/rejected": -0.5618083477020264, + "logps/chosen": -0.011895643547177315, + "logps/rejected": -1.4573811292648315, + "loss": 2.1364, + "nll_loss": 0.5291892290115356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011895645875483751, + "rewards/margins": 0.14454853534698486, + "rewards/rejected": -0.14573809504508972, + "step": 4144 + }, + { + "epoch": 2.8665283540802213, + "grad_norm": 16.13058853149414, + "learning_rate": 3.963039803288766e-05, + "log_odds_chosen": 8.42239761352539, + "log_odds_ratio": -0.0038425899110734463, + "logits/chosen": -0.7643156051635742, + "logits/rejected": -0.8458773493766785, + "logps/chosen": -0.0014390680007636547, + "logps/rejected": -1.2367219924926758, + "loss": 2.3132, + "nll_loss": 0.5779082775115967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014390680007636547, + "rewards/margins": 0.12352830171585083, + "rewards/rejected": -0.12367220222949982, + "step": 4145 + }, + { + "epoch": 2.867219917012448, + "grad_norm": 11.940716743469238, + "learning_rate": 3.962655601659751e-05, + "log_odds_chosen": 7.8669891357421875, + "log_odds_ratio": -0.0037622463423758745, + "logits/chosen": -0.45503783226013184, + "logits/rejected": -0.5161993503570557, + "logps/chosen": -0.0027905493043363094, + "logps/rejected": -1.4165853261947632, + "loss": 2.5674, + "nll_loss": 0.6414811611175537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002790549478959292, + "rewards/margins": 0.1413794755935669, + "rewards/rejected": -0.14165852963924408, + "step": 4146 + }, + { + "epoch": 2.867911479944675, + "grad_norm": 11.2161865234375, + "learning_rate": 3.9622714000307365e-05, + "log_odds_chosen": 8.637145042419434, + "log_odds_ratio": -0.0002987972693517804, + "logits/chosen": -0.39274901151657104, + "logits/rejected": -0.3932146430015564, + "logps/chosen": -0.0006174084264785051, + "logps/rejected": -1.0560269355773926, + "loss": 2.1117, + "nll_loss": 0.5278958082199097, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174084410304204e-05, + "rewards/margins": 0.10554095357656479, + "rewards/rejected": -0.1056026890873909, + "step": 4147 + }, + { + "epoch": 2.868603042876902, + "grad_norm": 9.172666549682617, + "learning_rate": 3.961887198401721e-05, + "log_odds_chosen": 7.592833042144775, + "log_odds_ratio": -0.12618932127952576, + "logits/chosen": -0.5942907333374023, + "logits/rejected": -0.507341742515564, + "logps/chosen": -0.021718140691518784, + "logps/rejected": -1.5091440677642822, + "loss": 2.3169, + "nll_loss": 0.5666061043739319, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021718142088502645, + "rewards/margins": 0.14874258637428284, + "rewards/rejected": -0.15091440081596375, + "step": 4148 + }, + { + "epoch": 2.8692946058091287, + "grad_norm": 8.65243911743164, + "learning_rate": 3.9615029967727064e-05, + "log_odds_chosen": 9.024596214294434, + "log_odds_ratio": -0.0005055609508417547, + "logits/chosen": -0.7445271015167236, + "logits/rejected": -0.7984901666641235, + "logps/chosen": -0.0023893089964985847, + "logps/rejected": -1.6262643337249756, + "loss": 2.3654, + "nll_loss": 0.5912907719612122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023893089382909238, + "rewards/margins": 0.16238752007484436, + "rewards/rejected": -0.1626264452934265, + "step": 4149 + }, + { + "epoch": 2.8699861687413555, + "grad_norm": 6.936448574066162, + "learning_rate": 3.9611187951436916e-05, + "log_odds_chosen": 8.660881996154785, + "log_odds_ratio": -0.003154081990942359, + "logits/chosen": -0.3486626446247101, + "logits/rejected": -0.5041624307632446, + "logps/chosen": -0.008399656973779202, + "logps/rejected": -1.586033582687378, + "loss": 1.7391, + "nll_loss": 0.43445295095443726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008399657090194523, + "rewards/margins": 0.15776340663433075, + "rewards/rejected": -0.15860337018966675, + "step": 4150 + }, + { + "epoch": 2.8706777316735823, + "grad_norm": 9.0235595703125, + "learning_rate": 3.960734593514677e-05, + "log_odds_chosen": 8.743791580200195, + "log_odds_ratio": -0.016513537615537643, + "logits/chosen": -0.32983332872390747, + "logits/rejected": -0.3408893346786499, + "logps/chosen": -0.007986058481037617, + "logps/rejected": -1.2895114421844482, + "loss": 1.7975, + "nll_loss": 0.44772592186927795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007986059063114226, + "rewards/margins": 0.128152534365654, + "rewards/rejected": -0.12895113229751587, + "step": 4151 + }, + { + "epoch": 2.871369294605809, + "grad_norm": 7.92490291595459, + "learning_rate": 3.9603503918856614e-05, + "log_odds_chosen": 7.174158096313477, + "log_odds_ratio": -0.10260120034217834, + "logits/chosen": -0.7217217087745667, + "logits/rejected": -0.7370195984840393, + "logps/chosen": -0.03901343792676926, + "logps/rejected": -1.1967353820800781, + "loss": 2.2984, + "nll_loss": 0.5643276572227478, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0039013437926769257, + "rewards/margins": 0.11577218770980835, + "rewards/rejected": -0.11967353522777557, + "step": 4152 + }, + { + "epoch": 2.872060857538036, + "grad_norm": 10.109107971191406, + "learning_rate": 3.959966190256647e-05, + "log_odds_chosen": 8.623994827270508, + "log_odds_ratio": -0.0028305151499807835, + "logits/chosen": -0.376717209815979, + "logits/rejected": -0.4848959743976593, + "logps/chosen": -0.01420350931584835, + "logps/rejected": -1.7301173210144043, + "loss": 2.1019, + "nll_loss": 0.5252029895782471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014203509781509638, + "rewards/margins": 0.1715914011001587, + "rewards/rejected": -0.17301173508167267, + "step": 4153 + }, + { + "epoch": 2.872752420470263, + "grad_norm": 24.528562545776367, + "learning_rate": 3.959581988627632e-05, + "log_odds_chosen": 5.912657737731934, + "log_odds_ratio": -0.3544897437095642, + "logits/chosen": -0.593536376953125, + "logits/rejected": -0.5855855941772461, + "logps/chosen": -0.03977097570896149, + "logps/rejected": -1.3248205184936523, + "loss": 2.729, + "nll_loss": 0.6468054056167603, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003977097105234861, + "rewards/margins": 0.1285049468278885, + "rewards/rejected": -0.13248205184936523, + "step": 4154 + }, + { + "epoch": 2.8734439834024896, + "grad_norm": 3.737313747406006, + "learning_rate": 3.959197786998617e-05, + "log_odds_chosen": 7.370685577392578, + "log_odds_ratio": -0.003854503622278571, + "logits/chosen": -0.1420593559741974, + "logits/rejected": -0.14798593521118164, + "logps/chosen": -0.029535293579101562, + "logps/rejected": -1.2239516973495483, + "loss": 1.6555, + "nll_loss": 0.4134930968284607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029535293579101562, + "rewards/margins": 0.11944163590669632, + "rewards/rejected": -0.12239515781402588, + "step": 4155 + }, + { + "epoch": 2.8741355463347165, + "grad_norm": 12.146896362304688, + "learning_rate": 3.9588135853696024e-05, + "log_odds_chosen": 8.372788429260254, + "log_odds_ratio": -0.0069664292968809605, + "logits/chosen": -0.7387031316757202, + "logits/rejected": -0.8111766576766968, + "logps/chosen": -0.03236401453614235, + "logps/rejected": -1.8445135354995728, + "loss": 3.1845, + "nll_loss": 0.7954254150390625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003236401593312621, + "rewards/margins": 0.18121495842933655, + "rewards/rejected": -0.18445135653018951, + "step": 4156 + }, + { + "epoch": 2.8748271092669433, + "grad_norm": 5.84153938293457, + "learning_rate": 3.958429383740587e-05, + "log_odds_chosen": 8.153403282165527, + "log_odds_ratio": -0.006102471146732569, + "logits/chosen": -0.6401076316833496, + "logits/rejected": -0.6655992865562439, + "logps/chosen": -0.02678937464952469, + "logps/rejected": -1.4880584478378296, + "loss": 1.7642, + "nll_loss": 0.440429151058197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026789375115185976, + "rewards/margins": 0.1461269110441208, + "rewards/rejected": -0.14880585670471191, + "step": 4157 + }, + { + "epoch": 2.87551867219917, + "grad_norm": 12.111544609069824, + "learning_rate": 3.958045182111572e-05, + "log_odds_chosen": 8.821361541748047, + "log_odds_ratio": -0.018130799755454063, + "logits/chosen": -0.8116099834442139, + "logits/rejected": -0.8167247772216797, + "logps/chosen": -0.01426270417869091, + "logps/rejected": -1.8682608604431152, + "loss": 2.8337, + "nll_loss": 0.7066094279289246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014262704644352198, + "rewards/margins": 0.18539981544017792, + "rewards/rejected": -0.18682609498500824, + "step": 4158 + }, + { + "epoch": 2.876210235131397, + "grad_norm": 10.255803108215332, + "learning_rate": 3.9576609804825574e-05, + "log_odds_chosen": 8.658646583557129, + "log_odds_ratio": -0.0022335494868457317, + "logits/chosen": -0.34910067915916443, + "logits/rejected": -0.41302329301834106, + "logps/chosen": -0.02276512421667576, + "logps/rejected": -1.6416635513305664, + "loss": 2.4113, + "nll_loss": 0.6026020646095276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002276512561365962, + "rewards/margins": 0.16188983619213104, + "rewards/rejected": -0.16416634619235992, + "step": 4159 + }, + { + "epoch": 2.876901798063624, + "grad_norm": 11.113587379455566, + "learning_rate": 3.957276778853543e-05, + "log_odds_chosen": 7.869854927062988, + "log_odds_ratio": -0.012342148460447788, + "logits/chosen": -0.27500343322753906, + "logits/rejected": -0.31267884373664856, + "logps/chosen": -0.0049442751333117485, + "logps/rejected": -0.9284360408782959, + "loss": 2.7271, + "nll_loss": 0.6805315017700195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004944275133311749, + "rewards/margins": 0.09234918653964996, + "rewards/rejected": -0.09284360706806183, + "step": 4160 + }, + { + "epoch": 2.8775933609958506, + "grad_norm": 6.368313789367676, + "learning_rate": 3.956892577224527e-05, + "log_odds_chosen": 8.16576099395752, + "log_odds_ratio": -0.0021114160772413015, + "logits/chosen": -0.646938681602478, + "logits/rejected": -0.7232961654663086, + "logps/chosen": -0.007677272893488407, + "logps/rejected": -1.0247087478637695, + "loss": 1.4459, + "nll_loss": 0.36126214265823364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007677272660657763, + "rewards/margins": 0.10170315951108932, + "rewards/rejected": -0.10247088968753815, + "step": 4161 + }, + { + "epoch": 2.8782849239280774, + "grad_norm": 8.01225757598877, + "learning_rate": 3.956508375595513e-05, + "log_odds_chosen": 8.719640731811523, + "log_odds_ratio": -0.0030138578731566668, + "logits/chosen": -0.22106723487377167, + "logits/rejected": -0.2978143095970154, + "logps/chosen": -0.014302356168627739, + "logps/rejected": -1.9104068279266357, + "loss": 2.0553, + "nll_loss": 0.5135329961776733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014302355702966452, + "rewards/margins": 0.18961046636104584, + "rewards/rejected": -0.19104069471359253, + "step": 4162 + }, + { + "epoch": 2.8789764868603043, + "grad_norm": 3.933363199234009, + "learning_rate": 3.956124173966498e-05, + "log_odds_chosen": 6.446390628814697, + "log_odds_ratio": -0.0866515040397644, + "logits/chosen": -0.45056915283203125, + "logits/rejected": -0.44341132044792175, + "logps/chosen": -0.06035517156124115, + "logps/rejected": -2.150413990020752, + "loss": 2.0524, + "nll_loss": 0.5044370293617249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0060355169698596, + "rewards/margins": 0.20900589227676392, + "rewards/rejected": -0.2150413990020752, + "step": 4163 + }, + { + "epoch": 2.879668049792531, + "grad_norm": 4.023238658905029, + "learning_rate": 3.955739972337483e-05, + "log_odds_chosen": 8.455058097839355, + "log_odds_ratio": -0.021985219791531563, + "logits/chosen": -0.3112573027610779, + "logits/rejected": -0.4217797815799713, + "logps/chosen": -0.0011460219975560904, + "logps/rejected": -1.2003974914550781, + "loss": 1.8707, + "nll_loss": 0.4654797911643982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011460219684522599, + "rewards/margins": 0.11992514133453369, + "rewards/rejected": -0.12003974616527557, + "step": 4164 + }, + { + "epoch": 2.880359612724758, + "grad_norm": 14.296460151672363, + "learning_rate": 3.955355770708468e-05, + "log_odds_chosen": 7.658043384552002, + "log_odds_ratio": -0.11128035187721252, + "logits/chosen": -0.44805073738098145, + "logits/rejected": -0.5070619583129883, + "logps/chosen": -0.021924814209342003, + "logps/rejected": -1.7103160619735718, + "loss": 1.6055, + "nll_loss": 0.3902527093887329, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021924814209342003, + "rewards/margins": 0.16883914172649384, + "rewards/rejected": -0.1710316240787506, + "step": 4165 + }, + { + "epoch": 2.8810511756569848, + "grad_norm": 16.067855834960938, + "learning_rate": 3.954971569079453e-05, + "log_odds_chosen": 6.472123622894287, + "log_odds_ratio": -0.07002764195203781, + "logits/chosen": -0.31374144554138184, + "logits/rejected": -0.3764995038509369, + "logps/chosen": -0.010352091863751411, + "logps/rejected": -1.2906413078308105, + "loss": 2.0815, + "nll_loss": 0.5133610963821411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010352092795073986, + "rewards/margins": 0.12802892923355103, + "rewards/rejected": -0.12906414270401, + "step": 4166 + }, + { + "epoch": 2.8817427385892116, + "grad_norm": 8.798420906066895, + "learning_rate": 3.954587367450438e-05, + "log_odds_chosen": 8.108315467834473, + "log_odds_ratio": -0.0021794813219457865, + "logits/chosen": -0.4807795584201813, + "logits/rejected": -0.5782303214073181, + "logps/chosen": -0.010585448704659939, + "logps/rejected": -1.2855427265167236, + "loss": 2.992, + "nll_loss": 0.7477890849113464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010585449635982513, + "rewards/margins": 0.12749573588371277, + "rewards/rejected": -0.12855426967144012, + "step": 4167 + }, + { + "epoch": 2.8824343015214384, + "grad_norm": 8.277303695678711, + "learning_rate": 3.954203165821423e-05, + "log_odds_chosen": 8.692684173583984, + "log_odds_ratio": -0.0006725117564201355, + "logits/chosen": -0.350612074136734, + "logits/rejected": -0.4381290674209595, + "logps/chosen": -0.027638660743832588, + "logps/rejected": -1.892930030822754, + "loss": 2.8563, + "nll_loss": 0.7140158414840698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002763866214081645, + "rewards/margins": 0.18652912974357605, + "rewards/rejected": -0.1892929971218109, + "step": 4168 + }, + { + "epoch": 2.8831258644536653, + "grad_norm": 10.069982528686523, + "learning_rate": 3.9538189641924085e-05, + "log_odds_chosen": 9.616327285766602, + "log_odds_ratio": -0.00030772568425163627, + "logits/chosen": -0.5223462581634521, + "logits/rejected": -0.6427964568138123, + "logps/chosen": -0.0007177912630140781, + "logps/rejected": -1.6643712520599365, + "loss": 2.0674, + "nll_loss": 0.5168122053146362, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.177912630140781e-05, + "rewards/margins": 0.1663653403520584, + "rewards/rejected": -0.16643711924552917, + "step": 4169 + }, + { + "epoch": 2.883817427385892, + "grad_norm": 7.767836093902588, + "learning_rate": 3.953434762563393e-05, + "log_odds_chosen": 6.838160514831543, + "log_odds_ratio": -0.02437964268028736, + "logits/chosen": -0.39230552315711975, + "logits/rejected": -0.443790078163147, + "logps/chosen": -0.028187813237309456, + "logps/rejected": -1.36873197555542, + "loss": 2.5007, + "nll_loss": 0.6227420568466187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002818781416863203, + "rewards/margins": 0.13405440747737885, + "rewards/rejected": -0.13687318563461304, + "step": 4170 + }, + { + "epoch": 2.884508990318119, + "grad_norm": 13.185744285583496, + "learning_rate": 3.953050560934379e-05, + "log_odds_chosen": 7.265320777893066, + "log_odds_ratio": -0.028976215049624443, + "logits/chosen": -0.5060882568359375, + "logits/rejected": -0.49441277980804443, + "logps/chosen": -0.03849921375513077, + "logps/rejected": -1.089863657951355, + "loss": 2.2308, + "nll_loss": 0.5547950267791748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038499212823808193, + "rewards/margins": 0.105136439204216, + "rewards/rejected": -0.10898637026548386, + "step": 4171 + }, + { + "epoch": 2.8852005532503457, + "grad_norm": 12.196954727172852, + "learning_rate": 3.9526663593053636e-05, + "log_odds_chosen": 6.205423355102539, + "log_odds_ratio": -0.13248874247074127, + "logits/chosen": -0.4635886549949646, + "logits/rejected": -0.46544769406318665, + "logps/chosen": -0.026167435571551323, + "logps/rejected": -1.3755590915679932, + "loss": 2.1558, + "nll_loss": 0.5257101655006409, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002616743789985776, + "rewards/margins": 0.13493917882442474, + "rewards/rejected": -0.13755591213703156, + "step": 4172 + }, + { + "epoch": 2.8858921161825726, + "grad_norm": 8.013738632202148, + "learning_rate": 3.952282157676349e-05, + "log_odds_chosen": 7.696534156799316, + "log_odds_ratio": -0.016941042616963387, + "logits/chosen": -1.0045013427734375, + "logits/rejected": -0.9985222816467285, + "logps/chosen": -0.007150155026465654, + "logps/rejected": -1.0973981618881226, + "loss": 3.2758, + "nll_loss": 0.8172488212585449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007150155142880976, + "rewards/margins": 0.10902479290962219, + "rewards/rejected": -0.10973981767892838, + "step": 4173 + }, + { + "epoch": 2.8865836791147994, + "grad_norm": 8.398833274841309, + "learning_rate": 3.951897956047334e-05, + "log_odds_chosen": 7.615683078765869, + "log_odds_ratio": -0.03679567947983742, + "logits/chosen": -0.08813100308179855, + "logits/rejected": -0.1682872474193573, + "logps/chosen": -0.015082152560353279, + "logps/rejected": -1.5612318515777588, + "loss": 1.8013, + "nll_loss": 0.44665414094924927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015082152094691992, + "rewards/margins": 0.15461498498916626, + "rewards/rejected": -0.15612319111824036, + "step": 4174 + }, + { + "epoch": 2.8872752420470262, + "grad_norm": 6.936132907867432, + "learning_rate": 3.9515137544183186e-05, + "log_odds_chosen": 8.234755516052246, + "log_odds_ratio": -0.009359374642372131, + "logits/chosen": -0.6276150941848755, + "logits/rejected": -0.6759732365608215, + "logps/chosen": -0.01394907757639885, + "logps/rejected": -1.7503783702850342, + "loss": 2.0957, + "nll_loss": 0.522986114025116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001394907827489078, + "rewards/margins": 0.17364291846752167, + "rewards/rejected": -0.17503784596920013, + "step": 4175 + }, + { + "epoch": 2.887966804979253, + "grad_norm": 11.726055145263672, + "learning_rate": 3.951129552789304e-05, + "log_odds_chosen": 9.761301040649414, + "log_odds_ratio": -8.028695447137579e-05, + "logits/chosen": -0.7729347348213196, + "logits/rejected": -0.8374239206314087, + "logps/chosen": -0.00031806406332179904, + "logps/rejected": -1.7444686889648438, + "loss": 1.9061, + "nll_loss": 0.4765219986438751, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.180640851496719e-05, + "rewards/margins": 0.17441505193710327, + "rewards/rejected": -0.17444688081741333, + "step": 4176 + }, + { + "epoch": 2.88865836791148, + "grad_norm": 9.028305053710938, + "learning_rate": 3.950745351160289e-05, + "log_odds_chosen": 9.551950454711914, + "log_odds_ratio": -0.048649102449417114, + "logits/chosen": -0.5353203415870667, + "logits/rejected": -0.6251725554466248, + "logps/chosen": -0.011355753988027573, + "logps/rejected": -1.9747955799102783, + "loss": 1.852, + "nll_loss": 0.45813390612602234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011355754686519504, + "rewards/margins": 0.19634398818016052, + "rewards/rejected": -0.19747956097126007, + "step": 4177 + }, + { + "epoch": 2.8893499308437067, + "grad_norm": 7.553938865661621, + "learning_rate": 3.9503611495312744e-05, + "log_odds_chosen": 8.27835750579834, + "log_odds_ratio": -0.04242272675037384, + "logits/chosen": -0.3490467667579651, + "logits/rejected": -0.36693528294563293, + "logps/chosen": -0.010712453164160252, + "logps/rejected": -1.159693717956543, + "loss": 1.8144, + "nll_loss": 0.4493652880191803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010712452931329608, + "rewards/margins": 0.11489813029766083, + "rewards/rejected": -0.11596937477588654, + "step": 4178 + }, + { + "epoch": 2.8900414937759336, + "grad_norm": 10.130223274230957, + "learning_rate": 3.949976947902259e-05, + "log_odds_chosen": 6.871800422668457, + "log_odds_ratio": -0.04268191382288933, + "logits/chosen": -0.7141193151473999, + "logits/rejected": -0.7577247619628906, + "logps/chosen": -0.012114400044083595, + "logps/rejected": -1.2614021301269531, + "loss": 2.3988, + "nll_loss": 0.5954397916793823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012114399578422308, + "rewards/margins": 0.124928779900074, + "rewards/rejected": -0.12614022195339203, + "step": 4179 + }, + { + "epoch": 2.8907330567081604, + "grad_norm": 5.669473648071289, + "learning_rate": 3.949592746273245e-05, + "log_odds_chosen": 7.002610206604004, + "log_odds_ratio": -0.08763153851032257, + "logits/chosen": -0.6747829914093018, + "logits/rejected": -0.6756397485733032, + "logps/chosen": -0.015487837605178356, + "logps/rejected": -0.8023409247398376, + "loss": 2.2792, + "nll_loss": 0.5610270500183105, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001548783970065415, + "rewards/margins": 0.07868531346321106, + "rewards/rejected": -0.0802340880036354, + "step": 4180 + }, + { + "epoch": 2.891424619640387, + "grad_norm": 7.439328193664551, + "learning_rate": 3.9492085446442294e-05, + "log_odds_chosen": 8.811647415161133, + "log_odds_ratio": -0.0013991171726956964, + "logits/chosen": -0.5426240563392639, + "logits/rejected": -0.5518914461135864, + "logps/chosen": -0.007188364397734404, + "logps/rejected": -1.4096122980117798, + "loss": 2.1917, + "nll_loss": 0.5477972030639648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007188364979811013, + "rewards/margins": 0.14024239778518677, + "rewards/rejected": -0.14096122980117798, + "step": 4181 + }, + { + "epoch": 2.892116182572614, + "grad_norm": 5.265755653381348, + "learning_rate": 3.948824343015215e-05, + "log_odds_chosen": 7.264721870422363, + "log_odds_ratio": -0.18816621601581573, + "logits/chosen": -0.3916324973106384, + "logits/rejected": -0.34224846959114075, + "logps/chosen": -0.046060629189014435, + "logps/rejected": -1.132361888885498, + "loss": 1.7736, + "nll_loss": 0.42457854747772217, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004606062546372414, + "rewards/margins": 0.10863012075424194, + "rewards/rejected": -0.1132361888885498, + "step": 4182 + }, + { + "epoch": 2.892807745504841, + "grad_norm": 5.767202854156494, + "learning_rate": 3.9484401413862e-05, + "log_odds_chosen": 6.7517900466918945, + "log_odds_ratio": -0.016323775053024292, + "logits/chosen": -0.07235578447580338, + "logits/rejected": -0.05019722133874893, + "logps/chosen": -0.018838627263903618, + "logps/rejected": -1.6113855838775635, + "loss": 1.6765, + "nll_loss": 0.4175013303756714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018838628893718123, + "rewards/margins": 0.15925469994544983, + "rewards/rejected": -0.16113856434822083, + "step": 4183 + }, + { + "epoch": 2.8934993084370677, + "grad_norm": 13.19206428527832, + "learning_rate": 3.9480559397571845e-05, + "log_odds_chosen": 8.968629837036133, + "log_odds_ratio": -0.00036881750565953553, + "logits/chosen": -0.6369163393974304, + "logits/rejected": -0.7073555588722229, + "logps/chosen": -0.0007098768837749958, + "logps/rejected": -1.733012080192566, + "loss": 2.5317, + "nll_loss": 0.6328850388526917, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.098769856384024e-05, + "rewards/margins": 0.17323023080825806, + "rewards/rejected": -0.17330121994018555, + "step": 4184 + }, + { + "epoch": 2.8941908713692945, + "grad_norm": 9.749066352844238, + "learning_rate": 3.94767173812817e-05, + "log_odds_chosen": 9.5924072265625, + "log_odds_ratio": -0.0001539234654046595, + "logits/chosen": -0.7313152551651001, + "logits/rejected": -0.8595709800720215, + "logps/chosen": -0.0010388323571532965, + "logps/rejected": -1.938551902770996, + "loss": 2.7596, + "nll_loss": 0.6898916959762573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010388322698418051, + "rewards/margins": 0.19375132024288177, + "rewards/rejected": -0.1938551962375641, + "step": 4185 + }, + { + "epoch": 2.8948824343015214, + "grad_norm": 6.4055328369140625, + "learning_rate": 3.947287536499155e-05, + "log_odds_chosen": 8.286406517028809, + "log_odds_ratio": -0.07344295084476471, + "logits/chosen": -0.599174439907074, + "logits/rejected": -0.5423631072044373, + "logps/chosen": -0.01735123060643673, + "logps/rejected": -1.1026806831359863, + "loss": 1.5879, + "nll_loss": 0.38963472843170166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017351231072098017, + "rewards/margins": 0.10853295028209686, + "rewards/rejected": -0.11026806384325027, + "step": 4186 + }, + { + "epoch": 2.895573997233748, + "grad_norm": 9.003849983215332, + "learning_rate": 3.94690333487014e-05, + "log_odds_chosen": 8.759421348571777, + "log_odds_ratio": -0.03563835099339485, + "logits/chosen": -1.0180939435958862, + "logits/rejected": -1.0876891613006592, + "logps/chosen": -0.008947193622589111, + "logps/rejected": -1.673654556274414, + "loss": 1.7472, + "nll_loss": 0.4332250952720642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008947193855419755, + "rewards/margins": 0.1664707213640213, + "rewards/rejected": -0.16736546158790588, + "step": 4187 + }, + { + "epoch": 2.896265560165975, + "grad_norm": 7.504535675048828, + "learning_rate": 3.946519133241125e-05, + "log_odds_chosen": 8.320568084716797, + "log_odds_ratio": -0.05261503532528877, + "logits/chosen": -0.7506527304649353, + "logits/rejected": -0.7245360612869263, + "logps/chosen": -0.016820482909679413, + "logps/rejected": -1.4949345588684082, + "loss": 1.5225, + "nll_loss": 0.37536221742630005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016820483142510056, + "rewards/margins": 0.1478113979101181, + "rewards/rejected": -0.14949345588684082, + "step": 4188 + }, + { + "epoch": 2.896957123098202, + "grad_norm": 10.017531394958496, + "learning_rate": 3.946134931612111e-05, + "log_odds_chosen": 8.923404693603516, + "log_odds_ratio": -0.012731466442346573, + "logits/chosen": -0.9017990827560425, + "logits/rejected": -0.985031008720398, + "logps/chosen": -0.0057351253926754, + "logps/rejected": -1.6369001865386963, + "loss": 1.9505, + "nll_loss": 0.48634442687034607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005735125159844756, + "rewards/margins": 0.16311649978160858, + "rewards/rejected": -0.1636900156736374, + "step": 4189 + }, + { + "epoch": 2.8976486860304287, + "grad_norm": 11.514911651611328, + "learning_rate": 3.945750729983095e-05, + "log_odds_chosen": 9.583656311035156, + "log_odds_ratio": -0.000768057769164443, + "logits/chosen": -0.44403791427612305, + "logits/rejected": -0.5449143648147583, + "logps/chosen": -0.0007324862526729703, + "logps/rejected": -1.5823893547058105, + "loss": 1.8875, + "nll_loss": 0.4717921316623688, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.32486296328716e-05, + "rewards/margins": 0.15816569328308105, + "rewards/rejected": -0.15823894739151, + "step": 4190 + }, + { + "epoch": 2.8983402489626555, + "grad_norm": 9.892921447753906, + "learning_rate": 3.9453665283540805e-05, + "log_odds_chosen": 10.3226957321167, + "log_odds_ratio": -0.0001017776012304239, + "logits/chosen": -0.7956699132919312, + "logits/rejected": -0.9215668439865112, + "logps/chosen": -0.006516370922327042, + "logps/rejected": -2.688707113265991, + "loss": 2.5422, + "nll_loss": 0.6355412006378174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006516370922327042, + "rewards/margins": 0.2682190537452698, + "rewards/rejected": -0.2688707113265991, + "step": 4191 + }, + { + "epoch": 2.8990318118948823, + "grad_norm": 3.587132215499878, + "learning_rate": 3.944982326725066e-05, + "log_odds_chosen": 8.61502456665039, + "log_odds_ratio": -0.0015266663394868374, + "logits/chosen": -0.507659375667572, + "logits/rejected": -0.5381101369857788, + "logps/chosen": -0.010759102180600166, + "logps/rejected": -1.9087737798690796, + "loss": 1.8812, + "nll_loss": 0.4701571464538574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001075910171493888, + "rewards/margins": 0.18980145454406738, + "rewards/rejected": -0.19087737798690796, + "step": 4192 + }, + { + "epoch": 2.899723374827109, + "grad_norm": 9.977949142456055, + "learning_rate": 3.94459812509605e-05, + "log_odds_chosen": 8.210229873657227, + "log_odds_ratio": -0.0020022920798510313, + "logits/chosen": -0.6053897738456726, + "logits/rejected": -0.593431830406189, + "logps/chosen": -0.0019178414950147271, + "logps/rejected": -1.0676766633987427, + "loss": 1.6852, + "nll_loss": 0.42109107971191406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019178414368070662, + "rewards/margins": 0.1065758764743805, + "rewards/rejected": -0.10676766186952591, + "step": 4193 + }, + { + "epoch": 2.900414937759336, + "grad_norm": 5.745366096496582, + "learning_rate": 3.9442139234670356e-05, + "log_odds_chosen": 8.749312400817871, + "log_odds_ratio": -0.007004758343100548, + "logits/chosen": -0.43974483013153076, + "logits/rejected": -0.4170272648334503, + "logps/chosen": -0.009306280873715878, + "logps/rejected": -2.1502115726470947, + "loss": 2.3188, + "nll_loss": 0.5790024995803833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000930628040805459, + "rewards/margins": 0.2140905261039734, + "rewards/rejected": -0.21502117812633514, + "step": 4194 + }, + { + "epoch": 2.901106500691563, + "grad_norm": 9.20225715637207, + "learning_rate": 3.943829721838021e-05, + "log_odds_chosen": 8.916379928588867, + "log_odds_ratio": -0.0017394019523635507, + "logits/chosen": -0.3987049162387848, + "logits/rejected": -0.4037243127822876, + "logps/chosen": -0.011702263727784157, + "logps/rejected": -1.6109840869903564, + "loss": 2.0616, + "nll_loss": 0.5152207016944885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011702264891937375, + "rewards/margins": 0.15992820262908936, + "rewards/rejected": -0.1610984057188034, + "step": 4195 + }, + { + "epoch": 2.9017980636237897, + "grad_norm": 8.414830207824707, + "learning_rate": 3.943445520209006e-05, + "log_odds_chosen": 9.312470436096191, + "log_odds_ratio": -0.0006356225931085646, + "logits/chosen": -0.4111187756061554, + "logits/rejected": -0.49365267157554626, + "logps/chosen": -0.024308985099196434, + "logps/rejected": -1.9406907558441162, + "loss": 2.0854, + "nll_loss": 0.5212797522544861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024308988358825445, + "rewards/margins": 0.19163817167282104, + "rewards/rejected": -0.19406907260417938, + "step": 4196 + }, + { + "epoch": 2.9024896265560165, + "grad_norm": 6.095809459686279, + "learning_rate": 3.9430613185799906e-05, + "log_odds_chosen": 9.26791763305664, + "log_odds_ratio": -0.00040234148036688566, + "logits/chosen": -0.5236822962760925, + "logits/rejected": -0.48013976216316223, + "logps/chosen": -0.03143753483891487, + "logps/rejected": -2.8108372688293457, + "loss": 1.9503, + "nll_loss": 0.4875357151031494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031437536235898733, + "rewards/margins": 0.2779400050640106, + "rewards/rejected": -0.28108376264572144, + "step": 4197 + }, + { + "epoch": 2.9031811894882433, + "grad_norm": 12.865019798278809, + "learning_rate": 3.9426771169509766e-05, + "log_odds_chosen": 7.660120964050293, + "log_odds_ratio": -0.009351848624646664, + "logits/chosen": -0.6887496709823608, + "logits/rejected": -0.7265763282775879, + "logps/chosen": -0.02220279350876808, + "logps/rejected": -2.0053799152374268, + "loss": 2.4326, + "nll_loss": 0.6072081923484802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022202793043106794, + "rewards/margins": 0.1983177214860916, + "rewards/rejected": -0.2005380094051361, + "step": 4198 + }, + { + "epoch": 2.90387275242047, + "grad_norm": 5.224964141845703, + "learning_rate": 3.942292915321961e-05, + "log_odds_chosen": 6.3237409591674805, + "log_odds_ratio": -0.14260509610176086, + "logits/chosen": -0.5718114376068115, + "logits/rejected": -0.5917366743087769, + "logps/chosen": -0.03757050260901451, + "logps/rejected": -1.6181234121322632, + "loss": 2.0782, + "nll_loss": 0.5052976608276367, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003757050260901451, + "rewards/margins": 0.15805530548095703, + "rewards/rejected": -0.16181235015392303, + "step": 4199 + }, + { + "epoch": 2.904564315352697, + "grad_norm": 7.983197212219238, + "learning_rate": 3.9419087136929464e-05, + "log_odds_chosen": 8.89438247680664, + "log_odds_ratio": -0.0003911318490281701, + "logits/chosen": -0.39353638887405396, + "logits/rejected": -0.46018946170806885, + "logps/chosen": -0.0006360848783515394, + "logps/rejected": -1.3578107357025146, + "loss": 2.3677, + "nll_loss": 0.5918948650360107, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360848783515394e-05, + "rewards/margins": 0.1357174664735794, + "rewards/rejected": -0.13578107953071594, + "step": 4200 + }, + { + "epoch": 2.905255878284924, + "grad_norm": 5.677807331085205, + "learning_rate": 3.9415245120639316e-05, + "log_odds_chosen": 7.875686168670654, + "log_odds_ratio": -0.05565962940454483, + "logits/chosen": -0.5313123464584351, + "logits/rejected": -0.5201388597488403, + "logps/chosen": -0.03696315363049507, + "logps/rejected": -1.6507844924926758, + "loss": 1.4112, + "nll_loss": 0.3472402095794678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036963154561817646, + "rewards/margins": 0.16138213872909546, + "rewards/rejected": -0.16507846117019653, + "step": 4201 + }, + { + "epoch": 2.9059474412171507, + "grad_norm": 4.537448406219482, + "learning_rate": 3.941140310434916e-05, + "log_odds_chosen": 8.374617576599121, + "log_odds_ratio": -0.0009444555034860969, + "logits/chosen": -0.8532025218009949, + "logits/rejected": -0.8434891104698181, + "logps/chosen": -0.006630830001085997, + "logps/rejected": -2.0214223861694336, + "loss": 1.8496, + "nll_loss": 0.46230918169021606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006630829884670675, + "rewards/margins": 0.20147916674613953, + "rewards/rejected": -0.20214225351810455, + "step": 4202 + }, + { + "epoch": 2.9066390041493775, + "grad_norm": 7.785838603973389, + "learning_rate": 3.9407561088059014e-05, + "log_odds_chosen": 8.70776081085205, + "log_odds_ratio": -0.0008020623936317861, + "logits/chosen": -0.733709454536438, + "logits/rejected": -0.7174305319786072, + "logps/chosen": -0.016540687531232834, + "logps/rejected": -2.144221305847168, + "loss": 2.8436, + "nll_loss": 0.710813581943512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016540689393877983, + "rewards/margins": 0.2127680480480194, + "rewards/rejected": -0.21442212164402008, + "step": 4203 + }, + { + "epoch": 2.9073305670816043, + "grad_norm": 56.542301177978516, + "learning_rate": 3.940371907176887e-05, + "log_odds_chosen": 4.422608375549316, + "log_odds_ratio": -0.39145660400390625, + "logits/chosen": -0.3432076871395111, + "logits/rejected": -0.4004635810852051, + "logps/chosen": -0.07698570191860199, + "logps/rejected": -1.0786305665969849, + "loss": 2.1216, + "nll_loss": 0.49124303460121155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007698570378124714, + "rewards/margins": 0.10016448050737381, + "rewards/rejected": -0.10786305367946625, + "step": 4204 + }, + { + "epoch": 2.908022130013831, + "grad_norm": 10.854572296142578, + "learning_rate": 3.939987705547872e-05, + "log_odds_chosen": 7.742916584014893, + "log_odds_ratio": -0.07084621489048004, + "logits/chosen": -0.7000867128372192, + "logits/rejected": -0.7924137711524963, + "logps/chosen": -0.009045793674886227, + "logps/rejected": -1.1441210508346558, + "loss": 2.219, + "nll_loss": 0.5476707816123962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009045794140547514, + "rewards/margins": 0.11350752413272858, + "rewards/rejected": -0.1144120991230011, + "step": 4205 + }, + { + "epoch": 2.908713692946058, + "grad_norm": 14.714045524597168, + "learning_rate": 3.9396035039188565e-05, + "log_odds_chosen": 7.317990779876709, + "log_odds_ratio": -0.3476130962371826, + "logits/chosen": -0.6655561923980713, + "logits/rejected": -0.6966454982757568, + "logps/chosen": -0.021608801558613777, + "logps/rejected": -0.984076976776123, + "loss": 2.5814, + "nll_loss": 0.6105821132659912, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021608800161629915, + "rewards/margins": 0.09624682366847992, + "rewards/rejected": -0.09840770065784454, + "step": 4206 + }, + { + "epoch": 2.909405255878285, + "grad_norm": 10.995607376098633, + "learning_rate": 3.9392193022898424e-05, + "log_odds_chosen": 7.899350166320801, + "log_odds_ratio": -0.21427175402641296, + "logits/chosen": -0.6187517642974854, + "logits/rejected": -0.6937867403030396, + "logps/chosen": -0.10036478191614151, + "logps/rejected": -1.7736245393753052, + "loss": 1.8907, + "nll_loss": 0.4512489140033722, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.010036477819085121, + "rewards/margins": 0.1673259735107422, + "rewards/rejected": -0.17736247181892395, + "step": 4207 + }, + { + "epoch": 2.9100968188105116, + "grad_norm": 7.072151184082031, + "learning_rate": 3.938835100660827e-05, + "log_odds_chosen": 9.21097469329834, + "log_odds_ratio": -0.053593918681144714, + "logits/chosen": -0.14670710265636444, + "logits/rejected": -0.2045065462589264, + "logps/chosen": -0.029685556888580322, + "logps/rejected": -1.4088356494903564, + "loss": 1.424, + "nll_loss": 0.3506321609020233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002968555549159646, + "rewards/margins": 0.1379150152206421, + "rewards/rejected": -0.14088356494903564, + "step": 4208 + }, + { + "epoch": 2.9107883817427385, + "grad_norm": 6.414186000823975, + "learning_rate": 3.938450899031812e-05, + "log_odds_chosen": 9.366171836853027, + "log_odds_ratio": -0.00038891323492862284, + "logits/chosen": -0.3959054946899414, + "logits/rejected": -0.4083250164985657, + "logps/chosen": -0.009610005654394627, + "logps/rejected": -1.7705631256103516, + "loss": 1.6591, + "nll_loss": 0.41473865509033203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009610005654394627, + "rewards/margins": 0.17609530687332153, + "rewards/rejected": -0.17705631256103516, + "step": 4209 + }, + { + "epoch": 2.9114799446749653, + "grad_norm": 10.280040740966797, + "learning_rate": 3.9380666974027975e-05, + "log_odds_chosen": 9.308536529541016, + "log_odds_ratio": -0.00011768620606744662, + "logits/chosen": -0.6421458721160889, + "logits/rejected": -0.665635347366333, + "logps/chosen": -0.0003707177529577166, + "logps/rejected": -1.6169333457946777, + "loss": 2.0478, + "nll_loss": 0.5119322538375854, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.707177529577166e-05, + "rewards/margins": 0.16165626049041748, + "rewards/rejected": -0.16169333457946777, + "step": 4210 + }, + { + "epoch": 2.912171507607192, + "grad_norm": 12.741182327270508, + "learning_rate": 3.937682495773782e-05, + "log_odds_chosen": 8.938257217407227, + "log_odds_ratio": -0.41519448161125183, + "logits/chosen": -0.4574255347251892, + "logits/rejected": -0.5206787586212158, + "logps/chosen": -0.06497032195329666, + "logps/rejected": -2.0512773990631104, + "loss": 2.2847, + "nll_loss": 0.529647707939148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006497031543403864, + "rewards/margins": 0.19863072037696838, + "rewards/rejected": -0.2051277458667755, + "step": 4211 + }, + { + "epoch": 2.912863070539419, + "grad_norm": 11.842411994934082, + "learning_rate": 3.937298294144767e-05, + "log_odds_chosen": 8.423245429992676, + "log_odds_ratio": -0.0006916861748322845, + "logits/chosen": -0.7953372001647949, + "logits/rejected": -0.8773461580276489, + "logps/chosen": -0.001747145433910191, + "logps/rejected": -1.8659417629241943, + "loss": 3.0268, + "nll_loss": 0.7566385269165039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001747145433910191, + "rewards/margins": 0.18641947209835052, + "rewards/rejected": -0.1865941882133484, + "step": 4212 + }, + { + "epoch": 2.913554633471646, + "grad_norm": 8.623528480529785, + "learning_rate": 3.9369140925157525e-05, + "log_odds_chosen": 9.111515045166016, + "log_odds_ratio": -0.00039377735811285675, + "logits/chosen": -0.8491336703300476, + "logits/rejected": -0.8755612969398499, + "logps/chosen": -0.0004639826947823167, + "logps/rejected": -1.3792146444320679, + "loss": 2.2468, + "nll_loss": 0.5616547465324402, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.63982651126571e-05, + "rewards/margins": 0.13787506520748138, + "rewards/rejected": -0.13792146742343903, + "step": 4213 + }, + { + "epoch": 2.9142461964038726, + "grad_norm": 9.771961212158203, + "learning_rate": 3.936529890886738e-05, + "log_odds_chosen": 9.823007583618164, + "log_odds_ratio": -0.0001347611687378958, + "logits/chosen": -0.7041987776756287, + "logits/rejected": -0.6953439712524414, + "logps/chosen": -0.000278160790912807, + "logps/rejected": -1.5819087028503418, + "loss": 2.2263, + "nll_loss": 0.5565525889396667, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7816076908493415e-05, + "rewards/margins": 0.15816305577754974, + "rewards/rejected": -0.15819087624549866, + "step": 4214 + }, + { + "epoch": 2.9149377593360994, + "grad_norm": 18.160442352294922, + "learning_rate": 3.936145689257722e-05, + "log_odds_chosen": 7.970919609069824, + "log_odds_ratio": -0.017684318125247955, + "logits/chosen": -0.5528253316879272, + "logits/rejected": -0.6049160361289978, + "logps/chosen": -0.02685156650841236, + "logps/rejected": -1.7387304306030273, + "loss": 1.8739, + "nll_loss": 0.46671339869499207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026851568836718798, + "rewards/margins": 0.1711878776550293, + "rewards/rejected": -0.17387305200099945, + "step": 4215 + }, + { + "epoch": 2.9156293222683263, + "grad_norm": 10.328432083129883, + "learning_rate": 3.935761487628708e-05, + "log_odds_chosen": 9.84450912475586, + "log_odds_ratio": -8.842186798574403e-05, + "logits/chosen": -0.6626081466674805, + "logits/rejected": -0.7288064956665039, + "logps/chosen": -0.0009883481543511152, + "logps/rejected": -1.6530663967132568, + "loss": 1.9919, + "nll_loss": 0.4979715347290039, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.883481834549457e-05, + "rewards/margins": 0.16520781815052032, + "rewards/rejected": -0.16530665755271912, + "step": 4216 + }, + { + "epoch": 2.916320885200553, + "grad_norm": 4.938757419586182, + "learning_rate": 3.935377285999693e-05, + "log_odds_chosen": 8.558516502380371, + "log_odds_ratio": -0.002902967156842351, + "logits/chosen": -0.6540226936340332, + "logits/rejected": -0.5421640276908875, + "logps/chosen": -0.031563468277454376, + "logps/rejected": -1.5742692947387695, + "loss": 1.5972, + "nll_loss": 0.3990045189857483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003156346967443824, + "rewards/margins": 0.15427058935165405, + "rewards/rejected": -0.15742693841457367, + "step": 4217 + }, + { + "epoch": 2.91701244813278, + "grad_norm": 8.419347763061523, + "learning_rate": 3.934993084370678e-05, + "log_odds_chosen": 7.854592800140381, + "log_odds_ratio": -0.006661005783826113, + "logits/chosen": -0.7401362061500549, + "logits/rejected": -0.7170186638832092, + "logps/chosen": -0.06734812259674072, + "logps/rejected": -2.068657159805298, + "loss": 2.8914, + "nll_loss": 0.7221934199333191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006734812632203102, + "rewards/margins": 0.200130894780159, + "rewards/rejected": -0.20686571300029755, + "step": 4218 + }, + { + "epoch": 2.9177040110650068, + "grad_norm": 16.346050262451172, + "learning_rate": 3.934608882741663e-05, + "log_odds_chosen": 5.416713714599609, + "log_odds_ratio": -0.6428422331809998, + "logits/chosen": -0.48766276240348816, + "logits/rejected": -0.5208728313446045, + "logps/chosen": -0.110658660531044, + "logps/rejected": -1.206841230392456, + "loss": 2.2163, + "nll_loss": 0.489782452583313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01106586679816246, + "rewards/margins": 0.10961826145648956, + "rewards/rejected": -0.12068411707878113, + "step": 4219 + }, + { + "epoch": 2.9183955739972336, + "grad_norm": 5.745017051696777, + "learning_rate": 3.934224681112648e-05, + "log_odds_chosen": 6.683662414550781, + "log_odds_ratio": -0.08422426879405975, + "logits/chosen": -0.37878748774528503, + "logits/rejected": -0.42978018522262573, + "logps/chosen": -0.018797200173139572, + "logps/rejected": -1.3064601421356201, + "loss": 1.8086, + "nll_loss": 0.44373905658721924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018797202501446009, + "rewards/margins": 0.12876629829406738, + "rewards/rejected": -0.1306460201740265, + "step": 4220 + }, + { + "epoch": 2.9190871369294604, + "grad_norm": 10.25680160522461, + "learning_rate": 3.933840479483633e-05, + "log_odds_chosen": 9.171581268310547, + "log_odds_ratio": -0.0021570641547441483, + "logits/chosen": -0.6555256843566895, + "logits/rejected": -0.6951804161071777, + "logps/chosen": -0.003925275523215532, + "logps/rejected": -1.6789007186889648, + "loss": 1.3798, + "nll_loss": 0.34473517537117004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039252755232155323, + "rewards/margins": 0.16749754548072815, + "rewards/rejected": -0.16789008677005768, + "step": 4221 + }, + { + "epoch": 2.9197786998616873, + "grad_norm": 15.780588150024414, + "learning_rate": 3.9334562778546184e-05, + "log_odds_chosen": 5.6302595138549805, + "log_odds_ratio": -0.6053066253662109, + "logits/chosen": -0.7769954800605774, + "logits/rejected": -0.816230058670044, + "logps/chosen": -0.09104986488819122, + "logps/rejected": -1.325893759727478, + "loss": 2.6182, + "nll_loss": 0.5940166115760803, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009104986675083637, + "rewards/margins": 0.12348438799381256, + "rewards/rejected": -0.13258937001228333, + "step": 4222 + }, + { + "epoch": 2.920470262793914, + "grad_norm": 11.950429916381836, + "learning_rate": 3.9330720762256036e-05, + "log_odds_chosen": 9.111445426940918, + "log_odds_ratio": -0.007683016825467348, + "logits/chosen": -0.725569486618042, + "logits/rejected": -0.8772428035736084, + "logps/chosen": -0.007877436466515064, + "logps/rejected": -1.8429392576217651, + "loss": 1.9335, + "nll_loss": 0.4826072156429291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007877436000853777, + "rewards/margins": 0.18350617587566376, + "rewards/rejected": -0.1842939257621765, + "step": 4223 + }, + { + "epoch": 2.921161825726141, + "grad_norm": 9.985198020935059, + "learning_rate": 3.932687874596588e-05, + "log_odds_chosen": 9.653959274291992, + "log_odds_ratio": -0.00018180804909206927, + "logits/chosen": -0.528473973274231, + "logits/rejected": -0.6489172577857971, + "logps/chosen": -0.0006757283117622137, + "logps/rejected": -2.1277120113372803, + "loss": 1.8746, + "nll_loss": 0.46863406896591187, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.757282972102985e-05, + "rewards/margins": 0.2127036303281784, + "rewards/rejected": -0.2127711921930313, + "step": 4224 + }, + { + "epoch": 2.9218533886583677, + "grad_norm": 5.75289249420166, + "learning_rate": 3.932303672967574e-05, + "log_odds_chosen": 8.337453842163086, + "log_odds_ratio": -0.002024096203967929, + "logits/chosen": -0.5319167375564575, + "logits/rejected": -0.6447017192840576, + "logps/chosen": -0.0009236917831003666, + "logps/rejected": -1.2165443897247314, + "loss": 1.9135, + "nll_loss": 0.47816094756126404, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.236918413080275e-05, + "rewards/margins": 0.12156207114458084, + "rewards/rejected": -0.1216544359922409, + "step": 4225 + }, + { + "epoch": 2.922544951590595, + "grad_norm": 11.359525680541992, + "learning_rate": 3.9319194713385587e-05, + "log_odds_chosen": 6.562146186828613, + "log_odds_ratio": -0.07711490243673325, + "logits/chosen": -0.5829511880874634, + "logits/rejected": -0.6149609088897705, + "logps/chosen": -0.05082641541957855, + "logps/rejected": -1.6802895069122314, + "loss": 2.6736, + "nll_loss": 0.660689651966095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005082641262561083, + "rewards/margins": 0.16294631361961365, + "rewards/rejected": -0.16802895069122314, + "step": 4226 + }, + { + "epoch": 2.923236514522822, + "grad_norm": 7.414312839508057, + "learning_rate": 3.931535269709544e-05, + "log_odds_chosen": 8.072091102600098, + "log_odds_ratio": -0.0029985117726027966, + "logits/chosen": -0.7848948240280151, + "logits/rejected": -0.8188717365264893, + "logps/chosen": -0.03268032893538475, + "logps/rejected": -1.8890024423599243, + "loss": 2.1037, + "nll_loss": 0.5256178975105286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032680328004062176, + "rewards/margins": 0.18563222885131836, + "rewards/rejected": -0.18890026211738586, + "step": 4227 + }, + { + "epoch": 2.9239280774550487, + "grad_norm": 6.1554059982299805, + "learning_rate": 3.9311510680805285e-05, + "log_odds_chosen": 8.72746753692627, + "log_odds_ratio": -0.00024264020612463355, + "logits/chosen": -0.5359256267547607, + "logits/rejected": -0.5526854991912842, + "logps/chosen": -0.0005240375176072121, + "logps/rejected": -1.1332951784133911, + "loss": 2.1296, + "nll_loss": 0.532380223274231, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.240375321591273e-05, + "rewards/margins": 0.1132771223783493, + "rewards/rejected": -0.11332952231168747, + "step": 4228 + }, + { + "epoch": 2.9246196403872755, + "grad_norm": 10.648176193237305, + "learning_rate": 3.930766866451514e-05, + "log_odds_chosen": 7.886308670043945, + "log_odds_ratio": -0.014126875437796116, + "logits/chosen": -0.7142013311386108, + "logits/rejected": -0.8259632587432861, + "logps/chosen": -0.005320434924215078, + "logps/rejected": -1.1804341077804565, + "loss": 2.7332, + "nll_loss": 0.6818897724151611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005320435157045722, + "rewards/margins": 0.11751136183738708, + "rewards/rejected": -0.11804340034723282, + "step": 4229 + }, + { + "epoch": 2.9253112033195023, + "grad_norm": 5.809198379516602, + "learning_rate": 3.930382664822499e-05, + "log_odds_chosen": 8.172040939331055, + "log_odds_ratio": -0.21329273283481598, + "logits/chosen": -0.49062544107437134, + "logits/rejected": -0.5070676207542419, + "logps/chosen": -0.029187794774770737, + "logps/rejected": -1.2543081045150757, + "loss": 2.1047, + "nll_loss": 0.5048477649688721, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029187791515141726, + "rewards/margins": 0.12251203507184982, + "rewards/rejected": -0.12543080747127533, + "step": 4230 + }, + { + "epoch": 2.926002766251729, + "grad_norm": 9.787761688232422, + "learning_rate": 3.929998463193484e-05, + "log_odds_chosen": 6.455556392669678, + "log_odds_ratio": -0.03728388249874115, + "logits/chosen": -0.3578833341598511, + "logits/rejected": -0.33278632164001465, + "logps/chosen": -0.00679417559877038, + "logps/rejected": -0.7100874185562134, + "loss": 2.3575, + "nll_loss": 0.5856543779373169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006794175715185702, + "rewards/margins": 0.07032933086156845, + "rewards/rejected": -0.07100874185562134, + "step": 4231 + }, + { + "epoch": 2.926694329183956, + "grad_norm": 11.004345893859863, + "learning_rate": 3.9296142615644694e-05, + "log_odds_chosen": 9.477989196777344, + "log_odds_ratio": -0.00019522027287166566, + "logits/chosen": -0.5522055625915527, + "logits/rejected": -0.5933316946029663, + "logps/chosen": -0.0004704441817011684, + "logps/rejected": -1.6733304262161255, + "loss": 2.1303, + "nll_loss": 0.5325589179992676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.704441744252108e-05, + "rewards/margins": 0.1672860085964203, + "rewards/rejected": -0.16733305156230927, + "step": 4232 + }, + { + "epoch": 2.927385892116183, + "grad_norm": 9.407308578491211, + "learning_rate": 3.929230059935454e-05, + "log_odds_chosen": 8.922008514404297, + "log_odds_ratio": -0.00025383057072758675, + "logits/chosen": -0.364921510219574, + "logits/rejected": -0.3875230848789215, + "logps/chosen": -0.0007518371567130089, + "logps/rejected": -1.1318978071212769, + "loss": 1.9326, + "nll_loss": 0.4831249713897705, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.518372149206698e-05, + "rewards/margins": 0.1131146103143692, + "rewards/rejected": -0.11318978667259216, + "step": 4233 + }, + { + "epoch": 2.9280774550484097, + "grad_norm": 9.522400856018066, + "learning_rate": 3.928845858306439e-05, + "log_odds_chosen": 6.760089874267578, + "log_odds_ratio": -0.32851678133010864, + "logits/chosen": -0.5342674851417542, + "logits/rejected": -0.6126154661178589, + "logps/chosen": -0.12489331513643265, + "logps/rejected": -1.1863129138946533, + "loss": 1.918, + "nll_loss": 0.44663700461387634, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.01248933281749487, + "rewards/margins": 0.10614196956157684, + "rewards/rejected": -0.11863130331039429, + "step": 4234 + }, + { + "epoch": 2.9287690179806365, + "grad_norm": 8.68045711517334, + "learning_rate": 3.9284616566774245e-05, + "log_odds_chosen": 8.119098663330078, + "log_odds_ratio": -0.0052395109087228775, + "logits/chosen": -0.5035631656646729, + "logits/rejected": -0.5046873092651367, + "logps/chosen": -0.02135084755718708, + "logps/rejected": -1.6839799880981445, + "loss": 2.2956, + "nll_loss": 0.5733876824378967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021350847091525793, + "rewards/margins": 0.1662629246711731, + "rewards/rejected": -0.16839802265167236, + "step": 4235 + }, + { + "epoch": 2.9294605809128633, + "grad_norm": 9.004694938659668, + "learning_rate": 3.92807745504841e-05, + "log_odds_chosen": 6.27830696105957, + "log_odds_ratio": -0.1884261518716812, + "logits/chosen": -0.676655113697052, + "logits/rejected": -0.7348206043243408, + "logps/chosen": -0.08192940056324005, + "logps/rejected": -1.172129511833191, + "loss": 1.7475, + "nll_loss": 0.41804251074790955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00819294061511755, + "rewards/margins": 0.10902000963687897, + "rewards/rejected": -0.11721295118331909, + "step": 4236 + }, + { + "epoch": 2.93015214384509, + "grad_norm": 10.645638465881348, + "learning_rate": 3.927693253419394e-05, + "log_odds_chosen": 7.660663604736328, + "log_odds_ratio": -0.0013477486791089177, + "logits/chosen": -0.351400762796402, + "logits/rejected": -0.4446882903575897, + "logps/chosen": -0.0024033382069319487, + "logps/rejected": -1.6316173076629639, + "loss": 2.9281, + "nll_loss": 0.7318964004516602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000240333829424344, + "rewards/margins": 0.16292139887809753, + "rewards/rejected": -0.1631617248058319, + "step": 4237 + }, + { + "epoch": 2.930843706777317, + "grad_norm": 6.696867942810059, + "learning_rate": 3.92730905179038e-05, + "log_odds_chosen": 8.292326927185059, + "log_odds_ratio": -0.0022583678364753723, + "logits/chosen": -0.19574078917503357, + "logits/rejected": -0.21028833091259003, + "logps/chosen": -0.0025987233966588974, + "logps/rejected": -1.272185206413269, + "loss": 2.0609, + "nll_loss": 0.5149998068809509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002598723513074219, + "rewards/margins": 0.12695865333080292, + "rewards/rejected": -0.12721852958202362, + "step": 4238 + }, + { + "epoch": 2.931535269709544, + "grad_norm": 7.561919212341309, + "learning_rate": 3.926924850161365e-05, + "log_odds_chosen": 8.425704002380371, + "log_odds_ratio": -0.0013171505415812135, + "logits/chosen": -0.6048797369003296, + "logits/rejected": -0.7474537491798401, + "logps/chosen": -0.0018197052413597703, + "logps/rejected": -1.1950289011001587, + "loss": 2.2343, + "nll_loss": 0.5584410429000854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018197050667367876, + "rewards/margins": 0.11932092905044556, + "rewards/rejected": -0.11950289458036423, + "step": 4239 + }, + { + "epoch": 2.9322268326417706, + "grad_norm": 7.599555015563965, + "learning_rate": 3.92654064853235e-05, + "log_odds_chosen": 9.207232475280762, + "log_odds_ratio": -0.0002494049840606749, + "logits/chosen": -0.1796451061964035, + "logits/rejected": -0.23438525199890137, + "logps/chosen": -0.0007484787493012846, + "logps/rejected": -1.6299362182617188, + "loss": 1.4964, + "nll_loss": 0.37406790256500244, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.48478778405115e-05, + "rewards/margins": 0.16291877627372742, + "rewards/rejected": -0.1629936248064041, + "step": 4240 + }, + { + "epoch": 2.9329183955739975, + "grad_norm": 11.072807312011719, + "learning_rate": 3.926156446903335e-05, + "log_odds_chosen": 8.597249984741211, + "log_odds_ratio": -0.011972310021519661, + "logits/chosen": -0.3940218985080719, + "logits/rejected": -0.4087577760219574, + "logps/chosen": -0.00673884991556406, + "logps/rejected": -1.875365972518921, + "loss": 2.1652, + "nll_loss": 0.5400927662849426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000673884991556406, + "rewards/margins": 0.1868627369403839, + "rewards/rejected": -0.18753661215305328, + "step": 4241 + }, + { + "epoch": 2.9336099585062243, + "grad_norm": 6.58119535446167, + "learning_rate": 3.92577224527432e-05, + "log_odds_chosen": 8.13068962097168, + "log_odds_ratio": -0.005946990102529526, + "logits/chosen": -0.3203129768371582, + "logits/rejected": -0.4184744358062744, + "logps/chosen": -0.03520968556404114, + "logps/rejected": -2.0817081928253174, + "loss": 1.4605, + "nll_loss": 0.36452704668045044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00352096906863153, + "rewards/margins": 0.20464983582496643, + "rewards/rejected": -0.2081708163022995, + "step": 4242 + }, + { + "epoch": 2.934301521438451, + "grad_norm": 12.412485122680664, + "learning_rate": 3.925388043645305e-05, + "log_odds_chosen": 7.7870073318481445, + "log_odds_ratio": -0.0017178517300635576, + "logits/chosen": -0.6953123211860657, + "logits/rejected": -0.7932695150375366, + "logps/chosen": -0.002230787882581353, + "logps/rejected": -1.3735847473144531, + "loss": 2.1767, + "nll_loss": 0.5439935922622681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022307877952698618, + "rewards/margins": 0.13713541626930237, + "rewards/rejected": -0.13735848665237427, + "step": 4243 + }, + { + "epoch": 2.934993084370678, + "grad_norm": 7.041884422302246, + "learning_rate": 3.9250038420162903e-05, + "log_odds_chosen": 5.54819393157959, + "log_odds_ratio": -0.09255164861679077, + "logits/chosen": -0.25514480471611023, + "logits/rejected": -0.3109058141708374, + "logps/chosen": -0.0339653380215168, + "logps/rejected": -0.9779253005981445, + "loss": 2.4905, + "nll_loss": 0.6133647561073303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033965338952839375, + "rewards/margins": 0.09439600259065628, + "rewards/rejected": -0.09779253602027893, + "step": 4244 + }, + { + "epoch": 2.935684647302905, + "grad_norm": 14.048311233520508, + "learning_rate": 3.9246196403872756e-05, + "log_odds_chosen": 8.181252479553223, + "log_odds_ratio": -0.033230848610401154, + "logits/chosen": -0.5824018716812134, + "logits/rejected": -0.6378239989280701, + "logps/chosen": -0.014942415058612823, + "logps/rejected": -2.086918354034424, + "loss": 3.1938, + "nll_loss": 0.7951152920722961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014942414127290249, + "rewards/margins": 0.20719760656356812, + "rewards/rejected": -0.20869183540344238, + "step": 4245 + }, + { + "epoch": 2.9363762102351316, + "grad_norm": 8.99216079711914, + "learning_rate": 3.92423543875826e-05, + "log_odds_chosen": 8.586301803588867, + "log_odds_ratio": -0.004110632464289665, + "logits/chosen": -0.8050059080123901, + "logits/rejected": -0.8754081130027771, + "logps/chosen": -0.0025968970730900764, + "logps/rejected": -1.4557313919067383, + "loss": 2.9999, + "nll_loss": 0.7495602965354919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002596897247713059, + "rewards/margins": 0.14531344175338745, + "rewards/rejected": -0.14557313919067383, + "step": 4246 + }, + { + "epoch": 2.9370677731673585, + "grad_norm": 11.729406356811523, + "learning_rate": 3.923851237129246e-05, + "log_odds_chosen": 9.105123519897461, + "log_odds_ratio": -0.012979497201740742, + "logits/chosen": -0.5769229531288147, + "logits/rejected": -0.6147894859313965, + "logps/chosen": -0.07719717919826508, + "logps/rejected": -2.1323089599609375, + "loss": 1.7843, + "nll_loss": 0.44478702545166016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007719717919826508, + "rewards/margins": 0.2055111825466156, + "rewards/rejected": -0.2132309079170227, + "step": 4247 + }, + { + "epoch": 2.9377593360995853, + "grad_norm": 9.502190589904785, + "learning_rate": 3.9234670355002306e-05, + "log_odds_chosen": 7.549291133880615, + "log_odds_ratio": -0.019823361188173294, + "logits/chosen": -1.219071388244629, + "logits/rejected": -1.203476071357727, + "logps/chosen": -0.023129645735025406, + "logps/rejected": -1.278996467590332, + "loss": 1.776, + "nll_loss": 0.4420260190963745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002312964526936412, + "rewards/margins": 0.12558668851852417, + "rewards/rejected": -0.1278996467590332, + "step": 4248 + }, + { + "epoch": 2.938450899031812, + "grad_norm": 8.954014778137207, + "learning_rate": 3.923082833871216e-05, + "log_odds_chosen": 6.703267574310303, + "log_odds_ratio": -0.28621187806129456, + "logits/chosen": -0.2853702902793884, + "logits/rejected": -0.29533183574676514, + "logps/chosen": -0.051028452813625336, + "logps/rejected": -1.3374086618423462, + "loss": 2.1933, + "nll_loss": 0.5197107791900635, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005102845374494791, + "rewards/margins": 0.12863802909851074, + "rewards/rejected": -0.1337408721446991, + "step": 4249 + }, + { + "epoch": 2.939142461964039, + "grad_norm": 9.448728561401367, + "learning_rate": 3.922698632242201e-05, + "log_odds_chosen": 8.249773979187012, + "log_odds_ratio": -0.00031678471714258194, + "logits/chosen": -0.5632359981536865, + "logits/rejected": -0.698321521282196, + "logps/chosen": -0.0008963820873759687, + "logps/rejected": -1.3571325540542603, + "loss": 2.2072, + "nll_loss": 0.5517725944519043, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.963821164797992e-05, + "rewards/margins": 0.13562361896038055, + "rewards/rejected": -0.13571324944496155, + "step": 4250 + }, + { + "epoch": 2.9398340248962658, + "grad_norm": 10.276646614074707, + "learning_rate": 3.922314430613186e-05, + "log_odds_chosen": 7.62478494644165, + "log_odds_ratio": -0.047770481556653976, + "logits/chosen": -0.7422584891319275, + "logits/rejected": -0.7952272295951843, + "logps/chosen": -0.011581145226955414, + "logps/rejected": -1.3504177331924438, + "loss": 2.5862, + "nll_loss": 0.6417734622955322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011581146391108632, + "rewards/margins": 0.13388365507125854, + "rewards/rejected": -0.13504177331924438, + "step": 4251 + }, + { + "epoch": 2.9405255878284926, + "grad_norm": 6.6713128089904785, + "learning_rate": 3.921930228984171e-05, + "log_odds_chosen": 7.338813781738281, + "log_odds_ratio": -0.011613673530519009, + "logits/chosen": -0.5105392932891846, + "logits/rejected": -0.5866726040840149, + "logps/chosen": -0.004626925103366375, + "logps/rejected": -0.7748826146125793, + "loss": 1.8144, + "nll_loss": 0.4524500072002411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004626925219781697, + "rewards/margins": 0.07702556252479553, + "rewards/rejected": -0.07748826593160629, + "step": 4252 + }, + { + "epoch": 2.9412171507607194, + "grad_norm": 7.037342548370361, + "learning_rate": 3.921546027355156e-05, + "log_odds_chosen": 9.136719703674316, + "log_odds_ratio": -0.0018027378246188164, + "logits/chosen": -0.3973497152328491, + "logits/rejected": -0.4088752865791321, + "logps/chosen": -0.0014848411083221436, + "logps/rejected": -1.3421297073364258, + "loss": 1.5211, + "nll_loss": 0.3800997734069824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014848413411527872, + "rewards/margins": 0.1340644806623459, + "rewards/rejected": -0.13421295583248138, + "step": 4253 + }, + { + "epoch": 2.9419087136929463, + "grad_norm": 6.471978664398193, + "learning_rate": 3.9211618257261414e-05, + "log_odds_chosen": 7.913397789001465, + "log_odds_ratio": -0.019799327477812767, + "logits/chosen": -0.7454057931900024, + "logits/rejected": -0.8406031131744385, + "logps/chosen": -0.013360895216464996, + "logps/rejected": -0.9780092239379883, + "loss": 2.2511, + "nll_loss": 0.5607913732528687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013360894517973065, + "rewards/margins": 0.09646482765674591, + "rewards/rejected": -0.09780092537403107, + "step": 4254 + }, + { + "epoch": 2.942600276625173, + "grad_norm": 9.354866027832031, + "learning_rate": 3.920777624097126e-05, + "log_odds_chosen": 9.310257911682129, + "log_odds_ratio": -0.0006552515551447868, + "logits/chosen": 0.03181115537881851, + "logits/rejected": -0.08694909512996674, + "logps/chosen": -0.0003843040904030204, + "logps/rejected": -1.5662592649459839, + "loss": 2.1056, + "nll_loss": 0.5263240337371826, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8430407585110515e-05, + "rewards/margins": 0.15658749639987946, + "rewards/rejected": -0.1566259264945984, + "step": 4255 + }, + { + "epoch": 2.9432918395574, + "grad_norm": 4.746151447296143, + "learning_rate": 3.920393422468112e-05, + "log_odds_chosen": 8.743995666503906, + "log_odds_ratio": -0.0007172015612013638, + "logits/chosen": -0.5727619528770447, + "logits/rejected": -0.6019116044044495, + "logps/chosen": -0.0029322528280317783, + "logps/rejected": -1.502450704574585, + "loss": 2.0104, + "nll_loss": 0.5025299787521362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002932252536993474, + "rewards/margins": 0.14995186030864716, + "rewards/rejected": -0.1502450704574585, + "step": 4256 + }, + { + "epoch": 2.9439834024896268, + "grad_norm": 12.371516227722168, + "learning_rate": 3.9200092208390965e-05, + "log_odds_chosen": 7.0362443923950195, + "log_odds_ratio": -0.06542985886335373, + "logits/chosen": -0.6845235228538513, + "logits/rejected": -0.7309256792068481, + "logps/chosen": -0.01841142028570175, + "logps/rejected": -1.020219326019287, + "loss": 1.8128, + "nll_loss": 0.4466596841812134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001841141958720982, + "rewards/margins": 0.10018078982830048, + "rewards/rejected": -0.10202193260192871, + "step": 4257 + }, + { + "epoch": 2.9446749654218536, + "grad_norm": 9.63621997833252, + "learning_rate": 3.919625019210082e-05, + "log_odds_chosen": 8.842986106872559, + "log_odds_ratio": -0.0012780596734955907, + "logits/chosen": -0.31890368461608887, + "logits/rejected": -0.3919992446899414, + "logps/chosen": -0.01795879378914833, + "logps/rejected": -2.36039400100708, + "loss": 2.2404, + "nll_loss": 0.5599759221076965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017958792159333825, + "rewards/margins": 0.2342435121536255, + "rewards/rejected": -0.23603938519954681, + "step": 4258 + }, + { + "epoch": 2.9453665283540804, + "grad_norm": 7.4359307289123535, + "learning_rate": 3.919240817581067e-05, + "log_odds_chosen": 7.937699317932129, + "log_odds_ratio": -0.0024727436248213053, + "logits/chosen": -0.06838397681713104, + "logits/rejected": -0.0431574210524559, + "logps/chosen": -0.012063509784638882, + "logps/rejected": -1.3039665222167969, + "loss": 1.6813, + "nll_loss": 0.42007312178611755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012063512112945318, + "rewards/margins": 0.1291903257369995, + "rewards/rejected": -0.13039666414260864, + "step": 4259 + }, + { + "epoch": 2.9460580912863072, + "grad_norm": 9.317797660827637, + "learning_rate": 3.9188566159520515e-05, + "log_odds_chosen": 7.148035049438477, + "log_odds_ratio": -0.00553960120305419, + "logits/chosen": -0.5761981010437012, + "logits/rejected": -0.5446697473526001, + "logps/chosen": -0.013994252309203148, + "logps/rejected": -1.1831421852111816, + "loss": 2.3347, + "nll_loss": 0.5831324458122253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013994253240525723, + "rewards/margins": 0.1169147938489914, + "rewards/rejected": -0.1183142215013504, + "step": 4260 + }, + { + "epoch": 2.946749654218534, + "grad_norm": 7.1314849853515625, + "learning_rate": 3.918472414323037e-05, + "log_odds_chosen": 6.1319427490234375, + "log_odds_ratio": -0.08988655358552933, + "logits/chosen": -0.23304779827594757, + "logits/rejected": -0.24430322647094727, + "logps/chosen": -0.026172567158937454, + "logps/rejected": -0.8957435488700867, + "loss": 2.2714, + "nll_loss": 0.5588510632514954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026172564830631018, + "rewards/margins": 0.08695709705352783, + "rewards/rejected": -0.08957435190677643, + "step": 4261 + }, + { + "epoch": 2.947441217150761, + "grad_norm": 6.733025550842285, + "learning_rate": 3.918088212694022e-05, + "log_odds_chosen": 5.93974494934082, + "log_odds_ratio": -0.17785590887069702, + "logits/chosen": -0.21797175705432892, + "logits/rejected": -0.22403068840503693, + "logps/chosen": -0.05725084990262985, + "logps/rejected": -1.5029304027557373, + "loss": 2.5309, + "nll_loss": 0.6149465441703796, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005725085269659758, + "rewards/margins": 0.14456796646118164, + "rewards/rejected": -0.15029305219650269, + "step": 4262 + }, + { + "epoch": 2.9481327800829877, + "grad_norm": 20.814090728759766, + "learning_rate": 3.917704011065007e-05, + "log_odds_chosen": 7.192148208618164, + "log_odds_ratio": -0.20047271251678467, + "logits/chosen": -0.5887202620506287, + "logits/rejected": -0.6112239956855774, + "logps/chosen": -0.041321538388729095, + "logps/rejected": -1.0154424905776978, + "loss": 2.7193, + "nll_loss": 0.6597743034362793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004132153932005167, + "rewards/margins": 0.0974120944738388, + "rewards/rejected": -0.10154424607753754, + "step": 4263 + }, + { + "epoch": 2.9488243430152146, + "grad_norm": 10.13122272491455, + "learning_rate": 3.917319809435992e-05, + "log_odds_chosen": 9.080028533935547, + "log_odds_ratio": -0.0002214660053141415, + "logits/chosen": -0.5770301818847656, + "logits/rejected": -0.6393797993659973, + "logps/chosen": -0.0003423684975132346, + "logps/rejected": -1.1868422031402588, + "loss": 2.4343, + "nll_loss": 0.6085643768310547, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4236851206514984e-05, + "rewards/margins": 0.11864998936653137, + "rewards/rejected": -0.11868421733379364, + "step": 4264 + }, + { + "epoch": 2.9495159059474414, + "grad_norm": 10.2073974609375, + "learning_rate": 3.916935607806978e-05, + "log_odds_chosen": 7.860172271728516, + "log_odds_ratio": -0.0023849881254136562, + "logits/chosen": -0.5341753959655762, + "logits/rejected": -0.579387366771698, + "logps/chosen": -0.010475466959178448, + "logps/rejected": -1.4733842611312866, + "loss": 1.7168, + "nll_loss": 0.42895734310150146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010475468588992953, + "rewards/margins": 0.1462908834218979, + "rewards/rejected": -0.14733843505382538, + "step": 4265 + }, + { + "epoch": 2.9502074688796682, + "grad_norm": 13.511934280395508, + "learning_rate": 3.916551406177962e-05, + "log_odds_chosen": 8.346586227416992, + "log_odds_ratio": -0.0005293315043672919, + "logits/chosen": -1.025830626487732, + "logits/rejected": -1.034759521484375, + "logps/chosen": -0.0005322285578586161, + "logps/rejected": -1.1304508447647095, + "loss": 2.7532, + "nll_loss": 0.6882580518722534, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.322285505826585e-05, + "rewards/margins": 0.11299186199903488, + "rewards/rejected": -0.11304508149623871, + "step": 4266 + }, + { + "epoch": 2.950899031811895, + "grad_norm": 10.670827865600586, + "learning_rate": 3.9161672045489476e-05, + "log_odds_chosen": 10.011075973510742, + "log_odds_ratio": -8.805980905890465e-05, + "logits/chosen": -0.17297831177711487, + "logits/rejected": -0.18511781096458435, + "logps/chosen": -0.000354648131178692, + "logps/rejected": -1.5110416412353516, + "loss": 2.2342, + "nll_loss": 0.5585365891456604, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.546481457306072e-05, + "rewards/margins": 0.15106868743896484, + "rewards/rejected": -0.1511041522026062, + "step": 4267 + }, + { + "epoch": 2.951590594744122, + "grad_norm": 9.187833786010742, + "learning_rate": 3.915783002919933e-05, + "log_odds_chosen": 7.468688011169434, + "log_odds_ratio": -0.0015352519694715738, + "logits/chosen": -0.5218496322631836, + "logits/rejected": -0.5341426134109497, + "logps/chosen": -0.0017257456202059984, + "logps/rejected": -0.9419411420822144, + "loss": 1.9649, + "nll_loss": 0.4910805821418762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017257456784136593, + "rewards/margins": 0.09402154386043549, + "rewards/rejected": -0.09419411420822144, + "step": 4268 + }, + { + "epoch": 2.9522821576763487, + "grad_norm": 7.416023254394531, + "learning_rate": 3.9153988012909174e-05, + "log_odds_chosen": 6.75653076171875, + "log_odds_ratio": -0.06857343018054962, + "logits/chosen": -0.5883265137672424, + "logits/rejected": -0.5947442650794983, + "logps/chosen": -0.01682782731950283, + "logps/rejected": -1.1577966213226318, + "loss": 1.7577, + "nll_loss": 0.4325792193412781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016827830113470554, + "rewards/margins": 0.11409687250852585, + "rewards/rejected": -0.11577965319156647, + "step": 4269 + }, + { + "epoch": 2.9529737206085755, + "grad_norm": 11.112635612487793, + "learning_rate": 3.9150145996619026e-05, + "log_odds_chosen": 9.163501739501953, + "log_odds_ratio": -0.00044503927347250283, + "logits/chosen": -0.5382460355758667, + "logits/rejected": -0.6539362668991089, + "logps/chosen": -0.0007444759830832481, + "logps/rejected": -1.6698039770126343, + "loss": 2.0158, + "nll_loss": 0.5039148926734924, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.44475910323672e-05, + "rewards/margins": 0.1669059544801712, + "rewards/rejected": -0.16698040068149567, + "step": 4270 + }, + { + "epoch": 2.9536652835408024, + "grad_norm": 10.982603073120117, + "learning_rate": 3.914630398032888e-05, + "log_odds_chosen": 8.879176139831543, + "log_odds_ratio": -0.0016005634097382426, + "logits/chosen": -0.184329092502594, + "logits/rejected": -0.24073684215545654, + "logps/chosen": -0.0036374146584421396, + "logps/rejected": -1.4348986148834229, + "loss": 2.0214, + "nll_loss": 0.5051819086074829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003637414483819157, + "rewards/margins": 0.14312610030174255, + "rewards/rejected": -0.14348986744880676, + "step": 4271 + }, + { + "epoch": 2.954356846473029, + "grad_norm": 10.150177955627441, + "learning_rate": 3.914246196403873e-05, + "log_odds_chosen": 9.1649808883667, + "log_odds_ratio": -0.001695746323093772, + "logits/chosen": -0.9434870481491089, + "logits/rejected": -1.068698525428772, + "logps/chosen": -0.0010683319997042418, + "logps/rejected": -1.5475596189498901, + "loss": 2.3266, + "nll_loss": 0.5814720392227173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010683320579119027, + "rewards/margins": 0.15464913845062256, + "rewards/rejected": -0.15475594997406006, + "step": 4272 + }, + { + "epoch": 2.955048409405256, + "grad_norm": 10.010522842407227, + "learning_rate": 3.913861994774858e-05, + "log_odds_chosen": 6.911557197570801, + "log_odds_ratio": -0.00604627002030611, + "logits/chosen": -0.9683492183685303, + "logits/rejected": -1.006256341934204, + "logps/chosen": -0.019437741488218307, + "logps/rejected": -1.368293285369873, + "loss": 2.4068, + "nll_loss": 0.6010944843292236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019437741721048951, + "rewards/margins": 0.13488556444644928, + "rewards/rejected": -0.13682934641838074, + "step": 4273 + }, + { + "epoch": 2.955739972337483, + "grad_norm": 6.434133529663086, + "learning_rate": 3.9134777931458436e-05, + "log_odds_chosen": 8.657649040222168, + "log_odds_ratio": -0.000983987469226122, + "logits/chosen": -0.45698267221450806, + "logits/rejected": -0.5368836522102356, + "logps/chosen": -0.0029307485092431307, + "logps/rejected": -1.5611507892608643, + "loss": 1.236, + "nll_loss": 0.30890151858329773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000293074845103547, + "rewards/margins": 0.1558220088481903, + "rewards/rejected": -0.1561150848865509, + "step": 4274 + }, + { + "epoch": 2.9564315352697097, + "grad_norm": 5.325925827026367, + "learning_rate": 3.913093591516828e-05, + "log_odds_chosen": 8.406046867370605, + "log_odds_ratio": -0.019645029678940773, + "logits/chosen": -0.8153131008148193, + "logits/rejected": -0.8119903802871704, + "logps/chosen": -0.02138805016875267, + "logps/rejected": -1.3045752048492432, + "loss": 1.2154, + "nll_loss": 0.3018897473812103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021388051100075245, + "rewards/margins": 0.12831872701644897, + "rewards/rejected": -0.1304575353860855, + "step": 4275 + }, + { + "epoch": 2.9571230982019365, + "grad_norm": 8.21432876586914, + "learning_rate": 3.9127093898878134e-05, + "log_odds_chosen": 8.348808288574219, + "log_odds_ratio": -0.003002789104357362, + "logits/chosen": -0.29157984256744385, + "logits/rejected": -0.32698357105255127, + "logps/chosen": -0.0036668144166469574, + "logps/rejected": -0.9981551170349121, + "loss": 1.9019, + "nll_loss": 0.4751623272895813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036668143002316356, + "rewards/margins": 0.09944883733987808, + "rewards/rejected": -0.09981551021337509, + "step": 4276 + }, + { + "epoch": 2.9578146611341634, + "grad_norm": 7.75137996673584, + "learning_rate": 3.9123251882587987e-05, + "log_odds_chosen": 8.320832252502441, + "log_odds_ratio": -0.007186429109424353, + "logits/chosen": -0.2537834346294403, + "logits/rejected": -0.2808937132358551, + "logps/chosen": -0.03648059815168381, + "logps/rejected": -2.1640422344207764, + "loss": 1.7763, + "nll_loss": 0.443354070186615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003648059908300638, + "rewards/margins": 0.21275615692138672, + "rewards/rejected": -0.21640421450138092, + "step": 4277 + }, + { + "epoch": 2.95850622406639, + "grad_norm": 12.242390632629395, + "learning_rate": 3.911940986629783e-05, + "log_odds_chosen": 5.879909515380859, + "log_odds_ratio": -0.027574969455599785, + "logits/chosen": -0.39817455410957336, + "logits/rejected": -0.41713497042655945, + "logps/chosen": -0.021489018574357033, + "logps/rejected": -0.9022034406661987, + "loss": 1.5127, + "nll_loss": 0.3754188120365143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002148902043700218, + "rewards/margins": 0.08807145059108734, + "rewards/rejected": -0.09022034704685211, + "step": 4278 + }, + { + "epoch": 2.959197786998617, + "grad_norm": 6.00631046295166, + "learning_rate": 3.9115567850007685e-05, + "log_odds_chosen": 7.773863792419434, + "log_odds_ratio": -0.10315965116024017, + "logits/chosen": -0.10753624141216278, + "logits/rejected": -0.18743924796581268, + "logps/chosen": -0.039872244000434875, + "logps/rejected": -1.489803671836853, + "loss": 1.8879, + "nll_loss": 0.4616524577140808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003987224772572517, + "rewards/margins": 0.1449931412935257, + "rewards/rejected": -0.14898037910461426, + "step": 4279 + }, + { + "epoch": 2.959889349930844, + "grad_norm": 7.796035289764404, + "learning_rate": 3.911172583371754e-05, + "log_odds_chosen": 9.047157287597656, + "log_odds_ratio": -0.000789603334851563, + "logits/chosen": -0.5538119673728943, + "logits/rejected": -0.5578813552856445, + "logps/chosen": -0.008371025323867798, + "logps/rejected": -1.8051315546035767, + "loss": 1.6288, + "nll_loss": 0.4071248471736908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008371025323867798, + "rewards/margins": 0.17967605590820312, + "rewards/rejected": -0.1805131584405899, + "step": 4280 + }, + { + "epoch": 2.9605809128630707, + "grad_norm": 9.822040557861328, + "learning_rate": 3.910788381742739e-05, + "log_odds_chosen": 8.555595397949219, + "log_odds_ratio": -0.04050131142139435, + "logits/chosen": -0.5271527767181396, + "logits/rejected": -0.5641564726829529, + "logps/chosen": -0.009184690192341805, + "logps/rejected": -1.4864246845245361, + "loss": 2.6192, + "nll_loss": 0.6507552266120911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009184691007249057, + "rewards/margins": 0.1477240025997162, + "rewards/rejected": -0.14864246547222137, + "step": 4281 + }, + { + "epoch": 2.9612724757952975, + "grad_norm": 8.706021308898926, + "learning_rate": 3.9104041801137235e-05, + "log_odds_chosen": 8.526544570922852, + "log_odds_ratio": -0.0027415938675403595, + "logits/chosen": -0.2767260670661926, + "logits/rejected": -0.3521498739719391, + "logps/chosen": -0.03149921074509621, + "logps/rejected": -2.332549810409546, + "loss": 2.7675, + "nll_loss": 0.6915987133979797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031499210745096207, + "rewards/margins": 0.23010505735874176, + "rewards/rejected": -0.23325496912002563, + "step": 4282 + }, + { + "epoch": 2.9619640387275243, + "grad_norm": 8.435463905334473, + "learning_rate": 3.9100199784847094e-05, + "log_odds_chosen": 8.319658279418945, + "log_odds_ratio": -0.0016154496697708964, + "logits/chosen": -0.6601822972297668, + "logits/rejected": -0.6941728591918945, + "logps/chosen": -0.007337766233831644, + "logps/rejected": -1.6939918994903564, + "loss": 2.7152, + "nll_loss": 0.6786311268806458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007337766001001, + "rewards/margins": 0.16866540908813477, + "rewards/rejected": -0.1693992018699646, + "step": 4283 + }, + { + "epoch": 2.962655601659751, + "grad_norm": 10.894098281860352, + "learning_rate": 3.909635776855694e-05, + "log_odds_chosen": 8.50836181640625, + "log_odds_ratio": -0.0030262740328907967, + "logits/chosen": -0.6033471822738647, + "logits/rejected": -0.6101424098014832, + "logps/chosen": -0.004452358465641737, + "logps/rejected": -1.6144495010375977, + "loss": 2.132, + "nll_loss": 0.5326870083808899, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044523581163957715, + "rewards/margins": 0.16099971532821655, + "rewards/rejected": -0.16144494712352753, + "step": 4284 + }, + { + "epoch": 2.963347164591978, + "grad_norm": 8.354084968566895, + "learning_rate": 3.909251575226679e-05, + "log_odds_chosen": 9.289058685302734, + "log_odds_ratio": -0.0005091601051390171, + "logits/chosen": -0.4666883051395416, + "logits/rejected": -0.4653850793838501, + "logps/chosen": -0.0041956775821745396, + "logps/rejected": -1.7832475900650024, + "loss": 1.9979, + "nll_loss": 0.49942725896835327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004195678047835827, + "rewards/margins": 0.17790518701076508, + "rewards/rejected": -0.17832475900650024, + "step": 4285 + }, + { + "epoch": 2.964038727524205, + "grad_norm": 8.920733451843262, + "learning_rate": 3.9088673735976645e-05, + "log_odds_chosen": 8.48482894897461, + "log_odds_ratio": -0.31773439049720764, + "logits/chosen": -0.47151845693588257, + "logits/rejected": -0.5274024605751038, + "logps/chosen": -0.04550067335367203, + "logps/rejected": -1.6561365127563477, + "loss": 2.1528, + "nll_loss": 0.5064210891723633, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004550067242234945, + "rewards/margins": 0.16106358170509338, + "rewards/rejected": -0.16561365127563477, + "step": 4286 + }, + { + "epoch": 2.9647302904564317, + "grad_norm": 10.381241798400879, + "learning_rate": 3.908483171968649e-05, + "log_odds_chosen": 8.353084564208984, + "log_odds_ratio": -0.000998140312731266, + "logits/chosen": -0.4602486491203308, + "logits/rejected": -0.5085919499397278, + "logps/chosen": -0.010730762965977192, + "logps/rejected": -1.7254202365875244, + "loss": 1.8188, + "nll_loss": 0.45460206270217896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010730763897299767, + "rewards/margins": 0.17146895825862885, + "rewards/rejected": -0.1725420355796814, + "step": 4287 + }, + { + "epoch": 2.9654218533886585, + "grad_norm": 8.11890983581543, + "learning_rate": 3.908098970339634e-05, + "log_odds_chosen": 9.263433456420898, + "log_odds_ratio": -0.000751931220293045, + "logits/chosen": -0.4940032958984375, + "logits/rejected": -0.5015082955360413, + "logps/chosen": -0.0017792684957385063, + "logps/rejected": -1.5851678848266602, + "loss": 2.8127, + "nll_loss": 0.7030935883522034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017792684957385063, + "rewards/margins": 0.15833887457847595, + "rewards/rejected": -0.1585167944431305, + "step": 4288 + }, + { + "epoch": 2.9661134163208853, + "grad_norm": 7.235042572021484, + "learning_rate": 3.9077147687106196e-05, + "log_odds_chosen": 9.509930610656738, + "log_odds_ratio": -0.00032048820867203176, + "logits/chosen": -0.4654198884963989, + "logits/rejected": -0.5719193816184998, + "logps/chosen": -0.0006036916165612638, + "logps/rejected": -1.7015082836151123, + "loss": 1.8108, + "nll_loss": 0.4526631832122803, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0369158745743334e-05, + "rewards/margins": 0.1700904667377472, + "rewards/rejected": -0.17015081644058228, + "step": 4289 + }, + { + "epoch": 2.966804979253112, + "grad_norm": 10.309484481811523, + "learning_rate": 3.907330567081605e-05, + "log_odds_chosen": 7.775326728820801, + "log_odds_ratio": -0.002041358035057783, + "logits/chosen": -0.80727618932724, + "logits/rejected": -0.8146430253982544, + "logps/chosen": -0.009806342422962189, + "logps/rejected": -1.9370990991592407, + "loss": 2.6617, + "nll_loss": 0.6652133464813232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009806342422962189, + "rewards/margins": 0.19272929430007935, + "rewards/rejected": -0.19370993971824646, + "step": 4290 + }, + { + "epoch": 2.967496542185339, + "grad_norm": 6.211965560913086, + "learning_rate": 3.9069463654525894e-05, + "log_odds_chosen": 7.471760272979736, + "log_odds_ratio": -0.003108600154519081, + "logits/chosen": -0.6506915092468262, + "logits/rejected": -0.6464847326278687, + "logps/chosen": -0.013406043872237206, + "logps/rejected": -1.191932201385498, + "loss": 2.562, + "nll_loss": 0.64018714427948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013406045036390424, + "rewards/margins": 0.11785262078046799, + "rewards/rejected": -0.11919323354959488, + "step": 4291 + }, + { + "epoch": 2.968188105117566, + "grad_norm": 8.040523529052734, + "learning_rate": 3.906562163823575e-05, + "log_odds_chosen": 8.697381973266602, + "log_odds_ratio": -0.04064595699310303, + "logits/chosen": -0.8994853496551514, + "logits/rejected": -0.9400737285614014, + "logps/chosen": -0.009031183086335659, + "logps/rejected": -1.172762393951416, + "loss": 1.6371, + "nll_loss": 0.40520185232162476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009031182853505015, + "rewards/margins": 0.11637313663959503, + "rewards/rejected": -0.11727625131607056, + "step": 4292 + }, + { + "epoch": 2.9688796680497926, + "grad_norm": 8.956297874450684, + "learning_rate": 3.90617796219456e-05, + "log_odds_chosen": 7.714384078979492, + "log_odds_ratio": -0.022408613935112953, + "logits/chosen": -1.1202744245529175, + "logits/rejected": -1.164198875427246, + "logps/chosen": -0.03206299990415573, + "logps/rejected": -1.8425147533416748, + "loss": 2.6058, + "nll_loss": 0.6492141485214233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003206299850717187, + "rewards/margins": 0.18104518949985504, + "rewards/rejected": -0.18425148725509644, + "step": 4293 + }, + { + "epoch": 2.9695712309820195, + "grad_norm": 9.355306625366211, + "learning_rate": 3.905793760565545e-05, + "log_odds_chosen": 9.887144088745117, + "log_odds_ratio": -0.00020667076751124114, + "logits/chosen": -1.2133868932724, + "logits/rejected": -1.2913806438446045, + "logps/chosen": -0.0007871249108575284, + "logps/rejected": -2.1001222133636475, + "loss": 3.3309, + "nll_loss": 0.832693338394165, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.871249545132741e-05, + "rewards/margins": 0.20993351936340332, + "rewards/rejected": -0.21001222729682922, + "step": 4294 + }, + { + "epoch": 2.9702627939142463, + "grad_norm": 11.772032737731934, + "learning_rate": 3.9054095589365303e-05, + "log_odds_chosen": 8.659290313720703, + "log_odds_ratio": -0.0025949098635464907, + "logits/chosen": -0.4823494553565979, + "logits/rejected": -0.6542070508003235, + "logps/chosen": -0.0011625216575339437, + "logps/rejected": -1.6143386363983154, + "loss": 2.0923, + "nll_loss": 0.5228087902069092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011625216575339437, + "rewards/margins": 0.1613176167011261, + "rewards/rejected": -0.1614338606595993, + "step": 4295 + }, + { + "epoch": 2.970954356846473, + "grad_norm": 8.367432594299316, + "learning_rate": 3.905025357307515e-05, + "log_odds_chosen": 8.646538734436035, + "log_odds_ratio": -0.002555843908339739, + "logits/chosen": -0.8320046663284302, + "logits/rejected": -0.8604940176010132, + "logps/chosen": -0.01020450796931982, + "logps/rejected": -1.5223532915115356, + "loss": 2.0883, + "nll_loss": 0.5218141674995422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001020450727082789, + "rewards/margins": 0.15121488273143768, + "rewards/rejected": -0.15223534405231476, + "step": 4296 + }, + { + "epoch": 2.9716459197787, + "grad_norm": 10.044321060180664, + "learning_rate": 3.9046411556785e-05, + "log_odds_chosen": 8.615673065185547, + "log_odds_ratio": -0.0004599147359840572, + "logits/chosen": -0.566417932510376, + "logits/rejected": -0.5312891602516174, + "logps/chosen": -0.0008795886533334851, + "logps/rejected": -1.526329517364502, + "loss": 1.9875, + "nll_loss": 0.49684005975723267, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.795886242296547e-05, + "rewards/margins": 0.1525450050830841, + "rewards/rejected": -0.1526329517364502, + "step": 4297 + }, + { + "epoch": 2.972337482710927, + "grad_norm": 7.15255880355835, + "learning_rate": 3.9042569540494854e-05, + "log_odds_chosen": 6.27223539352417, + "log_odds_ratio": -0.08419568091630936, + "logits/chosen": -0.48312613368034363, + "logits/rejected": -0.5573301315307617, + "logps/chosen": -0.014529145322740078, + "logps/rejected": -0.7528274059295654, + "loss": 2.0046, + "nll_loss": 0.49272122979164124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001452914671972394, + "rewards/margins": 0.07382982224225998, + "rewards/rejected": -0.0752827376127243, + "step": 4298 + }, + { + "epoch": 2.9730290456431536, + "grad_norm": 9.718936920166016, + "learning_rate": 3.9038727524204706e-05, + "log_odds_chosen": 10.059136390686035, + "log_odds_ratio": -9.451636287849396e-05, + "logits/chosen": -0.7800579071044922, + "logits/rejected": -0.7673681974411011, + "logps/chosen": -0.0003319536044728011, + "logps/rejected": -1.5992937088012695, + "loss": 1.5703, + "nll_loss": 0.39255863428115845, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319535971968435e-05, + "rewards/margins": 0.15989619493484497, + "rewards/rejected": -0.15992936491966248, + "step": 4299 + }, + { + "epoch": 2.9737206085753805, + "grad_norm": 8.345458984375, + "learning_rate": 3.903488550791455e-05, + "log_odds_chosen": 8.29789924621582, + "log_odds_ratio": -0.25329098105430603, + "logits/chosen": -0.9458674192428589, + "logits/rejected": -0.9740471839904785, + "logps/chosen": -0.03172118216753006, + "logps/rejected": -1.9004459381103516, + "loss": 1.5651, + "nll_loss": 0.36595281958580017, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003172118216753006, + "rewards/margins": 0.1868724822998047, + "rewards/rejected": -0.1900446116924286, + "step": 4300 + }, + { + "epoch": 2.9744121715076073, + "grad_norm": 9.374674797058105, + "learning_rate": 3.903104349162441e-05, + "log_odds_chosen": 7.801394939422607, + "log_odds_ratio": -0.002809441415593028, + "logits/chosen": -0.9712445735931396, + "logits/rejected": -1.0163609981536865, + "logps/chosen": -0.02158650942146778, + "logps/rejected": -2.050865411758423, + "loss": 3.2508, + "nll_loss": 0.8124136924743652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002158651128411293, + "rewards/margins": 0.20292788743972778, + "rewards/rejected": -0.20508654415607452, + "step": 4301 + }, + { + "epoch": 2.975103734439834, + "grad_norm": 9.962836265563965, + "learning_rate": 3.902720147533426e-05, + "log_odds_chosen": 8.387914657592773, + "log_odds_ratio": -0.0036154557019472122, + "logits/chosen": -0.7854832410812378, + "logits/rejected": -0.8578284978866577, + "logps/chosen": -0.015231077559292316, + "logps/rejected": -1.6216599941253662, + "loss": 1.7954, + "nll_loss": 0.4484889507293701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015231078723445535, + "rewards/margins": 0.16064289212226868, + "rewards/rejected": -0.16216599941253662, + "step": 4302 + }, + { + "epoch": 2.975795297372061, + "grad_norm": 70.63771057128906, + "learning_rate": 3.902335945904411e-05, + "log_odds_chosen": 8.389386177062988, + "log_odds_ratio": -0.09924168884754181, + "logits/chosen": -0.97005295753479, + "logits/rejected": -0.9767628908157349, + "logps/chosen": -0.019456295296549797, + "logps/rejected": -1.2770992517471313, + "loss": 2.3251, + "nll_loss": 0.5713623762130737, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001945629483088851, + "rewards/margins": 0.12576431035995483, + "rewards/rejected": -0.12770992517471313, + "step": 4303 + }, + { + "epoch": 2.9764868603042878, + "grad_norm": 11.85477066040039, + "learning_rate": 3.901951744275396e-05, + "log_odds_chosen": 7.693626880645752, + "log_odds_ratio": -0.008109038695693016, + "logits/chosen": -1.0131137371063232, + "logits/rejected": -1.0055873394012451, + "logps/chosen": -0.003135553328320384, + "logps/rejected": -1.1383662223815918, + "loss": 3.107, + "nll_loss": 0.7759391069412231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031355535611510277, + "rewards/margins": 0.11352306604385376, + "rewards/rejected": -0.1138366311788559, + "step": 4304 + }, + { + "epoch": 2.9771784232365146, + "grad_norm": 8.77263355255127, + "learning_rate": 3.901567542646381e-05, + "log_odds_chosen": 6.874538898468018, + "log_odds_ratio": -0.012623955495655537, + "logits/chosen": -0.9305988550186157, + "logits/rejected": -0.9772317409515381, + "logps/chosen": -0.04155722260475159, + "logps/rejected": -1.3131730556488037, + "loss": 2.2006, + "nll_loss": 0.5488851070404053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004155721981078386, + "rewards/margins": 0.12716159224510193, + "rewards/rejected": -0.13131731748580933, + "step": 4305 + }, + { + "epoch": 2.9778699861687414, + "grad_norm": 15.39285659790039, + "learning_rate": 3.901183341017366e-05, + "log_odds_chosen": 8.982562065124512, + "log_odds_ratio": -0.0003474602708593011, + "logits/chosen": -0.8192355632781982, + "logits/rejected": -0.9334505796432495, + "logps/chosen": -0.0009468475473113358, + "logps/rejected": -1.6504664421081543, + "loss": 2.0991, + "nll_loss": 0.5247402191162109, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.468475764151663e-05, + "rewards/margins": 0.16495195031166077, + "rewards/rejected": -0.16504666209220886, + "step": 4306 + }, + { + "epoch": 2.9785615491009683, + "grad_norm": 14.8624906539917, + "learning_rate": 3.900799139388351e-05, + "log_odds_chosen": 9.051704406738281, + "log_odds_ratio": -0.0017430292209610343, + "logits/chosen": -0.6860344409942627, + "logits/rejected": -0.8370791673660278, + "logps/chosen": -0.0037408650387078524, + "logps/rejected": -1.9641859531402588, + "loss": 2.7644, + "nll_loss": 0.6909268498420715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003740865213330835, + "rewards/margins": 0.19604453444480896, + "rewards/rejected": -0.1964186131954193, + "step": 4307 + }, + { + "epoch": 2.979253112033195, + "grad_norm": 8.236113548278809, + "learning_rate": 3.9004149377593365e-05, + "log_odds_chosen": 7.400773048400879, + "log_odds_ratio": -0.09109840542078018, + "logits/chosen": -0.5979514122009277, + "logits/rejected": -0.5859559774398804, + "logps/chosen": -0.022600244730710983, + "logps/rejected": -1.639123558998108, + "loss": 2.1339, + "nll_loss": 0.5243626236915588, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022600244265049696, + "rewards/margins": 0.1616523414850235, + "rewards/rejected": -0.1639123558998108, + "step": 4308 + }, + { + "epoch": 2.979944674965422, + "grad_norm": 4.792588233947754, + "learning_rate": 3.900030736130321e-05, + "log_odds_chosen": 5.436771392822266, + "log_odds_ratio": -0.09919846802949905, + "logits/chosen": -0.6263391971588135, + "logits/rejected": -0.7491417527198792, + "logps/chosen": -0.03652811795473099, + "logps/rejected": -1.2123457193374634, + "loss": 2.3405, + "nll_loss": 0.5751992464065552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036528119817376137, + "rewards/margins": 0.11758175492286682, + "rewards/rejected": -0.12123456597328186, + "step": 4309 + }, + { + "epoch": 2.9806362378976488, + "grad_norm": 8.58399486541748, + "learning_rate": 3.899646534501307e-05, + "log_odds_chosen": 10.13625717163086, + "log_odds_ratio": -0.00011886454740306363, + "logits/chosen": -0.5826963782310486, + "logits/rejected": -0.7255100011825562, + "logps/chosen": -0.00013461017806548625, + "logps/rejected": -1.4675657749176025, + "loss": 2.1245, + "nll_loss": 0.5311151742935181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3461018170346506e-05, + "rewards/margins": 0.14674311876296997, + "rewards/rejected": -0.1467565894126892, + "step": 4310 + }, + { + "epoch": 2.9813278008298756, + "grad_norm": 10.050031661987305, + "learning_rate": 3.8992623328722915e-05, + "log_odds_chosen": 8.63019847869873, + "log_odds_ratio": -0.0012419001432135701, + "logits/chosen": -0.5965954065322876, + "logits/rejected": -0.6558192372322083, + "logps/chosen": -0.004061999265104532, + "logps/rejected": -1.7488057613372803, + "loss": 1.8868, + "nll_loss": 0.4715661108493805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040619992068968713, + "rewards/margins": 0.17447435855865479, + "rewards/rejected": -0.17488057911396027, + "step": 4311 + }, + { + "epoch": 2.9820193637621024, + "grad_norm": 9.843374252319336, + "learning_rate": 3.898878131243277e-05, + "log_odds_chosen": 9.409902572631836, + "log_odds_ratio": -0.00021617556922137737, + "logits/chosen": -1.0234929323196411, + "logits/rejected": -1.1329476833343506, + "logps/chosen": -0.0003393127117305994, + "logps/rejected": -1.5909518003463745, + "loss": 1.8993, + "nll_loss": 0.4748102128505707, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3931268262676895e-05, + "rewards/margins": 0.1590612381696701, + "rewards/rejected": -0.1590951681137085, + "step": 4312 + }, + { + "epoch": 2.9827109266943292, + "grad_norm": 9.166037559509277, + "learning_rate": 3.898493929614262e-05, + "log_odds_chosen": 7.901092052459717, + "log_odds_ratio": -0.054978758096694946, + "logits/chosen": -0.7726214528083801, + "logits/rejected": -0.8527544140815735, + "logps/chosen": -0.028022143989801407, + "logps/rejected": -1.464059829711914, + "loss": 1.8119, + "nll_loss": 0.44747546315193176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002802214352414012, + "rewards/margins": 0.14360378682613373, + "rewards/rejected": -0.14640599489212036, + "step": 4313 + }, + { + "epoch": 2.983402489626556, + "grad_norm": 7.450784206390381, + "learning_rate": 3.8981097279852466e-05, + "log_odds_chosen": 7.23293399810791, + "log_odds_ratio": -0.05062123015522957, + "logits/chosen": -0.5278096199035645, + "logits/rejected": -0.62985759973526, + "logps/chosen": -0.026304200291633606, + "logps/rejected": -1.278918981552124, + "loss": 1.8208, + "nll_loss": 0.45012617111206055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026304200291633606, + "rewards/margins": 0.12526148557662964, + "rewards/rejected": -0.1278918981552124, + "step": 4314 + }, + { + "epoch": 2.984094052558783, + "grad_norm": 7.188449382781982, + "learning_rate": 3.897725526356232e-05, + "log_odds_chosen": 7.602090358734131, + "log_odds_ratio": -0.07067767530679703, + "logits/chosen": -0.8626900911331177, + "logits/rejected": -0.8943912982940674, + "logps/chosen": -0.0472387969493866, + "logps/rejected": -1.7587538957595825, + "loss": 1.9876, + "nll_loss": 0.48984020948410034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004723879974335432, + "rewards/margins": 0.17115150392055511, + "rewards/rejected": -0.17587539553642273, + "step": 4315 + }, + { + "epoch": 2.9847856154910097, + "grad_norm": 7.509032726287842, + "learning_rate": 3.897341324727217e-05, + "log_odds_chosen": 8.086267471313477, + "log_odds_ratio": -0.052891407161951065, + "logits/chosen": -0.4344555139541626, + "logits/rejected": -0.4079325199127197, + "logps/chosen": -0.015601033344864845, + "logps/rejected": -1.3612074851989746, + "loss": 1.4352, + "nll_loss": 0.3534983992576599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015601032646372914, + "rewards/margins": 0.13456064462661743, + "rewards/rejected": -0.1361207515001297, + "step": 4316 + }, + { + "epoch": 2.9854771784232366, + "grad_norm": 7.007813453674316, + "learning_rate": 3.896957123098202e-05, + "log_odds_chosen": 8.999627113342285, + "log_odds_ratio": -0.0010090176947414875, + "logits/chosen": -0.666915237903595, + "logits/rejected": -0.7530708909034729, + "logps/chosen": -0.01949833706021309, + "logps/rejected": -2.4200823307037354, + "loss": 1.8857, + "nll_loss": 0.47133368253707886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019498338224366307, + "rewards/margins": 0.24005839228630066, + "rewards/rejected": -0.242008239030838, + "step": 4317 + }, + { + "epoch": 2.9861687413554634, + "grad_norm": 11.183320045471191, + "learning_rate": 3.896572921469187e-05, + "log_odds_chosen": 6.617632865905762, + "log_odds_ratio": -0.04021994769573212, + "logits/chosen": -0.7787746787071228, + "logits/rejected": -0.8017224073410034, + "logps/chosen": -0.041280489414930344, + "logps/rejected": -1.8801112174987793, + "loss": 2.2311, + "nll_loss": 0.5537528991699219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004128048662096262, + "rewards/margins": 0.18388308584690094, + "rewards/rejected": -0.18801113963127136, + "step": 4318 + }, + { + "epoch": 2.9868603042876902, + "grad_norm": 9.392274856567383, + "learning_rate": 3.896188719840173e-05, + "log_odds_chosen": 8.517684936523438, + "log_odds_ratio": -0.010786582715809345, + "logits/chosen": -0.641266942024231, + "logits/rejected": -0.6726396679878235, + "logps/chosen": -0.042483001947402954, + "logps/rejected": -1.9543349742889404, + "loss": 2.701, + "nll_loss": 0.6741783022880554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004248300567269325, + "rewards/margins": 0.19118520617485046, + "rewards/rejected": -0.19543349742889404, + "step": 4319 + }, + { + "epoch": 2.987551867219917, + "grad_norm": 8.228974342346191, + "learning_rate": 3.8958045182111574e-05, + "log_odds_chosen": 5.370292663574219, + "log_odds_ratio": -0.3013046681880951, + "logits/chosen": -0.2348720133304596, + "logits/rejected": -0.37705564498901367, + "logps/chosen": -0.04984167218208313, + "logps/rejected": -0.9333829283714294, + "loss": 2.2342, + "nll_loss": 0.5284290313720703, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004984167404472828, + "rewards/margins": 0.08835412561893463, + "rewards/rejected": -0.09333829581737518, + "step": 4320 + }, + { + "epoch": 2.988243430152144, + "grad_norm": 50.50739288330078, + "learning_rate": 3.8954203165821426e-05, + "log_odds_chosen": 8.352073669433594, + "log_odds_ratio": -0.0004624387656804174, + "logits/chosen": -0.5019068717956543, + "logits/rejected": -0.5019280910491943, + "logps/chosen": -0.011270806193351746, + "logps/rejected": -1.633318543434143, + "loss": 3.0913, + "nll_loss": 0.7727884650230408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001127080642618239, + "rewards/margins": 0.162204772233963, + "rewards/rejected": -0.16333186626434326, + "step": 4321 + }, + { + "epoch": 2.9889349930843707, + "grad_norm": 8.486451148986816, + "learning_rate": 3.895036114953128e-05, + "log_odds_chosen": 8.059738159179688, + "log_odds_ratio": -0.00266972160898149, + "logits/chosen": -0.7248245477676392, + "logits/rejected": -0.78750079870224, + "logps/chosen": -0.047702398151159286, + "logps/rejected": -2.0713438987731934, + "loss": 1.8981, + "nll_loss": 0.474260151386261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004770240746438503, + "rewards/margins": 0.20236416161060333, + "rewards/rejected": -0.2071343958377838, + "step": 4322 + }, + { + "epoch": 2.9896265560165975, + "grad_norm": 9.462930679321289, + "learning_rate": 3.8946519133241124e-05, + "log_odds_chosen": 8.55579948425293, + "log_odds_ratio": -0.02762962505221367, + "logits/chosen": -0.5688346028327942, + "logits/rejected": -0.5741695165634155, + "logps/chosen": -0.011251446790993214, + "logps/rejected": -1.7209012508392334, + "loss": 1.7582, + "nll_loss": 0.43678048253059387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001125144655816257, + "rewards/margins": 0.17096498608589172, + "rewards/rejected": -0.17209011316299438, + "step": 4323 + }, + { + "epoch": 2.9903181189488244, + "grad_norm": 5.706713676452637, + "learning_rate": 3.894267711695098e-05, + "log_odds_chosen": 7.146937370300293, + "log_odds_ratio": -0.009990318678319454, + "logits/chosen": -0.022167712450027466, + "logits/rejected": -0.007036931812763214, + "logps/chosen": -0.019013497978448868, + "logps/rejected": -1.0692731142044067, + "loss": 1.9051, + "nll_loss": 0.4752686023712158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001901349751278758, + "rewards/margins": 0.10502596944570541, + "rewards/rejected": -0.10692732036113739, + "step": 4324 + }, + { + "epoch": 2.991009681881051, + "grad_norm": 12.431560516357422, + "learning_rate": 3.893883510066083e-05, + "log_odds_chosen": 8.107927322387695, + "log_odds_ratio": -0.06721797585487366, + "logits/chosen": -0.8215648531913757, + "logits/rejected": -0.911736786365509, + "logps/chosen": -0.024141529574990273, + "logps/rejected": -1.440712332725525, + "loss": 2.7399, + "nll_loss": 0.6782621741294861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024141529574990273, + "rewards/margins": 0.14165708422660828, + "rewards/rejected": -0.14407123625278473, + "step": 4325 + }, + { + "epoch": 2.991701244813278, + "grad_norm": 11.858086585998535, + "learning_rate": 3.893499308437068e-05, + "log_odds_chosen": 7.023995876312256, + "log_odds_ratio": -0.17745637893676758, + "logits/chosen": -0.5713260769844055, + "logits/rejected": -0.6480407118797302, + "logps/chosen": -0.03282373398542404, + "logps/rejected": -1.1977462768554688, + "loss": 2.0188, + "nll_loss": 0.48696666955947876, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003282373771071434, + "rewards/margins": 0.11649225652217865, + "rewards/rejected": -0.11977462470531464, + "step": 4326 + }, + { + "epoch": 2.992392807745505, + "grad_norm": 11.904878616333008, + "learning_rate": 3.893115106808053e-05, + "log_odds_chosen": 10.03628921508789, + "log_odds_ratio": -7.053057925077155e-05, + "logits/chosen": -0.4557734429836273, + "logits/rejected": -0.5793010592460632, + "logps/chosen": -0.0002729504485614598, + "logps/rejected": -1.6975574493408203, + "loss": 2.4038, + "nll_loss": 0.6009531021118164, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7295047402731143e-05, + "rewards/margins": 0.16972845792770386, + "rewards/rejected": -0.169755756855011, + "step": 4327 + }, + { + "epoch": 2.9930843706777317, + "grad_norm": 9.12769603729248, + "learning_rate": 3.892730905179039e-05, + "log_odds_chosen": 7.800318717956543, + "log_odds_ratio": -0.0034801724832504988, + "logits/chosen": -0.60991370677948, + "logits/rejected": -0.7001553177833557, + "logps/chosen": -0.03430306911468506, + "logps/rejected": -1.6319289207458496, + "loss": 2.4333, + "nll_loss": 0.6079657077789307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034303071442991495, + "rewards/margins": 0.15976257622241974, + "rewards/rejected": -0.16319288313388824, + "step": 4328 + }, + { + "epoch": 2.9937759336099585, + "grad_norm": 13.82374095916748, + "learning_rate": 3.892346703550023e-05, + "log_odds_chosen": 8.682519912719727, + "log_odds_ratio": -0.010508873499929905, + "logits/chosen": -0.4079381227493286, + "logits/rejected": -0.5559213161468506, + "logps/chosen": -0.005879267118871212, + "logps/rejected": -2.0450315475463867, + "loss": 2.4957, + "nll_loss": 0.622867226600647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005879267118871212, + "rewards/margins": 0.20391523838043213, + "rewards/rejected": -0.20450317859649658, + "step": 4329 + }, + { + "epoch": 2.9944674965421854, + "grad_norm": 9.895845413208008, + "learning_rate": 3.8919625019210085e-05, + "log_odds_chosen": 8.463839530944824, + "log_odds_ratio": -0.0006095452117733657, + "logits/chosen": -0.5000820159912109, + "logits/rejected": -0.5394564867019653, + "logps/chosen": -0.0017691230168566108, + "logps/rejected": -1.2518408298492432, + "loss": 1.7294, + "nll_loss": 0.43227720260620117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017691230459604412, + "rewards/margins": 0.12500718235969543, + "rewards/rejected": -0.12518410384655, + "step": 4330 + }, + { + "epoch": 2.995159059474412, + "grad_norm": 6.428073406219482, + "learning_rate": 3.891578300291994e-05, + "log_odds_chosen": 7.467696666717529, + "log_odds_ratio": -0.1370055079460144, + "logits/chosen": -0.2736653983592987, + "logits/rejected": -0.2604514956474304, + "logps/chosen": -0.03479510545730591, + "logps/rejected": -1.2272887229919434, + "loss": 1.6824, + "nll_loss": 0.40690019726753235, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003479510312899947, + "rewards/margins": 0.1192493662238121, + "rewards/rejected": -0.1227288767695427, + "step": 4331 + }, + { + "epoch": 2.995850622406639, + "grad_norm": 16.991193771362305, + "learning_rate": 3.891194098662978e-05, + "log_odds_chosen": 10.145772933959961, + "log_odds_ratio": -0.00011397979687899351, + "logits/chosen": -1.0705267190933228, + "logits/rejected": -1.188539981842041, + "logps/chosen": -0.00029643025482073426, + "logps/rejected": -1.6331541538238525, + "loss": 2.6387, + "nll_loss": 0.659654438495636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9643026209669188e-05, + "rewards/margins": 0.16328579187393188, + "rewards/rejected": -0.16331541538238525, + "step": 4332 + }, + { + "epoch": 2.996542185338866, + "grad_norm": 5.449528694152832, + "learning_rate": 3.8908098970339635e-05, + "log_odds_chosen": 7.263501167297363, + "log_odds_ratio": -0.13936229050159454, + "logits/chosen": -0.24265936017036438, + "logits/rejected": -0.30016687512397766, + "logps/chosen": -0.028652330860495567, + "logps/rejected": -0.9009796380996704, + "loss": 1.887, + "nll_loss": 0.4578217566013336, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028652329929172993, + "rewards/margins": 0.08723273873329163, + "rewards/rejected": -0.09009796380996704, + "step": 4333 + }, + { + "epoch": 2.9972337482710927, + "grad_norm": 13.071614265441895, + "learning_rate": 3.890425695404949e-05, + "log_odds_chosen": 9.893881797790527, + "log_odds_ratio": -0.00010834841668838635, + "logits/chosen": -0.7541863918304443, + "logits/rejected": -0.902802586555481, + "logps/chosen": -0.0004265864845365286, + "logps/rejected": -1.915844440460205, + "loss": 2.3551, + "nll_loss": 0.5887622833251953, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.265864481567405e-05, + "rewards/margins": 0.19154179096221924, + "rewards/rejected": -0.19158445298671722, + "step": 4334 + }, + { + "epoch": 2.9979253112033195, + "grad_norm": 10.656232833862305, + "learning_rate": 3.890041493775934e-05, + "log_odds_chosen": 8.736654281616211, + "log_odds_ratio": -0.0017928852466866374, + "logits/chosen": -0.9096781611442566, + "logits/rejected": -0.9119482040405273, + "logps/chosen": -0.0038549380842596292, + "logps/rejected": -1.348965048789978, + "loss": 1.8054, + "nll_loss": 0.4511691629886627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003854937676806003, + "rewards/margins": 0.13451100885868073, + "rewards/rejected": -0.13489650189876556, + "step": 4335 + }, + { + "epoch": 2.9986168741355463, + "grad_norm": 15.79922866821289, + "learning_rate": 3.8896572921469186e-05, + "log_odds_chosen": 7.798433303833008, + "log_odds_ratio": -0.15630565583705902, + "logits/chosen": -0.7631257772445679, + "logits/rejected": -0.7486634254455566, + "logps/chosen": -0.011932741850614548, + "logps/rejected": -1.5030312538146973, + "loss": 1.8688, + "nll_loss": 0.45157331228256226, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0011932742781937122, + "rewards/margins": 0.1491098701953888, + "rewards/rejected": -0.15030314028263092, + "step": 4336 + }, + { + "epoch": 2.999308437067773, + "grad_norm": 13.872130393981934, + "learning_rate": 3.8892730905179045e-05, + "log_odds_chosen": 8.419084548950195, + "log_odds_ratio": -0.0037066680379211903, + "logits/chosen": -0.9183025360107422, + "logits/rejected": -1.061065435409546, + "logps/chosen": -0.0020110062323510647, + "logps/rejected": -1.3740841150283813, + "loss": 3.0195, + "nll_loss": 0.7545135021209717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002011006436077878, + "rewards/margins": 0.13720732927322388, + "rewards/rejected": -0.13740840554237366, + "step": 4337 + }, + { + "epoch": 3.0, + "grad_norm": 5.617066383361816, + "learning_rate": 3.888888888888889e-05, + "log_odds_chosen": 8.242136001586914, + "log_odds_ratio": -0.003207864472642541, + "logits/chosen": -0.576624870300293, + "logits/rejected": -0.5586822032928467, + "logps/chosen": -0.011922507546842098, + "logps/rejected": -1.576462984085083, + "loss": 1.8424, + "nll_loss": 0.4602872133255005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011922508710995317, + "rewards/margins": 0.15645405650138855, + "rewards/rejected": -0.1576462984085083, + "step": 4338 + }, + { + "epoch": 3.000691562932227, + "grad_norm": 7.336403846740723, + "learning_rate": 3.888504687259874e-05, + "log_odds_chosen": 8.288568496704102, + "log_odds_ratio": -0.002005348913371563, + "logits/chosen": -0.22258299589157104, + "logits/rejected": -0.23540785908699036, + "logps/chosen": -0.0008239853195846081, + "logps/rejected": -1.2276651859283447, + "loss": 1.6028, + "nll_loss": 0.4004961848258972, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.239853195846081e-05, + "rewards/margins": 0.12268412858247757, + "rewards/rejected": -0.12276651710271835, + "step": 4339 + }, + { + "epoch": 3.0013831258644537, + "grad_norm": 8.08159351348877, + "learning_rate": 3.8881204856308596e-05, + "log_odds_chosen": 8.504263877868652, + "log_odds_ratio": -0.0014581052819266915, + "logits/chosen": -0.4644441604614258, + "logits/rejected": -0.6355771422386169, + "logps/chosen": -0.001975719118490815, + "logps/rejected": -1.3220500946044922, + "loss": 1.7005, + "nll_loss": 0.4249787926673889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019757190602831542, + "rewards/margins": 0.1320074498653412, + "rewards/rejected": -0.13220500946044922, + "step": 4340 + }, + { + "epoch": 3.0020746887966805, + "grad_norm": 9.502801895141602, + "learning_rate": 3.887736284001844e-05, + "log_odds_chosen": 8.841012954711914, + "log_odds_ratio": -0.0030967092607170343, + "logits/chosen": -0.8835254907608032, + "logits/rejected": -0.9606581926345825, + "logps/chosen": -0.01663133129477501, + "logps/rejected": -1.7187840938568115, + "loss": 1.8045, + "nll_loss": 0.45080313086509705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016631331527605653, + "rewards/margins": 0.17021527886390686, + "rewards/rejected": -0.17187842726707458, + "step": 4341 + }, + { + "epoch": 3.0027662517289073, + "grad_norm": 7.053144454956055, + "learning_rate": 3.8873520823728294e-05, + "log_odds_chosen": 6.129289627075195, + "log_odds_ratio": -0.018381556496024132, + "logits/chosen": -0.45580482482910156, + "logits/rejected": -0.43981999158859253, + "logps/chosen": -0.02091745100915432, + "logps/rejected": -1.376305341720581, + "loss": 2.0763, + "nll_loss": 0.5172290802001953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020917453803122044, + "rewards/margins": 0.13553880155086517, + "rewards/rejected": -0.13763055205345154, + "step": 4342 + }, + { + "epoch": 3.003457814661134, + "grad_norm": 6.612039089202881, + "learning_rate": 3.8869678807438146e-05, + "log_odds_chosen": 6.951261520385742, + "log_odds_ratio": -0.010566813871264458, + "logits/chosen": -0.4701387286186218, + "logits/rejected": -0.5493282675743103, + "logps/chosen": -0.008238430134952068, + "logps/rejected": -1.044950246810913, + "loss": 2.1988, + "nll_loss": 0.548647403717041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008238430600613356, + "rewards/margins": 0.10367118567228317, + "rewards/rejected": -0.10449503362178802, + "step": 4343 + }, + { + "epoch": 3.004149377593361, + "grad_norm": 6.696752548217773, + "learning_rate": 3.8865836791148e-05, + "log_odds_chosen": 8.452653884887695, + "log_odds_ratio": -0.001230748021043837, + "logits/chosen": -0.7052797079086304, + "logits/rejected": -0.7413879632949829, + "logps/chosen": -0.0010859024478122592, + "logps/rejected": -1.1285423040390015, + "loss": 1.9587, + "nll_loss": 0.48956286907196045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010859025496756658, + "rewards/margins": 0.11274563521146774, + "rewards/rejected": -0.1128542348742485, + "step": 4344 + }, + { + "epoch": 3.004840940525588, + "grad_norm": 8.277460098266602, + "learning_rate": 3.8861994774857844e-05, + "log_odds_chosen": 7.86977481842041, + "log_odds_ratio": -0.08035603165626526, + "logits/chosen": -0.5653095245361328, + "logits/rejected": -0.6719416975975037, + "logps/chosen": -0.02020619437098503, + "logps/rejected": -1.7089112997055054, + "loss": 1.8993, + "nll_loss": 0.46680140495300293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020206195767968893, + "rewards/margins": 0.16887050867080688, + "rewards/rejected": -0.17089113593101501, + "step": 4345 + }, + { + "epoch": 3.0055325034578146, + "grad_norm": 17.090787887573242, + "learning_rate": 3.8858152758567704e-05, + "log_odds_chosen": 9.048724174499512, + "log_odds_ratio": -0.0008302384521812201, + "logits/chosen": -0.6417528986930847, + "logits/rejected": -0.7200732231140137, + "logps/chosen": -0.0013055673334747553, + "logps/rejected": -1.8107833862304688, + "loss": 2.562, + "nll_loss": 0.6404181122779846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013055672752670944, + "rewards/margins": 0.18094778060913086, + "rewards/rejected": -0.18107834458351135, + "step": 4346 + }, + { + "epoch": 3.0062240663900415, + "grad_norm": 10.707474708557129, + "learning_rate": 3.885431074227755e-05, + "log_odds_chosen": 9.08885669708252, + "log_odds_ratio": -0.0011596616823226213, + "logits/chosen": -0.686644971370697, + "logits/rejected": -0.7940715551376343, + "logps/chosen": -0.009002113714814186, + "logps/rejected": -2.2833571434020996, + "loss": 1.6825, + "nll_loss": 0.42050090432167053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000900211394764483, + "rewards/margins": 0.22743549942970276, + "rewards/rejected": -0.22833570837974548, + "step": 4347 + }, + { + "epoch": 3.0069156293222683, + "grad_norm": 13.651171684265137, + "learning_rate": 3.88504687259874e-05, + "log_odds_chosen": 9.122451782226562, + "log_odds_ratio": -0.00026590804918669164, + "logits/chosen": -0.4620441794395447, + "logits/rejected": -0.5854654312133789, + "logps/chosen": -0.0007560051744803786, + "logps/rejected": -1.7641313076019287, + "loss": 1.5304, + "nll_loss": 0.3825651705265045, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.560051744803786e-05, + "rewards/margins": 0.17633754014968872, + "rewards/rejected": -0.1764131486415863, + "step": 4348 + }, + { + "epoch": 3.007607192254495, + "grad_norm": 4.420802593231201, + "learning_rate": 3.8846626709697254e-05, + "log_odds_chosen": 8.020853042602539, + "log_odds_ratio": -0.008768648840487003, + "logits/chosen": -0.2722416818141937, + "logits/rejected": -0.3450675904750824, + "logps/chosen": -0.024905625730752945, + "logps/rejected": -1.2258330583572388, + "loss": 2.3368, + "nll_loss": 0.5833240747451782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002490562153980136, + "rewards/margins": 0.12009275704622269, + "rewards/rejected": -0.1225833147764206, + "step": 4349 + }, + { + "epoch": 3.008298755186722, + "grad_norm": 8.147367477416992, + "learning_rate": 3.88427846934071e-05, + "log_odds_chosen": 8.38794994354248, + "log_odds_ratio": -0.0030633790884166956, + "logits/chosen": -0.9003145694732666, + "logits/rejected": -0.939216673374176, + "logps/chosen": -0.0036230748519301414, + "logps/rejected": -1.565699577331543, + "loss": 1.3701, + "nll_loss": 0.3422118127346039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003623075026553124, + "rewards/margins": 0.15620765089988708, + "rewards/rejected": -0.1565699726343155, + "step": 4350 + }, + { + "epoch": 3.008990318118949, + "grad_norm": 7.668023109436035, + "learning_rate": 3.883894267711695e-05, + "log_odds_chosen": 8.312056541442871, + "log_odds_ratio": -0.0012021416332572699, + "logits/chosen": -0.7460612058639526, + "logits/rejected": -0.786266565322876, + "logps/chosen": -0.015112178400158882, + "logps/rejected": -1.3443372249603271, + "loss": 2.188, + "nll_loss": 0.5468809008598328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015112179098650813, + "rewards/margins": 0.13292251527309418, + "rewards/rejected": -0.13443374633789062, + "step": 4351 + }, + { + "epoch": 3.0096818810511756, + "grad_norm": 12.11293888092041, + "learning_rate": 3.8835100660826805e-05, + "log_odds_chosen": 9.41283893585205, + "log_odds_ratio": -0.00010628172458382323, + "logits/chosen": -0.8179978132247925, + "logits/rejected": -0.8387307524681091, + "logps/chosen": -0.00039945554453879595, + "logps/rejected": -1.5601049661636353, + "loss": 1.4393, + "nll_loss": 0.3598126471042633, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9945560274645686e-05, + "rewards/margins": 0.15597054362297058, + "rewards/rejected": -0.1560104936361313, + "step": 4352 + }, + { + "epoch": 3.0103734439834025, + "grad_norm": 6.373059272766113, + "learning_rate": 3.883125864453666e-05, + "log_odds_chosen": 8.087642669677734, + "log_odds_ratio": -0.003798246616497636, + "logits/chosen": -0.5732825994491577, + "logits/rejected": -0.6371693015098572, + "logps/chosen": -0.006854540202766657, + "logps/rejected": -1.7817339897155762, + "loss": 1.4208, + "nll_loss": 0.35482344031333923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006854540552012622, + "rewards/margins": 0.17748793959617615, + "rewards/rejected": -0.17817339301109314, + "step": 4353 + }, + { + "epoch": 3.0110650069156293, + "grad_norm": 6.119757652282715, + "learning_rate": 3.88274166282465e-05, + "log_odds_chosen": 10.328543663024902, + "log_odds_ratio": -6.733743794029579e-05, + "logits/chosen": -0.8365834355354309, + "logits/rejected": -0.8045994639396667, + "logps/chosen": -0.00036057105171494186, + "logps/rejected": -2.1830642223358154, + "loss": 1.7374, + "nll_loss": 0.43434974551200867, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.605710662668571e-05, + "rewards/margins": 0.21827037632465363, + "rewards/rejected": -0.21830643713474274, + "step": 4354 + }, + { + "epoch": 3.011756569847856, + "grad_norm": 7.220427513122559, + "learning_rate": 3.882357461195636e-05, + "log_odds_chosen": 8.390373229980469, + "log_odds_ratio": -0.010137408040463924, + "logits/chosen": -0.5291712880134583, + "logits/rejected": -0.6182175278663635, + "logps/chosen": -0.021362818777561188, + "logps/rejected": -1.3063123226165771, + "loss": 1.7929, + "nll_loss": 0.4471994936466217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021362819243222475, + "rewards/margins": 0.12849494814872742, + "rewards/rejected": -0.1306312382221222, + "step": 4355 + }, + { + "epoch": 3.012448132780083, + "grad_norm": 10.587180137634277, + "learning_rate": 3.881973259566621e-05, + "log_odds_chosen": 9.672530174255371, + "log_odds_ratio": -0.00017483210831414908, + "logits/chosen": -0.6423888802528381, + "logits/rejected": -0.6911444664001465, + "logps/chosen": -0.0017972304485738277, + "logps/rejected": -2.239515781402588, + "loss": 1.8757, + "nll_loss": 0.4689146876335144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017972305067814887, + "rewards/margins": 0.2237718403339386, + "rewards/rejected": -0.2239515781402588, + "step": 4356 + }, + { + "epoch": 3.0131396957123098, + "grad_norm": 10.566658973693848, + "learning_rate": 3.881589057937606e-05, + "log_odds_chosen": 9.284326553344727, + "log_odds_ratio": -0.0003429109347052872, + "logits/chosen": -0.8109344244003296, + "logits/rejected": -0.9047459363937378, + "logps/chosen": -0.0017176901455968618, + "logps/rejected": -1.703169584274292, + "loss": 2.3812, + "nll_loss": 0.5952752232551575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017176903202198446, + "rewards/margins": 0.170145183801651, + "rewards/rejected": -0.17031696438789368, + "step": 4357 + }, + { + "epoch": 3.0138312586445366, + "grad_norm": 17.43169593811035, + "learning_rate": 3.881204856308591e-05, + "log_odds_chosen": 8.845995903015137, + "log_odds_ratio": -0.004415709525346756, + "logits/chosen": -0.5009146332740784, + "logits/rejected": -0.5920040607452393, + "logps/chosen": -0.014210812747478485, + "logps/rejected": -2.076862335205078, + "loss": 2.4603, + "nll_loss": 0.6146366596221924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001421081367880106, + "rewards/margins": 0.2062651515007019, + "rewards/rejected": -0.20768624544143677, + "step": 4358 + }, + { + "epoch": 3.0145228215767634, + "grad_norm": 12.506994247436523, + "learning_rate": 3.880820654679576e-05, + "log_odds_chosen": 7.278022766113281, + "log_odds_ratio": -0.028671320527791977, + "logits/chosen": -0.5169048309326172, + "logits/rejected": -0.5569590330123901, + "logps/chosen": -0.008861766196787357, + "logps/rejected": -1.2220641374588013, + "loss": 2.141, + "nll_loss": 0.5323811769485474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008861765963956714, + "rewards/margins": 0.12132023274898529, + "rewards/rejected": -0.12220640480518341, + "step": 4359 + }, + { + "epoch": 3.0152143845089903, + "grad_norm": 9.292475700378418, + "learning_rate": 3.880436453050561e-05, + "log_odds_chosen": 8.517985343933105, + "log_odds_ratio": -0.0006469730869866908, + "logits/chosen": -0.5386276245117188, + "logits/rejected": -0.565827488899231, + "logps/chosen": -0.002869624411687255, + "logps/rejected": -1.2151039838790894, + "loss": 1.6565, + "nll_loss": 0.41406431794166565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028696245863102376, + "rewards/margins": 0.12122343480587006, + "rewards/rejected": -0.12151038646697998, + "step": 4360 + }, + { + "epoch": 3.015905947441217, + "grad_norm": 9.588244438171387, + "learning_rate": 3.8800522514215456e-05, + "log_odds_chosen": 9.962486267089844, + "log_odds_ratio": -0.000979436095803976, + "logits/chosen": -0.4506533741950989, + "logits/rejected": -0.4827750325202942, + "logps/chosen": -0.03247072920203209, + "logps/rejected": -2.2011234760284424, + "loss": 1.5661, + "nll_loss": 0.3914303481578827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032470731530338526, + "rewards/margins": 0.21686527132987976, + "rewards/rejected": -0.22011235356330872, + "step": 4361 + }, + { + "epoch": 3.016597510373444, + "grad_norm": 8.233197212219238, + "learning_rate": 3.8796680497925316e-05, + "log_odds_chosen": 7.316631317138672, + "log_odds_ratio": -0.021387575194239616, + "logits/chosen": -0.6702169179916382, + "logits/rejected": -0.7465333938598633, + "logps/chosen": -0.02479386515915394, + "logps/rejected": -1.9944210052490234, + "loss": 1.6044, + "nll_loss": 0.39896106719970703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00247938628308475, + "rewards/margins": 0.19696269929409027, + "rewards/rejected": -0.1994420886039734, + "step": 4362 + }, + { + "epoch": 3.0172890733056708, + "grad_norm": 7.379128456115723, + "learning_rate": 3.879283848163516e-05, + "log_odds_chosen": 8.85990047454834, + "log_odds_ratio": -0.003634555032476783, + "logits/chosen": -0.5511636734008789, + "logits/rejected": -0.5116143226623535, + "logps/chosen": -0.016325172036886215, + "logps/rejected": -1.544255256652832, + "loss": 1.1823, + "nll_loss": 0.295213907957077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016325172036886215, + "rewards/margins": 0.1527930200099945, + "rewards/rejected": -0.15442554652690887, + "step": 4363 + }, + { + "epoch": 3.0179806362378976, + "grad_norm": 5.7828803062438965, + "learning_rate": 3.8788996465345014e-05, + "log_odds_chosen": 6.473647117614746, + "log_odds_ratio": -0.01633561961352825, + "logits/chosen": -0.6352428197860718, + "logits/rejected": -0.698454737663269, + "logps/chosen": -0.008926715701818466, + "logps/rejected": -1.0296143293380737, + "loss": 1.1327, + "nll_loss": 0.2815358638763428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008926716400310397, + "rewards/margins": 0.10206875950098038, + "rewards/rejected": -0.10296143591403961, + "step": 4364 + }, + { + "epoch": 3.0186721991701244, + "grad_norm": 6.8958659172058105, + "learning_rate": 3.8785154449054866e-05, + "log_odds_chosen": 6.793898105621338, + "log_odds_ratio": -0.013819929212331772, + "logits/chosen": -0.07012942433357239, + "logits/rejected": -0.07956613600254059, + "logps/chosen": -0.00711780646815896, + "logps/rejected": -0.6190686225891113, + "loss": 1.1591, + "nll_loss": 0.2883892059326172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007117806235328317, + "rewards/margins": 0.06119508296251297, + "rewards/rejected": -0.061906859278678894, + "step": 4365 + }, + { + "epoch": 3.0193637621023512, + "grad_norm": 6.910161972045898, + "learning_rate": 3.878131243276472e-05, + "log_odds_chosen": 7.337653160095215, + "log_odds_ratio": -0.14091874659061432, + "logits/chosen": -0.6717323064804077, + "logits/rejected": -0.6785844564437866, + "logps/chosen": -0.034647684544324875, + "logps/rejected": -1.0071440935134888, + "loss": 1.4795, + "nll_loss": 0.35579484701156616, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034647686406970024, + "rewards/margins": 0.09724964201450348, + "rewards/rejected": -0.10071440786123276, + "step": 4366 + }, + { + "epoch": 3.020055325034578, + "grad_norm": 8.759042739868164, + "learning_rate": 3.8777470416474564e-05, + "log_odds_chosen": 7.364710807800293, + "log_odds_ratio": -0.22978220880031586, + "logits/chosen": -0.8016922473907471, + "logits/rejected": -0.8354951739311218, + "logps/chosen": -0.035079024732112885, + "logps/rejected": -1.2885403633117676, + "loss": 2.256, + "nll_loss": 0.541018545627594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003507902380079031, + "rewards/margins": 0.12534615397453308, + "rewards/rejected": -0.12885405123233795, + "step": 4367 + }, + { + "epoch": 3.020746887966805, + "grad_norm": 7.147649765014648, + "learning_rate": 3.877362840018442e-05, + "log_odds_chosen": 8.82288932800293, + "log_odds_ratio": -0.0035906489938497543, + "logits/chosen": -0.41625678539276123, + "logits/rejected": -0.5004568099975586, + "logps/chosen": -0.008884168229997158, + "logps/rejected": -2.025074005126953, + "loss": 1.6076, + "nll_loss": 0.4015321135520935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008884167764335871, + "rewards/margins": 0.2016189694404602, + "rewards/rejected": -0.2025073915719986, + "step": 4368 + }, + { + "epoch": 3.0214384508990317, + "grad_norm": 11.277046203613281, + "learning_rate": 3.876978638389427e-05, + "log_odds_chosen": 9.12707805633545, + "log_odds_ratio": -0.004158929456025362, + "logits/chosen": -0.5607892274856567, + "logits/rejected": -0.578012228012085, + "logps/chosen": -0.0050364332273602486, + "logps/rejected": -1.462164282798767, + "loss": 1.7723, + "nll_loss": 0.44266659021377563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005036432994529605, + "rewards/margins": 0.14571279287338257, + "rewards/rejected": -0.14621643722057343, + "step": 4369 + }, + { + "epoch": 3.0221300138312586, + "grad_norm": 10.114571571350098, + "learning_rate": 3.8765944367604115e-05, + "log_odds_chosen": 7.488341808319092, + "log_odds_ratio": -0.2522977590560913, + "logits/chosen": -0.7943065762519836, + "logits/rejected": -0.812098503112793, + "logps/chosen": -0.03343523293733597, + "logps/rejected": -1.2078806161880493, + "loss": 2.4229, + "nll_loss": 0.5804873704910278, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033435234799981117, + "rewards/margins": 0.11744453012943268, + "rewards/rejected": -0.12078805267810822, + "step": 4370 + }, + { + "epoch": 3.0228215767634854, + "grad_norm": 8.294888496398926, + "learning_rate": 3.8762102351313974e-05, + "log_odds_chosen": 8.21873664855957, + "log_odds_ratio": -0.0010850150138139725, + "logits/chosen": -0.38183748722076416, + "logits/rejected": -0.4458293914794922, + "logps/chosen": -0.0054486412554979324, + "logps/rejected": -1.4044756889343262, + "loss": 1.8935, + "nll_loss": 0.4732619524002075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005448641604743898, + "rewards/margins": 0.13990271091461182, + "rewards/rejected": -0.14044757187366486, + "step": 4371 + }, + { + "epoch": 3.0235131396957122, + "grad_norm": 9.032965660095215, + "learning_rate": 3.875826033502382e-05, + "log_odds_chosen": 9.557016372680664, + "log_odds_ratio": -0.00015933552640490234, + "logits/chosen": -0.6070985198020935, + "logits/rejected": -0.653249204158783, + "logps/chosen": -0.0006065353518351912, + "logps/rejected": -2.034036159515381, + "loss": 1.3604, + "nll_loss": 0.3400716185569763, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06535431870725e-05, + "rewards/margins": 0.2033429592847824, + "rewards/rejected": -0.20340359210968018, + "step": 4372 + }, + { + "epoch": 3.024204702627939, + "grad_norm": 9.028538703918457, + "learning_rate": 3.875441831873367e-05, + "log_odds_chosen": 9.122668266296387, + "log_odds_ratio": -0.013028179295361042, + "logits/chosen": -0.01854856312274933, + "logits/rejected": -0.10948525369167328, + "logps/chosen": -0.014808842912316322, + "logps/rejected": -1.9937236309051514, + "loss": 1.8602, + "nll_loss": 0.46375733613967896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001480884151533246, + "rewards/margins": 0.1978914886713028, + "rewards/rejected": -0.19937236607074738, + "step": 4373 + }, + { + "epoch": 3.024896265560166, + "grad_norm": 8.094748497009277, + "learning_rate": 3.8750576302443524e-05, + "log_odds_chosen": 8.073196411132812, + "log_odds_ratio": -0.07655221968889236, + "logits/chosen": -0.5587270855903625, + "logits/rejected": -0.5245088338851929, + "logps/chosen": -0.028404507786035538, + "logps/rejected": -1.8108826875686646, + "loss": 1.9002, + "nll_loss": 0.4673946499824524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028404509648680687, + "rewards/margins": 0.17824780941009521, + "rewards/rejected": -0.18108826875686646, + "step": 4374 + }, + { + "epoch": 3.0255878284923927, + "grad_norm": 5.171634674072266, + "learning_rate": 3.874673428615338e-05, + "log_odds_chosen": 7.644787311553955, + "log_odds_ratio": -0.06222674623131752, + "logits/chosen": -0.5618513822555542, + "logits/rejected": -0.6038928031921387, + "logps/chosen": -0.03759719431400299, + "logps/rejected": -1.9660907983779907, + "loss": 1.4441, + "nll_loss": 0.35479259490966797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037597191985696554, + "rewards/margins": 0.19284936785697937, + "rewards/rejected": -0.19660907983779907, + "step": 4375 + }, + { + "epoch": 3.0262793914246195, + "grad_norm": 7.60352087020874, + "learning_rate": 3.874289226986322e-05, + "log_odds_chosen": 7.690865993499756, + "log_odds_ratio": -0.005768823437392712, + "logits/chosen": -0.6503806710243225, + "logits/rejected": -0.5802209973335266, + "logps/chosen": -0.012286531738936901, + "logps/rejected": -1.5931379795074463, + "loss": 1.3203, + "nll_loss": 0.3294871747493744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012286532437428832, + "rewards/margins": 0.15808513760566711, + "rewards/rejected": -0.15931379795074463, + "step": 4376 + }, + { + "epoch": 3.0269709543568464, + "grad_norm": 9.345633506774902, + "learning_rate": 3.8739050253573075e-05, + "log_odds_chosen": 8.948345184326172, + "log_odds_ratio": -0.0007345854537561536, + "logits/chosen": -0.46991199254989624, + "logits/rejected": -0.5386595129966736, + "logps/chosen": -0.018476814031600952, + "logps/rejected": -2.32340407371521, + "loss": 1.6296, + "nll_loss": 0.4073340892791748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018476813565939665, + "rewards/margins": 0.2304927259683609, + "rewards/rejected": -0.23234039545059204, + "step": 4377 + }, + { + "epoch": 3.027662517289073, + "grad_norm": 6.002345561981201, + "learning_rate": 3.873520823728293e-05, + "log_odds_chosen": 9.55575942993164, + "log_odds_ratio": -0.0006717491778545082, + "logits/chosen": -0.5274007320404053, + "logits/rejected": -0.5790569186210632, + "logps/chosen": -0.020701147615909576, + "logps/rejected": -2.3770816326141357, + "loss": 1.5081, + "nll_loss": 0.37695783376693726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020701151806861162, + "rewards/margins": 0.23563805222511292, + "rewards/rejected": -0.237708181142807, + "step": 4378 + }, + { + "epoch": 3.0283540802213, + "grad_norm": 8.811324119567871, + "learning_rate": 3.873136622099277e-05, + "log_odds_chosen": 9.221470832824707, + "log_odds_ratio": -0.010890920646488667, + "logits/chosen": -0.4534289836883545, + "logits/rejected": -0.5714493989944458, + "logps/chosen": -0.01702927052974701, + "logps/rejected": -2.3649582862854004, + "loss": 1.7555, + "nll_loss": 0.4377870261669159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017029274022206664, + "rewards/margins": 0.23479288816452026, + "rewards/rejected": -0.23649582266807556, + "step": 4379 + }, + { + "epoch": 3.029045643153527, + "grad_norm": 8.119233131408691, + "learning_rate": 3.872752420470263e-05, + "log_odds_chosen": 8.403648376464844, + "log_odds_ratio": -0.0031433827243745327, + "logits/chosen": -0.6514449119567871, + "logits/rejected": -0.6899955868721008, + "logps/chosen": -0.06951490044593811, + "logps/rejected": -2.216085433959961, + "loss": 1.6456, + "nll_loss": 0.41109544038772583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006951490417122841, + "rewards/margins": 0.2146570384502411, + "rewards/rejected": -0.22160851955413818, + "step": 4380 + }, + { + "epoch": 3.0297372060857537, + "grad_norm": 4.929358959197998, + "learning_rate": 3.872368218841248e-05, + "log_odds_chosen": 8.164472579956055, + "log_odds_ratio": -0.0010731443762779236, + "logits/chosen": -0.4625300168991089, + "logits/rejected": -0.4479852318763733, + "logps/chosen": -0.02008689194917679, + "logps/rejected": -1.1178135871887207, + "loss": 1.9998, + "nll_loss": 0.49984341859817505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020086888689547777, + "rewards/margins": 0.10977266728878021, + "rewards/rejected": -0.11178135126829147, + "step": 4381 + }, + { + "epoch": 3.0304287690179805, + "grad_norm": 15.727904319763184, + "learning_rate": 3.871984017212233e-05, + "log_odds_chosen": 8.262201309204102, + "log_odds_ratio": -0.046532727777957916, + "logits/chosen": -0.38712355494499207, + "logits/rejected": -0.37175098061561584, + "logps/chosen": -0.01136075146496296, + "logps/rejected": -1.7719151973724365, + "loss": 2.4988, + "nll_loss": 0.6200357675552368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011360751232132316, + "rewards/margins": 0.176055446267128, + "rewards/rejected": -0.17719152569770813, + "step": 4382 + }, + { + "epoch": 3.0311203319502074, + "grad_norm": 7.210832595825195, + "learning_rate": 3.871599815583218e-05, + "log_odds_chosen": 8.842565536499023, + "log_odds_ratio": -0.024006277322769165, + "logits/chosen": -0.667304277420044, + "logits/rejected": -0.7376624941825867, + "logps/chosen": -0.009047461673617363, + "logps/rejected": -1.5637233257293701, + "loss": 1.5166, + "nll_loss": 0.37674498558044434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009047460625879467, + "rewards/margins": 0.15546758472919464, + "rewards/rejected": -0.1563723385334015, + "step": 4383 + }, + { + "epoch": 3.031811894882434, + "grad_norm": 15.053751945495605, + "learning_rate": 3.8712156139542035e-05, + "log_odds_chosen": 9.234689712524414, + "log_odds_ratio": -0.00016180895909201354, + "logits/chosen": -0.6031422019004822, + "logits/rejected": -0.6600396037101746, + "logps/chosen": -0.0003273483016528189, + "logps/rejected": -1.3932242393493652, + "loss": 2.0128, + "nll_loss": 0.5031747221946716, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2734831620473415e-05, + "rewards/margins": 0.13928969204425812, + "rewards/rejected": -0.139322429895401, + "step": 4384 + }, + { + "epoch": 3.032503457814661, + "grad_norm": 8.344961166381836, + "learning_rate": 3.870831412325188e-05, + "log_odds_chosen": 8.851398468017578, + "log_odds_ratio": -0.027447307482361794, + "logits/chosen": -0.4169967770576477, + "logits/rejected": -0.455474317073822, + "logps/chosen": -0.030304603278636932, + "logps/rejected": -1.9393913745880127, + "loss": 1.1909, + "nll_loss": 0.29497459530830383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030304603278636932, + "rewards/margins": 0.19090867042541504, + "rewards/rejected": -0.19393913447856903, + "step": 4385 + }, + { + "epoch": 3.033195020746888, + "grad_norm": 10.690800666809082, + "learning_rate": 3.8704472106961733e-05, + "log_odds_chosen": 7.3425164222717285, + "log_odds_ratio": -0.03778545558452606, + "logits/chosen": -0.6645103693008423, + "logits/rejected": -0.7565867900848389, + "logps/chosen": -0.028321033343672752, + "logps/rejected": -1.0251092910766602, + "loss": 2.4422, + "nll_loss": 0.6067838072776794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028321032878011465, + "rewards/margins": 0.09967882931232452, + "rewards/rejected": -0.10251092910766602, + "step": 4386 + }, + { + "epoch": 3.0338865836791147, + "grad_norm": 12.634204864501953, + "learning_rate": 3.8700630090671586e-05, + "log_odds_chosen": 8.964178085327148, + "log_odds_ratio": -0.0009216453763656318, + "logits/chosen": -1.0691150426864624, + "logits/rejected": -1.1286835670471191, + "logps/chosen": -0.0009471324156038463, + "logps/rejected": -1.5427871942520142, + "loss": 2.3683, + "nll_loss": 0.5919942259788513, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.471323573961854e-05, + "rewards/margins": 0.1541840136051178, + "rewards/rejected": -0.1542787253856659, + "step": 4387 + }, + { + "epoch": 3.0345781466113415, + "grad_norm": 7.034060955047607, + "learning_rate": 3.869678807438143e-05, + "log_odds_chosen": 8.673445701599121, + "log_odds_ratio": -0.03487813100218773, + "logits/chosen": -0.5235074758529663, + "logits/rejected": -0.5503664612770081, + "logps/chosen": -0.013425452634692192, + "logps/rejected": -1.4154572486877441, + "loss": 1.669, + "nll_loss": 0.41377225518226624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001342545379884541, + "rewards/margins": 0.14020317792892456, + "rewards/rejected": -0.14154571294784546, + "step": 4388 + }, + { + "epoch": 3.0352697095435683, + "grad_norm": 11.320638656616211, + "learning_rate": 3.869294605809129e-05, + "log_odds_chosen": 7.365285873413086, + "log_odds_ratio": -0.1156717836856842, + "logits/chosen": -0.9415791034698486, + "logits/rejected": -0.9529377222061157, + "logps/chosen": -0.031220735982060432, + "logps/rejected": -1.4222886562347412, + "loss": 3.6534, + "nll_loss": 0.9017861485481262, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031220735982060432, + "rewards/margins": 0.13910678029060364, + "rewards/rejected": -0.1422288715839386, + "step": 4389 + }, + { + "epoch": 3.035961272475795, + "grad_norm": 11.250693321228027, + "learning_rate": 3.8689104041801136e-05, + "log_odds_chosen": 10.075836181640625, + "log_odds_ratio": -0.00014684397319797426, + "logits/chosen": -0.6689065098762512, + "logits/rejected": -0.7600905895233154, + "logps/chosen": -0.0005376600893214345, + "logps/rejected": -2.088433265686035, + "loss": 1.5687, + "nll_loss": 0.39217111468315125, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.376600893214345e-05, + "rewards/margins": 0.20878957211971283, + "rewards/rejected": -0.20884335041046143, + "step": 4390 + }, + { + "epoch": 3.036652835408022, + "grad_norm": 6.093531608581543, + "learning_rate": 3.868526202551099e-05, + "log_odds_chosen": 7.18631649017334, + "log_odds_ratio": -0.03731034696102142, + "logits/chosen": -0.5766352415084839, + "logits/rejected": -0.6117807626724243, + "logps/chosen": -0.022443819791078568, + "logps/rejected": -1.7518061399459839, + "loss": 2.0163, + "nll_loss": 0.5003336668014526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022443821653723717, + "rewards/margins": 0.17293623089790344, + "rewards/rejected": -0.1751806139945984, + "step": 4391 + }, + { + "epoch": 3.037344398340249, + "grad_norm": 8.917433738708496, + "learning_rate": 3.868142000922084e-05, + "log_odds_chosen": 9.447513580322266, + "log_odds_ratio": -0.0014931621262803674, + "logits/chosen": -0.3905342221260071, + "logits/rejected": -0.39794474840164185, + "logps/chosen": -0.015030551701784134, + "logps/rejected": -1.5086308717727661, + "loss": 1.3014, + "nll_loss": 0.32520678639411926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015030552167445421, + "rewards/margins": 0.1493600308895111, + "rewards/rejected": -0.15086308121681213, + "step": 4392 + }, + { + "epoch": 3.0380359612724757, + "grad_norm": 7.268914222717285, + "learning_rate": 3.8677577992930694e-05, + "log_odds_chosen": 7.071560859680176, + "log_odds_ratio": -0.10249079018831253, + "logits/chosen": -0.7327741384506226, + "logits/rejected": -0.772663414478302, + "logps/chosen": -0.01829216629266739, + "logps/rejected": -1.123624563217163, + "loss": 1.1595, + "nll_loss": 0.2796328663825989, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018292168388143182, + "rewards/margins": 0.11053324490785599, + "rewards/rejected": -0.11236246675252914, + "step": 4393 + }, + { + "epoch": 3.0387275242047025, + "grad_norm": 9.540959358215332, + "learning_rate": 3.867373597664054e-05, + "log_odds_chosen": 7.865304946899414, + "log_odds_ratio": -0.01287818793207407, + "logits/chosen": -0.20857594907283783, + "logits/rejected": -0.26517733931541443, + "logps/chosen": -0.005776575766503811, + "logps/rejected": -1.1927257776260376, + "loss": 1.668, + "nll_loss": 0.41570839285850525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005776575417257845, + "rewards/margins": 0.11869492381811142, + "rewards/rejected": -0.11927258223295212, + "step": 4394 + }, + { + "epoch": 3.0394190871369293, + "grad_norm": 11.04859447479248, + "learning_rate": 3.866989396035039e-05, + "log_odds_chosen": 8.835795402526855, + "log_odds_ratio": -0.0005848797736689448, + "logits/chosen": -0.9367823004722595, + "logits/rejected": -0.9795846939086914, + "logps/chosen": -0.0011461263056844473, + "logps/rejected": -1.7380290031433105, + "loss": 1.6836, + "nll_loss": 0.4208444654941559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011461263056844473, + "rewards/margins": 0.17368827760219574, + "rewards/rejected": -0.17380289733409882, + "step": 4395 + }, + { + "epoch": 3.040110650069156, + "grad_norm": 8.23969554901123, + "learning_rate": 3.8666051944060244e-05, + "log_odds_chosen": 8.328208923339844, + "log_odds_ratio": -0.09206652641296387, + "logits/chosen": -0.5694941282272339, + "logits/rejected": -0.6927324533462524, + "logps/chosen": -0.019737619906663895, + "logps/rejected": -1.4263737201690674, + "loss": 1.2315, + "nll_loss": 0.298658162355423, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019737619441002607, + "rewards/margins": 0.14066360890865326, + "rewards/rejected": -0.14263737201690674, + "step": 4396 + }, + { + "epoch": 3.040802213001383, + "grad_norm": 7.260254383087158, + "learning_rate": 3.866220992777009e-05, + "log_odds_chosen": 4.9062933921813965, + "log_odds_ratio": -0.148776113986969, + "logits/chosen": 0.028328221291303635, + "logits/rejected": -0.10123750567436218, + "logps/chosen": -0.11511752009391785, + "logps/rejected": -1.450113296508789, + "loss": 1.7417, + "nll_loss": 0.42053550481796265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011511752381920815, + "rewards/margins": 0.13349959254264832, + "rewards/rejected": -0.14501133561134338, + "step": 4397 + }, + { + "epoch": 3.04149377593361, + "grad_norm": 9.752484321594238, + "learning_rate": 3.865836791147995e-05, + "log_odds_chosen": 9.574945449829102, + "log_odds_ratio": -0.00010855550499400124, + "logits/chosen": -0.7012908458709717, + "logits/rejected": -0.826108992099762, + "logps/chosen": -0.00035735705750994384, + "logps/rejected": -1.6022120714187622, + "loss": 2.4012, + "nll_loss": 0.6002871990203857, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5735705750994384e-05, + "rewards/margins": 0.16018547117710114, + "rewards/rejected": -0.16022121906280518, + "step": 4398 + }, + { + "epoch": 3.0421853388658366, + "grad_norm": 9.122933387756348, + "learning_rate": 3.8654525895189795e-05, + "log_odds_chosen": 9.139860153198242, + "log_odds_ratio": -0.0013590446906164289, + "logits/chosen": -0.5273385643959045, + "logits/rejected": -0.6435430645942688, + "logps/chosen": -0.0033525507897138596, + "logps/rejected": -2.115792751312256, + "loss": 1.7969, + "nll_loss": 0.44908446073532104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033525508479215205, + "rewards/margins": 0.21124403178691864, + "rewards/rejected": -0.21157927811145782, + "step": 4399 + }, + { + "epoch": 3.0428769017980635, + "grad_norm": 9.778103828430176, + "learning_rate": 3.865068387889965e-05, + "log_odds_chosen": 9.495455741882324, + "log_odds_ratio": -0.0002024644345510751, + "logits/chosen": -0.9759871959686279, + "logits/rejected": -0.9766150712966919, + "logps/chosen": -0.0011281885672360659, + "logps/rejected": -1.9828314781188965, + "loss": 1.5857, + "nll_loss": 0.39639735221862793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011281885235803202, + "rewards/margins": 0.19817033410072327, + "rewards/rejected": -0.1982831358909607, + "step": 4400 + }, + { + "epoch": 3.0435684647302903, + "grad_norm": 6.08988618850708, + "learning_rate": 3.86468418626095e-05, + "log_odds_chosen": 7.714538097381592, + "log_odds_ratio": -0.015934668481349945, + "logits/chosen": -0.5180322527885437, + "logits/rejected": -0.47100329399108887, + "logps/chosen": -0.005107459146529436, + "logps/rejected": -0.8941366672515869, + "loss": 1.3783, + "nll_loss": 0.34297019243240356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005107459728606045, + "rewards/margins": 0.08890292048454285, + "rewards/rejected": -0.08941367268562317, + "step": 4401 + }, + { + "epoch": 3.044260027662517, + "grad_norm": 8.789294242858887, + "learning_rate": 3.864299984631935e-05, + "log_odds_chosen": 9.35297966003418, + "log_odds_ratio": -0.00039945071330294013, + "logits/chosen": -0.43555620312690735, + "logits/rejected": -0.5225505828857422, + "logps/chosen": -0.008832286112010479, + "logps/rejected": -2.5036051273345947, + "loss": 1.5038, + "nll_loss": 0.3759007751941681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008832286112010479, + "rewards/margins": 0.2494772970676422, + "rewards/rejected": -0.25036054849624634, + "step": 4402 + }, + { + "epoch": 3.044951590594744, + "grad_norm": 8.463292121887207, + "learning_rate": 3.86391578300292e-05, + "log_odds_chosen": 9.686382293701172, + "log_odds_ratio": -0.00024558964651077986, + "logits/chosen": -0.31426572799682617, + "logits/rejected": -0.4136694073677063, + "logps/chosen": -0.017539246007800102, + "logps/rejected": -2.283996105194092, + "loss": 2.083, + "nll_loss": 0.5207360982894897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017539247637614608, + "rewards/margins": 0.22664567828178406, + "rewards/rejected": -0.2283996194601059, + "step": 4403 + }, + { + "epoch": 3.045643153526971, + "grad_norm": 7.6976318359375, + "learning_rate": 3.863531581373905e-05, + "log_odds_chosen": 9.104455947875977, + "log_odds_ratio": -0.0006909758085384965, + "logits/chosen": -0.6387878656387329, + "logits/rejected": -0.6774571537971497, + "logps/chosen": -0.006374065764248371, + "logps/rejected": -2.3529205322265625, + "loss": 1.3831, + "nll_loss": 0.34571653604507446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006374065997079015, + "rewards/margins": 0.23465465009212494, + "rewards/rejected": -0.23529204726219177, + "step": 4404 + }, + { + "epoch": 3.0463347164591976, + "grad_norm": 8.895613670349121, + "learning_rate": 3.86314737974489e-05, + "log_odds_chosen": 9.99482536315918, + "log_odds_ratio": -0.00017433454922866076, + "logits/chosen": -0.6106169819831848, + "logits/rejected": -0.6616973876953125, + "logps/chosen": -0.008926170878112316, + "logps/rejected": -2.503861427307129, + "loss": 1.3169, + "nll_loss": 0.32919979095458984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008926171576604247, + "rewards/margins": 0.24949350953102112, + "rewards/rejected": -0.25038614869117737, + "step": 4405 + }, + { + "epoch": 3.0470262793914245, + "grad_norm": 13.473219871520996, + "learning_rate": 3.862763178115875e-05, + "log_odds_chosen": 5.710055351257324, + "log_odds_ratio": -0.2638789713382721, + "logits/chosen": -0.42658939957618713, + "logits/rejected": -0.46941259503364563, + "logps/chosen": -0.05069947615265846, + "logps/rejected": -1.32853364944458, + "loss": 2.2273, + "nll_loss": 0.5304248332977295, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005069947801530361, + "rewards/margins": 0.1277834177017212, + "rewards/rejected": -0.13285337388515472, + "step": 4406 + }, + { + "epoch": 3.0477178423236513, + "grad_norm": 11.018197059631348, + "learning_rate": 3.862378976486861e-05, + "log_odds_chosen": 9.014132499694824, + "log_odds_ratio": -0.0004934677272103727, + "logits/chosen": -0.33915597200393677, + "logits/rejected": -0.4539306163787842, + "logps/chosen": -0.0013354574330151081, + "logps/rejected": -1.8095229864120483, + "loss": 1.4394, + "nll_loss": 0.35978925228118896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001335457491222769, + "rewards/margins": 0.18081875145435333, + "rewards/rejected": -0.1809522956609726, + "step": 4407 + }, + { + "epoch": 3.048409405255878, + "grad_norm": 8.081323623657227, + "learning_rate": 3.861994774857845e-05, + "log_odds_chosen": 9.20877742767334, + "log_odds_ratio": -0.00035140541149303317, + "logits/chosen": -0.569820761680603, + "logits/rejected": -0.685656726360321, + "logps/chosen": -0.001261628814972937, + "logps/rejected": -1.6668808460235596, + "loss": 1.6334, + "nll_loss": 0.4083223342895508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012616287858691067, + "rewards/margins": 0.16656193137168884, + "rewards/rejected": -0.16668808460235596, + "step": 4408 + }, + { + "epoch": 3.049100968188105, + "grad_norm": 13.364263534545898, + "learning_rate": 3.8616105732288306e-05, + "log_odds_chosen": 9.914350509643555, + "log_odds_ratio": -0.0038212628569453955, + "logits/chosen": -0.30686965584754944, + "logits/rejected": -0.433027446269989, + "logps/chosen": -0.007338955998420715, + "logps/rejected": -2.2491300106048584, + "loss": 1.7598, + "nll_loss": 0.43957746028900146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007338955765590072, + "rewards/margins": 0.22417908906936646, + "rewards/rejected": -0.22491300106048584, + "step": 4409 + }, + { + "epoch": 3.0497925311203318, + "grad_norm": 6.702042102813721, + "learning_rate": 3.861226371599816e-05, + "log_odds_chosen": 7.66822624206543, + "log_odds_ratio": -0.03534835949540138, + "logits/chosen": -0.4316217303276062, + "logits/rejected": -0.48696446418762207, + "logps/chosen": -0.04215482994914055, + "logps/rejected": -1.7758280038833618, + "loss": 1.3272, + "nll_loss": 0.3282645344734192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004215483088046312, + "rewards/margins": 0.17336732149124146, + "rewards/rejected": -0.17758280038833618, + "step": 4410 + }, + { + "epoch": 3.0504840940525586, + "grad_norm": 7.669111251831055, + "learning_rate": 3.860842169970801e-05, + "log_odds_chosen": 8.917407989501953, + "log_odds_ratio": -0.00028047675732523203, + "logits/chosen": -0.3156546354293823, + "logits/rejected": -0.37852245569229126, + "logps/chosen": -0.0016239421674981713, + "logps/rejected": -1.795964241027832, + "loss": 1.3518, + "nll_loss": 0.3379298448562622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016239422257058322, + "rewards/margins": 0.17943403124809265, + "rewards/rejected": -0.1795964241027832, + "step": 4411 + }, + { + "epoch": 3.0511756569847854, + "grad_norm": 12.863265991210938, + "learning_rate": 3.8604579683417856e-05, + "log_odds_chosen": 8.214728355407715, + "log_odds_ratio": -0.001312942593358457, + "logits/chosen": -0.463234543800354, + "logits/rejected": -0.5421000719070435, + "logps/chosen": -0.0032501136884093285, + "logps/rejected": -1.4576647281646729, + "loss": 1.573, + "nll_loss": 0.3931177854537964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032501138048246503, + "rewards/margins": 0.145441472530365, + "rewards/rejected": -0.1457664966583252, + "step": 4412 + }, + { + "epoch": 3.0518672199170123, + "grad_norm": 12.542684555053711, + "learning_rate": 3.860073766712771e-05, + "log_odds_chosen": 9.891372680664062, + "log_odds_ratio": -0.0006059879087843001, + "logits/chosen": -0.6966993808746338, + "logits/rejected": -0.69878089427948, + "logps/chosen": -0.0019379917066544294, + "logps/rejected": -2.054600238800049, + "loss": 1.5864, + "nll_loss": 0.39654040336608887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019379917648620903, + "rewards/margins": 0.20526623725891113, + "rewards/rejected": -0.20546004176139832, + "step": 4413 + }, + { + "epoch": 3.052558782849239, + "grad_norm": 14.597355842590332, + "learning_rate": 3.859689565083756e-05, + "log_odds_chosen": 9.579411506652832, + "log_odds_ratio": -0.0002695823786780238, + "logits/chosen": -0.38167479634284973, + "logits/rejected": -0.4428882598876953, + "logps/chosen": -0.0002935394586529583, + "logps/rejected": -1.5735745429992676, + "loss": 1.9326, + "nll_loss": 0.4831179082393646, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.935394695668947e-05, + "rewards/margins": 0.15732811391353607, + "rewards/rejected": -0.15735746920108795, + "step": 4414 + }, + { + "epoch": 3.053250345781466, + "grad_norm": 7.0043559074401855, + "learning_rate": 3.8593053634547414e-05, + "log_odds_chosen": 9.005176544189453, + "log_odds_ratio": -0.04333890229463577, + "logits/chosen": -0.7164819836616516, + "logits/rejected": -0.7455224394798279, + "logps/chosen": -0.01237468235194683, + "logps/rejected": -2.378635883331299, + "loss": 1.8281, + "nll_loss": 0.45269614458084106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012374682119116187, + "rewards/margins": 0.23662608861923218, + "rewards/rejected": -0.23786357045173645, + "step": 4415 + }, + { + "epoch": 3.0539419087136928, + "grad_norm": 6.635776996612549, + "learning_rate": 3.8589211618257266e-05, + "log_odds_chosen": 9.555427551269531, + "log_odds_ratio": -0.0004285011673346162, + "logits/chosen": -0.5694887042045593, + "logits/rejected": -0.6285260915756226, + "logps/chosen": -0.0004087548586539924, + "logps/rejected": -1.590221643447876, + "loss": 1.5202, + "nll_loss": 0.3800092339515686, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.087548586539924e-05, + "rewards/margins": 0.1589812934398651, + "rewards/rejected": -0.15902216732501984, + "step": 4416 + }, + { + "epoch": 3.0546334716459196, + "grad_norm": 9.27367877960205, + "learning_rate": 3.858536960196711e-05, + "log_odds_chosen": 7.5078935623168945, + "log_odds_ratio": -0.0073926495388150215, + "logits/chosen": -0.6996981501579285, + "logits/rejected": -0.7687663435935974, + "logps/chosen": -0.00571818882599473, + "logps/rejected": -1.2903776168823242, + "loss": 2.3679, + "nll_loss": 0.5912336111068726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005718189058825374, + "rewards/margins": 0.128465935587883, + "rewards/rejected": -0.1290377676486969, + "step": 4417 + }, + { + "epoch": 3.0553250345781464, + "grad_norm": 9.297929763793945, + "learning_rate": 3.8581527585676964e-05, + "log_odds_chosen": 8.047447204589844, + "log_odds_ratio": -0.012478945776820183, + "logits/chosen": -0.6040946841239929, + "logits/rejected": -0.671326756477356, + "logps/chosen": -0.014846572652459145, + "logps/rejected": -1.8723747730255127, + "loss": 1.5809, + "nll_loss": 0.3939892053604126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014846572885289788, + "rewards/margins": 0.18575282394886017, + "rewards/rejected": -0.18723748624324799, + "step": 4418 + }, + { + "epoch": 3.0560165975103732, + "grad_norm": 8.1918306350708, + "learning_rate": 3.857768556938682e-05, + "log_odds_chosen": 9.591246604919434, + "log_odds_ratio": -0.00013377962750382721, + "logits/chosen": -0.23663687705993652, + "logits/rejected": -0.28745290637016296, + "logps/chosen": -0.0005562568549066782, + "logps/rejected": -1.7879509925842285, + "loss": 2.0461, + "nll_loss": 0.5115119218826294, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.562568549066782e-05, + "rewards/margins": 0.17873947322368622, + "rewards/rejected": -0.17879509925842285, + "step": 4419 + }, + { + "epoch": 3.0567081604426, + "grad_norm": 4.319944858551025, + "learning_rate": 3.857384355309667e-05, + "log_odds_chosen": 8.3831787109375, + "log_odds_ratio": -0.004961658269166946, + "logits/chosen": -0.5148369073867798, + "logits/rejected": -0.5669583678245544, + "logps/chosen": -0.006720840930938721, + "logps/rejected": -1.406997561454773, + "loss": 1.7634, + "nll_loss": 0.4403529167175293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006720841629430652, + "rewards/margins": 0.14002767205238342, + "rewards/rejected": -0.14069975912570953, + "step": 4420 + }, + { + "epoch": 3.057399723374827, + "grad_norm": 9.564262390136719, + "learning_rate": 3.8570001536806515e-05, + "log_odds_chosen": 9.466020584106445, + "log_odds_ratio": -0.012601032853126526, + "logits/chosen": -1.022118330001831, + "logits/rejected": -1.0248609781265259, + "logps/chosen": -0.004576122388243675, + "logps/rejected": -1.540669322013855, + "loss": 1.5513, + "nll_loss": 0.3865562677383423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004576121864374727, + "rewards/margins": 0.15360932052135468, + "rewards/rejected": -0.15406693518161774, + "step": 4421 + }, + { + "epoch": 3.0580912863070537, + "grad_norm": 11.14309310913086, + "learning_rate": 3.856615952051637e-05, + "log_odds_chosen": 8.333141326904297, + "log_odds_ratio": -0.029943065717816353, + "logits/chosen": -0.7349153161048889, + "logits/rejected": -0.7577260732650757, + "logps/chosen": -0.009285441599786282, + "logps/rejected": -1.486148715019226, + "loss": 1.6955, + "nll_loss": 0.4208747148513794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009285442065447569, + "rewards/margins": 0.14768633246421814, + "rewards/rejected": -0.14861486852169037, + "step": 4422 + }, + { + "epoch": 3.0587828492392806, + "grad_norm": 9.059833526611328, + "learning_rate": 3.856231750422622e-05, + "log_odds_chosen": 9.1171875, + "log_odds_ratio": -0.010607258416712284, + "logits/chosen": -0.42079970240592957, + "logits/rejected": -0.5635207295417786, + "logps/chosen": -0.004433733876794577, + "logps/rejected": -1.4279680252075195, + "loss": 1.6389, + "nll_loss": 0.40866702795028687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044337339932098985, + "rewards/margins": 0.14235343039035797, + "rewards/rejected": -0.14279679954051971, + "step": 4423 + }, + { + "epoch": 3.0594744121715074, + "grad_norm": 8.973243713378906, + "learning_rate": 3.855847548793607e-05, + "log_odds_chosen": 7.946057319641113, + "log_odds_ratio": -0.0021891689393669367, + "logits/chosen": -1.1421451568603516, + "logits/rejected": -1.0701775550842285, + "logps/chosen": -0.015628967434167862, + "logps/rejected": -1.5090515613555908, + "loss": 1.647, + "nll_loss": 0.41152113676071167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015628967666998506, + "rewards/margins": 0.14934225380420685, + "rewards/rejected": -0.15090514719486237, + "step": 4424 + }, + { + "epoch": 3.0601659751037342, + "grad_norm": 11.055354118347168, + "learning_rate": 3.8554633471645925e-05, + "log_odds_chosen": 6.188984394073486, + "log_odds_ratio": -0.10389276593923569, + "logits/chosen": -0.5800085067749023, + "logits/rejected": -0.6050304174423218, + "logps/chosen": -0.0289864894002676, + "logps/rejected": -0.9998306035995483, + "loss": 1.4891, + "nll_loss": 0.3618795573711395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028986490797251463, + "rewards/margins": 0.0970844104886055, + "rewards/rejected": -0.09998306632041931, + "step": 4425 + }, + { + "epoch": 3.060857538035961, + "grad_norm": 26.787431716918945, + "learning_rate": 3.855079145535577e-05, + "log_odds_chosen": 9.614953994750977, + "log_odds_ratio": -0.0001302398304687813, + "logits/chosen": -0.6753515601158142, + "logits/rejected": -0.6884777545928955, + "logps/chosen": -0.0004761649470310658, + "logps/rejected": -1.7124427556991577, + "loss": 2.7306, + "nll_loss": 0.6826401948928833, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.761649324791506e-05, + "rewards/margins": 0.17119666934013367, + "rewards/rejected": -0.171244278550148, + "step": 4426 + }, + { + "epoch": 3.061549100968188, + "grad_norm": 9.503124237060547, + "learning_rate": 3.854694943906562e-05, + "log_odds_chosen": 6.231769561767578, + "log_odds_ratio": -0.08688283711671829, + "logits/chosen": -0.7942008972167969, + "logits/rejected": -0.7996950149536133, + "logps/chosen": -0.14525654911994934, + "logps/rejected": -1.7236475944519043, + "loss": 2.1466, + "nll_loss": 0.5279530882835388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01452565286308527, + "rewards/margins": 0.1578390896320343, + "rewards/rejected": -0.172364741563797, + "step": 4427 + }, + { + "epoch": 3.0622406639004147, + "grad_norm": 4.853358268737793, + "learning_rate": 3.8543107422775475e-05, + "log_odds_chosen": 8.784786224365234, + "log_odds_ratio": -0.000326198001857847, + "logits/chosen": -0.4402710795402527, + "logits/rejected": -0.499617338180542, + "logps/chosen": -0.006046361289918423, + "logps/rejected": -1.3748338222503662, + "loss": 1.959, + "nll_loss": 0.4897170066833496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006046361522749066, + "rewards/margins": 0.13687875866889954, + "rewards/rejected": -0.1374833881855011, + "step": 4428 + }, + { + "epoch": 3.0629322268326415, + "grad_norm": 5.846126079559326, + "learning_rate": 3.853926540648533e-05, + "log_odds_chosen": 8.772059440612793, + "log_odds_ratio": -0.0014638709835708141, + "logits/chosen": -0.651498556137085, + "logits/rejected": -0.7316693663597107, + "logps/chosen": -0.01148604042828083, + "logps/rejected": -1.9184293746948242, + "loss": 1.7238, + "nll_loss": 0.43079331517219543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00114860397297889, + "rewards/margins": 0.19069434702396393, + "rewards/rejected": -0.1918429434299469, + "step": 4429 + }, + { + "epoch": 3.0636237897648684, + "grad_norm": 7.701274871826172, + "learning_rate": 3.853542339019517e-05, + "log_odds_chosen": 9.660760879516602, + "log_odds_ratio": -0.0004473314620554447, + "logits/chosen": -0.6717379093170166, + "logits/rejected": -0.7057700157165527, + "logps/chosen": -0.0006988372188061476, + "logps/rejected": -1.5927550792694092, + "loss": 1.4822, + "nll_loss": 0.37049365043640137, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.98837247909978e-05, + "rewards/margins": 0.1592056155204773, + "rewards/rejected": -0.15927551686763763, + "step": 4430 + }, + { + "epoch": 3.064315352697095, + "grad_norm": 10.977331161499023, + "learning_rate": 3.853158137390503e-05, + "log_odds_chosen": 9.710474014282227, + "log_odds_ratio": -0.0019115728791803122, + "logits/chosen": -0.3936481773853302, + "logits/rejected": -0.5065484046936035, + "logps/chosen": -0.0009990218095481396, + "logps/rejected": -1.7076680660247803, + "loss": 2.1401, + "nll_loss": 0.5348359942436218, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.990217949962243e-05, + "rewards/margins": 0.17066690325737, + "rewards/rejected": -0.17076681554317474, + "step": 4431 + }, + { + "epoch": 3.0650069156293225, + "grad_norm": 7.184093475341797, + "learning_rate": 3.852773935761488e-05, + "log_odds_chosen": 8.461292266845703, + "log_odds_ratio": -0.015264814719557762, + "logits/chosen": -0.4620281457901001, + "logits/rejected": -0.5182569026947021, + "logps/chosen": -0.01789667457342148, + "logps/rejected": -1.422250747680664, + "loss": 1.4042, + "nll_loss": 0.3495308458805084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001789667527191341, + "rewards/margins": 0.14043541252613068, + "rewards/rejected": -0.14222508668899536, + "step": 4432 + }, + { + "epoch": 3.0656984785615493, + "grad_norm": 14.419909477233887, + "learning_rate": 3.852389734132473e-05, + "log_odds_chosen": 7.307796478271484, + "log_odds_ratio": -0.04180353879928589, + "logits/chosen": -0.6438498497009277, + "logits/rejected": -0.7512152194976807, + "logps/chosen": -0.013369385153055191, + "logps/rejected": -1.3575177192687988, + "loss": 1.801, + "nll_loss": 0.44607308506965637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013369384687393904, + "rewards/margins": 0.13441482186317444, + "rewards/rejected": -0.13575176894664764, + "step": 4433 + }, + { + "epoch": 3.066390041493776, + "grad_norm": 17.058692932128906, + "learning_rate": 3.852005532503458e-05, + "log_odds_chosen": 7.7271904945373535, + "log_odds_ratio": -0.015360197052359581, + "logits/chosen": -0.5677446722984314, + "logits/rejected": -0.7343844771385193, + "logps/chosen": -0.021658003330230713, + "logps/rejected": -1.5284223556518555, + "loss": 2.6219, + "nll_loss": 0.6539467573165894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021658004261553288, + "rewards/margins": 0.1506764441728592, + "rewards/rejected": -0.15284225344657898, + "step": 4434 + }, + { + "epoch": 3.067081604426003, + "grad_norm": 7.667229175567627, + "learning_rate": 3.851621330874443e-05, + "log_odds_chosen": 8.72133731842041, + "log_odds_ratio": -0.0008878376102074981, + "logits/chosen": -0.579259991645813, + "logits/rejected": -0.5156400799751282, + "logps/chosen": -0.01773679256439209, + "logps/rejected": -1.6515758037567139, + "loss": 1.635, + "nll_loss": 0.40866297483444214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017736791633069515, + "rewards/margins": 0.16338390111923218, + "rewards/rejected": -0.16515758633613586, + "step": 4435 + }, + { + "epoch": 3.06777316735823, + "grad_norm": 10.078861236572266, + "learning_rate": 3.851237129245428e-05, + "log_odds_chosen": 9.7711763381958, + "log_odds_ratio": -0.0007375985151156783, + "logits/chosen": -0.965905487537384, + "logits/rejected": -1.0674183368682861, + "logps/chosen": -0.0014153417432680726, + "logps/rejected": -2.040693759918213, + "loss": 2.9026, + "nll_loss": 0.7255856990814209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001415341830579564, + "rewards/margins": 0.20392782986164093, + "rewards/rejected": -0.2040693610906601, + "step": 4436 + }, + { + "epoch": 3.0684647302904566, + "grad_norm": 8.490081787109375, + "learning_rate": 3.8508529276164134e-05, + "log_odds_chosen": 9.745711326599121, + "log_odds_ratio": -0.0002596020931378007, + "logits/chosen": -0.6246272921562195, + "logits/rejected": -0.7017021179199219, + "logps/chosen": -0.01125405728816986, + "logps/rejected": -2.0048890113830566, + "loss": 1.1157, + "nll_loss": 0.2789047956466675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001125405658967793, + "rewards/margins": 0.19936349987983704, + "rewards/rejected": -0.20048891007900238, + "step": 4437 + }, + { + "epoch": 3.0691562932226835, + "grad_norm": 10.923334121704102, + "learning_rate": 3.8504687259873986e-05, + "log_odds_chosen": 9.16975212097168, + "log_odds_ratio": -0.00021780317183583975, + "logits/chosen": -0.8082156777381897, + "logits/rejected": -0.8127976059913635, + "logps/chosen": -0.0005995776737108827, + "logps/rejected": -1.327314853668213, + "loss": 1.7019, + "nll_loss": 0.4254598319530487, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995776882627979e-05, + "rewards/margins": 0.132671520113945, + "rewards/rejected": -0.13273146748542786, + "step": 4438 + }, + { + "epoch": 3.0698478561549103, + "grad_norm": 7.313838958740234, + "learning_rate": 3.850084524358383e-05, + "log_odds_chosen": 8.235380172729492, + "log_odds_ratio": -0.005429758690297604, + "logits/chosen": -0.5990132093429565, + "logits/rejected": -0.6255252361297607, + "logps/chosen": -0.004577214829623699, + "logps/rejected": -1.3805031776428223, + "loss": 1.5167, + "nll_loss": 0.3786201477050781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045772147132083774, + "rewards/margins": 0.1375925987958908, + "rewards/rejected": -0.13805033266544342, + "step": 4439 + }, + { + "epoch": 3.070539419087137, + "grad_norm": 14.503705024719238, + "learning_rate": 3.849700322729369e-05, + "log_odds_chosen": 9.288135528564453, + "log_odds_ratio": -0.0004684936720877886, + "logits/chosen": -0.7211368680000305, + "logits/rejected": -0.7905447483062744, + "logps/chosen": -0.001473725656978786, + "logps/rejected": -1.8873465061187744, + "loss": 1.8446, + "nll_loss": 0.4611014127731323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014737255696672946, + "rewards/margins": 0.1885872781276703, + "rewards/rejected": -0.18873465061187744, + "step": 4440 + }, + { + "epoch": 3.071230982019364, + "grad_norm": 9.53628921508789, + "learning_rate": 3.8493161211003537e-05, + "log_odds_chosen": 9.065003395080566, + "log_odds_ratio": -0.022357532754540443, + "logits/chosen": -0.8508001565933228, + "logits/rejected": -0.8630539178848267, + "logps/chosen": -0.005625884048640728, + "logps/rejected": -1.6689910888671875, + "loss": 2.0876, + "nll_loss": 0.5196753144264221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005625883350148797, + "rewards/margins": 0.1663365215063095, + "rewards/rejected": -0.16689911484718323, + "step": 4441 + }, + { + "epoch": 3.071922544951591, + "grad_norm": 8.680511474609375, + "learning_rate": 3.848931919471339e-05, + "log_odds_chosen": 8.122732162475586, + "log_odds_ratio": -0.0008864381816238165, + "logits/chosen": -0.891105055809021, + "logits/rejected": -0.9033305048942566, + "logps/chosen": -0.005982173141092062, + "logps/rejected": -0.9022830724716187, + "loss": 1.7817, + "nll_loss": 0.44534575939178467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000598217302467674, + "rewards/margins": 0.08963009715080261, + "rewards/rejected": -0.09022831171751022, + "step": 4442 + }, + { + "epoch": 3.0726141078838176, + "grad_norm": 10.046014785766602, + "learning_rate": 3.848547717842324e-05, + "log_odds_chosen": 8.835466384887695, + "log_odds_ratio": -0.0003820597776211798, + "logits/chosen": -0.8449460864067078, + "logits/rejected": -0.8626073598861694, + "logps/chosen": -0.0031728330068290234, + "logps/rejected": -1.5975279808044434, + "loss": 1.8497, + "nll_loss": 0.46238836646080017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003172833239659667, + "rewards/margins": 0.15943552553653717, + "rewards/rejected": -0.15975281596183777, + "step": 4443 + }, + { + "epoch": 3.0733056708160444, + "grad_norm": 12.780777931213379, + "learning_rate": 3.848163516213309e-05, + "log_odds_chosen": 8.433425903320312, + "log_odds_ratio": -0.009119795635342598, + "logits/chosen": -1.046372890472412, + "logits/rejected": -1.0667132139205933, + "logps/chosen": -0.008828721009194851, + "logps/rejected": -1.5524736642837524, + "loss": 2.306, + "nll_loss": 0.575589120388031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008828719728626311, + "rewards/margins": 0.15436449646949768, + "rewards/rejected": -0.15524739027023315, + "step": 4444 + }, + { + "epoch": 3.0739972337482713, + "grad_norm": 9.021060943603516, + "learning_rate": 3.847779314584294e-05, + "log_odds_chosen": 9.643104553222656, + "log_odds_ratio": -0.0003110415127594024, + "logits/chosen": -0.7924270033836365, + "logits/rejected": -0.8385024070739746, + "logps/chosen": -0.010611020028591156, + "logps/rejected": -2.142685651779175, + "loss": 2.2366, + "nll_loss": 0.5591127872467041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00106110202614218, + "rewards/margins": 0.2132074534893036, + "rewards/rejected": -0.21426856517791748, + "step": 4445 + }, + { + "epoch": 3.074688796680498, + "grad_norm": 13.978005409240723, + "learning_rate": 3.847395112955279e-05, + "log_odds_chosen": 9.722139358520508, + "log_odds_ratio": -0.00034077069722115993, + "logits/chosen": -0.8204346895217896, + "logits/rejected": -0.9325641393661499, + "logps/chosen": -0.0008741967030800879, + "logps/rejected": -1.7474991083145142, + "loss": 1.6533, + "nll_loss": 0.41330283880233765, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.741967030800879e-05, + "rewards/margins": 0.1746625006198883, + "rewards/rejected": -0.1747499257326126, + "step": 4446 + }, + { + "epoch": 3.075380359612725, + "grad_norm": 10.52839469909668, + "learning_rate": 3.8470109113262644e-05, + "log_odds_chosen": 9.051468849182129, + "log_odds_ratio": -0.0009590685949660838, + "logits/chosen": -0.9662353992462158, + "logits/rejected": -1.0331647396087646, + "logps/chosen": -0.0024615302681922913, + "logps/rejected": -1.8895854949951172, + "loss": 1.9556, + "nll_loss": 0.4887927770614624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002461530384607613, + "rewards/margins": 0.18871241807937622, + "rewards/rejected": -0.1889585554599762, + "step": 4447 + }, + { + "epoch": 3.0760719225449518, + "grad_norm": 10.936558723449707, + "learning_rate": 3.846626709697249e-05, + "log_odds_chosen": 8.980976104736328, + "log_odds_ratio": -0.00037278165109455585, + "logits/chosen": -0.9659571647644043, + "logits/rejected": -1.1706774234771729, + "logps/chosen": -0.0006645218818448484, + "logps/rejected": -1.483097791671753, + "loss": 1.7788, + "nll_loss": 0.4446701407432556, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.645219400525093e-05, + "rewards/margins": 0.14824333786964417, + "rewards/rejected": -0.14830978214740753, + "step": 4448 + }, + { + "epoch": 3.0767634854771786, + "grad_norm": 11.437039375305176, + "learning_rate": 3.846242508068235e-05, + "log_odds_chosen": 8.314053535461426, + "log_odds_ratio": -0.13158449530601501, + "logits/chosen": -0.728661298751831, + "logits/rejected": -0.7856017351150513, + "logps/chosen": -0.03292816877365112, + "logps/rejected": -1.8274672031402588, + "loss": 2.5398, + "nll_loss": 0.6218024492263794, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032928166911005974, + "rewards/margins": 0.17945389449596405, + "rewards/rejected": -0.18274672329425812, + "step": 4449 + }, + { + "epoch": 3.0774550484094054, + "grad_norm": 9.860296249389648, + "learning_rate": 3.8458583064392195e-05, + "log_odds_chosen": 8.01667594909668, + "log_odds_ratio": -0.07519376277923584, + "logits/chosen": -0.6754899621009827, + "logits/rejected": -0.6415287256240845, + "logps/chosen": -0.014091627672314644, + "logps/rejected": -1.4123716354370117, + "loss": 1.9168, + "nll_loss": 0.4716867208480835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014091627672314644, + "rewards/margins": 0.13982799649238586, + "rewards/rejected": -0.14123715460300446, + "step": 4450 + }, + { + "epoch": 3.0781466113416323, + "grad_norm": 7.970553398132324, + "learning_rate": 3.845474104810205e-05, + "log_odds_chosen": 8.878427505493164, + "log_odds_ratio": -0.0015269446885213256, + "logits/chosen": -0.44634032249450684, + "logits/rejected": -0.49079009890556335, + "logps/chosen": -0.004734280984848738, + "logps/rejected": -1.7657830715179443, + "loss": 1.4751, + "nll_loss": 0.36861249804496765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004734280810225755, + "rewards/margins": 0.17610487341880798, + "rewards/rejected": -0.17657829821109772, + "step": 4451 + }, + { + "epoch": 3.078838174273859, + "grad_norm": 5.924354076385498, + "learning_rate": 3.84508990318119e-05, + "log_odds_chosen": 8.421602249145508, + "log_odds_ratio": -0.06661777198314667, + "logits/chosen": -0.6268646717071533, + "logits/rejected": -0.7258235812187195, + "logps/chosen": -0.012585322372615337, + "logps/rejected": -1.2588582038879395, + "loss": 2.1821, + "nll_loss": 0.5388583540916443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012585322838276625, + "rewards/margins": 0.12462729215621948, + "rewards/rejected": -0.12588582932949066, + "step": 4452 + }, + { + "epoch": 3.079529737206086, + "grad_norm": 9.645779609680176, + "learning_rate": 3.8447057015521746e-05, + "log_odds_chosen": 9.39462661743164, + "log_odds_ratio": -0.03679275140166283, + "logits/chosen": -0.5939630270004272, + "logits/rejected": -0.644051194190979, + "logps/chosen": -0.017827268689870834, + "logps/rejected": -2.204650402069092, + "loss": 1.4338, + "nll_loss": 0.3547765016555786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001782726845704019, + "rewards/margins": 0.21868231892585754, + "rewards/rejected": -0.2204650640487671, + "step": 4453 + }, + { + "epoch": 3.0802213001383127, + "grad_norm": 13.677511215209961, + "learning_rate": 3.84432149992316e-05, + "log_odds_chosen": 9.449896812438965, + "log_odds_ratio": -0.0015739205991849303, + "logits/chosen": -0.8569554090499878, + "logits/rejected": -0.8320474624633789, + "logps/chosen": -0.0067398822866380215, + "logps/rejected": -2.1724767684936523, + "loss": 1.5862, + "nll_loss": 0.3963874578475952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006739882519468665, + "rewards/margins": 0.21657368540763855, + "rewards/rejected": -0.21724767982959747, + "step": 4454 + }, + { + "epoch": 3.0809128630705396, + "grad_norm": 11.459304809570312, + "learning_rate": 3.843937298294145e-05, + "log_odds_chosen": 9.47178840637207, + "log_odds_ratio": -0.0006335485959425569, + "logits/chosen": -0.6219982504844666, + "logits/rejected": -0.6629889011383057, + "logps/chosen": -0.0020509539172053337, + "logps/rejected": -1.8684937953948975, + "loss": 1.8082, + "nll_loss": 0.45199406147003174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002050954062724486, + "rewards/margins": 0.1866442710161209, + "rewards/rejected": -0.18684938549995422, + "step": 4455 + }, + { + "epoch": 3.0816044260027664, + "grad_norm": 7.764420986175537, + "learning_rate": 3.84355309666513e-05, + "log_odds_chosen": 8.673635482788086, + "log_odds_ratio": -0.0005076941451989114, + "logits/chosen": -0.6043013334274292, + "logits/rejected": -0.6264380216598511, + "logps/chosen": -0.0006218141061253846, + "logps/rejected": -1.4252135753631592, + "loss": 2.0007, + "nll_loss": 0.5001353025436401, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.218141061253846e-05, + "rewards/margins": 0.1424591839313507, + "rewards/rejected": -0.14252136647701263, + "step": 4456 + }, + { + "epoch": 3.0822959889349932, + "grad_norm": 7.5119733810424805, + "learning_rate": 3.843168895036115e-05, + "log_odds_chosen": 8.390885353088379, + "log_odds_ratio": -0.025730164721608162, + "logits/chosen": -0.5082446336746216, + "logits/rejected": -0.567543625831604, + "logps/chosen": -0.01098723616451025, + "logps/rejected": -1.2334426641464233, + "loss": 1.687, + "nll_loss": 0.4191865026950836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001098723616451025, + "rewards/margins": 0.12224555015563965, + "rewards/rejected": -0.12334427237510681, + "step": 4457 + }, + { + "epoch": 3.08298755186722, + "grad_norm": 21.876222610473633, + "learning_rate": 3.842784693407101e-05, + "log_odds_chosen": 8.640466690063477, + "log_odds_ratio": -0.03930068388581276, + "logits/chosen": -0.8005126714706421, + "logits/rejected": -0.8496750593185425, + "logps/chosen": -0.009835846722126007, + "logps/rejected": -2.1062915325164795, + "loss": 1.8627, + "nll_loss": 0.4617469310760498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009835846722126007, + "rewards/margins": 0.2096455693244934, + "rewards/rejected": -0.2106291502714157, + "step": 4458 + }, + { + "epoch": 3.083679114799447, + "grad_norm": 10.259416580200195, + "learning_rate": 3.8424004917780853e-05, + "log_odds_chosen": 9.032968521118164, + "log_odds_ratio": -0.007468566298484802, + "logits/chosen": -0.7418262958526611, + "logits/rejected": -0.7376289367675781, + "logps/chosen": -0.003130620112642646, + "logps/rejected": -1.7070213556289673, + "loss": 1.2162, + "nll_loss": 0.303314745426178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003130620170850307, + "rewards/margins": 0.1703890711069107, + "rewards/rejected": -0.17070214450359344, + "step": 4459 + }, + { + "epoch": 3.0843706777316737, + "grad_norm": 10.962529182434082, + "learning_rate": 3.8420162901490706e-05, + "log_odds_chosen": 8.482834815979004, + "log_odds_ratio": -0.21217072010040283, + "logits/chosen": -0.8609898090362549, + "logits/rejected": -0.8481860160827637, + "logps/chosen": -0.07208161801099777, + "logps/rejected": -1.3337786197662354, + "loss": 1.6501, + "nll_loss": 0.39131245017051697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007208161521703005, + "rewards/margins": 0.12616971135139465, + "rewards/rejected": -0.13337786495685577, + "step": 4460 + }, + { + "epoch": 3.0850622406639006, + "grad_norm": 5.700719833374023, + "learning_rate": 3.841632088520056e-05, + "log_odds_chosen": 9.250066757202148, + "log_odds_ratio": -0.00032611002097837627, + "logits/chosen": -0.6338516473770142, + "logits/rejected": -0.6218562126159668, + "logps/chosen": -0.0013254042714834213, + "logps/rejected": -1.582740068435669, + "loss": 1.8656, + "nll_loss": 0.46636563539505005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013254041550680995, + "rewards/margins": 0.15814147889614105, + "rewards/rejected": -0.15827400982379913, + "step": 4461 + }, + { + "epoch": 3.0857538035961274, + "grad_norm": 8.559025764465332, + "learning_rate": 3.8412478868910404e-05, + "log_odds_chosen": 10.36841106414795, + "log_odds_ratio": -0.00013494495942723006, + "logits/chosen": -0.857991099357605, + "logits/rejected": -0.963939368724823, + "logps/chosen": -0.003404158866032958, + "logps/rejected": -2.077117681503296, + "loss": 1.3211, + "nll_loss": 0.3302675485610962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003404158924240619, + "rewards/margins": 0.20737135410308838, + "rewards/rejected": -0.20771175622940063, + "step": 4462 + }, + { + "epoch": 3.086445366528354, + "grad_norm": 8.417356491088867, + "learning_rate": 3.8408636852620256e-05, + "log_odds_chosen": 9.118749618530273, + "log_odds_ratio": -0.012486644089221954, + "logits/chosen": -0.5907131433486938, + "logits/rejected": -0.62277752161026, + "logps/chosen": -0.06458212435245514, + "logps/rejected": -2.072675943374634, + "loss": 1.0546, + "nll_loss": 0.26241254806518555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006458211690187454, + "rewards/margins": 0.20080935955047607, + "rewards/rejected": -0.2072676122188568, + "step": 4463 + }, + { + "epoch": 3.087136929460581, + "grad_norm": 12.34536075592041, + "learning_rate": 3.840479483633011e-05, + "log_odds_chosen": 8.383492469787598, + "log_odds_ratio": -0.0008394765318371356, + "logits/chosen": -0.6303070783615112, + "logits/rejected": -0.6680947542190552, + "logps/chosen": -0.0022870246320962906, + "logps/rejected": -1.5010545253753662, + "loss": 2.1234, + "nll_loss": 0.5307590961456299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022870246903039515, + "rewards/margins": 0.14987675845623016, + "rewards/rejected": -0.15010544657707214, + "step": 4464 + }, + { + "epoch": 3.087828492392808, + "grad_norm": 10.20311164855957, + "learning_rate": 3.840095282003996e-05, + "log_odds_chosen": 9.271467208862305, + "log_odds_ratio": -0.0012613222934305668, + "logits/chosen": -0.6812804937362671, + "logits/rejected": -0.7113140225410461, + "logps/chosen": -0.03402145206928253, + "logps/rejected": -2.317378282546997, + "loss": 2.2311, + "nll_loss": 0.5576537251472473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003402145579457283, + "rewards/margins": 0.2283356785774231, + "rewards/rejected": -0.23173782229423523, + "step": 4465 + }, + { + "epoch": 3.0885200553250347, + "grad_norm": 7.677236557006836, + "learning_rate": 3.839711080374981e-05, + "log_odds_chosen": 9.496734619140625, + "log_odds_ratio": -0.0010136470664292574, + "logits/chosen": -0.6936898827552795, + "logits/rejected": -0.7297221422195435, + "logps/chosen": -0.0011777316685765982, + "logps/rejected": -2.1823971271514893, + "loss": 1.4355, + "nll_loss": 0.3587798476219177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011777316831285134, + "rewards/margins": 0.21812193095684052, + "rewards/rejected": -0.2182397097349167, + "step": 4466 + }, + { + "epoch": 3.0892116182572615, + "grad_norm": 15.080026626586914, + "learning_rate": 3.8393268787459666e-05, + "log_odds_chosen": 9.812665939331055, + "log_odds_ratio": -0.00014757076860405505, + "logits/chosen": -0.3832654654979706, + "logits/rejected": -0.47605186700820923, + "logps/chosen": -0.0010359041625633836, + "logps/rejected": -1.9479248523712158, + "loss": 2.1878, + "nll_loss": 0.5469351410865784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010359041334595531, + "rewards/margins": 0.19468891620635986, + "rewards/rejected": -0.1947924941778183, + "step": 4467 + }, + { + "epoch": 3.0899031811894884, + "grad_norm": 12.628984451293945, + "learning_rate": 3.838942677116951e-05, + "log_odds_chosen": 8.88953971862793, + "log_odds_ratio": -0.012125710025429726, + "logits/chosen": -0.4032934308052063, + "logits/rejected": -0.45221227407455444, + "logps/chosen": -0.005299792625010014, + "logps/rejected": -1.8057115077972412, + "loss": 1.4878, + "nll_loss": 0.37074652314186096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000529979239217937, + "rewards/margins": 0.18004117906093597, + "rewards/rejected": -0.18057113885879517, + "step": 4468 + }, + { + "epoch": 3.090594744121715, + "grad_norm": 9.369607925415039, + "learning_rate": 3.8385584754879364e-05, + "log_odds_chosen": 9.427680969238281, + "log_odds_ratio": -0.0027859038673341274, + "logits/chosen": -0.3410671353340149, + "logits/rejected": -0.4170883595943451, + "logps/chosen": -0.009003382176160812, + "logps/rejected": -1.8481632471084595, + "loss": 1.8318, + "nll_loss": 0.4576743543148041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009003382292576134, + "rewards/margins": 0.18391598761081696, + "rewards/rejected": -0.18481633067131042, + "step": 4469 + }, + { + "epoch": 3.091286307053942, + "grad_norm": 8.817591667175293, + "learning_rate": 3.838174273858922e-05, + "log_odds_chosen": 8.1690034866333, + "log_odds_ratio": -0.008122103288769722, + "logits/chosen": -0.32725226879119873, + "logits/rejected": -0.3610275983810425, + "logps/chosen": -0.010240322910249233, + "logps/rejected": -1.8948895931243896, + "loss": 1.558, + "nll_loss": 0.3886779844760895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010240323608741164, + "rewards/margins": 0.1884649097919464, + "rewards/rejected": -0.18948894739151, + "step": 4470 + }, + { + "epoch": 3.091977869986169, + "grad_norm": 6.53830623626709, + "learning_rate": 3.837790072229906e-05, + "log_odds_chosen": 8.212554931640625, + "log_odds_ratio": -0.18837156891822815, + "logits/chosen": -0.4135130047798157, + "logits/rejected": -0.4412657916545868, + "logps/chosen": -0.04065573215484619, + "logps/rejected": -1.0980712175369263, + "loss": 1.8802, + "nll_loss": 0.45120689272880554, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0040655736811459064, + "rewards/margins": 0.10574156045913696, + "rewards/rejected": -0.10980713367462158, + "step": 4471 + }, + { + "epoch": 3.0926694329183957, + "grad_norm": 9.242819786071777, + "learning_rate": 3.8374058706008915e-05, + "log_odds_chosen": 9.362022399902344, + "log_odds_ratio": -0.00011695074499584734, + "logits/chosen": -0.4138777554035187, + "logits/rejected": -0.43331849575042725, + "logps/chosen": -0.00035065674455836415, + "logps/rejected": -1.322685956954956, + "loss": 2.1748, + "nll_loss": 0.5436833500862122, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.506567736621946e-05, + "rewards/margins": 0.13223353028297424, + "rewards/rejected": -0.13226859271526337, + "step": 4472 + }, + { + "epoch": 3.0933609958506225, + "grad_norm": 10.725008964538574, + "learning_rate": 3.837021668971877e-05, + "log_odds_chosen": 9.351563453674316, + "log_odds_ratio": -0.0004978245706297457, + "logits/chosen": -0.5398173928260803, + "logits/rejected": -0.5418287515640259, + "logps/chosen": -0.005135357845574617, + "logps/rejected": -2.0309886932373047, + "loss": 1.4824, + "nll_loss": 0.3705606460571289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005135358078405261, + "rewards/margins": 0.2025853395462036, + "rewards/rejected": -0.203098863363266, + "step": 4473 + }, + { + "epoch": 3.0940525587828493, + "grad_norm": 8.506511688232422, + "learning_rate": 3.836637467342862e-05, + "log_odds_chosen": 6.215714931488037, + "log_odds_ratio": -0.2552088499069214, + "logits/chosen": -0.8886780738830566, + "logits/rejected": -0.8105942010879517, + "logps/chosen": -0.043494973331689835, + "logps/rejected": -0.7536239624023438, + "loss": 2.0884, + "nll_loss": 0.4965880513191223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0043494971469044685, + "rewards/margins": 0.07101289927959442, + "rewards/rejected": -0.07536239176988602, + "step": 4474 + }, + { + "epoch": 3.094744121715076, + "grad_norm": 9.47619342803955, + "learning_rate": 3.8362532657138465e-05, + "log_odds_chosen": 8.74432373046875, + "log_odds_ratio": -0.00039555650437250733, + "logits/chosen": -0.6446212530136108, + "logits/rejected": -0.6923754215240479, + "logps/chosen": -0.002961081452667713, + "logps/rejected": -1.4609273672103882, + "loss": 1.93, + "nll_loss": 0.4824484586715698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029610813362523913, + "rewards/margins": 0.14579662680625916, + "rewards/rejected": -0.1460927426815033, + "step": 4475 + }, + { + "epoch": 3.095435684647303, + "grad_norm": 13.779592514038086, + "learning_rate": 3.8358690640848325e-05, + "log_odds_chosen": 9.060077667236328, + "log_odds_ratio": -0.00019415131828282028, + "logits/chosen": -0.7600535154342651, + "logits/rejected": -0.7289277911186218, + "logps/chosen": -0.0007039851043373346, + "logps/rejected": -1.6375209093093872, + "loss": 1.7422, + "nll_loss": 0.43554073572158813, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039851334411651e-05, + "rewards/margins": 0.16368168592453003, + "rewards/rejected": -0.16375207901000977, + "step": 4476 + }, + { + "epoch": 3.09612724757953, + "grad_norm": 10.712538719177246, + "learning_rate": 3.835484862455817e-05, + "log_odds_chosen": 9.2240571975708, + "log_odds_ratio": -0.0012207168620079756, + "logits/chosen": -0.7368443012237549, + "logits/rejected": -0.7674077153205872, + "logps/chosen": -0.007611352019011974, + "logps/rejected": -2.200847864151001, + "loss": 2.2683, + "nll_loss": 0.5669484734535217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007611351902596653, + "rewards/margins": 0.21932365000247955, + "rewards/rejected": -0.2200847864151001, + "step": 4477 + }, + { + "epoch": 3.0968188105117567, + "grad_norm": 6.989372730255127, + "learning_rate": 3.835100660826802e-05, + "log_odds_chosen": 8.106101989746094, + "log_odds_ratio": -0.0028811958618462086, + "logits/chosen": -0.8484435677528381, + "logits/rejected": -0.8258570432662964, + "logps/chosen": -0.038005031645298004, + "logps/rejected": -1.863260269165039, + "loss": 1.3121, + "nll_loss": 0.32773715257644653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038005029782652855, + "rewards/margins": 0.18252553045749664, + "rewards/rejected": -0.1863260418176651, + "step": 4478 + }, + { + "epoch": 3.0975103734439835, + "grad_norm": 12.75827693939209, + "learning_rate": 3.8347164591977875e-05, + "log_odds_chosen": 8.949575424194336, + "log_odds_ratio": -0.0010895613813772798, + "logits/chosen": -0.6992684602737427, + "logits/rejected": -0.7569445967674255, + "logps/chosen": -0.0008068022434599698, + "logps/rejected": -1.7668452262878418, + "loss": 1.9399, + "nll_loss": 0.484869122505188, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.068021270446479e-05, + "rewards/margins": 0.17660385370254517, + "rewards/rejected": -0.17668454349040985, + "step": 4479 + }, + { + "epoch": 3.0982019363762103, + "grad_norm": 11.199858665466309, + "learning_rate": 3.834332257568772e-05, + "log_odds_chosen": 8.661763191223145, + "log_odds_ratio": -0.016961198300123215, + "logits/chosen": -0.7467653751373291, + "logits/rejected": -0.78775954246521, + "logps/chosen": -0.014684153720736504, + "logps/rejected": -1.940002679824829, + "loss": 2.0541, + "nll_loss": 0.511833131313324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014684153720736504, + "rewards/margins": 0.19253185391426086, + "rewards/rejected": -0.1940002590417862, + "step": 4480 + }, + { + "epoch": 3.098893499308437, + "grad_norm": 4.9811110496521, + "learning_rate": 3.833948055939757e-05, + "log_odds_chosen": 7.296429634094238, + "log_odds_ratio": -0.016438350081443787, + "logits/chosen": -0.735593318939209, + "logits/rejected": -0.7207514643669128, + "logps/chosen": -0.011228415183722973, + "logps/rejected": -1.2033060789108276, + "loss": 2.4165, + "nll_loss": 0.6024818420410156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001122841495089233, + "rewards/margins": 0.11920776963233948, + "rewards/rejected": -0.12033060938119888, + "step": 4481 + }, + { + "epoch": 3.099585062240664, + "grad_norm": 13.971213340759277, + "learning_rate": 3.8335638543107426e-05, + "log_odds_chosen": 9.258010864257812, + "log_odds_ratio": -0.00527906185016036, + "logits/chosen": -0.8993180990219116, + "logits/rejected": -0.9418272376060486, + "logps/chosen": -0.0038291211239993572, + "logps/rejected": -1.8594636917114258, + "loss": 2.4635, + "nll_loss": 0.6153481602668762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003829121415037662, + "rewards/margins": 0.18556344509124756, + "rewards/rejected": -0.18594636023044586, + "step": 4482 + }, + { + "epoch": 3.100276625172891, + "grad_norm": 8.516523361206055, + "learning_rate": 3.833179652681728e-05, + "log_odds_chosen": 10.226236343383789, + "log_odds_ratio": -0.00016320720897056162, + "logits/chosen": -0.9161720275878906, + "logits/rejected": -0.917118489742279, + "logps/chosen": -0.0006441689911298454, + "logps/rejected": -2.2198100090026855, + "loss": 1.2774, + "nll_loss": 0.31933191418647766, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.441689765779302e-05, + "rewards/margins": 0.22191661596298218, + "rewards/rejected": -0.221981018781662, + "step": 4483 + }, + { + "epoch": 3.1009681881051177, + "grad_norm": 11.72066879272461, + "learning_rate": 3.8327954510527124e-05, + "log_odds_chosen": 10.348617553710938, + "log_odds_ratio": -0.0001743563188938424, + "logits/chosen": -0.4661424160003662, + "logits/rejected": -0.530230700969696, + "logps/chosen": -0.0005677434965036809, + "logps/rejected": -2.4082119464874268, + "loss": 1.6134, + "nll_loss": 0.403322696685791, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6774355471134186e-05, + "rewards/margins": 0.24076443910598755, + "rewards/rejected": -0.2408212125301361, + "step": 4484 + }, + { + "epoch": 3.1016597510373445, + "grad_norm": 6.819164752960205, + "learning_rate": 3.832411249423698e-05, + "log_odds_chosen": 8.427994728088379, + "log_odds_ratio": -0.0013439118629321456, + "logits/chosen": -0.6012922525405884, + "logits/rejected": -0.6957547664642334, + "logps/chosen": -0.00487975450232625, + "logps/rejected": -1.8782953023910522, + "loss": 1.712, + "nll_loss": 0.42785927653312683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004879754560533911, + "rewards/margins": 0.18734155595302582, + "rewards/rejected": -0.18782952427864075, + "step": 4485 + }, + { + "epoch": 3.1023513139695713, + "grad_norm": 14.561470985412598, + "learning_rate": 3.832027047794683e-05, + "log_odds_chosen": 10.100393295288086, + "log_odds_ratio": -0.00017121568089351058, + "logits/chosen": -0.9278507232666016, + "logits/rejected": -1.0120364427566528, + "logps/chosen": -0.0006230022408999503, + "logps/rejected": -2.0958151817321777, + "loss": 1.978, + "nll_loss": 0.4944764971733093, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.230021972442046e-05, + "rewards/margins": 0.2095191925764084, + "rewards/rejected": -0.20958150923252106, + "step": 4486 + }, + { + "epoch": 3.103042876901798, + "grad_norm": 7.111811637878418, + "learning_rate": 3.831642846165668e-05, + "log_odds_chosen": 7.821290969848633, + "log_odds_ratio": -0.15456566214561462, + "logits/chosen": -0.44528767466545105, + "logits/rejected": -0.4628318250179291, + "logps/chosen": -0.02520749345421791, + "logps/rejected": -1.2928862571716309, + "loss": 1.7394, + "nll_loss": 0.4193989038467407, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025207491125911474, + "rewards/margins": 0.12676787376403809, + "rewards/rejected": -0.12928862869739532, + "step": 4487 + }, + { + "epoch": 3.103734439834025, + "grad_norm": 9.723301887512207, + "learning_rate": 3.8312586445366534e-05, + "log_odds_chosen": 6.991793155670166, + "log_odds_ratio": -0.03994767740368843, + "logits/chosen": -0.3234378695487976, + "logits/rejected": -0.3832213878631592, + "logps/chosen": -0.012255754321813583, + "logps/rejected": -1.1929444074630737, + "loss": 1.8702, + "nll_loss": 0.46356457471847534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012255755718797445, + "rewards/margins": 0.1180688664317131, + "rewards/rejected": -0.11929444223642349, + "step": 4488 + }, + { + "epoch": 3.104426002766252, + "grad_norm": 10.895448684692383, + "learning_rate": 3.830874442907638e-05, + "log_odds_chosen": 10.129297256469727, + "log_odds_ratio": -5.873259578947909e-05, + "logits/chosen": -0.6546196341514587, + "logits/rejected": -0.7104349136352539, + "logps/chosen": -0.0002052735653705895, + "logps/rejected": -1.7905243635177612, + "loss": 1.3839, + "nll_loss": 0.34596529603004456, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052735726465471e-05, + "rewards/margins": 0.17903190851211548, + "rewards/rejected": -0.1790524423122406, + "step": 4489 + }, + { + "epoch": 3.1051175656984786, + "grad_norm": 6.577591896057129, + "learning_rate": 3.830490241278623e-05, + "log_odds_chosen": 8.690784454345703, + "log_odds_ratio": -0.00047942117089405656, + "logits/chosen": -0.844971776008606, + "logits/rejected": -0.897977888584137, + "logps/chosen": -0.0027525126934051514, + "logps/rejected": -1.9402804374694824, + "loss": 1.9784, + "nll_loss": 0.4945591688156128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027525125187821686, + "rewards/margins": 0.19375279545783997, + "rewards/rejected": -0.19402804970741272, + "step": 4490 + }, + { + "epoch": 3.1058091286307055, + "grad_norm": 13.503525733947754, + "learning_rate": 3.8301060396496084e-05, + "log_odds_chosen": 9.159711837768555, + "log_odds_ratio": -0.0036358933430165052, + "logits/chosen": -0.22813749313354492, + "logits/rejected": -0.34502047300338745, + "logps/chosen": -0.0027327819261699915, + "logps/rejected": -1.9411729574203491, + "loss": 1.6807, + "nll_loss": 0.41981786489486694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027327818679623306, + "rewards/margins": 0.1938440203666687, + "rewards/rejected": -0.19411730766296387, + "step": 4491 + }, + { + "epoch": 3.1065006915629323, + "grad_norm": 8.305024147033691, + "learning_rate": 3.829721838020594e-05, + "log_odds_chosen": 8.52673053741455, + "log_odds_ratio": -0.002459357026964426, + "logits/chosen": -0.7672133445739746, + "logits/rejected": -0.7956629395484924, + "logps/chosen": -0.001488923910073936, + "logps/rejected": -1.4330894947052002, + "loss": 1.3449, + "nll_loss": 0.33596938848495483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014889237354509532, + "rewards/margins": 0.14316006004810333, + "rewards/rejected": -0.14330896735191345, + "step": 4492 + }, + { + "epoch": 3.107192254495159, + "grad_norm": 5.7519850730896, + "learning_rate": 3.829337636391578e-05, + "log_odds_chosen": 9.599166870117188, + "log_odds_ratio": -0.00018097971042152494, + "logits/chosen": -0.658562183380127, + "logits/rejected": -0.6959885358810425, + "logps/chosen": -0.0007571708410978317, + "logps/rejected": -1.9449284076690674, + "loss": 2.0916, + "nll_loss": 0.5228797197341919, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.57170855649747e-05, + "rewards/margins": 0.19441711902618408, + "rewards/rejected": -0.19449283182621002, + "step": 4493 + }, + { + "epoch": 3.107883817427386, + "grad_norm": 8.070000648498535, + "learning_rate": 3.828953434762564e-05, + "log_odds_chosen": 7.616555690765381, + "log_odds_ratio": -0.029507823288440704, + "logits/chosen": -0.8429253101348877, + "logits/rejected": -0.7952168583869934, + "logps/chosen": -0.00954954419285059, + "logps/rejected": -1.3259698152542114, + "loss": 1.6679, + "nll_loss": 0.41403070092201233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009549543610773981, + "rewards/margins": 0.13164202868938446, + "rewards/rejected": -0.13259698450565338, + "step": 4494 + }, + { + "epoch": 3.108575380359613, + "grad_norm": 9.600811004638672, + "learning_rate": 3.828569233133549e-05, + "log_odds_chosen": 6.310328483581543, + "log_odds_ratio": -0.22290322184562683, + "logits/chosen": -0.32344478368759155, + "logits/rejected": -0.44014453887939453, + "logps/chosen": -0.04343204200267792, + "logps/rejected": -1.4269005060195923, + "loss": 1.9721, + "nll_loss": 0.47072654962539673, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004343204665929079, + "rewards/margins": 0.1383468508720398, + "rewards/rejected": -0.14269006252288818, + "step": 4495 + }, + { + "epoch": 3.1092669432918396, + "grad_norm": 8.956048011779785, + "learning_rate": 3.828185031504534e-05, + "log_odds_chosen": 8.941858291625977, + "log_odds_ratio": -0.0007558095967397094, + "logits/chosen": -0.4833359122276306, + "logits/rejected": -0.5360693335533142, + "logps/chosen": -0.0011183847673237324, + "logps/rejected": -1.6190630197525024, + "loss": 2.241, + "nll_loss": 0.5601741075515747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011183848255313933, + "rewards/margins": 0.16179445385932922, + "rewards/rejected": -0.16190630197525024, + "step": 4496 + }, + { + "epoch": 3.1099585062240664, + "grad_norm": 10.014410972595215, + "learning_rate": 3.8278008298755185e-05, + "log_odds_chosen": 8.638592720031738, + "log_odds_ratio": -0.013766951858997345, + "logits/chosen": -0.6217601895332336, + "logits/rejected": -0.6732291579246521, + "logps/chosen": -0.06834837794303894, + "logps/rejected": -1.6543635129928589, + "loss": 1.7909, + "nll_loss": 0.4463382065296173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006834837608039379, + "rewards/margins": 0.1586015224456787, + "rewards/rejected": -0.16543635725975037, + "step": 4497 + }, + { + "epoch": 3.1106500691562933, + "grad_norm": 10.994170188903809, + "learning_rate": 3.827416628246504e-05, + "log_odds_chosen": 8.087845802307129, + "log_odds_ratio": -0.020464470610022545, + "logits/chosen": -0.4601096510887146, + "logits/rejected": -0.5213154554367065, + "logps/chosen": -0.00844386126846075, + "logps/rejected": -1.5365309715270996, + "loss": 1.4808, + "nll_loss": 0.3681448698043823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008443861734122038, + "rewards/margins": 0.15280871093273163, + "rewards/rejected": -0.1536531001329422, + "step": 4498 + }, + { + "epoch": 3.11134163208852, + "grad_norm": 11.073749542236328, + "learning_rate": 3.827032426617489e-05, + "log_odds_chosen": 9.555994033813477, + "log_odds_ratio": -0.00013254325313027948, + "logits/chosen": -0.42572540044784546, + "logits/rejected": -0.4704053997993469, + "logps/chosen": -0.0004056716861668974, + "logps/rejected": -1.6349362134933472, + "loss": 2.0059, + "nll_loss": 0.5014705061912537, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.056716716149822e-05, + "rewards/margins": 0.16345307230949402, + "rewards/rejected": -0.16349363327026367, + "step": 4499 + }, + { + "epoch": 3.112033195020747, + "grad_norm": 9.384265899658203, + "learning_rate": 3.8266482249884736e-05, + "log_odds_chosen": 8.335820198059082, + "log_odds_ratio": -0.0012116122525185347, + "logits/chosen": -0.46154022216796875, + "logits/rejected": -0.5181608200073242, + "logps/chosen": -0.0015864280285313725, + "logps/rejected": -1.568237066268921, + "loss": 1.5026, + "nll_loss": 0.3755166828632355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015864279703237116, + "rewards/margins": 0.15666505694389343, + "rewards/rejected": -0.15682370960712433, + "step": 4500 + }, + { + "epoch": 3.1127247579529738, + "grad_norm": 12.012398719787598, + "learning_rate": 3.8262640233594595e-05, + "log_odds_chosen": 10.375574111938477, + "log_odds_ratio": -0.004499649163335562, + "logits/chosen": -0.9000028371810913, + "logits/rejected": -0.9860438108444214, + "logps/chosen": -0.002155824564397335, + "logps/rejected": -2.352019786834717, + "loss": 2.0993, + "nll_loss": 0.5243684649467468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002155824622604996, + "rewards/margins": 0.23498639464378357, + "rewards/rejected": -0.23520199954509735, + "step": 4501 + }, + { + "epoch": 3.1134163208852006, + "grad_norm": 12.486892700195312, + "learning_rate": 3.825879821730444e-05, + "log_odds_chosen": 8.377776145935059, + "log_odds_ratio": -0.0027169152162969112, + "logits/chosen": -0.4547843337059021, + "logits/rejected": -0.5709913372993469, + "logps/chosen": -0.001894856453873217, + "logps/rejected": -1.635668396949768, + "loss": 1.9407, + "nll_loss": 0.4848959445953369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018948563956655562, + "rewards/margins": 0.16337734460830688, + "rewards/rejected": -0.16356684267520905, + "step": 4502 + }, + { + "epoch": 3.1141078838174274, + "grad_norm": 12.442771911621094, + "learning_rate": 3.825495620101429e-05, + "log_odds_chosen": 8.797966003417969, + "log_odds_ratio": -0.0005118194967508316, + "logits/chosen": -0.7598543167114258, + "logits/rejected": -0.8394927382469177, + "logps/chosen": -0.0020327758975327015, + "logps/rejected": -1.7451748847961426, + "loss": 1.5557, + "nll_loss": 0.388874888420105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000203277581022121, + "rewards/margins": 0.1743142157793045, + "rewards/rejected": -0.17451749742031097, + "step": 4503 + }, + { + "epoch": 3.1147994467496543, + "grad_norm": 10.25559139251709, + "learning_rate": 3.8251114184724146e-05, + "log_odds_chosen": 9.953062057495117, + "log_odds_ratio": -0.00027120395679958165, + "logits/chosen": -0.7184591889381409, + "logits/rejected": -0.8281209468841553, + "logps/chosen": -0.000444697099737823, + "logps/rejected": -1.6925357580184937, + "loss": 1.6686, + "nll_loss": 0.4171278774738312, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.446971433935687e-05, + "rewards/margins": 0.16920912265777588, + "rewards/rejected": -0.16925358772277832, + "step": 4504 + }, + { + "epoch": 3.115491009681881, + "grad_norm": 17.230388641357422, + "learning_rate": 3.8247272168434e-05, + "log_odds_chosen": 9.531290054321289, + "log_odds_ratio": -0.00029410183196887374, + "logits/chosen": -0.9135196208953857, + "logits/rejected": -1.0209636688232422, + "logps/chosen": -0.0008348989649675786, + "logps/rejected": -1.95163893699646, + "loss": 1.6661, + "nll_loss": 0.4165038764476776, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.34899110486731e-05, + "rewards/margins": 0.19508041441440582, + "rewards/rejected": -0.19516390562057495, + "step": 4505 + }, + { + "epoch": 3.116182572614108, + "grad_norm": 11.494372367858887, + "learning_rate": 3.8243430152143844e-05, + "log_odds_chosen": 7.4096455574035645, + "log_odds_ratio": -0.1444232016801834, + "logits/chosen": -0.5988174676895142, + "logits/rejected": -0.6123107671737671, + "logps/chosen": -0.039420872926712036, + "logps/rejected": -1.9260753393173218, + "loss": 1.6472, + "nll_loss": 0.3973577618598938, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003942087292671204, + "rewards/margins": 0.1886654496192932, + "rewards/rejected": -0.19260753691196442, + "step": 4506 + }, + { + "epoch": 3.1168741355463347, + "grad_norm": 4.810232639312744, + "learning_rate": 3.8239588135853696e-05, + "log_odds_chosen": 7.191157341003418, + "log_odds_ratio": -0.08262602239847183, + "logits/chosen": -0.46612024307250977, + "logits/rejected": -0.5304436683654785, + "logps/chosen": -0.041873492300510406, + "logps/rejected": -1.7846962213516235, + "loss": 1.7705, + "nll_loss": 0.43437063694000244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004187349695712328, + "rewards/margins": 0.1742822825908661, + "rewards/rejected": -0.17846962809562683, + "step": 4507 + }, + { + "epoch": 3.1175656984785616, + "grad_norm": 7.528070449829102, + "learning_rate": 3.823574611956355e-05, + "log_odds_chosen": 7.74015998840332, + "log_odds_ratio": -0.1015208289027214, + "logits/chosen": -0.9653025269508362, + "logits/rejected": -1.017502784729004, + "logps/chosen": -0.017939701676368713, + "logps/rejected": -1.6744894981384277, + "loss": 1.7263, + "nll_loss": 0.42141109704971313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017939701210707426, + "rewards/margins": 0.1656549721956253, + "rewards/rejected": -0.16744893789291382, + "step": 4508 + }, + { + "epoch": 3.1182572614107884, + "grad_norm": 8.28232192993164, + "learning_rate": 3.8231904103273394e-05, + "log_odds_chosen": 9.37982177734375, + "log_odds_ratio": -0.0001628204045118764, + "logits/chosen": -0.7197470664978027, + "logits/rejected": -0.7083673477172852, + "logps/chosen": -0.0006745536811649799, + "logps/rejected": -1.5198719501495361, + "loss": 1.8457, + "nll_loss": 0.46140754222869873, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.745537393726408e-05, + "rewards/margins": 0.15191973745822906, + "rewards/rejected": -0.15198718011379242, + "step": 4509 + }, + { + "epoch": 3.1189488243430152, + "grad_norm": 10.852813720703125, + "learning_rate": 3.8228062086983253e-05, + "log_odds_chosen": 9.736101150512695, + "log_odds_ratio": -0.00016522295481991023, + "logits/chosen": -0.613042414188385, + "logits/rejected": -0.5795801877975464, + "logps/chosen": -0.010511064901947975, + "logps/rejected": -2.089296817779541, + "loss": 1.7505, + "nll_loss": 0.43759751319885254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010511064901947975, + "rewards/margins": 0.20787858963012695, + "rewards/rejected": -0.20892968773841858, + "step": 4510 + }, + { + "epoch": 3.119640387275242, + "grad_norm": 9.711675643920898, + "learning_rate": 3.82242200706931e-05, + "log_odds_chosen": 10.058385848999023, + "log_odds_ratio": -7.867505337344483e-05, + "logits/chosen": -0.710852324962616, + "logits/rejected": -0.7429115176200867, + "logps/chosen": -0.00019204053387511522, + "logps/rejected": -1.4844186305999756, + "loss": 1.7442, + "nll_loss": 0.43605199456214905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.920405338751152e-05, + "rewards/margins": 0.14842267334461212, + "rewards/rejected": -0.1484418660402298, + "step": 4511 + }, + { + "epoch": 3.120331950207469, + "grad_norm": 7.414134502410889, + "learning_rate": 3.822037805440295e-05, + "log_odds_chosen": 9.799873352050781, + "log_odds_ratio": -0.00024242886865977198, + "logits/chosen": -0.7370198369026184, + "logits/rejected": -0.7367205023765564, + "logps/chosen": -0.00038399602635763586, + "logps/rejected": -1.648240566253662, + "loss": 1.9762, + "nll_loss": 0.49401822686195374, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.839960481855087e-05, + "rewards/margins": 0.16478565335273743, + "rewards/rejected": -0.16482405364513397, + "step": 4512 + }, + { + "epoch": 3.1210235131396957, + "grad_norm": 8.614060401916504, + "learning_rate": 3.8216536038112804e-05, + "log_odds_chosen": 7.210538387298584, + "log_odds_ratio": -0.010765092447400093, + "logits/chosen": -0.8183335661888123, + "logits/rejected": -0.8469040989875793, + "logps/chosen": -0.013249891810119152, + "logps/rejected": -1.218770146369934, + "loss": 2.2016, + "nll_loss": 0.5493332147598267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013249891344457865, + "rewards/margins": 0.12055202573537827, + "rewards/rejected": -0.12187701463699341, + "step": 4513 + }, + { + "epoch": 3.1217150760719226, + "grad_norm": 14.239781379699707, + "learning_rate": 3.8212694021822656e-05, + "log_odds_chosen": 8.896451950073242, + "log_odds_ratio": -0.015659527853131294, + "logits/chosen": -0.49167075753211975, + "logits/rejected": -0.549437403678894, + "logps/chosen": -0.005587196443229914, + "logps/rejected": -1.6201717853546143, + "loss": 1.9809, + "nll_loss": 0.49366411566734314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005587196792475879, + "rewards/margins": 0.16145846247673035, + "rewards/rejected": -0.16201716661453247, + "step": 4514 + }, + { + "epoch": 3.1224066390041494, + "grad_norm": 8.278278350830078, + "learning_rate": 3.82088520055325e-05, + "log_odds_chosen": 8.167107582092285, + "log_odds_ratio": -0.0011531723430380225, + "logits/chosen": -0.5432494282722473, + "logits/rejected": -0.5762361884117126, + "logps/chosen": -0.005155651364475489, + "logps/rejected": -1.4995512962341309, + "loss": 2.03, + "nll_loss": 0.5073837637901306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005155651015229523, + "rewards/margins": 0.14943957328796387, + "rewards/rejected": -0.1499551236629486, + "step": 4515 + }, + { + "epoch": 3.123098201936376, + "grad_norm": 9.371342658996582, + "learning_rate": 3.8205009989242355e-05, + "log_odds_chosen": 9.502702713012695, + "log_odds_ratio": -0.0005149688804522157, + "logits/chosen": -0.6440606117248535, + "logits/rejected": -0.8232892155647278, + "logps/chosen": -0.004486470948904753, + "logps/rejected": -1.9728906154632568, + "loss": 2.2974, + "nll_loss": 0.5743060111999512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004486471298150718, + "rewards/margins": 0.19684040546417236, + "rewards/rejected": -0.19728906452655792, + "step": 4516 + }, + { + "epoch": 3.123789764868603, + "grad_norm": 19.65491485595703, + "learning_rate": 3.820116797295221e-05, + "log_odds_chosen": 8.875723838806152, + "log_odds_ratio": -0.009844149462878704, + "logits/chosen": -0.3485298156738281, + "logits/rejected": -0.44744396209716797, + "logps/chosen": -0.034259259700775146, + "logps/rejected": -1.8825244903564453, + "loss": 1.6779, + "nll_loss": 0.4184797406196594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034259259700775146, + "rewards/margins": 0.18482650816440582, + "rewards/rejected": -0.18825244903564453, + "step": 4517 + }, + { + "epoch": 3.12448132780083, + "grad_norm": 13.161083221435547, + "learning_rate": 3.819732595666205e-05, + "log_odds_chosen": 8.252479553222656, + "log_odds_ratio": -0.004612181335687637, + "logits/chosen": -0.711460292339325, + "logits/rejected": -0.7563948631286621, + "logps/chosen": -0.006149608641862869, + "logps/rejected": -1.7720332145690918, + "loss": 2.0579, + "nll_loss": 0.514003574848175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006149609107524157, + "rewards/margins": 0.17658837139606476, + "rewards/rejected": -0.17720332741737366, + "step": 4518 + }, + { + "epoch": 3.1251728907330567, + "grad_norm": 9.831012725830078, + "learning_rate": 3.819348394037191e-05, + "log_odds_chosen": 8.261101722717285, + "log_odds_ratio": -0.1013021394610405, + "logits/chosen": -0.40049105882644653, + "logits/rejected": -0.4902288317680359, + "logps/chosen": -0.016606254503130913, + "logps/rejected": -1.3624508380889893, + "loss": 1.502, + "nll_loss": 0.3653719425201416, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0016606254503130913, + "rewards/margins": 0.1345844566822052, + "rewards/rejected": -0.13624508678913116, + "step": 4519 + }, + { + "epoch": 3.1258644536652835, + "grad_norm": 7.515540599822998, + "learning_rate": 3.818964192408176e-05, + "log_odds_chosen": 8.36376667022705, + "log_odds_ratio": -0.004170221742242575, + "logits/chosen": -0.2855423390865326, + "logits/rejected": -0.3653988838195801, + "logps/chosen": -0.027606811374425888, + "logps/rejected": -2.2836270332336426, + "loss": 1.6224, + "nll_loss": 0.40518805384635925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027606813237071037, + "rewards/margins": 0.22560201585292816, + "rewards/rejected": -0.22836267948150635, + "step": 4520 + }, + { + "epoch": 3.1265560165975104, + "grad_norm": 6.657315731048584, + "learning_rate": 3.818579990779161e-05, + "log_odds_chosen": 7.705994606018066, + "log_odds_ratio": -0.022902924567461014, + "logits/chosen": -0.4771485924720764, + "logits/rejected": -0.49368542432785034, + "logps/chosen": -0.007439715787768364, + "logps/rejected": -1.3107768297195435, + "loss": 1.8003, + "nll_loss": 0.4477929472923279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007439716137014329, + "rewards/margins": 0.13033372163772583, + "rewards/rejected": -0.13107769191265106, + "step": 4521 + }, + { + "epoch": 3.127247579529737, + "grad_norm": 11.238456726074219, + "learning_rate": 3.818195789150146e-05, + "log_odds_chosen": 9.492000579833984, + "log_odds_ratio": -0.001921428251080215, + "logits/chosen": -0.7598745822906494, + "logits/rejected": -0.7640295028686523, + "logps/chosen": -0.013556775636970997, + "logps/rejected": -2.340526819229126, + "loss": 2.0358, + "nll_loss": 0.5087577104568481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013556774938479066, + "rewards/margins": 0.2326970100402832, + "rewards/rejected": -0.23405268788337708, + "step": 4522 + }, + { + "epoch": 3.127939142461964, + "grad_norm": 10.358023643493652, + "learning_rate": 3.8178115875211315e-05, + "log_odds_chosen": 9.809257507324219, + "log_odds_ratio": -8.809195423964411e-05, + "logits/chosen": -0.3360787034034729, + "logits/rejected": -0.3992619216442108, + "logps/chosen": -0.00785091519355774, + "logps/rejected": -2.415611743927002, + "loss": 1.505, + "nll_loss": 0.3762507736682892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007850916008464992, + "rewards/margins": 0.24077607691287994, + "rewards/rejected": -0.241561159491539, + "step": 4523 + }, + { + "epoch": 3.128630705394191, + "grad_norm": 7.600725173950195, + "learning_rate": 3.817427385892116e-05, + "log_odds_chosen": 8.749622344970703, + "log_odds_ratio": -0.027113988995552063, + "logits/chosen": -0.5375210642814636, + "logits/rejected": -0.5235632658004761, + "logps/chosen": -0.007372237276285887, + "logps/rejected": -1.1214462518692017, + "loss": 1.6342, + "nll_loss": 0.40584635734558105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007372237741947174, + "rewards/margins": 0.11140740662813187, + "rewards/rejected": -0.11214463412761688, + "step": 4524 + }, + { + "epoch": 3.1293222683264177, + "grad_norm": 8.687854766845703, + "learning_rate": 3.817043184263101e-05, + "log_odds_chosen": 8.751347541809082, + "log_odds_ratio": -0.001098288339562714, + "logits/chosen": -0.6035765409469604, + "logits/rejected": -0.6965278387069702, + "logps/chosen": -0.1082809567451477, + "logps/rejected": -2.047489881515503, + "loss": 1.6846, + "nll_loss": 0.42103543877601624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010828095488250256, + "rewards/margins": 0.19392091035842896, + "rewards/rejected": -0.2047489881515503, + "step": 4525 + }, + { + "epoch": 3.1300138312586445, + "grad_norm": 11.338460922241211, + "learning_rate": 3.8166589826340865e-05, + "log_odds_chosen": 9.703506469726562, + "log_odds_ratio": -0.0005722501664422452, + "logits/chosen": -0.792091429233551, + "logits/rejected": -0.8154028654098511, + "logps/chosen": -0.0005980893620289862, + "logps/rejected": -1.6763802766799927, + "loss": 1.62, + "nll_loss": 0.40493831038475037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980893911328167e-05, + "rewards/margins": 0.16757820546627045, + "rewards/rejected": -0.16763801872730255, + "step": 4526 + }, + { + "epoch": 3.1307053941908713, + "grad_norm": 11.097390174865723, + "learning_rate": 3.816274781005071e-05, + "log_odds_chosen": 10.066173553466797, + "log_odds_ratio": -0.00018428280600346625, + "logits/chosen": -1.0647556781768799, + "logits/rejected": -1.1590054035186768, + "logps/chosen": -0.000478035188280046, + "logps/rejected": -2.538705825805664, + "loss": 1.7628, + "nll_loss": 0.44069257378578186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.780351810040884e-05, + "rewards/margins": 0.2538227438926697, + "rewards/rejected": -0.25387057662010193, + "step": 4527 + }, + { + "epoch": 3.131396957123098, + "grad_norm": 9.307280540466309, + "learning_rate": 3.815890579376057e-05, + "log_odds_chosen": 9.103038787841797, + "log_odds_ratio": -0.001420386484824121, + "logits/chosen": -0.7872853875160217, + "logits/rejected": -0.8193323612213135, + "logps/chosen": -0.0031216240022331476, + "logps/rejected": -2.2407970428466797, + "loss": 2.1735, + "nll_loss": 0.5432285070419312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031216241768561304, + "rewards/margins": 0.22376754879951477, + "rewards/rejected": -0.2240796983242035, + "step": 4528 + }, + { + "epoch": 3.132088520055325, + "grad_norm": 7.521166801452637, + "learning_rate": 3.8155063777470416e-05, + "log_odds_chosen": 9.6439208984375, + "log_odds_ratio": -0.00010717587429098785, + "logits/chosen": -0.4613168239593506, + "logits/rejected": -0.5104846954345703, + "logps/chosen": -0.0003044250188395381, + "logps/rejected": -1.3650177717208862, + "loss": 1.3345, + "nll_loss": 0.3336198329925537, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.044250115635805e-05, + "rewards/margins": 0.13647134602069855, + "rewards/rejected": -0.13650178909301758, + "step": 4529 + }, + { + "epoch": 3.132780082987552, + "grad_norm": 8.576324462890625, + "learning_rate": 3.815122176118027e-05, + "log_odds_chosen": 9.355891227722168, + "log_odds_ratio": -0.02535889483988285, + "logits/chosen": -0.7813079953193665, + "logits/rejected": -0.7906760573387146, + "logps/chosen": -0.007265019230544567, + "logps/rejected": -1.7882845401763916, + "loss": 1.091, + "nll_loss": 0.27020618319511414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007265019230544567, + "rewards/margins": 0.17810194194316864, + "rewards/rejected": -0.17882846295833588, + "step": 4530 + }, + { + "epoch": 3.1334716459197787, + "grad_norm": 9.892020225524902, + "learning_rate": 3.814737974489012e-05, + "log_odds_chosen": 10.09764289855957, + "log_odds_ratio": -0.00012242019874975085, + "logits/chosen": -0.5264326333999634, + "logits/rejected": -0.5695961117744446, + "logps/chosen": -0.00032916830969043076, + "logps/rejected": -1.9653332233428955, + "loss": 1.6326, + "nll_loss": 0.4081262946128845, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.29168324242346e-05, + "rewards/margins": 0.19650039076805115, + "rewards/rejected": -0.19653332233428955, + "step": 4531 + }, + { + "epoch": 3.1341632088520055, + "grad_norm": 14.981584548950195, + "learning_rate": 3.814353772859997e-05, + "log_odds_chosen": 10.171943664550781, + "log_odds_ratio": -4.946034459862858e-05, + "logits/chosen": -0.9239128828048706, + "logits/rejected": -0.9973942637443542, + "logps/chosen": -0.0003368504694662988, + "logps/rejected": -1.8945417404174805, + "loss": 2.7217, + "nll_loss": 0.6804137229919434, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.368504621903412e-05, + "rewards/margins": 0.18942049145698547, + "rewards/rejected": -0.18945418298244476, + "step": 4532 + }, + { + "epoch": 3.1348547717842323, + "grad_norm": 10.596940994262695, + "learning_rate": 3.813969571230982e-05, + "log_odds_chosen": 10.182984352111816, + "log_odds_ratio": -9.355310612590984e-05, + "logits/chosen": -0.8709641695022583, + "logits/rejected": -0.9588356018066406, + "logps/chosen": -0.000607682392001152, + "logps/rejected": -2.1859307289123535, + "loss": 1.4606, + "nll_loss": 0.3651290535926819, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076823774492368e-05, + "rewards/margins": 0.21853229403495789, + "rewards/rejected": -0.2185930758714676, + "step": 4533 + }, + { + "epoch": 3.135546334716459, + "grad_norm": 10.857816696166992, + "learning_rate": 3.813585369601967e-05, + "log_odds_chosen": 7.239170074462891, + "log_odds_ratio": -0.014962945133447647, + "logits/chosen": -0.5337001085281372, + "logits/rejected": -0.5952611565589905, + "logps/chosen": -0.02933443710207939, + "logps/rejected": -1.7761834859848022, + "loss": 1.7248, + "nll_loss": 0.42969420552253723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029334432911127806, + "rewards/margins": 0.17468491196632385, + "rewards/rejected": -0.1776183545589447, + "step": 4534 + }, + { + "epoch": 3.136237897648686, + "grad_norm": 4.286896705627441, + "learning_rate": 3.8132011679729524e-05, + "log_odds_chosen": 7.423393249511719, + "log_odds_ratio": -0.02035851590335369, + "logits/chosen": -0.6046768426895142, + "logits/rejected": -0.5994390845298767, + "logps/chosen": -0.034431342035532, + "logps/rejected": -1.7557482719421387, + "loss": 1.6542, + "nll_loss": 0.4115162491798401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034431342501193285, + "rewards/margins": 0.17213168740272522, + "rewards/rejected": -0.17557483911514282, + "step": 4535 + }, + { + "epoch": 3.136929460580913, + "grad_norm": 10.18018627166748, + "learning_rate": 3.812816966343937e-05, + "log_odds_chosen": 8.911344528198242, + "log_odds_ratio": -0.00033108796924352646, + "logits/chosen": -0.779600977897644, + "logits/rejected": -0.7951276898384094, + "logps/chosen": -0.0005056941881775856, + "logps/rejected": -1.4306249618530273, + "loss": 2.17, + "nll_loss": 0.5424723625183105, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.056941881775856e-05, + "rewards/margins": 0.1430119276046753, + "rewards/rejected": -0.1430625021457672, + "step": 4536 + }, + { + "epoch": 3.1376210235131397, + "grad_norm": 12.990944862365723, + "learning_rate": 3.812432764714923e-05, + "log_odds_chosen": 10.054588317871094, + "log_odds_ratio": -7.011681009316817e-05, + "logits/chosen": -0.7560017108917236, + "logits/rejected": -0.8109539747238159, + "logps/chosen": -0.0004920088686048985, + "logps/rejected": -1.7367125749588013, + "loss": 1.9389, + "nll_loss": 0.48472166061401367, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9200891226064414e-05, + "rewards/margins": 0.17362205684185028, + "rewards/rejected": -0.17367127537727356, + "step": 4537 + }, + { + "epoch": 3.1383125864453665, + "grad_norm": 6.141697406768799, + "learning_rate": 3.8120485630859074e-05, + "log_odds_chosen": 10.290353775024414, + "log_odds_ratio": -6.922272586962208e-05, + "logits/chosen": -0.2762095630168915, + "logits/rejected": -0.3313080370426178, + "logps/chosen": -0.000490238016936928, + "logps/rejected": -2.147183895111084, + "loss": 2.0987, + "nll_loss": 0.5246639251708984, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.902379441773519e-05, + "rewards/margins": 0.2146693915128708, + "rewards/rejected": -0.21471840143203735, + "step": 4538 + }, + { + "epoch": 3.1390041493775933, + "grad_norm": 10.24838638305664, + "learning_rate": 3.811664361456893e-05, + "log_odds_chosen": 7.072497844696045, + "log_odds_ratio": -0.2984233796596527, + "logits/chosen": -0.2776835262775421, + "logits/rejected": -0.3410561680793762, + "logps/chosen": -0.04763030633330345, + "logps/rejected": -1.6126518249511719, + "loss": 1.9583, + "nll_loss": 0.45972296595573425, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00476303044706583, + "rewards/margins": 0.1565021276473999, + "rewards/rejected": -0.16126517951488495, + "step": 4539 + }, + { + "epoch": 3.13969571230982, + "grad_norm": 16.48712158203125, + "learning_rate": 3.811280159827878e-05, + "log_odds_chosen": 8.227566719055176, + "log_odds_ratio": -0.04556776210665703, + "logits/chosen": -0.9189306497573853, + "logits/rejected": -0.9514791965484619, + "logps/chosen": -0.03799891471862793, + "logps/rejected": -1.8318504095077515, + "loss": 2.0979, + "nll_loss": 0.5199169516563416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037998920306563377, + "rewards/margins": 0.17938515543937683, + "rewards/rejected": -0.18318504095077515, + "step": 4540 + }, + { + "epoch": 3.140387275242047, + "grad_norm": 9.07288646697998, + "learning_rate": 3.810895958198863e-05, + "log_odds_chosen": 9.004685401916504, + "log_odds_ratio": -0.06647829711437225, + "logits/chosen": -0.6648062467575073, + "logits/rejected": -0.6944547891616821, + "logps/chosen": -0.012586474418640137, + "logps/rejected": -1.3501288890838623, + "loss": 2.3062, + "nll_loss": 0.5699008107185364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012586475349962711, + "rewards/margins": 0.13375423848628998, + "rewards/rejected": -0.1350128948688507, + "step": 4541 + }, + { + "epoch": 3.141078838174274, + "grad_norm": 7.928465366363525, + "learning_rate": 3.810511756569848e-05, + "log_odds_chosen": 10.70061206817627, + "log_odds_ratio": -3.524612111505121e-05, + "logits/chosen": -0.664079487323761, + "logits/rejected": -0.7402001619338989, + "logps/chosen": -0.00035893419408239424, + "logps/rejected": -2.4550933837890625, + "loss": 1.3285, + "nll_loss": 0.3321237564086914, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5893419408239424e-05, + "rewards/margins": 0.2454734593629837, + "rewards/rejected": -0.24550935626029968, + "step": 4542 + }, + { + "epoch": 3.1417704011065006, + "grad_norm": 7.757707595825195, + "learning_rate": 3.810127554940833e-05, + "log_odds_chosen": 9.735649108886719, + "log_odds_ratio": -0.0007004727958701551, + "logits/chosen": -0.6714752316474915, + "logits/rejected": -0.6968262195587158, + "logps/chosen": -0.00038532583857886493, + "logps/rejected": -1.7883989810943604, + "loss": 1.6233, + "nll_loss": 0.4057468771934509, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.853258385788649e-05, + "rewards/margins": 0.17880135774612427, + "rewards/rejected": -0.17883989214897156, + "step": 4543 + }, + { + "epoch": 3.1424619640387275, + "grad_norm": 4.612785816192627, + "learning_rate": 3.809743353311818e-05, + "log_odds_chosen": 8.947471618652344, + "log_odds_ratio": -0.0007860027835704386, + "logits/chosen": -0.5513883233070374, + "logits/rejected": -0.5391987562179565, + "logps/chosen": -0.007197847589850426, + "logps/rejected": -1.6272315979003906, + "loss": 1.3186, + "nll_loss": 0.32956576347351074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007197847589850426, + "rewards/margins": 0.16200336813926697, + "rewards/rejected": -0.16272316873073578, + "step": 4544 + }, + { + "epoch": 3.1431535269709543, + "grad_norm": 10.903369903564453, + "learning_rate": 3.809359151682803e-05, + "log_odds_chosen": 8.987730026245117, + "log_odds_ratio": -0.0010401320178061724, + "logits/chosen": -0.6096397638320923, + "logits/rejected": -0.6015598177909851, + "logps/chosen": -0.001135033555328846, + "logps/rejected": -1.9247807264328003, + "loss": 1.033, + "nll_loss": 0.2581413686275482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011350335262250155, + "rewards/margins": 0.19236457347869873, + "rewards/rejected": -0.19247806072235107, + "step": 4545 + }, + { + "epoch": 3.143845089903181, + "grad_norm": 12.1816987991333, + "learning_rate": 3.808974950053789e-05, + "log_odds_chosen": 9.709785461425781, + "log_odds_ratio": -0.0002501948911231011, + "logits/chosen": -0.3249852955341339, + "logits/rejected": -0.35515284538269043, + "logps/chosen": -0.0010478305630385876, + "logps/rejected": -1.9457213878631592, + "loss": 1.5735, + "nll_loss": 0.39334553480148315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010478307376615703, + "rewards/margins": 0.19446735084056854, + "rewards/rejected": -0.19457213580608368, + "step": 4546 + }, + { + "epoch": 3.144536652835408, + "grad_norm": 8.440278053283691, + "learning_rate": 3.808590748424773e-05, + "log_odds_chosen": 10.5477876663208, + "log_odds_ratio": -5.409811274148524e-05, + "logits/chosen": -0.5726054906845093, + "logits/rejected": -0.5959814786911011, + "logps/chosen": -0.0015591013943776488, + "logps/rejected": -2.7095227241516113, + "loss": 1.3806, + "nll_loss": 0.3451417088508606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015591015107929707, + "rewards/margins": 0.27079635858535767, + "rewards/rejected": -0.2709522545337677, + "step": 4547 + }, + { + "epoch": 3.145228215767635, + "grad_norm": 5.923503398895264, + "learning_rate": 3.8082065467957585e-05, + "log_odds_chosen": 9.889942169189453, + "log_odds_ratio": -0.0002645776839926839, + "logits/chosen": -0.6835029125213623, + "logits/rejected": -0.6722432374954224, + "logps/chosen": -0.0024868096224963665, + "logps/rejected": -2.269965171813965, + "loss": 2.0377, + "nll_loss": 0.509391725063324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024868096807040274, + "rewards/margins": 0.22674782574176788, + "rewards/rejected": -0.226996511220932, + "step": 4548 + }, + { + "epoch": 3.1459197786998616, + "grad_norm": 10.639673233032227, + "learning_rate": 3.807822345166744e-05, + "log_odds_chosen": 8.301789283752441, + "log_odds_ratio": -0.021940121427178383, + "logits/chosen": -0.6153140664100647, + "logits/rejected": -0.7271953821182251, + "logps/chosen": -0.013397076167166233, + "logps/rejected": -2.071251392364502, + "loss": 1.7852, + "nll_loss": 0.44411715865135193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013397075235843658, + "rewards/margins": 0.20578543841838837, + "rewards/rejected": -0.20712514221668243, + "step": 4549 + }, + { + "epoch": 3.1466113416320884, + "grad_norm": 15.749545097351074, + "learning_rate": 3.807438143537729e-05, + "log_odds_chosen": 7.985233306884766, + "log_odds_ratio": -0.12385857850313187, + "logits/chosen": -0.4573472738265991, + "logits/rejected": -0.39598169922828674, + "logps/chosen": -0.02607070654630661, + "logps/rejected": -1.364487886428833, + "loss": 2.2428, + "nll_loss": 0.5483059883117676, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002607070840895176, + "rewards/margins": 0.13384172320365906, + "rewards/rejected": -0.13644880056381226, + "step": 4550 + }, + { + "epoch": 3.1473029045643153, + "grad_norm": 8.415826797485352, + "learning_rate": 3.8070539419087136e-05, + "log_odds_chosen": 8.762563705444336, + "log_odds_ratio": -0.010597055777907372, + "logits/chosen": -0.6360316276550293, + "logits/rejected": -0.7695156335830688, + "logps/chosen": -0.004911277908831835, + "logps/rejected": -1.581146478652954, + "loss": 1.7155, + "nll_loss": 0.427810400724411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004911277792416513, + "rewards/margins": 0.1576235145330429, + "rewards/rejected": -0.15811465680599213, + "step": 4551 + }, + { + "epoch": 3.147994467496542, + "grad_norm": 12.998376846313477, + "learning_rate": 3.806669740279699e-05, + "log_odds_chosen": 8.921663284301758, + "log_odds_ratio": -0.04195103421807289, + "logits/chosen": -0.6693733334541321, + "logits/rejected": -0.7234500646591187, + "logps/chosen": -0.01005852036178112, + "logps/rejected": -1.833262324333191, + "loss": 1.5829, + "nll_loss": 0.3915401101112366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010058521293103695, + "rewards/margins": 0.18232038617134094, + "rewards/rejected": -0.18332622945308685, + "step": 4552 + }, + { + "epoch": 3.148686030428769, + "grad_norm": 5.58607816696167, + "learning_rate": 3.806285538650684e-05, + "log_odds_chosen": 8.226150512695312, + "log_odds_ratio": -0.004057474434375763, + "logits/chosen": -0.2192223072052002, + "logits/rejected": -0.24665291607379913, + "logps/chosen": -0.01471928134560585, + "logps/rejected": -1.6626795530319214, + "loss": 1.2105, + "nll_loss": 0.3022083342075348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014719280879944563, + "rewards/margins": 0.16479603946208954, + "rewards/rejected": -0.16626796126365662, + "step": 4553 + }, + { + "epoch": 3.1493775933609958, + "grad_norm": 8.711701393127441, + "learning_rate": 3.8059013370216686e-05, + "log_odds_chosen": 9.90752124786377, + "log_odds_ratio": -0.0003670519217848778, + "logits/chosen": -0.6568311452865601, + "logits/rejected": -0.7144882678985596, + "logps/chosen": -0.035695165395736694, + "logps/rejected": -2.3609328269958496, + "loss": 1.7224, + "nll_loss": 0.4305512309074402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003569516586139798, + "rewards/margins": 0.23252378404140472, + "rewards/rejected": -0.23609329760074615, + "step": 4554 + }, + { + "epoch": 3.1500691562932226, + "grad_norm": 6.859455108642578, + "learning_rate": 3.8055171353926546e-05, + "log_odds_chosen": 10.337671279907227, + "log_odds_ratio": -0.00044965840061195195, + "logits/chosen": -0.5700564980506897, + "logits/rejected": -0.6095435619354248, + "logps/chosen": -0.004616261925548315, + "logps/rejected": -1.8322263956069946, + "loss": 2.594, + "nll_loss": 0.6484533548355103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046162621583789587, + "rewards/margins": 0.18276099860668182, + "rewards/rejected": -0.18322263658046722, + "step": 4555 + }, + { + "epoch": 3.1507607192254494, + "grad_norm": 14.39889144897461, + "learning_rate": 3.805132933763639e-05, + "log_odds_chosen": 10.14631462097168, + "log_odds_ratio": -0.0001559885567985475, + "logits/chosen": -0.24603904783725739, + "logits/rejected": -0.26028525829315186, + "logps/chosen": -0.0026844381354749203, + "logps/rejected": -2.728746175765991, + "loss": 1.5621, + "nll_loss": 0.3905075490474701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002684438368305564, + "rewards/margins": 0.27260616421699524, + "rewards/rejected": -0.2728746235370636, + "step": 4556 + }, + { + "epoch": 3.1514522821576763, + "grad_norm": 8.706809043884277, + "learning_rate": 3.8047487321346244e-05, + "log_odds_chosen": 8.413780212402344, + "log_odds_ratio": -0.020199157297611237, + "logits/chosen": -0.43870848417282104, + "logits/rejected": -0.503291666507721, + "logps/chosen": -0.0241429153829813, + "logps/rejected": -1.6240514516830444, + "loss": 2.3396, + "nll_loss": 0.5828862190246582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024142912589013577, + "rewards/margins": 0.15999086201190948, + "rewards/rejected": -0.16240514814853668, + "step": 4557 + }, + { + "epoch": 3.152143845089903, + "grad_norm": 10.746550559997559, + "learning_rate": 3.8043645305056096e-05, + "log_odds_chosen": 8.803289413452148, + "log_odds_ratio": -0.006239832378923893, + "logits/chosen": -0.8961911201477051, + "logits/rejected": -0.9288344979286194, + "logps/chosen": -0.004250252153724432, + "logps/rejected": -1.7004587650299072, + "loss": 2.3146, + "nll_loss": 0.5780288577079773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042502518044784665, + "rewards/margins": 0.16962085664272308, + "rewards/rejected": -0.1700458824634552, + "step": 4558 + }, + { + "epoch": 3.15283540802213, + "grad_norm": 6.900879383087158, + "learning_rate": 3.803980328876595e-05, + "log_odds_chosen": 9.192712783813477, + "log_odds_ratio": -0.0011526880552992225, + "logits/chosen": -0.31786349415779114, + "logits/rejected": -0.3973749876022339, + "logps/chosen": -0.0021980530582368374, + "logps/rejected": -2.287642478942871, + "loss": 1.7842, + "nll_loss": 0.44592586159706116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021980531164444983, + "rewards/margins": 0.2285444587469101, + "rewards/rejected": -0.22876425087451935, + "step": 4559 + }, + { + "epoch": 3.1535269709543567, + "grad_norm": 11.182805061340332, + "learning_rate": 3.8035961272475794e-05, + "log_odds_chosen": 9.342815399169922, + "log_odds_ratio": -0.0012750012101605535, + "logits/chosen": -0.6669700145721436, + "logits/rejected": -0.7313302755355835, + "logps/chosen": -0.001783718471415341, + "logps/rejected": -1.8242666721343994, + "loss": 2.2457, + "nll_loss": 0.5613073706626892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001783718471415341, + "rewards/margins": 0.1822482794523239, + "rewards/rejected": -0.18242666125297546, + "step": 4560 + }, + { + "epoch": 3.1542185338865836, + "grad_norm": 6.648580074310303, + "learning_rate": 3.803211925618565e-05, + "log_odds_chosen": 9.386903762817383, + "log_odds_ratio": -0.020936183631420135, + "logits/chosen": -0.4923725426197052, + "logits/rejected": -0.5274564027786255, + "logps/chosen": -0.005909002851694822, + "logps/rejected": -1.635195016860962, + "loss": 1.4973, + "nll_loss": 0.3722338080406189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005909003666602075, + "rewards/margins": 0.16292861104011536, + "rewards/rejected": -0.1635195016860962, + "step": 4561 + }, + { + "epoch": 3.1549100968188104, + "grad_norm": 7.211044788360596, + "learning_rate": 3.80282772398955e-05, + "log_odds_chosen": 10.988120079040527, + "log_odds_ratio": -2.3240507289301604e-05, + "logits/chosen": -0.544777512550354, + "logits/rejected": -0.581328809261322, + "logps/chosen": -0.00042092709918506444, + "logps/rejected": -2.3012421131134033, + "loss": 1.4785, + "nll_loss": 0.3696138560771942, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2092706280527636e-05, + "rewards/margins": 0.23008212447166443, + "rewards/rejected": -0.23012422025203705, + "step": 4562 + }, + { + "epoch": 3.1556016597510372, + "grad_norm": 9.094809532165527, + "learning_rate": 3.8024435223605345e-05, + "log_odds_chosen": 9.047502517700195, + "log_odds_ratio": -0.009910568594932556, + "logits/chosen": -0.8417686223983765, + "logits/rejected": -0.9273465275764465, + "logps/chosen": -0.005829110741615295, + "logps/rejected": -1.8627815246582031, + "loss": 1.3684, + "nll_loss": 0.3411003649234772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005829111323691905, + "rewards/margins": 0.18569524586200714, + "rewards/rejected": -0.18627816438674927, + "step": 4563 + }, + { + "epoch": 3.156293222683264, + "grad_norm": 11.577505111694336, + "learning_rate": 3.8020593207315204e-05, + "log_odds_chosen": 9.485040664672852, + "log_odds_ratio": -0.00032070037559606135, + "logits/chosen": -0.59970623254776, + "logits/rejected": -0.6934555768966675, + "logps/chosen": -0.0005038708914071321, + "logps/rejected": -1.4899804592132568, + "loss": 1.8164, + "nll_loss": 0.4540640711784363, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038708695792593e-05, + "rewards/margins": 0.14894765615463257, + "rewards/rejected": -0.14899805188179016, + "step": 4564 + }, + { + "epoch": 3.156984785615491, + "grad_norm": 9.965083122253418, + "learning_rate": 3.801675119102505e-05, + "log_odds_chosen": 7.520229339599609, + "log_odds_ratio": -0.08336061239242554, + "logits/chosen": -0.6140985488891602, + "logits/rejected": -0.5827906727790833, + "logps/chosen": -0.05686326324939728, + "logps/rejected": -1.5699549913406372, + "loss": 2.1781, + "nll_loss": 0.53618323802948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005686326418071985, + "rewards/margins": 0.15130917727947235, + "rewards/rejected": -0.1569955050945282, + "step": 4565 + }, + { + "epoch": 3.1576763485477177, + "grad_norm": 6.392039775848389, + "learning_rate": 3.80129091747349e-05, + "log_odds_chosen": 7.001106262207031, + "log_odds_ratio": -0.07764378935098648, + "logits/chosen": -0.8396638631820679, + "logits/rejected": -0.8487118482589722, + "logps/chosen": -0.0225069560110569, + "logps/rejected": -1.720860242843628, + "loss": 0.8638, + "nll_loss": 0.20819467306137085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022506958339363337, + "rewards/margins": 0.16983532905578613, + "rewards/rejected": -0.17208603024482727, + "step": 4566 + }, + { + "epoch": 3.1583679114799446, + "grad_norm": 10.21845531463623, + "learning_rate": 3.8009067158444755e-05, + "log_odds_chosen": 9.769824028015137, + "log_odds_ratio": -0.001093681319616735, + "logits/chosen": -0.9067624807357788, + "logits/rejected": -1.0266830921173096, + "logps/chosen": -0.018000207841396332, + "logps/rejected": -2.444042444229126, + "loss": 2.1081, + "nll_loss": 0.5269204378128052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018000205745920539, + "rewards/margins": 0.24260422587394714, + "rewards/rejected": -0.24440424144268036, + "step": 4567 + }, + { + "epoch": 3.1590594744121714, + "grad_norm": 9.175846099853516, + "learning_rate": 3.800522514215461e-05, + "log_odds_chosen": 7.827970027923584, + "log_odds_ratio": -0.13693012297153473, + "logits/chosen": -0.70560622215271, + "logits/rejected": -0.7182061076164246, + "logps/chosen": -0.03513843193650246, + "logps/rejected": -1.7183489799499512, + "loss": 1.957, + "nll_loss": 0.4755551517009735, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035138430539518595, + "rewards/margins": 0.16832104325294495, + "rewards/rejected": -0.17183488607406616, + "step": 4568 + }, + { + "epoch": 3.159751037344398, + "grad_norm": 12.983506202697754, + "learning_rate": 3.800138312586445e-05, + "log_odds_chosen": 10.426617622375488, + "log_odds_ratio": -4.319160507293418e-05, + "logits/chosen": -0.6623454093933105, + "logits/rejected": -0.677485466003418, + "logps/chosen": -0.0005207035574130714, + "logps/rejected": -2.6388802528381348, + "loss": 1.8196, + "nll_loss": 0.45490360260009766, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2070354286115617e-05, + "rewards/margins": 0.26383593678474426, + "rewards/rejected": -0.26388800144195557, + "step": 4569 + }, + { + "epoch": 3.160442600276625, + "grad_norm": 9.496644020080566, + "learning_rate": 3.7997541109574305e-05, + "log_odds_chosen": 10.419952392578125, + "log_odds_ratio": -4.813496343558654e-05, + "logits/chosen": -0.4542783796787262, + "logits/rejected": -0.4638960361480713, + "logps/chosen": -0.0003169108822476119, + "logps/rejected": -2.302415370941162, + "loss": 1.5223, + "nll_loss": 0.3805696666240692, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.169108822476119e-05, + "rewards/margins": 0.2302098423242569, + "rewards/rejected": -0.23024152219295502, + "step": 4570 + }, + { + "epoch": 3.161134163208852, + "grad_norm": 7.444447994232178, + "learning_rate": 3.799369909328416e-05, + "log_odds_chosen": 8.599638938903809, + "log_odds_ratio": -0.007685279473662376, + "logits/chosen": -0.6876360774040222, + "logits/rejected": -0.7898210287094116, + "logps/chosen": -0.009488348849117756, + "logps/rejected": -1.3206067085266113, + "loss": 1.4894, + "nll_loss": 0.3715746998786926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009488348732702434, + "rewards/margins": 0.13111184537410736, + "rewards/rejected": -0.1320606917142868, + "step": 4571 + }, + { + "epoch": 3.1618257261410787, + "grad_norm": 7.070675373077393, + "learning_rate": 3.7989857076994e-05, + "log_odds_chosen": 7.803130149841309, + "log_odds_ratio": -0.07015282660722733, + "logits/chosen": -0.4623362720012665, + "logits/rejected": -0.44682449102401733, + "logps/chosen": -0.033260829746723175, + "logps/rejected": -1.460196852684021, + "loss": 2.0823, + "nll_loss": 0.5135682225227356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003326083067804575, + "rewards/margins": 0.14269360899925232, + "rewards/rejected": -0.14601969718933105, + "step": 4572 + }, + { + "epoch": 3.1625172890733055, + "grad_norm": 13.822750091552734, + "learning_rate": 3.798601506070386e-05, + "log_odds_chosen": 6.678684234619141, + "log_odds_ratio": -0.26905539631843567, + "logits/chosen": -0.6584138870239258, + "logits/rejected": -0.7105019092559814, + "logps/chosen": -0.052003588527441025, + "logps/rejected": -1.2119102478027344, + "loss": 2.0026, + "nll_loss": 0.4737449884414673, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005200359039008617, + "rewards/margins": 0.11599066853523254, + "rewards/rejected": -0.12119103968143463, + "step": 4573 + }, + { + "epoch": 3.1632088520055324, + "grad_norm": 8.823368072509766, + "learning_rate": 3.798217304441371e-05, + "log_odds_chosen": 7.944136142730713, + "log_odds_ratio": -0.015342186205089092, + "logits/chosen": -0.36007851362228394, + "logits/rejected": -0.37841683626174927, + "logps/chosen": -0.035427793860435486, + "logps/rejected": -1.5605015754699707, + "loss": 1.9745, + "nll_loss": 0.49209773540496826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035427792463451624, + "rewards/margins": 0.15250737965106964, + "rewards/rejected": -0.1560501605272293, + "step": 4574 + }, + { + "epoch": 3.163900414937759, + "grad_norm": 6.48723840713501, + "learning_rate": 3.797833102812356e-05, + "log_odds_chosen": 9.472557067871094, + "log_odds_ratio": -0.0004954534815624356, + "logits/chosen": -0.7768731117248535, + "logits/rejected": -0.7985833287239075, + "logps/chosen": -0.04230440780520439, + "logps/rejected": -2.2706217765808105, + "loss": 2.1138, + "nll_loss": 0.5283978581428528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0042304410599172115, + "rewards/margins": 0.22283174097537994, + "rewards/rejected": -0.2270621806383133, + "step": 4575 + }, + { + "epoch": 3.164591977869986, + "grad_norm": 12.95726490020752, + "learning_rate": 3.797448901183341e-05, + "log_odds_chosen": 10.314776420593262, + "log_odds_ratio": -7.154385093599558e-05, + "logits/chosen": -0.6321621537208557, + "logits/rejected": -0.7744560837745667, + "logps/chosen": -0.0019077484030276537, + "logps/rejected": -2.5642080307006836, + "loss": 2.4073, + "nll_loss": 0.6018108129501343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019077486649621278, + "rewards/margins": 0.25623005628585815, + "rewards/rejected": -0.2564208209514618, + "step": 4576 + }, + { + "epoch": 3.165283540802213, + "grad_norm": 6.923228740692139, + "learning_rate": 3.7970646995543266e-05, + "log_odds_chosen": 10.274030685424805, + "log_odds_ratio": -0.00012094212434021756, + "logits/chosen": -0.4876629114151001, + "logits/rejected": -0.4081573486328125, + "logps/chosen": -0.01138163823634386, + "logps/rejected": -2.783386468887329, + "loss": 1.3585, + "nll_loss": 0.3396143317222595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011381638469174504, + "rewards/margins": 0.27720052003860474, + "rewards/rejected": -0.2783386707305908, + "step": 4577 + }, + { + "epoch": 3.1659751037344397, + "grad_norm": 6.5532450675964355, + "learning_rate": 3.796680497925311e-05, + "log_odds_chosen": 8.911333084106445, + "log_odds_ratio": -0.005457364488393068, + "logits/chosen": -0.43831944465637207, + "logits/rejected": -0.43045973777770996, + "logps/chosen": -0.02725759893655777, + "logps/rejected": -1.86832594871521, + "loss": 1.2491, + "nll_loss": 0.3117315173149109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002725760219618678, + "rewards/margins": 0.18410682678222656, + "rewards/rejected": -0.18683260679244995, + "step": 4578 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 4.984968662261963, + "learning_rate": 3.7962962962962964e-05, + "log_odds_chosen": 8.321807861328125, + "log_odds_ratio": -0.0017942956183105707, + "logits/chosen": -0.6507587432861328, + "logits/rejected": -0.7082344889640808, + "logps/chosen": -0.019144365563988686, + "logps/rejected": -1.0949219465255737, + "loss": 1.0557, + "nll_loss": 0.26374536752700806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001914436463266611, + "rewards/margins": 0.10757777094841003, + "rewards/rejected": -0.10949219763278961, + "step": 4579 + }, + { + "epoch": 3.1673582295988933, + "grad_norm": 9.328558921813965, + "learning_rate": 3.7959120946672816e-05, + "log_odds_chosen": 8.825399398803711, + "log_odds_ratio": -0.0010717068798840046, + "logits/chosen": -0.4912228584289551, + "logits/rejected": -0.6055634617805481, + "logps/chosen": -0.0007479118648916483, + "logps/rejected": -1.322448492050171, + "loss": 1.5699, + "nll_loss": 0.39237523078918457, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.47911908547394e-05, + "rewards/margins": 0.13217005133628845, + "rewards/rejected": -0.13224485516548157, + "step": 4580 + }, + { + "epoch": 3.16804979253112, + "grad_norm": 6.22381067276001, + "learning_rate": 3.795527893038266e-05, + "log_odds_chosen": 10.746294021606445, + "log_odds_ratio": -0.00043368813931010664, + "logits/chosen": -0.6561489701271057, + "logits/rejected": -0.6594254970550537, + "logps/chosen": -0.0003508516529109329, + "logps/rejected": -2.45505428314209, + "loss": 1.1298, + "nll_loss": 0.28241264820098877, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.508516601868905e-05, + "rewards/margins": 0.24547035992145538, + "rewards/rejected": -0.2455054521560669, + "step": 4581 + }, + { + "epoch": 3.168741355463347, + "grad_norm": 18.001495361328125, + "learning_rate": 3.795143691409252e-05, + "log_odds_chosen": 9.882756233215332, + "log_odds_ratio": -9.903610043693334e-05, + "logits/chosen": -0.8339722156524658, + "logits/rejected": -0.8579087257385254, + "logps/chosen": -0.008268280886113644, + "logps/rejected": -2.6852235794067383, + "loss": 2.2308, + "nll_loss": 0.5577019453048706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008268280653283, + "rewards/margins": 0.2676955461502075, + "rewards/rejected": -0.26852235198020935, + "step": 4582 + }, + { + "epoch": 3.169432918395574, + "grad_norm": 9.087573051452637, + "learning_rate": 3.794759489780237e-05, + "log_odds_chosen": 6.8832855224609375, + "log_odds_ratio": -0.0470358170568943, + "logits/chosen": -0.7121328711509705, + "logits/rejected": -0.7107651233673096, + "logps/chosen": -0.022450348362326622, + "logps/rejected": -1.4015861749649048, + "loss": 1.4622, + "nll_loss": 0.36085256934165955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022450347896665335, + "rewards/margins": 0.13791358470916748, + "rewards/rejected": -0.14015862345695496, + "step": 4583 + }, + { + "epoch": 3.1701244813278007, + "grad_norm": 7.808243274688721, + "learning_rate": 3.794375288151222e-05, + "log_odds_chosen": 9.859292984008789, + "log_odds_ratio": -0.0006968683446757495, + "logits/chosen": -0.9944567084312439, + "logits/rejected": -1.0274537801742554, + "logps/chosen": -0.004056369420140982, + "logps/rejected": -2.3057827949523926, + "loss": 1.354, + "nll_loss": 0.33843401074409485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040563696529716253, + "rewards/margins": 0.23017263412475586, + "rewards/rejected": -0.23057827353477478, + "step": 4584 + }, + { + "epoch": 3.1708160442600275, + "grad_norm": 12.067472457885742, + "learning_rate": 3.793991086522207e-05, + "log_odds_chosen": 10.771336555480957, + "log_odds_ratio": -7.997899956535548e-05, + "logits/chosen": -0.7247877717018127, + "logits/rejected": -0.7443068027496338, + "logps/chosen": -0.011935079470276833, + "logps/rejected": -2.6619701385498047, + "loss": 2.0333, + "nll_loss": 0.5083144307136536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001193507923744619, + "rewards/margins": 0.2650035321712494, + "rewards/rejected": -0.2661970257759094, + "step": 4585 + }, + { + "epoch": 3.1715076071922543, + "grad_norm": 8.026774406433105, + "learning_rate": 3.7936068848931924e-05, + "log_odds_chosen": 9.327430725097656, + "log_odds_ratio": -0.0010386076755821705, + "logits/chosen": -0.7047549486160278, + "logits/rejected": -0.7773745656013489, + "logps/chosen": -0.012416575103998184, + "logps/rejected": -2.148869752883911, + "loss": 1.9018, + "nll_loss": 0.4753361642360687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012416575336828828, + "rewards/margins": 0.213645339012146, + "rewards/rejected": -0.21488699316978455, + "step": 4586 + }, + { + "epoch": 3.172199170124481, + "grad_norm": 9.1338472366333, + "learning_rate": 3.793222683264177e-05, + "log_odds_chosen": 9.807607650756836, + "log_odds_ratio": -0.00013375926937442273, + "logits/chosen": -0.5474683046340942, + "logits/rejected": -0.6458426117897034, + "logps/chosen": -0.00019876201986335218, + "logps/rejected": -1.5424573421478271, + "loss": 2.3267, + "nll_loss": 0.5816740393638611, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.987620271393098e-05, + "rewards/margins": 0.15422585606575012, + "rewards/rejected": -0.15424573421478271, + "step": 4587 + }, + { + "epoch": 3.172890733056708, + "grad_norm": 10.483997344970703, + "learning_rate": 3.792838481635162e-05, + "log_odds_chosen": 9.483867645263672, + "log_odds_ratio": -0.0002670464455150068, + "logits/chosen": -0.7305455207824707, + "logits/rejected": -0.7739405632019043, + "logps/chosen": -0.00530233234167099, + "logps/rejected": -1.971374273300171, + "loss": 1.3878, + "nll_loss": 0.34692639112472534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005302332574501634, + "rewards/margins": 0.19660718739032745, + "rewards/rejected": -0.19713741540908813, + "step": 4588 + }, + { + "epoch": 3.173582295988935, + "grad_norm": 11.079160690307617, + "learning_rate": 3.7924542800061475e-05, + "log_odds_chosen": 7.841558456420898, + "log_odds_ratio": -0.4125269949436188, + "logits/chosen": -0.4979378581047058, + "logits/rejected": -0.581967830657959, + "logps/chosen": -0.024180810898542404, + "logps/rejected": -1.787544846534729, + "loss": 2.3551, + "nll_loss": 0.5475137233734131, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002418080810457468, + "rewards/margins": 0.176336407661438, + "rewards/rejected": -0.17875447869300842, + "step": 4589 + }, + { + "epoch": 3.1742738589211617, + "grad_norm": 5.296082496643066, + "learning_rate": 3.792070078377132e-05, + "log_odds_chosen": 9.685503005981445, + "log_odds_ratio": -0.00016764621250331402, + "logits/chosen": -0.4837448000907898, + "logits/rejected": -0.56712806224823, + "logps/chosen": -0.0006031938828527927, + "logps/rejected": -1.852403998374939, + "loss": 3.0507, + "nll_loss": 0.7626625299453735, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.031939119566232e-05, + "rewards/margins": 0.18518008291721344, + "rewards/rejected": -0.18524041771888733, + "step": 4590 + }, + { + "epoch": 3.1749654218533885, + "grad_norm": 7.954952239990234, + "learning_rate": 3.791685876748118e-05, + "log_odds_chosen": 9.898430824279785, + "log_odds_ratio": -0.00040304564754478633, + "logits/chosen": -0.5487443208694458, + "logits/rejected": -0.7202839851379395, + "logps/chosen": -0.00048723159125074744, + "logps/rejected": -1.7513352632522583, + "loss": 1.1091, + "nll_loss": 0.27722257375717163, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8723159125074744e-05, + "rewards/margins": 0.17508479952812195, + "rewards/rejected": -0.17513352632522583, + "step": 4591 + }, + { + "epoch": 3.1756569847856153, + "grad_norm": 10.471096992492676, + "learning_rate": 3.7913016751191025e-05, + "log_odds_chosen": 9.12653923034668, + "log_odds_ratio": -0.001498258556239307, + "logits/chosen": -0.8056678771972656, + "logits/rejected": -0.7711480259895325, + "logps/chosen": -0.009058143012225628, + "logps/rejected": -1.626591682434082, + "loss": 1.6736, + "nll_loss": 0.4182409346103668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009058142895810306, + "rewards/margins": 0.1617533564567566, + "rewards/rejected": -0.1626591831445694, + "step": 4592 + }, + { + "epoch": 3.176348547717842, + "grad_norm": 7.665005683898926, + "learning_rate": 3.790917473490088e-05, + "log_odds_chosen": 9.794302940368652, + "log_odds_ratio": -9.794899233384058e-05, + "logits/chosen": -0.4575244188308716, + "logits/rejected": -0.5109673738479614, + "logps/chosen": -0.0003009043575730175, + "logps/rejected": -1.6932740211486816, + "loss": 1.7761, + "nll_loss": 0.44400835037231445, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0090435757301748e-05, + "rewards/margins": 0.16929732263088226, + "rewards/rejected": -0.16932742297649384, + "step": 4593 + }, + { + "epoch": 3.177040110650069, + "grad_norm": 12.323850631713867, + "learning_rate": 3.790533271861073e-05, + "log_odds_chosen": 8.642744064331055, + "log_odds_ratio": -0.00040836347034201026, + "logits/chosen": -0.7057383060455322, + "logits/rejected": -0.7906292080879211, + "logps/chosen": -0.0008442990947514772, + "logps/rejected": -1.459619402885437, + "loss": 1.6049, + "nll_loss": 0.40117207169532776, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.442990656476468e-05, + "rewards/margins": 0.14587751030921936, + "rewards/rejected": -0.1459619402885437, + "step": 4594 + }, + { + "epoch": 3.177731673582296, + "grad_norm": 10.345414161682129, + "learning_rate": 3.790149070232058e-05, + "log_odds_chosen": 8.904953002929688, + "log_odds_ratio": -0.0004517165943980217, + "logits/chosen": -0.9660544395446777, + "logits/rejected": -0.9590066075325012, + "logps/chosen": -0.006094220094382763, + "logps/rejected": -1.2050225734710693, + "loss": 2.3718, + "nll_loss": 0.5929133892059326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006094219861552119, + "rewards/margins": 0.11989283561706543, + "rewards/rejected": -0.12050226330757141, + "step": 4595 + }, + { + "epoch": 3.1784232365145226, + "grad_norm": 10.865817070007324, + "learning_rate": 3.789764868603043e-05, + "log_odds_chosen": 7.841320991516113, + "log_odds_ratio": -0.1730465143918991, + "logits/chosen": -0.350616455078125, + "logits/rejected": -0.4073426425457001, + "logps/chosen": -0.033529266715049744, + "logps/rejected": -0.9049347043037415, + "loss": 1.5156, + "nll_loss": 0.36158961057662964, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033529268112033606, + "rewards/margins": 0.08714054524898529, + "rewards/rejected": -0.09049347043037415, + "step": 4596 + }, + { + "epoch": 3.1791147994467495, + "grad_norm": 8.323912620544434, + "learning_rate": 3.789380666974028e-05, + "log_odds_chosen": 9.956376075744629, + "log_odds_ratio": -0.0005759909981861711, + "logits/chosen": -0.5365810990333557, + "logits/rejected": -0.617435872554779, + "logps/chosen": -0.0005928762257099152, + "logps/rejected": -2.054478406906128, + "loss": 1.5833, + "nll_loss": 0.3957583010196686, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.928762402618304e-05, + "rewards/margins": 0.20538857579231262, + "rewards/rejected": -0.20544785261154175, + "step": 4597 + }, + { + "epoch": 3.1798063623789763, + "grad_norm": 22.66376304626465, + "learning_rate": 3.788996465345013e-05, + "log_odds_chosen": 7.783935546875, + "log_odds_ratio": -0.05122813954949379, + "logits/chosen": -0.7455435991287231, + "logits/rejected": -0.7452982664108276, + "logps/chosen": -0.21089009940624237, + "logps/rejected": -1.9292497634887695, + "loss": 1.9919, + "nll_loss": 0.492841899394989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021089009940624237, + "rewards/margins": 0.1718359738588333, + "rewards/rejected": -0.19292497634887695, + "step": 4598 + }, + { + "epoch": 3.180497925311203, + "grad_norm": 10.29366397857666, + "learning_rate": 3.788612263715998e-05, + "log_odds_chosen": 7.870556354522705, + "log_odds_ratio": -0.030002042651176453, + "logits/chosen": -0.7490079998970032, + "logits/rejected": -0.7733038067817688, + "logps/chosen": -0.008063086308538914, + "logps/rejected": -1.3171665668487549, + "loss": 2.0297, + "nll_loss": 0.504428505897522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008063087007030845, + "rewards/margins": 0.13091033697128296, + "rewards/rejected": -0.13171665370464325, + "step": 4599 + }, + { + "epoch": 3.18118948824343, + "grad_norm": 8.865200996398926, + "learning_rate": 3.788228062086984e-05, + "log_odds_chosen": 9.468286514282227, + "log_odds_ratio": -0.004047780763357878, + "logits/chosen": -0.5546740889549255, + "logits/rejected": -0.6222197413444519, + "logps/chosen": -0.0008241356699727476, + "logps/rejected": -1.7708520889282227, + "loss": 2.0809, + "nll_loss": 0.5198326110839844, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.241356408689171e-05, + "rewards/margins": 0.17700281739234924, + "rewards/rejected": -0.17708522081375122, + "step": 4600 + }, + { + "epoch": 3.181881051175657, + "grad_norm": 6.551109790802002, + "learning_rate": 3.7878438604579684e-05, + "log_odds_chosen": 8.998653411865234, + "log_odds_ratio": -0.00079822022235021, + "logits/chosen": -0.2313028872013092, + "logits/rejected": -0.26333147287368774, + "logps/chosen": -0.0006675302283838391, + "logps/rejected": -1.1907142400741577, + "loss": 1.2586, + "nll_loss": 0.3145686388015747, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675302574876696e-05, + "rewards/margins": 0.11900466680526733, + "rewards/rejected": -0.11907142400741577, + "step": 4601 + }, + { + "epoch": 3.1825726141078836, + "grad_norm": 8.488152503967285, + "learning_rate": 3.7874596588289536e-05, + "log_odds_chosen": 8.680622100830078, + "log_odds_ratio": -0.001602665986865759, + "logits/chosen": -0.5562158226966858, + "logits/rejected": -0.596197247505188, + "logps/chosen": -0.0017571898642927408, + "logps/rejected": -1.3305821418762207, + "loss": 2.0244, + "nll_loss": 0.5059409141540527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017571900389157236, + "rewards/margins": 0.1328824907541275, + "rewards/rejected": -0.13305820524692535, + "step": 4602 + }, + { + "epoch": 3.1832641770401104, + "grad_norm": 8.795045852661133, + "learning_rate": 3.787075457199939e-05, + "log_odds_chosen": 9.601675987243652, + "log_odds_ratio": -0.0003586334642022848, + "logits/chosen": -0.5978565812110901, + "logits/rejected": -0.6616254448890686, + "logps/chosen": -0.0011712521081790328, + "logps/rejected": -1.742924451828003, + "loss": 2.3246, + "nll_loss": 0.5811123251914978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011712520790752023, + "rewards/margins": 0.17417532205581665, + "rewards/rejected": -0.1742924451828003, + "step": 4603 + }, + { + "epoch": 3.1839557399723377, + "grad_norm": 11.541089057922363, + "learning_rate": 3.786691255570924e-05, + "log_odds_chosen": 9.450611114501953, + "log_odds_ratio": -0.0001150656898971647, + "logits/chosen": -0.6971194744110107, + "logits/rejected": -0.7279193997383118, + "logps/chosen": -0.0005755338934250176, + "logps/rejected": -1.7603645324707031, + "loss": 2.0298, + "nll_loss": 0.5074312686920166, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7553388614906e-05, + "rewards/margins": 0.1759788990020752, + "rewards/rejected": -0.17603644728660583, + "step": 4604 + }, + { + "epoch": 3.1846473029045645, + "grad_norm": 6.5050482749938965, + "learning_rate": 3.7863070539419087e-05, + "log_odds_chosen": 8.913361549377441, + "log_odds_ratio": -0.0013621591497212648, + "logits/chosen": -0.23251497745513916, + "logits/rejected": -0.23843058943748474, + "logps/chosen": -0.034153182059526443, + "logps/rejected": -2.1316874027252197, + "loss": 1.797, + "nll_loss": 0.4491085112094879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034153189044445753, + "rewards/margins": 0.2097533941268921, + "rewards/rejected": -0.21316871047019958, + "step": 4605 + }, + { + "epoch": 3.1853388658367914, + "grad_norm": 11.801334381103516, + "learning_rate": 3.785922852312894e-05, + "log_odds_chosen": 7.46860408782959, + "log_odds_ratio": -0.024415817111730576, + "logits/chosen": -0.6700199246406555, + "logits/rejected": -0.7383530735969543, + "logps/chosen": -0.010652041994035244, + "logps/rejected": -1.7683783769607544, + "loss": 2.0786, + "nll_loss": 0.5172020196914673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010652042692527175, + "rewards/margins": 0.17577266693115234, + "rewards/rejected": -0.17683786153793335, + "step": 4606 + }, + { + "epoch": 3.186030428769018, + "grad_norm": 7.657423496246338, + "learning_rate": 3.785538650683879e-05, + "log_odds_chosen": 8.825857162475586, + "log_odds_ratio": -0.0038504679687321186, + "logits/chosen": -0.6110005378723145, + "logits/rejected": -0.6402556300163269, + "logps/chosen": -0.002229629550129175, + "logps/rejected": -1.0497758388519287, + "loss": 1.3781, + "nll_loss": 0.34415027499198914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002229629608336836, + "rewards/margins": 0.10475462675094604, + "rewards/rejected": -0.10497759282588959, + "step": 4607 + }, + { + "epoch": 3.186721991701245, + "grad_norm": 4.710501194000244, + "learning_rate": 3.7851544490548644e-05, + "log_odds_chosen": 8.452669143676758, + "log_odds_ratio": -0.0828661322593689, + "logits/chosen": -0.49123167991638184, + "logits/rejected": -0.48999887704849243, + "logps/chosen": -0.034764111042022705, + "logps/rejected": -2.062809944152832, + "loss": 0.9485, + "nll_loss": 0.22884541749954224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003476410871371627, + "rewards/margins": 0.2028045952320099, + "rewards/rejected": -0.20628100633621216, + "step": 4608 + }, + { + "epoch": 3.187413554633472, + "grad_norm": 9.057634353637695, + "learning_rate": 3.7847702474258496e-05, + "log_odds_chosen": 7.493239402770996, + "log_odds_ratio": -0.0032867516856640577, + "logits/chosen": -0.0850645899772644, + "logits/rejected": -0.13177891075611115, + "logps/chosen": -0.01688811369240284, + "logps/rejected": -1.6319787502288818, + "loss": 1.76, + "nll_loss": 0.43966561555862427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016888114623725414, + "rewards/margins": 0.16150905191898346, + "rewards/rejected": -0.16319787502288818, + "step": 4609 + }, + { + "epoch": 3.1881051175656987, + "grad_norm": 7.127264499664307, + "learning_rate": 3.784386045796834e-05, + "log_odds_chosen": 9.543813705444336, + "log_odds_ratio": -0.00027629200485534966, + "logits/chosen": -0.680914044380188, + "logits/rejected": -0.7802472710609436, + "logps/chosen": -0.0005112183280289173, + "logps/rejected": -1.5584700107574463, + "loss": 1.5544, + "nll_loss": 0.3885806202888489, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1121834985679016e-05, + "rewards/margins": 0.1557958722114563, + "rewards/rejected": -0.1558469831943512, + "step": 4610 + }, + { + "epoch": 3.1887966804979255, + "grad_norm": 10.605690956115723, + "learning_rate": 3.7840018441678194e-05, + "log_odds_chosen": 10.069377899169922, + "log_odds_ratio": -8.173806418199092e-05, + "logits/chosen": -0.7247356176376343, + "logits/rejected": -0.7631049752235413, + "logps/chosen": -0.0006380442646332085, + "logps/rejected": -2.2513394355773926, + "loss": 1.7809, + "nll_loss": 0.44522354006767273, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380442209774628e-05, + "rewards/margins": 0.22507014870643616, + "rewards/rejected": -0.22513394057750702, + "step": 4611 + }, + { + "epoch": 3.1894882434301524, + "grad_norm": 13.950614929199219, + "learning_rate": 3.783617642538805e-05, + "log_odds_chosen": 8.581480026245117, + "log_odds_ratio": -0.001877216505818069, + "logits/chosen": -0.7479287385940552, + "logits/rejected": -0.8167770504951477, + "logps/chosen": -0.0037166469264775515, + "logps/rejected": -2.0587105751037598, + "loss": 1.4035, + "nll_loss": 0.3506844639778137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003716647333931178, + "rewards/margins": 0.20549941062927246, + "rewards/rejected": -0.2058710753917694, + "step": 4612 + }, + { + "epoch": 3.190179806362379, + "grad_norm": 8.323124885559082, + "learning_rate": 3.78323344090979e-05, + "log_odds_chosen": 9.50680160522461, + "log_odds_ratio": -0.00012267159763723612, + "logits/chosen": -0.42881646752357483, + "logits/rejected": -0.4316210150718689, + "logps/chosen": -0.00014182465383782983, + "logps/rejected": -1.0632882118225098, + "loss": 1.4954, + "nll_loss": 0.3738468587398529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4182465747580864e-05, + "rewards/margins": 0.10631464421749115, + "rewards/rejected": -0.1063288226723671, + "step": 4613 + }, + { + "epoch": 3.190871369294606, + "grad_norm": 7.727511405944824, + "learning_rate": 3.7828492392807745e-05, + "log_odds_chosen": 6.572525501251221, + "log_odds_ratio": -0.040850505232810974, + "logits/chosen": -0.5981727242469788, + "logits/rejected": -0.7035253047943115, + "logps/chosen": -0.012225775048136711, + "logps/rejected": -0.6517828106880188, + "loss": 1.4826, + "nll_loss": 0.3665538430213928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012225775280967355, + "rewards/margins": 0.06395570933818817, + "rewards/rejected": -0.0651782900094986, + "step": 4614 + }, + { + "epoch": 3.191562932226833, + "grad_norm": 6.5258660316467285, + "learning_rate": 3.7824650376517604e-05, + "log_odds_chosen": 9.075474739074707, + "log_odds_ratio": -0.00019371425150893629, + "logits/chosen": -0.7500657439231873, + "logits/rejected": -0.8560510277748108, + "logps/chosen": -0.009036424569785595, + "logps/rejected": -2.4027295112609863, + "loss": 2.0085, + "nll_loss": 0.5021045207977295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009036423871293664, + "rewards/margins": 0.23936930298805237, + "rewards/rejected": -0.24027293920516968, + "step": 4615 + }, + { + "epoch": 3.1922544951590597, + "grad_norm": 7.792971611022949, + "learning_rate": 3.782080836022745e-05, + "log_odds_chosen": 9.568330764770508, + "log_odds_ratio": -0.0003454481775406748, + "logits/chosen": -0.705230712890625, + "logits/rejected": -0.7632952332496643, + "logps/chosen": -0.00021943078900221735, + "logps/rejected": -1.222402811050415, + "loss": 1.8451, + "nll_loss": 0.4612407982349396, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1943080355413258e-05, + "rewards/margins": 0.12221834063529968, + "rewards/rejected": -0.12224028259515762, + "step": 4616 + }, + { + "epoch": 3.1929460580912865, + "grad_norm": 8.6287260055542, + "learning_rate": 3.78169663439373e-05, + "log_odds_chosen": 8.9024076461792, + "log_odds_ratio": -0.029390254989266396, + "logits/chosen": -0.546991765499115, + "logits/rejected": -0.5764098167419434, + "logps/chosen": -0.13760261237621307, + "logps/rejected": -2.3649721145629883, + "loss": 1.7657, + "nll_loss": 0.43848732113838196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013760262168943882, + "rewards/margins": 0.22273695468902588, + "rewards/rejected": -0.23649722337722778, + "step": 4617 + }, + { + "epoch": 3.1936376210235133, + "grad_norm": 8.359898567199707, + "learning_rate": 3.7813124327647155e-05, + "log_odds_chosen": 7.723917484283447, + "log_odds_ratio": -0.30947861075401306, + "logits/chosen": -0.6778655648231506, + "logits/rejected": -0.6863540410995483, + "logps/chosen": -0.04144514724612236, + "logps/rejected": -1.8415319919586182, + "loss": 1.5114, + "nll_loss": 0.3469085991382599, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004144514445215464, + "rewards/margins": 0.1800086796283722, + "rewards/rejected": -0.18415319919586182, + "step": 4618 + }, + { + "epoch": 3.19432918395574, + "grad_norm": 6.584410667419434, + "learning_rate": 3.7809282311357e-05, + "log_odds_chosen": 9.380361557006836, + "log_odds_ratio": -0.00014648567594122142, + "logits/chosen": -0.2656836211681366, + "logits/rejected": -0.38636136054992676, + "logps/chosen": -0.000417731876950711, + "logps/rejected": -1.453555703163147, + "loss": 1.4629, + "nll_loss": 0.3657173216342926, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.177319351583719e-05, + "rewards/margins": 0.1453137993812561, + "rewards/rejected": -0.14535556733608246, + "step": 4619 + }, + { + "epoch": 3.195020746887967, + "grad_norm": 9.390706062316895, + "learning_rate": 3.780544029506685e-05, + "log_odds_chosen": 7.611919403076172, + "log_odds_ratio": -0.09439986199140549, + "logits/chosen": -0.7868224382400513, + "logits/rejected": -0.7948030829429626, + "logps/chosen": -0.02480524592101574, + "logps/rejected": -1.5412051677703857, + "loss": 2.0887, + "nll_loss": 0.5127406120300293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024805248249322176, + "rewards/margins": 0.15163999795913696, + "rewards/rejected": -0.1541205197572708, + "step": 4620 + }, + { + "epoch": 3.195712309820194, + "grad_norm": 7.162692070007324, + "learning_rate": 3.7801598278776705e-05, + "log_odds_chosen": 8.350488662719727, + "log_odds_ratio": -0.0006202237564139068, + "logits/chosen": -0.5837564468383789, + "logits/rejected": -0.596234917640686, + "logps/chosen": -0.0023574563674628735, + "logps/rejected": -1.5767695903778076, + "loss": 1.3595, + "nll_loss": 0.33981087803840637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023574565420858562, + "rewards/margins": 0.15744122862815857, + "rewards/rejected": -0.15767696499824524, + "step": 4621 + }, + { + "epoch": 3.1964038727524207, + "grad_norm": 9.734930992126465, + "learning_rate": 3.779775626248656e-05, + "log_odds_chosen": 8.387091636657715, + "log_odds_ratio": -0.07301833480596542, + "logits/chosen": -0.7368804216384888, + "logits/rejected": -0.8132337331771851, + "logps/chosen": -0.028411580249667168, + "logps/rejected": -1.6419252157211304, + "loss": 2.2744, + "nll_loss": 0.5613013505935669, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028411580715328455, + "rewards/margins": 0.16135136783123016, + "rewards/rejected": -0.16419252753257751, + "step": 4622 + }, + { + "epoch": 3.1970954356846475, + "grad_norm": 10.370285987854004, + "learning_rate": 3.7793914246196403e-05, + "log_odds_chosen": 10.116138458251953, + "log_odds_ratio": -6.656107871094719e-05, + "logits/chosen": -0.6933034062385559, + "logits/rejected": -0.7805424928665161, + "logps/chosen": -0.00021460730931721628, + "logps/rejected": -1.8299367427825928, + "loss": 2.3349, + "nll_loss": 0.5837261080741882, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1460733478306793e-05, + "rewards/margins": 0.18297219276428223, + "rewards/rejected": -0.18299366533756256, + "step": 4623 + }, + { + "epoch": 3.1977869986168743, + "grad_norm": 7.127041339874268, + "learning_rate": 3.779007222990626e-05, + "log_odds_chosen": 9.852951049804688, + "log_odds_ratio": -0.00026556866941973567, + "logits/chosen": -0.4125402271747589, + "logits/rejected": -0.44134292006492615, + "logps/chosen": -0.004293524660170078, + "logps/rejected": -2.5027565956115723, + "loss": 1.6023, + "nll_loss": 0.4005424678325653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004293524834793061, + "rewards/margins": 0.24984632432460785, + "rewards/rejected": -0.2502756714820862, + "step": 4624 + }, + { + "epoch": 3.198478561549101, + "grad_norm": 8.07728099822998, + "learning_rate": 3.778623021361611e-05, + "log_odds_chosen": 8.688798904418945, + "log_odds_ratio": -0.0003031464875675738, + "logits/chosen": -0.6191985011100769, + "logits/rejected": -0.6176548004150391, + "logps/chosen": -0.0016528278356418014, + "logps/rejected": -1.2181322574615479, + "loss": 1.6133, + "nll_loss": 0.40328437089920044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016528279229532927, + "rewards/margins": 0.12164793908596039, + "rewards/rejected": -0.12181322276592255, + "step": 4625 + }, + { + "epoch": 3.199170124481328, + "grad_norm": 9.304462432861328, + "learning_rate": 3.778238819732596e-05, + "log_odds_chosen": 8.981358528137207, + "log_odds_ratio": -0.0006013654638081789, + "logits/chosen": -0.578431248664856, + "logits/rejected": -0.6131760478019714, + "logps/chosen": -0.008758382871747017, + "logps/rejected": -2.5303421020507812, + "loss": 1.6803, + "nll_loss": 0.42000359296798706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008758382173255086, + "rewards/margins": 0.25215837359428406, + "rewards/rejected": -0.25303423404693604, + "step": 4626 + }, + { + "epoch": 3.199861687413555, + "grad_norm": 11.000825881958008, + "learning_rate": 3.777854618103581e-05, + "log_odds_chosen": 8.437768936157227, + "log_odds_ratio": -0.0017144496086984873, + "logits/chosen": -0.2553323805332184, + "logits/rejected": -0.2956623435020447, + "logps/chosen": -0.0262621957808733, + "logps/rejected": -1.871435523033142, + "loss": 1.5366, + "nll_loss": 0.38397151231765747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002626219531521201, + "rewards/margins": 0.18451733887195587, + "rewards/rejected": -0.18714354932308197, + "step": 4627 + }, + { + "epoch": 3.2005532503457816, + "grad_norm": 8.009211540222168, + "learning_rate": 3.777470416474566e-05, + "log_odds_chosen": 8.235366821289062, + "log_odds_ratio": -0.0006904486217536032, + "logits/chosen": -0.5074923038482666, + "logits/rejected": -0.5455666184425354, + "logps/chosen": -0.015887683257460594, + "logps/rejected": -1.4512051343917847, + "loss": 1.4743, + "nll_loss": 0.3685183823108673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015887684421613812, + "rewards/margins": 0.14353173971176147, + "rewards/rejected": -0.1451205164194107, + "step": 4628 + }, + { + "epoch": 3.2012448132780085, + "grad_norm": 10.77706241607666, + "learning_rate": 3.777086214845551e-05, + "log_odds_chosen": 8.013542175292969, + "log_odds_ratio": -0.001794246258214116, + "logits/chosen": -0.5844020843505859, + "logits/rejected": -0.6350750923156738, + "logps/chosen": -0.008887016214430332, + "logps/rejected": -1.4342986345291138, + "loss": 1.9349, + "nll_loss": 0.4835505485534668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008887016447260976, + "rewards/margins": 0.14254117012023926, + "rewards/rejected": -0.14342986047267914, + "step": 4629 + }, + { + "epoch": 3.2019363762102353, + "grad_norm": 9.606144905090332, + "learning_rate": 3.7767020132165364e-05, + "log_odds_chosen": 8.354914665222168, + "log_odds_ratio": -0.005320781376212835, + "logits/chosen": -0.3048575818538666, + "logits/rejected": -0.3707965612411499, + "logps/chosen": -0.03981057181954384, + "logps/rejected": -1.787867784500122, + "loss": 1.398, + "nll_loss": 0.3489583134651184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003981057554483414, + "rewards/margins": 0.17480574548244476, + "rewards/rejected": -0.17878679931163788, + "step": 4630 + }, + { + "epoch": 3.202627939142462, + "grad_norm": 9.291351318359375, + "learning_rate": 3.7763178115875216e-05, + "log_odds_chosen": 10.094281196594238, + "log_odds_ratio": -7.057151378830895e-05, + "logits/chosen": -0.5524444580078125, + "logits/rejected": -0.6190738081932068, + "logps/chosen": -0.00044838193571195006, + "logps/rejected": -2.0691215991973877, + "loss": 2.091, + "nll_loss": 0.5227524042129517, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.483819066081196e-05, + "rewards/margins": 0.20686733722686768, + "rewards/rejected": -0.20691215991973877, + "step": 4631 + }, + { + "epoch": 3.203319502074689, + "grad_norm": 7.177628040313721, + "learning_rate": 3.775933609958506e-05, + "log_odds_chosen": 6.744282245635986, + "log_odds_ratio": -0.07658500224351883, + "logits/chosen": -0.2799968123435974, + "logits/rejected": -0.276175320148468, + "logps/chosen": -0.06268740445375443, + "logps/rejected": -1.8641242980957031, + "loss": 1.4837, + "nll_loss": 0.3632669448852539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006268740631639957, + "rewards/margins": 0.18014369904994965, + "rewards/rejected": -0.18641243875026703, + "step": 4632 + }, + { + "epoch": 3.204011065006916, + "grad_norm": 10.223982810974121, + "learning_rate": 3.7755494083294914e-05, + "log_odds_chosen": 5.96131706237793, + "log_odds_ratio": -0.437809556722641, + "logits/chosen": -0.4530424475669861, + "logits/rejected": -0.4831022024154663, + "logps/chosen": -0.05227883160114288, + "logps/rejected": -0.825284481048584, + "loss": 2.3553, + "nll_loss": 0.545049250125885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005227882880717516, + "rewards/margins": 0.07730056345462799, + "rewards/rejected": -0.08252844959497452, + "step": 4633 + }, + { + "epoch": 3.2047026279391426, + "grad_norm": 7.440293788909912, + "learning_rate": 3.775165206700477e-05, + "log_odds_chosen": 7.756006717681885, + "log_odds_ratio": -0.1270664632320404, + "logits/chosen": -0.7057449221611023, + "logits/rejected": -0.7199209332466125, + "logps/chosen": -0.04400571063160896, + "logps/rejected": -1.755561113357544, + "loss": 1.4132, + "nll_loss": 0.3405888080596924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004400571342557669, + "rewards/margins": 0.17115554213523865, + "rewards/rejected": -0.17555610835552216, + "step": 4634 + }, + { + "epoch": 3.2053941908713695, + "grad_norm": 121.2735366821289, + "learning_rate": 3.774781005071462e-05, + "log_odds_chosen": 7.710501670837402, + "log_odds_ratio": -0.44360071420669556, + "logits/chosen": -0.39047908782958984, + "logits/rejected": -0.46261516213417053, + "logps/chosen": -0.06811662018299103, + "logps/rejected": -1.5281715393066406, + "loss": 1.5479, + "nll_loss": 0.342612624168396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0068116625770926476, + "rewards/margins": 0.1460055112838745, + "rewards/rejected": -0.15281715989112854, + "step": 4635 + }, + { + "epoch": 3.2060857538035963, + "grad_norm": 7.653830528259277, + "learning_rate": 3.7743968034424465e-05, + "log_odds_chosen": 9.797605514526367, + "log_odds_ratio": -0.0001808924862416461, + "logits/chosen": -0.7511324286460876, + "logits/rejected": -0.790381908416748, + "logps/chosen": -0.0045016733929514885, + "logps/rejected": -2.013901948928833, + "loss": 1.6972, + "nll_loss": 0.42428848147392273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004501673683989793, + "rewards/margins": 0.20094002783298492, + "rewards/rejected": -0.20139019191265106, + "step": 4636 + }, + { + "epoch": 3.206777316735823, + "grad_norm": 6.133581161499023, + "learning_rate": 3.774012601813432e-05, + "log_odds_chosen": 7.885909080505371, + "log_odds_ratio": -0.005818231031298637, + "logits/chosen": -0.3401247560977936, + "logits/rejected": -0.3749280273914337, + "logps/chosen": -0.01006716676056385, + "logps/rejected": -1.574007272720337, + "loss": 1.4664, + "nll_loss": 0.3660276234149933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010067166294902563, + "rewards/margins": 0.15639400482177734, + "rewards/rejected": -0.1574007272720337, + "step": 4637 + }, + { + "epoch": 3.20746887966805, + "grad_norm": 7.458352565765381, + "learning_rate": 3.773628400184417e-05, + "log_odds_chosen": 7.854743003845215, + "log_odds_ratio": -0.07570353895425797, + "logits/chosen": -0.5678203105926514, + "logits/rejected": -0.5638433694839478, + "logps/chosen": -0.0164833664894104, + "logps/rejected": -1.0614879131317139, + "loss": 1.7779, + "nll_loss": 0.43691444396972656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016483367653563619, + "rewards/margins": 0.10450047254562378, + "rewards/rejected": -0.10614880919456482, + "step": 4638 + }, + { + "epoch": 3.2081604426002768, + "grad_norm": 6.762449264526367, + "learning_rate": 3.7732441985554015e-05, + "log_odds_chosen": 9.169034957885742, + "log_odds_ratio": -0.00012380752013996243, + "logits/chosen": -0.2560634911060333, + "logits/rejected": -0.2874855101108551, + "logps/chosen": -0.00047212644130922854, + "logps/rejected": -1.5481019020080566, + "loss": 1.2975, + "nll_loss": 0.32436853647232056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.721264849649742e-05, + "rewards/margins": 0.15476298332214355, + "rewards/rejected": -0.15481020510196686, + "step": 4639 + }, + { + "epoch": 3.2088520055325036, + "grad_norm": 6.330355644226074, + "learning_rate": 3.7728599969263875e-05, + "log_odds_chosen": 9.285317420959473, + "log_odds_ratio": -0.0011506613809615374, + "logits/chosen": -0.3541565537452698, + "logits/rejected": -0.34927690029144287, + "logps/chosen": -0.006774414796382189, + "logps/rejected": -2.2020339965820312, + "loss": 1.4599, + "nll_loss": 0.3648587465286255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006774414796382189, + "rewards/margins": 0.21952593326568604, + "rewards/rejected": -0.22020339965820312, + "step": 4640 + }, + { + "epoch": 3.2095435684647304, + "grad_norm": 12.923638343811035, + "learning_rate": 3.772475795297372e-05, + "log_odds_chosen": 9.016387939453125, + "log_odds_ratio": -0.0076195537112653255, + "logits/chosen": -0.5307125449180603, + "logits/rejected": -0.6310874819755554, + "logps/chosen": -0.004158839583396912, + "logps/rejected": -1.8606791496276855, + "loss": 1.7386, + "nll_loss": 0.4339001178741455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00041588395833969116, + "rewards/margins": 0.18565204739570618, + "rewards/rejected": -0.18606792390346527, + "step": 4641 + }, + { + "epoch": 3.2102351313969573, + "grad_norm": 6.46682071685791, + "learning_rate": 3.772091593668357e-05, + "log_odds_chosen": 8.791299819946289, + "log_odds_ratio": -0.0005798639031127095, + "logits/chosen": -0.37945863604545593, + "logits/rejected": -0.31576138734817505, + "logps/chosen": -0.00813287403434515, + "logps/rejected": -1.8918553590774536, + "loss": 1.5963, + "nll_loss": 0.3990292549133301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008132873917929828, + "rewards/margins": 0.18837225437164307, + "rewards/rejected": -0.1891855150461197, + "step": 4642 + }, + { + "epoch": 3.210926694329184, + "grad_norm": 10.52108097076416, + "learning_rate": 3.7717073920393425e-05, + "log_odds_chosen": 8.242277145385742, + "log_odds_ratio": -0.009203329682350159, + "logits/chosen": -0.21045435965061188, + "logits/rejected": -0.2219468653202057, + "logps/chosen": -0.015710486099123955, + "logps/rejected": -1.6792577505111694, + "loss": 1.8406, + "nll_loss": 0.45923006534576416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015710486331954598, + "rewards/margins": 0.1663547158241272, + "rewards/rejected": -0.16792577505111694, + "step": 4643 + }, + { + "epoch": 3.211618257261411, + "grad_norm": 9.322639465332031, + "learning_rate": 3.771323190410328e-05, + "log_odds_chosen": 8.99551010131836, + "log_odds_ratio": -0.0010692543582990766, + "logits/chosen": -0.7445977926254272, + "logits/rejected": -0.706108808517456, + "logps/chosen": -0.0009857756085693836, + "logps/rejected": -1.4609174728393555, + "loss": 1.4885, + "nll_loss": 0.37202388048171997, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.85775695880875e-05, + "rewards/margins": 0.145993173122406, + "rewards/rejected": -0.1460917592048645, + "step": 4644 + }, + { + "epoch": 3.2123098201936378, + "grad_norm": 7.412604808807373, + "learning_rate": 3.770938988781312e-05, + "log_odds_chosen": 8.64638900756836, + "log_odds_ratio": -0.08681802451610565, + "logits/chosen": -0.6938418745994568, + "logits/rejected": -0.6647101044654846, + "logps/chosen": -0.01793830655515194, + "logps/rejected": -1.8186700344085693, + "loss": 0.9929, + "nll_loss": 0.23953798413276672, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017938308883458376, + "rewards/margins": 0.18007317185401917, + "rewards/rejected": -0.18186700344085693, + "step": 4645 + }, + { + "epoch": 3.2130013831258646, + "grad_norm": 6.7649149894714355, + "learning_rate": 3.7705547871522976e-05, + "log_odds_chosen": 8.722943305969238, + "log_odds_ratio": -0.0013751662336289883, + "logits/chosen": -0.6181378364562988, + "logits/rejected": -0.5605261325836182, + "logps/chosen": -0.017265386879444122, + "logps/rejected": -2.7568469047546387, + "loss": 1.6982, + "nll_loss": 0.4244130849838257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017265386413782835, + "rewards/margins": 0.27395814657211304, + "rewards/rejected": -0.2756847143173218, + "step": 4646 + }, + { + "epoch": 3.2136929460580914, + "grad_norm": 9.90661907196045, + "learning_rate": 3.770170585523283e-05, + "log_odds_chosen": 8.376338005065918, + "log_odds_ratio": -0.0018616068409755826, + "logits/chosen": -0.4833725094795227, + "logits/rejected": -0.5298304557800293, + "logps/chosen": -0.016209768131375313, + "logps/rejected": -1.9687637090682983, + "loss": 1.4897, + "nll_loss": 0.3722422420978546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016209769528359175, + "rewards/margins": 0.19525539875030518, + "rewards/rejected": -0.1968763917684555, + "step": 4647 + }, + { + "epoch": 3.2143845089903182, + "grad_norm": 9.517404556274414, + "learning_rate": 3.7697863838942674e-05, + "log_odds_chosen": 9.411402702331543, + "log_odds_ratio": -0.00038138747913762927, + "logits/chosen": -0.8192919492721558, + "logits/rejected": -0.7597867846488953, + "logps/chosen": -0.02547089383006096, + "logps/rejected": -2.002095937728882, + "loss": 1.7269, + "nll_loss": 0.431691974401474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025470894761383533, + "rewards/margins": 0.19766250252723694, + "rewards/rejected": -0.2002095878124237, + "step": 4648 + }, + { + "epoch": 3.215076071922545, + "grad_norm": 4.255143165588379, + "learning_rate": 3.769402182265253e-05, + "log_odds_chosen": 8.469841003417969, + "log_odds_ratio": -0.0006997321615926921, + "logits/chosen": -0.44030725955963135, + "logits/rejected": -0.4799768924713135, + "logps/chosen": -0.01644117198884487, + "logps/rejected": -1.5012691020965576, + "loss": 1.9054, + "nll_loss": 0.4762773811817169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001644117059186101, + "rewards/margins": 0.1484827995300293, + "rewards/rejected": -0.15012691915035248, + "step": 4649 + }, + { + "epoch": 3.215767634854772, + "grad_norm": 11.852595329284668, + "learning_rate": 3.769017980636238e-05, + "log_odds_chosen": 8.309383392333984, + "log_odds_ratio": -0.04895387962460518, + "logits/chosen": -0.5808581113815308, + "logits/rejected": -0.6516112089157104, + "logps/chosen": -0.010794704779982567, + "logps/rejected": -1.5447827577590942, + "loss": 1.8383, + "nll_loss": 0.4546731412410736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010794706176966429, + "rewards/margins": 0.1533988118171692, + "rewards/rejected": -0.1544782817363739, + "step": 4650 + }, + { + "epoch": 3.2164591977869987, + "grad_norm": 16.106447219848633, + "learning_rate": 3.768633779007223e-05, + "log_odds_chosen": 9.898414611816406, + "log_odds_ratio": -8.67611524881795e-05, + "logits/chosen": -0.503020703792572, + "logits/rejected": -0.518354058265686, + "logps/chosen": -0.000286134920315817, + "logps/rejected": -1.6814404726028442, + "loss": 1.6288, + "nll_loss": 0.40719372034072876, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8613490940188058e-05, + "rewards/margins": 0.16811543703079224, + "rewards/rejected": -0.16814404726028442, + "step": 4651 + }, + { + "epoch": 3.2171507607192256, + "grad_norm": 13.375833511352539, + "learning_rate": 3.7682495773782084e-05, + "log_odds_chosen": 8.774674415588379, + "log_odds_ratio": -0.0004774326807819307, + "logits/chosen": -0.5813500881195068, + "logits/rejected": -0.640762448310852, + "logps/chosen": -0.016906345263123512, + "logps/rejected": -2.210822105407715, + "loss": 2.4823, + "nll_loss": 0.6205355525016785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016906345263123512, + "rewards/margins": 0.2193915843963623, + "rewards/rejected": -0.22108224034309387, + "step": 4652 + }, + { + "epoch": 3.2178423236514524, + "grad_norm": 12.245010375976562, + "learning_rate": 3.7678653757491936e-05, + "log_odds_chosen": 7.981349945068359, + "log_odds_ratio": -0.042398203164339066, + "logits/chosen": -0.6212899684906006, + "logits/rejected": -0.6401181221008301, + "logps/chosen": -0.0034417440183460712, + "logps/rejected": -1.4640731811523438, + "loss": 1.0523, + "nll_loss": 0.2588362991809845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034417444840073586, + "rewards/margins": 0.14606314897537231, + "rewards/rejected": -0.1464073210954666, + "step": 4653 + }, + { + "epoch": 3.2185338865836792, + "grad_norm": 9.426918983459473, + "learning_rate": 3.767481174120178e-05, + "log_odds_chosen": 9.180564880371094, + "log_odds_ratio": -0.004941493272781372, + "logits/chosen": -0.8051036596298218, + "logits/rejected": -0.8713425993919373, + "logps/chosen": -0.006668214686214924, + "logps/rejected": -2.0180726051330566, + "loss": 2.0154, + "nll_loss": 0.5033589601516724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006668214919045568, + "rewards/margins": 0.20114043354988098, + "rewards/rejected": -0.20180726051330566, + "step": 4654 + }, + { + "epoch": 3.219225449515906, + "grad_norm": 10.532155990600586, + "learning_rate": 3.7670969724911634e-05, + "log_odds_chosen": 8.6746826171875, + "log_odds_ratio": -0.0008860914967954159, + "logits/chosen": -1.1926677227020264, + "logits/rejected": -1.298346996307373, + "logps/chosen": -0.009572312235832214, + "logps/rejected": -1.7488539218902588, + "loss": 1.5501, + "nll_loss": 0.38743335008621216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009572312119416893, + "rewards/margins": 0.1739281713962555, + "rewards/rejected": -0.17488539218902588, + "step": 4655 + }, + { + "epoch": 3.219917012448133, + "grad_norm": 7.555779457092285, + "learning_rate": 3.7667127708621487e-05, + "log_odds_chosen": 8.387954711914062, + "log_odds_ratio": -0.0016320085851475596, + "logits/chosen": -0.6773730516433716, + "logits/rejected": -0.6358497738838196, + "logps/chosen": -0.0013973293825984001, + "logps/rejected": -1.4357331991195679, + "loss": 1.094, + "nll_loss": 0.2733432948589325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013973294699098915, + "rewards/margins": 0.1434335708618164, + "rewards/rejected": -0.1435733139514923, + "step": 4656 + }, + { + "epoch": 3.2206085753803597, + "grad_norm": 9.341814041137695, + "learning_rate": 3.766328569233133e-05, + "log_odds_chosen": 7.2777605056762695, + "log_odds_ratio": -0.05912681296467781, + "logits/chosen": -0.7827059626579285, + "logits/rejected": -0.7716456651687622, + "logps/chosen": -0.02723333239555359, + "logps/rejected": -1.409245252609253, + "loss": 2.0224, + "nll_loss": 0.4996985197067261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002723333425819874, + "rewards/margins": 0.13820120692253113, + "rewards/rejected": -0.14092452824115753, + "step": 4657 + }, + { + "epoch": 3.2213001383125865, + "grad_norm": 9.240910530090332, + "learning_rate": 3.765944367604119e-05, + "log_odds_chosen": 8.91317367553711, + "log_odds_ratio": -0.00021192299027461559, + "logits/chosen": -0.8595993518829346, + "logits/rejected": -0.8856823444366455, + "logps/chosen": -0.00036952694063074887, + "logps/rejected": -0.9628509283065796, + "loss": 2.6645, + "nll_loss": 0.6660939455032349, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.695269697345793e-05, + "rewards/margins": 0.09624814242124557, + "rewards/rejected": -0.09628509730100632, + "step": 4658 + }, + { + "epoch": 3.2219917012448134, + "grad_norm": 9.384906768798828, + "learning_rate": 3.765560165975104e-05, + "log_odds_chosen": 9.198957443237305, + "log_odds_ratio": -0.0007888744585216045, + "logits/chosen": -0.6961106061935425, + "logits/rejected": -0.809903621673584, + "logps/chosen": -0.002150989603251219, + "logps/rejected": -2.140223503112793, + "loss": 2.2783, + "nll_loss": 0.569500744342804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002150989748770371, + "rewards/margins": 0.21380724012851715, + "rewards/rejected": -0.21402233839035034, + "step": 4659 + }, + { + "epoch": 3.22268326417704, + "grad_norm": 5.048548221588135, + "learning_rate": 3.765175964346089e-05, + "log_odds_chosen": 8.297301292419434, + "log_odds_ratio": -0.002085586078464985, + "logits/chosen": -0.19204308092594147, + "logits/rejected": -0.17156429588794708, + "logps/chosen": -0.0024641165509819984, + "logps/rejected": -1.0932461023330688, + "loss": 1.2116, + "nll_loss": 0.3026840090751648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024641165509819984, + "rewards/margins": 0.10907819867134094, + "rewards/rejected": -0.10932460427284241, + "step": 4660 + }, + { + "epoch": 3.223374827109267, + "grad_norm": 8.581104278564453, + "learning_rate": 3.764791762717074e-05, + "log_odds_chosen": 8.011213302612305, + "log_odds_ratio": -0.015043283812701702, + "logits/chosen": -0.7729774713516235, + "logits/rejected": -0.7363721132278442, + "logps/chosen": -0.004782415926456451, + "logps/rejected": -0.9363967180252075, + "loss": 1.5382, + "nll_loss": 0.3830445110797882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004782416217494756, + "rewards/margins": 0.0931614339351654, + "rewards/rejected": -0.09363967180252075, + "step": 4661 + }, + { + "epoch": 3.224066390041494, + "grad_norm": 9.607839584350586, + "learning_rate": 3.7644075610880594e-05, + "log_odds_chosen": 9.721281051635742, + "log_odds_ratio": -0.00016697979299351573, + "logits/chosen": -0.5198001861572266, + "logits/rejected": -0.6026105284690857, + "logps/chosen": -0.0010092060547322035, + "logps/rejected": -1.845146894454956, + "loss": 1.4448, + "nll_loss": 0.3611833155155182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010092060983879492, + "rewards/margins": 0.18441376090049744, + "rewards/rejected": -0.18451470136642456, + "step": 4662 + }, + { + "epoch": 3.2247579529737207, + "grad_norm": 9.854679107666016, + "learning_rate": 3.764023359459044e-05, + "log_odds_chosen": 8.785548210144043, + "log_odds_ratio": -0.0028546079993247986, + "logits/chosen": -0.47076669335365295, + "logits/rejected": -0.5453388094902039, + "logps/chosen": -0.002330200746655464, + "logps/rejected": -1.5070253610610962, + "loss": 1.6038, + "nll_loss": 0.40066125988960266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023302006593439728, + "rewards/margins": 0.15046951174736023, + "rewards/rejected": -0.15070253610610962, + "step": 4663 + }, + { + "epoch": 3.2254495159059475, + "grad_norm": 13.590742111206055, + "learning_rate": 3.763639157830029e-05, + "log_odds_chosen": 7.963131904602051, + "log_odds_ratio": -0.0036431909538805485, + "logits/chosen": -0.0865345448255539, + "logits/rejected": -0.1931847631931305, + "logps/chosen": -0.021115001291036606, + "logps/rejected": -1.798100471496582, + "loss": 1.7865, + "nll_loss": 0.4462681710720062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021115001291036606, + "rewards/margins": 0.17769855260849, + "rewards/rejected": -0.1798100471496582, + "step": 4664 + }, + { + "epoch": 3.2261410788381744, + "grad_norm": 5.58372163772583, + "learning_rate": 3.7632549562010145e-05, + "log_odds_chosen": 9.053085327148438, + "log_odds_ratio": -0.0012438575504347682, + "logits/chosen": -0.5867530107498169, + "logits/rejected": -0.7836533784866333, + "logps/chosen": -0.01469984371215105, + "logps/rejected": -1.4102587699890137, + "loss": 1.4402, + "nll_loss": 0.3599216938018799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014699844177812338, + "rewards/margins": 0.1395559012889862, + "rewards/rejected": -0.14102588593959808, + "step": 4665 + }, + { + "epoch": 3.226832641770401, + "grad_norm": 12.05872631072998, + "learning_rate": 3.762870754571999e-05, + "log_odds_chosen": 7.115346431732178, + "log_odds_ratio": -0.04925874248147011, + "logits/chosen": -0.7009913921356201, + "logits/rejected": -0.6906958222389221, + "logps/chosen": -0.032538898289203644, + "logps/rejected": -1.2821375131607056, + "loss": 2.1572, + "nll_loss": 0.5343620777130127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003253889735788107, + "rewards/margins": 0.12495986372232437, + "rewards/rejected": -0.12821374833583832, + "step": 4666 + }, + { + "epoch": 3.227524204702628, + "grad_norm": 10.255760192871094, + "learning_rate": 3.762486552942985e-05, + "log_odds_chosen": 9.634766578674316, + "log_odds_ratio": -0.007561844773590565, + "logits/chosen": -0.41329842805862427, + "logits/rejected": -0.42982420325279236, + "logps/chosen": -0.0032992465421557426, + "logps/rejected": -2.2600021362304688, + "loss": 1.6797, + "nll_loss": 0.4191625416278839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032992466003634036, + "rewards/margins": 0.22567029297351837, + "rewards/rejected": -0.22600021958351135, + "step": 4667 + }, + { + "epoch": 3.228215767634855, + "grad_norm": 8.777718544006348, + "learning_rate": 3.7621023513139696e-05, + "log_odds_chosen": 8.005030632019043, + "log_odds_ratio": -0.001454401994124055, + "logits/chosen": -0.6022816300392151, + "logits/rejected": -0.6292502880096436, + "logps/chosen": -0.029774591326713562, + "logps/rejected": -2.4813647270202637, + "loss": 1.5148, + "nll_loss": 0.37855100631713867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029774592258036137, + "rewards/margins": 0.24515900015830994, + "rewards/rejected": -0.2481364607810974, + "step": 4668 + }, + { + "epoch": 3.2289073305670817, + "grad_norm": 6.01991605758667, + "learning_rate": 3.761718149684955e-05, + "log_odds_chosen": 7.310305595397949, + "log_odds_ratio": -0.23577818274497986, + "logits/chosen": -0.6367329955101013, + "logits/rejected": -0.6445616483688354, + "logps/chosen": -0.03664974868297577, + "logps/rejected": -1.2936198711395264, + "loss": 1.756, + "nll_loss": 0.4154262840747833, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0036649745889008045, + "rewards/margins": 0.12569700181484222, + "rewards/rejected": -0.12936197221279144, + "step": 4669 + }, + { + "epoch": 3.2295988934993085, + "grad_norm": 11.206335067749023, + "learning_rate": 3.76133394805594e-05, + "log_odds_chosen": 8.69202709197998, + "log_odds_ratio": -0.003309912048280239, + "logits/chosen": -0.06795523315668106, + "logits/rejected": -0.1211337149143219, + "logps/chosen": -0.0026320209726691246, + "logps/rejected": -1.8271875381469727, + "loss": 1.6936, + "nll_loss": 0.42306768894195557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026320209144614637, + "rewards/margins": 0.18245553970336914, + "rewards/rejected": -0.18271872401237488, + "step": 4670 + }, + { + "epoch": 3.2302904564315353, + "grad_norm": 8.706618309020996, + "learning_rate": 3.760949746426925e-05, + "log_odds_chosen": 8.893049240112305, + "log_odds_ratio": -0.0005472367629408836, + "logits/chosen": -0.8280361890792847, + "logits/rejected": -0.839066207408905, + "logps/chosen": -0.019564703106880188, + "logps/rejected": -2.181659698486328, + "loss": 2.0402, + "nll_loss": 0.510004997253418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001956470310688019, + "rewards/margins": 0.2162095010280609, + "rewards/rejected": -0.21816599369049072, + "step": 4671 + }, + { + "epoch": 3.230982019363762, + "grad_norm": 7.39252233505249, + "learning_rate": 3.76056554479791e-05, + "log_odds_chosen": 9.779670715332031, + "log_odds_ratio": -0.0002500510308891535, + "logits/chosen": -0.37103089690208435, + "logits/rejected": -0.3734338879585266, + "logps/chosen": -0.004838225431740284, + "logps/rejected": -1.9937759637832642, + "loss": 1.261, + "nll_loss": 0.31522834300994873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004838225431740284, + "rewards/margins": 0.19889378547668457, + "rewards/rejected": -0.1993776112794876, + "step": 4672 + }, + { + "epoch": 3.231673582295989, + "grad_norm": 7.205052852630615, + "learning_rate": 3.760181343168895e-05, + "log_odds_chosen": 8.463676452636719, + "log_odds_ratio": -0.0005333481822162867, + "logits/chosen": -0.3758777678012848, + "logits/rejected": -0.38880136609077454, + "logps/chosen": -0.0009052710374817252, + "logps/rejected": -1.089587688446045, + "loss": 1.5308, + "nll_loss": 0.3826429843902588, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.052710811374709e-05, + "rewards/margins": 0.10886824131011963, + "rewards/rejected": -0.10895876586437225, + "step": 4673 + }, + { + "epoch": 3.232365145228216, + "grad_norm": 7.246854305267334, + "learning_rate": 3.7597971415398803e-05, + "log_odds_chosen": 8.98906135559082, + "log_odds_ratio": -0.00046163221122696996, + "logits/chosen": -0.6586583256721497, + "logits/rejected": -0.6791250705718994, + "logps/chosen": -0.0069456384517252445, + "logps/rejected": -2.028202772140503, + "loss": 1.3642, + "nll_loss": 0.3409973084926605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006945638451725245, + "rewards/margins": 0.20212571322917938, + "rewards/rejected": -0.20282027125358582, + "step": 4674 + }, + { + "epoch": 3.2330567081604427, + "grad_norm": 14.615860939025879, + "learning_rate": 3.759412939910865e-05, + "log_odds_chosen": 9.896066665649414, + "log_odds_ratio": -5.868840526090935e-05, + "logits/chosen": -0.7222040891647339, + "logits/rejected": -0.8251558542251587, + "logps/chosen": -0.00048558454727753997, + "logps/rejected": -1.741550326347351, + "loss": 3.3383, + "nll_loss": 0.8345783352851868, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8558453272562474e-05, + "rewards/margins": 0.17410646378993988, + "rewards/rejected": -0.17415504157543182, + "step": 4675 + }, + { + "epoch": 3.2337482710926695, + "grad_norm": 5.720992088317871, + "learning_rate": 3.759028738281851e-05, + "log_odds_chosen": 9.12053394317627, + "log_odds_ratio": -0.01311265118420124, + "logits/chosen": -0.35370558500289917, + "logits/rejected": -0.44249600172042847, + "logps/chosen": -0.01543546374887228, + "logps/rejected": -2.2226688861846924, + "loss": 1.205, + "nll_loss": 0.2999301552772522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015435463283210993, + "rewards/margins": 0.22072333097457886, + "rewards/rejected": -0.22226688265800476, + "step": 4676 + }, + { + "epoch": 3.2344398340248963, + "grad_norm": 8.618167877197266, + "learning_rate": 3.7586445366528354e-05, + "log_odds_chosen": 9.140830993652344, + "log_odds_ratio": -0.00018256741168443114, + "logits/chosen": -0.7402679920196533, + "logits/rejected": -0.766697347164154, + "logps/chosen": -0.009820668958127499, + "logps/rejected": -1.8408517837524414, + "loss": 2.3509, + "nll_loss": 0.587706446647644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000982066965661943, + "rewards/margins": 0.18310311436653137, + "rewards/rejected": -0.1840851902961731, + "step": 4677 + }, + { + "epoch": 3.235131396957123, + "grad_norm": 10.047858238220215, + "learning_rate": 3.7582603350238206e-05, + "log_odds_chosen": 9.806346893310547, + "log_odds_ratio": -7.717790140304714e-05, + "logits/chosen": -0.7226029634475708, + "logits/rejected": -0.7571486234664917, + "logps/chosen": -0.0005480307736434042, + "logps/rejected": -1.879559874534607, + "loss": 1.7701, + "nll_loss": 0.4425126910209656, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.480307663674466e-05, + "rewards/margins": 0.18790119886398315, + "rewards/rejected": -0.18795599043369293, + "step": 4678 + }, + { + "epoch": 3.23582295988935, + "grad_norm": 6.668112277984619, + "learning_rate": 3.757876133394806e-05, + "log_odds_chosen": 7.855587005615234, + "log_odds_ratio": -0.0021889405325055122, + "logits/chosen": -0.7883478403091431, + "logits/rejected": -0.8201271891593933, + "logps/chosen": -0.0025607063435018063, + "logps/rejected": -1.2306946516036987, + "loss": 1.9619, + "nll_loss": 0.49024853110313416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002560706343501806, + "rewards/margins": 0.1228134036064148, + "rewards/rejected": -0.12306946516036987, + "step": 4679 + }, + { + "epoch": 3.236514522821577, + "grad_norm": 12.489912986755371, + "learning_rate": 3.757491931765791e-05, + "log_odds_chosen": 7.957180023193359, + "log_odds_ratio": -0.06421130150556564, + "logits/chosen": -0.35384392738342285, + "logits/rejected": -0.4528539180755615, + "logps/chosen": -0.012504791840910912, + "logps/rejected": -1.523071050643921, + "loss": 1.6993, + "nll_loss": 0.4184127449989319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012504790211096406, + "rewards/margins": 0.15105663239955902, + "rewards/rejected": -0.15230710804462433, + "step": 4680 + }, + { + "epoch": 3.2372060857538036, + "grad_norm": 5.42405366897583, + "learning_rate": 3.757107730136776e-05, + "log_odds_chosen": 7.128323554992676, + "log_odds_ratio": -0.10372748225927353, + "logits/chosen": -0.41413596272468567, + "logits/rejected": -0.3195302486419678, + "logps/chosen": -0.03096316009759903, + "logps/rejected": -1.648804783821106, + "loss": 1.1236, + "nll_loss": 0.2705293595790863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030963162425905466, + "rewards/margins": 0.16178417205810547, + "rewards/rejected": -0.16488048434257507, + "step": 4681 + }, + { + "epoch": 3.2378976486860305, + "grad_norm": 14.078836441040039, + "learning_rate": 3.756723528507761e-05, + "log_odds_chosen": 8.796689987182617, + "log_odds_ratio": -0.0006093117990531027, + "logits/chosen": -0.5976904034614563, + "logits/rejected": -0.6900879144668579, + "logps/chosen": -0.0045931520871818066, + "logps/rejected": -1.6781189441680908, + "loss": 1.8045, + "nll_loss": 0.45107364654541016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045931522618047893, + "rewards/margins": 0.1673526018857956, + "rewards/rejected": -0.16781191527843475, + "step": 4682 + }, + { + "epoch": 3.2385892116182573, + "grad_norm": 14.267437934875488, + "learning_rate": 3.756339326878746e-05, + "log_odds_chosen": 9.33151626586914, + "log_odds_ratio": -0.008812183514237404, + "logits/chosen": -0.42314353585243225, + "logits/rejected": -0.6270802021026611, + "logps/chosen": -0.008810743689537048, + "logps/rejected": -2.262091875076294, + "loss": 2.0369, + "nll_loss": 0.5083341002464294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008810744038783014, + "rewards/margins": 0.22532811760902405, + "rewards/rejected": -0.22620920836925507, + "step": 4683 + }, + { + "epoch": 3.239280774550484, + "grad_norm": 6.2066779136657715, + "learning_rate": 3.755955125249731e-05, + "log_odds_chosen": 9.290958404541016, + "log_odds_ratio": -0.0004162929253652692, + "logits/chosen": -0.5276498794555664, + "logits/rejected": -0.6197217702865601, + "logps/chosen": -0.002184888580814004, + "logps/rejected": -1.5688152313232422, + "loss": 1.398, + "nll_loss": 0.34945833683013916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021848888718523085, + "rewards/margins": 0.15666301548480988, + "rewards/rejected": -0.15688151121139526, + "step": 4684 + }, + { + "epoch": 3.239972337482711, + "grad_norm": 14.069849014282227, + "learning_rate": 3.755570923620717e-05, + "log_odds_chosen": 7.24415397644043, + "log_odds_ratio": -0.2386513352394104, + "logits/chosen": -0.6899997591972351, + "logits/rejected": -0.7094993591308594, + "logps/chosen": -0.034362297505140305, + "logps/rejected": -1.2999882698059082, + "loss": 1.9539, + "nll_loss": 0.4645982086658478, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034362301230430603, + "rewards/margins": 0.12656259536743164, + "rewards/rejected": -0.1299988329410553, + "step": 4685 + }, + { + "epoch": 3.240663900414938, + "grad_norm": 9.512862205505371, + "learning_rate": 3.755186721991701e-05, + "log_odds_chosen": 7.003537178039551, + "log_odds_ratio": -0.06596534699201584, + "logits/chosen": -0.7419699430465698, + "logits/rejected": -0.7511963248252869, + "logps/chosen": -0.024283548817038536, + "logps/rejected": -1.1892660856246948, + "loss": 2.5285, + "nll_loss": 0.6255288124084473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024283546954393387, + "rewards/margins": 0.11649825423955917, + "rewards/rejected": -0.11892661452293396, + "step": 4686 + }, + { + "epoch": 3.2413554633471646, + "grad_norm": 11.929668426513672, + "learning_rate": 3.7548025203626865e-05, + "log_odds_chosen": 9.594942092895508, + "log_odds_ratio": -0.00027311124722473323, + "logits/chosen": -0.5834130048751831, + "logits/rejected": -0.660904049873352, + "logps/chosen": -0.007404697127640247, + "logps/rejected": -2.3006372451782227, + "loss": 1.9043, + "nll_loss": 0.47605520486831665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007404697826132178, + "rewards/margins": 0.22932323813438416, + "rewards/rejected": -0.23006370663642883, + "step": 4687 + }, + { + "epoch": 3.2420470262793915, + "grad_norm": 7.606420516967773, + "learning_rate": 3.754418318733672e-05, + "log_odds_chosen": 8.018352508544922, + "log_odds_ratio": -0.033401452004909515, + "logits/chosen": -0.433298796415329, + "logits/rejected": -0.4908546209335327, + "logps/chosen": -0.019605904817581177, + "logps/rejected": -1.8726167678833008, + "loss": 1.1228, + "nll_loss": 0.2773599624633789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019605904817581177, + "rewards/margins": 0.18530109524726868, + "rewards/rejected": -0.1872616857290268, + "step": 4688 + }, + { + "epoch": 3.2427385892116183, + "grad_norm": 7.604212760925293, + "learning_rate": 3.754034117104657e-05, + "log_odds_chosen": 7.7977447509765625, + "log_odds_ratio": -0.02325718104839325, + "logits/chosen": -0.2695230543613434, + "logits/rejected": -0.33405792713165283, + "logps/chosen": -0.009040276519954205, + "logps/rejected": -1.8517611026763916, + "loss": 1.3656, + "nll_loss": 0.3390858471393585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009040277218446136, + "rewards/margins": 0.18427208065986633, + "rewards/rejected": -0.18517610430717468, + "step": 4689 + }, + { + "epoch": 3.243430152143845, + "grad_norm": 7.930447578430176, + "learning_rate": 3.7536499154756415e-05, + "log_odds_chosen": 8.696924209594727, + "log_odds_ratio": -0.0006968708476051688, + "logits/chosen": -0.5834269523620605, + "logits/rejected": -0.6710518598556519, + "logps/chosen": -0.010049977339804173, + "logps/rejected": -1.799387812614441, + "loss": 1.7001, + "nll_loss": 0.4249535799026489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010049976408481598, + "rewards/margins": 0.1789337694644928, + "rewards/rejected": -0.17993877828121185, + "step": 4690 + }, + { + "epoch": 3.244121715076072, + "grad_norm": 10.962292671203613, + "learning_rate": 3.753265713846627e-05, + "log_odds_chosen": 9.12042236328125, + "log_odds_ratio": -0.0011845820117741823, + "logits/chosen": -0.7881402969360352, + "logits/rejected": -0.8471523523330688, + "logps/chosen": -0.001060610287822783, + "logps/rejected": -1.4474725723266602, + "loss": 1.4429, + "nll_loss": 0.36061328649520874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010606103023746982, + "rewards/margins": 0.1446411907672882, + "rewards/rejected": -0.1447472721338272, + "step": 4691 + }, + { + "epoch": 3.2448132780082988, + "grad_norm": 10.399835586547852, + "learning_rate": 3.752881512217612e-05, + "log_odds_chosen": 8.767321586608887, + "log_odds_ratio": -0.0005905175348743796, + "logits/chosen": -0.6651334762573242, + "logits/rejected": -0.6290321350097656, + "logps/chosen": -0.0007113451138138771, + "logps/rejected": -1.4993962049484253, + "loss": 1.7853, + "nll_loss": 0.44627705216407776, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.113451283657923e-05, + "rewards/margins": 0.14986848831176758, + "rewards/rejected": -0.1499396115541458, + "step": 4692 + }, + { + "epoch": 3.2455048409405256, + "grad_norm": 8.132790565490723, + "learning_rate": 3.7524973105885966e-05, + "log_odds_chosen": 8.834731101989746, + "log_odds_ratio": -0.0014325689990073442, + "logits/chosen": -0.7984225749969482, + "logits/rejected": -0.8978487253189087, + "logps/chosen": -0.034150779247283936, + "logps/rejected": -1.837937831878662, + "loss": 1.5841, + "nll_loss": 0.39587467908859253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034150779247283936, + "rewards/margins": 0.180378720164299, + "rewards/rejected": -0.1837937980890274, + "step": 4693 + }, + { + "epoch": 3.2461964038727524, + "grad_norm": 11.265973091125488, + "learning_rate": 3.7521131089595825e-05, + "log_odds_chosen": 7.646029472351074, + "log_odds_ratio": -0.02673262730240822, + "logits/chosen": -0.5934436321258545, + "logits/rejected": -0.5885952711105347, + "logps/chosen": -0.008165750652551651, + "logps/rejected": -1.3897361755371094, + "loss": 1.3938, + "nll_loss": 0.3457859456539154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008165750768966973, + "rewards/margins": 0.13815705478191376, + "rewards/rejected": -0.13897360861301422, + "step": 4694 + }, + { + "epoch": 3.2468879668049793, + "grad_norm": 6.324791431427002, + "learning_rate": 3.751728907330567e-05, + "log_odds_chosen": 6.769179344177246, + "log_odds_ratio": -0.11484857648611069, + "logits/chosen": -0.6039650440216064, + "logits/rejected": -0.5800857543945312, + "logps/chosen": -0.028059110045433044, + "logps/rejected": -1.295393466949463, + "loss": 1.1339, + "nll_loss": 0.271982342004776, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028059110045433044, + "rewards/margins": 0.1267334371805191, + "rewards/rejected": -0.1295393407344818, + "step": 4695 + }, + { + "epoch": 3.247579529737206, + "grad_norm": 20.774612426757812, + "learning_rate": 3.751344705701552e-05, + "log_odds_chosen": 9.281335830688477, + "log_odds_ratio": -0.0002557536936365068, + "logits/chosen": -0.4461503028869629, + "logits/rejected": -0.5599817037582397, + "logps/chosen": -0.0008539292612113059, + "logps/rejected": -1.8166067600250244, + "loss": 2.0747, + "nll_loss": 0.5186419486999512, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.539292321074754e-05, + "rewards/margins": 0.18157526850700378, + "rewards/rejected": -0.18166068196296692, + "step": 4696 + }, + { + "epoch": 3.248271092669433, + "grad_norm": 10.650715827941895, + "learning_rate": 3.7509605040725376e-05, + "log_odds_chosen": 10.62962532043457, + "log_odds_ratio": -5.8132434787694365e-05, + "logits/chosen": -0.6330527663230896, + "logits/rejected": -0.7648962140083313, + "logps/chosen": -0.00021957623539492488, + "logps/rejected": -2.031571865081787, + "loss": 1.2311, + "nll_loss": 0.3077716529369354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.195762499468401e-05, + "rewards/margins": 0.20313522219657898, + "rewards/rejected": -0.2031571865081787, + "step": 4697 + }, + { + "epoch": 3.2489626556016598, + "grad_norm": 9.276825904846191, + "learning_rate": 3.750576302443523e-05, + "log_odds_chosen": 9.023599624633789, + "log_odds_ratio": -0.00047183758579194546, + "logits/chosen": -0.6361697912216187, + "logits/rejected": -0.6914676427841187, + "logps/chosen": -0.007565617561340332, + "logps/rejected": -1.8860620260238647, + "loss": 1.7635, + "nll_loss": 0.44083526730537415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007565617561340332, + "rewards/margins": 0.18784965574741364, + "rewards/rejected": -0.18860623240470886, + "step": 4698 + }, + { + "epoch": 3.2496542185338866, + "grad_norm": 11.17447280883789, + "learning_rate": 3.7501921008145074e-05, + "log_odds_chosen": 9.626748085021973, + "log_odds_ratio": -0.00036149457446299493, + "logits/chosen": -0.8828724026679993, + "logits/rejected": -0.9713633060455322, + "logps/chosen": -0.0006358891841955483, + "logps/rejected": -2.0440754890441895, + "loss": 2.0904, + "nll_loss": 0.5225660800933838, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.358891550917178e-05, + "rewards/margins": 0.2043439894914627, + "rewards/rejected": -0.20440757274627686, + "step": 4699 + }, + { + "epoch": 3.2503457814661134, + "grad_norm": 12.056692123413086, + "learning_rate": 3.7498078991854926e-05, + "log_odds_chosen": 8.626978874206543, + "log_odds_ratio": -0.00200482876971364, + "logits/chosen": -0.8026310801506042, + "logits/rejected": -0.8775153756141663, + "logps/chosen": -0.0016090385615825653, + "logps/rejected": -1.2534581422805786, + "loss": 1.9658, + "nll_loss": 0.49123844504356384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016090385906863958, + "rewards/margins": 0.12518492341041565, + "rewards/rejected": -0.12534582614898682, + "step": 4700 + }, + { + "epoch": 3.2510373443983402, + "grad_norm": 6.449684143066406, + "learning_rate": 3.749423697556478e-05, + "log_odds_chosen": 8.831612586975098, + "log_odds_ratio": -0.0013644417049363256, + "logits/chosen": -0.6984375715255737, + "logits/rejected": -0.7231895923614502, + "logps/chosen": -0.001228701206855476, + "logps/rejected": -1.282002568244934, + "loss": 1.0217, + "nll_loss": 0.2552833557128906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012287012941669673, + "rewards/margins": 0.12807738780975342, + "rewards/rejected": -0.1282002478837967, + "step": 4701 + }, + { + "epoch": 3.251728907330567, + "grad_norm": 9.102448463439941, + "learning_rate": 3.7490394959274624e-05, + "log_odds_chosen": 8.337583541870117, + "log_odds_ratio": -0.038717515766620636, + "logits/chosen": -0.4444388747215271, + "logits/rejected": -0.47403082251548767, + "logps/chosen": -0.010870045982301235, + "logps/rejected": -1.121246337890625, + "loss": 1.3042, + "nll_loss": 0.3221665024757385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010870046680793166, + "rewards/margins": 0.11103762686252594, + "rewards/rejected": -0.11212463676929474, + "step": 4702 + }, + { + "epoch": 3.252420470262794, + "grad_norm": 9.043835639953613, + "learning_rate": 3.7486552942984484e-05, + "log_odds_chosen": 8.033955574035645, + "log_odds_ratio": -0.004091629758477211, + "logits/chosen": -0.4190574288368225, + "logits/rejected": -0.4952911138534546, + "logps/chosen": -0.012978767044842243, + "logps/rejected": -1.4955980777740479, + "loss": 1.331, + "nll_loss": 0.3323467969894409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012978767044842243, + "rewards/margins": 0.1482619345188141, + "rewards/rejected": -0.14955979585647583, + "step": 4703 + }, + { + "epoch": 3.2531120331950207, + "grad_norm": 9.657242774963379, + "learning_rate": 3.748271092669433e-05, + "log_odds_chosen": 8.201099395751953, + "log_odds_ratio": -0.040349896997213364, + "logits/chosen": -0.22873516380786896, + "logits/rejected": -0.3231565058231354, + "logps/chosen": -0.022254683077335358, + "logps/rejected": -1.6321990489959717, + "loss": 1.5839, + "nll_loss": 0.3919522762298584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022254684008657932, + "rewards/margins": 0.16099445521831512, + "rewards/rejected": -0.16321992874145508, + "step": 4704 + }, + { + "epoch": 3.2538035961272476, + "grad_norm": 9.094130516052246, + "learning_rate": 3.747886891040418e-05, + "log_odds_chosen": 9.257597923278809, + "log_odds_ratio": -0.0005117820110172033, + "logits/chosen": -0.713983416557312, + "logits/rejected": -0.7741215229034424, + "logps/chosen": -0.0029167216271162033, + "logps/rejected": -1.9832541942596436, + "loss": 1.2118, + "nll_loss": 0.3028981387615204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002916721859946847, + "rewards/margins": 0.19803375005722046, + "rewards/rejected": -0.19832541048526764, + "step": 4705 + }, + { + "epoch": 3.2544951590594744, + "grad_norm": 7.859956741333008, + "learning_rate": 3.7475026894114034e-05, + "log_odds_chosen": 9.877527236938477, + "log_odds_ratio": -9.827398753259331e-05, + "logits/chosen": -0.8897565007209778, + "logits/rejected": -0.9196685552597046, + "logps/chosen": -0.0003954821149818599, + "logps/rejected": -1.7171218395233154, + "loss": 1.2827, + "nll_loss": 0.3206574618816376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.954820931539871e-05, + "rewards/margins": 0.17167265713214874, + "rewards/rejected": -0.1717122197151184, + "step": 4706 + }, + { + "epoch": 3.2551867219917012, + "grad_norm": 5.342949867248535, + "learning_rate": 3.747118487782389e-05, + "log_odds_chosen": 10.59014892578125, + "log_odds_ratio": -4.1027629777090624e-05, + "logits/chosen": -0.4254288673400879, + "logits/rejected": -0.4047529697418213, + "logps/chosen": -0.00017517567903269082, + "logps/rejected": -1.7674227952957153, + "loss": 1.1699, + "nll_loss": 0.2924777865409851, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7517568267066963e-05, + "rewards/margins": 0.17672476172447205, + "rewards/rejected": -0.176742285490036, + "step": 4707 + }, + { + "epoch": 3.255878284923928, + "grad_norm": 7.126961708068848, + "learning_rate": 3.746734286153373e-05, + "log_odds_chosen": 8.468984603881836, + "log_odds_ratio": -0.0038509315345436335, + "logits/chosen": -0.4620926082134247, + "logits/rejected": -0.42402681708335876, + "logps/chosen": -0.0030720517970621586, + "logps/rejected": -1.2905032634735107, + "loss": 1.0187, + "nll_loss": 0.25430044531822205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003072051622439176, + "rewards/margins": 0.12874311208724976, + "rewards/rejected": -0.1290503293275833, + "step": 4708 + }, + { + "epoch": 3.256569847856155, + "grad_norm": 7.006777286529541, + "learning_rate": 3.7463500845243585e-05, + "log_odds_chosen": 9.9539213180542, + "log_odds_ratio": -0.0007148530567064881, + "logits/chosen": -0.14592701196670532, + "logits/rejected": -0.2852640151977539, + "logps/chosen": -0.000978219322860241, + "logps/rejected": -2.2065348625183105, + "loss": 1.3983, + "nll_loss": 0.34950917959213257, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.78219322860241e-05, + "rewards/margins": 0.2205556333065033, + "rewards/rejected": -0.2206534594297409, + "step": 4709 + }, + { + "epoch": 3.2572614107883817, + "grad_norm": 7.470785617828369, + "learning_rate": 3.745965882895344e-05, + "log_odds_chosen": 9.500198364257812, + "log_odds_ratio": -0.08528933674097061, + "logits/chosen": -0.06970225274562836, + "logits/rejected": -0.07198717445135117, + "logps/chosen": -0.014830323867499828, + "logps/rejected": -2.3052525520324707, + "loss": 1.4311, + "nll_loss": 0.34924232959747314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014830323634669185, + "rewards/margins": 0.2290422022342682, + "rewards/rejected": -0.23052525520324707, + "step": 4710 + }, + { + "epoch": 3.2579529737206085, + "grad_norm": 9.470431327819824, + "learning_rate": 3.745581681266328e-05, + "log_odds_chosen": 8.710151672363281, + "log_odds_ratio": -0.006062633823603392, + "logits/chosen": -0.39688247442245483, + "logits/rejected": -0.48148801922798157, + "logps/chosen": -0.006541873794049025, + "logps/rejected": -1.8868510723114014, + "loss": 1.1379, + "nll_loss": 0.2838761806488037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006541873444803059, + "rewards/margins": 0.1880309134721756, + "rewards/rejected": -0.1886851042509079, + "step": 4711 + }, + { + "epoch": 3.2586445366528354, + "grad_norm": 10.436737060546875, + "learning_rate": 3.745197479637314e-05, + "log_odds_chosen": 8.634658813476562, + "log_odds_ratio": -0.00040024961344897747, + "logits/chosen": -0.3570972681045532, + "logits/rejected": -0.5034259557723999, + "logps/chosen": -0.005407108925282955, + "logps/rejected": -2.238614082336426, + "loss": 2.0126, + "nll_loss": 0.5031040906906128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005407108692452312, + "rewards/margins": 0.22332070767879486, + "rewards/rejected": -0.223861426115036, + "step": 4712 + }, + { + "epoch": 3.259336099585062, + "grad_norm": 8.839387893676758, + "learning_rate": 3.744813278008299e-05, + "log_odds_chosen": 8.221335411071777, + "log_odds_ratio": -0.037947457283735275, + "logits/chosen": -0.43112167716026306, + "logits/rejected": -0.5641734004020691, + "logps/chosen": -0.028066959232091904, + "logps/rejected": -2.2719764709472656, + "loss": 1.7187, + "nll_loss": 0.42588597536087036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028066958766430616, + "rewards/margins": 0.2243909388780594, + "rewards/rejected": -0.22719764709472656, + "step": 4713 + }, + { + "epoch": 3.260027662517289, + "grad_norm": 11.743000984191895, + "learning_rate": 3.744429076379284e-05, + "log_odds_chosen": 8.314166069030762, + "log_odds_ratio": -0.012927965261042118, + "logits/chosen": -0.32482391595840454, + "logits/rejected": -0.29587215185165405, + "logps/chosen": -0.015798617154359818, + "logps/rejected": -1.9533848762512207, + "loss": 1.5372, + "nll_loss": 0.3830021917819977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001579861855134368, + "rewards/margins": 0.19375863671302795, + "rewards/rejected": -0.19533848762512207, + "step": 4714 + }, + { + "epoch": 3.260719225449516, + "grad_norm": 8.914383888244629, + "learning_rate": 3.744044874750269e-05, + "log_odds_chosen": 8.001260757446289, + "log_odds_ratio": -0.0005701860645785928, + "logits/chosen": -0.16800744831562042, + "logits/rejected": -0.20939353108406067, + "logps/chosen": -0.012287750840187073, + "logps/rejected": -1.8615694046020508, + "loss": 1.8311, + "nll_loss": 0.4577205777168274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001228775130584836, + "rewards/margins": 0.18492814898490906, + "rewards/rejected": -0.18615692853927612, + "step": 4715 + }, + { + "epoch": 3.2614107883817427, + "grad_norm": 6.675778388977051, + "learning_rate": 3.7436606731212545e-05, + "log_odds_chosen": 10.189067840576172, + "log_odds_ratio": -0.00026544820866547525, + "logits/chosen": -0.6262035369873047, + "logits/rejected": -0.6417590975761414, + "logps/chosen": -0.0010462929494678974, + "logps/rejected": -2.1971559524536133, + "loss": 1.6888, + "nll_loss": 0.42217081785202026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010462929640198126, + "rewards/margins": 0.21961098909378052, + "rewards/rejected": -0.21971561014652252, + "step": 4716 + }, + { + "epoch": 3.2621023513139695, + "grad_norm": 6.2069573402404785, + "learning_rate": 3.743276471492239e-05, + "log_odds_chosen": 6.975490570068359, + "log_odds_ratio": -0.04731149226427078, + "logits/chosen": -0.4405580163002014, + "logits/rejected": -0.40422195196151733, + "logps/chosen": -0.10175307095050812, + "logps/rejected": -1.5015373229980469, + "loss": 1.356, + "nll_loss": 0.33427995443344116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010175305418670177, + "rewards/margins": 0.13997843861579895, + "rewards/rejected": -0.1501537412405014, + "step": 4717 + }, + { + "epoch": 3.2627939142461964, + "grad_norm": 10.46078872680664, + "learning_rate": 3.742892269863224e-05, + "log_odds_chosen": 8.430739402770996, + "log_odds_ratio": -0.01749058999121189, + "logits/chosen": -0.4526011645793915, + "logits/rejected": -0.4871124029159546, + "logps/chosen": -0.03805088624358177, + "logps/rejected": -2.7476189136505127, + "loss": 2.6868, + "nll_loss": 0.669959545135498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038050890434533358, + "rewards/margins": 0.270956814289093, + "rewards/rejected": -0.2747619152069092, + "step": 4718 + }, + { + "epoch": 3.263485477178423, + "grad_norm": 12.18230152130127, + "learning_rate": 3.7425080682342096e-05, + "log_odds_chosen": 9.675816535949707, + "log_odds_ratio": -0.00019988187705166638, + "logits/chosen": -0.4546333849430084, + "logits/rejected": -0.5552395582199097, + "logps/chosen": -0.0006059492588974535, + "logps/rejected": -1.817103624343872, + "loss": 2.1331, + "nll_loss": 0.5332649946212769, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059492443455383e-05, + "rewards/margins": 0.18164978921413422, + "rewards/rejected": -0.1817103624343872, + "step": 4719 + }, + { + "epoch": 3.26417704011065, + "grad_norm": 7.717270374298096, + "learning_rate": 3.742123866605194e-05, + "log_odds_chosen": 8.973867416381836, + "log_odds_ratio": -0.0006007368210703135, + "logits/chosen": -0.3857366442680359, + "logits/rejected": -0.43481382727622986, + "logps/chosen": -0.000942138722166419, + "logps/rejected": -1.4649817943572998, + "loss": 1.9242, + "nll_loss": 0.480987548828125, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.421388676855713e-05, + "rewards/margins": 0.146403968334198, + "rewards/rejected": -0.1464981883764267, + "step": 4720 + }, + { + "epoch": 3.264868603042877, + "grad_norm": 29.994112014770508, + "learning_rate": 3.74173966497618e-05, + "log_odds_chosen": 7.361050605773926, + "log_odds_ratio": -0.41764599084854126, + "logits/chosen": -0.2790865898132324, + "logits/rejected": -0.3517614006996155, + "logps/chosen": -0.04281236231327057, + "logps/rejected": -1.6136527061462402, + "loss": 1.7113, + "nll_loss": 0.3860637843608856, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004281235858798027, + "rewards/margins": 0.15708401799201965, + "rewards/rejected": -0.16136527061462402, + "step": 4721 + }, + { + "epoch": 3.2655601659751037, + "grad_norm": 10.153568267822266, + "learning_rate": 3.7413554633471646e-05, + "log_odds_chosen": 9.24374008178711, + "log_odds_ratio": -0.0005770561983808875, + "logits/chosen": -0.6040127277374268, + "logits/rejected": -0.6861763000488281, + "logps/chosen": -0.0012250742875039577, + "logps/rejected": -1.8835153579711914, + "loss": 1.9434, + "nll_loss": 0.48578059673309326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012250742292962968, + "rewards/margins": 0.1882290244102478, + "rewards/rejected": -0.18835154175758362, + "step": 4722 + }, + { + "epoch": 3.2662517289073305, + "grad_norm": 6.868340492248535, + "learning_rate": 3.74097126171815e-05, + "log_odds_chosen": 8.424043655395508, + "log_odds_ratio": -0.030705248937010765, + "logits/chosen": -0.4594751000404358, + "logits/rejected": -0.5086755752563477, + "logps/chosen": -0.00897553376853466, + "logps/rejected": -1.7296792268753052, + "loss": 2.3846, + "nll_loss": 0.5930869579315186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008975533419288695, + "rewards/margins": 0.17207039892673492, + "rewards/rejected": -0.17296794056892395, + "step": 4723 + }, + { + "epoch": 3.2669432918395573, + "grad_norm": 12.509700775146484, + "learning_rate": 3.740587060089135e-05, + "log_odds_chosen": 9.228704452514648, + "log_odds_ratio": -0.0015716326888650656, + "logits/chosen": -0.19610503315925598, + "logits/rejected": -0.2479446977376938, + "logps/chosen": -0.001635462511330843, + "logps/rejected": -1.4660680294036865, + "loss": 1.6548, + "nll_loss": 0.4135492444038391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016354625404346734, + "rewards/margins": 0.14644327759742737, + "rewards/rejected": -0.14660681784152985, + "step": 4724 + }, + { + "epoch": 3.267634854771784, + "grad_norm": 22.524150848388672, + "learning_rate": 3.7402028584601204e-05, + "log_odds_chosen": 8.740550994873047, + "log_odds_ratio": -0.031293027102947235, + "logits/chosen": -0.2052878588438034, + "logits/rejected": -0.2539139986038208, + "logps/chosen": -0.02158481813967228, + "logps/rejected": -2.054436206817627, + "loss": 1.9949, + "nll_loss": 0.49560779333114624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021584820933640003, + "rewards/margins": 0.20328515768051147, + "rewards/rejected": -0.2054436206817627, + "step": 4725 + }, + { + "epoch": 3.268326417704011, + "grad_norm": 10.84430980682373, + "learning_rate": 3.739818656831105e-05, + "log_odds_chosen": 9.799678802490234, + "log_odds_ratio": -0.0003848494670819491, + "logits/chosen": -0.34542927145957947, + "logits/rejected": -0.43698975443840027, + "logps/chosen": -0.0053330291993916035, + "logps/rejected": -2.442246437072754, + "loss": 1.2302, + "nll_loss": 0.30752086639404297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005333030130714178, + "rewards/margins": 0.2436913400888443, + "rewards/rejected": -0.24422462284564972, + "step": 4726 + }, + { + "epoch": 3.269017980636238, + "grad_norm": 16.114307403564453, + "learning_rate": 3.73943445520209e-05, + "log_odds_chosen": 9.769522666931152, + "log_odds_ratio": -0.0005406438722275198, + "logits/chosen": -0.8938454985618591, + "logits/rejected": -1.0274933576583862, + "logps/chosen": -0.0006901758024469018, + "logps/rejected": -1.437190055847168, + "loss": 2.3199, + "nll_loss": 0.579930305480957, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.901758024469018e-05, + "rewards/margins": 0.14364999532699585, + "rewards/rejected": -0.14371900260448456, + "step": 4727 + }, + { + "epoch": 3.2697095435684647, + "grad_norm": 8.889013290405273, + "learning_rate": 3.7390502535730754e-05, + "log_odds_chosen": 8.69186019897461, + "log_odds_ratio": -0.017956608906388283, + "logits/chosen": -0.2601277828216553, + "logits/rejected": -0.3766717314720154, + "logps/chosen": -0.021709920838475227, + "logps/rejected": -2.236056327819824, + "loss": 1.2342, + "nll_loss": 0.30675405263900757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021709920838475227, + "rewards/margins": 0.22143465280532837, + "rewards/rejected": -0.22360563278198242, + "step": 4728 + }, + { + "epoch": 3.2704011065006915, + "grad_norm": 8.644242286682129, + "learning_rate": 3.73866605194406e-05, + "log_odds_chosen": 9.557779312133789, + "log_odds_ratio": -0.0012040914734825492, + "logits/chosen": -0.4545590877532959, + "logits/rejected": -0.4834221601486206, + "logps/chosen": -0.001696384628303349, + "logps/rejected": -2.1973724365234375, + "loss": 1.6645, + "nll_loss": 0.4160057306289673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001696384570095688, + "rewards/margins": 0.21956762671470642, + "rewards/rejected": -0.21973726153373718, + "step": 4729 + }, + { + "epoch": 3.2710926694329183, + "grad_norm": 12.513564109802246, + "learning_rate": 3.738281850315046e-05, + "log_odds_chosen": 9.51201057434082, + "log_odds_ratio": -0.26938343048095703, + "logits/chosen": -0.7239038944244385, + "logits/rejected": -0.8134328722953796, + "logps/chosen": -0.0924244299530983, + "logps/rejected": -2.5132055282592773, + "loss": 1.6323, + "nll_loss": 0.38113951683044434, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009242444299161434, + "rewards/margins": 0.24207809567451477, + "rewards/rejected": -0.2513205409049988, + "step": 4730 + }, + { + "epoch": 3.271784232365145, + "grad_norm": 5.1850152015686035, + "learning_rate": 3.7378976486860305e-05, + "log_odds_chosen": 8.564149856567383, + "log_odds_ratio": -0.005627782549709082, + "logits/chosen": -0.4772839844226837, + "logits/rejected": -0.6249641180038452, + "logps/chosen": -0.022416256368160248, + "logps/rejected": -1.7249755859375, + "loss": 1.7617, + "nll_loss": 0.4398678243160248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002241625916212797, + "rewards/margins": 0.17025592923164368, + "rewards/rejected": -0.17249755561351776, + "step": 4731 + }, + { + "epoch": 3.272475795297372, + "grad_norm": 14.917389869689941, + "learning_rate": 3.737513447057016e-05, + "log_odds_chosen": 8.748239517211914, + "log_odds_ratio": -0.0023502488620579243, + "logits/chosen": -0.5880697965621948, + "logits/rejected": -0.6493375897407532, + "logps/chosen": -0.037263672798871994, + "logps/rejected": -2.3619611263275146, + "loss": 2.0115, + "nll_loss": 0.5026419162750244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003726367373019457, + "rewards/margins": 0.23246973752975464, + "rewards/rejected": -0.2361961156129837, + "step": 4732 + }, + { + "epoch": 3.273167358229599, + "grad_norm": 8.856287002563477, + "learning_rate": 3.737129245428001e-05, + "log_odds_chosen": 10.241095542907715, + "log_odds_ratio": -6.276419298956171e-05, + "logits/chosen": -0.5032787322998047, + "logits/rejected": -0.5542346239089966, + "logps/chosen": -0.00041386799421161413, + "logps/rejected": -2.267441749572754, + "loss": 1.573, + "nll_loss": 0.39324095845222473, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1386800148757175e-05, + "rewards/margins": 0.22670279443264008, + "rewards/rejected": -0.2267441749572754, + "step": 4733 + }, + { + "epoch": 3.2738589211618256, + "grad_norm": 4.8699774742126465, + "learning_rate": 3.736745043798986e-05, + "log_odds_chosen": 7.746283531188965, + "log_odds_ratio": -0.013506181538105011, + "logits/chosen": -0.5837900638580322, + "logits/rejected": -0.6591947078704834, + "logps/chosen": -0.005485900677740574, + "logps/rejected": -1.2493566274642944, + "loss": 1.6014, + "nll_loss": 0.3990109860897064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005485900910571218, + "rewards/margins": 0.12438708543777466, + "rewards/rejected": -0.12493567168712616, + "step": 4734 + }, + { + "epoch": 3.2745504840940525, + "grad_norm": 11.48668384552002, + "learning_rate": 3.736360842169971e-05, + "log_odds_chosen": 9.136337280273438, + "log_odds_ratio": -0.0008283422794193029, + "logits/chosen": -0.8731837272644043, + "logits/rejected": -1.00054132938385, + "logps/chosen": -0.0013460691552609205, + "logps/rejected": -1.7370753288269043, + "loss": 2.0574, + "nll_loss": 0.514275848865509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001346069184364751, + "rewards/margins": 0.17357292771339417, + "rewards/rejected": -0.17370754480361938, + "step": 4735 + }, + { + "epoch": 3.2752420470262793, + "grad_norm": 4.959877014160156, + "learning_rate": 3.735976640540956e-05, + "log_odds_chosen": 8.550050735473633, + "log_odds_ratio": -0.0060114869847893715, + "logits/chosen": -0.5094909071922302, + "logits/rejected": -0.528753936290741, + "logps/chosen": -0.004416503012180328, + "logps/rejected": -1.5144058465957642, + "loss": 2.0407, + "nll_loss": 0.5095845460891724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044165030703879893, + "rewards/margins": 0.15099893510341644, + "rewards/rejected": -0.1514405906200409, + "step": 4736 + }, + { + "epoch": 3.275933609958506, + "grad_norm": 8.902658462524414, + "learning_rate": 3.735592438911941e-05, + "log_odds_chosen": 9.97501277923584, + "log_odds_ratio": -0.0002525137388147414, + "logits/chosen": -0.5565110445022583, + "logits/rejected": -0.5615907311439514, + "logps/chosen": -0.0011530027259141207, + "logps/rejected": -2.144369602203369, + "loss": 1.6975, + "nll_loss": 0.42434871196746826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001153002813225612, + "rewards/margins": 0.21432164311408997, + "rewards/rejected": -0.21443697810173035, + "step": 4737 + }, + { + "epoch": 3.276625172890733, + "grad_norm": 7.4417829513549805, + "learning_rate": 3.735208237282926e-05, + "log_odds_chosen": 9.793359756469727, + "log_odds_ratio": -0.00034874703851528466, + "logits/chosen": -0.7387258410453796, + "logits/rejected": -0.7721572518348694, + "logps/chosen": -0.0049118902534246445, + "logps/rejected": -1.93038010597229, + "loss": 1.2391, + "nll_loss": 0.309741735458374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004911890137009323, + "rewards/margins": 0.1925468146800995, + "rewards/rejected": -0.19303801655769348, + "step": 4738 + }, + { + "epoch": 3.27731673582296, + "grad_norm": 6.360231399536133, + "learning_rate": 3.734824035653912e-05, + "log_odds_chosen": 9.175899505615234, + "log_odds_ratio": -0.0006352405180223286, + "logits/chosen": -0.5413529872894287, + "logits/rejected": -0.6137167811393738, + "logps/chosen": -0.04030369967222214, + "logps/rejected": -1.6991791725158691, + "loss": 1.1838, + "nll_loss": 0.29588454961776733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004030370619148016, + "rewards/margins": 0.16588754951953888, + "rewards/rejected": -0.16991791129112244, + "step": 4739 + }, + { + "epoch": 3.2780082987551866, + "grad_norm": 5.473787307739258, + "learning_rate": 3.734439834024896e-05, + "log_odds_chosen": 8.764920234680176, + "log_odds_ratio": -0.002741128671914339, + "logits/chosen": -0.7053573131561279, + "logits/rejected": -0.6922200322151184, + "logps/chosen": -0.0052336049266159534, + "logps/rejected": -2.0399675369262695, + "loss": 1.3755, + "nll_loss": 0.34360355138778687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005233605043031275, + "rewards/margins": 0.20347338914871216, + "rewards/rejected": -0.20399674773216248, + "step": 4740 + }, + { + "epoch": 3.2786998616874135, + "grad_norm": 8.485190391540527, + "learning_rate": 3.7340556323958816e-05, + "log_odds_chosen": 7.325547218322754, + "log_odds_ratio": -0.06429079920053482, + "logits/chosen": -0.5360948443412781, + "logits/rejected": -0.577506422996521, + "logps/chosen": -0.037310246378183365, + "logps/rejected": -1.4450794458389282, + "loss": 1.5523, + "nll_loss": 0.3816385567188263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037310244515538216, + "rewards/margins": 0.14077691733837128, + "rewards/rejected": -0.14450794458389282, + "step": 4741 + }, + { + "epoch": 3.2793914246196403, + "grad_norm": 9.204703330993652, + "learning_rate": 3.733671430766867e-05, + "log_odds_chosen": 9.312698364257812, + "log_odds_ratio": -0.013727872632443905, + "logits/chosen": -0.41289976239204407, + "logits/rejected": -0.5273554921150208, + "logps/chosen": -0.008033557794988155, + "logps/rejected": -1.684893012046814, + "loss": 2.0903, + "nll_loss": 0.5211901664733887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008033557678572834, + "rewards/margins": 0.16768595576286316, + "rewards/rejected": -0.16848930716514587, + "step": 4742 + }, + { + "epoch": 3.280082987551867, + "grad_norm": 7.151228427886963, + "learning_rate": 3.733287229137852e-05, + "log_odds_chosen": 9.259330749511719, + "log_odds_ratio": -0.00040504755452275276, + "logits/chosen": -0.268002986907959, + "logits/rejected": -0.2747061848640442, + "logps/chosen": -0.007128972094506025, + "logps/rejected": -1.4844189882278442, + "loss": 1.7262, + "nll_loss": 0.431497186422348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007128972210921347, + "rewards/margins": 0.14772900938987732, + "rewards/rejected": -0.14844189584255219, + "step": 4743 + }, + { + "epoch": 3.280774550484094, + "grad_norm": 10.045866966247559, + "learning_rate": 3.7329030275088366e-05, + "log_odds_chosen": 9.405765533447266, + "log_odds_ratio": -0.00025165293482132256, + "logits/chosen": -0.642254114151001, + "logits/rejected": -0.7636048793792725, + "logps/chosen": -0.0007097673369571567, + "logps/rejected": -1.7180622816085815, + "loss": 1.6269, + "nll_loss": 0.40670347213745117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.097673369571567e-05, + "rewards/margins": 0.1717352718114853, + "rewards/rejected": -0.1718062460422516, + "step": 4744 + }, + { + "epoch": 3.2814661134163208, + "grad_norm": 7.029824256896973, + "learning_rate": 3.732518825879822e-05, + "log_odds_chosen": 9.815935134887695, + "log_odds_ratio": -0.0002565347240306437, + "logits/chosen": -0.47242191433906555, + "logits/rejected": -0.5548115968704224, + "logps/chosen": -0.025179818272590637, + "logps/rejected": -2.6731204986572266, + "loss": 1.1865, + "nll_loss": 0.29660704731941223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002517981920391321, + "rewards/margins": 0.26479408144950867, + "rewards/rejected": -0.26731204986572266, + "step": 4745 + }, + { + "epoch": 3.2821576763485476, + "grad_norm": 7.584310054779053, + "learning_rate": 3.732134624250807e-05, + "log_odds_chosen": 9.373991966247559, + "log_odds_ratio": -0.00020613643573597074, + "logits/chosen": -0.8278031349182129, + "logits/rejected": -0.8617972135543823, + "logps/chosen": -0.0006570966215804219, + "logps/rejected": -1.5188932418823242, + "loss": 1.2428, + "nll_loss": 0.3106880187988281, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570966797880828e-05, + "rewards/margins": 0.15182361006736755, + "rewards/rejected": -0.15188932418823242, + "step": 4746 + }, + { + "epoch": 3.2828492392807744, + "grad_norm": 5.923523902893066, + "learning_rate": 3.731750422621792e-05, + "log_odds_chosen": 8.727062225341797, + "log_odds_ratio": -0.0840362086892128, + "logits/chosen": -0.7970731258392334, + "logits/rejected": -0.8237400054931641, + "logps/chosen": -0.017797797918319702, + "logps/rejected": -1.4784642457962036, + "loss": 1.4738, + "nll_loss": 0.36003950238227844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017797796754166484, + "rewards/margins": 0.14606666564941406, + "rewards/rejected": -0.14784643054008484, + "step": 4747 + }, + { + "epoch": 3.2835408022130013, + "grad_norm": 8.834217071533203, + "learning_rate": 3.7313662209927776e-05, + "log_odds_chosen": 7.273342609405518, + "log_odds_ratio": -0.022097529843449593, + "logits/chosen": -0.3649141490459442, + "logits/rejected": -0.4236929714679718, + "logps/chosen": -0.027750710025429726, + "logps/rejected": -2.403799533843994, + "loss": 1.8193, + "nll_loss": 0.4526097774505615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027750711888074875, + "rewards/margins": 0.23760490119457245, + "rewards/rejected": -0.24037997424602509, + "step": 4748 + }, + { + "epoch": 3.284232365145228, + "grad_norm": 8.271860122680664, + "learning_rate": 3.730982019363762e-05, + "log_odds_chosen": 8.717733383178711, + "log_odds_ratio": -0.02827119268476963, + "logits/chosen": -0.6745389103889465, + "logits/rejected": -0.646474301815033, + "logps/chosen": -0.011464545503258705, + "logps/rejected": -2.0525588989257812, + "loss": 1.6412, + "nll_loss": 0.4074724316596985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011464543640613556, + "rewards/margins": 0.20410946011543274, + "rewards/rejected": -0.2052558958530426, + "step": 4749 + }, + { + "epoch": 3.284923928077455, + "grad_norm": 11.006572723388672, + "learning_rate": 3.7305978177347474e-05, + "log_odds_chosen": 8.783592224121094, + "log_odds_ratio": -0.012629851698875427, + "logits/chosen": -0.5243411064147949, + "logits/rejected": -0.5682640075683594, + "logps/chosen": -0.002311853226274252, + "logps/rejected": -1.406437635421753, + "loss": 1.7829, + "nll_loss": 0.44445228576660156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023118533135857433, + "rewards/margins": 0.1404125690460205, + "rewards/rejected": -0.14064376056194305, + "step": 4750 + }, + { + "epoch": 3.2856154910096818, + "grad_norm": 9.709880828857422, + "learning_rate": 3.7302136161057326e-05, + "log_odds_chosen": 8.833198547363281, + "log_odds_ratio": -0.00226973881945014, + "logits/chosen": -0.6147016882896423, + "logits/rejected": -0.6394162178039551, + "logps/chosen": -0.0030848130118101835, + "logps/rejected": -1.5361934900283813, + "loss": 2.1818, + "nll_loss": 0.5452192425727844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000308481277897954, + "rewards/margins": 0.1533108651638031, + "rewards/rejected": -0.15361934900283813, + "step": 4751 + }, + { + "epoch": 3.2863070539419086, + "grad_norm": 9.029719352722168, + "learning_rate": 3.729829414476718e-05, + "log_odds_chosen": 9.643852233886719, + "log_odds_ratio": -0.001490089576691389, + "logits/chosen": -0.41162461042404175, + "logits/rejected": -0.5935603380203247, + "logps/chosen": -0.001466776942834258, + "logps/rejected": -1.9239782094955444, + "loss": 1.3499, + "nll_loss": 0.33732154965400696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014667770301457494, + "rewards/margins": 0.19225114583969116, + "rewards/rejected": -0.1923978179693222, + "step": 4752 + }, + { + "epoch": 3.2869986168741354, + "grad_norm": 12.33303165435791, + "learning_rate": 3.7294452128477025e-05, + "log_odds_chosen": 9.344205856323242, + "log_odds_ratio": -0.0002524256706237793, + "logits/chosen": -0.6274995803833008, + "logits/rejected": -0.7359373569488525, + "logps/chosen": -0.0009275332558900118, + "logps/rejected": -1.629117727279663, + "loss": 1.4462, + "nll_loss": 0.3615281581878662, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.275333286495879e-05, + "rewards/margins": 0.162819042801857, + "rewards/rejected": -0.1629117727279663, + "step": 4753 + }, + { + "epoch": 3.2876901798063622, + "grad_norm": 17.345125198364258, + "learning_rate": 3.729061011218688e-05, + "log_odds_chosen": 8.758502006530762, + "log_odds_ratio": -0.013476484455168247, + "logits/chosen": -0.06280569732189178, + "logits/rejected": -0.1538834273815155, + "logps/chosen": -0.02584027126431465, + "logps/rejected": -1.9973444938659668, + "loss": 2.1901, + "nll_loss": 0.5461861491203308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002584027126431465, + "rewards/margins": 0.19715043902397156, + "rewards/rejected": -0.19973447918891907, + "step": 4754 + }, + { + "epoch": 3.288381742738589, + "grad_norm": 7.788273811340332, + "learning_rate": 3.728676809589673e-05, + "log_odds_chosen": 8.044576644897461, + "log_odds_ratio": -0.17449286580085754, + "logits/chosen": -0.3410576283931732, + "logits/rejected": -0.4063025414943695, + "logps/chosen": -0.026682965457439423, + "logps/rejected": -1.7172755002975464, + "loss": 1.4453, + "nll_loss": 0.34386640787124634, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002668296452611685, + "rewards/margins": 0.16905924677848816, + "rewards/rejected": -0.17172753810882568, + "step": 4755 + }, + { + "epoch": 3.289073305670816, + "grad_norm": 13.536659240722656, + "learning_rate": 3.7282926079606575e-05, + "log_odds_chosen": 9.390349388122559, + "log_odds_ratio": -0.00024700278299860656, + "logits/chosen": -0.7557802200317383, + "logits/rejected": -0.7990991473197937, + "logps/chosen": -0.002822623588144779, + "logps/rejected": -1.7095152139663696, + "loss": 2.6826, + "nll_loss": 0.6706240773200989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002822624228429049, + "rewards/margins": 0.17066925764083862, + "rewards/rejected": -0.17095153033733368, + "step": 4756 + }, + { + "epoch": 3.2897648686030427, + "grad_norm": 16.100502014160156, + "learning_rate": 3.7279084063316434e-05, + "log_odds_chosen": 9.26073169708252, + "log_odds_ratio": -0.002371899550780654, + "logits/chosen": -0.45070087909698486, + "logits/rejected": -0.5162512063980103, + "logps/chosen": -0.0017145187593996525, + "logps/rejected": -1.4648433923721313, + "loss": 1.8425, + "nll_loss": 0.4603844881057739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001714518730295822, + "rewards/margins": 0.1463128924369812, + "rewards/rejected": -0.1464843451976776, + "step": 4757 + }, + { + "epoch": 3.2904564315352696, + "grad_norm": 6.50117826461792, + "learning_rate": 3.727524204702628e-05, + "log_odds_chosen": 9.957597732543945, + "log_odds_ratio": -9.806110756471753e-05, + "logits/chosen": -0.35002419352531433, + "logits/rejected": -0.4274992346763611, + "logps/chosen": -0.00034100955235771835, + "logps/rejected": -1.987114667892456, + "loss": 1.2194, + "nll_loss": 0.30484938621520996, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4100954508176073e-05, + "rewards/margins": 0.19867737591266632, + "rewards/rejected": -0.19871146976947784, + "step": 4758 + }, + { + "epoch": 3.2911479944674964, + "grad_norm": 11.918055534362793, + "learning_rate": 3.727140003073613e-05, + "log_odds_chosen": 10.449407577514648, + "log_odds_ratio": -0.0009086823556572199, + "logits/chosen": -0.7754091024398804, + "logits/rejected": -0.7539989948272705, + "logps/chosen": -0.0005091758212074637, + "logps/rejected": -1.9469212293624878, + "loss": 1.4888, + "nll_loss": 0.3721088767051697, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0917587941512465e-05, + "rewards/margins": 0.19464120268821716, + "rewards/rejected": -0.19469213485717773, + "step": 4759 + }, + { + "epoch": 3.2918395573997232, + "grad_norm": 9.799060821533203, + "learning_rate": 3.7267558014445985e-05, + "log_odds_chosen": 10.056310653686523, + "log_odds_ratio": -0.0005241757608018816, + "logits/chosen": -0.19608081877231598, + "logits/rejected": -0.23960356414318085, + "logps/chosen": -0.0006183648947626352, + "logps/rejected": -2.189321756362915, + "loss": 1.4989, + "nll_loss": 0.37467676401138306, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.183648656588048e-05, + "rewards/margins": 0.2188703566789627, + "rewards/rejected": -0.21893219649791718, + "step": 4760 + }, + { + "epoch": 3.29253112033195, + "grad_norm": 10.568760871887207, + "learning_rate": 3.726371599815584e-05, + "log_odds_chosen": 8.719406127929688, + "log_odds_ratio": -0.0002006332069868222, + "logits/chosen": -0.05832742527127266, + "logits/rejected": -0.1293889284133911, + "logps/chosen": -0.004344870802015066, + "logps/rejected": -2.0650267601013184, + "loss": 1.9617, + "nll_loss": 0.4904080033302307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043448706855997443, + "rewards/margins": 0.20606820285320282, + "rewards/rejected": -0.20650267601013184, + "step": 4761 + }, + { + "epoch": 3.293222683264177, + "grad_norm": 6.398842811584473, + "learning_rate": 3.725987398186568e-05, + "log_odds_chosen": 10.638826370239258, + "log_odds_ratio": -8.899492968339473e-05, + "logits/chosen": -0.46759265661239624, + "logits/rejected": -0.5266586542129517, + "logps/chosen": -0.0001857294118963182, + "logps/rejected": -2.1859965324401855, + "loss": 1.4842, + "nll_loss": 0.3710480034351349, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8572942281025462e-05, + "rewards/margins": 0.21858109533786774, + "rewards/rejected": -0.21859967708587646, + "step": 4762 + }, + { + "epoch": 3.2939142461964037, + "grad_norm": 14.124534606933594, + "learning_rate": 3.7256031965575535e-05, + "log_odds_chosen": 7.912993431091309, + "log_odds_ratio": -0.026029715314507484, + "logits/chosen": -0.35933125019073486, + "logits/rejected": -0.3802638351917267, + "logps/chosen": -0.007536513265222311, + "logps/rejected": -1.4723609685897827, + "loss": 1.7175, + "nll_loss": 0.42677438259124756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007536513148806989, + "rewards/margins": 0.1464824378490448, + "rewards/rejected": -0.14723609387874603, + "step": 4763 + }, + { + "epoch": 3.2946058091286305, + "grad_norm": 8.384628295898438, + "learning_rate": 3.725218994928539e-05, + "log_odds_chosen": 9.867562294006348, + "log_odds_ratio": -0.00011213291145395488, + "logits/chosen": -0.3775690793991089, + "logits/rejected": -0.5076505541801453, + "logps/chosen": -0.013233819045126438, + "logps/rejected": -2.4109346866607666, + "loss": 1.8189, + "nll_loss": 0.45471256971359253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013233819045126438, + "rewards/margins": 0.23977011442184448, + "rewards/rejected": -0.2410934865474701, + "step": 4764 + }, + { + "epoch": 3.2952973720608574, + "grad_norm": 10.633298873901367, + "learning_rate": 3.7248347932995233e-05, + "log_odds_chosen": 10.024510383605957, + "log_odds_ratio": -0.00019864975183736533, + "logits/chosen": -0.5552939176559448, + "logits/rejected": -0.6305736303329468, + "logps/chosen": -0.00040514758438803256, + "logps/rejected": -1.8062925338745117, + "loss": 1.5388, + "nll_loss": 0.3846917748451233, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0514758438803256e-05, + "rewards/margins": 0.1805887222290039, + "rewards/rejected": -0.18062923848628998, + "step": 4765 + }, + { + "epoch": 3.295988934993084, + "grad_norm": 8.180196762084961, + "learning_rate": 3.724450591670509e-05, + "log_odds_chosen": 6.821606636047363, + "log_odds_ratio": -0.08719392120838165, + "logits/chosen": -0.6663360595703125, + "logits/rejected": -0.601808488368988, + "logps/chosen": -0.04260578006505966, + "logps/rejected": -1.3567452430725098, + "loss": 2.4537, + "nll_loss": 0.6047061085700989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004260578192770481, + "rewards/margins": 0.13141396641731262, + "rewards/rejected": -0.13567453622817993, + "step": 4766 + }, + { + "epoch": 3.296680497925311, + "grad_norm": 9.587262153625488, + "learning_rate": 3.724066390041494e-05, + "log_odds_chosen": 7.748490333557129, + "log_odds_ratio": -0.20099495351314545, + "logits/chosen": -0.5211882591247559, + "logits/rejected": -0.5478272438049316, + "logps/chosen": -0.029580960050225258, + "logps/rejected": -1.0142784118652344, + "loss": 1.6294, + "nll_loss": 0.38724058866500854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002958096330985427, + "rewards/margins": 0.09846975654363632, + "rewards/rejected": -0.1014278456568718, + "step": 4767 + }, + { + "epoch": 3.297372060857538, + "grad_norm": 10.243107795715332, + "learning_rate": 3.723682188412479e-05, + "log_odds_chosen": 9.759126663208008, + "log_odds_ratio": -0.00040256179636344314, + "logits/chosen": -0.7398092746734619, + "logits/rejected": -0.7639995813369751, + "logps/chosen": -0.0003068627556785941, + "logps/rejected": -1.6395602226257324, + "loss": 1.6066, + "nll_loss": 0.4016038477420807, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.068627484026365e-05, + "rewards/margins": 0.16392534971237183, + "rewards/rejected": -0.16395603120326996, + "step": 4768 + }, + { + "epoch": 3.2980636237897647, + "grad_norm": 11.689058303833008, + "learning_rate": 3.7232979867834636e-05, + "log_odds_chosen": 8.956748008728027, + "log_odds_ratio": -0.0011930334148928523, + "logits/chosen": -0.5303239226341248, + "logits/rejected": -0.5913784503936768, + "logps/chosen": -0.016812235116958618, + "logps/rejected": -1.9123353958129883, + "loss": 2.2169, + "nll_loss": 0.5541026592254639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00168122339528054, + "rewards/margins": 0.18955230712890625, + "rewards/rejected": -0.1912335455417633, + "step": 4769 + }, + { + "epoch": 3.2987551867219915, + "grad_norm": 9.376022338867188, + "learning_rate": 3.7229137851544496e-05, + "log_odds_chosen": 9.66012954711914, + "log_odds_ratio": -0.0038533341139554977, + "logits/chosen": -0.5144587755203247, + "logits/rejected": -0.5820307731628418, + "logps/chosen": -0.0020845159888267517, + "logps/rejected": -2.0297763347625732, + "loss": 1.5981, + "nll_loss": 0.3991428315639496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000208451587241143, + "rewards/margins": 0.20276916027069092, + "rewards/rejected": -0.20297762751579285, + "step": 4770 + }, + { + "epoch": 3.2994467496542184, + "grad_norm": 12.997052192687988, + "learning_rate": 3.722529583525434e-05, + "log_odds_chosen": 10.184316635131836, + "log_odds_ratio": -0.0001452596188755706, + "logits/chosen": -0.6555896997451782, + "logits/rejected": -0.7356351017951965, + "logps/chosen": -0.004165071528404951, + "logps/rejected": -2.575788974761963, + "loss": 1.886, + "nll_loss": 0.4714895784854889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004165071586612612, + "rewards/margins": 0.2571623921394348, + "rewards/rejected": -0.25757887959480286, + "step": 4771 + }, + { + "epoch": 3.300138312586445, + "grad_norm": 13.00661849975586, + "learning_rate": 3.7221453818964194e-05, + "log_odds_chosen": 9.663476943969727, + "log_odds_ratio": -0.0003805930900853127, + "logits/chosen": -0.6680766344070435, + "logits/rejected": -0.7372719049453735, + "logps/chosen": -0.0004427245585247874, + "logps/rejected": -1.4613854885101318, + "loss": 1.5827, + "nll_loss": 0.3956254720687866, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.427245585247874e-05, + "rewards/margins": 0.14609427750110626, + "rewards/rejected": -0.14613854885101318, + "step": 4772 + }, + { + "epoch": 3.300829875518672, + "grad_norm": 13.011005401611328, + "learning_rate": 3.7217611802674046e-05, + "log_odds_chosen": 11.087613105773926, + "log_odds_ratio": -4.3162217480130494e-05, + "logits/chosen": -0.5127568244934082, + "logits/rejected": -0.6861209869384766, + "logps/chosen": -0.00031132507137954235, + "logps/rejected": -2.7826836109161377, + "loss": 1.3939, + "nll_loss": 0.3484596014022827, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.113250932074152e-05, + "rewards/margins": 0.2782372236251831, + "rewards/rejected": -0.27826836705207825, + "step": 4773 + }, + { + "epoch": 3.301521438450899, + "grad_norm": 9.222591400146484, + "learning_rate": 3.721376978638389e-05, + "log_odds_chosen": 8.618949890136719, + "log_odds_ratio": -0.0009218844352290034, + "logits/chosen": -0.47571730613708496, + "logits/rejected": -0.4952141344547272, + "logps/chosen": -0.006290379445999861, + "logps/rejected": -1.6514685153961182, + "loss": 1.4282, + "nll_loss": 0.3569517135620117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006290380260907114, + "rewards/margins": 0.1645178198814392, + "rewards/rejected": -0.1651468724012375, + "step": 4774 + }, + { + "epoch": 3.3022130013831257, + "grad_norm": 5.612703323364258, + "learning_rate": 3.7209927770093744e-05, + "log_odds_chosen": 7.623640060424805, + "log_odds_ratio": -0.06587830185890198, + "logits/chosen": -0.44902193546295166, + "logits/rejected": -0.4639015793800354, + "logps/chosen": -0.0315217524766922, + "logps/rejected": -1.3852890729904175, + "loss": 1.7845, + "nll_loss": 0.43953758478164673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031521753408014774, + "rewards/margins": 0.1353767365217209, + "rewards/rejected": -0.13852891325950623, + "step": 4775 + }, + { + "epoch": 3.3029045643153525, + "grad_norm": 9.524703979492188, + "learning_rate": 3.72060857538036e-05, + "log_odds_chosen": 7.134241580963135, + "log_odds_ratio": -0.06777474284172058, + "logits/chosen": -0.585055947303772, + "logits/rejected": -0.6165054440498352, + "logps/chosen": -0.04166354238986969, + "logps/rejected": -1.9927994012832642, + "loss": 1.7974, + "nll_loss": 0.44256073236465454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004166354890912771, + "rewards/margins": 0.19511358439922333, + "rewards/rejected": -0.19927993416786194, + "step": 4776 + }, + { + "epoch": 3.3035961272475793, + "grad_norm": 8.323135375976562, + "learning_rate": 3.720224373751345e-05, + "log_odds_chosen": 10.659330368041992, + "log_odds_ratio": -0.00018401745182927698, + "logits/chosen": -0.5943928360939026, + "logits/rejected": -0.6496483087539673, + "logps/chosen": -0.0003157538012601435, + "logps/rejected": -2.077713966369629, + "loss": 1.7976, + "nll_loss": 0.449379563331604, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.157538594678044e-05, + "rewards/margins": 0.20773981511592865, + "rewards/rejected": -0.2077714055776596, + "step": 4777 + }, + { + "epoch": 3.304287690179806, + "grad_norm": 6.543783187866211, + "learning_rate": 3.7198401721223295e-05, + "log_odds_chosen": 9.414234161376953, + "log_odds_ratio": -0.0005971124628558755, + "logits/chosen": -0.26304084062576294, + "logits/rejected": -0.33669501543045044, + "logps/chosen": -0.0007507125264964998, + "logps/rejected": -1.7340823411941528, + "loss": 1.6501, + "nll_loss": 0.4124714136123657, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.507124973926693e-05, + "rewards/margins": 0.17333316802978516, + "rewards/rejected": -0.17340824007987976, + "step": 4778 + }, + { + "epoch": 3.304979253112033, + "grad_norm": 11.716812133789062, + "learning_rate": 3.7194559704933154e-05, + "log_odds_chosen": 7.778428077697754, + "log_odds_ratio": -0.06828571856021881, + "logits/chosen": -0.36957937479019165, + "logits/rejected": -0.3652217388153076, + "logps/chosen": -0.015425732359290123, + "logps/rejected": -1.1986706256866455, + "loss": 2.1979, + "nll_loss": 0.5426568388938904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001542573212645948, + "rewards/margins": 0.11832448840141296, + "rewards/rejected": -0.11986706405878067, + "step": 4779 + }, + { + "epoch": 3.30567081604426, + "grad_norm": 10.31844711303711, + "learning_rate": 3.7190717688643e-05, + "log_odds_chosen": 8.872358322143555, + "log_odds_ratio": -0.006060821935534477, + "logits/chosen": -0.010356791317462921, + "logits/rejected": -0.13408158719539642, + "logps/chosen": -0.009832553565502167, + "logps/rejected": -1.800208330154419, + "loss": 1.3841, + "nll_loss": 0.3454144597053528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009832553332671523, + "rewards/margins": 0.17903758585453033, + "rewards/rejected": -0.18002083897590637, + "step": 4780 + }, + { + "epoch": 3.3063623789764867, + "grad_norm": 9.09090518951416, + "learning_rate": 3.718687567235285e-05, + "log_odds_chosen": 9.495561599731445, + "log_odds_ratio": -0.0005472712800838053, + "logits/chosen": -0.47284039855003357, + "logits/rejected": -0.5695121884346008, + "logps/chosen": -0.01995399221777916, + "logps/rejected": -2.2598190307617188, + "loss": 1.5845, + "nll_loss": 0.3960671126842499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001995399361476302, + "rewards/margins": 0.22398647665977478, + "rewards/rejected": -0.22598187625408173, + "step": 4781 + }, + { + "epoch": 3.3070539419087135, + "grad_norm": 16.406513214111328, + "learning_rate": 3.7183033656062705e-05, + "log_odds_chosen": 11.0305814743042, + "log_odds_ratio": -2.177390160795767e-05, + "logits/chosen": -0.5238522887229919, + "logits/rejected": -0.5845245122909546, + "logps/chosen": -0.00019303163571748883, + "logps/rejected": -2.37955379486084, + "loss": 2.8331, + "nll_loss": 0.7082697153091431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.930316284415312e-05, + "rewards/margins": 0.23793606460094452, + "rewards/rejected": -0.23795536160469055, + "step": 4782 + }, + { + "epoch": 3.3077455048409403, + "grad_norm": 11.432744026184082, + "learning_rate": 3.717919163977255e-05, + "log_odds_chosen": 9.310041427612305, + "log_odds_ratio": -0.00014564645243808627, + "logits/chosen": -0.7327515482902527, + "logits/rejected": -0.7921730279922485, + "logps/chosen": -0.0005434756749309599, + "logps/rejected": -1.7320445775985718, + "loss": 1.7618, + "nll_loss": 0.4404332637786865, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.434756894828752e-05, + "rewards/margins": 0.17315012216567993, + "rewards/rejected": -0.1732044517993927, + "step": 4783 + }, + { + "epoch": 3.308437067773167, + "grad_norm": 10.639967918395996, + "learning_rate": 3.71753496234824e-05, + "log_odds_chosen": 9.80274772644043, + "log_odds_ratio": -7.620429096277803e-05, + "logits/chosen": -0.7275650501251221, + "logits/rejected": -0.756881833076477, + "logps/chosen": -0.0004280672874301672, + "logps/rejected": -1.7424432039260864, + "loss": 1.9727, + "nll_loss": 0.49316996335983276, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.280672874301672e-05, + "rewards/margins": 0.17420151829719543, + "rewards/rejected": -0.17424434423446655, + "step": 4784 + }, + { + "epoch": 3.309128630705394, + "grad_norm": 7.955739974975586, + "learning_rate": 3.7171507607192255e-05, + "log_odds_chosen": 10.062747955322266, + "log_odds_ratio": -7.456566527253017e-05, + "logits/chosen": -0.1481330692768097, + "logits/rejected": -0.26083531975746155, + "logps/chosen": -0.0004507679841481149, + "logps/rejected": -2.0520520210266113, + "loss": 1.0819, + "nll_loss": 0.27046719193458557, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.50768020527903e-05, + "rewards/margins": 0.20516012609004974, + "rewards/rejected": -0.20520521700382233, + "step": 4785 + }, + { + "epoch": 3.309820193637621, + "grad_norm": 7.6142401695251465, + "learning_rate": 3.716766559090211e-05, + "log_odds_chosen": 9.340099334716797, + "log_odds_ratio": -0.0005384586984291673, + "logits/chosen": -0.25933513045310974, + "logits/rejected": -0.32356947660446167, + "logps/chosen": -0.031100928783416748, + "logps/rejected": -2.5838711261749268, + "loss": 1.492, + "nll_loss": 0.37295711040496826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003110092831775546, + "rewards/margins": 0.25527700781822205, + "rewards/rejected": -0.25838708877563477, + "step": 4786 + }, + { + "epoch": 3.3105117565698476, + "grad_norm": 10.63508415222168, + "learning_rate": 3.716382357461195e-05, + "log_odds_chosen": 10.072786331176758, + "log_odds_ratio": -0.00014562705473508686, + "logits/chosen": -0.4115908443927765, + "logits/rejected": -0.5597538948059082, + "logps/chosen": -0.00044055056059733033, + "logps/rejected": -1.7020169496536255, + "loss": 1.7935, + "nll_loss": 0.4483675956726074, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4055057514924556e-05, + "rewards/margins": 0.17015764117240906, + "rewards/rejected": -0.17020170390605927, + "step": 4787 + }, + { + "epoch": 3.3112033195020745, + "grad_norm": 6.960546970367432, + "learning_rate": 3.715998155832181e-05, + "log_odds_chosen": 9.475131034851074, + "log_odds_ratio": -0.0017900835955515504, + "logits/chosen": -0.4856712818145752, + "logits/rejected": -0.5267800092697144, + "logps/chosen": -0.00179449247661978, + "logps/rejected": -1.439497947692871, + "loss": 2.1871, + "nll_loss": 0.5465947985649109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017944925639312714, + "rewards/margins": 0.14377035200595856, + "rewards/rejected": -0.14394979178905487, + "step": 4788 + }, + { + "epoch": 3.3118948824343013, + "grad_norm": 14.887598991394043, + "learning_rate": 3.715613954203166e-05, + "log_odds_chosen": 7.631422996520996, + "log_odds_ratio": -0.0026702506002038717, + "logits/chosen": -0.4037625193595886, + "logits/rejected": -0.5052109956741333, + "logps/chosen": -0.00553013663738966, + "logps/rejected": -1.4539145231246948, + "loss": 1.6762, + "nll_loss": 0.4187846779823303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005530136404559016, + "rewards/margins": 0.14483843743801117, + "rewards/rejected": -0.14539144933223724, + "step": 4789 + }, + { + "epoch": 3.312586445366528, + "grad_norm": 6.999749183654785, + "learning_rate": 3.715229752574151e-05, + "log_odds_chosen": 7.64384651184082, + "log_odds_ratio": -0.005185406655073166, + "logits/chosen": -0.34098395705223083, + "logits/rejected": -0.3821839690208435, + "logps/chosen": -0.11446300894021988, + "logps/rejected": -1.7795770168304443, + "loss": 1.458, + "nll_loss": 0.3639754354953766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011446301825344563, + "rewards/margins": 0.1665114015340805, + "rewards/rejected": -0.1779577136039734, + "step": 4790 + }, + { + "epoch": 3.313278008298755, + "grad_norm": 5.284163475036621, + "learning_rate": 3.714845550945136e-05, + "log_odds_chosen": 8.501035690307617, + "log_odds_ratio": -0.001044795848429203, + "logits/chosen": -0.4037015736103058, + "logits/rejected": -0.3955928087234497, + "logps/chosen": -0.0008926805458031595, + "logps/rejected": -1.4123013019561768, + "loss": 1.7369, + "nll_loss": 0.43411070108413696, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.926806185627356e-05, + "rewards/margins": 0.1411408632993698, + "rewards/rejected": -0.14123013615608215, + "step": 4791 + }, + { + "epoch": 3.313969571230982, + "grad_norm": 8.896031379699707, + "learning_rate": 3.7144613493161216e-05, + "log_odds_chosen": 10.451597213745117, + "log_odds_ratio": -4.365799395600334e-05, + "logits/chosen": -0.40877458453178406, + "logits/rejected": -0.44063982367515564, + "logps/chosen": -0.0005057338275946677, + "logps/rejected": -2.0362067222595215, + "loss": 1.5345, + "nll_loss": 0.3836180865764618, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.05733878526371e-05, + "rewards/margins": 0.20357009768486023, + "rewards/rejected": -0.20362067222595215, + "step": 4792 + }, + { + "epoch": 3.3146611341632086, + "grad_norm": 6.940845012664795, + "learning_rate": 3.714077147687106e-05, + "log_odds_chosen": 8.740618705749512, + "log_odds_ratio": -0.00044958863873034716, + "logits/chosen": -0.5630000829696655, + "logits/rejected": -0.6577044129371643, + "logps/chosen": -0.0014754270669072866, + "logps/rejected": -1.6538732051849365, + "loss": 1.2009, + "nll_loss": 0.3001739978790283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014754271251149476, + "rewards/margins": 0.16523978114128113, + "rewards/rejected": -0.1653873324394226, + "step": 4793 + }, + { + "epoch": 3.3153526970954355, + "grad_norm": 6.365971565246582, + "learning_rate": 3.7136929460580914e-05, + "log_odds_chosen": 8.731281280517578, + "log_odds_ratio": -0.0011150891659781337, + "logits/chosen": -0.5653225779533386, + "logits/rejected": -0.5691289901733398, + "logps/chosen": -0.0014174432726576924, + "logps/rejected": -1.182433843612671, + "loss": 1.8279, + "nll_loss": 0.4568542242050171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014174434181768447, + "rewards/margins": 0.11810164898633957, + "rewards/rejected": -0.11824339628219604, + "step": 4794 + }, + { + "epoch": 3.3160442600276623, + "grad_norm": 12.208864212036133, + "learning_rate": 3.7133087444290766e-05, + "log_odds_chosen": 7.481908798217773, + "log_odds_ratio": -0.05939479172229767, + "logits/chosen": -0.37778154015541077, + "logits/rejected": -0.3924955725669861, + "logps/chosen": -0.025003833696246147, + "logps/rejected": -2.280137062072754, + "loss": 1.7202, + "nll_loss": 0.42409858107566833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002500383649021387, + "rewards/margins": 0.2255133092403412, + "rewards/rejected": -0.22801369428634644, + "step": 4795 + }, + { + "epoch": 3.316735822959889, + "grad_norm": 10.980515480041504, + "learning_rate": 3.712924542800061e-05, + "log_odds_chosen": 9.156580924987793, + "log_odds_ratio": -0.005245847627520561, + "logits/chosen": -0.6666525602340698, + "logits/rejected": -0.7071576118469238, + "logps/chosen": -0.008825276978313923, + "logps/rejected": -1.9418580532073975, + "loss": 2.2029, + "nll_loss": 0.5501908659934998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008825276745483279, + "rewards/margins": 0.19330328702926636, + "rewards/rejected": -0.1941857933998108, + "step": 4796 + }, + { + "epoch": 3.317427385892116, + "grad_norm": 8.450002670288086, + "learning_rate": 3.712540341171047e-05, + "log_odds_chosen": 10.313995361328125, + "log_odds_ratio": -5.322957440512255e-05, + "logits/chosen": -0.7361082434654236, + "logits/rejected": -0.7873325347900391, + "logps/chosen": -0.00023771397536620498, + "logps/rejected": -1.855948567390442, + "loss": 2.2595, + "nll_loss": 0.5648688077926636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3771397536620498e-05, + "rewards/margins": 0.18557108938694, + "rewards/rejected": -0.1855948567390442, + "step": 4797 + }, + { + "epoch": 3.3181189488243428, + "grad_norm": 10.989029884338379, + "learning_rate": 3.712156139542032e-05, + "log_odds_chosen": 7.254358291625977, + "log_odds_ratio": -0.11698737740516663, + "logits/chosen": -0.6756665706634521, + "logits/rejected": -0.6980599164962769, + "logps/chosen": -0.028153615072369576, + "logps/rejected": -1.4995083808898926, + "loss": 1.6483, + "nll_loss": 0.40037593245506287, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028153618331998587, + "rewards/margins": 0.14713549613952637, + "rewards/rejected": -0.14995084702968597, + "step": 4798 + }, + { + "epoch": 3.3188105117565696, + "grad_norm": 5.330296039581299, + "learning_rate": 3.711771937913017e-05, + "log_odds_chosen": 9.479177474975586, + "log_odds_ratio": -0.00011878873192472383, + "logits/chosen": -0.49675679206848145, + "logits/rejected": -0.4976978600025177, + "logps/chosen": -0.0007499220664612949, + "logps/rejected": -1.5317151546478271, + "loss": 1.6152, + "nll_loss": 0.40379104018211365, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.499221101170406e-05, + "rewards/margins": 0.1530965268611908, + "rewards/rejected": -0.15317150950431824, + "step": 4799 + }, + { + "epoch": 3.3195020746887964, + "grad_norm": 9.043655395507812, + "learning_rate": 3.711387736284002e-05, + "log_odds_chosen": 8.289007186889648, + "log_odds_ratio": -0.14393651485443115, + "logits/chosen": -0.4536038339138031, + "logits/rejected": -0.46336764097213745, + "logps/chosen": -0.0383828841149807, + "logps/rejected": -1.167110562324524, + "loss": 1.2512, + "nll_loss": 0.2983952462673187, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038382881321012974, + "rewards/margins": 0.11287276446819305, + "rewards/rejected": -0.11671105772256851, + "step": 4800 + }, + { + "epoch": 3.3201936376210233, + "grad_norm": 6.758234024047852, + "learning_rate": 3.7110035346549874e-05, + "log_odds_chosen": 5.697368621826172, + "log_odds_ratio": -0.11512251198291779, + "logits/chosen": -0.019853461533784866, + "logits/rejected": -0.1009041965007782, + "logps/chosen": -0.0331367626786232, + "logps/rejected": -0.8319405913352966, + "loss": 1.7912, + "nll_loss": 0.436276912689209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003313676454126835, + "rewards/margins": 0.07988037914037704, + "rewards/rejected": -0.0831940546631813, + "step": 4801 + }, + { + "epoch": 3.3208852005532505, + "grad_norm": 10.116432189941406, + "learning_rate": 3.710619333025972e-05, + "log_odds_chosen": 8.380141258239746, + "log_odds_ratio": -0.005037578754127026, + "logits/chosen": -0.3611530661582947, + "logits/rejected": -0.36258572340011597, + "logps/chosen": -0.012789360247552395, + "logps/rejected": -1.5567326545715332, + "loss": 2.2324, + "nll_loss": 0.557590663433075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012789360480383039, + "rewards/margins": 0.15439432859420776, + "rewards/rejected": -0.15567326545715332, + "step": 4802 + }, + { + "epoch": 3.3215767634854774, + "grad_norm": 17.863174438476562, + "learning_rate": 3.710235131396957e-05, + "log_odds_chosen": 8.179805755615234, + "log_odds_ratio": -0.006676828023046255, + "logits/chosen": -0.6714662909507751, + "logits/rejected": -0.7768672704696655, + "logps/chosen": -0.08153266459703445, + "logps/rejected": -1.7913302183151245, + "loss": 1.9364, + "nll_loss": 0.4834328889846802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008153267204761505, + "rewards/margins": 0.17097975313663483, + "rewards/rejected": -0.17913301289081573, + "step": 4803 + }, + { + "epoch": 3.322268326417704, + "grad_norm": 6.330430030822754, + "learning_rate": 3.7098509297679425e-05, + "log_odds_chosen": 8.275510787963867, + "log_odds_ratio": -0.006861433852463961, + "logits/chosen": -0.5030953288078308, + "logits/rejected": -0.6301823258399963, + "logps/chosen": -0.003684797789901495, + "logps/rejected": -1.0211801528930664, + "loss": 2.1073, + "nll_loss": 0.5261452794075012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036847975570708513, + "rewards/margins": 0.10174953937530518, + "rewards/rejected": -0.10211801528930664, + "step": 4804 + }, + { + "epoch": 3.322959889349931, + "grad_norm": 5.604506015777588, + "learning_rate": 3.709466728138927e-05, + "log_odds_chosen": 9.087284088134766, + "log_odds_ratio": -0.000316342047881335, + "logits/chosen": -0.32346826791763306, + "logits/rejected": -0.3070969581604004, + "logps/chosen": -0.00042608988587744534, + "logps/rejected": -1.4158622026443481, + "loss": 1.1869, + "nll_loss": 0.2966977059841156, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.260899004293606e-05, + "rewards/margins": 0.14154361188411713, + "rewards/rejected": -0.14158621430397034, + "step": 4805 + }, + { + "epoch": 3.323651452282158, + "grad_norm": 7.937886714935303, + "learning_rate": 3.709082526509913e-05, + "log_odds_chosen": 9.678321838378906, + "log_odds_ratio": -0.00020336979650892317, + "logits/chosen": -0.5113595724105835, + "logits/rejected": -0.5489328503608704, + "logps/chosen": -0.00028491675038821995, + "logps/rejected": -1.596942663192749, + "loss": 1.0067, + "nll_loss": 0.2516666650772095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8491673219832592e-05, + "rewards/margins": 0.1596657633781433, + "rewards/rejected": -0.15969425439834595, + "step": 4806 + }, + { + "epoch": 3.3243430152143847, + "grad_norm": 11.258094787597656, + "learning_rate": 3.7086983248808975e-05, + "log_odds_chosen": 9.129833221435547, + "log_odds_ratio": -0.0031539711635559797, + "logits/chosen": -0.3189602792263031, + "logits/rejected": -0.4230721890926361, + "logps/chosen": -0.03089648112654686, + "logps/rejected": -1.9423742294311523, + "loss": 1.3906, + "nll_loss": 0.3473414182662964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030896479729562998, + "rewards/margins": 0.19114777445793152, + "rewards/rejected": -0.19423742592334747, + "step": 4807 + }, + { + "epoch": 3.3250345781466115, + "grad_norm": 10.421914100646973, + "learning_rate": 3.708314123251883e-05, + "log_odds_chosen": 9.675724029541016, + "log_odds_ratio": -0.0007099607028067112, + "logits/chosen": -0.479727566242218, + "logits/rejected": -0.49939435720443726, + "logps/chosen": -0.018236767500638962, + "logps/rejected": -2.0636579990386963, + "loss": 1.6704, + "nll_loss": 0.4175257384777069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001823676866479218, + "rewards/margins": 0.2045421302318573, + "rewards/rejected": -0.20636579394340515, + "step": 4808 + }, + { + "epoch": 3.3257261410788383, + "grad_norm": 7.315793037414551, + "learning_rate": 3.707929921622868e-05, + "log_odds_chosen": 8.784111976623535, + "log_odds_ratio": -0.03864699602127075, + "logits/chosen": -0.43619537353515625, + "logits/rejected": -0.4987362325191498, + "logps/chosen": -0.012032095342874527, + "logps/rejected": -1.2803680896759033, + "loss": 1.5409, + "nll_loss": 0.38137125968933105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012032096274197102, + "rewards/margins": 0.12683358788490295, + "rewards/rejected": -0.12803681194782257, + "step": 4809 + }, + { + "epoch": 3.326417704011065, + "grad_norm": 9.22579574584961, + "learning_rate": 3.707545719993853e-05, + "log_odds_chosen": 7.742648124694824, + "log_odds_ratio": -0.022376982495188713, + "logits/chosen": -0.3023233413696289, + "logits/rejected": -0.4116421937942505, + "logps/chosen": -0.008770937100052834, + "logps/rejected": -1.4326858520507812, + "loss": 1.9755, + "nll_loss": 0.491626501083374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008770937565714121, + "rewards/margins": 0.14239150285720825, + "rewards/rejected": -0.14326860010623932, + "step": 4810 + }, + { + "epoch": 3.327109266943292, + "grad_norm": 15.761456489562988, + "learning_rate": 3.707161518364838e-05, + "log_odds_chosen": 11.231200218200684, + "log_odds_ratio": -2.7991562092211097e-05, + "logits/chosen": -0.5510903596878052, + "logits/rejected": -0.5870881080627441, + "logps/chosen": -0.00022866626386530697, + "logps/rejected": -2.7161855697631836, + "loss": 1.3399, + "nll_loss": 0.3349756598472595, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2866624931339175e-05, + "rewards/margins": 0.27159571647644043, + "rewards/rejected": -0.2716185748577118, + "step": 4811 + }, + { + "epoch": 3.327800829875519, + "grad_norm": 10.318330764770508, + "learning_rate": 3.706777316735823e-05, + "log_odds_chosen": 7.601865768432617, + "log_odds_ratio": -0.08645662665367126, + "logits/chosen": -0.3268486559391022, + "logits/rejected": -0.3267649710178375, + "logps/chosen": -0.019028767943382263, + "logps/rejected": -1.6411206722259521, + "loss": 1.4555, + "nll_loss": 0.3552231788635254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001902876771055162, + "rewards/margins": 0.16220919787883759, + "rewards/rejected": -0.16411206126213074, + "step": 4812 + }, + { + "epoch": 3.3284923928077457, + "grad_norm": 7.725826740264893, + "learning_rate": 3.706393115106808e-05, + "log_odds_chosen": 7.646389484405518, + "log_odds_ratio": -0.010575213469564915, + "logits/chosen": -0.27327248454093933, + "logits/rejected": -0.29643142223358154, + "logps/chosen": -0.023020073771476746, + "logps/rejected": -1.444989562034607, + "loss": 1.4151, + "nll_loss": 0.35272642970085144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002302007284015417, + "rewards/margins": 0.14219695329666138, + "rewards/rejected": -0.14449895918369293, + "step": 4813 + }, + { + "epoch": 3.3291839557399725, + "grad_norm": 7.744783401489258, + "learning_rate": 3.706008913477793e-05, + "log_odds_chosen": 8.471439361572266, + "log_odds_ratio": -0.08351105451583862, + "logits/chosen": -0.23322007060050964, + "logits/rejected": -0.14264808595180511, + "logps/chosen": -0.01474690344184637, + "logps/rejected": -1.1287660598754883, + "loss": 1.9275, + "nll_loss": 0.47353053092956543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014746903907507658, + "rewards/margins": 0.11140191555023193, + "rewards/rejected": -0.11287661641836166, + "step": 4814 + }, + { + "epoch": 3.3298755186721993, + "grad_norm": 5.945075035095215, + "learning_rate": 3.705624711848779e-05, + "log_odds_chosen": 7.3363752365112305, + "log_odds_ratio": -0.008688355796039104, + "logits/chosen": -0.5283100008964539, + "logits/rejected": -0.5767071843147278, + "logps/chosen": -0.039233021438121796, + "logps/rejected": -1.4986639022827148, + "loss": 1.1494, + "nll_loss": 0.28649193048477173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003923302050679922, + "rewards/margins": 0.14594310522079468, + "rewards/rejected": -0.14986640214920044, + "step": 4815 + }, + { + "epoch": 3.330567081604426, + "grad_norm": 7.487738609313965, + "learning_rate": 3.7052405102197634e-05, + "log_odds_chosen": 7.99285888671875, + "log_odds_ratio": -0.182649627327919, + "logits/chosen": -0.5866954326629639, + "logits/rejected": -0.5401970148086548, + "logps/chosen": -0.02980240248143673, + "logps/rejected": -1.5133633613586426, + "loss": 1.9412, + "nll_loss": 0.46703076362609863, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0029802401550114155, + "rewards/margins": 0.148356094956398, + "rewards/rejected": -0.15133635699748993, + "step": 4816 + }, + { + "epoch": 3.331258644536653, + "grad_norm": 12.524292945861816, + "learning_rate": 3.7048563085907486e-05, + "log_odds_chosen": 7.655625343322754, + "log_odds_ratio": -0.0921371728181839, + "logits/chosen": -0.5067025423049927, + "logits/rejected": -0.49522316455841064, + "logps/chosen": -0.06759315729141235, + "logps/rejected": -1.3731836080551147, + "loss": 1.9436, + "nll_loss": 0.4766749143600464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006759315729141235, + "rewards/margins": 0.130559042096138, + "rewards/rejected": -0.13731835782527924, + "step": 4817 + }, + { + "epoch": 3.33195020746888, + "grad_norm": 6.2454938888549805, + "learning_rate": 3.704472106961734e-05, + "log_odds_chosen": 7.497071743011475, + "log_odds_ratio": -0.12156727910041809, + "logits/chosen": -0.2898300588130951, + "logits/rejected": -0.28401628136634827, + "logps/chosen": -0.028395526111125946, + "logps/rejected": -1.1893532276153564, + "loss": 1.7249, + "nll_loss": 0.4190668761730194, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002839552704244852, + "rewards/margins": 0.11609578132629395, + "rewards/rejected": -0.11893533170223236, + "step": 4818 + }, + { + "epoch": 3.3326417704011067, + "grad_norm": 9.945439338684082, + "learning_rate": 3.704087905332719e-05, + "log_odds_chosen": 7.445404052734375, + "log_odds_ratio": -0.027865968644618988, + "logits/chosen": -0.3710874319076538, + "logits/rejected": -0.4082677662372589, + "logps/chosen": -0.015139044262468815, + "logps/rejected": -1.3844302892684937, + "loss": 1.3629, + "nll_loss": 0.3379259407520294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015139044262468815, + "rewards/margins": 0.13692912459373474, + "rewards/rejected": -0.13844303786754608, + "step": 4819 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 6.541971206665039, + "learning_rate": 3.7037037037037037e-05, + "log_odds_chosen": 9.330412864685059, + "log_odds_ratio": -0.0032438477501273155, + "logits/chosen": -0.21376971900463104, + "logits/rejected": -0.23448964953422546, + "logps/chosen": -0.010493476875126362, + "logps/rejected": -1.3022187948226929, + "loss": 1.7931, + "nll_loss": 0.4479522705078125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010493475710973144, + "rewards/margins": 0.12917253375053406, + "rewards/rejected": -0.1302218735218048, + "step": 4820 + }, + { + "epoch": 3.3340248962655603, + "grad_norm": 5.288464069366455, + "learning_rate": 3.703319502074689e-05, + "log_odds_chosen": 9.160746574401855, + "log_odds_ratio": -0.0006003558519296348, + "logits/chosen": -0.060743916779756546, + "logits/rejected": -0.1093602329492569, + "logps/chosen": -0.009671274572610855, + "logps/rejected": -1.690582513809204, + "loss": 1.7897, + "nll_loss": 0.4473611116409302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009671273874118924, + "rewards/margins": 0.16809113323688507, + "rewards/rejected": -0.16905826330184937, + "step": 4821 + }, + { + "epoch": 3.334716459197787, + "grad_norm": 10.332783699035645, + "learning_rate": 3.702935300445674e-05, + "log_odds_chosen": 10.676567077636719, + "log_odds_ratio": -0.00016885343939065933, + "logits/chosen": -0.6289178133010864, + "logits/rejected": -0.711728036403656, + "logps/chosen": -0.006669633090496063, + "logps/rejected": -2.646531105041504, + "loss": 1.7676, + "nll_loss": 0.44187238812446594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000666963285766542, + "rewards/margins": 0.26398617029190063, + "rewards/rejected": -0.26465311646461487, + "step": 4822 + }, + { + "epoch": 3.335408022130014, + "grad_norm": 8.359293937683105, + "learning_rate": 3.702551098816659e-05, + "log_odds_chosen": 9.531179428100586, + "log_odds_ratio": -0.0008255833527073264, + "logits/chosen": -0.38351601362228394, + "logits/rejected": -0.4077991545200348, + "logps/chosen": -0.012399173341691494, + "logps/rejected": -2.359907388687134, + "loss": 1.2938, + "nll_loss": 0.32336685061454773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012399173574522138, + "rewards/margins": 0.23475082218647003, + "rewards/rejected": -0.2359907627105713, + "step": 4823 + }, + { + "epoch": 3.336099585062241, + "grad_norm": 11.444721221923828, + "learning_rate": 3.7021668971876446e-05, + "log_odds_chosen": 8.420994758605957, + "log_odds_ratio": -0.003699307329952717, + "logits/chosen": -0.10867477208375931, + "logits/rejected": -0.1305103600025177, + "logps/chosen": -0.01947128400206566, + "logps/rejected": -3.052764654159546, + "loss": 1.8229, + "nll_loss": 0.45536285638809204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019471283303573728, + "rewards/margins": 0.30332931876182556, + "rewards/rejected": -0.30527645349502563, + "step": 4824 + }, + { + "epoch": 3.3367911479944676, + "grad_norm": 8.161203384399414, + "learning_rate": 3.701782695558629e-05, + "log_odds_chosen": 7.156606197357178, + "log_odds_ratio": -0.05996629595756531, + "logits/chosen": -0.3300841152667999, + "logits/rejected": -0.3203493356704712, + "logps/chosen": -0.018597949296236038, + "logps/rejected": -1.0929030179977417, + "loss": 1.774, + "nll_loss": 0.4375021159648895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00185979506932199, + "rewards/margins": 0.10743050277233124, + "rewards/rejected": -0.10929030179977417, + "step": 4825 + }, + { + "epoch": 3.3374827109266945, + "grad_norm": 7.885430812835693, + "learning_rate": 3.7013984939296144e-05, + "log_odds_chosen": 9.564796447753906, + "log_odds_ratio": -0.0009235774632543325, + "logits/chosen": -0.48841261863708496, + "logits/rejected": -0.5725415945053101, + "logps/chosen": -0.013065568171441555, + "logps/rejected": -2.3977560997009277, + "loss": 1.3252, + "nll_loss": 0.3312044143676758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013065567472949624, + "rewards/margins": 0.23846904933452606, + "rewards/rejected": -0.239775612950325, + "step": 4826 + }, + { + "epoch": 3.3381742738589213, + "grad_norm": 16.353010177612305, + "learning_rate": 3.7010142923006e-05, + "log_odds_chosen": 9.778549194335938, + "log_odds_ratio": -0.0011052426416426897, + "logits/chosen": -0.46861732006073, + "logits/rejected": -0.5008033514022827, + "logps/chosen": -0.007358612027019262, + "logps/rejected": -2.2930526733398438, + "loss": 2.2518, + "nll_loss": 0.562834620475769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007358611328527331, + "rewards/margins": 0.2285693883895874, + "rewards/rejected": -0.22930525243282318, + "step": 4827 + }, + { + "epoch": 3.338865836791148, + "grad_norm": 6.472213268280029, + "learning_rate": 3.700630090671585e-05, + "log_odds_chosen": 9.00424575805664, + "log_odds_ratio": -0.0024379806127399206, + "logits/chosen": -0.08923661708831787, + "logits/rejected": -0.1967790573835373, + "logps/chosen": -0.0013902625069022179, + "logps/rejected": -1.270837664604187, + "loss": 1.1089, + "nll_loss": 0.2769761085510254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013902624777983874, + "rewards/margins": 0.12694475054740906, + "rewards/rejected": -0.12708376348018646, + "step": 4828 + }, + { + "epoch": 3.339557399723375, + "grad_norm": 10.397558212280273, + "learning_rate": 3.7002458890425695e-05, + "log_odds_chosen": 9.968124389648438, + "log_odds_ratio": -0.00025020475732162595, + "logits/chosen": -0.13119478523731232, + "logits/rejected": -0.2607486844062805, + "logps/chosen": -0.013811449520289898, + "logps/rejected": -2.4385852813720703, + "loss": 1.4208, + "nll_loss": 0.35518527030944824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013811449753120542, + "rewards/margins": 0.2424774020910263, + "rewards/rejected": -0.24385854601860046, + "step": 4829 + }, + { + "epoch": 3.340248962655602, + "grad_norm": 5.243896007537842, + "learning_rate": 3.699861687413555e-05, + "log_odds_chosen": 9.199673652648926, + "log_odds_ratio": -0.0005805004620924592, + "logits/chosen": -0.0678810179233551, + "logits/rejected": -0.017454490065574646, + "logps/chosen": -0.004220792558044195, + "logps/rejected": -1.9428520202636719, + "loss": 1.6269, + "nll_loss": 0.40665626525878906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004220792616251856, + "rewards/margins": 0.1938631236553192, + "rewards/rejected": -0.19428519904613495, + "step": 4830 + }, + { + "epoch": 3.3409405255878286, + "grad_norm": 9.78297233581543, + "learning_rate": 3.69947748578454e-05, + "log_odds_chosen": 9.642803192138672, + "log_odds_ratio": -0.00012088124640285969, + "logits/chosen": -0.5920174717903137, + "logits/rejected": -0.6880452036857605, + "logps/chosen": -0.0005044254357926548, + "logps/rejected": -1.7257821559906006, + "loss": 1.2987, + "nll_loss": 0.3246620297431946, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.044254066888243e-05, + "rewards/margins": 0.17252777516841888, + "rewards/rejected": -0.17257821559906006, + "step": 4831 + }, + { + "epoch": 3.3416320885200554, + "grad_norm": 17.485815048217773, + "learning_rate": 3.6990932841555246e-05, + "log_odds_chosen": 8.459027290344238, + "log_odds_ratio": -0.009617390111088753, + "logits/chosen": -0.39295753836631775, + "logits/rejected": -0.48550981283187866, + "logps/chosen": -0.03097320720553398, + "logps/rejected": -1.9759016036987305, + "loss": 1.6381, + "nll_loss": 0.4085724651813507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030973206739872694, + "rewards/margins": 0.19449284672737122, + "rewards/rejected": -0.197590172290802, + "step": 4832 + }, + { + "epoch": 3.3423236514522823, + "grad_norm": 8.457892417907715, + "learning_rate": 3.6987090825265105e-05, + "log_odds_chosen": 10.580424308776855, + "log_odds_ratio": -5.697977030649781e-05, + "logits/chosen": -0.4374619722366333, + "logits/rejected": -0.5243746042251587, + "logps/chosen": -0.00043220724910497665, + "logps/rejected": -2.430696964263916, + "loss": 1.639, + "nll_loss": 0.40974873304367065, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322072709328495e-05, + "rewards/margins": 0.243026465177536, + "rewards/rejected": -0.24306970834732056, + "step": 4833 + }, + { + "epoch": 3.343015214384509, + "grad_norm": 10.142221450805664, + "learning_rate": 3.698324880897495e-05, + "log_odds_chosen": 8.102699279785156, + "log_odds_ratio": -0.005135298706591129, + "logits/chosen": -0.7511797547340393, + "logits/rejected": -0.7273877859115601, + "logps/chosen": -0.006073735188692808, + "logps/rejected": -1.4573071002960205, + "loss": 2.3506, + "nll_loss": 0.5871455073356628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006073735421523452, + "rewards/margins": 0.14512333273887634, + "rewards/rejected": -0.14573070406913757, + "step": 4834 + }, + { + "epoch": 3.343706777316736, + "grad_norm": 8.492439270019531, + "learning_rate": 3.69794067926848e-05, + "log_odds_chosen": 10.246946334838867, + "log_odds_ratio": -0.0009441052097827196, + "logits/chosen": -0.5589309930801392, + "logits/rejected": -0.5878577828407288, + "logps/chosen": -0.016292212530970573, + "logps/rejected": -2.1811840534210205, + "loss": 1.4918, + "nll_loss": 0.3728483021259308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016292212530970573, + "rewards/margins": 0.21648918092250824, + "rewards/rejected": -0.21811839938163757, + "step": 4835 + }, + { + "epoch": 3.3443983402489628, + "grad_norm": 10.190574645996094, + "learning_rate": 3.6975564776394655e-05, + "log_odds_chosen": 8.5763521194458, + "log_odds_ratio": -0.00470086932182312, + "logits/chosen": -0.5186557173728943, + "logits/rejected": -0.6129405498504639, + "logps/chosen": -0.03856277093291283, + "logps/rejected": -1.8780362606048584, + "loss": 1.6883, + "nll_loss": 0.42161381244659424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038562770932912827, + "rewards/margins": 0.18394732475280762, + "rewards/rejected": -0.18780359625816345, + "step": 4836 + }, + { + "epoch": 3.3450899031811896, + "grad_norm": 7.443319797515869, + "learning_rate": 3.697172276010451e-05, + "log_odds_chosen": 9.213088989257812, + "log_odds_ratio": -0.0003929064841940999, + "logits/chosen": -0.303056925535202, + "logits/rejected": -0.3446924388408661, + "logps/chosen": -0.000988618703559041, + "logps/rejected": -1.5611990690231323, + "loss": 1.193, + "nll_loss": 0.29819926619529724, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.88618703559041e-05, + "rewards/margins": 0.15602104365825653, + "rewards/rejected": -0.1561199128627777, + "step": 4837 + }, + { + "epoch": 3.3457814661134164, + "grad_norm": 6.408515453338623, + "learning_rate": 3.6967880743814353e-05, + "log_odds_chosen": 8.536367416381836, + "log_odds_ratio": -0.0014657180290669203, + "logits/chosen": -0.2517034113407135, + "logits/rejected": -0.2618112862110138, + "logps/chosen": -0.024171721190214157, + "logps/rejected": -2.235044479370117, + "loss": 1.3209, + "nll_loss": 0.3300686180591583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024171723052859306, + "rewards/margins": 0.2210872769355774, + "rewards/rejected": -0.2235044538974762, + "step": 4838 + }, + { + "epoch": 3.3464730290456433, + "grad_norm": 4.3202805519104, + "learning_rate": 3.6964038727524206e-05, + "log_odds_chosen": 9.348167419433594, + "log_odds_ratio": -0.00027577090077102184, + "logits/chosen": -0.2292935848236084, + "logits/rejected": -0.26607123017311096, + "logps/chosen": -0.00010278346599079669, + "logps/rejected": -1.0340757369995117, + "loss": 1.307, + "nll_loss": 0.32673293352127075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.027834696287755e-05, + "rewards/margins": 0.10339729487895966, + "rewards/rejected": -0.10340756922960281, + "step": 4839 + }, + { + "epoch": 3.34716459197787, + "grad_norm": 8.864399909973145, + "learning_rate": 3.696019671123406e-05, + "log_odds_chosen": 6.516083240509033, + "log_odds_ratio": -0.061890941113233566, + "logits/chosen": -0.3812077045440674, + "logits/rejected": -0.3444467782974243, + "logps/chosen": -0.09658174216747284, + "logps/rejected": -1.4395780563354492, + "loss": 1.7208, + "nll_loss": 0.4240223467350006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009658175520598888, + "rewards/margins": 0.1342996507883072, + "rewards/rejected": -0.14395782351493835, + "step": 4840 + }, + { + "epoch": 3.347856154910097, + "grad_norm": 7.218293190002441, + "learning_rate": 3.6956354694943904e-05, + "log_odds_chosen": 7.828502655029297, + "log_odds_ratio": -0.00611227797344327, + "logits/chosen": -0.01647898741066456, + "logits/rejected": -0.04726487398147583, + "logps/chosen": -0.035098958760499954, + "logps/rejected": -1.889201045036316, + "loss": 1.7379, + "nll_loss": 0.4338761568069458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035098965745419264, + "rewards/margins": 0.18541020154953003, + "rewards/rejected": -0.18892011046409607, + "step": 4841 + }, + { + "epoch": 3.3485477178423237, + "grad_norm": 7.2084245681762695, + "learning_rate": 3.695251267865376e-05, + "log_odds_chosen": 8.534614562988281, + "log_odds_ratio": -0.0014289494138211012, + "logits/chosen": -0.6113095283508301, + "logits/rejected": -0.6475774645805359, + "logps/chosen": -0.0049095191061496735, + "logps/rejected": -1.720369815826416, + "loss": 1.427, + "nll_loss": 0.356608510017395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004909519338980317, + "rewards/margins": 0.17154602706432343, + "rewards/rejected": -0.17203697562217712, + "step": 4842 + }, + { + "epoch": 3.3492392807745506, + "grad_norm": 8.724457740783691, + "learning_rate": 3.694867066236361e-05, + "log_odds_chosen": 10.108199119567871, + "log_odds_ratio": -8.961541607277468e-05, + "logits/chosen": -0.22915613651275635, + "logits/rejected": -0.20698942244052887, + "logps/chosen": -0.00045968289487063885, + "logps/rejected": -1.7437655925750732, + "loss": 1.8042, + "nll_loss": 0.45104360580444336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.596828875946812e-05, + "rewards/margins": 0.17433059215545654, + "rewards/rejected": -0.17437656223773956, + "step": 4843 + }, + { + "epoch": 3.3499308437067774, + "grad_norm": 7.414542198181152, + "learning_rate": 3.694482864607346e-05, + "log_odds_chosen": 9.796012878417969, + "log_odds_ratio": -0.0003306611906737089, + "logits/chosen": -0.46491843461990356, + "logits/rejected": -0.5566118955612183, + "logps/chosen": -0.0025635913480073214, + "logps/rejected": -1.987081527709961, + "loss": 1.5142, + "nll_loss": 0.378519743680954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002563591697253287, + "rewards/margins": 0.19845178723335266, + "rewards/rejected": -0.19870814681053162, + "step": 4844 + }, + { + "epoch": 3.3506224066390042, + "grad_norm": 9.725508689880371, + "learning_rate": 3.6940986629783314e-05, + "log_odds_chosen": 10.11623764038086, + "log_odds_ratio": -0.0001505285908933729, + "logits/chosen": -0.8159855604171753, + "logits/rejected": -0.8474183082580566, + "logps/chosen": -0.0009152303100563586, + "logps/rejected": -2.013854742050171, + "loss": 1.4857, + "nll_loss": 0.3714017868041992, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.152302664006129e-05, + "rewards/margins": 0.2012939602136612, + "rewards/rejected": -0.2013854682445526, + "step": 4845 + }, + { + "epoch": 3.351313969571231, + "grad_norm": 10.209623336791992, + "learning_rate": 3.6937144613493166e-05, + "log_odds_chosen": 7.748871326446533, + "log_odds_ratio": -0.204082652926445, + "logits/chosen": -0.4716426134109497, + "logits/rejected": -0.5088870525360107, + "logps/chosen": -0.0646195188164711, + "logps/rejected": -1.4852840900421143, + "loss": 1.7175, + "nll_loss": 0.4089680314064026, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006461952812969685, + "rewards/margins": 0.14206644892692566, + "rewards/rejected": -0.14852841198444366, + "step": 4846 + }, + { + "epoch": 3.352005532503458, + "grad_norm": 40.184871673583984, + "learning_rate": 3.693330259720301e-05, + "log_odds_chosen": 8.97463607788086, + "log_odds_ratio": -0.5296390056610107, + "logits/chosen": -0.5999283194541931, + "logits/rejected": -0.6021513342857361, + "logps/chosen": -0.07349149882793427, + "logps/rejected": -2.196320056915283, + "loss": 2.5445, + "nll_loss": 0.5831631422042847, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007349150255322456, + "rewards/margins": 0.21228285133838654, + "rewards/rejected": -0.21963201463222504, + "step": 4847 + }, + { + "epoch": 3.3526970954356847, + "grad_norm": 8.293537139892578, + "learning_rate": 3.6929460580912864e-05, + "log_odds_chosen": 9.890531539916992, + "log_odds_ratio": -0.00047966151032596827, + "logits/chosen": -0.671005129814148, + "logits/rejected": -0.8335851430892944, + "logps/chosen": -0.013375879265367985, + "logps/rejected": -2.5320022106170654, + "loss": 1.8409, + "nll_loss": 0.46017351746559143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013375879498198628, + "rewards/margins": 0.25186264514923096, + "rewards/rejected": -0.2532002329826355, + "step": 4848 + }, + { + "epoch": 3.3533886583679116, + "grad_norm": 11.15304946899414, + "learning_rate": 3.692561856462272e-05, + "log_odds_chosen": 8.975371360778809, + "log_odds_ratio": -0.0040519824251532555, + "logits/chosen": -0.7027145624160767, + "logits/rejected": -0.708022952079773, + "logps/chosen": -0.008829191327095032, + "logps/rejected": -1.2773983478546143, + "loss": 2.2991, + "nll_loss": 0.5743774175643921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000882919121067971, + "rewards/margins": 0.12685692310333252, + "rewards/rejected": -0.12773984670639038, + "step": 4849 + }, + { + "epoch": 3.3540802213001384, + "grad_norm": 7.476949214935303, + "learning_rate": 3.692177654833256e-05, + "log_odds_chosen": 9.539048194885254, + "log_odds_ratio": -0.0002563097223173827, + "logits/chosen": -0.20793417096138, + "logits/rejected": -0.25936827063560486, + "logps/chosen": -0.0006166063249111176, + "logps/rejected": -1.699591875076294, + "loss": 2.0961, + "nll_loss": 0.5239871740341187, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166063394630328e-05, + "rewards/margins": 0.16989752650260925, + "rewards/rejected": -0.1699592024087906, + "step": 4850 + }, + { + "epoch": 3.354771784232365, + "grad_norm": 11.8095064163208, + "learning_rate": 3.691793453204242e-05, + "log_odds_chosen": 9.624149322509766, + "log_odds_ratio": -0.0007174991187639534, + "logits/chosen": -0.7241888046264648, + "logits/rejected": -0.7914281487464905, + "logps/chosen": -0.0007417750312015414, + "logps/rejected": -1.8069579601287842, + "loss": 1.6439, + "nll_loss": 0.41089504957199097, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.41775002097711e-05, + "rewards/margins": 0.18062162399291992, + "rewards/rejected": -0.1806957870721817, + "step": 4851 + }, + { + "epoch": 3.355463347164592, + "grad_norm": 9.437474250793457, + "learning_rate": 3.691409251575227e-05, + "log_odds_chosen": 10.25767707824707, + "log_odds_ratio": -7.604131678817794e-05, + "logits/chosen": -0.5359310507774353, + "logits/rejected": -0.5045655965805054, + "logps/chosen": -0.00021611034753732383, + "logps/rejected": -1.710390329360962, + "loss": 1.5811, + "nll_loss": 0.39527881145477295, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1611034753732383e-05, + "rewards/margins": 0.17101740837097168, + "rewards/rejected": -0.17103902995586395, + "step": 4852 + }, + { + "epoch": 3.356154910096819, + "grad_norm": 7.6020894050598145, + "learning_rate": 3.691025049946212e-05, + "log_odds_chosen": 7.330630302429199, + "log_odds_ratio": -0.1588706076145172, + "logits/chosen": -0.27602532505989075, + "logits/rejected": -0.3280216157436371, + "logps/chosen": -0.044778451323509216, + "logps/rejected": -1.4952619075775146, + "loss": 1.8527, + "nll_loss": 0.4472947120666504, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0044778455048799515, + "rewards/margins": 0.1450483500957489, + "rewards/rejected": -0.1495261937379837, + "step": 4853 + }, + { + "epoch": 3.3568464730290457, + "grad_norm": 12.778995513916016, + "learning_rate": 3.690640848317197e-05, + "log_odds_chosen": 8.721086502075195, + "log_odds_ratio": -0.12304294109344482, + "logits/chosen": -0.6769004464149475, + "logits/rejected": -0.7507296800613403, + "logps/chosen": -0.03270779550075531, + "logps/rejected": -1.7994873523712158, + "loss": 1.5283, + "nll_loss": 0.36977386474609375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032707795035094023, + "rewards/margins": 0.17667795717716217, + "rewards/rejected": -0.17994874715805054, + "step": 4854 + }, + { + "epoch": 3.3575380359612725, + "grad_norm": 12.528250694274902, + "learning_rate": 3.6902566466881825e-05, + "log_odds_chosen": 10.009822845458984, + "log_odds_ratio": -0.0009388787439092994, + "logits/chosen": -0.7347992062568665, + "logits/rejected": -0.8108212351799011, + "logps/chosen": -0.0009514364064671099, + "logps/rejected": -1.9790453910827637, + "loss": 1.9298, + "nll_loss": 0.4823543131351471, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.514363773632795e-05, + "rewards/margins": 0.1978093981742859, + "rewards/rejected": -0.1979045569896698, + "step": 4855 + }, + { + "epoch": 3.3582295988934994, + "grad_norm": 4.950071811676025, + "learning_rate": 3.689872445059167e-05, + "log_odds_chosen": 9.48250961303711, + "log_odds_ratio": -0.0001974797050934285, + "logits/chosen": -0.8554219603538513, + "logits/rejected": -0.9148414731025696, + "logps/chosen": -0.0004030237323604524, + "logps/rejected": -1.7633424997329712, + "loss": 1.3353, + "nll_loss": 0.3338037133216858, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.030237687402405e-05, + "rewards/margins": 0.1762939691543579, + "rewards/rejected": -0.17633426189422607, + "step": 4856 + }, + { + "epoch": 3.358921161825726, + "grad_norm": 10.450413703918457, + "learning_rate": 3.689488243430152e-05, + "log_odds_chosen": 9.980003356933594, + "log_odds_ratio": -0.00019624002743512392, + "logits/chosen": -0.467332661151886, + "logits/rejected": -0.5671140551567078, + "logps/chosen": -0.0003920574963558465, + "logps/rejected": -1.765358328819275, + "loss": 1.4584, + "nll_loss": 0.3645763397216797, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.920574818039313e-05, + "rewards/margins": 0.17649662494659424, + "rewards/rejected": -0.17653582990169525, + "step": 4857 + }, + { + "epoch": 3.359612724757953, + "grad_norm": 8.591510772705078, + "learning_rate": 3.6891040418011375e-05, + "log_odds_chosen": 8.480205535888672, + "log_odds_ratio": -0.0006487661739811301, + "logits/chosen": -0.47236570715904236, + "logits/rejected": -0.5941859483718872, + "logps/chosen": -0.0014029113808646798, + "logps/rejected": -1.2931849956512451, + "loss": 1.4132, + "nll_loss": 0.35322386026382446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014029112935531884, + "rewards/margins": 0.1291782110929489, + "rewards/rejected": -0.129318505525589, + "step": 4858 + }, + { + "epoch": 3.36030428769018, + "grad_norm": 9.483941078186035, + "learning_rate": 3.688719840172122e-05, + "log_odds_chosen": 8.947179794311523, + "log_odds_ratio": -0.009097904898226261, + "logits/chosen": -0.6214572191238403, + "logits/rejected": -0.6888337135314941, + "logps/chosen": -0.007219300139695406, + "logps/rejected": -1.9767446517944336, + "loss": 1.6643, + "nll_loss": 0.4151747226715088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007219300023280084, + "rewards/margins": 0.19695252180099487, + "rewards/rejected": -0.1976744532585144, + "step": 4859 + }, + { + "epoch": 3.3609958506224067, + "grad_norm": 5.145646572113037, + "learning_rate": 3.688335638543108e-05, + "log_odds_chosen": 10.351024627685547, + "log_odds_ratio": -0.00010055641178041697, + "logits/chosen": -0.37642523646354675, + "logits/rejected": -0.3933575749397278, + "logps/chosen": -0.00014752685092389584, + "logps/rejected": -1.5574370622634888, + "loss": 1.2852, + "nll_loss": 0.32127830386161804, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4752684364793822e-05, + "rewards/margins": 0.15572895109653473, + "rewards/rejected": -0.15574368834495544, + "step": 4860 + }, + { + "epoch": 3.3616874135546335, + "grad_norm": 7.796165943145752, + "learning_rate": 3.6879514369140926e-05, + "log_odds_chosen": 9.481037139892578, + "log_odds_ratio": -0.0020998227410018444, + "logits/chosen": -0.8685269355773926, + "logits/rejected": -0.9142651557922363, + "logps/chosen": -0.0064911977387964725, + "logps/rejected": -1.3805325031280518, + "loss": 1.9348, + "nll_loss": 0.48348888754844666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006491197855211794, + "rewards/margins": 0.13740414381027222, + "rewards/rejected": -0.13805325329303741, + "step": 4861 + }, + { + "epoch": 3.3623789764868603, + "grad_norm": 12.897394180297852, + "learning_rate": 3.687567235285078e-05, + "log_odds_chosen": 9.886244773864746, + "log_odds_ratio": -0.0001866334059741348, + "logits/chosen": -0.4743501543998718, + "logits/rejected": -0.5830299258232117, + "logps/chosen": -0.003318520961329341, + "logps/rejected": -1.95000159740448, + "loss": 2.0509, + "nll_loss": 0.5126988887786865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000331852090312168, + "rewards/margins": 0.19466832280158997, + "rewards/rejected": -0.19500017166137695, + "step": 4862 + }, + { + "epoch": 3.363070539419087, + "grad_norm": 11.533024787902832, + "learning_rate": 3.687183033656063e-05, + "log_odds_chosen": 10.184024810791016, + "log_odds_ratio": -0.00016964529640972614, + "logits/chosen": -0.6249281167984009, + "logits/rejected": -0.6705152988433838, + "logps/chosen": -0.0009269227739423513, + "logps/rejected": -2.1890594959259033, + "loss": 1.7934, + "nll_loss": 0.44832783937454224, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.269227302866057e-05, + "rewards/margins": 0.21881325542926788, + "rewards/rejected": -0.218905970454216, + "step": 4863 + }, + { + "epoch": 3.363762102351314, + "grad_norm": 9.165794372558594, + "learning_rate": 3.686798832027048e-05, + "log_odds_chosen": 9.309822082519531, + "log_odds_ratio": -0.0005651208339259028, + "logits/chosen": -0.6249001622200012, + "logits/rejected": -0.6750528216362, + "logps/chosen": -0.0012620101915672421, + "logps/rejected": -1.5672866106033325, + "loss": 1.7412, + "nll_loss": 0.4352557063102722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012620101915672421, + "rewards/margins": 0.15660247206687927, + "rewards/rejected": -0.15672868490219116, + "step": 4864 + }, + { + "epoch": 3.364453665283541, + "grad_norm": 11.619812965393066, + "learning_rate": 3.686414630398033e-05, + "log_odds_chosen": 9.562946319580078, + "log_odds_ratio": -8.201718446798623e-05, + "logits/chosen": -0.4055235981941223, + "logits/rejected": -0.4862651228904724, + "logps/chosen": -0.0003127045347355306, + "logps/rejected": -1.5968135595321655, + "loss": 1.5052, + "nll_loss": 0.37629374861717224, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1270450563170016e-05, + "rewards/margins": 0.15965008735656738, + "rewards/rejected": -0.15968134999275208, + "step": 4865 + }, + { + "epoch": 3.3651452282157677, + "grad_norm": 14.077691078186035, + "learning_rate": 3.686030428769018e-05, + "log_odds_chosen": 9.250917434692383, + "log_odds_ratio": -0.005956509616225958, + "logits/chosen": -0.8243609070777893, + "logits/rejected": -0.8653722405433655, + "logps/chosen": -0.008840306662023067, + "logps/rejected": -2.3019497394561768, + "loss": 2.1826, + "nll_loss": 0.5450628995895386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008840306545607746, + "rewards/margins": 0.22931094467639923, + "rewards/rejected": -0.23019498586654663, + "step": 4866 + }, + { + "epoch": 3.3658367911479945, + "grad_norm": 6.497355937957764, + "learning_rate": 3.6856462271400034e-05, + "log_odds_chosen": 6.605334281921387, + "log_odds_ratio": -0.17641203105449677, + "logits/chosen": -0.40605348348617554, + "logits/rejected": -0.38486599922180176, + "logps/chosen": -0.04633874073624611, + "logps/rejected": -1.4925726652145386, + "loss": 1.6609, + "nll_loss": 0.39758217334747314, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004633874632418156, + "rewards/margins": 0.14462338387966156, + "rewards/rejected": -0.14925727248191833, + "step": 4867 + }, + { + "epoch": 3.3665283540802213, + "grad_norm": 11.494439125061035, + "learning_rate": 3.685262025510988e-05, + "log_odds_chosen": 8.713448524475098, + "log_odds_ratio": -0.08428078889846802, + "logits/chosen": -0.39406687021255493, + "logits/rejected": -0.4824514389038086, + "logps/chosen": -0.014646649360656738, + "logps/rejected": -1.1748372316360474, + "loss": 2.0042, + "nll_loss": 0.4926164150238037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014646650524809957, + "rewards/margins": 0.11601905524730682, + "rewards/rejected": -0.1174837201833725, + "step": 4868 + }, + { + "epoch": 3.367219917012448, + "grad_norm": 8.511127471923828, + "learning_rate": 3.684877823881974e-05, + "log_odds_chosen": 10.085894584655762, + "log_odds_ratio": -0.00014831179578322917, + "logits/chosen": -0.6100676655769348, + "logits/rejected": -0.6811908483505249, + "logps/chosen": -0.00021410381305031478, + "logps/rejected": -1.6063182353973389, + "loss": 1.3799, + "nll_loss": 0.3449620306491852, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.141038203262724e-05, + "rewards/margins": 0.1606104075908661, + "rewards/rejected": -0.16063182055950165, + "step": 4869 + }, + { + "epoch": 3.367911479944675, + "grad_norm": 8.44887638092041, + "learning_rate": 3.6844936222529584e-05, + "log_odds_chosen": 9.12254524230957, + "log_odds_ratio": -0.0006699742516502738, + "logits/chosen": -0.6472569704055786, + "logits/rejected": -0.8005533814430237, + "logps/chosen": -0.006916233338415623, + "logps/rejected": -1.9187191724777222, + "loss": 1.4938, + "nll_loss": 0.37338870763778687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006916233105584979, + "rewards/margins": 0.1911802887916565, + "rewards/rejected": -0.19187191128730774, + "step": 4870 + }, + { + "epoch": 3.368603042876902, + "grad_norm": 14.379709243774414, + "learning_rate": 3.684109420623944e-05, + "log_odds_chosen": 9.948789596557617, + "log_odds_ratio": -8.950324263423681e-05, + "logits/chosen": -0.6824163794517517, + "logits/rejected": -0.6648140549659729, + "logps/chosen": -0.0002694717841222882, + "logps/rejected": -1.6847224235534668, + "loss": 1.8646, + "nll_loss": 0.4661399722099304, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.694718205020763e-05, + "rewards/margins": 0.16844528913497925, + "rewards/rejected": -0.16847223043441772, + "step": 4871 + }, + { + "epoch": 3.3692946058091287, + "grad_norm": 6.499044895172119, + "learning_rate": 3.683725218994929e-05, + "log_odds_chosen": 7.629302978515625, + "log_odds_ratio": -0.002777328947558999, + "logits/chosen": -0.327689528465271, + "logits/rejected": -0.2956370711326599, + "logps/chosen": -0.036704547703266144, + "logps/rejected": -2.166947841644287, + "loss": 1.5244, + "nll_loss": 0.38081902265548706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036704547237604856, + "rewards/margins": 0.2130243182182312, + "rewards/rejected": -0.21669480204582214, + "step": 4872 + }, + { + "epoch": 3.3699861687413555, + "grad_norm": 8.781851768493652, + "learning_rate": 3.683341017365914e-05, + "log_odds_chosen": 9.797788619995117, + "log_odds_ratio": -0.00033861229894682765, + "logits/chosen": -0.7021996974945068, + "logits/rejected": -0.8226416110992432, + "logps/chosen": -0.0005934900254942477, + "logps/rejected": -1.7794638872146606, + "loss": 1.8316, + "nll_loss": 0.45786577463150024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.934900400461629e-05, + "rewards/margins": 0.17788705229759216, + "rewards/rejected": -0.17794638872146606, + "step": 4873 + }, + { + "epoch": 3.3706777316735823, + "grad_norm": 8.821776390075684, + "learning_rate": 3.682956815736899e-05, + "log_odds_chosen": 8.472004890441895, + "log_odds_ratio": -0.000865703565068543, + "logits/chosen": -0.5522797703742981, + "logits/rejected": -0.5616713166236877, + "logps/chosen": -0.0003104619972873479, + "logps/rejected": -0.9349074363708496, + "loss": 2.4649, + "nll_loss": 0.6161264181137085, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.104619827354327e-05, + "rewards/margins": 0.09345970302820206, + "rewards/rejected": -0.09349074214696884, + "step": 4874 + }, + { + "epoch": 3.371369294605809, + "grad_norm": 12.452609062194824, + "learning_rate": 3.682572614107884e-05, + "log_odds_chosen": 8.774795532226562, + "log_odds_ratio": -0.0033878334797918797, + "logits/chosen": -0.46289071440696716, + "logits/rejected": -0.45690450072288513, + "logps/chosen": -0.019403763115406036, + "logps/rejected": -1.4844517707824707, + "loss": 1.9597, + "nll_loss": 0.4895821809768677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019403762416914105, + "rewards/margins": 0.14650480449199677, + "rewards/rejected": -0.14844517409801483, + "step": 4875 + }, + { + "epoch": 3.372060857538036, + "grad_norm": 14.034537315368652, + "learning_rate": 3.682188412478869e-05, + "log_odds_chosen": 9.291325569152832, + "log_odds_ratio": -0.00020458844664972275, + "logits/chosen": -0.5656931400299072, + "logits/rejected": -0.6621861457824707, + "logps/chosen": -0.0002466610458213836, + "logps/rejected": -1.292998194694519, + "loss": 2.6441, + "nll_loss": 0.6610121726989746, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4666105673532e-05, + "rewards/margins": 0.1292751580476761, + "rewards/rejected": -0.1292998194694519, + "step": 4876 + }, + { + "epoch": 3.372752420470263, + "grad_norm": 10.457889556884766, + "learning_rate": 3.681804210849854e-05, + "log_odds_chosen": 9.468912124633789, + "log_odds_ratio": -0.009811985306441784, + "logits/chosen": -0.4400988817214966, + "logits/rejected": -0.516470193862915, + "logps/chosen": -0.0058551025576889515, + "logps/rejected": -1.7885127067565918, + "loss": 1.9934, + "nll_loss": 0.49736106395721436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005855102790519595, + "rewards/margins": 0.1782657653093338, + "rewards/rejected": -0.17885126173496246, + "step": 4877 + }, + { + "epoch": 3.3734439834024896, + "grad_norm": 5.182315349578857, + "learning_rate": 3.68142000922084e-05, + "log_odds_chosen": 9.558467864990234, + "log_odds_ratio": -0.00024224047956522554, + "logits/chosen": -0.507156252861023, + "logits/rejected": -0.5910125970840454, + "logps/chosen": -0.006765010766685009, + "logps/rejected": -2.19610595703125, + "loss": 1.122, + "nll_loss": 0.2804679572582245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006765010766685009, + "rewards/margins": 0.2189340889453888, + "rewards/rejected": -0.21961060166358948, + "step": 4878 + }, + { + "epoch": 3.3741355463347165, + "grad_norm": 8.035346984863281, + "learning_rate": 3.681035807591824e-05, + "log_odds_chosen": 8.364526748657227, + "log_odds_ratio": -0.0009362439159303904, + "logits/chosen": -0.5520797967910767, + "logits/rejected": -0.5805567502975464, + "logps/chosen": -0.006305334623903036, + "logps/rejected": -1.6327786445617676, + "loss": 1.2901, + "nll_loss": 0.32244062423706055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006305334973149002, + "rewards/margins": 0.1626473367214203, + "rewards/rejected": -0.16327786445617676, + "step": 4879 + }, + { + "epoch": 3.3748271092669433, + "grad_norm": 9.362500190734863, + "learning_rate": 3.6806516059628095e-05, + "log_odds_chosen": 6.6161298751831055, + "log_odds_ratio": -0.12561531364917755, + "logits/chosen": -0.332377552986145, + "logits/rejected": -0.4098805785179138, + "logps/chosen": -0.02750096283853054, + "logps/rejected": -1.0053969621658325, + "loss": 1.5629, + "nll_loss": 0.37816768884658813, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027500963769853115, + "rewards/margins": 0.09778958559036255, + "rewards/rejected": -0.1005396842956543, + "step": 4880 + }, + { + "epoch": 3.37551867219917, + "grad_norm": 12.455388069152832, + "learning_rate": 3.680267404333795e-05, + "log_odds_chosen": 8.346939086914062, + "log_odds_ratio": -0.01503918319940567, + "logits/chosen": -0.5830432176589966, + "logits/rejected": -0.6676121950149536, + "logps/chosen": -0.005626752506941557, + "logps/rejected": -1.3593151569366455, + "loss": 1.3495, + "nll_loss": 0.3358650207519531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005626753554679453, + "rewards/margins": 0.13536885380744934, + "rewards/rejected": -0.13593152165412903, + "step": 4881 + }, + { + "epoch": 3.376210235131397, + "grad_norm": 12.881577491760254, + "learning_rate": 3.67988320270478e-05, + "log_odds_chosen": 8.353086471557617, + "log_odds_ratio": -0.213613823056221, + "logits/chosen": -0.46307358145713806, + "logits/rejected": -0.4926253855228424, + "logps/chosen": -0.03921017795801163, + "logps/rejected": -1.454315185546875, + "loss": 1.2457, + "nll_loss": 0.2900546193122864, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00392101751640439, + "rewards/margins": 0.1415105164051056, + "rewards/rejected": -0.1454315334558487, + "step": 4882 + }, + { + "epoch": 3.376901798063624, + "grad_norm": 7.863828182220459, + "learning_rate": 3.6794990010757646e-05, + "log_odds_chosen": 8.951114654541016, + "log_odds_ratio": -0.025473404675722122, + "logits/chosen": -0.45563632249832153, + "logits/rejected": -0.5035054683685303, + "logps/chosen": -0.012901807203888893, + "logps/rejected": -1.6659057140350342, + "loss": 1.5606, + "nll_loss": 0.3876059353351593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012901807203888893, + "rewards/margins": 0.1653003990650177, + "rewards/rejected": -0.16659057140350342, + "step": 4883 + }, + { + "epoch": 3.3775933609958506, + "grad_norm": 4.709164619445801, + "learning_rate": 3.67911479944675e-05, + "log_odds_chosen": 8.327884674072266, + "log_odds_ratio": -0.0027023141738027334, + "logits/chosen": -0.37330159544944763, + "logits/rejected": -0.4148619771003723, + "logps/chosen": -0.0011940773110836744, + "logps/rejected": -1.006005048751831, + "loss": 1.4531, + "nll_loss": 0.36301249265670776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001194077412947081, + "rewards/margins": 0.10048110783100128, + "rewards/rejected": -0.10060051828622818, + "step": 4884 + }, + { + "epoch": 3.3782849239280774, + "grad_norm": 16.095605850219727, + "learning_rate": 3.678730597817735e-05, + "log_odds_chosen": 9.359745025634766, + "log_odds_ratio": -0.002318818122148514, + "logits/chosen": -0.1334265023469925, + "logits/rejected": -0.3042029142379761, + "logps/chosen": -0.009565573185682297, + "logps/rejected": -1.9956917762756348, + "loss": 1.9727, + "nll_loss": 0.49293580651283264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009565572836436331, + "rewards/margins": 0.19861261546611786, + "rewards/rejected": -0.1995691955089569, + "step": 4885 + }, + { + "epoch": 3.3789764868603043, + "grad_norm": 5.004652500152588, + "learning_rate": 3.6783463961887196e-05, + "log_odds_chosen": 8.115644454956055, + "log_odds_ratio": -0.01304934173822403, + "logits/chosen": -0.4006246328353882, + "logits/rejected": -0.467983216047287, + "logps/chosen": -0.018195848912000656, + "logps/rejected": -1.8651471138000488, + "loss": 1.3653, + "nll_loss": 0.34003227949142456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018195848679170012, + "rewards/margins": 0.18469512462615967, + "rewards/rejected": -0.1865147203207016, + "step": 4886 + }, + { + "epoch": 3.379668049792531, + "grad_norm": 8.98936939239502, + "learning_rate": 3.6779621945597055e-05, + "log_odds_chosen": 9.405214309692383, + "log_odds_ratio": -0.0025051278062164783, + "logits/chosen": -0.5249983072280884, + "logits/rejected": -0.5861697793006897, + "logps/chosen": -0.016129275783896446, + "logps/rejected": -1.8816158771514893, + "loss": 1.2716, + "nll_loss": 0.3176405429840088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016129277646541595, + "rewards/margins": 0.18654866516590118, + "rewards/rejected": -0.18816159665584564, + "step": 4887 + }, + { + "epoch": 3.380359612724758, + "grad_norm": 17.38018798828125, + "learning_rate": 3.67757799293069e-05, + "log_odds_chosen": 9.830801010131836, + "log_odds_ratio": -0.00029084287234582007, + "logits/chosen": -0.48474544286727905, + "logits/rejected": -0.553375244140625, + "logps/chosen": -0.0007540949736721814, + "logps/rejected": -2.2699027061462402, + "loss": 1.4132, + "nll_loss": 0.3532586395740509, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.54095017327927e-05, + "rewards/margins": 0.22691485285758972, + "rewards/rejected": -0.2269902527332306, + "step": 4888 + }, + { + "epoch": 3.3810511756569848, + "grad_norm": 10.87319564819336, + "learning_rate": 3.6771937913016753e-05, + "log_odds_chosen": 8.066292762756348, + "log_odds_ratio": -0.0014551844215020537, + "logits/chosen": -0.37830474972724915, + "logits/rejected": -0.42029958963394165, + "logps/chosen": -0.023234685882925987, + "logps/rejected": -1.609140396118164, + "loss": 1.9007, + "nll_loss": 0.4750228524208069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023234684485942125, + "rewards/margins": 0.15859057009220123, + "rewards/rejected": -0.16091403365135193, + "step": 4889 + }, + { + "epoch": 3.3817427385892116, + "grad_norm": 7.3637309074401855, + "learning_rate": 3.6768095896726606e-05, + "log_odds_chosen": 9.156384468078613, + "log_odds_ratio": -0.00031103662331588566, + "logits/chosen": -0.6396990418434143, + "logits/rejected": -0.6863707304000854, + "logps/chosen": -0.0024397203233093023, + "logps/rejected": -1.5034765005111694, + "loss": 1.6916, + "nll_loss": 0.4228590726852417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024397202651016414, + "rewards/margins": 0.15010368824005127, + "rewards/rejected": -0.15034765005111694, + "step": 4890 + }, + { + "epoch": 3.3824343015214384, + "grad_norm": 19.589651107788086, + "learning_rate": 3.676425388043646e-05, + "log_odds_chosen": 11.059402465820312, + "log_odds_ratio": -2.0674237021012232e-05, + "logits/chosen": -0.5618711709976196, + "logits/rejected": -0.6643784046173096, + "logps/chosen": -0.00018295198970008641, + "logps/rejected": -2.2173166275024414, + "loss": 2.0076, + "nll_loss": 0.5019065141677856, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.829519897000864e-05, + "rewards/margins": 0.2217133641242981, + "rewards/rejected": -0.22173166275024414, + "step": 4891 + }, + { + "epoch": 3.3831258644536653, + "grad_norm": 6.292947769165039, + "learning_rate": 3.6760411864146304e-05, + "log_odds_chosen": 9.908514022827148, + "log_odds_ratio": -0.00021772683248855174, + "logits/chosen": -0.38789576292037964, + "logits/rejected": -0.48067206144332886, + "logps/chosen": -0.002419173950329423, + "logps/rejected": -1.8695409297943115, + "loss": 1.1321, + "nll_loss": 0.2830020487308502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024191739794332534, + "rewards/margins": 0.18671217560768127, + "rewards/rejected": -0.1869540959596634, + "step": 4892 + }, + { + "epoch": 3.383817427385892, + "grad_norm": 7.79653263092041, + "learning_rate": 3.6756569847856156e-05, + "log_odds_chosen": 8.922075271606445, + "log_odds_ratio": -0.0006197122274897993, + "logits/chosen": -0.6272656321525574, + "logits/rejected": -0.6524254679679871, + "logps/chosen": -0.0009733512415550649, + "logps/rejected": -1.533919095993042, + "loss": 1.1412, + "nll_loss": 0.2852276563644409, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.733513434184715e-05, + "rewards/margins": 0.15329457819461823, + "rewards/rejected": -0.15339191257953644, + "step": 4893 + }, + { + "epoch": 3.384508990318119, + "grad_norm": 8.590242385864258, + "learning_rate": 3.675272783156601e-05, + "log_odds_chosen": 7.531237602233887, + "log_odds_ratio": -0.30172964930534363, + "logits/chosen": -0.11642412841320038, + "logits/rejected": -0.13467153906822205, + "logps/chosen": -0.03982772305607796, + "logps/rejected": -1.277442216873169, + "loss": 1.7269, + "nll_loss": 0.4015564024448395, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003982772585004568, + "rewards/margins": 0.12376146018505096, + "rewards/rejected": -0.12774422764778137, + "step": 4894 + }, + { + "epoch": 3.3852005532503457, + "grad_norm": 14.8712797164917, + "learning_rate": 3.6748885815275855e-05, + "log_odds_chosen": 10.017523765563965, + "log_odds_ratio": -0.003008556319400668, + "logits/chosen": -0.7771372199058533, + "logits/rejected": -0.872925877571106, + "logps/chosen": -0.0047239093109965324, + "logps/rejected": -2.0477962493896484, + "loss": 2.0123, + "nll_loss": 0.5027673244476318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004723909660242498, + "rewards/margins": 0.20430722832679749, + "rewards/rejected": -0.20477962493896484, + "step": 4895 + }, + { + "epoch": 3.3858921161825726, + "grad_norm": 7.275940418243408, + "learning_rate": 3.6745043798985714e-05, + "log_odds_chosen": 8.62596607208252, + "log_odds_ratio": -0.0013201197143644094, + "logits/chosen": -0.3239147365093231, + "logits/rejected": -0.4286247789859772, + "logps/chosen": -0.00924981851130724, + "logps/rejected": -1.9703105688095093, + "loss": 1.6702, + "nll_loss": 0.41741546988487244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009249818976968527, + "rewards/margins": 0.19610606133937836, + "rewards/rejected": -0.19703106582164764, + "step": 4896 + }, + { + "epoch": 3.3865836791147994, + "grad_norm": 13.720173835754395, + "learning_rate": 3.674120178269556e-05, + "log_odds_chosen": 9.530720710754395, + "log_odds_ratio": -0.0002733979490585625, + "logits/chosen": -0.6544563174247742, + "logits/rejected": -0.7413820028305054, + "logps/chosen": -0.0007776152924634516, + "logps/rejected": -1.820650339126587, + "loss": 1.4968, + "nll_loss": 0.3741660714149475, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.776152779115364e-05, + "rewards/margins": 0.18198728561401367, + "rewards/rejected": -0.18206505477428436, + "step": 4897 + }, + { + "epoch": 3.3872752420470262, + "grad_norm": 8.170251846313477, + "learning_rate": 3.673735976640541e-05, + "log_odds_chosen": 8.625896453857422, + "log_odds_ratio": -0.023443307727575302, + "logits/chosen": -0.5422019958496094, + "logits/rejected": -0.5712448954582214, + "logps/chosen": -0.006744992453604937, + "logps/rejected": -1.371507167816162, + "loss": 1.9066, + "nll_loss": 0.4743000864982605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006744992570020258, + "rewards/margins": 0.13647620379924774, + "rewards/rejected": -0.13715071976184845, + "step": 4898 + }, + { + "epoch": 3.387966804979253, + "grad_norm": 5.375954627990723, + "learning_rate": 3.6733517750115264e-05, + "log_odds_chosen": 9.618953704833984, + "log_odds_ratio": -0.0001843458303483203, + "logits/chosen": -0.6380875706672668, + "logits/rejected": -0.701280951499939, + "logps/chosen": -0.018095921725034714, + "logps/rejected": -2.783842086791992, + "loss": 1.483, + "nll_loss": 0.3707388639450073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018095922423526645, + "rewards/margins": 0.27657461166381836, + "rewards/rejected": -0.27838417887687683, + "step": 4899 + }, + { + "epoch": 3.38865836791148, + "grad_norm": 13.91885757446289, + "learning_rate": 3.672967573382512e-05, + "log_odds_chosen": 10.686237335205078, + "log_odds_ratio": -3.720568201970309e-05, + "logits/chosen": -0.6631019115447998, + "logits/rejected": -0.6991225481033325, + "logps/chosen": -0.0006842121947556734, + "logps/rejected": -2.6362667083740234, + "loss": 1.8289, + "nll_loss": 0.4572334587574005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.842121365480125e-05, + "rewards/margins": 0.2635582685470581, + "rewards/rejected": -0.26362669467926025, + "step": 4900 + }, + { + "epoch": 3.3893499308437067, + "grad_norm": 10.791473388671875, + "learning_rate": 3.672583371753496e-05, + "log_odds_chosen": 8.14004898071289, + "log_odds_ratio": -0.17836718261241913, + "logits/chosen": -0.5453934669494629, + "logits/rejected": -0.643043041229248, + "logps/chosen": -0.02400169149041176, + "logps/rejected": -1.5562868118286133, + "loss": 1.136, + "nll_loss": 0.2661687433719635, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024001693818718195, + "rewards/margins": 0.1532285213470459, + "rewards/rejected": -0.15562868118286133, + "step": 4901 + }, + { + "epoch": 3.3900414937759336, + "grad_norm": 14.164995193481445, + "learning_rate": 3.6721991701244815e-05, + "log_odds_chosen": 9.055171966552734, + "log_odds_ratio": -0.0006158703472465277, + "logits/chosen": -0.559080183506012, + "logits/rejected": -0.6751291155815125, + "logps/chosen": -0.0006700906669721007, + "logps/rejected": -1.8620007038116455, + "loss": 1.955, + "nll_loss": 0.48869603872299194, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.700906669721007e-05, + "rewards/margins": 0.18613307178020477, + "rewards/rejected": -0.1862000823020935, + "step": 4902 + }, + { + "epoch": 3.3907330567081604, + "grad_norm": 9.520732879638672, + "learning_rate": 3.671814968495467e-05, + "log_odds_chosen": 8.1787691116333, + "log_odds_ratio": -0.030842209234833717, + "logits/chosen": -0.5617755055427551, + "logits/rejected": -0.623063325881958, + "logps/chosen": -0.008788838982582092, + "logps/rejected": -1.0116199254989624, + "loss": 1.6207, + "nll_loss": 0.4020839333534241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000878883816767484, + "rewards/margins": 0.10028310865163803, + "rewards/rejected": -0.10116199404001236, + "step": 4903 + }, + { + "epoch": 3.391424619640387, + "grad_norm": 11.027510643005371, + "learning_rate": 3.671430766866451e-05, + "log_odds_chosen": 8.988664627075195, + "log_odds_ratio": -0.0020216992124915123, + "logits/chosen": -0.5873269438743591, + "logits/rejected": -0.681334912776947, + "logps/chosen": -0.005092856008559465, + "logps/rejected": -1.7773408889770508, + "loss": 1.6894, + "nll_loss": 0.42215490341186523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005092856008559465, + "rewards/margins": 0.177224799990654, + "rewards/rejected": -0.1777341067790985, + "step": 4904 + }, + { + "epoch": 3.392116182572614, + "grad_norm": 7.16702127456665, + "learning_rate": 3.6710465652374365e-05, + "log_odds_chosen": 8.271673202514648, + "log_odds_ratio": -0.03412136435508728, + "logits/chosen": -0.6266486048698425, + "logits/rejected": -0.6206366419792175, + "logps/chosen": -0.021237920969724655, + "logps/rejected": -1.6427576541900635, + "loss": 1.4746, + "nll_loss": 0.3652297556400299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002123792190104723, + "rewards/margins": 0.1621519923210144, + "rewards/rejected": -0.16427578032016754, + "step": 4905 + }, + { + "epoch": 3.392807745504841, + "grad_norm": 10.318666458129883, + "learning_rate": 3.670662363608422e-05, + "log_odds_chosen": 9.693625450134277, + "log_odds_ratio": -0.00013604509877040982, + "logits/chosen": -0.9521337151527405, + "logits/rejected": -1.0116444826126099, + "logps/chosen": -0.0003407415933907032, + "logps/rejected": -1.826693058013916, + "loss": 2.1946, + "nll_loss": 0.54863440990448, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.407416079426184e-05, + "rewards/margins": 0.18263523280620575, + "rewards/rejected": -0.18266932666301727, + "step": 4906 + }, + { + "epoch": 3.3934993084370677, + "grad_norm": 7.364135265350342, + "learning_rate": 3.670278161979407e-05, + "log_odds_chosen": 9.088154792785645, + "log_odds_ratio": -0.0007545155822299421, + "logits/chosen": -0.5334247350692749, + "logits/rejected": -0.5444377064704895, + "logps/chosen": -0.001719134277664125, + "logps/rejected": -1.2499489784240723, + "loss": 1.8055, + "nll_loss": 0.451288104057312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017191344522871077, + "rewards/margins": 0.12482300400733948, + "rewards/rejected": -0.1249949038028717, + "step": 4907 + }, + { + "epoch": 3.3941908713692945, + "grad_norm": 10.528717041015625, + "learning_rate": 3.6698939603503916e-05, + "log_odds_chosen": 10.229093551635742, + "log_odds_ratio": -7.416566222673282e-05, + "logits/chosen": -1.0112329721450806, + "logits/rejected": -1.0509593486785889, + "logps/chosen": -0.0007192917400971055, + "logps/rejected": -2.0609207153320312, + "loss": 2.2368, + "nll_loss": 0.5591920614242554, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.19291710993275e-05, + "rewards/margins": 0.20602013170719147, + "rewards/rejected": -0.20609205961227417, + "step": 4908 + }, + { + "epoch": 3.3948824343015214, + "grad_norm": 7.505864143371582, + "learning_rate": 3.6695097587213775e-05, + "log_odds_chosen": 8.713752746582031, + "log_odds_ratio": -0.024289878085255623, + "logits/chosen": -0.6097840666770935, + "logits/rejected": -0.5826661586761475, + "logps/chosen": -0.19899815320968628, + "logps/rejected": -1.7063466310501099, + "loss": 1.9188, + "nll_loss": 0.4772747755050659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019899815320968628, + "rewards/margins": 0.15073484182357788, + "rewards/rejected": -0.1706346720457077, + "step": 4909 + }, + { + "epoch": 3.395573997233748, + "grad_norm": 7.943901062011719, + "learning_rate": 3.669125557092362e-05, + "log_odds_chosen": 9.464241027832031, + "log_odds_ratio": -0.0004103784740436822, + "logits/chosen": -0.8146414756774902, + "logits/rejected": -0.8581461906433105, + "logps/chosen": -0.001535814255475998, + "logps/rejected": -1.7533268928527832, + "loss": 1.5418, + "nll_loss": 0.3854040801525116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015358140808530152, + "rewards/margins": 0.17517909407615662, + "rewards/rejected": -0.1753326952457428, + "step": 4910 + }, + { + "epoch": 3.396265560165975, + "grad_norm": 6.460813999176025, + "learning_rate": 3.668741355463347e-05, + "log_odds_chosen": 8.076436996459961, + "log_odds_ratio": -0.053030095994472504, + "logits/chosen": -0.4412263035774231, + "logits/rejected": -0.4743978679180145, + "logps/chosen": -0.019328976050019264, + "logps/rejected": -1.1220247745513916, + "loss": 1.2356, + "nll_loss": 0.3035872280597687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019328977214172482, + "rewards/margins": 0.11026957631111145, + "rewards/rejected": -0.1122024729847908, + "step": 4911 + }, + { + "epoch": 3.396957123098202, + "grad_norm": 10.06266975402832, + "learning_rate": 3.6683571538343326e-05, + "log_odds_chosen": 9.370516777038574, + "log_odds_ratio": -0.002869781805202365, + "logits/chosen": -0.32210591435432434, + "logits/rejected": -0.4086999297142029, + "logps/chosen": -0.0046303002163767815, + "logps/rejected": -1.9719274044036865, + "loss": 1.2744, + "nll_loss": 0.3183148503303528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046303000999614596, + "rewards/margins": 0.19672970473766327, + "rewards/rejected": -0.1971927285194397, + "step": 4912 + }, + { + "epoch": 3.3976486860304287, + "grad_norm": 19.497352600097656, + "learning_rate": 3.667972952205317e-05, + "log_odds_chosen": 11.04720687866211, + "log_odds_ratio": -7.522387750213966e-05, + "logits/chosen": -1.0588092803955078, + "logits/rejected": -1.1463772058486938, + "logps/chosen": -0.0001778826117515564, + "logps/rejected": -2.447268009185791, + "loss": 1.5913, + "nll_loss": 0.39780738949775696, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.778826117515564e-05, + "rewards/margins": 0.24470902979373932, + "rewards/rejected": -0.24472680687904358, + "step": 4913 + }, + { + "epoch": 3.3983402489626555, + "grad_norm": 7.700289726257324, + "learning_rate": 3.6675887505763024e-05, + "log_odds_chosen": 9.542242050170898, + "log_odds_ratio": -0.00026488027651794255, + "logits/chosen": -0.5816613435745239, + "logits/rejected": -0.5703340172767639, + "logps/chosen": -0.0014484458370134234, + "logps/rejected": -1.913750410079956, + "loss": 1.6671, + "nll_loss": 0.41675370931625366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001448446128051728, + "rewards/margins": 0.19123020768165588, + "rewards/rejected": -0.19137504696846008, + "step": 4914 + }, + { + "epoch": 3.3990318118948823, + "grad_norm": 7.914902687072754, + "learning_rate": 3.6672045489472876e-05, + "log_odds_chosen": 8.947216987609863, + "log_odds_ratio": -0.000677384901791811, + "logits/chosen": -0.49002915620803833, + "logits/rejected": -0.4953732490539551, + "logps/chosen": -0.0006747872103005648, + "logps/rejected": -1.2044634819030762, + "loss": 1.1815, + "nll_loss": 0.29530438780784607, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.747871520929039e-05, + "rewards/margins": 0.12037887424230576, + "rewards/rejected": -0.1204463541507721, + "step": 4915 + }, + { + "epoch": 3.399723374827109, + "grad_norm": 7.413805961608887, + "learning_rate": 3.666820347318273e-05, + "log_odds_chosen": 8.656720161437988, + "log_odds_ratio": -0.014592528343200684, + "logits/chosen": -0.6957269906997681, + "logits/rejected": -0.6722280383110046, + "logps/chosen": -0.007290617562830448, + "logps/rejected": -1.567755937576294, + "loss": 1.222, + "nll_loss": 0.3040284514427185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007290617795661092, + "rewards/margins": 0.156046524643898, + "rewards/rejected": -0.1567755788564682, + "step": 4916 + }, + { + "epoch": 3.400414937759336, + "grad_norm": 7.193758964538574, + "learning_rate": 3.6664361456892574e-05, + "log_odds_chosen": 8.801361083984375, + "log_odds_ratio": -0.00035949796438217163, + "logits/chosen": -0.5192161202430725, + "logits/rejected": -0.5951474905014038, + "logps/chosen": -0.019580980762839317, + "logps/rejected": -1.7406668663024902, + "loss": 1.5407, + "nll_loss": 0.3851466178894043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001958098029717803, + "rewards/margins": 0.17210859060287476, + "rewards/rejected": -0.1740666925907135, + "step": 4917 + }, + { + "epoch": 3.401106500691563, + "grad_norm": 8.03855037689209, + "learning_rate": 3.6660519440602434e-05, + "log_odds_chosen": 8.034245491027832, + "log_odds_ratio": -0.03657183051109314, + "logits/chosen": -0.6469733715057373, + "logits/rejected": -0.7322840690612793, + "logps/chosen": -0.06051166355609894, + "logps/rejected": -2.151496648788452, + "loss": 1.4062, + "nll_loss": 0.3478994369506836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006051166914403439, + "rewards/margins": 0.20909848809242249, + "rewards/rejected": -0.2151496559381485, + "step": 4918 + }, + { + "epoch": 3.4017980636237897, + "grad_norm": 6.80123233795166, + "learning_rate": 3.665667742431228e-05, + "log_odds_chosen": 8.935651779174805, + "log_odds_ratio": -0.0021722454112023115, + "logits/chosen": -0.4752444624900818, + "logits/rejected": -0.5332791805267334, + "logps/chosen": -0.015160152688622475, + "logps/rejected": -1.858079195022583, + "loss": 1.183, + "nll_loss": 0.2955396771430969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015160152688622475, + "rewards/margins": 0.18429191410541534, + "rewards/rejected": -0.1858079433441162, + "step": 4919 + }, + { + "epoch": 3.4024896265560165, + "grad_norm": 8.983878135681152, + "learning_rate": 3.665283540802213e-05, + "log_odds_chosen": 8.991377830505371, + "log_odds_ratio": -0.0006017258856445551, + "logits/chosen": -0.47858548164367676, + "logits/rejected": -0.4981576204299927, + "logps/chosen": -0.002347870497033, + "logps/rejected": -1.3704099655151367, + "loss": 1.2898, + "nll_loss": 0.322380930185318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023478706134483218, + "rewards/margins": 0.13680621981620789, + "rewards/rejected": -0.13704100251197815, + "step": 4920 + }, + { + "epoch": 3.4031811894882433, + "grad_norm": 9.267348289489746, + "learning_rate": 3.6648993391731984e-05, + "log_odds_chosen": 9.750808715820312, + "log_odds_ratio": -0.06280557066202164, + "logits/chosen": -0.7340126037597656, + "logits/rejected": -0.7577941417694092, + "logps/chosen": -0.049555785953998566, + "logps/rejected": -1.7277791500091553, + "loss": 3.0292, + "nll_loss": 0.7510218620300293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004955578595399857, + "rewards/margins": 0.16782233119010925, + "rewards/rejected": -0.17277792096138, + "step": 4921 + }, + { + "epoch": 3.40387275242047, + "grad_norm": 8.101119041442871, + "learning_rate": 3.664515137544183e-05, + "log_odds_chosen": 10.515046119689941, + "log_odds_ratio": -0.00010872560960706323, + "logits/chosen": -0.6100264191627502, + "logits/rejected": -0.6120408773422241, + "logps/chosen": -0.00031758565455675125, + "logps/rejected": -2.23378324508667, + "loss": 1.607, + "nll_loss": 0.401741623878479, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1758565455675125e-05, + "rewards/margins": 0.22334657609462738, + "rewards/rejected": -0.22337834537029266, + "step": 4922 + }, + { + "epoch": 3.404564315352697, + "grad_norm": 14.783284187316895, + "learning_rate": 3.664130935915168e-05, + "log_odds_chosen": 8.665579795837402, + "log_odds_ratio": -0.05588802322745323, + "logits/chosen": -1.0751800537109375, + "logits/rejected": -1.0729825496673584, + "logps/chosen": -0.023833846673369408, + "logps/rejected": -1.7382769584655762, + "loss": 1.8384, + "nll_loss": 0.4540029764175415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002383384620770812, + "rewards/margins": 0.17144431173801422, + "rewards/rejected": -0.17382769286632538, + "step": 4923 + }, + { + "epoch": 3.405255878284924, + "grad_norm": 7.320456504821777, + "learning_rate": 3.6637467342861535e-05, + "log_odds_chosen": 8.874919891357422, + "log_odds_ratio": -0.07711444050073624, + "logits/chosen": -0.5605841279029846, + "logits/rejected": -0.6308436989784241, + "logps/chosen": -0.0178984422236681, + "logps/rejected": -1.7320085763931274, + "loss": 1.2618, + "nll_loss": 0.3077423572540283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017898440128192306, + "rewards/margins": 0.17141102254390717, + "rewards/rejected": -0.17320087552070618, + "step": 4924 + }, + { + "epoch": 3.4059474412171507, + "grad_norm": 13.005224227905273, + "learning_rate": 3.663362532657139e-05, + "log_odds_chosen": 9.509387969970703, + "log_odds_ratio": -0.0002891596523113549, + "logits/chosen": -0.9162966012954712, + "logits/rejected": -0.9428678154945374, + "logps/chosen": -0.0006765555590391159, + "logps/rejected": -1.5674023628234863, + "loss": 1.4824, + "nll_loss": 0.37056732177734375, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.765555735910311e-05, + "rewards/margins": 0.15667259693145752, + "rewards/rejected": -0.1567402482032776, + "step": 4925 + }, + { + "epoch": 3.4066390041493775, + "grad_norm": 7.998945713043213, + "learning_rate": 3.662978331028123e-05, + "log_odds_chosen": 9.285704612731934, + "log_odds_ratio": -0.0003611869178712368, + "logits/chosen": -0.6982068419456482, + "logits/rejected": -0.7881613373756409, + "logps/chosen": -0.005835440009832382, + "logps/rejected": -1.9285674095153809, + "loss": 2.1622, + "nll_loss": 0.5405056476593018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005835440242663026, + "rewards/margins": 0.19227319955825806, + "rewards/rejected": -0.19285675883293152, + "step": 4926 + }, + { + "epoch": 3.4073305670816043, + "grad_norm": 5.620597839355469, + "learning_rate": 3.662594129399109e-05, + "log_odds_chosen": 8.169038772583008, + "log_odds_ratio": -0.0014991657808423042, + "logits/chosen": -0.5135716199874878, + "logits/rejected": -0.6402672529220581, + "logps/chosen": -0.0455855056643486, + "logps/rejected": -2.884558916091919, + "loss": 1.7886, + "nll_loss": 0.44701242446899414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004558550659567118, + "rewards/margins": 0.283897340297699, + "rewards/rejected": -0.28845590353012085, + "step": 4927 + }, + { + "epoch": 3.408022130013831, + "grad_norm": 13.760601997375488, + "learning_rate": 3.662209927770094e-05, + "log_odds_chosen": 10.625243186950684, + "log_odds_ratio": -6.922941975062713e-05, + "logits/chosen": -0.3769231140613556, + "logits/rejected": -0.5266758799552917, + "logps/chosen": -0.0001821487967390567, + "logps/rejected": -2.1213905811309814, + "loss": 1.661, + "nll_loss": 0.41524389386177063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.821487967390567e-05, + "rewards/margins": 0.21212083101272583, + "rewards/rejected": -0.2121390402317047, + "step": 4928 + }, + { + "epoch": 3.408713692946058, + "grad_norm": 10.003327369689941, + "learning_rate": 3.661825726141079e-05, + "log_odds_chosen": 7.59705114364624, + "log_odds_ratio": -0.018656501546502113, + "logits/chosen": -0.515770435333252, + "logits/rejected": -0.6440622806549072, + "logps/chosen": -0.01772279106080532, + "logps/rejected": -1.9347506761550903, + "loss": 2.0547, + "nll_loss": 0.5118147134780884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017722791526466608, + "rewards/margins": 0.19170278310775757, + "rewards/rejected": -0.19347506761550903, + "step": 4929 + }, + { + "epoch": 3.409405255878285, + "grad_norm": 6.344710350036621, + "learning_rate": 3.661441524512064e-05, + "log_odds_chosen": 8.29608154296875, + "log_odds_ratio": -0.0021369177848100662, + "logits/chosen": -0.5516109466552734, + "logits/rejected": -0.5961017608642578, + "logps/chosen": -0.016617944464087486, + "logps/rejected": -1.9796082973480225, + "loss": 1.2063, + "nll_loss": 0.30136656761169434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016617946093901992, + "rewards/margins": 0.19629904627799988, + "rewards/rejected": -0.19796085357666016, + "step": 4930 + }, + { + "epoch": 3.4100968188105116, + "grad_norm": 7.435446739196777, + "learning_rate": 3.661057322883049e-05, + "log_odds_chosen": 8.216718673706055, + "log_odds_ratio": -0.0028341393917798996, + "logits/chosen": -0.7698273062705994, + "logits/rejected": -0.7450141906738281, + "logps/chosen": -0.012603234499692917, + "logps/rejected": -1.575285792350769, + "loss": 1.4467, + "nll_loss": 0.361391544342041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012603236827999353, + "rewards/margins": 0.15626825392246246, + "rewards/rejected": -0.1575285792350769, + "step": 4931 + }, + { + "epoch": 3.4107883817427385, + "grad_norm": 10.248863220214844, + "learning_rate": 3.660673121254034e-05, + "log_odds_chosen": 10.4366455078125, + "log_odds_ratio": -0.00032715650741010904, + "logits/chosen": -0.40946805477142334, + "logits/rejected": -0.49355852603912354, + "logps/chosen": -0.0004456047317944467, + "logps/rejected": -1.8036550283432007, + "loss": 1.2218, + "nll_loss": 0.3054129183292389, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4560470996657386e-05, + "rewards/margins": 0.18032094836235046, + "rewards/rejected": -0.18036550283432007, + "step": 4932 + }, + { + "epoch": 3.4114799446749653, + "grad_norm": 9.72933292388916, + "learning_rate": 3.660288919625019e-05, + "log_odds_chosen": 9.800952911376953, + "log_odds_ratio": -0.0003480328305158764, + "logits/chosen": -0.5476824641227722, + "logits/rejected": -0.6130825877189636, + "logps/chosen": -0.00100328354164958, + "logps/rejected": -2.4654011726379395, + "loss": 1.7633, + "nll_loss": 0.4407961666584015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010032836871687323, + "rewards/margins": 0.24643978476524353, + "rewards/rejected": -0.2465401142835617, + "step": 4933 + }, + { + "epoch": 3.412171507607192, + "grad_norm": 10.899541854858398, + "learning_rate": 3.6599047179960046e-05, + "log_odds_chosen": 7.08076286315918, + "log_odds_ratio": -0.05577649176120758, + "logits/chosen": -0.4027561843395233, + "logits/rejected": -0.43499526381492615, + "logps/chosen": -0.023526165634393692, + "logps/rejected": -1.6664758920669556, + "loss": 1.477, + "nll_loss": 0.36366114020347595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023526165168732405, + "rewards/margins": 0.16429497301578522, + "rewards/rejected": -0.16664758324623108, + "step": 4934 + }, + { + "epoch": 3.412863070539419, + "grad_norm": 12.654091835021973, + "learning_rate": 3.659520516366989e-05, + "log_odds_chosen": 10.54169750213623, + "log_odds_ratio": -5.913171116844751e-05, + "logits/chosen": -0.4336695373058319, + "logits/rejected": -0.5459045767784119, + "logps/chosen": -0.00021590096002910286, + "logps/rejected": -2.1453845500946045, + "loss": 1.4929, + "nll_loss": 0.3732162117958069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1590094547718763e-05, + "rewards/margins": 0.21451690793037415, + "rewards/rejected": -0.21453848481178284, + "step": 4935 + }, + { + "epoch": 3.413554633471646, + "grad_norm": 10.615986824035645, + "learning_rate": 3.659136314737975e-05, + "log_odds_chosen": 10.210262298583984, + "log_odds_ratio": -0.0001049021229846403, + "logits/chosen": -0.45864614844322205, + "logits/rejected": -0.5385798215866089, + "logps/chosen": -0.006646535359323025, + "logps/rejected": -2.5832815170288086, + "loss": 1.3402, + "nll_loss": 0.3350418210029602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006646536057814956, + "rewards/margins": 0.2576634883880615, + "rewards/rejected": -0.2583281397819519, + "step": 4936 + }, + { + "epoch": 3.4142461964038726, + "grad_norm": 13.86459732055664, + "learning_rate": 3.6587521131089596e-05, + "log_odds_chosen": 10.201940536499023, + "log_odds_ratio": -0.00038717055576853454, + "logits/chosen": -0.4907549023628235, + "logits/rejected": -0.6592799425125122, + "logps/chosen": -0.0009741213289089501, + "logps/rejected": -2.4662675857543945, + "loss": 1.7158, + "nll_loss": 0.42890262603759766, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.741213580127805e-05, + "rewards/margins": 0.24652934074401855, + "rewards/rejected": -0.24662676453590393, + "step": 4937 + }, + { + "epoch": 3.4149377593360994, + "grad_norm": 12.028914451599121, + "learning_rate": 3.658367911479945e-05, + "log_odds_chosen": 9.5020751953125, + "log_odds_ratio": -0.0001335785782430321, + "logits/chosen": -0.7141174077987671, + "logits/rejected": -0.8637199401855469, + "logps/chosen": -0.0005194094264879823, + "logps/rejected": -1.3897552490234375, + "loss": 1.5818, + "nll_loss": 0.3954324424266815, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.194094046601094e-05, + "rewards/margins": 0.13892358541488647, + "rewards/rejected": -0.13897553086280823, + "step": 4938 + }, + { + "epoch": 3.4156293222683263, + "grad_norm": 7.543969631195068, + "learning_rate": 3.65798370985093e-05, + "log_odds_chosen": 9.680569648742676, + "log_odds_ratio": -0.0002025132707785815, + "logits/chosen": -0.7034863233566284, + "logits/rejected": -0.6991147994995117, + "logps/chosen": -0.006458500865846872, + "logps/rejected": -2.526951313018799, + "loss": 1.3906, + "nll_loss": 0.3476356267929077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006458500865846872, + "rewards/margins": 0.2520492970943451, + "rewards/rejected": -0.25269514322280884, + "step": 4939 + }, + { + "epoch": 3.416320885200553, + "grad_norm": 13.711503982543945, + "learning_rate": 3.657599508221915e-05, + "log_odds_chosen": 10.692815780639648, + "log_odds_ratio": -0.00013191672042012215, + "logits/chosen": -0.6346333622932434, + "logits/rejected": -0.7078859210014343, + "logps/chosen": -0.00046483371988870203, + "logps/rejected": -2.3791491985321045, + "loss": 1.7774, + "nll_loss": 0.44433537125587463, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6483371988870203e-05, + "rewards/margins": 0.23786845803260803, + "rewards/rejected": -0.23791491985321045, + "step": 4940 + }, + { + "epoch": 3.41701244813278, + "grad_norm": 9.96644401550293, + "learning_rate": 3.6572153065929e-05, + "log_odds_chosen": 8.98678207397461, + "log_odds_ratio": -0.0009053864632733166, + "logits/chosen": -0.7697640061378479, + "logits/rejected": -0.8465602397918701, + "logps/chosen": -0.004368194378912449, + "logps/rejected": -1.7595570087432861, + "loss": 1.4312, + "nll_loss": 0.3577001988887787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000436819507740438, + "rewards/margins": 0.17551888525485992, + "rewards/rejected": -0.17595569789409637, + "step": 4941 + }, + { + "epoch": 3.4177040110650068, + "grad_norm": 16.0850830078125, + "learning_rate": 3.656831104963885e-05, + "log_odds_chosen": 9.48902702331543, + "log_odds_ratio": -0.17636118829250336, + "logits/chosen": -0.7930417060852051, + "logits/rejected": -0.8710612654685974, + "logps/chosen": -0.020490722730755806, + "logps/rejected": -2.0081634521484375, + "loss": 1.847, + "nll_loss": 0.4441096782684326, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020490719471126795, + "rewards/margins": 0.1987672746181488, + "rewards/rejected": -0.2008163332939148, + "step": 4942 + }, + { + "epoch": 3.4183955739972336, + "grad_norm": 17.637163162231445, + "learning_rate": 3.6564469033348704e-05, + "log_odds_chosen": 8.011113166809082, + "log_odds_ratio": -0.011292091570794582, + "logits/chosen": -0.32651209831237793, + "logits/rejected": -0.34382864832878113, + "logps/chosen": -0.013978070579469204, + "logps/rejected": -1.6209546327590942, + "loss": 1.6851, + "nll_loss": 0.4201478660106659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001397807034663856, + "rewards/margins": 0.16069765388965607, + "rewards/rejected": -0.16209547221660614, + "step": 4943 + }, + { + "epoch": 3.4190871369294604, + "grad_norm": 8.398159980773926, + "learning_rate": 3.656062701705855e-05, + "log_odds_chosen": 9.178977012634277, + "log_odds_ratio": -0.03616366535425186, + "logits/chosen": -0.5493403077125549, + "logits/rejected": -0.5453891754150391, + "logps/chosen": -0.061218440532684326, + "logps/rejected": -1.525048017501831, + "loss": 1.8663, + "nll_loss": 0.46294963359832764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006121844053268433, + "rewards/margins": 0.14638295769691467, + "rewards/rejected": -0.1525048017501831, + "step": 4944 + }, + { + "epoch": 3.4197786998616873, + "grad_norm": 10.590446472167969, + "learning_rate": 3.655678500076841e-05, + "log_odds_chosen": 9.760478973388672, + "log_odds_ratio": -0.0004351499956101179, + "logits/chosen": -1.0720016956329346, + "logits/rejected": -1.1331433057785034, + "logps/chosen": -0.004765262361615896, + "logps/rejected": -2.271724224090576, + "loss": 1.7589, + "nll_loss": 0.43967100977897644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004765262419823557, + "rewards/margins": 0.22669591009616852, + "rewards/rejected": -0.22717243432998657, + "step": 4945 + }, + { + "epoch": 3.420470262793914, + "grad_norm": 7.860476493835449, + "learning_rate": 3.6552942984478255e-05, + "log_odds_chosen": 10.309771537780762, + "log_odds_ratio": -9.26225766306743e-05, + "logits/chosen": -0.49840879440307617, + "logits/rejected": -0.6406351923942566, + "logps/chosen": -0.0018750398885458708, + "logps/rejected": -2.5959949493408203, + "loss": 1.2851, + "nll_loss": 0.32125723361968994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018750397430267185, + "rewards/margins": 0.25941202044487, + "rewards/rejected": -0.259599506855011, + "step": 4946 + }, + { + "epoch": 3.421161825726141, + "grad_norm": 9.410032272338867, + "learning_rate": 3.654910096818811e-05, + "log_odds_chosen": 9.610300064086914, + "log_odds_ratio": -0.0013579919468611479, + "logits/chosen": -0.7040968537330627, + "logits/rejected": -0.7856951951980591, + "logps/chosen": -0.010495437309145927, + "logps/rejected": -2.737610340118408, + "loss": 2.2543, + "nll_loss": 0.5634455680847168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010495438473299146, + "rewards/margins": 0.27271148562431335, + "rewards/rejected": -0.2737610340118408, + "step": 4947 + }, + { + "epoch": 3.4218533886583677, + "grad_norm": 8.460103034973145, + "learning_rate": 3.654525895189796e-05, + "log_odds_chosen": 8.626173973083496, + "log_odds_ratio": -0.01648002117872238, + "logits/chosen": -0.5038471221923828, + "logits/rejected": -0.48866933584213257, + "logps/chosen": -0.05864041671156883, + "logps/rejected": -2.3046083450317383, + "loss": 1.3673, + "nll_loss": 0.3401760458946228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005864041391760111, + "rewards/margins": 0.2245967835187912, + "rewards/rejected": -0.23046083748340607, + "step": 4948 + }, + { + "epoch": 3.4225449515905946, + "grad_norm": 12.7888765335083, + "learning_rate": 3.6541416935607805e-05, + "log_odds_chosen": 9.462170600891113, + "log_odds_ratio": -0.012112999334931374, + "logits/chosen": -0.8550928831100464, + "logits/rejected": -0.8987554311752319, + "logps/chosen": -0.011790499091148376, + "logps/rejected": -2.268587112426758, + "loss": 1.8554, + "nll_loss": 0.46263372898101807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011790499556809664, + "rewards/margins": 0.2256796658039093, + "rewards/rejected": -0.2268587350845337, + "step": 4949 + }, + { + "epoch": 3.4232365145228214, + "grad_norm": 11.547723770141602, + "learning_rate": 3.653757491931766e-05, + "log_odds_chosen": 10.40541934967041, + "log_odds_ratio": -0.0001380514440825209, + "logits/chosen": -0.3020840585231781, + "logits/rejected": -0.3681895136833191, + "logps/chosen": -0.0003071234095841646, + "logps/rejected": -1.6997017860412598, + "loss": 1.57, + "nll_loss": 0.39249423146247864, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.071234095841646e-05, + "rewards/margins": 0.16993945837020874, + "rewards/rejected": -0.16997016966342926, + "step": 4950 + }, + { + "epoch": 3.4239280774550482, + "grad_norm": 4.667510509490967, + "learning_rate": 3.653373290302751e-05, + "log_odds_chosen": 10.270744323730469, + "log_odds_ratio": -0.00021994294365867972, + "logits/chosen": -0.597452700138092, + "logits/rejected": -0.6206096410751343, + "logps/chosen": -0.010234748013317585, + "logps/rejected": -2.91064715385437, + "loss": 1.2375, + "nll_loss": 0.3093594014644623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010234748478978872, + "rewards/margins": 0.2900412678718567, + "rewards/rejected": -0.2910647392272949, + "step": 4951 + }, + { + "epoch": 3.424619640387275, + "grad_norm": 5.313857555389404, + "learning_rate": 3.652989088673736e-05, + "log_odds_chosen": 9.118802070617676, + "log_odds_ratio": -0.0009384253062307835, + "logits/chosen": -0.5868411064147949, + "logits/rejected": -0.6619110703468323, + "logps/chosen": -0.0023102620616555214, + "logps/rejected": -1.7506974935531616, + "loss": 1.526, + "nll_loss": 0.3814122676849365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002310262352693826, + "rewards/margins": 0.17483872175216675, + "rewards/rejected": -0.17506974935531616, + "step": 4952 + }, + { + "epoch": 3.425311203319502, + "grad_norm": 6.661731719970703, + "learning_rate": 3.652604887044721e-05, + "log_odds_chosen": 9.3414945602417, + "log_odds_ratio": -0.029213862493634224, + "logits/chosen": -0.67668217420578, + "logits/rejected": -0.7876390218734741, + "logps/chosen": -0.007439591456204653, + "logps/rejected": -1.9399693012237549, + "loss": 1.4387, + "nll_loss": 0.3567417562007904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007439591572619975, + "rewards/margins": 0.19325298070907593, + "rewards/rejected": -0.19399693608283997, + "step": 4953 + }, + { + "epoch": 3.4260027662517287, + "grad_norm": 9.601874351501465, + "learning_rate": 3.652220685415707e-05, + "log_odds_chosen": 8.631904602050781, + "log_odds_ratio": -0.012579535134136677, + "logits/chosen": -0.97607421875, + "logits/rejected": -0.9456802606582642, + "logps/chosen": -0.027140891179442406, + "logps/rejected": -1.6035633087158203, + "loss": 1.3115, + "nll_loss": 0.32662340998649597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002714089583605528, + "rewards/margins": 0.15764223039150238, + "rewards/rejected": -0.1603563129901886, + "step": 4954 + }, + { + "epoch": 3.4266943291839556, + "grad_norm": 14.586040496826172, + "learning_rate": 3.651836483786691e-05, + "log_odds_chosen": 7.647185325622559, + "log_odds_ratio": -0.16055667400360107, + "logits/chosen": -0.6028072834014893, + "logits/rejected": -0.6363714933395386, + "logps/chosen": -0.031981196254491806, + "logps/rejected": -1.623141884803772, + "loss": 2.2166, + "nll_loss": 0.5380910038948059, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031981198117136955, + "rewards/margins": 0.15911605954170227, + "rewards/rejected": -0.16231419146060944, + "step": 4955 + }, + { + "epoch": 3.4273858921161824, + "grad_norm": 12.094792366027832, + "learning_rate": 3.6514522821576766e-05, + "log_odds_chosen": 8.334588050842285, + "log_odds_ratio": -0.038848213851451874, + "logits/chosen": -0.3661819398403168, + "logits/rejected": -0.4191260039806366, + "logps/chosen": -0.01614089496433735, + "logps/rejected": -1.3177118301391602, + "loss": 0.9914, + "nll_loss": 0.24396342039108276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016140895895659924, + "rewards/margins": 0.13015709817409515, + "rewards/rejected": -0.13177119195461273, + "step": 4956 + }, + { + "epoch": 3.428077455048409, + "grad_norm": 11.956432342529297, + "learning_rate": 3.651068080528662e-05, + "log_odds_chosen": 9.768950462341309, + "log_odds_ratio": -0.00014484582061413676, + "logits/chosen": -0.4603797197341919, + "logits/rejected": -0.589290976524353, + "logps/chosen": -0.0023116571828722954, + "logps/rejected": -2.05747389793396, + "loss": 1.0762, + "nll_loss": 0.2690298855304718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023116568627301604, + "rewards/margins": 0.20551621913909912, + "rewards/rejected": -0.20574738085269928, + "step": 4957 + }, + { + "epoch": 3.428769017980636, + "grad_norm": 12.626391410827637, + "learning_rate": 3.6506838788996464e-05, + "log_odds_chosen": 8.608522415161133, + "log_odds_ratio": -0.3388509154319763, + "logits/chosen": -0.6620832085609436, + "logits/rejected": -0.6999070644378662, + "logps/chosen": -0.04924309626221657, + "logps/rejected": -1.7659050226211548, + "loss": 1.7579, + "nll_loss": 0.40559306740760803, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004924309439957142, + "rewards/margins": 0.1716661900281906, + "rewards/rejected": -0.17659050226211548, + "step": 4958 + }, + { + "epoch": 3.429460580912863, + "grad_norm": 10.061079025268555, + "learning_rate": 3.6502996772706316e-05, + "log_odds_chosen": 9.71721076965332, + "log_odds_ratio": -0.0002621083986014128, + "logits/chosen": -0.60477215051651, + "logits/rejected": -0.7572970390319824, + "logps/chosen": -0.0005827401182614267, + "logps/rejected": -1.649122953414917, + "loss": 1.0116, + "nll_loss": 0.25288063287734985, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.827401764690876e-05, + "rewards/margins": 0.164854034781456, + "rewards/rejected": -0.16491231322288513, + "step": 4959 + }, + { + "epoch": 3.43015214384509, + "grad_norm": 5.07777214050293, + "learning_rate": 3.649915475641617e-05, + "log_odds_chosen": 9.303333282470703, + "log_odds_ratio": -0.0015425317687913775, + "logits/chosen": -0.3931879699230194, + "logits/rejected": -0.3826301097869873, + "logps/chosen": -0.014467225410044193, + "logps/rejected": -2.122796058654785, + "loss": 1.1946, + "nll_loss": 0.2985040545463562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014467225410044193, + "rewards/margins": 0.21083290874958038, + "rewards/rejected": -0.21227963268756866, + "step": 4960 + }, + { + "epoch": 3.430843706777317, + "grad_norm": 7.497944355010986, + "learning_rate": 3.649531274012602e-05, + "log_odds_chosen": 6.6718292236328125, + "log_odds_ratio": -0.1012139543890953, + "logits/chosen": -0.4591187536716461, + "logits/rejected": -0.3664749562740326, + "logps/chosen": -0.0395648330450058, + "logps/rejected": -1.074185848236084, + "loss": 1.6025, + "nll_loss": 0.39051151275634766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003956483211368322, + "rewards/margins": 0.1034621000289917, + "rewards/rejected": -0.10741858184337616, + "step": 4961 + }, + { + "epoch": 3.431535269709544, + "grad_norm": 9.274129867553711, + "learning_rate": 3.649147072383587e-05, + "log_odds_chosen": 9.80703353881836, + "log_odds_ratio": -0.0002453567576594651, + "logits/chosen": -0.8744444847106934, + "logits/rejected": -0.8338868618011475, + "logps/chosen": -0.0007531539304181933, + "logps/rejected": -1.9264980554580688, + "loss": 1.8758, + "nll_loss": 0.4689198136329651, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.531539449701086e-05, + "rewards/margins": 0.19257448613643646, + "rewards/rejected": -0.19264981150627136, + "step": 4962 + }, + { + "epoch": 3.4322268326417706, + "grad_norm": 12.359687805175781, + "learning_rate": 3.6487628707545726e-05, + "log_odds_chosen": 9.083701133728027, + "log_odds_ratio": -0.0003218199999537319, + "logits/chosen": -0.4226207137107849, + "logits/rejected": -0.5028167366981506, + "logps/chosen": -0.0009388489997945726, + "logps/rejected": -1.8529709577560425, + "loss": 1.4797, + "nll_loss": 0.3699025511741638, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.388489706907421e-05, + "rewards/margins": 0.1852032095193863, + "rewards/rejected": -0.18529711663722992, + "step": 4963 + }, + { + "epoch": 3.4329183955739975, + "grad_norm": 14.259088516235352, + "learning_rate": 3.648378669125557e-05, + "log_odds_chosen": 8.883432388305664, + "log_odds_ratio": -0.18358714878559113, + "logits/chosen": -0.47711271047592163, + "logits/rejected": -0.5507173538208008, + "logps/chosen": -0.04038437455892563, + "logps/rejected": -1.7237071990966797, + "loss": 1.9157, + "nll_loss": 0.4605787396430969, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004038437269628048, + "rewards/margins": 0.1683322787284851, + "rewards/rejected": -0.17237071692943573, + "step": 4964 + }, + { + "epoch": 3.4336099585062243, + "grad_norm": 9.234458923339844, + "learning_rate": 3.6479944674965424e-05, + "log_odds_chosen": 9.029831886291504, + "log_odds_ratio": -0.004684413317590952, + "logits/chosen": -0.6124022603034973, + "logits/rejected": -0.7258630990982056, + "logps/chosen": -0.0036762619856745005, + "logps/rejected": -1.8446922302246094, + "loss": 1.7796, + "nll_loss": 0.44442451000213623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003676262276712805, + "rewards/margins": 0.18410161137580872, + "rewards/rejected": -0.18446923792362213, + "step": 4965 + }, + { + "epoch": 3.434301521438451, + "grad_norm": 8.6228609085083, + "learning_rate": 3.6476102658675276e-05, + "log_odds_chosen": 8.592721939086914, + "log_odds_ratio": -0.05192786827683449, + "logits/chosen": -0.7884331345558167, + "logits/rejected": -0.7422505617141724, + "logps/chosen": -0.019336678087711334, + "logps/rejected": -1.6305170059204102, + "loss": 2.0833, + "nll_loss": 0.5156409740447998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019336676923558116, + "rewards/margins": 0.1611180305480957, + "rewards/rejected": -0.16305169463157654, + "step": 4966 + }, + { + "epoch": 3.434993084370678, + "grad_norm": 7.429285049438477, + "learning_rate": 3.647226064238512e-05, + "log_odds_chosen": 10.410229682922363, + "log_odds_ratio": -6.975359428906813e-05, + "logits/chosen": -0.31530484557151794, + "logits/rejected": -0.38272637128829956, + "logps/chosen": -0.00015621320926584303, + "logps/rejected": -1.8115465641021729, + "loss": 0.9233, + "nll_loss": 0.23082692921161652, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5621320926584303e-05, + "rewards/margins": 0.1811390519142151, + "rewards/rejected": -0.18115466833114624, + "step": 4967 + }, + { + "epoch": 3.435684647302905, + "grad_norm": 16.53629493713379, + "learning_rate": 3.6468418626094975e-05, + "log_odds_chosen": 11.616415023803711, + "log_odds_ratio": -1.644290932745207e-05, + "logits/chosen": -0.6845104694366455, + "logits/rejected": -0.8092417120933533, + "logps/chosen": -0.0001430445263395086, + "logps/rejected": -2.61985445022583, + "loss": 2.3828, + "nll_loss": 0.5956913828849792, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.430445263395086e-05, + "rewards/margins": 0.2619711458683014, + "rewards/rejected": -0.2619854509830475, + "step": 4968 + }, + { + "epoch": 3.4363762102351316, + "grad_norm": 13.607427597045898, + "learning_rate": 3.646457660980483e-05, + "log_odds_chosen": 10.069181442260742, + "log_odds_ratio": -7.897378964116797e-05, + "logits/chosen": -0.7286061644554138, + "logits/rejected": -0.8447732329368591, + "logps/chosen": -0.0006242129602469504, + "logps/rejected": -2.1223394870758057, + "loss": 1.6063, + "nll_loss": 0.4015622138977051, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.242129165912047e-05, + "rewards/margins": 0.2121715545654297, + "rewards/rejected": -0.21223396062850952, + "step": 4969 + }, + { + "epoch": 3.4370677731673585, + "grad_norm": 6.911189556121826, + "learning_rate": 3.646073459351468e-05, + "log_odds_chosen": 10.088189125061035, + "log_odds_ratio": -0.00012798573879990727, + "logits/chosen": -0.6314866542816162, + "logits/rejected": -0.6669266223907471, + "logps/chosen": -0.004457239992916584, + "logps/rejected": -2.2552828788757324, + "loss": 1.7214, + "nll_loss": 0.43033266067504883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044572402839548886, + "rewards/margins": 0.22508256137371063, + "rewards/rejected": -0.225528284907341, + "step": 4970 + }, + { + "epoch": 3.4377593360995853, + "grad_norm": 8.085153579711914, + "learning_rate": 3.6456892577224525e-05, + "log_odds_chosen": 8.362859725952148, + "log_odds_ratio": -0.0029876772314310074, + "logits/chosen": -0.6107293367385864, + "logits/rejected": -0.6765274405479431, + "logps/chosen": -0.005427872762084007, + "logps/rejected": -1.735783576965332, + "loss": 1.2286, + "nll_loss": 0.30684149265289307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005427872529253364, + "rewards/margins": 0.17303556203842163, + "rewards/rejected": -0.17357836663722992, + "step": 4971 + }, + { + "epoch": 3.438450899031812, + "grad_norm": 14.384394645690918, + "learning_rate": 3.6453050560934384e-05, + "log_odds_chosen": 9.993062019348145, + "log_odds_ratio": -0.0009481186280027032, + "logits/chosen": -0.519564151763916, + "logits/rejected": -0.5848604440689087, + "logps/chosen": -0.027457591146230698, + "logps/rejected": -1.879459023475647, + "loss": 2.0429, + "nll_loss": 0.5106297731399536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027457589749246836, + "rewards/margins": 0.18520015478134155, + "rewards/rejected": -0.18794593214988708, + "step": 4972 + }, + { + "epoch": 3.439142461964039, + "grad_norm": 9.224777221679688, + "learning_rate": 3.644920854464423e-05, + "log_odds_chosen": 8.9891939163208, + "log_odds_ratio": -0.0007203746354207397, + "logits/chosen": -0.49874958395957947, + "logits/rejected": -0.45370471477508545, + "logps/chosen": -0.016186760738492012, + "logps/rejected": -2.4834234714508057, + "loss": 1.4326, + "nll_loss": 0.35807323455810547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001618676004000008, + "rewards/margins": 0.24672365188598633, + "rewards/rejected": -0.2483423352241516, + "step": 4973 + }, + { + "epoch": 3.4398340248962658, + "grad_norm": 11.134475708007812, + "learning_rate": 3.644536652835408e-05, + "log_odds_chosen": 9.265724182128906, + "log_odds_ratio": -0.00047064805403351784, + "logits/chosen": -0.7496140003204346, + "logits/rejected": -0.7866104245185852, + "logps/chosen": -0.0027650538831949234, + "logps/rejected": -2.056285858154297, + "loss": 1.5796, + "nll_loss": 0.39485177397727966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027650539414025843, + "rewards/margins": 0.2053520828485489, + "rewards/rejected": -0.20562858879566193, + "step": 4974 + }, + { + "epoch": 3.4405255878284926, + "grad_norm": 8.373491287231445, + "learning_rate": 3.6441524512063935e-05, + "log_odds_chosen": 8.949793815612793, + "log_odds_ratio": -0.0005880310200154781, + "logits/chosen": -0.42748188972473145, + "logits/rejected": -0.49028706550598145, + "logps/chosen": -0.001193431206047535, + "logps/rejected": -1.608788251876831, + "loss": 1.1484, + "nll_loss": 0.28704604506492615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011934312351513654, + "rewards/margins": 0.16075949370861053, + "rewards/rejected": -0.16087885200977325, + "step": 4975 + }, + { + "epoch": 3.4412171507607194, + "grad_norm": 9.026708602905273, + "learning_rate": 3.643768249577378e-05, + "log_odds_chosen": 10.085662841796875, + "log_odds_ratio": -0.0005494834040291607, + "logits/chosen": -0.7614070773124695, + "logits/rejected": -0.7914379835128784, + "logps/chosen": -0.02739373780786991, + "logps/rejected": -2.0095713138580322, + "loss": 1.431, + "nll_loss": 0.35769060254096985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002739373594522476, + "rewards/margins": 0.19821777939796448, + "rewards/rejected": -0.20095713436603546, + "step": 4976 + }, + { + "epoch": 3.4419087136929463, + "grad_norm": 7.560689449310303, + "learning_rate": 3.643384047948363e-05, + "log_odds_chosen": 8.658706665039062, + "log_odds_ratio": -0.12949354946613312, + "logits/chosen": -0.7013901472091675, + "logits/rejected": -0.7911181449890137, + "logps/chosen": -0.019102217629551888, + "logps/rejected": -1.1386125087738037, + "loss": 1.9893, + "nll_loss": 0.48437100648880005, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019102217629551888, + "rewards/margins": 0.11195103079080582, + "rewards/rejected": -0.11386125534772873, + "step": 4977 + }, + { + "epoch": 3.442600276625173, + "grad_norm": 12.517111778259277, + "learning_rate": 3.6429998463193485e-05, + "log_odds_chosen": 8.388811111450195, + "log_odds_ratio": -0.001296606264077127, + "logits/chosen": -0.48516207933425903, + "logits/rejected": -0.5658431053161621, + "logps/chosen": -0.002016248879954219, + "logps/rejected": -1.7087645530700684, + "loss": 1.8797, + "nll_loss": 0.4697989821434021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002016248763538897, + "rewards/margins": 0.17067483067512512, + "rewards/rejected": -0.17087645828723907, + "step": 4978 + }, + { + "epoch": 3.4432918395574, + "grad_norm": 5.525219917297363, + "learning_rate": 3.642615644690334e-05, + "log_odds_chosen": 8.90900707244873, + "log_odds_ratio": -0.005349991377443075, + "logits/chosen": -0.8758846521377563, + "logits/rejected": -0.8599585294723511, + "logps/chosen": -0.0006094533018767834, + "logps/rejected": -1.6298961639404297, + "loss": 1.3454, + "nll_loss": 0.3358096480369568, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0945334553252906e-05, + "rewards/margins": 0.16292867064476013, + "rewards/rejected": -0.16298961639404297, + "step": 4979 + }, + { + "epoch": 3.4439834024896268, + "grad_norm": 11.920831680297852, + "learning_rate": 3.6422314430613184e-05, + "log_odds_chosen": 7.914310455322266, + "log_odds_ratio": -0.11740975826978683, + "logits/chosen": -0.8697179555892944, + "logits/rejected": -0.9304721355438232, + "logps/chosen": -0.01838577538728714, + "logps/rejected": -1.3346633911132812, + "loss": 1.9385, + "nll_loss": 0.4728940725326538, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018385774455964565, + "rewards/margins": 0.13162776827812195, + "rewards/rejected": -0.13346634805202484, + "step": 4980 + }, + { + "epoch": 3.4446749654218536, + "grad_norm": 15.954282760620117, + "learning_rate": 3.641847241432304e-05, + "log_odds_chosen": 10.502399444580078, + "log_odds_ratio": -5.634710396407172e-05, + "logits/chosen": -0.8933683633804321, + "logits/rejected": -0.8928529620170593, + "logps/chosen": -0.00024164578644558787, + "logps/rejected": -1.8561794757843018, + "loss": 2.2232, + "nll_loss": 0.5558005571365356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.416457937215455e-05, + "rewards/margins": 0.18559378385543823, + "rewards/rejected": -0.18561795353889465, + "step": 4981 + }, + { + "epoch": 3.4453665283540804, + "grad_norm": 9.633877754211426, + "learning_rate": 3.641463039803289e-05, + "log_odds_chosen": 10.315112113952637, + "log_odds_ratio": -0.00015177467139437795, + "logits/chosen": -0.7014366984367371, + "logits/rejected": -0.7228372097015381, + "logps/chosen": -0.0009540664032101631, + "logps/rejected": -2.119901180267334, + "loss": 1.6241, + "nll_loss": 0.40599820017814636, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.540664177620783e-05, + "rewards/margins": 0.21189472079277039, + "rewards/rejected": -0.2119901180267334, + "step": 4982 + }, + { + "epoch": 3.4460580912863072, + "grad_norm": 5.039992809295654, + "learning_rate": 3.641078838174274e-05, + "log_odds_chosen": 9.058915138244629, + "log_odds_ratio": -0.0014726583613082767, + "logits/chosen": -0.7530060410499573, + "logits/rejected": -0.8648630380630493, + "logps/chosen": -0.0016102747758850455, + "logps/rejected": -1.217201828956604, + "loss": 1.2089, + "nll_loss": 0.3020736277103424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016102749214041978, + "rewards/margins": 0.12155915796756744, + "rewards/rejected": -0.12172017991542816, + "step": 4983 + }, + { + "epoch": 3.446749654218534, + "grad_norm": 9.161917686462402, + "learning_rate": 3.640694636545259e-05, + "log_odds_chosen": 10.83303165435791, + "log_odds_ratio": -3.968351666117087e-05, + "logits/chosen": -0.6702843904495239, + "logits/rejected": -0.701372504234314, + "logps/chosen": -0.0001426434755558148, + "logps/rejected": -2.0338659286499023, + "loss": 1.288, + "nll_loss": 0.32198911905288696, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4264348465076182e-05, + "rewards/margins": 0.20337235927581787, + "rewards/rejected": -0.2033866047859192, + "step": 4984 + }, + { + "epoch": 3.447441217150761, + "grad_norm": 10.88602066040039, + "learning_rate": 3.6403104349162446e-05, + "log_odds_chosen": 7.93879508972168, + "log_odds_ratio": -0.0036302765365689993, + "logits/chosen": -0.8644218444824219, + "logits/rejected": -0.9524936676025391, + "logps/chosen": -0.00544738257303834, + "logps/rejected": -1.3466427326202393, + "loss": 1.7153, + "nll_loss": 0.4284606873989105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000544738257303834, + "rewards/margins": 0.13411954045295715, + "rewards/rejected": -0.13466428220272064, + "step": 4985 + }, + { + "epoch": 3.4481327800829877, + "grad_norm": 15.391132354736328, + "learning_rate": 3.639926233287229e-05, + "log_odds_chosen": 8.355426788330078, + "log_odds_ratio": -0.025042179971933365, + "logits/chosen": -0.7084068059921265, + "logits/rejected": -0.7590049505233765, + "logps/chosen": -0.007605067919939756, + "logps/rejected": -1.9548802375793457, + "loss": 1.5422, + "nll_loss": 0.38303864002227783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007605067803524435, + "rewards/margins": 0.19472752511501312, + "rewards/rejected": -0.19548803567886353, + "step": 4986 + }, + { + "epoch": 3.4488243430152146, + "grad_norm": 6.186641693115234, + "learning_rate": 3.6395420316582144e-05, + "log_odds_chosen": 8.858957290649414, + "log_odds_ratio": -0.0007753237732686102, + "logits/chosen": -0.4936913847923279, + "logits/rejected": -0.5381306409835815, + "logps/chosen": -0.000559426611289382, + "logps/rejected": -1.241579294204712, + "loss": 1.7351, + "nll_loss": 0.43369585275650024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5942658946150914e-05, + "rewards/margins": 0.12410198152065277, + "rewards/rejected": -0.12415792047977448, + "step": 4987 + }, + { + "epoch": 3.4495159059474414, + "grad_norm": 6.701933860778809, + "learning_rate": 3.6391578300291996e-05, + "log_odds_chosen": 8.373390197753906, + "log_odds_ratio": -0.010810820385813713, + "logits/chosen": -0.554107129573822, + "logits/rejected": -0.5754260420799255, + "logps/chosen": -0.08616117388010025, + "logps/rejected": -1.98459792137146, + "loss": 1.7258, + "nll_loss": 0.4303753972053528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008616117760539055, + "rewards/margins": 0.18984368443489075, + "rewards/rejected": -0.19845978915691376, + "step": 4988 + }, + { + "epoch": 3.4502074688796682, + "grad_norm": 9.205174446105957, + "learning_rate": 3.638773628400184e-05, + "log_odds_chosen": 9.567306518554688, + "log_odds_ratio": -0.00013541642692871392, + "logits/chosen": -1.1078649759292603, + "logits/rejected": -1.0323755741119385, + "logps/chosen": -0.0006693107425235212, + "logps/rejected": -1.786118507385254, + "loss": 2.0256, + "nll_loss": 0.5063755512237549, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.693107570754364e-05, + "rewards/margins": 0.17854492366313934, + "rewards/rejected": -0.1786118596792221, + "step": 4989 + }, + { + "epoch": 3.450899031811895, + "grad_norm": 8.76229190826416, + "learning_rate": 3.63838942677117e-05, + "log_odds_chosen": 9.120561599731445, + "log_odds_ratio": -0.0009699111105874181, + "logits/chosen": -0.98872971534729, + "logits/rejected": -1.0143227577209473, + "logps/chosen": -0.010068300180137157, + "logps/rejected": -1.867543339729309, + "loss": 1.7556, + "nll_loss": 0.43879449367523193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001006829901598394, + "rewards/margins": 0.18574751913547516, + "rewards/rejected": -0.18675434589385986, + "step": 4990 + }, + { + "epoch": 3.451590594744122, + "grad_norm": 8.950358390808105, + "learning_rate": 3.638005225142155e-05, + "log_odds_chosen": 7.5963826179504395, + "log_odds_ratio": -0.04105527698993683, + "logits/chosen": -0.9477307796478271, + "logits/rejected": -1.041830062866211, + "logps/chosen": -0.053072813898324966, + "logps/rejected": -1.931291937828064, + "loss": 1.6146, + "nll_loss": 0.3995518088340759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005307281389832497, + "rewards/margins": 0.1878219097852707, + "rewards/rejected": -0.19312918186187744, + "step": 4991 + }, + { + "epoch": 3.4522821576763487, + "grad_norm": 4.866816997528076, + "learning_rate": 3.63762102351314e-05, + "log_odds_chosen": 9.050535202026367, + "log_odds_ratio": -0.0018034178065136075, + "logits/chosen": -1.0131746530532837, + "logits/rejected": -0.9534136056900024, + "logps/chosen": -0.0012092224787920713, + "logps/rejected": -1.3308520317077637, + "loss": 1.4361, + "nll_loss": 0.3588336110115051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012092224642401561, + "rewards/margins": 0.13296428322792053, + "rewards/rejected": -0.1330852061510086, + "step": 4992 + }, + { + "epoch": 3.4529737206085755, + "grad_norm": 15.728497505187988, + "learning_rate": 3.637236821884125e-05, + "log_odds_chosen": 10.239688873291016, + "log_odds_ratio": -0.00016703848086763173, + "logits/chosen": -0.6586402654647827, + "logits/rejected": -0.7343249320983887, + "logps/chosen": -0.0003682894166558981, + "logps/rejected": -2.2000532150268555, + "loss": 1.8151, + "nll_loss": 0.4537507891654968, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6828940210398287e-05, + "rewards/margins": 0.2199684977531433, + "rewards/rejected": -0.2200053334236145, + "step": 4993 + }, + { + "epoch": 3.4536652835408024, + "grad_norm": 9.613495826721191, + "learning_rate": 3.6368526202551104e-05, + "log_odds_chosen": 9.965752601623535, + "log_odds_ratio": -0.0001287447230424732, + "logits/chosen": -0.8265679478645325, + "logits/rejected": -0.9368499517440796, + "logps/chosen": -0.0005226809298619628, + "logps/rejected": -2.1379518508911133, + "loss": 1.3859, + "nll_loss": 0.34647077322006226, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.226809298619628e-05, + "rewards/margins": 0.2137429416179657, + "rewards/rejected": -0.21379519999027252, + "step": 4994 + }, + { + "epoch": 3.454356846473029, + "grad_norm": 11.395675659179688, + "learning_rate": 3.636468418626095e-05, + "log_odds_chosen": 8.483444213867188, + "log_odds_ratio": -0.0013658700045198202, + "logits/chosen": -0.8391974568367004, + "logits/rejected": -0.759861171245575, + "logps/chosen": -0.004849501885473728, + "logps/rejected": -1.6757423877716064, + "loss": 1.8679, + "nll_loss": 0.46684902906417847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000484950200188905, + "rewards/margins": 0.1670892834663391, + "rewards/rejected": -0.1675742268562317, + "step": 4995 + }, + { + "epoch": 3.455048409405256, + "grad_norm": 14.174217224121094, + "learning_rate": 3.63608421699708e-05, + "log_odds_chosen": 9.07601547241211, + "log_odds_ratio": -0.0004858938045799732, + "logits/chosen": -0.6792766451835632, + "logits/rejected": -0.7782905697822571, + "logps/chosen": -0.0033511659130454063, + "logps/rejected": -1.3728395700454712, + "loss": 1.9694, + "nll_loss": 0.4922906756401062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033511657966300845, + "rewards/margins": 0.1369488388299942, + "rewards/rejected": -0.13728396594524384, + "step": 4996 + }, + { + "epoch": 3.455739972337483, + "grad_norm": 9.669139862060547, + "learning_rate": 3.6357000153680655e-05, + "log_odds_chosen": 8.411234855651855, + "log_odds_ratio": -0.025437351316213608, + "logits/chosen": -0.3525383770465851, + "logits/rejected": -0.43491697311401367, + "logps/chosen": -0.01492525078356266, + "logps/rejected": -1.4280292987823486, + "loss": 1.2865, + "nll_loss": 0.31906917691230774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014925252180546522, + "rewards/margins": 0.1413104087114334, + "rewards/rejected": -0.14280293881893158, + "step": 4997 + }, + { + "epoch": 3.4564315352697097, + "grad_norm": 6.708120346069336, + "learning_rate": 3.63531581373905e-05, + "log_odds_chosen": 8.262350082397461, + "log_odds_ratio": -0.009439961053431034, + "logits/chosen": -0.7109168767929077, + "logits/rejected": -0.6905184984207153, + "logps/chosen": -0.045669618993997574, + "logps/rejected": -2.6598198413848877, + "loss": 1.6848, + "nll_loss": 0.420247346162796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004566962365061045, + "rewards/margins": 0.2614150047302246, + "rewards/rejected": -0.2659819722175598, + "step": 4998 + }, + { + "epoch": 3.4571230982019365, + "grad_norm": 11.655810356140137, + "learning_rate": 3.634931612110036e-05, + "log_odds_chosen": 9.850397109985352, + "log_odds_ratio": -0.0011923499405384064, + "logits/chosen": -0.559964656829834, + "logits/rejected": -0.5518143177032471, + "logps/chosen": -0.016001557931303978, + "logps/rejected": -1.9830666780471802, + "loss": 1.8046, + "nll_loss": 0.45102426409721375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016001559561118484, + "rewards/margins": 0.19670650362968445, + "rewards/rejected": -0.19830666482448578, + "step": 4999 + }, + { + "epoch": 3.4578146611341634, + "grad_norm": 7.783631324768066, + "learning_rate": 3.6345474104810205e-05, + "log_odds_chosen": 6.684453010559082, + "log_odds_ratio": -0.12163373082876205, + "logits/chosen": -0.8146560192108154, + "logits/rejected": -0.7799224257469177, + "logps/chosen": -0.020185653120279312, + "logps/rejected": -0.8523962497711182, + "loss": 1.86, + "nll_loss": 0.45282793045043945, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020185650791972876, + "rewards/margins": 0.08322106301784515, + "rewards/rejected": -0.08523963391780853, + "step": 5000 + }, + { + "epoch": 3.45850622406639, + "grad_norm": 7.987496376037598, + "learning_rate": 3.634163208852006e-05, + "log_odds_chosen": 6.474782943725586, + "log_odds_ratio": -0.0698920488357544, + "logits/chosen": -0.5039654970169067, + "logits/rejected": -0.4827998876571655, + "logps/chosen": -0.025293994694948196, + "logps/rejected": -1.415076732635498, + "loss": 1.9146, + "nll_loss": 0.47165244817733765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025293994694948196, + "rewards/margins": 0.1389782726764679, + "rewards/rejected": -0.14150768518447876, + "step": 5001 + }, + { + "epoch": 3.459197786998617, + "grad_norm": 5.635584831237793, + "learning_rate": 3.633779007222991e-05, + "log_odds_chosen": 9.930513381958008, + "log_odds_ratio": -0.012761876918375492, + "logits/chosen": -0.47629499435424805, + "logits/rejected": -0.5821311473846436, + "logps/chosen": -0.012218811549246311, + "logps/rejected": -1.8402409553527832, + "loss": 1.2687, + "nll_loss": 0.3158940076828003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012218811316415668, + "rewards/margins": 0.182802215218544, + "rewards/rejected": -0.18402409553527832, + "step": 5002 + }, + { + "epoch": 3.459889349930844, + "grad_norm": 13.069116592407227, + "learning_rate": 3.633394805593976e-05, + "log_odds_chosen": 10.705204010009766, + "log_odds_ratio": -6.245496479095891e-05, + "logits/chosen": -0.7386019229888916, + "logits/rejected": -0.7776150107383728, + "logps/chosen": -0.0003734386991709471, + "logps/rejected": -2.469666004180908, + "loss": 1.2612, + "nll_loss": 0.31529700756073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.734386700671166e-05, + "rewards/margins": 0.24692925810813904, + "rewards/rejected": -0.24696658551692963, + "step": 5003 + }, + { + "epoch": 3.4605809128630707, + "grad_norm": 9.947142601013184, + "learning_rate": 3.633010603964961e-05, + "log_odds_chosen": 9.268041610717773, + "log_odds_ratio": -0.0005995544488541782, + "logits/chosen": -0.5643569231033325, + "logits/rejected": -0.5897268056869507, + "logps/chosen": -0.0008558162953704596, + "logps/rejected": -1.5376017093658447, + "loss": 2.4439, + "nll_loss": 0.6109213829040527, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.558163972338662e-05, + "rewards/margins": 0.15367458760738373, + "rewards/rejected": -0.1537601798772812, + "step": 5004 + }, + { + "epoch": 3.4612724757952975, + "grad_norm": 11.088629722595215, + "learning_rate": 3.632626402335946e-05, + "log_odds_chosen": 8.69260025024414, + "log_odds_ratio": -0.0019872181583195925, + "logits/chosen": -0.3774298429489136, + "logits/rejected": -0.4351969361305237, + "logps/chosen": -0.044242698699235916, + "logps/rejected": -2.4456701278686523, + "loss": 1.7641, + "nll_loss": 0.44082629680633545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004424269776791334, + "rewards/margins": 0.24014276266098022, + "rewards/rejected": -0.24456703662872314, + "step": 5005 + }, + { + "epoch": 3.4619640387275243, + "grad_norm": 11.110259056091309, + "learning_rate": 3.632242200706931e-05, + "log_odds_chosen": 10.339826583862305, + "log_odds_ratio": -5.08381963300053e-05, + "logits/chosen": -0.8093332648277283, + "logits/rejected": -0.8982728719711304, + "logps/chosen": -0.00030366991995833814, + "logps/rejected": -1.7938203811645508, + "loss": 1.2675, + "nll_loss": 0.31685981154441833, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0366991268238053e-05, + "rewards/margins": 0.17935167253017426, + "rewards/rejected": -0.17938204109668732, + "step": 5006 + }, + { + "epoch": 3.462655601659751, + "grad_norm": 9.65864372253418, + "learning_rate": 3.631857999077916e-05, + "log_odds_chosen": 10.515408515930176, + "log_odds_ratio": -6.123816274339333e-05, + "logits/chosen": -0.5729230046272278, + "logits/rejected": -0.6748566627502441, + "logps/chosen": -0.00019804044859483838, + "logps/rejected": -1.9237462282180786, + "loss": 1.4539, + "nll_loss": 0.3634702265262604, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.980404522328172e-05, + "rewards/margins": 0.19235482811927795, + "rewards/rejected": -0.19237461686134338, + "step": 5007 + }, + { + "epoch": 3.463347164591978, + "grad_norm": 9.06814193725586, + "learning_rate": 3.631473797448902e-05, + "log_odds_chosen": 10.330401420593262, + "log_odds_ratio": -7.388419908238575e-05, + "logits/chosen": -0.6489390134811401, + "logits/rejected": -0.7348330020904541, + "logps/chosen": -0.00034219425288029015, + "logps/rejected": -1.8815146684646606, + "loss": 1.2441, + "nll_loss": 0.3110177516937256, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4219425288029015e-05, + "rewards/margins": 0.18811725080013275, + "rewards/rejected": -0.18815146386623383, + "step": 5008 + }, + { + "epoch": 3.464038727524205, + "grad_norm": 10.859692573547363, + "learning_rate": 3.6310895958198864e-05, + "log_odds_chosen": 10.254586219787598, + "log_odds_ratio": -7.5828458648175e-05, + "logits/chosen": -0.46878278255462646, + "logits/rejected": -0.6421003937721252, + "logps/chosen": -0.0002131734072463587, + "logps/rejected": -1.9959816932678223, + "loss": 1.721, + "nll_loss": 0.43024158477783203, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.131734072463587e-05, + "rewards/margins": 0.19957688450813293, + "rewards/rejected": -0.19959819316864014, + "step": 5009 + }, + { + "epoch": 3.4647302904564317, + "grad_norm": 14.180079460144043, + "learning_rate": 3.6307053941908716e-05, + "log_odds_chosen": 10.092310905456543, + "log_odds_ratio": -8.331875142175704e-05, + "logits/chosen": -0.5899174213409424, + "logits/rejected": -0.6186763644218445, + "logps/chosen": -0.0002855797647498548, + "logps/rejected": -1.7965035438537598, + "loss": 1.944, + "nll_loss": 0.48598384857177734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8557977202581242e-05, + "rewards/margins": 0.17962178587913513, + "rewards/rejected": -0.17965035140514374, + "step": 5010 + }, + { + "epoch": 3.4654218533886585, + "grad_norm": 8.489675521850586, + "learning_rate": 3.630321192561857e-05, + "log_odds_chosen": 9.157320022583008, + "log_odds_ratio": -0.17574098706245422, + "logits/chosen": -0.7258381843566895, + "logits/rejected": -0.7407118082046509, + "logps/chosen": -0.020532608032226562, + "logps/rejected": -1.8599942922592163, + "loss": 1.3642, + "nll_loss": 0.32347384095191956, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020532610360533, + "rewards/margins": 0.1839461624622345, + "rewards/rejected": -0.18599943816661835, + "step": 5011 + }, + { + "epoch": 3.4661134163208853, + "grad_norm": 6.52730131149292, + "learning_rate": 3.629936990932842e-05, + "log_odds_chosen": 8.526227951049805, + "log_odds_ratio": -0.03111344762146473, + "logits/chosen": -0.6624473333358765, + "logits/rejected": -0.687995433807373, + "logps/chosen": -0.007035402115434408, + "logps/rejected": -1.5009098052978516, + "loss": 1.4651, + "nll_loss": 0.3631598949432373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000703540223184973, + "rewards/margins": 0.1493874490261078, + "rewards/rejected": -0.15009097754955292, + "step": 5012 + }, + { + "epoch": 3.466804979253112, + "grad_norm": 14.020427703857422, + "learning_rate": 3.629552789303827e-05, + "log_odds_chosen": 9.0694580078125, + "log_odds_ratio": -0.0027533157262951136, + "logits/chosen": -0.18365775048732758, + "logits/rejected": -0.2790081799030304, + "logps/chosen": -0.017389468848705292, + "logps/rejected": -1.5542614459991455, + "loss": 1.4566, + "nll_loss": 0.3638818562030792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001738947001285851, + "rewards/margins": 0.15368719398975372, + "rewards/rejected": -0.15542612969875336, + "step": 5013 + }, + { + "epoch": 3.467496542185339, + "grad_norm": 9.826322555541992, + "learning_rate": 3.629168587674812e-05, + "log_odds_chosen": 7.112824440002441, + "log_odds_ratio": -0.06963668763637543, + "logits/chosen": -0.8219603300094604, + "logits/rejected": -0.8262119293212891, + "logps/chosen": -0.014785894192755222, + "logps/rejected": -1.1341627836227417, + "loss": 1.4779, + "nll_loss": 0.36250919103622437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014785894891247153, + "rewards/margins": 0.11193770170211792, + "rewards/rejected": -0.11341628432273865, + "step": 5014 + }, + { + "epoch": 3.468188105117566, + "grad_norm": 10.303740501403809, + "learning_rate": 3.628784386045797e-05, + "log_odds_chosen": 8.587909698486328, + "log_odds_ratio": -0.023652495816349983, + "logits/chosen": -0.5451184511184692, + "logits/rejected": -0.6260979771614075, + "logps/chosen": -0.006036615930497646, + "logps/rejected": -1.5167649984359741, + "loss": 1.7524, + "nll_loss": 0.43574270606040955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006036615814082325, + "rewards/margins": 0.15107285976409912, + "rewards/rejected": -0.1516765058040619, + "step": 5015 + }, + { + "epoch": 3.4688796680497926, + "grad_norm": 10.99515151977539, + "learning_rate": 3.628400184416782e-05, + "log_odds_chosen": 7.448937892913818, + "log_odds_ratio": -0.17324112355709076, + "logits/chosen": -0.9495927095413208, + "logits/rejected": -1.026933193206787, + "logps/chosen": -0.04410245269536972, + "logps/rejected": -1.1555567979812622, + "loss": 1.9708, + "nll_loss": 0.4753641188144684, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004410245455801487, + "rewards/margins": 0.11114543676376343, + "rewards/rejected": -0.11555567383766174, + "step": 5016 + }, + { + "epoch": 3.4695712309820195, + "grad_norm": 14.707378387451172, + "learning_rate": 3.6280159827877676e-05, + "log_odds_chosen": 9.303018569946289, + "log_odds_ratio": -0.0004590075695887208, + "logits/chosen": -0.9421852231025696, + "logits/rejected": -1.010554552078247, + "logps/chosen": -0.0009109095553867519, + "logps/rejected": -1.3994932174682617, + "loss": 1.7846, + "nll_loss": 0.4461010694503784, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.109095844905823e-05, + "rewards/margins": 0.13985824584960938, + "rewards/rejected": -0.13994933664798737, + "step": 5017 + }, + { + "epoch": 3.4702627939142463, + "grad_norm": 7.436912536621094, + "learning_rate": 3.627631781158752e-05, + "log_odds_chosen": 9.307181358337402, + "log_odds_ratio": -0.004700258374214172, + "logits/chosen": -0.5239299535751343, + "logits/rejected": -0.5426942706108093, + "logps/chosen": -0.0029026533011347055, + "logps/rejected": -1.3353371620178223, + "loss": 1.2961, + "nll_loss": 0.3235432505607605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002902653650380671, + "rewards/margins": 0.13324347138404846, + "rewards/rejected": -0.13353373110294342, + "step": 5018 + }, + { + "epoch": 3.470954356846473, + "grad_norm": 9.484705924987793, + "learning_rate": 3.6272475795297375e-05, + "log_odds_chosen": 9.229263305664062, + "log_odds_ratio": -0.0014138160040602088, + "logits/chosen": -0.693687915802002, + "logits/rejected": -0.7113863229751587, + "logps/chosen": -0.0005861036479473114, + "logps/rejected": -1.4140403270721436, + "loss": 1.8349, + "nll_loss": 0.4585755467414856, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.861037061549723e-05, + "rewards/margins": 0.14134542644023895, + "rewards/rejected": -0.14140403270721436, + "step": 5019 + }, + { + "epoch": 3.4716459197787, + "grad_norm": 10.944015502929688, + "learning_rate": 3.626863377900723e-05, + "log_odds_chosen": 8.77437973022461, + "log_odds_ratio": -0.0020061221439391375, + "logits/chosen": -0.45963379740715027, + "logits/rejected": -0.552274227142334, + "logps/chosen": -0.0023264577612280846, + "logps/rejected": -1.2882215976715088, + "loss": 1.8215, + "nll_loss": 0.45518240332603455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023264579067472368, + "rewards/margins": 0.12858951091766357, + "rewards/rejected": -0.12882214784622192, + "step": 5020 + }, + { + "epoch": 3.472337482710927, + "grad_norm": 8.10033893585205, + "learning_rate": 3.626479176271708e-05, + "log_odds_chosen": 7.250068664550781, + "log_odds_ratio": -0.11843190342187881, + "logits/chosen": -0.21467533707618713, + "logits/rejected": -0.2849116623401642, + "logps/chosen": -0.0268861036747694, + "logps/rejected": -1.3084977865219116, + "loss": 1.2667, + "nll_loss": 0.30483385920524597, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0026886104606091976, + "rewards/margins": 0.12816117703914642, + "rewards/rejected": -0.13084977865219116, + "step": 5021 + }, + { + "epoch": 3.4730290456431536, + "grad_norm": 6.548411846160889, + "learning_rate": 3.6260949746426925e-05, + "log_odds_chosen": 8.930583953857422, + "log_odds_ratio": -0.0018793250201269984, + "logits/chosen": -0.38366758823394775, + "logits/rejected": -0.4618600010871887, + "logps/chosen": -0.009512092918157578, + "logps/rejected": -2.154390811920166, + "loss": 1.6325, + "nll_loss": 0.4079264998435974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009512093383818865, + "rewards/margins": 0.21448788046836853, + "rewards/rejected": -0.2154390960931778, + "step": 5022 + }, + { + "epoch": 3.4737206085753805, + "grad_norm": 5.993508815765381, + "learning_rate": 3.625710773013678e-05, + "log_odds_chosen": 9.173480987548828, + "log_odds_ratio": -0.00028242330881766975, + "logits/chosen": -0.5220339298248291, + "logits/rejected": -0.5704896450042725, + "logps/chosen": -0.000999884563498199, + "logps/rejected": -1.7175759077072144, + "loss": 1.5205, + "nll_loss": 0.3800984025001526, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.998845780501142e-05, + "rewards/margins": 0.17165759205818176, + "rewards/rejected": -0.17175757884979248, + "step": 5023 + }, + { + "epoch": 3.4744121715076073, + "grad_norm": 11.428689002990723, + "learning_rate": 3.625326571384663e-05, + "log_odds_chosen": 9.072298049926758, + "log_odds_ratio": -0.01014068815857172, + "logits/chosen": -0.5121976137161255, + "logits/rejected": -0.4744713008403778, + "logps/chosen": -0.012900039553642273, + "logps/rejected": -2.3478598594665527, + "loss": 1.9418, + "nll_loss": 0.4844461679458618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001290004001930356, + "rewards/margins": 0.23349598050117493, + "rewards/rejected": -0.2347859889268875, + "step": 5024 + }, + { + "epoch": 3.475103734439834, + "grad_norm": 8.699267387390137, + "learning_rate": 3.6249423697556476e-05, + "log_odds_chosen": 10.239505767822266, + "log_odds_ratio": -4.9830130592454225e-05, + "logits/chosen": -0.5903966426849365, + "logits/rejected": -0.599553108215332, + "logps/chosen": -0.0001406385563313961, + "logps/rejected": -1.375978946685791, + "loss": 1.2361, + "nll_loss": 0.30903035402297974, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4063856724533252e-05, + "rewards/margins": 0.13758382201194763, + "rewards/rejected": -0.13759788870811462, + "step": 5025 + }, + { + "epoch": 3.475795297372061, + "grad_norm": 8.938702583312988, + "learning_rate": 3.6245581681266335e-05, + "log_odds_chosen": 9.332566261291504, + "log_odds_ratio": -0.00019775879627559334, + "logits/chosen": -0.08711080998182297, + "logits/rejected": -0.18843892216682434, + "logps/chosen": -0.000356603559339419, + "logps/rejected": -1.3457491397857666, + "loss": 1.4083, + "nll_loss": 0.35204318165779114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.56603559339419e-05, + "rewards/margins": 0.13453926146030426, + "rewards/rejected": -0.13457490503787994, + "step": 5026 + }, + { + "epoch": 3.4764868603042878, + "grad_norm": 29.00394058227539, + "learning_rate": 3.624173966497618e-05, + "log_odds_chosen": 7.198078155517578, + "log_odds_ratio": -0.40212199091911316, + "logits/chosen": -0.7504348158836365, + "logits/rejected": -0.7492233514785767, + "logps/chosen": -0.04937904328107834, + "logps/rejected": -1.4801826477050781, + "loss": 2.1109, + "nll_loss": 0.487504780292511, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004937904886901379, + "rewards/margins": 0.14308036863803864, + "rewards/rejected": -0.1480182707309723, + "step": 5027 + }, + { + "epoch": 3.4771784232365146, + "grad_norm": 10.620253562927246, + "learning_rate": 3.623789764868603e-05, + "log_odds_chosen": 8.709571838378906, + "log_odds_ratio": -0.0006758072413504124, + "logits/chosen": -0.32940155267715454, + "logits/rejected": -0.3942253589630127, + "logps/chosen": -0.005983125418424606, + "logps/rejected": -2.287093162536621, + "loss": 1.5783, + "nll_loss": 0.3945064842700958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005983125302009284, + "rewards/margins": 0.22811101377010345, + "rewards/rejected": -0.22870934009552002, + "step": 5028 + }, + { + "epoch": 3.4778699861687414, + "grad_norm": 10.422560691833496, + "learning_rate": 3.6234055632395885e-05, + "log_odds_chosen": 9.66650676727295, + "log_odds_ratio": -0.0008411741000600159, + "logits/chosen": -0.36648574471473694, + "logits/rejected": -0.43066418170928955, + "logps/chosen": -0.0009089798550121486, + "logps/rejected": -1.8880401849746704, + "loss": 2.4622, + "nll_loss": 0.6154661178588867, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.089798550121486e-05, + "rewards/margins": 0.18871311843395233, + "rewards/rejected": -0.188804030418396, + "step": 5029 + }, + { + "epoch": 3.4785615491009683, + "grad_norm": 6.423739433288574, + "learning_rate": 3.623021361610574e-05, + "log_odds_chosen": 8.680051803588867, + "log_odds_ratio": -0.000523662893101573, + "logits/chosen": -0.35798779129981995, + "logits/rejected": -0.41463935375213623, + "logps/chosen": -0.03764787316322327, + "logps/rejected": -2.4031128883361816, + "loss": 1.343, + "nll_loss": 0.33569198846817017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003764787456020713, + "rewards/margins": 0.23654648661613464, + "rewards/rejected": -0.24031127989292145, + "step": 5030 + }, + { + "epoch": 3.479253112033195, + "grad_norm": 11.363663673400879, + "learning_rate": 3.6226371599815584e-05, + "log_odds_chosen": 8.720566749572754, + "log_odds_ratio": -0.000603137887082994, + "logits/chosen": -0.5339272022247314, + "logits/rejected": -0.5698242783546448, + "logps/chosen": -0.017068665474653244, + "logps/rejected": -1.91888427734375, + "loss": 1.6891, + "nll_loss": 0.42220330238342285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017068665474653244, + "rewards/margins": 0.19018155336380005, + "rewards/rejected": -0.19188842177391052, + "step": 5031 + }, + { + "epoch": 3.479944674965422, + "grad_norm": 16.539823532104492, + "learning_rate": 3.6222529583525436e-05, + "log_odds_chosen": 8.81527042388916, + "log_odds_ratio": -0.0015905527397990227, + "logits/chosen": -0.4191141724586487, + "logits/rejected": -0.5238168239593506, + "logps/chosen": -0.021409403532743454, + "logps/rejected": -2.097461700439453, + "loss": 1.6056, + "nll_loss": 0.40124809741973877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002140940399840474, + "rewards/margins": 0.207605242729187, + "rewards/rejected": -0.20974619686603546, + "step": 5032 + }, + { + "epoch": 3.4806362378976488, + "grad_norm": 8.92994213104248, + "learning_rate": 3.621868756723529e-05, + "log_odds_chosen": 8.732751846313477, + "log_odds_ratio": -0.03898253291845322, + "logits/chosen": -0.44541874527931213, + "logits/rejected": -0.4249739646911621, + "logps/chosen": -0.011088935658335686, + "logps/rejected": -1.4665523767471313, + "loss": 1.2592, + "nll_loss": 0.3109119236469269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001108893658965826, + "rewards/margins": 0.1455463469028473, + "rewards/rejected": -0.14665524661540985, + "step": 5033 + }, + { + "epoch": 3.4813278008298756, + "grad_norm": 9.916893005371094, + "learning_rate": 3.6214845550945134e-05, + "log_odds_chosen": 9.63333511352539, + "log_odds_ratio": -0.00036752651794813573, + "logits/chosen": -0.7815406322479248, + "logits/rejected": -0.8568220138549805, + "logps/chosen": -0.004876892548054457, + "logps/rejected": -2.339784622192383, + "loss": 1.4612, + "nll_loss": 0.36527514457702637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048768927808851004, + "rewards/margins": 0.23349076509475708, + "rewards/rejected": -0.23397845029830933, + "step": 5034 + }, + { + "epoch": 3.4820193637621024, + "grad_norm": 8.489008903503418, + "learning_rate": 3.621100353465499e-05, + "log_odds_chosen": 7.9217400550842285, + "log_odds_ratio": -0.02504688873887062, + "logits/chosen": -0.4744040369987488, + "logits/rejected": -0.45413997769355774, + "logps/chosen": -0.02560979500412941, + "logps/rejected": -1.4098031520843506, + "loss": 1.9231, + "nll_loss": 0.47827842831611633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025609794538468122, + "rewards/margins": 0.13841934502124786, + "rewards/rejected": -0.1409803181886673, + "step": 5035 + }, + { + "epoch": 3.4827109266943292, + "grad_norm": 7.887087345123291, + "learning_rate": 3.620716151836484e-05, + "log_odds_chosen": 9.41520881652832, + "log_odds_ratio": -0.0003263282706029713, + "logits/chosen": -0.6199313402175903, + "logits/rejected": -0.6613143682479858, + "logps/chosen": -0.00028231722535565495, + "logps/rejected": -1.521788477897644, + "loss": 1.5075, + "nll_loss": 0.37684372067451477, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8231723263161257e-05, + "rewards/margins": 0.15215063095092773, + "rewards/rejected": -0.15217885375022888, + "step": 5036 + }, + { + "epoch": 3.483402489626556, + "grad_norm": 10.450078964233398, + "learning_rate": 3.620331950207469e-05, + "log_odds_chosen": 7.924431800842285, + "log_odds_ratio": -0.0865463986992836, + "logits/chosen": -0.33945807814598083, + "logits/rejected": -0.33382901549339294, + "logps/chosen": -0.01411413960158825, + "logps/rejected": -1.5017454624176025, + "loss": 1.5724, + "nll_loss": 0.3844349980354309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014114138903096318, + "rewards/margins": 0.14876313507556915, + "rewards/rejected": -0.1501745581626892, + "step": 5037 + }, + { + "epoch": 3.484094052558783, + "grad_norm": 8.861961364746094, + "learning_rate": 3.6199477485784544e-05, + "log_odds_chosen": 9.79195785522461, + "log_odds_ratio": -0.00022242713021114469, + "logits/chosen": -0.6150322556495667, + "logits/rejected": -0.7293128967285156, + "logps/chosen": -0.05526260286569595, + "logps/rejected": -2.4982879161834717, + "loss": 2.0242, + "nll_loss": 0.5060203075408936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00552626047283411, + "rewards/margins": 0.24430254101753235, + "rewards/rejected": -0.2498287856578827, + "step": 5038 + }, + { + "epoch": 3.4847856154910097, + "grad_norm": 28.145069122314453, + "learning_rate": 3.6195635469494396e-05, + "log_odds_chosen": 5.657858848571777, + "log_odds_ratio": -0.2621900737285614, + "logits/chosen": -0.7421402335166931, + "logits/rejected": -0.7373098134994507, + "logps/chosen": -0.06877894699573517, + "logps/rejected": -1.5929591655731201, + "loss": 1.9334, + "nll_loss": 0.45713573694229126, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006877894978970289, + "rewards/margins": 0.15241803228855133, + "rewards/rejected": -0.159295916557312, + "step": 5039 + }, + { + "epoch": 3.4854771784232366, + "grad_norm": 9.978339195251465, + "learning_rate": 3.619179345320424e-05, + "log_odds_chosen": 7.554687976837158, + "log_odds_ratio": -0.20541711151599884, + "logits/chosen": -0.3608725666999817, + "logits/rejected": -0.3803059458732605, + "logps/chosen": -0.040510617196559906, + "logps/rejected": -1.93406343460083, + "loss": 2.0015, + "nll_loss": 0.47983455657958984, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004051061812788248, + "rewards/margins": 0.1893552988767624, + "rewards/rejected": -0.1934063583612442, + "step": 5040 + }, + { + "epoch": 3.4861687413554634, + "grad_norm": 9.79824161529541, + "learning_rate": 3.6187951436914094e-05, + "log_odds_chosen": 8.895830154418945, + "log_odds_ratio": -0.00149711431004107, + "logits/chosen": -0.5272182822227478, + "logits/rejected": -0.5619787573814392, + "logps/chosen": -0.0031608245335519314, + "logps/rejected": -1.6382068395614624, + "loss": 1.9738, + "nll_loss": 0.4933049976825714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031608244171366096, + "rewards/margins": 0.16350463032722473, + "rewards/rejected": -0.16382069885730743, + "step": 5041 + }, + { + "epoch": 3.4868603042876902, + "grad_norm": 13.927655220031738, + "learning_rate": 3.618410942062395e-05, + "log_odds_chosen": 7.837332725524902, + "log_odds_ratio": -0.03233502060174942, + "logits/chosen": -0.29475536942481995, + "logits/rejected": -0.33633238077163696, + "logps/chosen": -0.00651334086433053, + "logps/rejected": -1.2358462810516357, + "loss": 1.4919, + "nll_loss": 0.36975133419036865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006513340631499887, + "rewards/margins": 0.1229332983493805, + "rewards/rejected": -0.12358463555574417, + "step": 5042 + }, + { + "epoch": 3.487551867219917, + "grad_norm": 9.157980918884277, + "learning_rate": 3.618026740433379e-05, + "log_odds_chosen": 8.778959274291992, + "log_odds_ratio": -0.0015918298158794641, + "logits/chosen": -0.3938351273536682, + "logits/rejected": -0.48897871375083923, + "logps/chosen": -0.006256352178752422, + "logps/rejected": -1.4195303916931152, + "loss": 1.1331, + "nll_loss": 0.2831065058708191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000625635264441371, + "rewards/margins": 0.1413274109363556, + "rewards/rejected": -0.14195305109024048, + "step": 5043 + }, + { + "epoch": 3.488243430152144, + "grad_norm": 9.305924415588379, + "learning_rate": 3.6176425388043645e-05, + "log_odds_chosen": 9.255959510803223, + "log_odds_ratio": -0.002576855244114995, + "logits/chosen": -0.7664635181427002, + "logits/rejected": -0.8265019059181213, + "logps/chosen": -0.024486836045980453, + "logps/rejected": -1.4735629558563232, + "loss": 1.6929, + "nll_loss": 0.42297691106796265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024486836045980453, + "rewards/margins": 0.1449076235294342, + "rewards/rejected": -0.1473563015460968, + "step": 5044 + }, + { + "epoch": 3.4889349930843707, + "grad_norm": 7.845460414886475, + "learning_rate": 3.61725833717535e-05, + "log_odds_chosen": 8.247480392456055, + "log_odds_ratio": -0.06533218920230865, + "logits/chosen": -0.4811922013759613, + "logits/rejected": -0.5305379033088684, + "logps/chosen": -0.02111988700926304, + "logps/rejected": -1.544506549835205, + "loss": 2.2076, + "nll_loss": 0.5453552603721619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00211198884062469, + "rewards/margins": 0.1523386687040329, + "rewards/rejected": -0.1544506549835205, + "step": 5045 + }, + { + "epoch": 3.4896265560165975, + "grad_norm": 5.572291374206543, + "learning_rate": 3.616874135546335e-05, + "log_odds_chosen": 8.172150611877441, + "log_odds_ratio": -0.0033650745172053576, + "logits/chosen": -0.2974160313606262, + "logits/rejected": -0.2888872027397156, + "logps/chosen": -0.020543230697512627, + "logps/rejected": -1.5316355228424072, + "loss": 1.5213, + "nll_loss": 0.3799995481967926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002054323209449649, + "rewards/margins": 0.1511092185974121, + "rewards/rejected": -0.15316355228424072, + "step": 5046 + }, + { + "epoch": 3.4903181189488244, + "grad_norm": 10.916781425476074, + "learning_rate": 3.6164899339173196e-05, + "log_odds_chosen": 7.001707077026367, + "log_odds_ratio": -0.10261018574237823, + "logits/chosen": -0.5108945369720459, + "logits/rejected": -0.5677796006202698, + "logps/chosen": -0.05275255814194679, + "logps/rejected": -1.37205171585083, + "loss": 1.2155, + "nll_loss": 0.29360371828079224, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0052752564661204815, + "rewards/margins": 0.1319299042224884, + "rewards/rejected": -0.13720516860485077, + "step": 5047 + }, + { + "epoch": 3.491009681881051, + "grad_norm": 5.465517520904541, + "learning_rate": 3.6161057322883055e-05, + "log_odds_chosen": 8.64756965637207, + "log_odds_ratio": -0.0032608138862997293, + "logits/chosen": -0.3673758804798126, + "logits/rejected": -0.3721608519554138, + "logps/chosen": -0.010069094598293304, + "logps/rejected": -1.5483287572860718, + "loss": 1.4544, + "nll_loss": 0.36326465010643005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010069095296785235, + "rewards/margins": 0.15382596850395203, + "rewards/rejected": -0.1548328697681427, + "step": 5048 + }, + { + "epoch": 3.491701244813278, + "grad_norm": 11.032556533813477, + "learning_rate": 3.61572153065929e-05, + "log_odds_chosen": 7.748650074005127, + "log_odds_ratio": -0.06495750695466995, + "logits/chosen": -0.6558927297592163, + "logits/rejected": -0.7183180451393127, + "logps/chosen": -0.02538420259952545, + "logps/rejected": -1.5755445957183838, + "loss": 1.4078, + "nll_loss": 0.34544721245765686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002538420259952545, + "rewards/margins": 0.15501603484153748, + "rewards/rejected": -0.15755446255207062, + "step": 5049 + }, + { + "epoch": 3.492392807745505, + "grad_norm": 12.697063446044922, + "learning_rate": 3.615337329030275e-05, + "log_odds_chosen": 9.086719512939453, + "log_odds_ratio": -0.004768200218677521, + "logits/chosen": -0.7487730979919434, + "logits/rejected": -0.8184801936149597, + "logps/chosen": -0.010872675105929375, + "logps/rejected": -2.005723237991333, + "loss": 1.7075, + "nll_loss": 0.4263884127140045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010872675338760018, + "rewards/margins": 0.19948504865169525, + "rewards/rejected": -0.20057231187820435, + "step": 5050 + }, + { + "epoch": 3.4930843706777317, + "grad_norm": 13.886307716369629, + "learning_rate": 3.6149531274012605e-05, + "log_odds_chosen": 10.670909881591797, + "log_odds_ratio": -4.9107984523288906e-05, + "logits/chosen": -0.9256460666656494, + "logits/rejected": -0.9379943609237671, + "logps/chosen": -0.00014166987966746092, + "logps/rejected": -1.8907963037490845, + "loss": 2.3547, + "nll_loss": 0.5886602401733398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.416698705725139e-05, + "rewards/margins": 0.18906547129154205, + "rewards/rejected": -0.1890796422958374, + "step": 5051 + }, + { + "epoch": 3.4937759336099585, + "grad_norm": 8.242508888244629, + "learning_rate": 3.614568925772245e-05, + "log_odds_chosen": 7.944234848022461, + "log_odds_ratio": -0.006905496120452881, + "logits/chosen": -0.48522865772247314, + "logits/rejected": -0.5774377584457397, + "logps/chosen": -0.03180186077952385, + "logps/rejected": -1.4319233894348145, + "loss": 1.3, + "nll_loss": 0.3243020176887512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031801860313862562, + "rewards/margins": 0.14001215994358063, + "rewards/rejected": -0.1431923359632492, + "step": 5052 + }, + { + "epoch": 3.4944674965421854, + "grad_norm": 8.319031715393066, + "learning_rate": 3.6141847241432303e-05, + "log_odds_chosen": 7.558518886566162, + "log_odds_ratio": -0.14716488122940063, + "logits/chosen": -0.22034114599227905, + "logits/rejected": -0.32531481981277466, + "logps/chosen": -0.03135522082448006, + "logps/rejected": -1.3388361930847168, + "loss": 1.4963, + "nll_loss": 0.3593598008155823, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003135522361844778, + "rewards/margins": 0.13074809312820435, + "rewards/rejected": -0.13388362526893616, + "step": 5053 + }, + { + "epoch": 3.495159059474412, + "grad_norm": 11.776897430419922, + "learning_rate": 3.6138005225142156e-05, + "log_odds_chosen": 9.902790069580078, + "log_odds_ratio": -0.00023262518516276032, + "logits/chosen": -0.03856794908642769, + "logits/rejected": -0.1859092116355896, + "logps/chosen": -0.0006960300961509347, + "logps/rejected": -2.378844738006592, + "loss": 1.459, + "nll_loss": 0.36472997069358826, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.960301107028499e-05, + "rewards/margins": 0.23781487345695496, + "rewards/rejected": -0.23788444697856903, + "step": 5054 + }, + { + "epoch": 3.495850622406639, + "grad_norm": 10.10391902923584, + "learning_rate": 3.613416320885201e-05, + "log_odds_chosen": 10.151603698730469, + "log_odds_ratio": -0.00014976883539929986, + "logits/chosen": -0.6190488934516907, + "logits/rejected": -0.7602345943450928, + "logps/chosen": -0.0018250253051519394, + "logps/rejected": -2.936875343322754, + "loss": 1.7966, + "nll_loss": 0.4491300582885742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018250252469442785, + "rewards/margins": 0.2935050427913666, + "rewards/rejected": -0.2936875522136688, + "step": 5055 + }, + { + "epoch": 3.496542185338866, + "grad_norm": 9.116644859313965, + "learning_rate": 3.6130321192561854e-05, + "log_odds_chosen": 6.269155025482178, + "log_odds_ratio": -0.41318410634994507, + "logits/chosen": -0.355681836605072, + "logits/rejected": -0.41957610845565796, + "logps/chosen": -0.05598800256848335, + "logps/rejected": -1.3569520711898804, + "loss": 1.8405, + "nll_loss": 0.41881901025772095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005598800256848335, + "rewards/margins": 0.1300964057445526, + "rewards/rejected": -0.135695219039917, + "step": 5056 + }, + { + "epoch": 3.4972337482710927, + "grad_norm": 13.085668563842773, + "learning_rate": 3.612647917627171e-05, + "log_odds_chosen": 8.548544883728027, + "log_odds_ratio": -0.0006819585105404258, + "logits/chosen": -0.4421701431274414, + "logits/rejected": -0.5115413665771484, + "logps/chosen": -0.0004435776500031352, + "logps/rejected": -1.0637874603271484, + "loss": 1.9339, + "nll_loss": 0.48339563608169556, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.435776645550504e-05, + "rewards/margins": 0.106334388256073, + "rewards/rejected": -0.10637873411178589, + "step": 5057 + }, + { + "epoch": 3.4979253112033195, + "grad_norm": 6.2441020011901855, + "learning_rate": 3.612263715998156e-05, + "log_odds_chosen": 8.581676483154297, + "log_odds_ratio": -0.03747273609042168, + "logits/chosen": -0.6855819225311279, + "logits/rejected": -0.6639347076416016, + "logps/chosen": -0.011732269078493118, + "logps/rejected": -1.8874115943908691, + "loss": 1.4293, + "nll_loss": 0.35356974601745605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001173226861283183, + "rewards/margins": 0.18756791949272156, + "rewards/rejected": -0.18874114751815796, + "step": 5058 + }, + { + "epoch": 3.4986168741355463, + "grad_norm": 4.798537254333496, + "learning_rate": 3.611879514369141e-05, + "log_odds_chosen": 9.492433547973633, + "log_odds_ratio": -0.00040241493843495846, + "logits/chosen": -0.48869088292121887, + "logits/rejected": -0.48516204953193665, + "logps/chosen": -0.00044442637590691447, + "logps/rejected": -1.894187569618225, + "loss": 1.2766, + "nll_loss": 0.3191039264202118, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.444263322511688e-05, + "rewards/margins": 0.18937431275844574, + "rewards/rejected": -0.189418762922287, + "step": 5059 + }, + { + "epoch": 3.499308437067773, + "grad_norm": 9.459883689880371, + "learning_rate": 3.6114953127401264e-05, + "log_odds_chosen": 9.808856010437012, + "log_odds_ratio": -0.001135217142291367, + "logits/chosen": -0.6091340184211731, + "logits/rejected": -0.6108609437942505, + "logps/chosen": -0.001535170478746295, + "logps/rejected": -2.7543370723724365, + "loss": 1.3907, + "nll_loss": 0.34757012128829956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015351705951616168, + "rewards/margins": 0.2752802073955536, + "rewards/rejected": -0.2754337191581726, + "step": 5060 + }, + { + "epoch": 3.5, + "grad_norm": 13.043842315673828, + "learning_rate": 3.611111111111111e-05, + "log_odds_chosen": 8.608272552490234, + "log_odds_ratio": -0.0255854744464159, + "logits/chosen": -0.4813705086708069, + "logits/rejected": -0.500043511390686, + "logps/chosen": -0.006744784768670797, + "logps/rejected": -1.9021813869476318, + "loss": 2.3054, + "nll_loss": 0.5737854838371277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006744785932824016, + "rewards/margins": 0.18954366445541382, + "rewards/rejected": -0.19021813571453094, + "step": 5061 + }, + { + "epoch": 3.500691562932227, + "grad_norm": 9.071768760681152, + "learning_rate": 3.610726909482096e-05, + "log_odds_chosen": 10.202449798583984, + "log_odds_ratio": -6.68539505568333e-05, + "logits/chosen": -0.6950543522834778, + "logits/rejected": -0.7431255578994751, + "logps/chosen": -0.00206986372359097, + "logps/rejected": -2.522648334503174, + "loss": 1.1268, + "nll_loss": 0.2816920876502991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020698636944871396, + "rewards/margins": 0.25205785036087036, + "rewards/rejected": -0.2522648274898529, + "step": 5062 + }, + { + "epoch": 3.5013831258644537, + "grad_norm": 8.379225730895996, + "learning_rate": 3.6103427078530814e-05, + "log_odds_chosen": 9.258936882019043, + "log_odds_ratio": -0.0001494312018621713, + "logits/chosen": -0.5267353057861328, + "logits/rejected": -0.5575824975967407, + "logps/chosen": -0.00039477666723541915, + "logps/rejected": -1.283223032951355, + "loss": 1.7336, + "nll_loss": 0.4333917498588562, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.94776689063292e-05, + "rewards/margins": 0.128282830119133, + "rewards/rejected": -0.1283223032951355, + "step": 5063 + }, + { + "epoch": 3.5020746887966805, + "grad_norm": 10.303986549377441, + "learning_rate": 3.609958506224067e-05, + "log_odds_chosen": 9.066301345825195, + "log_odds_ratio": -0.15924587845802307, + "logits/chosen": -0.5196304321289062, + "logits/rejected": -0.5205535292625427, + "logps/chosen": -0.03134698420763016, + "logps/rejected": -2.229219675064087, + "loss": 1.5492, + "nll_loss": 0.3713781237602234, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031346981413662434, + "rewards/margins": 0.21978726983070374, + "rewards/rejected": -0.2229219675064087, + "step": 5064 + }, + { + "epoch": 3.5027662517289073, + "grad_norm": 10.489588737487793, + "learning_rate": 3.609574304595051e-05, + "log_odds_chosen": 9.330596923828125, + "log_odds_ratio": -0.0003697268257383257, + "logits/chosen": -0.8499776124954224, + "logits/rejected": -0.892784595489502, + "logps/chosen": -0.001054689404554665, + "logps/rejected": -1.539988398551941, + "loss": 1.5798, + "nll_loss": 0.39492517709732056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010546894918661565, + "rewards/margins": 0.1538933664560318, + "rewards/rejected": -0.15399885177612305, + "step": 5065 + }, + { + "epoch": 3.503457814661134, + "grad_norm": 9.035969734191895, + "learning_rate": 3.609190102966037e-05, + "log_odds_chosen": 10.0315580368042, + "log_odds_ratio": -0.00021950459631625563, + "logits/chosen": -0.24112743139266968, + "logits/rejected": -0.27213215827941895, + "logps/chosen": -0.012225059792399406, + "logps/rejected": -3.4997692108154297, + "loss": 1.3641, + "nll_loss": 0.34100615978240967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001222505932673812, + "rewards/margins": 0.3487544059753418, + "rewards/rejected": -0.34997692704200745, + "step": 5066 + }, + { + "epoch": 3.504149377593361, + "grad_norm": 13.972981452941895, + "learning_rate": 3.608805901337022e-05, + "log_odds_chosen": 10.162301063537598, + "log_odds_ratio": -0.00026165239978581667, + "logits/chosen": -0.3325657844543457, + "logits/rejected": -0.4123249053955078, + "logps/chosen": -0.006227482575923204, + "logps/rejected": -2.271134853363037, + "loss": 1.3434, + "nll_loss": 0.3358166217803955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006227482808753848, + "rewards/margins": 0.22649073600769043, + "rewards/rejected": -0.2271134853363037, + "step": 5067 + }, + { + "epoch": 3.504840940525588, + "grad_norm": 6.947229385375977, + "learning_rate": 3.608421699708007e-05, + "log_odds_chosen": 8.882793426513672, + "log_odds_ratio": -0.0007761258166283369, + "logits/chosen": -0.7312330007553101, + "logits/rejected": -0.7411985397338867, + "logps/chosen": -0.017531974241137505, + "logps/rejected": -2.05059814453125, + "loss": 1.9768, + "nll_loss": 0.4941311478614807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001753197400830686, + "rewards/margins": 0.20330661535263062, + "rewards/rejected": -0.20505981147289276, + "step": 5068 + }, + { + "epoch": 3.5055325034578146, + "grad_norm": 12.753296852111816, + "learning_rate": 3.608037498078992e-05, + "log_odds_chosen": 9.524681091308594, + "log_odds_ratio": -0.00022812785755377263, + "logits/chosen": -0.8692194819450378, + "logits/rejected": -0.8909227252006531, + "logps/chosen": -0.006189709063619375, + "logps/rejected": -2.2828361988067627, + "loss": 1.7645, + "nll_loss": 0.44110339879989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006189708947204053, + "rewards/margins": 0.22766464948654175, + "rewards/rejected": -0.22828364372253418, + "step": 5069 + }, + { + "epoch": 3.5062240663900415, + "grad_norm": 10.580520629882812, + "learning_rate": 3.607653296449977e-05, + "log_odds_chosen": 9.021589279174805, + "log_odds_ratio": -0.008893512189388275, + "logits/chosen": -0.41797518730163574, + "logits/rejected": -0.5197737812995911, + "logps/chosen": -0.009041551500558853, + "logps/rejected": -2.1127045154571533, + "loss": 1.8537, + "nll_loss": 0.46254584193229675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009041551384143531, + "rewards/margins": 0.21036630868911743, + "rewards/rejected": -0.21127045154571533, + "step": 5070 + }, + { + "epoch": 3.5069156293222683, + "grad_norm": 14.19862174987793, + "learning_rate": 3.607269094820962e-05, + "log_odds_chosen": 9.665410041809082, + "log_odds_ratio": -0.0405154712498188, + "logits/chosen": -0.4829082190990448, + "logits/rejected": -0.492472767829895, + "logps/chosen": -0.010864950716495514, + "logps/rejected": -1.8430681228637695, + "loss": 1.4125, + "nll_loss": 0.3490619659423828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010864951182156801, + "rewards/margins": 0.18322032690048218, + "rewards/rejected": -0.184306800365448, + "step": 5071 + }, + { + "epoch": 3.507607192254495, + "grad_norm": 8.601861000061035, + "learning_rate": 3.606884893191947e-05, + "log_odds_chosen": 8.293171882629395, + "log_odds_ratio": -0.04796692728996277, + "logits/chosen": -0.47136473655700684, + "logits/rejected": -0.5330482721328735, + "logps/chosen": -0.01917835883796215, + "logps/rejected": -2.065481662750244, + "loss": 1.2136, + "nll_loss": 0.29859915375709534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019178359070792794, + "rewards/margins": 0.20463034510612488, + "rewards/rejected": -0.20654819905757904, + "step": 5072 + }, + { + "epoch": 3.508298755186722, + "grad_norm": 6.478180408477783, + "learning_rate": 3.6065006915629325e-05, + "log_odds_chosen": 10.741048812866211, + "log_odds_ratio": -5.074698128737509e-05, + "logits/chosen": -0.8846883177757263, + "logits/rejected": -0.967951774597168, + "logps/chosen": -0.00030177010921761394, + "logps/rejected": -2.429570436477661, + "loss": 1.9351, + "nll_loss": 0.48377639055252075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.017700873897411e-05, + "rewards/margins": 0.24292686581611633, + "rewards/rejected": -0.24295704066753387, + "step": 5073 + }, + { + "epoch": 3.508990318118949, + "grad_norm": 12.065322875976562, + "learning_rate": 3.606116489933917e-05, + "log_odds_chosen": 9.034112930297852, + "log_odds_ratio": -0.0008123770821839571, + "logits/chosen": -0.7525606155395508, + "logits/rejected": -0.7461023330688477, + "logps/chosen": -0.0008287005475722253, + "logps/rejected": -1.683483600616455, + "loss": 1.9323, + "nll_loss": 0.4829895496368408, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.287005039164796e-05, + "rewards/margins": 0.16826549172401428, + "rewards/rejected": -0.16834837198257446, + "step": 5074 + }, + { + "epoch": 3.5096818810511756, + "grad_norm": 14.989665031433105, + "learning_rate": 3.605732288304903e-05, + "log_odds_chosen": 8.394176483154297, + "log_odds_ratio": -0.0017240258166566491, + "logits/chosen": -0.3496403694152832, + "logits/rejected": -0.4731343984603882, + "logps/chosen": -0.023600636050105095, + "logps/rejected": -2.197500467300415, + "loss": 2.4449, + "nll_loss": 0.6110531091690063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002360063372179866, + "rewards/margins": 0.21738998591899872, + "rewards/rejected": -0.2197500467300415, + "step": 5075 + }, + { + "epoch": 3.5103734439834025, + "grad_norm": 11.799775123596191, + "learning_rate": 3.6053480866758876e-05, + "log_odds_chosen": 8.778727531433105, + "log_odds_ratio": -0.0009676261688582599, + "logits/chosen": -0.5945629477500916, + "logits/rejected": -0.652061939239502, + "logps/chosen": -0.01023485790938139, + "logps/rejected": -1.929757833480835, + "loss": 1.6064, + "nll_loss": 0.40149986743927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010234859073534608, + "rewards/margins": 0.19195228815078735, + "rewards/rejected": -0.19297577440738678, + "step": 5076 + }, + { + "epoch": 3.5110650069156293, + "grad_norm": 10.793294906616211, + "learning_rate": 3.604963885046873e-05, + "log_odds_chosen": 9.653995513916016, + "log_odds_ratio": -0.00015553759294562042, + "logits/chosen": -0.5547876954078674, + "logits/rejected": -0.6828795075416565, + "logps/chosen": -0.0003929367521777749, + "logps/rejected": -1.7646691799163818, + "loss": 1.3604, + "nll_loss": 0.34009164571762085, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9293678128160536e-05, + "rewards/margins": 0.17642761766910553, + "rewards/rejected": -0.1764669120311737, + "step": 5077 + }, + { + "epoch": 3.511756569847856, + "grad_norm": 11.926183700561523, + "learning_rate": 3.604579683417858e-05, + "log_odds_chosen": 10.17160415649414, + "log_odds_ratio": -8.280223846668378e-05, + "logits/chosen": -0.5874757766723633, + "logits/rejected": -0.6495798826217651, + "logps/chosen": -0.0005028080195188522, + "logps/rejected": -2.0479609966278076, + "loss": 0.914, + "nll_loss": 0.2284971922636032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.028080704505555e-05, + "rewards/margins": 0.2047458291053772, + "rewards/rejected": -0.20479610562324524, + "step": 5078 + }, + { + "epoch": 3.512448132780083, + "grad_norm": 4.949942588806152, + "learning_rate": 3.6041954817888426e-05, + "log_odds_chosen": 7.430364608764648, + "log_odds_ratio": -0.14765413105487823, + "logits/chosen": -0.4910086989402771, + "logits/rejected": -0.535973072052002, + "logps/chosen": -0.07196828722953796, + "logps/rejected": -2.810218334197998, + "loss": 1.5335, + "nll_loss": 0.36859917640686035, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007196827791631222, + "rewards/margins": 0.2738250195980072, + "rewards/rejected": -0.2810218632221222, + "step": 5079 + }, + { + "epoch": 3.5131396957123098, + "grad_norm": 11.681784629821777, + "learning_rate": 3.603811280159828e-05, + "log_odds_chosen": 10.228755950927734, + "log_odds_ratio": -0.00011042998812627047, + "logits/chosen": -0.5658866763114929, + "logits/rejected": -0.6545494198799133, + "logps/chosen": -0.012667344883084297, + "logps/rejected": -3.262669086456299, + "loss": 1.9467, + "nll_loss": 0.48666873574256897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012667345581576228, + "rewards/margins": 0.32500016689300537, + "rewards/rejected": -0.326266884803772, + "step": 5080 + }, + { + "epoch": 3.5138312586445366, + "grad_norm": 9.769969940185547, + "learning_rate": 3.603427078530813e-05, + "log_odds_chosen": 9.764698028564453, + "log_odds_ratio": -0.000422182260081172, + "logits/chosen": -0.966056227684021, + "logits/rejected": -1.006844162940979, + "logps/chosen": -0.0004481312062125653, + "logps/rejected": -1.9890567064285278, + "loss": 2.1247, + "nll_loss": 0.5311307311058044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.48131249868311e-05, + "rewards/margins": 0.19886085391044617, + "rewards/rejected": -0.19890566170215607, + "step": 5081 + }, + { + "epoch": 3.5145228215767634, + "grad_norm": 9.379589080810547, + "learning_rate": 3.6030428769017984e-05, + "log_odds_chosen": 9.625581741333008, + "log_odds_ratio": -0.0015480243600904942, + "logits/chosen": -0.6199181079864502, + "logits/rejected": -0.7238527536392212, + "logps/chosen": -0.0013244760921224952, + "logps/rejected": -1.7693266868591309, + "loss": 1.1488, + "nll_loss": 0.287042498588562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013244761794339865, + "rewards/margins": 0.1768002212047577, + "rewards/rejected": -0.1769326776266098, + "step": 5082 + }, + { + "epoch": 3.5152143845089903, + "grad_norm": 13.813467979431152, + "learning_rate": 3.602658675272783e-05, + "log_odds_chosen": 9.168367385864258, + "log_odds_ratio": -0.0006410049390979111, + "logits/chosen": -0.7326682806015015, + "logits/rejected": -0.7909855842590332, + "logps/chosen": -0.00048034184146672487, + "logps/rejected": -1.5736289024353027, + "loss": 1.9079, + "nll_loss": 0.476909339427948, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.803418414667249e-05, + "rewards/margins": 0.15731483697891235, + "rewards/rejected": -0.15736287832260132, + "step": 5083 + }, + { + "epoch": 3.515905947441217, + "grad_norm": 9.20576000213623, + "learning_rate": 3.602274473643769e-05, + "log_odds_chosen": 7.716304302215576, + "log_odds_ratio": -0.02225544862449169, + "logits/chosen": -0.3978271484375, + "logits/rejected": -0.4570864737033844, + "logps/chosen": -0.01456506922841072, + "logps/rejected": -1.4820979833602905, + "loss": 1.7357, + "nll_loss": 0.4317033290863037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014565070159733295, + "rewards/margins": 0.14675329625606537, + "rewards/rejected": -0.148209810256958, + "step": 5084 + }, + { + "epoch": 3.516597510373444, + "grad_norm": 10.250093460083008, + "learning_rate": 3.6018902720147534e-05, + "log_odds_chosen": 9.908461570739746, + "log_odds_ratio": -0.00018903396266978234, + "logits/chosen": -0.9073819518089294, + "logits/rejected": -1.0101597309112549, + "logps/chosen": -0.0003365837619639933, + "logps/rejected": -1.800266981124878, + "loss": 1.4972, + "nll_loss": 0.37427830696105957, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.365837619639933e-05, + "rewards/margins": 0.17999303340911865, + "rewards/rejected": -0.18002669513225555, + "step": 5085 + }, + { + "epoch": 3.5172890733056708, + "grad_norm": 9.994810104370117, + "learning_rate": 3.601506070385739e-05, + "log_odds_chosen": 9.212892532348633, + "log_odds_ratio": -0.00037254547351039946, + "logits/chosen": -0.42903658747673035, + "logits/rejected": -0.5099354982376099, + "logps/chosen": -0.0033264392986893654, + "logps/rejected": -1.9749102592468262, + "loss": 1.2616, + "nll_loss": 0.31536275148391724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033264391822740436, + "rewards/margins": 0.19715839624404907, + "rewards/rejected": -0.19749103486537933, + "step": 5086 + }, + { + "epoch": 3.5179806362378976, + "grad_norm": 10.837930679321289, + "learning_rate": 3.601121868756724e-05, + "log_odds_chosen": 8.183601379394531, + "log_odds_ratio": -0.20187465846538544, + "logits/chosen": -0.7843718528747559, + "logits/rejected": -0.78766268491745, + "logps/chosen": -0.032734133303165436, + "logps/rejected": -1.520382046699524, + "loss": 1.5588, + "nll_loss": 0.36951279640197754, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003273413283750415, + "rewards/margins": 0.14876478910446167, + "rewards/rejected": -0.15203820168972015, + "step": 5087 + }, + { + "epoch": 3.5186721991701244, + "grad_norm": 11.267701148986816, + "learning_rate": 3.6007376671277085e-05, + "log_odds_chosen": 8.13360595703125, + "log_odds_ratio": -0.0014181515434756875, + "logits/chosen": -0.8068364858627319, + "logits/rejected": -0.7554510831832886, + "logps/chosen": -0.033745840191841125, + "logps/rejected": -2.1890408992767334, + "loss": 2.5046, + "nll_loss": 0.6260114908218384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003374584252014756, + "rewards/margins": 0.21552950143814087, + "rewards/rejected": -0.21890407800674438, + "step": 5088 + }, + { + "epoch": 3.5193637621023512, + "grad_norm": 7.490444660186768, + "learning_rate": 3.600353465498694e-05, + "log_odds_chosen": 9.055780410766602, + "log_odds_ratio": -0.00031747232424095273, + "logits/chosen": -0.4991127848625183, + "logits/rejected": -0.6640005707740784, + "logps/chosen": -0.0036325249820947647, + "logps/rejected": -1.712687373161316, + "loss": 1.0071, + "nll_loss": 0.251755028963089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036325250403024256, + "rewards/margins": 0.1709054708480835, + "rewards/rejected": -0.17126873135566711, + "step": 5089 + }, + { + "epoch": 3.520055325034578, + "grad_norm": 10.06659984588623, + "learning_rate": 3.599969263869679e-05, + "log_odds_chosen": 7.548827648162842, + "log_odds_ratio": -0.00912503618746996, + "logits/chosen": -0.41075634956359863, + "logits/rejected": -0.44909027218818665, + "logps/chosen": -0.01701589860022068, + "logps/rejected": -1.6517646312713623, + "loss": 1.4971, + "nll_loss": 0.37337014079093933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001701589790172875, + "rewards/margins": 0.1634748876094818, + "rewards/rejected": -0.16517646610736847, + "step": 5090 + }, + { + "epoch": 3.520746887966805, + "grad_norm": 10.759828567504883, + "learning_rate": 3.599585062240664e-05, + "log_odds_chosen": 9.490163803100586, + "log_odds_ratio": -0.00014201641897670925, + "logits/chosen": -0.6013484001159668, + "logits/rejected": -0.7783204913139343, + "logps/chosen": -0.000322447856888175, + "logps/rejected": -1.555269479751587, + "loss": 1.3042, + "nll_loss": 0.3260456323623657, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.224478496122174e-05, + "rewards/margins": 0.15549471974372864, + "rewards/rejected": -0.15552696585655212, + "step": 5091 + }, + { + "epoch": 3.5214384508990317, + "grad_norm": 13.803914070129395, + "learning_rate": 3.599200860611649e-05, + "log_odds_chosen": 9.127652168273926, + "log_odds_ratio": -0.00017640476289670914, + "logits/chosen": -0.8106200098991394, + "logits/rejected": -0.8595431447029114, + "logps/chosen": -0.0002514416119083762, + "logps/rejected": -1.1357730627059937, + "loss": 2.3148, + "nll_loss": 0.5786784887313843, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5144163373624906e-05, + "rewards/margins": 0.11355216801166534, + "rewards/rejected": -0.11357730627059937, + "step": 5092 + }, + { + "epoch": 3.5221300138312586, + "grad_norm": 14.12282657623291, + "learning_rate": 3.598816658982635e-05, + "log_odds_chosen": 8.557235717773438, + "log_odds_ratio": -0.002839646302163601, + "logits/chosen": -0.5079107880592346, + "logits/rejected": -0.6302869319915771, + "logps/chosen": -0.020289452746510506, + "logps/rejected": -1.8531651496887207, + "loss": 2.7005, + "nll_loss": 0.6748350262641907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020289451349526644, + "rewards/margins": 0.18328757584095, + "rewards/rejected": -0.1853165179491043, + "step": 5093 + }, + { + "epoch": 3.5228215767634854, + "grad_norm": 6.537510395050049, + "learning_rate": 3.598432457353619e-05, + "log_odds_chosen": 8.796355247497559, + "log_odds_ratio": -0.00046882365131750703, + "logits/chosen": -0.4269121289253235, + "logits/rejected": -0.4743691682815552, + "logps/chosen": -0.014062023721635342, + "logps/rejected": -2.1716971397399902, + "loss": 1.4093, + "nll_loss": 0.35227110981941223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014062024420127273, + "rewards/margins": 0.21576350927352905, + "rewards/rejected": -0.21716971695423126, + "step": 5094 + }, + { + "epoch": 3.5235131396957122, + "grad_norm": 12.389871597290039, + "learning_rate": 3.5980482557246045e-05, + "log_odds_chosen": 10.240442276000977, + "log_odds_ratio": -7.592760084662586e-05, + "logits/chosen": -0.3653874397277832, + "logits/rejected": -0.4159523844718933, + "logps/chosen": -0.0002882831613533199, + "logps/rejected": -1.8680007457733154, + "loss": 1.3335, + "nll_loss": 0.3333684802055359, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.882831722672563e-05, + "rewards/margins": 0.1867712438106537, + "rewards/rejected": -0.18680007755756378, + "step": 5095 + }, + { + "epoch": 3.524204702627939, + "grad_norm": 12.912257194519043, + "learning_rate": 3.59766405409559e-05, + "log_odds_chosen": 8.562950134277344, + "log_odds_ratio": -0.05261914059519768, + "logits/chosen": -0.639365017414093, + "logits/rejected": -0.649884819984436, + "logps/chosen": -0.012849587015807629, + "logps/rejected": -1.956565260887146, + "loss": 2.0593, + "nll_loss": 0.509568452835083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012849586782976985, + "rewards/margins": 0.19437158107757568, + "rewards/rejected": -0.19565653800964355, + "step": 5096 + }, + { + "epoch": 3.524896265560166, + "grad_norm": 7.283536911010742, + "learning_rate": 3.597279852466574e-05, + "log_odds_chosen": 6.307476997375488, + "log_odds_ratio": -0.10233741253614426, + "logits/chosen": -0.6674084663391113, + "logits/rejected": -0.6511844396591187, + "logps/chosen": -0.019914401695132256, + "logps/rejected": -0.8803103566169739, + "loss": 1.5538, + "nll_loss": 0.3782210946083069, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019914403092116117, + "rewards/margins": 0.08603960275650024, + "rewards/rejected": -0.08803103119134903, + "step": 5097 + }, + { + "epoch": 3.5255878284923927, + "grad_norm": 7.762702941894531, + "learning_rate": 3.5968956508375596e-05, + "log_odds_chosen": 6.637197971343994, + "log_odds_ratio": -0.07845356315374374, + "logits/chosen": -0.1810343861579895, + "logits/rejected": -0.2177562117576599, + "logps/chosen": -0.02767745964229107, + "logps/rejected": -1.5530402660369873, + "loss": 1.2597, + "nll_loss": 0.307077020406723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027677458710968494, + "rewards/margins": 0.1525362730026245, + "rewards/rejected": -0.15530402958393097, + "step": 5098 + }, + { + "epoch": 3.5262793914246195, + "grad_norm": 15.101137161254883, + "learning_rate": 3.596511449208545e-05, + "log_odds_chosen": 9.25841999053955, + "log_odds_ratio": -0.030140476301312447, + "logits/chosen": -0.21983087062835693, + "logits/rejected": -0.3168938457965851, + "logps/chosen": -0.019699474796652794, + "logps/rejected": -1.9712846279144287, + "loss": 1.3046, + "nll_loss": 0.3231399655342102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019699474796652794, + "rewards/margins": 0.19515851140022278, + "rewards/rejected": -0.19712845981121063, + "step": 5099 + }, + { + "epoch": 3.5269709543568464, + "grad_norm": 9.824456214904785, + "learning_rate": 3.59612724757953e-05, + "log_odds_chosen": 8.969016075134277, + "log_odds_ratio": -0.00030478276312351227, + "logits/chosen": -0.588179886341095, + "logits/rejected": -0.6493480205535889, + "logps/chosen": -0.009981921873986721, + "logps/rejected": -1.593450665473938, + "loss": 1.727, + "nll_loss": 0.4317193627357483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009981922339648008, + "rewards/margins": 0.15834686160087585, + "rewards/rejected": -0.15934506058692932, + "step": 5100 + }, + { + "epoch": 3.527662517289073, + "grad_norm": 6.7342963218688965, + "learning_rate": 3.5957430459505146e-05, + "log_odds_chosen": 9.023002624511719, + "log_odds_ratio": -0.0004063228552695364, + "logits/chosen": -0.6179463863372803, + "logits/rejected": -0.6786239147186279, + "logps/chosen": -0.0010950213763862848, + "logps/rejected": -1.3784172534942627, + "loss": 1.5397, + "nll_loss": 0.3848962187767029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010950213618343696, + "rewards/margins": 0.13773223757743835, + "rewards/rejected": -0.13784173130989075, + "step": 5101 + }, + { + "epoch": 3.5283540802213, + "grad_norm": 13.047355651855469, + "learning_rate": 3.5953588443215005e-05, + "log_odds_chosen": 8.957096099853516, + "log_odds_ratio": -0.0010938331251963973, + "logits/chosen": -0.5095317363739014, + "logits/rejected": -0.5065636038780212, + "logps/chosen": -0.0016800828743726015, + "logps/rejected": -1.350773811340332, + "loss": 1.7472, + "nll_loss": 0.4366909861564636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016800829325802624, + "rewards/margins": 0.13490936160087585, + "rewards/rejected": -0.13507738709449768, + "step": 5102 + }, + { + "epoch": 3.529045643153527, + "grad_norm": 11.464844703674316, + "learning_rate": 3.594974642692485e-05, + "log_odds_chosen": 10.505243301391602, + "log_odds_ratio": -5.506931120180525e-05, + "logits/chosen": -0.4430898427963257, + "logits/rejected": -0.49872028827667236, + "logps/chosen": -0.00032901056692935526, + "logps/rejected": -1.9516682624816895, + "loss": 1.4478, + "nll_loss": 0.3619363605976105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.290105451014824e-05, + "rewards/margins": 0.19513392448425293, + "rewards/rejected": -0.19516682624816895, + "step": 5103 + }, + { + "epoch": 3.5297372060857537, + "grad_norm": 12.956223487854004, + "learning_rate": 3.5945904410634704e-05, + "log_odds_chosen": 9.188024520874023, + "log_odds_ratio": -0.004907457623630762, + "logits/chosen": -0.9995465874671936, + "logits/rejected": -1.0715291500091553, + "logps/chosen": -0.0015922407619655132, + "logps/rejected": -1.4828119277954102, + "loss": 1.6385, + "nll_loss": 0.40912729501724243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015922407328616828, + "rewards/margins": 0.14812196791172028, + "rewards/rejected": -0.14828118681907654, + "step": 5104 + }, + { + "epoch": 3.5304287690179805, + "grad_norm": 7.3393025398254395, + "learning_rate": 3.5942062394344556e-05, + "log_odds_chosen": 7.815474033355713, + "log_odds_ratio": -0.009405778720974922, + "logits/chosen": -0.6533910036087036, + "logits/rejected": -0.6671908497810364, + "logps/chosen": -0.011968421749770641, + "logps/rejected": -2.025322437286377, + "loss": 1.7284, + "nll_loss": 0.43116769194602966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011968421749770641, + "rewards/margins": 0.20133540034294128, + "rewards/rejected": -0.20253226161003113, + "step": 5105 + }, + { + "epoch": 3.5311203319502074, + "grad_norm": 11.78327465057373, + "learning_rate": 3.59382203780544e-05, + "log_odds_chosen": 10.24453353881836, + "log_odds_ratio": -8.006079588085413e-05, + "logits/chosen": -0.7616961002349854, + "logits/rejected": -0.8472107648849487, + "logps/chosen": -0.0006323190173134208, + "logps/rejected": -2.2396769523620605, + "loss": 1.3703, + "nll_loss": 0.3425724506378174, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.323190609691665e-05, + "rewards/margins": 0.22390446066856384, + "rewards/rejected": -0.22396771609783173, + "step": 5106 + }, + { + "epoch": 3.531811894882434, + "grad_norm": 10.847970962524414, + "learning_rate": 3.5934378361764254e-05, + "log_odds_chosen": 9.180673599243164, + "log_odds_ratio": -0.043109308928251266, + "logits/chosen": -1.0109539031982422, + "logits/rejected": -1.0703439712524414, + "logps/chosen": -0.016480334103107452, + "logps/rejected": -1.9816501140594482, + "loss": 1.8167, + "nll_loss": 0.44986581802368164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016480335034430027, + "rewards/margins": 0.1965169608592987, + "rewards/rejected": -0.19816499948501587, + "step": 5107 + }, + { + "epoch": 3.532503457814661, + "grad_norm": 10.240374565124512, + "learning_rate": 3.5930536345474107e-05, + "log_odds_chosen": 8.178666114807129, + "log_odds_ratio": -0.006253361236304045, + "logits/chosen": -0.5043379664421082, + "logits/rejected": -0.5600912570953369, + "logps/chosen": -0.01136441994458437, + "logps/rejected": -1.7080068588256836, + "loss": 1.5304, + "nll_loss": 0.38196229934692383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011364419478923082, + "rewards/margins": 0.16966424882411957, + "rewards/rejected": -0.17080068588256836, + "step": 5108 + }, + { + "epoch": 3.533195020746888, + "grad_norm": 8.874667167663574, + "learning_rate": 3.592669432918396e-05, + "log_odds_chosen": 9.3858642578125, + "log_odds_ratio": -0.00019358650024514645, + "logits/chosen": -0.4227873682975769, + "logits/rejected": -0.4874667823314667, + "logps/chosen": -0.001081955386325717, + "logps/rejected": -2.3752799034118652, + "loss": 1.5544, + "nll_loss": 0.388569176197052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010819554154295474, + "rewards/margins": 0.23741981387138367, + "rewards/rejected": -0.2375280112028122, + "step": 5109 + }, + { + "epoch": 3.5338865836791147, + "grad_norm": 14.556818962097168, + "learning_rate": 3.5922852312893805e-05, + "log_odds_chosen": 10.08336067199707, + "log_odds_ratio": -8.728246029932052e-05, + "logits/chosen": -0.7301170229911804, + "logits/rejected": -0.7837525010108948, + "logps/chosen": -0.0014646199997514486, + "logps/rejected": -2.3359930515289307, + "loss": 1.7196, + "nll_loss": 0.429879754781723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001464620145270601, + "rewards/margins": 0.23345284163951874, + "rewards/rejected": -0.23359929025173187, + "step": 5110 + }, + { + "epoch": 3.5345781466113415, + "grad_norm": 8.479273796081543, + "learning_rate": 3.5919010296603664e-05, + "log_odds_chosen": 8.859445571899414, + "log_odds_ratio": -0.000794129678979516, + "logits/chosen": -0.5082720518112183, + "logits/rejected": -0.5971063375473022, + "logps/chosen": -0.0007075598696246743, + "logps/rejected": -1.2506650686264038, + "loss": 1.7882, + "nll_loss": 0.4469757676124573, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.075598841765895e-05, + "rewards/margins": 0.12499573826789856, + "rewards/rejected": -0.12506650388240814, + "step": 5111 + }, + { + "epoch": 3.5352697095435683, + "grad_norm": 9.707127571105957, + "learning_rate": 3.591516828031351e-05, + "log_odds_chosen": 9.019890785217285, + "log_odds_ratio": -0.0011449077865108848, + "logits/chosen": -0.4791383147239685, + "logits/rejected": -0.4929508566856384, + "logps/chosen": -0.02153146266937256, + "logps/rejected": -1.8436179161071777, + "loss": 1.6368, + "nll_loss": 0.4090908467769623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021531463135033846, + "rewards/margins": 0.18220864236354828, + "rewards/rejected": -0.1843617856502533, + "step": 5112 + }, + { + "epoch": 3.535961272475795, + "grad_norm": 7.732481002807617, + "learning_rate": 3.591132626402336e-05, + "log_odds_chosen": 6.761359691619873, + "log_odds_ratio": -0.133843794465065, + "logits/chosen": -0.1438131034374237, + "logits/rejected": -0.19129794836044312, + "logps/chosen": -0.02227877639234066, + "logps/rejected": -0.9083170294761658, + "loss": 2.0761, + "nll_loss": 0.505638599395752, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00222787749953568, + "rewards/margins": 0.08860382437705994, + "rewards/rejected": -0.0908316969871521, + "step": 5113 + }, + { + "epoch": 3.536652835408022, + "grad_norm": 11.014677047729492, + "learning_rate": 3.5907484247733214e-05, + "log_odds_chosen": 9.456984519958496, + "log_odds_ratio": -0.0004772770043928176, + "logits/chosen": -0.256796658039093, + "logits/rejected": -0.3246535062789917, + "logps/chosen": -0.006421164143830538, + "logps/rejected": -2.324803352355957, + "loss": 1.4841, + "nll_loss": 0.37098228931427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006421164725907147, + "rewards/margins": 0.23183822631835938, + "rewards/rejected": -0.23248033225536346, + "step": 5114 + }, + { + "epoch": 3.537344398340249, + "grad_norm": 8.51115894317627, + "learning_rate": 3.590364223144306e-05, + "log_odds_chosen": 9.182951927185059, + "log_odds_ratio": -0.0006045700865797698, + "logits/chosen": -0.43073856830596924, + "logits/rejected": -0.5255957245826721, + "logps/chosen": -0.016624998301267624, + "logps/rejected": -2.1651108264923096, + "loss": 1.2989, + "nll_loss": 0.32465484738349915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016624998534098268, + "rewards/margins": 0.2148485779762268, + "rewards/rejected": -0.2165111005306244, + "step": 5115 + }, + { + "epoch": 3.5380359612724757, + "grad_norm": 14.479390144348145, + "learning_rate": 3.589980021515291e-05, + "log_odds_chosen": 8.885518074035645, + "log_odds_ratio": -0.006611211225390434, + "logits/chosen": -0.7234517335891724, + "logits/rejected": -0.751315712928772, + "logps/chosen": -0.050526347011327744, + "logps/rejected": -1.8813791275024414, + "loss": 1.4642, + "nll_loss": 0.3653944432735443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005052634514868259, + "rewards/margins": 0.18308529257774353, + "rewards/rejected": -0.1881379336118698, + "step": 5116 + }, + { + "epoch": 3.5387275242047025, + "grad_norm": 12.264790534973145, + "learning_rate": 3.5895958198862765e-05, + "log_odds_chosen": 8.67015266418457, + "log_odds_ratio": -0.0009165530791506171, + "logits/chosen": -0.7622514367103577, + "logits/rejected": -0.8170218467712402, + "logps/chosen": -0.01521426159888506, + "logps/rejected": -1.7560782432556152, + "loss": 2.9703, + "nll_loss": 0.7424764633178711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015214262530207634, + "rewards/margins": 0.17408640682697296, + "rewards/rejected": -0.175607830286026, + "step": 5117 + }, + { + "epoch": 3.5394190871369293, + "grad_norm": 4.114010810852051, + "learning_rate": 3.589211618257262e-05, + "log_odds_chosen": 7.7912750244140625, + "log_odds_ratio": -0.0045229410752654076, + "logits/chosen": -0.5681171417236328, + "logits/rejected": -0.5650732517242432, + "logps/chosen": -0.011351736262440681, + "logps/rejected": -1.4451227188110352, + "loss": 1.6012, + "nll_loss": 0.39984703063964844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011351736029610038, + "rewards/margins": 0.14337711036205292, + "rewards/rejected": -0.14451228082180023, + "step": 5118 + }, + { + "epoch": 3.540110650069156, + "grad_norm": 10.561309814453125, + "learning_rate": 3.588827416628246e-05, + "log_odds_chosen": 8.318758010864258, + "log_odds_ratio": -0.23881548643112183, + "logits/chosen": -0.27856093645095825, + "logits/rejected": -0.3536270558834076, + "logps/chosen": -0.04434996470808983, + "logps/rejected": -1.96640944480896, + "loss": 1.41, + "nll_loss": 0.32863014936447144, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004434996284544468, + "rewards/margins": 0.19220595061779022, + "rewards/rejected": -0.1966409534215927, + "step": 5119 + }, + { + "epoch": 3.540802213001383, + "grad_norm": 10.838645935058594, + "learning_rate": 3.588443214999232e-05, + "log_odds_chosen": 10.16311264038086, + "log_odds_ratio": -0.00012633662845473737, + "logits/chosen": -0.5232207775115967, + "logits/rejected": -0.5633898973464966, + "logps/chosen": -0.00033453942160122097, + "logps/rejected": -2.079082489013672, + "loss": 1.371, + "nll_loss": 0.34272632002830505, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.345394361531362e-05, + "rewards/margins": 0.20787480473518372, + "rewards/rejected": -0.2079082578420639, + "step": 5120 + }, + { + "epoch": 3.54149377593361, + "grad_norm": 6.905520915985107, + "learning_rate": 3.588059013370217e-05, + "log_odds_chosen": 8.714499473571777, + "log_odds_ratio": -0.0032024341635406017, + "logits/chosen": -0.12589719891548157, + "logits/rejected": -0.18861685693264008, + "logps/chosen": -0.0030687712132930756, + "logps/rejected": -1.5923564434051514, + "loss": 1.1677, + "nll_loss": 0.29160743951797485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030687713297083974, + "rewards/margins": 0.158928781747818, + "rewards/rejected": -0.1592356562614441, + "step": 5121 + }, + { + "epoch": 3.5421853388658366, + "grad_norm": 8.405205726623535, + "learning_rate": 3.587674811741202e-05, + "log_odds_chosen": 8.597480773925781, + "log_odds_ratio": -0.0008526691817678511, + "logits/chosen": -0.4619186818599701, + "logits/rejected": -0.5468069314956665, + "logps/chosen": -0.0013205332215875387, + "logps/rejected": -1.2612684965133667, + "loss": 1.6883, + "nll_loss": 0.42197930812835693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013205331924837083, + "rewards/margins": 0.12599480152130127, + "rewards/rejected": -0.12612685561180115, + "step": 5122 + }, + { + "epoch": 3.5428769017980635, + "grad_norm": 9.318670272827148, + "learning_rate": 3.587290610112187e-05, + "log_odds_chosen": 6.977090358734131, + "log_odds_ratio": -0.194001704454422, + "logits/chosen": -0.2855735123157501, + "logits/rejected": -0.33359235525131226, + "logps/chosen": -0.06837824732065201, + "logps/rejected": -1.256178855895996, + "loss": 1.4145, + "nll_loss": 0.33423250913619995, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006837825290858746, + "rewards/margins": 0.11878006160259247, + "rewards/rejected": -0.1256178915500641, + "step": 5123 + }, + { + "epoch": 3.5435684647302903, + "grad_norm": 8.318540573120117, + "learning_rate": 3.586906408483172e-05, + "log_odds_chosen": 9.18307113647461, + "log_odds_ratio": -0.0005332131404429674, + "logits/chosen": 0.021685736253857613, + "logits/rejected": -0.05196130648255348, + "logps/chosen": -0.010211940854787827, + "logps/rejected": -2.656543731689453, + "loss": 1.3642, + "nll_loss": 0.34099307656288147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001021194038912654, + "rewards/margins": 0.2646331787109375, + "rewards/rejected": -0.26565438508987427, + "step": 5124 + }, + { + "epoch": 3.544260027662517, + "grad_norm": 16.354997634887695, + "learning_rate": 3.586522206854157e-05, + "log_odds_chosen": 8.942000389099121, + "log_odds_ratio": -0.015276739373803139, + "logits/chosen": -0.3055412173271179, + "logits/rejected": -0.3241763412952423, + "logps/chosen": -0.011312441900372505, + "logps/rejected": -2.309622287750244, + "loss": 2.0613, + "nll_loss": 0.5137892961502075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011312442366033792, + "rewards/margins": 0.22983098030090332, + "rewards/rejected": -0.23096221685409546, + "step": 5125 + }, + { + "epoch": 3.544951590594744, + "grad_norm": 8.887134552001953, + "learning_rate": 3.5861380052251423e-05, + "log_odds_chosen": 10.164998054504395, + "log_odds_ratio": -0.00015315644850488752, + "logits/chosen": -0.24806389212608337, + "logits/rejected": -0.3355085253715515, + "logps/chosen": -0.0041093104518949986, + "logps/rejected": -2.6971685886383057, + "loss": 1.6534, + "nll_loss": 0.41333335638046265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004109310102649033, + "rewards/margins": 0.269305944442749, + "rewards/rejected": -0.26971685886383057, + "step": 5126 + }, + { + "epoch": 3.545643153526971, + "grad_norm": 7.772130489349365, + "learning_rate": 3.5857538035961276e-05, + "log_odds_chosen": 10.5562744140625, + "log_odds_ratio": -7.292352529475465e-05, + "logits/chosen": -0.6387104988098145, + "logits/rejected": -0.6942251324653625, + "logps/chosen": -0.0001461450883653015, + "logps/rejected": -1.6912575960159302, + "loss": 1.3432, + "nll_loss": 0.33579444885253906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4614510291721672e-05, + "rewards/margins": 0.16911114752292633, + "rewards/rejected": -0.1691257655620575, + "step": 5127 + }, + { + "epoch": 3.5463347164591976, + "grad_norm": 11.181296348571777, + "learning_rate": 3.585369601967112e-05, + "log_odds_chosen": 9.056486129760742, + "log_odds_ratio": -0.017171263694763184, + "logits/chosen": -0.06890146434307098, + "logits/rejected": -0.08440100401639938, + "logps/chosen": -0.01895216852426529, + "logps/rejected": -2.131861448287964, + "loss": 2.0641, + "nll_loss": 0.5143003463745117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018952169921249151, + "rewards/margins": 0.21129092574119568, + "rewards/rejected": -0.2131861448287964, + "step": 5128 + }, + { + "epoch": 3.5470262793914245, + "grad_norm": 14.617752075195312, + "learning_rate": 3.584985400338098e-05, + "log_odds_chosen": 9.516202926635742, + "log_odds_ratio": -0.0002230665850220248, + "logits/chosen": -0.39011046290397644, + "logits/rejected": -0.5370066165924072, + "logps/chosen": -0.0019659469835460186, + "logps/rejected": -2.265226364135742, + "loss": 1.3383, + "nll_loss": 0.3345586061477661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001965947012649849, + "rewards/margins": 0.22632606327533722, + "rewards/rejected": -0.22652265429496765, + "step": 5129 + }, + { + "epoch": 3.5477178423236513, + "grad_norm": 8.726752281188965, + "learning_rate": 3.5846011987090826e-05, + "log_odds_chosen": 10.083745002746582, + "log_odds_ratio": -9.456132829654962e-05, + "logits/chosen": -0.736955463886261, + "logits/rejected": -0.8158849477767944, + "logps/chosen": -0.0003738811647053808, + "logps/rejected": -1.9998457431793213, + "loss": 1.202, + "nll_loss": 0.30050134658813477, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7388119380921125e-05, + "rewards/margins": 0.19994717836380005, + "rewards/rejected": -0.1999845653772354, + "step": 5130 + }, + { + "epoch": 3.548409405255878, + "grad_norm": 14.666512489318848, + "learning_rate": 3.584216997080068e-05, + "log_odds_chosen": 9.23165512084961, + "log_odds_ratio": -0.002063015243038535, + "logits/chosen": -0.5247082114219666, + "logits/rejected": -0.5676276683807373, + "logps/chosen": -0.020228393375873566, + "logps/rejected": -2.463697671890259, + "loss": 1.8779, + "nll_loss": 0.4692714214324951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020228393841534853, + "rewards/margins": 0.24434691667556763, + "rewards/rejected": -0.24636974930763245, + "step": 5131 + }, + { + "epoch": 3.549100968188105, + "grad_norm": 7.39258337020874, + "learning_rate": 3.583832795451053e-05, + "log_odds_chosen": 8.629104614257812, + "log_odds_ratio": -0.005373574793338776, + "logits/chosen": -0.44675952196121216, + "logits/rejected": -0.4921689033508301, + "logps/chosen": -0.008275295607745647, + "logps/rejected": -2.400135040283203, + "loss": 1.2359, + "nll_loss": 0.3084494471549988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00082752964226529, + "rewards/margins": 0.23918597400188446, + "rewards/rejected": -0.2400135099887848, + "step": 5132 + }, + { + "epoch": 3.5497925311203318, + "grad_norm": 10.347201347351074, + "learning_rate": 3.583448593822038e-05, + "log_odds_chosen": 9.661993980407715, + "log_odds_ratio": -0.00028375154943205416, + "logits/chosen": -0.5061460733413696, + "logits/rejected": -0.5724334716796875, + "logps/chosen": -0.0007418065215460956, + "logps/rejected": -1.877603530883789, + "loss": 1.4939, + "nll_loss": 0.373445987701416, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.418065797537565e-05, + "rewards/margins": 0.18768617510795593, + "rewards/rejected": -0.1877603530883789, + "step": 5133 + }, + { + "epoch": 3.5504840940525586, + "grad_norm": 7.307807445526123, + "learning_rate": 3.583064392193023e-05, + "log_odds_chosen": 9.501052856445312, + "log_odds_ratio": -0.017502374947071075, + "logits/chosen": -0.662177324295044, + "logits/rejected": -0.6623290181159973, + "logps/chosen": -0.00805945135653019, + "logps/rejected": -1.526928186416626, + "loss": 1.5985, + "nll_loss": 0.3978814482688904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000805945077445358, + "rewards/margins": 0.15188688039779663, + "rewards/rejected": -0.15269280970096588, + "step": 5134 + }, + { + "epoch": 3.5511756569847854, + "grad_norm": 8.619414329528809, + "learning_rate": 3.582680190564008e-05, + "log_odds_chosen": 5.690260887145996, + "log_odds_ratio": -0.12367193400859833, + "logits/chosen": -0.5978754758834839, + "logits/rejected": -0.6153057217597961, + "logps/chosen": -0.041250962764024734, + "logps/rejected": -1.0321168899536133, + "loss": 2.0047, + "nll_loss": 0.48880013823509216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004125096369534731, + "rewards/margins": 0.09908659756183624, + "rewards/rejected": -0.10321169346570969, + "step": 5135 + }, + { + "epoch": 3.5518672199170123, + "grad_norm": 11.130823135375977, + "learning_rate": 3.5822959889349934e-05, + "log_odds_chosen": 8.389826774597168, + "log_odds_ratio": -0.11120603233575821, + "logits/chosen": -0.821804940700531, + "logits/rejected": -0.8347532749176025, + "logps/chosen": -0.018633171916007996, + "logps/rejected": -1.5349326133728027, + "loss": 1.6964, + "nll_loss": 0.41298729181289673, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018633169820532203, + "rewards/margins": 0.1516299545764923, + "rewards/rejected": -0.1534932553768158, + "step": 5136 + }, + { + "epoch": 3.552558782849239, + "grad_norm": 9.259598731994629, + "learning_rate": 3.581911787305978e-05, + "log_odds_chosen": 9.88913345336914, + "log_odds_ratio": -0.006824263371527195, + "logits/chosen": -0.684761106967926, + "logits/rejected": -0.6901488900184631, + "logps/chosen": -0.005095028318464756, + "logps/rejected": -2.158032178878784, + "loss": 1.3891, + "nll_loss": 0.3465915620326996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005095028318464756, + "rewards/margins": 0.21529372036457062, + "rewards/rejected": -0.21580322086811066, + "step": 5137 + }, + { + "epoch": 3.553250345781466, + "grad_norm": 14.307619094848633, + "learning_rate": 3.581527585676964e-05, + "log_odds_chosen": 8.201597213745117, + "log_odds_ratio": -0.05911998078227043, + "logits/chosen": -0.6577879190444946, + "logits/rejected": -0.756862461566925, + "logps/chosen": -0.012355667538940907, + "logps/rejected": -1.847043514251709, + "loss": 1.7893, + "nll_loss": 0.44140928983688354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012355668004602194, + "rewards/margins": 0.1834687888622284, + "rewards/rejected": -0.18470436334609985, + "step": 5138 + }, + { + "epoch": 3.5539419087136928, + "grad_norm": 8.32098388671875, + "learning_rate": 3.5811433840479485e-05, + "log_odds_chosen": 8.830156326293945, + "log_odds_ratio": -0.036551717668771744, + "logits/chosen": -0.6407181620597839, + "logits/rejected": -0.6718611717224121, + "logps/chosen": -0.016496429219841957, + "logps/rejected": -2.304805040359497, + "loss": 1.4094, + "nll_loss": 0.348699152469635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001649643061682582, + "rewards/margins": 0.22883085906505585, + "rewards/rejected": -0.23048050701618195, + "step": 5139 + }, + { + "epoch": 3.5546334716459196, + "grad_norm": 26.237749099731445, + "learning_rate": 3.580759182418934e-05, + "log_odds_chosen": 9.193836212158203, + "log_odds_ratio": -0.00029868149431422353, + "logits/chosen": -0.5012434720993042, + "logits/rejected": -0.5276903510093689, + "logps/chosen": -0.008629154413938522, + "logps/rejected": -2.401827573776245, + "loss": 1.7928, + "nll_loss": 0.44816479086875916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008629155345261097, + "rewards/margins": 0.2393198311328888, + "rewards/rejected": -0.24018274247646332, + "step": 5140 + }, + { + "epoch": 3.5553250345781464, + "grad_norm": 10.040083885192871, + "learning_rate": 3.580374980789919e-05, + "log_odds_chosen": 8.394232749938965, + "log_odds_ratio": -0.004813689272850752, + "logits/chosen": -0.5562333464622498, + "logits/rejected": -0.683615505695343, + "logps/chosen": -0.0046744076535105705, + "logps/rejected": -1.7374382019042969, + "loss": 1.2966, + "nll_loss": 0.323678582906723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004674407828133553, + "rewards/margins": 0.1732763797044754, + "rewards/rejected": -0.1737438142299652, + "step": 5141 + }, + { + "epoch": 3.5560165975103732, + "grad_norm": 11.459824562072754, + "learning_rate": 3.5799907791609035e-05, + "log_odds_chosen": 8.987030982971191, + "log_odds_ratio": -0.0007204932626336813, + "logits/chosen": -0.7068724036216736, + "logits/rejected": -0.6982411742210388, + "logps/chosen": -0.0034352538641542196, + "logps/rejected": -1.6314517259597778, + "loss": 2.0082, + "nll_loss": 0.5019901990890503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003435253456700593, + "rewards/margins": 0.16280165314674377, + "rewards/rejected": -0.16314518451690674, + "step": 5142 + }, + { + "epoch": 3.5567081604426, + "grad_norm": 13.1761474609375, + "learning_rate": 3.579606577531889e-05, + "log_odds_chosen": 7.134213447570801, + "log_odds_ratio": -0.07483165711164474, + "logits/chosen": -0.6867763996124268, + "logits/rejected": -0.5705421566963196, + "logps/chosen": -0.08776207268238068, + "logps/rejected": -1.3543072938919067, + "loss": 1.5354, + "nll_loss": 0.3763653039932251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008776207454502583, + "rewards/margins": 0.1266545206308365, + "rewards/rejected": -0.1354307234287262, + "step": 5143 + }, + { + "epoch": 3.557399723374827, + "grad_norm": 8.90782356262207, + "learning_rate": 3.579222375902874e-05, + "log_odds_chosen": 9.829313278198242, + "log_odds_ratio": -0.00010671962081687525, + "logits/chosen": -0.4369320273399353, + "logits/rejected": -0.5152650475502014, + "logps/chosen": -0.0003849728964269161, + "logps/rejected": -1.4018714427947998, + "loss": 2.5479, + "nll_loss": 0.6369690299034119, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8497295463457704e-05, + "rewards/margins": 0.14014865458011627, + "rewards/rejected": -0.14018715918064117, + "step": 5144 + }, + { + "epoch": 3.5580912863070537, + "grad_norm": 13.46090316772461, + "learning_rate": 3.578838174273859e-05, + "log_odds_chosen": 7.671215534210205, + "log_odds_ratio": -0.21245524287223816, + "logits/chosen": -0.6361114978790283, + "logits/rejected": -0.6454745531082153, + "logps/chosen": -0.029578909277915955, + "logps/rejected": -1.829190969467163, + "loss": 1.7448, + "nll_loss": 0.4149635434150696, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002957891207188368, + "rewards/margins": 0.1799612194299698, + "rewards/rejected": -0.18291908502578735, + "step": 5145 + }, + { + "epoch": 3.5587828492392806, + "grad_norm": 8.752176284790039, + "learning_rate": 3.578453972644844e-05, + "log_odds_chosen": 7.500901699066162, + "log_odds_ratio": -0.014364222064614296, + "logits/chosen": -0.6340488791465759, + "logits/rejected": -0.5995294451713562, + "logps/chosen": -0.027720659971237183, + "logps/rejected": -1.7301421165466309, + "loss": 1.8421, + "nll_loss": 0.45909082889556885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002772066043689847, + "rewards/margins": 0.17024214565753937, + "rewards/rejected": -0.17301422357559204, + "step": 5146 + }, + { + "epoch": 3.5594744121715074, + "grad_norm": 12.609339714050293, + "learning_rate": 3.57806977101583e-05, + "log_odds_chosen": 9.670016288757324, + "log_odds_ratio": -0.03675759211182594, + "logits/chosen": -0.4056617319583893, + "logits/rejected": -0.433749794960022, + "logps/chosen": -0.009829879738390446, + "logps/rejected": -2.218677520751953, + "loss": 1.6728, + "nll_loss": 0.41451871395111084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009829880436882377, + "rewards/margins": 0.2208847850561142, + "rewards/rejected": -0.22186775505542755, + "step": 5147 + }, + { + "epoch": 3.5601659751037342, + "grad_norm": 11.2356595993042, + "learning_rate": 3.577685569386814e-05, + "log_odds_chosen": 9.592318534851074, + "log_odds_ratio": -0.0001454985758755356, + "logits/chosen": -0.5702686309814453, + "logits/rejected": -0.7230437397956848, + "logps/chosen": -0.0006389496265910566, + "logps/rejected": -1.6784694194793701, + "loss": 1.3064, + "nll_loss": 0.326596200466156, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.389496411429718e-05, + "rewards/margins": 0.16778305172920227, + "rewards/rejected": -0.1678469479084015, + "step": 5148 + }, + { + "epoch": 3.560857538035961, + "grad_norm": 7.388978004455566, + "learning_rate": 3.5773013677577996e-05, + "log_odds_chosen": 8.739555358886719, + "log_odds_ratio": -0.00031445466447621584, + "logits/chosen": -0.7255094051361084, + "logits/rejected": -0.6713707447052002, + "logps/chosen": -0.0004937859484925866, + "logps/rejected": -1.1414120197296143, + "loss": 1.2413, + "nll_loss": 0.3102880120277405, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.937859557685442e-05, + "rewards/margins": 0.11409182101488113, + "rewards/rejected": -0.11414120346307755, + "step": 5149 + }, + { + "epoch": 3.561549100968188, + "grad_norm": 9.674239158630371, + "learning_rate": 3.576917166128785e-05, + "log_odds_chosen": 9.749626159667969, + "log_odds_ratio": -0.003634081920608878, + "logits/chosen": -0.3100474774837494, + "logits/rejected": -0.35088616609573364, + "logps/chosen": -0.006417962722480297, + "logps/rejected": -1.842191457748413, + "loss": 1.4193, + "nll_loss": 0.354466050863266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006417962722480297, + "rewards/margins": 0.18357735872268677, + "rewards/rejected": -0.1842191517353058, + "step": 5150 + }, + { + "epoch": 3.5622406639004147, + "grad_norm": 13.221283912658691, + "learning_rate": 3.5765329644997694e-05, + "log_odds_chosen": 7.550267219543457, + "log_odds_ratio": -0.1355026811361313, + "logits/chosen": -0.592994749546051, + "logits/rejected": -0.7096748352050781, + "logps/chosen": -0.03942836821079254, + "logps/rejected": -1.6778159141540527, + "loss": 1.7986, + "nll_loss": 0.43610402941703796, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003942836541682482, + "rewards/margins": 0.16383875906467438, + "rewards/rejected": -0.16778159141540527, + "step": 5151 + }, + { + "epoch": 3.5629322268326415, + "grad_norm": 6.515930652618408, + "learning_rate": 3.5761487628707546e-05, + "log_odds_chosen": 8.004266738891602, + "log_odds_ratio": -0.002393821021541953, + "logits/chosen": -0.533973217010498, + "logits/rejected": -0.6153483390808105, + "logps/chosen": -0.02263014018535614, + "logps/rejected": -1.9999516010284424, + "loss": 1.2658, + "nll_loss": 0.31619948148727417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022630139719694853, + "rewards/margins": 0.1977321356534958, + "rewards/rejected": -0.19999516010284424, + "step": 5152 + }, + { + "epoch": 3.5636237897648684, + "grad_norm": 10.917157173156738, + "learning_rate": 3.57576456124174e-05, + "log_odds_chosen": 10.014253616333008, + "log_odds_ratio": -7.547252607764676e-05, + "logits/chosen": -0.6091383695602417, + "logits/rejected": -0.6481549143791199, + "logps/chosen": -0.0008129151538014412, + "logps/rejected": -2.1516170501708984, + "loss": 1.9984, + "nll_loss": 0.49959611892700195, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.129151683533564e-05, + "rewards/margins": 0.21508042514324188, + "rewards/rejected": -0.21516172587871552, + "step": 5153 + }, + { + "epoch": 3.564315352697095, + "grad_norm": 5.88906192779541, + "learning_rate": 3.575380359612725e-05, + "log_odds_chosen": 8.342607498168945, + "log_odds_ratio": -0.00917066354304552, + "logits/chosen": -0.7415870428085327, + "logits/rejected": -0.7444044947624207, + "logps/chosen": -0.020944029092788696, + "logps/rejected": -2.0400819778442383, + "loss": 1.2578, + "nll_loss": 0.31353580951690674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002094402676448226, + "rewards/margins": 0.2019137740135193, + "rewards/rejected": -0.20400819182395935, + "step": 5154 + }, + { + "epoch": 3.565006915629322, + "grad_norm": 9.904231071472168, + "learning_rate": 3.57499615798371e-05, + "log_odds_chosen": 8.931983947753906, + "log_odds_ratio": -0.0025517181493341923, + "logits/chosen": -0.21259820461273193, + "logits/rejected": -0.3430328369140625, + "logps/chosen": -0.01386320125311613, + "logps/rejected": -2.474242687225342, + "loss": 2.273, + "nll_loss": 0.5680056810379028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013863201020285487, + "rewards/margins": 0.24603793025016785, + "rewards/rejected": -0.24742424488067627, + "step": 5155 + }, + { + "epoch": 3.565698478561549, + "grad_norm": 8.710456848144531, + "learning_rate": 3.5746119563546956e-05, + "log_odds_chosen": 8.032377243041992, + "log_odds_ratio": -0.027574822306632996, + "logits/chosen": -0.6033883094787598, + "logits/rejected": -0.6674955487251282, + "logps/chosen": -0.018795570358633995, + "logps/rejected": -2.009221315383911, + "loss": 1.6532, + "nll_loss": 0.41054731607437134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018795570358633995, + "rewards/margins": 0.19904258847236633, + "rewards/rejected": -0.2009221315383911, + "step": 5156 + }, + { + "epoch": 3.5663900414937757, + "grad_norm": 11.037312507629395, + "learning_rate": 3.57422775472568e-05, + "log_odds_chosen": 7.575757026672363, + "log_odds_ratio": -0.04499872028827667, + "logits/chosen": -0.4731323719024658, + "logits/rejected": -0.5606644153594971, + "logps/chosen": -0.024044960737228394, + "logps/rejected": -1.340688943862915, + "loss": 2.1447, + "nll_loss": 0.5316750407218933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002404496306553483, + "rewards/margins": 0.13166441023349762, + "rewards/rejected": -0.13406890630722046, + "step": 5157 + }, + { + "epoch": 3.5670816044260025, + "grad_norm": 6.969189167022705, + "learning_rate": 3.5738435530966654e-05, + "log_odds_chosen": 8.482478141784668, + "log_odds_ratio": -0.04103895649313927, + "logits/chosen": -0.3462011218070984, + "logits/rejected": -0.3915109932422638, + "logps/chosen": -0.00928918831050396, + "logps/rejected": -1.1896979808807373, + "loss": 1.4382, + "nll_loss": 0.35545119643211365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009289190056733787, + "rewards/margins": 0.11804087460041046, + "rewards/rejected": -0.11896979808807373, + "step": 5158 + }, + { + "epoch": 3.5677731673582294, + "grad_norm": 12.088088035583496, + "learning_rate": 3.5734593514676507e-05, + "log_odds_chosen": 9.807308197021484, + "log_odds_ratio": -0.002077706390991807, + "logits/chosen": -0.31680384278297424, + "logits/rejected": -0.40869811177253723, + "logps/chosen": -0.0016392001416534185, + "logps/rejected": -2.416490077972412, + "loss": 1.5806, + "nll_loss": 0.3949546813964844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016392001998610795, + "rewards/margins": 0.2414850890636444, + "rewards/rejected": -0.24164903163909912, + "step": 5159 + }, + { + "epoch": 3.568464730290456, + "grad_norm": 6.884808540344238, + "learning_rate": 3.573075149838635e-05, + "log_odds_chosen": 9.909006118774414, + "log_odds_ratio": -0.0002003078261623159, + "logits/chosen": -0.350724995136261, + "logits/rejected": -0.4701218605041504, + "logps/chosen": -0.0006381743587553501, + "logps/rejected": -1.6747055053710938, + "loss": 1.8905, + "nll_loss": 0.4726088047027588, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381743878591806e-05, + "rewards/margins": 0.16740673780441284, + "rewards/rejected": -0.1674705445766449, + "step": 5160 + }, + { + "epoch": 3.569156293222683, + "grad_norm": 9.014147758483887, + "learning_rate": 3.5726909482096205e-05, + "log_odds_chosen": 8.882246971130371, + "log_odds_ratio": -0.0020518959499895573, + "logits/chosen": -0.4286271333694458, + "logits/rejected": -0.40721285343170166, + "logps/chosen": -0.0014548527542501688, + "logps/rejected": -1.1933650970458984, + "loss": 1.1218, + "nll_loss": 0.28023436665534973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014548527542501688, + "rewards/margins": 0.11919102817773819, + "rewards/rejected": -0.11933650076389313, + "step": 5161 + }, + { + "epoch": 3.56984785615491, + "grad_norm": 7.56203031539917, + "learning_rate": 3.572306746580606e-05, + "log_odds_chosen": 9.380385398864746, + "log_odds_ratio": -0.0003798382531385869, + "logits/chosen": -0.7221666574478149, + "logits/rejected": -0.7379388809204102, + "logps/chosen": -0.004587444942444563, + "logps/rejected": -1.950721263885498, + "loss": 2.0756, + "nll_loss": 0.5188639163970947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004587445582728833, + "rewards/margins": 0.19461336731910706, + "rewards/rejected": -0.19507211446762085, + "step": 5162 + }, + { + "epoch": 3.5705394190871367, + "grad_norm": 7.279213905334473, + "learning_rate": 3.571922544951591e-05, + "log_odds_chosen": 9.02218246459961, + "log_odds_ratio": -0.013631554320454597, + "logits/chosen": -0.7051505446434021, + "logits/rejected": -0.748414158821106, + "logps/chosen": -0.011888135224580765, + "logps/rejected": -1.3814345598220825, + "loss": 1.6526, + "nll_loss": 0.4117904007434845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011888135923072696, + "rewards/margins": 0.1369546353816986, + "rewards/rejected": -0.13814346492290497, + "step": 5163 + }, + { + "epoch": 3.5712309820193635, + "grad_norm": 4.712618827819824, + "learning_rate": 3.5715383433225755e-05, + "log_odds_chosen": 7.798961162567139, + "log_odds_ratio": -0.025595782324671745, + "logits/chosen": -0.30597984790802, + "logits/rejected": -0.3332618772983551, + "logps/chosen": -0.027721570804715157, + "logps/rejected": -1.0935512781143188, + "loss": 2.3449, + "nll_loss": 0.583656907081604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027721570804715157, + "rewards/margins": 0.10658296942710876, + "rewards/rejected": -0.10935512185096741, + "step": 5164 + }, + { + "epoch": 3.5719225449515903, + "grad_norm": 12.295555114746094, + "learning_rate": 3.5711541416935614e-05, + "log_odds_chosen": 11.032272338867188, + "log_odds_ratio": -3.914807894034311e-05, + "logits/chosen": -0.5761994123458862, + "logits/rejected": -0.5990482568740845, + "logps/chosen": -0.00012401210551615804, + "logps/rejected": -2.0997300148010254, + "loss": 1.7092, + "nll_loss": 0.4272838532924652, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2401211279211566e-05, + "rewards/margins": 0.20996060967445374, + "rewards/rejected": -0.20997300744056702, + "step": 5165 + }, + { + "epoch": 3.572614107883817, + "grad_norm": 8.280593872070312, + "learning_rate": 3.570769940064546e-05, + "log_odds_chosen": 8.952764511108398, + "log_odds_ratio": -0.0012806118465960026, + "logits/chosen": -0.7560865879058838, + "logits/rejected": -0.7571930885314941, + "logps/chosen": -0.008908161893486977, + "logps/rejected": -1.8132458925247192, + "loss": 1.1701, + "nll_loss": 0.2924080193042755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000890816212631762, + "rewards/margins": 0.18043377995491028, + "rewards/rejected": -0.18132458627223969, + "step": 5166 + }, + { + "epoch": 3.573305670816044, + "grad_norm": 7.02388334274292, + "learning_rate": 3.570385738435531e-05, + "log_odds_chosen": 9.885597229003906, + "log_odds_ratio": -0.00014873781765345484, + "logits/chosen": -0.8601112961769104, + "logits/rejected": -0.8761740922927856, + "logps/chosen": -0.011144283227622509, + "logps/rejected": -2.309807300567627, + "loss": 1.6112, + "nll_loss": 0.4027857184410095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011144281597808003, + "rewards/margins": 0.22986629605293274, + "rewards/rejected": -0.2309807538986206, + "step": 5167 + }, + { + "epoch": 3.573997233748271, + "grad_norm": 12.574030876159668, + "learning_rate": 3.5700015368065165e-05, + "log_odds_chosen": 7.35061502456665, + "log_odds_ratio": -0.28171849250793457, + "logits/chosen": -0.5025765299797058, + "logits/rejected": -0.6474568843841553, + "logps/chosen": -0.25245729088783264, + "logps/rejected": -1.6816558837890625, + "loss": 1.0523, + "nll_loss": 0.23489251732826233, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.025245727971196175, + "rewards/margins": 0.1429198682308197, + "rewards/rejected": -0.16816559433937073, + "step": 5168 + }, + { + "epoch": 3.5746887966804977, + "grad_norm": 18.708730697631836, + "learning_rate": 3.569617335177502e-05, + "log_odds_chosen": 8.130278587341309, + "log_odds_ratio": -0.2269182801246643, + "logits/chosen": -0.7245622873306274, + "logits/rejected": -0.7673302292823792, + "logps/chosen": -0.040166158229112625, + "logps/rejected": -1.931434154510498, + "loss": 1.6429, + "nll_loss": 0.38803112506866455, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00401661591604352, + "rewards/margins": 0.18912678956985474, + "rewards/rejected": -0.19314341247081757, + "step": 5169 + }, + { + "epoch": 3.5753803596127245, + "grad_norm": 8.475849151611328, + "learning_rate": 3.569233133548486e-05, + "log_odds_chosen": 8.871143341064453, + "log_odds_ratio": -0.0027840326074510813, + "logits/chosen": -0.6744045615196228, + "logits/rejected": -0.7654039263725281, + "logps/chosen": -0.012963301502168179, + "logps/rejected": -1.5083293914794922, + "loss": 1.1291, + "nll_loss": 0.2819991111755371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012963303597643971, + "rewards/margins": 0.1495366096496582, + "rewards/rejected": -0.15083295106887817, + "step": 5170 + }, + { + "epoch": 3.5760719225449513, + "grad_norm": 8.500786781311035, + "learning_rate": 3.5688489319194716e-05, + "log_odds_chosen": 10.066465377807617, + "log_odds_ratio": -0.00014326379459816962, + "logits/chosen": -0.7963685989379883, + "logits/rejected": -0.7845640182495117, + "logps/chosen": -0.000356621399987489, + "logps/rejected": -1.7721641063690186, + "loss": 1.2365, + "nll_loss": 0.30910173058509827, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5662138543557376e-05, + "rewards/margins": 0.17718075215816498, + "rewards/rejected": -0.17721642553806305, + "step": 5171 + }, + { + "epoch": 3.576763485477178, + "grad_norm": 9.517539978027344, + "learning_rate": 3.568464730290457e-05, + "log_odds_chosen": 6.560201168060303, + "log_odds_ratio": -0.23065565526485443, + "logits/chosen": -0.6330443620681763, + "logits/rejected": -0.6648932695388794, + "logps/chosen": -0.07982344925403595, + "logps/rejected": -1.2751703262329102, + "loss": 1.9859, + "nll_loss": 0.4733983278274536, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007982345297932625, + "rewards/margins": 0.1195346862077713, + "rewards/rejected": -0.12751702964305878, + "step": 5172 + }, + { + "epoch": 3.577455048409405, + "grad_norm": 4.817649841308594, + "learning_rate": 3.5680805286614414e-05, + "log_odds_chosen": 7.474740982055664, + "log_odds_ratio": -0.027025185525417328, + "logits/chosen": -0.3730385899543762, + "logits/rejected": -0.4131702780723572, + "logps/chosen": -0.028401460498571396, + "logps/rejected": -1.7878029346466064, + "loss": 1.2991, + "nll_loss": 0.3220648169517517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028401461895555258, + "rewards/margins": 0.1759401559829712, + "rewards/rejected": -0.17878031730651855, + "step": 5173 + }, + { + "epoch": 3.5781466113416323, + "grad_norm": 11.787805557250977, + "learning_rate": 3.5676963270324266e-05, + "log_odds_chosen": 10.492147445678711, + "log_odds_ratio": -0.0001426354137947783, + "logits/chosen": -0.674610435962677, + "logits/rejected": -0.6919329762458801, + "logps/chosen": -0.021003739908337593, + "logps/rejected": -2.5441362857818604, + "loss": 1.5442, + "nll_loss": 0.3860280215740204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021003740839660168, + "rewards/margins": 0.2523132562637329, + "rewards/rejected": -0.2544136345386505, + "step": 5174 + }, + { + "epoch": 3.578838174273859, + "grad_norm": 8.707939147949219, + "learning_rate": 3.567312125403412e-05, + "log_odds_chosen": 9.505112648010254, + "log_odds_ratio": -0.003918660804629326, + "logits/chosen": -0.8012299537658691, + "logits/rejected": -0.8023931980133057, + "logps/chosen": -0.002562303561717272, + "logps/rejected": -1.9537736177444458, + "loss": 2.02, + "nll_loss": 0.5045973062515259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002562303270678967, + "rewards/margins": 0.19512110948562622, + "rewards/rejected": -0.19537734985351562, + "step": 5175 + }, + { + "epoch": 3.579529737206086, + "grad_norm": 13.434113502502441, + "learning_rate": 3.566927923774397e-05, + "log_odds_chosen": 8.59106159210205, + "log_odds_ratio": -0.0021850857883691788, + "logits/chosen": -1.0124863386154175, + "logits/rejected": -1.021253228187561, + "logps/chosen": -0.01776060089468956, + "logps/rejected": -2.7843875885009766, + "loss": 3.2201, + "nll_loss": 0.8048138618469238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017760602058842778, + "rewards/margins": 0.2766627073287964, + "rewards/rejected": -0.2784387767314911, + "step": 5176 + }, + { + "epoch": 3.5802213001383127, + "grad_norm": 8.495587348937988, + "learning_rate": 3.566543722145382e-05, + "log_odds_chosen": 10.054089546203613, + "log_odds_ratio": -0.0004434631555341184, + "logits/chosen": -0.5013400316238403, + "logits/rejected": -0.5111294388771057, + "logps/chosen": -0.00820872001349926, + "logps/rejected": -2.961402416229248, + "loss": 1.8074, + "nll_loss": 0.45180925726890564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008208720246329904, + "rewards/margins": 0.2953193783760071, + "rewards/rejected": -0.29614022374153137, + "step": 5177 + }, + { + "epoch": 3.5809128630705396, + "grad_norm": 9.217135429382324, + "learning_rate": 3.5661595205163676e-05, + "log_odds_chosen": 7.587440490722656, + "log_odds_ratio": -0.014449968934059143, + "logits/chosen": -0.6275132894515991, + "logits/rejected": -0.712116003036499, + "logps/chosen": -0.02014012075960636, + "logps/rejected": -1.2985014915466309, + "loss": 1.5584, + "nll_loss": 0.38814911246299744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002014012075960636, + "rewards/margins": 0.12783613801002502, + "rewards/rejected": -0.12985014915466309, + "step": 5178 + }, + { + "epoch": 3.5816044260027664, + "grad_norm": 15.308435440063477, + "learning_rate": 3.565775318887352e-05, + "log_odds_chosen": 9.134074211120605, + "log_odds_ratio": -0.005743531044572592, + "logits/chosen": -0.60552978515625, + "logits/rejected": -0.6348068118095398, + "logps/chosen": -0.015729809179902077, + "logps/rejected": -1.2693355083465576, + "loss": 0.9502, + "nll_loss": 0.23696690797805786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015729808947071433, + "rewards/margins": 0.12536057829856873, + "rewards/rejected": -0.12693354487419128, + "step": 5179 + }, + { + "epoch": 3.5822959889349932, + "grad_norm": 8.912403106689453, + "learning_rate": 3.5653911172583374e-05, + "log_odds_chosen": 8.609804153442383, + "log_odds_ratio": -0.11055989563465118, + "logits/chosen": -0.7532698512077332, + "logits/rejected": -0.8019864559173584, + "logps/chosen": -0.018508533015847206, + "logps/rejected": -1.8376288414001465, + "loss": 1.2348, + "nll_loss": 0.297635555267334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001850853324867785, + "rewards/margins": 0.18191203474998474, + "rewards/rejected": -0.18376289308071136, + "step": 5180 + }, + { + "epoch": 3.58298755186722, + "grad_norm": 8.784750938415527, + "learning_rate": 3.5650069156293226e-05, + "log_odds_chosen": 8.036988258361816, + "log_odds_ratio": -0.029761577025055885, + "logits/chosen": -0.4563424587249756, + "logits/rejected": -0.49063539505004883, + "logps/chosen": -0.02047664113342762, + "logps/rejected": -2.1021976470947266, + "loss": 1.8393, + "nll_loss": 0.4568403959274292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020476640202105045, + "rewards/margins": 0.20817212760448456, + "rewards/rejected": -0.21021978557109833, + "step": 5181 + }, + { + "epoch": 3.583679114799447, + "grad_norm": 9.938516616821289, + "learning_rate": 3.564622714000307e-05, + "log_odds_chosen": 9.53658390045166, + "log_odds_ratio": -0.0001912098377943039, + "logits/chosen": -0.029588311910629272, + "logits/rejected": -0.11934809386730194, + "logps/chosen": -0.0002886332804337144, + "logps/rejected": -1.574800968170166, + "loss": 1.7211, + "nll_loss": 0.4302535057067871, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8863330953754485e-05, + "rewards/margins": 0.15745124220848083, + "rewards/rejected": -0.15748010575771332, + "step": 5182 + }, + { + "epoch": 3.5843706777316737, + "grad_norm": 7.632718563079834, + "learning_rate": 3.5642385123712925e-05, + "log_odds_chosen": 8.880706787109375, + "log_odds_ratio": -0.00037580274511128664, + "logits/chosen": -0.3277152180671692, + "logits/rejected": -0.35892271995544434, + "logps/chosen": -0.0005073597421869636, + "logps/rejected": -1.1650047302246094, + "loss": 1.1231, + "nll_loss": 0.2807457745075226, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.073597276350483e-05, + "rewards/margins": 0.116449736058712, + "rewards/rejected": -0.11650047451257706, + "step": 5183 + }, + { + "epoch": 3.5850622406639006, + "grad_norm": 5.33242130279541, + "learning_rate": 3.563854310742278e-05, + "log_odds_chosen": 8.373812675476074, + "log_odds_ratio": -0.0011708845850080252, + "logits/chosen": -0.35607287287712097, + "logits/rejected": -0.37027066946029663, + "logps/chosen": -0.012119665741920471, + "logps/rejected": -1.9545071125030518, + "loss": 1.3272, + "nll_loss": 0.3316921591758728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001211966504342854, + "rewards/margins": 0.19423875212669373, + "rewards/rejected": -0.19545072317123413, + "step": 5184 + }, + { + "epoch": 3.5857538035961274, + "grad_norm": 8.848631858825684, + "learning_rate": 3.563470109113263e-05, + "log_odds_chosen": 9.168559074401855, + "log_odds_ratio": -0.0005192652461118996, + "logits/chosen": -0.4697961211204529, + "logits/rejected": -0.5052551031112671, + "logps/chosen": -0.014308737590909004, + "logps/rejected": -2.15948486328125, + "loss": 1.676, + "nll_loss": 0.41894909739494324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014308736426755786, + "rewards/margins": 0.21451762318611145, + "rewards/rejected": -0.21594849228858948, + "step": 5185 + }, + { + "epoch": 3.586445366528354, + "grad_norm": 11.535073280334473, + "learning_rate": 3.5630859074842475e-05, + "log_odds_chosen": 10.305524826049805, + "log_odds_ratio": -0.00019394996343180537, + "logits/chosen": -0.6018300652503967, + "logits/rejected": -0.6703628301620483, + "logps/chosen": -0.009994926862418652, + "logps/rejected": -2.5256729125976562, + "loss": 1.1935, + "nll_loss": 0.29836463928222656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009994925931096077, + "rewards/margins": 0.2515678107738495, + "rewards/rejected": -0.252567321062088, + "step": 5186 + }, + { + "epoch": 3.587136929460581, + "grad_norm": 10.138055801391602, + "learning_rate": 3.5627017058552334e-05, + "log_odds_chosen": 8.14724063873291, + "log_odds_ratio": -0.0025975287426263094, + "logits/chosen": -0.4650860130786896, + "logits/rejected": -0.5495781898498535, + "logps/chosen": -0.0034916566219180822, + "logps/rejected": -1.3577914237976074, + "loss": 2.3802, + "nll_loss": 0.5947921276092529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003491656680125743, + "rewards/margins": 0.1354299783706665, + "rewards/rejected": -0.13577914237976074, + "step": 5187 + }, + { + "epoch": 3.587828492392808, + "grad_norm": 16.81104278564453, + "learning_rate": 3.562317504226218e-05, + "log_odds_chosen": 10.003152847290039, + "log_odds_ratio": -0.00014228149666450918, + "logits/chosen": -0.4464438855648041, + "logits/rejected": -0.4425196945667267, + "logps/chosen": -0.00030187988886609674, + "logps/rejected": -1.8005365133285522, + "loss": 1.6632, + "nll_loss": 0.4157760739326477, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0187989978003316e-05, + "rewards/margins": 0.18002347648143768, + "rewards/rejected": -0.18005365133285522, + "step": 5188 + }, + { + "epoch": 3.5885200553250347, + "grad_norm": 10.235124588012695, + "learning_rate": 3.561933302597203e-05, + "log_odds_chosen": 7.02274751663208, + "log_odds_ratio": -0.07414565235376358, + "logits/chosen": -0.27639496326446533, + "logits/rejected": -0.4222687780857086, + "logps/chosen": -0.0243497546762228, + "logps/rejected": -1.6426656246185303, + "loss": 1.6987, + "nll_loss": 0.41725045442581177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00243497546762228, + "rewards/margins": 0.1618315875530243, + "rewards/rejected": -0.16426655650138855, + "step": 5189 + }, + { + "epoch": 3.5892116182572615, + "grad_norm": 6.700727939605713, + "learning_rate": 3.5615491009681885e-05, + "log_odds_chosen": 9.764245986938477, + "log_odds_ratio": -0.0013793071266263723, + "logits/chosen": -0.613777220249176, + "logits/rejected": -0.665103018283844, + "logps/chosen": -0.00799685250967741, + "logps/rejected": -2.3952579498291016, + "loss": 0.9901, + "nll_loss": 0.24738426506519318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007996853091754019, + "rewards/margins": 0.23872610926628113, + "rewards/rejected": -0.23952579498291016, + "step": 5190 + }, + { + "epoch": 3.5899031811894884, + "grad_norm": 11.159008979797363, + "learning_rate": 3.561164899339173e-05, + "log_odds_chosen": 10.08697509765625, + "log_odds_ratio": -0.0002770907594822347, + "logits/chosen": -0.3040257692337036, + "logits/rejected": -0.41321447491645813, + "logps/chosen": -0.0003381900314707309, + "logps/rejected": -2.081603765487671, + "loss": 1.8172, + "nll_loss": 0.4542771279811859, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.381900387466885e-05, + "rewards/margins": 0.20812655985355377, + "rewards/rejected": -0.2081603854894638, + "step": 5191 + }, + { + "epoch": 3.590594744121715, + "grad_norm": 78.92797088623047, + "learning_rate": 3.560780697710158e-05, + "log_odds_chosen": 7.735320091247559, + "log_odds_ratio": -0.07346854358911514, + "logits/chosen": -0.5990978479385376, + "logits/rejected": -0.6643476486206055, + "logps/chosen": -0.024174688383936882, + "logps/rejected": -1.3395992517471313, + "loss": 1.4912, + "nll_loss": 0.3654648959636688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024174691643565893, + "rewards/margins": 0.13154245913028717, + "rewards/rejected": -0.13395991921424866, + "step": 5192 + }, + { + "epoch": 3.591286307053942, + "grad_norm": 7.0599894523620605, + "learning_rate": 3.5603964960811435e-05, + "log_odds_chosen": 9.842082977294922, + "log_odds_ratio": -0.00015221555077005178, + "logits/chosen": -0.6480206847190857, + "logits/rejected": -0.6932371854782104, + "logps/chosen": -0.00041077513014897704, + "logps/rejected": -1.7274799346923828, + "loss": 1.8491, + "nll_loss": 0.46225303411483765, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1077513742493466e-05, + "rewards/margins": 0.17270690202713013, + "rewards/rejected": -0.17274799942970276, + "step": 5193 + }, + { + "epoch": 3.591977869986169, + "grad_norm": 15.730473518371582, + "learning_rate": 3.560012294452129e-05, + "log_odds_chosen": 9.617203712463379, + "log_odds_ratio": -0.00021409033797681332, + "logits/chosen": -0.3583557605743408, + "logits/rejected": -0.3977838158607483, + "logps/chosen": -0.016692141070961952, + "logps/rejected": -2.3281161785125732, + "loss": 1.7002, + "nll_loss": 0.42503270506858826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016692141070961952, + "rewards/margins": 0.23114240169525146, + "rewards/rejected": -0.23281162977218628, + "step": 5194 + }, + { + "epoch": 3.5926694329183957, + "grad_norm": 9.38475513458252, + "learning_rate": 3.5596280928231134e-05, + "log_odds_chosen": 9.190800666809082, + "log_odds_ratio": -0.00075269874650985, + "logits/chosen": -0.16665798425674438, + "logits/rejected": -0.24333110451698303, + "logps/chosen": -0.0010805196361616254, + "logps/rejected": -1.378718614578247, + "loss": 1.3852, + "nll_loss": 0.34622547030448914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010805197234731168, + "rewards/margins": 0.1377638280391693, + "rewards/rejected": -0.1378718763589859, + "step": 5195 + }, + { + "epoch": 3.5933609958506225, + "grad_norm": 13.01176643371582, + "learning_rate": 3.559243891194099e-05, + "log_odds_chosen": 9.629497528076172, + "log_odds_ratio": -0.00012551844702102244, + "logits/chosen": -0.9238946437835693, + "logits/rejected": -0.981550931930542, + "logps/chosen": -0.00043221822124905884, + "logps/rejected": -1.55594801902771, + "loss": 1.9092, + "nll_loss": 0.47729602456092834, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322181848692708e-05, + "rewards/margins": 0.15555158257484436, + "rewards/rejected": -0.1555948108434677, + "step": 5196 + }, + { + "epoch": 3.5940525587828493, + "grad_norm": 7.340988636016846, + "learning_rate": 3.558859689565084e-05, + "log_odds_chosen": 10.411932945251465, + "log_odds_ratio": -5.617938222712837e-05, + "logits/chosen": -0.36234623193740845, + "logits/rejected": -0.5597435235977173, + "logps/chosen": -0.0002136369002982974, + "logps/rejected": -1.9466426372528076, + "loss": 1.1771, + "nll_loss": 0.29427260160446167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1363688574638218e-05, + "rewards/margins": 0.19464290142059326, + "rewards/rejected": -0.19466425478458405, + "step": 5197 + }, + { + "epoch": 3.594744121715076, + "grad_norm": 7.5101165771484375, + "learning_rate": 3.558475487936069e-05, + "log_odds_chosen": 7.9836931228637695, + "log_odds_ratio": -0.057652927935123444, + "logits/chosen": -0.5086445808410645, + "logits/rejected": -0.5536125898361206, + "logps/chosen": -0.01708623394370079, + "logps/rejected": -1.378343939781189, + "loss": 1.357, + "nll_loss": 0.3334919214248657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017086233710870147, + "rewards/margins": 0.13612577319145203, + "rewards/rejected": -0.13783439993858337, + "step": 5198 + }, + { + "epoch": 3.595435684647303, + "grad_norm": 4.695312023162842, + "learning_rate": 3.558091286307054e-05, + "log_odds_chosen": 7.252886772155762, + "log_odds_ratio": -0.1333327293395996, + "logits/chosen": -0.6493362188339233, + "logits/rejected": -0.6358364224433899, + "logps/chosen": -0.04882507771253586, + "logps/rejected": -1.5421638488769531, + "loss": 2.5656, + "nll_loss": 0.6280553340911865, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004882507957518101, + "rewards/margins": 0.1493338793516159, + "rewards/rejected": -0.15421637892723083, + "step": 5199 + }, + { + "epoch": 3.59612724757953, + "grad_norm": 5.692283630371094, + "learning_rate": 3.557707084678039e-05, + "log_odds_chosen": 9.078126907348633, + "log_odds_ratio": -0.0004336358979344368, + "logits/chosen": -0.31813618540763855, + "logits/rejected": -0.34279268980026245, + "logps/chosen": -0.014234584756195545, + "logps/rejected": -2.1005630493164062, + "loss": 1.0926, + "nll_loss": 0.2731185555458069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001423458568751812, + "rewards/margins": 0.20863284170627594, + "rewards/rejected": -0.21005630493164062, + "step": 5200 + }, + { + "epoch": 3.5968188105117567, + "grad_norm": 7.402063846588135, + "learning_rate": 3.557322883049024e-05, + "log_odds_chosen": 8.764384269714355, + "log_odds_ratio": -0.00031936116283759475, + "logits/chosen": -0.19075855612754822, + "logits/rejected": -0.2277907282114029, + "logps/chosen": -0.026667606085538864, + "logps/rejected": -1.5372223854064941, + "loss": 1.4802, + "nll_loss": 0.37002283334732056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002666760701686144, + "rewards/margins": 0.1510554850101471, + "rewards/rejected": -0.15372224152088165, + "step": 5201 + }, + { + "epoch": 3.5975103734439835, + "grad_norm": 9.261101722717285, + "learning_rate": 3.5569386814200094e-05, + "log_odds_chosen": 8.320323944091797, + "log_odds_ratio": -0.05530842766165733, + "logits/chosen": -0.4870779514312744, + "logits/rejected": -0.517996609210968, + "logps/chosen": -0.013912579044699669, + "logps/rejected": -1.4187254905700684, + "loss": 2.2164, + "nll_loss": 0.5485726594924927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013912580907344818, + "rewards/margins": 0.14048129320144653, + "rewards/rejected": -0.14187254011631012, + "step": 5202 + }, + { + "epoch": 3.5982019363762103, + "grad_norm": 7.648223876953125, + "learning_rate": 3.5565544797909946e-05, + "log_odds_chosen": 8.643014907836914, + "log_odds_ratio": -0.0318199060857296, + "logits/chosen": -0.31153562664985657, + "logits/rejected": -0.3480609953403473, + "logps/chosen": -0.03326771408319473, + "logps/rejected": -1.7915986776351929, + "loss": 1.8682, + "nll_loss": 0.46386802196502686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033267715480178595, + "rewards/margins": 0.1758331060409546, + "rewards/rejected": -0.17915986478328705, + "step": 5203 + }, + { + "epoch": 3.598893499308437, + "grad_norm": 9.878567695617676, + "learning_rate": 3.556170278161979e-05, + "log_odds_chosen": 9.812541961669922, + "log_odds_ratio": -0.0006717155338265002, + "logits/chosen": -0.5299800038337708, + "logits/rejected": -0.5992415547370911, + "logps/chosen": -0.0010979081271216273, + "logps/rejected": -1.979628562927246, + "loss": 1.4765, + "nll_loss": 0.36904868483543396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010979082435369492, + "rewards/margins": 0.19785307347774506, + "rewards/rejected": -0.19796285033226013, + "step": 5204 + }, + { + "epoch": 3.599585062240664, + "grad_norm": 9.53571891784668, + "learning_rate": 3.555786076532965e-05, + "log_odds_chosen": 8.218473434448242, + "log_odds_ratio": -0.0811346173286438, + "logits/chosen": -0.4749361276626587, + "logits/rejected": -0.4611550271511078, + "logps/chosen": -0.020573316141963005, + "logps/rejected": -1.3474071025848389, + "loss": 1.7931, + "nll_loss": 0.44016969203948975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002057331847026944, + "rewards/margins": 0.1326833963394165, + "rewards/rejected": -0.13474072515964508, + "step": 5205 + }, + { + "epoch": 3.600276625172891, + "grad_norm": 6.642927169799805, + "learning_rate": 3.55540187490395e-05, + "log_odds_chosen": 9.684572219848633, + "log_odds_ratio": -0.00022037234157323837, + "logits/chosen": -0.1952618807554245, + "logits/rejected": -0.258468896150589, + "logps/chosen": -0.0021308918949216604, + "logps/rejected": -1.52579665184021, + "loss": 1.2332, + "nll_loss": 0.30827754735946655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021308919531293213, + "rewards/margins": 0.15236657857894897, + "rewards/rejected": -0.152579665184021, + "step": 5206 + }, + { + "epoch": 3.6009681881051177, + "grad_norm": 7.0721354484558105, + "learning_rate": 3.555017673274935e-05, + "log_odds_chosen": 7.419760704040527, + "log_odds_ratio": -0.055805519223213196, + "logits/chosen": -0.44695115089416504, + "logits/rejected": -0.4739919900894165, + "logps/chosen": -0.016496581956744194, + "logps/rejected": -1.4811910390853882, + "loss": 2.2693, + "nll_loss": 0.5617491006851196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016496581956744194, + "rewards/margins": 0.14646944403648376, + "rewards/rejected": -0.14811909198760986, + "step": 5207 + }, + { + "epoch": 3.6016597510373445, + "grad_norm": 8.998069763183594, + "learning_rate": 3.55463347164592e-05, + "log_odds_chosen": 8.890933990478516, + "log_odds_ratio": -0.0005221288884058595, + "logits/chosen": -0.19365093111991882, + "logits/rejected": -0.2810370922088623, + "logps/chosen": -0.005299925804138184, + "logps/rejected": -2.2653985023498535, + "loss": 1.4267, + "nll_loss": 0.3566116690635681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005299926269799471, + "rewards/margins": 0.22600987553596497, + "rewards/rejected": -0.22653988003730774, + "step": 5208 + }, + { + "epoch": 3.6023513139695713, + "grad_norm": 7.603548526763916, + "learning_rate": 3.554249270016905e-05, + "log_odds_chosen": 9.299885749816895, + "log_odds_ratio": -0.0002216776047134772, + "logits/chosen": -0.6699154376983643, + "logits/rejected": -0.6954845190048218, + "logps/chosen": -0.005512827541679144, + "logps/rejected": -1.7632887363433838, + "loss": 1.0745, + "nll_loss": 0.2686063051223755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005512827192433178, + "rewards/margins": 0.1757775992155075, + "rewards/rejected": -0.1763288676738739, + "step": 5209 + }, + { + "epoch": 3.603042876901798, + "grad_norm": 6.092649459838867, + "learning_rate": 3.55386506838789e-05, + "log_odds_chosen": 9.46326732635498, + "log_odds_ratio": -0.000469283084385097, + "logits/chosen": -0.451174795627594, + "logits/rejected": -0.49339890480041504, + "logps/chosen": -0.01303679309785366, + "logps/rejected": -2.3563642501831055, + "loss": 1.1468, + "nll_loss": 0.2866421937942505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001303679309785366, + "rewards/margins": 0.23433274030685425, + "rewards/rejected": -0.23563642799854279, + "step": 5210 + }, + { + "epoch": 3.603734439834025, + "grad_norm": 9.824409484863281, + "learning_rate": 3.553480866758875e-05, + "log_odds_chosen": 8.063066482543945, + "log_odds_ratio": -0.20818378031253815, + "logits/chosen": -0.4809816777706146, + "logits/rejected": -0.559431791305542, + "logps/chosen": -0.03620007634162903, + "logps/rejected": -1.4488892555236816, + "loss": 1.9775, + "nll_loss": 0.47356724739074707, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003620007773861289, + "rewards/margins": 0.14126893877983093, + "rewards/rejected": -0.14488893747329712, + "step": 5211 + }, + { + "epoch": 3.604426002766252, + "grad_norm": 12.211295127868652, + "learning_rate": 3.5530966651298605e-05, + "log_odds_chosen": 10.524577140808105, + "log_odds_ratio": -0.00020419417705852538, + "logits/chosen": -0.7545992732048035, + "logits/rejected": -0.7831495404243469, + "logps/chosen": -0.0003767163143493235, + "logps/rejected": -2.2208974361419678, + "loss": 1.0508, + "nll_loss": 0.26268112659454346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.76716343453154e-05, + "rewards/margins": 0.22205206751823425, + "rewards/rejected": -0.2220897376537323, + "step": 5212 + }, + { + "epoch": 3.6051175656984786, + "grad_norm": 11.500664710998535, + "learning_rate": 3.552712463500845e-05, + "log_odds_chosen": 10.186233520507812, + "log_odds_ratio": -0.00036904169246554375, + "logits/chosen": -0.3501139283180237, + "logits/rejected": -0.40883195400238037, + "logps/chosen": -0.0018149593379348516, + "logps/rejected": -2.5266244411468506, + "loss": 1.5078, + "nll_loss": 0.3769240379333496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018149595416616648, + "rewards/margins": 0.25248095393180847, + "rewards/rejected": -0.25266245007514954, + "step": 5213 + }, + { + "epoch": 3.6058091286307055, + "grad_norm": 10.881603240966797, + "learning_rate": 3.552328261871831e-05, + "log_odds_chosen": 8.526883125305176, + "log_odds_ratio": -0.09655605256557465, + "logits/chosen": -0.8187621235847473, + "logits/rejected": -0.8889845609664917, + "logps/chosen": -0.04067990556359291, + "logps/rejected": -2.1676435470581055, + "loss": 2.1095, + "nll_loss": 0.5177196860313416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004067990463227034, + "rewards/margins": 0.2126963585615158, + "rewards/rejected": -0.21676437556743622, + "step": 5214 + }, + { + "epoch": 3.6065006915629323, + "grad_norm": 8.708210945129395, + "learning_rate": 3.5519440602428155e-05, + "log_odds_chosen": 8.359586715698242, + "log_odds_ratio": -0.07479312270879745, + "logits/chosen": -0.8053072690963745, + "logits/rejected": -0.8438823223114014, + "logps/chosen": -0.016664860770106316, + "logps/rejected": -1.6616318225860596, + "loss": 1.4285, + "nll_loss": 0.3496565520763397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016664863796904683, + "rewards/margins": 0.16449669003486633, + "rewards/rejected": -0.16616317629814148, + "step": 5215 + }, + { + "epoch": 3.607192254495159, + "grad_norm": 9.633548736572266, + "learning_rate": 3.551559858613801e-05, + "log_odds_chosen": 9.497386932373047, + "log_odds_ratio": -0.0003051602398045361, + "logits/chosen": -0.6601248979568481, + "logits/rejected": -0.6196205615997314, + "logps/chosen": -0.0005067433230578899, + "logps/rejected": -1.4025245904922485, + "loss": 1.5811, + "nll_loss": 0.3952542543411255, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.067433085059747e-05, + "rewards/margins": 0.14020179212093353, + "rewards/rejected": -0.1402524709701538, + "step": 5216 + }, + { + "epoch": 3.607883817427386, + "grad_norm": 7.4141716957092285, + "learning_rate": 3.551175656984786e-05, + "log_odds_chosen": 8.719673156738281, + "log_odds_ratio": -0.0004461368080228567, + "logits/chosen": -0.41223183274269104, + "logits/rejected": -0.47403812408447266, + "logps/chosen": -0.014796635136008263, + "logps/rejected": -2.066044330596924, + "loss": 1.7922, + "nll_loss": 0.44800156354904175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014796635368838906, + "rewards/margins": 0.20512478053569794, + "rewards/rejected": -0.20660445094108582, + "step": 5217 + }, + { + "epoch": 3.608575380359613, + "grad_norm": 10.772750854492188, + "learning_rate": 3.5507914553557706e-05, + "log_odds_chosen": 9.217242240905762, + "log_odds_ratio": -0.0053271041251719, + "logits/chosen": -0.23242275416851044, + "logits/rejected": -0.3318224549293518, + "logps/chosen": -0.0030850740149617195, + "logps/rejected": -2.3515217304229736, + "loss": 1.584, + "nll_loss": 0.39547258615493774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030850741313770413, + "rewards/margins": 0.2348436862230301, + "rewards/rejected": -0.23515218496322632, + "step": 5218 + }, + { + "epoch": 3.6092669432918396, + "grad_norm": 9.194649696350098, + "learning_rate": 3.550407253726756e-05, + "log_odds_chosen": 8.674663543701172, + "log_odds_ratio": -0.04212072864174843, + "logits/chosen": -0.5076473951339722, + "logits/rejected": -0.5080469846725464, + "logps/chosen": -0.009754427708685398, + "logps/rejected": -1.1838669776916504, + "loss": 1.5066, + "nll_loss": 0.37244829535484314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009754427592270076, + "rewards/margins": 0.11741125583648682, + "rewards/rejected": -0.11838670819997787, + "step": 5219 + }, + { + "epoch": 3.6099585062240664, + "grad_norm": 12.188793182373047, + "learning_rate": 3.550023052097741e-05, + "log_odds_chosen": 9.73184585571289, + "log_odds_ratio": -0.00016213285562116653, + "logits/chosen": -0.8976345062255859, + "logits/rejected": -0.9340048432350159, + "logps/chosen": -0.0012246439000591636, + "logps/rejected": -2.3440892696380615, + "loss": 1.3086, + "nll_loss": 0.32713812589645386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001224643929162994, + "rewards/margins": 0.23428647220134735, + "rewards/rejected": -0.2344089150428772, + "step": 5220 + }, + { + "epoch": 3.6106500691562933, + "grad_norm": 11.67767333984375, + "learning_rate": 3.549638850468726e-05, + "log_odds_chosen": 8.549223899841309, + "log_odds_ratio": -0.0044251237995922565, + "logits/chosen": -0.6429064869880676, + "logits/rejected": -0.7014204263687134, + "logps/chosen": -0.017330633476376534, + "logps/rejected": -1.9335042238235474, + "loss": 1.4883, + "nll_loss": 0.37162601947784424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017330633709207177, + "rewards/margins": 0.19161736965179443, + "rewards/rejected": -0.1933504343032837, + "step": 5221 + }, + { + "epoch": 3.61134163208852, + "grad_norm": 15.276002883911133, + "learning_rate": 3.549254648839711e-05, + "log_odds_chosen": 9.483627319335938, + "log_odds_ratio": -0.0002317847975064069, + "logits/chosen": -0.7881807088851929, + "logits/rejected": -0.8484748601913452, + "logps/chosen": -0.0012060196604579687, + "logps/rejected": -1.873993992805481, + "loss": 1.9021, + "nll_loss": 0.47550061345100403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012060196604579687, + "rewards/margins": 0.18727880716323853, + "rewards/rejected": -0.18739941716194153, + "step": 5222 + }, + { + "epoch": 3.612033195020747, + "grad_norm": 14.034067153930664, + "learning_rate": 3.548870447210697e-05, + "log_odds_chosen": 9.489564895629883, + "log_odds_ratio": -0.032893918454647064, + "logits/chosen": -0.5653495192527771, + "logits/rejected": -0.628448486328125, + "logps/chosen": -0.015954216942191124, + "logps/rejected": -2.7742607593536377, + "loss": 1.327, + "nll_loss": 0.3284568786621094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015954216942191124, + "rewards/margins": 0.2758306562900543, + "rewards/rejected": -0.2774260938167572, + "step": 5223 + }, + { + "epoch": 3.6127247579529738, + "grad_norm": 8.176433563232422, + "learning_rate": 3.5484862455816814e-05, + "log_odds_chosen": 9.06335163116455, + "log_odds_ratio": -0.0018982174806296825, + "logits/chosen": -0.772789478302002, + "logits/rejected": -0.796208918094635, + "logps/chosen": -0.00784214586019516, + "logps/rejected": -2.7062885761260986, + "loss": 1.3409, + "nll_loss": 0.3350370228290558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007842145278118551, + "rewards/margins": 0.269844651222229, + "rewards/rejected": -0.27062883973121643, + "step": 5224 + }, + { + "epoch": 3.6134163208852006, + "grad_norm": 9.80726432800293, + "learning_rate": 3.5481020439526666e-05, + "log_odds_chosen": 9.160164833068848, + "log_odds_ratio": -0.009143702685832977, + "logits/chosen": -0.5025213360786438, + "logits/rejected": -0.5475513339042664, + "logps/chosen": -0.005097491666674614, + "logps/rejected": -2.0189099311828613, + "loss": 1.7398, + "nll_loss": 0.434039831161499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005097491666674614, + "rewards/margins": 0.20138123631477356, + "rewards/rejected": -0.2018910050392151, + "step": 5225 + }, + { + "epoch": 3.6141078838174274, + "grad_norm": 6.498498439788818, + "learning_rate": 3.547717842323652e-05, + "log_odds_chosen": 7.767198085784912, + "log_odds_ratio": -0.1166754812002182, + "logits/chosen": -0.8081409931182861, + "logits/rejected": -0.8334828019142151, + "logps/chosen": -0.06212611868977547, + "logps/rejected": -2.2269105911254883, + "loss": 1.6677, + "nll_loss": 0.40525153279304504, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006212612148374319, + "rewards/margins": 0.21647846698760986, + "rewards/rejected": -0.22269108891487122, + "step": 5226 + }, + { + "epoch": 3.6147994467496543, + "grad_norm": 14.081502914428711, + "learning_rate": 3.5473336406946364e-05, + "log_odds_chosen": 7.013472557067871, + "log_odds_ratio": -0.17359904944896698, + "logits/chosen": -0.37669873237609863, + "logits/rejected": -0.42986077070236206, + "logps/chosen": -0.042637161910533905, + "logps/rejected": -1.6250157356262207, + "loss": 1.7087, + "nll_loss": 0.40981000661849976, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004263716284185648, + "rewards/margins": 0.15823784470558167, + "rewards/rejected": -0.16250157356262207, + "step": 5227 + }, + { + "epoch": 3.615491009681881, + "grad_norm": 4.868067741394043, + "learning_rate": 3.546949439065622e-05, + "log_odds_chosen": 7.783168315887451, + "log_odds_ratio": -0.004499551374465227, + "logits/chosen": -0.5527982115745544, + "logits/rejected": -0.4671393632888794, + "logps/chosen": -0.0024018839467316866, + "logps/rejected": -1.0713505744934082, + "loss": 1.163, + "nll_loss": 0.2903061509132385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002401883975835517, + "rewards/margins": 0.10689487308263779, + "rewards/rejected": -0.10713505744934082, + "step": 5228 + }, + { + "epoch": 3.616182572614108, + "grad_norm": 11.57120132446289, + "learning_rate": 3.546565237436607e-05, + "log_odds_chosen": 10.204926490783691, + "log_odds_ratio": -0.00031510682310909033, + "logits/chosen": -0.5665751695632935, + "logits/rejected": -0.5830574631690979, + "logps/chosen": -0.0030491678044199944, + "logps/rejected": -2.6864333152770996, + "loss": 1.0716, + "nll_loss": 0.2678784132003784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030491675715893507, + "rewards/margins": 0.2683384418487549, + "rewards/rejected": -0.2686433494091034, + "step": 5229 + }, + { + "epoch": 3.6168741355463347, + "grad_norm": 8.560798645019531, + "learning_rate": 3.546181035807592e-05, + "log_odds_chosen": 10.29986572265625, + "log_odds_ratio": -0.00015330745372921228, + "logits/chosen": -0.4275868833065033, + "logits/rejected": -0.5136326551437378, + "logps/chosen": -0.0002663989725988358, + "logps/rejected": -1.7132883071899414, + "loss": 1.699, + "nll_loss": 0.4247225522994995, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.663989653228782e-05, + "rewards/margins": 0.1713021993637085, + "rewards/rejected": -0.1713288277387619, + "step": 5230 + }, + { + "epoch": 3.6175656984785616, + "grad_norm": 10.029023170471191, + "learning_rate": 3.545796834178577e-05, + "log_odds_chosen": 10.008489608764648, + "log_odds_ratio": -6.896184640936553e-05, + "logits/chosen": -0.5981601476669312, + "logits/rejected": -0.662560224533081, + "logps/chosen": -0.0005962676950730383, + "logps/rejected": -1.9099280834197998, + "loss": 1.2771, + "nll_loss": 0.3192793130874634, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962677096249536e-05, + "rewards/margins": 0.19093316793441772, + "rewards/rejected": -0.1909928023815155, + "step": 5231 + }, + { + "epoch": 3.6182572614107884, + "grad_norm": 6.729194164276123, + "learning_rate": 3.5454126325495627e-05, + "log_odds_chosen": 10.444304466247559, + "log_odds_ratio": -7.478394400095567e-05, + "logits/chosen": -0.43716686964035034, + "logits/rejected": -0.47095102071762085, + "logps/chosen": -0.00014576371177099645, + "logps/rejected": -1.8478639125823975, + "loss": 1.0408, + "nll_loss": 0.2601904571056366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4576370631402824e-05, + "rewards/margins": 0.1847718209028244, + "rewards/rejected": -0.18478639423847198, + "step": 5232 + }, + { + "epoch": 3.6189488243430152, + "grad_norm": 11.273608207702637, + "learning_rate": 3.545028430920547e-05, + "log_odds_chosen": 9.06591510772705, + "log_odds_ratio": -0.0006158847245387733, + "logits/chosen": -0.8914425373077393, + "logits/rejected": -1.0343084335327148, + "logps/chosen": -0.0025831114035099745, + "logps/rejected": -1.8664512634277344, + "loss": 2.1863, + "nll_loss": 0.5465248227119446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002583111636340618, + "rewards/margins": 0.1863868236541748, + "rewards/rejected": -0.18664513528347015, + "step": 5233 + }, + { + "epoch": 3.619640387275242, + "grad_norm": 8.003049850463867, + "learning_rate": 3.5446442292915325e-05, + "log_odds_chosen": 10.12773609161377, + "log_odds_ratio": -5.926351514062844e-05, + "logits/chosen": -0.47422224283218384, + "logits/rejected": -0.4392775595188141, + "logps/chosen": -0.0071992347948253155, + "logps/rejected": -2.150129556655884, + "loss": 1.1627, + "nll_loss": 0.290669322013855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007199235260486603, + "rewards/margins": 0.21429303288459778, + "rewards/rejected": -0.21501296758651733, + "step": 5234 + }, + { + "epoch": 3.620331950207469, + "grad_norm": 10.644028663635254, + "learning_rate": 3.544260027662518e-05, + "log_odds_chosen": 11.33029556274414, + "log_odds_ratio": -1.8505686966818757e-05, + "logits/chosen": -0.7966725826263428, + "logits/rejected": -0.8608373403549194, + "logps/chosen": -0.00024159994791261852, + "logps/rejected": -2.4138331413269043, + "loss": 1.5136, + "nll_loss": 0.3784021735191345, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4159995518857613e-05, + "rewards/margins": 0.241359144449234, + "rewards/rejected": -0.24138331413269043, + "step": 5235 + }, + { + "epoch": 3.6210235131396957, + "grad_norm": 16.928823471069336, + "learning_rate": 3.543875826033502e-05, + "log_odds_chosen": 8.554088592529297, + "log_odds_ratio": -0.027092065662145615, + "logits/chosen": -0.6856877207756042, + "logits/rejected": -0.7653172612190247, + "logps/chosen": -0.000812489481177181, + "logps/rejected": -1.5144953727722168, + "loss": 1.3546, + "nll_loss": 0.33592864871025085, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.124895248329267e-05, + "rewards/margins": 0.15136829018592834, + "rewards/rejected": -0.1514495313167572, + "step": 5236 + }, + { + "epoch": 3.6217150760719226, + "grad_norm": 16.311784744262695, + "learning_rate": 3.5434916244044875e-05, + "log_odds_chosen": 9.777567863464355, + "log_odds_ratio": -0.0001523627433925867, + "logits/chosen": -0.7406394481658936, + "logits/rejected": -0.7905520796775818, + "logps/chosen": -0.0004889132105745375, + "logps/rejected": -1.948346734046936, + "loss": 2.7937, + "nll_loss": 0.6984192132949829, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.889132105745375e-05, + "rewards/margins": 0.19478577375411987, + "rewards/rejected": -0.19483467936515808, + "step": 5237 + }, + { + "epoch": 3.6224066390041494, + "grad_norm": 12.5172119140625, + "learning_rate": 3.543107422775473e-05, + "log_odds_chosen": 9.594234466552734, + "log_odds_ratio": -9.091549145523459e-05, + "logits/chosen": -0.7145823240280151, + "logits/rejected": -0.7423162460327148, + "logps/chosen": -0.00032870128052309155, + "logps/rejected": -1.639130711555481, + "loss": 1.4622, + "nll_loss": 0.365535706281662, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2870128052309155e-05, + "rewards/margins": 0.16388019919395447, + "rewards/rejected": -0.1639130711555481, + "step": 5238 + }, + { + "epoch": 3.623098201936376, + "grad_norm": 9.18506145477295, + "learning_rate": 3.542723221146458e-05, + "log_odds_chosen": 9.633953094482422, + "log_odds_ratio": -0.00023023865651339293, + "logits/chosen": -0.5641602277755737, + "logits/rejected": -0.6728044152259827, + "logps/chosen": -0.0016441429033875465, + "logps/rejected": -1.8026257753372192, + "loss": 1.459, + "nll_loss": 0.3647145628929138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016441429033875465, + "rewards/margins": 0.18009814620018005, + "rewards/rejected": -0.18026258051395416, + "step": 5239 + }, + { + "epoch": 3.623789764868603, + "grad_norm": 8.023653030395508, + "learning_rate": 3.5423390195174426e-05, + "log_odds_chosen": 10.032447814941406, + "log_odds_ratio": -0.00021187691891100258, + "logits/chosen": -0.5241720676422119, + "logits/rejected": -0.5224738121032715, + "logps/chosen": -0.0022757581900805235, + "logps/rejected": -2.386683225631714, + "loss": 1.8749, + "nll_loss": 0.4687134027481079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022757584520149976, + "rewards/margins": 0.23844075202941895, + "rewards/rejected": -0.2386683225631714, + "step": 5240 + }, + { + "epoch": 3.62448132780083, + "grad_norm": 10.000246047973633, + "learning_rate": 3.5419548178884285e-05, + "log_odds_chosen": 10.252166748046875, + "log_odds_ratio": -0.00010917196050286293, + "logits/chosen": -0.5360872149467468, + "logits/rejected": -0.5863335132598877, + "logps/chosen": -0.00992940180003643, + "logps/rejected": -2.6332054138183594, + "loss": 2.0352, + "nll_loss": 0.5087817907333374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009929401567205787, + "rewards/margins": 0.2623276114463806, + "rewards/rejected": -0.26332053542137146, + "step": 5241 + }, + { + "epoch": 3.6251728907330567, + "grad_norm": 118.457763671875, + "learning_rate": 3.541570616259413e-05, + "log_odds_chosen": 8.625368118286133, + "log_odds_ratio": -0.1461210548877716, + "logits/chosen": -0.7309638857841492, + "logits/rejected": -0.7860568165779114, + "logps/chosen": -0.004215087275952101, + "logps/rejected": -1.4883962869644165, + "loss": 1.5265, + "nll_loss": 0.3670060336589813, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00042150873923674226, + "rewards/margins": 0.148418128490448, + "rewards/rejected": -0.14883963763713837, + "step": 5242 + }, + { + "epoch": 3.6258644536652835, + "grad_norm": 17.151451110839844, + "learning_rate": 3.541186414630398e-05, + "log_odds_chosen": 9.89073657989502, + "log_odds_ratio": -0.0001892504806164652, + "logits/chosen": -0.41966360807418823, + "logits/rejected": -0.434902161359787, + "logps/chosen": -0.0042398408986628056, + "logps/rejected": -2.0369925498962402, + "loss": 1.3297, + "nll_loss": 0.3324141800403595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042398410732857883, + "rewards/margins": 0.20327524840831757, + "rewards/rejected": -0.2036992460489273, + "step": 5243 + }, + { + "epoch": 3.6265560165975104, + "grad_norm": 8.913837432861328, + "learning_rate": 3.5408022130013836e-05, + "log_odds_chosen": 10.53475570678711, + "log_odds_ratio": -7.836183067411184e-05, + "logits/chosen": -0.4833153486251831, + "logits/rejected": -0.5408565998077393, + "logps/chosen": -0.0027675952296704054, + "logps/rejected": -2.565074920654297, + "loss": 1.3758, + "nll_loss": 0.34394752979278564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027675952878780663, + "rewards/margins": 0.25623074173927307, + "rewards/rejected": -0.2565074861049652, + "step": 5244 + }, + { + "epoch": 3.627247579529737, + "grad_norm": 9.715182304382324, + "learning_rate": 3.540418011372368e-05, + "log_odds_chosen": 10.130058288574219, + "log_odds_ratio": -7.551023736596107e-05, + "logits/chosen": -0.8627445697784424, + "logits/rejected": -0.9407069087028503, + "logps/chosen": -0.00020197762933094054, + "logps/rejected": -1.7689321041107178, + "loss": 1.3774, + "nll_loss": 0.3443450331687927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0197761841700412e-05, + "rewards/margins": 0.17687302827835083, + "rewards/rejected": -0.1768932342529297, + "step": 5245 + }, + { + "epoch": 3.627939142461964, + "grad_norm": 9.587575912475586, + "learning_rate": 3.5400338097433534e-05, + "log_odds_chosen": 8.340509414672852, + "log_odds_ratio": -0.11384254693984985, + "logits/chosen": -0.6631177663803101, + "logits/rejected": -0.7274695634841919, + "logps/chosen": -0.026078490540385246, + "logps/rejected": -1.0123172998428345, + "loss": 1.9941, + "nll_loss": 0.4871327877044678, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002607849193736911, + "rewards/margins": 0.09862387180328369, + "rewards/rejected": -0.10123172402381897, + "step": 5246 + }, + { + "epoch": 3.628630705394191, + "grad_norm": 11.814026832580566, + "learning_rate": 3.5396496081143386e-05, + "log_odds_chosen": 8.81454849243164, + "log_odds_ratio": -0.00029346495284698904, + "logits/chosen": -0.5719044804573059, + "logits/rejected": -0.6320205330848694, + "logps/chosen": -0.0002783064846880734, + "logps/rejected": -0.9145975112915039, + "loss": 2.0516, + "nll_loss": 0.5128742456436157, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.783064883260522e-05, + "rewards/margins": 0.09143192321062088, + "rewards/rejected": -0.0914597436785698, + "step": 5247 + }, + { + "epoch": 3.6293222683264177, + "grad_norm": 9.833765983581543, + "learning_rate": 3.539265406485324e-05, + "log_odds_chosen": 9.416544914245605, + "log_odds_ratio": -0.00028793158708140254, + "logits/chosen": -0.49396389722824097, + "logits/rejected": -0.5152664184570312, + "logps/chosen": -0.00047146857832558453, + "logps/rejected": -1.8146495819091797, + "loss": 1.5623, + "nll_loss": 0.39054417610168457, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.714686292572878e-05, + "rewards/margins": 0.1814178228378296, + "rewards/rejected": -0.18146497011184692, + "step": 5248 + }, + { + "epoch": 3.6300138312586445, + "grad_norm": 10.226951599121094, + "learning_rate": 3.5388812048563084e-05, + "log_odds_chosen": 8.603385925292969, + "log_odds_ratio": -0.0006840950809419155, + "logits/chosen": -0.2722872793674469, + "logits/rejected": -0.33161452412605286, + "logps/chosen": -0.0016118658240884542, + "logps/rejected": -1.2224658727645874, + "loss": 1.727, + "nll_loss": 0.43167710304260254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016118655912578106, + "rewards/margins": 0.12208539992570877, + "rewards/rejected": -0.12224658578634262, + "step": 5249 + }, + { + "epoch": 3.6307053941908713, + "grad_norm": 8.28382396697998, + "learning_rate": 3.5384970032272943e-05, + "log_odds_chosen": 9.789243698120117, + "log_odds_ratio": -0.0009390619234181941, + "logits/chosen": -0.7724297046661377, + "logits/rejected": -0.782783031463623, + "logps/chosen": -0.0007653178181499243, + "logps/rejected": -1.5597801208496094, + "loss": 1.3569, + "nll_loss": 0.33912599086761475, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.653178181499243e-05, + "rewards/margins": 0.1559014618396759, + "rewards/rejected": -0.1559779942035675, + "step": 5250 + }, + { + "epoch": 3.631396957123098, + "grad_norm": 6.966404914855957, + "learning_rate": 3.538112801598279e-05, + "log_odds_chosen": 10.608278274536133, + "log_odds_ratio": -8.463065751129761e-05, + "logits/chosen": -0.28429800271987915, + "logits/rejected": -0.3238435387611389, + "logps/chosen": -0.00016617128858342767, + "logps/rejected": -2.1215450763702393, + "loss": 1.1423, + "nll_loss": 0.2855673134326935, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.661713031353429e-05, + "rewards/margins": 0.21213790774345398, + "rewards/rejected": -0.21215450763702393, + "step": 5251 + }, + { + "epoch": 3.632088520055325, + "grad_norm": 7.324312210083008, + "learning_rate": 3.537728599969264e-05, + "log_odds_chosen": 10.257405281066895, + "log_odds_ratio": -7.262609869940206e-05, + "logits/chosen": -0.4230521321296692, + "logits/rejected": -0.45840829610824585, + "logps/chosen": -0.009859025478363037, + "logps/rejected": -2.127150535583496, + "loss": 1.0872, + "nll_loss": 0.27178269624710083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009859026176854968, + "rewards/margins": 0.21172913908958435, + "rewards/rejected": -0.2127150595188141, + "step": 5252 + }, + { + "epoch": 3.632780082987552, + "grad_norm": 7.544483184814453, + "learning_rate": 3.5373443983402494e-05, + "log_odds_chosen": 9.489943504333496, + "log_odds_ratio": -0.0001858835166785866, + "logits/chosen": -0.4171960949897766, + "logits/rejected": -0.4828304052352905, + "logps/chosen": -0.0004456111346371472, + "logps/rejected": -1.377410888671875, + "loss": 1.0404, + "nll_loss": 0.26008322834968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.456111491890624e-05, + "rewards/margins": 0.1376965343952179, + "rewards/rejected": -0.1377410888671875, + "step": 5253 + }, + { + "epoch": 3.6334716459197787, + "grad_norm": 12.763961791992188, + "learning_rate": 3.536960196711234e-05, + "log_odds_chosen": 10.788370132446289, + "log_odds_ratio": -3.103794006165117e-05, + "logits/chosen": -0.693131148815155, + "logits/rejected": -0.8062441945075989, + "logps/chosen": -0.0004897600738331676, + "logps/rejected": -2.7318973541259766, + "loss": 1.6658, + "nll_loss": 0.41644513607025146, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.897600592812523e-05, + "rewards/margins": 0.2731407880783081, + "rewards/rejected": -0.2731897532939911, + "step": 5254 + }, + { + "epoch": 3.6341632088520055, + "grad_norm": 6.80309534072876, + "learning_rate": 3.536575995082219e-05, + "log_odds_chosen": 10.249557495117188, + "log_odds_ratio": -0.00013425902579911053, + "logits/chosen": -0.42770153284072876, + "logits/rejected": -0.4392857551574707, + "logps/chosen": -0.00031385323381982744, + "logps/rejected": -1.693392038345337, + "loss": 1.2865, + "nll_loss": 0.32160404324531555, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.138531974400394e-05, + "rewards/margins": 0.16930781304836273, + "rewards/rejected": -0.16933920979499817, + "step": 5255 + }, + { + "epoch": 3.6348547717842323, + "grad_norm": 19.48051643371582, + "learning_rate": 3.5361917934532045e-05, + "log_odds_chosen": 9.017301559448242, + "log_odds_ratio": -0.013373545370995998, + "logits/chosen": -0.5854345560073853, + "logits/rejected": -0.5446378588676453, + "logps/chosen": -0.04659513384103775, + "logps/rejected": -2.5437159538269043, + "loss": 2.0333, + "nll_loss": 0.5069827437400818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00465951394289732, + "rewards/margins": 0.24971207976341248, + "rewards/rejected": -0.25437161326408386, + "step": 5256 + }, + { + "epoch": 3.635546334716459, + "grad_norm": 15.161421775817871, + "learning_rate": 3.53580759182419e-05, + "log_odds_chosen": 8.954034805297852, + "log_odds_ratio": -0.00230272114276886, + "logits/chosen": -0.5699727535247803, + "logits/rejected": -0.5900068283081055, + "logps/chosen": -0.00815976969897747, + "logps/rejected": -1.8102946281433105, + "loss": 2.0104, + "nll_loss": 0.5023807287216187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008159770513884723, + "rewards/margins": 0.18021351099014282, + "rewards/rejected": -0.18102948367595673, + "step": 5257 + }, + { + "epoch": 3.636237897648686, + "grad_norm": 14.364946365356445, + "learning_rate": 3.535423390195174e-05, + "log_odds_chosen": 8.934064865112305, + "log_odds_ratio": -0.00522011611610651, + "logits/chosen": -0.47142425179481506, + "logits/rejected": -0.5534060597419739, + "logps/chosen": -0.01788080856204033, + "logps/rejected": -2.318324089050293, + "loss": 1.8643, + "nll_loss": 0.46554601192474365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017880809027701616, + "rewards/margins": 0.23004432022571564, + "rewards/rejected": -0.23183239996433258, + "step": 5258 + }, + { + "epoch": 3.636929460580913, + "grad_norm": 4.957000255584717, + "learning_rate": 3.53503918856616e-05, + "log_odds_chosen": 9.00587272644043, + "log_odds_ratio": -0.03742096573114395, + "logits/chosen": -0.2998158037662506, + "logits/rejected": -0.34346118569374084, + "logps/chosen": -0.01705634780228138, + "logps/rejected": -1.776115894317627, + "loss": 1.3161, + "nll_loss": 0.3252926766872406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017056346405297518, + "rewards/margins": 0.17590594291687012, + "rewards/rejected": -0.1776115894317627, + "step": 5259 + }, + { + "epoch": 3.6376210235131397, + "grad_norm": 7.9335808753967285, + "learning_rate": 3.534654986937145e-05, + "log_odds_chosen": 9.278167724609375, + "log_odds_ratio": -0.00018974630802404135, + "logits/chosen": -0.33290767669677734, + "logits/rejected": -0.3825133144855499, + "logps/chosen": -0.005465247668325901, + "logps/rejected": -1.8166536092758179, + "loss": 1.3369, + "nll_loss": 0.33420246839523315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000546524825040251, + "rewards/margins": 0.18111884593963623, + "rewards/rejected": -0.1816653609275818, + "step": 5260 + }, + { + "epoch": 3.6383125864453665, + "grad_norm": 5.281066417694092, + "learning_rate": 3.53427078530813e-05, + "log_odds_chosen": 10.223286628723145, + "log_odds_ratio": -8.183442696463317e-05, + "logits/chosen": -0.5100609660148621, + "logits/rejected": -0.5995121002197266, + "logps/chosen": -0.0002764645905699581, + "logps/rejected": -1.9925496578216553, + "loss": 1.8219, + "nll_loss": 0.45547908544540405, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.764645978459157e-05, + "rewards/margins": 0.19922731816768646, + "rewards/rejected": -0.19925497472286224, + "step": 5261 + }, + { + "epoch": 3.6390041493775933, + "grad_norm": 12.383803367614746, + "learning_rate": 3.533886583679115e-05, + "log_odds_chosen": 11.8496732711792, + "log_odds_ratio": -1.212448114529252e-05, + "logits/chosen": -0.6695747971534729, + "logits/rejected": -0.7234416007995605, + "logps/chosen": -0.0002846869465429336, + "logps/rejected": -3.2199788093566895, + "loss": 1.571, + "nll_loss": 0.3927599787712097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8468695745687e-05, + "rewards/margins": 0.3219693899154663, + "rewards/rejected": -0.32199788093566895, + "step": 5262 + }, + { + "epoch": 3.63969571230982, + "grad_norm": 8.492502212524414, + "learning_rate": 3.5335023820501e-05, + "log_odds_chosen": 8.009116172790527, + "log_odds_ratio": -0.0038440132047981024, + "logits/chosen": -0.8346610069274902, + "logits/rejected": -0.8341476321220398, + "logps/chosen": -0.0035278652794659138, + "logps/rejected": -1.413739800453186, + "loss": 1.9024, + "nll_loss": 0.47520437836647034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035278653376735747, + "rewards/margins": 0.14102119207382202, + "rewards/rejected": -0.14137396216392517, + "step": 5263 + }, + { + "epoch": 3.640387275242047, + "grad_norm": 7.315945148468018, + "learning_rate": 3.533118180421085e-05, + "log_odds_chosen": 10.510017395019531, + "log_odds_ratio": -4.9991445848718286e-05, + "logits/chosen": -0.5463863611221313, + "logits/rejected": -0.6001462936401367, + "logps/chosen": -0.0001689201162662357, + "logps/rejected": -1.7600739002227783, + "loss": 1.3571, + "nll_loss": 0.3392818868160248, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6892012354219332e-05, + "rewards/margins": 0.1759905070066452, + "rewards/rejected": -0.17600739002227783, + "step": 5264 + }, + { + "epoch": 3.641078838174274, + "grad_norm": 13.64138126373291, + "learning_rate": 3.53273397879207e-05, + "log_odds_chosen": 9.468687057495117, + "log_odds_ratio": -0.00028739694971591234, + "logits/chosen": -0.36536845564842224, + "logits/rejected": -0.46932682394981384, + "logps/chosen": -0.009552651084959507, + "logps/rejected": -2.0713751316070557, + "loss": 1.5895, + "nll_loss": 0.39733579754829407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000955265189986676, + "rewards/margins": 0.20618225634098053, + "rewards/rejected": -0.20713752508163452, + "step": 5265 + }, + { + "epoch": 3.6417704011065006, + "grad_norm": 6.013581275939941, + "learning_rate": 3.5323497771630555e-05, + "log_odds_chosen": 10.099183082580566, + "log_odds_ratio": -0.00015212551807053387, + "logits/chosen": -0.6626241207122803, + "logits/rejected": -0.7370970249176025, + "logps/chosen": -0.0008881157846190035, + "logps/rejected": -2.455702781677246, + "loss": 1.099, + "nll_loss": 0.2747448682785034, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.881157555151731e-05, + "rewards/margins": 0.2454814463853836, + "rewards/rejected": -0.24557027220726013, + "step": 5266 + }, + { + "epoch": 3.6424619640387275, + "grad_norm": 17.372495651245117, + "learning_rate": 3.53196557553404e-05, + "log_odds_chosen": 11.318941116333008, + "log_odds_ratio": -2.0537449017865583e-05, + "logits/chosen": -0.6090898513793945, + "logits/rejected": -0.7666717171669006, + "logps/chosen": -0.0004227885219734162, + "logps/rejected": -3.2152180671691895, + "loss": 1.64, + "nll_loss": 0.4099968671798706, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.22788507421501e-05, + "rewards/margins": 0.32147955894470215, + "rewards/rejected": -0.3215217888355255, + "step": 5267 + }, + { + "epoch": 3.6431535269709543, + "grad_norm": 7.159913063049316, + "learning_rate": 3.531581373905026e-05, + "log_odds_chosen": 7.538683891296387, + "log_odds_ratio": -0.08233918249607086, + "logits/chosen": -0.4028078615665436, + "logits/rejected": -0.41910237073898315, + "logps/chosen": -0.02370680682361126, + "logps/rejected": -1.4719009399414062, + "loss": 0.9625, + "nll_loss": 0.23239757120609283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023706809151917696, + "rewards/margins": 0.14481940865516663, + "rewards/rejected": -0.14719009399414062, + "step": 5268 + }, + { + "epoch": 3.643845089903181, + "grad_norm": 11.902508735656738, + "learning_rate": 3.5311971722760106e-05, + "log_odds_chosen": 10.27293872833252, + "log_odds_ratio": -0.005822064820677042, + "logits/chosen": -0.9338794946670532, + "logits/rejected": -1.0018417835235596, + "logps/chosen": -0.028985779732465744, + "logps/rejected": -2.964543342590332, + "loss": 1.3907, + "nll_loss": 0.3471011221408844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002898578066378832, + "rewards/margins": 0.29355576634407043, + "rewards/rejected": -0.2964543402194977, + "step": 5269 + }, + { + "epoch": 3.644536652835408, + "grad_norm": 9.203103065490723, + "learning_rate": 3.530812970646996e-05, + "log_odds_chosen": 9.439620971679688, + "log_odds_ratio": -0.0022923145443201065, + "logits/chosen": -0.7063536047935486, + "logits/rejected": -0.8596088290214539, + "logps/chosen": -0.03340257331728935, + "logps/rejected": -2.259183645248413, + "loss": 1.7079, + "nll_loss": 0.4267502725124359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033402573317289352, + "rewards/margins": 0.22257809340953827, + "rewards/rejected": -0.22591835260391235, + "step": 5270 + }, + { + "epoch": 3.645228215767635, + "grad_norm": 9.794646263122559, + "learning_rate": 3.530428769017981e-05, + "log_odds_chosen": 9.5487060546875, + "log_odds_ratio": -0.0002214064879808575, + "logits/chosen": -0.6062708497047424, + "logits/rejected": -0.6592881679534912, + "logps/chosen": -0.0003873534733429551, + "logps/rejected": -1.6813409328460693, + "loss": 1.4108, + "nll_loss": 0.3526845872402191, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.873534660669975e-05, + "rewards/margins": 0.16809535026550293, + "rewards/rejected": -0.16813409328460693, + "step": 5271 + }, + { + "epoch": 3.6459197786998616, + "grad_norm": 6.463134765625, + "learning_rate": 3.5300445673889656e-05, + "log_odds_chosen": 8.663908004760742, + "log_odds_ratio": -0.008357677608728409, + "logits/chosen": -0.45225560665130615, + "logits/rejected": -0.5104571580886841, + "logps/chosen": -0.007889281958341599, + "logps/rejected": -2.049224853515625, + "loss": 1.1875, + "nll_loss": 0.296050488948822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007889281841926277, + "rewards/margins": 0.20413357019424438, + "rewards/rejected": -0.20492246747016907, + "step": 5272 + }, + { + "epoch": 3.6466113416320884, + "grad_norm": 11.775018692016602, + "learning_rate": 3.529660365759951e-05, + "log_odds_chosen": 10.624938011169434, + "log_odds_ratio": -7.90832273196429e-05, + "logits/chosen": -0.932185173034668, + "logits/rejected": -0.9303665161132812, + "logps/chosen": -0.0003103798080701381, + "logps/rejected": -2.3480958938598633, + "loss": 1.7896, + "nll_loss": 0.44740378856658936, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1037983717396855e-05, + "rewards/margins": 0.23477855324745178, + "rewards/rejected": -0.23480960726737976, + "step": 5273 + }, + { + "epoch": 3.6473029045643153, + "grad_norm": 10.549944877624512, + "learning_rate": 3.529276164130936e-05, + "log_odds_chosen": 10.02077865600586, + "log_odds_ratio": -0.00011639117292361334, + "logits/chosen": -0.9140655398368835, + "logits/rejected": -0.9599613547325134, + "logps/chosen": -0.00046777399256825447, + "logps/rejected": -2.150455951690674, + "loss": 1.932, + "nll_loss": 0.48298266530036926, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677740071201697e-05, + "rewards/margins": 0.21499884128570557, + "rewards/rejected": -0.21504560112953186, + "step": 5274 + }, + { + "epoch": 3.647994467496542, + "grad_norm": 9.046510696411133, + "learning_rate": 3.5288919625019214e-05, + "log_odds_chosen": 9.285402297973633, + "log_odds_ratio": -0.0009156799060292542, + "logits/chosen": -0.4625971019268036, + "logits/rejected": -0.5015397071838379, + "logps/chosen": -0.0060931481420993805, + "logps/rejected": -2.2665536403656006, + "loss": 1.5615, + "nll_loss": 0.3902891278266907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006093148258514702, + "rewards/margins": 0.22604604065418243, + "rewards/rejected": -0.22665534913539886, + "step": 5275 + }, + { + "epoch": 3.648686030428769, + "grad_norm": 11.310772895812988, + "learning_rate": 3.528507760872906e-05, + "log_odds_chosen": 10.605499267578125, + "log_odds_ratio": -4.1127186705125496e-05, + "logits/chosen": -0.566016674041748, + "logits/rejected": -0.6915289163589478, + "logps/chosen": -0.00027832394698634744, + "logps/rejected": -2.2766027450561523, + "loss": 1.3251, + "nll_loss": 0.3312658369541168, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7832395062432624e-05, + "rewards/margins": 0.22763243317604065, + "rewards/rejected": -0.22766026854515076, + "step": 5276 + }, + { + "epoch": 3.6493775933609958, + "grad_norm": 4.7569260597229, + "learning_rate": 3.528123559243892e-05, + "log_odds_chosen": 9.075827598571777, + "log_odds_ratio": -0.0006618571933358908, + "logits/chosen": -0.5907098054885864, + "logits/rejected": -0.6073621511459351, + "logps/chosen": -0.0007612211629748344, + "logps/rejected": -1.488851547241211, + "loss": 1.2438, + "nll_loss": 0.3108810484409332, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.612211629748344e-05, + "rewards/margins": 0.1488090455532074, + "rewards/rejected": -0.14888517558574677, + "step": 5277 + }, + { + "epoch": 3.6500691562932226, + "grad_norm": 8.418963432312012, + "learning_rate": 3.5277393576148764e-05, + "log_odds_chosen": 9.35635757446289, + "log_odds_ratio": -0.00023570825578644872, + "logits/chosen": -0.6955604553222656, + "logits/rejected": -0.7115451097488403, + "logps/chosen": -0.008286512456834316, + "logps/rejected": -2.2223093509674072, + "loss": 2.2737, + "nll_loss": 0.5684065818786621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008286512456834316, + "rewards/margins": 0.22140227258205414, + "rewards/rejected": -0.222230926156044, + "step": 5278 + }, + { + "epoch": 3.6507607192254494, + "grad_norm": 8.238372802734375, + "learning_rate": 3.527355155985862e-05, + "log_odds_chosen": 9.846039772033691, + "log_odds_ratio": -0.00010984414257109165, + "logits/chosen": -0.6819452047348022, + "logits/rejected": -0.5744008421897888, + "logps/chosen": -0.003770021256059408, + "logps/rejected": -2.1398353576660156, + "loss": 1.162, + "nll_loss": 0.29048237204551697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00037700211396440864, + "rewards/margins": 0.2136065512895584, + "rewards/rejected": -0.21398356556892395, + "step": 5279 + }, + { + "epoch": 3.6514522821576763, + "grad_norm": 8.050226211547852, + "learning_rate": 3.526970954356847e-05, + "log_odds_chosen": 9.431568145751953, + "log_odds_ratio": -0.0002600555890239775, + "logits/chosen": -0.6938271522521973, + "logits/rejected": -0.7513946890830994, + "logps/chosen": -0.0014918470988050103, + "logps/rejected": -2.1459484100341797, + "loss": 1.3419, + "nll_loss": 0.33545613288879395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014918472152203321, + "rewards/margins": 0.21444565057754517, + "rewards/rejected": -0.21459482610225677, + "step": 5280 + }, + { + "epoch": 3.652143845089903, + "grad_norm": 9.627875328063965, + "learning_rate": 3.5265867527278315e-05, + "log_odds_chosen": 8.359371185302734, + "log_odds_ratio": -0.010650178417563438, + "logits/chosen": -0.6847175359725952, + "logits/rejected": -0.8081383109092712, + "logps/chosen": -0.00730957230553031, + "logps/rejected": -1.5649361610412598, + "loss": 0.9909, + "nll_loss": 0.24665333330631256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007309572538360953, + "rewards/margins": 0.1557626575231552, + "rewards/rejected": -0.15649361908435822, + "step": 5281 + }, + { + "epoch": 3.65283540802213, + "grad_norm": 9.804635047912598, + "learning_rate": 3.526202551098817e-05, + "log_odds_chosen": 9.294599533081055, + "log_odds_ratio": -0.00021372217452153563, + "logits/chosen": -0.5657236576080322, + "logits/rejected": -0.7095679044723511, + "logps/chosen": -0.003741663182154298, + "logps/rejected": -2.19268798828125, + "loss": 1.2202, + "nll_loss": 0.30502238869667053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00037416635314002633, + "rewards/margins": 0.21889464557170868, + "rewards/rejected": -0.219268798828125, + "step": 5282 + }, + { + "epoch": 3.6535269709543567, + "grad_norm": 8.655688285827637, + "learning_rate": 3.525818349469802e-05, + "log_odds_chosen": 10.956435203552246, + "log_odds_ratio": -2.724559817579575e-05, + "logits/chosen": -0.6136027574539185, + "logits/rejected": -0.6853924989700317, + "logps/chosen": -0.000356669828761369, + "logps/rejected": -2.442622184753418, + "loss": 1.5616, + "nll_loss": 0.39040645956993103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5666980693349615e-05, + "rewards/margins": 0.24422653019428253, + "rewards/rejected": -0.2442622035741806, + "step": 5283 + }, + { + "epoch": 3.6542185338865836, + "grad_norm": 6.767275810241699, + "learning_rate": 3.525434147840787e-05, + "log_odds_chosen": 10.967052459716797, + "log_odds_ratio": -3.499734521028586e-05, + "logits/chosen": -0.2755697965621948, + "logits/rejected": -0.314625084400177, + "logps/chosen": -0.003400439629331231, + "logps/rejected": -2.71468186378479, + "loss": 1.586, + "nll_loss": 0.3965045213699341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003400439745746553, + "rewards/margins": 0.2711281478404999, + "rewards/rejected": -0.2714681923389435, + "step": 5284 + }, + { + "epoch": 3.6549100968188104, + "grad_norm": 18.51266098022461, + "learning_rate": 3.525049946211772e-05, + "log_odds_chosen": 7.9354658126831055, + "log_odds_ratio": -0.3440208435058594, + "logits/chosen": -0.4980732798576355, + "logits/rejected": -0.5682406425476074, + "logps/chosen": -0.09362926334142685, + "logps/rejected": -2.1777169704437256, + "loss": 1.949, + "nll_loss": 0.4528387486934662, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00936292577534914, + "rewards/margins": 0.20840878784656525, + "rewards/rejected": -0.2177717089653015, + "step": 5285 + }, + { + "epoch": 3.6556016597510372, + "grad_norm": 10.901885032653809, + "learning_rate": 3.524665744582758e-05, + "log_odds_chosen": 10.048656463623047, + "log_odds_ratio": -9.540050814393908e-05, + "logits/chosen": -0.42753034830093384, + "logits/rejected": -0.4391542077064514, + "logps/chosen": -0.0019503405783325434, + "logps/rejected": -2.1295058727264404, + "loss": 1.5479, + "nll_loss": 0.3869664669036865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019503405201248825, + "rewards/margins": 0.21275556087493896, + "rewards/rejected": -0.21295058727264404, + "step": 5286 + }, + { + "epoch": 3.656293222683264, + "grad_norm": 12.719574928283691, + "learning_rate": 3.524281542953742e-05, + "log_odds_chosen": 10.545530319213867, + "log_odds_ratio": -0.00034519375185482204, + "logits/chosen": -0.4994061589241028, + "logits/rejected": -0.5819936394691467, + "logps/chosen": -0.00015317212091758847, + "logps/rejected": -2.056387186050415, + "loss": 1.2105, + "nll_loss": 0.3026004135608673, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.531721318315249e-05, + "rewards/margins": 0.20562341809272766, + "rewards/rejected": -0.20563873648643494, + "step": 5287 + }, + { + "epoch": 3.656984785615491, + "grad_norm": 14.119704246520996, + "learning_rate": 3.5238973413247275e-05, + "log_odds_chosen": 10.87884521484375, + "log_odds_ratio": -5.273066199151799e-05, + "logits/chosen": -0.45117008686065674, + "logits/rejected": -0.5436999797821045, + "logps/chosen": -0.00025493474095128477, + "logps/rejected": -2.254000186920166, + "loss": 1.4113, + "nll_loss": 0.3528318703174591, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5493474822724238e-05, + "rewards/margins": 0.22537453472614288, + "rewards/rejected": -0.22540001571178436, + "step": 5288 + }, + { + "epoch": 3.6576763485477177, + "grad_norm": 5.6090474128723145, + "learning_rate": 3.523513139695713e-05, + "log_odds_chosen": 8.451699256896973, + "log_odds_ratio": -0.0005231672548688948, + "logits/chosen": -0.4306209087371826, + "logits/rejected": -0.47424978017807007, + "logps/chosen": -0.010371813550591469, + "logps/rejected": -2.286710023880005, + "loss": 1.3906, + "nll_loss": 0.3476030230522156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010371813550591469, + "rewards/margins": 0.22763381898403168, + "rewards/rejected": -0.22867099940776825, + "step": 5289 + }, + { + "epoch": 3.6583679114799446, + "grad_norm": 9.015220642089844, + "learning_rate": 3.523128938066697e-05, + "log_odds_chosen": 10.912769317626953, + "log_odds_ratio": -2.5071371055673808e-05, + "logits/chosen": -0.4544152617454529, + "logits/rejected": -0.5538532733917236, + "logps/chosen": -0.00016627827426418662, + "logps/rejected": -2.1886746883392334, + "loss": 1.3752, + "nll_loss": 0.34378570318222046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.662782597122714e-05, + "rewards/margins": 0.21885083615779877, + "rewards/rejected": -0.2188674807548523, + "step": 5290 + }, + { + "epoch": 3.6590594744121714, + "grad_norm": 8.339679718017578, + "learning_rate": 3.5227447364376826e-05, + "log_odds_chosen": 10.106023788452148, + "log_odds_ratio": -0.0002883929992094636, + "logits/chosen": -0.5477585792541504, + "logits/rejected": -0.7189350724220276, + "logps/chosen": -0.0016047836979851127, + "logps/rejected": -2.743410348892212, + "loss": 1.4152, + "nll_loss": 0.3537675440311432, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016047836106736213, + "rewards/margins": 0.2741805911064148, + "rewards/rejected": -0.27434107661247253, + "step": 5291 + }, + { + "epoch": 3.659751037344398, + "grad_norm": 7.859814643859863, + "learning_rate": 3.522360534808668e-05, + "log_odds_chosen": 9.92156982421875, + "log_odds_ratio": -0.0001442175853298977, + "logits/chosen": -0.4476546347141266, + "logits/rejected": -0.5333471894264221, + "logps/chosen": -0.00014952296623960137, + "logps/rejected": -1.2835972309112549, + "loss": 1.2927, + "nll_loss": 0.3231571912765503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4952296623960137e-05, + "rewards/margins": 0.12834477424621582, + "rewards/rejected": -0.12835972011089325, + "step": 5292 + }, + { + "epoch": 3.660442600276625, + "grad_norm": 11.889843940734863, + "learning_rate": 3.521976333179653e-05, + "log_odds_chosen": 9.024433135986328, + "log_odds_ratio": -0.12432266771793365, + "logits/chosen": -0.3937477171421051, + "logits/rejected": -0.31476932764053345, + "logps/chosen": -0.03802483528852463, + "logps/rejected": -1.9400608539581299, + "loss": 0.8401, + "nll_loss": 0.19758589565753937, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0038024834357202053, + "rewards/margins": 0.19020359218120575, + "rewards/rejected": -0.1940060704946518, + "step": 5293 + }, + { + "epoch": 3.661134163208852, + "grad_norm": 9.2792387008667, + "learning_rate": 3.5215921315506376e-05, + "log_odds_chosen": 10.101093292236328, + "log_odds_ratio": -8.72776290634647e-05, + "logits/chosen": -0.8199343085289001, + "logits/rejected": -0.8904087543487549, + "logps/chosen": -0.0005541003774851561, + "logps/rejected": -2.0077261924743652, + "loss": 1.9961, + "nll_loss": 0.49901145696640015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.541003338294104e-05, + "rewards/margins": 0.20071722567081451, + "rewards/rejected": -0.20077264308929443, + "step": 5294 + }, + { + "epoch": 3.6618257261410787, + "grad_norm": 4.678475379943848, + "learning_rate": 3.5212079299216236e-05, + "log_odds_chosen": 9.025158882141113, + "log_odds_ratio": -0.00037848821375519037, + "logits/chosen": -0.26122820377349854, + "logits/rejected": -0.22307443618774414, + "logps/chosen": -0.013361023738980293, + "logps/rejected": -1.7858524322509766, + "loss": 1.0168, + "nll_loss": 0.2541574537754059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013361023738980293, + "rewards/margins": 0.17724913358688354, + "rewards/rejected": -0.1785852462053299, + "step": 5295 + }, + { + "epoch": 3.6625172890733055, + "grad_norm": 8.4315185546875, + "learning_rate": 3.520823728292608e-05, + "log_odds_chosen": 8.354340553283691, + "log_odds_ratio": -0.018137505277991295, + "logits/chosen": -0.343766450881958, + "logits/rejected": -0.3963284492492676, + "logps/chosen": -0.006356228142976761, + "logps/rejected": -1.510011911392212, + "loss": 1.3385, + "nll_loss": 0.33280879259109497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006356228841468692, + "rewards/margins": 0.15036556124687195, + "rewards/rejected": -0.1510011851787567, + "step": 5296 + }, + { + "epoch": 3.6632088520055324, + "grad_norm": 14.743172645568848, + "learning_rate": 3.5204395266635934e-05, + "log_odds_chosen": 9.12619400024414, + "log_odds_ratio": -0.006320980843156576, + "logits/chosen": -0.48610758781433105, + "logits/rejected": -0.46463316679000854, + "logps/chosen": -0.002753177424892783, + "logps/rejected": -1.590240716934204, + "loss": 1.2201, + "nll_loss": 0.3043842315673828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002753177541308105, + "rewards/margins": 0.1587487757205963, + "rewards/rejected": -0.15902407467365265, + "step": 5297 + }, + { + "epoch": 3.663900414937759, + "grad_norm": 8.207070350646973, + "learning_rate": 3.5200553250345786e-05, + "log_odds_chosen": 9.57107925415039, + "log_odds_ratio": -0.00014903413830325007, + "logits/chosen": -0.44907844066619873, + "logits/rejected": -0.5032300353050232, + "logps/chosen": -0.00021599276806227863, + "logps/rejected": -0.9402081370353699, + "loss": 1.4194, + "nll_loss": 0.3548300266265869, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1599278625217266e-05, + "rewards/margins": 0.0939992144703865, + "rewards/rejected": -0.09402081370353699, + "step": 5298 + }, + { + "epoch": 3.664591977869986, + "grad_norm": 5.6133856773376465, + "learning_rate": 3.519671123405563e-05, + "log_odds_chosen": 6.615942478179932, + "log_odds_ratio": -0.07902704179286957, + "logits/chosen": -0.703774094581604, + "logits/rejected": -0.6925557255744934, + "logps/chosen": -0.033892419189214706, + "logps/rejected": -1.7209391593933105, + "loss": 1.6216, + "nll_loss": 0.3975079655647278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033892421051859856, + "rewards/margins": 0.16870468854904175, + "rewards/rejected": -0.17209392786026, + "step": 5299 + }, + { + "epoch": 3.665283540802213, + "grad_norm": 15.447953224182129, + "learning_rate": 3.5192869217765484e-05, + "log_odds_chosen": 10.037712097167969, + "log_odds_ratio": -0.0002351927396375686, + "logits/chosen": -0.7943803071975708, + "logits/rejected": -0.8371597528457642, + "logps/chosen": -0.0004952938761562109, + "logps/rejected": -1.7647054195404053, + "loss": 2.1934, + "nll_loss": 0.5483275651931763, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9529389798408374e-05, + "rewards/margins": 0.17642101645469666, + "rewards/rejected": -0.1764705330133438, + "step": 5300 + }, + { + "epoch": 3.6659751037344397, + "grad_norm": 7.23792028427124, + "learning_rate": 3.518902720147534e-05, + "log_odds_chosen": 10.529439926147461, + "log_odds_ratio": -6.0410486184991896e-05, + "logits/chosen": -0.5384195446968079, + "logits/rejected": -0.5411019921302795, + "logps/chosen": -0.00020644822507165372, + "logps/rejected": -2.137907028198242, + "loss": 1.4403, + "nll_loss": 0.36006906628608704, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0644820324378088e-05, + "rewards/margins": 0.2137700617313385, + "rewards/rejected": -0.21379071474075317, + "step": 5301 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 9.008337020874023, + "learning_rate": 3.518518518518519e-05, + "log_odds_chosen": 9.11552619934082, + "log_odds_ratio": -0.00044547885772772133, + "logits/chosen": -0.8450495600700378, + "logits/rejected": -0.882699191570282, + "logps/chosen": -0.003004885744303465, + "logps/rejected": -1.6603273153305054, + "loss": 2.0394, + "nll_loss": 0.5097946524620056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030048860935494304, + "rewards/margins": 0.1657322496175766, + "rewards/rejected": -0.16603274643421173, + "step": 5302 + }, + { + "epoch": 3.6673582295988933, + "grad_norm": 12.148446083068848, + "learning_rate": 3.5181343168895035e-05, + "log_odds_chosen": 8.527297973632812, + "log_odds_ratio": -0.006359103135764599, + "logits/chosen": -0.7570721507072449, + "logits/rejected": -0.8277689218521118, + "logps/chosen": -0.004634576383978128, + "logps/rejected": -1.966159462928772, + "loss": 2.2884, + "nll_loss": 0.5714757442474365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046345763257704675, + "rewards/margins": 0.1961524933576584, + "rewards/rejected": -0.19661596417427063, + "step": 5303 + }, + { + "epoch": 3.66804979253112, + "grad_norm": 13.32793140411377, + "learning_rate": 3.5177501152604894e-05, + "log_odds_chosen": 8.763154983520508, + "log_odds_ratio": -0.0016999999061226845, + "logits/chosen": -0.773772120475769, + "logits/rejected": -0.8412440419197083, + "logps/chosen": -0.024526400491595268, + "logps/rejected": -1.7753115892410278, + "loss": 1.2816, + "nll_loss": 0.32022562623023987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024526400957256556, + "rewards/margins": 0.17507854104042053, + "rewards/rejected": -0.1775311529636383, + "step": 5304 + }, + { + "epoch": 3.668741355463347, + "grad_norm": 9.776931762695312, + "learning_rate": 3.517365913631474e-05, + "log_odds_chosen": 7.944180488586426, + "log_odds_ratio": -0.04010344296693802, + "logits/chosen": -0.43044549226760864, + "logits/rejected": -0.4351937770843506, + "logps/chosen": -0.014615101739764214, + "logps/rejected": -1.7616087198257446, + "loss": 1.9609, + "nll_loss": 0.4862039387226105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014615101972594857, + "rewards/margins": 0.17469936609268188, + "rewards/rejected": -0.17616087198257446, + "step": 5305 + }, + { + "epoch": 3.669432918395574, + "grad_norm": 12.19973373413086, + "learning_rate": 3.516981712002459e-05, + "log_odds_chosen": 9.850683212280273, + "log_odds_ratio": -0.0001669059129199013, + "logits/chosen": -0.9862415790557861, + "logits/rejected": -0.9785559177398682, + "logps/chosen": -0.0006409522611647844, + "logps/rejected": -1.5707862377166748, + "loss": 1.9491, + "nll_loss": 0.4872695207595825, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.409522029571235e-05, + "rewards/margins": 0.15701454877853394, + "rewards/rejected": -0.15707863867282867, + "step": 5306 + }, + { + "epoch": 3.6701244813278007, + "grad_norm": 8.544509887695312, + "learning_rate": 3.5165975103734445e-05, + "log_odds_chosen": 9.832954406738281, + "log_odds_ratio": -0.00016655519721098244, + "logits/chosen": -0.9121188521385193, + "logits/rejected": -0.9448140263557434, + "logps/chosen": -0.0003426902985665947, + "logps/rejected": -1.5457923412322998, + "loss": 0.9375, + "nll_loss": 0.23435595631599426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.426902912906371e-05, + "rewards/margins": 0.15454496443271637, + "rewards/rejected": -0.15457923710346222, + "step": 5307 + }, + { + "epoch": 3.6708160442600275, + "grad_norm": 19.36949920654297, + "learning_rate": 3.516213308744429e-05, + "log_odds_chosen": 9.872499465942383, + "log_odds_ratio": -0.001567936153151095, + "logits/chosen": -0.6543141007423401, + "logits/rejected": -0.7318291068077087, + "logps/chosen": -0.007575335446745157, + "logps/rejected": -2.5318126678466797, + "loss": 2.4277, + "nll_loss": 0.6067792177200317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007575335330329835, + "rewards/margins": 0.2524237334728241, + "rewards/rejected": -0.25318124890327454, + "step": 5308 + }, + { + "epoch": 3.6715076071922543, + "grad_norm": 13.251935958862305, + "learning_rate": 3.515829107115414e-05, + "log_odds_chosen": 10.378414154052734, + "log_odds_ratio": -3.7448902730830014e-05, + "logits/chosen": -0.716993510723114, + "logits/rejected": -0.7578259110450745, + "logps/chosen": -0.00018267772975377738, + "logps/rejected": -1.6985294818878174, + "loss": 1.3589, + "nll_loss": 0.33973079919815063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8267772247781977e-05, + "rewards/margins": 0.1698346734046936, + "rewards/rejected": -0.16985295712947845, + "step": 5309 + }, + { + "epoch": 3.6721991701244816, + "grad_norm": 5.7452569007873535, + "learning_rate": 3.515444905486399e-05, + "log_odds_chosen": 8.658774375915527, + "log_odds_ratio": -0.000490661128424108, + "logits/chosen": -0.2783205509185791, + "logits/rejected": -0.3526964783668518, + "logps/chosen": -0.0036929536145180464, + "logps/rejected": -1.6045854091644287, + "loss": 2.5169, + "nll_loss": 0.6291677355766296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003692953905556351, + "rewards/margins": 0.16008925437927246, + "rewards/rejected": -0.1604585349559784, + "step": 5310 + }, + { + "epoch": 3.6728907330567084, + "grad_norm": 8.994384765625, + "learning_rate": 3.515060703857385e-05, + "log_odds_chosen": 9.59211540222168, + "log_odds_ratio": -0.002721622120589018, + "logits/chosen": -0.9550088047981262, + "logits/rejected": -0.925793468952179, + "logps/chosen": -0.0007333287503570318, + "logps/rejected": -1.986412763595581, + "loss": 1.1019, + "nll_loss": 0.2752057909965515, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.333287794608623e-05, + "rewards/margins": 0.1985679566860199, + "rewards/rejected": -0.19864128530025482, + "step": 5311 + }, + { + "epoch": 3.6735822959889353, + "grad_norm": 9.866668701171875, + "learning_rate": 3.514676502228369e-05, + "log_odds_chosen": 9.764283180236816, + "log_odds_ratio": -0.0001480157079640776, + "logits/chosen": -0.6280882954597473, + "logits/rejected": -0.7089745998382568, + "logps/chosen": -0.007530787028372288, + "logps/rejected": -2.570647716522217, + "loss": 1.5201, + "nll_loss": 0.3800201714038849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007530787261202931, + "rewards/margins": 0.25631168484687805, + "rewards/rejected": -0.2570647597312927, + "step": 5312 + }, + { + "epoch": 3.674273858921162, + "grad_norm": 10.664302825927734, + "learning_rate": 3.5142923005993546e-05, + "log_odds_chosen": 9.287500381469727, + "log_odds_ratio": -0.0013609788147732615, + "logits/chosen": -0.6665362119674683, + "logits/rejected": -0.7844340205192566, + "logps/chosen": -0.008134718984365463, + "logps/rejected": -1.9219590425491333, + "loss": 1.5939, + "nll_loss": 0.3983459770679474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008134719682857394, + "rewards/margins": 0.19138242304325104, + "rewards/rejected": -0.19219589233398438, + "step": 5313 + }, + { + "epoch": 3.674965421853389, + "grad_norm": 7.119990348815918, + "learning_rate": 3.51390809897034e-05, + "log_odds_chosen": 10.26752758026123, + "log_odds_ratio": -4.403849015943706e-05, + "logits/chosen": -0.7129647135734558, + "logits/rejected": -0.726944088935852, + "logps/chosen": -0.000326203036820516, + "logps/rejected": -2.0272581577301025, + "loss": 1.9439, + "nll_loss": 0.48595935106277466, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.262030077166855e-05, + "rewards/margins": 0.20269319415092468, + "rewards/rejected": -0.20272579789161682, + "step": 5314 + }, + { + "epoch": 3.6756569847856158, + "grad_norm": 7.7854390144348145, + "learning_rate": 3.513523897341325e-05, + "log_odds_chosen": 8.846216201782227, + "log_odds_ratio": -0.03087105229496956, + "logits/chosen": -0.7045666575431824, + "logits/rejected": -0.7305500507354736, + "logps/chosen": -0.008291316218674183, + "logps/rejected": -1.5737732648849487, + "loss": 1.449, + "nll_loss": 0.35915958881378174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008291316335089505, + "rewards/margins": 0.15654820203781128, + "rewards/rejected": -0.15737733244895935, + "step": 5315 + }, + { + "epoch": 3.6763485477178426, + "grad_norm": 9.7339448928833, + "learning_rate": 3.5131396957123096e-05, + "log_odds_chosen": 7.857017517089844, + "log_odds_ratio": -0.03638071566820145, + "logits/chosen": -0.9640352129936218, + "logits/rejected": -0.9572303295135498, + "logps/chosen": -0.03163313865661621, + "logps/rejected": -2.537631034851074, + "loss": 2.0547, + "nll_loss": 0.5100435018539429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003163314191624522, + "rewards/margins": 0.25059980154037476, + "rewards/rejected": -0.2537631094455719, + "step": 5316 + }, + { + "epoch": 3.6770401106500694, + "grad_norm": 8.134242057800293, + "learning_rate": 3.512755494083295e-05, + "log_odds_chosen": 7.235983848571777, + "log_odds_ratio": -0.025236472487449646, + "logits/chosen": -0.3153616487979889, + "logits/rejected": -0.29091522097587585, + "logps/chosen": -0.01593020185828209, + "logps/rejected": -1.111178994178772, + "loss": 1.7571, + "nll_loss": 0.4367576837539673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015930201625451446, + "rewards/margins": 0.10952487587928772, + "rewards/rejected": -0.1111178994178772, + "step": 5317 + }, + { + "epoch": 3.6777316735822962, + "grad_norm": 9.490833282470703, + "learning_rate": 3.51237129245428e-05, + "log_odds_chosen": 8.130983352661133, + "log_odds_ratio": -0.17193548381328583, + "logits/chosen": -0.7003574371337891, + "logits/rejected": -0.7082849144935608, + "logps/chosen": -0.0677189901471138, + "logps/rejected": -1.5908578634262085, + "loss": 1.9123, + "nll_loss": 0.4608832001686096, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006771899294108152, + "rewards/margins": 0.15231388807296753, + "rewards/rejected": -0.15908578038215637, + "step": 5318 + }, + { + "epoch": 3.678423236514523, + "grad_norm": 7.186295509338379, + "learning_rate": 3.511987090825265e-05, + "log_odds_chosen": 10.44674301147461, + "log_odds_ratio": -4.972759779775515e-05, + "logits/chosen": -0.6706852316856384, + "logits/rejected": -0.7189573049545288, + "logps/chosen": -0.0033246877137571573, + "logps/rejected": -2.9027318954467773, + "loss": 1.89, + "nll_loss": 0.4724842607975006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033246877137571573, + "rewards/margins": 0.2899407148361206, + "rewards/rejected": -0.29027318954467773, + "step": 5319 + }, + { + "epoch": 3.67911479944675, + "grad_norm": 11.441291809082031, + "learning_rate": 3.5116028891962506e-05, + "log_odds_chosen": 9.759711265563965, + "log_odds_ratio": -0.0005217420402914286, + "logits/chosen": -0.547791063785553, + "logits/rejected": -0.6304538249969482, + "logps/chosen": -0.000919260666705668, + "logps/rejected": -1.9203557968139648, + "loss": 1.4694, + "nll_loss": 0.3673018217086792, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.192607103614137e-05, + "rewards/margins": 0.19194364547729492, + "rewards/rejected": -0.19203555583953857, + "step": 5320 + }, + { + "epoch": 3.6798063623789767, + "grad_norm": 9.163915634155273, + "learning_rate": 3.511218687567235e-05, + "log_odds_chosen": 9.769948959350586, + "log_odds_ratio": -0.0007256059325300157, + "logits/chosen": -0.5265632271766663, + "logits/rejected": -0.6233397126197815, + "logps/chosen": -0.007863366976380348, + "logps/rejected": -2.6629951000213623, + "loss": 1.5086, + "nll_loss": 0.3770705461502075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007863366045057774, + "rewards/margins": 0.26551318168640137, + "rewards/rejected": -0.2662995457649231, + "step": 5321 + }, + { + "epoch": 3.6804979253112036, + "grad_norm": 12.884857177734375, + "learning_rate": 3.5108344859382204e-05, + "log_odds_chosen": 7.08580207824707, + "log_odds_ratio": -0.4825950860977173, + "logits/chosen": -0.9943395853042603, + "logits/rejected": -0.9396799802780151, + "logps/chosen": -0.3394123613834381, + "logps/rejected": -2.2305188179016113, + "loss": 1.8811, + "nll_loss": 0.4220207631587982, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03394123539328575, + "rewards/margins": 0.1891106367111206, + "rewards/rejected": -0.22305189073085785, + "step": 5322 + }, + { + "epoch": 3.6811894882434304, + "grad_norm": 7.227943420410156, + "learning_rate": 3.5104502843092057e-05, + "log_odds_chosen": 8.04112434387207, + "log_odds_ratio": -0.015502391383051872, + "logits/chosen": -0.7972258925437927, + "logits/rejected": -0.7825276851654053, + "logps/chosen": -0.005595149472355843, + "logps/rejected": -1.288925051689148, + "loss": 1.648, + "nll_loss": 0.4104374945163727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005595149705186486, + "rewards/margins": 0.1283329874277115, + "rewards/rejected": -0.12889249622821808, + "step": 5323 + }, + { + "epoch": 3.6818810511756572, + "grad_norm": 9.782866477966309, + "learning_rate": 3.510066082680191e-05, + "log_odds_chosen": 10.535016059875488, + "log_odds_ratio": -4.5212880650069565e-05, + "logits/chosen": -1.0320000648498535, + "logits/rejected": -1.047119140625, + "logps/chosen": -0.00029143691062927246, + "logps/rejected": -2.076714515686035, + "loss": 1.3569, + "nll_loss": 0.33922791481018066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9143691790523008e-05, + "rewards/margins": 0.2076423019170761, + "rewards/rejected": -0.20767146348953247, + "step": 5324 + }, + { + "epoch": 3.682572614107884, + "grad_norm": 4.98908805847168, + "learning_rate": 3.5096818810511755e-05, + "log_odds_chosen": 8.161107063293457, + "log_odds_ratio": -0.004896479658782482, + "logits/chosen": -0.6709215044975281, + "logits/rejected": -0.6395055055618286, + "logps/chosen": -0.015280601568520069, + "logps/rejected": -1.730186939239502, + "loss": 1.1412, + "nll_loss": 0.2848084568977356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001528060296550393, + "rewards/margins": 0.1714906245470047, + "rewards/rejected": -0.1730186939239502, + "step": 5325 + }, + { + "epoch": 3.683264177040111, + "grad_norm": 9.627045631408691, + "learning_rate": 3.509297679422161e-05, + "log_odds_chosen": 9.907687187194824, + "log_odds_ratio": -0.0009907097555696964, + "logits/chosen": -0.6797293424606323, + "logits/rejected": -0.7065413594245911, + "logps/chosen": -0.0013398650335147977, + "logps/rejected": -1.8299527168273926, + "loss": 1.2516, + "nll_loss": 0.31279319524765015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013398649753071368, + "rewards/margins": 0.18286126852035522, + "rewards/rejected": -0.1829952597618103, + "step": 5326 + }, + { + "epoch": 3.6839557399723377, + "grad_norm": 10.69526195526123, + "learning_rate": 3.508913477793146e-05, + "log_odds_chosen": 10.115638732910156, + "log_odds_ratio": -0.0002958014083560556, + "logits/chosen": -0.7780247926712036, + "logits/rejected": -0.8033609390258789, + "logps/chosen": -0.0003876305709127337, + "logps/rejected": -1.706833004951477, + "loss": 1.9479, + "nll_loss": 0.48695021867752075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8763060729252174e-05, + "rewards/margins": 0.17064453661441803, + "rewards/rejected": -0.17068329453468323, + "step": 5327 + }, + { + "epoch": 3.6846473029045645, + "grad_norm": 14.76516056060791, + "learning_rate": 3.5085292761641305e-05, + "log_odds_chosen": 11.188180923461914, + "log_odds_ratio": -2.028615926974453e-05, + "logits/chosen": -1.0329959392547607, + "logits/rejected": -1.0130128860473633, + "logps/chosen": -0.00010292732622474432, + "logps/rejected": -1.8463349342346191, + "loss": 1.7812, + "nll_loss": 0.4453083574771881, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0292733350070193e-05, + "rewards/margins": 0.18462321162223816, + "rewards/rejected": -0.18463349342346191, + "step": 5328 + }, + { + "epoch": 3.6853388658367914, + "grad_norm": 9.564327239990234, + "learning_rate": 3.5081450745351164e-05, + "log_odds_chosen": 9.434001922607422, + "log_odds_ratio": -0.0002744604425970465, + "logits/chosen": -0.5842224359512329, + "logits/rejected": -0.6542115211486816, + "logps/chosen": -0.0029525586869567633, + "logps/rejected": -1.8732268810272217, + "loss": 1.1858, + "nll_loss": 0.2964109182357788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029525585705414414, + "rewards/margins": 0.18702742457389832, + "rewards/rejected": -0.1873226761817932, + "step": 5329 + }, + { + "epoch": 3.686030428769018, + "grad_norm": 12.752079010009766, + "learning_rate": 3.507760872906101e-05, + "log_odds_chosen": 9.772260665893555, + "log_odds_ratio": -8.484002319164574e-05, + "logits/chosen": -0.6685524582862854, + "logits/rejected": -0.7067282199859619, + "logps/chosen": -0.0003044075274374336, + "logps/rejected": -1.6522518396377563, + "loss": 1.5685, + "nll_loss": 0.3921244442462921, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.044075310754124e-05, + "rewards/margins": 0.16519474983215332, + "rewards/rejected": -0.16522517800331116, + "step": 5330 + }, + { + "epoch": 3.686721991701245, + "grad_norm": 6.9672393798828125, + "learning_rate": 3.507376671277086e-05, + "log_odds_chosen": 8.436867713928223, + "log_odds_ratio": -0.0006808569887652993, + "logits/chosen": -0.5947157144546509, + "logits/rejected": -0.6862506866455078, + "logps/chosen": -0.0007226442685350776, + "logps/rejected": -1.2544167041778564, + "loss": 1.2813, + "nll_loss": 0.3202512860298157, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.226442539831623e-05, + "rewards/margins": 0.12536939978599548, + "rewards/rejected": -0.12544165551662445, + "step": 5331 + }, + { + "epoch": 3.687413554633472, + "grad_norm": 9.374478340148926, + "learning_rate": 3.5069924696480715e-05, + "log_odds_chosen": 9.65780258178711, + "log_odds_ratio": -0.004418676253408194, + "logits/chosen": -0.5301334857940674, + "logits/rejected": -0.588121235370636, + "logps/chosen": -0.09868014603853226, + "logps/rejected": -2.407459259033203, + "loss": 1.3604, + "nll_loss": 0.33965355157852173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009868014603853226, + "rewards/margins": 0.23087790608406067, + "rewards/rejected": -0.2407459318637848, + "step": 5332 + }, + { + "epoch": 3.6881051175656987, + "grad_norm": 7.010161399841309, + "learning_rate": 3.506608268019057e-05, + "log_odds_chosen": 8.303940773010254, + "log_odds_ratio": -0.005083529744297266, + "logits/chosen": -0.7475804090499878, + "logits/rejected": -0.760705828666687, + "logps/chosen": -0.009333855472505093, + "logps/rejected": -1.4144235849380493, + "loss": 1.6738, + "nll_loss": 0.41793039441108704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009333856287412345, + "rewards/margins": 0.1405089795589447, + "rewards/rejected": -0.14144235849380493, + "step": 5333 + }, + { + "epoch": 3.6887966804979255, + "grad_norm": 11.381121635437012, + "learning_rate": 3.506224066390041e-05, + "log_odds_chosen": 9.073360443115234, + "log_odds_ratio": -0.00032709480728954077, + "logits/chosen": -0.8671411275863647, + "logits/rejected": -0.9612331986427307, + "logps/chosen": -0.00035379567998461425, + "logps/rejected": -1.3636505603790283, + "loss": 1.458, + "nll_loss": 0.36446449160575867, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.537956945365295e-05, + "rewards/margins": 0.13632968068122864, + "rewards/rejected": -0.13636507093906403, + "step": 5334 + }, + { + "epoch": 3.6894882434301524, + "grad_norm": 10.14391803741455, + "learning_rate": 3.5058398647610266e-05, + "log_odds_chosen": 8.98252010345459, + "log_odds_ratio": -0.15078553557395935, + "logits/chosen": -0.7508845329284668, + "logits/rejected": -0.7607273459434509, + "logps/chosen": -0.019194740802049637, + "logps/rejected": -1.7505712509155273, + "loss": 1.5958, + "nll_loss": 0.3838688135147095, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001919474103488028, + "rewards/margins": 0.17313764989376068, + "rewards/rejected": -0.17505714297294617, + "step": 5335 + }, + { + "epoch": 3.690179806362379, + "grad_norm": 11.787603378295898, + "learning_rate": 3.505455663132012e-05, + "log_odds_chosen": 9.963918685913086, + "log_odds_ratio": -0.0004981214297004044, + "logits/chosen": -1.0922781229019165, + "logits/rejected": -1.1762382984161377, + "logps/chosen": -0.001630566199310124, + "logps/rejected": -2.3342196941375732, + "loss": 1.3523, + "nll_loss": 0.3380275368690491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016305662575177848, + "rewards/margins": 0.233258917927742, + "rewards/rejected": -0.2334219515323639, + "step": 5336 + }, + { + "epoch": 3.690871369294606, + "grad_norm": 11.271586418151855, + "learning_rate": 3.5050714615029964e-05, + "log_odds_chosen": 10.058823585510254, + "log_odds_ratio": -6.0240603488637134e-05, + "logits/chosen": -0.9258029460906982, + "logits/rejected": -1.0561065673828125, + "logps/chosen": -0.0003865394101012498, + "logps/rejected": -1.8324633836746216, + "loss": 1.7126, + "nll_loss": 0.4281406104564667, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8653943192912266e-05, + "rewards/margins": 0.1832076907157898, + "rewards/rejected": -0.18324634432792664, + "step": 5337 + }, + { + "epoch": 3.691562932226833, + "grad_norm": 9.628790855407715, + "learning_rate": 3.504687259873982e-05, + "log_odds_chosen": 9.142765045166016, + "log_odds_ratio": -0.0002933957439381629, + "logits/chosen": -0.6513609290122986, + "logits/rejected": -0.7071292400360107, + "logps/chosen": -0.0006643411470577121, + "logps/rejected": -1.5814200639724731, + "loss": 1.9233, + "nll_loss": 0.4807976484298706, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.643411325057968e-05, + "rewards/margins": 0.15807557106018066, + "rewards/rejected": -0.15814201533794403, + "step": 5338 + }, + { + "epoch": 3.6922544951590597, + "grad_norm": 9.518936157226562, + "learning_rate": 3.504303058244967e-05, + "log_odds_chosen": 9.003790855407715, + "log_odds_ratio": -0.001304905628785491, + "logits/chosen": -0.618826150894165, + "logits/rejected": -0.7828741073608398, + "logps/chosen": -0.0028582715895026922, + "logps/rejected": -1.3129936456680298, + "loss": 1.4019, + "nll_loss": 0.35033372044563293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028582714730873704, + "rewards/margins": 0.13101352751255035, + "rewards/rejected": -0.13129936158657074, + "step": 5339 + }, + { + "epoch": 3.6929460580912865, + "grad_norm": 8.120966911315918, + "learning_rate": 3.503918856615952e-05, + "log_odds_chosen": 8.923418998718262, + "log_odds_ratio": -0.0002836494822986424, + "logits/chosen": -0.7749639749526978, + "logits/rejected": -0.8973751068115234, + "logps/chosen": -0.003522332990542054, + "logps/rejected": -2.081045389175415, + "loss": 1.8535, + "nll_loss": 0.46334409713745117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003522332990542054, + "rewards/margins": 0.2077522873878479, + "rewards/rejected": -0.20810453593730927, + "step": 5340 + }, + { + "epoch": 3.6936376210235133, + "grad_norm": 7.801592826843262, + "learning_rate": 3.5035346549869373e-05, + "log_odds_chosen": 9.398853302001953, + "log_odds_ratio": -0.0006219418719410896, + "logits/chosen": -0.431623637676239, + "logits/rejected": -0.42418909072875977, + "logps/chosen": -0.000501930364407599, + "logps/rejected": -1.5170776844024658, + "loss": 1.2517, + "nll_loss": 0.31287214159965515, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.019303716835566e-05, + "rewards/margins": 0.1516575962305069, + "rewards/rejected": -0.15170776844024658, + "step": 5341 + }, + { + "epoch": 3.69432918395574, + "grad_norm": 10.358515739440918, + "learning_rate": 3.5031504533579226e-05, + "log_odds_chosen": 9.936261177062988, + "log_odds_ratio": -0.00020719818712677807, + "logits/chosen": -0.7464555501937866, + "logits/rejected": -0.8372625708580017, + "logps/chosen": -0.0009494010009802878, + "logps/rejected": -2.646921396255493, + "loss": 1.5907, + "nll_loss": 0.3976495862007141, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.494010737398639e-05, + "rewards/margins": 0.26459717750549316, + "rewards/rejected": -0.26469212770462036, + "step": 5342 + }, + { + "epoch": 3.695020746887967, + "grad_norm": 15.795845985412598, + "learning_rate": 3.502766251728907e-05, + "log_odds_chosen": 10.614376068115234, + "log_odds_ratio": -0.00011300211190246046, + "logits/chosen": -0.9104953408241272, + "logits/rejected": -1.0255991220474243, + "logps/chosen": -0.00013850632240064442, + "logps/rejected": -2.071258306503296, + "loss": 2.1981, + "nll_loss": 0.5495221018791199, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3850632058165502e-05, + "rewards/margins": 0.20711196959018707, + "rewards/rejected": -0.20712582767009735, + "step": 5343 + }, + { + "epoch": 3.695712309820194, + "grad_norm": 10.562488555908203, + "learning_rate": 3.5023820500998924e-05, + "log_odds_chosen": 9.300116539001465, + "log_odds_ratio": -0.01639566384255886, + "logits/chosen": -0.6258934736251831, + "logits/rejected": -0.6757691502571106, + "logps/chosen": -0.025663437321782112, + "logps/rejected": -2.3334107398986816, + "loss": 2.4101, + "nll_loss": 0.6008975505828857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025663438718765974, + "rewards/margins": 0.23077471554279327, + "rewards/rejected": -0.2333410680294037, + "step": 5344 + }, + { + "epoch": 3.6964038727524207, + "grad_norm": 9.328142166137695, + "learning_rate": 3.5019978484708776e-05, + "log_odds_chosen": 7.483302593231201, + "log_odds_ratio": -0.036034103482961655, + "logits/chosen": -0.8576046824455261, + "logits/rejected": -0.8339859843254089, + "logps/chosen": -0.01491495966911316, + "logps/rejected": -1.15474271774292, + "loss": 1.957, + "nll_loss": 0.48564010858535767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014914961066097021, + "rewards/margins": 0.11398278176784515, + "rewards/rejected": -0.11547426879405975, + "step": 5345 + }, + { + "epoch": 3.6970954356846475, + "grad_norm": 8.036946296691895, + "learning_rate": 3.501613646841863e-05, + "log_odds_chosen": 8.698389053344727, + "log_odds_ratio": -0.0007000649347901344, + "logits/chosen": -0.6593747138977051, + "logits/rejected": -0.7601386904716492, + "logps/chosen": -0.00904887355864048, + "logps/rejected": -1.7365727424621582, + "loss": 1.9866, + "nll_loss": 0.49658799171447754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009048873907886446, + "rewards/margins": 0.17275238037109375, + "rewards/rejected": -0.17365726828575134, + "step": 5346 + }, + { + "epoch": 3.6977869986168743, + "grad_norm": 5.567675590515137, + "learning_rate": 3.501229445212848e-05, + "log_odds_chosen": 8.867687225341797, + "log_odds_ratio": -0.018153710290789604, + "logits/chosen": -0.6134251356124878, + "logits/rejected": -0.6851860284805298, + "logps/chosen": -0.005485460627824068, + "logps/rejected": -1.1283323764801025, + "loss": 1.0239, + "nll_loss": 0.2541605532169342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005485460278578103, + "rewards/margins": 0.11228469014167786, + "rewards/rejected": -0.11283324658870697, + "step": 5347 + }, + { + "epoch": 3.698478561549101, + "grad_norm": 12.602523803710938, + "learning_rate": 3.500845243583833e-05, + "log_odds_chosen": 7.065537452697754, + "log_odds_ratio": -0.46093103289604187, + "logits/chosen": -0.28088217973709106, + "logits/rejected": -0.3088432252407074, + "logps/chosen": -0.12832608819007874, + "logps/rejected": -1.7279107570648193, + "loss": 1.9474, + "nll_loss": 0.44075435400009155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012832608073949814, + "rewards/margins": 0.15995845198631287, + "rewards/rejected": -0.17279106378555298, + "step": 5348 + }, + { + "epoch": 3.699170124481328, + "grad_norm": 10.695401191711426, + "learning_rate": 3.500461041954818e-05, + "log_odds_chosen": 9.166481018066406, + "log_odds_ratio": -0.0002610564115457237, + "logits/chosen": -0.5230810642242432, + "logits/rejected": -0.5313901901245117, + "logps/chosen": -0.0008728259126655757, + "logps/rejected": -1.547613263130188, + "loss": 1.3375, + "nll_loss": 0.3343556523323059, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.728259854251519e-05, + "rewards/margins": 0.15467403829097748, + "rewards/rejected": -0.15476132929325104, + "step": 5349 + }, + { + "epoch": 3.699861687413555, + "grad_norm": 5.585293769836426, + "learning_rate": 3.500076840325803e-05, + "log_odds_chosen": 8.954833030700684, + "log_odds_ratio": -0.013094688765704632, + "logits/chosen": -0.5380806922912598, + "logits/rejected": -0.6527281403541565, + "logps/chosen": -0.004866013769060373, + "logps/rejected": -1.382778286933899, + "loss": 1.0659, + "nll_loss": 0.2651692032814026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004866014060098678, + "rewards/margins": 0.1377912312746048, + "rewards/rejected": -0.1382778286933899, + "step": 5350 + }, + { + "epoch": 3.7005532503457816, + "grad_norm": 11.971247673034668, + "learning_rate": 3.4996926386967884e-05, + "log_odds_chosen": 9.954809188842773, + "log_odds_ratio": -0.006246030330657959, + "logits/chosen": -0.5458557605743408, + "logits/rejected": -0.5307517647743225, + "logps/chosen": -0.002717132680118084, + "logps/rejected": -1.9100207090377808, + "loss": 1.3756, + "nll_loss": 0.34327322244644165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027171324472874403, + "rewards/margins": 0.19073036313056946, + "rewards/rejected": -0.19100208580493927, + "step": 5351 + }, + { + "epoch": 3.7012448132780085, + "grad_norm": 11.080292701721191, + "learning_rate": 3.499308437067773e-05, + "log_odds_chosen": 6.902338981628418, + "log_odds_ratio": -0.15426041185855865, + "logits/chosen": -0.5036279559135437, + "logits/rejected": -0.5924159288406372, + "logps/chosen": -0.051904480904340744, + "logps/rejected": -1.4585072994232178, + "loss": 2.1008, + "nll_loss": 0.5097784399986267, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0051904479041695595, + "rewards/margins": 0.14066028594970703, + "rewards/rejected": -0.14585073292255402, + "step": 5352 + }, + { + "epoch": 3.7019363762102353, + "grad_norm": 9.899896621704102, + "learning_rate": 3.498924235438758e-05, + "log_odds_chosen": 9.366655349731445, + "log_odds_ratio": -0.00028243596898391843, + "logits/chosen": -0.5405033826828003, + "logits/rejected": -0.5533077716827393, + "logps/chosen": -0.00033929411438293755, + "logps/rejected": -1.1259949207305908, + "loss": 1.29, + "nll_loss": 0.32247021794319153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.392941289348528e-05, + "rewards/margins": 0.11256556957960129, + "rewards/rejected": -0.11259949207305908, + "step": 5353 + }, + { + "epoch": 3.702627939142462, + "grad_norm": 8.126338005065918, + "learning_rate": 3.4985400338097435e-05, + "log_odds_chosen": 9.396369934082031, + "log_odds_ratio": -0.0006253690226003528, + "logits/chosen": -0.4254477024078369, + "logits/rejected": -0.3807203769683838, + "logps/chosen": -0.0022061774507164955, + "logps/rejected": -1.7364078760147095, + "loss": 1.6621, + "nll_loss": 0.41545745730400085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022061774507164955, + "rewards/margins": 0.17342017590999603, + "rewards/rejected": -0.17364078760147095, + "step": 5354 + }, + { + "epoch": 3.703319502074689, + "grad_norm": 11.008198738098145, + "learning_rate": 3.498155832180729e-05, + "log_odds_chosen": 8.941949844360352, + "log_odds_ratio": -0.014349130913615227, + "logits/chosen": -0.3872235119342804, + "logits/rejected": -0.5136542320251465, + "logps/chosen": -0.015712972730398178, + "logps/rejected": -2.3724722862243652, + "loss": 1.7264, + "nll_loss": 0.43016284704208374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015712971799075603, + "rewards/margins": 0.23567596077919006, + "rewards/rejected": -0.2372472584247589, + "step": 5355 + }, + { + "epoch": 3.704011065006916, + "grad_norm": 7.446681976318359, + "learning_rate": 3.497771630551714e-05, + "log_odds_chosen": 9.698359489440918, + "log_odds_ratio": -0.0001687395852059126, + "logits/chosen": -0.5469300746917725, + "logits/rejected": -0.6297659873962402, + "logps/chosen": -0.0029810178093612194, + "logps/rejected": -1.8616513013839722, + "loss": 0.9211, + "nll_loss": 0.23024982213974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029810177511535585, + "rewards/margins": 0.185867041349411, + "rewards/rejected": -0.18616515398025513, + "step": 5356 + }, + { + "epoch": 3.7047026279391426, + "grad_norm": 9.188549995422363, + "learning_rate": 3.4973874289226985e-05, + "log_odds_chosen": 9.125092506408691, + "log_odds_ratio": -0.00030094856629148126, + "logits/chosen": -0.8525630235671997, + "logits/rejected": -0.9092421531677246, + "logps/chosen": -0.0007442575879395008, + "logps/rejected": -1.5756902694702148, + "loss": 1.4686, + "nll_loss": 0.3671131432056427, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.442575588356704e-05, + "rewards/margins": 0.15749460458755493, + "rewards/rejected": -0.1575690358877182, + "step": 5357 + }, + { + "epoch": 3.7053941908713695, + "grad_norm": 7.4708051681518555, + "learning_rate": 3.497003227293684e-05, + "log_odds_chosen": 10.437641143798828, + "log_odds_ratio": -0.00015635325689800084, + "logits/chosen": -0.5352045297622681, + "logits/rejected": -0.5188069939613342, + "logps/chosen": -0.0006159612676128745, + "logps/rejected": -2.369760274887085, + "loss": 1.3847, + "nll_loss": 0.34616681933403015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159612530609593e-05, + "rewards/margins": 0.23691445589065552, + "rewards/rejected": -0.2369760274887085, + "step": 5358 + }, + { + "epoch": 3.7060857538035963, + "grad_norm": 12.068976402282715, + "learning_rate": 3.496619025664669e-05, + "log_odds_chosen": 10.090932846069336, + "log_odds_ratio": -0.00021271216974128038, + "logits/chosen": -0.7529656887054443, + "logits/rejected": -0.7240673303604126, + "logps/chosen": -0.000268957024673, + "logps/rejected": -1.9103636741638184, + "loss": 1.8827, + "nll_loss": 0.4706517457962036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6895704650087282e-05, + "rewards/margins": 0.19100944697856903, + "rewards/rejected": -0.19103635847568512, + "step": 5359 + }, + { + "epoch": 3.706777316735823, + "grad_norm": 6.470218658447266, + "learning_rate": 3.496234824035654e-05, + "log_odds_chosen": 8.61813735961914, + "log_odds_ratio": -0.09870389848947525, + "logits/chosen": -0.313279390335083, + "logits/rejected": -0.2931751608848572, + "logps/chosen": -0.027196036651730537, + "logps/rejected": -1.6684269905090332, + "loss": 2.0358, + "nll_loss": 0.4990912973880768, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002719603944569826, + "rewards/margins": 0.16412308812141418, + "rewards/rejected": -0.16684269905090332, + "step": 5360 + }, + { + "epoch": 3.70746887966805, + "grad_norm": 6.061364650726318, + "learning_rate": 3.495850622406639e-05, + "log_odds_chosen": 7.981203079223633, + "log_odds_ratio": -0.01701802760362625, + "logits/chosen": -0.5911697149276733, + "logits/rejected": -0.6583446264266968, + "logps/chosen": -0.011462513357400894, + "logps/rejected": -1.4020397663116455, + "loss": 2.1631, + "nll_loss": 0.5390677452087402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011462513357400894, + "rewards/margins": 0.1390577107667923, + "rewards/rejected": -0.14020396769046783, + "step": 5361 + }, + { + "epoch": 3.7081604426002768, + "grad_norm": 6.6160888671875, + "learning_rate": 3.495466420777625e-05, + "log_odds_chosen": 9.713098526000977, + "log_odds_ratio": -0.0001639363035792485, + "logits/chosen": -0.7189328670501709, + "logits/rejected": -0.7098298072814941, + "logps/chosen": -0.004341424442827702, + "logps/rejected": -2.150063991546631, + "loss": 1.6854, + "nll_loss": 0.42134472727775574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043414239189587533, + "rewards/margins": 0.21457228064537048, + "rewards/rejected": -0.21500641107559204, + "step": 5362 + }, + { + "epoch": 3.7088520055325036, + "grad_norm": 7.108826160430908, + "learning_rate": 3.495082219148609e-05, + "log_odds_chosen": 9.363826751708984, + "log_odds_ratio": -0.0003894947003573179, + "logits/chosen": -0.708694338798523, + "logits/rejected": -0.7024210095405579, + "logps/chosen": -0.0006412908551283181, + "logps/rejected": -1.5811138153076172, + "loss": 1.0492, + "nll_loss": 0.2622511088848114, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.412908987840638e-05, + "rewards/margins": 0.15804724395275116, + "rewards/rejected": -0.15811137855052948, + "step": 5363 + }, + { + "epoch": 3.7095435684647304, + "grad_norm": 9.981987953186035, + "learning_rate": 3.4946980175195946e-05, + "log_odds_chosen": 8.762392044067383, + "log_odds_ratio": -0.11463475972414017, + "logits/chosen": -0.645453929901123, + "logits/rejected": -0.708638608455658, + "logps/chosen": -0.017611129209399223, + "logps/rejected": -1.5133848190307617, + "loss": 1.9759, + "nll_loss": 0.4825035035610199, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017611129442229867, + "rewards/margins": 0.14957737922668457, + "rewards/rejected": -0.15133848786354065, + "step": 5364 + }, + { + "epoch": 3.7102351313969573, + "grad_norm": 6.762959957122803, + "learning_rate": 3.49431381589058e-05, + "log_odds_chosen": 8.865896224975586, + "log_odds_ratio": -0.0009606637177057564, + "logits/chosen": -0.43857601284980774, + "logits/rejected": -0.5364105105400085, + "logps/chosen": -0.043151963502168655, + "logps/rejected": -2.8622305393218994, + "loss": 1.1842, + "nll_loss": 0.2959616184234619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004315196070820093, + "rewards/margins": 0.281907856464386, + "rewards/rejected": -0.28622305393218994, + "step": 5365 + }, + { + "epoch": 3.710926694329184, + "grad_norm": 12.004629135131836, + "learning_rate": 3.4939296142615644e-05, + "log_odds_chosen": 9.092486381530762, + "log_odds_ratio": -0.00038122880505397916, + "logits/chosen": -0.6763482093811035, + "logits/rejected": -0.7454387545585632, + "logps/chosen": -0.016808513551950455, + "logps/rejected": -2.034806966781616, + "loss": 1.3493, + "nll_loss": 0.33729538321495056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001680851331911981, + "rewards/margins": 0.20179985463619232, + "rewards/rejected": -0.20348069071769714, + "step": 5366 + }, + { + "epoch": 3.711618257261411, + "grad_norm": 9.979867935180664, + "learning_rate": 3.4935454126325496e-05, + "log_odds_chosen": 10.370789527893066, + "log_odds_ratio": -9.933464752975851e-05, + "logits/chosen": -0.6684778332710266, + "logits/rejected": -0.6606014966964722, + "logps/chosen": -0.00016815456910990179, + "logps/rejected": -1.7887946367263794, + "loss": 1.6075, + "nll_loss": 0.4018716514110565, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6815456547192298e-05, + "rewards/margins": 0.17886263132095337, + "rewards/rejected": -0.17887946963310242, + "step": 5367 + }, + { + "epoch": 3.7123098201936378, + "grad_norm": 8.801868438720703, + "learning_rate": 3.493161211003535e-05, + "log_odds_chosen": 9.470043182373047, + "log_odds_ratio": -0.0001726085611153394, + "logits/chosen": -0.465656578540802, + "logits/rejected": -0.5587571263313293, + "logps/chosen": -0.000835128128528595, + "logps/rejected": -1.672318458557129, + "loss": 1.0162, + "nll_loss": 0.2540230453014374, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.351281576324254e-05, + "rewards/margins": 0.16714833676815033, + "rewards/rejected": -0.16723184287548065, + "step": 5368 + }, + { + "epoch": 3.7130013831258646, + "grad_norm": 12.182748794555664, + "learning_rate": 3.49277700937452e-05, + "log_odds_chosen": 8.469917297363281, + "log_odds_ratio": -0.06619244813919067, + "logits/chosen": -0.7275235056877136, + "logits/rejected": -0.7132308483123779, + "logps/chosen": -0.03503218665719032, + "logps/rejected": -2.0422232151031494, + "loss": 1.1944, + "nll_loss": 0.29196980595588684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035032187588512897, + "rewards/margins": 0.20071911811828613, + "rewards/rejected": -0.20422232151031494, + "step": 5369 + }, + { + "epoch": 3.7136929460580914, + "grad_norm": 9.088739395141602, + "learning_rate": 3.492392807745505e-05, + "log_odds_chosen": 9.97968864440918, + "log_odds_ratio": -0.00012102635810151696, + "logits/chosen": -0.5951199531555176, + "logits/rejected": -0.6818069219589233, + "logps/chosen": -0.00044668448390439153, + "logps/rejected": -1.7563897371292114, + "loss": 1.0097, + "nll_loss": 0.2524169087409973, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.466845348360948e-05, + "rewards/margins": 0.175594300031662, + "rewards/rejected": -0.17563897371292114, + "step": 5370 + }, + { + "epoch": 3.7143845089903182, + "grad_norm": 15.796573638916016, + "learning_rate": 3.4920086061164906e-05, + "log_odds_chosen": 9.849954605102539, + "log_odds_ratio": -0.00020008234423585236, + "logits/chosen": -0.6006171703338623, + "logits/rejected": -0.6448779106140137, + "logps/chosen": -0.0005239631282165647, + "logps/rejected": -1.7191863059997559, + "loss": 1.7053, + "nll_loss": 0.42631492018699646, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239631354925223e-05, + "rewards/margins": 0.17186623811721802, + "rewards/rejected": -0.17191863059997559, + "step": 5371 + }, + { + "epoch": 3.715076071922545, + "grad_norm": 11.92275333404541, + "learning_rate": 3.491624404487475e-05, + "log_odds_chosen": 7.447054862976074, + "log_odds_ratio": -0.10066209733486176, + "logits/chosen": -0.38511592149734497, + "logits/rejected": -0.43172594904899597, + "logps/chosen": -0.02251449227333069, + "logps/rejected": -1.2307636737823486, + "loss": 1.9626, + "nll_loss": 0.4805947542190552, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022514492738991976, + "rewards/margins": 0.1208249107003212, + "rewards/rejected": -0.12307636439800262, + "step": 5372 + }, + { + "epoch": 3.715767634854772, + "grad_norm": 10.29927921295166, + "learning_rate": 3.4912402028584604e-05, + "log_odds_chosen": 10.454160690307617, + "log_odds_ratio": -6.0720885812770575e-05, + "logits/chosen": -0.5991085171699524, + "logits/rejected": -0.6778004169464111, + "logps/chosen": -0.00020221697923261672, + "logps/rejected": -1.793766975402832, + "loss": 1.504, + "nll_loss": 0.37600409984588623, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0221699742251076e-05, + "rewards/margins": 0.17935647070407867, + "rewards/rejected": -0.17937669157981873, + "step": 5373 + }, + { + "epoch": 3.7164591977869987, + "grad_norm": 7.523771286010742, + "learning_rate": 3.490856001229446e-05, + "log_odds_chosen": 10.136287689208984, + "log_odds_ratio": -7.101793016772717e-05, + "logits/chosen": -0.5835416316986084, + "logits/rejected": -0.5638810396194458, + "logps/chosen": -0.00041338251321576536, + "logps/rejected": -1.9321492910385132, + "loss": 2.1528, + "nll_loss": 0.5381991267204285, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.133825495955534e-05, + "rewards/margins": 0.1931736171245575, + "rewards/rejected": -0.19321493804454803, + "step": 5374 + }, + { + "epoch": 3.7171507607192256, + "grad_norm": 12.451519966125488, + "learning_rate": 3.49047179960043e-05, + "log_odds_chosen": 9.121818542480469, + "log_odds_ratio": -0.0004346870118752122, + "logits/chosen": -0.8031559586524963, + "logits/rejected": -0.790696382522583, + "logps/chosen": -0.0010023590875789523, + "logps/rejected": -1.4638499021530151, + "loss": 2.2833, + "nll_loss": 0.5707757472991943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010023592039942741, + "rewards/margins": 0.14628475904464722, + "rewards/rejected": -0.14638498425483704, + "step": 5375 + }, + { + "epoch": 3.7178423236514524, + "grad_norm": 20.69891357421875, + "learning_rate": 3.4900875979714155e-05, + "log_odds_chosen": 9.785394668579102, + "log_odds_ratio": -0.09108485281467438, + "logits/chosen": -0.42490309476852417, + "logits/rejected": -0.6075316667556763, + "logps/chosen": -0.015684589743614197, + "logps/rejected": -2.362088203430176, + "loss": 1.922, + "nll_loss": 0.4714014232158661, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015684588579460979, + "rewards/margins": 0.23464035987854004, + "rewards/rejected": -0.23620882630348206, + "step": 5376 + }, + { + "epoch": 3.7185338865836792, + "grad_norm": 17.057636260986328, + "learning_rate": 3.489703396342401e-05, + "log_odds_chosen": 10.41865348815918, + "log_odds_ratio": -0.00016036239685490727, + "logits/chosen": -0.5188636779785156, + "logits/rejected": -0.4948766529560089, + "logps/chosen": -0.000365030748071149, + "logps/rejected": -2.019139528274536, + "loss": 1.8005, + "nll_loss": 0.45011138916015625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.650307917268947e-05, + "rewards/margins": 0.2018774449825287, + "rewards/rejected": -0.2019139528274536, + "step": 5377 + }, + { + "epoch": 3.719225449515906, + "grad_norm": 12.16074275970459, + "learning_rate": 3.489319194713386e-05, + "log_odds_chosen": 9.693315505981445, + "log_odds_ratio": -0.0017601572908461094, + "logits/chosen": -0.9097875356674194, + "logits/rejected": -0.998991847038269, + "logps/chosen": -0.001546016545034945, + "logps/rejected": -1.6606801748275757, + "loss": 1.3411, + "nll_loss": 0.3350999057292938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015460167196579278, + "rewards/margins": 0.1659134328365326, + "rewards/rejected": -0.16606801748275757, + "step": 5378 + }, + { + "epoch": 3.719917012448133, + "grad_norm": 9.151723861694336, + "learning_rate": 3.4889349930843705e-05, + "log_odds_chosen": 10.32984733581543, + "log_odds_ratio": -6.051865784684196e-05, + "logits/chosen": -0.2843622863292694, + "logits/rejected": -0.37206345796585083, + "logps/chosen": -0.00016166864952538162, + "logps/rejected": -1.6887463331222534, + "loss": 1.1799, + "nll_loss": 0.29497507214546204, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6166864952538162e-05, + "rewards/margins": 0.16885846853256226, + "rewards/rejected": -0.1688746213912964, + "step": 5379 + }, + { + "epoch": 3.7206085753803597, + "grad_norm": 9.702625274658203, + "learning_rate": 3.4885507914553565e-05, + "log_odds_chosen": 9.072803497314453, + "log_odds_ratio": -0.0002321783103980124, + "logits/chosen": -0.5725424289703369, + "logits/rejected": -0.48636266589164734, + "logps/chosen": -0.001193308038637042, + "logps/rejected": -1.3661433458328247, + "loss": 1.5208, + "nll_loss": 0.38018402457237244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011933079804293811, + "rewards/margins": 0.13649500906467438, + "rewards/rejected": -0.1366143524646759, + "step": 5380 + }, + { + "epoch": 3.7213001383125865, + "grad_norm": 15.534612655639648, + "learning_rate": 3.488166589826341e-05, + "log_odds_chosen": 9.534454345703125, + "log_odds_ratio": -0.032325610518455505, + "logits/chosen": -0.07890317589044571, + "logits/rejected": -0.15461499989032745, + "logps/chosen": -0.008514742366969585, + "logps/rejected": -2.4597206115722656, + "loss": 1.7176, + "nll_loss": 0.4261553883552551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008514742366969585, + "rewards/margins": 0.2451205849647522, + "rewards/rejected": -0.24597206711769104, + "step": 5381 + }, + { + "epoch": 3.7219917012448134, + "grad_norm": 11.658743858337402, + "learning_rate": 3.487782388197326e-05, + "log_odds_chosen": 9.717710494995117, + "log_odds_ratio": -0.00044204670120961964, + "logits/chosen": -0.6480785012245178, + "logits/rejected": -0.7348360419273376, + "logps/chosen": -0.00075390818528831, + "logps/rejected": -1.9051095247268677, + "loss": 1.1266, + "nll_loss": 0.2816102206707001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.53908243495971e-05, + "rewards/margins": 0.19043557345867157, + "rewards/rejected": -0.19051097333431244, + "step": 5382 + }, + { + "epoch": 3.72268326417704, + "grad_norm": 7.192076683044434, + "learning_rate": 3.4873981865683115e-05, + "log_odds_chosen": 9.726299285888672, + "log_odds_ratio": -0.0001063284435076639, + "logits/chosen": -0.8140656352043152, + "logits/rejected": -0.7850347757339478, + "logps/chosen": -0.00866577960550785, + "logps/rejected": -1.912880301475525, + "loss": 1.1967, + "nll_loss": 0.2991747260093689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000866578018758446, + "rewards/margins": 0.1904214471578598, + "rewards/rejected": -0.191288024187088, + "step": 5383 + }, + { + "epoch": 3.723374827109267, + "grad_norm": 6.207744121551514, + "learning_rate": 3.487013984939296e-05, + "log_odds_chosen": 10.239447593688965, + "log_odds_ratio": -6.98567891959101e-05, + "logits/chosen": -0.6612863540649414, + "logits/rejected": -0.700016438961029, + "logps/chosen": -0.0005870179738849401, + "logps/rejected": -1.7446274757385254, + "loss": 0.8839, + "nll_loss": 0.22095894813537598, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.870179666089825e-05, + "rewards/margins": 0.1744040548801422, + "rewards/rejected": -0.17446276545524597, + "step": 5384 + }, + { + "epoch": 3.724066390041494, + "grad_norm": 9.887097358703613, + "learning_rate": 3.486629783310281e-05, + "log_odds_chosen": 9.54232406616211, + "log_odds_ratio": -0.00017396220937371254, + "logits/chosen": -0.9332183599472046, + "logits/rejected": -1.0139302015304565, + "logps/chosen": -0.0003737725201062858, + "logps/rejected": -1.457379698753357, + "loss": 1.9432, + "nll_loss": 0.4857736825942993, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7377249100245535e-05, + "rewards/margins": 0.145700603723526, + "rewards/rejected": -0.14573797583580017, + "step": 5385 + }, + { + "epoch": 3.7247579529737207, + "grad_norm": 7.268083095550537, + "learning_rate": 3.4862455816812666e-05, + "log_odds_chosen": 8.003231048583984, + "log_odds_ratio": -0.13872238993644714, + "logits/chosen": -0.6145071387290955, + "logits/rejected": -0.6586422920227051, + "logps/chosen": -0.0258407574146986, + "logps/rejected": -2.195486307144165, + "loss": 1.6876, + "nll_loss": 0.40801793336868286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025840753223747015, + "rewards/margins": 0.21696454286575317, + "rewards/rejected": -0.21954862773418427, + "step": 5386 + }, + { + "epoch": 3.7254495159059475, + "grad_norm": 8.43387508392334, + "learning_rate": 3.485861380052252e-05, + "log_odds_chosen": 10.035348892211914, + "log_odds_ratio": -5.451100514619611e-05, + "logits/chosen": -0.5829866528511047, + "logits/rejected": -0.5891355276107788, + "logps/chosen": -0.0002491176419425756, + "logps/rejected": -1.592644214630127, + "loss": 1.5007, + "nll_loss": 0.37516021728515625, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.491176564944908e-05, + "rewards/margins": 0.1592395156621933, + "rewards/rejected": -0.1592644304037094, + "step": 5387 + }, + { + "epoch": 3.7261410788381744, + "grad_norm": 9.96875, + "learning_rate": 3.4854771784232364e-05, + "log_odds_chosen": 9.071399688720703, + "log_odds_ratio": -0.00036526485928334296, + "logits/chosen": -0.4201893210411072, + "logits/rejected": -0.47043901681900024, + "logps/chosen": -0.0005718155298382044, + "logps/rejected": -1.7792240381240845, + "loss": 1.3203, + "nll_loss": 0.3300449550151825, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7181550801033154e-05, + "rewards/margins": 0.17786523699760437, + "rewards/rejected": -0.17792241275310516, + "step": 5388 + }, + { + "epoch": 3.726832641770401, + "grad_norm": 12.416715621948242, + "learning_rate": 3.485092976794222e-05, + "log_odds_chosen": 10.267997741699219, + "log_odds_ratio": -0.00012193172005936503, + "logits/chosen": -0.7236143946647644, + "logits/rejected": -0.8546754717826843, + "logps/chosen": -0.0007985993870534003, + "logps/rejected": -2.2986698150634766, + "loss": 1.4546, + "nll_loss": 0.3636472523212433, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985993579495698e-05, + "rewards/margins": 0.22978714108467102, + "rewards/rejected": -0.22986699640750885, + "step": 5389 + }, + { + "epoch": 3.727524204702628, + "grad_norm": 17.100522994995117, + "learning_rate": 3.484708775165207e-05, + "log_odds_chosen": 8.397350311279297, + "log_odds_ratio": -0.01968861185014248, + "logits/chosen": -0.8118228316307068, + "logits/rejected": -0.7890980243682861, + "logps/chosen": -0.012317357584834099, + "logps/rejected": -2.130256175994873, + "loss": 1.9555, + "nll_loss": 0.4868970811367035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012317356886342168, + "rewards/margins": 0.21179388463497162, + "rewards/rejected": -0.21302561461925507, + "step": 5390 + }, + { + "epoch": 3.728215767634855, + "grad_norm": 9.5698881149292, + "learning_rate": 3.484324573536192e-05, + "log_odds_chosen": 8.837024688720703, + "log_odds_ratio": -0.010207761079072952, + "logits/chosen": -0.5338362455368042, + "logits/rejected": -0.6706526279449463, + "logps/chosen": -0.010720196180045605, + "logps/rejected": -1.4015872478485107, + "loss": 1.78, + "nll_loss": 0.44397395849227905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010720195714384317, + "rewards/margins": 0.13908669352531433, + "rewards/rejected": -0.1401587277650833, + "step": 5391 + }, + { + "epoch": 3.7289073305670817, + "grad_norm": 10.274774551391602, + "learning_rate": 3.4839403719071773e-05, + "log_odds_chosen": 9.285009384155273, + "log_odds_ratio": -0.0002976319519802928, + "logits/chosen": -0.7594529986381531, + "logits/rejected": -0.7697383761405945, + "logps/chosen": -0.002318364568054676, + "logps/rejected": -2.2340967655181885, + "loss": 1.7638, + "nll_loss": 0.4409220814704895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002318364568054676, + "rewards/margins": 0.22317782044410706, + "rewards/rejected": -0.22340966761112213, + "step": 5392 + }, + { + "epoch": 3.7295988934993085, + "grad_norm": 8.453655242919922, + "learning_rate": 3.483556170278162e-05, + "log_odds_chosen": 11.143664360046387, + "log_odds_ratio": -1.918661655508913e-05, + "logits/chosen": -0.7070326805114746, + "logits/rejected": -0.7929023504257202, + "logps/chosen": -0.0002504032163415104, + "logps/rejected": -2.2312471866607666, + "loss": 1.3668, + "nll_loss": 0.3416998088359833, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5040324544534087e-05, + "rewards/margins": 0.22309967875480652, + "rewards/rejected": -0.22312471270561218, + "step": 5393 + }, + { + "epoch": 3.7302904564315353, + "grad_norm": 12.340773582458496, + "learning_rate": 3.483171968649147e-05, + "log_odds_chosen": 8.7693510055542, + "log_odds_ratio": -0.0003056778514292091, + "logits/chosen": -0.8789324760437012, + "logits/rejected": -0.9095242023468018, + "logps/chosen": -0.0030350149609148502, + "logps/rejected": -1.8134474754333496, + "loss": 1.3986, + "nll_loss": 0.3496093153953552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000303501496091485, + "rewards/margins": 0.18104124069213867, + "rewards/rejected": -0.18134474754333496, + "step": 5394 + }, + { + "epoch": 3.730982019363762, + "grad_norm": 6.907254219055176, + "learning_rate": 3.4827877670201324e-05, + "log_odds_chosen": 8.385276794433594, + "log_odds_ratio": -0.0015755126951262355, + "logits/chosen": -0.6956421136856079, + "logits/rejected": -0.7536182403564453, + "logps/chosen": -0.00522532919421792, + "logps/rejected": -1.7120745182037354, + "loss": 1.9993, + "nll_loss": 0.49965721368789673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005225329659879208, + "rewards/margins": 0.17068493366241455, + "rewards/rejected": -0.1712074726819992, + "step": 5395 + }, + { + "epoch": 3.731673582295989, + "grad_norm": 9.952130317687988, + "learning_rate": 3.4824035653911176e-05, + "log_odds_chosen": 8.023850440979004, + "log_odds_ratio": -0.014953254722058773, + "logits/chosen": -0.6111462116241455, + "logits/rejected": -0.6585705280303955, + "logps/chosen": -0.02512936294078827, + "logps/rejected": -1.6922228336334229, + "loss": 1.7928, + "nll_loss": 0.4467039108276367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002512936247512698, + "rewards/margins": 0.16670936346054077, + "rewards/rejected": -0.16922229528427124, + "step": 5396 + }, + { + "epoch": 3.732365145228216, + "grad_norm": 13.846830368041992, + "learning_rate": 3.482019363762102e-05, + "log_odds_chosen": 9.7283935546875, + "log_odds_ratio": -0.00023480159870814532, + "logits/chosen": 0.01851162314414978, + "logits/rejected": -0.06452546268701553, + "logps/chosen": -0.0014925599098205566, + "logps/rejected": -2.2084391117095947, + "loss": 1.1774, + "nll_loss": 0.29433268308639526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014925599680282176, + "rewards/margins": 0.2206946760416031, + "rewards/rejected": -0.22084392607212067, + "step": 5397 + }, + { + "epoch": 3.7330567081604427, + "grad_norm": 7.317983627319336, + "learning_rate": 3.481635162133088e-05, + "log_odds_chosen": 7.966273784637451, + "log_odds_ratio": -0.06842464208602905, + "logits/chosen": -0.4417526125907898, + "logits/rejected": -0.4552074670791626, + "logps/chosen": -0.022284694015979767, + "logps/rejected": -1.8117376565933228, + "loss": 1.4527, + "nll_loss": 0.3563276529312134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022284695878624916, + "rewards/margins": 0.17894530296325684, + "rewards/rejected": -0.18117377161979675, + "step": 5398 + }, + { + "epoch": 3.7337482710926695, + "grad_norm": 36.7801513671875, + "learning_rate": 3.481250960504073e-05, + "log_odds_chosen": 7.152010440826416, + "log_odds_ratio": -0.19985008239746094, + "logits/chosen": -0.4221605360507965, + "logits/rejected": -0.4445953071117401, + "logps/chosen": -0.035872478038072586, + "logps/rejected": -1.4452604055404663, + "loss": 2.1037, + "nll_loss": 0.5059496760368347, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003587248269468546, + "rewards/margins": 0.14093877375125885, + "rewards/rejected": -0.14452604949474335, + "step": 5399 + }, + { + "epoch": 3.7344398340248963, + "grad_norm": 99.5403823852539, + "learning_rate": 3.480866758875058e-05, + "log_odds_chosen": 7.870386600494385, + "log_odds_ratio": -0.4392143189907074, + "logits/chosen": -0.729932427406311, + "logits/rejected": -0.7658141255378723, + "logps/chosen": -0.21164977550506592, + "logps/rejected": -1.6368392705917358, + "loss": 3.0325, + "nll_loss": 0.7142078280448914, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02116497792303562, + "rewards/margins": 0.14251896739006042, + "rewards/rejected": -0.1636839210987091, + "step": 5400 + }, + { + "epoch": 3.735131396957123, + "grad_norm": 9.862106323242188, + "learning_rate": 3.480482557246043e-05, + "log_odds_chosen": 8.045354843139648, + "log_odds_ratio": -0.003918115980923176, + "logits/chosen": -0.2770964503288269, + "logits/rejected": -0.3260282874107361, + "logps/chosen": -0.017516067251563072, + "logps/rejected": -1.9411693811416626, + "loss": 1.4297, + "nll_loss": 0.35704267024993896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001751606585457921, + "rewards/margins": 0.1923653483390808, + "rewards/rejected": -0.19411695003509521, + "step": 5401 + }, + { + "epoch": 3.73582295988935, + "grad_norm": 10.115386962890625, + "learning_rate": 3.480098355617028e-05, + "log_odds_chosen": 8.4605712890625, + "log_odds_ratio": -0.003765811212360859, + "logits/chosen": -0.3988216519355774, + "logits/rejected": -0.5181287527084351, + "logps/chosen": -0.03485646843910217, + "logps/rejected": -1.7947022914886475, + "loss": 1.8983, + "nll_loss": 0.47420254349708557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034856467973440886, + "rewards/margins": 0.17598459124565125, + "rewards/rejected": -0.1794702410697937, + "step": 5402 + }, + { + "epoch": 3.736514522821577, + "grad_norm": 13.618650436401367, + "learning_rate": 3.479714153988013e-05, + "log_odds_chosen": 9.20181655883789, + "log_odds_ratio": -0.0016848170198500156, + "logits/chosen": -0.4004411995410919, + "logits/rejected": -0.3985813856124878, + "logps/chosen": -0.002053108997642994, + "logps/rejected": -1.9189532995224, + "loss": 1.612, + "nll_loss": 0.40284085273742676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020531090558506548, + "rewards/margins": 0.19169002771377563, + "rewards/rejected": -0.19189533591270447, + "step": 5403 + }, + { + "epoch": 3.7372060857538036, + "grad_norm": 7.179841995239258, + "learning_rate": 3.479329952358998e-05, + "log_odds_chosen": 10.005006790161133, + "log_odds_ratio": -0.00012902371236123145, + "logits/chosen": -0.29995405673980713, + "logits/rejected": -0.4108564257621765, + "logps/chosen": -0.008333981037139893, + "logps/rejected": -2.2930901050567627, + "loss": 1.4825, + "nll_loss": 0.3706183135509491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008333979640156031, + "rewards/margins": 0.2284756302833557, + "rewards/rejected": -0.22930902242660522, + "step": 5404 + }, + { + "epoch": 3.7378976486860305, + "grad_norm": 13.740462303161621, + "learning_rate": 3.4789457507299835e-05, + "log_odds_chosen": 8.431564331054688, + "log_odds_ratio": -0.007452045567333698, + "logits/chosen": -0.6105407476425171, + "logits/rejected": -0.6141482591629028, + "logps/chosen": -0.05373113974928856, + "logps/rejected": -2.1418652534484863, + "loss": 1.5127, + "nll_loss": 0.3774263858795166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005373113788664341, + "rewards/margins": 0.20881341397762299, + "rewards/rejected": -0.21418653428554535, + "step": 5405 + }, + { + "epoch": 3.7385892116182573, + "grad_norm": 7.859427452087402, + "learning_rate": 3.478561549100968e-05, + "log_odds_chosen": 9.265692710876465, + "log_odds_ratio": -0.0010991651797667146, + "logits/chosen": -0.33521929383277893, + "logits/rejected": -0.29185032844543457, + "logps/chosen": -0.004679600242525339, + "logps/rejected": -2.2458150386810303, + "loss": 1.8115, + "nll_loss": 0.4527547061443329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004679600824601948, + "rewards/margins": 0.22411353886127472, + "rewards/rejected": -0.2245815098285675, + "step": 5406 + }, + { + "epoch": 3.739280774550484, + "grad_norm": 8.53417682647705, + "learning_rate": 3.478177347471954e-05, + "log_odds_chosen": 8.067197799682617, + "log_odds_ratio": -0.033554911613464355, + "logits/chosen": -0.1194586306810379, + "logits/rejected": -0.1688614785671234, + "logps/chosen": -0.011145330965518951, + "logps/rejected": -1.4050594568252563, + "loss": 1.7933, + "nll_loss": 0.4449673295021057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011145330499857664, + "rewards/margins": 0.13939140737056732, + "rewards/rejected": -0.14050595462322235, + "step": 5407 + }, + { + "epoch": 3.739972337482711, + "grad_norm": 8.623878479003906, + "learning_rate": 3.4777931458429385e-05, + "log_odds_chosen": 9.07345199584961, + "log_odds_ratio": -0.0004437947063706815, + "logits/chosen": -0.42635229229927063, + "logits/rejected": -0.4366256892681122, + "logps/chosen": -0.00036963215097784996, + "logps/rejected": -1.2568538188934326, + "loss": 1.5811, + "nll_loss": 0.39522841572761536, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.696321800816804e-05, + "rewards/margins": 0.1256484091281891, + "rewards/rejected": -0.12568537890911102, + "step": 5408 + }, + { + "epoch": 3.740663900414938, + "grad_norm": 8.635771751403809, + "learning_rate": 3.477408944213924e-05, + "log_odds_chosen": 11.017953872680664, + "log_odds_ratio": -8.565557072870433e-05, + "logits/chosen": -0.06105683743953705, + "logits/rejected": -0.16102895140647888, + "logps/chosen": -0.00024903842131607234, + "logps/rejected": -2.231354236602783, + "loss": 1.3598, + "nll_loss": 0.3399292826652527, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4903842131607234e-05, + "rewards/margins": 0.22311052680015564, + "rewards/rejected": -0.22313544154167175, + "step": 5409 + }, + { + "epoch": 3.7413554633471646, + "grad_norm": 5.872206687927246, + "learning_rate": 3.477024742584909e-05, + "log_odds_chosen": 8.316761016845703, + "log_odds_ratio": -0.001023442717269063, + "logits/chosen": -0.6279017925262451, + "logits/rejected": -0.646528959274292, + "logps/chosen": -0.013748231343925, + "logps/rejected": -1.9299241304397583, + "loss": 2.7098, + "nll_loss": 0.6773370504379272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013748230412602425, + "rewards/margins": 0.19161759316921234, + "rewards/rejected": -0.1929924190044403, + "step": 5410 + }, + { + "epoch": 3.7420470262793915, + "grad_norm": 7.386795520782471, + "learning_rate": 3.4766405409558936e-05, + "log_odds_chosen": 8.211042404174805, + "log_odds_ratio": -0.03969957306981087, + "logits/chosen": -0.6178238391876221, + "logits/rejected": -0.627917468547821, + "logps/chosen": -0.01131765078753233, + "logps/rejected": -1.3942575454711914, + "loss": 1.5187, + "nll_loss": 0.37571465969085693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011317649623379111, + "rewards/margins": 0.13829399645328522, + "rewards/rejected": -0.13942575454711914, + "step": 5411 + }, + { + "epoch": 3.7427385892116183, + "grad_norm": 6.971358776092529, + "learning_rate": 3.476256339326879e-05, + "log_odds_chosen": 8.650684356689453, + "log_odds_ratio": -0.0025012255646288395, + "logits/chosen": -0.31345134973526, + "logits/rejected": -0.42115840315818787, + "logps/chosen": -0.01780068688094616, + "logps/rejected": -2.3259847164154053, + "loss": 1.1501, + "nll_loss": 0.2872798442840576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017800686182454228, + "rewards/margins": 0.23081842064857483, + "rewards/rejected": -0.23259848356246948, + "step": 5412 + }, + { + "epoch": 3.743430152143845, + "grad_norm": 13.72053337097168, + "learning_rate": 3.475872137697864e-05, + "log_odds_chosen": 10.826781272888184, + "log_odds_ratio": -3.6744382668985054e-05, + "logits/chosen": -0.48712050914764404, + "logits/rejected": -0.5958147644996643, + "logps/chosen": -0.00024669343838468194, + "logps/rejected": -2.1021265983581543, + "loss": 1.0914, + "nll_loss": 0.2728390097618103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.466934165568091e-05, + "rewards/margins": 0.21018798649311066, + "rewards/rejected": -0.21021266281604767, + "step": 5413 + }, + { + "epoch": 3.744121715076072, + "grad_norm": 14.538620948791504, + "learning_rate": 3.475487936068849e-05, + "log_odds_chosen": 11.273270606994629, + "log_odds_ratio": -1.5496178093599156e-05, + "logits/chosen": -0.9595328569412231, + "logits/rejected": -0.9811475276947021, + "logps/chosen": -0.0001429565018042922, + "logps/rejected": -2.308250904083252, + "loss": 1.767, + "nll_loss": 0.4417545795440674, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4295650544227101e-05, + "rewards/margins": 0.23081077635288239, + "rewards/rejected": -0.23082508146762848, + "step": 5414 + }, + { + "epoch": 3.7448132780082988, + "grad_norm": 10.922853469848633, + "learning_rate": 3.475103734439834e-05, + "log_odds_chosen": 10.66468620300293, + "log_odds_ratio": -8.033808262553066e-05, + "logits/chosen": -0.4718412756919861, + "logits/rejected": -0.49102020263671875, + "logps/chosen": -0.0015652257716283202, + "logps/rejected": -2.2271728515625, + "loss": 1.4328, + "nll_loss": 0.3581867218017578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001565225829835981, + "rewards/margins": 0.22256073355674744, + "rewards/rejected": -0.2227172553539276, + "step": 5415 + }, + { + "epoch": 3.7455048409405256, + "grad_norm": 8.63065242767334, + "learning_rate": 3.47471953281082e-05, + "log_odds_chosen": 9.848794937133789, + "log_odds_ratio": -0.00011888021253980696, + "logits/chosen": -0.44257479906082153, + "logits/rejected": -0.5804580450057983, + "logps/chosen": -0.0003653892199508846, + "logps/rejected": -1.6945195198059082, + "loss": 1.2216, + "nll_loss": 0.30539870262145996, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.653892417787574e-05, + "rewards/margins": 0.1694154143333435, + "rewards/rejected": -0.16945196688175201, + "step": 5416 + }, + { + "epoch": 3.7461964038727524, + "grad_norm": 12.192042350769043, + "learning_rate": 3.4743353311818044e-05, + "log_odds_chosen": 8.56835651397705, + "log_odds_ratio": -0.14475424587726593, + "logits/chosen": -0.682823657989502, + "logits/rejected": -0.7653946876525879, + "logps/chosen": -0.021642275154590607, + "logps/rejected": -1.8503457307815552, + "loss": 1.6474, + "nll_loss": 0.39738529920578003, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002164227655157447, + "rewards/margins": 0.18287035822868347, + "rewards/rejected": -0.1850345879793167, + "step": 5417 + }, + { + "epoch": 3.7468879668049793, + "grad_norm": 6.8051371574401855, + "learning_rate": 3.4739511295527896e-05, + "log_odds_chosen": 9.913517951965332, + "log_odds_ratio": -6.164831575006247e-05, + "logits/chosen": -0.4106307625770569, + "logits/rejected": -0.4225896894931793, + "logps/chosen": -0.0002594468533061445, + "logps/rejected": -1.63685941696167, + "loss": 1.0339, + "nll_loss": 0.25846049189567566, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.594468605821021e-05, + "rewards/margins": 0.1636599898338318, + "rewards/rejected": -0.16368593275547028, + "step": 5418 + }, + { + "epoch": 3.747579529737206, + "grad_norm": 11.767422676086426, + "learning_rate": 3.473566927923775e-05, + "log_odds_chosen": 8.98084831237793, + "log_odds_ratio": -0.0005303403595462441, + "logits/chosen": -0.43547505140304565, + "logits/rejected": -0.4607890546321869, + "logps/chosen": -0.010340893641114235, + "logps/rejected": -2.9152090549468994, + "loss": 1.7148, + "nll_loss": 0.42864322662353516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010340893641114235, + "rewards/margins": 0.29048681259155273, + "rewards/rejected": -0.291520893573761, + "step": 5419 + }, + { + "epoch": 3.748271092669433, + "grad_norm": 8.567863464355469, + "learning_rate": 3.4731827262947594e-05, + "log_odds_chosen": 8.407416343688965, + "log_odds_ratio": -0.03393007814884186, + "logits/chosen": -0.49492907524108887, + "logits/rejected": -0.519908607006073, + "logps/chosen": -0.017692282795906067, + "logps/rejected": -1.5308305025100708, + "loss": 1.185, + "nll_loss": 0.29286208748817444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017692282563075423, + "rewards/margins": 0.15131384134292603, + "rewards/rejected": -0.15308305621147156, + "step": 5420 + }, + { + "epoch": 3.7489626556016598, + "grad_norm": 11.476261138916016, + "learning_rate": 3.472798524665745e-05, + "log_odds_chosen": 7.869997978210449, + "log_odds_ratio": -0.012655356898903847, + "logits/chosen": -0.370304673910141, + "logits/rejected": -0.3986484408378601, + "logps/chosen": -0.009240656159818172, + "logps/rejected": -1.540095567703247, + "loss": 1.1696, + "nll_loss": 0.29112428426742554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009240655926987529, + "rewards/margins": 0.15308550000190735, + "rewards/rejected": -0.15400955080986023, + "step": 5421 + }, + { + "epoch": 3.7496542185338866, + "grad_norm": 14.321914672851562, + "learning_rate": 3.47241432303673e-05, + "log_odds_chosen": 9.759228706359863, + "log_odds_ratio": -0.009780412539839745, + "logits/chosen": -0.6264888048171997, + "logits/rejected": -0.6955153942108154, + "logps/chosen": -0.051967553794384, + "logps/rejected": -2.3276937007904053, + "loss": 1.6218, + "nll_loss": 0.4044795036315918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005196755286306143, + "rewards/margins": 0.22757261991500854, + "rewards/rejected": -0.23276937007904053, + "step": 5422 + }, + { + "epoch": 3.7503457814661134, + "grad_norm": 7.869983196258545, + "learning_rate": 3.472030121407715e-05, + "log_odds_chosen": 9.434892654418945, + "log_odds_ratio": -0.0005775390309281647, + "logits/chosen": -0.6662434339523315, + "logits/rejected": -0.6797504425048828, + "logps/chosen": -0.02035946026444435, + "logps/rejected": -2.3536477088928223, + "loss": 1.3647, + "nll_loss": 0.3411150574684143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002035945886746049, + "rewards/margins": 0.23332881927490234, + "rewards/rejected": -0.23536476492881775, + "step": 5423 + }, + { + "epoch": 3.7510373443983402, + "grad_norm": 8.915014266967773, + "learning_rate": 3.4716459197787e-05, + "log_odds_chosen": 9.686923027038574, + "log_odds_ratio": -0.00028954161098226905, + "logits/chosen": -0.5250073671340942, + "logits/rejected": -0.5078399181365967, + "logps/chosen": -0.005831631366163492, + "logps/rejected": -1.9717814922332764, + "loss": 1.784, + "nll_loss": 0.4459819495677948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005831630551256239, + "rewards/margins": 0.1965949833393097, + "rewards/rejected": -0.19717815518379211, + "step": 5424 + }, + { + "epoch": 3.751728907330567, + "grad_norm": 10.456550598144531, + "learning_rate": 3.471261718149686e-05, + "log_odds_chosen": 8.61825942993164, + "log_odds_ratio": -0.000583041284698993, + "logits/chosen": -0.5446162819862366, + "logits/rejected": -0.5490441918373108, + "logps/chosen": -0.0019385780906304717, + "logps/rejected": -1.8861501216888428, + "loss": 1.7346, + "nll_loss": 0.4335922598838806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019385780615266412, + "rewards/margins": 0.18842115998268127, + "rewards/rejected": -0.18861502408981323, + "step": 5425 + }, + { + "epoch": 3.752420470262794, + "grad_norm": 7.999762535095215, + "learning_rate": 3.47087751652067e-05, + "log_odds_chosen": 9.54379653930664, + "log_odds_ratio": -0.0001644420699449256, + "logits/chosen": -0.29200607538223267, + "logits/rejected": -0.3707253336906433, + "logps/chosen": -0.0003151461132802069, + "logps/rejected": -1.3649253845214844, + "loss": 1.1779, + "nll_loss": 0.29446306824684143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.151460987282917e-05, + "rewards/margins": 0.1364610344171524, + "rewards/rejected": -0.1364925503730774, + "step": 5426 + }, + { + "epoch": 3.7531120331950207, + "grad_norm": 10.632295608520508, + "learning_rate": 3.4704933148916555e-05, + "log_odds_chosen": 8.853446006774902, + "log_odds_ratio": -0.018114643171429634, + "logits/chosen": -0.7446925640106201, + "logits/rejected": -0.7750409841537476, + "logps/chosen": -0.01745034009218216, + "logps/rejected": -2.154756784439087, + "loss": 2.0903, + "nll_loss": 0.5207608342170715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017450341256335378, + "rewards/margins": 0.21373064815998077, + "rewards/rejected": -0.2154756784439087, + "step": 5427 + }, + { + "epoch": 3.7538035961272476, + "grad_norm": 17.957927703857422, + "learning_rate": 3.470109113262641e-05, + "log_odds_chosen": 10.859111785888672, + "log_odds_ratio": -5.457565202959813e-05, + "logits/chosen": -0.37744563817977905, + "logits/rejected": -0.5010693073272705, + "logps/chosen": -0.00040545733645558357, + "logps/rejected": -2.7265172004699707, + "loss": 1.4821, + "nll_loss": 0.37053102254867554, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.054573582834564e-05, + "rewards/margins": 0.27261117100715637, + "rewards/rejected": -0.27265170216560364, + "step": 5428 + }, + { + "epoch": 3.7544951590594744, + "grad_norm": 14.420669555664062, + "learning_rate": 3.469724911633625e-05, + "log_odds_chosen": 9.603649139404297, + "log_odds_ratio": -0.0013310567010194063, + "logits/chosen": -0.5402454733848572, + "logits/rejected": -0.6616002321243286, + "logps/chosen": -0.002875036559998989, + "logps/rejected": -1.7499427795410156, + "loss": 1.7188, + "nll_loss": 0.429568886756897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028750361525453627, + "rewards/margins": 0.17470680177211761, + "rewards/rejected": -0.17499428987503052, + "step": 5429 + }, + { + "epoch": 3.7551867219917012, + "grad_norm": 5.218062400817871, + "learning_rate": 3.4693407100046105e-05, + "log_odds_chosen": 7.883607387542725, + "log_odds_ratio": -0.005045240744948387, + "logits/chosen": -0.5111541748046875, + "logits/rejected": -0.5074277520179749, + "logps/chosen": -0.008123712614178658, + "logps/rejected": -1.525122880935669, + "loss": 1.946, + "nll_loss": 0.4859926700592041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008123713196255267, + "rewards/margins": 0.15169993042945862, + "rewards/rejected": -0.1525123119354248, + "step": 5430 + }, + { + "epoch": 3.755878284923928, + "grad_norm": 22.380603790283203, + "learning_rate": 3.468956508375596e-05, + "log_odds_chosen": 8.503776550292969, + "log_odds_ratio": -0.004393002949655056, + "logits/chosen": -0.6649891138076782, + "logits/rejected": -0.6843799948692322, + "logps/chosen": -0.024574745446443558, + "logps/rejected": -1.8706204891204834, + "loss": 1.8433, + "nll_loss": 0.46039655804634094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002457474824041128, + "rewards/margins": 0.18460458517074585, + "rewards/rejected": -0.18706205487251282, + "step": 5431 + }, + { + "epoch": 3.756569847856155, + "grad_norm": 14.127603530883789, + "learning_rate": 3.468572306746581e-05, + "log_odds_chosen": 10.379104614257812, + "log_odds_ratio": -6.107140507083386e-05, + "logits/chosen": -0.7364578247070312, + "logits/rejected": -0.7871562838554382, + "logps/chosen": -0.00017190205107908696, + "logps/rejected": -1.7768229246139526, + "loss": 1.4118, + "nll_loss": 0.3529343605041504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.719020656310022e-05, + "rewards/margins": 0.1776650995016098, + "rewards/rejected": -0.1776822805404663, + "step": 5432 + }, + { + "epoch": 3.7572614107883817, + "grad_norm": 9.674400329589844, + "learning_rate": 3.4681881051175656e-05, + "log_odds_chosen": 8.356182098388672, + "log_odds_ratio": -0.029486514627933502, + "logits/chosen": -0.4162288010120392, + "logits/rejected": -0.4873002767562866, + "logps/chosen": -0.007492970675230026, + "logps/rejected": -1.1829661130905151, + "loss": 1.1816, + "nll_loss": 0.29244640469551086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007492971490137279, + "rewards/margins": 0.11754731833934784, + "rewards/rejected": -0.11829661577939987, + "step": 5433 + }, + { + "epoch": 3.7579529737206085, + "grad_norm": 9.0473051071167, + "learning_rate": 3.4678039034885515e-05, + "log_odds_chosen": 8.62293815612793, + "log_odds_ratio": -0.0006495437119156122, + "logits/chosen": -0.8198421597480774, + "logits/rejected": -0.8905788660049438, + "logps/chosen": -0.004521318711340427, + "logps/rejected": -1.5219128131866455, + "loss": 1.6793, + "nll_loss": 0.419758677482605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045213187695480883, + "rewards/margins": 0.15173915028572083, + "rewards/rejected": -0.15219128131866455, + "step": 5434 + }, + { + "epoch": 3.7586445366528354, + "grad_norm": 7.616481781005859, + "learning_rate": 3.467419701859536e-05, + "log_odds_chosen": 9.509088516235352, + "log_odds_ratio": -0.000644803571049124, + "logits/chosen": -0.39439302682876587, + "logits/rejected": -0.45953693985939026, + "logps/chosen": -0.0015192057471722364, + "logps/rejected": -2.0177860260009766, + "loss": 1.2328, + "nll_loss": 0.3081299960613251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015192058344837278, + "rewards/margins": 0.20162668824195862, + "rewards/rejected": -0.2017786204814911, + "step": 5435 + }, + { + "epoch": 3.759336099585062, + "grad_norm": 9.757256507873535, + "learning_rate": 3.467035500230521e-05, + "log_odds_chosen": 8.616477966308594, + "log_odds_ratio": -0.0004357987781986594, + "logits/chosen": -0.0953388512134552, + "logits/rejected": -0.15484541654586792, + "logps/chosen": -0.0006469248910434544, + "logps/rejected": -1.1625604629516602, + "loss": 1.7917, + "nll_loss": 0.4478727877140045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.469249638030306e-05, + "rewards/margins": 0.11619135737419128, + "rewards/rejected": -0.11625605821609497, + "step": 5436 + }, + { + "epoch": 3.760027662517289, + "grad_norm": 8.751032829284668, + "learning_rate": 3.4666512986015066e-05, + "log_odds_chosen": 9.165270805358887, + "log_odds_ratio": -0.00036068703047931194, + "logits/chosen": -0.7682297229766846, + "logits/rejected": -0.8347011804580688, + "logps/chosen": -0.008912745863199234, + "logps/rejected": -2.608675479888916, + "loss": 2.0795, + "nll_loss": 0.5198372006416321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008912745979614556, + "rewards/margins": 0.25997626781463623, + "rewards/rejected": -0.26086753606796265, + "step": 5437 + }, + { + "epoch": 3.760719225449516, + "grad_norm": 9.613621711730957, + "learning_rate": 3.466267096972491e-05, + "log_odds_chosen": 8.684236526489258, + "log_odds_ratio": -0.08695728331804276, + "logits/chosen": -0.3187865614891052, + "logits/rejected": -0.3907451629638672, + "logps/chosen": -0.019023144617676735, + "logps/rejected": -1.8880045413970947, + "loss": 1.1915, + "nll_loss": 0.2891791760921478, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019023144850507379, + "rewards/margins": 0.1868981420993805, + "rewards/rejected": -0.18880045413970947, + "step": 5438 + }, + { + "epoch": 3.7614107883817427, + "grad_norm": 10.468111991882324, + "learning_rate": 3.4658828953434764e-05, + "log_odds_chosen": 7.793971538543701, + "log_odds_ratio": -0.04423436149954796, + "logits/chosen": -0.4522883892059326, + "logits/rejected": -0.37912601232528687, + "logps/chosen": -0.013493603095412254, + "logps/rejected": -2.09865140914917, + "loss": 1.5326, + "nll_loss": 0.3787163496017456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013493604492396116, + "rewards/margins": 0.20851579308509827, + "rewards/rejected": -0.20986516773700714, + "step": 5439 + }, + { + "epoch": 3.7621023513139695, + "grad_norm": 6.3440141677856445, + "learning_rate": 3.4654986937144616e-05, + "log_odds_chosen": 9.558298110961914, + "log_odds_ratio": -0.0001284776080865413, + "logits/chosen": -0.2267589271068573, + "logits/rejected": -0.3366238474845886, + "logps/chosen": -0.00021380206453613937, + "logps/rejected": -1.3035376071929932, + "loss": 1.1763, + "nll_loss": 0.2940501868724823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1380204998422414e-05, + "rewards/margins": 0.13033238053321838, + "rewards/rejected": -0.13035376369953156, + "step": 5440 + }, + { + "epoch": 3.7627939142461964, + "grad_norm": 7.013885974884033, + "learning_rate": 3.465114492085447e-05, + "log_odds_chosen": 9.993585586547852, + "log_odds_ratio": -0.00019201546092517674, + "logits/chosen": -0.19097547233104706, + "logits/rejected": -0.18724490702152252, + "logps/chosen": -0.0007030193228274584, + "logps/rejected": -2.3049535751342773, + "loss": 1.4744, + "nll_loss": 0.36857688426971436, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.030193955870345e-05, + "rewards/margins": 0.23042505979537964, + "rewards/rejected": -0.23049534857273102, + "step": 5441 + }, + { + "epoch": 3.763485477178423, + "grad_norm": 9.381342887878418, + "learning_rate": 3.4647302904564314e-05, + "log_odds_chosen": 10.290952682495117, + "log_odds_ratio": -0.0002553090744186193, + "logits/chosen": -0.35363638401031494, + "logits/rejected": -0.38287413120269775, + "logps/chosen": -0.0024143143091350794, + "logps/rejected": -1.7505064010620117, + "loss": 1.1547, + "nll_loss": 0.2886606752872467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024143143673427403, + "rewards/margins": 0.17480921745300293, + "rewards/rejected": -0.17505064606666565, + "step": 5442 + }, + { + "epoch": 3.76417704011065, + "grad_norm": 8.822539329528809, + "learning_rate": 3.4643460888274174e-05, + "log_odds_chosen": 9.295948028564453, + "log_odds_ratio": -0.001861661090515554, + "logits/chosen": -0.46477746963500977, + "logits/rejected": -0.4023154377937317, + "logps/chosen": -0.0046881274320185184, + "logps/rejected": -1.7715680599212646, + "loss": 1.3101, + "nll_loss": 0.32735079526901245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046881273738108575, + "rewards/margins": 0.17668798565864563, + "rewards/rejected": -0.17715679109096527, + "step": 5443 + }, + { + "epoch": 3.764868603042877, + "grad_norm": 11.759678840637207, + "learning_rate": 3.463961887198402e-05, + "log_odds_chosen": 7.539575099945068, + "log_odds_ratio": -0.03255218267440796, + "logits/chosen": -0.3159242570400238, + "logits/rejected": -0.3614596426486969, + "logps/chosen": -0.013484388589859009, + "logps/rejected": -1.5943424701690674, + "loss": 2.2776, + "nll_loss": 0.5661398768424988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013484389055520296, + "rewards/margins": 0.15808580815792084, + "rewards/rejected": -0.1594342440366745, + "step": 5444 + }, + { + "epoch": 3.7655601659751037, + "grad_norm": 10.029745101928711, + "learning_rate": 3.463577685569387e-05, + "log_odds_chosen": 8.731345176696777, + "log_odds_ratio": -0.0019150073640048504, + "logits/chosen": -0.4098225235939026, + "logits/rejected": -0.4700325131416321, + "logps/chosen": -0.00333023676648736, + "logps/rejected": -1.3874382972717285, + "loss": 1.688, + "nll_loss": 0.4218185544013977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003330236650072038, + "rewards/margins": 0.13841082155704498, + "rewards/rejected": -0.13874384760856628, + "step": 5445 + }, + { + "epoch": 3.7662517289073305, + "grad_norm": 4.650477886199951, + "learning_rate": 3.463193483940372e-05, + "log_odds_chosen": 8.619306564331055, + "log_odds_ratio": -0.010178805328905582, + "logits/chosen": -0.28032341599464417, + "logits/rejected": -0.30539238452911377, + "logps/chosen": -0.012560890056192875, + "logps/rejected": -1.3080579042434692, + "loss": 1.2625, + "nll_loss": 0.3146149814128876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012560889590531588, + "rewards/margins": 0.12954971194267273, + "rewards/rejected": -0.13080579042434692, + "step": 5446 + }, + { + "epoch": 3.7669432918395573, + "grad_norm": 10.749146461486816, + "learning_rate": 3.462809282311357e-05, + "log_odds_chosen": 10.158448219299316, + "log_odds_ratio": -8.47989649628289e-05, + "logits/chosen": -0.5772304534912109, + "logits/rejected": -0.7337908744812012, + "logps/chosen": -0.0004312293021939695, + "logps/rejected": -1.7817490100860596, + "loss": 1.0402, + "nll_loss": 0.26004502177238464, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.312293458497152e-05, + "rewards/margins": 0.17813177406787872, + "rewards/rejected": -0.1781749129295349, + "step": 5447 + }, + { + "epoch": 3.767634854771784, + "grad_norm": 10.537328720092773, + "learning_rate": 3.462425080682342e-05, + "log_odds_chosen": 10.657795906066895, + "log_odds_ratio": -3.852910958812572e-05, + "logits/chosen": -0.30140772461891174, + "logits/rejected": -0.2932639718055725, + "logps/chosen": -0.00013195013161748648, + "logps/rejected": -1.654346227645874, + "loss": 1.249, + "nll_loss": 0.3122471272945404, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3195011888456065e-05, + "rewards/margins": 0.16542142629623413, + "rewards/rejected": -0.16543462872505188, + "step": 5448 + }, + { + "epoch": 3.768326417704011, + "grad_norm": 7.2749714851379395, + "learning_rate": 3.462040879053327e-05, + "log_odds_chosen": 9.7667236328125, + "log_odds_ratio": -0.0003579896583687514, + "logits/chosen": -0.7348923087120056, + "logits/rejected": -0.7449120879173279, + "logps/chosen": -0.0003365372831467539, + "logps/rejected": -1.845354437828064, + "loss": 1.2263, + "nll_loss": 0.30654260516166687, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3653730497462675e-05, + "rewards/margins": 0.1845017671585083, + "rewards/rejected": -0.1845354437828064, + "step": 5449 + }, + { + "epoch": 3.769017980636238, + "grad_norm": 8.713778495788574, + "learning_rate": 3.461656677424313e-05, + "log_odds_chosen": 9.448784828186035, + "log_odds_ratio": -0.0003078113659285009, + "logits/chosen": -0.4085184633731842, + "logits/rejected": -0.46104303002357483, + "logps/chosen": -0.0005590122891589999, + "logps/rejected": -1.4110808372497559, + "loss": 1.4083, + "nll_loss": 0.35204440355300903, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.590123328147456e-05, + "rewards/margins": 0.14105218648910522, + "rewards/rejected": -0.14110808074474335, + "step": 5450 + }, + { + "epoch": 3.7697095435684647, + "grad_norm": 12.606882095336914, + "learning_rate": 3.461272475795297e-05, + "log_odds_chosen": 10.149349212646484, + "log_odds_ratio": -0.0002808647695928812, + "logits/chosen": -0.7192884683609009, + "logits/rejected": -0.7955207228660583, + "logps/chosen": -0.00038690725341439247, + "logps/rejected": -2.0651695728302, + "loss": 1.6302, + "nll_loss": 0.40752604603767395, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.869072315865196e-05, + "rewards/margins": 0.2064782679080963, + "rewards/rejected": -0.20651696622371674, + "step": 5451 + }, + { + "epoch": 3.7704011065006915, + "grad_norm": 10.577102661132812, + "learning_rate": 3.4608882741662825e-05, + "log_odds_chosen": 9.80659008026123, + "log_odds_ratio": -0.00022310206259135157, + "logits/chosen": -0.4899018704891205, + "logits/rejected": -0.5291027426719666, + "logps/chosen": -0.010257050395011902, + "logps/rejected": -2.334555149078369, + "loss": 1.7324, + "nll_loss": 0.4330710172653198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010257052490487695, + "rewards/margins": 0.23242978751659393, + "rewards/rejected": -0.23345550894737244, + "step": 5452 + }, + { + "epoch": 3.7710926694329183, + "grad_norm": 7.982278823852539, + "learning_rate": 3.460504072537268e-05, + "log_odds_chosen": 9.754339218139648, + "log_odds_ratio": -0.00030538038117811084, + "logits/chosen": -0.3912992775440216, + "logits/rejected": -0.44544699788093567, + "logps/chosen": -0.0001893314765766263, + "logps/rejected": -1.4891002178192139, + "loss": 1.9684, + "nll_loss": 0.4920760691165924, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.893314765766263e-05, + "rewards/margins": 0.14889109134674072, + "rewards/rejected": -0.1489100158214569, + "step": 5453 + }, + { + "epoch": 3.771784232365145, + "grad_norm": 16.61277198791504, + "learning_rate": 3.460119870908253e-05, + "log_odds_chosen": 9.0419921875, + "log_odds_ratio": -0.19790831208229065, + "logits/chosen": -0.3988495171070099, + "logits/rejected": -0.5029575824737549, + "logps/chosen": -0.031020207330584526, + "logps/rejected": -1.7238812446594238, + "loss": 1.7822, + "nll_loss": 0.42575719952583313, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00310202082619071, + "rewards/margins": 0.1692861169576645, + "rewards/rejected": -0.17238813638687134, + "step": 5454 + }, + { + "epoch": 3.772475795297372, + "grad_norm": 7.851132869720459, + "learning_rate": 3.4597356692792376e-05, + "log_odds_chosen": 9.307454109191895, + "log_odds_ratio": -0.0012680424842983484, + "logits/chosen": -0.6060723662376404, + "logits/rejected": -0.6443691253662109, + "logps/chosen": -0.001814560848288238, + "logps/rejected": -1.7838623523712158, + "loss": 2.055, + "nll_loss": 0.5136182904243469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000181456096470356, + "rewards/margins": 0.17820480465888977, + "rewards/rejected": -0.17838624119758606, + "step": 5455 + }, + { + "epoch": 3.773167358229599, + "grad_norm": 10.791706085205078, + "learning_rate": 3.459351467650223e-05, + "log_odds_chosen": 9.634557723999023, + "log_odds_ratio": -0.0003079564485233277, + "logits/chosen": -0.46386775374412537, + "logits/rejected": -0.6445642709732056, + "logps/chosen": -0.001995598431676626, + "logps/rejected": -2.0451226234436035, + "loss": 1.7906, + "nll_loss": 0.44761550426483154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019955984316766262, + "rewards/margins": 0.20431271195411682, + "rewards/rejected": -0.20451226830482483, + "step": 5456 + }, + { + "epoch": 3.7738589211618256, + "grad_norm": 13.1845121383667, + "learning_rate": 3.458967266021208e-05, + "log_odds_chosen": 10.19453239440918, + "log_odds_ratio": -0.0001586043363204226, + "logits/chosen": -0.5500466823577881, + "logits/rejected": -0.6049355268478394, + "logps/chosen": -0.00206363620236516, + "logps/rejected": -2.23915958404541, + "loss": 2.229, + "nll_loss": 0.5572376251220703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020636359113268554, + "rewards/margins": 0.2237095981836319, + "rewards/rejected": -0.2239159643650055, + "step": 5457 + }, + { + "epoch": 3.7745504840940525, + "grad_norm": 8.474950790405273, + "learning_rate": 3.4585830643921926e-05, + "log_odds_chosen": 9.389838218688965, + "log_odds_ratio": -0.012782618403434753, + "logits/chosen": -0.5932435393333435, + "logits/rejected": -0.5870939493179321, + "logps/chosen": -0.004997893236577511, + "logps/rejected": -1.9567699432373047, + "loss": 0.9998, + "nll_loss": 0.2486783266067505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004997893120162189, + "rewards/margins": 0.19517722725868225, + "rewards/rejected": -0.1956770122051239, + "step": 5458 + }, + { + "epoch": 3.7752420470262793, + "grad_norm": 14.28724193572998, + "learning_rate": 3.4581988627631786e-05, + "log_odds_chosen": 11.236981391906738, + "log_odds_ratio": -1.6953132217167877e-05, + "logits/chosen": -0.4602336287498474, + "logits/rejected": -0.5732054114341736, + "logps/chosen": -0.00024608871899545193, + "logps/rejected": -2.546398401260376, + "loss": 1.8179, + "nll_loss": 0.4544837176799774, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.460887117194943e-05, + "rewards/margins": 0.25461524724960327, + "rewards/rejected": -0.2546398639678955, + "step": 5459 + }, + { + "epoch": 3.775933609958506, + "grad_norm": 11.284528732299805, + "learning_rate": 3.457814661134163e-05, + "log_odds_chosen": 9.693879127502441, + "log_odds_ratio": -0.015536747872829437, + "logits/chosen": -0.32484331727027893, + "logits/rejected": -0.3892527222633362, + "logps/chosen": -0.006452981382608414, + "logps/rejected": -1.7182650566101074, + "loss": 2.2155, + "nll_loss": 0.5523088574409485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006452981033362448, + "rewards/margins": 0.17118120193481445, + "rewards/rejected": -0.17182651162147522, + "step": 5460 + }, + { + "epoch": 3.776625172890733, + "grad_norm": 7.1791911125183105, + "learning_rate": 3.4574304595051484e-05, + "log_odds_chosen": 8.559253692626953, + "log_odds_ratio": -0.0024300473742187023, + "logits/chosen": -0.2576027512550354, + "logits/rejected": -0.33050549030303955, + "logps/chosen": -0.005318128038197756, + "logps/rejected": -1.4191077947616577, + "loss": 1.0917, + "nll_loss": 0.2726795971393585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005318127805367112, + "rewards/margins": 0.1413789689540863, + "rewards/rejected": -0.14191077649593353, + "step": 5461 + }, + { + "epoch": 3.77731673582296, + "grad_norm": 11.800247192382812, + "learning_rate": 3.4570462578761336e-05, + "log_odds_chosen": 9.39711856842041, + "log_odds_ratio": -0.0012419902486726642, + "logits/chosen": -0.5844036936759949, + "logits/rejected": -0.6802327632904053, + "logps/chosen": -0.0009554505231790245, + "logps/rejected": -1.7297091484069824, + "loss": 1.656, + "nll_loss": 0.41388368606567383, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.554505231790245e-05, + "rewards/margins": 0.17287535965442657, + "rewards/rejected": -0.17297090590000153, + "step": 5462 + }, + { + "epoch": 3.7780082987551866, + "grad_norm": 8.81169605255127, + "learning_rate": 3.456662056247119e-05, + "log_odds_chosen": 10.17690658569336, + "log_odds_ratio": -5.8187048125546426e-05, + "logits/chosen": -0.39503562450408936, + "logits/rejected": -0.4741494059562683, + "logps/chosen": -0.00017182572628371418, + "logps/rejected": -1.5729782581329346, + "loss": 1.3494, + "nll_loss": 0.3373528718948364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.718257408356294e-05, + "rewards/margins": 0.15728065371513367, + "rewards/rejected": -0.15729783475399017, + "step": 5463 + }, + { + "epoch": 3.7786998616874135, + "grad_norm": 7.847090721130371, + "learning_rate": 3.4562778546181034e-05, + "log_odds_chosen": 10.096391677856445, + "log_odds_ratio": -7.392516999971122e-05, + "logits/chosen": -0.7516282796859741, + "logits/rejected": -0.7118873596191406, + "logps/chosen": -0.00014651704987045377, + "logps/rejected": -1.446256160736084, + "loss": 1.2984, + "nll_loss": 0.3245847523212433, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.465170589654008e-05, + "rewards/margins": 0.14461097121238708, + "rewards/rejected": -0.14462561905384064, + "step": 5464 + }, + { + "epoch": 3.7793914246196403, + "grad_norm": 9.622819900512695, + "learning_rate": 3.455893652989089e-05, + "log_odds_chosen": 7.836572647094727, + "log_odds_ratio": -0.14803458750247955, + "logits/chosen": -0.47771313786506653, + "logits/rejected": -0.5944380760192871, + "logps/chosen": -0.033774301409721375, + "logps/rejected": -1.84254789352417, + "loss": 1.4532, + "nll_loss": 0.348498672246933, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003377429908141494, + "rewards/margins": 0.18087737262248993, + "rewards/rejected": -0.18425479531288147, + "step": 5465 + }, + { + "epoch": 3.780082987551867, + "grad_norm": 13.050857543945312, + "learning_rate": 3.455509451360074e-05, + "log_odds_chosen": 10.594888687133789, + "log_odds_ratio": -6.776869122404605e-05, + "logits/chosen": -0.5613893866539001, + "logits/rejected": -0.6028444170951843, + "logps/chosen": -0.0006440202705562115, + "logps/rejected": -2.637077569961548, + "loss": 1.6949, + "nll_loss": 0.42372721433639526, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.440203287638724e-05, + "rewards/margins": 0.263643354177475, + "rewards/rejected": -0.2637077569961548, + "step": 5466 + }, + { + "epoch": 3.780774550484094, + "grad_norm": 12.926630020141602, + "learning_rate": 3.4551252497310585e-05, + "log_odds_chosen": 10.582441329956055, + "log_odds_ratio": -3.3281525247730315e-05, + "logits/chosen": -0.5533230900764465, + "logits/rejected": -0.5581379532814026, + "logps/chosen": -0.00013502439833246171, + "logps/rejected": -1.6435058116912842, + "loss": 1.3104, + "nll_loss": 0.3276028633117676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3502440197044052e-05, + "rewards/margins": 0.16433709859848022, + "rewards/rejected": -0.16435059905052185, + "step": 5467 + }, + { + "epoch": 3.7814661134163208, + "grad_norm": 12.386612892150879, + "learning_rate": 3.4547410481020444e-05, + "log_odds_chosen": 7.505845546722412, + "log_odds_ratio": -0.022535445168614388, + "logits/chosen": -0.46632689237594604, + "logits/rejected": -0.4704776108264923, + "logps/chosen": -0.02573973685503006, + "logps/rejected": -1.221304178237915, + "loss": 1.2671, + "nll_loss": 0.31453269720077515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025739737320691347, + "rewards/margins": 0.11955644190311432, + "rewards/rejected": -0.12213042378425598, + "step": 5468 + }, + { + "epoch": 3.7821576763485476, + "grad_norm": 9.335777282714844, + "learning_rate": 3.454356846473029e-05, + "log_odds_chosen": 9.750051498413086, + "log_odds_ratio": -0.00015797732339706272, + "logits/chosen": -0.3987863063812256, + "logits/rejected": -0.390306293964386, + "logps/chosen": -0.00020262066391296685, + "logps/rejected": -1.3902456760406494, + "loss": 2.2072, + "nll_loss": 0.5517906546592712, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0262066755094565e-05, + "rewards/margins": 0.13900430500507355, + "rewards/rejected": -0.13902458548545837, + "step": 5469 + }, + { + "epoch": 3.7828492392807744, + "grad_norm": 10.659040451049805, + "learning_rate": 3.453972644844014e-05, + "log_odds_chosen": 10.151968002319336, + "log_odds_ratio": -0.00011392178566893563, + "logits/chosen": -0.5698919296264648, + "logits/rejected": -0.6047060489654541, + "logps/chosen": -0.001036653877235949, + "logps/rejected": -1.7720322608947754, + "loss": 1.4491, + "nll_loss": 0.36225903034210205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010366539208916947, + "rewards/margins": 0.1770995706319809, + "rewards/rejected": -0.1772032380104065, + "step": 5470 + }, + { + "epoch": 3.7835408022130013, + "grad_norm": 8.225078582763672, + "learning_rate": 3.4535884432149995e-05, + "log_odds_chosen": 8.153675079345703, + "log_odds_ratio": -0.00462321937084198, + "logits/chosen": -0.5210813283920288, + "logits/rejected": -0.5549103021621704, + "logps/chosen": -0.010970650240778923, + "logps/rejected": -1.762834906578064, + "loss": 1.6095, + "nll_loss": 0.4019153416156769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010970650473609567, + "rewards/margins": 0.175186425447464, + "rewards/rejected": -0.17628349363803864, + "step": 5471 + }, + { + "epoch": 3.784232365145228, + "grad_norm": 10.515767097473145, + "learning_rate": 3.453204241585985e-05, + "log_odds_chosen": 8.277547836303711, + "log_odds_ratio": -0.0133607042953372, + "logits/chosen": -0.6946769952774048, + "logits/rejected": -0.8080295324325562, + "logps/chosen": -0.028481382876634598, + "logps/rejected": -1.5522637367248535, + "loss": 1.68, + "nll_loss": 0.4186699390411377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002848138101398945, + "rewards/margins": 0.15237824618816376, + "rewards/rejected": -0.15522637963294983, + "step": 5472 + }, + { + "epoch": 3.784923928077455, + "grad_norm": 9.187445640563965, + "learning_rate": 3.452820039956969e-05, + "log_odds_chosen": 8.612812995910645, + "log_odds_ratio": -0.0021816876251250505, + "logits/chosen": -0.6883058547973633, + "logits/rejected": -0.765465259552002, + "logps/chosen": -0.010355999693274498, + "logps/rejected": -1.5710110664367676, + "loss": 1.5977, + "nll_loss": 0.3991968333721161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010355999693274498, + "rewards/margins": 0.15606552362442017, + "rewards/rejected": -0.157101109623909, + "step": 5473 + }, + { + "epoch": 3.7856154910096818, + "grad_norm": 7.578182220458984, + "learning_rate": 3.4524358383279545e-05, + "log_odds_chosen": 9.225852966308594, + "log_odds_ratio": -0.13350188732147217, + "logits/chosen": -0.33595073223114014, + "logits/rejected": -0.38651329278945923, + "logps/chosen": -0.037560053169727325, + "logps/rejected": -2.838648557662964, + "loss": 1.7924, + "nll_loss": 0.4347480535507202, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003756005549803376, + "rewards/margins": 0.28010886907577515, + "rewards/rejected": -0.2838648557662964, + "step": 5474 + }, + { + "epoch": 3.7863070539419086, + "grad_norm": 32.91754913330078, + "learning_rate": 3.45205163669894e-05, + "log_odds_chosen": 5.964416980743408, + "log_odds_ratio": -0.6886153221130371, + "logits/chosen": -0.5881133079528809, + "logits/rejected": -0.6527892351150513, + "logps/chosen": -0.09204834699630737, + "logps/rejected": -1.7693171501159668, + "loss": 1.8929, + "nll_loss": 0.40435975790023804, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009204834699630737, + "rewards/margins": 0.16772687435150146, + "rewards/rejected": -0.1769317090511322, + "step": 5475 + }, + { + "epoch": 3.7869986168741354, + "grad_norm": 13.364775657653809, + "learning_rate": 3.451667435069924e-05, + "log_odds_chosen": 8.272403717041016, + "log_odds_ratio": -0.027833154425024986, + "logits/chosen": -0.6419265866279602, + "logits/rejected": -0.6762416362762451, + "logps/chosen": -0.007033531554043293, + "logps/rejected": -1.4588756561279297, + "loss": 1.3591, + "nll_loss": 0.33700141310691833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007033531437627971, + "rewards/margins": 0.1451842188835144, + "rewards/rejected": -0.1458875685930252, + "step": 5476 + }, + { + "epoch": 3.7876901798063622, + "grad_norm": 11.957737922668457, + "learning_rate": 3.45128323344091e-05, + "log_odds_chosen": 10.761993408203125, + "log_odds_ratio": -2.629458685987629e-05, + "logits/chosen": -0.8296110033988953, + "logits/rejected": -0.8898372054100037, + "logps/chosen": -0.00018794195784721524, + "logps/rejected": -2.174938201904297, + "loss": 1.4994, + "nll_loss": 0.3748519718647003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8794196876115166e-05, + "rewards/margins": 0.21747499704360962, + "rewards/rejected": -0.21749380230903625, + "step": 5477 + }, + { + "epoch": 3.788381742738589, + "grad_norm": 4.69172477722168, + "learning_rate": 3.450899031811895e-05, + "log_odds_chosen": 8.808714866638184, + "log_odds_ratio": -0.006193371489644051, + "logits/chosen": -0.48362404108047485, + "logits/rejected": -0.5160248875617981, + "logps/chosen": -0.00858994759619236, + "logps/rejected": -2.0021519660949707, + "loss": 1.4342, + "nll_loss": 0.35793858766555786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008589947246946394, + "rewards/margins": 0.19935621321201324, + "rewards/rejected": -0.2002152055501938, + "step": 5478 + }, + { + "epoch": 3.789073305670816, + "grad_norm": 7.904622554779053, + "learning_rate": 3.45051483018288e-05, + "log_odds_chosen": 8.666353225708008, + "log_odds_ratio": -0.11096343398094177, + "logits/chosen": -0.42745456099510193, + "logits/rejected": -0.4892045259475708, + "logps/chosen": -0.028296923264861107, + "logps/rejected": -1.572574257850647, + "loss": 1.3645, + "nll_loss": 0.3300341069698334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028296923264861107, + "rewards/margins": 0.15442773699760437, + "rewards/rejected": -0.15725743770599365, + "step": 5479 + }, + { + "epoch": 3.7897648686030427, + "grad_norm": 12.057756423950195, + "learning_rate": 3.450130628553865e-05, + "log_odds_chosen": 10.21658706665039, + "log_odds_ratio": -0.00010321993613615632, + "logits/chosen": -0.7294517159461975, + "logits/rejected": -0.748309314250946, + "logps/chosen": -0.00037140565109439194, + "logps/rejected": -1.8634538650512695, + "loss": 1.8853, + "nll_loss": 0.471309095621109, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.714056219905615e-05, + "rewards/margins": 0.18630823493003845, + "rewards/rejected": -0.18634536862373352, + "step": 5480 + }, + { + "epoch": 3.7904564315352696, + "grad_norm": 8.47179126739502, + "learning_rate": 3.4497464269248505e-05, + "log_odds_chosen": 8.91946029663086, + "log_odds_ratio": -0.0006514595006592572, + "logits/chosen": -0.3386085331439972, + "logits/rejected": -0.4115927219390869, + "logps/chosen": -0.0004757773713208735, + "logps/rejected": -1.090081810951233, + "loss": 1.3782, + "nll_loss": 0.34447595477104187, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.757773785968311e-05, + "rewards/margins": 0.1089605987071991, + "rewards/rejected": -0.10900817811489105, + "step": 5481 + }, + { + "epoch": 3.7911479944674964, + "grad_norm": 11.323335647583008, + "learning_rate": 3.449362225295835e-05, + "log_odds_chosen": 9.457518577575684, + "log_odds_ratio": -0.0002437549119349569, + "logits/chosen": -0.793126106262207, + "logits/rejected": -0.8394900560379028, + "logps/chosen": -0.0006705039413645864, + "logps/rejected": -1.7430596351623535, + "loss": 1.8774, + "nll_loss": 0.4693189561367035, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.705040141241625e-05, + "rewards/margins": 0.174238920211792, + "rewards/rejected": -0.1743059754371643, + "step": 5482 + }, + { + "epoch": 3.7918395573997232, + "grad_norm": 9.364946365356445, + "learning_rate": 3.4489780236668204e-05, + "log_odds_chosen": 9.896523475646973, + "log_odds_ratio": -0.0010984577238559723, + "logits/chosen": -0.49681320786476135, + "logits/rejected": -0.40957653522491455, + "logps/chosen": -0.02884014882147312, + "logps/rejected": -1.6628726720809937, + "loss": 1.2071, + "nll_loss": 0.301662802696228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028840149752795696, + "rewards/margins": 0.16340325772762299, + "rewards/rejected": -0.16628727316856384, + "step": 5483 + }, + { + "epoch": 3.79253112033195, + "grad_norm": 11.672866821289062, + "learning_rate": 3.4485938220378056e-05, + "log_odds_chosen": 8.283211708068848, + "log_odds_ratio": -0.031149858608841896, + "logits/chosen": -0.12222898006439209, + "logits/rejected": -0.1867779642343521, + "logps/chosen": -0.011603178456425667, + "logps/rejected": -2.018061637878418, + "loss": 1.6232, + "nll_loss": 0.4026898145675659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001160317799076438, + "rewards/margins": 0.20064584910869598, + "rewards/rejected": -0.20180615782737732, + "step": 5484 + }, + { + "epoch": 3.793222683264177, + "grad_norm": 16.668764114379883, + "learning_rate": 3.44820962040879e-05, + "log_odds_chosen": 8.102465629577637, + "log_odds_ratio": -0.21552255749702454, + "logits/chosen": -0.5265755653381348, + "logits/rejected": -0.5696606040000916, + "logps/chosen": -0.033316683024168015, + "logps/rejected": -1.558127522468567, + "loss": 1.6997, + "nll_loss": 0.40336960554122925, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003331668209284544, + "rewards/margins": 0.1524810940027237, + "rewards/rejected": -0.15581277012825012, + "step": 5485 + }, + { + "epoch": 3.7939142461964037, + "grad_norm": 9.900116920471191, + "learning_rate": 3.447825418779776e-05, + "log_odds_chosen": 10.101983070373535, + "log_odds_ratio": -0.00040225035627372563, + "logits/chosen": -0.45735013484954834, + "logits/rejected": -0.5378046035766602, + "logps/chosen": -0.0009811477502807975, + "logps/rejected": -1.8887546062469482, + "loss": 1.1332, + "nll_loss": 0.2832505702972412, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.811478230403736e-05, + "rewards/margins": 0.188777357339859, + "rewards/rejected": -0.1888754665851593, + "step": 5486 + }, + { + "epoch": 3.7946058091286305, + "grad_norm": 8.449058532714844, + "learning_rate": 3.4474412171507607e-05, + "log_odds_chosen": 10.239374160766602, + "log_odds_ratio": -5.797262565465644e-05, + "logits/chosen": -0.9100509881973267, + "logits/rejected": -0.9818972945213318, + "logps/chosen": -0.0004340135201346129, + "logps/rejected": -1.7591198682785034, + "loss": 1.1093, + "nll_loss": 0.2773301601409912, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.340135274105705e-05, + "rewards/margins": 0.17586857080459595, + "rewards/rejected": -0.17591197788715363, + "step": 5487 + }, + { + "epoch": 3.7952973720608574, + "grad_norm": 8.676505088806152, + "learning_rate": 3.447057015521746e-05, + "log_odds_chosen": 8.678366661071777, + "log_odds_ratio": -0.0011480473913252354, + "logits/chosen": -0.771061360836029, + "logits/rejected": -0.8371139168739319, + "logps/chosen": -0.0025784396566450596, + "logps/rejected": -1.4748327732086182, + "loss": 2.7718, + "nll_loss": 0.6928279995918274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025784395984373987, + "rewards/margins": 0.14722543954849243, + "rewards/rejected": -0.14748328924179077, + "step": 5488 + }, + { + "epoch": 3.795988934993084, + "grad_norm": 5.443553924560547, + "learning_rate": 3.446672813892731e-05, + "log_odds_chosen": 9.067232131958008, + "log_odds_ratio": -0.004053841345012188, + "logits/chosen": -0.5806595087051392, + "logits/rejected": -0.620657205581665, + "logps/chosen": -0.002897555474191904, + "logps/rejected": -1.4690403938293457, + "loss": 1.4835, + "nll_loss": 0.37048161029815674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002897555532399565, + "rewards/margins": 0.14661429822444916, + "rewards/rejected": -0.14690403640270233, + "step": 5489 + }, + { + "epoch": 3.796680497925311, + "grad_norm": 9.907852172851562, + "learning_rate": 3.4462886122637164e-05, + "log_odds_chosen": 9.625545501708984, + "log_odds_ratio": -0.025490716099739075, + "logits/chosen": -0.4287310838699341, + "logits/rejected": -0.47044485807418823, + "logps/chosen": -0.07580657303333282, + "logps/rejected": -2.3400864601135254, + "loss": 1.4314, + "nll_loss": 0.35530906915664673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00758065702393651, + "rewards/margins": 0.22642800211906433, + "rewards/rejected": -0.23400865495204926, + "step": 5490 + }, + { + "epoch": 3.797372060857538, + "grad_norm": 9.823498725891113, + "learning_rate": 3.445904410634701e-05, + "log_odds_chosen": 10.158860206604004, + "log_odds_ratio": -7.385762000922114e-05, + "logits/chosen": -0.7536473274230957, + "logits/rejected": -0.8191118836402893, + "logps/chosen": -0.00031744063016958535, + "logps/rejected": -1.594215989112854, + "loss": 0.8827, + "nll_loss": 0.2206687480211258, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.174406447215006e-05, + "rewards/margins": 0.15938985347747803, + "rewards/rejected": -0.15942159295082092, + "step": 5491 + }, + { + "epoch": 3.7980636237897647, + "grad_norm": 10.448057174682617, + "learning_rate": 3.445520209005686e-05, + "log_odds_chosen": 9.904424667358398, + "log_odds_ratio": -7.187219307525083e-05, + "logits/chosen": -0.7379270195960999, + "logits/rejected": -0.8148084878921509, + "logps/chosen": -0.00037250755121931434, + "logps/rejected": -1.759903073310852, + "loss": 1.5139, + "nll_loss": 0.3784638047218323, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7250756577122957e-05, + "rewards/margins": 0.17595306038856506, + "rewards/rejected": -0.17599031329154968, + "step": 5492 + }, + { + "epoch": 3.7987551867219915, + "grad_norm": 13.461642265319824, + "learning_rate": 3.4451360073766714e-05, + "log_odds_chosen": 9.684606552124023, + "log_odds_ratio": -0.00027112613315694034, + "logits/chosen": -1.044844150543213, + "logits/rejected": -1.0704052448272705, + "logps/chosen": -0.0007236794335767627, + "logps/rejected": -1.8184692859649658, + "loss": 1.7831, + "nll_loss": 0.4457409381866455, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.236794044729322e-05, + "rewards/margins": 0.1817745566368103, + "rewards/rejected": -0.18184691667556763, + "step": 5493 + }, + { + "epoch": 3.7994467496542184, + "grad_norm": 7.109790325164795, + "learning_rate": 3.444751805747656e-05, + "log_odds_chosen": 8.760310173034668, + "log_odds_ratio": -0.002181628718972206, + "logits/chosen": -0.719219446182251, + "logits/rejected": -0.7013097405433655, + "logps/chosen": -0.005476102232933044, + "logps/rejected": -1.2716690301895142, + "loss": 1.5912, + "nll_loss": 0.39759424328804016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005476102232933044, + "rewards/margins": 0.12661930918693542, + "rewards/rejected": -0.12716691195964813, + "step": 5494 + }, + { + "epoch": 3.800138312586445, + "grad_norm": 6.834258556365967, + "learning_rate": 3.444367604118642e-05, + "log_odds_chosen": 10.347545623779297, + "log_odds_ratio": -0.00019219264504499733, + "logits/chosen": -0.3056133985519409, + "logits/rejected": -0.33867448568344116, + "logps/chosen": -0.00030523439636453986, + "logps/rejected": -1.6999119520187378, + "loss": 1.4002, + "nll_loss": 0.35004106163978577, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.052344254683703e-05, + "rewards/margins": 0.16996067762374878, + "rewards/rejected": -0.16999119520187378, + "step": 5495 + }, + { + "epoch": 3.800829875518672, + "grad_norm": 8.571849822998047, + "learning_rate": 3.4439834024896265e-05, + "log_odds_chosen": 9.261777877807617, + "log_odds_ratio": -0.0028540126513689756, + "logits/chosen": -0.7312589287757874, + "logits/rejected": -0.7759085893630981, + "logps/chosen": -0.0023634701501578093, + "logps/rejected": -1.8596577644348145, + "loss": 1.9622, + "nll_loss": 0.49027568101882935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023634699755348265, + "rewards/margins": 0.18572944402694702, + "rewards/rejected": -0.18596579134464264, + "step": 5496 + }, + { + "epoch": 3.801521438450899, + "grad_norm": 7.503764629364014, + "learning_rate": 3.443599200860612e-05, + "log_odds_chosen": 7.809076309204102, + "log_odds_ratio": -0.0015637363540008664, + "logits/chosen": -0.4097944498062134, + "logits/rejected": -0.5135847330093384, + "logps/chosen": -0.0023143496364355087, + "logps/rejected": -1.2307206392288208, + "loss": 1.6174, + "nll_loss": 0.40419769287109375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023143496946431696, + "rewards/margins": 0.12284062802791595, + "rewards/rejected": -0.1230720579624176, + "step": 5497 + }, + { + "epoch": 3.8022130013831257, + "grad_norm": 13.338193893432617, + "learning_rate": 3.443214999231597e-05, + "log_odds_chosen": 8.965517044067383, + "log_odds_ratio": -0.29515203833580017, + "logits/chosen": -0.05749291926622391, + "logits/rejected": -0.11087454110383987, + "logps/chosen": -0.03462938219308853, + "logps/rejected": -2.2734875679016113, + "loss": 1.6813, + "nll_loss": 0.39079830050468445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034629383590072393, + "rewards/margins": 0.22388583421707153, + "rewards/rejected": -0.22734877467155457, + "step": 5498 + }, + { + "epoch": 3.8029045643153525, + "grad_norm": 6.508269309997559, + "learning_rate": 3.442830797602582e-05, + "log_odds_chosen": 9.821160316467285, + "log_odds_ratio": -0.00017450877930969, + "logits/chosen": -0.846017062664032, + "logits/rejected": -0.8550729751586914, + "logps/chosen": -0.005477503407746553, + "logps/rejected": -2.649533987045288, + "loss": 1.6649, + "nll_loss": 0.41621753573417664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005477503291331232, + "rewards/margins": 0.2644056975841522, + "rewards/rejected": -0.2649534344673157, + "step": 5499 + }, + { + "epoch": 3.8035961272475793, + "grad_norm": 7.401920795440674, + "learning_rate": 3.442446595973567e-05, + "log_odds_chosen": 7.918020248413086, + "log_odds_ratio": -0.007441325578838587, + "logits/chosen": -0.7410718202590942, + "logits/rejected": -0.829316258430481, + "logps/chosen": -0.03185814246535301, + "logps/rejected": -2.061920404434204, + "loss": 1.6512, + "nll_loss": 0.412044495344162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031858140137046576, + "rewards/margins": 0.20300622284412384, + "rewards/rejected": -0.2061920464038849, + "step": 5500 + }, + { + "epoch": 3.804287690179806, + "grad_norm": 12.682905197143555, + "learning_rate": 3.442062394344552e-05, + "log_odds_chosen": 9.693799018859863, + "log_odds_ratio": -0.0005106168682686985, + "logits/chosen": -0.44629478454589844, + "logits/rejected": -0.5728074312210083, + "logps/chosen": -0.0007117882487364113, + "logps/rejected": -1.6500269174575806, + "loss": 1.3445, + "nll_loss": 0.3360791802406311, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117882341844961e-05, + "rewards/margins": 0.164931520819664, + "rewards/rejected": -0.16500268876552582, + "step": 5501 + }, + { + "epoch": 3.804979253112033, + "grad_norm": 7.961850643157959, + "learning_rate": 3.441678192715537e-05, + "log_odds_chosen": 9.812564849853516, + "log_odds_ratio": -0.0006582457572221756, + "logits/chosen": -0.24773460626602173, + "logits/rejected": -0.3943350911140442, + "logps/chosen": -0.0027490374632179737, + "logps/rejected": -1.855540156364441, + "loss": 1.139, + "nll_loss": 0.2846890687942505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027490375214256346, + "rewards/margins": 0.18527910113334656, + "rewards/rejected": -0.18555399775505066, + "step": 5502 + }, + { + "epoch": 3.80567081604426, + "grad_norm": 11.752182960510254, + "learning_rate": 3.441293991086522e-05, + "log_odds_chosen": 9.51585578918457, + "log_odds_ratio": -0.028415260836482048, + "logits/chosen": -0.7475553154945374, + "logits/rejected": -0.8064841628074646, + "logps/chosen": -0.009086564183235168, + "logps/rejected": -2.0079166889190674, + "loss": 1.7488, + "nll_loss": 0.4343549609184265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009086563950404525, + "rewards/margins": 0.19988302886486053, + "rewards/rejected": -0.20079168677330017, + "step": 5503 + }, + { + "epoch": 3.8063623789764867, + "grad_norm": 12.0503568649292, + "learning_rate": 3.440909789457508e-05, + "log_odds_chosen": 8.72144889831543, + "log_odds_ratio": -0.017577024176716805, + "logits/chosen": -0.5443108081817627, + "logits/rejected": -0.6571115255355835, + "logps/chosen": -0.005644082557410002, + "logps/rejected": -1.5024874210357666, + "loss": 1.7932, + "nll_loss": 0.4465530812740326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005644082557410002, + "rewards/margins": 0.149684339761734, + "rewards/rejected": -0.15024875104427338, + "step": 5504 + }, + { + "epoch": 3.8070539419087135, + "grad_norm": 7.695371627807617, + "learning_rate": 3.4405255878284923e-05, + "log_odds_chosen": 10.452595710754395, + "log_odds_ratio": -3.497970465105027e-05, + "logits/chosen": -0.7729029059410095, + "logits/rejected": -0.8375166654586792, + "logps/chosen": -0.00011977435497101396, + "logps/rejected": -1.5107817649841309, + "loss": 1.2068, + "nll_loss": 0.3017030954360962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1977434951404575e-05, + "rewards/margins": 0.15106619894504547, + "rewards/rejected": -0.15107816457748413, + "step": 5505 + }, + { + "epoch": 3.8077455048409403, + "grad_norm": 11.576738357543945, + "learning_rate": 3.4401413861994776e-05, + "log_odds_chosen": 9.514175415039062, + "log_odds_ratio": -0.00028477475279942155, + "logits/chosen": -0.4824485778808594, + "logits/rejected": -0.5558584928512573, + "logps/chosen": -0.000295641046250239, + "logps/rejected": -1.5355809926986694, + "loss": 1.2252, + "nll_loss": 0.3062796890735626, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9564107535406947e-05, + "rewards/margins": 0.15352854132652283, + "rewards/rejected": -0.15355810523033142, + "step": 5506 + }, + { + "epoch": 3.808437067773167, + "grad_norm": 5.991958141326904, + "learning_rate": 3.439757184570463e-05, + "log_odds_chosen": 8.088354110717773, + "log_odds_ratio": -0.020768703892827034, + "logits/chosen": -0.6435887217521667, + "logits/rejected": -0.6004131436347961, + "logps/chosen": -0.024852951988577843, + "logps/rejected": -2.28918194770813, + "loss": 1.4803, + "nll_loss": 0.3679874539375305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024852952919900417, + "rewards/margins": 0.2264329046010971, + "rewards/rejected": -0.228918194770813, + "step": 5507 + }, + { + "epoch": 3.809128630705394, + "grad_norm": 12.097492218017578, + "learning_rate": 3.439372982941448e-05, + "log_odds_chosen": 9.174922943115234, + "log_odds_ratio": -0.0025260585825890303, + "logits/chosen": -0.769309401512146, + "logits/rejected": -0.8545984029769897, + "logps/chosen": -0.0010624685091897845, + "logps/rejected": -1.3888287544250488, + "loss": 1.4034, + "nll_loss": 0.35060185194015503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010624684364302084, + "rewards/margins": 0.13877663016319275, + "rewards/rejected": -0.13888287544250488, + "step": 5508 + }, + { + "epoch": 3.809820193637621, + "grad_norm": 11.435340881347656, + "learning_rate": 3.4389887813124326e-05, + "log_odds_chosen": 10.789989471435547, + "log_odds_ratio": -3.569827458704822e-05, + "logits/chosen": -0.4533803164958954, + "logits/rejected": -0.44226211309432983, + "logps/chosen": -0.0005586327752098441, + "logps/rejected": -2.573535203933716, + "loss": 1.5026, + "nll_loss": 0.37563616037368774, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.586327824858017e-05, + "rewards/margins": 0.25729766488075256, + "rewards/rejected": -0.2573535442352295, + "step": 5509 + }, + { + "epoch": 3.8105117565698476, + "grad_norm": 7.45504903793335, + "learning_rate": 3.438604579683418e-05, + "log_odds_chosen": 8.48158073425293, + "log_odds_ratio": -0.001207137480378151, + "logits/chosen": -0.28822168707847595, + "logits/rejected": -0.40649378299713135, + "logps/chosen": -0.0009628716506995261, + "logps/rejected": -1.1258182525634766, + "loss": 1.6578, + "nll_loss": 0.4143317937850952, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.628715633880347e-05, + "rewards/margins": 0.11248555779457092, + "rewards/rejected": -0.11258183419704437, + "step": 5510 + }, + { + "epoch": 3.8112033195020745, + "grad_norm": 11.299424171447754, + "learning_rate": 3.438220378054403e-05, + "log_odds_chosen": 8.825550079345703, + "log_odds_ratio": -0.0006356332451105118, + "logits/chosen": -0.2884122133255005, + "logits/rejected": -0.33634936809539795, + "logps/chosen": -0.0017131754430010915, + "logps/rejected": -1.3680305480957031, + "loss": 2.3697, + "nll_loss": 0.5923528075218201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017131752974819392, + "rewards/margins": 0.13663172721862793, + "rewards/rejected": -0.1368030607700348, + "step": 5511 + }, + { + "epoch": 3.8118948824343013, + "grad_norm": 10.111493110656738, + "learning_rate": 3.437836176425388e-05, + "log_odds_chosen": 8.188075065612793, + "log_odds_ratio": -0.003308902494609356, + "logits/chosen": -0.4384719133377075, + "logits/rejected": -0.4258015751838684, + "logps/chosen": -0.01320966612547636, + "logps/rejected": -1.672622799873352, + "loss": 1.484, + "nll_loss": 0.37067684531211853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013209665194153786, + "rewards/margins": 0.16594132781028748, + "rewards/rejected": -0.16726228594779968, + "step": 5512 + }, + { + "epoch": 3.812586445366528, + "grad_norm": 9.90038013458252, + "learning_rate": 3.4374519747963736e-05, + "log_odds_chosen": 10.393072128295898, + "log_odds_ratio": -6.332092016236857e-05, + "logits/chosen": -0.3121805191040039, + "logits/rejected": -0.4258049428462982, + "logps/chosen": -0.00022715130762662739, + "logps/rejected": -2.1377182006835938, + "loss": 1.2688, + "nll_loss": 0.31718710064888, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.271513221785426e-05, + "rewards/margins": 0.21374912559986115, + "rewards/rejected": -0.21377183496952057, + "step": 5513 + }, + { + "epoch": 3.813278008298755, + "grad_norm": 9.171831130981445, + "learning_rate": 3.437067773167358e-05, + "log_odds_chosen": 9.057912826538086, + "log_odds_ratio": -0.0004997519426979125, + "logits/chosen": -0.696790874004364, + "logits/rejected": -0.6328009366989136, + "logps/chosen": -0.001956242835149169, + "logps/rejected": -1.4612929821014404, + "loss": 1.5019, + "nll_loss": 0.37541741132736206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019562429224606603, + "rewards/margins": 0.14593368768692017, + "rewards/rejected": -0.146129310131073, + "step": 5514 + }, + { + "epoch": 3.813969571230982, + "grad_norm": 6.400440216064453, + "learning_rate": 3.4366835715383434e-05, + "log_odds_chosen": 10.220077514648438, + "log_odds_ratio": -6.414575182134286e-05, + "logits/chosen": -0.5167442560195923, + "logits/rejected": -0.5549056529998779, + "logps/chosen": -0.0003075231797993183, + "logps/rejected": -1.7159326076507568, + "loss": 0.9626, + "nll_loss": 0.24063529074192047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.075231870752759e-05, + "rewards/margins": 0.17156250774860382, + "rewards/rejected": -0.17159326374530792, + "step": 5515 + }, + { + "epoch": 3.8146611341632086, + "grad_norm": 13.907997131347656, + "learning_rate": 3.436299369909329e-05, + "log_odds_chosen": 9.476810455322266, + "log_odds_ratio": -0.0004220995760988444, + "logits/chosen": -0.3859747052192688, + "logits/rejected": -0.40230223536491394, + "logps/chosen": -0.0011472878977656364, + "logps/rejected": -2.2569823265075684, + "loss": 1.2103, + "nll_loss": 0.302531361579895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011472879850771278, + "rewards/margins": 0.2255834937095642, + "rewards/rejected": -0.22569823265075684, + "step": 5516 + }, + { + "epoch": 3.8153526970954355, + "grad_norm": 7.599922180175781, + "learning_rate": 3.435915168280314e-05, + "log_odds_chosen": 10.299822807312012, + "log_odds_ratio": -0.0001517213531769812, + "logits/chosen": -0.5298794507980347, + "logits/rejected": -0.5793193578720093, + "logps/chosen": -0.0002915368531830609, + "logps/rejected": -2.1193902492523193, + "loss": 0.8582, + "nll_loss": 0.21453142166137695, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.915368531830609e-05, + "rewards/margins": 0.21190986037254333, + "rewards/rejected": -0.2119390070438385, + "step": 5517 + }, + { + "epoch": 3.8160442600276623, + "grad_norm": 8.166438102722168, + "learning_rate": 3.4355309666512985e-05, + "log_odds_chosen": 9.342557907104492, + "log_odds_ratio": -0.020774465054273605, + "logits/chosen": -0.38634446263313293, + "logits/rejected": -0.43904387950897217, + "logps/chosen": -0.026264827698469162, + "logps/rejected": -1.5071449279785156, + "loss": 1.6013, + "nll_loss": 0.39825937151908875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00262648263014853, + "rewards/margins": 0.1480880081653595, + "rewards/rejected": -0.15071450173854828, + "step": 5518 + }, + { + "epoch": 3.816735822959889, + "grad_norm": 10.335421562194824, + "learning_rate": 3.435146765022284e-05, + "log_odds_chosen": 9.873821258544922, + "log_odds_ratio": -0.0008224531775340438, + "logits/chosen": -0.5271599292755127, + "logits/rejected": -0.6046086549758911, + "logps/chosen": -0.0232427716255188, + "logps/rejected": -2.3042354583740234, + "loss": 1.9371, + "nll_loss": 0.48419103026390076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002324277302250266, + "rewards/margins": 0.2280992716550827, + "rewards/rejected": -0.23042356967926025, + "step": 5519 + }, + { + "epoch": 3.817427385892116, + "grad_norm": 10.293402671813965, + "learning_rate": 3.434762563393269e-05, + "log_odds_chosen": 8.963640213012695, + "log_odds_ratio": -0.0009295929921790957, + "logits/chosen": -0.3651862144470215, + "logits/rejected": -0.4004555344581604, + "logps/chosen": -0.006498910952359438, + "logps/rejected": -1.286954402923584, + "loss": 1.6556, + "nll_loss": 0.413795530796051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006498911534436047, + "rewards/margins": 0.12804555892944336, + "rewards/rejected": -0.12869544327259064, + "step": 5520 + }, + { + "epoch": 3.8181189488243428, + "grad_norm": 9.090123176574707, + "learning_rate": 3.4343783617642535e-05, + "log_odds_chosen": 9.645210266113281, + "log_odds_ratio": -0.0015530278906226158, + "logits/chosen": -0.335877388715744, + "logits/rejected": -0.3623276948928833, + "logps/chosen": -0.02916303090751171, + "logps/rejected": -2.003610610961914, + "loss": 1.7366, + "nll_loss": 0.4339974820613861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029163029976189137, + "rewards/margins": 0.19744475185871124, + "rewards/rejected": -0.20036104321479797, + "step": 5521 + }, + { + "epoch": 3.8188105117565696, + "grad_norm": 8.732023239135742, + "learning_rate": 3.4339941601352395e-05, + "log_odds_chosen": 10.926740646362305, + "log_odds_ratio": -2.6167797841480933e-05, + "logits/chosen": -0.33205878734588623, + "logits/rejected": -0.413408488035202, + "logps/chosen": -0.0003121374174952507, + "logps/rejected": -2.4376025199890137, + "loss": 1.6157, + "nll_loss": 0.4039216637611389, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.121374174952507e-05, + "rewards/margins": 0.24372904002666473, + "rewards/rejected": -0.24376025795936584, + "step": 5522 + }, + { + "epoch": 3.8195020746887964, + "grad_norm": 9.307251930236816, + "learning_rate": 3.433609958506224e-05, + "log_odds_chosen": 9.603739738464355, + "log_odds_ratio": -0.0003026507911272347, + "logits/chosen": -0.46003973484039307, + "logits/rejected": -0.49017009139060974, + "logps/chosen": -0.0006151901325210929, + "logps/rejected": -1.4377801418304443, + "loss": 1.3264, + "nll_loss": 0.3315601050853729, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151902198325843e-05, + "rewards/margins": 0.143716499209404, + "rewards/rejected": -0.1437780261039734, + "step": 5523 + }, + { + "epoch": 3.8201936376210233, + "grad_norm": 13.252114295959473, + "learning_rate": 3.433225756877209e-05, + "log_odds_chosen": 7.5962018966674805, + "log_odds_ratio": -0.2521457076072693, + "logits/chosen": -0.305279016494751, + "logits/rejected": -0.1939001977443695, + "logps/chosen": -0.03892003744840622, + "logps/rejected": -1.0653748512268066, + "loss": 1.4177, + "nll_loss": 0.32921168208122253, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003892003558576107, + "rewards/margins": 0.10264548659324646, + "rewards/rejected": -0.10653748363256454, + "step": 5524 + }, + { + "epoch": 3.82088520055325, + "grad_norm": 8.918256759643555, + "learning_rate": 3.4328415552481945e-05, + "log_odds_chosen": 10.014975547790527, + "log_odds_ratio": -0.0006707090069539845, + "logits/chosen": -0.7125083208084106, + "logits/rejected": -0.7475904822349548, + "logps/chosen": -0.0003842590085696429, + "logps/rejected": -1.7505316734313965, + "loss": 1.1334, + "nll_loss": 0.28329500555992126, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.842590012936853e-05, + "rewards/margins": 0.17501473426818848, + "rewards/rejected": -0.1750531792640686, + "step": 5525 + }, + { + "epoch": 3.821576763485477, + "grad_norm": 11.013578414916992, + "learning_rate": 3.43245735361918e-05, + "log_odds_chosen": 9.857749938964844, + "log_odds_ratio": -0.006169512402266264, + "logits/chosen": -0.8864176869392395, + "logits/rejected": -0.9290403127670288, + "logps/chosen": -0.0030358880758285522, + "logps/rejected": -1.798154354095459, + "loss": 1.2621, + "nll_loss": 0.31491541862487793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003035888366866857, + "rewards/margins": 0.17951184511184692, + "rewards/rejected": -0.17981544137001038, + "step": 5526 + }, + { + "epoch": 3.8222683264177038, + "grad_norm": 14.562955856323242, + "learning_rate": 3.432073151990164e-05, + "log_odds_chosen": 10.920267105102539, + "log_odds_ratio": -1.9419023374211974e-05, + "logits/chosen": -0.7472702264785767, + "logits/rejected": -0.7232099771499634, + "logps/chosen": -0.00019400370365474373, + "logps/rejected": -2.222071409225464, + "loss": 1.3396, + "nll_loss": 0.334891140460968, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9400371456868015e-05, + "rewards/margins": 0.22218775749206543, + "rewards/rejected": -0.22220715880393982, + "step": 5527 + }, + { + "epoch": 3.8229598893499306, + "grad_norm": 9.685209274291992, + "learning_rate": 3.4316889503611496e-05, + "log_odds_chosen": 9.99222469329834, + "log_odds_ratio": -0.00041643757140263915, + "logits/chosen": -0.8748489618301392, + "logits/rejected": -1.0220654010772705, + "logps/chosen": -0.0005274987197481096, + "logps/rejected": -1.963135004043579, + "loss": 1.2651, + "nll_loss": 0.3162212371826172, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2749870519619435e-05, + "rewards/margins": 0.1962607502937317, + "rewards/rejected": -0.1963135153055191, + "step": 5528 + }, + { + "epoch": 3.8236514522821574, + "grad_norm": 4.451651096343994, + "learning_rate": 3.431304748732135e-05, + "log_odds_chosen": 9.383699417114258, + "log_odds_ratio": -0.00013729330385103822, + "logits/chosen": -0.04785768687725067, + "logits/rejected": -0.14522284269332886, + "logps/chosen": -0.004471118096262217, + "logps/rejected": -1.6100064516067505, + "loss": 1.5907, + "nll_loss": 0.39765363931655884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044711181544698775, + "rewards/margins": 0.16055352985858917, + "rewards/rejected": -0.16100063920021057, + "step": 5529 + }, + { + "epoch": 3.8243430152143842, + "grad_norm": 6.9637017250061035, + "learning_rate": 3.4309205471031194e-05, + "log_odds_chosen": 8.777310371398926, + "log_odds_ratio": -0.0008659964660182595, + "logits/chosen": -0.8735266923904419, + "logits/rejected": -0.9311203956604004, + "logps/chosen": -0.002938113873824477, + "logps/rejected": -1.6509557962417603, + "loss": 1.131, + "nll_loss": 0.2826663851737976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029381137574091554, + "rewards/margins": 0.16480176150798798, + "rewards/rejected": -0.16509558260440826, + "step": 5530 + }, + { + "epoch": 3.825034578146611, + "grad_norm": 11.936813354492188, + "learning_rate": 3.430536345474105e-05, + "log_odds_chosen": 9.860944747924805, + "log_odds_ratio": -0.0001037710826494731, + "logits/chosen": -0.7240860462188721, + "logits/rejected": -0.8060693740844727, + "logps/chosen": -0.000538259744644165, + "logps/rejected": -1.8071279525756836, + "loss": 1.8437, + "nll_loss": 0.46090230345726013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.382597737479955e-05, + "rewards/margins": 0.1806589663028717, + "rewards/rejected": -0.18071278929710388, + "step": 5531 + }, + { + "epoch": 3.825726141078838, + "grad_norm": 7.769991874694824, + "learning_rate": 3.43015214384509e-05, + "log_odds_chosen": 9.484786987304688, + "log_odds_ratio": -0.0006122213671915233, + "logits/chosen": -0.7496232390403748, + "logits/rejected": -0.8503137826919556, + "logps/chosen": -0.005784421693533659, + "logps/rejected": -1.4626810550689697, + "loss": 1.5371, + "nll_loss": 0.38420918583869934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000578442239202559, + "rewards/margins": 0.14568966627120972, + "rewards/rejected": -0.1462681144475937, + "step": 5532 + }, + { + "epoch": 3.8264177040110647, + "grad_norm": 10.754548072814941, + "learning_rate": 3.429767942216075e-05, + "log_odds_chosen": 8.897015571594238, + "log_odds_ratio": -0.0016687134047970176, + "logits/chosen": -0.49642038345336914, + "logits/rejected": -0.5355390310287476, + "logps/chosen": -0.004793742671608925, + "logps/rejected": -1.7796322107315063, + "loss": 1.6755, + "nll_loss": 0.41870787739753723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047937428462319076, + "rewards/margins": 0.17748384177684784, + "rewards/rejected": -0.17796321213245392, + "step": 5533 + }, + { + "epoch": 3.8271092669432916, + "grad_norm": 11.34075927734375, + "learning_rate": 3.4293837405870604e-05, + "log_odds_chosen": 8.991256713867188, + "log_odds_ratio": -0.0016179453814402223, + "logits/chosen": -0.7598484754562378, + "logits/rejected": -0.7825421094894409, + "logps/chosen": -0.01104398537427187, + "logps/rejected": -2.0152955055236816, + "loss": 1.9777, + "nll_loss": 0.49426019191741943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011043986305594444, + "rewards/margins": 0.2004251331090927, + "rewards/rejected": -0.20152954757213593, + "step": 5534 + }, + { + "epoch": 3.8278008298755184, + "grad_norm": 7.396518230438232, + "learning_rate": 3.4289995389580456e-05, + "log_odds_chosen": 8.842217445373535, + "log_odds_ratio": -0.08921416848897934, + "logits/chosen": -0.5817644596099854, + "logits/rejected": -0.6932407021522522, + "logps/chosen": -0.01519980188459158, + "logps/rejected": -1.32765793800354, + "loss": 1.3348, + "nll_loss": 0.32476648688316345, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015199801418930292, + "rewards/margins": 0.13124582171440125, + "rewards/rejected": -0.13276579976081848, + "step": 5535 + }, + { + "epoch": 3.8284923928077457, + "grad_norm": 8.323075294494629, + "learning_rate": 3.42861533732903e-05, + "log_odds_chosen": 10.417610168457031, + "log_odds_ratio": -6.538983143400401e-05, + "logits/chosen": -0.38365769386291504, + "logits/rejected": -0.4411577880382538, + "logps/chosen": -0.00029318296583369374, + "logps/rejected": -1.8801240921020508, + "loss": 0.8762, + "nll_loss": 0.2190488874912262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9318298402358778e-05, + "rewards/margins": 0.18798309564590454, + "rewards/rejected": -0.18801242113113403, + "step": 5536 + }, + { + "epoch": 3.8291839557399725, + "grad_norm": 11.362656593322754, + "learning_rate": 3.4282311357000154e-05, + "log_odds_chosen": 7.42609977722168, + "log_odds_ratio": -0.021518532186746597, + "logits/chosen": -0.4237701892852783, + "logits/rejected": -0.5051606893539429, + "logps/chosen": -0.006600437685847282, + "logps/rejected": -1.3271539211273193, + "loss": 2.0503, + "nll_loss": 0.5104222297668457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006600437918677926, + "rewards/margins": 0.1320553719997406, + "rewards/rejected": -0.1327154040336609, + "step": 5537 + }, + { + "epoch": 3.8298755186721993, + "grad_norm": 7.826541423797607, + "learning_rate": 3.4278469340710007e-05, + "log_odds_chosen": 9.108176231384277, + "log_odds_ratio": -0.0018098466098308563, + "logits/chosen": -0.8225448131561279, + "logits/rejected": -0.869577944278717, + "logps/chosen": -0.014577067457139492, + "logps/rejected": -2.727315902709961, + "loss": 1.5117, + "nll_loss": 0.37774190306663513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001457706792280078, + "rewards/margins": 0.2712738513946533, + "rewards/rejected": -0.2727315425872803, + "step": 5538 + }, + { + "epoch": 3.830567081604426, + "grad_norm": 14.817159652709961, + "learning_rate": 3.427462732441986e-05, + "log_odds_chosen": 10.341073989868164, + "log_odds_ratio": -0.0002269662218168378, + "logits/chosen": -0.894839882850647, + "logits/rejected": -0.9590165019035339, + "logps/chosen": -0.007867439649999142, + "logps/rejected": -2.476846694946289, + "loss": 1.4406, + "nll_loss": 0.3601387143135071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007867439999245107, + "rewards/margins": 0.24689790606498718, + "rewards/rejected": -0.24768467247486115, + "step": 5539 + }, + { + "epoch": 3.831258644536653, + "grad_norm": 13.953787803649902, + "learning_rate": 3.427078530812971e-05, + "log_odds_chosen": 9.776470184326172, + "log_odds_ratio": -0.0002314805460628122, + "logits/chosen": -0.9279817342758179, + "logits/rejected": -0.975051760673523, + "logps/chosen": -0.00032981581171043217, + "logps/rejected": -1.6627427339553833, + "loss": 2.7871, + "nll_loss": 0.6967459917068481, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.298158117104322e-05, + "rewards/margins": 0.16624130308628082, + "rewards/rejected": -0.1662742793560028, + "step": 5540 + }, + { + "epoch": 3.83195020746888, + "grad_norm": 5.432536602020264, + "learning_rate": 3.426694329183956e-05, + "log_odds_chosen": 9.824362754821777, + "log_odds_ratio": -0.0005517660174518824, + "logits/chosen": -0.6685186624526978, + "logits/rejected": -0.7586067914962769, + "logps/chosen": -0.008896322920918465, + "logps/rejected": -2.1879920959472656, + "loss": 1.1259, + "nll_loss": 0.28141963481903076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008896323852241039, + "rewards/margins": 0.2179095596075058, + "rewards/rejected": -0.21879920363426208, + "step": 5541 + }, + { + "epoch": 3.8326417704011067, + "grad_norm": 7.405891418457031, + "learning_rate": 3.426310127554941e-05, + "log_odds_chosen": 10.008780479431152, + "log_odds_ratio": -0.0002761481737252325, + "logits/chosen": -0.5124903917312622, + "logits/rejected": -0.6155083179473877, + "logps/chosen": -0.0004465877136681229, + "logps/rejected": -1.7490488290786743, + "loss": 1.3365, + "nll_loss": 0.3340889811515808, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.465877282200381e-05, + "rewards/margins": 0.17486022412776947, + "rewards/rejected": -0.17490488290786743, + "step": 5542 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 11.694721221923828, + "learning_rate": 3.425925925925926e-05, + "log_odds_chosen": 8.600055694580078, + "log_odds_ratio": -0.009092436172068119, + "logits/chosen": -0.422126829624176, + "logits/rejected": -0.4881291091442108, + "logps/chosen": -0.004327110014855862, + "logps/rejected": -1.978559970855713, + "loss": 2.0677, + "nll_loss": 0.5160139203071594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043271100730635226, + "rewards/margins": 0.1974232941865921, + "rewards/rejected": -0.19785597920417786, + "step": 5543 + }, + { + "epoch": 3.8340248962655603, + "grad_norm": 7.458912372589111, + "learning_rate": 3.4255417242969114e-05, + "log_odds_chosen": 8.836742401123047, + "log_odds_ratio": -0.0018061978043988347, + "logits/chosen": -0.2798689305782318, + "logits/rejected": -0.31368425488471985, + "logps/chosen": -0.001996720675379038, + "logps/rejected": -1.3222445249557495, + "loss": 1.629, + "nll_loss": 0.4070799946784973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019967206753790379, + "rewards/margins": 0.13202476501464844, + "rewards/rejected": -0.132224440574646, + "step": 5544 + }, + { + "epoch": 3.834716459197787, + "grad_norm": 12.886466026306152, + "learning_rate": 3.425157522667896e-05, + "log_odds_chosen": 11.046795845031738, + "log_odds_ratio": -3.086154174525291e-05, + "logits/chosen": -0.08762979507446289, + "logits/rejected": -0.2459934800863266, + "logps/chosen": -0.00024006012245081365, + "logps/rejected": -2.759465217590332, + "loss": 1.4513, + "nll_loss": 0.36282655596733093, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4006010789889842e-05, + "rewards/margins": 0.2759225368499756, + "rewards/rejected": -0.2759465277194977, + "step": 5545 + }, + { + "epoch": 3.835408022130014, + "grad_norm": 9.756755828857422, + "learning_rate": 3.424773321038882e-05, + "log_odds_chosen": 9.83508586883545, + "log_odds_ratio": -0.000582660548388958, + "logits/chosen": -0.35356366634368896, + "logits/rejected": -0.360950767993927, + "logps/chosen": -0.004178952891379595, + "logps/rejected": -2.676914930343628, + "loss": 1.2778, + "nll_loss": 0.3193887770175934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00041789532406255603, + "rewards/margins": 0.26727357506752014, + "rewards/rejected": -0.2676914930343628, + "step": 5546 + }, + { + "epoch": 3.836099585062241, + "grad_norm": 7.360530376434326, + "learning_rate": 3.4243891194098665e-05, + "log_odds_chosen": 8.683747291564941, + "log_odds_ratio": -0.0014638010179623961, + "logits/chosen": -0.3876939117908478, + "logits/rejected": -0.4048527777194977, + "logps/chosen": -0.001358279143460095, + "logps/rejected": -1.3281148672103882, + "loss": 0.9462, + "nll_loss": 0.2364034652709961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013582792598754168, + "rewards/margins": 0.1326756477355957, + "rewards/rejected": -0.13281148672103882, + "step": 5547 + }, + { + "epoch": 3.8367911479944676, + "grad_norm": 15.831966400146484, + "learning_rate": 3.424004917780852e-05, + "log_odds_chosen": 9.272870063781738, + "log_odds_ratio": -0.003424287075176835, + "logits/chosen": -0.40673232078552246, + "logits/rejected": -0.5001996159553528, + "logps/chosen": -0.012605813331902027, + "logps/rejected": -2.0801045894622803, + "loss": 1.2381, + "nll_loss": 0.30918464064598083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012605813099071383, + "rewards/margins": 0.20674987137317657, + "rewards/rejected": -0.2080104500055313, + "step": 5548 + }, + { + "epoch": 3.8374827109266945, + "grad_norm": 9.429594039916992, + "learning_rate": 3.423620716151837e-05, + "log_odds_chosen": 10.037227630615234, + "log_odds_ratio": -0.000798444205429405, + "logits/chosen": -0.7082902193069458, + "logits/rejected": -0.716722846031189, + "logps/chosen": -0.006209598854184151, + "logps/rejected": -1.9420156478881836, + "loss": 1.5434, + "nll_loss": 0.38577979803085327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006209598504938185, + "rewards/margins": 0.19358061254024506, + "rewards/rejected": -0.19420158863067627, + "step": 5549 + }, + { + "epoch": 3.8381742738589213, + "grad_norm": 8.317140579223633, + "learning_rate": 3.4232365145228216e-05, + "log_odds_chosen": 8.800092697143555, + "log_odds_ratio": -0.001625056378543377, + "logits/chosen": -0.37635883688926697, + "logits/rejected": -0.4500593841075897, + "logps/chosen": -0.005539075471460819, + "logps/rejected": -1.682387113571167, + "loss": 1.5154, + "nll_loss": 0.37868595123291016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005539075937122107, + "rewards/margins": 0.16768479347229004, + "rewards/rejected": -0.16823871433734894, + "step": 5550 + }, + { + "epoch": 3.838865836791148, + "grad_norm": 6.375285625457764, + "learning_rate": 3.422852312893807e-05, + "log_odds_chosen": 8.28230094909668, + "log_odds_ratio": -0.11828190088272095, + "logits/chosen": -0.5913441181182861, + "logits/rejected": -0.6703944802284241, + "logps/chosen": -0.017180640250444412, + "logps/rejected": -1.775395393371582, + "loss": 1.4604, + "nll_loss": 0.3532821536064148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017180641880258918, + "rewards/margins": 0.1758214831352234, + "rewards/rejected": -0.17753954231739044, + "step": 5551 + }, + { + "epoch": 3.839557399723375, + "grad_norm": 12.262701988220215, + "learning_rate": 3.422468111264792e-05, + "log_odds_chosen": 10.857995986938477, + "log_odds_ratio": -9.511434473097324e-05, + "logits/chosen": -0.4358949661254883, + "logits/rejected": -0.4269968271255493, + "logps/chosen": -0.0002777725167106837, + "logps/rejected": -2.5557219982147217, + "loss": 2.4963, + "nll_loss": 0.6240707635879517, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7777254217653535e-05, + "rewards/margins": 0.25554442405700684, + "rewards/rejected": -0.25557222962379456, + "step": 5552 + }, + { + "epoch": 3.840248962655602, + "grad_norm": 10.897068977355957, + "learning_rate": 3.422083909635777e-05, + "log_odds_chosen": 10.079792022705078, + "log_odds_ratio": -8.238962618634105e-05, + "logits/chosen": -0.39618369936943054, + "logits/rejected": -0.4675847291946411, + "logps/chosen": -0.00033035362139344215, + "logps/rejected": -1.5016779899597168, + "loss": 1.3218, + "nll_loss": 0.33044198155403137, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.303536504972726e-05, + "rewards/margins": 0.15013474225997925, + "rewards/rejected": -0.1501677930355072, + "step": 5553 + }, + { + "epoch": 3.8409405255878286, + "grad_norm": 9.962440490722656, + "learning_rate": 3.421699708006762e-05, + "log_odds_chosen": 9.99749755859375, + "log_odds_ratio": -0.00010504462261451408, + "logits/chosen": -0.4203840494155884, + "logits/rejected": -0.45524218678474426, + "logps/chosen": -0.0003157463506795466, + "logps/rejected": -1.7843303680419922, + "loss": 2.0937, + "nll_loss": 0.5234262347221375, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.157463652314618e-05, + "rewards/margins": 0.17840145528316498, + "rewards/rejected": -0.17843303084373474, + "step": 5554 + }, + { + "epoch": 3.8416320885200554, + "grad_norm": 7.3351664543151855, + "learning_rate": 3.421315506377748e-05, + "log_odds_chosen": 8.06844425201416, + "log_odds_ratio": -0.07652094960212708, + "logits/chosen": -0.44341135025024414, + "logits/rejected": -0.4799209237098694, + "logps/chosen": -0.017725473269820213, + "logps/rejected": -1.821255087852478, + "loss": 1.7225, + "nll_loss": 0.4229816198348999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00177254737354815, + "rewards/margins": 0.18035295605659485, + "rewards/rejected": -0.182125523686409, + "step": 5555 + }, + { + "epoch": 3.8423236514522823, + "grad_norm": 12.786683082580566, + "learning_rate": 3.4209313047487323e-05, + "log_odds_chosen": 10.323480606079102, + "log_odds_ratio": -0.00020878612122032791, + "logits/chosen": -0.7404874563217163, + "logits/rejected": -0.8786669373512268, + "logps/chosen": -0.0006357074016705155, + "logps/rejected": -2.5420761108398438, + "loss": 3.0706, + "nll_loss": 0.7676250338554382, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.357074016705155e-05, + "rewards/margins": 0.2541440427303314, + "rewards/rejected": -0.2542076110839844, + "step": 5556 + }, + { + "epoch": 3.843015214384509, + "grad_norm": 16.721275329589844, + "learning_rate": 3.4205471031197176e-05, + "log_odds_chosen": 7.947993278503418, + "log_odds_ratio": -0.2099430114030838, + "logits/chosen": -0.6919919848442078, + "logits/rejected": -0.7184282541275024, + "logps/chosen": -0.035298943519592285, + "logps/rejected": -1.3522957563400269, + "loss": 1.3653, + "nll_loss": 0.3203205168247223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035298941656947136, + "rewards/margins": 0.13169968128204346, + "rewards/rejected": -0.13522957265377045, + "step": 5557 + }, + { + "epoch": 3.843706777316736, + "grad_norm": 7.51786994934082, + "learning_rate": 3.420162901490703e-05, + "log_odds_chosen": 10.105172157287598, + "log_odds_ratio": -0.00017221916641574353, + "logits/chosen": -0.8171368837356567, + "logits/rejected": -0.8305201530456543, + "logps/chosen": -0.0010632172925397754, + "logps/rejected": -2.030632734298706, + "loss": 1.0309, + "nll_loss": 0.2577052712440491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010632173507474363, + "rewards/margins": 0.2029569447040558, + "rewards/rejected": -0.2030632644891739, + "step": 5558 + }, + { + "epoch": 3.8443983402489628, + "grad_norm": 11.065739631652832, + "learning_rate": 3.4197786998616874e-05, + "log_odds_chosen": 10.4656982421875, + "log_odds_ratio": -0.00015175581211224198, + "logits/chosen": -0.4404027462005615, + "logits/rejected": -0.5089275240898132, + "logps/chosen": -0.00016348894860129803, + "logps/rejected": -1.9598641395568848, + "loss": 1.3562, + "nll_loss": 0.33904480934143066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6348896679119207e-05, + "rewards/margins": 0.1959700733423233, + "rewards/rejected": -0.19598641991615295, + "step": 5559 + }, + { + "epoch": 3.8450899031811896, + "grad_norm": 6.907554626464844, + "learning_rate": 3.4193944982326726e-05, + "log_odds_chosen": 7.720027923583984, + "log_odds_ratio": -0.03721699118614197, + "logits/chosen": -0.6247544884681702, + "logits/rejected": -0.6959853172302246, + "logps/chosen": -0.016281504184007645, + "logps/rejected": -1.582554578781128, + "loss": 1.461, + "nll_loss": 0.3615221381187439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016281504649668932, + "rewards/margins": 0.15662731230258942, + "rewards/rejected": -0.1582554578781128, + "step": 5560 + }, + { + "epoch": 3.8457814661134164, + "grad_norm": 7.4110188484191895, + "learning_rate": 3.419010296603658e-05, + "log_odds_chosen": 8.912864685058594, + "log_odds_ratio": -0.0009941515745595098, + "logits/chosen": -0.4958620071411133, + "logits/rejected": -0.5392465591430664, + "logps/chosen": -0.0017727524973452091, + "logps/rejected": -1.8045464754104614, + "loss": 1.4059, + "nll_loss": 0.3513681888580322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017727524391375482, + "rewards/margins": 0.18027737736701965, + "rewards/rejected": -0.18045464158058167, + "step": 5561 + }, + { + "epoch": 3.8464730290456433, + "grad_norm": 8.460882186889648, + "learning_rate": 3.418626094974643e-05, + "log_odds_chosen": 10.592855453491211, + "log_odds_ratio": -3.915593333658762e-05, + "logits/chosen": -0.7202191352844238, + "logits/rejected": -0.7390909194946289, + "logps/chosen": -0.00015226914547383785, + "logps/rejected": -1.7440904378890991, + "loss": 1.3169, + "nll_loss": 0.32921481132507324, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5226914911181666e-05, + "rewards/margins": 0.17439383268356323, + "rewards/rejected": -0.17440906167030334, + "step": 5562 + }, + { + "epoch": 3.84716459197787, + "grad_norm": 12.310235977172852, + "learning_rate": 3.418241893345628e-05, + "log_odds_chosen": 9.920722007751465, + "log_odds_ratio": -0.0001332084502791986, + "logits/chosen": -0.7831141948699951, + "logits/rejected": -0.802308201789856, + "logps/chosen": -0.0005803712992928922, + "logps/rejected": -1.7757395505905151, + "loss": 0.9645, + "nll_loss": 0.24109968543052673, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.803713429486379e-05, + "rewards/margins": 0.1775159239768982, + "rewards/rejected": -0.17757394909858704, + "step": 5563 + }, + { + "epoch": 3.847856154910097, + "grad_norm": 7.889085292816162, + "learning_rate": 3.4178576917166136e-05, + "log_odds_chosen": 8.473007202148438, + "log_odds_ratio": -0.0009104659548029304, + "logits/chosen": -0.37859463691711426, + "logits/rejected": -0.4623199701309204, + "logps/chosen": -0.008291005156934261, + "logps/rejected": -2.0283498764038086, + "loss": 1.9784, + "nll_loss": 0.4944990277290344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008291004924103618, + "rewards/margins": 0.20200589299201965, + "rewards/rejected": -0.20283500850200653, + "step": 5564 + }, + { + "epoch": 3.8485477178423237, + "grad_norm": 9.304460525512695, + "learning_rate": 3.417473490087598e-05, + "log_odds_chosen": 9.714195251464844, + "log_odds_ratio": -0.0001703681773506105, + "logits/chosen": -0.6381903290748596, + "logits/rejected": -0.7350766062736511, + "logps/chosen": -0.0015592292184010148, + "logps/rejected": -1.4405004978179932, + "loss": 1.2451, + "nll_loss": 0.31124961376190186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015592292766086757, + "rewards/margins": 0.14389413595199585, + "rewards/rejected": -0.14405004680156708, + "step": 5565 + }, + { + "epoch": 3.8492392807745506, + "grad_norm": 14.469979286193848, + "learning_rate": 3.4170892884585834e-05, + "log_odds_chosen": 8.90275764465332, + "log_odds_ratio": -0.0023119053803384304, + "logits/chosen": -0.5259624719619751, + "logits/rejected": -0.5423348546028137, + "logps/chosen": -0.00645932462066412, + "logps/rejected": -1.7382960319519043, + "loss": 1.5171, + "nll_loss": 0.3790552020072937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006459323922172189, + "rewards/margins": 0.17318369448184967, + "rewards/rejected": -0.17382961511611938, + "step": 5566 + }, + { + "epoch": 3.8499308437067774, + "grad_norm": 8.519709587097168, + "learning_rate": 3.416705086829569e-05, + "log_odds_chosen": 10.836234092712402, + "log_odds_ratio": -2.3567565222037956e-05, + "logits/chosen": -0.6077121496200562, + "logits/rejected": -0.635270357131958, + "logps/chosen": -0.00024252112780231982, + "logps/rejected": -2.135042667388916, + "loss": 1.0484, + "nll_loss": 0.2620912194252014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4252112780231982e-05, + "rewards/margins": 0.21348001062870026, + "rewards/rejected": -0.21350425481796265, + "step": 5567 + }, + { + "epoch": 3.8506224066390042, + "grad_norm": 9.673677444458008, + "learning_rate": 3.416320885200553e-05, + "log_odds_chosen": 9.430411338806152, + "log_odds_ratio": -0.15776720643043518, + "logits/chosen": -0.637581467628479, + "logits/rejected": -0.6428850293159485, + "logps/chosen": -0.02269704081118107, + "logps/rejected": -1.8823751211166382, + "loss": 1.6441, + "nll_loss": 0.3952556252479553, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022697041276842356, + "rewards/margins": 0.1859678030014038, + "rewards/rejected": -0.1882375180721283, + "step": 5568 + }, + { + "epoch": 3.851313969571231, + "grad_norm": 12.571064949035645, + "learning_rate": 3.4159366835715385e-05, + "log_odds_chosen": 10.705487251281738, + "log_odds_ratio": -6.690446753054857e-05, + "logits/chosen": -0.6356642246246338, + "logits/rejected": -0.6576147675514221, + "logps/chosen": -0.00015859422273933887, + "logps/rejected": -2.0107100009918213, + "loss": 1.3405, + "nll_loss": 0.33511292934417725, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.585942300152965e-05, + "rewards/margins": 0.20105516910552979, + "rewards/rejected": -0.20107102394104004, + "step": 5569 + }, + { + "epoch": 3.852005532503458, + "grad_norm": 11.851554870605469, + "learning_rate": 3.415552481942524e-05, + "log_odds_chosen": 8.76864242553711, + "log_odds_ratio": -0.0007422867347486317, + "logits/chosen": -0.26603615283966064, + "logits/rejected": -0.30356648564338684, + "logps/chosen": -0.004614387173205614, + "logps/rejected": -1.554021954536438, + "loss": 1.7769, + "nll_loss": 0.444142609834671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046143875806592405, + "rewards/margins": 0.15494075417518616, + "rewards/rejected": -0.15540219843387604, + "step": 5570 + }, + { + "epoch": 3.8526970954356847, + "grad_norm": 13.143882751464844, + "learning_rate": 3.415168280313509e-05, + "log_odds_chosen": 9.488032341003418, + "log_odds_ratio": -0.024420535191893578, + "logits/chosen": -0.8110833764076233, + "logits/rejected": -0.8133986592292786, + "logps/chosen": -0.01876739226281643, + "logps/rejected": -1.945871114730835, + "loss": 1.5925, + "nll_loss": 0.3956940472126007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018767392029985785, + "rewards/margins": 0.19271036982536316, + "rewards/rejected": -0.1945871114730835, + "step": 5571 + }, + { + "epoch": 3.8533886583679116, + "grad_norm": 8.051220893859863, + "learning_rate": 3.4147840786844935e-05, + "log_odds_chosen": 8.133522987365723, + "log_odds_ratio": -0.02157263271510601, + "logits/chosen": -0.2942221760749817, + "logits/rejected": -0.3327701985836029, + "logps/chosen": -0.007230726070702076, + "logps/rejected": -1.4394934177398682, + "loss": 1.8201, + "nll_loss": 0.4528588056564331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007230726187117398, + "rewards/margins": 0.1432262659072876, + "rewards/rejected": -0.14394932985305786, + "step": 5572 + }, + { + "epoch": 3.8540802213001384, + "grad_norm": 12.141948699951172, + "learning_rate": 3.4143998770554795e-05, + "log_odds_chosen": 8.340699195861816, + "log_odds_ratio": -0.05566050112247467, + "logits/chosen": -0.5719602704048157, + "logits/rejected": -0.7078933119773865, + "logps/chosen": -0.017384812235832214, + "logps/rejected": -2.164154529571533, + "loss": 1.6659, + "nll_loss": 0.41090866923332214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017384812235832214, + "rewards/margins": 0.21467696130275726, + "rewards/rejected": -0.2164154350757599, + "step": 5573 + }, + { + "epoch": 3.854771784232365, + "grad_norm": 7.900241851806641, + "learning_rate": 3.414015675426464e-05, + "log_odds_chosen": 7.441177845001221, + "log_odds_ratio": -0.1711406111717224, + "logits/chosen": -0.2587983012199402, + "logits/rejected": -0.3203867971897125, + "logps/chosen": -0.04765608161687851, + "logps/rejected": -1.2207387685775757, + "loss": 1.4017, + "nll_loss": 0.3333078920841217, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004765608347952366, + "rewards/margins": 0.11730826646089554, + "rewards/rejected": -0.12207387387752533, + "step": 5574 + }, + { + "epoch": 3.855463347164592, + "grad_norm": 6.0263824462890625, + "learning_rate": 3.413631473797449e-05, + "log_odds_chosen": 8.33633041381836, + "log_odds_ratio": -0.0022357190027832985, + "logits/chosen": -0.20025485754013062, + "logits/rejected": -0.2713879346847534, + "logps/chosen": -0.024495285004377365, + "logps/rejected": -2.157153367996216, + "loss": 1.7926, + "nll_loss": 0.4479362964630127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024495285470038652, + "rewards/margins": 0.2132658064365387, + "rewards/rejected": -0.21571531891822815, + "step": 5575 + }, + { + "epoch": 3.856154910096819, + "grad_norm": 11.386039733886719, + "learning_rate": 3.4132472721684345e-05, + "log_odds_chosen": 8.842057228088379, + "log_odds_ratio": -0.1126728430390358, + "logits/chosen": -0.7715977430343628, + "logits/rejected": -0.8066084980964661, + "logps/chosen": -0.018056262284517288, + "logps/rejected": -2.1228909492492676, + "loss": 1.3804, + "nll_loss": 0.3338434398174286, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018056264379993081, + "rewards/margins": 0.21048344671726227, + "rewards/rejected": -0.21228908002376556, + "step": 5576 + }, + { + "epoch": 3.8568464730290457, + "grad_norm": 21.511932373046875, + "learning_rate": 3.412863070539419e-05, + "log_odds_chosen": 10.852688789367676, + "log_odds_ratio": -0.00026287042419426143, + "logits/chosen": -0.5599167943000793, + "logits/rejected": -0.6122843027114868, + "logps/chosen": -0.003043019911274314, + "logps/rejected": -3.0699446201324463, + "loss": 1.537, + "nll_loss": 0.3842347264289856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003043019969481975, + "rewards/margins": 0.30669015645980835, + "rewards/rejected": -0.3069944679737091, + "step": 5577 + }, + { + "epoch": 3.8575380359612725, + "grad_norm": 7.178199768066406, + "learning_rate": 3.412478868910404e-05, + "log_odds_chosen": 7.949138641357422, + "log_odds_ratio": -0.014978266321122646, + "logits/chosen": -0.7569225430488586, + "logits/rejected": -0.7971171140670776, + "logps/chosen": -0.009100032970309258, + "logps/rejected": -1.8713810443878174, + "loss": 1.3506, + "nll_loss": 0.3361407518386841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009100032621063292, + "rewards/margins": 0.18622809648513794, + "rewards/rejected": -0.18713811039924622, + "step": 5578 + }, + { + "epoch": 3.8582295988934994, + "grad_norm": 5.463674545288086, + "learning_rate": 3.4120946672813896e-05, + "log_odds_chosen": 10.04675006866455, + "log_odds_ratio": -0.00010655250662239268, + "logits/chosen": -0.5956853628158569, + "logits/rejected": -0.5343964099884033, + "logps/chosen": -0.004939241334795952, + "logps/rejected": -2.332029342651367, + "loss": 1.4955, + "nll_loss": 0.37386226654052734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004939241334795952, + "rewards/margins": 0.23270902037620544, + "rewards/rejected": -0.23320293426513672, + "step": 5579 + }, + { + "epoch": 3.858921161825726, + "grad_norm": 11.263189315795898, + "learning_rate": 3.411710465652375e-05, + "log_odds_chosen": 9.413596153259277, + "log_odds_ratio": -0.0013633102644234896, + "logits/chosen": -0.2171023190021515, + "logits/rejected": -0.3067041039466858, + "logps/chosen": -0.006868002470582724, + "logps/rejected": -2.0416359901428223, + "loss": 2.1914, + "nll_loss": 0.5477181077003479, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000686800223775208, + "rewards/margins": 0.20347681641578674, + "rewards/rejected": -0.20416361093521118, + "step": 5580 + }, + { + "epoch": 3.859612724757953, + "grad_norm": 7.4217848777771, + "learning_rate": 3.4113262640233594e-05, + "log_odds_chosen": 10.248290061950684, + "log_odds_ratio": -0.00022609463485423476, + "logits/chosen": -0.7831434607505798, + "logits/rejected": -0.7565454244613647, + "logps/chosen": -0.0007353498367592692, + "logps/rejected": -2.2359209060668945, + "loss": 1.3132, + "nll_loss": 0.3282652497291565, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.35349822207354e-05, + "rewards/margins": 0.22351858019828796, + "rewards/rejected": -0.2235921174287796, + "step": 5581 + }, + { + "epoch": 3.86030428769018, + "grad_norm": 8.249073028564453, + "learning_rate": 3.4109420623943446e-05, + "log_odds_chosen": 9.593080520629883, + "log_odds_ratio": -0.001667341566644609, + "logits/chosen": -0.7265720367431641, + "logits/rejected": -0.7452245354652405, + "logps/chosen": -0.030651377514004707, + "logps/rejected": -1.9601030349731445, + "loss": 1.2229, + "nll_loss": 0.30555465817451477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003065137891098857, + "rewards/margins": 0.19294516742229462, + "rewards/rejected": -0.19601032137870789, + "step": 5582 + }, + { + "epoch": 3.8609958506224067, + "grad_norm": 4.888029098510742, + "learning_rate": 3.41055786076533e-05, + "log_odds_chosen": 8.515039443969727, + "log_odds_ratio": -0.02231896109879017, + "logits/chosen": -0.2870052456855774, + "logits/rejected": -0.369390606880188, + "logps/chosen": -0.007543592248111963, + "logps/rejected": -1.8465120792388916, + "loss": 0.8638, + "nll_loss": 0.21371084451675415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007543592946603894, + "rewards/margins": 0.18389683961868286, + "rewards/rejected": -0.1846512258052826, + "step": 5583 + }, + { + "epoch": 3.8616874135546335, + "grad_norm": 10.852818489074707, + "learning_rate": 3.410173659136315e-05, + "log_odds_chosen": 8.885574340820312, + "log_odds_ratio": -0.0029431653674691916, + "logits/chosen": -0.230947345495224, + "logits/rejected": -0.3188367187976837, + "logps/chosen": -0.0016783340834081173, + "logps/rejected": -1.059872031211853, + "loss": 1.6579, + "nll_loss": 0.4141872227191925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016783342289272696, + "rewards/margins": 0.10581937432289124, + "rewards/rejected": -0.10598720610141754, + "step": 5584 + }, + { + "epoch": 3.8623789764868603, + "grad_norm": 8.725906372070312, + "learning_rate": 3.4097894575073e-05, + "log_odds_chosen": 8.555717468261719, + "log_odds_ratio": -0.001971770077943802, + "logits/chosen": -0.4405558109283447, + "logits/rejected": -0.3410573899745941, + "logps/chosen": -0.0018309359438717365, + "logps/rejected": -1.2396399974822998, + "loss": 1.725, + "nll_loss": 0.4310452938079834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018309360893908888, + "rewards/margins": 0.12378091365098953, + "rewards/rejected": -0.12396401166915894, + "step": 5585 + }, + { + "epoch": 3.863070539419087, + "grad_norm": 9.858050346374512, + "learning_rate": 3.409405255878285e-05, + "log_odds_chosen": 10.435060501098633, + "log_odds_ratio": -0.0001268967316718772, + "logits/chosen": -0.6413505673408508, + "logits/rejected": -0.7253145575523376, + "logps/chosen": -0.00029369723051786423, + "logps/rejected": -1.9069111347198486, + "loss": 1.2896, + "nll_loss": 0.3223815858364105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9369724870775826e-05, + "rewards/margins": 0.1906617283821106, + "rewards/rejected": -0.19069111347198486, + "step": 5586 + }, + { + "epoch": 3.863762102351314, + "grad_norm": 8.920366287231445, + "learning_rate": 3.40902105424927e-05, + "log_odds_chosen": 9.892365455627441, + "log_odds_ratio": -0.0001298495044466108, + "logits/chosen": -0.20717889070510864, + "logits/rejected": -0.2652691602706909, + "logps/chosen": -0.00026627822080627084, + "logps/rejected": -1.3087078332901, + "loss": 1.7546, + "nll_loss": 0.43863433599472046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6627822080627084e-05, + "rewards/margins": 0.1308441460132599, + "rewards/rejected": -0.1308707892894745, + "step": 5587 + }, + { + "epoch": 3.864453665283541, + "grad_norm": 11.133824348449707, + "learning_rate": 3.408636852620255e-05, + "log_odds_chosen": 9.764641761779785, + "log_odds_ratio": -0.0001106760319089517, + "logits/chosen": -0.45230603218078613, + "logits/rejected": -0.5868815183639526, + "logps/chosen": -0.0006661764928139746, + "logps/rejected": -2.2513959407806396, + "loss": 1.8702, + "nll_loss": 0.46753376722335815, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661764928139746e-05, + "rewards/margins": 0.22507299482822418, + "rewards/rejected": -0.22513961791992188, + "step": 5588 + }, + { + "epoch": 3.8651452282157677, + "grad_norm": 10.524619102478027, + "learning_rate": 3.408252650991241e-05, + "log_odds_chosen": 9.430778503417969, + "log_odds_ratio": -0.0005737305618822575, + "logits/chosen": -0.42517948150634766, + "logits/rejected": -0.5130538940429688, + "logps/chosen": -0.0003639504429884255, + "logps/rejected": -1.6744965314865112, + "loss": 1.5258, + "nll_loss": 0.38138049840927124, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6395042116055265e-05, + "rewards/margins": 0.16741326451301575, + "rewards/rejected": -0.16744965314865112, + "step": 5589 + }, + { + "epoch": 3.8658367911479945, + "grad_norm": 14.197625160217285, + "learning_rate": 3.407868449362225e-05, + "log_odds_chosen": 9.485942840576172, + "log_odds_ratio": -0.004377941135317087, + "logits/chosen": -0.32564330101013184, + "logits/rejected": -0.4865632951259613, + "logps/chosen": -0.0027153741102665663, + "logps/rejected": -2.0737974643707275, + "loss": 1.7531, + "nll_loss": 0.437848836183548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002715374284889549, + "rewards/margins": 0.20710822939872742, + "rewards/rejected": -0.2073797583580017, + "step": 5590 + }, + { + "epoch": 3.8665283540802213, + "grad_norm": 11.992483139038086, + "learning_rate": 3.4074842477332105e-05, + "log_odds_chosen": 10.378591537475586, + "log_odds_ratio": -8.182358578778803e-05, + "logits/chosen": -0.544563889503479, + "logits/rejected": -0.5972481966018677, + "logps/chosen": -0.00017176619439851493, + "logps/rejected": -1.5616424083709717, + "loss": 1.4838, + "nll_loss": 0.3709515631198883, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7176620531245135e-05, + "rewards/margins": 0.1561470627784729, + "rewards/rejected": -0.1561642289161682, + "step": 5591 + }, + { + "epoch": 3.867219917012448, + "grad_norm": 7.904953479766846, + "learning_rate": 3.407100046104196e-05, + "log_odds_chosen": 10.234663009643555, + "log_odds_ratio": -0.0001564006961416453, + "logits/chosen": -0.7005908489227295, + "logits/rejected": -0.7648482322692871, + "logps/chosen": -0.0009522702312096953, + "logps/rejected": -2.065946102142334, + "loss": 0.8907, + "nll_loss": 0.22265967726707458, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.522702021058649e-05, + "rewards/margins": 0.2064993679523468, + "rewards/rejected": -0.20659461617469788, + "step": 5592 + }, + { + "epoch": 3.867911479944675, + "grad_norm": 8.900137901306152, + "learning_rate": 3.406715844475181e-05, + "log_odds_chosen": 8.831513404846191, + "log_odds_ratio": -0.010963935405015945, + "logits/chosen": -0.5970246195793152, + "logits/rejected": -0.7142374515533447, + "logps/chosen": -0.020394207909703255, + "logps/rejected": -1.9975173473358154, + "loss": 1.9492, + "nll_loss": 0.48620539903640747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002039420884102583, + "rewards/margins": 0.19771233201026917, + "rewards/rejected": -0.19975173473358154, + "step": 5593 + }, + { + "epoch": 3.868603042876902, + "grad_norm": 10.622045516967773, + "learning_rate": 3.4063316428461655e-05, + "log_odds_chosen": 9.73248291015625, + "log_odds_ratio": -0.0004534787149168551, + "logits/chosen": -0.5400159358978271, + "logits/rejected": -0.6580114364624023, + "logps/chosen": -0.0007937573827803135, + "logps/rejected": -2.3123836517333984, + "loss": 2.226, + "nll_loss": 0.556454062461853, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.937574264360592e-05, + "rewards/margins": 0.23115897178649902, + "rewards/rejected": -0.23123835027217865, + "step": 5594 + }, + { + "epoch": 3.8692946058091287, + "grad_norm": 9.243762016296387, + "learning_rate": 3.405947441217151e-05, + "log_odds_chosen": 8.463890075683594, + "log_odds_ratio": -0.002155021531507373, + "logits/chosen": -0.5418992638587952, + "logits/rejected": -0.5896996259689331, + "logps/chosen": -0.011017933487892151, + "logps/rejected": -2.1293439865112305, + "loss": 1.3508, + "nll_loss": 0.3374752402305603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011017934884876013, + "rewards/margins": 0.21183261275291443, + "rewards/rejected": -0.21293440461158752, + "step": 5595 + }, + { + "epoch": 3.8699861687413555, + "grad_norm": 21.918975830078125, + "learning_rate": 3.405563239588136e-05, + "log_odds_chosen": 7.5077643394470215, + "log_odds_ratio": -0.1952972561120987, + "logits/chosen": -0.5486396551132202, + "logits/rejected": -0.5754981637001038, + "logps/chosen": -0.025723986327648163, + "logps/rejected": -1.0680969953536987, + "loss": 2.4363, + "nll_loss": 0.5895346999168396, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025723983999341726, + "rewards/margins": 0.10423729568719864, + "rewards/rejected": -0.10680970549583435, + "step": 5596 + }, + { + "epoch": 3.8706777316735823, + "grad_norm": 10.214221000671387, + "learning_rate": 3.4051790379591206e-05, + "log_odds_chosen": 8.93708324432373, + "log_odds_ratio": -0.011676867492496967, + "logits/chosen": -0.7973469495773315, + "logits/rejected": -0.8488380312919617, + "logps/chosen": -0.037191543728113174, + "logps/rejected": -1.7417047023773193, + "loss": 1.0208, + "nll_loss": 0.25402408838272095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037191545125097036, + "rewards/margins": 0.1704513281583786, + "rewards/rejected": -0.17417047917842865, + "step": 5597 + }, + { + "epoch": 3.871369294605809, + "grad_norm": 8.746747016906738, + "learning_rate": 3.4047948363301065e-05, + "log_odds_chosen": 10.5672607421875, + "log_odds_ratio": -7.664141594432294e-05, + "logits/chosen": -0.3067360818386078, + "logits/rejected": -0.42486873269081116, + "logps/chosen": -0.0032729327213019133, + "logps/rejected": -2.47634220123291, + "loss": 1.5306, + "nll_loss": 0.3826458156108856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032729326630942523, + "rewards/margins": 0.2473069131374359, + "rewards/rejected": -0.24763420224189758, + "step": 5598 + }, + { + "epoch": 3.872060857538036, + "grad_norm": 13.36917781829834, + "learning_rate": 3.404410634701091e-05, + "log_odds_chosen": 9.875089645385742, + "log_odds_ratio": -0.02879687026143074, + "logits/chosen": -0.45609021186828613, + "logits/rejected": -0.5271731615066528, + "logps/chosen": -0.06482497602701187, + "logps/rejected": -1.9527268409729004, + "loss": 1.3291, + "nll_loss": 0.329405277967453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0064824968576431274, + "rewards/margins": 0.1887901872396469, + "rewards/rejected": -0.19527268409729004, + "step": 5599 + }, + { + "epoch": 3.872752420470263, + "grad_norm": 8.476516723632812, + "learning_rate": 3.404026433072076e-05, + "log_odds_chosen": 9.027252197265625, + "log_odds_ratio": -0.0002830424637068063, + "logits/chosen": -0.8720872402191162, + "logits/rejected": -0.9834346771240234, + "logps/chosen": -0.010724175721406937, + "logps/rejected": -1.8943570852279663, + "loss": 1.5209, + "nll_loss": 0.3802030384540558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001072417595423758, + "rewards/margins": 0.18836328387260437, + "rewards/rejected": -0.1894357055425644, + "step": 5600 + }, + { + "epoch": 3.8734439834024896, + "grad_norm": 8.783854484558105, + "learning_rate": 3.4036422314430616e-05, + "log_odds_chosen": 9.651050567626953, + "log_odds_ratio": -0.0009334798669442534, + "logits/chosen": -0.331780344247818, + "logits/rejected": -0.4134882092475891, + "logps/chosen": -0.0129983089864254, + "logps/rejected": -2.7373316287994385, + "loss": 1.3279, + "nll_loss": 0.3318936228752136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001299830968491733, + "rewards/margins": 0.272433340549469, + "rewards/rejected": -0.2737331986427307, + "step": 5601 + }, + { + "epoch": 3.8741355463347165, + "grad_norm": 4.210104942321777, + "learning_rate": 3.403258029814047e-05, + "log_odds_chosen": 9.92902660369873, + "log_odds_ratio": -0.000435270689195022, + "logits/chosen": -0.4320386052131653, + "logits/rejected": -0.5295803546905518, + "logps/chosen": -0.0008208720246329904, + "logps/rejected": -1.9246351718902588, + "loss": 0.8369, + "nll_loss": 0.20917457342147827, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.208720828406513e-05, + "rewards/margins": 0.19238142669200897, + "rewards/rejected": -0.19246351718902588, + "step": 5602 + }, + { + "epoch": 3.8748271092669433, + "grad_norm": 6.597437381744385, + "learning_rate": 3.4028738281850314e-05, + "log_odds_chosen": 8.948493957519531, + "log_odds_ratio": -0.0007399824680760503, + "logits/chosen": -0.055549319833517075, + "logits/rejected": -0.141621395945549, + "logps/chosen": -0.0026224683970212936, + "logps/rejected": -1.7882872819900513, + "loss": 0.897, + "nll_loss": 0.2241741567850113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002622468746267259, + "rewards/margins": 0.17856647074222565, + "rewards/rejected": -0.17882871627807617, + "step": 5603 + }, + { + "epoch": 3.87551867219917, + "grad_norm": 11.046610832214355, + "learning_rate": 3.4024896265560166e-05, + "log_odds_chosen": 9.968111038208008, + "log_odds_ratio": -0.00023064689594320953, + "logits/chosen": -0.9279760122299194, + "logits/rejected": -0.9948770403862, + "logps/chosen": -0.00037911301478743553, + "logps/rejected": -1.96274995803833, + "loss": 1.0898, + "nll_loss": 0.2724343538284302, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.79113043891266e-05, + "rewards/margins": 0.19623705744743347, + "rewards/rejected": -0.196274995803833, + "step": 5604 + }, + { + "epoch": 3.876210235131397, + "grad_norm": 6.500091075897217, + "learning_rate": 3.402105424927002e-05, + "log_odds_chosen": 8.721122741699219, + "log_odds_ratio": -0.0005944301374256611, + "logits/chosen": -0.755002498626709, + "logits/rejected": -0.8262910842895508, + "logps/chosen": -0.001076082931831479, + "logps/rejected": -1.5059388875961304, + "loss": 1.3978, + "nll_loss": 0.34940218925476074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000107608299003914, + "rewards/margins": 0.1504862755537033, + "rewards/rejected": -0.15059387683868408, + "step": 5605 + }, + { + "epoch": 3.876901798063624, + "grad_norm": 5.433056831359863, + "learning_rate": 3.4017212232979864e-05, + "log_odds_chosen": 8.765649795532227, + "log_odds_ratio": -0.0014494097558781505, + "logits/chosen": -0.501061737537384, + "logits/rejected": -0.5340345501899719, + "logps/chosen": -0.001163999317213893, + "logps/rejected": -1.2552547454833984, + "loss": 1.1483, + "nll_loss": 0.28693222999572754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001163999259006232, + "rewards/margins": 0.1254090666770935, + "rewards/rejected": -0.12552547454833984, + "step": 5606 + }, + { + "epoch": 3.8775933609958506, + "grad_norm": 8.457118034362793, + "learning_rate": 3.4013370216689724e-05, + "log_odds_chosen": 10.066720008850098, + "log_odds_ratio": -0.0003545557556208223, + "logits/chosen": -0.19247552752494812, + "logits/rejected": -0.2578640580177307, + "logps/chosen": -0.0016848837258294225, + "logps/rejected": -2.502847194671631, + "loss": 1.6179, + "nll_loss": 0.40442922711372375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016848836094141006, + "rewards/margins": 0.250116229057312, + "rewards/rejected": -0.25028473138809204, + "step": 5607 + }, + { + "epoch": 3.8782849239280774, + "grad_norm": 8.580554962158203, + "learning_rate": 3.400952820039957e-05, + "log_odds_chosen": 11.60051441192627, + "log_odds_ratio": -1.2054061699018348e-05, + "logits/chosen": -0.5695351362228394, + "logits/rejected": -0.6658599972724915, + "logps/chosen": -0.00024254321760963649, + "logps/rejected": -2.7273976802825928, + "loss": 0.9558, + "nll_loss": 0.23895087838172913, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4254321033367887e-05, + "rewards/margins": 0.2727155387401581, + "rewards/rejected": -0.27273979783058167, + "step": 5608 + }, + { + "epoch": 3.8789764868603043, + "grad_norm": 17.818029403686523, + "learning_rate": 3.400568618410942e-05, + "log_odds_chosen": 10.083698272705078, + "log_odds_ratio": -0.00014503306010738015, + "logits/chosen": -0.4443364143371582, + "logits/rejected": -0.5567833185195923, + "logps/chosen": -0.000489625264890492, + "logps/rejected": -2.1325652599334717, + "loss": 1.8269, + "nll_loss": 0.45671117305755615, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.896252721664496e-05, + "rewards/margins": 0.21320757269859314, + "rewards/rejected": -0.21325653791427612, + "step": 5609 + }, + { + "epoch": 3.879668049792531, + "grad_norm": 15.383901596069336, + "learning_rate": 3.4001844167819274e-05, + "log_odds_chosen": 9.334062576293945, + "log_odds_ratio": -0.0005784723325632513, + "logits/chosen": -0.7563439607620239, + "logits/rejected": -0.778277575969696, + "logps/chosen": -0.0020408525597304106, + "logps/rejected": -2.2630202770233154, + "loss": 1.3577, + "nll_loss": 0.3393765985965729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020408528507687151, + "rewards/margins": 0.226097971200943, + "rewards/rejected": -0.22630205750465393, + "step": 5610 + }, + { + "epoch": 3.880359612724758, + "grad_norm": 13.376522064208984, + "learning_rate": 3.3998002151529127e-05, + "log_odds_chosen": 9.306199073791504, + "log_odds_ratio": -0.0012351719196885824, + "logits/chosen": -0.57845139503479, + "logits/rejected": -0.6579585075378418, + "logps/chosen": -0.002754670102149248, + "logps/rejected": -1.6692554950714111, + "loss": 1.4071, + "nll_loss": 0.3516432046890259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027546699857339263, + "rewards/margins": 0.16665008664131165, + "rewards/rejected": -0.1669255495071411, + "step": 5611 + }, + { + "epoch": 3.8810511756569848, + "grad_norm": 10.505668640136719, + "learning_rate": 3.399416013523897e-05, + "log_odds_chosen": 9.230743408203125, + "log_odds_ratio": -0.0024349126033484936, + "logits/chosen": -0.3715485632419586, + "logits/rejected": -0.4621528387069702, + "logps/chosen": -0.014623850584030151, + "logps/rejected": -1.5341432094573975, + "loss": 1.193, + "nll_loss": 0.29800331592559814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014623850584030151, + "rewards/margins": 0.15195192396640778, + "rewards/rejected": -0.1534143090248108, + "step": 5612 + }, + { + "epoch": 3.8817427385892116, + "grad_norm": 6.59618616104126, + "learning_rate": 3.3990318118948825e-05, + "log_odds_chosen": 10.229150772094727, + "log_odds_ratio": -4.0353632357437164e-05, + "logits/chosen": -0.8435558080673218, + "logits/rejected": -0.8600744605064392, + "logps/chosen": -9.384715667692944e-05, + "logps/rejected": -1.167233943939209, + "loss": 0.885, + "nll_loss": 0.2212393581867218, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.384715667692944e-06, + "rewards/margins": 0.11671401560306549, + "rewards/rejected": -0.11672340333461761, + "step": 5613 + }, + { + "epoch": 3.8824343015214384, + "grad_norm": 8.425680160522461, + "learning_rate": 3.398647610265868e-05, + "log_odds_chosen": 9.704595565795898, + "log_odds_ratio": -0.0006063667242415249, + "logits/chosen": -0.3385249972343445, + "logits/rejected": -0.4208109974861145, + "logps/chosen": -0.0007622221601195633, + "logps/rejected": -1.711651086807251, + "loss": 1.4288, + "nll_loss": 0.35713446140289307, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.622221164638177e-05, + "rewards/margins": 0.17108888924121857, + "rewards/rejected": -0.1711651086807251, + "step": 5614 + }, + { + "epoch": 3.8831258644536653, + "grad_norm": 7.727184295654297, + "learning_rate": 3.398263408636852e-05, + "log_odds_chosen": 8.956552505493164, + "log_odds_ratio": -0.020546168088912964, + "logits/chosen": -0.7198243737220764, + "logits/rejected": -0.7512015700340271, + "logps/chosen": -0.008983590640127659, + "logps/rejected": -2.044522762298584, + "loss": 2.3386, + "nll_loss": 0.5826031565666199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008983589941635728, + "rewards/margins": 0.2035539150238037, + "rewards/rejected": -0.2044522762298584, + "step": 5615 + }, + { + "epoch": 3.883817427385892, + "grad_norm": 7.387112140655518, + "learning_rate": 3.397879207007838e-05, + "log_odds_chosen": 10.073363304138184, + "log_odds_ratio": -5.7628094509709626e-05, + "logits/chosen": -0.7603466510772705, + "logits/rejected": -0.8240086436271667, + "logps/chosen": -0.00038087720167823136, + "logps/rejected": -1.7687740325927734, + "loss": 1.1982, + "nll_loss": 0.2995460629463196, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.80877208954189e-05, + "rewards/margins": 0.17683932185173035, + "rewards/rejected": -0.17687739431858063, + "step": 5616 + }, + { + "epoch": 3.884508990318119, + "grad_norm": 7.941006183624268, + "learning_rate": 3.397495005378823e-05, + "log_odds_chosen": 11.146267890930176, + "log_odds_ratio": -2.293909346917644e-05, + "logits/chosen": -0.4033728837966919, + "logits/rejected": -0.5747873783111572, + "logps/chosen": -0.00014363299123942852, + "logps/rejected": -2.323676109313965, + "loss": 1.2775, + "nll_loss": 0.3193705677986145, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4363298760144971e-05, + "rewards/margins": 0.23235327005386353, + "rewards/rejected": -0.2323676347732544, + "step": 5617 + }, + { + "epoch": 3.8852005532503457, + "grad_norm": 16.74596405029297, + "learning_rate": 3.397110803749808e-05, + "log_odds_chosen": 6.846477508544922, + "log_odds_ratio": -0.2526886761188507, + "logits/chosen": -0.7779616117477417, + "logits/rejected": -0.7846757769584656, + "logps/chosen": -0.03462180495262146, + "logps/rejected": -1.091729760169983, + "loss": 1.6382, + "nll_loss": 0.3842869997024536, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0034621807280927896, + "rewards/margins": 0.10571078956127167, + "rewards/rejected": -0.10917297005653381, + "step": 5618 + }, + { + "epoch": 3.8858921161825726, + "grad_norm": 12.336228370666504, + "learning_rate": 3.396726602120793e-05, + "log_odds_chosen": 10.408349990844727, + "log_odds_ratio": -5.093478102935478e-05, + "logits/chosen": -0.46157726645469666, + "logits/rejected": -0.5192270278930664, + "logps/chosen": -0.0016178932273760438, + "logps/rejected": -1.9334490299224854, + "loss": 1.9929, + "nll_loss": 0.49821946024894714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001617893431102857, + "rewards/margins": 0.19318309426307678, + "rewards/rejected": -0.19334489107131958, + "step": 5619 + }, + { + "epoch": 3.8865836791147994, + "grad_norm": 10.457387924194336, + "learning_rate": 3.3963424004917785e-05, + "log_odds_chosen": 8.117521286010742, + "log_odds_ratio": -0.010431942529976368, + "logits/chosen": -0.5782761573791504, + "logits/rejected": -0.6354106664657593, + "logps/chosen": -0.13770128786563873, + "logps/rejected": -1.486854910850525, + "loss": 1.3899, + "nll_loss": 0.3464207351207733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013770130462944508, + "rewards/margins": 0.13491535186767578, + "rewards/rejected": -0.148685485124588, + "step": 5620 + }, + { + "epoch": 3.8872752420470262, + "grad_norm": 6.53814172744751, + "learning_rate": 3.395958198862763e-05, + "log_odds_chosen": 10.368768692016602, + "log_odds_ratio": -6.338141247397289e-05, + "logits/chosen": -0.7327615022659302, + "logits/rejected": -0.753582775592804, + "logps/chosen": -0.00012228148989379406, + "logps/rejected": -1.6724853515625, + "loss": 1.0725, + "nll_loss": 0.26810744404792786, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2228148079884704e-05, + "rewards/margins": 0.1672362983226776, + "rewards/rejected": -0.16724853217601776, + "step": 5621 + }, + { + "epoch": 3.887966804979253, + "grad_norm": 15.1021728515625, + "learning_rate": 3.395573997233748e-05, + "log_odds_chosen": 7.218578338623047, + "log_odds_ratio": -0.2036381959915161, + "logits/chosen": -0.5184769034385681, + "logits/rejected": -0.6829452514648438, + "logps/chosen": -0.0356486439704895, + "logps/rejected": -1.2603774070739746, + "loss": 1.8508, + "nll_loss": 0.44233378767967224, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003564863931387663, + "rewards/margins": 0.12247288227081299, + "rewards/rejected": -0.12603774666786194, + "step": 5622 + }, + { + "epoch": 3.88865836791148, + "grad_norm": 8.256840705871582, + "learning_rate": 3.3951897956047336e-05, + "log_odds_chosen": 9.119070053100586, + "log_odds_ratio": -0.0005543202278204262, + "logits/chosen": -0.4936408996582031, + "logits/rejected": -0.5425743460655212, + "logps/chosen": -0.02127469703555107, + "logps/rejected": -2.053802967071533, + "loss": 1.9283, + "nll_loss": 0.48202627897262573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002127469517290592, + "rewards/margins": 0.20325282216072083, + "rewards/rejected": -0.20538030564785004, + "step": 5623 + }, + { + "epoch": 3.8893499308437067, + "grad_norm": 7.375672817230225, + "learning_rate": 3.394805593975718e-05, + "log_odds_chosen": 10.586771011352539, + "log_odds_ratio": -4.97624023410026e-05, + "logits/chosen": -0.4665629267692566, + "logits/rejected": -0.4793822765350342, + "logps/chosen": -0.0001592121843714267, + "logps/rejected": -1.8775938749313354, + "loss": 1.4208, + "nll_loss": 0.3551906645298004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.592121770954691e-05, + "rewards/margins": 0.18774347007274628, + "rewards/rejected": -0.1877593845129013, + "step": 5624 + }, + { + "epoch": 3.8900414937759336, + "grad_norm": 8.654998779296875, + "learning_rate": 3.394421392346704e-05, + "log_odds_chosen": 10.283309936523438, + "log_odds_ratio": -0.0001582879776833579, + "logits/chosen": -0.37717336416244507, + "logits/rejected": -0.5226336717605591, + "logps/chosen": -0.000453198270406574, + "logps/rejected": -2.113046646118164, + "loss": 1.0426, + "nll_loss": 0.2606269121170044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.531982995104045e-05, + "rewards/margins": 0.21125935018062592, + "rewards/rejected": -0.2113046795129776, + "step": 5625 + }, + { + "epoch": 3.8907330567081604, + "grad_norm": 8.630017280578613, + "learning_rate": 3.3940371907176886e-05, + "log_odds_chosen": 8.697437286376953, + "log_odds_ratio": -0.0015453390078619123, + "logits/chosen": -0.9022117853164673, + "logits/rejected": -0.905013918876648, + "logps/chosen": -0.011609883978962898, + "logps/rejected": -1.6822335720062256, + "loss": 1.2948, + "nll_loss": 0.32355785369873047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001160988351330161, + "rewards/margins": 0.16706237196922302, + "rewards/rejected": -0.16822335124015808, + "step": 5626 + }, + { + "epoch": 3.891424619640387, + "grad_norm": 7.9558424949646, + "learning_rate": 3.393652989088674e-05, + "log_odds_chosen": 8.38878345489502, + "log_odds_ratio": -0.04084280505776405, + "logits/chosen": -0.7905771732330322, + "logits/rejected": -0.8350014090538025, + "logps/chosen": -0.04026196897029877, + "logps/rejected": -1.4470926523208618, + "loss": 1.4188, + "nll_loss": 0.35061413049697876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004026196897029877, + "rewards/margins": 0.14068305492401123, + "rewards/rejected": -0.1447092592716217, + "step": 5627 + }, + { + "epoch": 3.892116182572614, + "grad_norm": 11.995192527770996, + "learning_rate": 3.393268787459659e-05, + "log_odds_chosen": 9.498333930969238, + "log_odds_ratio": -0.000704533071257174, + "logits/chosen": -0.6381096839904785, + "logits/rejected": -0.6799237132072449, + "logps/chosen": -0.0012884100433439016, + "logps/rejected": -1.744314432144165, + "loss": 2.1094, + "nll_loss": 0.5272815823554993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001288410130655393, + "rewards/margins": 0.1743026077747345, + "rewards/rejected": -0.1744314581155777, + "step": 5628 + }, + { + "epoch": 3.892807745504841, + "grad_norm": 9.74692153930664, + "learning_rate": 3.3928845858306443e-05, + "log_odds_chosen": 9.394281387329102, + "log_odds_ratio": -0.007654293440282345, + "logits/chosen": -0.483235627412796, + "logits/rejected": -0.5125336050987244, + "logps/chosen": -0.004396006464958191, + "logps/rejected": -1.7465304136276245, + "loss": 1.774, + "nll_loss": 0.4427341818809509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000439600640675053, + "rewards/margins": 0.1742134392261505, + "rewards/rejected": -0.1746530383825302, + "step": 5629 + }, + { + "epoch": 3.8934993084370677, + "grad_norm": 12.278393745422363, + "learning_rate": 3.392500384201629e-05, + "log_odds_chosen": 10.144163131713867, + "log_odds_ratio": -0.0001901520590763539, + "logits/chosen": -0.6058459281921387, + "logits/rejected": -0.6597455143928528, + "logps/chosen": -0.00046932417899370193, + "logps/rejected": -1.7287942171096802, + "loss": 1.0825, + "nll_loss": 0.2706070840358734, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.693241498898715e-05, + "rewards/margins": 0.17283248901367188, + "rewards/rejected": -0.1728794276714325, + "step": 5630 + }, + { + "epoch": 3.8941908713692945, + "grad_norm": 5.5625081062316895, + "learning_rate": 3.392116182572614e-05, + "log_odds_chosen": 8.977718353271484, + "log_odds_ratio": -0.04539618268609047, + "logits/chosen": -0.13042470812797546, + "logits/rejected": -0.2869882583618164, + "logps/chosen": -0.01703779771924019, + "logps/rejected": -1.7658898830413818, + "loss": 0.8727, + "nll_loss": 0.21363988518714905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017037795623764396, + "rewards/margins": 0.17488521337509155, + "rewards/rejected": -0.1765889972448349, + "step": 5631 + }, + { + "epoch": 3.8948824343015214, + "grad_norm": 14.509231567382812, + "learning_rate": 3.3917319809435994e-05, + "log_odds_chosen": 8.715024948120117, + "log_odds_ratio": -0.005328967701643705, + "logits/chosen": -0.4364680051803589, + "logits/rejected": -0.4962000846862793, + "logps/chosen": -0.03114049881696701, + "logps/rejected": -1.6229883432388306, + "loss": 2.0157, + "nll_loss": 0.5033845901489258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031140500213950872, + "rewards/margins": 0.1591847836971283, + "rewards/rejected": -0.16229882836341858, + "step": 5632 + }, + { + "epoch": 3.895573997233748, + "grad_norm": 4.437415599822998, + "learning_rate": 3.391347779314584e-05, + "log_odds_chosen": 9.241409301757812, + "log_odds_ratio": -0.0003560621989890933, + "logits/chosen": -0.6067017316818237, + "logits/rejected": -0.6117865443229675, + "logps/chosen": -0.020522449165582657, + "logps/rejected": -2.357513904571533, + "loss": 1.061, + "nll_loss": 0.2652171552181244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020522449631243944, + "rewards/margins": 0.23369914293289185, + "rewards/rejected": -0.23575140535831451, + "step": 5633 + }, + { + "epoch": 3.896265560165975, + "grad_norm": 96.32704162597656, + "learning_rate": 3.39096357768557e-05, + "log_odds_chosen": 8.560503959655762, + "log_odds_ratio": -0.544462263584137, + "logits/chosen": -0.6925035119056702, + "logits/rejected": -0.7223390936851501, + "logps/chosen": -0.07953737676143646, + "logps/rejected": -1.662315011024475, + "loss": 2.0527, + "nll_loss": 0.45873352885246277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007953736931085587, + "rewards/margins": 0.1582777500152588, + "rewards/rejected": -0.16623149812221527, + "step": 5634 + }, + { + "epoch": 3.896957123098202, + "grad_norm": 4.607226848602295, + "learning_rate": 3.3905793760565545e-05, + "log_odds_chosen": 9.335061073303223, + "log_odds_ratio": -0.0002637306461110711, + "logits/chosen": -0.36337152123451233, + "logits/rejected": -0.4686540365219116, + "logps/chosen": -0.008282117545604706, + "logps/rejected": -1.8453588485717773, + "loss": 1.0499, + "nll_loss": 0.2624465525150299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000828211719635874, + "rewards/margins": 0.18370766937732697, + "rewards/rejected": -0.18453587591648102, + "step": 5635 + }, + { + "epoch": 3.8976486860304287, + "grad_norm": 10.918571472167969, + "learning_rate": 3.39019517442754e-05, + "log_odds_chosen": 7.754406929016113, + "log_odds_ratio": -0.03191447630524635, + "logits/chosen": -0.6927816271781921, + "logits/rejected": -0.7175300121307373, + "logps/chosen": -0.01036337111145258, + "logps/rejected": -1.0414409637451172, + "loss": 1.9009, + "nll_loss": 0.47203582525253296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001036337111145258, + "rewards/margins": 0.103107750415802, + "rewards/rejected": -0.10414409637451172, + "step": 5636 + }, + { + "epoch": 3.8983402489626555, + "grad_norm": 10.202701568603516, + "learning_rate": 3.389810972798525e-05, + "log_odds_chosen": 10.172609329223633, + "log_odds_ratio": -0.0006169604021124542, + "logits/chosen": -0.8397464156150818, + "logits/rejected": -0.9218321442604065, + "logps/chosen": -0.0015594592550769448, + "logps/rejected": -2.2077858448028564, + "loss": 1.175, + "nll_loss": 0.293697714805603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001559459196869284, + "rewards/margins": 0.22062262892723083, + "rewards/rejected": -0.22077858448028564, + "step": 5637 + }, + { + "epoch": 3.8990318118948823, + "grad_norm": 69.57799530029297, + "learning_rate": 3.38942677116951e-05, + "log_odds_chosen": 7.8932390213012695, + "log_odds_ratio": -0.1768307238817215, + "logits/chosen": -0.665978729724884, + "logits/rejected": -0.6855136752128601, + "logps/chosen": -0.03082280419766903, + "logps/rejected": -2.3422088623046875, + "loss": 1.8585, + "nll_loss": 0.4469505846500397, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003082280745729804, + "rewards/margins": 0.23113863170146942, + "rewards/rejected": -0.23422090709209442, + "step": 5638 + }, + { + "epoch": 3.899723374827109, + "grad_norm": 10.527148246765137, + "learning_rate": 3.389042569540495e-05, + "log_odds_chosen": 10.70540714263916, + "log_odds_ratio": -9.131423576036468e-05, + "logits/chosen": -0.9421408176422119, + "logits/rejected": -0.9778171181678772, + "logps/chosen": -0.00026267359498888254, + "logps/rejected": -2.4190683364868164, + "loss": 1.4453, + "nll_loss": 0.3613080680370331, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6267360226484016e-05, + "rewards/margins": 0.24188058078289032, + "rewards/rejected": -0.24190685153007507, + "step": 5639 + }, + { + "epoch": 3.900414937759336, + "grad_norm": 6.1640825271606445, + "learning_rate": 3.38865836791148e-05, + "log_odds_chosen": 8.700206756591797, + "log_odds_ratio": -0.06421246379613876, + "logits/chosen": -0.17634619772434235, + "logits/rejected": -0.28321805596351624, + "logps/chosen": -0.010652851313352585, + "logps/rejected": -1.4425766468048096, + "loss": 1.4418, + "nll_loss": 0.3540385365486145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001065285294316709, + "rewards/margins": 0.1431923657655716, + "rewards/rejected": -0.14425766468048096, + "step": 5640 + }, + { + "epoch": 3.901106500691563, + "grad_norm": 7.589430332183838, + "learning_rate": 3.388274166282465e-05, + "log_odds_chosen": 9.661093711853027, + "log_odds_ratio": -0.0003108852542936802, + "logits/chosen": -0.6649161577224731, + "logits/rejected": -0.685472309589386, + "logps/chosen": -0.000709467101842165, + "logps/rejected": -1.601885437965393, + "loss": 0.9837, + "nll_loss": 0.2459021657705307, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.094671309459955e-05, + "rewards/margins": 0.16011759638786316, + "rewards/rejected": -0.16018852591514587, + "step": 5641 + }, + { + "epoch": 3.9017980636237897, + "grad_norm": 10.761141777038574, + "learning_rate": 3.38788996465345e-05, + "log_odds_chosen": 10.6596097946167, + "log_odds_ratio": -5.332134969648905e-05, + "logits/chosen": -0.5889623165130615, + "logits/rejected": -0.6932306885719299, + "logps/chosen": -0.00045756419422104955, + "logps/rejected": -2.1941545009613037, + "loss": 1.7087, + "nll_loss": 0.4271653890609741, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.575642378767952e-05, + "rewards/margins": 0.21936970949172974, + "rewards/rejected": -0.21941545605659485, + "step": 5642 + }, + { + "epoch": 3.9024896265560165, + "grad_norm": 9.148547172546387, + "learning_rate": 3.387505763024436e-05, + "log_odds_chosen": 9.726945877075195, + "log_odds_ratio": -0.048618048429489136, + "logits/chosen": -0.5311049222946167, + "logits/rejected": -0.5555057525634766, + "logps/chosen": -0.008680197410285473, + "logps/rejected": -1.5641781091690063, + "loss": 1.1858, + "nll_loss": 0.29159092903137207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008680198225192726, + "rewards/margins": 0.1555497944355011, + "rewards/rejected": -0.1564178168773651, + "step": 5643 + }, + { + "epoch": 3.9031811894882433, + "grad_norm": 10.668489456176758, + "learning_rate": 3.38712156139542e-05, + "log_odds_chosen": 8.888282775878906, + "log_odds_ratio": -0.014365678653120995, + "logits/chosen": -0.7868924736976624, + "logits/rejected": -0.8093586564064026, + "logps/chosen": -0.006576868239790201, + "logps/rejected": -1.955217719078064, + "loss": 1.3799, + "nll_loss": 0.34352636337280273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006576868472620845, + "rewards/margins": 0.19486409425735474, + "rewards/rejected": -0.1955217719078064, + "step": 5644 + }, + { + "epoch": 3.90387275242047, + "grad_norm": 9.662392616271973, + "learning_rate": 3.3867373597664055e-05, + "log_odds_chosen": 9.729461669921875, + "log_odds_ratio": -0.004915539175271988, + "logits/chosen": -0.8054319620132446, + "logits/rejected": -0.8861163854598999, + "logps/chosen": -0.0031532247085124254, + "logps/rejected": -1.9161059856414795, + "loss": 1.1258, + "nll_loss": 0.2809663712978363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031532245338894427, + "rewards/margins": 0.19129526615142822, + "rewards/rejected": -0.19161058962345123, + "step": 5645 + }, + { + "epoch": 3.904564315352697, + "grad_norm": 12.379378318786621, + "learning_rate": 3.386353158137391e-05, + "log_odds_chosen": 8.922910690307617, + "log_odds_ratio": -0.15809865295886993, + "logits/chosen": -0.28489434719085693, + "logits/rejected": -0.35203734040260315, + "logps/chosen": -0.02762320078909397, + "logps/rejected": -2.4533705711364746, + "loss": 1.3379, + "nll_loss": 0.3186749815940857, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0027623199857771397, + "rewards/margins": 0.2425747513771057, + "rewards/rejected": -0.24533706903457642, + "step": 5646 + }, + { + "epoch": 3.905255878284924, + "grad_norm": 12.518477439880371, + "learning_rate": 3.385968956508376e-05, + "log_odds_chosen": 9.158768653869629, + "log_odds_ratio": -0.0021483541931957006, + "logits/chosen": -0.23381444811820984, + "logits/rejected": -0.4085695147514343, + "logps/chosen": -0.021755579859018326, + "logps/rejected": -2.0978522300720215, + "loss": 1.7394, + "nll_loss": 0.4346234202384949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021755581256002188, + "rewards/margins": 0.20760969817638397, + "rewards/rejected": -0.20978525280952454, + "step": 5647 + }, + { + "epoch": 3.9059474412171507, + "grad_norm": 8.521187782287598, + "learning_rate": 3.3855847548793606e-05, + "log_odds_chosen": 10.767740249633789, + "log_odds_ratio": -8.845600677886978e-05, + "logits/chosen": -0.3243058919906616, + "logits/rejected": -0.4415555000305176, + "logps/chosen": -0.0006823982112109661, + "logps/rejected": -2.7912864685058594, + "loss": 1.1311, + "nll_loss": 0.2827630639076233, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823982403147966e-05, + "rewards/margins": 0.279060423374176, + "rewards/rejected": -0.27912867069244385, + "step": 5648 + }, + { + "epoch": 3.9066390041493775, + "grad_norm": 7.364728927612305, + "learning_rate": 3.385200553250346e-05, + "log_odds_chosen": 9.6636962890625, + "log_odds_ratio": -0.00014321855269372463, + "logits/chosen": -0.31962475180625916, + "logits/rejected": -0.39147666096687317, + "logps/chosen": -0.01126229576766491, + "logps/rejected": -2.1666345596313477, + "loss": 1.6725, + "nll_loss": 0.4181104600429535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001126229646615684, + "rewards/margins": 0.21553722023963928, + "rewards/rejected": -0.2166634500026703, + "step": 5649 + }, + { + "epoch": 3.9073305670816043, + "grad_norm": 9.632866859436035, + "learning_rate": 3.384816351621331e-05, + "log_odds_chosen": 6.390057563781738, + "log_odds_ratio": -0.09337200969457626, + "logits/chosen": -0.6340508460998535, + "logits/rejected": -0.7051093578338623, + "logps/chosen": -0.03799796849489212, + "logps/rejected": -1.526399850845337, + "loss": 1.7607, + "nll_loss": 0.4308300018310547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037997972685843706, + "rewards/margins": 0.14884020388126373, + "rewards/rejected": -0.1526399850845337, + "step": 5650 + }, + { + "epoch": 3.908022130013831, + "grad_norm": 12.593082427978516, + "learning_rate": 3.3844321499923156e-05, + "log_odds_chosen": 11.50406265258789, + "log_odds_ratio": -1.4786578503844794e-05, + "logits/chosen": -0.3148428499698639, + "logits/rejected": -0.37387746572494507, + "logps/chosen": -9.004796447698027e-05, + "logps/rejected": -2.1949687004089355, + "loss": 1.3158, + "nll_loss": 0.32895559072494507, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.004796083900146e-06, + "rewards/margins": 0.21948787569999695, + "rewards/rejected": -0.21949687600135803, + "step": 5651 + }, + { + "epoch": 3.908713692946058, + "grad_norm": 12.804461479187012, + "learning_rate": 3.3840479483633016e-05, + "log_odds_chosen": 10.317901611328125, + "log_odds_ratio": -0.00017403802485205233, + "logits/chosen": -0.42475345730781555, + "logits/rejected": -0.5792452096939087, + "logps/chosen": -0.0003738144878298044, + "logps/rejected": -1.867620587348938, + "loss": 1.8099, + "nll_loss": 0.45245620608329773, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7381450965767726e-05, + "rewards/margins": 0.1867246776819229, + "rewards/rejected": -0.18676206469535828, + "step": 5652 + }, + { + "epoch": 3.909405255878285, + "grad_norm": 5.101190090179443, + "learning_rate": 3.383663746734286e-05, + "log_odds_chosen": 8.503926277160645, + "log_odds_ratio": -0.0007011541747488081, + "logits/chosen": -0.4765605926513672, + "logits/rejected": -0.5443054437637329, + "logps/chosen": -0.02928958460688591, + "logps/rejected": -1.743721842765808, + "loss": 1.9917, + "nll_loss": 0.4978662133216858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029289585072547197, + "rewards/margins": 0.17144320905208588, + "rewards/rejected": -0.17437216639518738, + "step": 5653 + }, + { + "epoch": 3.9100968188105116, + "grad_norm": 8.05077075958252, + "learning_rate": 3.3832795451052714e-05, + "log_odds_chosen": 9.06122875213623, + "log_odds_ratio": -0.0043915510177612305, + "logits/chosen": -0.7249891757965088, + "logits/rejected": -0.7603964805603027, + "logps/chosen": -0.0036959440913051367, + "logps/rejected": -1.5966659784317017, + "loss": 1.4972, + "nll_loss": 0.3738667070865631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003695944033097476, + "rewards/margins": 0.15929700434207916, + "rewards/rejected": -0.15966659784317017, + "step": 5654 + }, + { + "epoch": 3.9107883817427385, + "grad_norm": 14.369111061096191, + "learning_rate": 3.3828953434762566e-05, + "log_odds_chosen": 8.608853340148926, + "log_odds_ratio": -0.20911157131195068, + "logits/chosen": -0.7546738386154175, + "logits/rejected": -0.8016392588615417, + "logps/chosen": -0.02957131341099739, + "logps/rejected": -1.8817180395126343, + "loss": 1.7217, + "nll_loss": 0.40951788425445557, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002957131713628769, + "rewards/margins": 0.18521468341350555, + "rewards/rejected": -0.18817181885242462, + "step": 5655 + }, + { + "epoch": 3.9114799446749653, + "grad_norm": 7.642223358154297, + "learning_rate": 3.382511141847242e-05, + "log_odds_chosen": 7.869269847869873, + "log_odds_ratio": -0.05908142402768135, + "logits/chosen": -0.5965639352798462, + "logits/rejected": -0.6018823981285095, + "logps/chosen": -0.016461383551359177, + "logps/rejected": -1.225979208946228, + "loss": 1.16, + "nll_loss": 0.2840805649757385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016461381455883384, + "rewards/margins": 0.12095178663730621, + "rewards/rejected": -0.12259792536497116, + "step": 5656 + }, + { + "epoch": 3.912171507607192, + "grad_norm": 8.684945106506348, + "learning_rate": 3.3821269402182264e-05, + "log_odds_chosen": 9.467185974121094, + "log_odds_ratio": -0.0003374728839844465, + "logits/chosen": -0.34399327635765076, + "logits/rejected": -0.42578768730163574, + "logps/chosen": -0.0021155558060854673, + "logps/rejected": -2.233706474304199, + "loss": 1.6564, + "nll_loss": 0.4140542149543762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021155556896701455, + "rewards/margins": 0.2231590747833252, + "rewards/rejected": -0.22337064146995544, + "step": 5657 + }, + { + "epoch": 3.912863070539419, + "grad_norm": 8.879938125610352, + "learning_rate": 3.381742738589212e-05, + "log_odds_chosen": 9.210071563720703, + "log_odds_ratio": -0.0010100032668560743, + "logits/chosen": -0.4090440571308136, + "logits/rejected": -0.537046492099762, + "logps/chosen": -0.006548542529344559, + "logps/rejected": -1.4390618801116943, + "loss": 1.4456, + "nll_loss": 0.36130374670028687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006548542296513915, + "rewards/margins": 0.14325134456157684, + "rewards/rejected": -0.14390620589256287, + "step": 5658 + }, + { + "epoch": 3.913554633471646, + "grad_norm": 15.874176025390625, + "learning_rate": 3.381358536960197e-05, + "log_odds_chosen": 9.65210247039795, + "log_odds_ratio": -0.00018606259254738688, + "logits/chosen": -0.16119661927223206, + "logits/rejected": -0.2798956632614136, + "logps/chosen": -0.0009072792017832398, + "logps/rejected": -1.4413468837738037, + "loss": 1.7459, + "nll_loss": 0.4364451766014099, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.072791726794094e-05, + "rewards/margins": 0.1440439522266388, + "rewards/rejected": -0.14413470029830933, + "step": 5659 + }, + { + "epoch": 3.9142461964038726, + "grad_norm": 8.610029220581055, + "learning_rate": 3.3809743353311815e-05, + "log_odds_chosen": 9.745826721191406, + "log_odds_ratio": -0.0004060302453581244, + "logits/chosen": -0.7324758172035217, + "logits/rejected": -0.7614333629608154, + "logps/chosen": -0.010122316889464855, + "logps/rejected": -1.9247747659683228, + "loss": 1.4778, + "nll_loss": 0.36940690875053406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010122316889464855, + "rewards/margins": 0.19146525859832764, + "rewards/rejected": -0.1924774944782257, + "step": 5660 + }, + { + "epoch": 3.9149377593360994, + "grad_norm": 10.20022201538086, + "learning_rate": 3.3805901337021674e-05, + "log_odds_chosen": 10.281898498535156, + "log_odds_ratio": -6.861681322334334e-05, + "logits/chosen": -0.3770533800125122, + "logits/rejected": -0.43447551131248474, + "logps/chosen": -0.00025260145775973797, + "logps/rejected": -2.160262107849121, + "loss": 1.2095, + "nll_loss": 0.3023737967014313, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5260145775973797e-05, + "rewards/margins": 0.21600095927715302, + "rewards/rejected": -0.2160262167453766, + "step": 5661 + }, + { + "epoch": 3.9156293222683263, + "grad_norm": 14.370477676391602, + "learning_rate": 3.380205932073152e-05, + "log_odds_chosen": 9.886101722717285, + "log_odds_ratio": -6.754438072675839e-05, + "logits/chosen": -0.8756242990493774, + "logits/rejected": -0.867035984992981, + "logps/chosen": -0.0002980373101308942, + "logps/rejected": -1.5816593170166016, + "loss": 3.0271, + "nll_loss": 0.756769061088562, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9803730285493657e-05, + "rewards/margins": 0.15813612937927246, + "rewards/rejected": -0.15816593170166016, + "step": 5662 + }, + { + "epoch": 3.916320885200553, + "grad_norm": 16.824203491210938, + "learning_rate": 3.379821730444137e-05, + "log_odds_chosen": 10.197331428527832, + "log_odds_ratio": -6.708221189910546e-05, + "logits/chosen": -0.6757776737213135, + "logits/rejected": -0.7043853998184204, + "logps/chosen": -0.0004643636057153344, + "logps/rejected": -2.069650411605835, + "loss": 1.6953, + "nll_loss": 0.4238058924674988, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6436362026724964e-05, + "rewards/margins": 0.2069185972213745, + "rewards/rejected": -0.20696504414081573, + "step": 5663 + }, + { + "epoch": 3.91701244813278, + "grad_norm": 9.39612102508545, + "learning_rate": 3.3794375288151225e-05, + "log_odds_chosen": 9.340601921081543, + "log_odds_ratio": -0.04161971062421799, + "logits/chosen": -0.47248268127441406, + "logits/rejected": -0.46865522861480713, + "logps/chosen": -0.009520137682557106, + "logps/rejected": -1.5743775367736816, + "loss": 1.4817, + "nll_loss": 0.366254061460495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009520137682557106, + "rewards/margins": 0.15648573637008667, + "rewards/rejected": -0.1574377417564392, + "step": 5664 + }, + { + "epoch": 3.9177040110650068, + "grad_norm": 12.282084465026855, + "learning_rate": 3.379053327186108e-05, + "log_odds_chosen": 9.280634880065918, + "log_odds_ratio": -0.0007975812768563628, + "logits/chosen": -1.0122318267822266, + "logits/rejected": -1.0686604976654053, + "logps/chosen": -0.004065337125211954, + "logps/rejected": -2.2676191329956055, + "loss": 1.3949, + "nll_loss": 0.3486459255218506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040653368341736495, + "rewards/margins": 0.2263554036617279, + "rewards/rejected": -0.22676193714141846, + "step": 5665 + }, + { + "epoch": 3.9183955739972336, + "grad_norm": 16.394123077392578, + "learning_rate": 3.378669125557092e-05, + "log_odds_chosen": 9.665855407714844, + "log_odds_ratio": -0.01791047677397728, + "logits/chosen": -0.394491970539093, + "logits/rejected": -0.51012122631073, + "logps/chosen": -0.10202533006668091, + "logps/rejected": -1.9771158695220947, + "loss": 1.2788, + "nll_loss": 0.31791606545448303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010202532634139061, + "rewards/margins": 0.18750904500484467, + "rewards/rejected": -0.19771158695220947, + "step": 5666 + }, + { + "epoch": 3.9190871369294604, + "grad_norm": 6.717284202575684, + "learning_rate": 3.3782849239280775e-05, + "log_odds_chosen": 9.576358795166016, + "log_odds_ratio": -0.0009835807140916586, + "logits/chosen": -0.6450543403625488, + "logits/rejected": -0.7207726836204529, + "logps/chosen": -0.0023794856388121843, + "logps/rejected": -1.3716387748718262, + "loss": 1.2093, + "nll_loss": 0.3022391200065613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023794855223968625, + "rewards/margins": 0.13692592084407806, + "rewards/rejected": -0.13716387748718262, + "step": 5667 + }, + { + "epoch": 3.9197786998616873, + "grad_norm": 7.360776901245117, + "learning_rate": 3.377900722299063e-05, + "log_odds_chosen": 9.744741439819336, + "log_odds_ratio": -0.0002790922881104052, + "logits/chosen": -0.5799486637115479, + "logits/rejected": -0.5917710661888123, + "logps/chosen": -0.0005377319175750017, + "logps/rejected": -1.6921937465667725, + "loss": 1.4989, + "nll_loss": 0.37468501925468445, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3773193940287456e-05, + "rewards/margins": 0.16916561126708984, + "rewards/rejected": -0.16921937465667725, + "step": 5668 + }, + { + "epoch": 3.920470262793914, + "grad_norm": 5.970398902893066, + "learning_rate": 3.377516520670047e-05, + "log_odds_chosen": 8.449975967407227, + "log_odds_ratio": -0.008219233714044094, + "logits/chosen": -0.6749115586280823, + "logits/rejected": -0.7539016008377075, + "logps/chosen": -0.004821880254894495, + "logps/rejected": -1.5712368488311768, + "loss": 1.6453, + "nll_loss": 0.41051411628723145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004821880429517478, + "rewards/margins": 0.15664149820804596, + "rewards/rejected": -0.15712368488311768, + "step": 5669 + }, + { + "epoch": 3.921161825726141, + "grad_norm": 11.404149055480957, + "learning_rate": 3.377132319041033e-05, + "log_odds_chosen": 8.849227905273438, + "log_odds_ratio": -0.0011710242833942175, + "logits/chosen": -0.6489973664283752, + "logits/rejected": -0.6897670030593872, + "logps/chosen": -0.028854500502347946, + "logps/rejected": -2.122948169708252, + "loss": 1.3919, + "nll_loss": 0.34785789251327515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028854499105364084, + "rewards/margins": 0.20940934121608734, + "rewards/rejected": -0.212294802069664, + "step": 5670 + }, + { + "epoch": 3.9218533886583677, + "grad_norm": 12.35291862487793, + "learning_rate": 3.376748117412018e-05, + "log_odds_chosen": 10.676984786987305, + "log_odds_ratio": -0.0003543601487763226, + "logits/chosen": -0.8129048347473145, + "logits/rejected": -0.8609099984169006, + "logps/chosen": -0.01920601725578308, + "logps/rejected": -3.159191131591797, + "loss": 1.425, + "nll_loss": 0.3562049865722656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001920601585879922, + "rewards/margins": 0.3139985203742981, + "rewards/rejected": -0.3159191310405731, + "step": 5671 + }, + { + "epoch": 3.922544951590595, + "grad_norm": 13.723042488098145, + "learning_rate": 3.376363915783003e-05, + "log_odds_chosen": 11.369161605834961, + "log_odds_ratio": -1.8434815501677804e-05, + "logits/chosen": -0.6580374240875244, + "logits/rejected": -0.7136290073394775, + "logps/chosen": -0.00016635411884635687, + "logps/rejected": -2.5891010761260986, + "loss": 1.3817, + "nll_loss": 0.3454234302043915, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6635411157039925e-05, + "rewards/margins": 0.2588934600353241, + "rewards/rejected": -0.2589101195335388, + "step": 5672 + }, + { + "epoch": 3.923236514522822, + "grad_norm": 12.602025032043457, + "learning_rate": 3.375979714153988e-05, + "log_odds_chosen": 10.25037956237793, + "log_odds_ratio": -7.309335342142731e-05, + "logits/chosen": -0.41974616050720215, + "logits/rejected": -0.5605471134185791, + "logps/chosen": -0.0003384738811291754, + "logps/rejected": -1.9320080280303955, + "loss": 1.2772, + "nll_loss": 0.31928902864456177, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.384738738532178e-05, + "rewards/margins": 0.19316695630550385, + "rewards/rejected": -0.19320081174373627, + "step": 5673 + }, + { + "epoch": 3.9239280774550487, + "grad_norm": 10.467412948608398, + "learning_rate": 3.3755955125249736e-05, + "log_odds_chosen": 10.826410293579102, + "log_odds_ratio": -2.9135328077245504e-05, + "logits/chosen": -1.025895118713379, + "logits/rejected": -1.1272532939910889, + "logps/chosen": -0.00017180161376018077, + "logps/rejected": -2.1393187046051025, + "loss": 1.2129, + "nll_loss": 0.3032238185405731, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.718016210361384e-05, + "rewards/margins": 0.2139146775007248, + "rewards/rejected": -0.2139318585395813, + "step": 5674 + }, + { + "epoch": 3.9246196403872755, + "grad_norm": 9.592738151550293, + "learning_rate": 3.375211310895958e-05, + "log_odds_chosen": 10.144782066345215, + "log_odds_ratio": -0.0037850146181881428, + "logits/chosen": -0.4527406096458435, + "logits/rejected": -0.5927005410194397, + "logps/chosen": -0.003005788428708911, + "logps/rejected": -2.244025230407715, + "loss": 1.4077, + "nll_loss": 0.35155072808265686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003005788312293589, + "rewards/margins": 0.2241019308567047, + "rewards/rejected": -0.2244025319814682, + "step": 5675 + }, + { + "epoch": 3.9253112033195023, + "grad_norm": 9.165721893310547, + "learning_rate": 3.3748271092669434e-05, + "log_odds_chosen": 8.65027141571045, + "log_odds_ratio": -0.006758753210306168, + "logits/chosen": -0.6444883942604065, + "logits/rejected": -0.7739410400390625, + "logps/chosen": -0.006450993940234184, + "logps/rejected": -1.4249351024627686, + "loss": 1.9798, + "nll_loss": 0.4942636489868164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006450994405895472, + "rewards/margins": 0.1418484002351761, + "rewards/rejected": -0.14249351620674133, + "step": 5676 + }, + { + "epoch": 3.926002766251729, + "grad_norm": 8.881304740905762, + "learning_rate": 3.3744429076379286e-05, + "log_odds_chosen": 8.597208023071289, + "log_odds_ratio": -0.002491341670975089, + "logits/chosen": -0.9394902586936951, + "logits/rejected": -0.9371085166931152, + "logps/chosen": -0.002545249182730913, + "logps/rejected": -1.9115054607391357, + "loss": 1.7249, + "nll_loss": 0.4309871792793274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002545249299146235, + "rewards/margins": 0.19089603424072266, + "rewards/rejected": -0.19115056097507477, + "step": 5677 + }, + { + "epoch": 3.926694329183956, + "grad_norm": 6.6902689933776855, + "learning_rate": 3.374058706008913e-05, + "log_odds_chosen": 9.036705017089844, + "log_odds_ratio": -0.05169745907187462, + "logits/chosen": -0.7906173467636108, + "logits/rejected": -0.8020066022872925, + "logps/chosen": -0.013565192930400372, + "logps/rejected": -1.5790175199508667, + "loss": 1.0445, + "nll_loss": 0.25596269965171814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013565192930400372, + "rewards/margins": 0.1565452218055725, + "rewards/rejected": -0.15790174901485443, + "step": 5678 + }, + { + "epoch": 3.927385892116183, + "grad_norm": 8.133184432983398, + "learning_rate": 3.373674504379899e-05, + "log_odds_chosen": 9.908307075500488, + "log_odds_ratio": -0.0002685143263079226, + "logits/chosen": -0.7017204761505127, + "logits/rejected": -0.8481262922286987, + "logps/chosen": -0.0005481558619067073, + "logps/rejected": -2.0266849994659424, + "loss": 1.3127, + "nll_loss": 0.3281383216381073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.481558764586225e-05, + "rewards/margins": 0.2026136815547943, + "rewards/rejected": -0.20266850292682648, + "step": 5679 + }, + { + "epoch": 3.9280774550484097, + "grad_norm": 13.340405464172363, + "learning_rate": 3.373290302750884e-05, + "log_odds_chosen": 9.175450325012207, + "log_odds_ratio": -0.0004610381438396871, + "logits/chosen": -0.667832612991333, + "logits/rejected": -0.7125696539878845, + "logps/chosen": -0.0016391351819038391, + "logps/rejected": -2.527400255203247, + "loss": 2.3522, + "nll_loss": 0.5880076885223389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016391351528000087, + "rewards/margins": 0.25257614254951477, + "rewards/rejected": -0.2527400255203247, + "step": 5680 + }, + { + "epoch": 3.9287690179806365, + "grad_norm": 9.222174644470215, + "learning_rate": 3.372906101121869e-05, + "log_odds_chosen": 7.210216522216797, + "log_odds_ratio": -0.17598360776901245, + "logits/chosen": -0.9044046998023987, + "logits/rejected": -0.9510660171508789, + "logps/chosen": -0.02575918287038803, + "logps/rejected": -1.4387961626052856, + "loss": 1.1473, + "nll_loss": 0.2692229449748993, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002575918333604932, + "rewards/margins": 0.14130370318889618, + "rewards/rejected": -0.14387962222099304, + "step": 5681 + }, + { + "epoch": 3.9294605809128633, + "grad_norm": 11.76744270324707, + "learning_rate": 3.372521899492854e-05, + "log_odds_chosen": 9.367456436157227, + "log_odds_ratio": -0.0007709745550528169, + "logits/chosen": -0.6113981008529663, + "logits/rejected": -0.7086760997772217, + "logps/chosen": -0.010728825815021992, + "logps/rejected": -2.1775169372558594, + "loss": 1.6858, + "nll_loss": 0.4213826060295105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010728825582191348, + "rewards/margins": 0.21667879819869995, + "rewards/rejected": -0.21775168180465698, + "step": 5682 + }, + { + "epoch": 3.93015214384509, + "grad_norm": 7.538707256317139, + "learning_rate": 3.3721376978638394e-05, + "log_odds_chosen": 8.055183410644531, + "log_odds_ratio": -0.006664213724434376, + "logits/chosen": -0.5837987065315247, + "logits/rejected": -0.6343657970428467, + "logps/chosen": -0.003828394692391157, + "logps/rejected": -1.2160768508911133, + "loss": 1.213, + "nll_loss": 0.3025856018066406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003828394692391157, + "rewards/margins": 0.12122484296560287, + "rewards/rejected": -0.12160768359899521, + "step": 5683 + }, + { + "epoch": 3.930843706777317, + "grad_norm": 7.9338297843933105, + "learning_rate": 3.371753496234824e-05, + "log_odds_chosen": 10.308126449584961, + "log_odds_ratio": -6.612496508751065e-05, + "logits/chosen": -0.1366608887910843, + "logits/rejected": -0.29656341671943665, + "logps/chosen": -0.00024257070617750287, + "logps/rejected": -1.475003957748413, + "loss": 1.5041, + "nll_loss": 0.37602758407592773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.425707134534605e-05, + "rewards/margins": 0.14747613668441772, + "rewards/rejected": -0.1475003957748413, + "step": 5684 + }, + { + "epoch": 3.931535269709544, + "grad_norm": 12.777952194213867, + "learning_rate": 3.371369294605809e-05, + "log_odds_chosen": 8.208662986755371, + "log_odds_ratio": -0.013890150934457779, + "logits/chosen": -0.7391720414161682, + "logits/rejected": -0.7633452415466309, + "logps/chosen": -0.029466290026903152, + "logps/rejected": -2.0348939895629883, + "loss": 1.6546, + "nll_loss": 0.4122610092163086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029466289561241865, + "rewards/margins": 0.20054274797439575, + "rewards/rejected": -0.20348937809467316, + "step": 5685 + }, + { + "epoch": 3.9322268326417706, + "grad_norm": 5.8640031814575195, + "learning_rate": 3.3709850929767945e-05, + "log_odds_chosen": 8.976058006286621, + "log_odds_ratio": -0.0009105091448873281, + "logits/chosen": -0.17427073419094086, + "logits/rejected": -0.16047167778015137, + "logps/chosen": -0.00046488974476233125, + "logps/rejected": -1.3876928091049194, + "loss": 1.0374, + "nll_loss": 0.2592521011829376, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6488974476233125e-05, + "rewards/margins": 0.13872277736663818, + "rewards/rejected": -0.138769268989563, + "step": 5686 + }, + { + "epoch": 3.9329183955739975, + "grad_norm": 10.039111137390137, + "learning_rate": 3.370600891347779e-05, + "log_odds_chosen": 8.653139114379883, + "log_odds_ratio": -0.006638075225055218, + "logits/chosen": -0.770908772945404, + "logits/rejected": -0.7579331398010254, + "logps/chosen": -0.027387000620365143, + "logps/rejected": -1.4432952404022217, + "loss": 1.4031, + "nll_loss": 0.3501099646091461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002738700248301029, + "rewards/margins": 0.14159083366394043, + "rewards/rejected": -0.1443295180797577, + "step": 5687 + }, + { + "epoch": 3.9336099585062243, + "grad_norm": 12.49831771850586, + "learning_rate": 3.370216689718765e-05, + "log_odds_chosen": 9.67283821105957, + "log_odds_ratio": -0.011118143796920776, + "logits/chosen": -0.4001826047897339, + "logits/rejected": -0.5056847333908081, + "logps/chosen": -0.01285476516932249, + "logps/rejected": -2.169782876968384, + "loss": 1.5136, + "nll_loss": 0.3772900700569153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012854763772338629, + "rewards/margins": 0.21569281816482544, + "rewards/rejected": -0.2169782668352127, + "step": 5688 + }, + { + "epoch": 3.934301521438451, + "grad_norm": 8.358305931091309, + "learning_rate": 3.3698324880897495e-05, + "log_odds_chosen": 9.098575592041016, + "log_odds_ratio": -0.010579775087535381, + "logits/chosen": 0.1532040536403656, + "logits/rejected": 0.04398436099290848, + "logps/chosen": -0.016014760360121727, + "logps/rejected": -2.351327657699585, + "loss": 1.2948, + "nll_loss": 0.32264819741249084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016014762222766876, + "rewards/margins": 0.23353126645088196, + "rewards/rejected": -0.23513275384902954, + "step": 5689 + }, + { + "epoch": 3.934993084370678, + "grad_norm": 11.339001655578613, + "learning_rate": 3.369448286460735e-05, + "log_odds_chosen": 9.736185073852539, + "log_odds_ratio": -0.0001969828736037016, + "logits/chosen": -0.41910022497177124, + "logits/rejected": -0.44641178846359253, + "logps/chosen": -0.0006493827095255256, + "logps/rejected": -1.912126064300537, + "loss": 1.242, + "nll_loss": 0.3104857802391052, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.493827095255256e-05, + "rewards/margins": 0.19114765524864197, + "rewards/rejected": -0.19121260941028595, + "step": 5690 + }, + { + "epoch": 3.935684647302905, + "grad_norm": 7.187035083770752, + "learning_rate": 3.36906408483172e-05, + "log_odds_chosen": 9.698208808898926, + "log_odds_ratio": -0.00012226369290146977, + "logits/chosen": -0.15801896154880524, + "logits/rejected": -0.17950023710727692, + "logps/chosen": -0.0003952296101488173, + "logps/rejected": -1.661260962486267, + "loss": 1.518, + "nll_loss": 0.37948352098464966, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.95229653804563e-05, + "rewards/margins": 0.1660865694284439, + "rewards/rejected": -0.16612611711025238, + "step": 5691 + }, + { + "epoch": 3.9363762102351316, + "grad_norm": 10.54863452911377, + "learning_rate": 3.368679883202705e-05, + "log_odds_chosen": 10.954380989074707, + "log_odds_ratio": -4.8866688302950934e-05, + "logits/chosen": -0.13272906839847565, + "logits/rejected": -0.2529900372028351, + "logps/chosen": -0.0001786511711543426, + "logps/rejected": -2.4072556495666504, + "loss": 1.3262, + "nll_loss": 0.33153992891311646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.786511711543426e-05, + "rewards/margins": 0.24070771038532257, + "rewards/rejected": -0.240725576877594, + "step": 5692 + }, + { + "epoch": 3.9370677731673585, + "grad_norm": 10.217341423034668, + "learning_rate": 3.36829568157369e-05, + "log_odds_chosen": 10.294401168823242, + "log_odds_ratio": -0.0006365490262396634, + "logits/chosen": -0.5235608220100403, + "logits/rejected": -0.6177823543548584, + "logps/chosen": -0.00029386149253696203, + "logps/rejected": -2.145314931869507, + "loss": 1.2532, + "nll_loss": 0.31324440240859985, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9386150345089845e-05, + "rewards/margins": 0.21450212597846985, + "rewards/rejected": -0.21453151106834412, + "step": 5693 + }, + { + "epoch": 3.9377593360995853, + "grad_norm": 9.897907257080078, + "learning_rate": 3.367911479944675e-05, + "log_odds_chosen": 7.953948020935059, + "log_odds_ratio": -0.2536156177520752, + "logits/chosen": -0.17607209086418152, + "logits/rejected": -0.23834289610385895, + "logps/chosen": -0.03188952058553696, + "logps/rejected": -1.685053825378418, + "loss": 1.5987, + "nll_loss": 0.37431544065475464, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031889521051198244, + "rewards/margins": 0.16531643271446228, + "rewards/rejected": -0.16850540041923523, + "step": 5694 + }, + { + "epoch": 3.938450899031812, + "grad_norm": 11.081581115722656, + "learning_rate": 3.36752727831566e-05, + "log_odds_chosen": 9.44058895111084, + "log_odds_ratio": -0.0004364719206932932, + "logits/chosen": -0.5609422326087952, + "logits/rejected": -0.5870753526687622, + "logps/chosen": -0.03841325640678406, + "logps/rejected": -2.3334743976593018, + "loss": 1.6473, + "nll_loss": 0.41177529096603394, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003841325407847762, + "rewards/margins": 0.22950612008571625, + "rewards/rejected": -0.23334744572639465, + "step": 5695 + }, + { + "epoch": 3.939142461964039, + "grad_norm": 7.237489700317383, + "learning_rate": 3.367143076686645e-05, + "log_odds_chosen": 9.057950973510742, + "log_odds_ratio": -0.0006810713675804436, + "logits/chosen": -0.6686182022094727, + "logits/rejected": -0.7706915736198425, + "logps/chosen": -0.0011979506816715002, + "logps/rejected": -1.3306667804718018, + "loss": 1.5321, + "nll_loss": 0.38295263051986694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011979506234638393, + "rewards/margins": 0.1329468935728073, + "rewards/rejected": -0.13306669890880585, + "step": 5696 + }, + { + "epoch": 3.9398340248962658, + "grad_norm": 11.648604393005371, + "learning_rate": 3.366758875057631e-05, + "log_odds_chosen": 10.94399642944336, + "log_odds_ratio": -3.988520984421484e-05, + "logits/chosen": -0.6112430691719055, + "logits/rejected": -0.6546251773834229, + "logps/chosen": -0.0002443194971419871, + "logps/rejected": -2.443819284439087, + "loss": 1.1959, + "nll_loss": 0.2989806830883026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4431948986602947e-05, + "rewards/margins": 0.24435749650001526, + "rewards/rejected": -0.24438193440437317, + "step": 5697 + }, + { + "epoch": 3.9405255878284926, + "grad_norm": 6.354919910430908, + "learning_rate": 3.3663746734286154e-05, + "log_odds_chosen": 10.356807708740234, + "log_odds_ratio": -0.00037150000571273267, + "logits/chosen": -0.25689586997032166, + "logits/rejected": -0.3123002052307129, + "logps/chosen": -0.0004208147875033319, + "logps/rejected": -1.9435677528381348, + "loss": 1.0913, + "nll_loss": 0.27279186248779297, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2081475839950144e-05, + "rewards/margins": 0.19431471824645996, + "rewards/rejected": -0.1943567991256714, + "step": 5698 + }, + { + "epoch": 3.9412171507607194, + "grad_norm": 8.171834945678711, + "learning_rate": 3.3659904717996006e-05, + "log_odds_chosen": 10.111719131469727, + "log_odds_ratio": -7.267138425959274e-05, + "logits/chosen": -0.6796890497207642, + "logits/rejected": -0.6865609884262085, + "logps/chosen": -0.00021459744311869144, + "logps/rejected": -1.6100369691848755, + "loss": 0.8555, + "nll_loss": 0.2138572633266449, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1459745767060667e-05, + "rewards/margins": 0.16098225116729736, + "rewards/rejected": -0.1610036939382553, + "step": 5699 + }, + { + "epoch": 3.9419087136929463, + "grad_norm": 5.196160316467285, + "learning_rate": 3.365606270170586e-05, + "log_odds_chosen": 9.61539077758789, + "log_odds_ratio": -0.00044482407975010574, + "logits/chosen": -0.8210750818252563, + "logits/rejected": -0.8469647765159607, + "logps/chosen": -0.001331267412751913, + "logps/rejected": -2.3655765056610107, + "loss": 0.9967, + "nll_loss": 0.2491181343793869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001331267412751913, + "rewards/margins": 0.2364245355129242, + "rewards/rejected": -0.23655766248703003, + "step": 5700 + }, + { + "epoch": 3.942600276625173, + "grad_norm": 13.016851425170898, + "learning_rate": 3.365222068541571e-05, + "log_odds_chosen": 7.50762939453125, + "log_odds_ratio": -0.07106940448284149, + "logits/chosen": -0.6622728109359741, + "logits/rejected": -0.6977077722549438, + "logps/chosen": -0.01938733085989952, + "logps/rejected": -1.0778499841690063, + "loss": 1.5959, + "nll_loss": 0.391880065202713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001938733272254467, + "rewards/margins": 0.10584627091884613, + "rewards/rejected": -0.10778500139713287, + "step": 5701 + }, + { + "epoch": 3.9432918395574, + "grad_norm": 9.43426513671875, + "learning_rate": 3.3648378669125557e-05, + "log_odds_chosen": 10.202520370483398, + "log_odds_ratio": -0.0006938951555639505, + "logits/chosen": -0.6339164972305298, + "logits/rejected": -0.6359383463859558, + "logps/chosen": -0.0009514871053397655, + "logps/rejected": -2.173431396484375, + "loss": 1.6117, + "nll_loss": 0.40285149216651917, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.514870907878503e-05, + "rewards/margins": 0.21724799275398254, + "rewards/rejected": -0.21734313666820526, + "step": 5702 + }, + { + "epoch": 3.9439834024896268, + "grad_norm": 8.37575626373291, + "learning_rate": 3.364453665283541e-05, + "log_odds_chosen": 9.026674270629883, + "log_odds_ratio": -0.0002642723557073623, + "logits/chosen": -0.6051280498504639, + "logits/rejected": -0.6261605620384216, + "logps/chosen": -0.001482822117395699, + "logps/rejected": -1.4893754720687866, + "loss": 1.0054, + "nll_loss": 0.25131821632385254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001482822117395699, + "rewards/margins": 0.14878925681114197, + "rewards/rejected": -0.14893755316734314, + "step": 5703 + }, + { + "epoch": 3.9446749654218536, + "grad_norm": 4.9485955238342285, + "learning_rate": 3.364069463654526e-05, + "log_odds_chosen": 9.150157928466797, + "log_odds_ratio": -0.0018625075463205576, + "logits/chosen": -0.32784304022789, + "logits/rejected": -0.42211171984672546, + "logps/chosen": -0.012415789999067783, + "logps/rejected": -1.776462197303772, + "loss": 1.2726, + "nll_loss": 0.3179532289505005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012415789533406496, + "rewards/margins": 0.17640462517738342, + "rewards/rejected": -0.1776462197303772, + "step": 5704 + }, + { + "epoch": 3.9453665283540804, + "grad_norm": 9.812108039855957, + "learning_rate": 3.363685262025511e-05, + "log_odds_chosen": 10.123137474060059, + "log_odds_ratio": -6.920234591234475e-05, + "logits/chosen": -0.7869176864624023, + "logits/rejected": -0.8563541769981384, + "logps/chosen": -0.0007024909136816859, + "logps/rejected": -2.032478094100952, + "loss": 1.3503, + "nll_loss": 0.3375677466392517, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.024908700259402e-05, + "rewards/margins": 0.2031775563955307, + "rewards/rejected": -0.2032478153705597, + "step": 5705 + }, + { + "epoch": 3.9460580912863072, + "grad_norm": 7.877689838409424, + "learning_rate": 3.3633010603964966e-05, + "log_odds_chosen": 9.2701416015625, + "log_odds_ratio": -0.0006889035576023161, + "logits/chosen": -0.43943047523498535, + "logits/rejected": -0.4963938593864441, + "logps/chosen": -0.01611155830323696, + "logps/rejected": -1.872589349746704, + "loss": 1.4708, + "nll_loss": 0.3676352798938751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016111559234559536, + "rewards/margins": 0.18564778566360474, + "rewards/rejected": -0.18725895881652832, + "step": 5706 + }, + { + "epoch": 3.946749654218534, + "grad_norm": 7.677238464355469, + "learning_rate": 3.362916858767481e-05, + "log_odds_chosen": 10.964299201965332, + "log_odds_ratio": -3.7906993384240195e-05, + "logits/chosen": -0.7371675968170166, + "logits/rejected": -0.6771216988563538, + "logps/chosen": -0.00017539318650960922, + "logps/rejected": -2.1775312423706055, + "loss": 1.119, + "nll_loss": 0.2797532379627228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7539319742354564e-05, + "rewards/margins": 0.21773558855056763, + "rewards/rejected": -0.21775312721729279, + "step": 5707 + }, + { + "epoch": 3.947441217150761, + "grad_norm": 8.230490684509277, + "learning_rate": 3.3625326571384664e-05, + "log_odds_chosen": 10.742960929870605, + "log_odds_ratio": -4.162584446021356e-05, + "logits/chosen": -0.32022398710250854, + "logits/rejected": -0.3552589416503906, + "logps/chosen": -8.393789175897837e-05, + "logps/rejected": -1.5506086349487305, + "loss": 1.3166, + "nll_loss": 0.32913970947265625, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.393788448302075e-06, + "rewards/margins": 0.15505248308181763, + "rewards/rejected": -0.15506088733673096, + "step": 5708 + }, + { + "epoch": 3.9481327800829877, + "grad_norm": 8.795544624328613, + "learning_rate": 3.362148455509452e-05, + "log_odds_chosen": 10.026924133300781, + "log_odds_ratio": -0.000733112683519721, + "logits/chosen": -0.48883938789367676, + "logits/rejected": -0.4959717392921448, + "logps/chosen": -0.004272299353033304, + "logps/rejected": -2.4745564460754395, + "loss": 1.4151, + "nll_loss": 0.3537033796310425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042722991202026606, + "rewards/margins": 0.2470284104347229, + "rewards/rejected": -0.2474556416273117, + "step": 5709 + }, + { + "epoch": 3.9488243430152146, + "grad_norm": 7.566889762878418, + "learning_rate": 3.361764253880437e-05, + "log_odds_chosen": 9.145713806152344, + "log_odds_ratio": -0.00047047666157595813, + "logits/chosen": -0.5946757197380066, + "logits/rejected": -0.6835692524909973, + "logps/chosen": -0.0008808871498331428, + "logps/rejected": -1.5034514665603638, + "loss": 1.1975, + "nll_loss": 0.2993380129337311, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.808870916254818e-05, + "rewards/margins": 0.15025705099105835, + "rewards/rejected": -0.15034514665603638, + "step": 5710 + }, + { + "epoch": 3.9495159059474414, + "grad_norm": 10.708879470825195, + "learning_rate": 3.3613800522514215e-05, + "log_odds_chosen": 10.223733901977539, + "log_odds_ratio": -0.00016319968563038856, + "logits/chosen": -0.5908886194229126, + "logits/rejected": -0.5645523071289062, + "logps/chosen": -0.00048043689457699656, + "logps/rejected": -1.9205724000930786, + "loss": 1.5375, + "nll_loss": 0.38436540961265564, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8043693823274225e-05, + "rewards/margins": 0.19200919568538666, + "rewards/rejected": -0.19205725193023682, + "step": 5711 + }, + { + "epoch": 3.9502074688796682, + "grad_norm": 8.03261661529541, + "learning_rate": 3.360995850622407e-05, + "log_odds_chosen": 8.706552505493164, + "log_odds_ratio": -0.0011576716788113117, + "logits/chosen": -0.5434591770172119, + "logits/rejected": -0.5303152203559875, + "logps/chosen": -0.0019687768071889877, + "logps/rejected": -1.3109357357025146, + "loss": 1.622, + "nll_loss": 0.4053952991962433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019687767780851573, + "rewards/margins": 0.1308967024087906, + "rewards/rejected": -0.1310935765504837, + "step": 5712 + }, + { + "epoch": 3.950899031811895, + "grad_norm": 19.672039031982422, + "learning_rate": 3.360611648993392e-05, + "log_odds_chosen": 8.991646766662598, + "log_odds_ratio": -0.19604425132274628, + "logits/chosen": -0.7051516771316528, + "logits/rejected": -0.6763105392456055, + "logps/chosen": -0.02533097378909588, + "logps/rejected": -1.375767469406128, + "loss": 1.6721, + "nll_loss": 0.3984111547470093, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002533097518607974, + "rewards/margins": 0.1350436508655548, + "rewards/rejected": -0.13757675886154175, + "step": 5713 + }, + { + "epoch": 3.951590594744122, + "grad_norm": 8.680757522583008, + "learning_rate": 3.3602274473643766e-05, + "log_odds_chosen": 9.2004976272583, + "log_odds_ratio": -0.005787692964076996, + "logits/chosen": -0.2553695738315582, + "logits/rejected": -0.3262069821357727, + "logps/chosen": -0.003667776472866535, + "logps/rejected": -1.7895536422729492, + "loss": 1.3477, + "nll_loss": 0.3363400101661682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003667776472866535, + "rewards/margins": 0.17858858406543732, + "rewards/rejected": -0.17895537614822388, + "step": 5714 + }, + { + "epoch": 3.9522821576763487, + "grad_norm": 10.9926118850708, + "learning_rate": 3.3598432457353625e-05, + "log_odds_chosen": 9.518470764160156, + "log_odds_ratio": -0.0008939065737649798, + "logits/chosen": -0.47945836186408997, + "logits/rejected": -0.5430272221565247, + "logps/chosen": -0.0026755905710160732, + "logps/rejected": -1.9028359651565552, + "loss": 1.4604, + "nll_loss": 0.3650098443031311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002675590803846717, + "rewards/margins": 0.1900160312652588, + "rewards/rejected": -0.19028359651565552, + "step": 5715 + }, + { + "epoch": 3.9529737206085755, + "grad_norm": 12.968902587890625, + "learning_rate": 3.359459044106347e-05, + "log_odds_chosen": 9.561538696289062, + "log_odds_ratio": -0.000785676937084645, + "logits/chosen": -0.7379172444343567, + "logits/rejected": -0.7460415959358215, + "logps/chosen": -0.0024614909198135138, + "logps/rejected": -1.8133800029754639, + "loss": 1.9004, + "nll_loss": 0.47502267360687256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002461490803398192, + "rewards/margins": 0.18109184503555298, + "rewards/rejected": -0.18133798241615295, + "step": 5716 + }, + { + "epoch": 3.9536652835408024, + "grad_norm": 13.025182723999023, + "learning_rate": 3.359074842477332e-05, + "log_odds_chosen": 8.811513900756836, + "log_odds_ratio": -0.0005601159064099193, + "logits/chosen": -0.08626483380794525, + "logits/rejected": -0.18160587549209595, + "logps/chosen": -0.0009426804026588798, + "logps/rejected": -1.5935416221618652, + "loss": 1.5662, + "nll_loss": 0.3914954960346222, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.426804899703711e-05, + "rewards/margins": 0.15925991535186768, + "rewards/rejected": -0.15935416519641876, + "step": 5717 + }, + { + "epoch": 3.954356846473029, + "grad_norm": 9.333796501159668, + "learning_rate": 3.358690640848317e-05, + "log_odds_chosen": 10.182829856872559, + "log_odds_ratio": -0.00021176054724492133, + "logits/chosen": -0.239266037940979, + "logits/rejected": -0.3665693998336792, + "logps/chosen": -0.0002250690886285156, + "logps/rejected": -1.7776684761047363, + "loss": 1.1581, + "nll_loss": 0.2895086407661438, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2506910681840964e-05, + "rewards/margins": 0.1777443289756775, + "rewards/rejected": -0.1777668446302414, + "step": 5718 + }, + { + "epoch": 3.955048409405256, + "grad_norm": 9.824963569641113, + "learning_rate": 3.358306439219303e-05, + "log_odds_chosen": 8.434147834777832, + "log_odds_ratio": -0.11634311825037003, + "logits/chosen": -0.603499710559845, + "logits/rejected": -0.6114636659622192, + "logps/chosen": -0.025828877463936806, + "logps/rejected": -1.4695003032684326, + "loss": 1.4495, + "nll_loss": 0.3507324457168579, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002582887653261423, + "rewards/margins": 0.14436712861061096, + "rewards/rejected": -0.14695002138614655, + "step": 5719 + }, + { + "epoch": 3.955739972337483, + "grad_norm": 7.860080242156982, + "learning_rate": 3.3579222375902873e-05, + "log_odds_chosen": 8.629996299743652, + "log_odds_ratio": -0.004712705500423908, + "logits/chosen": -0.3507615327835083, + "logits/rejected": -0.43837425112724304, + "logps/chosen": -0.004645884968340397, + "logps/rejected": -1.5997428894042969, + "loss": 1.5647, + "nll_loss": 0.39069458842277527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046458852011710405, + "rewards/margins": 0.15950970351696014, + "rewards/rejected": -0.15997430682182312, + "step": 5720 + }, + { + "epoch": 3.9564315352697097, + "grad_norm": 7.354146480560303, + "learning_rate": 3.3575380359612726e-05, + "log_odds_chosen": 9.55754566192627, + "log_odds_ratio": -0.0001672496582614258, + "logits/chosen": -0.5126395225524902, + "logits/rejected": -0.5443432331085205, + "logps/chosen": -0.0005185157060623169, + "logps/rejected": -1.7254047393798828, + "loss": 1.0127, + "nll_loss": 0.25316306948661804, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1851573516614735e-05, + "rewards/margins": 0.17248864471912384, + "rewards/rejected": -0.17254048585891724, + "step": 5721 + }, + { + "epoch": 3.9571230982019365, + "grad_norm": 10.185460090637207, + "learning_rate": 3.357153834332258e-05, + "log_odds_chosen": 9.248023986816406, + "log_odds_ratio": -0.0015630690613761544, + "logits/chosen": -0.35558661818504333, + "logits/rejected": -0.47364428639411926, + "logps/chosen": -0.008763710036873817, + "logps/rejected": -2.00473690032959, + "loss": 1.3867, + "nll_loss": 0.34651124477386475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008763709920458496, + "rewards/margins": 0.1995973289012909, + "rewards/rejected": -0.20047369599342346, + "step": 5722 + }, + { + "epoch": 3.9578146611341634, + "grad_norm": 8.607810020446777, + "learning_rate": 3.356769632703243e-05, + "log_odds_chosen": 9.193594932556152, + "log_odds_ratio": -0.0009846854954957962, + "logits/chosen": -0.5531086325645447, + "logits/rejected": -0.6834915280342102, + "logps/chosen": -0.0011036059586331248, + "logps/rejected": -1.6625056266784668, + "loss": 1.0703, + "nll_loss": 0.26746901869773865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000110360597318504, + "rewards/margins": 0.16614019870758057, + "rewards/rejected": -0.1662505567073822, + "step": 5723 + }, + { + "epoch": 3.95850622406639, + "grad_norm": 11.07143497467041, + "learning_rate": 3.3563854310742276e-05, + "log_odds_chosen": 8.199675559997559, + "log_odds_ratio": -0.015890540555119514, + "logits/chosen": -0.3973831832408905, + "logits/rejected": -0.4261188507080078, + "logps/chosen": -0.19782760739326477, + "logps/rejected": -1.8517847061157227, + "loss": 2.1722, + "nll_loss": 0.5414702892303467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019782761111855507, + "rewards/margins": 0.16539573669433594, + "rewards/rejected": -0.1851784884929657, + "step": 5724 + }, + { + "epoch": 3.959197786998617, + "grad_norm": 8.371910095214844, + "learning_rate": 3.356001229445213e-05, + "log_odds_chosen": 8.336164474487305, + "log_odds_ratio": -0.07454461604356766, + "logits/chosen": -0.5055246353149414, + "logits/rejected": -0.5621579885482788, + "logps/chosen": -0.014081336557865143, + "logps/rejected": -1.6171096563339233, + "loss": 1.7341, + "nll_loss": 0.42607590556144714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014081336557865143, + "rewards/margins": 0.16030283272266388, + "rewards/rejected": -0.1617109626531601, + "step": 5725 + }, + { + "epoch": 3.959889349930844, + "grad_norm": 7.506570816040039, + "learning_rate": 3.355617027816198e-05, + "log_odds_chosen": 9.852703094482422, + "log_odds_ratio": -0.00010472921712789685, + "logits/chosen": -0.14278888702392578, + "logits/rejected": -0.22829201817512512, + "logps/chosen": -0.006612797733396292, + "logps/rejected": -2.2164673805236816, + "loss": 1.1552, + "nll_loss": 0.28878581523895264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006612797733396292, + "rewards/margins": 0.22098545730113983, + "rewards/rejected": -0.2216467261314392, + "step": 5726 + }, + { + "epoch": 3.9605809128630707, + "grad_norm": 19.204822540283203, + "learning_rate": 3.355232826187183e-05, + "log_odds_chosen": 8.902791976928711, + "log_odds_ratio": -0.004878794774413109, + "logits/chosen": -0.07866685092449188, + "logits/rejected": -0.14008517563343048, + "logps/chosen": -0.05354490503668785, + "logps/rejected": -1.7077854871749878, + "loss": 1.5674, + "nll_loss": 0.39136484265327454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005354490131139755, + "rewards/margins": 0.16542406380176544, + "rewards/rejected": -0.1707785427570343, + "step": 5727 + }, + { + "epoch": 3.9612724757952975, + "grad_norm": 11.002792358398438, + "learning_rate": 3.3548486245581686e-05, + "log_odds_chosen": 10.120767593383789, + "log_odds_ratio": -0.00026730989338830113, + "logits/chosen": -0.6612759828567505, + "logits/rejected": -0.6622802019119263, + "logps/chosen": -0.00566504243761301, + "logps/rejected": -2.387166976928711, + "loss": 1.8902, + "nll_loss": 0.4725325107574463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005665042554028332, + "rewards/margins": 0.2381501942873001, + "rewards/rejected": -0.2387167066335678, + "step": 5728 + }, + { + "epoch": 3.9619640387275243, + "grad_norm": 10.607829093933105, + "learning_rate": 3.354464422929153e-05, + "log_odds_chosen": 10.475570678710938, + "log_odds_ratio": -0.0001675660751061514, + "logits/chosen": -0.7578421831130981, + "logits/rejected": -0.7980519533157349, + "logps/chosen": -0.0006828827317804098, + "logps/rejected": -2.6656992435455322, + "loss": 1.8156, + "nll_loss": 0.4538804888725281, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.82882746332325e-05, + "rewards/margins": 0.26650163531303406, + "rewards/rejected": -0.26656991243362427, + "step": 5729 + }, + { + "epoch": 3.962655601659751, + "grad_norm": 9.261698722839355, + "learning_rate": 3.3540802213001384e-05, + "log_odds_chosen": 8.758027076721191, + "log_odds_ratio": -0.0015625649830326438, + "logits/chosen": -0.7957751154899597, + "logits/rejected": -0.8652870655059814, + "logps/chosen": -0.005631886888295412, + "logps/rejected": -1.8644614219665527, + "loss": 1.8505, + "nll_loss": 0.46247023344039917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005631886888295412, + "rewards/margins": 0.18588295578956604, + "rewards/rejected": -0.1864461451768875, + "step": 5730 + }, + { + "epoch": 3.963347164591978, + "grad_norm": 14.256701469421387, + "learning_rate": 3.353696019671124e-05, + "log_odds_chosen": 9.452154159545898, + "log_odds_ratio": -0.00022387137869372964, + "logits/chosen": -0.6790257692337036, + "logits/rejected": -0.7358689308166504, + "logps/chosen": -0.00040158629417419434, + "logps/rejected": -1.465136170387268, + "loss": 1.4173, + "nll_loss": 0.3543011546134949, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0158629417419434e-05, + "rewards/margins": 0.1464734822511673, + "rewards/rejected": -0.14651362597942352, + "step": 5731 + }, + { + "epoch": 3.964038727524205, + "grad_norm": 16.448766708374023, + "learning_rate": 3.353311818042109e-05, + "log_odds_chosen": 10.256701469421387, + "log_odds_ratio": -8.063411951297894e-05, + "logits/chosen": -0.6722682118415833, + "logits/rejected": -0.681743860244751, + "logps/chosen": -0.008627118542790413, + "logps/rejected": -2.3765411376953125, + "loss": 1.8795, + "nll_loss": 0.46986570954322815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008627119823358953, + "rewards/margins": 0.23679141700267792, + "rewards/rejected": -0.23765411972999573, + "step": 5732 + }, + { + "epoch": 3.9647302904564317, + "grad_norm": 18.156047821044922, + "learning_rate": 3.3529276164130935e-05, + "log_odds_chosen": 9.920660972595215, + "log_odds_ratio": -0.00036892094067297876, + "logits/chosen": -0.5115818977355957, + "logits/rejected": -0.5205098390579224, + "logps/chosen": -0.001742643304169178, + "logps/rejected": -1.9661567211151123, + "loss": 1.2746, + "nll_loss": 0.3186228275299072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001742643362376839, + "rewards/margins": 0.1964414119720459, + "rewards/rejected": -0.19661568105220795, + "step": 5733 + }, + { + "epoch": 3.9654218533886585, + "grad_norm": 6.548203468322754, + "learning_rate": 3.352543414784079e-05, + "log_odds_chosen": 9.376199722290039, + "log_odds_ratio": -0.00019775544933509082, + "logits/chosen": -0.7781115174293518, + "logits/rejected": -0.8361927270889282, + "logps/chosen": -0.00040810625068843365, + "logps/rejected": -1.6837494373321533, + "loss": 1.2971, + "nll_loss": 0.3242448568344116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08106243412476e-05, + "rewards/margins": 0.16833411157131195, + "rewards/rejected": -0.1683749407529831, + "step": 5734 + }, + { + "epoch": 3.9661134163208853, + "grad_norm": 12.527767181396484, + "learning_rate": 3.352159213155064e-05, + "log_odds_chosen": 9.324653625488281, + "log_odds_ratio": -0.0006891811499372125, + "logits/chosen": -0.6570389270782471, + "logits/rejected": -0.6967424154281616, + "logps/chosen": -0.005305243190377951, + "logps/rejected": -2.3198208808898926, + "loss": 2.0938, + "nll_loss": 0.5233712196350098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005305242957547307, + "rewards/margins": 0.23145155608654022, + "rewards/rejected": -0.23198208212852478, + "step": 5735 + }, + { + "epoch": 3.966804979253112, + "grad_norm": 10.369845390319824, + "learning_rate": 3.3517750115260485e-05, + "log_odds_chosen": 9.477441787719727, + "log_odds_ratio": -0.0003035986446775496, + "logits/chosen": -0.8958337903022766, + "logits/rejected": -0.9264642000198364, + "logps/chosen": -0.0002738877374213189, + "logps/rejected": -1.3348115682601929, + "loss": 1.3162, + "nll_loss": 0.32901012897491455, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.738877446972765e-05, + "rewards/margins": 0.13345377147197723, + "rewards/rejected": -0.13348117470741272, + "step": 5736 + }, + { + "epoch": 3.967496542185339, + "grad_norm": 9.416356086730957, + "learning_rate": 3.3513908098970345e-05, + "log_odds_chosen": 8.403035163879395, + "log_odds_ratio": -0.02166915126144886, + "logits/chosen": -0.4370851516723633, + "logits/rejected": -0.5026123523712158, + "logps/chosen": -0.06585156917572021, + "logps/rejected": -1.6196904182434082, + "loss": 1.4708, + "nll_loss": 0.3655407428741455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006585157476365566, + "rewards/margins": 0.15538389980793, + "rewards/rejected": -0.16196905076503754, + "step": 5737 + }, + { + "epoch": 3.968188105117566, + "grad_norm": 9.437515258789062, + "learning_rate": 3.351006608268019e-05, + "log_odds_chosen": 8.181971549987793, + "log_odds_ratio": -0.011595248244702816, + "logits/chosen": -0.29133880138397217, + "logits/rejected": -0.3592085540294647, + "logps/chosen": -0.007398087531328201, + "logps/rejected": -1.042150855064392, + "loss": 1.8839, + "nll_loss": 0.4698105454444885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007398087182082236, + "rewards/margins": 0.10347528010606766, + "rewards/rejected": -0.10421508550643921, + "step": 5738 + }, + { + "epoch": 3.9688796680497926, + "grad_norm": 12.901277542114258, + "learning_rate": 3.350622406639004e-05, + "log_odds_chosen": 9.17611312866211, + "log_odds_ratio": -0.048881880939006805, + "logits/chosen": -0.6290692687034607, + "logits/rejected": -0.6658339500427246, + "logps/chosen": -0.01046671625226736, + "logps/rejected": -1.2178230285644531, + "loss": 1.7287, + "nll_loss": 0.42728084325790405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010466716485098004, + "rewards/margins": 0.12073563039302826, + "rewards/rejected": -0.12178229540586472, + "step": 5739 + }, + { + "epoch": 3.9695712309820195, + "grad_norm": 18.288244247436523, + "learning_rate": 3.3502382050099895e-05, + "log_odds_chosen": 9.014623641967773, + "log_odds_ratio": -0.05034981295466423, + "logits/chosen": -0.651118278503418, + "logits/rejected": -0.7484937310218811, + "logps/chosen": -0.005496119614690542, + "logps/rejected": -1.8427544832229614, + "loss": 1.4967, + "nll_loss": 0.3691369593143463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005496119847521186, + "rewards/margins": 0.18372584879398346, + "rewards/rejected": -0.18427544832229614, + "step": 5740 + }, + { + "epoch": 3.9702627939142463, + "grad_norm": 7.95030403137207, + "learning_rate": 3.349854003380975e-05, + "log_odds_chosen": 9.588155746459961, + "log_odds_ratio": -0.0029957296792417765, + "logits/chosen": 0.09066282957792282, + "logits/rejected": 0.057482749223709106, + "logps/chosen": -0.0023572836071252823, + "logps/rejected": -2.472792387008667, + "loss": 1.5405, + "nll_loss": 0.38481464982032776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023572838108520955, + "rewards/margins": 0.24704353511333466, + "rewards/rejected": -0.24727925658226013, + "step": 5741 + }, + { + "epoch": 3.970954356846473, + "grad_norm": 8.950033187866211, + "learning_rate": 3.349469801751959e-05, + "log_odds_chosen": 9.638641357421875, + "log_odds_ratio": -0.00031053705606609583, + "logits/chosen": -0.699450671672821, + "logits/rejected": -0.7877517938613892, + "logps/chosen": -0.00048190244706347585, + "logps/rejected": -2.0894455909729004, + "loss": 1.1709, + "nll_loss": 0.29270097613334656, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.819024616153911e-05, + "rewards/margins": 0.20889636874198914, + "rewards/rejected": -0.20894454419612885, + "step": 5742 + }, + { + "epoch": 3.9716459197787, + "grad_norm": 7.144364833831787, + "learning_rate": 3.3490856001229446e-05, + "log_odds_chosen": 9.663379669189453, + "log_odds_ratio": -0.007051974069327116, + "logits/chosen": -0.503765881061554, + "logits/rejected": -0.5336912870407104, + "logps/chosen": -0.007468795869499445, + "logps/rejected": -2.4491522312164307, + "loss": 2.5254, + "nll_loss": 0.6306518316268921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007468796102330089, + "rewards/margins": 0.24416834115982056, + "rewards/rejected": -0.2449152171611786, + "step": 5743 + }, + { + "epoch": 3.972337482710927, + "grad_norm": 7.711954116821289, + "learning_rate": 3.34870139849393e-05, + "log_odds_chosen": 10.248712539672852, + "log_odds_ratio": -0.0002956142125185579, + "logits/chosen": -0.7737203240394592, + "logits/rejected": -0.8155643939971924, + "logps/chosen": -0.0010178536176681519, + "logps/rejected": -2.6119794845581055, + "loss": 1.5636, + "nll_loss": 0.390875905752182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010178536467719823, + "rewards/margins": 0.26109617948532104, + "rewards/rejected": -0.26119792461395264, + "step": 5744 + }, + { + "epoch": 3.9730290456431536, + "grad_norm": 9.639405250549316, + "learning_rate": 3.3483171968649144e-05, + "log_odds_chosen": 10.47792911529541, + "log_odds_ratio": -4.7168614401016384e-05, + "logits/chosen": -0.5498504042625427, + "logits/rejected": -0.5651625394821167, + "logps/chosen": -0.00014222090248949826, + "logps/rejected": -1.7951178550720215, + "loss": 2.0419, + "nll_loss": 0.5104638934135437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4222088793758303e-05, + "rewards/margins": 0.17949756979942322, + "rewards/rejected": -0.17951178550720215, + "step": 5745 + }, + { + "epoch": 3.9737206085753805, + "grad_norm": 11.414177894592285, + "learning_rate": 3.3479329952359e-05, + "log_odds_chosen": 11.441179275512695, + "log_odds_ratio": -1.4189552530297078e-05, + "logits/chosen": -0.11674871295690536, + "logits/rejected": -0.2236751914024353, + "logps/chosen": -0.00010913712321780622, + "logps/rejected": -2.2911489009857178, + "loss": 1.3601, + "nll_loss": 0.3400111794471741, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0913712685578503e-05, + "rewards/margins": 0.22910398244857788, + "rewards/rejected": -0.22911489009857178, + "step": 5746 + }, + { + "epoch": 3.9744121715076073, + "grad_norm": 7.950533866882324, + "learning_rate": 3.347548793606885e-05, + "log_odds_chosen": 9.580647468566895, + "log_odds_ratio": -0.0008307815878652036, + "logits/chosen": -0.15887480974197388, + "logits/rejected": -0.2429768145084381, + "logps/chosen": -0.0018538651056587696, + "logps/rejected": -1.919063925743103, + "loss": 0.8787, + "nll_loss": 0.21959391236305237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018538651056587696, + "rewards/margins": 0.19172099232673645, + "rewards/rejected": -0.1919063925743103, + "step": 5747 + }, + { + "epoch": 3.975103734439834, + "grad_norm": 14.538896560668945, + "learning_rate": 3.34716459197787e-05, + "log_odds_chosen": 9.95122241973877, + "log_odds_ratio": -9.063062316272408e-05, + "logits/chosen": -0.2590225338935852, + "logits/rejected": -0.36000490188598633, + "logps/chosen": -0.0005485797300934792, + "logps/rejected": -2.2005417346954346, + "loss": 1.9276, + "nll_loss": 0.4818934202194214, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4857977374922484e-05, + "rewards/margins": 0.21999932825565338, + "rewards/rejected": -0.22005417943000793, + "step": 5748 + }, + { + "epoch": 3.975795297372061, + "grad_norm": 7.641196250915527, + "learning_rate": 3.3467803903488554e-05, + "log_odds_chosen": 10.689547538757324, + "log_odds_ratio": -3.348453901708126e-05, + "logits/chosen": -0.6348323225975037, + "logits/rejected": -0.5878068208694458, + "logps/chosen": -0.00016802028403617442, + "logps/rejected": -1.9155058860778809, + "loss": 1.0619, + "nll_loss": 0.2654733657836914, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6802028767415322e-05, + "rewards/margins": 0.19153380393981934, + "rewards/rejected": -0.191550612449646, + "step": 5749 + }, + { + "epoch": 3.9764868603042878, + "grad_norm": 12.81608772277832, + "learning_rate": 3.3463961887198406e-05, + "log_odds_chosen": 9.441143035888672, + "log_odds_ratio": -0.007615984883159399, + "logits/chosen": -0.3101378083229065, + "logits/rejected": -0.30015456676483154, + "logps/chosen": -0.03704637289047241, + "logps/rejected": -2.1652421951293945, + "loss": 1.1186, + "nll_loss": 0.2788885831832886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003704637521877885, + "rewards/margins": 0.21281959116458893, + "rewards/rejected": -0.21652421355247498, + "step": 5750 + }, + { + "epoch": 3.9771784232365146, + "grad_norm": 7.47943115234375, + "learning_rate": 3.346011987090825e-05, + "log_odds_chosen": 10.377918243408203, + "log_odds_ratio": -0.00010015325096901506, + "logits/chosen": -0.5926077961921692, + "logits/rejected": -0.5092741250991821, + "logps/chosen": -0.0001969319419004023, + "logps/rejected": -1.7772661447525024, + "loss": 1.2954, + "nll_loss": 0.32384994626045227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.969319419004023e-05, + "rewards/margins": 0.17770692706108093, + "rewards/rejected": -0.1777266263961792, + "step": 5751 + }, + { + "epoch": 3.9778699861687414, + "grad_norm": 9.103669166564941, + "learning_rate": 3.3456277854618104e-05, + "log_odds_chosen": 10.58312702178955, + "log_odds_ratio": -5.431749377748929e-05, + "logits/chosen": -0.5314761996269226, + "logits/rejected": -0.5825152397155762, + "logps/chosen": -0.003828480839729309, + "logps/rejected": -2.600499153137207, + "loss": 1.9518, + "nll_loss": 0.4879487156867981, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003828480839729309, + "rewards/margins": 0.2596670985221863, + "rewards/rejected": -0.2600499391555786, + "step": 5752 + }, + { + "epoch": 3.9785615491009683, + "grad_norm": 7.952563285827637, + "learning_rate": 3.345243583832796e-05, + "log_odds_chosen": 8.267032623291016, + "log_odds_ratio": -0.001137011917307973, + "logits/chosen": -0.31987977027893066, + "logits/rejected": -0.3656128942966461, + "logps/chosen": -0.007970752194523811, + "logps/rejected": -1.5903152227401733, + "loss": 1.3859, + "nll_loss": 0.3463681936264038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007970751030370593, + "rewards/margins": 0.15823444724082947, + "rewards/rejected": -0.15903152525424957, + "step": 5753 + }, + { + "epoch": 3.979253112033195, + "grad_norm": 15.227071762084961, + "learning_rate": 3.34485938220378e-05, + "log_odds_chosen": 9.630619049072266, + "log_odds_ratio": -0.00011478106898721308, + "logits/chosen": -0.6500551104545593, + "logits/rejected": -0.6647905707359314, + "logps/chosen": -0.0004875340382568538, + "logps/rejected": -1.898605465888977, + "loss": 1.7484, + "nll_loss": 0.43709835410118103, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8753405280876905e-05, + "rewards/margins": 0.1898117959499359, + "rewards/rejected": -0.18986055254936218, + "step": 5754 + }, + { + "epoch": 3.979944674965422, + "grad_norm": 10.086196899414062, + "learning_rate": 3.344475180574766e-05, + "log_odds_chosen": 10.998220443725586, + "log_odds_ratio": -2.8250318791833706e-05, + "logits/chosen": -0.5312788486480713, + "logits/rejected": -0.5879403352737427, + "logps/chosen": -9.712098108138889e-05, + "logps/rejected": -1.8667224645614624, + "loss": 1.5129, + "nll_loss": 0.37821367383003235, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.712097380543128e-06, + "rewards/margins": 0.18666253983974457, + "rewards/rejected": -0.18667224049568176, + "step": 5755 + }, + { + "epoch": 3.9806362378976488, + "grad_norm": 11.90705394744873, + "learning_rate": 3.344090978945751e-05, + "log_odds_chosen": 10.894936561584473, + "log_odds_ratio": -5.3509866120293736e-05, + "logits/chosen": -0.4861408770084381, + "logits/rejected": -0.5648641586303711, + "logps/chosen": -0.00018957615247927606, + "logps/rejected": -1.8090975284576416, + "loss": 1.1082, + "nll_loss": 0.27704793214797974, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.895761670311913e-05, + "rewards/margins": 0.18089079856872559, + "rewards/rejected": -0.18090975284576416, + "step": 5756 + }, + { + "epoch": 3.9813278008298756, + "grad_norm": 5.89326286315918, + "learning_rate": 3.343706777316736e-05, + "log_odds_chosen": 10.03038215637207, + "log_odds_ratio": -0.0003101792826782912, + "logits/chosen": -0.4241410791873932, + "logits/rejected": -0.48782768845558167, + "logps/chosen": -0.0013601405080407858, + "logps/rejected": -2.202002763748169, + "loss": 1.3956, + "nll_loss": 0.34887629747390747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013601405953522772, + "rewards/margins": 0.22006428241729736, + "rewards/rejected": -0.2202003002166748, + "step": 5757 + }, + { + "epoch": 3.9820193637621024, + "grad_norm": 9.086535453796387, + "learning_rate": 3.343322575687721e-05, + "log_odds_chosen": 11.076787948608398, + "log_odds_ratio": -4.0664192056283355e-05, + "logits/chosen": -0.7246346473693848, + "logits/rejected": -0.7437798976898193, + "logps/chosen": -0.00011383210949134082, + "logps/rejected": -1.774545431137085, + "loss": 1.4516, + "nll_loss": 0.3628990650177002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1383212040527724e-05, + "rewards/margins": 0.17744316160678864, + "rewards/rejected": -0.17745453119277954, + "step": 5758 + }, + { + "epoch": 3.9827109266943292, + "grad_norm": 10.311131477355957, + "learning_rate": 3.3429383740587065e-05, + "log_odds_chosen": 9.026152610778809, + "log_odds_ratio": -0.00034249460441060364, + "logits/chosen": -0.6311862468719482, + "logits/rejected": -0.58100426197052, + "logps/chosen": -0.0006225037504918873, + "logps/rejected": -1.2492806911468506, + "loss": 1.3607, + "nll_loss": 0.34014812111854553, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.225037213880569e-05, + "rewards/margins": 0.12486580014228821, + "rewards/rejected": -0.1249280646443367, + "step": 5759 + }, + { + "epoch": 3.983402489626556, + "grad_norm": 9.150677680969238, + "learning_rate": 3.342554172429691e-05, + "log_odds_chosen": 7.137037754058838, + "log_odds_ratio": -0.1796838790178299, + "logits/chosen": -0.38474273681640625, + "logits/rejected": -0.26010262966156006, + "logps/chosen": -0.03558574989438057, + "logps/rejected": -1.1326240301132202, + "loss": 1.5331, + "nll_loss": 0.36530226469039917, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035585747100412846, + "rewards/margins": 0.10970382392406464, + "rewards/rejected": -0.11326240003108978, + "step": 5760 + }, + { + "epoch": 3.984094052558783, + "grad_norm": 7.6114630699157715, + "learning_rate": 3.342169970800676e-05, + "log_odds_chosen": 9.663246154785156, + "log_odds_ratio": -0.00026955429348163307, + "logits/chosen": -0.3328566551208496, + "logits/rejected": -0.32750049233436584, + "logps/chosen": -0.0006487221107818186, + "logps/rejected": -1.4827356338500977, + "loss": 1.2445, + "nll_loss": 0.3111001253128052, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487221253337339e-05, + "rewards/margins": 0.14820870757102966, + "rewards/rejected": -0.14827357232570648, + "step": 5761 + }, + { + "epoch": 3.9847856154910097, + "grad_norm": 9.025335311889648, + "learning_rate": 3.3417857691716615e-05, + "log_odds_chosen": 9.366215705871582, + "log_odds_ratio": -0.00027741739177145064, + "logits/chosen": -0.6465136408805847, + "logits/rejected": -0.5712451934814453, + "logps/chosen": -0.0003560830373317003, + "logps/rejected": -1.2528438568115234, + "loss": 1.9347, + "nll_loss": 0.4836418032646179, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.560830373317003e-05, + "rewards/margins": 0.12524878978729248, + "rewards/rejected": -0.12528440356254578, + "step": 5762 + }, + { + "epoch": 3.9854771784232366, + "grad_norm": 6.303955554962158, + "learning_rate": 3.341401567542646e-05, + "log_odds_chosen": 10.434503555297852, + "log_odds_ratio": -0.000102290025097318, + "logits/chosen": -0.562961757183075, + "logits/rejected": -0.673736572265625, + "logps/chosen": -0.0002883031265810132, + "logps/rejected": -2.2828972339630127, + "loss": 0.9964, + "nll_loss": 0.24909597635269165, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.883031265810132e-05, + "rewards/margins": 0.22826090455055237, + "rewards/rejected": -0.22828972339630127, + "step": 5763 + }, + { + "epoch": 3.9861687413554634, + "grad_norm": 12.010201454162598, + "learning_rate": 3.341017365913632e-05, + "log_odds_chosen": 7.408069610595703, + "log_odds_ratio": -0.025262746959924698, + "logits/chosen": -0.7419548034667969, + "logits/rejected": -0.7320268154144287, + "logps/chosen": -0.00811043381690979, + "logps/rejected": -1.610243797302246, + "loss": 1.5759, + "nll_loss": 0.3914604187011719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008110433700494468, + "rewards/margins": 0.16021332144737244, + "rewards/rejected": -0.16102437674999237, + "step": 5764 + }, + { + "epoch": 3.9868603042876902, + "grad_norm": 6.950577259063721, + "learning_rate": 3.3406331642846166e-05, + "log_odds_chosen": 8.224448204040527, + "log_odds_ratio": -0.007369033992290497, + "logits/chosen": -0.808485209941864, + "logits/rejected": -0.788963258266449, + "logps/chosen": -0.02338700369000435, + "logps/rejected": -2.2966105937957764, + "loss": 1.6337, + "nll_loss": 0.4076803922653198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023387002293020487, + "rewards/margins": 0.22732235491275787, + "rewards/rejected": -0.22966104745864868, + "step": 5765 + }, + { + "epoch": 3.987551867219917, + "grad_norm": 7.650085926055908, + "learning_rate": 3.340248962655602e-05, + "log_odds_chosen": 10.956058502197266, + "log_odds_ratio": -4.377203731564805e-05, + "logits/chosen": -0.4419000744819641, + "logits/rejected": -0.5032147169113159, + "logps/chosen": -0.00018789824389386922, + "logps/rejected": -2.5352671146392822, + "loss": 0.8885, + "nll_loss": 0.2221318781375885, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.878982402558904e-05, + "rewards/margins": 0.25350794196128845, + "rewards/rejected": -0.2535267472267151, + "step": 5766 + }, + { + "epoch": 3.988243430152144, + "grad_norm": 8.995047569274902, + "learning_rate": 3.339864761026587e-05, + "log_odds_chosen": 9.685747146606445, + "log_odds_ratio": -0.00021190382540225983, + "logits/chosen": -0.6376558542251587, + "logits/rejected": -0.6977887749671936, + "logps/chosen": -0.000918483070563525, + "logps/rejected": -1.987686038017273, + "loss": 1.1773, + "nll_loss": 0.2943156957626343, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.184830560116097e-05, + "rewards/margins": 0.19867676496505737, + "rewards/rejected": -0.19876858592033386, + "step": 5767 + }, + { + "epoch": 3.9889349930843707, + "grad_norm": 9.66429328918457, + "learning_rate": 3.339480559397572e-05, + "log_odds_chosen": 9.874263763427734, + "log_odds_ratio": -0.03149972856044769, + "logits/chosen": -0.5896898508071899, + "logits/rejected": -0.698781430721283, + "logps/chosen": -0.014882597140967846, + "logps/rejected": -2.3216664791107178, + "loss": 2.0424, + "nll_loss": 0.5074531435966492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014882597606629133, + "rewards/margins": 0.23067839443683624, + "rewards/rejected": -0.23216666281223297, + "step": 5768 + }, + { + "epoch": 3.9896265560165975, + "grad_norm": 10.384910583496094, + "learning_rate": 3.339096357768557e-05, + "log_odds_chosen": 10.813526153564453, + "log_odds_ratio": -2.9791222914354876e-05, + "logits/chosen": -0.48536092042922974, + "logits/rejected": -0.4883587062358856, + "logps/chosen": -0.0001726519549265504, + "logps/rejected": -2.2120351791381836, + "loss": 1.0716, + "nll_loss": 0.26789313554763794, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.72651962202508e-05, + "rewards/margins": 0.22118626534938812, + "rewards/rejected": -0.2212035059928894, + "step": 5769 + }, + { + "epoch": 3.9903181189488244, + "grad_norm": 224.6512908935547, + "learning_rate": 3.338712156139542e-05, + "log_odds_chosen": 6.486282825469971, + "log_odds_ratio": -1.4710427522659302, + "logits/chosen": -0.4123340845108032, + "logits/rejected": -0.39311158657073975, + "logps/chosen": -0.2972959578037262, + "logps/rejected": -1.7887805700302124, + "loss": 2.2346, + "nll_loss": 0.411540150642395, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02972959727048874, + "rewards/margins": 0.14914844930171967, + "rewards/rejected": -0.1788780689239502, + "step": 5770 + }, + { + "epoch": 3.991009681881051, + "grad_norm": 7.9631876945495605, + "learning_rate": 3.3383279545105273e-05, + "log_odds_chosen": 10.068863868713379, + "log_odds_ratio": -8.86126363184303e-05, + "logits/chosen": -0.6509562134742737, + "logits/rejected": -0.7054494023323059, + "logps/chosen": -0.0013785153860226274, + "logps/rejected": -1.69233238697052, + "loss": 0.8035, + "nll_loss": 0.2008584439754486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013785154442302883, + "rewards/margins": 0.16909539699554443, + "rewards/rejected": -0.16923324763774872, + "step": 5771 + }, + { + "epoch": 3.991701244813278, + "grad_norm": 16.560514450073242, + "learning_rate": 3.337943752881512e-05, + "log_odds_chosen": 9.463789939880371, + "log_odds_ratio": -0.005806229077279568, + "logits/chosen": -0.5243285298347473, + "logits/rejected": -0.5927683711051941, + "logps/chosen": -0.0042679328471422195, + "logps/rejected": -2.5110771656036377, + "loss": 2.4263, + "nll_loss": 0.6059852242469788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042679329635575414, + "rewards/margins": 0.25068092346191406, + "rewards/rejected": -0.25110769271850586, + "step": 5772 + }, + { + "epoch": 3.992392807745505, + "grad_norm": 14.416969299316406, + "learning_rate": 3.337559551252498e-05, + "log_odds_chosen": 10.008670806884766, + "log_odds_ratio": -0.00036832113983109593, + "logits/chosen": -0.652914822101593, + "logits/rejected": -0.7095764875411987, + "logps/chosen": -0.0006406122702173889, + "logps/rejected": -1.8933131694793701, + "loss": 1.6654, + "nll_loss": 0.4163016974925995, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406122702173889e-05, + "rewards/margins": 0.18926726281642914, + "rewards/rejected": -0.1893313229084015, + "step": 5773 + }, + { + "epoch": 3.9930843706777317, + "grad_norm": 11.007437705993652, + "learning_rate": 3.3371753496234824e-05, + "log_odds_chosen": 9.9483642578125, + "log_odds_ratio": -0.055682096630334854, + "logits/chosen": -0.6526095867156982, + "logits/rejected": -0.6657837629318237, + "logps/chosen": -0.013267319649457932, + "logps/rejected": -2.245244026184082, + "loss": 1.2503, + "nll_loss": 0.30701395869255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013267318718135357, + "rewards/margins": 0.22319768369197845, + "rewards/rejected": -0.22452440857887268, + "step": 5774 + }, + { + "epoch": 3.9937759336099585, + "grad_norm": 11.40454387664795, + "learning_rate": 3.3367911479944676e-05, + "log_odds_chosen": 9.399166107177734, + "log_odds_ratio": -0.0002439269155729562, + "logits/chosen": -0.3749449849128723, + "logits/rejected": -0.46598711609840393, + "logps/chosen": -0.0005690623656846583, + "logps/rejected": -1.5011813640594482, + "loss": 0.9484, + "nll_loss": 0.23707221448421478, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6906239478848875e-05, + "rewards/margins": 0.15006123483181, + "rewards/rejected": -0.1501181423664093, + "step": 5775 + }, + { + "epoch": 3.9944674965421854, + "grad_norm": 11.258934020996094, + "learning_rate": 3.336406946365453e-05, + "log_odds_chosen": 9.372721672058105, + "log_odds_ratio": -0.00011832044401671737, + "logits/chosen": -0.5779632925987244, + "logits/rejected": -0.5768287181854248, + "logps/chosen": -0.00047448737313970923, + "logps/rejected": -1.6736748218536377, + "loss": 1.5988, + "nll_loss": 0.39969536662101746, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7448738769162446e-05, + "rewards/margins": 0.16732004284858704, + "rewards/rejected": -0.16736748814582825, + "step": 5776 + }, + { + "epoch": 3.995159059474412, + "grad_norm": 13.851411819458008, + "learning_rate": 3.336022744736438e-05, + "log_odds_chosen": 8.354120254516602, + "log_odds_ratio": -0.3387634754180908, + "logits/chosen": -0.58873051404953, + "logits/rejected": -0.6785441637039185, + "logps/chosen": -0.0636986568570137, + "logps/rejected": -1.902686595916748, + "loss": 1.4361, + "nll_loss": 0.3251374363899231, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00636986643075943, + "rewards/margins": 0.18389879167079926, + "rewards/rejected": -0.19026866555213928, + "step": 5777 + }, + { + "epoch": 3.995850622406639, + "grad_norm": 8.498478889465332, + "learning_rate": 3.335638543107423e-05, + "log_odds_chosen": 8.823246002197266, + "log_odds_ratio": -0.04847763851284981, + "logits/chosen": -0.5457412004470825, + "logits/rejected": -0.6254887580871582, + "logps/chosen": -0.009564734995365143, + "logps/rejected": -1.7078287601470947, + "loss": 1.5128, + "nll_loss": 0.3733523190021515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009564735228195786, + "rewards/margins": 0.1698264181613922, + "rewards/rejected": -0.1707828789949417, + "step": 5778 + }, + { + "epoch": 3.996542185338866, + "grad_norm": 10.335137367248535, + "learning_rate": 3.335254341478408e-05, + "log_odds_chosen": 10.140752792358398, + "log_odds_ratio": -0.00010038846812676638, + "logits/chosen": -0.5030477046966553, + "logits/rejected": -0.5023469924926758, + "logps/chosen": -0.0018512359820306301, + "logps/rejected": -2.288264274597168, + "loss": 1.9676, + "nll_loss": 0.4918965995311737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018512360111344606, + "rewards/margins": 0.2286413162946701, + "rewards/rejected": -0.22882644832134247, + "step": 5779 + }, + { + "epoch": 3.9972337482710927, + "grad_norm": 6.054286003112793, + "learning_rate": 3.334870139849393e-05, + "log_odds_chosen": 7.87191915512085, + "log_odds_ratio": -0.12952713668346405, + "logits/chosen": -0.4336947500705719, + "logits/rejected": -0.5151492953300476, + "logps/chosen": -0.01956653967499733, + "logps/rejected": -1.6524310111999512, + "loss": 0.9637, + "nll_loss": 0.2279704511165619, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019566540140658617, + "rewards/margins": 0.16328644752502441, + "rewards/rejected": -0.16524310410022736, + "step": 5780 + }, + { + "epoch": 3.9979253112033195, + "grad_norm": 12.384289741516113, + "learning_rate": 3.334485938220378e-05, + "log_odds_chosen": 9.222118377685547, + "log_odds_ratio": -0.0014353328151628375, + "logits/chosen": -0.7566649317741394, + "logits/rejected": -0.8445166349411011, + "logps/chosen": -0.023272883147001266, + "logps/rejected": -2.233126163482666, + "loss": 1.6794, + "nll_loss": 0.41969895362854004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002327288268133998, + "rewards/margins": 0.2209853231906891, + "rewards/rejected": -0.2233126163482666, + "step": 5781 + }, + { + "epoch": 3.9986168741355463, + "grad_norm": 11.377348899841309, + "learning_rate": 3.334101736591364e-05, + "log_odds_chosen": 10.25976848602295, + "log_odds_ratio": -0.00017066083091776818, + "logits/chosen": -0.3861597180366516, + "logits/rejected": -0.5417386293411255, + "logps/chosen": -0.0005861219833604991, + "logps/rejected": -2.005835771560669, + "loss": 1.5664, + "nll_loss": 0.3915861248970032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.861219688085839e-05, + "rewards/margins": 0.2005249559879303, + "rewards/rejected": -0.2005835920572281, + "step": 5782 + }, + { + "epoch": 3.999308437067773, + "grad_norm": 18.3858585357666, + "learning_rate": 3.333717534962348e-05, + "log_odds_chosen": 10.059211730957031, + "log_odds_ratio": -0.0001356978464173153, + "logits/chosen": -0.836675763130188, + "logits/rejected": -0.828482985496521, + "logps/chosen": -0.00039792529423721135, + "logps/rejected": -2.2047948837280273, + "loss": 1.9703, + "nll_loss": 0.49256181716918945, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.979253233410418e-05, + "rewards/margins": 0.22043968737125397, + "rewards/rejected": -0.22047948837280273, + "step": 5783 + }, + { + "epoch": 4.0, + "grad_norm": 19.170635223388672, + "learning_rate": 3.3333333333333335e-05, + "log_odds_chosen": 8.822633743286133, + "log_odds_ratio": -0.1293601244688034, + "logits/chosen": -0.4684547185897827, + "logits/rejected": -0.5892400741577148, + "logps/chosen": -0.019132127985358238, + "logps/rejected": -1.3569003343582153, + "loss": 2.0622, + "nll_loss": 0.502617597579956, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019132127054035664, + "rewards/margins": 0.13377681374549866, + "rewards/rejected": -0.13569003343582153, + "step": 5784 + }, + { + "epoch": 4.000691562932227, + "grad_norm": 7.128642559051514, + "learning_rate": 3.332949131704319e-05, + "log_odds_chosen": 9.820199966430664, + "log_odds_ratio": -0.000142527642310597, + "logits/chosen": -0.2510155141353607, + "logits/rejected": -0.3180846571922302, + "logps/chosen": -0.00030701240757480264, + "logps/rejected": -1.6696772575378418, + "loss": 0.9549, + "nll_loss": 0.23871463537216187, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0701241485076025e-05, + "rewards/margins": 0.16693702340126038, + "rewards/rejected": -0.1669677346944809, + "step": 5785 + }, + { + "epoch": 4.001383125864454, + "grad_norm": 6.740342617034912, + "learning_rate": 3.332564930075304e-05, + "log_odds_chosen": 10.16427993774414, + "log_odds_ratio": -0.0003346512676216662, + "logits/chosen": -0.3441002666950226, + "logits/rejected": -0.3391599655151367, + "logps/chosen": -0.000762086478061974, + "logps/rejected": -2.062274217605591, + "loss": 1.1732, + "nll_loss": 0.29325664043426514, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.620864198543131e-05, + "rewards/margins": 0.20615121722221375, + "rewards/rejected": -0.20622742176055908, + "step": 5786 + }, + { + "epoch": 4.0020746887966805, + "grad_norm": 9.397114753723145, + "learning_rate": 3.3321807284462885e-05, + "log_odds_chosen": 9.37049674987793, + "log_odds_ratio": -0.0008974755764938891, + "logits/chosen": -0.21732686460018158, + "logits/rejected": -0.30178898572921753, + "logps/chosen": -0.003378002205863595, + "logps/rejected": -2.089550018310547, + "loss": 1.0666, + "nll_loss": 0.2665643095970154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003378002729732543, + "rewards/margins": 0.2086172103881836, + "rewards/rejected": -0.20895501971244812, + "step": 5787 + }, + { + "epoch": 4.002766251728907, + "grad_norm": 6.804192066192627, + "learning_rate": 3.331796526817274e-05, + "log_odds_chosen": 8.74463176727295, + "log_odds_ratio": -0.008925949223339558, + "logits/chosen": 0.05974145978689194, + "logits/rejected": -0.018502473831176758, + "logps/chosen": -0.0030243650544434786, + "logps/rejected": -1.566483974456787, + "loss": 1.4798, + "nll_loss": 0.36905479431152344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030243649962358177, + "rewards/margins": 0.15634596347808838, + "rewards/rejected": -0.1566484123468399, + "step": 5788 + }, + { + "epoch": 4.003457814661134, + "grad_norm": 7.862604141235352, + "learning_rate": 3.331412325188259e-05, + "log_odds_chosen": 10.536462783813477, + "log_odds_ratio": -8.758976764511317e-05, + "logits/chosen": -0.43617433309555054, + "logits/rejected": -0.4724277853965759, + "logps/chosen": -0.00041452451841905713, + "logps/rejected": -2.274538040161133, + "loss": 1.2811, + "nll_loss": 0.3202652633190155, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1452447476331145e-05, + "rewards/margins": 0.2274123728275299, + "rewards/rejected": -0.2274537980556488, + "step": 5789 + }, + { + "epoch": 4.004149377593361, + "grad_norm": 6.825366973876953, + "learning_rate": 3.3310281235592436e-05, + "log_odds_chosen": 9.179971694946289, + "log_odds_ratio": -0.02155376970767975, + "logits/chosen": -0.5471166968345642, + "logits/rejected": -0.5032497644424438, + "logps/chosen": -0.012416576966643333, + "logps/rejected": -1.6312053203582764, + "loss": 1.114, + "nll_loss": 0.276351660490036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012416577665135264, + "rewards/margins": 0.16187886893749237, + "rewards/rejected": -0.16312053799629211, + "step": 5790 + }, + { + "epoch": 4.004840940525588, + "grad_norm": 7.924466609954834, + "learning_rate": 3.3306439219302295e-05, + "log_odds_chosen": 10.494912147521973, + "log_odds_ratio": -8.59973588376306e-05, + "logits/chosen": -0.2867485582828522, + "logits/rejected": -0.3789327144622803, + "logps/chosen": -0.00026885856641456485, + "logps/rejected": -1.7884368896484375, + "loss": 1.0137, + "nll_loss": 0.2534157931804657, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6885860279435292e-05, + "rewards/margins": 0.1788167953491211, + "rewards/rejected": -0.178843691945076, + "step": 5791 + }, + { + "epoch": 4.005532503457815, + "grad_norm": 5.553402900695801, + "learning_rate": 3.330259720301214e-05, + "log_odds_chosen": 10.350786209106445, + "log_odds_ratio": -6.509172089863569e-05, + "logits/chosen": -0.7506746649742126, + "logits/rejected": -0.7882779836654663, + "logps/chosen": -0.0001626495795790106, + "logps/rejected": -1.4804134368896484, + "loss": 1.3166, + "nll_loss": 0.32914409041404724, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6264959413092583e-05, + "rewards/margins": 0.14802506566047668, + "rewards/rejected": -0.14804133772850037, + "step": 5792 + }, + { + "epoch": 4.0062240663900415, + "grad_norm": 7.096441745758057, + "learning_rate": 3.329875518672199e-05, + "log_odds_chosen": 10.433094024658203, + "log_odds_ratio": -4.2833940824493766e-05, + "logits/chosen": -0.7019106149673462, + "logits/rejected": -0.7552410364151001, + "logps/chosen": -0.0001473083975724876, + "logps/rejected": -1.739577293395996, + "loss": 1.2298, + "nll_loss": 0.30744898319244385, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4730839211551938e-05, + "rewards/margins": 0.17394298315048218, + "rewards/rejected": -0.1739577353000641, + "step": 5793 + }, + { + "epoch": 4.006915629322268, + "grad_norm": 11.629804611206055, + "learning_rate": 3.3294913170431846e-05, + "log_odds_chosen": 9.714581489562988, + "log_odds_ratio": -0.004458730109035969, + "logits/chosen": -0.5414714813232422, + "logits/rejected": -0.5641164183616638, + "logps/chosen": -0.0026589545886963606, + "logps/rejected": -1.81493079662323, + "loss": 1.8955, + "nll_loss": 0.47342973947525024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026589547633193433, + "rewards/margins": 0.1812271922826767, + "rewards/rejected": -0.18149308860301971, + "step": 5794 + }, + { + "epoch": 4.007607192254495, + "grad_norm": 9.95302963256836, + "learning_rate": 3.32910711541417e-05, + "log_odds_chosen": 9.88841438293457, + "log_odds_ratio": -0.0001660238776821643, + "logits/chosen": -0.5515083074569702, + "logits/rejected": -0.6294294595718384, + "logps/chosen": -0.0006042959867045283, + "logps/rejected": -1.9443670511245728, + "loss": 1.36, + "nll_loss": 0.33998462557792664, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.042960376362316e-05, + "rewards/margins": 0.19437627494335175, + "rewards/rejected": -0.1944366991519928, + "step": 5795 + }, + { + "epoch": 4.008298755186722, + "grad_norm": 5.845525741577148, + "learning_rate": 3.3287229137851544e-05, + "log_odds_chosen": 9.629524230957031, + "log_odds_ratio": -0.0003176818136125803, + "logits/chosen": -0.6585010290145874, + "logits/rejected": -0.6973461508750916, + "logps/chosen": -0.005589592270553112, + "logps/rejected": -2.166471242904663, + "loss": 0.9621, + "nll_loss": 0.2404988408088684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005589592037722468, + "rewards/margins": 0.2160881757736206, + "rewards/rejected": -0.21664713323116302, + "step": 5796 + }, + { + "epoch": 4.008990318118949, + "grad_norm": 5.421998500823975, + "learning_rate": 3.3283387121561396e-05, + "log_odds_chosen": 9.164302825927734, + "log_odds_ratio": -0.0008697113371454179, + "logits/chosen": -0.6519193649291992, + "logits/rejected": -0.6003968119621277, + "logps/chosen": -0.01484967116266489, + "logps/rejected": -2.3859739303588867, + "loss": 2.3477, + "nll_loss": 0.5868465304374695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014849671861156821, + "rewards/margins": 0.23711243271827698, + "rewards/rejected": -0.23859739303588867, + "step": 5797 + }, + { + "epoch": 4.009681881051176, + "grad_norm": 9.98675537109375, + "learning_rate": 3.327954510527125e-05, + "log_odds_chosen": 10.505779266357422, + "log_odds_ratio": -5.718199099646881e-05, + "logits/chosen": -0.9864379167556763, + "logits/rejected": -1.007617473602295, + "logps/chosen": -0.0009500356391072273, + "logps/rejected": -2.0847883224487305, + "loss": 1.2023, + "nll_loss": 0.3005639314651489, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.500356100033969e-05, + "rewards/margins": 0.20838382840156555, + "rewards/rejected": -0.20847883820533752, + "step": 5798 + }, + { + "epoch": 4.0103734439834025, + "grad_norm": 8.891336441040039, + "learning_rate": 3.3275703088981094e-05, + "log_odds_chosen": 9.008024215698242, + "log_odds_ratio": -0.0032351433765143156, + "logits/chosen": -0.703883171081543, + "logits/rejected": -0.689159631729126, + "logps/chosen": -0.0022129842545837164, + "logps/rejected": -1.6951110363006592, + "loss": 0.9387, + "nll_loss": 0.23434747755527496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022129842545837164, + "rewards/margins": 0.16928979754447937, + "rewards/rejected": -0.1695111095905304, + "step": 5799 + }, + { + "epoch": 4.011065006915629, + "grad_norm": 6.8816142082214355, + "learning_rate": 3.3271861072690954e-05, + "log_odds_chosen": 9.651518821716309, + "log_odds_ratio": -0.0003201818326488137, + "logits/chosen": -0.559990644454956, + "logits/rejected": -0.6841223239898682, + "logps/chosen": -0.002246101386845112, + "logps/rejected": -1.9939563274383545, + "loss": 1.3129, + "nll_loss": 0.32818740606307983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022461013577412814, + "rewards/margins": 0.1991710364818573, + "rewards/rejected": -0.19939564168453217, + "step": 5800 + }, + { + "epoch": 4.011756569847856, + "grad_norm": 6.750901222229004, + "learning_rate": 3.32680190564008e-05, + "log_odds_chosen": 8.320624351501465, + "log_odds_ratio": -0.0023899758234620094, + "logits/chosen": -0.9084970355033875, + "logits/rejected": -0.9268815517425537, + "logps/chosen": -0.0070159039460122585, + "logps/rejected": -1.546889066696167, + "loss": 1.1501, + "nll_loss": 0.2872742712497711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007015903829596937, + "rewards/margins": 0.153987318277359, + "rewards/rejected": -0.15468890964984894, + "step": 5801 + }, + { + "epoch": 4.012448132780083, + "grad_norm": 13.624855995178223, + "learning_rate": 3.326417704011065e-05, + "log_odds_chosen": 10.405290603637695, + "log_odds_ratio": -4.9031819798983634e-05, + "logits/chosen": -0.9404025077819824, + "logits/rejected": -1.0046405792236328, + "logps/chosen": -0.00026822343352250755, + "logps/rejected": -1.8819442987442017, + "loss": 1.0397, + "nll_loss": 0.25991424918174744, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6822342988452874e-05, + "rewards/margins": 0.18816760182380676, + "rewards/rejected": -0.1881944239139557, + "step": 5802 + }, + { + "epoch": 4.01313969571231, + "grad_norm": 7.276597499847412, + "learning_rate": 3.3260335023820504e-05, + "log_odds_chosen": 9.873104095458984, + "log_odds_ratio": -9.630187560105696e-05, + "logits/chosen": -0.5927930474281311, + "logits/rejected": -0.6485029458999634, + "logps/chosen": -0.0012197827454656363, + "logps/rejected": -2.201451063156128, + "loss": 1.2459, + "nll_loss": 0.3114704191684723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001219782789121382, + "rewards/margins": 0.22002311050891876, + "rewards/rejected": -0.2201451063156128, + "step": 5803 + }, + { + "epoch": 4.013831258644537, + "grad_norm": 8.02657413482666, + "learning_rate": 3.325649300753036e-05, + "log_odds_chosen": 8.938360214233398, + "log_odds_ratio": -0.0008153109229169786, + "logits/chosen": -0.7673556804656982, + "logits/rejected": -0.8766248226165771, + "logps/chosen": -0.00693178316578269, + "logps/rejected": -2.1472220420837402, + "loss": 1.2568, + "nll_loss": 0.3141286075115204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006931783864274621, + "rewards/margins": 0.21402904391288757, + "rewards/rejected": -0.21472221612930298, + "step": 5804 + }, + { + "epoch": 4.014522821576763, + "grad_norm": 7.6537041664123535, + "learning_rate": 3.32526509912402e-05, + "log_odds_chosen": 7.112116813659668, + "log_odds_ratio": -0.15588873624801636, + "logits/chosen": -0.3076091408729553, + "logits/rejected": -0.3353484272956848, + "logps/chosen": -0.03844211995601654, + "logps/rejected": -1.5237889289855957, + "loss": 1.308, + "nll_loss": 0.3114077150821686, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003844211809337139, + "rewards/margins": 0.14853468537330627, + "rewards/rejected": -0.1523789018392563, + "step": 5805 + }, + { + "epoch": 4.01521438450899, + "grad_norm": 7.396170616149902, + "learning_rate": 3.3248808974950055e-05, + "log_odds_chosen": 8.05859375, + "log_odds_ratio": -0.04836432635784149, + "logits/chosen": -0.5385514497756958, + "logits/rejected": -0.6036756634712219, + "logps/chosen": -0.01102085318416357, + "logps/rejected": -0.7705286145210266, + "loss": 1.4619, + "nll_loss": 0.36063531041145325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011020853416994214, + "rewards/margins": 0.07595077902078629, + "rewards/rejected": -0.07705286890268326, + "step": 5806 + }, + { + "epoch": 4.015905947441217, + "grad_norm": 13.535194396972656, + "learning_rate": 3.324496695865991e-05, + "log_odds_chosen": 10.356252670288086, + "log_odds_ratio": -8.396595512749627e-05, + "logits/chosen": -0.6381219029426575, + "logits/rejected": -0.7172715067863464, + "logps/chosen": -0.00027148399385623634, + "logps/rejected": -1.8285762071609497, + "loss": 0.8793, + "nll_loss": 0.2198178768157959, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7148398658027872e-05, + "rewards/margins": 0.18283048272132874, + "rewards/rejected": -0.18285763263702393, + "step": 5807 + }, + { + "epoch": 4.016597510373444, + "grad_norm": 12.470870971679688, + "learning_rate": 3.324112494236975e-05, + "log_odds_chosen": 9.309225082397461, + "log_odds_ratio": -0.004393730312585831, + "logits/chosen": -0.423007607460022, + "logits/rejected": -0.48332124948501587, + "logps/chosen": -0.0026422408409416676, + "logps/rejected": -1.7035892009735107, + "loss": 1.0427, + "nll_loss": 0.26024723052978516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002642240724526346, + "rewards/margins": 0.17009469866752625, + "rewards/rejected": -0.17035892605781555, + "step": 5808 + }, + { + "epoch": 4.017289073305671, + "grad_norm": 8.032957077026367, + "learning_rate": 3.323728292607961e-05, + "log_odds_chosen": 8.703680038452148, + "log_odds_ratio": -0.08522697538137436, + "logits/chosen": -0.41288989782333374, + "logits/rejected": -0.4818640947341919, + "logps/chosen": -0.014834209345281124, + "logps/rejected": -1.533931016921997, + "loss": 1.2859, + "nll_loss": 0.31296151876449585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014834211906418204, + "rewards/margins": 0.15190967917442322, + "rewards/rejected": -0.15339310467243195, + "step": 5809 + }, + { + "epoch": 4.017980636237898, + "grad_norm": 5.716402053833008, + "learning_rate": 3.323344090978946e-05, + "log_odds_chosen": 8.23971176147461, + "log_odds_ratio": -0.036382004618644714, + "logits/chosen": 0.07608754932880402, + "logits/rejected": 0.08648036420345306, + "logps/chosen": -0.011090649291872978, + "logps/rejected": -1.2002122402191162, + "loss": 1.1303, + "nll_loss": 0.27892497181892395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011090649059042335, + "rewards/margins": 0.11891216039657593, + "rewards/rejected": -0.12002123147249222, + "step": 5810 + }, + { + "epoch": 4.018672199170124, + "grad_norm": 6.547496795654297, + "learning_rate": 3.322959889349931e-05, + "log_odds_chosen": 8.85584831237793, + "log_odds_ratio": -0.08818444609642029, + "logits/chosen": -0.5728781819343567, + "logits/rejected": -0.6478579044342041, + "logps/chosen": -0.015129598788917065, + "logps/rejected": -1.615843415260315, + "loss": 1.0228, + "nll_loss": 0.2468700110912323, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015129598323255777, + "rewards/margins": 0.16007138788700104, + "rewards/rejected": -0.16158434748649597, + "step": 5811 + }, + { + "epoch": 4.019363762102351, + "grad_norm": 5.890070915222168, + "learning_rate": 3.322575687720916e-05, + "log_odds_chosen": 8.961607933044434, + "log_odds_ratio": -0.0023988212924450636, + "logits/chosen": -0.500460684299469, + "logits/rejected": -0.5649879574775696, + "logps/chosen": -0.0030070669017732143, + "logps/rejected": -1.4147664308547974, + "loss": 0.6675, + "nll_loss": 0.16662628948688507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030070668435655534, + "rewards/margins": 0.14117594063282013, + "rewards/rejected": -0.14147664606571198, + "step": 5812 + }, + { + "epoch": 4.020055325034578, + "grad_norm": 8.608002662658691, + "learning_rate": 3.3221914860919015e-05, + "log_odds_chosen": 10.015937805175781, + "log_odds_ratio": -0.00011480056855361909, + "logits/chosen": -0.38741636276245117, + "logits/rejected": -0.47290775179862976, + "logps/chosen": -0.000604795990511775, + "logps/rejected": -2.022552013397217, + "loss": 0.9456, + "nll_loss": 0.23638921976089478, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047959686839022e-05, + "rewards/margins": 0.2021946907043457, + "rewards/rejected": -0.20225518941879272, + "step": 5813 + }, + { + "epoch": 4.020746887966805, + "grad_norm": 8.891815185546875, + "learning_rate": 3.321807284462886e-05, + "log_odds_chosen": 10.402395248413086, + "log_odds_ratio": -6.092392504797317e-05, + "logits/chosen": -0.6218123435974121, + "logits/rejected": -0.6601628065109253, + "logps/chosen": -0.00022192316828295588, + "logps/rejected": -1.955885648727417, + "loss": 1.2569, + "nll_loss": 0.31421294808387756, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.219231828348711e-05, + "rewards/margins": 0.19556638598442078, + "rewards/rejected": -0.19558857381343842, + "step": 5814 + }, + { + "epoch": 4.021438450899032, + "grad_norm": 6.3259100914001465, + "learning_rate": 3.321423082833871e-05, + "log_odds_chosen": 9.721456527709961, + "log_odds_ratio": -0.0003540450125001371, + "logits/chosen": -0.442880779504776, + "logits/rejected": -0.5284356474876404, + "logps/chosen": -0.0006333962082862854, + "logps/rejected": -1.8140935897827148, + "loss": 1.0699, + "nll_loss": 0.2674439549446106, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333962664939463e-05, + "rewards/margins": 0.18134601414203644, + "rewards/rejected": -0.18140935897827148, + "step": 5815 + }, + { + "epoch": 4.022130013831259, + "grad_norm": 10.469477653503418, + "learning_rate": 3.3210388812048566e-05, + "log_odds_chosen": 9.853558540344238, + "log_odds_ratio": -0.00032846731483004987, + "logits/chosen": -0.5319367051124573, + "logits/rejected": -0.5036752223968506, + "logps/chosen": -0.011169369332492352, + "logps/rejected": -2.269057273864746, + "loss": 0.9128, + "nll_loss": 0.22815820574760437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011169369099661708, + "rewards/margins": 0.22578881680965424, + "rewards/rejected": -0.22690574824810028, + "step": 5816 + }, + { + "epoch": 4.022821576763485, + "grad_norm": 12.238142013549805, + "learning_rate": 3.320654679575841e-05, + "log_odds_chosen": 10.812685012817383, + "log_odds_ratio": -3.920509334420785e-05, + "logits/chosen": -0.5341860055923462, + "logits/rejected": -0.5553966760635376, + "logps/chosen": -0.000269267096882686, + "logps/rejected": -2.453526020050049, + "loss": 1.8054, + "nll_loss": 0.45134228467941284, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6926711143460125e-05, + "rewards/margins": 0.24532568454742432, + "rewards/rejected": -0.2453525960445404, + "step": 5817 + }, + { + "epoch": 4.023513139695712, + "grad_norm": 9.472021102905273, + "learning_rate": 3.320270477946827e-05, + "log_odds_chosen": 10.345442771911621, + "log_odds_ratio": -0.00016254739603027701, + "logits/chosen": -0.6953545808792114, + "logits/rejected": -0.7270474433898926, + "logps/chosen": -0.002389824017882347, + "logps/rejected": -2.3488526344299316, + "loss": 1.2988, + "nll_loss": 0.3246712386608124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002389824076090008, + "rewards/margins": 0.2346462905406952, + "rewards/rejected": -0.23488527536392212, + "step": 5818 + }, + { + "epoch": 4.024204702627939, + "grad_norm": 9.914827346801758, + "learning_rate": 3.3198862763178116e-05, + "log_odds_chosen": 9.170671463012695, + "log_odds_ratio": -0.0005998075939714909, + "logits/chosen": -0.6479396820068359, + "logits/rejected": -0.6544699668884277, + "logps/chosen": -0.016290009021759033, + "logps/rejected": -1.6676925420761108, + "loss": 1.4749, + "nll_loss": 0.3686674237251282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016290009953081608, + "rewards/margins": 0.16514025628566742, + "rewards/rejected": -0.16676926612854004, + "step": 5819 + }, + { + "epoch": 4.024896265560166, + "grad_norm": 16.734146118164062, + "learning_rate": 3.319502074688797e-05, + "log_odds_chosen": 10.265254020690918, + "log_odds_ratio": -4.8487512685824186e-05, + "logits/chosen": -0.18163836002349854, + "logits/rejected": -0.30264967679977417, + "logps/chosen": -0.0008192628156393766, + "logps/rejected": -2.293407917022705, + "loss": 1.3015, + "nll_loss": 0.3253812789916992, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.192627865355462e-05, + "rewards/margins": 0.22925885021686554, + "rewards/rejected": -0.22934077680110931, + "step": 5820 + }, + { + "epoch": 4.025587828492393, + "grad_norm": 8.877284049987793, + "learning_rate": 3.319117873059782e-05, + "log_odds_chosen": 10.271307945251465, + "log_odds_ratio": -0.00017034000484272838, + "logits/chosen": -0.3948308825492859, + "logits/rejected": -0.4352070987224579, + "logps/chosen": -0.00041239382699131966, + "logps/rejected": -1.8108956813812256, + "loss": 0.9203, + "nll_loss": 0.23004809021949768, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1239381971536204e-05, + "rewards/margins": 0.18104831874370575, + "rewards/rejected": -0.18108955025672913, + "step": 5821 + }, + { + "epoch": 4.0262793914246195, + "grad_norm": 9.338384628295898, + "learning_rate": 3.3187336714307674e-05, + "log_odds_chosen": 9.377090454101562, + "log_odds_ratio": -0.0036353226751089096, + "logits/chosen": -0.27123838663101196, + "logits/rejected": -0.35256731510162354, + "logps/chosen": -0.002619482111185789, + "logps/rejected": -1.7505947351455688, + "loss": 0.8744, + "nll_loss": 0.21822936832904816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026194824022240937, + "rewards/margins": 0.17479753494262695, + "rewards/rejected": -0.1750594824552536, + "step": 5822 + }, + { + "epoch": 4.026970954356846, + "grad_norm": 6.480832099914551, + "learning_rate": 3.318349469801752e-05, + "log_odds_chosen": 10.178642272949219, + "log_odds_ratio": -9.12517643882893e-05, + "logits/chosen": -0.6648082733154297, + "logits/rejected": -0.702701210975647, + "logps/chosen": -0.00019598891958594322, + "logps/rejected": -1.6632938385009766, + "loss": 1.1788, + "nll_loss": 0.2946855127811432, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9598892322392203e-05, + "rewards/margins": 0.16630978882312775, + "rewards/rejected": -0.16632938385009766, + "step": 5823 + }, + { + "epoch": 4.027662517289073, + "grad_norm": 7.462141513824463, + "learning_rate": 3.317965268172737e-05, + "log_odds_chosen": 8.131479263305664, + "log_odds_ratio": -0.0034697859082370996, + "logits/chosen": -0.7409053444862366, + "logits/rejected": -0.7156566381454468, + "logps/chosen": -0.0027547297067940235, + "logps/rejected": -1.4278616905212402, + "loss": 1.5324, + "nll_loss": 0.38275811076164246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002754729939624667, + "rewards/margins": 0.14251069724559784, + "rewards/rejected": -0.1427861750125885, + "step": 5824 + }, + { + "epoch": 4.0283540802213, + "grad_norm": 8.907909393310547, + "learning_rate": 3.3175810665437224e-05, + "log_odds_chosen": 9.18642807006836, + "log_odds_ratio": -0.0003177436883561313, + "logits/chosen": -0.6876662969589233, + "logits/rejected": -0.6967380046844482, + "logps/chosen": -0.0016029981197789311, + "logps/rejected": -1.6288145780563354, + "loss": 1.5022, + "nll_loss": 0.37551790475845337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001602998236194253, + "rewards/margins": 0.1627211570739746, + "rewards/rejected": -0.16288146376609802, + "step": 5825 + }, + { + "epoch": 4.029045643153527, + "grad_norm": 9.733176231384277, + "learning_rate": 3.317196864914707e-05, + "log_odds_chosen": 10.043136596679688, + "log_odds_ratio": -0.00010084448149427772, + "logits/chosen": -0.6539413928985596, + "logits/rejected": -0.7563472986221313, + "logps/chosen": -0.0002622072061058134, + "logps/rejected": -1.8707082271575928, + "loss": 1.3907, + "nll_loss": 0.3476561903953552, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.62207213381771e-05, + "rewards/margins": 0.18704460561275482, + "rewards/rejected": -0.187070831656456, + "step": 5826 + }, + { + "epoch": 4.029737206085754, + "grad_norm": 5.350778102874756, + "learning_rate": 3.316812663285693e-05, + "log_odds_chosen": 9.771782875061035, + "log_odds_ratio": -0.00010835661669261754, + "logits/chosen": -0.5402613282203674, + "logits/rejected": -0.6298444271087646, + "logps/chosen": -0.00028526870300993323, + "logps/rejected": -1.5779385566711426, + "loss": 1.8248, + "nll_loss": 0.45618361234664917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8526872483780608e-05, + "rewards/margins": 0.15776532888412476, + "rewards/rejected": -0.15779386460781097, + "step": 5827 + }, + { + "epoch": 4.0304287690179805, + "grad_norm": 6.235321998596191, + "learning_rate": 3.3164284616566775e-05, + "log_odds_chosen": 10.532633781433105, + "log_odds_ratio": -3.9244259824045e-05, + "logits/chosen": -0.2530810236930847, + "logits/rejected": -0.2999076843261719, + "logps/chosen": -0.0009787610033527017, + "logps/rejected": -2.2996246814727783, + "loss": 1.0636, + "nll_loss": 0.26590120792388916, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.78761090664193e-05, + "rewards/margins": 0.22986459732055664, + "rewards/rejected": -0.22996249794960022, + "step": 5828 + }, + { + "epoch": 4.031120331950207, + "grad_norm": 12.791536331176758, + "learning_rate": 3.316044260027663e-05, + "log_odds_chosen": 10.55655288696289, + "log_odds_ratio": -6.204926467034966e-05, + "logits/chosen": -0.5250865817070007, + "logits/rejected": -0.6933255791664124, + "logps/chosen": -0.0002122572623193264, + "logps/rejected": -1.961160659790039, + "loss": 1.3719, + "nll_loss": 0.34295907616615295, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.122572550433688e-05, + "rewards/margins": 0.1960948407649994, + "rewards/rejected": -0.19611608982086182, + "step": 5829 + }, + { + "epoch": 4.031811894882434, + "grad_norm": 75.29803466796875, + "learning_rate": 3.315660058398648e-05, + "log_odds_chosen": 9.58288288116455, + "log_odds_ratio": -0.014156394638121128, + "logits/chosen": -0.46651431918144226, + "logits/rejected": -0.5524017214775085, + "logps/chosen": -0.2065291553735733, + "logps/rejected": -2.4342660903930664, + "loss": 1.4847, + "nll_loss": 0.36975815892219543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02065291441977024, + "rewards/margins": 0.2227737009525299, + "rewards/rejected": -0.2434266209602356, + "step": 5830 + }, + { + "epoch": 4.032503457814661, + "grad_norm": 10.947818756103516, + "learning_rate": 3.315275856769633e-05, + "log_odds_chosen": 10.092506408691406, + "log_odds_ratio": -7.060384814394638e-05, + "logits/chosen": -0.801364541053772, + "logits/rejected": -0.8676729202270508, + "logps/chosen": -0.00020597776165232062, + "logps/rejected": -1.7199057340621948, + "loss": 1.7281, + "nll_loss": 0.4320223927497864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.059777580143418e-05, + "rewards/margins": 0.17196998000144958, + "rewards/rejected": -0.17199058830738068, + "step": 5831 + }, + { + "epoch": 4.033195020746888, + "grad_norm": 5.659107685089111, + "learning_rate": 3.314891655140618e-05, + "log_odds_chosen": 9.809650421142578, + "log_odds_ratio": -0.00014637268031947315, + "logits/chosen": -0.37774717807769775, + "logits/rejected": -0.39840513467788696, + "logps/chosen": -0.0003810464695561677, + "logps/rejected": -1.6779835224151611, + "loss": 1.6287, + "nll_loss": 0.40714868903160095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.810464477282949e-05, + "rewards/margins": 0.16776025295257568, + "rewards/rejected": -0.16779834032058716, + "step": 5832 + }, + { + "epoch": 4.033886583679115, + "grad_norm": 5.58001708984375, + "learning_rate": 3.314507453511603e-05, + "log_odds_chosen": 9.410863876342773, + "log_odds_ratio": -0.0003080420719925314, + "logits/chosen": -0.025425251573324203, + "logits/rejected": -0.09473808109760284, + "logps/chosen": -0.00043381942668929696, + "logps/rejected": -1.3813542127609253, + "loss": 0.7886, + "nll_loss": 0.19711832702159882, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.338194048614241e-05, + "rewards/margins": 0.1380920559167862, + "rewards/rejected": -0.13813543319702148, + "step": 5833 + }, + { + "epoch": 4.0345781466113415, + "grad_norm": 7.627755165100098, + "learning_rate": 3.314123251882588e-05, + "log_odds_chosen": 10.607280731201172, + "log_odds_ratio": -0.0001868123799795285, + "logits/chosen": -0.47707587480545044, + "logits/rejected": -0.5821393728256226, + "logps/chosen": -0.00018466573965270072, + "logps/rejected": -2.233959674835205, + "loss": 1.2688, + "nll_loss": 0.3171829581260681, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8466576875653118e-05, + "rewards/margins": 0.223377525806427, + "rewards/rejected": -0.22339597344398499, + "step": 5834 + }, + { + "epoch": 4.035269709543568, + "grad_norm": 10.557724952697754, + "learning_rate": 3.313739050253573e-05, + "log_odds_chosen": 10.566575050354004, + "log_odds_ratio": -0.00015645605162717402, + "logits/chosen": -0.575480580329895, + "logits/rejected": -0.6633766293525696, + "logps/chosen": -0.0013164678821340203, + "logps/rejected": -1.9831597805023193, + "loss": 1.5592, + "nll_loss": 0.3897833228111267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013164679694455117, + "rewards/margins": 0.1981843262910843, + "rewards/rejected": -0.19831597805023193, + "step": 5835 + }, + { + "epoch": 4.035961272475795, + "grad_norm": 11.694565773010254, + "learning_rate": 3.313354848624559e-05, + "log_odds_chosen": 10.140708923339844, + "log_odds_ratio": -0.00014054967323318124, + "logits/chosen": -1.0209980010986328, + "logits/rejected": -1.130377173423767, + "logps/chosen": -0.0005266097723506391, + "logps/rejected": -1.9271914958953857, + "loss": 1.2674, + "nll_loss": 0.31684738397598267, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2660980145446956e-05, + "rewards/margins": 0.19266650080680847, + "rewards/rejected": -0.19271916151046753, + "step": 5836 + }, + { + "epoch": 4.036652835408022, + "grad_norm": 10.414692878723145, + "learning_rate": 3.312970646995543e-05, + "log_odds_chosen": 9.67306900024414, + "log_odds_ratio": -0.0002543226000852883, + "logits/chosen": -0.4915074110031128, + "logits/rejected": -0.49034228920936584, + "logps/chosen": -0.006264523137360811, + "logps/rejected": -2.669769287109375, + "loss": 0.9577, + "nll_loss": 0.23939737677574158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006264523253776133, + "rewards/margins": 0.26635047793388367, + "rewards/rejected": -0.266976922750473, + "step": 5837 + }, + { + "epoch": 4.037344398340249, + "grad_norm": 8.162940979003906, + "learning_rate": 3.3125864453665286e-05, + "log_odds_chosen": 8.675409317016602, + "log_odds_ratio": -0.0008966091554611921, + "logits/chosen": -0.8568264245986938, + "logits/rejected": -0.8570026755332947, + "logps/chosen": -0.0007508368580602109, + "logps/rejected": -1.2639950513839722, + "loss": 1.5968, + "nll_loss": 0.39911770820617676, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.508369162678719e-05, + "rewards/margins": 0.12632441520690918, + "rewards/rejected": -0.12639950215816498, + "step": 5838 + }, + { + "epoch": 4.038035961272476, + "grad_norm": 20.370845794677734, + "learning_rate": 3.312202243737514e-05, + "log_odds_chosen": 10.564475059509277, + "log_odds_ratio": -4.5465276343747973e-05, + "logits/chosen": -0.6202433109283447, + "logits/rejected": -0.6588761210441589, + "logps/chosen": -0.00035935192136093974, + "logps/rejected": -2.3072285652160645, + "loss": 1.3037, + "nll_loss": 0.325916588306427, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.593519068090245e-05, + "rewards/margins": 0.23068693280220032, + "rewards/rejected": -0.2307228446006775, + "step": 5839 + }, + { + "epoch": 4.0387275242047025, + "grad_norm": 9.245139122009277, + "learning_rate": 3.311818042108499e-05, + "log_odds_chosen": 10.530648231506348, + "log_odds_ratio": -4.213380452711135e-05, + "logits/chosen": -0.6391951441764832, + "logits/rejected": -0.6505733728408813, + "logps/chosen": -0.00027310033328831196, + "logps/rejected": -2.1863694190979004, + "loss": 0.7774, + "nll_loss": 0.19433549046516418, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7310034056426957e-05, + "rewards/margins": 0.21860966086387634, + "rewards/rejected": -0.21863695979118347, + "step": 5840 + }, + { + "epoch": 4.039419087136929, + "grad_norm": 8.130341529846191, + "learning_rate": 3.3114338404794836e-05, + "log_odds_chosen": 10.152312278747559, + "log_odds_ratio": -0.0006681890808977187, + "logits/chosen": -0.6517927050590515, + "logits/rejected": -0.7043352127075195, + "logps/chosen": -0.0008484551799483597, + "logps/rejected": -1.9009888172149658, + "loss": 0.9638, + "nll_loss": 0.240884929895401, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.484552381560206e-05, + "rewards/margins": 0.1900140345096588, + "rewards/rejected": -0.19009888172149658, + "step": 5841 + }, + { + "epoch": 4.040110650069156, + "grad_norm": 5.752309799194336, + "learning_rate": 3.311049638850469e-05, + "log_odds_chosen": 7.149251937866211, + "log_odds_ratio": -0.01412150077521801, + "logits/chosen": -0.5170456171035767, + "logits/rejected": -0.5047367215156555, + "logps/chosen": -0.00545818917453289, + "logps/rejected": -1.265357494354248, + "loss": 1.0664, + "nll_loss": 0.26519855856895447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005458188825286925, + "rewards/margins": 0.12598992884159088, + "rewards/rejected": -0.12653574347496033, + "step": 5842 + }, + { + "epoch": 4.040802213001383, + "grad_norm": 6.732954978942871, + "learning_rate": 3.310665437221454e-05, + "log_odds_chosen": 7.924806118011475, + "log_odds_ratio": -0.004892845172435045, + "logits/chosen": -0.7184340357780457, + "logits/rejected": -0.6960312128067017, + "logps/chosen": -0.00998085830360651, + "logps/rejected": -1.0627918243408203, + "loss": 1.4859, + "nll_loss": 0.37099653482437134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009980859467759728, + "rewards/margins": 0.10528109222650528, + "rewards/rejected": -0.10627917945384979, + "step": 5843 + }, + { + "epoch": 4.04149377593361, + "grad_norm": 9.003263473510742, + "learning_rate": 3.310281235592439e-05, + "log_odds_chosen": 8.91492748260498, + "log_odds_ratio": -0.004036875907331705, + "logits/chosen": -0.5508736371994019, + "logits/rejected": -0.5939282178878784, + "logps/chosen": -0.004110867623239756, + "logps/rejected": -1.9761884212493896, + "loss": 1.5571, + "nll_loss": 0.38886570930480957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004110867448616773, + "rewards/margins": 0.1972077488899231, + "rewards/rejected": -0.19761884212493896, + "step": 5844 + }, + { + "epoch": 4.042185338865837, + "grad_norm": 8.566186904907227, + "learning_rate": 3.3098970339634246e-05, + "log_odds_chosen": 9.330158233642578, + "log_odds_ratio": -0.00024974337429739535, + "logits/chosen": -0.7203190326690674, + "logits/rejected": -0.6908995509147644, + "logps/chosen": -0.0022361972369253635, + "logps/rejected": -1.909590721130371, + "loss": 1.2201, + "nll_loss": 0.30500558018684387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002236197324236855, + "rewards/margins": 0.19073545932769775, + "rewards/rejected": -0.19095909595489502, + "step": 5845 + }, + { + "epoch": 4.0428769017980635, + "grad_norm": 9.80972671508789, + "learning_rate": 3.309512832334409e-05, + "log_odds_chosen": 9.790373802185059, + "log_odds_ratio": -0.0003783097490668297, + "logits/chosen": -0.6303945183753967, + "logits/rejected": -0.698731541633606, + "logps/chosen": -0.000483025040011853, + "logps/rejected": -1.6333590745925903, + "loss": 1.0825, + "nll_loss": 0.27057844400405884, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8302506911568344e-05, + "rewards/margins": 0.16328760981559753, + "rewards/rejected": -0.1633358895778656, + "step": 5846 + }, + { + "epoch": 4.04356846473029, + "grad_norm": 11.211024284362793, + "learning_rate": 3.3091286307053944e-05, + "log_odds_chosen": 10.19981861114502, + "log_odds_ratio": -0.0003373716026544571, + "logits/chosen": -0.548815131187439, + "logits/rejected": -0.6491366624832153, + "logps/chosen": -0.0005771011346951127, + "logps/rejected": -1.7962886095046997, + "loss": 1.2562, + "nll_loss": 0.3140169382095337, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.77101091039367e-05, + "rewards/margins": 0.17957115173339844, + "rewards/rejected": -0.17962884902954102, + "step": 5847 + }, + { + "epoch": 4.044260027662517, + "grad_norm": 6.113912105560303, + "learning_rate": 3.3087444290763796e-05, + "log_odds_chosen": 9.33714485168457, + "log_odds_ratio": -0.0007019840413704515, + "logits/chosen": -0.4767472743988037, + "logits/rejected": -0.5687282681465149, + "logps/chosen": -0.0005971384234726429, + "logps/rejected": -1.553364872932434, + "loss": 1.2813, + "nll_loss": 0.3202533423900604, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971384234726429e-05, + "rewards/margins": 0.15527677536010742, + "rewards/rejected": -0.15533646941184998, + "step": 5848 + }, + { + "epoch": 4.044951590594744, + "grad_norm": 13.186535835266113, + "learning_rate": 3.308360227447365e-05, + "log_odds_chosen": 9.782808303833008, + "log_odds_ratio": -0.0001403852365911007, + "logits/chosen": -0.42614397406578064, + "logits/rejected": -0.4612559676170349, + "logps/chosen": -0.0003595305315684527, + "logps/rejected": -1.6214947700500488, + "loss": 1.3976, + "nll_loss": 0.3493949770927429, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.595305315684527e-05, + "rewards/margins": 0.16211353242397308, + "rewards/rejected": -0.16214948892593384, + "step": 5849 + }, + { + "epoch": 4.045643153526971, + "grad_norm": 8.229986190795898, + "learning_rate": 3.3079760258183495e-05, + "log_odds_chosen": 9.515350341796875, + "log_odds_ratio": -0.000164119090186432, + "logits/chosen": -0.2830381393432617, + "logits/rejected": -0.32949697971343994, + "logps/chosen": -0.013506095856428146, + "logps/rejected": -2.190258026123047, + "loss": 0.9495, + "nll_loss": 0.23736843466758728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013506095856428146, + "rewards/margins": 0.21767520904541016, + "rewards/rejected": -0.21902580559253693, + "step": 5850 + }, + { + "epoch": 4.046334716459198, + "grad_norm": 10.434491157531738, + "learning_rate": 3.307591824189335e-05, + "log_odds_chosen": 9.21742057800293, + "log_odds_ratio": -0.013323817402124405, + "logits/chosen": -0.29416847229003906, + "logits/rejected": -0.32375025749206543, + "logps/chosen": -0.004796840250492096, + "logps/rejected": -1.5443867444992065, + "loss": 1.422, + "nll_loss": 0.3541748523712158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004796840075869113, + "rewards/margins": 0.1539589762687683, + "rewards/rejected": -0.15443867444992065, + "step": 5851 + }, + { + "epoch": 4.0470262793914245, + "grad_norm": 11.886966705322266, + "learning_rate": 3.30720762256032e-05, + "log_odds_chosen": 9.648627281188965, + "log_odds_ratio": -0.00014911373727954924, + "logits/chosen": -0.6481659412384033, + "logits/rejected": -0.7246516346931458, + "logps/chosen": -0.009942540898919106, + "logps/rejected": -1.962467908859253, + "loss": 1.2039, + "nll_loss": 0.3009604513645172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009942540200427175, + "rewards/margins": 0.19525253772735596, + "rewards/rejected": -0.19624680280685425, + "step": 5852 + }, + { + "epoch": 4.047717842323651, + "grad_norm": 8.506421089172363, + "learning_rate": 3.3068234209313045e-05, + "log_odds_chosen": 10.214273452758789, + "log_odds_ratio": -9.400276758242399e-05, + "logits/chosen": -0.1977856159210205, + "logits/rejected": -0.24321135878562927, + "logps/chosen": -0.00024938437854871154, + "logps/rejected": -1.7478985786437988, + "loss": 0.8723, + "nll_loss": 0.21805506944656372, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.493843567208387e-05, + "rewards/margins": 0.17476493120193481, + "rewards/rejected": -0.17478986084461212, + "step": 5853 + }, + { + "epoch": 4.048409405255878, + "grad_norm": 13.32843017578125, + "learning_rate": 3.30643921930229e-05, + "log_odds_chosen": 10.405963897705078, + "log_odds_ratio": -0.0002303352957824245, + "logits/chosen": -0.41388577222824097, + "logits/rejected": -0.4686071276664734, + "logps/chosen": -0.00018964394985232502, + "logps/rejected": -2.0773470401763916, + "loss": 1.3127, + "nll_loss": 0.32814282178878784, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.896439425763674e-05, + "rewards/margins": 0.20771574974060059, + "rewards/rejected": -0.20773470401763916, + "step": 5854 + }, + { + "epoch": 4.049100968188105, + "grad_norm": 13.760919570922852, + "learning_rate": 3.306055017673275e-05, + "log_odds_chosen": 9.816307067871094, + "log_odds_ratio": -0.0015375094953924417, + "logits/chosen": -0.34091717004776, + "logits/rejected": -0.41033634543418884, + "logps/chosen": -0.0015424349112436175, + "logps/rejected": -1.463100790977478, + "loss": 1.694, + "nll_loss": 0.42334362864494324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015424350567627698, + "rewards/margins": 0.14615583419799805, + "rewards/rejected": -0.14631007611751556, + "step": 5855 + }, + { + "epoch": 4.049792531120332, + "grad_norm": 5.521707057952881, + "learning_rate": 3.30567081604426e-05, + "log_odds_chosen": 8.408859252929688, + "log_odds_ratio": -0.0010520406067371368, + "logits/chosen": -0.24230000376701355, + "logits/rejected": -0.22912943363189697, + "logps/chosen": -0.0007799810264259577, + "logps/rejected": -1.02225661277771, + "loss": 1.505, + "nll_loss": 0.37614238262176514, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.799810555297881e-05, + "rewards/margins": 0.102147676050663, + "rewards/rejected": -0.10222566872835159, + "step": 5856 + }, + { + "epoch": 4.050484094052559, + "grad_norm": 8.019282341003418, + "learning_rate": 3.305286614415245e-05, + "log_odds_chosen": 9.622739791870117, + "log_odds_ratio": -0.0003253653703723103, + "logits/chosen": -0.6645296216011047, + "logits/rejected": -0.6519110798835754, + "logps/chosen": -0.0003871311782859266, + "logps/rejected": -2.032543182373047, + "loss": 0.873, + "nll_loss": 0.21822920441627502, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8713122194167227e-05, + "rewards/margins": 0.20321562886238098, + "rewards/rejected": -0.2032543271780014, + "step": 5857 + }, + { + "epoch": 4.051175656984785, + "grad_norm": 7.612847328186035, + "learning_rate": 3.304902412786231e-05, + "log_odds_chosen": 9.509521484375, + "log_odds_ratio": -0.0007174806669354439, + "logits/chosen": -0.18259556591510773, + "logits/rejected": -0.18433810770511627, + "logps/chosen": -0.0008035643259063363, + "logps/rejected": -1.6824941635131836, + "loss": 1.0933, + "nll_loss": 0.2732608914375305, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.035643259063363e-05, + "rewards/margins": 0.1681690663099289, + "rewards/rejected": -0.16824942827224731, + "step": 5858 + }, + { + "epoch": 4.051867219917012, + "grad_norm": 10.871746063232422, + "learning_rate": 3.304518211157215e-05, + "log_odds_chosen": 9.347953796386719, + "log_odds_ratio": -0.0002416951465420425, + "logits/chosen": -0.5143193006515503, + "logits/rejected": -0.6148710250854492, + "logps/chosen": -0.0004571893368847668, + "logps/rejected": -1.458450198173523, + "loss": 1.7587, + "nll_loss": 0.43964099884033203, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5718930778093636e-05, + "rewards/margins": 0.14579930901527405, + "rewards/rejected": -0.14584502577781677, + "step": 5859 + }, + { + "epoch": 4.052558782849239, + "grad_norm": 9.969888687133789, + "learning_rate": 3.3041340095282005e-05, + "log_odds_chosen": 10.789196968078613, + "log_odds_ratio": -7.73576321080327e-05, + "logits/chosen": -0.6059550642967224, + "logits/rejected": -0.656543493270874, + "logps/chosen": -0.0004085530526936054, + "logps/rejected": -2.321756601333618, + "loss": 1.2811, + "nll_loss": 0.32025599479675293, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085530963493511e-05, + "rewards/margins": 0.23213481903076172, + "rewards/rejected": -0.23217566311359406, + "step": 5860 + }, + { + "epoch": 4.053250345781466, + "grad_norm": 6.3312530517578125, + "learning_rate": 3.303749807899186e-05, + "log_odds_chosen": 10.216769218444824, + "log_odds_ratio": -0.0002273907302878797, + "logits/chosen": -0.5803669691085815, + "logits/rejected": -0.7455939650535583, + "logps/chosen": -0.00029386900132521987, + "logps/rejected": -2.0365443229675293, + "loss": 1.4209, + "nll_loss": 0.35520580410957336, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9386899768724106e-05, + "rewards/margins": 0.20362505316734314, + "rewards/rejected": -0.2036544531583786, + "step": 5861 + }, + { + "epoch": 4.053941908713693, + "grad_norm": 9.015466690063477, + "learning_rate": 3.3033656062701704e-05, + "log_odds_chosen": 10.769645690917969, + "log_odds_ratio": -3.3408618037356064e-05, + "logits/chosen": -0.477365106344223, + "logits/rejected": -0.5517987608909607, + "logps/chosen": -0.00012404685548972338, + "logps/rejected": -1.8976612091064453, + "loss": 0.952, + "nll_loss": 0.2380056381225586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2404685548972338e-05, + "rewards/margins": 0.1897537112236023, + "rewards/rejected": -0.18976612389087677, + "step": 5862 + }, + { + "epoch": 4.05463347164592, + "grad_norm": 7.8228912353515625, + "learning_rate": 3.3029814046411556e-05, + "log_odds_chosen": 10.78010368347168, + "log_odds_ratio": -2.2395632186089642e-05, + "logits/chosen": -0.5051406025886536, + "logits/rejected": -0.52671217918396, + "logps/chosen": -0.00011659066512947902, + "logps/rejected": -1.7614760398864746, + "loss": 0.78, + "nll_loss": 0.1949884295463562, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.165906542155426e-05, + "rewards/margins": 0.17613595724105835, + "rewards/rejected": -0.17614760994911194, + "step": 5863 + }, + { + "epoch": 4.055325034578146, + "grad_norm": 7.346597671508789, + "learning_rate": 3.302597203012141e-05, + "log_odds_chosen": 10.719219207763672, + "log_odds_ratio": -8.285381773021072e-05, + "logits/chosen": -0.5942940711975098, + "logits/rejected": -0.6187726259231567, + "logps/chosen": -0.0016607241705060005, + "logps/rejected": -2.183518886566162, + "loss": 1.2496, + "nll_loss": 0.3124021887779236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016607240831945091, + "rewards/margins": 0.21818581223487854, + "rewards/rejected": -0.21835188567638397, + "step": 5864 + }, + { + "epoch": 4.056016597510373, + "grad_norm": 20.964130401611328, + "learning_rate": 3.302213001383126e-05, + "log_odds_chosen": 7.885706901550293, + "log_odds_ratio": -0.8424516916275024, + "logits/chosen": -0.69463050365448, + "logits/rejected": -0.7300902009010315, + "logps/chosen": -0.08962702751159668, + "logps/rejected": -1.2243645191192627, + "loss": 1.5402, + "nll_loss": 0.300813227891922, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.008962703868746758, + "rewards/margins": 0.11347375810146332, + "rewards/rejected": -0.12243645638227463, + "step": 5865 + }, + { + "epoch": 4.0567081604426, + "grad_norm": 10.524365425109863, + "learning_rate": 3.3018287997541107e-05, + "log_odds_chosen": 10.204052925109863, + "log_odds_ratio": -0.0017390053253620863, + "logits/chosen": -0.6175505518913269, + "logits/rejected": -0.7030659317970276, + "logps/chosen": -0.010915473103523254, + "logps/rejected": -2.2788500785827637, + "loss": 1.283, + "nll_loss": 0.3205869197845459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010915474267676473, + "rewards/margins": 0.22679348289966583, + "rewards/rejected": -0.22788502275943756, + "step": 5866 + }, + { + "epoch": 4.057399723374827, + "grad_norm": 7.630101680755615, + "learning_rate": 3.3014445981250966e-05, + "log_odds_chosen": 10.327079772949219, + "log_odds_ratio": -4.9537731683813035e-05, + "logits/chosen": -0.6859206557273865, + "logits/rejected": -0.7285531163215637, + "logps/chosen": -0.00025954050943255424, + "logps/rejected": -1.7925751209259033, + "loss": 0.9484, + "nll_loss": 0.2370968908071518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.595405385363847e-05, + "rewards/margins": 0.17923158407211304, + "rewards/rejected": -0.17925751209259033, + "step": 5867 + }, + { + "epoch": 4.058091286307054, + "grad_norm": 7.37345027923584, + "learning_rate": 3.301060396496081e-05, + "log_odds_chosen": 8.724294662475586, + "log_odds_ratio": -0.0013263337314128876, + "logits/chosen": -0.7585796117782593, + "logits/rejected": -0.7691759467124939, + "logps/chosen": -0.012353342026472092, + "logps/rejected": -1.8685768842697144, + "loss": 1.0109, + "nll_loss": 0.25259870290756226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012353342026472092, + "rewards/margins": 0.18562236428260803, + "rewards/rejected": -0.1868577003479004, + "step": 5868 + }, + { + "epoch": 4.058782849239281, + "grad_norm": 8.354995727539062, + "learning_rate": 3.3006761948670664e-05, + "log_odds_chosen": 8.951905250549316, + "log_odds_ratio": -0.005012372508645058, + "logits/chosen": -0.4756959080696106, + "logits/rejected": -0.6075941324234009, + "logps/chosen": -0.01927120052278042, + "logps/rejected": -1.5122439861297607, + "loss": 1.3131, + "nll_loss": 0.32777372002601624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019271199125796556, + "rewards/margins": 0.14929728209972382, + "rewards/rejected": -0.15122440457344055, + "step": 5869 + }, + { + "epoch": 4.059474412171507, + "grad_norm": 7.3892107009887695, + "learning_rate": 3.3002919932380516e-05, + "log_odds_chosen": 8.95905590057373, + "log_odds_ratio": -0.000225025272811763, + "logits/chosen": -0.27506279945373535, + "logits/rejected": -0.28999996185302734, + "logps/chosen": -0.0003333079512231052, + "logps/rejected": -1.255396842956543, + "loss": 1.0496, + "nll_loss": 0.26238468289375305, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.333079075673595e-05, + "rewards/margins": 0.12550634145736694, + "rewards/rejected": -0.12553967535495758, + "step": 5870 + }, + { + "epoch": 4.060165975103734, + "grad_norm": 28.17425537109375, + "learning_rate": 3.299907791609036e-05, + "log_odds_chosen": 9.628087997436523, + "log_odds_ratio": -0.00012001794675597921, + "logits/chosen": -0.43030136823654175, + "logits/rejected": -0.4921196699142456, + "logps/chosen": -0.0012534643756225705, + "logps/rejected": -2.3265280723571777, + "loss": 1.735, + "nll_loss": 0.433744877576828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012534644338302314, + "rewards/margins": 0.2325274646282196, + "rewards/rejected": -0.23265281319618225, + "step": 5871 + }, + { + "epoch": 4.060857538035961, + "grad_norm": 9.745065689086914, + "learning_rate": 3.2995235899800214e-05, + "log_odds_chosen": 10.053963661193848, + "log_odds_ratio": -0.0008054873323999345, + "logits/chosen": -0.353712797164917, + "logits/rejected": -0.3067231774330139, + "logps/chosen": -0.006351853255182505, + "logps/rejected": -2.8184359073638916, + "loss": 1.1014, + "nll_loss": 0.27525702118873596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006351853371597826, + "rewards/margins": 0.2812083959579468, + "rewards/rejected": -0.2818436026573181, + "step": 5872 + }, + { + "epoch": 4.061549100968188, + "grad_norm": 9.960372924804688, + "learning_rate": 3.299139388351007e-05, + "log_odds_chosen": 9.378376007080078, + "log_odds_ratio": -0.000319282291457057, + "logits/chosen": -0.5158789157867432, + "logits/rejected": -0.47944214940071106, + "logps/chosen": -0.003047320758923888, + "logps/rejected": -1.903006911277771, + "loss": 0.9215, + "nll_loss": 0.23033401370048523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003047320933546871, + "rewards/margins": 0.18999595940113068, + "rewards/rejected": -0.19030068814754486, + "step": 5873 + }, + { + "epoch": 4.062240663900415, + "grad_norm": 8.335897445678711, + "learning_rate": 3.298755186721992e-05, + "log_odds_chosen": 9.377771377563477, + "log_odds_ratio": -0.0021932560484856367, + "logits/chosen": -0.4992346465587616, + "logits/rejected": -0.5463720560073853, + "logps/chosen": -0.007180570159107447, + "logps/rejected": -1.7047597169876099, + "loss": 0.8746, + "nll_loss": 0.21843618154525757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000718057039193809, + "rewards/margins": 0.1697579324245453, + "rewards/rejected": -0.17047598958015442, + "step": 5874 + }, + { + "epoch": 4.0629322268326415, + "grad_norm": 12.052624702453613, + "learning_rate": 3.2983709850929765e-05, + "log_odds_chosen": 9.854325294494629, + "log_odds_ratio": -0.000265687849605456, + "logits/chosen": -0.3645329773426056, + "logits/rejected": -0.40780341625213623, + "logps/chosen": -0.0005900555406697094, + "logps/rejected": -1.7319858074188232, + "loss": 1.2671, + "nll_loss": 0.3167441487312317, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.900555333937518e-05, + "rewards/margins": 0.17313960194587708, + "rewards/rejected": -0.1731986105442047, + "step": 5875 + }, + { + "epoch": 4.063623789764868, + "grad_norm": 7.595922470092773, + "learning_rate": 3.2979867834639624e-05, + "log_odds_chosen": 10.499154090881348, + "log_odds_ratio": -3.721401299117133e-05, + "logits/chosen": -0.5955951809883118, + "logits/rejected": -0.5733447074890137, + "logps/chosen": -0.00019262160640209913, + "logps/rejected": -1.7049528360366821, + "loss": 0.8514, + "nll_loss": 0.21284781396389008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9262162822997198e-05, + "rewards/margins": 0.1704760193824768, + "rewards/rejected": -0.17049528658390045, + "step": 5876 + }, + { + "epoch": 4.064315352697095, + "grad_norm": 7.582157611846924, + "learning_rate": 3.297602581834947e-05, + "log_odds_chosen": 9.53189468383789, + "log_odds_ratio": -0.0004492271691560745, + "logits/chosen": -0.46577298641204834, + "logits/rejected": -0.4716201722621918, + "logps/chosen": -0.0042579807341098785, + "logps/rejected": -2.3687758445739746, + "loss": 1.5047, + "nll_loss": 0.3761416971683502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042579806176945567, + "rewards/margins": 0.23645178973674774, + "rewards/rejected": -0.23687759041786194, + "step": 5877 + }, + { + "epoch": 4.065006915629322, + "grad_norm": 11.61339282989502, + "learning_rate": 3.297218380205932e-05, + "log_odds_chosen": 8.871149063110352, + "log_odds_ratio": -0.003240432823076844, + "logits/chosen": -0.18867714703083038, + "logits/rejected": -0.22036635875701904, + "logps/chosen": -0.0015413928776979446, + "logps/rejected": -1.6997482776641846, + "loss": 1.182, + "nll_loss": 0.2951643466949463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015413928485941142, + "rewards/margins": 0.16982069611549377, + "rewards/rejected": -0.16997483372688293, + "step": 5878 + }, + { + "epoch": 4.065698478561549, + "grad_norm": 8.902300834655762, + "learning_rate": 3.2968341785769175e-05, + "log_odds_chosen": 9.484688758850098, + "log_odds_ratio": -0.0074333013035357, + "logits/chosen": -0.5499727725982666, + "logits/rejected": -0.6351827383041382, + "logps/chosen": -0.003113050712272525, + "logps/rejected": -1.3787400722503662, + "loss": 0.7673, + "nll_loss": 0.1910793036222458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031130510615184903, + "rewards/margins": 0.13756272196769714, + "rewards/rejected": -0.13787400722503662, + "step": 5879 + }, + { + "epoch": 4.066390041493776, + "grad_norm": 8.004378318786621, + "learning_rate": 3.296449976947902e-05, + "log_odds_chosen": 10.061744689941406, + "log_odds_ratio": -8.924882422434166e-05, + "logits/chosen": -0.5520544052124023, + "logits/rejected": -0.6138145327568054, + "logps/chosen": -0.00017678551375865936, + "logps/rejected": -1.2566429376602173, + "loss": 1.1891, + "nll_loss": 0.29726946353912354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.767855246725958e-05, + "rewards/margins": 0.12564662098884583, + "rewards/rejected": -0.12566430866718292, + "step": 5880 + }, + { + "epoch": 4.0670816044260025, + "grad_norm": 5.754605770111084, + "learning_rate": 3.296065775318887e-05, + "log_odds_chosen": 9.56634521484375, + "log_odds_ratio": -0.00027094371034763753, + "logits/chosen": -0.3173554241657257, + "logits/rejected": -0.4609263837337494, + "logps/chosen": -0.00039389560697600245, + "logps/rejected": -1.7693027257919312, + "loss": 0.8986, + "nll_loss": 0.22461257874965668, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9389560697600245e-05, + "rewards/margins": 0.17689087986946106, + "rewards/rejected": -0.1769302785396576, + "step": 5881 + }, + { + "epoch": 4.067773167358229, + "grad_norm": 11.797194480895996, + "learning_rate": 3.2956815736898725e-05, + "log_odds_chosen": 10.15972900390625, + "log_odds_ratio": -7.304631435545161e-05, + "logits/chosen": -0.23659618198871613, + "logits/rejected": -0.38847583532333374, + "logps/chosen": -0.00039499488775618374, + "logps/rejected": -2.1488943099975586, + "loss": 1.0133, + "nll_loss": 0.2533252239227295, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.949948586523533e-05, + "rewards/margins": 0.21484996378421783, + "rewards/rejected": -0.21488946676254272, + "step": 5882 + }, + { + "epoch": 4.068464730290456, + "grad_norm": 13.173077583312988, + "learning_rate": 3.295297372060858e-05, + "log_odds_chosen": 9.202802658081055, + "log_odds_ratio": -0.0028482081834226847, + "logits/chosen": -0.4829055368900299, + "logits/rejected": -0.6257842779159546, + "logps/chosen": -0.02677040360867977, + "logps/rejected": -1.9920315742492676, + "loss": 1.2244, + "nll_loss": 0.30581286549568176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026770401746034622, + "rewards/margins": 0.19652612507343292, + "rewards/rejected": -0.19920317828655243, + "step": 5883 + }, + { + "epoch": 4.069156293222683, + "grad_norm": 6.314594268798828, + "learning_rate": 3.2949131704318423e-05, + "log_odds_chosen": 10.545787811279297, + "log_odds_ratio": -0.00012237382179591805, + "logits/chosen": -0.338174968957901, + "logits/rejected": -0.5499405860900879, + "logps/chosen": -0.0005069324979558587, + "logps/rejected": -2.1837666034698486, + "loss": 0.7223, + "nll_loss": 0.1805533468723297, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.069325561635196e-05, + "rewards/margins": 0.21832597255706787, + "rewards/rejected": -0.21837666630744934, + "step": 5884 + }, + { + "epoch": 4.06984785615491, + "grad_norm": 8.39501667022705, + "learning_rate": 3.294528968802828e-05, + "log_odds_chosen": 8.74677848815918, + "log_odds_ratio": -0.07410023361444473, + "logits/chosen": -0.29483741521835327, + "logits/rejected": -0.35221293568611145, + "logps/chosen": -0.013385063037276268, + "logps/rejected": -1.4697821140289307, + "loss": 1.2343, + "nll_loss": 0.3011668920516968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013385062338784337, + "rewards/margins": 0.14563970267772675, + "rewards/rejected": -0.1469782143831253, + "step": 5885 + }, + { + "epoch": 4.070539419087137, + "grad_norm": 9.42487907409668, + "learning_rate": 3.294144767173813e-05, + "log_odds_chosen": 10.418317794799805, + "log_odds_ratio": -0.00018019750132225454, + "logits/chosen": -0.2804778814315796, + "logits/rejected": -0.26137036085128784, + "logps/chosen": -0.003630939172580838, + "logps/rejected": -2.4958736896514893, + "loss": 0.998, + "nll_loss": 0.24948766827583313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003630939172580838, + "rewards/margins": 0.24922429025173187, + "rewards/rejected": -0.24958738684654236, + "step": 5886 + }, + { + "epoch": 4.0712309820193635, + "grad_norm": 7.741333484649658, + "learning_rate": 3.293760565544798e-05, + "log_odds_chosen": 8.39094352722168, + "log_odds_ratio": -0.04762697592377663, + "logits/chosen": -0.5252317786216736, + "logits/rejected": -0.586992621421814, + "logps/chosen": -0.037948936223983765, + "logps/rejected": -1.6404378414154053, + "loss": 0.9747, + "nll_loss": 0.2389063537120819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00379489385522902, + "rewards/margins": 0.16024890542030334, + "rewards/rejected": -0.16404379904270172, + "step": 5887 + }, + { + "epoch": 4.07192254495159, + "grad_norm": 9.15391731262207, + "learning_rate": 3.293376363915783e-05, + "log_odds_chosen": 9.346750259399414, + "log_odds_ratio": -0.06653932482004166, + "logits/chosen": -0.4693371057510376, + "logits/rejected": -0.535291314125061, + "logps/chosen": -0.014121782034635544, + "logps/rejected": -1.8337665796279907, + "loss": 1.1923, + "nll_loss": 0.29142358899116516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014121782733127475, + "rewards/margins": 0.1819644719362259, + "rewards/rejected": -0.18337665498256683, + "step": 5888 + }, + { + "epoch": 4.072614107883817, + "grad_norm": 7.018202304840088, + "learning_rate": 3.292992162286768e-05, + "log_odds_chosen": 9.713285446166992, + "log_odds_ratio": -0.00047497553168796003, + "logits/chosen": -0.6435208916664124, + "logits/rejected": -0.6868486404418945, + "logps/chosen": -0.0004327491042204201, + "logps/rejected": -1.6850796937942505, + "loss": 0.8882, + "nll_loss": 0.22199746966362, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.327491842559539e-05, + "rewards/margins": 0.16846470534801483, + "rewards/rejected": -0.16850797832012177, + "step": 5889 + }, + { + "epoch": 4.073305670816044, + "grad_norm": 12.950318336486816, + "learning_rate": 3.292607960657753e-05, + "log_odds_chosen": 8.975205421447754, + "log_odds_ratio": -0.0029498322401195765, + "logits/chosen": -0.47128579020500183, + "logits/rejected": -0.5512241721153259, + "logps/chosen": -0.0190330371260643, + "logps/rejected": -1.9600040912628174, + "loss": 1.4559, + "nll_loss": 0.363680362701416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001903303898870945, + "rewards/margins": 0.1940971165895462, + "rewards/rejected": -0.19600041210651398, + "step": 5890 + }, + { + "epoch": 4.073997233748271, + "grad_norm": 10.255099296569824, + "learning_rate": 3.2922237590287384e-05, + "log_odds_chosen": 10.904985427856445, + "log_odds_ratio": -2.5688645109767094e-05, + "logits/chosen": -0.6291865706443787, + "logits/rejected": -0.7621033787727356, + "logps/chosen": -0.0003484275075607002, + "logps/rejected": -2.4972190856933594, + "loss": 1.2966, + "nll_loss": 0.3241395652294159, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.48427529388573e-05, + "rewards/margins": 0.2496870756149292, + "rewards/rejected": -0.24972191452980042, + "step": 5891 + }, + { + "epoch": 4.074688796680498, + "grad_norm": 14.77944564819336, + "learning_rate": 3.2918395573997236e-05, + "log_odds_chosen": 10.145472526550293, + "log_odds_ratio": -0.00012422248255461454, + "logits/chosen": -0.5283649563789368, + "logits/rejected": -0.6732466816902161, + "logps/chosen": -0.0004888318944722414, + "logps/rejected": -2.26007080078125, + "loss": 1.7568, + "nll_loss": 0.439181387424469, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.88831901748199e-05, + "rewards/margins": 0.2259582281112671, + "rewards/rejected": -0.2260071039199829, + "step": 5892 + }, + { + "epoch": 4.0753803596127245, + "grad_norm": 9.560647010803223, + "learning_rate": 3.291455355770708e-05, + "log_odds_chosen": 10.0001220703125, + "log_odds_ratio": -0.0001762525353115052, + "logits/chosen": -0.5513603687286377, + "logits/rejected": -0.5678353309631348, + "logps/chosen": -0.0002527603064663708, + "logps/rejected": -1.3682398796081543, + "loss": 0.715, + "nll_loss": 0.17874178290367126, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.527602919144556e-05, + "rewards/margins": 0.1367987096309662, + "rewards/rejected": -0.13682398200035095, + "step": 5893 + }, + { + "epoch": 4.076071922544951, + "grad_norm": 6.863475322723389, + "learning_rate": 3.291071154141694e-05, + "log_odds_chosen": 9.999677658081055, + "log_odds_ratio": -0.0005250984104350209, + "logits/chosen": -0.38750988245010376, + "logits/rejected": -0.481981486082077, + "logps/chosen": -0.0008838768699206412, + "logps/rejected": -1.9657940864562988, + "loss": 0.7248, + "nll_loss": 0.18114393949508667, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.83876855368726e-05, + "rewards/margins": 0.1964910328388214, + "rewards/rejected": -0.19657942652702332, + "step": 5894 + }, + { + "epoch": 4.076763485477178, + "grad_norm": 9.547638893127441, + "learning_rate": 3.290686952512679e-05, + "log_odds_chosen": 10.852001190185547, + "log_odds_ratio": -5.883872654521838e-05, + "logits/chosen": -0.6101143956184387, + "logits/rejected": -0.6539497971534729, + "logps/chosen": -0.00033553678076714277, + "logps/rejected": -2.486168384552002, + "loss": 1.0673, + "nll_loss": 0.2668067514896393, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.355367880431004e-05, + "rewards/margins": 0.24858328700065613, + "rewards/rejected": -0.24861682951450348, + "step": 5895 + }, + { + "epoch": 4.077455048409405, + "grad_norm": 12.845402717590332, + "learning_rate": 3.290302750883664e-05, + "log_odds_chosen": 10.063456535339355, + "log_odds_ratio": -0.001193615491501987, + "logits/chosen": -0.5881873369216919, + "logits/rejected": -0.6536589860916138, + "logps/chosen": -0.0010188799351453781, + "logps/rejected": -1.858687162399292, + "loss": 1.304, + "nll_loss": 0.32588329911231995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010188797750743106, + "rewards/margins": 0.18576683104038239, + "rewards/rejected": -0.1858687400817871, + "step": 5896 + }, + { + "epoch": 4.078146611341632, + "grad_norm": 8.264073371887207, + "learning_rate": 3.289918549254649e-05, + "log_odds_chosen": 11.341009140014648, + "log_odds_ratio": -3.128061871393584e-05, + "logits/chosen": -0.8096005916595459, + "logits/rejected": -0.9112792015075684, + "logps/chosen": -0.0003100544272456318, + "logps/rejected": -2.4255685806274414, + "loss": 1.4221, + "nll_loss": 0.3555248975753784, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.100544199696742e-05, + "rewards/margins": 0.2425258457660675, + "rewards/rejected": -0.2425568550825119, + "step": 5897 + }, + { + "epoch": 4.078838174273859, + "grad_norm": 11.904677391052246, + "learning_rate": 3.289534347625634e-05, + "log_odds_chosen": 9.696477890014648, + "log_odds_ratio": -0.00014181065489538014, + "logits/chosen": -0.41866156458854675, + "logits/rejected": -0.4474356174468994, + "logps/chosen": -0.0012857395922765136, + "logps/rejected": -1.9312913417816162, + "loss": 1.4583, + "nll_loss": 0.3645554780960083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012857397086918354, + "rewards/margins": 0.19300055503845215, + "rewards/rejected": -0.19312912225723267, + "step": 5898 + }, + { + "epoch": 4.0795297372060855, + "grad_norm": 12.756428718566895, + "learning_rate": 3.289150145996619e-05, + "log_odds_chosen": 10.162872314453125, + "log_odds_ratio": -0.0003494315897114575, + "logits/chosen": -0.5441153049468994, + "logits/rejected": -0.6091464161872864, + "logps/chosen": -0.0005706208175979555, + "logps/rejected": -1.493101954460144, + "loss": 0.9581, + "nll_loss": 0.23949317634105682, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.706208321498707e-05, + "rewards/margins": 0.14925314486026764, + "rewards/rejected": -0.14931020140647888, + "step": 5899 + }, + { + "epoch": 4.080221300138312, + "grad_norm": 8.728287696838379, + "learning_rate": 3.288765944367604e-05, + "log_odds_chosen": 9.576757431030273, + "log_odds_ratio": -0.000568702700547874, + "logits/chosen": -0.9261612296104431, + "logits/rejected": -0.887792706489563, + "logps/chosen": -0.00765953678637743, + "logps/rejected": -2.2173871994018555, + "loss": 1.3768, + "nll_loss": 0.3441358804702759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007659537368454039, + "rewards/margins": 0.22097276151180267, + "rewards/rejected": -0.22173872590065002, + "step": 5900 + }, + { + "epoch": 4.080912863070539, + "grad_norm": 12.955114364624023, + "learning_rate": 3.2883817427385895e-05, + "log_odds_chosen": 10.813081741333008, + "log_odds_ratio": -6.771107291569933e-05, + "logits/chosen": -0.6529577374458313, + "logits/rejected": -0.6758928298950195, + "logps/chosen": -0.0003291813191026449, + "logps/rejected": -2.374316453933716, + "loss": 1.3182, + "nll_loss": 0.32954463362693787, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.291813118266873e-05, + "rewards/margins": 0.23739872872829437, + "rewards/rejected": -0.23743166029453278, + "step": 5901 + }, + { + "epoch": 4.081604426002766, + "grad_norm": 12.583176612854004, + "learning_rate": 3.287997541109574e-05, + "log_odds_chosen": 9.861529350280762, + "log_odds_ratio": -0.00019614600751083344, + "logits/chosen": -0.6209136247634888, + "logits/rejected": -0.6244215369224548, + "logps/chosen": -0.0006503669428639114, + "logps/rejected": -2.1978304386138916, + "loss": 1.3384, + "nll_loss": 0.33457767963409424, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.503669283119962e-05, + "rewards/margins": 0.21971800923347473, + "rewards/rejected": -0.21978303790092468, + "step": 5902 + }, + { + "epoch": 4.082295988934993, + "grad_norm": 7.011631011962891, + "learning_rate": 3.28761333948056e-05, + "log_odds_chosen": 10.310678482055664, + "log_odds_ratio": -5.392678576754406e-05, + "logits/chosen": -0.10984927415847778, + "logits/rejected": -0.21680112183094025, + "logps/chosen": -0.0005354200839065015, + "logps/rejected": -2.361393928527832, + "loss": 0.8099, + "nll_loss": 0.20247748494148254, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.354200766305439e-05, + "rewards/margins": 0.23608583211898804, + "rewards/rejected": -0.23613938689231873, + "step": 5903 + }, + { + "epoch": 4.08298755186722, + "grad_norm": 6.9156718254089355, + "learning_rate": 3.2872291378515445e-05, + "log_odds_chosen": 9.817750930786133, + "log_odds_ratio": -0.0004971225862391293, + "logits/chosen": -0.2331920862197876, + "logits/rejected": -0.341392457485199, + "logps/chosen": -0.0009805553127080202, + "logps/rejected": -2.3136556148529053, + "loss": 1.1062, + "nll_loss": 0.2765083611011505, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.805553418118507e-05, + "rewards/margins": 0.231267511844635, + "rewards/rejected": -0.23136556148529053, + "step": 5904 + }, + { + "epoch": 4.0836791147994465, + "grad_norm": 8.643861770629883, + "learning_rate": 3.28684493622253e-05, + "log_odds_chosen": 10.287995338439941, + "log_odds_ratio": -0.00011583154264371842, + "logits/chosen": -0.13520589470863342, + "logits/rejected": -0.305339515209198, + "logps/chosen": -0.0003736851504072547, + "logps/rejected": -2.2782511711120605, + "loss": 0.934, + "nll_loss": 0.2334843873977661, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.736852158908732e-05, + "rewards/margins": 0.22778773307800293, + "rewards/rejected": -0.2278251051902771, + "step": 5905 + }, + { + "epoch": 4.084370677731673, + "grad_norm": 6.616100788116455, + "learning_rate": 3.286460734593515e-05, + "log_odds_chosen": 9.227767944335938, + "log_odds_ratio": -0.00036859133979305625, + "logits/chosen": -0.5632534623146057, + "logits/rejected": -0.5830227136611938, + "logps/chosen": -0.0007286292966455221, + "logps/rejected": -1.7913846969604492, + "loss": 1.7213, + "nll_loss": 0.43027839064598083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.286293111974373e-05, + "rewards/margins": 0.17906561493873596, + "rewards/rejected": -0.17913848161697388, + "step": 5906 + }, + { + "epoch": 4.0850622406639, + "grad_norm": 9.714203834533691, + "learning_rate": 3.2860765329644996e-05, + "log_odds_chosen": 10.036661148071289, + "log_odds_ratio": -0.00015963260375428945, + "logits/chosen": -0.545129656791687, + "logits/rejected": -0.6559819579124451, + "logps/chosen": -0.00045840261736884713, + "logps/rejected": -2.1178178787231445, + "loss": 1.0919, + "nll_loss": 0.272967666387558, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.584026100928895e-05, + "rewards/margins": 0.21173596382141113, + "rewards/rejected": -0.2117818146944046, + "step": 5907 + }, + { + "epoch": 4.085753803596127, + "grad_norm": 21.723194122314453, + "learning_rate": 3.285692331335485e-05, + "log_odds_chosen": 10.542654037475586, + "log_odds_ratio": -0.00011348607222316787, + "logits/chosen": -0.46963703632354736, + "logits/rejected": -0.531532347202301, + "logps/chosen": -0.0001715036341920495, + "logps/rejected": -1.9184401035308838, + "loss": 0.9849, + "nll_loss": 0.24622534215450287, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.715036341920495e-05, + "rewards/margins": 0.19182685017585754, + "rewards/rejected": -0.19184401631355286, + "step": 5908 + }, + { + "epoch": 4.086445366528354, + "grad_norm": 8.491382598876953, + "learning_rate": 3.28530812970647e-05, + "log_odds_chosen": 10.362955093383789, + "log_odds_ratio": -8.813407475827262e-05, + "logits/chosen": -0.45842936635017395, + "logits/rejected": -0.5122732520103455, + "logps/chosen": -0.0004604756541084498, + "logps/rejected": -2.116807460784912, + "loss": 0.7397, + "nll_loss": 0.18491125106811523, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6047567593632266e-05, + "rewards/margins": 0.21163472533226013, + "rewards/rejected": -0.21168076992034912, + "step": 5909 + }, + { + "epoch": 4.087136929460581, + "grad_norm": 11.866226196289062, + "learning_rate": 3.284923928077455e-05, + "log_odds_chosen": 9.371916770935059, + "log_odds_ratio": -0.0015452952357009053, + "logits/chosen": -0.6193146705627441, + "logits/rejected": -0.5984625220298767, + "logps/chosen": -0.0016424513887614012, + "logps/rejected": -1.958803415298462, + "loss": 1.8707, + "nll_loss": 0.46751517057418823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016424513887614012, + "rewards/margins": 0.19571609795093536, + "rewards/rejected": -0.19588035345077515, + "step": 5910 + }, + { + "epoch": 4.087828492392807, + "grad_norm": 16.31894302368164, + "learning_rate": 3.28453972644844e-05, + "log_odds_chosen": 9.167207717895508, + "log_odds_ratio": -0.04448318853974342, + "logits/chosen": -0.7948864698410034, + "logits/rejected": -0.8742651343345642, + "logps/chosen": -0.07551628351211548, + "logps/rejected": -1.8025906085968018, + "loss": 1.9373, + "nll_loss": 0.47987082600593567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0075516290962696075, + "rewards/margins": 0.1727074384689331, + "rewards/rejected": -0.1802590787410736, + "step": 5911 + }, + { + "epoch": 4.088520055325034, + "grad_norm": 8.925230026245117, + "learning_rate": 3.284155524819426e-05, + "log_odds_chosen": 9.596630096435547, + "log_odds_ratio": -0.0008474764181300998, + "logits/chosen": -0.5018314123153687, + "logits/rejected": -0.5587818026542664, + "logps/chosen": -0.014918200671672821, + "logps/rejected": -1.6959295272827148, + "loss": 0.7926, + "nll_loss": 0.19807027280330658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014918200904503465, + "rewards/margins": 0.16810114681720734, + "rewards/rejected": -0.1695929616689682, + "step": 5912 + }, + { + "epoch": 4.089211618257261, + "grad_norm": 21.428508758544922, + "learning_rate": 3.2837713231904104e-05, + "log_odds_chosen": 8.879332542419434, + "log_odds_ratio": -0.0006559221656061709, + "logits/chosen": -0.3005560338497162, + "logits/rejected": -0.2967386841773987, + "logps/chosen": -0.0012168160174041986, + "logps/rejected": -1.986401081085205, + "loss": 1.3465, + "nll_loss": 0.33656826615333557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012168160901637748, + "rewards/margins": 0.19851842522621155, + "rewards/rejected": -0.1986401081085205, + "step": 5913 + }, + { + "epoch": 4.089903181189488, + "grad_norm": 11.837137222290039, + "learning_rate": 3.2833871215613956e-05, + "log_odds_chosen": 8.946540832519531, + "log_odds_ratio": -0.0020058578811585903, + "logits/chosen": -0.2927224636077881, + "logits/rejected": -0.3152255117893219, + "logps/chosen": -0.011459745466709137, + "logps/rejected": -1.322188377380371, + "loss": 1.6239, + "nll_loss": 0.4057755768299103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001145974500104785, + "rewards/margins": 0.13107284903526306, + "rewards/rejected": -0.1322188377380371, + "step": 5914 + }, + { + "epoch": 4.090594744121715, + "grad_norm": 6.9727559089660645, + "learning_rate": 3.283002919932381e-05, + "log_odds_chosen": 8.786075592041016, + "log_odds_ratio": -0.000865088019054383, + "logits/chosen": -0.30533480644226074, + "logits/rejected": -0.37163785099983215, + "logps/chosen": -0.0008568483171984553, + "logps/rejected": -1.5327558517456055, + "loss": 1.0, + "nll_loss": 0.24990104138851166, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.568483463022858e-05, + "rewards/margins": 0.15318989753723145, + "rewards/rejected": -0.15327557921409607, + "step": 5915 + }, + { + "epoch": 4.091286307053942, + "grad_norm": 12.23672866821289, + "learning_rate": 3.282618718303366e-05, + "log_odds_chosen": 10.677852630615234, + "log_odds_ratio": -0.00016602696268819273, + "logits/chosen": -0.635785698890686, + "logits/rejected": -0.6920471787452698, + "logps/chosen": -0.0003736467915587127, + "logps/rejected": -2.306975841522217, + "loss": 1.0924, + "nll_loss": 0.273092120885849, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7364679883467034e-05, + "rewards/margins": 0.23066022992134094, + "rewards/rejected": -0.2306976020336151, + "step": 5916 + }, + { + "epoch": 4.091977869986168, + "grad_norm": 8.536312103271484, + "learning_rate": 3.2822345166743507e-05, + "log_odds_chosen": 8.663702011108398, + "log_odds_ratio": -0.00042895443039014935, + "logits/chosen": -0.5982036590576172, + "logits/rejected": -0.6158012747764587, + "logps/chosen": -0.0025946851819753647, + "logps/rejected": -1.8932663202285767, + "loss": 1.1592, + "nll_loss": 0.2897559702396393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002594685065560043, + "rewards/margins": 0.18906715512275696, + "rewards/rejected": -0.18932661414146423, + "step": 5917 + }, + { + "epoch": 4.092669432918395, + "grad_norm": 5.972506046295166, + "learning_rate": 3.281850315045336e-05, + "log_odds_chosen": 10.543180465698242, + "log_odds_ratio": -6.71078814775683e-05, + "logits/chosen": -0.05855588614940643, + "logits/rejected": -0.2059129923582077, + "logps/chosen": -0.0003192424192093313, + "logps/rejected": -2.2500996589660645, + "loss": 1.1449, + "nll_loss": 0.2862182855606079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.192424264852889e-05, + "rewards/margins": 0.22497805953025818, + "rewards/rejected": -0.2250099778175354, + "step": 5918 + }, + { + "epoch": 4.093360995850622, + "grad_norm": 14.27978515625, + "learning_rate": 3.281466113416321e-05, + "log_odds_chosen": 10.131693840026855, + "log_odds_ratio": -0.00028685503639280796, + "logits/chosen": -0.45370054244995117, + "logits/rejected": -0.4926850497722626, + "logps/chosen": -0.00024568854132667184, + "logps/rejected": -1.7987271547317505, + "loss": 1.5017, + "nll_loss": 0.3754034638404846, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.456885704305023e-05, + "rewards/margins": 0.17984813451766968, + "rewards/rejected": -0.17987270653247833, + "step": 5919 + }, + { + "epoch": 4.094052558782849, + "grad_norm": 9.181960105895996, + "learning_rate": 3.281081911787306e-05, + "log_odds_chosen": 10.21621322631836, + "log_odds_ratio": -0.0005674352869391441, + "logits/chosen": -0.40525904297828674, + "logits/rejected": -0.4408324360847473, + "logps/chosen": -0.0017286634538322687, + "logps/rejected": -2.215144634246826, + "loss": 0.9703, + "nll_loss": 0.24252784252166748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017286634829360992, + "rewards/margins": 0.22134160995483398, + "rewards/rejected": -0.22151446342468262, + "step": 5920 + }, + { + "epoch": 4.094744121715076, + "grad_norm": 8.669586181640625, + "learning_rate": 3.2806977101582916e-05, + "log_odds_chosen": 8.907610893249512, + "log_odds_ratio": -0.00045416090870276093, + "logits/chosen": -0.30150651931762695, + "logits/rejected": -0.3959370255470276, + "logps/chosen": -0.0006600356427952647, + "logps/rejected": -1.5230951309204102, + "loss": 1.2002, + "nll_loss": 0.3000123202800751, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6003565734718e-05, + "rewards/margins": 0.15224352478981018, + "rewards/rejected": -0.15230952203273773, + "step": 5921 + }, + { + "epoch": 4.095435684647303, + "grad_norm": 8.433770179748535, + "learning_rate": 3.280313508529276e-05, + "log_odds_chosen": 8.238189697265625, + "log_odds_ratio": -0.09835493564605713, + "logits/chosen": -0.3608969748020172, + "logits/rejected": -0.45487987995147705, + "logps/chosen": -0.014181341975927353, + "logps/rejected": -1.7871878147125244, + "loss": 1.357, + "nll_loss": 0.32940763235092163, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0014181341975927353, + "rewards/margins": 0.17730064690113068, + "rewards/rejected": -0.17871877551078796, + "step": 5922 + }, + { + "epoch": 4.096127247579529, + "grad_norm": 10.358454704284668, + "learning_rate": 3.2799293069002614e-05, + "log_odds_chosen": 10.86201286315918, + "log_odds_ratio": -6.974166899453849e-05, + "logits/chosen": -0.3564392924308777, + "logits/rejected": -0.44083625078201294, + "logps/chosen": -0.0003634452586993575, + "logps/rejected": -2.568542957305908, + "loss": 1.2318, + "nll_loss": 0.3079310953617096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.63445287803188e-05, + "rewards/margins": 0.2568179666996002, + "rewards/rejected": -0.2568542957305908, + "step": 5923 + }, + { + "epoch": 4.096818810511756, + "grad_norm": 7.985740661621094, + "learning_rate": 3.279545105271247e-05, + "log_odds_chosen": 9.483537673950195, + "log_odds_ratio": -0.0002802881645038724, + "logits/chosen": -0.5317890644073486, + "logits/rejected": -0.5612384676933289, + "logps/chosen": -0.0015085403574630618, + "logps/rejected": -1.8522597551345825, + "loss": 1.7163, + "nll_loss": 0.4290497601032257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001508540299255401, + "rewards/margins": 0.18507513403892517, + "rewards/rejected": -0.1852259635925293, + "step": 5924 + }, + { + "epoch": 4.097510373443983, + "grad_norm": 34.76710891723633, + "learning_rate": 3.279160903642232e-05, + "log_odds_chosen": 7.234254360198975, + "log_odds_ratio": -0.29028746485710144, + "logits/chosen": -0.2311570942401886, + "logits/rejected": -0.24773958325386047, + "logps/chosen": -0.03140858933329582, + "logps/rejected": -1.6474460363388062, + "loss": 1.4262, + "nll_loss": 0.3275095820426941, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0031408590730279684, + "rewards/margins": 0.16160376369953156, + "rewards/rejected": -0.16474461555480957, + "step": 5925 + }, + { + "epoch": 4.09820193637621, + "grad_norm": 8.7240571975708, + "learning_rate": 3.2787767020132165e-05, + "log_odds_chosen": 9.578914642333984, + "log_odds_ratio": -0.000270306714810431, + "logits/chosen": -0.7370679974555969, + "logits/rejected": -0.8224969506263733, + "logps/chosen": -0.002277072286233306, + "logps/rejected": -1.888828992843628, + "loss": 1.3327, + "nll_loss": 0.33315569162368774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022770720534026623, + "rewards/margins": 0.18865519762039185, + "rewards/rejected": -0.18888290226459503, + "step": 5926 + }, + { + "epoch": 4.098893499308437, + "grad_norm": 11.401165008544922, + "learning_rate": 3.278392500384202e-05, + "log_odds_chosen": 10.467416763305664, + "log_odds_ratio": -9.527485963189974e-05, + "logits/chosen": -0.3944201171398163, + "logits/rejected": -0.48259276151657104, + "logps/chosen": -0.0002736057504080236, + "logps/rejected": -2.027223587036133, + "loss": 1.4882, + "nll_loss": 0.37202945351600647, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.736057467700448e-05, + "rewards/margins": 0.20269499719142914, + "rewards/rejected": -0.20272235572338104, + "step": 5927 + }, + { + "epoch": 4.0995850622406635, + "grad_norm": 10.183175086975098, + "learning_rate": 3.278008298755187e-05, + "log_odds_chosen": 9.941876411437988, + "log_odds_ratio": -0.019247492775321007, + "logits/chosen": -0.6036807298660278, + "logits/rejected": -0.6116605997085571, + "logps/chosen": -0.00579418009147048, + "logps/rejected": -1.7697887420654297, + "loss": 0.8289, + "nll_loss": 0.20528829097747803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005794180324301124, + "rewards/margins": 0.17639945447444916, + "rewards/rejected": -0.17697887122631073, + "step": 5928 + }, + { + "epoch": 4.10027662517289, + "grad_norm": 13.983654975891113, + "learning_rate": 3.2776240971261716e-05, + "log_odds_chosen": 10.866411209106445, + "log_odds_ratio": -4.261564026819542e-05, + "logits/chosen": -0.7931185960769653, + "logits/rejected": -0.857774555683136, + "logps/chosen": -0.0002357493358431384, + "logps/rejected": -2.452587127685547, + "loss": 1.4298, + "nll_loss": 0.35744524002075195, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.357493394811172e-05, + "rewards/margins": 0.2452351450920105, + "rewards/rejected": -0.24525870382785797, + "step": 5929 + }, + { + "epoch": 4.100968188105117, + "grad_norm": 12.813995361328125, + "learning_rate": 3.2772398954971575e-05, + "log_odds_chosen": 9.736326217651367, + "log_odds_ratio": -0.002923061139881611, + "logits/chosen": -0.6696832180023193, + "logits/rejected": -0.7407538890838623, + "logps/chosen": -0.01896968111395836, + "logps/rejected": -2.72799015045166, + "loss": 1.2213, + "nll_loss": 0.3050318658351898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001896967994980514, + "rewards/margins": 0.27090203762054443, + "rewards/rejected": -0.272799015045166, + "step": 5930 + }, + { + "epoch": 4.101659751037344, + "grad_norm": 8.693153381347656, + "learning_rate": 3.276855693868142e-05, + "log_odds_chosen": 10.148609161376953, + "log_odds_ratio": -0.00025045208167284727, + "logits/chosen": -0.9423700571060181, + "logits/rejected": -0.9496661424636841, + "logps/chosen": -0.00045394038897939026, + "logps/rejected": -1.9736984968185425, + "loss": 1.135, + "nll_loss": 0.28372570872306824, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539404108072631e-05, + "rewards/margins": 0.1973244547843933, + "rewards/rejected": -0.19736984372138977, + "step": 5931 + }, + { + "epoch": 4.102351313969571, + "grad_norm": 9.80392837524414, + "learning_rate": 3.276471492239127e-05, + "log_odds_chosen": 9.37426471710205, + "log_odds_ratio": -0.0005990730132907629, + "logits/chosen": -0.48925837874412537, + "logits/rejected": -0.5344066023826599, + "logps/chosen": -0.009337247349321842, + "logps/rejected": -1.8132996559143066, + "loss": 1.218, + "nll_loss": 0.30442941188812256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009337248047813773, + "rewards/margins": 0.18039622902870178, + "rewards/rejected": -0.18132996559143066, + "step": 5932 + }, + { + "epoch": 4.103042876901798, + "grad_norm": 9.872029304504395, + "learning_rate": 3.2760872906101125e-05, + "log_odds_chosen": 9.1129150390625, + "log_odds_ratio": -0.0005607136990875006, + "logits/chosen": -0.6379801630973816, + "logits/rejected": -0.7192505598068237, + "logps/chosen": -0.002543968614190817, + "logps/rejected": -1.6058814525604248, + "loss": 1.0869, + "nll_loss": 0.27167174220085144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002543968439567834, + "rewards/margins": 0.1603337526321411, + "rewards/rejected": -0.16058816015720367, + "step": 5933 + }, + { + "epoch": 4.1037344398340245, + "grad_norm": 12.728527069091797, + "learning_rate": 3.275703088981098e-05, + "log_odds_chosen": 9.624202728271484, + "log_odds_ratio": -0.001561567303724587, + "logits/chosen": -0.6482115983963013, + "logits/rejected": -0.6900525093078613, + "logps/chosen": -0.004731070715934038, + "logps/rejected": -2.31296443939209, + "loss": 1.2704, + "nll_loss": 0.3174405097961426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004731070948764682, + "rewards/margins": 0.2308233380317688, + "rewards/rejected": -0.23129644989967346, + "step": 5934 + }, + { + "epoch": 4.104426002766251, + "grad_norm": 9.520516395568848, + "learning_rate": 3.2753188873520823e-05, + "log_odds_chosen": 8.726845741271973, + "log_odds_ratio": -0.0048818690702319145, + "logits/chosen": -0.7764834761619568, + "logits/rejected": -0.813412070274353, + "logps/chosen": -0.009942354634404182, + "logps/rejected": -1.928078293800354, + "loss": 1.0051, + "nll_loss": 0.25079038739204407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000994235510006547, + "rewards/margins": 0.19181358814239502, + "rewards/rejected": -0.19280782341957092, + "step": 5935 + }, + { + "epoch": 4.105117565698478, + "grad_norm": 11.137310981750488, + "learning_rate": 3.2749346857230676e-05, + "log_odds_chosen": 9.733144760131836, + "log_odds_ratio": -0.0005869278684258461, + "logits/chosen": -0.8129700422286987, + "logits/rejected": -0.8991367816925049, + "logps/chosen": -0.0008950646151788533, + "logps/rejected": -2.1298701763153076, + "loss": 1.359, + "nll_loss": 0.339703768491745, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.950645860750228e-05, + "rewards/margins": 0.21289752423763275, + "rewards/rejected": -0.2129870355129242, + "step": 5936 + }, + { + "epoch": 4.105809128630705, + "grad_norm": 9.725931167602539, + "learning_rate": 3.274550484094053e-05, + "log_odds_chosen": 9.57811164855957, + "log_odds_ratio": -0.0005339644267223775, + "logits/chosen": -0.5569449663162231, + "logits/rejected": -0.5920923948287964, + "logps/chosen": -0.0014632527017965913, + "logps/rejected": -1.5597307682037354, + "loss": 0.9906, + "nll_loss": 0.24760675430297852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014632527017965913, + "rewards/margins": 0.15582674741744995, + "rewards/rejected": -0.15597307682037354, + "step": 5937 + }, + { + "epoch": 4.106500691562932, + "grad_norm": 11.63660717010498, + "learning_rate": 3.2741662824650374e-05, + "log_odds_chosen": 8.343074798583984, + "log_odds_ratio": -0.06201335787773132, + "logits/chosen": -0.5703570246696472, + "logits/rejected": -0.5562098026275635, + "logps/chosen": -0.023394770920276642, + "logps/rejected": -1.5619940757751465, + "loss": 1.1968, + "nll_loss": 0.2929861843585968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002339476952329278, + "rewards/margins": 0.1538599133491516, + "rewards/rejected": -0.1561993956565857, + "step": 5938 + }, + { + "epoch": 4.107192254495159, + "grad_norm": 6.6649956703186035, + "learning_rate": 3.273782080836023e-05, + "log_odds_chosen": 8.792590141296387, + "log_odds_ratio": -0.002866230206564069, + "logits/chosen": -0.7098940014839172, + "logits/rejected": -0.7736493349075317, + "logps/chosen": -0.0035133520141243935, + "logps/rejected": -1.5739152431488037, + "loss": 1.0678, + "nll_loss": 0.2666718661785126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035133521305397153, + "rewards/margins": 0.15704019367694855, + "rewards/rejected": -0.1573915332555771, + "step": 5939 + }, + { + "epoch": 4.1078838174273855, + "grad_norm": 10.560246467590332, + "learning_rate": 3.273397879207008e-05, + "log_odds_chosen": 8.729288101196289, + "log_odds_ratio": -0.016969040036201477, + "logits/chosen": -0.3396521210670471, + "logits/rejected": -0.41665220260620117, + "logps/chosen": -0.05379095673561096, + "logps/rejected": -2.278257369995117, + "loss": 0.9527, + "nll_loss": 0.23648518323898315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005379095207899809, + "rewards/margins": 0.22244666516780853, + "rewards/rejected": -0.22782574594020844, + "step": 5940 + }, + { + "epoch": 4.108575380359612, + "grad_norm": 13.353851318359375, + "learning_rate": 3.273013677577993e-05, + "log_odds_chosen": 8.815929412841797, + "log_odds_ratio": -0.0002879203821066767, + "logits/chosen": -0.40576013922691345, + "logits/rejected": -0.5046470165252686, + "logps/chosen": -0.0004096684278920293, + "logps/rejected": -1.1543430089950562, + "loss": 1.2258, + "nll_loss": 0.30642008781433105, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096684278920293e-05, + "rewards/margins": 0.11539334058761597, + "rewards/rejected": -0.11543430387973785, + "step": 5941 + }, + { + "epoch": 4.109266943291839, + "grad_norm": 8.844964027404785, + "learning_rate": 3.2726294759489784e-05, + "log_odds_chosen": 9.867124557495117, + "log_odds_ratio": -0.00035422889050096273, + "logits/chosen": -0.3862367868423462, + "logits/rejected": -0.43661126494407654, + "logps/chosen": -0.0004143910191487521, + "logps/rejected": -1.955275535583496, + "loss": 1.4467, + "nll_loss": 0.36163824796676636, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.143910337006673e-05, + "rewards/margins": 0.1954861283302307, + "rewards/rejected": -0.1955275535583496, + "step": 5942 + }, + { + "epoch": 4.109958506224066, + "grad_norm": 9.529248237609863, + "learning_rate": 3.2722452743199636e-05, + "log_odds_chosen": 8.813203811645508, + "log_odds_ratio": -0.004686347208917141, + "logits/chosen": -0.21849806606769562, + "logits/rejected": -0.3129451274871826, + "logps/chosen": -0.003993101883679628, + "logps/rejected": -2.194714307785034, + "loss": 1.1136, + "nll_loss": 0.2779342830181122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00039931017090566456, + "rewards/margins": 0.2190721482038498, + "rewards/rejected": -0.21947146952152252, + "step": 5943 + }, + { + "epoch": 4.110650069156293, + "grad_norm": 7.093349456787109, + "learning_rate": 3.271861072690948e-05, + "log_odds_chosen": 8.87593936920166, + "log_odds_ratio": -0.0005173565004952252, + "logits/chosen": -0.6296029686927795, + "logits/rejected": -0.6486787796020508, + "logps/chosen": -0.0005817078636027873, + "logps/rejected": -1.1000468730926514, + "loss": 1.2732, + "nll_loss": 0.31825223565101624, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817079363623634e-05, + "rewards/margins": 0.10994651913642883, + "rewards/rejected": -0.11000467836856842, + "step": 5944 + }, + { + "epoch": 4.11134163208852, + "grad_norm": 12.155810356140137, + "learning_rate": 3.2714768710619334e-05, + "log_odds_chosen": 9.416703224182129, + "log_odds_ratio": -0.1775832176208496, + "logits/chosen": -0.5456818342208862, + "logits/rejected": -0.7242247462272644, + "logps/chosen": -0.028735145926475525, + "logps/rejected": -2.180703639984131, + "loss": 1.2973, + "nll_loss": 0.3065558075904846, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0028735145460814238, + "rewards/margins": 0.2151968777179718, + "rewards/rejected": -0.2180703729391098, + "step": 5945 + }, + { + "epoch": 4.1120331950207465, + "grad_norm": 6.187151908874512, + "learning_rate": 3.271092669432919e-05, + "log_odds_chosen": 9.89011287689209, + "log_odds_ratio": -0.00014637774438597262, + "logits/chosen": -0.4840518832206726, + "logits/rejected": -0.5275214910507202, + "logps/chosen": -0.0006061262683942914, + "logps/rejected": -2.1634793281555176, + "loss": 0.7992, + "nll_loss": 0.19979476928710938, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.061262683942914e-05, + "rewards/margins": 0.21628734469413757, + "rewards/rejected": -0.21634796261787415, + "step": 5946 + }, + { + "epoch": 4.112724757952973, + "grad_norm": 9.527305603027344, + "learning_rate": 3.270708467803903e-05, + "log_odds_chosen": 10.183591842651367, + "log_odds_ratio": -0.00022850897221360356, + "logits/chosen": -0.7199068665504456, + "logits/rejected": -0.7739315629005432, + "logps/chosen": -0.010009121149778366, + "logps/rejected": -2.580620288848877, + "loss": 1.4392, + "nll_loss": 0.35976576805114746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010009119287133217, + "rewards/margins": 0.25706109404563904, + "rewards/rejected": -0.2580620050430298, + "step": 5947 + }, + { + "epoch": 4.1134163208852, + "grad_norm": 16.418354034423828, + "learning_rate": 3.270324266174889e-05, + "log_odds_chosen": 9.409860610961914, + "log_odds_ratio": -0.0008784055826254189, + "logits/chosen": -0.1743004024028778, + "logits/rejected": -0.29258230328559875, + "logps/chosen": -0.0021755893249064684, + "logps/rejected": -1.563657522201538, + "loss": 1.2913, + "nll_loss": 0.32274794578552246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021755897614639252, + "rewards/margins": 0.15614819526672363, + "rewards/rejected": -0.1563657522201538, + "step": 5948 + }, + { + "epoch": 4.114107883817427, + "grad_norm": 6.429084777832031, + "learning_rate": 3.269940064545874e-05, + "log_odds_chosen": 7.539295196533203, + "log_odds_ratio": -0.026218703016638756, + "logits/chosen": -0.5419434309005737, + "logits/rejected": -0.573211133480072, + "logps/chosen": -0.018203264102339745, + "logps/rejected": -1.1337330341339111, + "loss": 0.9326, + "nll_loss": 0.2305281162261963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018203264335170388, + "rewards/margins": 0.11155297607183456, + "rewards/rejected": -0.113373301923275, + "step": 5949 + }, + { + "epoch": 4.114799446749654, + "grad_norm": 7.231356143951416, + "learning_rate": 3.269555862916859e-05, + "log_odds_chosen": 9.106095314025879, + "log_odds_ratio": -0.03225746005773544, + "logits/chosen": -0.3878445327281952, + "logits/rejected": -0.4633275866508484, + "logps/chosen": -0.008714303374290466, + "logps/rejected": -1.9162147045135498, + "loss": 1.1961, + "nll_loss": 0.29580968618392944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008714303257875144, + "rewards/margins": 0.19075004756450653, + "rewards/rejected": -0.19162148237228394, + "step": 5950 + }, + { + "epoch": 4.115491009681881, + "grad_norm": 9.443856239318848, + "learning_rate": 3.269171661287844e-05, + "log_odds_chosen": 7.6959004402160645, + "log_odds_ratio": -0.04945721477270126, + "logits/chosen": -0.49826863408088684, + "logits/rejected": -0.5104885101318359, + "logps/chosen": -0.013496211729943752, + "logps/rejected": -2.165086507797241, + "loss": 1.1163, + "nll_loss": 0.27412450313568115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013496209867298603, + "rewards/margins": 0.21515902876853943, + "rewards/rejected": -0.2165086567401886, + "step": 5951 + }, + { + "epoch": 4.1161825726141075, + "grad_norm": 9.486602783203125, + "learning_rate": 3.2687874596588295e-05, + "log_odds_chosen": 9.971977233886719, + "log_odds_ratio": -0.007441829890012741, + "logits/chosen": -0.46394485235214233, + "logits/rejected": -0.5294165015220642, + "logps/chosen": -0.0038535038474947214, + "logps/rejected": -2.2446508407592773, + "loss": 1.1127, + "nll_loss": 0.27743101119995117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003853504022117704, + "rewards/margins": 0.22407975792884827, + "rewards/rejected": -0.22446510195732117, + "step": 5952 + }, + { + "epoch": 4.116874135546334, + "grad_norm": 7.0303635597229, + "learning_rate": 3.268403258029814e-05, + "log_odds_chosen": 9.403999328613281, + "log_odds_ratio": -0.000792883918620646, + "logits/chosen": -0.4152238368988037, + "logits/rejected": -0.4461762607097626, + "logps/chosen": -0.0014086526352912188, + "logps/rejected": -1.9261103868484497, + "loss": 1.0918, + "nll_loss": 0.27288228273391724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014086527517065406, + "rewards/margins": 0.19247017800807953, + "rewards/rejected": -0.19261103868484497, + "step": 5953 + }, + { + "epoch": 4.117565698478561, + "grad_norm": 9.143287658691406, + "learning_rate": 3.268019056400799e-05, + "log_odds_chosen": 8.954416275024414, + "log_odds_ratio": -0.0022243014536798, + "logits/chosen": -0.4166088104248047, + "logits/rejected": -0.5308199524879456, + "logps/chosen": -0.0026472746394574642, + "logps/rejected": -1.9135946035385132, + "loss": 1.0367, + "nll_loss": 0.25895246863365173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002647274814080447, + "rewards/margins": 0.19109472632408142, + "rewards/rejected": -0.19135946035385132, + "step": 5954 + }, + { + "epoch": 4.118257261410788, + "grad_norm": 12.52299976348877, + "learning_rate": 3.2676348547717845e-05, + "log_odds_chosen": 10.522491455078125, + "log_odds_ratio": -5.735613376600668e-05, + "logits/chosen": -0.7633590698242188, + "logits/rejected": -0.8200657367706299, + "logps/chosen": -0.0006438453565351665, + "logps/rejected": -2.771487236022949, + "loss": 1.3546, + "nll_loss": 0.3386405110359192, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.438454147428274e-05, + "rewards/margins": 0.2770843505859375, + "rewards/rejected": -0.2771487236022949, + "step": 5955 + }, + { + "epoch": 4.118948824343015, + "grad_norm": 9.641544342041016, + "learning_rate": 3.267250653142769e-05, + "log_odds_chosen": 9.222244262695312, + "log_odds_ratio": -0.0002797696506604552, + "logits/chosen": -0.4564547836780548, + "logits/rejected": -0.47546225786209106, + "logps/chosen": -0.0338638573884964, + "logps/rejected": -2.6891396045684814, + "loss": 1.0424, + "nll_loss": 0.26056718826293945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00338638573884964, + "rewards/margins": 0.2655275762081146, + "rewards/rejected": -0.26891398429870605, + "step": 5956 + }, + { + "epoch": 4.119640387275242, + "grad_norm": 8.429539680480957, + "learning_rate": 3.266866451513755e-05, + "log_odds_chosen": 9.318424224853516, + "log_odds_ratio": -0.0015697049675509334, + "logits/chosen": -0.891433835029602, + "logits/rejected": -0.9093539714813232, + "logps/chosen": -0.023992005735635757, + "logps/rejected": -2.0142667293548584, + "loss": 1.6457, + "nll_loss": 0.41125792264938354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002399200340732932, + "rewards/margins": 0.19902749359607697, + "rewards/rejected": -0.2014266848564148, + "step": 5957 + }, + { + "epoch": 4.1203319502074685, + "grad_norm": 7.106624126434326, + "learning_rate": 3.2664822498847396e-05, + "log_odds_chosen": 10.787242889404297, + "log_odds_ratio": -3.572547575458884e-05, + "logits/chosen": -0.33771446347236633, + "logits/rejected": -0.4578153192996979, + "logps/chosen": -0.00013065329403616488, + "logps/rejected": -1.9293015003204346, + "loss": 1.1003, + "nll_loss": 0.275082528591156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.306533067690907e-05, + "rewards/margins": 0.1929170936346054, + "rewards/rejected": -0.1929301619529724, + "step": 5958 + }, + { + "epoch": 4.121023513139695, + "grad_norm": 10.316193580627441, + "learning_rate": 3.266098048255725e-05, + "log_odds_chosen": 9.866926193237305, + "log_odds_ratio": -0.00028171919984743, + "logits/chosen": -0.833706259727478, + "logits/rejected": -0.9109463691711426, + "logps/chosen": -0.0019655900541692972, + "logps/rejected": -2.0055766105651855, + "loss": 1.1965, + "nll_loss": 0.299089640378952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019655900541692972, + "rewards/margins": 0.20036110281944275, + "rewards/rejected": -0.2005576640367508, + "step": 5959 + }, + { + "epoch": 4.121715076071922, + "grad_norm": 13.061858177185059, + "learning_rate": 3.26571384662671e-05, + "log_odds_chosen": 8.520166397094727, + "log_odds_ratio": -0.007341983262449503, + "logits/chosen": -0.25020256638526917, + "logits/rejected": -0.357946515083313, + "logps/chosen": -0.017194848507642746, + "logps/rejected": -1.7626746892929077, + "loss": 1.01, + "nll_loss": 0.25176340341567993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017194848041981459, + "rewards/margins": 0.1745479702949524, + "rewards/rejected": -0.17626747488975525, + "step": 5960 + }, + { + "epoch": 4.122406639004149, + "grad_norm": 12.465418815612793, + "learning_rate": 3.265329644997695e-05, + "log_odds_chosen": 9.764495849609375, + "log_odds_ratio": -0.00030658257310278714, + "logits/chosen": -0.5246774554252625, + "logits/rejected": -0.5476590991020203, + "logps/chosen": -0.0007004796643741429, + "logps/rejected": -1.784714698791504, + "loss": 1.1105, + "nll_loss": 0.2775907516479492, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.004797225818038e-05, + "rewards/margins": 0.1784014254808426, + "rewards/rejected": -0.17847149074077606, + "step": 5961 + }, + { + "epoch": 4.123098201936376, + "grad_norm": 6.7162556648254395, + "learning_rate": 3.26494544336868e-05, + "log_odds_chosen": 9.411355018615723, + "log_odds_ratio": -0.008453583344817162, + "logits/chosen": -0.456272155046463, + "logits/rejected": -0.41793495416641235, + "logps/chosen": -0.0042070625349879265, + "logps/rejected": -1.43600594997406, + "loss": 1.748, + "nll_loss": 0.4361457824707031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042070625931955874, + "rewards/margins": 0.14317987859249115, + "rewards/rejected": -0.14360059797763824, + "step": 5962 + }, + { + "epoch": 4.123789764868603, + "grad_norm": 13.096224784851074, + "learning_rate": 3.264561241739665e-05, + "log_odds_chosen": 8.157176971435547, + "log_odds_ratio": -0.2519817650318146, + "logits/chosen": -0.47622549533843994, + "logits/rejected": -0.492038756608963, + "logps/chosen": -0.04018167033791542, + "logps/rejected": -1.1400073766708374, + "loss": 1.2235, + "nll_loss": 0.2806675136089325, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004018167033791542, + "rewards/margins": 0.10998257249593735, + "rewards/rejected": -0.11400073766708374, + "step": 5963 + }, + { + "epoch": 4.124481327800829, + "grad_norm": 12.945378303527832, + "learning_rate": 3.2641770401106504e-05, + "log_odds_chosen": 8.599679946899414, + "log_odds_ratio": -0.26467394828796387, + "logits/chosen": -0.2246679961681366, + "logits/rejected": -0.31600359082221985, + "logps/chosen": -0.0332237184047699, + "logps/rejected": -2.024475574493408, + "loss": 0.8778, + "nll_loss": 0.1929716169834137, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033223717473447323, + "rewards/margins": 0.19912518560886383, + "rewards/rejected": -0.2024475634098053, + "step": 5964 + }, + { + "epoch": 4.125172890733056, + "grad_norm": 5.979857444763184, + "learning_rate": 3.263792838481635e-05, + "log_odds_chosen": 10.021464347839355, + "log_odds_ratio": -0.002926250221207738, + "logits/chosen": -0.4512186050415039, + "logits/rejected": -0.5685581564903259, + "logps/chosen": -0.0035476628690958023, + "logps/rejected": -1.887199878692627, + "loss": 0.9868, + "nll_loss": 0.2464103400707245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035476626362651587, + "rewards/margins": 0.18836522102355957, + "rewards/rejected": -0.1887200027704239, + "step": 5965 + }, + { + "epoch": 4.125864453665283, + "grad_norm": 7.337460994720459, + "learning_rate": 3.263408636852621e-05, + "log_odds_chosen": 9.19233512878418, + "log_odds_ratio": -0.0003316085785627365, + "logits/chosen": -0.7004691958427429, + "logits/rejected": -0.7161996960639954, + "logps/chosen": -0.0005070206825621426, + "logps/rejected": -1.1151583194732666, + "loss": 1.3041, + "nll_loss": 0.32599785923957825, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.070206680102274e-05, + "rewards/margins": 0.11146514117717743, + "rewards/rejected": -0.11151584982872009, + "step": 5966 + }, + { + "epoch": 4.12655601659751, + "grad_norm": 9.277113914489746, + "learning_rate": 3.2630244352236054e-05, + "log_odds_chosen": 9.497467041015625, + "log_odds_ratio": -0.00043352588545531034, + "logits/chosen": -0.5457701683044434, + "logits/rejected": -0.6004650592803955, + "logps/chosen": -0.001261774217709899, + "logps/rejected": -1.639230489730835, + "loss": 1.5775, + "nll_loss": 0.3943275809288025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001261774159502238, + "rewards/margins": 0.16379685699939728, + "rewards/rejected": -0.16392304003238678, + "step": 5967 + }, + { + "epoch": 4.127247579529737, + "grad_norm": 8.806751251220703, + "learning_rate": 3.262640233594591e-05, + "log_odds_chosen": 9.815351486206055, + "log_odds_ratio": -0.00011921973782591522, + "logits/chosen": -0.5170784592628479, + "logits/rejected": -0.5860993266105652, + "logps/chosen": -0.00905107706785202, + "logps/rejected": -3.105226993560791, + "loss": 1.6434, + "nll_loss": 0.41084566712379456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009051076485775411, + "rewards/margins": 0.3096176087856293, + "rewards/rejected": -0.31052273511886597, + "step": 5968 + }, + { + "epoch": 4.127939142461964, + "grad_norm": 10.547689437866211, + "learning_rate": 3.262256031965576e-05, + "log_odds_chosen": 10.503625869750977, + "log_odds_ratio": -4.8341156798414886e-05, + "logits/chosen": -0.7468377947807312, + "logits/rejected": -0.8255564570426941, + "logps/chosen": -0.0004737896961160004, + "logps/rejected": -1.9888479709625244, + "loss": 1.3947, + "nll_loss": 0.34866786003112793, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.737896961160004e-05, + "rewards/margins": 0.19883739948272705, + "rewards/rejected": -0.1988847851753235, + "step": 5969 + }, + { + "epoch": 4.12863070539419, + "grad_norm": 8.894291877746582, + "learning_rate": 3.261871830336561e-05, + "log_odds_chosen": 8.193218231201172, + "log_odds_ratio": -0.039914391934871674, + "logits/chosen": -0.44322913885116577, + "logits/rejected": -0.5547171831130981, + "logps/chosen": -0.02041376568377018, + "logps/rejected": -1.4175244569778442, + "loss": 1.4973, + "nll_loss": 0.3703390657901764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002041376894339919, + "rewards/margins": 0.13971105217933655, + "rewards/rejected": -0.1417524367570877, + "step": 5970 + }, + { + "epoch": 4.129322268326418, + "grad_norm": 8.237187385559082, + "learning_rate": 3.261487628707546e-05, + "log_odds_chosen": 11.154354095458984, + "log_odds_ratio": -4.79549344163388e-05, + "logits/chosen": -0.7316851019859314, + "logits/rejected": -0.7923774123191833, + "logps/chosen": -0.00024489694624207914, + "logps/rejected": -2.307917594909668, + "loss": 0.8898, + "nll_loss": 0.2224467545747757, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4489694624207914e-05, + "rewards/margins": 0.23076725006103516, + "rewards/rejected": -0.23079174757003784, + "step": 5971 + }, + { + "epoch": 4.130013831258645, + "grad_norm": 7.732690334320068, + "learning_rate": 3.261103427078531e-05, + "log_odds_chosen": 9.728826522827148, + "log_odds_ratio": -0.00044432198046706617, + "logits/chosen": -1.0977262258529663, + "logits/rejected": -1.1038000583648682, + "logps/chosen": -0.008239268325269222, + "logps/rejected": -2.0336647033691406, + "loss": 1.7029, + "nll_loss": 0.425682008266449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008239267626777291, + "rewards/margins": 0.2025425285100937, + "rewards/rejected": -0.2033664733171463, + "step": 5972 + }, + { + "epoch": 4.130705394190872, + "grad_norm": 11.485577583312988, + "learning_rate": 3.260719225449516e-05, + "log_odds_chosen": 8.480979919433594, + "log_odds_ratio": -0.021923229098320007, + "logits/chosen": -0.4793844223022461, + "logits/rejected": -0.4833126366138458, + "logps/chosen": -0.006414573173969984, + "logps/rejected": -1.7797949314117432, + "loss": 1.2513, + "nll_loss": 0.310627281665802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006414573872461915, + "rewards/margins": 0.17733803391456604, + "rewards/rejected": -0.1779794991016388, + "step": 5973 + }, + { + "epoch": 4.131396957123099, + "grad_norm": 10.626336097717285, + "learning_rate": 3.260335023820501e-05, + "log_odds_chosen": 9.561973571777344, + "log_odds_ratio": -0.010487427935004234, + "logits/chosen": -0.9466665983200073, + "logits/rejected": -0.9845972061157227, + "logps/chosen": -0.0037271876353770494, + "logps/rejected": -1.9134477376937866, + "loss": 1.0319, + "nll_loss": 0.25691381096839905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003727187868207693, + "rewards/margins": 0.19097205996513367, + "rewards/rejected": -0.19134476780891418, + "step": 5974 + }, + { + "epoch": 4.1320885200553255, + "grad_norm": 7.434225082397461, + "learning_rate": 3.259950822191487e-05, + "log_odds_chosen": 10.406455993652344, + "log_odds_ratio": -0.0002542786533012986, + "logits/chosen": -0.5065848231315613, + "logits/rejected": -0.5438984632492065, + "logps/chosen": -0.0002799753565341234, + "logps/rejected": -2.0893630981445312, + "loss": 1.3304, + "nll_loss": 0.3325809836387634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.799753929139115e-05, + "rewards/margins": 0.2089083343744278, + "rewards/rejected": -0.20893631875514984, + "step": 5975 + }, + { + "epoch": 4.132780082987552, + "grad_norm": 43.933433532714844, + "learning_rate": 3.259566620562471e-05, + "log_odds_chosen": 8.503948211669922, + "log_odds_ratio": -0.059916265308856964, + "logits/chosen": -0.5901418328285217, + "logits/rejected": -0.6609665155410767, + "logps/chosen": -0.14128419756889343, + "logps/rejected": -2.0661349296569824, + "loss": 1.0805, + "nll_loss": 0.2641289234161377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014128419570624828, + "rewards/margins": 0.19248507916927338, + "rewards/rejected": -0.20661349594593048, + "step": 5976 + }, + { + "epoch": 4.133471645919779, + "grad_norm": 14.006975173950195, + "learning_rate": 3.2591824189334565e-05, + "log_odds_chosen": 9.184225082397461, + "log_odds_ratio": -0.007922169752418995, + "logits/chosen": -0.7215189337730408, + "logits/rejected": -0.7678462266921997, + "logps/chosen": -0.004983365070074797, + "logps/rejected": -1.9281487464904785, + "loss": 1.1481, + "nll_loss": 0.2862243950366974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004983365070074797, + "rewards/margins": 0.19231653213500977, + "rewards/rejected": -0.1928148865699768, + "step": 5977 + }, + { + "epoch": 4.134163208852006, + "grad_norm": 7.512627124786377, + "learning_rate": 3.258798217304442e-05, + "log_odds_chosen": 8.804177284240723, + "log_odds_ratio": -0.0018154741264879704, + "logits/chosen": -0.4251983165740967, + "logits/rejected": -0.4081365466117859, + "logps/chosen": -0.0031050737015902996, + "logps/rejected": -1.4643402099609375, + "loss": 1.2657, + "nll_loss": 0.31624752283096313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003105073992628604, + "rewards/margins": 0.1461235135793686, + "rewards/rejected": -0.1464340090751648, + "step": 5978 + }, + { + "epoch": 4.134854771784233, + "grad_norm": 8.988731384277344, + "learning_rate": 3.258414015675427e-05, + "log_odds_chosen": 8.137323379516602, + "log_odds_ratio": -0.02129383198916912, + "logits/chosen": -0.7253506779670715, + "logits/rejected": -0.7172971367835999, + "logps/chosen": -0.009386150166392326, + "logps/rejected": -1.9563298225402832, + "loss": 1.0706, + "nll_loss": 0.26553285121917725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009386150049977005, + "rewards/margins": 0.1946943700313568, + "rewards/rejected": -0.19563297927379608, + "step": 5979 + }, + { + "epoch": 4.13554633471646, + "grad_norm": 10.024322509765625, + "learning_rate": 3.2580298140464116e-05, + "log_odds_chosen": 10.15527057647705, + "log_odds_ratio": -9.017070988193154e-05, + "logits/chosen": -0.5249866843223572, + "logits/rejected": -0.6138657927513123, + "logps/chosen": -0.00031795757240615785, + "logps/rejected": -1.9348928928375244, + "loss": 1.443, + "nll_loss": 0.3607480525970459, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.179576015099883e-05, + "rewards/margins": 0.1934574991464615, + "rewards/rejected": -0.19348928332328796, + "step": 5980 + }, + { + "epoch": 4.136237897648686, + "grad_norm": 9.696584701538086, + "learning_rate": 3.257645612417397e-05, + "log_odds_chosen": 8.965103149414062, + "log_odds_ratio": -0.010557899251580238, + "logits/chosen": -0.49012356996536255, + "logits/rejected": -0.5335181951522827, + "logps/chosen": -0.009183174930512905, + "logps/rejected": -2.1040782928466797, + "loss": 1.2164, + "nll_loss": 0.30304914712905884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009183174697682261, + "rewards/margins": 0.20948949456214905, + "rewards/rejected": -0.2104078084230423, + "step": 5981 + }, + { + "epoch": 4.136929460580913, + "grad_norm": 9.6136474609375, + "learning_rate": 3.257261410788382e-05, + "log_odds_chosen": 10.920928955078125, + "log_odds_ratio": -4.304420144762844e-05, + "logits/chosen": -0.7003531455993652, + "logits/rejected": -0.7433496713638306, + "logps/chosen": -0.00014741663471795619, + "logps/rejected": -2.029831647872925, + "loss": 0.8727, + "nll_loss": 0.2181766927242279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4741663107997738e-05, + "rewards/margins": 0.20296841859817505, + "rewards/rejected": -0.20298317074775696, + "step": 5982 + }, + { + "epoch": 4.13762102351314, + "grad_norm": 6.659314155578613, + "learning_rate": 3.2568772091593666e-05, + "log_odds_chosen": 8.73141098022461, + "log_odds_ratio": -0.0003280085220467299, + "logits/chosen": -0.25254154205322266, + "logits/rejected": -0.2507188320159912, + "logps/chosen": -0.0036556655541062355, + "logps/rejected": -1.4496879577636719, + "loss": 1.2142, + "nll_loss": 0.3035261034965515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036556654958985746, + "rewards/margins": 0.14460323750972748, + "rewards/rejected": -0.14496880769729614, + "step": 5983 + }, + { + "epoch": 4.138312586445367, + "grad_norm": 9.571125984191895, + "learning_rate": 3.2564930075303525e-05, + "log_odds_chosen": 9.067570686340332, + "log_odds_ratio": -0.0011896053329110146, + "logits/chosen": -0.6112010478973389, + "logits/rejected": -0.6654389500617981, + "logps/chosen": -0.009722664020955563, + "logps/rejected": -2.00520920753479, + "loss": 1.3412, + "nll_loss": 0.33517637848854065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009722664253786206, + "rewards/margins": 0.1995486617088318, + "rewards/rejected": -0.20052093267440796, + "step": 5984 + }, + { + "epoch": 4.139004149377594, + "grad_norm": 5.55220365524292, + "learning_rate": 3.256108805901337e-05, + "log_odds_chosen": 8.475414276123047, + "log_odds_ratio": -0.026588575914502144, + "logits/chosen": -0.3219301104545593, + "logits/rejected": -0.3104025721549988, + "logps/chosen": -0.008884113281965256, + "logps/rejected": -1.4837077856063843, + "loss": 0.7646, + "nll_loss": 0.18848538398742676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008884113049134612, + "rewards/margins": 0.14748236536979675, + "rewards/rejected": -0.14837077260017395, + "step": 5985 + }, + { + "epoch": 4.139695712309821, + "grad_norm": 10.565631866455078, + "learning_rate": 3.2557246042723224e-05, + "log_odds_chosen": 10.836447715759277, + "log_odds_ratio": -2.8640048185479827e-05, + "logits/chosen": -0.3206827640533447, + "logits/rejected": -0.4384026825428009, + "logps/chosen": -0.00023758437600918114, + "logps/rejected": -2.334228038787842, + "loss": 1.1158, + "nll_loss": 0.27893880009651184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3758439056109637e-05, + "rewards/margins": 0.23339903354644775, + "rewards/rejected": -0.23342278599739075, + "step": 5986 + }, + { + "epoch": 4.140387275242047, + "grad_norm": 6.649505138397217, + "learning_rate": 3.2553404026433076e-05, + "log_odds_chosen": 8.939205169677734, + "log_odds_ratio": -0.0010217225644737482, + "logits/chosen": -0.3970273435115814, + "logits/rejected": -0.47130677103996277, + "logps/chosen": -0.0017841707449406385, + "logps/rejected": -1.7476348876953125, + "loss": 1.4555, + "nll_loss": 0.3637797236442566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017841707449406385, + "rewards/margins": 0.17458505928516388, + "rewards/rejected": -0.17476347088813782, + "step": 5987 + }, + { + "epoch": 4.141078838174274, + "grad_norm": 7.781863689422607, + "learning_rate": 3.254956201014293e-05, + "log_odds_chosen": 9.778545379638672, + "log_odds_ratio": -0.00030032815993763506, + "logits/chosen": -0.5041961669921875, + "logits/rejected": -0.49716609716415405, + "logps/chosen": -0.00019634825002867728, + "logps/rejected": -1.313286304473877, + "loss": 1.7611, + "nll_loss": 0.4402513802051544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.963482645805925e-05, + "rewards/margins": 0.13130898773670197, + "rewards/rejected": -0.13132862746715546, + "step": 5988 + }, + { + "epoch": 4.141770401106501, + "grad_norm": 7.763805389404297, + "learning_rate": 3.2545719993852774e-05, + "log_odds_chosen": 11.122587203979492, + "log_odds_ratio": -2.156953269150108e-05, + "logits/chosen": -0.4305421710014343, + "logits/rejected": -0.48841428756713867, + "logps/chosen": -0.00044233925291337073, + "logps/rejected": -2.886995315551758, + "loss": 0.9509, + "nll_loss": 0.23773378133773804, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4233926018932834e-05, + "rewards/margins": 0.2886553108692169, + "rewards/rejected": -0.28869953751564026, + "step": 5989 + }, + { + "epoch": 4.142461964038728, + "grad_norm": 9.990039825439453, + "learning_rate": 3.2541877977562627e-05, + "log_odds_chosen": 9.962656021118164, + "log_odds_ratio": -0.00012990219693165272, + "logits/chosen": -0.8702992796897888, + "logits/rejected": -0.8638290166854858, + "logps/chosen": -0.00043330626795068383, + "logps/rejected": -1.8089728355407715, + "loss": 1.1995, + "nll_loss": 0.2998722791671753, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.333062679506838e-05, + "rewards/margins": 0.1808539628982544, + "rewards/rejected": -0.1808972954750061, + "step": 5990 + }, + { + "epoch": 4.143153526970955, + "grad_norm": 8.116752624511719, + "learning_rate": 3.253803596127248e-05, + "log_odds_chosen": 8.72532844543457, + "log_odds_ratio": -0.025739800184965134, + "logits/chosen": -0.43284478783607483, + "logits/rejected": -0.4666554927825928, + "logps/chosen": -0.008339660242199898, + "logps/rejected": -1.546666145324707, + "loss": 0.9343, + "nll_loss": 0.23101186752319336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008339660707861185, + "rewards/margins": 0.15383264422416687, + "rewards/rejected": -0.15466661751270294, + "step": 5991 + }, + { + "epoch": 4.143845089903182, + "grad_norm": 9.78846263885498, + "learning_rate": 3.2534193944982325e-05, + "log_odds_chosen": 6.673300743103027, + "log_odds_ratio": -0.09282121807336807, + "logits/chosen": -0.6692243814468384, + "logits/rejected": -0.7034744024276733, + "logps/chosen": -0.033201564103364944, + "logps/rejected": -1.3767527341842651, + "loss": 1.9898, + "nll_loss": 0.4881555140018463, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0033201563637703657, + "rewards/margins": 0.13435512781143188, + "rewards/rejected": -0.13767528533935547, + "step": 5992 + }, + { + "epoch": 4.144536652835408, + "grad_norm": 13.220309257507324, + "learning_rate": 3.253035192869218e-05, + "log_odds_chosen": 10.948552131652832, + "log_odds_ratio": -3.2378186006098986e-05, + "logits/chosen": -0.5019373893737793, + "logits/rejected": -0.564258337020874, + "logps/chosen": -0.00020370143465697765, + "logps/rejected": -2.3918418884277344, + "loss": 1.1606, + "nll_loss": 0.2901498079299927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0370142010506243e-05, + "rewards/margins": 0.2391638308763504, + "rewards/rejected": -0.2391842007637024, + "step": 5993 + }, + { + "epoch": 4.145228215767635, + "grad_norm": 13.55427360534668, + "learning_rate": 3.252650991240203e-05, + "log_odds_chosen": 8.909460067749023, + "log_odds_ratio": -0.0011914423666894436, + "logits/chosen": -0.8773664832115173, + "logits/rejected": -0.8870227336883545, + "logps/chosen": -0.005369079299271107, + "logps/rejected": -1.924501657485962, + "loss": 1.248, + "nll_loss": 0.3118761479854584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005369078717194498, + "rewards/margins": 0.19191326200962067, + "rewards/rejected": -0.1924501657485962, + "step": 5994 + }, + { + "epoch": 4.145919778699862, + "grad_norm": 12.313704490661621, + "learning_rate": 3.252266789611188e-05, + "log_odds_chosen": 10.855825424194336, + "log_odds_ratio": -3.5704721085494384e-05, + "logits/chosen": -0.8360105156898499, + "logits/rejected": -0.8496856689453125, + "logps/chosen": -0.0002516081731300801, + "logps/rejected": -2.481499671936035, + "loss": 1.0471, + "nll_loss": 0.2617621421813965, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5160818040603772e-05, + "rewards/margins": 0.24812480807304382, + "rewards/rejected": -0.24814999103546143, + "step": 5995 + }, + { + "epoch": 4.146611341632089, + "grad_norm": 8.56289291381836, + "learning_rate": 3.251882587982173e-05, + "log_odds_chosen": 9.165348052978516, + "log_odds_ratio": -0.001278581446968019, + "logits/chosen": -0.679673433303833, + "logits/rejected": -0.7639566659927368, + "logps/chosen": -0.006403455510735512, + "logps/rejected": -1.2529271841049194, + "loss": 1.136, + "nll_loss": 0.28387773036956787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006403456209227443, + "rewards/margins": 0.12465237081050873, + "rewards/rejected": -0.12529271841049194, + "step": 5996 + }, + { + "epoch": 4.147302904564316, + "grad_norm": 7.371829032897949, + "learning_rate": 3.251498386353159e-05, + "log_odds_chosen": 9.111982345581055, + "log_odds_ratio": -0.0002587471390143037, + "logits/chosen": -0.8423949480056763, + "logits/rejected": -0.976094663143158, + "logps/chosen": -0.01260296069085598, + "logps/rejected": -2.5366899967193604, + "loss": 1.7986, + "nll_loss": 0.4496348798274994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012602962087839842, + "rewards/margins": 0.2524087131023407, + "rewards/rejected": -0.25366899371147156, + "step": 5997 + }, + { + "epoch": 4.1479944674965425, + "grad_norm": 7.737312316894531, + "learning_rate": 3.251114184724143e-05, + "log_odds_chosen": 10.224498748779297, + "log_odds_ratio": -4.952462040819228e-05, + "logits/chosen": -0.49761393666267395, + "logits/rejected": -0.5714750289916992, + "logps/chosen": -0.000272135715931654, + "logps/rejected": -1.5678317546844482, + "loss": 1.0023, + "nll_loss": 0.2505626082420349, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.721357304835692e-05, + "rewards/margins": 0.1567559540271759, + "rewards/rejected": -0.15678316354751587, + "step": 5998 + }, + { + "epoch": 4.148686030428769, + "grad_norm": 6.962085723876953, + "learning_rate": 3.2507299830951285e-05, + "log_odds_chosen": 8.856086730957031, + "log_odds_ratio": -0.003185291076079011, + "logits/chosen": -0.8304173946380615, + "logits/rejected": -0.8658832311630249, + "logps/chosen": -0.013688081875443459, + "logps/rejected": -1.5797288417816162, + "loss": 1.4232, + "nll_loss": 0.35549092292785645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013688082108274102, + "rewards/margins": 0.15660406649112701, + "rewards/rejected": -0.15797287225723267, + "step": 5999 + }, + { + "epoch": 4.149377593360996, + "grad_norm": 8.609755516052246, + "learning_rate": 3.250345781466114e-05, + "log_odds_chosen": 8.917791366577148, + "log_odds_ratio": -0.003855043789371848, + "logits/chosen": -0.7871185541152954, + "logits/rejected": -0.8323012590408325, + "logps/chosen": -0.005348569247871637, + "logps/rejected": -1.6735996007919312, + "loss": 1.0782, + "nll_loss": 0.26915350556373596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005348569247871637, + "rewards/margins": 0.1668251007795334, + "rewards/rejected": -0.16735996305942535, + "step": 6000 + }, + { + "epoch": 4.150069156293223, + "grad_norm": 9.403129577636719, + "learning_rate": 3.249961579837098e-05, + "log_odds_chosen": 10.51172161102295, + "log_odds_ratio": -5.671913459082134e-05, + "logits/chosen": -0.5075657367706299, + "logits/rejected": -0.5350244045257568, + "logps/chosen": -0.0002172726672142744, + "logps/rejected": -2.0582528114318848, + "loss": 0.8376, + "nll_loss": 0.20939427614212036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1727268176618963e-05, + "rewards/margins": 0.20580355823040009, + "rewards/rejected": -0.20582528412342072, + "step": 6001 + }, + { + "epoch": 4.15076071922545, + "grad_norm": 17.984451293945312, + "learning_rate": 3.2495773782080836e-05, + "log_odds_chosen": 8.329596519470215, + "log_odds_ratio": -0.19380486011505127, + "logits/chosen": -0.691654622554779, + "logits/rejected": -0.7160911560058594, + "logps/chosen": -0.026909837499260902, + "logps/rejected": -1.85858154296875, + "loss": 1.4193, + "nll_loss": 0.33545446395874023, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0026909837033599615, + "rewards/margins": 0.18316717445850372, + "rewards/rejected": -0.18585816025733948, + "step": 6002 + }, + { + "epoch": 4.151452282157677, + "grad_norm": 13.927128791809082, + "learning_rate": 3.249193176579069e-05, + "log_odds_chosen": 9.670804023742676, + "log_odds_ratio": -0.0007266352185979486, + "logits/chosen": -0.5136786699295044, + "logits/rejected": -0.5342156291007996, + "logps/chosen": -0.0009479423752054572, + "logps/rejected": -2.0707037448883057, + "loss": 1.089, + "nll_loss": 0.2721821069717407, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.479424625169486e-05, + "rewards/margins": 0.20697560906410217, + "rewards/rejected": -0.20707038044929504, + "step": 6003 + }, + { + "epoch": 4.1521438450899035, + "grad_norm": 10.575544357299805, + "learning_rate": 3.248808974950054e-05, + "log_odds_chosen": 10.414182662963867, + "log_odds_ratio": -0.00011947475286433473, + "logits/chosen": -0.3136584162712097, + "logits/rejected": -0.35366952419281006, + "logps/chosen": -0.004044681787490845, + "logps/rejected": -2.415982961654663, + "loss": 1.3192, + "nll_loss": 0.3297957181930542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040446818456985056, + "rewards/margins": 0.24119384586811066, + "rewards/rejected": -0.24159829318523407, + "step": 6004 + }, + { + "epoch": 4.15283540802213, + "grad_norm": 8.583888053894043, + "learning_rate": 3.2484247733210386e-05, + "log_odds_chosen": 10.239829063415527, + "log_odds_ratio": -0.0005477681988850236, + "logits/chosen": -0.6634764671325684, + "logits/rejected": -0.7279617786407471, + "logps/chosen": -0.008838672190904617, + "logps/rejected": -2.911771297454834, + "loss": 0.7103, + "nll_loss": 0.17753028869628906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008838672074489295, + "rewards/margins": 0.29029324650764465, + "rewards/rejected": -0.2911771535873413, + "step": 6005 + }, + { + "epoch": 4.153526970954357, + "grad_norm": 14.97877311706543, + "learning_rate": 3.2480405716920245e-05, + "log_odds_chosen": 10.376019477844238, + "log_odds_ratio": -0.002244369825348258, + "logits/chosen": -0.4436874985694885, + "logits/rejected": -0.4845767021179199, + "logps/chosen": -0.0003641161310952157, + "logps/rejected": -2.0352377891540527, + "loss": 1.0946, + "nll_loss": 0.27343636751174927, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.641161310952157e-05, + "rewards/margins": 0.203487366437912, + "rewards/rejected": -0.20352376997470856, + "step": 6006 + }, + { + "epoch": 4.154218533886584, + "grad_norm": 7.991260051727295, + "learning_rate": 3.247656370063009e-05, + "log_odds_chosen": 10.304679870605469, + "log_odds_ratio": -0.00010443619976285845, + "logits/chosen": -0.498263418674469, + "logits/rejected": -0.5627723336219788, + "logps/chosen": -0.0007239045226015151, + "logps/rejected": -1.9947481155395508, + "loss": 0.8668, + "nll_loss": 0.21668842434883118, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.23904522601515e-05, + "rewards/margins": 0.19940242171287537, + "rewards/rejected": -0.19947482645511627, + "step": 6007 + }, + { + "epoch": 4.154910096818811, + "grad_norm": 11.769681930541992, + "learning_rate": 3.2472721684339943e-05, + "log_odds_chosen": 9.573690414428711, + "log_odds_ratio": -0.0007894306909292936, + "logits/chosen": -0.8522692322731018, + "logits/rejected": -0.971172034740448, + "logps/chosen": -0.002234040992334485, + "logps/rejected": -1.9229815006256104, + "loss": 2.2819, + "nll_loss": 0.5703853964805603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022340410214383155, + "rewards/margins": 0.1920747309923172, + "rewards/rejected": -0.19229814410209656, + "step": 6008 + }, + { + "epoch": 4.155601659751038, + "grad_norm": 7.188262939453125, + "learning_rate": 3.2468879668049796e-05, + "log_odds_chosen": 8.566203117370605, + "log_odds_ratio": -0.0005539363482967019, + "logits/chosen": -0.6005150675773621, + "logits/rejected": -0.7028006315231323, + "logps/chosen": -0.0007507450645789504, + "logps/rejected": -1.3089596033096313, + "loss": 0.7489, + "nll_loss": 0.18715739250183105, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.507450936827809e-05, + "rewards/margins": 0.1308208703994751, + "rewards/rejected": -0.1308959573507309, + "step": 6009 + }, + { + "epoch": 4.1562932226832645, + "grad_norm": 10.46820068359375, + "learning_rate": 3.246503765175964e-05, + "log_odds_chosen": 9.431066513061523, + "log_odds_ratio": -0.0006439237622544169, + "logits/chosen": -0.2439686357975006, + "logits/rejected": -0.3215882182121277, + "logps/chosen": -0.0037022149190306664, + "logps/rejected": -1.7961797714233398, + "loss": 1.076, + "nll_loss": 0.26894134283065796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003702215035445988, + "rewards/margins": 0.17924776673316956, + "rewards/rejected": -0.1796179711818695, + "step": 6010 + }, + { + "epoch": 4.156984785615491, + "grad_norm": 8.864563941955566, + "learning_rate": 3.2461195635469494e-05, + "log_odds_chosen": 9.370769500732422, + "log_odds_ratio": -0.0011887021828442812, + "logits/chosen": -0.9394341707229614, + "logits/rejected": -1.0180696249008179, + "logps/chosen": -0.0038085738196969032, + "logps/rejected": -2.3024024963378906, + "loss": 1.4105, + "nll_loss": 0.35250598192214966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003808573819696903, + "rewards/margins": 0.22985941171646118, + "rewards/rejected": -0.23024027049541473, + "step": 6011 + }, + { + "epoch": 4.157676348547718, + "grad_norm": 6.366844177246094, + "learning_rate": 3.2457353619179346e-05, + "log_odds_chosen": 9.5271635055542, + "log_odds_ratio": -0.0001840710174292326, + "logits/chosen": -0.537406325340271, + "logits/rejected": -0.5141395330429077, + "logps/chosen": -0.0017021159874275327, + "logps/rejected": -2.4246888160705566, + "loss": 0.9852, + "nll_loss": 0.24628528952598572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001702116132946685, + "rewards/margins": 0.2422986775636673, + "rewards/rejected": -0.24246887862682343, + "step": 6012 + }, + { + "epoch": 4.158367911479945, + "grad_norm": 16.682363510131836, + "learning_rate": 3.24535116028892e-05, + "log_odds_chosen": 10.383573532104492, + "log_odds_ratio": -8.243302727350965e-05, + "logits/chosen": -0.6444257497787476, + "logits/rejected": -0.6103654503822327, + "logps/chosen": -0.0012489922810345888, + "logps/rejected": -2.256441593170166, + "loss": 0.7474, + "nll_loss": 0.18683573603630066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012489923392422497, + "rewards/margins": 0.22551925480365753, + "rewards/rejected": -0.22564415633678436, + "step": 6013 + }, + { + "epoch": 4.159059474412172, + "grad_norm": 11.687685012817383, + "learning_rate": 3.2449669586599045e-05, + "log_odds_chosen": 10.128734588623047, + "log_odds_ratio": -0.0001414440048392862, + "logits/chosen": -0.9767643809318542, + "logits/rejected": -1.0264098644256592, + "logps/chosen": -0.0003028717765118927, + "logps/rejected": -1.7673587799072266, + "loss": 1.4607, + "nll_loss": 0.3651718199253082, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0287175832199864e-05, + "rewards/margins": 0.17670559883117676, + "rewards/rejected": -0.17673589289188385, + "step": 6014 + }, + { + "epoch": 4.159751037344399, + "grad_norm": 8.603378295898438, + "learning_rate": 3.2445827570308904e-05, + "log_odds_chosen": 10.377304077148438, + "log_odds_ratio": -8.300002082251012e-05, + "logits/chosen": -0.7923702001571655, + "logits/rejected": -0.8099848031997681, + "logps/chosen": -0.0005034460918977857, + "logps/rejected": -2.1220457553863525, + "loss": 0.7945, + "nll_loss": 0.19862057268619537, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0344613555353135e-05, + "rewards/margins": 0.21215423941612244, + "rewards/rejected": -0.21220457553863525, + "step": 6015 + }, + { + "epoch": 4.1604426002766255, + "grad_norm": 8.598065376281738, + "learning_rate": 3.244198555401875e-05, + "log_odds_chosen": 10.105928421020508, + "log_odds_ratio": -8.371302101295441e-05, + "logits/chosen": -0.9319807887077332, + "logits/rejected": -0.9607030749320984, + "logps/chosen": -0.0002785869291983545, + "logps/rejected": -1.7898892164230347, + "loss": 1.0916, + "nll_loss": 0.2728910744190216, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7858695830218494e-05, + "rewards/margins": 0.17896106839179993, + "rewards/rejected": -0.17898890376091003, + "step": 6016 + }, + { + "epoch": 4.161134163208852, + "grad_norm": 8.079885482788086, + "learning_rate": 3.24381435377286e-05, + "log_odds_chosen": 10.28216552734375, + "log_odds_ratio": -0.00011916200310224667, + "logits/chosen": -0.5735573768615723, + "logits/rejected": -0.6151795983314514, + "logps/chosen": -0.00511480076238513, + "logps/rejected": -2.6346943378448486, + "loss": 1.3447, + "nll_loss": 0.33617204427719116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005114800878800452, + "rewards/margins": 0.26295799016952515, + "rewards/rejected": -0.2634694576263428, + "step": 6017 + }, + { + "epoch": 4.161825726141079, + "grad_norm": 6.784422874450684, + "learning_rate": 3.2434301521438454e-05, + "log_odds_chosen": 9.672140121459961, + "log_odds_ratio": -0.12526734173297882, + "logits/chosen": -0.12474574148654938, + "logits/rejected": -0.20955440402030945, + "logps/chosen": -0.019273709505796432, + "logps/rejected": -1.6844278573989868, + "loss": 1.4583, + "nll_loss": 0.35204043984413147, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019273711368441582, + "rewards/margins": 0.16651540994644165, + "rewards/rejected": -0.16844278573989868, + "step": 6018 + }, + { + "epoch": 4.162517289073306, + "grad_norm": 7.922595500946045, + "learning_rate": 3.24304595051483e-05, + "log_odds_chosen": 7.988283157348633, + "log_odds_ratio": -0.019988220185041428, + "logits/chosen": -0.6160153150558472, + "logits/rejected": -0.6896790862083435, + "logps/chosen": -0.014414435252547264, + "logps/rejected": -1.2084660530090332, + "loss": 1.2632, + "nll_loss": 0.31379175186157227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014414435718208551, + "rewards/margins": 0.11940516531467438, + "rewards/rejected": -0.12084661424160004, + "step": 6019 + }, + { + "epoch": 4.163208852005533, + "grad_norm": 6.944870948791504, + "learning_rate": 3.242661748885815e-05, + "log_odds_chosen": 9.153783798217773, + "log_odds_ratio": -0.005235993769019842, + "logits/chosen": -0.6062872409820557, + "logits/rejected": -0.6818249821662903, + "logps/chosen": -0.00888950563967228, + "logps/rejected": -1.3861886262893677, + "loss": 1.1206, + "nll_loss": 0.27963629364967346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008889504824765027, + "rewards/margins": 0.13772991299629211, + "rewards/rejected": -0.1386188566684723, + "step": 6020 + }, + { + "epoch": 4.16390041493776, + "grad_norm": 8.618224143981934, + "learning_rate": 3.2422775472568005e-05, + "log_odds_chosen": 8.96036148071289, + "log_odds_ratio": -0.002292029093950987, + "logits/chosen": -0.6807083487510681, + "logits/rejected": -0.7891625165939331, + "logps/chosen": -0.035568371415138245, + "logps/rejected": -2.684354066848755, + "loss": 0.9246, + "nll_loss": 0.23092928528785706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035568373277783394, + "rewards/margins": 0.26487860083580017, + "rewards/rejected": -0.26843541860580444, + "step": 6021 + }, + { + "epoch": 4.1645919778699865, + "grad_norm": 8.75467300415039, + "learning_rate": 3.241893345627786e-05, + "log_odds_chosen": 10.150123596191406, + "log_odds_ratio": -0.00011984707089141011, + "logits/chosen": -0.8578680157661438, + "logits/rejected": -0.9064348936080933, + "logps/chosen": -0.00032188548357225955, + "logps/rejected": -1.9927043914794922, + "loss": 1.2899, + "nll_loss": 0.32247376441955566, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2188549084821716e-05, + "rewards/margins": 0.19923825562000275, + "rewards/rejected": -0.19927042722702026, + "step": 6022 + }, + { + "epoch": 4.165283540802213, + "grad_norm": 8.196616172790527, + "learning_rate": 3.24150914399877e-05, + "log_odds_chosen": 9.288247108459473, + "log_odds_ratio": -0.00015818187966942787, + "logits/chosen": -0.5348951816558838, + "logits/rejected": -0.49097248911857605, + "logps/chosen": -0.0005929345497861505, + "logps/rejected": -1.3997464179992676, + "loss": 1.2029, + "nll_loss": 0.3007069528102875, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9293452068232e-05, + "rewards/margins": 0.1399153620004654, + "rewards/rejected": -0.1399746537208557, + "step": 6023 + }, + { + "epoch": 4.16597510373444, + "grad_norm": 11.910942077636719, + "learning_rate": 3.241124942369756e-05, + "log_odds_chosen": 10.133342742919922, + "log_odds_ratio": -0.00024989733356051147, + "logits/chosen": -0.22623278200626373, + "logits/rejected": -0.28069326281547546, + "logps/chosen": -0.0007148812874220312, + "logps/rejected": -1.95946204662323, + "loss": 1.1296, + "nll_loss": 0.28237199783325195, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.148813165258616e-05, + "rewards/margins": 0.19587473571300507, + "rewards/rejected": -0.19594621658325195, + "step": 6024 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 9.333582878112793, + "learning_rate": 3.240740740740741e-05, + "log_odds_chosen": 9.219890594482422, + "log_odds_ratio": -0.0006107841618359089, + "logits/chosen": -0.5428364872932434, + "logits/rejected": -0.5398375988006592, + "logps/chosen": -0.001833610818721354, + "logps/rejected": -1.6555602550506592, + "loss": 1.1237, + "nll_loss": 0.28086787462234497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018336107314098626, + "rewards/margins": 0.16537266969680786, + "rewards/rejected": -0.16555601358413696, + "step": 6025 + }, + { + "epoch": 4.167358229598894, + "grad_norm": 7.693150997161865, + "learning_rate": 3.240356539111726e-05, + "log_odds_chosen": 10.226139068603516, + "log_odds_ratio": -9.946373756974936e-05, + "logits/chosen": -0.5391544699668884, + "logits/rejected": -0.47298184037208557, + "logps/chosen": -0.00015591408009640872, + "logps/rejected": -1.5290985107421875, + "loss": 1.9471, + "nll_loss": 0.486769437789917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5591409464832395e-05, + "rewards/margins": 0.1528942734003067, + "rewards/rejected": -0.15290986001491547, + "step": 6026 + }, + { + "epoch": 4.168049792531121, + "grad_norm": 9.954532623291016, + "learning_rate": 3.239972337482711e-05, + "log_odds_chosen": 9.627957344055176, + "log_odds_ratio": -0.0001280440337723121, + "logits/chosen": -0.47433945536613464, + "logits/rejected": -0.519112229347229, + "logps/chosen": -0.0002949607733171433, + "logps/rejected": -1.4962272644042969, + "loss": 0.9205, + "nll_loss": 0.23010030388832092, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9496077331714332e-05, + "rewards/margins": 0.14959323406219482, + "rewards/rejected": -0.14962273836135864, + "step": 6027 + }, + { + "epoch": 4.1687413554633475, + "grad_norm": 7.624865531921387, + "learning_rate": 3.239588135853696e-05, + "log_odds_chosen": 9.83966064453125, + "log_odds_ratio": -0.0001383407216053456, + "logits/chosen": -0.6526281237602234, + "logits/rejected": -0.7059519290924072, + "logps/chosen": -0.0007061379146762192, + "logps/rejected": -1.6809070110321045, + "loss": 1.1578, + "nll_loss": 0.2894425392150879, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.061379437800497e-05, + "rewards/margins": 0.168020099401474, + "rewards/rejected": -0.16809071600437164, + "step": 6028 + }, + { + "epoch": 4.169432918395574, + "grad_norm": 12.61556339263916, + "learning_rate": 3.239203934224681e-05, + "log_odds_chosen": 8.602940559387207, + "log_odds_ratio": -0.0024029456544667482, + "logits/chosen": -0.789209246635437, + "logits/rejected": -0.879492461681366, + "logps/chosen": -0.04383961856365204, + "logps/rejected": -1.9742767810821533, + "loss": 1.3313, + "nll_loss": 0.3325907588005066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004383962135761976, + "rewards/margins": 0.19304370880126953, + "rewards/rejected": -0.1974276900291443, + "step": 6029 + }, + { + "epoch": 4.170124481327801, + "grad_norm": 11.348913192749023, + "learning_rate": 3.238819732595666e-05, + "log_odds_chosen": 10.986472129821777, + "log_odds_ratio": -0.00034344103187322617, + "logits/chosen": -0.9471420049667358, + "logits/rejected": -0.9723103046417236, + "logps/chosen": -0.012529644183814526, + "logps/rejected": -2.8672900199890137, + "loss": 1.6722, + "nll_loss": 0.41800376772880554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012529646046459675, + "rewards/margins": 0.28547605872154236, + "rewards/rejected": -0.28672903776168823, + "step": 6030 + }, + { + "epoch": 4.170816044260028, + "grad_norm": 11.74988079071045, + "learning_rate": 3.2384355309666516e-05, + "log_odds_chosen": 10.14430046081543, + "log_odds_ratio": -6.812495121266693e-05, + "logits/chosen": -0.7544984817504883, + "logits/rejected": -0.7648979425430298, + "logps/chosen": -0.0002719055919442326, + "logps/rejected": -1.9476408958435059, + "loss": 1.1991, + "nll_loss": 0.29976290464401245, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7190561013412662e-05, + "rewards/margins": 0.19473689794540405, + "rewards/rejected": -0.19476407766342163, + "step": 6031 + }, + { + "epoch": 4.171507607192255, + "grad_norm": 7.633362770080566, + "learning_rate": 3.238051329337636e-05, + "log_odds_chosen": 10.356497764587402, + "log_odds_ratio": -0.0013323475141078234, + "logits/chosen": -0.45199599862098694, + "logits/rejected": -0.5082833766937256, + "logps/chosen": -0.0002783353556878865, + "logps/rejected": -1.9102857112884521, + "loss": 1.0295, + "nll_loss": 0.25723761320114136, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7833535568788648e-05, + "rewards/margins": 0.19100074470043182, + "rewards/rejected": -0.19102856516838074, + "step": 6032 + }, + { + "epoch": 4.172199170124482, + "grad_norm": 19.848237991333008, + "learning_rate": 3.237667127708622e-05, + "log_odds_chosen": 9.494232177734375, + "log_odds_ratio": -0.0001813523704186082, + "logits/chosen": -0.8049036264419556, + "logits/rejected": -0.9353877902030945, + "logps/chosen": -0.0007863644859753549, + "logps/rejected": -1.993652105331421, + "loss": 1.5769, + "nll_loss": 0.3941996991634369, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.863643986638635e-05, + "rewards/margins": 0.1992865949869156, + "rewards/rejected": -0.19936522841453552, + "step": 6033 + }, + { + "epoch": 4.172890733056708, + "grad_norm": 11.86107349395752, + "learning_rate": 3.2372829260796066e-05, + "log_odds_chosen": 10.998078346252441, + "log_odds_ratio": -2.3432841771864332e-05, + "logits/chosen": -0.5509462952613831, + "logits/rejected": -0.6317671537399292, + "logps/chosen": -0.00016442319611087441, + "logps/rejected": -2.290289878845215, + "loss": 1.177, + "nll_loss": 0.29424944519996643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6442319974885322e-05, + "rewards/margins": 0.22901256382465363, + "rewards/rejected": -0.22902899980545044, + "step": 6034 + }, + { + "epoch": 4.173582295988935, + "grad_norm": 7.371344089508057, + "learning_rate": 3.236898724450592e-05, + "log_odds_chosen": 10.621955871582031, + "log_odds_ratio": -0.0003117120068054646, + "logits/chosen": -0.45636072754859924, + "logits/rejected": -0.5469639897346497, + "logps/chosen": -0.00021510719670914114, + "logps/rejected": -2.3043928146362305, + "loss": 1.0254, + "nll_loss": 0.25631406903266907, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1510721126105636e-05, + "rewards/margins": 0.23041778802871704, + "rewards/rejected": -0.23043929040431976, + "step": 6035 + }, + { + "epoch": 4.174273858921162, + "grad_norm": 6.2735419273376465, + "learning_rate": 3.236514522821577e-05, + "log_odds_chosen": 9.205724716186523, + "log_odds_ratio": -0.004912311211228371, + "logits/chosen": -0.6135444641113281, + "logits/rejected": -0.5729051828384399, + "logps/chosen": -0.027633341029286385, + "logps/rejected": -1.7607975006103516, + "loss": 0.9219, + "nll_loss": 0.22998502850532532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002763334196060896, + "rewards/margins": 0.17331641912460327, + "rewards/rejected": -0.17607976496219635, + "step": 6036 + }, + { + "epoch": 4.174965421853389, + "grad_norm": 6.859205722808838, + "learning_rate": 3.236130321192562e-05, + "log_odds_chosen": 7.7524919509887695, + "log_odds_ratio": -0.002604874549433589, + "logits/chosen": -0.7248696684837341, + "logits/rejected": -0.8061745762825012, + "logps/chosen": -0.0017519703833386302, + "logps/rejected": -1.1978111267089844, + "loss": 1.1287, + "nll_loss": 0.28192347288131714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017519704124424607, + "rewards/margins": 0.1196059063076973, + "rewards/rejected": -0.11978110671043396, + "step": 6037 + }, + { + "epoch": 4.175656984785616, + "grad_norm": 8.528666496276855, + "learning_rate": 3.235746119563547e-05, + "log_odds_chosen": 9.001119613647461, + "log_odds_ratio": -0.005455498117953539, + "logits/chosen": -0.4776851534843445, + "logits/rejected": -0.5432164669036865, + "logps/chosen": -0.005602252669632435, + "logps/rejected": -1.5672550201416016, + "loss": 1.2448, + "nll_loss": 0.3106645941734314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005602252203971148, + "rewards/margins": 0.15616527199745178, + "rewards/rejected": -0.15672549605369568, + "step": 6038 + }, + { + "epoch": 4.176348547717843, + "grad_norm": 8.460310935974121, + "learning_rate": 3.235361917934532e-05, + "log_odds_chosen": 10.947973251342773, + "log_odds_ratio": -2.6163972506765276e-05, + "logits/chosen": -0.5510140061378479, + "logits/rejected": -0.45879802107810974, + "logps/chosen": -0.002526005730032921, + "logps/rejected": -2.913262128829956, + "loss": 1.3312, + "nll_loss": 0.33279237151145935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025260058464482427, + "rewards/margins": 0.29107362031936646, + "rewards/rejected": -0.29132622480392456, + "step": 6039 + }, + { + "epoch": 4.177040110650069, + "grad_norm": 6.418223857879639, + "learning_rate": 3.2349777163055174e-05, + "log_odds_chosen": 8.813223838806152, + "log_odds_ratio": -0.0008950755000114441, + "logits/chosen": -0.4318494498729706, + "logits/rejected": -0.4654315114021301, + "logps/chosen": -0.009978784248232841, + "logps/rejected": -2.0951032638549805, + "loss": 1.0086, + "nll_loss": 0.252058744430542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009978783782571554, + "rewards/margins": 0.20851242542266846, + "rewards/rejected": -0.20951034128665924, + "step": 6040 + }, + { + "epoch": 4.177731673582296, + "grad_norm": 13.69494342803955, + "learning_rate": 3.234593514676502e-05, + "log_odds_chosen": 8.5537748336792, + "log_odds_ratio": -0.04404618591070175, + "logits/chosen": -0.49824628233909607, + "logits/rejected": -0.5926130414009094, + "logps/chosen": -0.011127600446343422, + "logps/rejected": -1.970362901687622, + "loss": 1.3995, + "nll_loss": 0.3454715609550476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011127600446343422, + "rewards/margins": 0.19592353701591492, + "rewards/rejected": -0.19703629612922668, + "step": 6041 + }, + { + "epoch": 4.178423236514523, + "grad_norm": 14.875024795532227, + "learning_rate": 3.234209313047488e-05, + "log_odds_chosen": 9.159958839416504, + "log_odds_ratio": -0.011396014131605625, + "logits/chosen": -0.36483901739120483, + "logits/rejected": -0.4657962918281555, + "logps/chosen": -0.012494352646172047, + "logps/rejected": -2.1094937324523926, + "loss": 1.254, + "nll_loss": 0.312368243932724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012494352413341403, + "rewards/margins": 0.20969991385936737, + "rewards/rejected": -0.2109493613243103, + "step": 6042 + }, + { + "epoch": 4.17911479944675, + "grad_norm": 10.145478248596191, + "learning_rate": 3.2338251114184725e-05, + "log_odds_chosen": 10.829462051391602, + "log_odds_ratio": -4.55024819530081e-05, + "logits/chosen": -0.6555365324020386, + "logits/rejected": -0.7552803754806519, + "logps/chosen": -0.0007520442013628781, + "logps/rejected": -2.7278504371643066, + "loss": 1.8365, + "nll_loss": 0.4591206908226013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.520441431552172e-05, + "rewards/margins": 0.27270984649658203, + "rewards/rejected": -0.2727850377559662, + "step": 6043 + }, + { + "epoch": 4.179806362378977, + "grad_norm": 6.937947750091553, + "learning_rate": 3.233440909789458e-05, + "log_odds_chosen": 10.597908020019531, + "log_odds_ratio": -6.170808774186298e-05, + "logits/chosen": -0.6114860773086548, + "logits/rejected": -0.5894403457641602, + "logps/chosen": -0.004680998623371124, + "logps/rejected": -2.3617563247680664, + "loss": 1.0449, + "nll_loss": 0.2612136900424957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046809983905404806, + "rewards/margins": 0.23570753633975983, + "rewards/rejected": -0.23617564141750336, + "step": 6044 + }, + { + "epoch": 4.180497925311204, + "grad_norm": 7.551707744598389, + "learning_rate": 3.233056708160443e-05, + "log_odds_chosen": 10.2476806640625, + "log_odds_ratio": -0.0013617995427921414, + "logits/chosen": -0.7141506671905518, + "logits/rejected": -0.7077866196632385, + "logps/chosen": -0.0011035851202905178, + "logps/rejected": -2.494922637939453, + "loss": 1.4364, + "nll_loss": 0.358967125415802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011035851639462635, + "rewards/margins": 0.2493819147348404, + "rewards/rejected": -0.24949225783348083, + "step": 6045 + }, + { + "epoch": 4.18118948824343, + "grad_norm": 9.954728126525879, + "learning_rate": 3.2326725065314275e-05, + "log_odds_chosen": 9.69566535949707, + "log_odds_ratio": -0.008394381031394005, + "logits/chosen": -0.4188861846923828, + "logits/rejected": -0.47607100009918213, + "logps/chosen": -0.007114926818758249, + "logps/rejected": -2.03810453414917, + "loss": 1.0229, + "nll_loss": 0.25488823652267456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007114927866496146, + "rewards/margins": 0.20309896767139435, + "rewards/rejected": -0.203810453414917, + "step": 6046 + }, + { + "epoch": 4.181881051175657, + "grad_norm": 10.269842147827148, + "learning_rate": 3.232288304902413e-05, + "log_odds_chosen": 11.128933906555176, + "log_odds_ratio": -3.3597352739889175e-05, + "logits/chosen": -0.438433974981308, + "logits/rejected": -0.5809503793716431, + "logps/chosen": -0.0003488063521217555, + "logps/rejected": -2.880452871322632, + "loss": 1.9092, + "nll_loss": 0.4772918224334717, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.488063521217555e-05, + "rewards/margins": 0.2880104184150696, + "rewards/rejected": -0.2880452871322632, + "step": 6047 + }, + { + "epoch": 4.182572614107884, + "grad_norm": 7.514199256896973, + "learning_rate": 3.231904103273398e-05, + "log_odds_chosen": 9.966431617736816, + "log_odds_ratio": -0.00013272061187308282, + "logits/chosen": -0.5462458729743958, + "logits/rejected": -0.5374727845191956, + "logps/chosen": -0.00018501865270081908, + "logps/rejected": -1.1819729804992676, + "loss": 0.9363, + "nll_loss": 0.23406431078910828, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8501865270081908e-05, + "rewards/margins": 0.11817879974842072, + "rewards/rejected": -0.11819729954004288, + "step": 6048 + }, + { + "epoch": 4.183264177040111, + "grad_norm": 8.622661590576172, + "learning_rate": 3.231519901644383e-05, + "log_odds_chosen": 10.133877754211426, + "log_odds_ratio": -0.00022927882673684508, + "logits/chosen": -0.3894578218460083, + "logits/rejected": -0.4564048945903778, + "logps/chosen": -0.0002046562294708565, + "logps/rejected": -1.751450777053833, + "loss": 0.9345, + "nll_loss": 0.2336038500070572, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0465622583287768e-05, + "rewards/margins": 0.17512460052967072, + "rewards/rejected": -0.17514505982398987, + "step": 6049 + }, + { + "epoch": 4.183955739972338, + "grad_norm": 8.824128150939941, + "learning_rate": 3.231135700015368e-05, + "log_odds_chosen": 10.109630584716797, + "log_odds_ratio": -9.241971565643325e-05, + "logits/chosen": -0.7996799945831299, + "logits/rejected": -0.868754506111145, + "logps/chosen": -0.0006303410045802593, + "logps/rejected": -1.9893128871917725, + "loss": 1.4641, + "nll_loss": 0.36602720618247986, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.303410191321746e-05, + "rewards/margins": 0.1988682597875595, + "rewards/rejected": -0.19893130660057068, + "step": 6050 + }, + { + "epoch": 4.1846473029045645, + "grad_norm": 7.818981170654297, + "learning_rate": 3.230751498386354e-05, + "log_odds_chosen": 8.395126342773438, + "log_odds_ratio": -0.0010139414807781577, + "logits/chosen": -0.7074143290519714, + "logits/rejected": -0.5708560943603516, + "logps/chosen": -0.006374886259436607, + "logps/rejected": -1.904820203781128, + "loss": 1.7666, + "nll_loss": 0.44153669476509094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006374885560944676, + "rewards/margins": 0.18984454870224, + "rewards/rejected": -0.1904820203781128, + "step": 6051 + }, + { + "epoch": 4.185338865836791, + "grad_norm": 9.535639762878418, + "learning_rate": 3.230367296757338e-05, + "log_odds_chosen": 9.560710906982422, + "log_odds_ratio": -0.0009351727785542607, + "logits/chosen": -0.7795642018318176, + "logits/rejected": -0.8280699253082275, + "logps/chosen": -0.009284489788115025, + "logps/rejected": -2.2715892791748047, + "loss": 1.5569, + "nll_loss": 0.38913029432296753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009284489788115025, + "rewards/margins": 0.22623050212860107, + "rewards/rejected": -0.22715893387794495, + "step": 6052 + }, + { + "epoch": 4.186030428769018, + "grad_norm": 10.58439826965332, + "learning_rate": 3.2299830951283236e-05, + "log_odds_chosen": 10.500917434692383, + "log_odds_ratio": -5.0223650760017335e-05, + "logits/chosen": -0.6961247324943542, + "logits/rejected": -0.7314223051071167, + "logps/chosen": -0.000207190663786605, + "logps/rejected": -1.9150999784469604, + "loss": 0.9968, + "nll_loss": 0.2492009401321411, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.071906601486262e-05, + "rewards/margins": 0.1914893090724945, + "rewards/rejected": -0.19151002168655396, + "step": 6053 + }, + { + "epoch": 4.186721991701245, + "grad_norm": 7.235872745513916, + "learning_rate": 3.229598893499309e-05, + "log_odds_chosen": 8.103301048278809, + "log_odds_ratio": -0.026408672332763672, + "logits/chosen": -0.683377742767334, + "logits/rejected": -0.6405139565467834, + "logps/chosen": -0.007550099398940802, + "logps/rejected": -1.4345066547393799, + "loss": 1.1267, + "nll_loss": 0.279028058052063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007550099398940802, + "rewards/margins": 0.14269566535949707, + "rewards/rejected": -0.14345067739486694, + "step": 6054 + }, + { + "epoch": 4.187413554633472, + "grad_norm": 7.28474760055542, + "learning_rate": 3.2292146918702934e-05, + "log_odds_chosen": 9.414963722229004, + "log_odds_ratio": -0.000253773556323722, + "logits/chosen": -0.6031832098960876, + "logits/rejected": -0.6954954266548157, + "logps/chosen": -0.00045355164911597967, + "logps/rejected": -1.8198481798171997, + "loss": 1.0316, + "nll_loss": 0.2578781247138977, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.535516200121492e-05, + "rewards/margins": 0.18193946778774261, + "rewards/rejected": -0.1819848120212555, + "step": 6055 + }, + { + "epoch": 4.188105117565699, + "grad_norm": 7.456570148468018, + "learning_rate": 3.2288304902412786e-05, + "log_odds_chosen": 9.448654174804688, + "log_odds_ratio": -0.010886706411838531, + "logits/chosen": -0.6217695474624634, + "logits/rejected": -0.5620329976081848, + "logps/chosen": -0.08244097232818604, + "logps/rejected": -1.5050716400146484, + "loss": 0.8834, + "nll_loss": 0.21975077688694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008244097232818604, + "rewards/margins": 0.14226306974887848, + "rewards/rejected": -0.1505071520805359, + "step": 6056 + }, + { + "epoch": 4.1887966804979255, + "grad_norm": 13.942699432373047, + "learning_rate": 3.228446288612264e-05, + "log_odds_chosen": 10.055791854858398, + "log_odds_ratio": -0.0003354833461344242, + "logits/chosen": -0.6835440397262573, + "logits/rejected": -0.7492095232009888, + "logps/chosen": -0.0004275651299394667, + "logps/rejected": -1.910922646522522, + "loss": 1.3045, + "nll_loss": 0.3260917067527771, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2756510083563626e-05, + "rewards/margins": 0.1910495012998581, + "rewards/rejected": -0.19109223783016205, + "step": 6057 + }, + { + "epoch": 4.189488243430152, + "grad_norm": 6.805962562561035, + "learning_rate": 3.228062086983249e-05, + "log_odds_chosen": 9.221537590026855, + "log_odds_ratio": -0.0026556141674518585, + "logits/chosen": -0.34238967299461365, + "logits/rejected": -0.40595924854278564, + "logps/chosen": -0.011685644276440144, + "logps/rejected": -2.2613413333892822, + "loss": 1.145, + "nll_loss": 0.28597864508628845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011685644276440144, + "rewards/margins": 0.22496557235717773, + "rewards/rejected": -0.22613412141799927, + "step": 6058 + }, + { + "epoch": 4.190179806362379, + "grad_norm": 9.257378578186035, + "learning_rate": 3.227677885354234e-05, + "log_odds_chosen": 10.462129592895508, + "log_odds_ratio": -0.00010772267705760896, + "logits/chosen": -0.955758810043335, + "logits/rejected": -0.9561267495155334, + "logps/chosen": -0.0004893806180916727, + "logps/rejected": -2.2030954360961914, + "loss": 1.0503, + "nll_loss": 0.26257139444351196, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8938061809167266e-05, + "rewards/margins": 0.2202606201171875, + "rewards/rejected": -0.2203095555305481, + "step": 6059 + }, + { + "epoch": 4.190871369294606, + "grad_norm": 8.771308898925781, + "learning_rate": 3.2272936837252196e-05, + "log_odds_chosen": 8.560272216796875, + "log_odds_ratio": -0.024763397872447968, + "logits/chosen": -0.5189086198806763, + "logits/rejected": -0.5635733008384705, + "logps/chosen": -0.0065836599096655846, + "logps/rejected": -1.3034086227416992, + "loss": 0.9276, + "nll_loss": 0.22943153977394104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006583660142496228, + "rewards/margins": 0.1296824961900711, + "rewards/rejected": -0.13034087419509888, + "step": 6060 + }, + { + "epoch": 4.191562932226833, + "grad_norm": 7.0978546142578125, + "learning_rate": 3.226909482096204e-05, + "log_odds_chosen": 8.697175979614258, + "log_odds_ratio": -0.00700350059196353, + "logits/chosen": -0.8847252130508423, + "logits/rejected": -0.8667929172515869, + "logps/chosen": -0.00935453362762928, + "logps/rejected": -2.0996615886688232, + "loss": 0.8984, + "nll_loss": 0.22390399873256683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009354533394798636, + "rewards/margins": 0.20903068780899048, + "rewards/rejected": -0.20996615290641785, + "step": 6061 + }, + { + "epoch": 4.19225449515906, + "grad_norm": 7.424405097961426, + "learning_rate": 3.2265252804671894e-05, + "log_odds_chosen": 8.875204086303711, + "log_odds_ratio": -0.001359203364700079, + "logits/chosen": -0.4519699811935425, + "logits/rejected": -0.39701735973358154, + "logps/chosen": -0.02748076431453228, + "logps/rejected": -2.3076295852661133, + "loss": 1.6804, + "nll_loss": 0.4199597239494324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027480763383209705, + "rewards/margins": 0.22801488637924194, + "rewards/rejected": -0.23076295852661133, + "step": 6062 + }, + { + "epoch": 4.1929460580912865, + "grad_norm": 11.73322582244873, + "learning_rate": 3.2261410788381746e-05, + "log_odds_chosen": 7.903585910797119, + "log_odds_ratio": -0.0016246134182438254, + "logits/chosen": -0.2822548449039459, + "logits/rejected": -0.40047261118888855, + "logps/chosen": -0.0024944001343101263, + "logps/rejected": -1.2181968688964844, + "loss": 1.5106, + "nll_loss": 0.3774777948856354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002494400250725448, + "rewards/margins": 0.12157024443149567, + "rewards/rejected": -0.12181967496871948, + "step": 6063 + }, + { + "epoch": 4.193637621023513, + "grad_norm": 13.860588073730469, + "learning_rate": 3.225756877209159e-05, + "log_odds_chosen": 11.175796508789062, + "log_odds_ratio": -2.1426389139378443e-05, + "logits/chosen": -0.8253310918807983, + "logits/rejected": -0.8749798536300659, + "logps/chosen": -0.0001329722290392965, + "logps/rejected": -2.177316665649414, + "loss": 1.5405, + "nll_loss": 0.3851134181022644, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3297223631525412e-05, + "rewards/margins": 0.21771836280822754, + "rewards/rejected": -0.21773165464401245, + "step": 6064 + }, + { + "epoch": 4.19432918395574, + "grad_norm": 6.953963279724121, + "learning_rate": 3.2253726755801445e-05, + "log_odds_chosen": 9.380655288696289, + "log_odds_ratio": -0.0009703417308628559, + "logits/chosen": -0.09669845551252365, + "logits/rejected": -0.08982955664396286, + "logps/chosen": -0.0030628573149442673, + "logps/rejected": -1.5637767314910889, + "loss": 1.3549, + "nll_loss": 0.33862051367759705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003062857431359589, + "rewards/margins": 0.15607139468193054, + "rewards/rejected": -0.1563776731491089, + "step": 6065 + }, + { + "epoch": 4.195020746887967, + "grad_norm": 8.322160720825195, + "learning_rate": 3.22498847395113e-05, + "log_odds_chosen": 10.143966674804688, + "log_odds_ratio": -0.0001574133784743026, + "logits/chosen": -0.4795479476451874, + "logits/rejected": -0.6363297700881958, + "logps/chosen": -0.0005211975076235831, + "logps/rejected": -2.026139497756958, + "loss": 0.9717, + "nll_loss": 0.24290986359119415, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21197471243795e-05, + "rewards/margins": 0.20256184041500092, + "rewards/rejected": -0.2026139497756958, + "step": 6066 + }, + { + "epoch": 4.195712309820194, + "grad_norm": 6.1845269203186035, + "learning_rate": 3.224604272322115e-05, + "log_odds_chosen": 9.356364250183105, + "log_odds_ratio": -0.00019514214363880455, + "logits/chosen": -0.3387003540992737, + "logits/rejected": -0.29374903440475464, + "logps/chosen": -0.0002730107225943357, + "logps/rejected": -1.181304693222046, + "loss": 0.6521, + "nll_loss": 0.16300448775291443, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7301071895635687e-05, + "rewards/margins": 0.11810317635536194, + "rewards/rejected": -0.11813047528266907, + "step": 6067 + }, + { + "epoch": 4.196403872752421, + "grad_norm": 13.75908088684082, + "learning_rate": 3.2242200706930995e-05, + "log_odds_chosen": 8.38686752319336, + "log_odds_ratio": -0.00156727759167552, + "logits/chosen": -0.7334190607070923, + "logits/rejected": -0.7994363307952881, + "logps/chosen": -0.009411602281033993, + "logps/rejected": -1.4189223051071167, + "loss": 1.4641, + "nll_loss": 0.3658694624900818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009411601931788027, + "rewards/margins": 0.14095106720924377, + "rewards/rejected": -0.1418922245502472, + "step": 6068 + }, + { + "epoch": 4.1970954356846475, + "grad_norm": 9.981428146362305, + "learning_rate": 3.2238358690640854e-05, + "log_odds_chosen": 10.802669525146484, + "log_odds_ratio": -4.3216430640313774e-05, + "logits/chosen": -0.30952149629592896, + "logits/rejected": -0.4717317223548889, + "logps/chosen": -0.0001675213425187394, + "logps/rejected": -2.0709903240203857, + "loss": 1.3458, + "nll_loss": 0.33644914627075195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.675213388807606e-05, + "rewards/margins": 0.20708227157592773, + "rewards/rejected": -0.20709902048110962, + "step": 6069 + }, + { + "epoch": 4.197786998616874, + "grad_norm": 6.36790657043457, + "learning_rate": 3.22345166743507e-05, + "log_odds_chosen": 10.169801712036133, + "log_odds_ratio": -6.394273805199191e-05, + "logits/chosen": -0.6206622123718262, + "logits/rejected": -0.671150803565979, + "logps/chosen": -0.00029006582917645574, + "logps/rejected": -1.9227371215820312, + "loss": 1.2688, + "nll_loss": 0.31719785928726196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9006583645241335e-05, + "rewards/margins": 0.19224470853805542, + "rewards/rejected": -0.19227372109889984, + "step": 6070 + }, + { + "epoch": 4.198478561549101, + "grad_norm": 13.407061576843262, + "learning_rate": 3.223067465806055e-05, + "log_odds_chosen": 10.443319320678711, + "log_odds_ratio": -7.405476935673505e-05, + "logits/chosen": -0.9477553367614746, + "logits/rejected": -0.9680919051170349, + "logps/chosen": -0.0002495343505870551, + "logps/rejected": -1.7604784965515137, + "loss": 1.0552, + "nll_loss": 0.26378288865089417, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4953433239716105e-05, + "rewards/margins": 0.17602290213108063, + "rewards/rejected": -0.17604784667491913, + "step": 6071 + }, + { + "epoch": 4.199170124481328, + "grad_norm": 6.7353034019470215, + "learning_rate": 3.2226832641770405e-05, + "log_odds_chosen": 9.591629028320312, + "log_odds_ratio": -0.0017070891335606575, + "logits/chosen": -0.2949681282043457, + "logits/rejected": -0.3625943064689636, + "logps/chosen": -0.0026625811588019133, + "logps/rejected": -2.1500141620635986, + "loss": 1.0277, + "nll_loss": 0.2567523121833801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026625811005942523, + "rewards/margins": 0.21473515033721924, + "rewards/rejected": -0.2150014191865921, + "step": 6072 + }, + { + "epoch": 4.199861687413555, + "grad_norm": 8.42748737335205, + "learning_rate": 3.222299062548025e-05, + "log_odds_chosen": 9.877130508422852, + "log_odds_ratio": -0.00023209548089653254, + "logits/chosen": -0.34057578444480896, + "logits/rejected": -0.370511531829834, + "logps/chosen": -0.0006250985898077488, + "logps/rejected": -1.5764187574386597, + "loss": 1.2644, + "nll_loss": 0.3160645365715027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250986189115793e-05, + "rewards/margins": 0.15757934749126434, + "rewards/rejected": -0.15764187276363373, + "step": 6073 + }, + { + "epoch": 4.200553250345782, + "grad_norm": 10.6553316116333, + "learning_rate": 3.22191486091901e-05, + "log_odds_chosen": 9.872819900512695, + "log_odds_ratio": -0.0001839471369748935, + "logits/chosen": -0.517216682434082, + "logits/rejected": -0.6004024147987366, + "logps/chosen": -0.0005334233283065259, + "logps/rejected": -1.601365089416504, + "loss": 1.3588, + "nll_loss": 0.339683473110199, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3342333558248356e-05, + "rewards/margins": 0.16008317470550537, + "rewards/rejected": -0.16013650596141815, + "step": 6074 + }, + { + "epoch": 4.2012448132780085, + "grad_norm": 9.332054138183594, + "learning_rate": 3.2215306592899955e-05, + "log_odds_chosen": 9.281045913696289, + "log_odds_ratio": -0.00027333354228176177, + "logits/chosen": -0.6094177961349487, + "logits/rejected": -0.8387130498886108, + "logps/chosen": -0.013858338817954063, + "logps/rejected": -1.8963396549224854, + "loss": 1.2064, + "nll_loss": 0.30156952142715454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013858338352292776, + "rewards/margins": 0.18824812769889832, + "rewards/rejected": -0.18963396549224854, + "step": 6075 + }, + { + "epoch": 4.201936376210235, + "grad_norm": 11.993613243103027, + "learning_rate": 3.221146457660981e-05, + "log_odds_chosen": 10.28180980682373, + "log_odds_ratio": -0.0001393863931298256, + "logits/chosen": -0.34310775995254517, + "logits/rejected": -0.4047355055809021, + "logps/chosen": -0.0015538227744400501, + "logps/rejected": -2.500300407409668, + "loss": 0.732, + "nll_loss": 0.18299169838428497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015538226580247283, + "rewards/margins": 0.24987468123435974, + "rewards/rejected": -0.2500300407409668, + "step": 6076 + }, + { + "epoch": 4.202627939142462, + "grad_norm": 9.744317054748535, + "learning_rate": 3.2207622560319654e-05, + "log_odds_chosen": 9.809011459350586, + "log_odds_ratio": -0.00011489679309306666, + "logits/chosen": -1.0774192810058594, + "logits/rejected": -1.0722367763519287, + "logps/chosen": -0.001422966131940484, + "logps/rejected": -2.1753878593444824, + "loss": 1.4331, + "nll_loss": 0.35827144980430603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014229661610443145, + "rewards/margins": 0.21739648282527924, + "rewards/rejected": -0.2175387740135193, + "step": 6077 + }, + { + "epoch": 4.203319502074689, + "grad_norm": 10.738659858703613, + "learning_rate": 3.220378054402951e-05, + "log_odds_chosen": 10.447037696838379, + "log_odds_ratio": -7.902842480689287e-05, + "logits/chosen": -0.37279537320137024, + "logits/rejected": -0.4409979283809662, + "logps/chosen": -0.00048452045302838087, + "logps/rejected": -2.268338918685913, + "loss": 1.3094, + "nll_loss": 0.3273436725139618, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8452049668412656e-05, + "rewards/margins": 0.22678545117378235, + "rewards/rejected": -0.22683387994766235, + "step": 6078 + }, + { + "epoch": 4.204011065006916, + "grad_norm": 8.255573272705078, + "learning_rate": 3.219993852773936e-05, + "log_odds_chosen": 9.974161148071289, + "log_odds_ratio": -7.075396570144221e-05, + "logits/chosen": -0.8444625735282898, + "logits/rejected": -0.7926241159439087, + "logps/chosen": -0.0003179244522470981, + "logps/rejected": -1.792923927307129, + "loss": 1.1419, + "nll_loss": 0.2854565382003784, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.179244595230557e-05, + "rewards/margins": 0.17926061153411865, + "rewards/rejected": -0.17929241061210632, + "step": 6079 + }, + { + "epoch": 4.204702627939143, + "grad_norm": 11.725409507751465, + "learning_rate": 3.219609651144921e-05, + "log_odds_chosen": 8.379392623901367, + "log_odds_ratio": -0.002833909820765257, + "logits/chosen": -0.6038598418235779, + "logits/rejected": -0.5764943361282349, + "logps/chosen": -0.0019874493591487408, + "logps/rejected": -1.4380276203155518, + "loss": 1.4758, + "nll_loss": 0.3686673045158386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019874492136295885, + "rewards/margins": 0.14360404014587402, + "rewards/rejected": -0.14380277693271637, + "step": 6080 + }, + { + "epoch": 4.2053941908713695, + "grad_norm": 9.011848449707031, + "learning_rate": 3.219225449515906e-05, + "log_odds_chosen": 10.082165718078613, + "log_odds_ratio": -5.680105823557824e-05, + "logits/chosen": -0.429607093334198, + "logits/rejected": -0.4168073534965515, + "logps/chosen": -0.0007915243622846901, + "logps/rejected": -2.1282901763916016, + "loss": 1.1148, + "nll_loss": 0.27870285511016846, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.915243622846901e-05, + "rewards/margins": 0.21274986863136292, + "rewards/rejected": -0.21282902359962463, + "step": 6081 + }, + { + "epoch": 4.206085753803596, + "grad_norm": 7.932432174682617, + "learning_rate": 3.218841247886891e-05, + "log_odds_chosen": 9.787649154663086, + "log_odds_ratio": -0.00029163056751713157, + "logits/chosen": -0.6433913707733154, + "logits/rejected": -0.7329627871513367, + "logps/chosen": -0.00033288367558270693, + "logps/rejected": -1.5147624015808105, + "loss": 1.0933, + "nll_loss": 0.27328595519065857, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3288368285866454e-05, + "rewards/margins": 0.15144294500350952, + "rewards/rejected": -0.15147623419761658, + "step": 6082 + }, + { + "epoch": 4.206777316735823, + "grad_norm": 5.991988182067871, + "learning_rate": 3.218457046257876e-05, + "log_odds_chosen": 10.22545051574707, + "log_odds_ratio": -5.5978794989641756e-05, + "logits/chosen": -0.2843632102012634, + "logits/rejected": -0.3155117630958557, + "logps/chosen": -0.00017813252634368837, + "logps/rejected": -1.5342832803726196, + "loss": 1.5026, + "nll_loss": 0.37563425302505493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.781325408956036e-05, + "rewards/margins": 0.15341052412986755, + "rewards/rejected": -0.1534283459186554, + "step": 6083 + }, + { + "epoch": 4.20746887966805, + "grad_norm": 9.368627548217773, + "learning_rate": 3.2180728446288614e-05, + "log_odds_chosen": 10.176862716674805, + "log_odds_ratio": -6.35625547147356e-05, + "logits/chosen": -0.2407834827899933, + "logits/rejected": -0.3474578559398651, + "logps/chosen": -0.00033351860474795103, + "logps/rejected": -2.1866698265075684, + "loss": 1.2583, + "nll_loss": 0.3145698308944702, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3351861929986626e-05, + "rewards/margins": 0.21863365173339844, + "rewards/rejected": -0.21866700053215027, + "step": 6084 + }, + { + "epoch": 4.208160442600277, + "grad_norm": 8.605694770812988, + "learning_rate": 3.2176886429998466e-05, + "log_odds_chosen": 8.470521926879883, + "log_odds_ratio": -0.03702199459075928, + "logits/chosen": -0.5160700082778931, + "logits/rejected": -0.5076208114624023, + "logps/chosen": -0.03687213361263275, + "logps/rejected": -2.100747585296631, + "loss": 0.8199, + "nll_loss": 0.20127034187316895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036872131749987602, + "rewards/margins": 0.20638757944107056, + "rewards/rejected": -0.210074782371521, + "step": 6085 + }, + { + "epoch": 4.208852005532504, + "grad_norm": 11.147346496582031, + "learning_rate": 3.217304441370831e-05, + "log_odds_chosen": 9.931164741516113, + "log_odds_ratio": -0.0015833813231438398, + "logits/chosen": -0.9578123688697815, + "logits/rejected": -0.9660002589225769, + "logps/chosen": -0.0014081323752179742, + "logps/rejected": -1.8071503639221191, + "loss": 1.2738, + "nll_loss": 0.31828776001930237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014081323752179742, + "rewards/margins": 0.1805742084980011, + "rewards/rejected": -0.18071502447128296, + "step": 6086 + }, + { + "epoch": 4.20954356846473, + "grad_norm": 5.810502052307129, + "learning_rate": 3.216920239741817e-05, + "log_odds_chosen": 8.74907398223877, + "log_odds_ratio": -0.0006889136275276542, + "logits/chosen": -0.3961918354034424, + "logits/rejected": -0.32647937536239624, + "logps/chosen": -0.000981375458650291, + "logps/rejected": -1.149702787399292, + "loss": 1.1302, + "nll_loss": 0.2824803292751312, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.813754877541214e-05, + "rewards/margins": 0.11487214267253876, + "rewards/rejected": -0.11497028172016144, + "step": 6087 + }, + { + "epoch": 4.210235131396957, + "grad_norm": 8.0988187789917, + "learning_rate": 3.216536038112802e-05, + "log_odds_chosen": 7.66169548034668, + "log_odds_ratio": -0.00979495607316494, + "logits/chosen": -0.4049184024333954, + "logits/rejected": -0.34466010332107544, + "logps/chosen": -0.0033011361956596375, + "logps/rejected": -0.9623667001724243, + "loss": 1.0782, + "nll_loss": 0.26856890320777893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033011360210366547, + "rewards/margins": 0.095906563103199, + "rewards/rejected": -0.09623667597770691, + "step": 6088 + }, + { + "epoch": 4.210926694329184, + "grad_norm": 9.195820808410645, + "learning_rate": 3.216151836483787e-05, + "log_odds_chosen": 10.497115135192871, + "log_odds_ratio": -4.924969471176155e-05, + "logits/chosen": -0.7107508778572083, + "logits/rejected": -0.7157828211784363, + "logps/chosen": -0.0003697610227391124, + "logps/rejected": -2.3502285480499268, + "loss": 0.9972, + "nll_loss": 0.2492949366569519, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6976100091123953e-05, + "rewards/margins": 0.23498587310314178, + "rewards/rejected": -0.23502285778522491, + "step": 6089 + }, + { + "epoch": 4.211618257261411, + "grad_norm": 9.332670211791992, + "learning_rate": 3.215767634854772e-05, + "log_odds_chosen": 9.460079193115234, + "log_odds_ratio": -0.02920219488441944, + "logits/chosen": -0.2606007158756256, + "logits/rejected": -0.2581832706928253, + "logps/chosen": -0.007522523868829012, + "logps/rejected": -1.9656100273132324, + "loss": 1.801, + "nll_loss": 0.44732433557510376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007522524101659656, + "rewards/margins": 0.19580873847007751, + "rewards/rejected": -0.19656097888946533, + "step": 6090 + }, + { + "epoch": 4.212309820193638, + "grad_norm": 4.959421157836914, + "learning_rate": 3.215383433225757e-05, + "log_odds_chosen": 9.708085060119629, + "log_odds_ratio": -0.000136367860250175, + "logits/chosen": -0.41468676924705505, + "logits/rejected": -0.48830556869506836, + "logps/chosen": -0.00017094127542804927, + "logps/rejected": -1.119558334350586, + "loss": 1.7192, + "nll_loss": 0.42978495359420776, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7094127542804927e-05, + "rewards/margins": 0.11193873733282089, + "rewards/rejected": -0.11195583641529083, + "step": 6091 + }, + { + "epoch": 4.213001383125865, + "grad_norm": 6.860667705535889, + "learning_rate": 3.214999231596742e-05, + "log_odds_chosen": 11.162261962890625, + "log_odds_ratio": -3.2493371691089123e-05, + "logits/chosen": -0.3308905363082886, + "logits/rejected": -0.3256247639656067, + "logps/chosen": -0.00015509540389757603, + "logps/rejected": -2.1066527366638184, + "loss": 0.7364, + "nll_loss": 0.18409138917922974, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5509540389757603e-05, + "rewards/margins": 0.2106497585773468, + "rewards/rejected": -0.2106652855873108, + "step": 6092 + }, + { + "epoch": 4.213692946058091, + "grad_norm": 10.44353199005127, + "learning_rate": 3.214615029967727e-05, + "log_odds_chosen": 8.433902740478516, + "log_odds_ratio": -0.012419860810041428, + "logits/chosen": -0.6420484781265259, + "logits/rejected": -0.7362602949142456, + "logps/chosen": -0.07046063244342804, + "logps/rejected": -1.4887151718139648, + "loss": 1.8438, + "nll_loss": 0.4597092270851135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007046062964946032, + "rewards/margins": 0.14182545244693756, + "rewards/rejected": -0.148871511220932, + "step": 6093 + }, + { + "epoch": 4.214384508990318, + "grad_norm": 9.768402099609375, + "learning_rate": 3.2142308283387125e-05, + "log_odds_chosen": 9.392027854919434, + "log_odds_ratio": -0.0011272934498265386, + "logits/chosen": -0.37182796001434326, + "logits/rejected": -0.39350610971450806, + "logps/chosen": -0.000708098232280463, + "logps/rejected": -1.8146228790283203, + "loss": 1.1423, + "nll_loss": 0.28546077013015747, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.08098232280463e-05, + "rewards/margins": 0.18139147758483887, + "rewards/rejected": -0.18146228790283203, + "step": 6094 + }, + { + "epoch": 4.215076071922545, + "grad_norm": 9.150002479553223, + "learning_rate": 3.213846626709697e-05, + "log_odds_chosen": 8.399335861206055, + "log_odds_ratio": -0.0009091562824323773, + "logits/chosen": -0.6835078001022339, + "logits/rejected": -0.685696005821228, + "logps/chosen": -0.0009837490506470203, + "logps/rejected": -1.2630008459091187, + "loss": 1.8023, + "nll_loss": 0.4504738450050354, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.837490506470203e-05, + "rewards/margins": 0.12620171904563904, + "rewards/rejected": -0.12630009651184082, + "step": 6095 + }, + { + "epoch": 4.215767634854772, + "grad_norm": 6.342299461364746, + "learning_rate": 3.213462425080683e-05, + "log_odds_chosen": 8.81633472442627, + "log_odds_ratio": -0.001122811110690236, + "logits/chosen": -0.24323329329490662, + "logits/rejected": -0.17171940207481384, + "logps/chosen": -0.0029868108686059713, + "logps/rejected": -1.5076463222503662, + "loss": 1.367, + "nll_loss": 0.3416462242603302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029868108686059713, + "rewards/margins": 0.1504659503698349, + "rewards/rejected": -0.15076464414596558, + "step": 6096 + }, + { + "epoch": 4.216459197786999, + "grad_norm": 10.462725639343262, + "learning_rate": 3.2130782234516675e-05, + "log_odds_chosen": 10.151618957519531, + "log_odds_ratio": -0.0014048486482352018, + "logits/chosen": -0.5716694593429565, + "logits/rejected": -0.7377324104309082, + "logps/chosen": -0.0014676820719614625, + "logps/rejected": -1.750305414199829, + "loss": 1.2301, + "nll_loss": 0.3073880076408386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014676823047921062, + "rewards/margins": 0.17488376796245575, + "rewards/rejected": -0.17503052949905396, + "step": 6097 + }, + { + "epoch": 4.217150760719226, + "grad_norm": 7.823307037353516, + "learning_rate": 3.212694021822653e-05, + "log_odds_chosen": 10.025672912597656, + "log_odds_ratio": -0.0014221564633771777, + "logits/chosen": -0.534694492816925, + "logits/rejected": -0.5644404888153076, + "logps/chosen": -0.008679674938321114, + "logps/rejected": -3.0878677368164062, + "loss": 1.4253, + "nll_loss": 0.35618501901626587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008679674356244504, + "rewards/margins": 0.30791881680488586, + "rewards/rejected": -0.3087867498397827, + "step": 6098 + }, + { + "epoch": 4.217842323651452, + "grad_norm": 6.906431198120117, + "learning_rate": 3.212309820193638e-05, + "log_odds_chosen": 10.416519165039062, + "log_odds_ratio": -0.00010263586591463536, + "logits/chosen": -0.6076978445053101, + "logits/rejected": -0.5971615314483643, + "logps/chosen": -0.001731468946672976, + "logps/rejected": -2.080808162689209, + "loss": 1.1623, + "nll_loss": 0.29056233167648315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017314690921921283, + "rewards/margins": 0.20790766179561615, + "rewards/rejected": -0.20808082818984985, + "step": 6099 + }, + { + "epoch": 4.218533886583679, + "grad_norm": 3.3100812435150146, + "learning_rate": 3.211925618564623e-05, + "log_odds_chosen": 9.550172805786133, + "log_odds_ratio": -0.002463590120896697, + "logits/chosen": -0.4040372669696808, + "logits/rejected": -0.3574890196323395, + "logps/chosen": -0.00257070641964674, + "logps/rejected": -2.1435904502868652, + "loss": 1.1569, + "nll_loss": 0.2889706492424011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000257070641964674, + "rewards/margins": 0.21410197019577026, + "rewards/rejected": -0.21435905992984772, + "step": 6100 + }, + { + "epoch": 4.219225449515906, + "grad_norm": 9.177789688110352, + "learning_rate": 3.211541416935608e-05, + "log_odds_chosen": 8.393549919128418, + "log_odds_ratio": -0.07194874435663223, + "logits/chosen": -0.1837049126625061, + "logits/rejected": -0.1863904595375061, + "logps/chosen": -0.039502695202827454, + "logps/rejected": -1.401501178741455, + "loss": 1.1874, + "nll_loss": 0.2896571457386017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003950269892811775, + "rewards/margins": 0.13619986176490784, + "rewards/rejected": -0.14015012979507446, + "step": 6101 + }, + { + "epoch": 4.219917012448133, + "grad_norm": 7.0887579917907715, + "learning_rate": 3.211157215306593e-05, + "log_odds_chosen": 9.437784194946289, + "log_odds_ratio": -0.00438450463116169, + "logits/chosen": -0.5653685331344604, + "logits/rejected": -0.6047671437263489, + "logps/chosen": -0.0028303221333771944, + "logps/rejected": -1.6747334003448486, + "loss": 0.8287, + "nll_loss": 0.2067372351884842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000283032248262316, + "rewards/margins": 0.1671903133392334, + "rewards/rejected": -0.16747334599494934, + "step": 6102 + }, + { + "epoch": 4.22060857538036, + "grad_norm": 12.398027420043945, + "learning_rate": 3.210773013677578e-05, + "log_odds_chosen": 10.742816925048828, + "log_odds_ratio": -0.00032149453181773424, + "logits/chosen": -0.616948127746582, + "logits/rejected": -0.6294673085212708, + "logps/chosen": -0.0004734890826512128, + "logps/rejected": -2.062647819519043, + "loss": 1.9112, + "nll_loss": 0.47776588797569275, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734890899271704e-05, + "rewards/margins": 0.2062174379825592, + "rewards/rejected": -0.20626477897167206, + "step": 6103 + }, + { + "epoch": 4.2213001383125865, + "grad_norm": 8.384824752807617, + "learning_rate": 3.210388812048563e-05, + "log_odds_chosen": 8.673848152160645, + "log_odds_ratio": -0.013089235872030258, + "logits/chosen": -0.26488542556762695, + "logits/rejected": -0.3186490535736084, + "logps/chosen": -0.07050205767154694, + "logps/rejected": -1.806774616241455, + "loss": 1.0463, + "nll_loss": 0.26025834679603577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007050206419080496, + "rewards/margins": 0.17362727224826813, + "rewards/rejected": -0.18067745864391327, + "step": 6104 + }, + { + "epoch": 4.221991701244813, + "grad_norm": 8.230053901672363, + "learning_rate": 3.210004610419549e-05, + "log_odds_chosen": 9.35822582244873, + "log_odds_ratio": -0.0004413676797412336, + "logits/chosen": -0.4035882353782654, + "logits/rejected": -0.570648729801178, + "logps/chosen": -0.0005642552860081196, + "logps/rejected": -1.2417488098144531, + "loss": 1.6527, + "nll_loss": 0.4131428003311157, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.64255278732162e-05, + "rewards/margins": 0.12411844730377197, + "rewards/rejected": -0.12417487800121307, + "step": 6105 + }, + { + "epoch": 4.22268326417704, + "grad_norm": 8.342848777770996, + "learning_rate": 3.2096204087905334e-05, + "log_odds_chosen": 9.080486297607422, + "log_odds_ratio": -0.01734662614762783, + "logits/chosen": -0.4084089398384094, + "logits/rejected": -0.392818808555603, + "logps/chosen": -0.00646335119381547, + "logps/rejected": -1.2589843273162842, + "loss": 1.5447, + "nll_loss": 0.38444066047668457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006463351310230792, + "rewards/margins": 0.12525209784507751, + "rewards/rejected": -0.12589843571186066, + "step": 6106 + }, + { + "epoch": 4.223374827109267, + "grad_norm": 9.799078941345215, + "learning_rate": 3.2092362071615186e-05, + "log_odds_chosen": 10.004591941833496, + "log_odds_ratio": -9.156642772722989e-05, + "logits/chosen": -0.48572254180908203, + "logits/rejected": -0.5722478628158569, + "logps/chosen": -0.00025361331063322723, + "logps/rejected": -1.7181507349014282, + "loss": 1.7266, + "nll_loss": 0.43164747953414917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5361332518514246e-05, + "rewards/margins": 0.1717897206544876, + "rewards/rejected": -0.17181509733200073, + "step": 6107 + }, + { + "epoch": 4.224066390041494, + "grad_norm": 7.455623149871826, + "learning_rate": 3.208852005532504e-05, + "log_odds_chosen": 10.768928527832031, + "log_odds_ratio": -5.826863707625307e-05, + "logits/chosen": -0.40005171298980713, + "logits/rejected": -0.47438183426856995, + "logps/chosen": -0.00014770789130125195, + "logps/rejected": -1.9838204383850098, + "loss": 1.1787, + "nll_loss": 0.29465824365615845, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4770789675822016e-05, + "rewards/margins": 0.19836726784706116, + "rewards/rejected": -0.19838203489780426, + "step": 6108 + }, + { + "epoch": 4.224757952973721, + "grad_norm": 7.88972806930542, + "learning_rate": 3.208467803903489e-05, + "log_odds_chosen": 10.738612174987793, + "log_odds_ratio": -4.431870911503211e-05, + "logits/chosen": -0.6616283059120178, + "logits/rejected": -0.7461217641830444, + "logps/chosen": -0.00019027273810934275, + "logps/rejected": -2.072432041168213, + "loss": 1.2048, + "nll_loss": 0.3012027144432068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9027273083338514e-05, + "rewards/margins": 0.20722419023513794, + "rewards/rejected": -0.20724321901798248, + "step": 6109 + }, + { + "epoch": 4.2254495159059475, + "grad_norm": 13.2335844039917, + "learning_rate": 3.208083602274474e-05, + "log_odds_chosen": 10.215333938598633, + "log_odds_ratio": -0.0009812734788283706, + "logits/chosen": -0.5087462663650513, + "logits/rejected": -0.5887828469276428, + "logps/chosen": -0.0011902485275641084, + "logps/rejected": -2.5018463134765625, + "loss": 1.3774, + "nll_loss": 0.3442583680152893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011902485857717693, + "rewards/margins": 0.2500656247138977, + "rewards/rejected": -0.2501846253871918, + "step": 6110 + }, + { + "epoch": 4.226141078838174, + "grad_norm": 7.052605152130127, + "learning_rate": 3.207699400645459e-05, + "log_odds_chosen": 10.94500732421875, + "log_odds_ratio": -5.660950409946963e-05, + "logits/chosen": -0.5007820725440979, + "logits/rejected": -0.571925699710846, + "logps/chosen": -0.00013881264021620154, + "logps/rejected": -1.9232251644134521, + "loss": 1.068, + "nll_loss": 0.2669881284236908, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3881262930226512e-05, + "rewards/margins": 0.19230863451957703, + "rewards/rejected": -0.1923225224018097, + "step": 6111 + }, + { + "epoch": 4.226832641770401, + "grad_norm": 7.348779678344727, + "learning_rate": 3.207315199016444e-05, + "log_odds_chosen": 10.577987670898438, + "log_odds_ratio": -5.887104634894058e-05, + "logits/chosen": -0.3846423029899597, + "logits/rejected": -0.4360952079296112, + "logps/chosen": -0.00017599599959794432, + "logps/rejected": -1.9604169130325317, + "loss": 1.1102, + "nll_loss": 0.27753946185112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7599601051188074e-05, + "rewards/margins": 0.1960241049528122, + "rewards/rejected": -0.19604171812534332, + "step": 6112 + }, + { + "epoch": 4.227524204702628, + "grad_norm": 6.602800369262695, + "learning_rate": 3.206930997387429e-05, + "log_odds_chosen": 9.438689231872559, + "log_odds_ratio": -0.000783280935138464, + "logits/chosen": -0.5962222814559937, + "logits/rejected": -0.612580418586731, + "logps/chosen": -0.0053511569276452065, + "logps/rejected": -1.8292964696884155, + "loss": 1.7798, + "nll_loss": 0.44486165046691895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005351156578399241, + "rewards/margins": 0.18239453434944153, + "rewards/rejected": -0.1829296350479126, + "step": 6113 + }, + { + "epoch": 4.228215767634855, + "grad_norm": 22.292438507080078, + "learning_rate": 3.2065467957584147e-05, + "log_odds_chosen": 8.830830574035645, + "log_odds_ratio": -0.07559003680944443, + "logits/chosen": -0.3832665681838989, + "logits/rejected": -0.43286067247390747, + "logps/chosen": -0.25410130620002747, + "logps/rejected": -2.479095220565796, + "loss": 1.4115, + "nll_loss": 0.3453068137168884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025410132482647896, + "rewards/margins": 0.22249938547611237, + "rewards/rejected": -0.2479095309972763, + "step": 6114 + }, + { + "epoch": 4.228907330567082, + "grad_norm": 9.745912551879883, + "learning_rate": 3.206162594129399e-05, + "log_odds_chosen": 8.932182312011719, + "log_odds_ratio": -0.08399414271116257, + "logits/chosen": -0.11780837923288345, + "logits/rejected": -0.20581534504890442, + "logps/chosen": -0.06577721983194351, + "logps/rejected": -1.7126953601837158, + "loss": 0.9885, + "nll_loss": 0.23871365189552307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006577721331268549, + "rewards/margins": 0.16469180583953857, + "rewards/rejected": -0.17126955091953278, + "step": 6115 + }, + { + "epoch": 4.2295988934993085, + "grad_norm": 8.25196647644043, + "learning_rate": 3.2057783925003845e-05, + "log_odds_chosen": 9.533069610595703, + "log_odds_ratio": -0.03513272851705551, + "logits/chosen": -0.6124993562698364, + "logits/rejected": -0.5530567169189453, + "logps/chosen": -0.007125381845980883, + "logps/rejected": -1.7799084186553955, + "loss": 1.0589, + "nll_loss": 0.26121440529823303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007125381380319595, + "rewards/margins": 0.1772783100605011, + "rewards/rejected": -0.1779908537864685, + "step": 6116 + }, + { + "epoch": 4.230290456431535, + "grad_norm": 15.340441703796387, + "learning_rate": 3.20539419087137e-05, + "log_odds_chosen": 9.796329498291016, + "log_odds_ratio": -0.00018708838615566492, + "logits/chosen": -0.23748236894607544, + "logits/rejected": -0.30873650312423706, + "logps/chosen": -0.00044121668906882405, + "logps/rejected": -1.911644697189331, + "loss": 1.0922, + "nll_loss": 0.2730366289615631, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4121668906882405e-05, + "rewards/margins": 0.19112035632133484, + "rewards/rejected": -0.19116447865962982, + "step": 6117 + }, + { + "epoch": 4.230982019363762, + "grad_norm": 8.803054809570312, + "learning_rate": 3.205009989242355e-05, + "log_odds_chosen": 9.855988502502441, + "log_odds_ratio": -0.0022794578690081835, + "logits/chosen": -0.4125463366508484, + "logits/rejected": -0.48373299837112427, + "logps/chosen": -0.0019304307643324137, + "logps/rejected": -1.82631254196167, + "loss": 1.1137, + "nll_loss": 0.2781933844089508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019304307352285832, + "rewards/margins": 0.1824381947517395, + "rewards/rejected": -0.182631254196167, + "step": 6118 + }, + { + "epoch": 4.231673582295989, + "grad_norm": 10.609899520874023, + "learning_rate": 3.2046257876133395e-05, + "log_odds_chosen": 9.470155715942383, + "log_odds_ratio": -0.0007128569413907826, + "logits/chosen": -0.9156774282455444, + "logits/rejected": -0.9039434194564819, + "logps/chosen": -0.0009397302637808025, + "logps/rejected": -1.8147461414337158, + "loss": 2.4276, + "nll_loss": 0.606838583946228, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.39730234676972e-05, + "rewards/margins": 0.18138065934181213, + "rewards/rejected": -0.18147462606430054, + "step": 6119 + }, + { + "epoch": 4.232365145228216, + "grad_norm": 10.940679550170898, + "learning_rate": 3.204241585984325e-05, + "log_odds_chosen": 10.973442077636719, + "log_odds_ratio": -1.9855779100907966e-05, + "logits/chosen": -0.09074971824884415, + "logits/rejected": -0.25256481766700745, + "logps/chosen": -0.0001564031554153189, + "logps/rejected": -2.141129970550537, + "loss": 0.962, + "nll_loss": 0.2405095249414444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5640316632925533e-05, + "rewards/margins": 0.21409735083580017, + "rewards/rejected": -0.2141129970550537, + "step": 6120 + }, + { + "epoch": 4.233056708160443, + "grad_norm": 9.780346870422363, + "learning_rate": 3.20385738435531e-05, + "log_odds_chosen": 10.847525596618652, + "log_odds_ratio": -0.00013202108675614, + "logits/chosen": -0.5023176074028015, + "logits/rejected": -0.5953850746154785, + "logps/chosen": -0.00022254750365391374, + "logps/rejected": -2.1902830600738525, + "loss": 0.767, + "nll_loss": 0.19174344837665558, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2254751456785016e-05, + "rewards/margins": 0.21900604665279388, + "rewards/rejected": -0.2190282940864563, + "step": 6121 + }, + { + "epoch": 4.2337482710926695, + "grad_norm": 7.069876194000244, + "learning_rate": 3.2034731827262946e-05, + "log_odds_chosen": 9.487290382385254, + "log_odds_ratio": -0.00037358151166699827, + "logits/chosen": -0.4292406439781189, + "logits/rejected": -0.4715169370174408, + "logps/chosen": -0.0007413811981678009, + "logps/rejected": -1.953993320465088, + "loss": 0.8595, + "nll_loss": 0.21483467519283295, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.413812272716314e-05, + "rewards/margins": 0.19532519578933716, + "rewards/rejected": -0.19539934396743774, + "step": 6122 + }, + { + "epoch": 4.234439834024896, + "grad_norm": 8.1299467086792, + "learning_rate": 3.20308898109728e-05, + "log_odds_chosen": 9.821857452392578, + "log_odds_ratio": -0.00037624494871124625, + "logits/chosen": -0.34869810938835144, + "logits/rejected": -0.4733930230140686, + "logps/chosen": -0.005378293804824352, + "logps/rejected": -2.4110655784606934, + "loss": 1.1122, + "nll_loss": 0.27802199125289917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005378293571993709, + "rewards/margins": 0.24056872725486755, + "rewards/rejected": -0.2411065399646759, + "step": 6123 + }, + { + "epoch": 4.235131396957123, + "grad_norm": 6.791889190673828, + "learning_rate": 3.202704779468265e-05, + "log_odds_chosen": 9.134946823120117, + "log_odds_ratio": -0.000719600124284625, + "logits/chosen": -0.43868210911750793, + "logits/rejected": -0.429718554019928, + "logps/chosen": -0.0015843857545405626, + "logps/rejected": -1.8186661005020142, + "loss": 1.1613, + "nll_loss": 0.2902475595474243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015843857545405626, + "rewards/margins": 0.1817081868648529, + "rewards/rejected": -0.1818666309118271, + "step": 6124 + }, + { + "epoch": 4.23582295988935, + "grad_norm": 6.706216812133789, + "learning_rate": 3.20232057783925e-05, + "log_odds_chosen": 10.093393325805664, + "log_odds_ratio": -0.00034954206785187125, + "logits/chosen": -0.4765441417694092, + "logits/rejected": -0.47055739164352417, + "logps/chosen": -0.0024698227643966675, + "logps/rejected": -1.943615436553955, + "loss": 1.2416, + "nll_loss": 0.31037530303001404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000246982293901965, + "rewards/margins": 0.1941145360469818, + "rewards/rejected": -0.19436152279376984, + "step": 6125 + }, + { + "epoch": 4.236514522821577, + "grad_norm": 7.8888983726501465, + "learning_rate": 3.201936376210235e-05, + "log_odds_chosen": 7.003107070922852, + "log_odds_ratio": -0.03631995618343353, + "logits/chosen": -0.4368114769458771, + "logits/rejected": -0.5176033973693848, + "logps/chosen": -0.011342689394950867, + "logps/rejected": -1.1231927871704102, + "loss": 1.4229, + "nll_loss": 0.3520943522453308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011342689394950867, + "rewards/margins": 0.11118502169847488, + "rewards/rejected": -0.11231927573680878, + "step": 6126 + }, + { + "epoch": 4.237206085753804, + "grad_norm": 12.106274604797363, + "learning_rate": 3.201552174581221e-05, + "log_odds_chosen": 9.750165939331055, + "log_odds_ratio": -0.0005116397514939308, + "logits/chosen": -0.30245599150657654, + "logits/rejected": -0.35725438594818115, + "logps/chosen": -0.003118205117061734, + "logps/rejected": -2.2988009452819824, + "loss": 1.3329, + "nll_loss": 0.3331792652606964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003118205349892378, + "rewards/margins": 0.2295682728290558, + "rewards/rejected": -0.22988007962703705, + "step": 6127 + }, + { + "epoch": 4.2378976486860305, + "grad_norm": 5.6999969482421875, + "learning_rate": 3.2011679729522054e-05, + "log_odds_chosen": 10.503633499145508, + "log_odds_ratio": -9.613503061700612e-05, + "logits/chosen": -0.5862594842910767, + "logits/rejected": -0.5881119966506958, + "logps/chosen": -0.0017665009945631027, + "logps/rejected": -2.8170573711395264, + "loss": 1.2073, + "nll_loss": 0.30181971192359924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001766501081874594, + "rewards/margins": 0.28152909874916077, + "rewards/rejected": -0.281705766916275, + "step": 6128 + }, + { + "epoch": 4.238589211618257, + "grad_norm": 6.479320049285889, + "learning_rate": 3.2007837713231906e-05, + "log_odds_chosen": 8.953347206115723, + "log_odds_ratio": -0.0009635446476750076, + "logits/chosen": -0.5452961921691895, + "logits/rejected": -0.5558996796607971, + "logps/chosen": -0.014149404130876064, + "logps/rejected": -2.3288004398345947, + "loss": 1.6097, + "nll_loss": 0.4023188054561615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001414940576069057, + "rewards/margins": 0.23146511614322662, + "rewards/rejected": -0.23288005590438843, + "step": 6129 + }, + { + "epoch": 4.239280774550484, + "grad_norm": 9.907674789428711, + "learning_rate": 3.200399569694176e-05, + "log_odds_chosen": 9.380223274230957, + "log_odds_ratio": -0.007749211508780718, + "logits/chosen": -0.5595596432685852, + "logits/rejected": -0.6764500737190247, + "logps/chosen": -0.002896445570513606, + "logps/rejected": -2.051048755645752, + "loss": 0.9159, + "nll_loss": 0.22821009159088135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002896445512305945, + "rewards/margins": 0.20481520891189575, + "rewards/rejected": -0.20510487258434296, + "step": 6130 + }, + { + "epoch": 4.239972337482711, + "grad_norm": 8.833740234375, + "learning_rate": 3.2000153680651604e-05, + "log_odds_chosen": 10.789620399475098, + "log_odds_ratio": -5.9470221458468586e-05, + "logits/chosen": -0.7925410866737366, + "logits/rejected": -0.8144933581352234, + "logps/chosen": -0.00027380624669604003, + "logps/rejected": -2.0729591846466064, + "loss": 0.9128, + "nll_loss": 0.22818849980831146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7380625397199765e-05, + "rewards/margins": 0.20726853609085083, + "rewards/rejected": -0.20729590952396393, + "step": 6131 + }, + { + "epoch": 4.240663900414938, + "grad_norm": 11.288015365600586, + "learning_rate": 3.199631166436146e-05, + "log_odds_chosen": 10.079020500183105, + "log_odds_ratio": -8.671080286148936e-05, + "logits/chosen": -0.8377615213394165, + "logits/rejected": -0.9092694520950317, + "logps/chosen": -0.00032979599200189114, + "logps/rejected": -1.8943513631820679, + "loss": 1.6129, + "nll_loss": 0.4032214879989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2979594834614545e-05, + "rewards/margins": 0.18940216302871704, + "rewards/rejected": -0.18943513929843903, + "step": 6132 + }, + { + "epoch": 4.241355463347165, + "grad_norm": 9.863534927368164, + "learning_rate": 3.199246964807131e-05, + "log_odds_chosen": 10.014764785766602, + "log_odds_ratio": -0.00013936430332250893, + "logits/chosen": -0.7015513777732849, + "logits/rejected": -0.7403870224952698, + "logps/chosen": -0.0027051009237766266, + "logps/rejected": -2.1331331729888916, + "loss": 1.6729, + "nll_loss": 0.41820576786994934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027051009237766266, + "rewards/margins": 0.21304281055927277, + "rewards/rejected": -0.21331332623958588, + "step": 6133 + }, + { + "epoch": 4.2420470262793915, + "grad_norm": 9.676593780517578, + "learning_rate": 3.198862763178116e-05, + "log_odds_chosen": 9.681331634521484, + "log_odds_ratio": -0.00056239910190925, + "logits/chosen": -0.7100040316581726, + "logits/rejected": -0.8493018746376038, + "logps/chosen": -0.0024421019479632378, + "logps/rejected": -2.028809070587158, + "loss": 1.0036, + "nll_loss": 0.25083646178245544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002442101831547916, + "rewards/margins": 0.2026366889476776, + "rewards/rejected": -0.20288090407848358, + "step": 6134 + }, + { + "epoch": 4.242738589211618, + "grad_norm": 20.33088493347168, + "learning_rate": 3.198478561549101e-05, + "log_odds_chosen": 9.505403518676758, + "log_odds_ratio": -0.044929858297109604, + "logits/chosen": -0.6986443996429443, + "logits/rejected": -0.7685960531234741, + "logps/chosen": -0.1546137034893036, + "logps/rejected": -2.1600124835968018, + "loss": 0.9972, + "nll_loss": 0.24481570720672607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015461370348930359, + "rewards/margins": 0.20053988695144653, + "rewards/rejected": -0.21600127220153809, + "step": 6135 + }, + { + "epoch": 4.243430152143845, + "grad_norm": 11.125940322875977, + "learning_rate": 3.1980943599200866e-05, + "log_odds_chosen": 10.615583419799805, + "log_odds_ratio": -4.1170831536874175e-05, + "logits/chosen": -0.4305480718612671, + "logits/rejected": -0.5694938898086548, + "logps/chosen": -0.00042377153295092285, + "logps/rejected": -2.0867981910705566, + "loss": 1.0256, + "nll_loss": 0.25639212131500244, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.237715620547533e-05, + "rewards/margins": 0.20863744616508484, + "rewards/rejected": -0.20867982506752014, + "step": 6136 + }, + { + "epoch": 4.244121715076072, + "grad_norm": 16.073240280151367, + "learning_rate": 3.197710158291071e-05, + "log_odds_chosen": 10.713496208190918, + "log_odds_ratio": -4.822468326892704e-05, + "logits/chosen": -0.9513100385665894, + "logits/rejected": -0.9958503246307373, + "logps/chosen": -0.00024816146469675004, + "logps/rejected": -2.3876280784606934, + "loss": 1.1473, + "nll_loss": 0.2868252694606781, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.481614501448348e-05, + "rewards/margins": 0.23873798549175262, + "rewards/rejected": -0.23876279592514038, + "step": 6137 + }, + { + "epoch": 4.244813278008299, + "grad_norm": 7.69055700302124, + "learning_rate": 3.1973259566620565e-05, + "log_odds_chosen": 10.11435317993164, + "log_odds_ratio": -4.604416972142644e-05, + "logits/chosen": -0.3621489107608795, + "logits/rejected": -0.46428364515304565, + "logps/chosen": -0.00031951890559867024, + "logps/rejected": -2.013823986053467, + "loss": 0.9587, + "nll_loss": 0.23967821896076202, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.195188764948398e-05, + "rewards/margins": 0.20135048031806946, + "rewards/rejected": -0.20138242840766907, + "step": 6138 + }, + { + "epoch": 4.245504840940526, + "grad_norm": 7.717804908752441, + "learning_rate": 3.196941755033042e-05, + "log_odds_chosen": 10.34365177154541, + "log_odds_ratio": -7.24259516573511e-05, + "logits/chosen": -0.28142252564430237, + "logits/rejected": -0.41735485196113586, + "logps/chosen": -0.0002807824348565191, + "logps/rejected": -1.962577223777771, + "loss": 1.4669, + "nll_loss": 0.36671096086502075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8078244213247672e-05, + "rewards/margins": 0.19622963666915894, + "rewards/rejected": -0.19625772535800934, + "step": 6139 + }, + { + "epoch": 4.246196403872752, + "grad_norm": 11.58918571472168, + "learning_rate": 3.196557553404026e-05, + "log_odds_chosen": 9.673727035522461, + "log_odds_ratio": -0.0018433015793561935, + "logits/chosen": -0.54544997215271, + "logits/rejected": -0.5666953325271606, + "logps/chosen": -0.02644137106835842, + "logps/rejected": -1.6399683952331543, + "loss": 1.3217, + "nll_loss": 0.3302415609359741, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026441370137035847, + "rewards/margins": 0.1613527089357376, + "rewards/rejected": -0.1639968454837799, + "step": 6140 + }, + { + "epoch": 4.246887966804979, + "grad_norm": 8.090474128723145, + "learning_rate": 3.1961733517750115e-05, + "log_odds_chosen": 9.784919738769531, + "log_odds_ratio": -0.00011589626228669658, + "logits/chosen": -0.6708372831344604, + "logits/rejected": -0.6841962337493896, + "logps/chosen": -0.00021240772912278771, + "logps/rejected": -1.6052401065826416, + "loss": 1.5484, + "nll_loss": 0.3870971202850342, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.124077218468301e-05, + "rewards/margins": 0.16050276160240173, + "rewards/rejected": -0.16052399575710297, + "step": 6141 + }, + { + "epoch": 4.247579529737206, + "grad_norm": 7.676374435424805, + "learning_rate": 3.195789150145997e-05, + "log_odds_chosen": 9.621414184570312, + "log_odds_ratio": -0.00040039775194600224, + "logits/chosen": -0.45882120728492737, + "logits/rejected": -0.5545141100883484, + "logps/chosen": -0.0005856946809217334, + "logps/rejected": -1.842532992362976, + "loss": 1.1008, + "nll_loss": 0.27515920996665955, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.85694688197691e-05, + "rewards/margins": 0.18419474363327026, + "rewards/rejected": -0.18425330519676208, + "step": 6142 + }, + { + "epoch": 4.248271092669433, + "grad_norm": 13.426033973693848, + "learning_rate": 3.195404948516982e-05, + "log_odds_chosen": 10.054722785949707, + "log_odds_ratio": -0.00023639341816306114, + "logits/chosen": -0.8353263735771179, + "logits/rejected": -0.8713321685791016, + "logps/chosen": -0.0007747645722702146, + "logps/rejected": -1.8731052875518799, + "loss": 1.5143, + "nll_loss": 0.3785462975502014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.747646304778755e-05, + "rewards/margins": 0.18723304569721222, + "rewards/rejected": -0.18731053173542023, + "step": 6143 + }, + { + "epoch": 4.24896265560166, + "grad_norm": 4.346104145050049, + "learning_rate": 3.1950207468879666e-05, + "log_odds_chosen": 8.975990295410156, + "log_odds_ratio": -0.001990825869143009, + "logits/chosen": -0.36998724937438965, + "logits/rejected": -0.3497503995895386, + "logps/chosen": -0.002082593971863389, + "logps/rejected": -1.9057161808013916, + "loss": 1.5676, + "nll_loss": 0.3916909694671631, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020825938554480672, + "rewards/margins": 0.19036336243152618, + "rewards/rejected": -0.1905716359615326, + "step": 6144 + }, + { + "epoch": 4.249654218533887, + "grad_norm": 6.410309791564941, + "learning_rate": 3.1946365452589525e-05, + "log_odds_chosen": 10.1351957321167, + "log_odds_ratio": -0.00023676696582697332, + "logits/chosen": -0.27143821120262146, + "logits/rejected": -0.30729198455810547, + "logps/chosen": -0.0009003905579447746, + "logps/rejected": -2.2765510082244873, + "loss": 0.6741, + "nll_loss": 0.16851022839546204, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.003906598081812e-05, + "rewards/margins": 0.22756507992744446, + "rewards/rejected": -0.22765511274337769, + "step": 6145 + }, + { + "epoch": 4.250345781466113, + "grad_norm": 8.370780944824219, + "learning_rate": 3.194252343629937e-05, + "log_odds_chosen": 8.947935104370117, + "log_odds_ratio": -0.0002222056791651994, + "logits/chosen": -0.4107566177845001, + "logits/rejected": -0.4923899173736572, + "logps/chosen": -0.0005199002334848046, + "logps/rejected": -1.4588299989700317, + "loss": 1.1181, + "nll_loss": 0.27949339151382446, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1990024076076224e-05, + "rewards/margins": 0.14583101868629456, + "rewards/rejected": -0.1458830088376999, + "step": 6146 + }, + { + "epoch": 4.25103734439834, + "grad_norm": 14.382808685302734, + "learning_rate": 3.193868142000922e-05, + "log_odds_chosen": 9.37031078338623, + "log_odds_ratio": -0.1772003322839737, + "logits/chosen": -0.5559031367301941, + "logits/rejected": -0.5951072573661804, + "logps/chosen": -0.02392636425793171, + "logps/rejected": -2.083078384399414, + "loss": 1.1989, + "nll_loss": 0.2820078134536743, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002392636379227042, + "rewards/margins": 0.2059151828289032, + "rewards/rejected": -0.20830783247947693, + "step": 6147 + }, + { + "epoch": 4.251728907330567, + "grad_norm": 11.454696655273438, + "learning_rate": 3.1934839403719075e-05, + "log_odds_chosen": 10.160030364990234, + "log_odds_ratio": -0.00027405653963796794, + "logits/chosen": -0.1272168755531311, + "logits/rejected": -0.24657240509986877, + "logps/chosen": -0.00119110569357872, + "logps/rejected": -2.3499598503112793, + "loss": 0.901, + "nll_loss": 0.22522665560245514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011911056935787201, + "rewards/margins": 0.2348768562078476, + "rewards/rejected": -0.2349959760904312, + "step": 6148 + }, + { + "epoch": 4.252420470262794, + "grad_norm": 7.529397487640381, + "learning_rate": 3.193099738742892e-05, + "log_odds_chosen": 9.81788444519043, + "log_odds_ratio": -0.00012479073484428227, + "logits/chosen": -0.46000170707702637, + "logits/rejected": -0.5587924718856812, + "logps/chosen": -0.00022344016178976744, + "logps/rejected": -1.3893221616744995, + "loss": 1.178, + "nll_loss": 0.29448843002319336, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2344016542774625e-05, + "rewards/margins": 0.13890986144542694, + "rewards/rejected": -0.1389322131872177, + "step": 6149 + }, + { + "epoch": 4.253112033195021, + "grad_norm": 7.670037746429443, + "learning_rate": 3.1927155371138773e-05, + "log_odds_chosen": 10.348284721374512, + "log_odds_ratio": -8.794210589258e-05, + "logits/chosen": -0.736275315284729, + "logits/rejected": -0.7626364231109619, + "logps/chosen": -0.0004248587938491255, + "logps/rejected": -2.323251962661743, + "loss": 0.9088, + "nll_loss": 0.22719423472881317, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2485877202125266e-05, + "rewards/margins": 0.23228272795677185, + "rewards/rejected": -0.23232519626617432, + "step": 6150 + }, + { + "epoch": 4.253803596127248, + "grad_norm": 17.140338897705078, + "learning_rate": 3.1923313354848626e-05, + "log_odds_chosen": 9.948582649230957, + "log_odds_ratio": -0.00014420642401091754, + "logits/chosen": -0.376644492149353, + "logits/rejected": -0.456285297870636, + "logps/chosen": -0.00035677163396030664, + "logps/rejected": -1.8041694164276123, + "loss": 1.3221, + "nll_loss": 0.33050060272216797, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5677163396030664e-05, + "rewards/margins": 0.18038126826286316, + "rewards/rejected": -0.18041695654392242, + "step": 6151 + }, + { + "epoch": 4.254495159059474, + "grad_norm": 22.46241569519043, + "learning_rate": 3.191947133855848e-05, + "log_odds_chosen": 9.51242446899414, + "log_odds_ratio": -0.1370656043291092, + "logits/chosen": -0.9688135385513306, + "logits/rejected": -1.068524718284607, + "logps/chosen": -0.2347746342420578, + "logps/rejected": -2.2272887229919434, + "loss": 1.0347, + "nll_loss": 0.2449595332145691, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.02347746305167675, + "rewards/margins": 0.19925141334533691, + "rewards/rejected": -0.2227288782596588, + "step": 6152 + }, + { + "epoch": 4.255186721991701, + "grad_norm": 7.2648491859436035, + "learning_rate": 3.1915629322268324e-05, + "log_odds_chosen": 9.57111644744873, + "log_odds_ratio": -0.0026391155552119017, + "logits/chosen": -0.5854471921920776, + "logits/rejected": -0.6443982720375061, + "logps/chosen": -0.0010829295497387648, + "logps/rejected": -2.35952091217041, + "loss": 1.5768, + "nll_loss": 0.3939476013183594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010829296661540866, + "rewards/margins": 0.23584382236003876, + "rewards/rejected": -0.23595212399959564, + "step": 6153 + }, + { + "epoch": 4.255878284923928, + "grad_norm": 5.667422294616699, + "learning_rate": 3.191178730597818e-05, + "log_odds_chosen": 9.090551376342773, + "log_odds_ratio": -0.0007788903312757611, + "logits/chosen": -0.538144588470459, + "logits/rejected": -0.5147003531455994, + "logps/chosen": -0.006274771876633167, + "logps/rejected": -1.5023272037506104, + "loss": 1.3573, + "nll_loss": 0.3392564654350281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006274771294556558, + "rewards/margins": 0.14960524439811707, + "rewards/rejected": -0.1502327024936676, + "step": 6154 + }, + { + "epoch": 4.256569847856155, + "grad_norm": 7.264804363250732, + "learning_rate": 3.190794528968803e-05, + "log_odds_chosen": 8.99667739868164, + "log_odds_ratio": -0.004868743941187859, + "logits/chosen": -0.6970754265785217, + "logits/rejected": -0.6954235434532166, + "logps/chosen": -0.003572126617655158, + "logps/rejected": -1.9685618877410889, + "loss": 1.0954, + "nll_loss": 0.2733573019504547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000357212673407048, + "rewards/margins": 0.19649897515773773, + "rewards/rejected": -0.19685617089271545, + "step": 6155 + }, + { + "epoch": 4.257261410788382, + "grad_norm": 7.831867694854736, + "learning_rate": 3.190410327339788e-05, + "log_odds_chosen": 10.755487442016602, + "log_odds_ratio": -8.687889931024984e-05, + "logits/chosen": -0.25262823700904846, + "logits/rejected": -0.3432296812534332, + "logps/chosen": -0.0003574866277631372, + "logps/rejected": -2.4831430912017822, + "loss": 0.8574, + "nll_loss": 0.21433907747268677, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5748664231505245e-05, + "rewards/margins": 0.24827855825424194, + "rewards/rejected": -0.24831432104110718, + "step": 6156 + }, + { + "epoch": 4.2579529737206085, + "grad_norm": 9.009108543395996, + "learning_rate": 3.1900261257107734e-05, + "log_odds_chosen": 9.415932655334473, + "log_odds_ratio": -0.00192168727517128, + "logits/chosen": -0.6164818406105042, + "logits/rejected": -0.704775869846344, + "logps/chosen": -0.0009632366127334535, + "logps/rejected": -1.8765302896499634, + "loss": 1.2155, + "nll_loss": 0.303676038980484, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.632366709411144e-05, + "rewards/margins": 0.18755671381950378, + "rewards/rejected": -0.18765303492546082, + "step": 6157 + }, + { + "epoch": 4.258644536652835, + "grad_norm": 13.167975425720215, + "learning_rate": 3.189641924081758e-05, + "log_odds_chosen": 9.81691837310791, + "log_odds_ratio": -0.0013511620927602053, + "logits/chosen": -0.5068100690841675, + "logits/rejected": -0.5617337226867676, + "logps/chosen": -0.0005183805478736758, + "logps/rejected": -1.5767027139663696, + "loss": 1.2908, + "nll_loss": 0.3225646913051605, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183805842534639e-05, + "rewards/margins": 0.1576184332370758, + "rewards/rejected": -0.1576702892780304, + "step": 6158 + }, + { + "epoch": 4.259336099585062, + "grad_norm": 8.414799690246582, + "learning_rate": 3.189257722452743e-05, + "log_odds_chosen": 10.277162551879883, + "log_odds_ratio": -8.107912435662001e-05, + "logits/chosen": 0.052820704877376556, + "logits/rejected": -0.06814444065093994, + "logps/chosen": -0.00030761188827455044, + "logps/rejected": -1.597118854522705, + "loss": 0.6989, + "nll_loss": 0.1747276932001114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.076119173783809e-05, + "rewards/margins": 0.15968112647533417, + "rewards/rejected": -0.15971189737319946, + "step": 6159 + }, + { + "epoch": 4.260027662517289, + "grad_norm": 12.140266418457031, + "learning_rate": 3.1888735208237284e-05, + "log_odds_chosen": 11.18593978881836, + "log_odds_ratio": -0.0001817693846533075, + "logits/chosen": -0.6569154262542725, + "logits/rejected": -0.7095228433609009, + "logps/chosen": -0.00019287460600025952, + "logps/rejected": -2.2339508533477783, + "loss": 1.0071, + "nll_loss": 0.2517620921134949, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9287461327621713e-05, + "rewards/margins": 0.22337576746940613, + "rewards/rejected": -0.22339506447315216, + "step": 6160 + }, + { + "epoch": 4.260719225449516, + "grad_norm": 7.150200366973877, + "learning_rate": 3.188489319194714e-05, + "log_odds_chosen": 10.353399276733398, + "log_odds_ratio": -0.0003910641244146973, + "logits/chosen": -0.19342248141765594, + "logits/rejected": -0.1816299855709076, + "logps/chosen": -0.0005973252700641751, + "logps/rejected": -2.1960625648498535, + "loss": 0.8542, + "nll_loss": 0.21351881325244904, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9732523368438706e-05, + "rewards/margins": 0.21954652667045593, + "rewards/rejected": -0.21960625052452087, + "step": 6161 + }, + { + "epoch": 4.261410788381743, + "grad_norm": 13.170788764953613, + "learning_rate": 3.188105117565698e-05, + "log_odds_chosen": 8.552861213684082, + "log_odds_ratio": -0.0017542075365781784, + "logits/chosen": -0.08158275485038757, + "logits/rejected": -0.18088936805725098, + "logps/chosen": -0.0019205547869205475, + "logps/rejected": -1.5736401081085205, + "loss": 1.5537, + "nll_loss": 0.3882403075695038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019205547869205475, + "rewards/margins": 0.15717196464538574, + "rewards/rejected": -0.15736402571201324, + "step": 6162 + }, + { + "epoch": 4.2621023513139695, + "grad_norm": 17.794445037841797, + "learning_rate": 3.187720915936684e-05, + "log_odds_chosen": 10.468133926391602, + "log_odds_ratio": -9.857804980129004e-05, + "logits/chosen": -0.4853760302066803, + "logits/rejected": -0.49158012866973877, + "logps/chosen": -0.000775833148509264, + "logps/rejected": -2.0211195945739746, + "loss": 1.2288, + "nll_loss": 0.3071891665458679, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.758331776130944e-05, + "rewards/margins": 0.2020343840122223, + "rewards/rejected": -0.20211195945739746, + "step": 6163 + }, + { + "epoch": 4.262793914246196, + "grad_norm": 15.141548156738281, + "learning_rate": 3.187336714307669e-05, + "log_odds_chosen": 10.6614351272583, + "log_odds_ratio": -7.608214218635112e-05, + "logits/chosen": -0.2411142736673355, + "logits/rejected": -0.2971709668636322, + "logps/chosen": -0.0001604513672646135, + "logps/rejected": -2.035788059234619, + "loss": 0.9799, + "nll_loss": 0.24496668577194214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6045138181652874e-05, + "rewards/margins": 0.20356276631355286, + "rewards/rejected": -0.20357881486415863, + "step": 6164 + }, + { + "epoch": 4.263485477178423, + "grad_norm": 9.609676361083984, + "learning_rate": 3.186952512678654e-05, + "log_odds_chosen": 10.652984619140625, + "log_odds_ratio": -4.42389864474535e-05, + "logits/chosen": -0.40380859375, + "logits/rejected": -0.47176897525787354, + "logps/chosen": -0.0005353515734896064, + "logps/rejected": -2.288562774658203, + "loss": 0.9475, + "nll_loss": 0.2368660271167755, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.353515734896064e-05, + "rewards/margins": 0.22880274057388306, + "rewards/rejected": -0.22885626554489136, + "step": 6165 + }, + { + "epoch": 4.26417704011065, + "grad_norm": 18.316030502319336, + "learning_rate": 3.186568311049639e-05, + "log_odds_chosen": 9.397259712219238, + "log_odds_ratio": -0.00036490714410319924, + "logits/chosen": -0.2662919759750366, + "logits/rejected": -0.2668308615684509, + "logps/chosen": -0.0013899412006139755, + "logps/rejected": -1.6413580179214478, + "loss": 1.5478, + "nll_loss": 0.3869023323059082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013899413170292974, + "rewards/margins": 0.16399678587913513, + "rewards/rejected": -0.16413578391075134, + "step": 6166 + }, + { + "epoch": 4.264868603042877, + "grad_norm": 5.9011054039001465, + "learning_rate": 3.186184109420624e-05, + "log_odds_chosen": 9.216154098510742, + "log_odds_ratio": -0.004653660114854574, + "logits/chosen": -0.42850807309150696, + "logits/rejected": -0.43218332529067993, + "logps/chosen": -0.009668417274951935, + "logps/rejected": -1.7757195234298706, + "loss": 0.9863, + "nll_loss": 0.24610421061515808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009668418788351119, + "rewards/margins": 0.17660510540008545, + "rewards/rejected": -0.17757193744182587, + "step": 6167 + }, + { + "epoch": 4.265560165975104, + "grad_norm": 7.113187313079834, + "learning_rate": 3.185799907791609e-05, + "log_odds_chosen": 9.243945121765137, + "log_odds_ratio": -0.0005347700789570808, + "logits/chosen": -0.3659195601940155, + "logits/rejected": -0.4238894581794739, + "logps/chosen": -0.0038989405147731304, + "logps/rejected": -2.413435220718384, + "loss": 1.2477, + "nll_loss": 0.31186720728874207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003898940631188452, + "rewards/margins": 0.24095362424850464, + "rewards/rejected": -0.24134352803230286, + "step": 6168 + }, + { + "epoch": 4.2662517289073305, + "grad_norm": 9.815381050109863, + "learning_rate": 3.185415706162594e-05, + "log_odds_chosen": 9.05932331085205, + "log_odds_ratio": -0.00020118005340918899, + "logits/chosen": -0.10414472222328186, + "logits/rejected": -0.23452311754226685, + "logps/chosen": -0.0012550798710435629, + "logps/rejected": -1.68681001663208, + "loss": 1.0896, + "nll_loss": 0.2723812162876129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012550799874588847, + "rewards/margins": 0.16855549812316895, + "rewards/rejected": -0.16868099570274353, + "step": 6169 + }, + { + "epoch": 4.266943291839557, + "grad_norm": 9.147217750549316, + "learning_rate": 3.1850315045335795e-05, + "log_odds_chosen": 9.957201957702637, + "log_odds_ratio": -9.266338020097464e-05, + "logits/chosen": -0.16600045561790466, + "logits/rejected": -0.21061308681964874, + "logps/chosen": -0.005030377767980099, + "logps/rejected": -2.65336275100708, + "loss": 1.3004, + "nll_loss": 0.325092077255249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005030377651564777, + "rewards/margins": 0.2648332118988037, + "rewards/rejected": -0.265336275100708, + "step": 6170 + }, + { + "epoch": 4.267634854771784, + "grad_norm": 8.045340538024902, + "learning_rate": 3.184647302904564e-05, + "log_odds_chosen": 9.198465347290039, + "log_odds_ratio": -0.0001735072728479281, + "logits/chosen": -0.47205400466918945, + "logits/rejected": -0.5205153226852417, + "logps/chosen": -0.0007983793038874865, + "logps/rejected": -1.7312718629837036, + "loss": 1.3807, + "nll_loss": 0.3451688587665558, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.983793329913169e-05, + "rewards/margins": 0.17304734885692596, + "rewards/rejected": -0.1731271743774414, + "step": 6171 + }, + { + "epoch": 4.268326417704011, + "grad_norm": 6.69143533706665, + "learning_rate": 3.18426310127555e-05, + "log_odds_chosen": 10.010408401489258, + "log_odds_ratio": -0.0001274587557418272, + "logits/chosen": -0.27231353521347046, + "logits/rejected": -0.28820914030075073, + "logps/chosen": -0.0008799899369478226, + "logps/rejected": -2.1249094009399414, + "loss": 1.2909, + "nll_loss": 0.3227103352546692, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.799900388112292e-05, + "rewards/margins": 0.21240293979644775, + "rewards/rejected": -0.21249093115329742, + "step": 6172 + }, + { + "epoch": 4.269017980636238, + "grad_norm": 6.601019382476807, + "learning_rate": 3.1838788996465346e-05, + "log_odds_chosen": 9.913965225219727, + "log_odds_ratio": -0.0002963221340905875, + "logits/chosen": -0.46160951256752014, + "logits/rejected": -0.4814470410346985, + "logps/chosen": -0.00020028551807627082, + "logps/rejected": -1.554571270942688, + "loss": 1.0545, + "nll_loss": 0.26360398530960083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0028553990414366e-05, + "rewards/margins": 0.15543711185455322, + "rewards/rejected": -0.15545713901519775, + "step": 6173 + }, + { + "epoch": 4.269709543568465, + "grad_norm": 12.24593448638916, + "learning_rate": 3.18349469801752e-05, + "log_odds_chosen": 9.407413482666016, + "log_odds_ratio": -0.0007775399135425687, + "logits/chosen": -0.32303255796432495, + "logits/rejected": -0.34748154878616333, + "logps/chosen": -0.0036095932591706514, + "logps/rejected": -1.9337127208709717, + "loss": 1.3275, + "nll_loss": 0.3318028748035431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036095932591706514, + "rewards/margins": 0.1930103302001953, + "rewards/rejected": -0.19337128102779388, + "step": 6174 + }, + { + "epoch": 4.2704011065006915, + "grad_norm": 15.678728103637695, + "learning_rate": 3.183110496388505e-05, + "log_odds_chosen": 10.00384521484375, + "log_odds_ratio": -0.00040300548425875604, + "logits/chosen": -0.6204206943511963, + "logits/rejected": -0.6617852449417114, + "logps/chosen": -0.0006737353978678584, + "logps/rejected": -2.5181639194488525, + "loss": 1.2319, + "nll_loss": 0.3079381287097931, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.737354124197736e-05, + "rewards/margins": 0.25174903869628906, + "rewards/rejected": -0.25181639194488525, + "step": 6175 + }, + { + "epoch": 4.271092669432918, + "grad_norm": 7.809788703918457, + "learning_rate": 3.1827262947594896e-05, + "log_odds_chosen": 9.518634796142578, + "log_odds_ratio": -0.0002678804157767445, + "logits/chosen": -0.3987533450126648, + "logits/rejected": -0.49139589071273804, + "logps/chosen": -0.0005189487710595131, + "logps/rejected": -1.4620037078857422, + "loss": 0.7988, + "nll_loss": 0.19967755675315857, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1894880016334355e-05, + "rewards/margins": 0.1461484730243683, + "rewards/rejected": -0.14620037376880646, + "step": 6176 + }, + { + "epoch": 4.271784232365145, + "grad_norm": 8.224957466125488, + "learning_rate": 3.182342093130475e-05, + "log_odds_chosen": 11.28808307647705, + "log_odds_ratio": -3.974015999119729e-05, + "logits/chosen": -0.4204134941101074, + "logits/rejected": -0.5216044187545776, + "logps/chosen": -0.00029299373272806406, + "logps/rejected": -2.9208827018737793, + "loss": 1.3039, + "nll_loss": 0.32597795128822327, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9299371817614883e-05, + "rewards/margins": 0.2920589745044708, + "rewards/rejected": -0.29208827018737793, + "step": 6177 + }, + { + "epoch": 4.272475795297372, + "grad_norm": 6.33683443069458, + "learning_rate": 3.18195789150146e-05, + "log_odds_chosen": 8.957198143005371, + "log_odds_ratio": -0.0002472910564392805, + "logits/chosen": -0.582464337348938, + "logits/rejected": -0.6484727263450623, + "logps/chosen": -0.0011252148542553186, + "logps/rejected": -1.8942865133285522, + "loss": 1.2003, + "nll_loss": 0.3000393509864807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011252149124629796, + "rewards/margins": 0.18931612372398376, + "rewards/rejected": -0.1894286572933197, + "step": 6178 + }, + { + "epoch": 4.273167358229599, + "grad_norm": 14.971855163574219, + "learning_rate": 3.1815736898724454e-05, + "log_odds_chosen": 8.94025993347168, + "log_odds_ratio": -0.00026140769477933645, + "logits/chosen": -0.7007652521133423, + "logits/rejected": -0.687438428401947, + "logps/chosen": -0.0008105351589620113, + "logps/rejected": -1.8527700901031494, + "loss": 1.067, + "nll_loss": 0.26671651005744934, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.10535202617757e-05, + "rewards/margins": 0.1851959526538849, + "rewards/rejected": -0.18527701497077942, + "step": 6179 + }, + { + "epoch": 4.273858921161826, + "grad_norm": 11.929402351379395, + "learning_rate": 3.18118948824343e-05, + "log_odds_chosen": 9.497038841247559, + "log_odds_ratio": -0.002557209460064769, + "logits/chosen": -0.5874805450439453, + "logits/rejected": -0.5811681747436523, + "logps/chosen": -0.0019133985042572021, + "logps/rejected": -1.8315576314926147, + "loss": 1.4228, + "nll_loss": 0.3554553985595703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019133984460495412, + "rewards/margins": 0.18296441435813904, + "rewards/rejected": -0.18315577507019043, + "step": 6180 + }, + { + "epoch": 4.2745504840940525, + "grad_norm": 7.399724006652832, + "learning_rate": 3.180805286614416e-05, + "log_odds_chosen": 10.53152847290039, + "log_odds_ratio": -0.0001274074602406472, + "logits/chosen": -0.6589803695678711, + "logits/rejected": -0.7211213111877441, + "logps/chosen": -0.0033396054059267044, + "logps/rejected": -3.2292990684509277, + "loss": 0.9119, + "nll_loss": 0.2279733568429947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000333960575517267, + "rewards/margins": 0.3225959539413452, + "rewards/rejected": -0.32292991876602173, + "step": 6181 + }, + { + "epoch": 4.275242047026279, + "grad_norm": 7.8806843757629395, + "learning_rate": 3.1804210849854004e-05, + "log_odds_chosen": 8.394222259521484, + "log_odds_ratio": -0.050156790763139725, + "logits/chosen": -0.6371682286262512, + "logits/rejected": -0.6561437249183655, + "logps/chosen": -0.01156701985746622, + "logps/rejected": -1.389466404914856, + "loss": 1.1804, + "nll_loss": 0.29009053111076355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011567020555958152, + "rewards/margins": 0.13778993487358093, + "rewards/rejected": -0.13894663751125336, + "step": 6182 + }, + { + "epoch": 4.275933609958506, + "grad_norm": 10.309439659118652, + "learning_rate": 3.180036883356386e-05, + "log_odds_chosen": 11.258016586303711, + "log_odds_ratio": -2.8753411243087612e-05, + "logits/chosen": -0.5273156762123108, + "logits/rejected": -0.5840225219726562, + "logps/chosen": -0.0003254458715673536, + "logps/rejected": -2.6107728481292725, + "loss": 1.5655, + "nll_loss": 0.391368567943573, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.254459079471417e-05, + "rewards/margins": 0.2610447406768799, + "rewards/rejected": -0.26107728481292725, + "step": 6183 + }, + { + "epoch": 4.276625172890733, + "grad_norm": 6.367475509643555, + "learning_rate": 3.179652681727371e-05, + "log_odds_chosen": 9.545506477355957, + "log_odds_ratio": -0.0002627922222018242, + "logits/chosen": -0.5451605916023254, + "logits/rejected": -0.5639867186546326, + "logps/chosen": -0.0010228962637484074, + "logps/rejected": -1.7442550659179688, + "loss": 1.5626, + "nll_loss": 0.3906342387199402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010228962491964921, + "rewards/margins": 0.17432323098182678, + "rewards/rejected": -0.17442551255226135, + "step": 6184 + }, + { + "epoch": 4.27731673582296, + "grad_norm": 9.653816223144531, + "learning_rate": 3.1792684800983555e-05, + "log_odds_chosen": 10.542591094970703, + "log_odds_ratio": -8.921239350456744e-05, + "logits/chosen": -0.4528236985206604, + "logits/rejected": -0.416860431432724, + "logps/chosen": -0.0014932905323803425, + "logps/rejected": -2.6712212562561035, + "loss": 0.9257, + "nll_loss": 0.23141497373580933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014932906196918339, + "rewards/margins": 0.26697278022766113, + "rewards/rejected": -0.2671221196651459, + "step": 6185 + }, + { + "epoch": 4.278008298755187, + "grad_norm": 7.284701824188232, + "learning_rate": 3.178884278469341e-05, + "log_odds_chosen": 10.624105453491211, + "log_odds_ratio": -5.059983232058585e-05, + "logits/chosen": -0.7741506695747375, + "logits/rejected": -0.7754232883453369, + "logps/chosen": -0.00011048675514757633, + "logps/rejected": -1.6449944972991943, + "loss": 1.0486, + "nll_loss": 0.26214975118637085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1048676242353395e-05, + "rewards/margins": 0.16448840498924255, + "rewards/rejected": -0.1644994616508484, + "step": 6186 + }, + { + "epoch": 4.2786998616874135, + "grad_norm": 9.619255065917969, + "learning_rate": 3.178500076840326e-05, + "log_odds_chosen": 10.326604843139648, + "log_odds_ratio": -5.458838495542295e-05, + "logits/chosen": -0.5391168594360352, + "logits/rejected": -0.6189460158348083, + "logps/chosen": -0.0002454041095916182, + "logps/rejected": -1.8849753141403198, + "loss": 1.1774, + "nll_loss": 0.29435694217681885, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.454041168675758e-05, + "rewards/margins": 0.18847298622131348, + "rewards/rejected": -0.18849752843379974, + "step": 6187 + }, + { + "epoch": 4.27939142461964, + "grad_norm": 11.625914573669434, + "learning_rate": 3.178115875211311e-05, + "log_odds_chosen": 9.457094192504883, + "log_odds_ratio": -0.0007933979504741728, + "logits/chosen": -0.6278635263442993, + "logits/rejected": -0.6144816875457764, + "logps/chosen": -0.004008726682513952, + "logps/rejected": -1.4434616565704346, + "loss": 1.273, + "nll_loss": 0.31816577911376953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004008726973552257, + "rewards/margins": 0.14394529163837433, + "rewards/rejected": -0.14434616267681122, + "step": 6188 + }, + { + "epoch": 4.280082987551867, + "grad_norm": 9.663738250732422, + "learning_rate": 3.177731673582296e-05, + "log_odds_chosen": 10.204988479614258, + "log_odds_ratio": -0.0002085541345877573, + "logits/chosen": -0.5629463791847229, + "logits/rejected": -0.5773528814315796, + "logps/chosen": -0.0008364799432456493, + "logps/rejected": -1.5560381412506104, + "loss": 1.031, + "nll_loss": 0.2577328085899353, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.364799577975646e-05, + "rewards/margins": 0.15552017092704773, + "rewards/rejected": -0.1556038111448288, + "step": 6189 + }, + { + "epoch": 4.280774550484094, + "grad_norm": 9.566874504089355, + "learning_rate": 3.177347471953282e-05, + "log_odds_chosen": 10.460851669311523, + "log_odds_ratio": -5.972566214040853e-05, + "logits/chosen": -0.4141422212123871, + "logits/rejected": -0.4708189070224762, + "logps/chosen": -0.00026144867297261953, + "logps/rejected": -2.0418572425842285, + "loss": 0.9027, + "nll_loss": 0.22567889094352722, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6144867661059834e-05, + "rewards/margins": 0.20415958762168884, + "rewards/rejected": -0.20418575406074524, + "step": 6190 + }, + { + "epoch": 4.281466113416321, + "grad_norm": 11.769757270812988, + "learning_rate": 3.176963270324266e-05, + "log_odds_chosen": 8.909507751464844, + "log_odds_ratio": -0.0021207600366324186, + "logits/chosen": -0.3657255172729492, + "logits/rejected": -0.3715681731700897, + "logps/chosen": -0.009017495438456535, + "logps/rejected": -1.7288877964019775, + "loss": 1.09, + "nll_loss": 0.2722766399383545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009017496486194432, + "rewards/margins": 0.17198702692985535, + "rewards/rejected": -0.17288878560066223, + "step": 6191 + }, + { + "epoch": 4.282157676348548, + "grad_norm": 6.800326347351074, + "learning_rate": 3.1765790686952515e-05, + "log_odds_chosen": 9.953897476196289, + "log_odds_ratio": -0.00017238502914551646, + "logits/chosen": -0.557531476020813, + "logits/rejected": -0.5738213062286377, + "logps/chosen": -0.00028701216797344387, + "logps/rejected": -2.059114456176758, + "loss": 1.4842, + "nll_loss": 0.37102198600769043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8701215342152864e-05, + "rewards/margins": 0.2058827430009842, + "rewards/rejected": -0.20591145753860474, + "step": 6192 + }, + { + "epoch": 4.282849239280774, + "grad_norm": 6.554988861083984, + "learning_rate": 3.176194867066237e-05, + "log_odds_chosen": 10.263959884643555, + "log_odds_ratio": -6.602435314562172e-05, + "logits/chosen": -0.3205175995826721, + "logits/rejected": -0.38556766510009766, + "logps/chosen": -0.0001721544103929773, + "logps/rejected": -1.56480073928833, + "loss": 0.9438, + "nll_loss": 0.23595106601715088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.721544140309561e-05, + "rewards/margins": 0.1564628630876541, + "rewards/rejected": -0.156480073928833, + "step": 6193 + }, + { + "epoch": 4.283540802213001, + "grad_norm": 5.991519927978516, + "learning_rate": 3.175810665437221e-05, + "log_odds_chosen": 9.662172317504883, + "log_odds_ratio": -0.002522186143323779, + "logits/chosen": -0.22076928615570068, + "logits/rejected": -0.24936668574810028, + "logps/chosen": -0.00199855281971395, + "logps/rejected": -2.0251848697662354, + "loss": 1.2241, + "nll_loss": 0.3057834208011627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019985527615062892, + "rewards/margins": 0.20231863856315613, + "rewards/rejected": -0.202518492937088, + "step": 6194 + }, + { + "epoch": 4.284232365145228, + "grad_norm": 8.247119903564453, + "learning_rate": 3.1754264638082066e-05, + "log_odds_chosen": 8.842205047607422, + "log_odds_ratio": -0.0006784327561035752, + "logits/chosen": -0.5647929906845093, + "logits/rejected": -0.5543622374534607, + "logps/chosen": -0.01458799373358488, + "logps/rejected": -1.484726905822754, + "loss": 1.3746, + "nll_loss": 0.34358879923820496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014587993500754237, + "rewards/margins": 0.14701390266418457, + "rewards/rejected": -0.14847269654273987, + "step": 6195 + }, + { + "epoch": 4.284923928077455, + "grad_norm": 13.228196144104004, + "learning_rate": 3.175042262179192e-05, + "log_odds_chosen": 9.361620903015137, + "log_odds_ratio": -0.01754622533917427, + "logits/chosen": -0.45240429043769836, + "logits/rejected": -0.522560715675354, + "logps/chosen": -0.005461498629301786, + "logps/rejected": -1.9183518886566162, + "loss": 1.6339, + "nll_loss": 0.40672767162323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005461499094963074, + "rewards/margins": 0.1912890374660492, + "rewards/rejected": -0.1918351948261261, + "step": 6196 + }, + { + "epoch": 4.285615491009682, + "grad_norm": 10.74985408782959, + "learning_rate": 3.174658060550177e-05, + "log_odds_chosen": 9.648872375488281, + "log_odds_ratio": -0.0002749493869487196, + "logits/chosen": -0.5711510181427002, + "logits/rejected": -0.667496919631958, + "logps/chosen": -0.0009153534774668515, + "logps/rejected": -2.0735342502593994, + "loss": 0.9794, + "nll_loss": 0.2448323518037796, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.153535211225972e-05, + "rewards/margins": 0.20726190507411957, + "rewards/rejected": -0.20735344290733337, + "step": 6197 + }, + { + "epoch": 4.286307053941909, + "grad_norm": 13.684673309326172, + "learning_rate": 3.1742738589211616e-05, + "log_odds_chosen": 9.251256942749023, + "log_odds_ratio": -0.04825928807258606, + "logits/chosen": -0.17080645263195038, + "logits/rejected": -0.33765342831611633, + "logps/chosen": -0.00878140889108181, + "logps/rejected": -1.4698766469955444, + "loss": 1.8133, + "nll_loss": 0.44850003719329834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008781409705989063, + "rewards/margins": 0.14610953629016876, + "rewards/rejected": -0.1469876766204834, + "step": 6198 + }, + { + "epoch": 4.286998616874135, + "grad_norm": 10.797607421875, + "learning_rate": 3.1738896572921475e-05, + "log_odds_chosen": 9.443811416625977, + "log_odds_ratio": -0.000161836898769252, + "logits/chosen": -0.6183329820632935, + "logits/rejected": -0.6296303272247314, + "logps/chosen": -0.00029242795426398516, + "logps/rejected": -1.2201664447784424, + "loss": 0.8209, + "nll_loss": 0.2052021622657776, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9242795790196396e-05, + "rewards/margins": 0.12198740243911743, + "rewards/rejected": -0.12201663851737976, + "step": 6199 + }, + { + "epoch": 4.287690179806362, + "grad_norm": 8.2676420211792, + "learning_rate": 3.173505455663132e-05, + "log_odds_chosen": 9.324875831604004, + "log_odds_ratio": -0.001229931483976543, + "logits/chosen": -0.6896675825119019, + "logits/rejected": -0.7380616068840027, + "logps/chosen": -0.0015790637116879225, + "logps/rejected": -1.530112862586975, + "loss": 0.7646, + "nll_loss": 0.19102557003498077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001579063682584092, + "rewards/margins": 0.1528533697128296, + "rewards/rejected": -0.1530112773180008, + "step": 6200 + }, + { + "epoch": 4.288381742738589, + "grad_norm": 6.931770324707031, + "learning_rate": 3.1731212540341174e-05, + "log_odds_chosen": 9.740365982055664, + "log_odds_ratio": -0.00048356314073316753, + "logits/chosen": -0.6223162412643433, + "logits/rejected": -0.692702054977417, + "logps/chosen": -0.001973244594410062, + "logps/rejected": -2.078831911087036, + "loss": 1.1004, + "nll_loss": 0.27504581212997437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019732445070985705, + "rewards/margins": 0.20768587291240692, + "rewards/rejected": -0.20788319408893585, + "step": 6201 + }, + { + "epoch": 4.289073305670816, + "grad_norm": 5.612523078918457, + "learning_rate": 3.1727370524051026e-05, + "log_odds_chosen": 10.49863052368164, + "log_odds_ratio": -0.00012968035298399627, + "logits/chosen": -0.47685426473617554, + "logits/rejected": -0.41339313983917236, + "logps/chosen": -0.00019079844059888273, + "logps/rejected": -1.6685700416564941, + "loss": 0.9857, + "nll_loss": 0.24640297889709473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9079845515079796e-05, + "rewards/margins": 0.1668379306793213, + "rewards/rejected": -0.16685700416564941, + "step": 6202 + }, + { + "epoch": 4.289764868603043, + "grad_norm": 15.797565460205078, + "learning_rate": 3.172352850776087e-05, + "log_odds_chosen": 9.270092010498047, + "log_odds_ratio": -0.26327821612358093, + "logits/chosen": -0.49884092807769775, + "logits/rejected": -0.5388543605804443, + "logps/chosen": -0.04154008626937866, + "logps/rejected": -2.206192970275879, + "loss": 1.4869, + "nll_loss": 0.34539881348609924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004154008813202381, + "rewards/margins": 0.2164652943611145, + "rewards/rejected": -0.2206193059682846, + "step": 6203 + }, + { + "epoch": 4.29045643153527, + "grad_norm": 7.614452838897705, + "learning_rate": 3.1719686491470724e-05, + "log_odds_chosen": 10.388860702514648, + "log_odds_ratio": -5.83199507673271e-05, + "logits/chosen": -0.3829791843891144, + "logits/rejected": -0.5302500128746033, + "logps/chosen": -0.0001508757413830608, + "logps/rejected": -1.7036199569702148, + "loss": 1.3709, + "nll_loss": 0.34271174669265747, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5087575775396544e-05, + "rewards/margins": 0.17034690082073212, + "rewards/rejected": -0.17036199569702148, + "step": 6204 + }, + { + "epoch": 4.291147994467496, + "grad_norm": 7.703399181365967, + "learning_rate": 3.1715844475180577e-05, + "log_odds_chosen": 10.075663566589355, + "log_odds_ratio": -0.00024130381643772125, + "logits/chosen": -0.6611663103103638, + "logits/rejected": -0.6568069458007812, + "logps/chosen": -0.0006587379612028599, + "logps/rejected": -1.9626967906951904, + "loss": 0.6575, + "nll_loss": 0.1643550843000412, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.587379903066903e-05, + "rewards/margins": 0.1962037980556488, + "rewards/rejected": -0.1962696760892868, + "step": 6205 + }, + { + "epoch": 4.291839557399723, + "grad_norm": 6.695903301239014, + "learning_rate": 3.171200245889043e-05, + "log_odds_chosen": 10.23775863647461, + "log_odds_ratio": -0.00025228591402992606, + "logits/chosen": -0.6362022161483765, + "logits/rejected": -0.5913698673248291, + "logps/chosen": -0.0008647385984659195, + "logps/rejected": -1.8681186437606812, + "loss": 1.7754, + "nll_loss": 0.4438358545303345, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.647386857774109e-05, + "rewards/margins": 0.18672539293766022, + "rewards/rejected": -0.1868118792772293, + "step": 6206 + }, + { + "epoch": 4.29253112033195, + "grad_norm": 11.357057571411133, + "learning_rate": 3.1708160442600275e-05, + "log_odds_chosen": 9.543838500976562, + "log_odds_ratio": -0.003506321692839265, + "logits/chosen": -0.7389904856681824, + "logits/rejected": -0.8157459497451782, + "logps/chosen": -0.002916330471634865, + "logps/rejected": -2.448678970336914, + "loss": 1.3222, + "nll_loss": 0.3302033245563507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002916330413427204, + "rewards/margins": 0.24457626044750214, + "rewards/rejected": -0.24486789107322693, + "step": 6207 + }, + { + "epoch": 4.293222683264177, + "grad_norm": 10.25238037109375, + "learning_rate": 3.1704318426310134e-05, + "log_odds_chosen": 9.856980323791504, + "log_odds_ratio": -0.0007413313142023981, + "logits/chosen": -0.6136857867240906, + "logits/rejected": -0.6719917058944702, + "logps/chosen": -0.0017734833527356386, + "logps/rejected": -1.808726191520691, + "loss": 1.108, + "nll_loss": 0.2769239842891693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001773483381839469, + "rewards/margins": 0.1806952804327011, + "rewards/rejected": -0.18087263405323029, + "step": 6208 + }, + { + "epoch": 4.293914246196404, + "grad_norm": 9.573114395141602, + "learning_rate": 3.170047641001998e-05, + "log_odds_chosen": 9.353816032409668, + "log_odds_ratio": -0.11365321278572083, + "logits/chosen": -0.40987884998321533, + "logits/rejected": -0.5696350932121277, + "logps/chosen": -0.01775352470576763, + "logps/rejected": -2.1001336574554443, + "loss": 1.404, + "nll_loss": 0.33963605761528015, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0017753525171428919, + "rewards/margins": 0.20823803544044495, + "rewards/rejected": -0.21001337468624115, + "step": 6209 + }, + { + "epoch": 4.2946058091286305, + "grad_norm": 13.752728462219238, + "learning_rate": 3.169663439372983e-05, + "log_odds_chosen": 10.185070037841797, + "log_odds_ratio": -0.0002414601040072739, + "logits/chosen": -0.6724650859832764, + "logits/rejected": -0.6352249383926392, + "logps/chosen": -0.00029455244657583535, + "logps/rejected": -1.8959944248199463, + "loss": 1.0568, + "nll_loss": 0.26417943835258484, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9455244657583535e-05, + "rewards/margins": 0.18957000970840454, + "rewards/rejected": -0.18959945440292358, + "step": 6210 + }, + { + "epoch": 4.295297372060857, + "grad_norm": 8.722634315490723, + "learning_rate": 3.1692792377439684e-05, + "log_odds_chosen": 9.901902198791504, + "log_odds_ratio": -0.0005723107024095953, + "logits/chosen": -0.6653061509132385, + "logits/rejected": -0.745096743106842, + "logps/chosen": -0.0008094396907836199, + "logps/rejected": -1.9767580032348633, + "loss": 0.8833, + "nll_loss": 0.22076524794101715, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.094397344393656e-05, + "rewards/margins": 0.19759485125541687, + "rewards/rejected": -0.19767579436302185, + "step": 6211 + }, + { + "epoch": 4.295988934993084, + "grad_norm": 8.75367259979248, + "learning_rate": 3.168895036114953e-05, + "log_odds_chosen": 10.493782043457031, + "log_odds_ratio": -4.299761349102482e-05, + "logits/chosen": -0.6574910283088684, + "logits/rejected": -0.6594315767288208, + "logps/chosen": -0.0008926051668822765, + "logps/rejected": -2.085134744644165, + "loss": 1.665, + "nll_loss": 0.41624513268470764, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.926051668822765e-05, + "rewards/margins": 0.20842422544956207, + "rewards/rejected": -0.20851348340511322, + "step": 6212 + }, + { + "epoch": 4.296680497925311, + "grad_norm": 8.879002571105957, + "learning_rate": 3.168510834485938e-05, + "log_odds_chosen": 10.296270370483398, + "log_odds_ratio": -0.0007908447296358645, + "logits/chosen": -0.4450835585594177, + "logits/rejected": -0.546353280544281, + "logps/chosen": -0.004724219441413879, + "logps/rejected": -2.280630111694336, + "loss": 1.0695, + "nll_loss": 0.26730722188949585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047242193249985576, + "rewards/margins": 0.22759059071540833, + "rewards/rejected": -0.22806301712989807, + "step": 6213 + }, + { + "epoch": 4.297372060857538, + "grad_norm": 10.082377433776855, + "learning_rate": 3.1681266328569235e-05, + "log_odds_chosen": 9.513387680053711, + "log_odds_ratio": -0.0001838295574998483, + "logits/chosen": -0.626352071762085, + "logits/rejected": -0.6274489760398865, + "logps/chosen": -0.0003809796180576086, + "logps/rejected": -1.644523024559021, + "loss": 0.9256, + "nll_loss": 0.2313939332962036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809796180576086e-05, + "rewards/margins": 0.1644142121076584, + "rewards/rejected": -0.16445229947566986, + "step": 6214 + }, + { + "epoch": 4.298063623789765, + "grad_norm": 7.109057426452637, + "learning_rate": 3.167742431227909e-05, + "log_odds_chosen": 9.066092491149902, + "log_odds_ratio": -0.0006189306732267141, + "logits/chosen": -0.8764826655387878, + "logits/rejected": -0.9166613817214966, + "logps/chosen": -0.007097132503986359, + "logps/rejected": -2.346010684967041, + "loss": 1.5469, + "nll_loss": 0.38666069507598877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007097133784554899, + "rewards/margins": 0.2338913381099701, + "rewards/rejected": -0.23460106551647186, + "step": 6215 + }, + { + "epoch": 4.2987551867219915, + "grad_norm": 11.553153991699219, + "learning_rate": 3.167358229598893e-05, + "log_odds_chosen": 9.455713272094727, + "log_odds_ratio": -0.0008415338816121221, + "logits/chosen": -0.4361748993396759, + "logits/rejected": -0.500927209854126, + "logps/chosen": -0.012969731353223324, + "logps/rejected": -1.6395647525787354, + "loss": 1.77, + "nll_loss": 0.4424084424972534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012969732051715255, + "rewards/margins": 0.16265949606895447, + "rewards/rejected": -0.16395646333694458, + "step": 6216 + }, + { + "epoch": 4.299446749654218, + "grad_norm": 9.645062446594238, + "learning_rate": 3.166974027969879e-05, + "log_odds_chosen": 9.888082504272461, + "log_odds_ratio": -0.0001669441699050367, + "logits/chosen": -0.1050456166267395, + "logits/rejected": -0.24527603387832642, + "logps/chosen": -0.00054099754197523, + "logps/rejected": -1.55792236328125, + "loss": 1.1636, + "nll_loss": 0.2908813953399658, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4099757107906044e-05, + "rewards/margins": 0.15573813021183014, + "rewards/rejected": -0.155792236328125, + "step": 6217 + }, + { + "epoch": 4.300138312586445, + "grad_norm": 5.756036758422852, + "learning_rate": 3.166589826340864e-05, + "log_odds_chosen": 9.767498970031738, + "log_odds_ratio": -0.0009746247669681907, + "logits/chosen": -0.6501675844192505, + "logits/rejected": -0.6296141147613525, + "logps/chosen": -0.007666432298719883, + "logps/rejected": -2.4295947551727295, + "loss": 1.1535, + "nll_loss": 0.28826743364334106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007666432065889239, + "rewards/margins": 0.2421928197145462, + "rewards/rejected": -0.24295946955680847, + "step": 6218 + }, + { + "epoch": 4.300829875518672, + "grad_norm": 11.394264221191406, + "learning_rate": 3.166205624711849e-05, + "log_odds_chosen": 10.402400970458984, + "log_odds_ratio": -9.294199844589457e-05, + "logits/chosen": -0.5928707718849182, + "logits/rejected": -0.5991271138191223, + "logps/chosen": -0.0001955939515028149, + "logps/rejected": -1.826383352279663, + "loss": 1.2356, + "nll_loss": 0.3088921904563904, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9559396605473012e-05, + "rewards/margins": 0.18261878192424774, + "rewards/rejected": -0.18263834714889526, + "step": 6219 + }, + { + "epoch": 4.301521438450899, + "grad_norm": 11.74155044555664, + "learning_rate": 3.165821423082834e-05, + "log_odds_chosen": 8.715950965881348, + "log_odds_ratio": -0.01336581725627184, + "logits/chosen": -0.2825634181499481, + "logits/rejected": -0.387153685092926, + "logps/chosen": -0.012938362546265125, + "logps/rejected": -1.8848291635513306, + "loss": 1.0797, + "nll_loss": 0.2685927152633667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001293836277909577, + "rewards/margins": 0.18718910217285156, + "rewards/rejected": -0.18848291039466858, + "step": 6220 + }, + { + "epoch": 4.302213001383126, + "grad_norm": 11.069859504699707, + "learning_rate": 3.165437221453819e-05, + "log_odds_chosen": 10.328326225280762, + "log_odds_ratio": -6.953918637009338e-05, + "logits/chosen": -0.75736403465271, + "logits/rejected": -0.7972024083137512, + "logps/chosen": -0.00018067838391289115, + "logps/rejected": -1.670365810394287, + "loss": 0.8068, + "nll_loss": 0.20169062912464142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8067839846480638e-05, + "rewards/margins": 0.16701850295066833, + "rewards/rejected": -0.16703656315803528, + "step": 6221 + }, + { + "epoch": 4.3029045643153525, + "grad_norm": 9.046951293945312, + "learning_rate": 3.165053019824804e-05, + "log_odds_chosen": 10.310076713562012, + "log_odds_ratio": -0.00022209499729797244, + "logits/chosen": -0.1708485186100006, + "logits/rejected": -0.25514480471611023, + "logps/chosen": -0.001422532950527966, + "logps/rejected": -2.1108036041259766, + "loss": 1.151, + "nll_loss": 0.28773412108421326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014225330960471183, + "rewards/margins": 0.21093809604644775, + "rewards/rejected": -0.2110803723335266, + "step": 6222 + }, + { + "epoch": 4.303596127247579, + "grad_norm": 5.351454257965088, + "learning_rate": 3.1646688181957893e-05, + "log_odds_chosen": 8.290478706359863, + "log_odds_ratio": -0.0019745633471757174, + "logits/chosen": -0.47701704502105713, + "logits/rejected": -0.42275407910346985, + "logps/chosen": -0.0068691265769302845, + "logps/rejected": -2.001173496246338, + "loss": 1.1783, + "nll_loss": 0.2943659722805023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006869126809760928, + "rewards/margins": 0.199430450797081, + "rewards/rejected": -0.20011736452579498, + "step": 6223 + }, + { + "epoch": 4.304287690179806, + "grad_norm": 10.639006614685059, + "learning_rate": 3.1642846165667746e-05, + "log_odds_chosen": 7.769967079162598, + "log_odds_ratio": -0.10666661709547043, + "logits/chosen": -0.21983850002288818, + "logits/rejected": -0.13200643658638, + "logps/chosen": -0.02141474559903145, + "logps/rejected": -1.41822350025177, + "loss": 1.8189, + "nll_loss": 0.44406723976135254, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002141474513337016, + "rewards/margins": 0.139680877327919, + "rewards/rejected": -0.14182235300540924, + "step": 6224 + }, + { + "epoch": 4.304979253112033, + "grad_norm": 5.231021881103516, + "learning_rate": 3.163900414937759e-05, + "log_odds_chosen": 9.701105117797852, + "log_odds_ratio": -0.00032720022136345506, + "logits/chosen": -0.24703583121299744, + "logits/rejected": -0.290180504322052, + "logps/chosen": -0.006611797958612442, + "logps/rejected": -2.07330322265625, + "loss": 0.6531, + "nll_loss": 0.1632453203201294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006611798307858407, + "rewards/margins": 0.2066691517829895, + "rewards/rejected": -0.20733033120632172, + "step": 6225 + }, + { + "epoch": 4.30567081604426, + "grad_norm": 11.784645080566406, + "learning_rate": 3.163516213308745e-05, + "log_odds_chosen": 10.546085357666016, + "log_odds_ratio": -9.521207539364696e-05, + "logits/chosen": -0.21548408269882202, + "logits/rejected": -0.2985669672489166, + "logps/chosen": -0.0006060738814994693, + "logps/rejected": -2.327183485031128, + "loss": 0.7961, + "nll_loss": 0.1990167200565338, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0607391787925735e-05, + "rewards/margins": 0.23265774548053741, + "rewards/rejected": -0.2327183485031128, + "step": 6226 + }, + { + "epoch": 4.306362378976487, + "grad_norm": 14.44436264038086, + "learning_rate": 3.1631320116797296e-05, + "log_odds_chosen": 10.643296241760254, + "log_odds_ratio": -0.0004135722410865128, + "logits/chosen": -0.5684934854507446, + "logits/rejected": -0.4808126986026764, + "logps/chosen": -0.001616842346265912, + "logps/rejected": -2.3420138359069824, + "loss": 1.3186, + "nll_loss": 0.3295997381210327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016168422007467598, + "rewards/margins": 0.23403970897197723, + "rewards/rejected": -0.2342013716697693, + "step": 6227 + }, + { + "epoch": 4.3070539419087135, + "grad_norm": 12.899102210998535, + "learning_rate": 3.162747810050715e-05, + "log_odds_chosen": 8.68464469909668, + "log_odds_ratio": -0.0008347373805008829, + "logits/chosen": -0.35283803939819336, + "logits/rejected": -0.4004957675933838, + "logps/chosen": -0.0025889326352626085, + "logps/rejected": -1.9696290493011475, + "loss": 1.6656, + "nll_loss": 0.41631948947906494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002588932402431965, + "rewards/margins": 0.19670403003692627, + "rewards/rejected": -0.19696292281150818, + "step": 6228 + }, + { + "epoch": 4.30774550484094, + "grad_norm": 8.760489463806152, + "learning_rate": 3.1623636084217e-05, + "log_odds_chosen": 10.442303657531738, + "log_odds_ratio": -5.676070213667117e-05, + "logits/chosen": -0.6148850917816162, + "logits/rejected": -0.5957847237586975, + "logps/chosen": -0.0001890771382022649, + "logps/rejected": -1.8346233367919922, + "loss": 0.9068, + "nll_loss": 0.22670197486877441, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8907714547822252e-05, + "rewards/margins": 0.18344342708587646, + "rewards/rejected": -0.18346232175827026, + "step": 6229 + }, + { + "epoch": 4.308437067773167, + "grad_norm": 12.87412166595459, + "learning_rate": 3.161979406792685e-05, + "log_odds_chosen": 9.761995315551758, + "log_odds_ratio": -0.0008566225878894329, + "logits/chosen": -0.41904377937316895, + "logits/rejected": -0.5353478789329529, + "logps/chosen": -0.0015510877128690481, + "logps/rejected": -1.9486550092697144, + "loss": 1.1563, + "nll_loss": 0.2889835834503174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001551087771076709, + "rewards/margins": 0.1947103887796402, + "rewards/rejected": -0.19486550986766815, + "step": 6230 + }, + { + "epoch": 4.309128630705394, + "grad_norm": 6.854430198669434, + "learning_rate": 3.16159520516367e-05, + "log_odds_chosen": 9.272346496582031, + "log_odds_ratio": -0.00026201200671494007, + "logits/chosen": 0.016521714627742767, + "logits/rejected": -0.06517700105905533, + "logps/chosen": -0.0007249008631333709, + "logps/rejected": -1.488654613494873, + "loss": 0.9257, + "nll_loss": 0.2313927710056305, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.249008922372013e-05, + "rewards/margins": 0.14879296720027924, + "rewards/rejected": -0.1488654613494873, + "step": 6231 + }, + { + "epoch": 4.309820193637621, + "grad_norm": 11.679051399230957, + "learning_rate": 3.161211003534655e-05, + "log_odds_chosen": 9.514870643615723, + "log_odds_ratio": -0.000159199204063043, + "logits/chosen": -0.3904048800468445, + "logits/rejected": -0.39881831407546997, + "logps/chosen": -0.0010039537446573377, + "logps/rejected": -2.0680408477783203, + "loss": 1.295, + "nll_loss": 0.3237430453300476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010039537301054224, + "rewards/margins": 0.20670370757579803, + "rewards/rejected": -0.206804096698761, + "step": 6232 + }, + { + "epoch": 4.310511756569848, + "grad_norm": 6.508286476135254, + "learning_rate": 3.1608268019056404e-05, + "log_odds_chosen": 9.726067543029785, + "log_odds_ratio": -0.0003728815645445138, + "logits/chosen": -0.5468158721923828, + "logits/rejected": -0.6554831266403198, + "logps/chosen": -0.0006326130824163556, + "logps/rejected": -1.5322580337524414, + "loss": 1.2165, + "nll_loss": 0.3040837049484253, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.326130824163556e-05, + "rewards/margins": 0.15316253900527954, + "rewards/rejected": -0.15322580933570862, + "step": 6233 + }, + { + "epoch": 4.3112033195020745, + "grad_norm": 10.364480018615723, + "learning_rate": 3.160442600276625e-05, + "log_odds_chosen": 9.975172996520996, + "log_odds_ratio": -0.00010962605301756412, + "logits/chosen": -0.60814368724823, + "logits/rejected": -0.6393572688102722, + "logps/chosen": -0.00020576076349243522, + "logps/rejected": -1.593639850616455, + "loss": 0.886, + "nll_loss": 0.22148962318897247, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0576077076839283e-05, + "rewards/margins": 0.159343421459198, + "rewards/rejected": -0.1593639999628067, + "step": 6234 + }, + { + "epoch": 4.311894882434301, + "grad_norm": 11.097878456115723, + "learning_rate": 3.160058398647611e-05, + "log_odds_chosen": 10.253120422363281, + "log_odds_ratio": -7.164876296883449e-05, + "logits/chosen": -0.2703378200531006, + "logits/rejected": -0.33766970038414, + "logps/chosen": -0.00017250265227630734, + "logps/rejected": -1.751900553703308, + "loss": 0.9669, + "nll_loss": 0.24171766638755798, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7250265955226496e-05, + "rewards/margins": 0.1751728057861328, + "rewards/rejected": -0.1751900613307953, + "step": 6235 + }, + { + "epoch": 4.312586445366528, + "grad_norm": 7.3292083740234375, + "learning_rate": 3.1596741970185955e-05, + "log_odds_chosen": 9.296087265014648, + "log_odds_ratio": -0.00020717288134619594, + "logits/chosen": -0.4497448205947876, + "logits/rejected": -0.48011958599090576, + "logps/chosen": -0.0043256874196231365, + "logps/rejected": -1.6357231140136719, + "loss": 1.1735, + "nll_loss": 0.29336437582969666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004325687186792493, + "rewards/margins": 0.16313976049423218, + "rewards/rejected": -0.16357232630252838, + "step": 6236 + }, + { + "epoch": 4.313278008298755, + "grad_norm": 5.9431352615356445, + "learning_rate": 3.159289995389581e-05, + "log_odds_chosen": 10.564034461975098, + "log_odds_ratio": -4.40002913819626e-05, + "logits/chosen": -0.2893856465816498, + "logits/rejected": -0.27585193514823914, + "logps/chosen": -0.00027149386005476117, + "logps/rejected": -1.8761788606643677, + "loss": 0.8388, + "nll_loss": 0.20970144867897034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7149384550284594e-05, + "rewards/margins": 0.18759074807167053, + "rewards/rejected": -0.18761791288852692, + "step": 6237 + }, + { + "epoch": 4.313969571230982, + "grad_norm": 5.800085067749023, + "learning_rate": 3.158905793760566e-05, + "log_odds_chosen": 9.571613311767578, + "log_odds_ratio": -0.00014140141138341278, + "logits/chosen": -0.4417632520198822, + "logits/rejected": -0.5179417729377747, + "logps/chosen": -0.0002516951353754848, + "logps/rejected": -1.5897767543792725, + "loss": 0.905, + "nll_loss": 0.22622837126255035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5169516447931528e-05, + "rewards/margins": 0.1589525192975998, + "rewards/rejected": -0.1589776873588562, + "step": 6238 + }, + { + "epoch": 4.314661134163209, + "grad_norm": 7.026059150695801, + "learning_rate": 3.1585215921315505e-05, + "log_odds_chosen": 9.260969161987305, + "log_odds_ratio": -0.0007084406679496169, + "logits/chosen": -0.6787703037261963, + "logits/rejected": -0.6812416911125183, + "logps/chosen": -0.0006535067805089056, + "logps/rejected": -1.1790461540222168, + "loss": 0.7697, + "nll_loss": 0.1923505812883377, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535067950608209e-05, + "rewards/margins": 0.1178392618894577, + "rewards/rejected": -0.11790461093187332, + "step": 6239 + }, + { + "epoch": 4.3153526970954355, + "grad_norm": 7.0646185874938965, + "learning_rate": 3.158137390502536e-05, + "log_odds_chosen": 9.476990699768066, + "log_odds_ratio": -0.00029607085161842406, + "logits/chosen": -0.32096603512763977, + "logits/rejected": -0.37244951725006104, + "logps/chosen": -0.0006322484114207327, + "logps/rejected": -1.5482921600341797, + "loss": 0.7637, + "nll_loss": 0.190888911485672, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.322484841803089e-05, + "rewards/margins": 0.1547659933567047, + "rewards/rejected": -0.1548292189836502, + "step": 6240 + }, + { + "epoch": 4.316044260027662, + "grad_norm": 10.357381820678711, + "learning_rate": 3.157753188873521e-05, + "log_odds_chosen": 10.52917766571045, + "log_odds_ratio": -9.83021454885602e-05, + "logits/chosen": -0.43881893157958984, + "logits/rejected": -0.44511866569519043, + "logps/chosen": -0.00018113931582774967, + "logps/rejected": -1.9013128280639648, + "loss": 1.425, + "nll_loss": 0.3562512993812561, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8113931218977086e-05, + "rewards/margins": 0.19011318683624268, + "rewards/rejected": -0.1901313066482544, + "step": 6241 + }, + { + "epoch": 4.316735822959889, + "grad_norm": 7.102433204650879, + "learning_rate": 3.157368987244506e-05, + "log_odds_chosen": 9.688699722290039, + "log_odds_ratio": -8.351320866495371e-05, + "logits/chosen": -0.4052876830101013, + "logits/rejected": -0.48662394285202026, + "logps/chosen": -0.00021907503833062947, + "logps/rejected": -1.4428023099899292, + "loss": 1.0505, + "nll_loss": 0.2626084089279175, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1907504560658708e-05, + "rewards/margins": 0.1442583203315735, + "rewards/rejected": -0.14428022503852844, + "step": 6242 + }, + { + "epoch": 4.317427385892116, + "grad_norm": 7.361631870269775, + "learning_rate": 3.156984785615491e-05, + "log_odds_chosen": 10.357135772705078, + "log_odds_ratio": -0.00010510971333133057, + "logits/chosen": -0.6652143001556396, + "logits/rejected": -0.6585478186607361, + "logps/chosen": -0.000304955814499408, + "logps/rejected": -1.9736860990524292, + "loss": 1.1196, + "nll_loss": 0.27989059686660767, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0495582905132324e-05, + "rewards/margins": 0.19733810424804688, + "rewards/rejected": -0.19736860692501068, + "step": 6243 + }, + { + "epoch": 4.318118948824343, + "grad_norm": 9.14620304107666, + "learning_rate": 3.156600583986477e-05, + "log_odds_chosen": 9.410257339477539, + "log_odds_ratio": -0.00020506588043645024, + "logits/chosen": -0.6210223436355591, + "logits/rejected": -0.7020326256752014, + "logps/chosen": -0.0007779947482049465, + "logps/rejected": -1.7426540851593018, + "loss": 0.9272, + "nll_loss": 0.2317844182252884, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.77994719101116e-05, + "rewards/margins": 0.17418763041496277, + "rewards/rejected": -0.17426541447639465, + "step": 6244 + }, + { + "epoch": 4.31881051175657, + "grad_norm": 8.903249740600586, + "learning_rate": 3.156216382357461e-05, + "log_odds_chosen": 9.798042297363281, + "log_odds_ratio": -0.00030079117277637124, + "logits/chosen": -0.6821163296699524, + "logits/rejected": -0.69622802734375, + "logps/chosen": -0.00029044965049251914, + "logps/rejected": -1.3761014938354492, + "loss": 1.0466, + "nll_loss": 0.26163235306739807, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.904496795963496e-05, + "rewards/margins": 0.13758111000061035, + "rewards/rejected": -0.13761015236377716, + "step": 6245 + }, + { + "epoch": 4.319502074688796, + "grad_norm": 13.108745574951172, + "learning_rate": 3.1558321807284466e-05, + "log_odds_chosen": 11.272746086120605, + "log_odds_ratio": -1.8102173271472566e-05, + "logits/chosen": -0.5722870826721191, + "logits/rejected": -0.6570054292678833, + "logps/chosen": -0.0001335109700448811, + "logps/rejected": -2.3407795429229736, + "loss": 0.8703, + "nll_loss": 0.21758487820625305, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3351098459679633e-05, + "rewards/margins": 0.23406460881233215, + "rewards/rejected": -0.23407796025276184, + "step": 6246 + }, + { + "epoch": 4.320193637621023, + "grad_norm": 4.177562713623047, + "learning_rate": 3.155447979099432e-05, + "log_odds_chosen": 8.351842880249023, + "log_odds_ratio": -0.008854147978127003, + "logits/chosen": -0.3191889226436615, + "logits/rejected": -0.37525224685668945, + "logps/chosen": -0.004266391042619944, + "logps/rejected": -1.2601978778839111, + "loss": 0.7204, + "nll_loss": 0.1792147159576416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042663910426199436, + "rewards/margins": 0.1255931556224823, + "rewards/rejected": -0.12601980566978455, + "step": 6247 + }, + { + "epoch": 4.32088520055325, + "grad_norm": 9.98576831817627, + "learning_rate": 3.1550637774704164e-05, + "log_odds_chosen": 8.758403778076172, + "log_odds_ratio": -0.0007475988240912557, + "logits/chosen": -0.4082600772380829, + "logits/rejected": -0.4571593105792999, + "logps/chosen": -0.001500868471339345, + "logps/rejected": -1.382610559463501, + "loss": 0.8104, + "nll_loss": 0.20252512395381927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001500868529547006, + "rewards/margins": 0.13811098039150238, + "rewards/rejected": -0.13826106488704681, + "step": 6248 + }, + { + "epoch": 4.321576763485477, + "grad_norm": 10.829977035522461, + "learning_rate": 3.1546795758414016e-05, + "log_odds_chosen": 10.14036750793457, + "log_odds_ratio": -7.867669046390802e-05, + "logits/chosen": -0.6110687851905823, + "logits/rejected": -0.6491025686264038, + "logps/chosen": -0.0002936455130111426, + "logps/rejected": -1.9828588962554932, + "loss": 1.0259, + "nll_loss": 0.25645485520362854, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9364551664912142e-05, + "rewards/margins": 0.19825652241706848, + "rewards/rejected": -0.19828589260578156, + "step": 6249 + }, + { + "epoch": 4.322268326417704, + "grad_norm": 10.261870384216309, + "learning_rate": 3.154295374212387e-05, + "log_odds_chosen": 10.657238006591797, + "log_odds_ratio": -0.0002526202879380435, + "logits/chosen": -0.0940227285027504, + "logits/rejected": -0.11147341132164001, + "logps/chosen": -0.00025920767802745104, + "logps/rejected": -2.4953396320343018, + "loss": 1.2257, + "nll_loss": 0.3063918948173523, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5920769985532388e-05, + "rewards/margins": 0.2495080530643463, + "rewards/rejected": -0.2495339810848236, + "step": 6250 + }, + { + "epoch": 4.322959889349931, + "grad_norm": 6.061576843261719, + "learning_rate": 3.153911172583372e-05, + "log_odds_chosen": 9.507963180541992, + "log_odds_ratio": -0.009043208323419094, + "logits/chosen": -0.03186788409948349, + "logits/rejected": -0.15542441606521606, + "logps/chosen": -0.007601817604154348, + "logps/rejected": -2.2683682441711426, + "loss": 1.1288, + "nll_loss": 0.28129690885543823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007601818069815636, + "rewards/margins": 0.2260766476392746, + "rewards/rejected": -0.22683684527873993, + "step": 6251 + }, + { + "epoch": 4.323651452282157, + "grad_norm": 10.646059036254883, + "learning_rate": 3.153526970954357e-05, + "log_odds_chosen": 11.553914070129395, + "log_odds_ratio": -2.8313286748016253e-05, + "logits/chosen": -0.6371780037879944, + "logits/rejected": -0.666010320186615, + "logps/chosen": -0.0007130609010346234, + "logps/rejected": -3.1989545822143555, + "loss": 0.8338, + "nll_loss": 0.20845037698745728, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.130609446903691e-05, + "rewards/margins": 0.31982409954071045, + "rewards/rejected": -0.3198954463005066, + "step": 6252 + }, + { + "epoch": 4.324343015214384, + "grad_norm": 6.541245460510254, + "learning_rate": 3.1531427693253426e-05, + "log_odds_chosen": 8.380668640136719, + "log_odds_ratio": -0.001909209880977869, + "logits/chosen": -0.39806440472602844, + "logits/rejected": -0.35889142751693726, + "logps/chosen": -0.0015298908110707998, + "logps/rejected": -1.3094111680984497, + "loss": 1.3593, + "nll_loss": 0.3396381139755249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015298907237593085, + "rewards/margins": 0.13078811764717102, + "rewards/rejected": -0.13094110786914825, + "step": 6253 + }, + { + "epoch": 4.325034578146611, + "grad_norm": 12.133667945861816, + "learning_rate": 3.152758567696327e-05, + "log_odds_chosen": 9.911739349365234, + "log_odds_ratio": -0.00013189579476602376, + "logits/chosen": -0.47079241275787354, + "logits/rejected": -0.4168504476547241, + "logps/chosen": -0.00032032723538577557, + "logps/rejected": -1.798808217048645, + "loss": 1.2075, + "nll_loss": 0.30186164379119873, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2032723538577557e-05, + "rewards/margins": 0.1798487901687622, + "rewards/rejected": -0.17988081276416779, + "step": 6254 + }, + { + "epoch": 4.325726141078838, + "grad_norm": 7.795219421386719, + "learning_rate": 3.1523743660673124e-05, + "log_odds_chosen": 8.880459785461426, + "log_odds_ratio": -0.008680138736963272, + "logits/chosen": -0.5198830962181091, + "logits/rejected": -0.5463992953300476, + "logps/chosen": -0.004741398151963949, + "logps/rejected": -1.9662740230560303, + "loss": 0.6808, + "nll_loss": 0.1693269908428192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047413978609256446, + "rewards/margins": 0.19615328311920166, + "rewards/rejected": -0.1966274082660675, + "step": 6255 + }, + { + "epoch": 4.326417704011065, + "grad_norm": 13.251655578613281, + "learning_rate": 3.151990164438298e-05, + "log_odds_chosen": 10.546645164489746, + "log_odds_ratio": -0.00021031236974522471, + "logits/chosen": -0.6055862307548523, + "logits/rejected": -0.671293318271637, + "logps/chosen": -0.001339723588898778, + "logps/rejected": -2.2606611251831055, + "loss": 1.0264, + "nll_loss": 0.2565711438655853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013397236762102693, + "rewards/margins": 0.22593215107917786, + "rewards/rejected": -0.22606611251831055, + "step": 6256 + }, + { + "epoch": 4.327109266943292, + "grad_norm": 8.937263488769531, + "learning_rate": 3.151605962809282e-05, + "log_odds_chosen": 10.133855819702148, + "log_odds_ratio": -7.504695531679317e-05, + "logits/chosen": -0.540047287940979, + "logits/rejected": -0.5837610363960266, + "logps/chosen": -0.00033649199758656323, + "logps/rejected": -1.7779037952423096, + "loss": 0.6846, + "nll_loss": 0.1711336225271225, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3649201213847846e-05, + "rewards/margins": 0.17775672674179077, + "rewards/rejected": -0.17779038846492767, + "step": 6257 + }, + { + "epoch": 4.327800829875518, + "grad_norm": 7.280256271362305, + "learning_rate": 3.1512217611802675e-05, + "log_odds_chosen": 9.665289878845215, + "log_odds_ratio": -0.0004072503070347011, + "logits/chosen": -0.03371931612491608, + "logits/rejected": -0.18863540887832642, + "logps/chosen": -0.002507374854758382, + "logps/rejected": -2.404458522796631, + "loss": 0.9425, + "nll_loss": 0.23557284474372864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025073750293813646, + "rewards/margins": 0.2401951253414154, + "rewards/rejected": -0.24044585227966309, + "step": 6258 + }, + { + "epoch": 4.328492392807745, + "grad_norm": 5.6786394119262695, + "learning_rate": 3.150837559551252e-05, + "log_odds_chosen": 9.919912338256836, + "log_odds_ratio": -0.00016488172695972025, + "logits/chosen": -0.6246330738067627, + "logits/rejected": -0.6343197822570801, + "logps/chosen": -0.0025698572862893343, + "logps/rejected": -2.1179940700531006, + "loss": 0.6839, + "nll_loss": 0.17096826434135437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002569857460912317, + "rewards/margins": 0.21154239773750305, + "rewards/rejected": -0.21179938316345215, + "step": 6259 + }, + { + "epoch": 4.329183955739972, + "grad_norm": 11.633374214172363, + "learning_rate": 3.150453357922238e-05, + "log_odds_chosen": 9.088045120239258, + "log_odds_ratio": -0.030026573687791824, + "logits/chosen": -0.5848643779754639, + "logits/rejected": -0.5763362050056458, + "logps/chosen": -0.006531553342938423, + "logps/rejected": -1.5310980081558228, + "loss": 1.1287, + "nll_loss": 0.2791779637336731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006531553226523101, + "rewards/margins": 0.1524566411972046, + "rewards/rejected": -0.15310980379581451, + "step": 6260 + }, + { + "epoch": 4.329875518672199, + "grad_norm": 10.868045806884766, + "learning_rate": 3.1500691562932225e-05, + "log_odds_chosen": 10.415914535522461, + "log_odds_ratio": -0.00022055921726860106, + "logits/chosen": -0.14247390627861023, + "logits/rejected": -0.2097904235124588, + "logps/chosen": -0.0006233732565306127, + "logps/rejected": -2.1600489616394043, + "loss": 0.8514, + "nll_loss": 0.21282441914081573, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.23373271082528e-05, + "rewards/margins": 0.21594256162643433, + "rewards/rejected": -0.216004878282547, + "step": 6261 + }, + { + "epoch": 4.330567081604426, + "grad_norm": 12.684412002563477, + "learning_rate": 3.149684954664208e-05, + "log_odds_chosen": 11.01368522644043, + "log_odds_ratio": -9.89335312624462e-05, + "logits/chosen": -0.7479545474052429, + "logits/rejected": -0.8491002321243286, + "logps/chosen": -0.00012943128240294755, + "logps/rejected": -1.9096832275390625, + "loss": 1.6489, + "nll_loss": 0.4122098684310913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2943129149789456e-05, + "rewards/margins": 0.19095538556575775, + "rewards/rejected": -0.1909683346748352, + "step": 6262 + }, + { + "epoch": 4.3312586445366525, + "grad_norm": 9.980140686035156, + "learning_rate": 3.149300753035193e-05, + "log_odds_chosen": 10.222119331359863, + "log_odds_ratio": -0.00010748507338576019, + "logits/chosen": -0.844120979309082, + "logits/rejected": -0.9237422943115234, + "logps/chosen": -0.0004031779244542122, + "logps/rejected": -1.920005202293396, + "loss": 1.0495, + "nll_loss": 0.2623547315597534, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0317790990229696e-05, + "rewards/margins": 0.19196021556854248, + "rewards/rejected": -0.19200052320957184, + "step": 6263 + }, + { + "epoch": 4.331950207468879, + "grad_norm": 9.320433616638184, + "learning_rate": 3.148916551406178e-05, + "log_odds_chosen": 10.097317695617676, + "log_odds_ratio": -0.002059478312730789, + "logits/chosen": -0.7607566714286804, + "logits/rejected": -0.7903776168823242, + "logps/chosen": -0.012105059809982777, + "logps/rejected": -2.138204336166382, + "loss": 1.2992, + "nll_loss": 0.3245936334133148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012105060741305351, + "rewards/margins": 0.2126099318265915, + "rewards/rejected": -0.2138204574584961, + "step": 6264 + }, + { + "epoch": 4.332641770401106, + "grad_norm": 14.335369110107422, + "learning_rate": 3.148532349777163e-05, + "log_odds_chosen": 10.663875579833984, + "log_odds_ratio": -5.3084073442732915e-05, + "logits/chosen": -0.6468909382820129, + "logits/rejected": -0.6795991063117981, + "logps/chosen": -0.0003323418786749244, + "logps/rejected": -2.278066396713257, + "loss": 1.4742, + "nll_loss": 0.36855268478393555, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.323418786749244e-05, + "rewards/margins": 0.22777341306209564, + "rewards/rejected": -0.22780664265155792, + "step": 6265 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 9.74260139465332, + "learning_rate": 3.148148148148148e-05, + "log_odds_chosen": 10.406856536865234, + "log_odds_ratio": -5.082024654257111e-05, + "logits/chosen": -0.359982967376709, + "logits/rejected": -0.37254437804222107, + "logps/chosen": -0.0001446517271688208, + "logps/rejected": -1.5443391799926758, + "loss": 0.9381, + "nll_loss": 0.23451650142669678, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4465171261690557e-05, + "rewards/margins": 0.1544194370508194, + "rewards/rejected": -0.15443390607833862, + "step": 6266 + }, + { + "epoch": 4.33402489626556, + "grad_norm": 9.490935325622559, + "learning_rate": 3.147763946519133e-05, + "log_odds_chosen": 9.49018669128418, + "log_odds_ratio": -0.003614935325458646, + "logits/chosen": -0.7341195344924927, + "logits/rejected": -0.7036961913108826, + "logps/chosen": -0.00993090309202671, + "logps/rejected": -1.7946966886520386, + "loss": 1.4416, + "nll_loss": 0.360050767660141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009930903324857354, + "rewards/margins": 0.17847657203674316, + "rewards/rejected": -0.17946967482566833, + "step": 6267 + }, + { + "epoch": 4.334716459197787, + "grad_norm": 12.99151611328125, + "learning_rate": 3.147379744890118e-05, + "log_odds_chosen": 8.455595970153809, + "log_odds_ratio": -0.0108507564291358, + "logits/chosen": -0.175982266664505, + "logits/rejected": -0.15738755464553833, + "logps/chosen": -0.004563149530440569, + "logps/rejected": -1.2955995798110962, + "loss": 1.3614, + "nll_loss": 0.3392714560031891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045631497050635517, + "rewards/margins": 0.1291036456823349, + "rewards/rejected": -0.1295599639415741, + "step": 6268 + }, + { + "epoch": 4.3354080221300135, + "grad_norm": 10.58656120300293, + "learning_rate": 3.146995543261104e-05, + "log_odds_chosen": 9.847926139831543, + "log_odds_ratio": -0.00018483387248124927, + "logits/chosen": -0.2619816064834595, + "logits/rejected": -0.36310145258903503, + "logps/chosen": -0.0008300583576783538, + "logps/rejected": -2.2136294841766357, + "loss": 0.9725, + "nll_loss": 0.24309849739074707, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.300583431264386e-05, + "rewards/margins": 0.22127996385097504, + "rewards/rejected": -0.22136294841766357, + "step": 6269 + }, + { + "epoch": 4.33609958506224, + "grad_norm": 5.926123142242432, + "learning_rate": 3.1466113416320884e-05, + "log_odds_chosen": 9.165082931518555, + "log_odds_ratio": -0.0006193573353812099, + "logits/chosen": -0.5741816759109497, + "logits/rejected": -0.5986604690551758, + "logps/chosen": -0.002135189948603511, + "logps/rejected": -1.836089015007019, + "loss": 1.3148, + "nll_loss": 0.3286486566066742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021351898612920195, + "rewards/margins": 0.1833954006433487, + "rewards/rejected": -0.18360891938209534, + "step": 6270 + }, + { + "epoch": 4.336791147994467, + "grad_norm": 12.717452049255371, + "learning_rate": 3.1462271400030736e-05, + "log_odds_chosen": 10.55025863647461, + "log_odds_ratio": -8.598245040047914e-05, + "logits/chosen": -0.4449648857116699, + "logits/rejected": -0.6279736161231995, + "logps/chosen": -0.00019479054026305676, + "logps/rejected": -1.6564853191375732, + "loss": 1.5201, + "nll_loss": 0.380012571811676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9479053662507795e-05, + "rewards/margins": 0.1656290590763092, + "rewards/rejected": -0.16564851999282837, + "step": 6271 + }, + { + "epoch": 4.337482710926694, + "grad_norm": 7.52541971206665, + "learning_rate": 3.145842938374059e-05, + "log_odds_chosen": 9.816986083984375, + "log_odds_ratio": -0.0005100779235363007, + "logits/chosen": -0.39175713062286377, + "logits/rejected": -0.45417341589927673, + "logps/chosen": -0.0009921849705278873, + "logps/rejected": -2.0651535987854004, + "loss": 1.6299, + "nll_loss": 0.4074295461177826, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.921850869432092e-05, + "rewards/margins": 0.20641617476940155, + "rewards/rejected": -0.20651540160179138, + "step": 6272 + }, + { + "epoch": 4.338174273858921, + "grad_norm": 5.848926067352295, + "learning_rate": 3.145458736745044e-05, + "log_odds_chosen": 10.613710403442383, + "log_odds_ratio": -0.00014787871623411775, + "logits/chosen": -0.25838908553123474, + "logits/rejected": -0.3856915235519409, + "logps/chosen": -0.007207159884274006, + "logps/rejected": -2.8834474086761475, + "loss": 1.2895, + "nll_loss": 0.3223702609539032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007207159651443362, + "rewards/margins": 0.2876240313053131, + "rewards/rejected": -0.28834474086761475, + "step": 6273 + }, + { + "epoch": 4.338865836791148, + "grad_norm": 5.730781078338623, + "learning_rate": 3.145074535116029e-05, + "log_odds_chosen": 10.336332321166992, + "log_odds_ratio": -9.005053289001808e-05, + "logits/chosen": -0.3600419759750366, + "logits/rejected": -0.4638018012046814, + "logps/chosen": -0.00014610840298701078, + "logps/rejected": -1.6459441184997559, + "loss": 0.6049, + "nll_loss": 0.15122506022453308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4610840480600018e-05, + "rewards/margins": 0.16457980871200562, + "rewards/rejected": -0.16459441184997559, + "step": 6274 + }, + { + "epoch": 4.3395573997233745, + "grad_norm": 10.16214656829834, + "learning_rate": 3.144690333487014e-05, + "log_odds_chosen": 11.042724609375, + "log_odds_ratio": -0.00011868192814290524, + "logits/chosen": -0.5489569306373596, + "logits/rejected": -0.548592746257782, + "logps/chosen": -0.00011969899060204625, + "logps/rejected": -2.194108724594116, + "loss": 1.259, + "nll_loss": 0.3147435784339905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1969898878305685e-05, + "rewards/margins": 0.21939890086650848, + "rewards/rejected": -0.21941088140010834, + "step": 6275 + }, + { + "epoch": 4.340248962655601, + "grad_norm": 5.415022373199463, + "learning_rate": 3.144306131857999e-05, + "log_odds_chosen": 8.669913291931152, + "log_odds_ratio": -0.02633490413427353, + "logits/chosen": -0.24330535531044006, + "logits/rejected": -0.24419859051704407, + "logps/chosen": -0.009514041244983673, + "logps/rejected": -2.1480984687805176, + "loss": 1.089, + "nll_loss": 0.26962801814079285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009514041012153029, + "rewards/margins": 0.2138584554195404, + "rewards/rejected": -0.2148098647594452, + "step": 6276 + }, + { + "epoch": 4.340940525587828, + "grad_norm": 10.989092826843262, + "learning_rate": 3.1439219302289844e-05, + "log_odds_chosen": 9.908502578735352, + "log_odds_ratio": -0.0016798458527773619, + "logits/chosen": -0.25677382946014404, + "logits/rejected": -0.2983488440513611, + "logps/chosen": -0.0018546772189438343, + "logps/rejected": -2.5655713081359863, + "loss": 1.1805, + "nll_loss": 0.2949519157409668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018546771025285125, + "rewards/margins": 0.2563716769218445, + "rewards/rejected": -0.2565571665763855, + "step": 6277 + }, + { + "epoch": 4.341632088520055, + "grad_norm": 8.060803413391113, + "learning_rate": 3.1435377285999696e-05, + "log_odds_chosen": 8.83830451965332, + "log_odds_ratio": -0.0004681129357777536, + "logits/chosen": -0.5368528962135315, + "logits/rejected": -0.5375138521194458, + "logps/chosen": -0.00047587224980816245, + "logps/rejected": -1.2851731777191162, + "loss": 1.236, + "nll_loss": 0.30894869565963745, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7587229346390814e-05, + "rewards/margins": 0.12846973538398743, + "rewards/rejected": -0.12851732969284058, + "step": 6278 + }, + { + "epoch": 4.342323651452282, + "grad_norm": 8.903595924377441, + "learning_rate": 3.143153526970954e-05, + "log_odds_chosen": 10.817100524902344, + "log_odds_ratio": -3.656329499790445e-05, + "logits/chosen": -0.19402526319026947, + "logits/rejected": -0.29778486490249634, + "logps/chosen": -0.00024108865181915462, + "logps/rejected": -2.496133327484131, + "loss": 1.2467, + "nll_loss": 0.31167399883270264, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4108867364702746e-05, + "rewards/margins": 0.2495892345905304, + "rewards/rejected": -0.24961334466934204, + "step": 6279 + }, + { + "epoch": 4.343015214384509, + "grad_norm": 6.821666240692139, + "learning_rate": 3.1427693253419395e-05, + "log_odds_chosen": 10.714070320129395, + "log_odds_ratio": -8.616933337179944e-05, + "logits/chosen": -0.5038744807243347, + "logits/rejected": -0.5399574041366577, + "logps/chosen": -0.0026380920317023993, + "logps/rejected": -3.0238983631134033, + "loss": 0.9245, + "nll_loss": 0.23110723495483398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026380919734947383, + "rewards/margins": 0.3021259903907776, + "rewards/rejected": -0.30238983035087585, + "step": 6280 + }, + { + "epoch": 4.3437067773167355, + "grad_norm": 10.771331787109375, + "learning_rate": 3.142385123712925e-05, + "log_odds_chosen": 10.633634567260742, + "log_odds_ratio": -3.2800868211779743e-05, + "logits/chosen": -0.2504116892814636, + "logits/rejected": -0.2996535301208496, + "logps/chosen": -0.00015955072012729943, + "logps/rejected": -1.9660744667053223, + "loss": 1.3028, + "nll_loss": 0.32570430636405945, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.59550709213363e-05, + "rewards/margins": 0.19659149646759033, + "rewards/rejected": -0.19660745561122894, + "step": 6281 + }, + { + "epoch": 4.344398340248962, + "grad_norm": 5.823162078857422, + "learning_rate": 3.14200092208391e-05, + "log_odds_chosen": 8.517678260803223, + "log_odds_ratio": -0.0007794699631631374, + "logits/chosen": -0.6882646083831787, + "logits/rejected": -0.7718308568000793, + "logps/chosen": -0.00037540867924690247, + "logps/rejected": -1.1263272762298584, + "loss": 1.5819, + "nll_loss": 0.39540088176727295, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.754087083507329e-05, + "rewards/margins": 0.11259518563747406, + "rewards/rejected": -0.11263272911310196, + "step": 6282 + }, + { + "epoch": 4.345089903181189, + "grad_norm": 6.239787578582764, + "learning_rate": 3.1416167204548945e-05, + "log_odds_chosen": 8.946929931640625, + "log_odds_ratio": -0.004028361290693283, + "logits/chosen": -0.254109263420105, + "logits/rejected": -0.32316648960113525, + "logps/chosen": -0.0019965223036706448, + "logps/rejected": -1.1533297300338745, + "loss": 1.5395, + "nll_loss": 0.38447076082229614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001996522187255323, + "rewards/margins": 0.11513333022594452, + "rewards/rejected": -0.11533297598361969, + "step": 6283 + }, + { + "epoch": 4.345781466113416, + "grad_norm": 4.709649085998535, + "learning_rate": 3.1412325188258804e-05, + "log_odds_chosen": 7.86823844909668, + "log_odds_ratio": -0.018861282616853714, + "logits/chosen": -0.785298764705658, + "logits/rejected": -0.7515165209770203, + "logps/chosen": -0.007227227091789246, + "logps/rejected": -1.1606642007827759, + "loss": 1.1264, + "nll_loss": 0.2797185480594635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007227227324619889, + "rewards/margins": 0.11534368991851807, + "rewards/rejected": -0.11606641858816147, + "step": 6284 + }, + { + "epoch": 4.346473029045643, + "grad_norm": 12.456677436828613, + "learning_rate": 3.140848317196865e-05, + "log_odds_chosen": 9.091700553894043, + "log_odds_ratio": -0.0003683864197228104, + "logits/chosen": -0.6549928784370422, + "logits/rejected": -0.6938451528549194, + "logps/chosen": -0.0008419096702709794, + "logps/rejected": -1.842326045036316, + "loss": 1.144, + "nll_loss": 0.2859728932380676, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.41909641167149e-05, + "rewards/margins": 0.1841484010219574, + "rewards/rejected": -0.18423257768154144, + "step": 6285 + }, + { + "epoch": 4.34716459197787, + "grad_norm": 12.204995155334473, + "learning_rate": 3.14046411556785e-05, + "log_odds_chosen": 11.336495399475098, + "log_odds_ratio": -9.398195834364742e-05, + "logits/chosen": -0.28229689598083496, + "logits/rejected": -0.40144163370132446, + "logps/chosen": -0.00024727190611884, + "logps/rejected": -2.923536539077759, + "loss": 1.3709, + "nll_loss": 0.34271159768104553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4727192794671282e-05, + "rewards/margins": 0.29232892394065857, + "rewards/rejected": -0.29235365986824036, + "step": 6286 + }, + { + "epoch": 4.3478561549100965, + "grad_norm": 13.519350051879883, + "learning_rate": 3.1400799139388355e-05, + "log_odds_chosen": 10.042278289794922, + "log_odds_ratio": -0.00012913253158330917, + "logits/chosen": -0.526986300945282, + "logits/rejected": -0.6442042589187622, + "logps/chosen": -0.0019563455134630203, + "logps/rejected": -2.9566519260406494, + "loss": 1.6818, + "nll_loss": 0.4204367995262146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001956345367943868, + "rewards/margins": 0.29546958208084106, + "rewards/rejected": -0.2956652045249939, + "step": 6287 + }, + { + "epoch": 4.348547717842323, + "grad_norm": 6.363492965698242, + "learning_rate": 3.13969571230982e-05, + "log_odds_chosen": 9.305254936218262, + "log_odds_ratio": -0.00025624711997807026, + "logits/chosen": -0.6630715131759644, + "logits/rejected": -0.6110356450080872, + "logps/chosen": -0.00042787453276105225, + "logps/rejected": -1.1967389583587646, + "loss": 0.5836, + "nll_loss": 0.14586985111236572, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.278745473129675e-05, + "rewards/margins": 0.11963111162185669, + "rewards/rejected": -0.11967390775680542, + "step": 6288 + }, + { + "epoch": 4.34923928077455, + "grad_norm": 7.60081672668457, + "learning_rate": 3.139311510680805e-05, + "log_odds_chosen": 10.671063423156738, + "log_odds_ratio": -5.429089651443064e-05, + "logits/chosen": -0.28530365228652954, + "logits/rejected": -0.4221542775630951, + "logps/chosen": -0.00037182040978223085, + "logps/rejected": -2.3406002521514893, + "loss": 1.2078, + "nll_loss": 0.30194365978240967, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.718204243341461e-05, + "rewards/margins": 0.2340228259563446, + "rewards/rejected": -0.23406001925468445, + "step": 6289 + }, + { + "epoch": 4.349930843706777, + "grad_norm": 5.530222415924072, + "learning_rate": 3.1389273090517905e-05, + "log_odds_chosen": 9.497110366821289, + "log_odds_ratio": -0.0004657884710468352, + "logits/chosen": -0.3096838593482971, + "logits/rejected": -0.475458562374115, + "logps/chosen": -0.00798887200653553, + "logps/rejected": -1.6125645637512207, + "loss": 0.9461, + "nll_loss": 0.23648670315742493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007988871657289565, + "rewards/margins": 0.1604575663805008, + "rewards/rejected": -0.16125646233558655, + "step": 6290 + }, + { + "epoch": 4.350622406639004, + "grad_norm": 8.4190673828125, + "learning_rate": 3.138543107422776e-05, + "log_odds_chosen": 8.526147842407227, + "log_odds_ratio": -0.03643810376524925, + "logits/chosen": -0.5177884697914124, + "logits/rejected": -0.7100130319595337, + "logps/chosen": -0.011017205193638802, + "logps/rejected": -1.941372275352478, + "loss": 1.2788, + "nll_loss": 0.31605058908462524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011017204960808158, + "rewards/margins": 0.1930355280637741, + "rewards/rejected": -0.19413723051548004, + "step": 6291 + }, + { + "epoch": 4.351313969571231, + "grad_norm": 232.70631408691406, + "learning_rate": 3.1381589057937604e-05, + "log_odds_chosen": 8.946935653686523, + "log_odds_ratio": -0.7460908889770508, + "logits/chosen": -0.2698379158973694, + "logits/rejected": -0.3084045648574829, + "logps/chosen": -0.03984811156988144, + "logps/rejected": -2.1382994651794434, + "loss": 1.2856, + "nll_loss": 0.2467801868915558, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003984811250120401, + "rewards/margins": 0.2098451405763626, + "rewards/rejected": -0.21382997930049896, + "step": 6292 + }, + { + "epoch": 4.3520055325034575, + "grad_norm": 16.808666229248047, + "learning_rate": 3.137774704164746e-05, + "log_odds_chosen": 9.995148658752441, + "log_odds_ratio": -5.602094461210072e-05, + "logits/chosen": -0.23712369799613953, + "logits/rejected": -0.39177846908569336, + "logps/chosen": -0.0004925878602080047, + "logps/rejected": -2.1555447578430176, + "loss": 1.1607, + "nll_loss": 0.2901768386363983, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.925878965877928e-05, + "rewards/margins": 0.2155052125453949, + "rewards/rejected": -0.21555446088314056, + "step": 6293 + }, + { + "epoch": 4.352697095435684, + "grad_norm": 14.556714057922363, + "learning_rate": 3.137390502535731e-05, + "log_odds_chosen": 10.153697967529297, + "log_odds_ratio": -0.000147096739965491, + "logits/chosen": -1.0491799116134644, + "logits/rejected": -1.1178945302963257, + "logps/chosen": -0.0002521372225601226, + "logps/rejected": -1.6516971588134766, + "loss": 1.8252, + "nll_loss": 0.4562966525554657, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5213725166395307e-05, + "rewards/margins": 0.16514450311660767, + "rewards/rejected": -0.16516971588134766, + "step": 6294 + }, + { + "epoch": 4.353388658367911, + "grad_norm": 6.729737758636475, + "learning_rate": 3.137006300906716e-05, + "log_odds_chosen": 9.903938293457031, + "log_odds_ratio": -0.0006013888050802052, + "logits/chosen": -0.2919148802757263, + "logits/rejected": -0.3714301884174347, + "logps/chosen": -0.0004974387702532113, + "logps/rejected": -1.703548789024353, + "loss": 1.3713, + "nll_loss": 0.34275490045547485, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.974387411493808e-05, + "rewards/margins": 0.17030513286590576, + "rewards/rejected": -0.17035487294197083, + "step": 6295 + }, + { + "epoch": 4.354080221300138, + "grad_norm": 8.606863021850586, + "learning_rate": 3.136622099277701e-05, + "log_odds_chosen": 9.143202781677246, + "log_odds_ratio": -0.0030732681043446064, + "logits/chosen": -0.5923776030540466, + "logits/rejected": -0.5626744031906128, + "logps/chosen": -0.0018515931442379951, + "logps/rejected": -1.5052129030227661, + "loss": 1.0512, + "nll_loss": 0.26248684525489807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001851593260653317, + "rewards/margins": 0.1503361314535141, + "rewards/rejected": -0.15052127838134766, + "step": 6296 + }, + { + "epoch": 4.354771784232365, + "grad_norm": 7.347190856933594, + "learning_rate": 3.136237897648686e-05, + "log_odds_chosen": 10.128762245178223, + "log_odds_ratio": -0.0003108138043899089, + "logits/chosen": -0.21433761715888977, + "logits/rejected": -0.2856504023075104, + "logps/chosen": -0.0016921722562983632, + "logps/rejected": -2.441856861114502, + "loss": 1.0754, + "nll_loss": 0.26881054043769836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016921722271945328, + "rewards/margins": 0.24401646852493286, + "rewards/rejected": -0.2441856861114502, + "step": 6297 + }, + { + "epoch": 4.355463347164592, + "grad_norm": 8.210977554321289, + "learning_rate": 3.135853696019671e-05, + "log_odds_chosen": 9.857641220092773, + "log_odds_ratio": -0.0008883035625331104, + "logits/chosen": -0.40693163871765137, + "logits/rejected": -0.4411735236644745, + "logps/chosen": -0.0008371093426831067, + "logps/rejected": -1.812008261680603, + "loss": 0.8135, + "nll_loss": 0.20329168438911438, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.371093281311914e-05, + "rewards/margins": 0.18111711740493774, + "rewards/rejected": -0.1812008172273636, + "step": 6298 + }, + { + "epoch": 4.356154910096818, + "grad_norm": 11.318500518798828, + "learning_rate": 3.1354694943906564e-05, + "log_odds_chosen": 8.595135688781738, + "log_odds_ratio": -0.0008043124689720571, + "logits/chosen": -0.5766262412071228, + "logits/rejected": -0.6325322389602661, + "logps/chosen": -0.003487899899482727, + "logps/rejected": -1.8228943347930908, + "loss": 1.0605, + "nll_loss": 0.2650408446788788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003487899957690388, + "rewards/margins": 0.18194065988063812, + "rewards/rejected": -0.18228945136070251, + "step": 6299 + }, + { + "epoch": 4.356846473029045, + "grad_norm": 7.36438512802124, + "learning_rate": 3.1350852927616416e-05, + "log_odds_chosen": 8.635614395141602, + "log_odds_ratio": -0.003634521272033453, + "logits/chosen": -0.7639305591583252, + "logits/rejected": -0.7949274778366089, + "logps/chosen": -0.026982376351952553, + "logps/rejected": -2.173861503601074, + "loss": 1.2849, + "nll_loss": 0.3208683133125305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002698237542062998, + "rewards/margins": 0.21468791365623474, + "rewards/rejected": -0.2173861414194107, + "step": 6300 + }, + { + "epoch": 4.357538035961272, + "grad_norm": 8.610318183898926, + "learning_rate": 3.134701091132626e-05, + "log_odds_chosen": 11.18701457977295, + "log_odds_ratio": -2.265540933876764e-05, + "logits/chosen": -0.5687964558601379, + "logits/rejected": -0.6419916152954102, + "logps/chosen": -0.00025438808370381594, + "logps/rejected": -2.238746404647827, + "loss": 0.8686, + "nll_loss": 0.2171362340450287, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5438808734179474e-05, + "rewards/margins": 0.22384920716285706, + "rewards/rejected": -0.22387462854385376, + "step": 6301 + }, + { + "epoch": 4.358229598893499, + "grad_norm": 8.536808013916016, + "learning_rate": 3.134316889503612e-05, + "log_odds_chosen": 9.764930725097656, + "log_odds_ratio": -0.0027465950697660446, + "logits/chosen": -0.7701665163040161, + "logits/rejected": -0.8795304298400879, + "logps/chosen": -0.0021428868640214205, + "logps/rejected": -1.8732999563217163, + "loss": 0.9178, + "nll_loss": 0.22917912900447845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021428869513329118, + "rewards/margins": 0.18711569905281067, + "rewards/rejected": -0.18733000755310059, + "step": 6302 + }, + { + "epoch": 4.358921161825726, + "grad_norm": 6.359549522399902, + "learning_rate": 3.133932687874597e-05, + "log_odds_chosen": 8.725833892822266, + "log_odds_ratio": -0.0040402826853096485, + "logits/chosen": -0.015667788684368134, + "logits/rejected": -0.0844547301530838, + "logps/chosen": -0.0029326328076422215, + "logps/rejected": -1.3450301885604858, + "loss": 1.1471, + "nll_loss": 0.2863708734512329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029326329240575433, + "rewards/margins": 0.1342097520828247, + "rewards/rejected": -0.13450302183628082, + "step": 6303 + }, + { + "epoch": 4.359612724757953, + "grad_norm": 11.345746040344238, + "learning_rate": 3.133548486245582e-05, + "log_odds_chosen": 9.424605369567871, + "log_odds_ratio": -0.011097854934632778, + "logits/chosen": -0.3127516806125641, + "logits/rejected": -0.4279908537864685, + "logps/chosen": -0.010858694091439247, + "logps/rejected": -2.0717811584472656, + "loss": 0.9532, + "nll_loss": 0.2371962070465088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010858693858608603, + "rewards/margins": 0.20609226822853088, + "rewards/rejected": -0.20717814564704895, + "step": 6304 + }, + { + "epoch": 4.360304287690179, + "grad_norm": 10.459247589111328, + "learning_rate": 3.133164284616567e-05, + "log_odds_chosen": 10.67799186706543, + "log_odds_ratio": -3.142370769637637e-05, + "logits/chosen": -0.2904345989227295, + "logits/rejected": -0.3955543637275696, + "logps/chosen": -0.0003603932564146817, + "logps/rejected": -2.526035785675049, + "loss": 0.9621, + "nll_loss": 0.24051694571971893, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.603932782425545e-05, + "rewards/margins": 0.2525675594806671, + "rewards/rejected": -0.25260359048843384, + "step": 6305 + }, + { + "epoch": 4.360995850622406, + "grad_norm": 7.519941806793213, + "learning_rate": 3.132780082987552e-05, + "log_odds_chosen": 8.597479820251465, + "log_odds_ratio": -0.0254450011998415, + "logits/chosen": -0.4681594967842102, + "logits/rejected": -0.5076048970222473, + "logps/chosen": -0.007410486228764057, + "logps/rejected": -1.4946553707122803, + "loss": 1.5485, + "nll_loss": 0.38457822799682617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007410486578010023, + "rewards/margins": 0.1487244963645935, + "rewards/rejected": -0.14946553111076355, + "step": 6306 + }, + { + "epoch": 4.361687413554633, + "grad_norm": 14.771382331848145, + "learning_rate": 3.132395881358537e-05, + "log_odds_chosen": 10.483898162841797, + "log_odds_ratio": -6.732901238137856e-05, + "logits/chosen": -0.7547286152839661, + "logits/rejected": -0.6616960167884827, + "logps/chosen": -0.00047322813770733774, + "logps/rejected": -2.4050559997558594, + "loss": 0.9754, + "nll_loss": 0.24383097887039185, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7322813770733774e-05, + "rewards/margins": 0.24045827984809875, + "rewards/rejected": -0.24050560593605042, + "step": 6307 + }, + { + "epoch": 4.36237897648686, + "grad_norm": 10.824990272521973, + "learning_rate": 3.132011679729522e-05, + "log_odds_chosen": 9.566398620605469, + "log_odds_ratio": -0.017600275576114655, + "logits/chosen": -0.2919943034648895, + "logits/rejected": -0.2732120752334595, + "logps/chosen": -0.005583908874541521, + "logps/rejected": -2.318268299102783, + "loss": 1.231, + "nll_loss": 0.3060001730918884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005583909805864096, + "rewards/margins": 0.23126843571662903, + "rewards/rejected": -0.23182684183120728, + "step": 6308 + }, + { + "epoch": 4.363070539419087, + "grad_norm": 6.783447742462158, + "learning_rate": 3.1316274781005075e-05, + "log_odds_chosen": 10.260091781616211, + "log_odds_ratio": -5.726013478124514e-05, + "logits/chosen": -0.5818588137626648, + "logits/rejected": -0.6268680095672607, + "logps/chosen": -0.00023265733034349978, + "logps/rejected": -1.4145288467407227, + "loss": 0.9643, + "nll_loss": 0.24107356369495392, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3265733034349978e-05, + "rewards/margins": 0.141429603099823, + "rewards/rejected": -0.1414528787136078, + "step": 6309 + }, + { + "epoch": 4.363762102351314, + "grad_norm": 7.472887992858887, + "learning_rate": 3.131243276471492e-05, + "log_odds_chosen": 10.513906478881836, + "log_odds_ratio": -0.0002800179354380816, + "logits/chosen": 0.02222248911857605, + "logits/rejected": -0.04449723660945892, + "logps/chosen": -0.017652522772550583, + "logps/rejected": -2.3833508491516113, + "loss": 1.0615, + "nll_loss": 0.2653350234031677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001765252323821187, + "rewards/margins": 0.2365698516368866, + "rewards/rejected": -0.23833508789539337, + "step": 6310 + }, + { + "epoch": 4.36445366528354, + "grad_norm": 9.911407470703125, + "learning_rate": 3.130859074842478e-05, + "log_odds_chosen": 9.882485389709473, + "log_odds_ratio": -6.964397471165285e-05, + "logits/chosen": -0.66880202293396, + "logits/rejected": -0.7102699279785156, + "logps/chosen": -0.000869112613145262, + "logps/rejected": -1.9686014652252197, + "loss": 1.0695, + "nll_loss": 0.26737716794013977, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.691126276971772e-05, + "rewards/margins": 0.1967732310295105, + "rewards/rejected": -0.19686013460159302, + "step": 6311 + }, + { + "epoch": 4.365145228215767, + "grad_norm": 8.024314880371094, + "learning_rate": 3.1304748732134625e-05, + "log_odds_chosen": 9.696148872375488, + "log_odds_ratio": -0.00016530677385162562, + "logits/chosen": -0.5593657493591309, + "logits/rejected": -0.5686834454536438, + "logps/chosen": -0.0015895981341600418, + "logps/rejected": -2.1018128395080566, + "loss": 0.8795, + "nll_loss": 0.2198611944913864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015895982505753636, + "rewards/margins": 0.21002231538295746, + "rewards/rejected": -0.21018128097057343, + "step": 6312 + }, + { + "epoch": 4.365836791147994, + "grad_norm": 7.493190288543701, + "learning_rate": 3.130090671584448e-05, + "log_odds_chosen": 10.404024124145508, + "log_odds_ratio": -0.0001439659099560231, + "logits/chosen": -0.19742217659950256, + "logits/rejected": -0.3219975531101227, + "logps/chosen": -0.007236870937049389, + "logps/rejected": -2.362229824066162, + "loss": 1.133, + "nll_loss": 0.28323426842689514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007236871169880033, + "rewards/margins": 0.235499307513237, + "rewards/rejected": -0.2362229824066162, + "step": 6313 + }, + { + "epoch": 4.366528354080221, + "grad_norm": 11.898686408996582, + "learning_rate": 3.129706469955433e-05, + "log_odds_chosen": 9.762331008911133, + "log_odds_ratio": -0.0001290647342102602, + "logits/chosen": -0.6055392026901245, + "logits/rejected": -0.6020311117172241, + "logps/chosen": -0.0004267815675120801, + "logps/rejected": -1.929985761642456, + "loss": 2.3559, + "nll_loss": 0.5889644622802734, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2678155296016484e-05, + "rewards/margins": 0.1929558962583542, + "rewards/rejected": -0.19299857318401337, + "step": 6314 + }, + { + "epoch": 4.367219917012449, + "grad_norm": 15.363786697387695, + "learning_rate": 3.1293222683264176e-05, + "log_odds_chosen": 9.683354377746582, + "log_odds_ratio": -0.00012579330359585583, + "logits/chosen": -0.4168752133846283, + "logits/rejected": -0.44384777545928955, + "logps/chosen": -0.0011151648359373212, + "logps/rejected": -1.5676586627960205, + "loss": 0.9781, + "nll_loss": 0.24452057480812073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011151648504892364, + "rewards/margins": 0.15665435791015625, + "rewards/rejected": -0.156765878200531, + "step": 6315 + }, + { + "epoch": 4.367911479944675, + "grad_norm": 8.174242973327637, + "learning_rate": 3.128938066697403e-05, + "log_odds_chosen": 10.951789855957031, + "log_odds_ratio": -2.8747548640239984e-05, + "logits/chosen": -0.719359278678894, + "logits/rejected": -0.7517074346542358, + "logps/chosen": -9.578256140230224e-05, + "logps/rejected": -1.6426482200622559, + "loss": 0.8635, + "nll_loss": 0.2158716768026352, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.578255230735522e-06, + "rewards/margins": 0.16425524652004242, + "rewards/rejected": -0.16426481306552887, + "step": 6316 + }, + { + "epoch": 4.368603042876902, + "grad_norm": 8.694875717163086, + "learning_rate": 3.128553865068388e-05, + "log_odds_chosen": 9.255899429321289, + "log_odds_ratio": -0.0016122335800901055, + "logits/chosen": -0.46941107511520386, + "logits/rejected": -0.43659359216690063, + "logps/chosen": -0.0011213673278689384, + "logps/rejected": -1.4043328762054443, + "loss": 1.3327, + "nll_loss": 0.33301347494125366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011213673133170232, + "rewards/margins": 0.14032115042209625, + "rewards/rejected": -0.14043329656124115, + "step": 6317 + }, + { + "epoch": 4.369294605809129, + "grad_norm": 10.649413108825684, + "learning_rate": 3.128169663439373e-05, + "log_odds_chosen": 9.875417709350586, + "log_odds_ratio": -0.00015953306865412742, + "logits/chosen": -0.12319202721118927, + "logits/rejected": -0.1773851215839386, + "logps/chosen": -0.00036378856748342514, + "logps/rejected": -1.6384358406066895, + "loss": 1.3269, + "nll_loss": 0.3316992223262787, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6378856748342514e-05, + "rewards/margins": 0.163807213306427, + "rewards/rejected": -0.16384358704090118, + "step": 6318 + }, + { + "epoch": 4.369986168741356, + "grad_norm": 8.679130554199219, + "learning_rate": 3.127785461810358e-05, + "log_odds_chosen": 10.387129783630371, + "log_odds_ratio": -0.00014563562581315637, + "logits/chosen": -0.24549484252929688, + "logits/rejected": -0.3316296339035034, + "logps/chosen": -0.00041450935532338917, + "logps/rejected": -2.2968225479125977, + "loss": 0.9877, + "nll_loss": 0.24691948294639587, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1450934077147394e-05, + "rewards/margins": 0.22964079678058624, + "rewards/rejected": -0.22968225181102753, + "step": 6319 + }, + { + "epoch": 4.370677731673583, + "grad_norm": 6.213496685028076, + "learning_rate": 3.127401260181344e-05, + "log_odds_chosen": 9.122066497802734, + "log_odds_ratio": -0.00042584422044456005, + "logits/chosen": -0.34517765045166016, + "logits/rejected": -0.3442385196685791, + "logps/chosen": -0.003846704261377454, + "logps/rejected": -2.2931177616119385, + "loss": 1.6242, + "nll_loss": 0.40601515769958496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038467044942080975, + "rewards/margins": 0.2289271056652069, + "rewards/rejected": -0.2293117642402649, + "step": 6320 + }, + { + "epoch": 4.37136929460581, + "grad_norm": 10.913430213928223, + "learning_rate": 3.1270170585523284e-05, + "log_odds_chosen": 10.11327838897705, + "log_odds_ratio": -0.0007298666751012206, + "logits/chosen": -0.581683337688446, + "logits/rejected": -0.621495246887207, + "logps/chosen": -0.0038006172981113195, + "logps/rejected": -2.459005355834961, + "loss": 0.8858, + "nll_loss": 0.22136807441711426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038006174145266414, + "rewards/margins": 0.2455204725265503, + "rewards/rejected": -0.24590054154396057, + "step": 6321 + }, + { + "epoch": 4.372060857538036, + "grad_norm": 7.445157527923584, + "learning_rate": 3.1266328569233136e-05, + "log_odds_chosen": 8.718201637268066, + "log_odds_ratio": -0.00027666238020174205, + "logits/chosen": -0.4421314597129822, + "logits/rejected": -0.3595300018787384, + "logps/chosen": -0.009511098265647888, + "logps/rejected": -1.9657456874847412, + "loss": 1.519, + "nll_loss": 0.37971681356430054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009511099196970463, + "rewards/margins": 0.1956234574317932, + "rewards/rejected": -0.19657456874847412, + "step": 6322 + }, + { + "epoch": 4.372752420470263, + "grad_norm": 7.530955791473389, + "learning_rate": 3.126248655294299e-05, + "log_odds_chosen": 10.011698722839355, + "log_odds_ratio": -0.0005805735709145665, + "logits/chosen": -0.4899846017360687, + "logits/rejected": -0.5263997316360474, + "logps/chosen": -0.000975239381659776, + "logps/rejected": -2.055481195449829, + "loss": 1.2532, + "nll_loss": 0.31323084235191345, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.75239381659776e-05, + "rewards/margins": 0.20545059442520142, + "rewards/rejected": -0.20554812252521515, + "step": 6323 + }, + { + "epoch": 4.37344398340249, + "grad_norm": 5.401859283447266, + "learning_rate": 3.1258644536652834e-05, + "log_odds_chosen": 9.362093925476074, + "log_odds_ratio": -0.0017716753063723445, + "logits/chosen": -0.5734344720840454, + "logits/rejected": -0.5739961862564087, + "logps/chosen": -0.0014673115219920874, + "logps/rejected": -1.9922149181365967, + "loss": 0.9307, + "nll_loss": 0.23249852657318115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014673115219920874, + "rewards/margins": 0.19907476007938385, + "rewards/rejected": -0.19922150671482086, + "step": 6324 + }, + { + "epoch": 4.374135546334717, + "grad_norm": 15.756174087524414, + "learning_rate": 3.125480252036269e-05, + "log_odds_chosen": 10.876068115234375, + "log_odds_ratio": -0.0008979838457889855, + "logits/chosen": -1.0250927209854126, + "logits/rejected": -1.001251459121704, + "logps/chosen": -0.007843158207833767, + "logps/rejected": -3.035405158996582, + "loss": 1.179, + "nll_loss": 0.29465389251708984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007843158091418445, + "rewards/margins": 0.3027561902999878, + "rewards/rejected": -0.30354049801826477, + "step": 6325 + }, + { + "epoch": 4.374827109266944, + "grad_norm": 8.404903411865234, + "learning_rate": 3.125096050407254e-05, + "log_odds_chosen": 10.507247924804688, + "log_odds_ratio": -7.449003896908835e-05, + "logits/chosen": -0.7850874066352844, + "logits/rejected": -0.85020512342453, + "logps/chosen": -0.0002060161205008626, + "logps/rejected": -2.043398857116699, + "loss": 1.2735, + "nll_loss": 0.318358451128006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0601613869075663e-05, + "rewards/margins": 0.2043193131685257, + "rewards/rejected": -0.2043399214744568, + "step": 6326 + }, + { + "epoch": 4.375518672199171, + "grad_norm": 6.757132053375244, + "learning_rate": 3.124711848778239e-05, + "log_odds_chosen": 8.752026557922363, + "log_odds_ratio": -0.0007465857197530568, + "logits/chosen": -0.3212154507637024, + "logits/rejected": -0.4050557613372803, + "logps/chosen": -0.003038684604689479, + "logps/rejected": -1.4287402629852295, + "loss": 1.6756, + "nll_loss": 0.41883230209350586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000303868466289714, + "rewards/margins": 0.1425701528787613, + "rewards/rejected": -0.14287403225898743, + "step": 6327 + }, + { + "epoch": 4.376210235131397, + "grad_norm": 7.012808322906494, + "learning_rate": 3.124327647149224e-05, + "log_odds_chosen": 9.273959159851074, + "log_odds_ratio": -0.0010292872320860624, + "logits/chosen": -0.7706098556518555, + "logits/rejected": -0.7494393587112427, + "logps/chosen": -0.0008519127150066197, + "logps/rejected": -1.6534192562103271, + "loss": 0.9163, + "nll_loss": 0.2289770245552063, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.519127004547045e-05, + "rewards/margins": 0.16525673866271973, + "rewards/rejected": -0.16534192860126495, + "step": 6328 + }, + { + "epoch": 4.376901798063624, + "grad_norm": 11.536850929260254, + "learning_rate": 3.1239434455202097e-05, + "log_odds_chosen": 10.570297241210938, + "log_odds_ratio": -5.9236168453935534e-05, + "logits/chosen": -0.4841303825378418, + "logits/rejected": -0.5646402835845947, + "logps/chosen": -0.00024312842288054526, + "logps/rejected": -2.0626015663146973, + "loss": 0.877, + "nll_loss": 0.21923774480819702, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4312843379448168e-05, + "rewards/margins": 0.2062358409166336, + "rewards/rejected": -0.20626014471054077, + "step": 6329 + }, + { + "epoch": 4.377593360995851, + "grad_norm": 8.52833080291748, + "learning_rate": 3.123559243891194e-05, + "log_odds_chosen": 10.625216484069824, + "log_odds_ratio": -5.232186958892271e-05, + "logits/chosen": -0.3533399701118469, + "logits/rejected": -0.34043389558792114, + "logps/chosen": -0.00021401699632406235, + "logps/rejected": -2.0122456550598145, + "loss": 0.9324, + "nll_loss": 0.23309096693992615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1401698177214712e-05, + "rewards/margins": 0.20120316743850708, + "rewards/rejected": -0.20122458040714264, + "step": 6330 + }, + { + "epoch": 4.378284923928078, + "grad_norm": 5.83406400680542, + "learning_rate": 3.1231750422621795e-05, + "log_odds_chosen": 9.288875579833984, + "log_odds_ratio": -0.01151504460722208, + "logits/chosen": -0.4276590347290039, + "logits/rejected": -0.46351057291030884, + "logps/chosen": -0.004148704465478659, + "logps/rejected": -1.5049939155578613, + "loss": 0.697, + "nll_loss": 0.17310747504234314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004148704756516963, + "rewards/margins": 0.15008452534675598, + "rewards/rejected": -0.1504994034767151, + "step": 6331 + }, + { + "epoch": 4.378976486860305, + "grad_norm": 33.9369010925293, + "learning_rate": 3.122790840633165e-05, + "log_odds_chosen": 8.93118667602539, + "log_odds_ratio": -0.15788762271404266, + "logits/chosen": -0.644578218460083, + "logits/rejected": -0.685411274433136, + "logps/chosen": -0.024288659915328026, + "logps/rejected": -2.2254717350006104, + "loss": 1.7872, + "nll_loss": 0.43102213740348816, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024288659915328026, + "rewards/margins": 0.22011832892894745, + "rewards/rejected": -0.22254718840122223, + "step": 6332 + }, + { + "epoch": 4.3796680497925315, + "grad_norm": 9.28854751586914, + "learning_rate": 3.122406639004149e-05, + "log_odds_chosen": 9.086782455444336, + "log_odds_ratio": -0.001112618949264288, + "logits/chosen": -0.4650830924510956, + "logits/rejected": -0.495891273021698, + "logps/chosen": -0.001777901779860258, + "logps/rejected": -1.8395483493804932, + "loss": 1.2764, + "nll_loss": 0.3189956843852997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017779016343411058, + "rewards/margins": 0.18377703428268433, + "rewards/rejected": -0.18395483493804932, + "step": 6333 + }, + { + "epoch": 4.380359612724758, + "grad_norm": 8.1270751953125, + "learning_rate": 3.1220224373751345e-05, + "log_odds_chosen": 9.927444458007812, + "log_odds_ratio": -0.00011174234532518312, + "logits/chosen": -0.48753228783607483, + "logits/rejected": -0.503075361251831, + "logps/chosen": -0.0002461006515659392, + "logps/rejected": -1.4193822145462036, + "loss": 1.0163, + "nll_loss": 0.25406113266944885, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4610068066976964e-05, + "rewards/margins": 0.14191360771656036, + "rewards/rejected": -0.1419382244348526, + "step": 6334 + }, + { + "epoch": 4.381051175656985, + "grad_norm": 9.07845401763916, + "learning_rate": 3.12163823574612e-05, + "log_odds_chosen": 10.563726425170898, + "log_odds_ratio": -0.00018957343127112836, + "logits/chosen": -0.21047934889793396, + "logits/rejected": -0.2606376111507416, + "logps/chosen": -0.0005684728967025876, + "logps/rejected": -2.5454013347625732, + "loss": 1.1947, + "nll_loss": 0.2986586391925812, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684728603227995e-05, + "rewards/margins": 0.25448331236839294, + "rewards/rejected": -0.2545401453971863, + "step": 6335 + }, + { + "epoch": 4.381742738589212, + "grad_norm": 7.290050983428955, + "learning_rate": 3.121254034117105e-05, + "log_odds_chosen": 9.2028169631958, + "log_odds_ratio": -0.01780695840716362, + "logits/chosen": -0.3207651972770691, + "logits/rejected": -0.4059835970401764, + "logps/chosen": -0.005791170988231897, + "logps/rejected": -1.461303949356079, + "loss": 1.0177, + "nll_loss": 0.2526489794254303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005791170406155288, + "rewards/margins": 0.14555127918720245, + "rewards/rejected": -0.14613039791584015, + "step": 6336 + }, + { + "epoch": 4.382434301521439, + "grad_norm": 10.609784126281738, + "learning_rate": 3.1208698324880896e-05, + "log_odds_chosen": 10.936665534973145, + "log_odds_ratio": -4.089557842235081e-05, + "logits/chosen": -0.6534034013748169, + "logits/rejected": -0.6795228719711304, + "logps/chosen": -0.0002644860651344061, + "logps/rejected": -2.4768505096435547, + "loss": 0.7925, + "nll_loss": 0.19811291992664337, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6448606149642728e-05, + "rewards/margins": 0.2476586103439331, + "rewards/rejected": -0.24768505990505219, + "step": 6337 + }, + { + "epoch": 4.383125864453666, + "grad_norm": 10.921443939208984, + "learning_rate": 3.1204856308590755e-05, + "log_odds_chosen": 10.012323379516602, + "log_odds_ratio": -0.0001081754598999396, + "logits/chosen": -0.3615493178367615, + "logits/rejected": -0.44846972823143005, + "logps/chosen": -0.000523362890817225, + "logps/rejected": -1.896093726158142, + "loss": 1.0746, + "nll_loss": 0.2686314880847931, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.233629053691402e-05, + "rewards/margins": 0.1895570307970047, + "rewards/rejected": -0.1896093785762787, + "step": 6338 + }, + { + "epoch": 4.3838174273858925, + "grad_norm": 11.553305625915527, + "learning_rate": 3.12010142923006e-05, + "log_odds_chosen": 10.591811180114746, + "log_odds_ratio": -0.00015707314014434814, + "logits/chosen": -0.3892832398414612, + "logits/rejected": -0.353343665599823, + "logps/chosen": -0.0004059493658132851, + "logps/rejected": -2.3211452960968018, + "loss": 1.2325, + "nll_loss": 0.30811676383018494, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.059493585373275e-05, + "rewards/margins": 0.23207393288612366, + "rewards/rejected": -0.2321145236492157, + "step": 6339 + }, + { + "epoch": 4.384508990318119, + "grad_norm": 6.500206470489502, + "learning_rate": 3.119717227601045e-05, + "log_odds_chosen": 10.269033432006836, + "log_odds_ratio": -5.843197868671268e-05, + "logits/chosen": -0.5071641802787781, + "logits/rejected": -0.546855628490448, + "logps/chosen": -0.0001225728919962421, + "logps/rejected": -1.1638572216033936, + "loss": 0.9306, + "nll_loss": 0.2326522022485733, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.225728919962421e-05, + "rewards/margins": 0.1163734719157219, + "rewards/rejected": -0.11638573557138443, + "step": 6340 + }, + { + "epoch": 4.385200553250346, + "grad_norm": 14.92447280883789, + "learning_rate": 3.1193330259720306e-05, + "log_odds_chosen": 10.092230796813965, + "log_odds_ratio": -0.00015422774595208466, + "logits/chosen": -0.7137937545776367, + "logits/rejected": -0.7913176417350769, + "logps/chosen": -0.0002549771743360907, + "logps/rejected": -1.6001590490341187, + "loss": 1.1712, + "nll_loss": 0.2927763760089874, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5497716706013307e-05, + "rewards/margins": 0.15999040007591248, + "rewards/rejected": -0.16001591086387634, + "step": 6341 + }, + { + "epoch": 4.385892116182573, + "grad_norm": 10.537651062011719, + "learning_rate": 3.118948824343015e-05, + "log_odds_chosen": 10.228643417358398, + "log_odds_ratio": -7.193143392214552e-05, + "logits/chosen": -0.2224959433078766, + "logits/rejected": -0.28621044754981995, + "logps/chosen": -0.00033477373654022813, + "logps/rejected": -1.872227668762207, + "loss": 1.0915, + "nll_loss": 0.2728642225265503, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.347737219883129e-05, + "rewards/margins": 0.18718931078910828, + "rewards/rejected": -0.18722279369831085, + "step": 6342 + }, + { + "epoch": 4.3865836791148, + "grad_norm": 9.82940673828125, + "learning_rate": 3.1185646227140004e-05, + "log_odds_chosen": 10.144380569458008, + "log_odds_ratio": -0.022902216762304306, + "logits/chosen": -0.8177899122238159, + "logits/rejected": -0.8910137414932251, + "logps/chosen": -0.05426086485385895, + "logps/rejected": -2.715169906616211, + "loss": 1.2558, + "nll_loss": 0.3116682767868042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005426086951047182, + "rewards/margins": 0.26609086990356445, + "rewards/rejected": -0.27151697874069214, + "step": 6343 + }, + { + "epoch": 4.387275242047027, + "grad_norm": 11.173566818237305, + "learning_rate": 3.1181804210849856e-05, + "log_odds_chosen": 11.273879051208496, + "log_odds_ratio": -1.8894152162829414e-05, + "logits/chosen": -0.36173272132873535, + "logits/rejected": -0.4322207570075989, + "logps/chosen": -0.00011111483763670549, + "logps/rejected": -2.1379709243774414, + "loss": 1.1858, + "nll_loss": 0.2964479625225067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.111148412746843e-05, + "rewards/margins": 0.21378597617149353, + "rewards/rejected": -0.21379709243774414, + "step": 6344 + }, + { + "epoch": 4.3879668049792535, + "grad_norm": 9.317232131958008, + "learning_rate": 3.117796219455971e-05, + "log_odds_chosen": 8.893519401550293, + "log_odds_ratio": -0.0006592870340682566, + "logits/chosen": -0.2592763900756836, + "logits/rejected": -0.31366080045700073, + "logps/chosen": -0.0014565077144652605, + "logps/rejected": -1.7281951904296875, + "loss": 1.1022, + "nll_loss": 0.27548471093177795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014565078890882432, + "rewards/margins": 0.17267388105392456, + "rewards/rejected": -0.17281952500343323, + "step": 6345 + }, + { + "epoch": 4.38865836791148, + "grad_norm": 11.148494720458984, + "learning_rate": 3.1174120178269554e-05, + "log_odds_chosen": 9.223249435424805, + "log_odds_ratio": -0.0002908221213147044, + "logits/chosen": -0.5727559328079224, + "logits/rejected": -0.7116425037384033, + "logps/chosen": -0.0018146205693483353, + "logps/rejected": -1.5348750352859497, + "loss": 1.2702, + "nll_loss": 0.3175121545791626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018146206275559962, + "rewards/margins": 0.1533060371875763, + "rewards/rejected": -0.15348750352859497, + "step": 6346 + }, + { + "epoch": 4.389349930843707, + "grad_norm": 10.441041946411133, + "learning_rate": 3.1170278161979413e-05, + "log_odds_chosen": 8.41359806060791, + "log_odds_ratio": -0.03371895104646683, + "logits/chosen": -0.4587884545326233, + "logits/rejected": -0.5229263305664062, + "logps/chosen": -0.04264894500374794, + "logps/rejected": -1.6140475273132324, + "loss": 1.4225, + "nll_loss": 0.3522500991821289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0042648944072425365, + "rewards/margins": 0.1571398675441742, + "rewards/rejected": -0.16140475869178772, + "step": 6347 + }, + { + "epoch": 4.390041493775934, + "grad_norm": 9.487386703491211, + "learning_rate": 3.116643614568926e-05, + "log_odds_chosen": 10.76372241973877, + "log_odds_ratio": -9.364535799250007e-05, + "logits/chosen": -0.6684786081314087, + "logits/rejected": -0.7083771228790283, + "logps/chosen": -0.00019393152615521103, + "logps/rejected": -1.8834095001220703, + "loss": 1.2263, + "nll_loss": 0.30655691027641296, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9393151887925342e-05, + "rewards/margins": 0.1883215457201004, + "rewards/rejected": -0.1883409470319748, + "step": 6348 + }, + { + "epoch": 4.390733056708161, + "grad_norm": 8.466039657592773, + "learning_rate": 3.116259412939911e-05, + "log_odds_chosen": 8.854142189025879, + "log_odds_ratio": -0.0010371842654421926, + "logits/chosen": -0.7207614183425903, + "logits/rejected": -0.7611744999885559, + "logps/chosen": -0.0017626279732212424, + "logps/rejected": -1.460201621055603, + "loss": 1.6915, + "nll_loss": 0.42276665568351746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017626280896365643, + "rewards/margins": 0.14584389328956604, + "rewards/rejected": -0.14602015912532806, + "step": 6349 + }, + { + "epoch": 4.391424619640388, + "grad_norm": 9.773248672485352, + "learning_rate": 3.1158752113108964e-05, + "log_odds_chosen": 10.251541137695312, + "log_odds_ratio": -4.190437539364211e-05, + "logits/chosen": -0.4113655090332031, + "logits/rejected": -0.5633144378662109, + "logps/chosen": -0.0003510084352456033, + "logps/rejected": -2.211672306060791, + "loss": 0.7652, + "nll_loss": 0.19130098819732666, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5100845707347617e-05, + "rewards/margins": 0.22113212943077087, + "rewards/rejected": -0.22116723656654358, + "step": 6350 + }, + { + "epoch": 4.3921161825726145, + "grad_norm": 7.457752227783203, + "learning_rate": 3.115491009681881e-05, + "log_odds_chosen": 10.767346382141113, + "log_odds_ratio": -6.526858487632126e-05, + "logits/chosen": -0.7429406046867371, + "logits/rejected": -0.7705117464065552, + "logps/chosen": -0.00023447003331966698, + "logps/rejected": -2.1938061714172363, + "loss": 1.336, + "nll_loss": 0.33399924635887146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3447002604370937e-05, + "rewards/margins": 0.21935716271400452, + "rewards/rejected": -0.21938061714172363, + "step": 6351 + }, + { + "epoch": 4.392807745504841, + "grad_norm": 10.649615287780762, + "learning_rate": 3.115106808052866e-05, + "log_odds_chosen": 9.68832778930664, + "log_odds_ratio": -0.12552544474601746, + "logits/chosen": -0.8307643532752991, + "logits/rejected": -0.8816390037536621, + "logps/chosen": -0.019055670127272606, + "logps/rejected": -1.485122799873352, + "loss": 1.8428, + "nll_loss": 0.4481407105922699, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019055670127272606, + "rewards/margins": 0.14660672843456268, + "rewards/rejected": -0.14851228892803192, + "step": 6352 + }, + { + "epoch": 4.393499308437068, + "grad_norm": 9.400788307189941, + "learning_rate": 3.1147226064238515e-05, + "log_odds_chosen": 10.734479904174805, + "log_odds_ratio": -7.558297511423007e-05, + "logits/chosen": -0.6922191977500916, + "logits/rejected": -0.7359859943389893, + "logps/chosen": -0.00017174580716527998, + "logps/rejected": -2.06654691696167, + "loss": 0.7905, + "nll_loss": 0.1976223587989807, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.717458144412376e-05, + "rewards/margins": 0.20663750171661377, + "rewards/rejected": -0.20665468275547028, + "step": 6353 + }, + { + "epoch": 4.394190871369295, + "grad_norm": 7.185258865356445, + "learning_rate": 3.114338404794837e-05, + "log_odds_chosen": 9.011981964111328, + "log_odds_ratio": -0.0004941418301314116, + "logits/chosen": -0.5035545229911804, + "logits/rejected": -0.5419560670852661, + "logps/chosen": -0.0006765194702893496, + "logps/rejected": -1.283606767654419, + "loss": 0.939, + "nll_loss": 0.2347015142440796, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.765194848412648e-05, + "rewards/margins": 0.1282930225133896, + "rewards/rejected": -0.12836067378520966, + "step": 6354 + }, + { + "epoch": 4.394882434301522, + "grad_norm": 8.787233352661133, + "learning_rate": 3.113954203165821e-05, + "log_odds_chosen": 9.925066947937012, + "log_odds_ratio": -0.0003282953693997115, + "logits/chosen": -0.45993995666503906, + "logits/rejected": -0.4733354449272156, + "logps/chosen": -0.000683745660353452, + "logps/rejected": -1.690459132194519, + "loss": 0.9997, + "nll_loss": 0.2498854249715805, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.83745602145791e-05, + "rewards/margins": 0.1689775437116623, + "rewards/rejected": -0.16904591023921967, + "step": 6355 + }, + { + "epoch": 4.395573997233749, + "grad_norm": 10.263931274414062, + "learning_rate": 3.113570001536807e-05, + "log_odds_chosen": 8.73983097076416, + "log_odds_ratio": -0.011168360710144043, + "logits/chosen": -0.3822953402996063, + "logits/rejected": -0.43949460983276367, + "logps/chosen": -0.008786053396761417, + "logps/rejected": -1.7901290655136108, + "loss": 1.0338, + "nll_loss": 0.25732412934303284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008786054095253348, + "rewards/margins": 0.17813430726528168, + "rewards/rejected": -0.17901290953159332, + "step": 6356 + }, + { + "epoch": 4.3962655601659755, + "grad_norm": 5.810608386993408, + "learning_rate": 3.113185799907792e-05, + "log_odds_chosen": 10.540541648864746, + "log_odds_ratio": -5.537183824344538e-05, + "logits/chosen": -0.2416747510433197, + "logits/rejected": -0.29514622688293457, + "logps/chosen": -0.007341462187469006, + "logps/rejected": -2.8202743530273438, + "loss": 1.2999, + "nll_loss": 0.3249596357345581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007341462187469006, + "rewards/margins": 0.28129327297210693, + "rewards/rejected": -0.2820274531841278, + "step": 6357 + }, + { + "epoch": 4.396957123098202, + "grad_norm": 5.3782877922058105, + "learning_rate": 3.112801598278777e-05, + "log_odds_chosen": 9.902215957641602, + "log_odds_ratio": -0.0002081981801893562, + "logits/chosen": -0.5407758355140686, + "logits/rejected": -0.4762265384197235, + "logps/chosen": -0.0003181890642736107, + "logps/rejected": -1.5239753723144531, + "loss": 0.9757, + "nll_loss": 0.2439117431640625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1818908610148355e-05, + "rewards/margins": 0.15236571431159973, + "rewards/rejected": -0.1523975431919098, + "step": 6358 + }, + { + "epoch": 4.397648686030429, + "grad_norm": 8.642234802246094, + "learning_rate": 3.112417396649762e-05, + "log_odds_chosen": 10.313976287841797, + "log_odds_ratio": -0.00011284256470389664, + "logits/chosen": -0.7431192398071289, + "logits/rejected": -0.7744413614273071, + "logps/chosen": -0.0001350109523627907, + "logps/rejected": -1.5020142793655396, + "loss": 1.1418, + "nll_loss": 0.28544408082962036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.350109505438013e-05, + "rewards/margins": 0.15018793940544128, + "rewards/rejected": -0.1502014398574829, + "step": 6359 + }, + { + "epoch": 4.398340248962656, + "grad_norm": 11.897019386291504, + "learning_rate": 3.112033195020747e-05, + "log_odds_chosen": 10.22813606262207, + "log_odds_ratio": -0.00019520701607689261, + "logits/chosen": -0.452121376991272, + "logits/rejected": -0.5452828407287598, + "logps/chosen": -0.0006089904345571995, + "logps/rejected": -2.3550171852111816, + "loss": 1.0218, + "nll_loss": 0.25543949007987976, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0899052186869085e-05, + "rewards/margins": 0.23544083535671234, + "rewards/rejected": -0.2355017364025116, + "step": 6360 + }, + { + "epoch": 4.399031811894883, + "grad_norm": 7.805273056030273, + "learning_rate": 3.111648993391732e-05, + "log_odds_chosen": 9.458495140075684, + "log_odds_ratio": -0.00022262008860707283, + "logits/chosen": -0.7785831093788147, + "logits/rejected": -0.7868790030479431, + "logps/chosen": -0.0004530495498329401, + "logps/rejected": -1.3462885618209839, + "loss": 0.9961, + "nll_loss": 0.24900603294372559, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5304957893677056e-05, + "rewards/margins": 0.13458356261253357, + "rewards/rejected": -0.13462886214256287, + "step": 6361 + }, + { + "epoch": 4.39972337482711, + "grad_norm": 5.2602338790893555, + "learning_rate": 3.111264791762717e-05, + "log_odds_chosen": 9.917596817016602, + "log_odds_ratio": -0.00048271557898260653, + "logits/chosen": -0.5518268346786499, + "logits/rejected": -0.6171162128448486, + "logps/chosen": -0.006441672332584858, + "logps/rejected": -1.653882384300232, + "loss": 1.098, + "nll_loss": 0.2744504511356354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000644167244900018, + "rewards/margins": 0.1647440642118454, + "rewards/rejected": -0.16538822650909424, + "step": 6362 + }, + { + "epoch": 4.4004149377593365, + "grad_norm": 10.160521507263184, + "learning_rate": 3.1108805901337025e-05, + "log_odds_chosen": 10.352978706359863, + "log_odds_ratio": -9.755916835274547e-05, + "logits/chosen": -0.21207648515701294, + "logits/rejected": -0.3640451729297638, + "logps/chosen": -0.00020385342941153795, + "logps/rejected": -1.6681032180786133, + "loss": 1.612, + "nll_loss": 0.40298330783843994, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.038534512394108e-05, + "rewards/margins": 0.1667899638414383, + "rewards/rejected": -0.16681033372879028, + "step": 6363 + }, + { + "epoch": 4.401106500691563, + "grad_norm": 10.753729820251465, + "learning_rate": 3.110496388504687e-05, + "log_odds_chosen": 10.651169776916504, + "log_odds_ratio": -4.766930578625761e-05, + "logits/chosen": -0.510490357875824, + "logits/rejected": -0.6057192087173462, + "logps/chosen": -0.0003168170223943889, + "logps/rejected": -2.441676616668701, + "loss": 1.157, + "nll_loss": 0.28923308849334717, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.168170223943889e-05, + "rewards/margins": 0.24413597583770752, + "rewards/rejected": -0.24416767060756683, + "step": 6364 + }, + { + "epoch": 4.40179806362379, + "grad_norm": 9.026993751525879, + "learning_rate": 3.110112186875673e-05, + "log_odds_chosen": 10.503043174743652, + "log_odds_ratio": -9.254955512005836e-05, + "logits/chosen": -0.5653536319732666, + "logits/rejected": -0.5738909244537354, + "logps/chosen": -0.00036744290264323354, + "logps/rejected": -2.0169739723205566, + "loss": 0.8859, + "nll_loss": 0.2214638888835907, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6744291719514877e-05, + "rewards/margins": 0.2016606628894806, + "rewards/rejected": -0.20169739425182343, + "step": 6365 + }, + { + "epoch": 4.402489626556017, + "grad_norm": 10.971336364746094, + "learning_rate": 3.1097279852466576e-05, + "log_odds_chosen": 10.245159149169922, + "log_odds_ratio": -0.00012390792835503817, + "logits/chosen": -0.7902606725692749, + "logits/rejected": -0.8872899413108826, + "logps/chosen": -0.00017842264787759632, + "logps/rejected": -1.6078989505767822, + "loss": 0.9098, + "nll_loss": 0.2274371087551117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7842265151557513e-05, + "rewards/margins": 0.16077205538749695, + "rewards/rejected": -0.16078990697860718, + "step": 6366 + }, + { + "epoch": 4.403181189488244, + "grad_norm": 17.538488388061523, + "learning_rate": 3.109343783617643e-05, + "log_odds_chosen": 10.931398391723633, + "log_odds_ratio": -3.503288826323114e-05, + "logits/chosen": -0.7548744678497314, + "logits/rejected": -0.8804567456245422, + "logps/chosen": -0.0001960860681720078, + "logps/rejected": -2.3025503158569336, + "loss": 1.2374, + "nll_loss": 0.309344083070755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.960860754479654e-05, + "rewards/margins": 0.23023542761802673, + "rewards/rejected": -0.23025503754615784, + "step": 6367 + }, + { + "epoch": 4.403872752420471, + "grad_norm": 6.541409015655518, + "learning_rate": 3.108959581988628e-05, + "log_odds_chosen": 9.534074783325195, + "log_odds_ratio": -0.0003987067029811442, + "logits/chosen": -0.43024882674217224, + "logits/rejected": -0.44275692105293274, + "logps/chosen": -0.0007749908836558461, + "logps/rejected": -1.574808955192566, + "loss": 0.6758, + "nll_loss": 0.16890332102775574, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.749909127596766e-05, + "rewards/margins": 0.15740340948104858, + "rewards/rejected": -0.1574808955192566, + "step": 6368 + }, + { + "epoch": 4.404564315352697, + "grad_norm": 17.76714515686035, + "learning_rate": 3.1085753803596127e-05, + "log_odds_chosen": 8.868440628051758, + "log_odds_ratio": -0.00660840654745698, + "logits/chosen": -0.5223338007926941, + "logits/rejected": -0.5196986198425293, + "logps/chosen": -0.03560849279165268, + "logps/rejected": -1.5677039623260498, + "loss": 0.6861, + "nll_loss": 0.17087195813655853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035608489997684956, + "rewards/margins": 0.15320953726768494, + "rewards/rejected": -0.15677037835121155, + "step": 6369 + }, + { + "epoch": 4.405255878284924, + "grad_norm": 7.546093463897705, + "learning_rate": 3.108191178730598e-05, + "log_odds_chosen": 9.531181335449219, + "log_odds_ratio": -0.029602551832795143, + "logits/chosen": -0.5928676724433899, + "logits/rejected": -0.4486514925956726, + "logps/chosen": -0.006365879438817501, + "logps/rejected": -1.5473785400390625, + "loss": 0.9026, + "nll_loss": 0.22268284857273102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006365880253724754, + "rewards/margins": 0.15410126745700836, + "rewards/rejected": -0.15473784506320953, + "step": 6370 + }, + { + "epoch": 4.405947441217151, + "grad_norm": 13.640862464904785, + "learning_rate": 3.107806977101583e-05, + "log_odds_chosen": 8.941659927368164, + "log_odds_ratio": -0.00953722931444645, + "logits/chosen": -0.16276182234287262, + "logits/rejected": -0.22773955762386322, + "logps/chosen": -0.05365453660488129, + "logps/rejected": -2.1357078552246094, + "loss": 0.9216, + "nll_loss": 0.22944633662700653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005365454591810703, + "rewards/margins": 0.20820534229278564, + "rewards/rejected": -0.21357078850269318, + "step": 6371 + }, + { + "epoch": 4.406639004149378, + "grad_norm": 8.92506217956543, + "learning_rate": 3.1074227754725684e-05, + "log_odds_chosen": 9.07198715209961, + "log_odds_ratio": -0.016817396506667137, + "logits/chosen": -0.10156579315662384, + "logits/rejected": -0.19527952373027802, + "logps/chosen": -0.023559454828500748, + "logps/rejected": -2.2837753295898438, + "loss": 1.3893, + "nll_loss": 0.34563571214675903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023559455294162035, + "rewards/margins": 0.22602161765098572, + "rewards/rejected": -0.2283775508403778, + "step": 6372 + }, + { + "epoch": 4.407330567081605, + "grad_norm": 7.370696067810059, + "learning_rate": 3.107038573843553e-05, + "log_odds_chosen": 9.33350944519043, + "log_odds_ratio": -0.0006806927849538624, + "logits/chosen": -0.6236019134521484, + "logits/rejected": -0.6567580103874207, + "logps/chosen": -0.007249999791383743, + "logps/rejected": -1.7281427383422852, + "loss": 1.4798, + "nll_loss": 0.36987632513046265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007249999325722456, + "rewards/margins": 0.17208926379680634, + "rewards/rejected": -0.1728142648935318, + "step": 6373 + }, + { + "epoch": 4.408022130013832, + "grad_norm": 18.29096031188965, + "learning_rate": 3.106654372214539e-05, + "log_odds_chosen": 10.78128719329834, + "log_odds_ratio": -6.995126022957265e-05, + "logits/chosen": -0.6699973940849304, + "logits/rejected": -0.731697678565979, + "logps/chosen": -0.0005232581752352417, + "logps/rejected": -2.4144153594970703, + "loss": 1.048, + "nll_loss": 0.2619859576225281, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.232582043390721e-05, + "rewards/margins": 0.2413892298936844, + "rewards/rejected": -0.241441547870636, + "step": 6374 + }, + { + "epoch": 4.408713692946058, + "grad_norm": 7.64169454574585, + "learning_rate": 3.1062701705855234e-05, + "log_odds_chosen": 10.888839721679688, + "log_odds_ratio": -0.00017014621698763222, + "logits/chosen": -0.40166181325912476, + "logits/rejected": -0.4729452431201935, + "logps/chosen": -0.0003403805603738874, + "logps/rejected": -2.4208157062530518, + "loss": 0.8522, + "nll_loss": 0.21304450929164886, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.403805385460146e-05, + "rewards/margins": 0.242047518491745, + "rewards/rejected": -0.24208158254623413, + "step": 6375 + }, + { + "epoch": 4.409405255878285, + "grad_norm": 5.552724838256836, + "learning_rate": 3.105885968956509e-05, + "log_odds_chosen": 9.83051872253418, + "log_odds_ratio": -0.0004149650048930198, + "logits/chosen": -0.5414485931396484, + "logits/rejected": -0.5747936964035034, + "logps/chosen": -0.0008759694756008685, + "logps/rejected": -2.1983699798583984, + "loss": 0.9518, + "nll_loss": 0.23790419101715088, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.759694901527837e-05, + "rewards/margins": 0.21974940598011017, + "rewards/rejected": -0.2198370099067688, + "step": 6376 + }, + { + "epoch": 4.410096818810512, + "grad_norm": 6.212398052215576, + "learning_rate": 3.105501767327494e-05, + "log_odds_chosen": 9.110426902770996, + "log_odds_ratio": -0.0009867295157164335, + "logits/chosen": -0.7143625617027283, + "logits/rejected": -0.6959802508354187, + "logps/chosen": -0.0036927165929228067, + "logps/rejected": -1.8916161060333252, + "loss": 1.0186, + "nll_loss": 0.25453901290893555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036927167093381286, + "rewards/margins": 0.18879231810569763, + "rewards/rejected": -0.18916161358356476, + "step": 6377 + }, + { + "epoch": 4.410788381742739, + "grad_norm": 8.868108749389648, + "learning_rate": 3.1051175656984785e-05, + "log_odds_chosen": 8.546025276184082, + "log_odds_ratio": -0.008631639182567596, + "logits/chosen": -0.3716889023780823, + "logits/rejected": -0.43401286005973816, + "logps/chosen": -0.016441212967038155, + "logps/rejected": -1.6564141511917114, + "loss": 1.308, + "nll_loss": 0.3261297941207886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016441213665530086, + "rewards/margins": 0.16399729251861572, + "rewards/rejected": -0.1656414121389389, + "step": 6378 + }, + { + "epoch": 4.411479944674966, + "grad_norm": 6.24019718170166, + "learning_rate": 3.104733364069464e-05, + "log_odds_chosen": 10.215981483459473, + "log_odds_ratio": -0.00044016563333570957, + "logits/chosen": -0.7779475450515747, + "logits/rejected": -0.7803343534469604, + "logps/chosen": -0.0005113465595059097, + "logps/rejected": -2.0377840995788574, + "loss": 1.1477, + "nll_loss": 0.28688517212867737, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1134658860974014e-05, + "rewards/margins": 0.20372727513313293, + "rewards/rejected": -0.20377840101718903, + "step": 6379 + }, + { + "epoch": 4.412171507607193, + "grad_norm": 12.92465877532959, + "learning_rate": 3.104349162440449e-05, + "log_odds_chosen": 11.562097549438477, + "log_odds_ratio": -1.4962448403821327e-05, + "logits/chosen": -0.7335084676742554, + "logits/rejected": -0.7306284308433533, + "logps/chosen": -0.0002169413783121854, + "logps/rejected": -2.4687299728393555, + "loss": 1.3994, + "nll_loss": 0.34985631704330444, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1694138922612183e-05, + "rewards/margins": 0.24685129523277283, + "rewards/rejected": -0.24687300622463226, + "step": 6380 + }, + { + "epoch": 4.412863070539419, + "grad_norm": 9.519418716430664, + "learning_rate": 3.103964960811434e-05, + "log_odds_chosen": 9.936283111572266, + "log_odds_ratio": -0.00035743307671509683, + "logits/chosen": -0.6547790169715881, + "logits/rejected": -0.6877670288085938, + "logps/chosen": -0.0006231877487152815, + "logps/rejected": -2.062743663787842, + "loss": 0.9886, + "nll_loss": 0.2471020221710205, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.231878069229424e-05, + "rewards/margins": 0.20621204376220703, + "rewards/rejected": -0.2062743753194809, + "step": 6381 + }, + { + "epoch": 4.413554633471646, + "grad_norm": 11.097110748291016, + "learning_rate": 3.103580759182419e-05, + "log_odds_chosen": 10.018781661987305, + "log_odds_ratio": -0.00010880576155614108, + "logits/chosen": -0.36143845319747925, + "logits/rejected": -0.40808019042015076, + "logps/chosen": -0.00034866592613980174, + "logps/rejected": -1.9138808250427246, + "loss": 1.2034, + "nll_loss": 0.30082690715789795, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.486659625195898e-05, + "rewards/margins": 0.1913532167673111, + "rewards/rejected": -0.1913880705833435, + "step": 6382 + }, + { + "epoch": 4.414246196403873, + "grad_norm": 6.786111831665039, + "learning_rate": 3.103196557553405e-05, + "log_odds_chosen": 10.440179824829102, + "log_odds_ratio": -0.0006543896161019802, + "logits/chosen": -0.48476123809814453, + "logits/rejected": -0.5525182485580444, + "logps/chosen": -0.00040884173358790576, + "logps/rejected": -1.9244616031646729, + "loss": 0.9288, + "nll_loss": 0.23214299976825714, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0884173358790576e-05, + "rewards/margins": 0.19240528345108032, + "rewards/rejected": -0.19244614243507385, + "step": 6383 + }, + { + "epoch": 4.4149377593361, + "grad_norm": 8.502795219421387, + "learning_rate": 3.102812355924389e-05, + "log_odds_chosen": 9.403182983398438, + "log_odds_ratio": -0.0007035625749267638, + "logits/chosen": -0.5949018597602844, + "logits/rejected": -0.5342052578926086, + "logps/chosen": -0.0016120446380227804, + "logps/rejected": -1.7205469608306885, + "loss": 1.262, + "nll_loss": 0.31543827056884766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016120447253342718, + "rewards/margins": 0.17189349234104156, + "rewards/rejected": -0.1720547080039978, + "step": 6384 + }, + { + "epoch": 4.415629322268327, + "grad_norm": 6.596683025360107, + "learning_rate": 3.1024281542953745e-05, + "log_odds_chosen": 9.544157028198242, + "log_odds_ratio": -0.00027197145391255617, + "logits/chosen": -0.5985953211784363, + "logits/rejected": -0.6238245964050293, + "logps/chosen": -0.0003750473551917821, + "logps/rejected": -1.8366684913635254, + "loss": 1.1725, + "nll_loss": 0.2931087613105774, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.750473479158245e-05, + "rewards/margins": 0.1836293488740921, + "rewards/rejected": -0.18366685509681702, + "step": 6385 + }, + { + "epoch": 4.4163208852005535, + "grad_norm": 9.299519538879395, + "learning_rate": 3.10204395266636e-05, + "log_odds_chosen": 10.27207088470459, + "log_odds_ratio": -0.00013783818576484919, + "logits/chosen": -0.26307037472724915, + "logits/rejected": -0.4357321560382843, + "logps/chosen": -0.0016038173343986273, + "logps/rejected": -2.7095847129821777, + "loss": 0.923, + "nll_loss": 0.23072925209999084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016038172179833055, + "rewards/margins": 0.27079811692237854, + "rewards/rejected": -0.27095848321914673, + "step": 6386 + }, + { + "epoch": 4.41701244813278, + "grad_norm": 10.660256385803223, + "learning_rate": 3.1016597510373443e-05, + "log_odds_chosen": 9.878748893737793, + "log_odds_ratio": -0.0004966585547663271, + "logits/chosen": -0.390649676322937, + "logits/rejected": -0.5493044853210449, + "logps/chosen": -0.008036092855036259, + "logps/rejected": -2.27475905418396, + "loss": 0.9765, + "nll_loss": 0.244069904088974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008036093786358833, + "rewards/margins": 0.22667229175567627, + "rewards/rejected": -0.22747588157653809, + "step": 6387 + }, + { + "epoch": 4.417704011065007, + "grad_norm": 11.35327434539795, + "learning_rate": 3.1012755494083296e-05, + "log_odds_chosen": 9.402727127075195, + "log_odds_ratio": -0.0061118570156395435, + "logits/chosen": -0.4596131145954132, + "logits/rejected": -0.6097520589828491, + "logps/chosen": -0.0035455625038594007, + "logps/rejected": -1.9800617694854736, + "loss": 0.8287, + "nll_loss": 0.20656505227088928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003545562212821096, + "rewards/margins": 0.19765162467956543, + "rewards/rejected": -0.19800618290901184, + "step": 6388 + }, + { + "epoch": 4.418395573997234, + "grad_norm": 8.991832733154297, + "learning_rate": 3.100891347779315e-05, + "log_odds_chosen": 9.527231216430664, + "log_odds_ratio": -0.00015416370297316462, + "logits/chosen": -0.24660900235176086, + "logits/rejected": -0.290757417678833, + "logps/chosen": -0.001949973520822823, + "logps/rejected": -2.1968846321105957, + "loss": 0.8304, + "nll_loss": 0.2075803279876709, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001949973520822823, + "rewards/margins": 0.21949344873428345, + "rewards/rejected": -0.21968846023082733, + "step": 6389 + }, + { + "epoch": 4.419087136929461, + "grad_norm": 107.30193328857422, + "learning_rate": 3.1005071461503e-05, + "log_odds_chosen": 8.85942554473877, + "log_odds_ratio": -0.4232620298862457, + "logits/chosen": -0.19119130074977875, + "logits/rejected": -0.2185167372226715, + "logps/chosen": -0.053555119782686234, + "logps/rejected": -2.0136759281158447, + "loss": 1.255, + "nll_loss": 0.2714240550994873, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005355512257665396, + "rewards/margins": 0.19601207971572876, + "rewards/rejected": -0.20136758685112, + "step": 6390 + }, + { + "epoch": 4.419778699861688, + "grad_norm": 5.519765853881836, + "learning_rate": 3.1001229445212846e-05, + "log_odds_chosen": 10.144405364990234, + "log_odds_ratio": -0.0001967347925528884, + "logits/chosen": -0.37597039341926575, + "logits/rejected": -0.4154529571533203, + "logps/chosen": -0.01816502772271633, + "logps/rejected": -2.4218828678131104, + "loss": 1.3425, + "nll_loss": 0.3355969786643982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001816502888686955, + "rewards/margins": 0.24037177860736847, + "rewards/rejected": -0.24218828976154327, + "step": 6391 + }, + { + "epoch": 4.4204702627939145, + "grad_norm": 9.79228687286377, + "learning_rate": 3.0997387428922706e-05, + "log_odds_chosen": 9.638640403747559, + "log_odds_ratio": -0.0003855983086396009, + "logits/chosen": -0.3042322099208832, + "logits/rejected": -0.35943084955215454, + "logps/chosen": -0.0038778900634497404, + "logps/rejected": -2.6894869804382324, + "loss": 1.006, + "nll_loss": 0.2514561116695404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038778901216574013, + "rewards/margins": 0.2685609459877014, + "rewards/rejected": -0.2689487338066101, + "step": 6392 + }, + { + "epoch": 4.421161825726141, + "grad_norm": 10.364742279052734, + "learning_rate": 3.099354541263255e-05, + "log_odds_chosen": 10.168045043945312, + "log_odds_ratio": -0.00010480596392881125, + "logits/chosen": -0.30890512466430664, + "logits/rejected": -0.37130510807037354, + "logps/chosen": -0.0009393454529345036, + "logps/rejected": -1.9779996871948242, + "loss": 1.1569, + "nll_loss": 0.28921347856521606, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.393454092787579e-05, + "rewards/margins": 0.19770604372024536, + "rewards/rejected": -0.19779998064041138, + "step": 6393 + }, + { + "epoch": 4.421853388658368, + "grad_norm": 7.593661308288574, + "learning_rate": 3.0989703396342404e-05, + "log_odds_chosen": 10.138404846191406, + "log_odds_ratio": -0.0004462719371076673, + "logits/chosen": -0.5672686100006104, + "logits/rejected": -0.6298636794090271, + "logps/chosen": -0.0006593248108401895, + "logps/rejected": -2.124025344848633, + "loss": 1.2576, + "nll_loss": 0.3143640160560608, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.593247962882742e-05, + "rewards/margins": 0.21233659982681274, + "rewards/rejected": -0.21240252256393433, + "step": 6394 + }, + { + "epoch": 4.422544951590595, + "grad_norm": 11.665322303771973, + "learning_rate": 3.098586138005225e-05, + "log_odds_chosen": 10.167012214660645, + "log_odds_ratio": -0.0001747005881043151, + "logits/chosen": -0.9220602512359619, + "logits/rejected": -0.9767961502075195, + "logps/chosen": -0.0002635978162288666, + "logps/rejected": -1.346954345703125, + "loss": 1.2314, + "nll_loss": 0.30783140659332275, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.635978307807818e-05, + "rewards/margins": 0.13466906547546387, + "rewards/rejected": -0.13469544053077698, + "step": 6395 + }, + { + "epoch": 4.423236514522822, + "grad_norm": 7.8194661140441895, + "learning_rate": 3.09820193637621e-05, + "log_odds_chosen": 10.15457534790039, + "log_odds_ratio": -8.2727252447512e-05, + "logits/chosen": -0.7054091095924377, + "logits/rejected": -0.7562676668167114, + "logps/chosen": -0.00047401481424458325, + "logps/rejected": -2.0918338298797607, + "loss": 1.4992, + "nll_loss": 0.37479984760284424, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7401481424458325e-05, + "rewards/margins": 0.2091359794139862, + "rewards/rejected": -0.20918338000774384, + "step": 6396 + }, + { + "epoch": 4.423928077455049, + "grad_norm": 6.56026029586792, + "learning_rate": 3.0978177347471954e-05, + "log_odds_chosen": 10.212728500366211, + "log_odds_ratio": -5.804194006486796e-05, + "logits/chosen": -0.5783580541610718, + "logits/rejected": -0.6343774199485779, + "logps/chosen": -0.0001326277997577563, + "logps/rejected": -1.5737571716308594, + "loss": 0.9005, + "nll_loss": 0.2251134216785431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3262779248179868e-05, + "rewards/margins": 0.1573624610900879, + "rewards/rejected": -0.15737572312355042, + "step": 6397 + }, + { + "epoch": 4.4246196403872755, + "grad_norm": 8.338546752929688, + "learning_rate": 3.09743353311818e-05, + "log_odds_chosen": 10.239168167114258, + "log_odds_ratio": -0.00016068453260231763, + "logits/chosen": -0.7848932147026062, + "logits/rejected": -0.8591190576553345, + "logps/chosen": -0.00024313712492585182, + "logps/rejected": -1.6348729133605957, + "loss": 1.0218, + "nll_loss": 0.2554447054862976, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4313714675372466e-05, + "rewards/margins": 0.16346299648284912, + "rewards/rejected": -0.16348731517791748, + "step": 6398 + }, + { + "epoch": 4.425311203319502, + "grad_norm": 20.944805145263672, + "learning_rate": 3.097049331489166e-05, + "log_odds_chosen": 10.49547004699707, + "log_odds_ratio": -0.0018726128619164228, + "logits/chosen": -0.7779617309570312, + "logits/rejected": -0.8085945844650269, + "logps/chosen": -0.0009949326049536467, + "logps/rejected": -1.942845344543457, + "loss": 1.1049, + "nll_loss": 0.27604442834854126, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.949326340574771e-05, + "rewards/margins": 0.1941850334405899, + "rewards/rejected": -0.19428452849388123, + "step": 6399 + }, + { + "epoch": 4.426002766251729, + "grad_norm": 16.597238540649414, + "learning_rate": 3.0966651298601505e-05, + "log_odds_chosen": 10.280326843261719, + "log_odds_ratio": -0.0021524475887417793, + "logits/chosen": -0.3056260347366333, + "logits/rejected": -0.3296273350715637, + "logps/chosen": -0.0016646343283355236, + "logps/rejected": -2.4135384559631348, + "loss": 1.0892, + "nll_loss": 0.2720944285392761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016646343283355236, + "rewards/margins": 0.24118739366531372, + "rewards/rejected": -0.2413538694381714, + "step": 6400 + }, + { + "epoch": 4.426694329183956, + "grad_norm": 13.069977760314941, + "learning_rate": 3.096280928231136e-05, + "log_odds_chosen": 9.882538795471191, + "log_odds_ratio": -0.00026573645300231874, + "logits/chosen": -0.5362889766693115, + "logits/rejected": -0.5939916968345642, + "logps/chosen": -0.0003392535727471113, + "logps/rejected": -1.5014183521270752, + "loss": 1.0444, + "nll_loss": 0.2610677480697632, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.392536018509418e-05, + "rewards/margins": 0.15010792016983032, + "rewards/rejected": -0.1501418501138687, + "step": 6401 + }, + { + "epoch": 4.427385892116183, + "grad_norm": 12.240621566772461, + "learning_rate": 3.095896726602121e-05, + "log_odds_chosen": 10.700019836425781, + "log_odds_ratio": -9.252676682081074e-05, + "logits/chosen": -0.3738962411880493, + "logits/rejected": -0.4703482389450073, + "logps/chosen": -0.0011295323492959142, + "logps/rejected": -2.3972387313842773, + "loss": 1.3156, + "nll_loss": 0.3288910686969757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011295323201920837, + "rewards/margins": 0.23961091041564941, + "rewards/rejected": -0.23972387611865997, + "step": 6402 + }, + { + "epoch": 4.42807745504841, + "grad_norm": 11.340660095214844, + "learning_rate": 3.095512524973106e-05, + "log_odds_chosen": 10.539658546447754, + "log_odds_ratio": -0.00026402639923617244, + "logits/chosen": -0.5166309475898743, + "logits/rejected": -0.6453360915184021, + "logps/chosen": -0.0009884284809231758, + "logps/rejected": -1.8809010982513428, + "loss": 1.2957, + "nll_loss": 0.32388707995414734, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.884285100270063e-05, + "rewards/margins": 0.18799127638339996, + "rewards/rejected": -0.18809011578559875, + "step": 6403 + }, + { + "epoch": 4.4287690179806365, + "grad_norm": 9.22094440460205, + "learning_rate": 3.095128323344091e-05, + "log_odds_chosen": 8.828676223754883, + "log_odds_ratio": -0.007879544980823994, + "logits/chosen": -0.6498576402664185, + "logits/rejected": -0.6279178857803345, + "logps/chosen": -0.04624735936522484, + "logps/rejected": -1.2368273735046387, + "loss": 1.3293, + "nll_loss": 0.33152979612350464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004624736495316029, + "rewards/margins": 0.11905801296234131, + "rewards/rejected": -0.12368274480104446, + "step": 6404 + }, + { + "epoch": 4.429460580912863, + "grad_norm": 10.181614875793457, + "learning_rate": 3.094744121715076e-05, + "log_odds_chosen": 9.462580680847168, + "log_odds_ratio": -0.06620946526527405, + "logits/chosen": -0.7551283240318298, + "logits/rejected": -0.7843388319015503, + "logps/chosen": -0.014280532486736774, + "logps/rejected": -1.8985339403152466, + "loss": 1.3754, + "nll_loss": 0.3372199535369873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014280533650889993, + "rewards/margins": 0.18842534720897675, + "rewards/rejected": -0.18985339999198914, + "step": 6405 + }, + { + "epoch": 4.43015214384509, + "grad_norm": 12.048741340637207, + "learning_rate": 3.094359920086061e-05, + "log_odds_chosen": 9.604146957397461, + "log_odds_ratio": -0.0002486729063093662, + "logits/chosen": -0.16522309184074402, + "logits/rejected": -0.2706667184829712, + "logps/chosen": -0.0008669736562296748, + "logps/rejected": -1.7650139331817627, + "loss": 1.1396, + "nll_loss": 0.28488093614578247, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.669737144373357e-05, + "rewards/margins": 0.17641469836235046, + "rewards/rejected": -0.17650139331817627, + "step": 6406 + }, + { + "epoch": 4.430843706777317, + "grad_norm": 7.385758399963379, + "learning_rate": 3.093975718457046e-05, + "log_odds_chosen": 10.225251197814941, + "log_odds_ratio": -0.0005238738958723843, + "logits/chosen": -0.6043409109115601, + "logits/rejected": -0.6176189184188843, + "logps/chosen": -0.022112663835287094, + "logps/rejected": -2.721525192260742, + "loss": 2.0196, + "nll_loss": 0.5048423409461975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002211266430094838, + "rewards/margins": 0.2699412703514099, + "rewards/rejected": -0.27215251326560974, + "step": 6407 + }, + { + "epoch": 4.431535269709544, + "grad_norm": 5.175098896026611, + "learning_rate": 3.093591516828032e-05, + "log_odds_chosen": 9.267239570617676, + "log_odds_ratio": -0.0011088968021795154, + "logits/chosen": -0.6202181577682495, + "logits/rejected": -0.6301971673965454, + "logps/chosen": -0.0027066871989518404, + "logps/rejected": -2.233309030532837, + "loss": 1.1131, + "nll_loss": 0.2781616449356079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002706687373574823, + "rewards/margins": 0.2230602353811264, + "rewards/rejected": -0.22333090007305145, + "step": 6408 + }, + { + "epoch": 4.432226832641771, + "grad_norm": 9.984957695007324, + "learning_rate": 3.093207315199016e-05, + "log_odds_chosen": 9.920219421386719, + "log_odds_ratio": -0.0005398921784944832, + "logits/chosen": -0.5337777137756348, + "logits/rejected": -0.5695146322250366, + "logps/chosen": -0.0006002856534905732, + "logps/rejected": -1.1492040157318115, + "loss": 1.2768, + "nll_loss": 0.3191404938697815, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002855661790818e-05, + "rewards/margins": 0.11486037075519562, + "rewards/rejected": -0.11492040008306503, + "step": 6409 + }, + { + "epoch": 4.4329183955739975, + "grad_norm": 11.91898250579834, + "learning_rate": 3.0928231135700016e-05, + "log_odds_chosen": 10.906294822692871, + "log_odds_ratio": -5.3284056775737554e-05, + "logits/chosen": -0.6611661911010742, + "logits/rejected": -0.7557975053787231, + "logps/chosen": -0.0003193242009729147, + "logps/rejected": -2.520918607711792, + "loss": 1.267, + "nll_loss": 0.3167416453361511, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.193242082488723e-05, + "rewards/margins": 0.2520599067211151, + "rewards/rejected": -0.2520918548107147, + "step": 6410 + }, + { + "epoch": 4.433609958506224, + "grad_norm": 8.004185676574707, + "learning_rate": 3.092438911940987e-05, + "log_odds_chosen": 10.14210319519043, + "log_odds_ratio": -8.355508180102333e-05, + "logits/chosen": -0.7471209764480591, + "logits/rejected": -0.7696855068206787, + "logps/chosen": -0.0011674391571432352, + "logps/rejected": -2.0500221252441406, + "loss": 1.0659, + "nll_loss": 0.26646915078163147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011674391134874895, + "rewards/margins": 0.20488549768924713, + "rewards/rejected": -0.20500224828720093, + "step": 6411 + }, + { + "epoch": 4.434301521438451, + "grad_norm": 12.455580711364746, + "learning_rate": 3.092054710311972e-05, + "log_odds_chosen": 10.676868438720703, + "log_odds_ratio": -0.00018224478117190301, + "logits/chosen": -0.7852025032043457, + "logits/rejected": -0.8711831569671631, + "logps/chosen": -0.00014174616080708802, + "logps/rejected": -1.6316860914230347, + "loss": 1.4967, + "nll_loss": 0.37414783239364624, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4174616808304563e-05, + "rewards/margins": 0.1631544530391693, + "rewards/rejected": -0.16316860914230347, + "step": 6412 + }, + { + "epoch": 4.434993084370678, + "grad_norm": 7.661368370056152, + "learning_rate": 3.0916705086829566e-05, + "log_odds_chosen": 10.154672622680664, + "log_odds_ratio": -0.00014710980758536607, + "logits/chosen": -0.7227736711502075, + "logits/rejected": -0.8169190883636475, + "logps/chosen": -0.0003246809064876288, + "logps/rejected": -2.028472661972046, + "loss": 1.1493, + "nll_loss": 0.2873007655143738, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.246809137635864e-05, + "rewards/margins": 0.20281481742858887, + "rewards/rejected": -0.20284727215766907, + "step": 6413 + }, + { + "epoch": 4.435684647302905, + "grad_norm": 8.186695098876953, + "learning_rate": 3.091286307053942e-05, + "log_odds_chosen": 9.463798522949219, + "log_odds_ratio": -0.00047613875358365476, + "logits/chosen": -0.4769138693809509, + "logits/rejected": -0.4970892667770386, + "logps/chosen": -0.0004365852801129222, + "logps/rejected": -1.2462592124938965, + "loss": 0.9771, + "nll_loss": 0.24422718584537506, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3658525100909173e-05, + "rewards/margins": 0.12458226084709167, + "rewards/rejected": -0.12462591379880905, + "step": 6414 + }, + { + "epoch": 4.436376210235132, + "grad_norm": 10.404152870178223, + "learning_rate": 3.090902105424927e-05, + "log_odds_chosen": 9.704734802246094, + "log_odds_ratio": -0.0008971289498731494, + "logits/chosen": -0.6355952024459839, + "logits/rejected": -0.6960784196853638, + "logps/chosen": -0.0017384829698130488, + "logps/rejected": -2.2069873809814453, + "loss": 1.1935, + "nll_loss": 0.2982823848724365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001738483115332201, + "rewards/margins": 0.2205248773097992, + "rewards/rejected": -0.22069872915744781, + "step": 6415 + }, + { + "epoch": 4.4370677731673585, + "grad_norm": 11.764803886413574, + "learning_rate": 3.090517903795912e-05, + "log_odds_chosen": 10.937488555908203, + "log_odds_ratio": -3.620861025410704e-05, + "logits/chosen": -0.21870945394039154, + "logits/rejected": -0.3768613636493683, + "logps/chosen": -0.0001105781557271257, + "logps/rejected": -1.834923267364502, + "loss": 1.0417, + "nll_loss": 0.2604144215583801, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.105781575461151e-05, + "rewards/margins": 0.18348127603530884, + "rewards/rejected": -0.18349234759807587, + "step": 6416 + }, + { + "epoch": 4.437759336099585, + "grad_norm": 9.569924354553223, + "learning_rate": 3.0901337021668976e-05, + "log_odds_chosen": 8.868842124938965, + "log_odds_ratio": -0.006081722676753998, + "logits/chosen": -0.6976808905601501, + "logits/rejected": -0.7641477584838867, + "logps/chosen": -0.0033662666101008654, + "logps/rejected": -1.2174699306488037, + "loss": 0.9788, + "nll_loss": 0.24408631026744843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033662666101008654, + "rewards/margins": 0.12141037732362747, + "rewards/rejected": -0.1217469871044159, + "step": 6417 + }, + { + "epoch": 4.438450899031812, + "grad_norm": 8.44909954071045, + "learning_rate": 3.089749500537882e-05, + "log_odds_chosen": 10.522440910339355, + "log_odds_ratio": -0.00027663138462230563, + "logits/chosen": -0.876654863357544, + "logits/rejected": -0.9026960134506226, + "logps/chosen": -0.0003937871369998902, + "logps/rejected": -2.161076545715332, + "loss": 1.7714, + "nll_loss": 0.44281840324401855, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.93787122447975e-05, + "rewards/margins": 0.21606828272342682, + "rewards/rejected": -0.21610765159130096, + "step": 6418 + }, + { + "epoch": 4.439142461964039, + "grad_norm": 7.009344577789307, + "learning_rate": 3.0893652989088674e-05, + "log_odds_chosen": 9.771029472351074, + "log_odds_ratio": -0.0011895447969436646, + "logits/chosen": -0.49803614616394043, + "logits/rejected": -0.6052780151367188, + "logps/chosen": -0.04391154646873474, + "logps/rejected": -2.5477042198181152, + "loss": 1.2561, + "nll_loss": 0.313909113407135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004391154740005732, + "rewards/margins": 0.2503792941570282, + "rewards/rejected": -0.254770427942276, + "step": 6419 + }, + { + "epoch": 4.439834024896266, + "grad_norm": 6.5041093826293945, + "learning_rate": 3.0889810972798527e-05, + "log_odds_chosen": 9.302213668823242, + "log_odds_ratio": -0.0004543719405774027, + "logits/chosen": -0.7108435034751892, + "logits/rejected": -0.7111088633537292, + "logps/chosen": -0.0005316220922395587, + "logps/rejected": -1.6619925498962402, + "loss": 1.1103, + "nll_loss": 0.2775220274925232, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.316221358953044e-05, + "rewards/margins": 0.16614609956741333, + "rewards/rejected": -0.16619926691055298, + "step": 6420 + }, + { + "epoch": 4.440525587828493, + "grad_norm": 4.889501094818115, + "learning_rate": 3.088596895650838e-05, + "log_odds_chosen": 9.731374740600586, + "log_odds_ratio": -0.00034095943556167185, + "logits/chosen": -0.3538818657398224, + "logits/rejected": -0.32288655638694763, + "logps/chosen": -0.0011292172130197287, + "logps/rejected": -1.4478421211242676, + "loss": 1.3477, + "nll_loss": 0.3369019627571106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011292171984678134, + "rewards/margins": 0.14467130601406097, + "rewards/rejected": -0.14478421211242676, + "step": 6421 + }, + { + "epoch": 4.441217150760719, + "grad_norm": 8.64886474609375, + "learning_rate": 3.0882126940218225e-05, + "log_odds_chosen": 10.265148162841797, + "log_odds_ratio": -0.00011314827861497179, + "logits/chosen": -0.942933201789856, + "logits/rejected": -0.8902233839035034, + "logps/chosen": -0.000461632531369105, + "logps/rejected": -1.9104479551315308, + "loss": 0.6952, + "nll_loss": 0.17379961907863617, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.616325531969778e-05, + "rewards/margins": 0.1909986287355423, + "rewards/rejected": -0.19104479253292084, + "step": 6422 + }, + { + "epoch": 4.441908713692946, + "grad_norm": 15.804269790649414, + "learning_rate": 3.087828492392808e-05, + "log_odds_chosen": 10.559988021850586, + "log_odds_ratio": -5.758218321716413e-05, + "logits/chosen": -0.8409276008605957, + "logits/rejected": -0.8524722456932068, + "logps/chosen": -0.0002882361295633018, + "logps/rejected": -2.1920557022094727, + "loss": 1.7781, + "nll_loss": 0.44452598690986633, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8823611501138657e-05, + "rewards/margins": 0.2191767692565918, + "rewards/rejected": -0.2192055881023407, + "step": 6423 + }, + { + "epoch": 4.442600276625173, + "grad_norm": 15.704327583312988, + "learning_rate": 3.087444290763793e-05, + "log_odds_chosen": 10.257490158081055, + "log_odds_ratio": -0.0005293237045407295, + "logits/chosen": -0.48299890756607056, + "logits/rejected": -0.47468554973602295, + "logps/chosen": -0.001382496440783143, + "logps/rejected": -2.5976834297180176, + "loss": 1.1784, + "nll_loss": 0.2945585250854492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013824964116793126, + "rewards/margins": 0.25963011384010315, + "rewards/rejected": -0.25976836681365967, + "step": 6424 + }, + { + "epoch": 4.4432918395574, + "grad_norm": 5.465470790863037, + "learning_rate": 3.0870600891347775e-05, + "log_odds_chosen": 11.397879600524902, + "log_odds_ratio": -1.5060177247505635e-05, + "logits/chosen": -0.5835100412368774, + "logits/rejected": -0.6525250673294067, + "logps/chosen": -0.00014226968050934374, + "logps/rejected": -2.534497022628784, + "loss": 0.7325, + "nll_loss": 0.1831332892179489, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4226966413843911e-05, + "rewards/margins": 0.25343549251556396, + "rewards/rejected": -0.2534497082233429, + "step": 6425 + }, + { + "epoch": 4.443983402489627, + "grad_norm": 11.010247230529785, + "learning_rate": 3.0866758875057634e-05, + "log_odds_chosen": 10.234821319580078, + "log_odds_ratio": -8.235462155425921e-05, + "logits/chosen": -0.19324612617492676, + "logits/rejected": -0.24131500720977783, + "logps/chosen": -0.0001237574906554073, + "logps/rejected": -1.435868263244629, + "loss": 1.371, + "nll_loss": 0.3427380323410034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.237574815604603e-05, + "rewards/margins": 0.14357444643974304, + "rewards/rejected": -0.14358682930469513, + "step": 6426 + }, + { + "epoch": 4.444674965421854, + "grad_norm": 11.74059772491455, + "learning_rate": 3.086291685876748e-05, + "log_odds_chosen": 10.10733413696289, + "log_odds_ratio": -8.586710464442149e-05, + "logits/chosen": -0.42839187383651733, + "logits/rejected": -0.5178070068359375, + "logps/chosen": -0.0004466324462555349, + "logps/rejected": -1.95094633102417, + "loss": 1.1682, + "nll_loss": 0.2920527458190918, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4663247535936534e-05, + "rewards/margins": 0.19504998624324799, + "rewards/rejected": -0.19509464502334595, + "step": 6427 + }, + { + "epoch": 4.44536652835408, + "grad_norm": 7.334704875946045, + "learning_rate": 3.085907484247733e-05, + "log_odds_chosen": 8.667322158813477, + "log_odds_ratio": -0.0066236890852451324, + "logits/chosen": -0.44771382212638855, + "logits/rejected": -0.545290470123291, + "logps/chosen": -0.007994197309017181, + "logps/rejected": -1.48850417137146, + "loss": 1.5761, + "nll_loss": 0.3933669328689575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007994197658263147, + "rewards/margins": 0.14805099368095398, + "rewards/rejected": -0.1488504260778427, + "step": 6428 + }, + { + "epoch": 4.446058091286307, + "grad_norm": 6.081279754638672, + "learning_rate": 3.0855232826187185e-05, + "log_odds_chosen": 9.644600868225098, + "log_odds_ratio": -0.0006043565226718783, + "logits/chosen": -0.5438819527626038, + "logits/rejected": -0.6006056070327759, + "logps/chosen": -0.013640167191624641, + "logps/rejected": -2.112248420715332, + "loss": 0.953, + "nll_loss": 0.23818287253379822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013640167890116572, + "rewards/margins": 0.20986083149909973, + "rewards/rejected": -0.21122485399246216, + "step": 6429 + }, + { + "epoch": 4.446749654218534, + "grad_norm": 9.497159004211426, + "learning_rate": 3.085139080989704e-05, + "log_odds_chosen": 8.957687377929688, + "log_odds_ratio": -0.001704613328911364, + "logits/chosen": -0.8124638795852661, + "logits/rejected": -0.7367603182792664, + "logps/chosen": -0.0011274907737970352, + "logps/rejected": -1.1338789463043213, + "loss": 1.1416, + "nll_loss": 0.28522783517837524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011274906864855438, + "rewards/margins": 0.11327514052391052, + "rewards/rejected": -0.11338789016008377, + "step": 6430 + }, + { + "epoch": 4.447441217150761, + "grad_norm": 7.188299179077148, + "learning_rate": 3.084754879360688e-05, + "log_odds_chosen": 8.436171531677246, + "log_odds_ratio": -0.0023574642837047577, + "logits/chosen": -0.4236351251602173, + "logits/rejected": -0.4923994541168213, + "logps/chosen": -0.0010589384473860264, + "logps/rejected": -1.0145111083984375, + "loss": 1.0346, + "nll_loss": 0.25842535495758057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010589385055936873, + "rewards/margins": 0.10134520381689072, + "rewards/rejected": -0.1014510989189148, + "step": 6431 + }, + { + "epoch": 4.448132780082988, + "grad_norm": 8.30783462524414, + "learning_rate": 3.0843706777316736e-05, + "log_odds_chosen": 8.656213760375977, + "log_odds_ratio": -0.04048136621713638, + "logits/chosen": -0.4624170958995819, + "logits/rejected": -0.4649544358253479, + "logps/chosen": -0.00918676145374775, + "logps/rejected": -1.6840345859527588, + "loss": 1.0427, + "nll_loss": 0.25663626194000244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000918676087167114, + "rewards/margins": 0.16748477518558502, + "rewards/rejected": -0.16840344667434692, + "step": 6432 + }, + { + "epoch": 4.448824343015215, + "grad_norm": 10.003007888793945, + "learning_rate": 3.083986476102659e-05, + "log_odds_chosen": 11.435136795043945, + "log_odds_ratio": -4.180811811238527e-05, + "logits/chosen": -0.6977967023849487, + "logits/rejected": -0.6912387609481812, + "logps/chosen": -0.00014621099398937076, + "logps/rejected": -2.687011480331421, + "loss": 1.6182, + "nll_loss": 0.40453970432281494, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4621098671341315e-05, + "rewards/margins": 0.26868653297424316, + "rewards/rejected": -0.2687011659145355, + "step": 6433 + }, + { + "epoch": 4.449515905947441, + "grad_norm": 11.087772369384766, + "learning_rate": 3.0836022744736434e-05, + "log_odds_chosen": 9.166482925415039, + "log_odds_ratio": -0.00566418282687664, + "logits/chosen": -0.45822763442993164, + "logits/rejected": -0.48969143629074097, + "logps/chosen": -0.0015105127822607756, + "logps/rejected": -1.6272878646850586, + "loss": 1.8693, + "nll_loss": 0.4667499363422394, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015105126658454537, + "rewards/margins": 0.16257771849632263, + "rewards/rejected": -0.16272878646850586, + "step": 6434 + }, + { + "epoch": 4.450207468879668, + "grad_norm": 9.10877799987793, + "learning_rate": 3.083218072844629e-05, + "log_odds_chosen": 9.429122924804688, + "log_odds_ratio": -0.001028799219056964, + "logits/chosen": -0.6517131924629211, + "logits/rejected": -0.6948776841163635, + "logps/chosen": -0.015063981525599957, + "logps/rejected": -1.857469081878662, + "loss": 0.8447, + "nll_loss": 0.2110617756843567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015063981991261244, + "rewards/margins": 0.18424050509929657, + "rewards/rejected": -0.1857469230890274, + "step": 6435 + }, + { + "epoch": 4.450899031811895, + "grad_norm": 7.889139652252197, + "learning_rate": 3.082833871215614e-05, + "log_odds_chosen": 11.177837371826172, + "log_odds_ratio": -2.0480580133153126e-05, + "logits/chosen": -0.5944574475288391, + "logits/rejected": -0.5678659677505493, + "logps/chosen": -0.00023821931972634047, + "logps/rejected": -2.3895411491394043, + "loss": 1.0383, + "nll_loss": 0.25957247614860535, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.382193270022981e-05, + "rewards/margins": 0.23893029987812042, + "rewards/rejected": -0.23895412683486938, + "step": 6436 + }, + { + "epoch": 4.451590594744122, + "grad_norm": 12.398857116699219, + "learning_rate": 3.082449669586599e-05, + "log_odds_chosen": 9.734447479248047, + "log_odds_ratio": -0.0419284924864769, + "logits/chosen": -0.9242825508117676, + "logits/rejected": -1.0619961023330688, + "logps/chosen": -0.009608970023691654, + "logps/rejected": -1.9355764389038086, + "loss": 0.9802, + "nll_loss": 0.24086391925811768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009608970722183585, + "rewards/margins": 0.19259673357009888, + "rewards/rejected": -0.19355764985084534, + "step": 6437 + }, + { + "epoch": 4.452282157676349, + "grad_norm": 23.13532829284668, + "learning_rate": 3.0820654679575843e-05, + "log_odds_chosen": 8.321240425109863, + "log_odds_ratio": -0.15772351622581482, + "logits/chosen": -0.487088680267334, + "logits/rejected": -0.5112364888191223, + "logps/chosen": -0.022836901247501373, + "logps/rejected": -1.2704685926437378, + "loss": 1.113, + "nll_loss": 0.2624875605106354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0022836902644485235, + "rewards/margins": 0.12476316839456558, + "rewards/rejected": -0.1270468682050705, + "step": 6438 + }, + { + "epoch": 4.4529737206085755, + "grad_norm": 11.514544486999512, + "learning_rate": 3.0816812663285696e-05, + "log_odds_chosen": 11.126787185668945, + "log_odds_ratio": -7.575732888653874e-05, + "logits/chosen": -0.7251293659210205, + "logits/rejected": -0.809004545211792, + "logps/chosen": -0.0003191279247403145, + "logps/rejected": -2.6014175415039062, + "loss": 0.9218, + "nll_loss": 0.23044687509536743, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.191279392922297e-05, + "rewards/margins": 0.2601098418235779, + "rewards/rejected": -0.2601417601108551, + "step": 6439 + }, + { + "epoch": 4.453665283540802, + "grad_norm": 10.459208488464355, + "learning_rate": 3.081297064699554e-05, + "log_odds_chosen": 10.432282447814941, + "log_odds_ratio": -0.0002344041276955977, + "logits/chosen": -0.32301580905914307, + "logits/rejected": -0.3637202978134155, + "logps/chosen": -0.0006796496454626322, + "logps/rejected": -2.54196834564209, + "loss": 0.824, + "nll_loss": 0.20596429705619812, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.796496745664626e-05, + "rewards/margins": 0.2541288733482361, + "rewards/rejected": -0.2541968524456024, + "step": 6440 + }, + { + "epoch": 4.454356846473029, + "grad_norm": 10.875798225402832, + "learning_rate": 3.0809128630705394e-05, + "log_odds_chosen": 10.470844268798828, + "log_odds_ratio": -0.0001139358791988343, + "logits/chosen": -0.4594096541404724, + "logits/rejected": -0.5815310478210449, + "logps/chosen": -0.0007354323752224445, + "logps/rejected": -2.4845809936523438, + "loss": 0.9644, + "nll_loss": 0.24107995629310608, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.35432404326275e-05, + "rewards/margins": 0.24838455021381378, + "rewards/rejected": -0.24845808744430542, + "step": 6441 + }, + { + "epoch": 4.455048409405256, + "grad_norm": 9.098546981811523, + "learning_rate": 3.0805286614415246e-05, + "log_odds_chosen": 11.00687313079834, + "log_odds_ratio": -8.472923218505457e-05, + "logits/chosen": -0.6078069806098938, + "logits/rejected": -0.6600771546363831, + "logps/chosen": -0.00038548995507881045, + "logps/rejected": -2.265836715698242, + "loss": 0.6532, + "nll_loss": 0.16329284012317657, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8548994780285284e-05, + "rewards/margins": 0.22654514014720917, + "rewards/rejected": -0.22658368945121765, + "step": 6442 + }, + { + "epoch": 4.455739972337483, + "grad_norm": 10.756627082824707, + "learning_rate": 3.080144459812509e-05, + "log_odds_chosen": 10.610451698303223, + "log_odds_ratio": -0.0009124780190177262, + "logits/chosen": -0.6079075336456299, + "logits/rejected": -0.6510196924209595, + "logps/chosen": -0.0014088767347857356, + "logps/rejected": -2.842536449432373, + "loss": 1.286, + "nll_loss": 0.3214002847671509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014088767056819052, + "rewards/margins": 0.28411275148391724, + "rewards/rejected": -0.28425362706184387, + "step": 6443 + }, + { + "epoch": 4.45643153526971, + "grad_norm": 6.399550914764404, + "learning_rate": 3.079760258183495e-05, + "log_odds_chosen": 10.297996520996094, + "log_odds_ratio": -0.0007506664260290563, + "logits/chosen": -0.3183915615081787, + "logits/rejected": -0.41979384422302246, + "logps/chosen": -0.0004420267359819263, + "logps/rejected": -1.9215115308761597, + "loss": 1.0138, + "nll_loss": 0.253369003534317, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.42026685050223e-05, + "rewards/margins": 0.1921069324016571, + "rewards/rejected": -0.19215114414691925, + "step": 6444 + }, + { + "epoch": 4.4571230982019365, + "grad_norm": 7.257493019104004, + "learning_rate": 3.07937605655448e-05, + "log_odds_chosen": 11.059222221374512, + "log_odds_ratio": -5.228218651609495e-05, + "logits/chosen": -0.4313526749610901, + "logits/rejected": -0.5512241721153259, + "logps/chosen": -0.0009454325772821903, + "logps/rejected": -2.021420955657959, + "loss": 0.6734, + "nll_loss": 0.16833902895450592, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.45432620937936e-05, + "rewards/margins": 0.20204755663871765, + "rewards/rejected": -0.20214208960533142, + "step": 6445 + }, + { + "epoch": 4.457814661134163, + "grad_norm": 7.653397560119629, + "learning_rate": 3.078991854925465e-05, + "log_odds_chosen": 10.379366874694824, + "log_odds_ratio": -9.936068090610206e-05, + "logits/chosen": -0.5582969188690186, + "logits/rejected": -0.5746288299560547, + "logps/chosen": -0.00010474787268321961, + "logps/rejected": -1.4376403093338013, + "loss": 0.9908, + "nll_loss": 0.24768760800361633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.047478690452408e-05, + "rewards/margins": 0.1437535434961319, + "rewards/rejected": -0.14376403391361237, + "step": 6446 + }, + { + "epoch": 4.45850622406639, + "grad_norm": 8.728631019592285, + "learning_rate": 3.07860765329645e-05, + "log_odds_chosen": 8.934124946594238, + "log_odds_ratio": -0.012789107859134674, + "logits/chosen": -0.6111518144607544, + "logits/rejected": -0.6506301760673523, + "logps/chosen": -0.004878000356256962, + "logps/rejected": -1.5417579412460327, + "loss": 1.0899, + "nll_loss": 0.2711877226829529, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048779998905956745, + "rewards/margins": 0.153687983751297, + "rewards/rejected": -0.1541757881641388, + "step": 6447 + }, + { + "epoch": 4.459197786998617, + "grad_norm": 8.42003059387207, + "learning_rate": 3.0782234516674354e-05, + "log_odds_chosen": 9.99289321899414, + "log_odds_ratio": -6.565036164829507e-05, + "logits/chosen": -0.3047183156013489, + "logits/rejected": -0.33375582098960876, + "logps/chosen": -0.00017102361016441137, + "logps/rejected": -1.4403502941131592, + "loss": 0.8766, + "nll_loss": 0.21914011240005493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7102360288845375e-05, + "rewards/margins": 0.14401793479919434, + "rewards/rejected": -0.14403502643108368, + "step": 6448 + }, + { + "epoch": 4.459889349930844, + "grad_norm": 7.8015594482421875, + "learning_rate": 3.07783925003842e-05, + "log_odds_chosen": 9.683923721313477, + "log_odds_ratio": -0.00011912950139958411, + "logits/chosen": -0.5940296053886414, + "logits/rejected": -0.6487449407577515, + "logps/chosen": -0.00021323611144907773, + "logps/rejected": -1.2047635316848755, + "loss": 1.4649, + "nll_loss": 0.3662136495113373, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1323612600099295e-05, + "rewards/margins": 0.12045504152774811, + "rewards/rejected": -0.12047635018825531, + "step": 6449 + }, + { + "epoch": 4.460580912863071, + "grad_norm": 5.781506538391113, + "learning_rate": 3.077455048409405e-05, + "log_odds_chosen": 9.560298919677734, + "log_odds_ratio": -0.00018497445853427052, + "logits/chosen": -0.45896121859550476, + "logits/rejected": -0.4588027000427246, + "logps/chosen": -0.00031572478474117815, + "logps/rejected": -1.33500075340271, + "loss": 0.8983, + "nll_loss": 0.22454404830932617, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.157247920171358e-05, + "rewards/margins": 0.13346850872039795, + "rewards/rejected": -0.13350006937980652, + "step": 6450 + }, + { + "epoch": 4.4612724757952975, + "grad_norm": 5.633298397064209, + "learning_rate": 3.0770708467803905e-05, + "log_odds_chosen": 11.570259094238281, + "log_odds_ratio": -3.354436921654269e-05, + "logits/chosen": -0.3385199010372162, + "logits/rejected": -0.44875243306159973, + "logps/chosen": -0.000133796245791018, + "logps/rejected": -2.294816732406616, + "loss": 0.9183, + "nll_loss": 0.22957469522953033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3379624761000741e-05, + "rewards/margins": 0.22946830093860626, + "rewards/rejected": -0.22948168218135834, + "step": 6451 + }, + { + "epoch": 4.461964038727524, + "grad_norm": 13.917255401611328, + "learning_rate": 3.076686645151375e-05, + "log_odds_chosen": 7.53969144821167, + "log_odds_ratio": -0.21071849763393402, + "logits/chosen": -0.3512672185897827, + "logits/rejected": -0.3968029320240021, + "logps/chosen": -0.030562492087483406, + "logps/rejected": -1.0880842208862305, + "loss": 1.3055, + "nll_loss": 0.3052915334701538, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003056249115616083, + "rewards/margins": 0.10575217008590698, + "rewards/rejected": -0.10880842059850693, + "step": 6452 + }, + { + "epoch": 4.462655601659751, + "grad_norm": 10.778855323791504, + "learning_rate": 3.076302443522361e-05, + "log_odds_chosen": 9.772245407104492, + "log_odds_ratio": -0.0013269998598843813, + "logits/chosen": -0.3936904966831207, + "logits/rejected": -0.530383825302124, + "logps/chosen": -0.0011856453493237495, + "logps/rejected": -1.6621289253234863, + "loss": 1.3422, + "nll_loss": 0.33542922139167786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011856453056680039, + "rewards/margins": 0.1660943180322647, + "rewards/rejected": -0.16621288657188416, + "step": 6453 + }, + { + "epoch": 4.463347164591978, + "grad_norm": 7.1760406494140625, + "learning_rate": 3.0759182418933455e-05, + "log_odds_chosen": 9.600784301757812, + "log_odds_ratio": -0.0004721590084955096, + "logits/chosen": -0.08456657826900482, + "logits/rejected": -0.17342565953731537, + "logps/chosen": -0.0016557632479816675, + "logps/rejected": -2.121009588241577, + "loss": 1.121, + "nll_loss": 0.2802083194255829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001655763335293159, + "rewards/margins": 0.2119353711605072, + "rewards/rejected": -0.21210096776485443, + "step": 6454 + }, + { + "epoch": 4.464038727524205, + "grad_norm": 8.384750366210938, + "learning_rate": 3.075534040264331e-05, + "log_odds_chosen": 11.154319763183594, + "log_odds_ratio": -3.194598320988007e-05, + "logits/chosen": -0.3221395015716553, + "logits/rejected": -0.3981391191482544, + "logps/chosen": -0.00023946535657159984, + "logps/rejected": -2.5700619220733643, + "loss": 0.7746, + "nll_loss": 0.19364149868488312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3946535293362103e-05, + "rewards/margins": 0.2569822371006012, + "rewards/rejected": -0.2570061683654785, + "step": 6455 + }, + { + "epoch": 4.464730290456432, + "grad_norm": 8.794690132141113, + "learning_rate": 3.075149838635316e-05, + "log_odds_chosen": 9.15900707244873, + "log_odds_ratio": -0.0008501263218931854, + "logits/chosen": -0.28918007016181946, + "logits/rejected": -0.3754921555519104, + "logps/chosen": -0.0006217118352651596, + "logps/rejected": -1.515347957611084, + "loss": 1.0513, + "nll_loss": 0.26275113224983215, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.217118789209053e-05, + "rewards/margins": 0.15147262811660767, + "rewards/rejected": -0.1515347957611084, + "step": 6456 + }, + { + "epoch": 4.4654218533886585, + "grad_norm": 19.42965316772461, + "learning_rate": 3.074765637006301e-05, + "log_odds_chosen": 9.063629150390625, + "log_odds_ratio": -0.0010017786407843232, + "logits/chosen": -0.06623756885528564, + "logits/rejected": -0.08045337349176407, + "logps/chosen": -0.001309290062636137, + "logps/rejected": -1.6273454427719116, + "loss": 1.1708, + "nll_loss": 0.29259809851646423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013092900917399675, + "rewards/margins": 0.16260361671447754, + "rewards/rejected": -0.16273455321788788, + "step": 6457 + }, + { + "epoch": 4.466113416320885, + "grad_norm": 6.670252799987793, + "learning_rate": 3.074381435377286e-05, + "log_odds_chosen": 9.334833145141602, + "log_odds_ratio": -0.00016046586097218096, + "logits/chosen": -0.1623792052268982, + "logits/rejected": -0.1207355409860611, + "logps/chosen": -0.00028514739824458957, + "logps/rejected": -1.278767466545105, + "loss": 0.9254, + "nll_loss": 0.23132863640785217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8514739824458957e-05, + "rewards/margins": 0.12784823775291443, + "rewards/rejected": -0.12787675857543945, + "step": 6458 + }, + { + "epoch": 4.466804979253112, + "grad_norm": 9.468250274658203, + "learning_rate": 3.073997233748271e-05, + "log_odds_chosen": 9.812359809875488, + "log_odds_ratio": -0.08580458164215088, + "logits/chosen": -0.17165197432041168, + "logits/rejected": -0.2862173914909363, + "logps/chosen": -0.015067455358803272, + "logps/rejected": -2.258084297180176, + "loss": 1.4031, + "nll_loss": 0.34219345450401306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015067454660311341, + "rewards/margins": 0.22430166602134705, + "rewards/rejected": -0.22580842673778534, + "step": 6459 + }, + { + "epoch": 4.467496542185339, + "grad_norm": 9.811150550842285, + "learning_rate": 3.073613032119256e-05, + "log_odds_chosen": 9.866673469543457, + "log_odds_ratio": -9.497060091234744e-05, + "logits/chosen": -0.4492124021053314, + "logits/rejected": -0.48748695850372314, + "logps/chosen": -0.0005426113493740559, + "logps/rejected": -1.7650691270828247, + "loss": 0.9769, + "nll_loss": 0.24422520399093628, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426114148576744e-05, + "rewards/margins": 0.1764526516199112, + "rewards/rejected": -0.1765069216489792, + "step": 6460 + }, + { + "epoch": 4.468188105117566, + "grad_norm": 13.469379425048828, + "learning_rate": 3.0732288304902416e-05, + "log_odds_chosen": 10.870882987976074, + "log_odds_ratio": -0.00022644597629550844, + "logits/chosen": -0.7534062266349792, + "logits/rejected": -0.7338254451751709, + "logps/chosen": -0.000653579889331013, + "logps/rejected": -2.304877758026123, + "loss": 1.039, + "nll_loss": 0.25973108410835266, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535799184348434e-05, + "rewards/margins": 0.23042240738868713, + "rewards/rejected": -0.23048776388168335, + "step": 6461 + }, + { + "epoch": 4.468879668049793, + "grad_norm": 6.386672019958496, + "learning_rate": 3.072844628861227e-05, + "log_odds_chosen": 9.686408996582031, + "log_odds_ratio": -0.0006362023414112628, + "logits/chosen": -0.3017466962337494, + "logits/rejected": -0.290844589471817, + "logps/chosen": -0.00386090693064034, + "logps/rejected": -1.9374815225601196, + "loss": 1.3238, + "nll_loss": 0.3308817744255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003860906872432679, + "rewards/margins": 0.19336208701133728, + "rewards/rejected": -0.19374816119670868, + "step": 6462 + }, + { + "epoch": 4.4695712309820195, + "grad_norm": 11.212063789367676, + "learning_rate": 3.0724604272322114e-05, + "log_odds_chosen": 10.193517684936523, + "log_odds_ratio": -9.134951687883586e-05, + "logits/chosen": -0.5317569971084595, + "logits/rejected": -0.611341655254364, + "logps/chosen": -0.0002531587961129844, + "logps/rejected": -1.7362111806869507, + "loss": 0.9071, + "nll_loss": 0.2267615795135498, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5315879611298442e-05, + "rewards/margins": 0.17359580099582672, + "rewards/rejected": -0.17362111806869507, + "step": 6463 + }, + { + "epoch": 4.470262793914246, + "grad_norm": 6.128077030181885, + "learning_rate": 3.0720762256031966e-05, + "log_odds_chosen": 8.539804458618164, + "log_odds_ratio": -0.004929071757942438, + "logits/chosen": -0.5161978006362915, + "logits/rejected": -0.5088739395141602, + "logps/chosen": -0.002719791140407324, + "logps/rejected": -1.3809276819229126, + "loss": 0.9154, + "nll_loss": 0.2283586859703064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027197913732379675, + "rewards/margins": 0.1378207802772522, + "rewards/rejected": -0.1380927711725235, + "step": 6464 + }, + { + "epoch": 4.470954356846473, + "grad_norm": 6.816812992095947, + "learning_rate": 3.071692023974182e-05, + "log_odds_chosen": 9.135784149169922, + "log_odds_ratio": -0.013926029205322266, + "logits/chosen": -0.5467950105667114, + "logits/rejected": -0.6252622604370117, + "logps/chosen": -0.005378572270274162, + "logps/rejected": -1.3269848823547363, + "loss": 0.8008, + "nll_loss": 0.19880104064941406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005378572386689484, + "rewards/margins": 0.13216063380241394, + "rewards/rejected": -0.13269847631454468, + "step": 6465 + }, + { + "epoch": 4.4716459197787, + "grad_norm": 11.138849258422852, + "learning_rate": 3.071307822345167e-05, + "log_odds_chosen": 10.323291778564453, + "log_odds_ratio": -0.00015442782023455948, + "logits/chosen": -0.30465757846832275, + "logits/rejected": -0.25000882148742676, + "logps/chosen": -0.00015198114851955324, + "logps/rejected": -1.6996678113937378, + "loss": 0.9524, + "nll_loss": 0.2380809187889099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5198114851955324e-05, + "rewards/margins": 0.16995158791542053, + "rewards/rejected": -0.16996678709983826, + "step": 6466 + }, + { + "epoch": 4.472337482710927, + "grad_norm": 10.755675315856934, + "learning_rate": 3.070923620716152e-05, + "log_odds_chosen": 10.522125244140625, + "log_odds_ratio": -3.707344876602292e-05, + "logits/chosen": -0.3853769898414612, + "logits/rejected": -0.44511574506759644, + "logps/chosen": -0.00046376383397728205, + "logps/rejected": -2.301818609237671, + "loss": 1.1055, + "nll_loss": 0.27637752890586853, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.637638630811125e-05, + "rewards/margins": 0.2301354855298996, + "rewards/rejected": -0.23018187284469604, + "step": 6467 + }, + { + "epoch": 4.473029045643154, + "grad_norm": 8.67910385131836, + "learning_rate": 3.070539419087137e-05, + "log_odds_chosen": 11.46983528137207, + "log_odds_ratio": -2.3607193725183606e-05, + "logits/chosen": -0.30719006061553955, + "logits/rejected": -0.4418572783470154, + "logps/chosen": -0.00018790410831570625, + "logps/rejected": -2.8087122440338135, + "loss": 0.8674, + "nll_loss": 0.21685031056404114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8790411559166387e-05, + "rewards/margins": 0.28085243701934814, + "rewards/rejected": -0.2808712124824524, + "step": 6468 + }, + { + "epoch": 4.4737206085753805, + "grad_norm": 51.31665802001953, + "learning_rate": 3.070155217458122e-05, + "log_odds_chosen": 8.52737808227539, + "log_odds_ratio": -0.4789644181728363, + "logits/chosen": -0.2602759897708893, + "logits/rejected": -0.309836745262146, + "logps/chosen": -0.20493757724761963, + "logps/rejected": -2.1075263023376465, + "loss": 1.8546, + "nll_loss": 0.41575437784194946, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.020493758842349052, + "rewards/margins": 0.19025887548923492, + "rewards/rejected": -0.21075263619422913, + "step": 6469 + }, + { + "epoch": 4.474412171507607, + "grad_norm": 18.089824676513672, + "learning_rate": 3.0697710158291074e-05, + "log_odds_chosen": 9.630249977111816, + "log_odds_ratio": -0.0002327169495401904, + "logits/chosen": -0.34218454360961914, + "logits/rejected": -0.378109335899353, + "logps/chosen": -0.000677041825838387, + "logps/rejected": -2.0278592109680176, + "loss": 1.1102, + "nll_loss": 0.27751752734184265, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.770417530788109e-05, + "rewards/margins": 0.20271822810173035, + "rewards/rejected": -0.2027859091758728, + "step": 6470 + }, + { + "epoch": 4.475103734439834, + "grad_norm": 11.263705253601074, + "learning_rate": 3.069386814200093e-05, + "log_odds_chosen": 10.034893035888672, + "log_odds_ratio": -0.00014404130342882127, + "logits/chosen": -0.6196680665016174, + "logits/rejected": -0.6530418395996094, + "logps/chosen": -0.0002830620505847037, + "logps/rejected": -1.627251386642456, + "loss": 1.2924, + "nll_loss": 0.323082834482193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8306207241257653e-05, + "rewards/margins": 0.16269683837890625, + "rewards/rejected": -0.16272515058517456, + "step": 6471 + }, + { + "epoch": 4.475795297372061, + "grad_norm": 10.325894355773926, + "learning_rate": 3.069002612571077e-05, + "log_odds_chosen": 9.838895797729492, + "log_odds_ratio": -0.0002516931272111833, + "logits/chosen": -0.5684917569160461, + "logits/rejected": -0.574885904788971, + "logps/chosen": -0.001969832694157958, + "logps/rejected": -2.108513116836548, + "loss": 0.7378, + "nll_loss": 0.18441295623779297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019698326650541276, + "rewards/margins": 0.2106543481349945, + "rewards/rejected": -0.21085131168365479, + "step": 6472 + }, + { + "epoch": 4.476486860304288, + "grad_norm": 8.056211471557617, + "learning_rate": 3.0686184109420625e-05, + "log_odds_chosen": 10.88155460357666, + "log_odds_ratio": -3.325043508084491e-05, + "logits/chosen": -0.27730709314346313, + "logits/rejected": -0.4008949398994446, + "logps/chosen": -0.00010624650894897059, + "logps/rejected": -1.9483246803283691, + "loss": 1.4361, + "nll_loss": 0.359016478061676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0624650712998118e-05, + "rewards/margins": 0.19482184946537018, + "rewards/rejected": -0.1948324739933014, + "step": 6473 + }, + { + "epoch": 4.477178423236515, + "grad_norm": 17.21849822998047, + "learning_rate": 3.068234209313048e-05, + "log_odds_chosen": 10.295563697814941, + "log_odds_ratio": -0.001130000571720302, + "logits/chosen": -0.6260051727294922, + "logits/rejected": -0.6374498605728149, + "logps/chosen": -0.0025418612640351057, + "logps/rejected": -1.9003947973251343, + "loss": 0.8236, + "nll_loss": 0.20578572154045105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025418613222427666, + "rewards/margins": 0.18978530168533325, + "rewards/rejected": -0.1900394707918167, + "step": 6474 + }, + { + "epoch": 4.477869986168741, + "grad_norm": 8.649698257446289, + "learning_rate": 3.067850007684033e-05, + "log_odds_chosen": 9.734735488891602, + "log_odds_ratio": -0.0014319606125354767, + "logits/chosen": -0.45698633790016174, + "logits/rejected": -0.5206592082977295, + "logps/chosen": -0.0013818284496665, + "logps/rejected": -1.6835741996765137, + "loss": 1.5192, + "nll_loss": 0.379658579826355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013818284787703305, + "rewards/margins": 0.16821923851966858, + "rewards/rejected": -0.16835743188858032, + "step": 6475 + }, + { + "epoch": 4.478561549100968, + "grad_norm": 22.343177795410156, + "learning_rate": 3.0674658060550175e-05, + "log_odds_chosen": 9.140680313110352, + "log_odds_ratio": -0.00031606812262907624, + "logits/chosen": -0.6005803942680359, + "logits/rejected": -0.5960612893104553, + "logps/chosen": -0.0007477106410078704, + "logps/rejected": -1.4693350791931152, + "loss": 0.8878, + "nll_loss": 0.2219061553478241, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.477107283193618e-05, + "rewards/margins": 0.14685875177383423, + "rewards/rejected": -0.14693352580070496, + "step": 6476 + }, + { + "epoch": 4.479253112033195, + "grad_norm": 6.908090114593506, + "learning_rate": 3.0670816044260035e-05, + "log_odds_chosen": 10.161052703857422, + "log_odds_ratio": -5.6024065997917205e-05, + "logits/chosen": -0.5555582642555237, + "logits/rejected": -0.4909493029117584, + "logps/chosen": -0.00017970267799682915, + "logps/rejected": -1.46084463596344, + "loss": 1.7021, + "nll_loss": 0.4255087375640869, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7970269254874438e-05, + "rewards/margins": 0.14606650173664093, + "rewards/rejected": -0.1460844725370407, + "step": 6477 + }, + { + "epoch": 4.479944674965422, + "grad_norm": 9.877211570739746, + "learning_rate": 3.066697402796988e-05, + "log_odds_chosen": 9.934467315673828, + "log_odds_ratio": -0.00014345439558383077, + "logits/chosen": -0.6109917759895325, + "logits/rejected": -0.7236210703849792, + "logps/chosen": -0.0007814106647856534, + "logps/rejected": -2.0042483806610107, + "loss": 1.0619, + "nll_loss": 0.2654609978199005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.814106356818229e-05, + "rewards/margins": 0.2003467082977295, + "rewards/rejected": -0.20042484998703003, + "step": 6478 + }, + { + "epoch": 4.480636237897649, + "grad_norm": 10.250088691711426, + "learning_rate": 3.066313201167973e-05, + "log_odds_chosen": 10.149007797241211, + "log_odds_ratio": -0.0005294461152516305, + "logits/chosen": -0.5758021473884583, + "logits/rejected": -0.6397185325622559, + "logps/chosen": -0.0003705144044943154, + "logps/rejected": -1.7317607402801514, + "loss": 1.1609, + "nll_loss": 0.29016971588134766, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7051438994240016e-05, + "rewards/margins": 0.1731390357017517, + "rewards/rejected": -0.17317607998847961, + "step": 6479 + }, + { + "epoch": 4.481327800829876, + "grad_norm": 6.251984119415283, + "learning_rate": 3.0659289995389585e-05, + "log_odds_chosen": 9.889983177185059, + "log_odds_ratio": -0.00018252171867061406, + "logits/chosen": -0.23073536157608032, + "logits/rejected": -0.31376710534095764, + "logps/chosen": -0.00015304457338061184, + "logps/rejected": -1.3972463607788086, + "loss": 1.0859, + "nll_loss": 0.27145934104919434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5304456610465422e-05, + "rewards/margins": 0.13970933854579926, + "rewards/rejected": -0.13972464203834534, + "step": 6480 + }, + { + "epoch": 4.482019363762102, + "grad_norm": 6.738919734954834, + "learning_rate": 3.065544797909943e-05, + "log_odds_chosen": 9.208086967468262, + "log_odds_ratio": -0.0006657785852439702, + "logits/chosen": -0.5761962532997131, + "logits/rejected": -0.4735212028026581, + "logps/chosen": -0.004164504818618298, + "logps/rejected": -1.194311499595642, + "loss": 1.0736, + "nll_loss": 0.2683413624763489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004164504643995315, + "rewards/margins": 0.1190146952867508, + "rewards/rejected": -0.11943115293979645, + "step": 6481 + }, + { + "epoch": 4.482710926694329, + "grad_norm": 11.047977447509766, + "learning_rate": 3.065160596280928e-05, + "log_odds_chosen": 8.649320602416992, + "log_odds_ratio": -0.0034021895844489336, + "logits/chosen": 0.0005720322951674461, + "logits/rejected": -0.038629673421382904, + "logps/chosen": -0.0032845381647348404, + "logps/rejected": -1.5344747304916382, + "loss": 0.9752, + "nll_loss": 0.2434644252061844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032845381065271795, + "rewards/margins": 0.1531190127134323, + "rewards/rejected": -0.1534474790096283, + "step": 6482 + }, + { + "epoch": 4.483402489626556, + "grad_norm": 14.265397071838379, + "learning_rate": 3.0647763946519136e-05, + "log_odds_chosen": 9.652074813842773, + "log_odds_ratio": -0.00034976223832927644, + "logits/chosen": -0.26749786734580994, + "logits/rejected": -0.29552537202835083, + "logps/chosen": -0.0014181090518832207, + "logps/rejected": -2.2106587886810303, + "loss": 1.0416, + "nll_loss": 0.26037660241127014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014181091682985425, + "rewards/margins": 0.22092406451702118, + "rewards/rejected": -0.22106587886810303, + "step": 6483 + }, + { + "epoch": 4.484094052558783, + "grad_norm": 12.375883102416992, + "learning_rate": 3.064392193022899e-05, + "log_odds_chosen": 8.698393821716309, + "log_odds_ratio": -0.030465498566627502, + "logits/chosen": 0.23788906633853912, + "logits/rejected": 0.11636831611394882, + "logps/chosen": -0.010362344793975353, + "logps/rejected": -2.2280845642089844, + "loss": 1.5567, + "nll_loss": 0.3861318528652191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010362345492467284, + "rewards/margins": 0.2217722237110138, + "rewards/rejected": -0.22280846536159515, + "step": 6484 + }, + { + "epoch": 4.48478561549101, + "grad_norm": 13.210124015808105, + "learning_rate": 3.0640079913938834e-05, + "log_odds_chosen": 10.175982475280762, + "log_odds_ratio": -0.00024061251315288246, + "logits/chosen": -0.5196781754493713, + "logits/rejected": -0.5784398913383484, + "logps/chosen": -0.0012963440967723727, + "logps/rejected": -2.0012803077697754, + "loss": 0.727, + "nll_loss": 0.18171842396259308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012963441258762032, + "rewards/margins": 0.1999983787536621, + "rewards/rejected": -0.2001280039548874, + "step": 6485 + }, + { + "epoch": 4.485477178423237, + "grad_norm": 5.110747814178467, + "learning_rate": 3.063623789764869e-05, + "log_odds_chosen": 10.602209091186523, + "log_odds_ratio": -5.619807416223921e-05, + "logits/chosen": -0.5373456478118896, + "logits/rejected": -0.6098401546478271, + "logps/chosen": -0.00016629225865472108, + "logps/rejected": -1.6668782234191895, + "loss": 1.095, + "nll_loss": 0.27373725175857544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.662922659306787e-05, + "rewards/margins": 0.16667118668556213, + "rewards/rejected": -0.16668781638145447, + "step": 6486 + }, + { + "epoch": 4.486168741355463, + "grad_norm": 12.7305326461792, + "learning_rate": 3.063239588135854e-05, + "log_odds_chosen": 10.567861557006836, + "log_odds_ratio": -0.00015114758571144193, + "logits/chosen": -0.9034011363983154, + "logits/rejected": -0.9532999992370605, + "logps/chosen": -0.0006381792481988668, + "logps/rejected": -2.1844778060913086, + "loss": 0.9608, + "nll_loss": 0.24017715454101562, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381792627507821e-05, + "rewards/margins": 0.21838395297527313, + "rewards/rejected": -0.21844777464866638, + "step": 6487 + }, + { + "epoch": 4.48686030428769, + "grad_norm": 7.28488826751709, + "learning_rate": 3.062855386506839e-05, + "log_odds_chosen": 10.389266967773438, + "log_odds_ratio": -0.00010775520786410198, + "logits/chosen": -0.3085196316242218, + "logits/rejected": -0.30753573775291443, + "logps/chosen": -0.0011101680574938655, + "logps/rejected": -2.559150218963623, + "loss": 0.8785, + "nll_loss": 0.21961821615695953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001110168086597696, + "rewards/margins": 0.25580400228500366, + "rewards/rejected": -0.2559150457382202, + "step": 6488 + }, + { + "epoch": 4.487551867219917, + "grad_norm": 10.296831130981445, + "learning_rate": 3.0624711848778244e-05, + "log_odds_chosen": 10.753174781799316, + "log_odds_ratio": -2.9885119147365913e-05, + "logits/chosen": -0.8495357036590576, + "logits/rejected": -0.8660904169082642, + "logps/chosen": -0.0001955320913111791, + "logps/rejected": -1.82537043094635, + "loss": 0.6896, + "nll_loss": 0.17240005731582642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.955320840352215e-05, + "rewards/margins": 0.18251748383045197, + "rewards/rejected": -0.1825370341539383, + "step": 6489 + }, + { + "epoch": 4.488243430152144, + "grad_norm": 7.585280418395996, + "learning_rate": 3.062086983248809e-05, + "log_odds_chosen": 11.144027709960938, + "log_odds_ratio": -2.5699517209432088e-05, + "logits/chosen": -0.4447624683380127, + "logits/rejected": -0.526018500328064, + "logps/chosen": -0.00010345203190809116, + "logps/rejected": -1.9488626718521118, + "loss": 0.7421, + "nll_loss": 0.18552720546722412, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0345203008910175e-05, + "rewards/margins": 0.19487592577934265, + "rewards/rejected": -0.19488626718521118, + "step": 6490 + }, + { + "epoch": 4.488934993084371, + "grad_norm": 7.347438812255859, + "learning_rate": 3.061702781619794e-05, + "log_odds_chosen": 10.01000690460205, + "log_odds_ratio": -9.229998249793425e-05, + "logits/chosen": -0.25663602352142334, + "logits/rejected": -0.3326743245124817, + "logps/chosen": -0.0004510592552833259, + "logps/rejected": -1.9487425088882446, + "loss": 0.8923, + "nll_loss": 0.22306877374649048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.510592407314107e-05, + "rewards/margins": 0.19482913613319397, + "rewards/rejected": -0.19487425684928894, + "step": 6491 + }, + { + "epoch": 4.4896265560165975, + "grad_norm": 10.743110656738281, + "learning_rate": 3.0613185799907794e-05, + "log_odds_chosen": 9.014936447143555, + "log_odds_ratio": -0.0004175748908892274, + "logits/chosen": -0.616611659526825, + "logits/rejected": -0.6440533995628357, + "logps/chosen": -0.0017504625720903277, + "logps/rejected": -1.6501240730285645, + "loss": 1.3402, + "nll_loss": 0.33499693870544434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017504626885056496, + "rewards/margins": 0.16483736038208008, + "rewards/rejected": -0.1650124043226242, + "step": 6492 + }, + { + "epoch": 4.490318118948824, + "grad_norm": 5.233790397644043, + "learning_rate": 3.0609343783617647e-05, + "log_odds_chosen": 10.454694747924805, + "log_odds_ratio": -9.337958181276917e-05, + "logits/chosen": -0.5710014700889587, + "logits/rejected": -0.5607348084449768, + "logps/chosen": -0.00013453798601403832, + "logps/rejected": -1.7762622833251953, + "loss": 0.6604, + "nll_loss": 0.16509346663951874, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3453798601403832e-05, + "rewards/margins": 0.1776127815246582, + "rewards/rejected": -0.17762622237205505, + "step": 6493 + }, + { + "epoch": 4.491009681881051, + "grad_norm": 10.930121421813965, + "learning_rate": 3.060550176732749e-05, + "log_odds_chosen": 10.477840423583984, + "log_odds_ratio": -5.168091593077406e-05, + "logits/chosen": -0.3589015007019043, + "logits/rejected": -0.4596545100212097, + "logps/chosen": -0.00032352475682273507, + "logps/rejected": -2.120523452758789, + "loss": 0.9069, + "nll_loss": 0.22672848403453827, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.235247277189046e-05, + "rewards/margins": 0.21202000975608826, + "rewards/rejected": -0.2120523750782013, + "step": 6494 + }, + { + "epoch": 4.491701244813278, + "grad_norm": 13.32094669342041, + "learning_rate": 3.060165975103735e-05, + "log_odds_chosen": 9.130834579467773, + "log_odds_ratio": -0.0003811029309872538, + "logits/chosen": -0.5448084473609924, + "logits/rejected": -0.6247289776802063, + "logps/chosen": -0.0015338326338678598, + "logps/rejected": -1.9310564994812012, + "loss": 1.1071, + "nll_loss": 0.27673590183258057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015338326920755208, + "rewards/margins": 0.19295227527618408, + "rewards/rejected": -0.19310565292835236, + "step": 6495 + }, + { + "epoch": 4.492392807745505, + "grad_norm": 6.493412971496582, + "learning_rate": 3.05978177347472e-05, + "log_odds_chosen": 10.520059585571289, + "log_odds_ratio": -3.027506136277225e-05, + "logits/chosen": -0.3154093623161316, + "logits/rejected": -0.3896132707595825, + "logps/chosen": -0.00016197854711208493, + "logps/rejected": -1.7350319623947144, + "loss": 1.006, + "nll_loss": 0.2514877915382385, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.619785325601697e-05, + "rewards/margins": 0.17348699271678925, + "rewards/rejected": -0.17350319027900696, + "step": 6496 + }, + { + "epoch": 4.493084370677732, + "grad_norm": 9.13463020324707, + "learning_rate": 3.059397571845705e-05, + "log_odds_chosen": 10.163230895996094, + "log_odds_ratio": -0.0003173485165461898, + "logits/chosen": -0.39032599329948425, + "logits/rejected": -0.41342228651046753, + "logps/chosen": -0.000749665021430701, + "logps/rejected": -2.1161184310913086, + "loss": 1.3606, + "nll_loss": 0.34011974930763245, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.496650505345315e-05, + "rewards/margins": 0.21153688430786133, + "rewards/rejected": -0.21161183714866638, + "step": 6497 + }, + { + "epoch": 4.4937759336099585, + "grad_norm": 8.330862998962402, + "learning_rate": 3.05901337021669e-05, + "log_odds_chosen": 10.870183944702148, + "log_odds_ratio": -2.5071134587051347e-05, + "logits/chosen": -0.5879911184310913, + "logits/rejected": -0.6214119791984558, + "logps/chosen": -0.00018948808428831398, + "logps/rejected": -2.0447065830230713, + "loss": 1.5259, + "nll_loss": 0.38148459792137146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8948812794405967e-05, + "rewards/margins": 0.20445170998573303, + "rewards/rejected": -0.2044706642627716, + "step": 6498 + }, + { + "epoch": 4.494467496542185, + "grad_norm": 11.30947208404541, + "learning_rate": 3.058629168587675e-05, + "log_odds_chosen": 10.4102783203125, + "log_odds_ratio": -0.0002449329767841846, + "logits/chosen": -0.5021913647651672, + "logits/rejected": -0.5918756127357483, + "logps/chosen": -0.01916118897497654, + "logps/rejected": -2.790419816970825, + "loss": 1.3731, + "nll_loss": 0.34324946999549866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001916118897497654, + "rewards/margins": 0.27712589502334595, + "rewards/rejected": -0.27904200553894043, + "step": 6499 + }, + { + "epoch": 4.495159059474412, + "grad_norm": 7.023859977722168, + "learning_rate": 3.05824496695866e-05, + "log_odds_chosen": 9.978353500366211, + "log_odds_ratio": -7.063882367219776e-05, + "logits/chosen": -0.611998975276947, + "logits/rejected": -0.649333655834198, + "logps/chosen": -0.0001519985671620816, + "logps/rejected": -1.3650856018066406, + "loss": 0.8083, + "nll_loss": 0.20207872986793518, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5199855624814518e-05, + "rewards/margins": 0.13649335503578186, + "rewards/rejected": -0.13650855422019958, + "step": 6500 + }, + { + "epoch": 4.495850622406639, + "grad_norm": 8.580329895019531, + "learning_rate": 3.057860765329645e-05, + "log_odds_chosen": 10.203696250915527, + "log_odds_ratio": -8.89887596713379e-05, + "logits/chosen": -0.6458160281181335, + "logits/rejected": -0.6352592706680298, + "logps/chosen": -0.0002670662652235478, + "logps/rejected": -1.7849435806274414, + "loss": 0.5684, + "nll_loss": 0.14209191501140594, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6706629796535708e-05, + "rewards/margins": 0.17846766114234924, + "rewards/rejected": -0.17849434912204742, + "step": 6501 + }, + { + "epoch": 4.496542185338866, + "grad_norm": 11.85151481628418, + "learning_rate": 3.0574765637006305e-05, + "log_odds_chosen": 9.779272079467773, + "log_odds_ratio": -0.00041906890692189336, + "logits/chosen": -0.676744282245636, + "logits/rejected": -0.7503317594528198, + "logps/chosen": -0.004989419132471085, + "logps/rejected": -2.3462343215942383, + "loss": 1.365, + "nll_loss": 0.34119948744773865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004989419248886406, + "rewards/margins": 0.23412448167800903, + "rewards/rejected": -0.23462343215942383, + "step": 6502 + }, + { + "epoch": 4.497233748271093, + "grad_norm": 10.800078392028809, + "learning_rate": 3.057092362071615e-05, + "log_odds_chosen": 9.714326858520508, + "log_odds_ratio": -0.00018371779879089445, + "logits/chosen": -0.20595361292362213, + "logits/rejected": -0.2719736099243164, + "logps/chosen": -0.00038460243376903236, + "logps/rejected": -1.7641628980636597, + "loss": 1.1704, + "nll_loss": 0.2925931513309479, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.846023537334986e-05, + "rewards/margins": 0.17637783288955688, + "rewards/rejected": -0.176416277885437, + "step": 6503 + }, + { + "epoch": 4.4979253112033195, + "grad_norm": 8.698437690734863, + "learning_rate": 3.056708160442601e-05, + "log_odds_chosen": 9.91480541229248, + "log_odds_ratio": -0.000267309311311692, + "logits/chosen": -0.3980754315853119, + "logits/rejected": -0.45291000604629517, + "logps/chosen": -0.0006971318507567048, + "logps/rejected": -1.9680414199829102, + "loss": 1.8652, + "nll_loss": 0.46628376841545105, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.971318362047896e-05, + "rewards/margins": 0.19673442840576172, + "rewards/rejected": -0.19680413603782654, + "step": 6504 + }, + { + "epoch": 4.498616874135546, + "grad_norm": 6.617920398712158, + "learning_rate": 3.0563239588135856e-05, + "log_odds_chosen": 9.921056747436523, + "log_odds_ratio": -0.0003054399276152253, + "logits/chosen": -0.4499887228012085, + "logits/rejected": -0.48313918709754944, + "logps/chosen": -0.0006958750309422612, + "logps/rejected": -1.9423108100891113, + "loss": 1.5065, + "nll_loss": 0.3765985369682312, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.958749872865155e-05, + "rewards/margins": 0.19416150450706482, + "rewards/rejected": -0.1942310780286789, + "step": 6505 + }, + { + "epoch": 4.499308437067773, + "grad_norm": 8.874361991882324, + "learning_rate": 3.055939757184571e-05, + "log_odds_chosen": 9.87329387664795, + "log_odds_ratio": -0.0001342103787465021, + "logits/chosen": -0.4978351294994354, + "logits/rejected": -0.5142601132392883, + "logps/chosen": -0.0016254674410447478, + "logps/rejected": -2.251915693283081, + "loss": 0.7337, + "nll_loss": 0.1834157556295395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016254674119409174, + "rewards/margins": 0.22502902150154114, + "rewards/rejected": -0.22519156336784363, + "step": 6506 + }, + { + "epoch": 4.5, + "grad_norm": 13.488443374633789, + "learning_rate": 3.055555555555556e-05, + "log_odds_chosen": 10.155275344848633, + "log_odds_ratio": -5.018026786274277e-05, + "logits/chosen": -0.44602784514427185, + "logits/rejected": -0.6105251908302307, + "logps/chosen": -0.00018949707737192512, + "logps/rejected": -1.6713563203811646, + "loss": 1.015, + "nll_loss": 0.25374555587768555, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.894970773719251e-05, + "rewards/margins": 0.1671166867017746, + "rewards/rejected": -0.16713562607765198, + "step": 6507 + }, + { + "epoch": 4.500691562932227, + "grad_norm": 10.886319160461426, + "learning_rate": 3.0551713539265406e-05, + "log_odds_chosen": 10.691394805908203, + "log_odds_ratio": -3.336165173095651e-05, + "logits/chosen": -0.57883620262146, + "logits/rejected": -0.6944460272789001, + "logps/chosen": -0.00029359126347117126, + "logps/rejected": -1.981209397315979, + "loss": 1.2622, + "nll_loss": 0.3155488967895508, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9359125619521365e-05, + "rewards/margins": 0.19809159636497498, + "rewards/rejected": -0.19812093675136566, + "step": 6508 + }, + { + "epoch": 4.501383125864454, + "grad_norm": 7.991898059844971, + "learning_rate": 3.054787152297526e-05, + "log_odds_chosen": 10.266471862792969, + "log_odds_ratio": -0.002679745201021433, + "logits/chosen": -0.4057050943374634, + "logits/rejected": -0.4801580309867859, + "logps/chosen": -0.001888161525130272, + "logps/rejected": -2.244232654571533, + "loss": 0.9319, + "nll_loss": 0.23269890248775482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018881616415455937, + "rewards/margins": 0.2242344468832016, + "rewards/rejected": -0.22442325949668884, + "step": 6509 + }, + { + "epoch": 4.5020746887966805, + "grad_norm": 11.097415924072266, + "learning_rate": 3.054402950668511e-05, + "log_odds_chosen": 8.94814682006836, + "log_odds_ratio": -0.00044577824883162975, + "logits/chosen": -0.31504395604133606, + "logits/rejected": -0.42398470640182495, + "logps/chosen": -0.0010490479180589318, + "logps/rejected": -1.37216317653656, + "loss": 1.145, + "nll_loss": 0.28619518876075745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010490478598512709, + "rewards/margins": 0.13711142539978027, + "rewards/rejected": -0.13721632957458496, + "step": 6510 + }, + { + "epoch": 4.502766251728907, + "grad_norm": 10.749506950378418, + "learning_rate": 3.0540187490394963e-05, + "log_odds_chosen": 10.62763786315918, + "log_odds_ratio": -0.0008419828373007476, + "logits/chosen": -0.1942656934261322, + "logits/rejected": -0.28289175033569336, + "logps/chosen": -0.0006234937463887036, + "logps/rejected": -2.479548454284668, + "loss": 1.0909, + "nll_loss": 0.27264609932899475, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.234937609406188e-05, + "rewards/margins": 0.24789251387119293, + "rewards/rejected": -0.24795487523078918, + "step": 6511 + }, + { + "epoch": 4.503457814661134, + "grad_norm": 5.3080244064331055, + "learning_rate": 3.053634547410481e-05, + "log_odds_chosen": 9.412324905395508, + "log_odds_ratio": -0.00030437344685196877, + "logits/chosen": -0.7221852540969849, + "logits/rejected": -0.7768763303756714, + "logps/chosen": -0.0006163233192637563, + "logps/rejected": -1.5118619203567505, + "loss": 0.9364, + "nll_loss": 0.23406724631786346, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.16323304711841e-05, + "rewards/margins": 0.15112455189228058, + "rewards/rejected": -0.15118618309497833, + "step": 6512 + }, + { + "epoch": 4.504149377593361, + "grad_norm": 6.281959533691406, + "learning_rate": 3.053250345781467e-05, + "log_odds_chosen": 9.127216339111328, + "log_odds_ratio": -0.0005243317573331296, + "logits/chosen": -0.3163529634475708, + "logits/rejected": -0.3212625980377197, + "logps/chosen": -0.0006598001928068697, + "logps/rejected": -1.6097946166992188, + "loss": 0.9903, + "nll_loss": 0.2475152164697647, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.598001345992088e-05, + "rewards/margins": 0.16091348230838776, + "rewards/rejected": -0.16097944974899292, + "step": 6513 + }, + { + "epoch": 4.504840940525588, + "grad_norm": 7.1390700340271, + "learning_rate": 3.0528661441524514e-05, + "log_odds_chosen": 10.395563125610352, + "log_odds_ratio": -6.140669574961066e-05, + "logits/chosen": -0.5050083994865417, + "logits/rejected": -0.574657142162323, + "logps/chosen": -0.013657084666192532, + "logps/rejected": -2.174135684967041, + "loss": 0.7363, + "nll_loss": 0.1840781271457672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013657084200531244, + "rewards/margins": 0.21604785323143005, + "rewards/rejected": -0.21741357445716858, + "step": 6514 + }, + { + "epoch": 4.505532503457815, + "grad_norm": 8.09498119354248, + "learning_rate": 3.0524819425234366e-05, + "log_odds_chosen": 10.00520133972168, + "log_odds_ratio": -0.00016035634325817227, + "logits/chosen": -0.7113980650901794, + "logits/rejected": -0.7848362326622009, + "logps/chosen": -0.0005661610630340874, + "logps/rejected": -1.7406871318817139, + "loss": 1.0059, + "nll_loss": 0.25144797563552856, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.66161070310045e-05, + "rewards/margins": 0.17401209473609924, + "rewards/rejected": -0.17406870424747467, + "step": 6515 + }, + { + "epoch": 4.5062240663900415, + "grad_norm": 3.606623649597168, + "learning_rate": 3.052097740894422e-05, + "log_odds_chosen": 8.800888061523438, + "log_odds_ratio": -0.0008264086209237576, + "logits/chosen": -0.6696504354476929, + "logits/rejected": -0.6730940341949463, + "logps/chosen": -0.005246603395789862, + "logps/rejected": -2.150418758392334, + "loss": 1.0627, + "nll_loss": 0.26559382677078247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005246603395789862, + "rewards/margins": 0.2145172357559204, + "rewards/rejected": -0.2150418758392334, + "step": 6516 + }, + { + "epoch": 4.506915629322268, + "grad_norm": 8.892373085021973, + "learning_rate": 3.0517135392654068e-05, + "log_odds_chosen": 9.085613250732422, + "log_odds_ratio": -0.017949793487787247, + "logits/chosen": -0.39097902178764343, + "logits/rejected": -0.41564783453941345, + "logps/chosen": -0.006918535102158785, + "logps/rejected": -2.56168270111084, + "loss": 1.3884, + "nll_loss": 0.3452964127063751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006918534636497498, + "rewards/margins": 0.2554764151573181, + "rewards/rejected": -0.25616827607154846, + "step": 6517 + }, + { + "epoch": 4.507607192254495, + "grad_norm": 8.086743354797363, + "learning_rate": 3.0513293376363917e-05, + "log_odds_chosen": 8.997087478637695, + "log_odds_ratio": -0.0004165216232649982, + "logits/chosen": -0.587999701499939, + "logits/rejected": -0.5053616762161255, + "logps/chosen": -0.0005531564820557833, + "logps/rejected": -1.6527694463729858, + "loss": 1.4879, + "nll_loss": 0.3719237446784973, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.531564966076985e-05, + "rewards/margins": 0.16522163152694702, + "rewards/rejected": -0.16527694463729858, + "step": 6518 + }, + { + "epoch": 4.508298755186722, + "grad_norm": 10.235040664672852, + "learning_rate": 3.050945136007377e-05, + "log_odds_chosen": 9.41419506072998, + "log_odds_ratio": -0.049024879932403564, + "logits/chosen": -0.44523167610168457, + "logits/rejected": -0.46845299005508423, + "logps/chosen": -0.010855313390493393, + "logps/rejected": -2.408637523651123, + "loss": 1.0393, + "nll_loss": 0.2549152374267578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010855314321815968, + "rewards/margins": 0.23977823555469513, + "rewards/rejected": -0.24086375534534454, + "step": 6519 + }, + { + "epoch": 4.508990318118949, + "grad_norm": 13.492453575134277, + "learning_rate": 3.050560934378362e-05, + "log_odds_chosen": 9.1912260055542, + "log_odds_ratio": -0.05707675591111183, + "logits/chosen": -0.22407862544059753, + "logits/rejected": -0.24346131086349487, + "logps/chosen": -0.011582519859075546, + "logps/rejected": -1.6870681047439575, + "loss": 1.1594, + "nll_loss": 0.28415244817733765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001158252009190619, + "rewards/margins": 0.1675485521554947, + "rewards/rejected": -0.16870680451393127, + "step": 6520 + }, + { + "epoch": 4.509681881051176, + "grad_norm": 9.874868392944336, + "learning_rate": 3.0501767327493468e-05, + "log_odds_chosen": 8.210152626037598, + "log_odds_ratio": -0.1597500443458557, + "logits/chosen": -0.33186623454093933, + "logits/rejected": -0.3340374827384949, + "logps/chosen": -0.019689546898007393, + "logps/rejected": -1.824156403541565, + "loss": 1.8054, + "nll_loss": 0.4353860020637512, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019689546898007393, + "rewards/margins": 0.18044669926166534, + "rewards/rejected": -0.1824156492948532, + "step": 6521 + }, + { + "epoch": 4.5103734439834025, + "grad_norm": 9.211250305175781, + "learning_rate": 3.0497925311203323e-05, + "log_odds_chosen": 9.563366889953613, + "log_odds_ratio": -0.0007702955044806004, + "logits/chosen": -0.7755285501480103, + "logits/rejected": -0.7752860188484192, + "logps/chosen": -0.00045156345004215837, + "logps/rejected": -1.422647476196289, + "loss": 1.1282, + "nll_loss": 0.2819834351539612, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.515634645940736e-05, + "rewards/margins": 0.14221958816051483, + "rewards/rejected": -0.14226475358009338, + "step": 6522 + }, + { + "epoch": 4.511065006915629, + "grad_norm": 5.907479763031006, + "learning_rate": 3.0494083294913172e-05, + "log_odds_chosen": 10.348957061767578, + "log_odds_ratio": -0.0003926011559087783, + "logits/chosen": -0.40856266021728516, + "logits/rejected": -0.4991058111190796, + "logps/chosen": -0.0006147118401713669, + "logps/rejected": -2.118276596069336, + "loss": 0.9335, + "nll_loss": 0.2333364188671112, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.147118256194517e-05, + "rewards/margins": 0.21176619827747345, + "rewards/rejected": -0.21182768046855927, + "step": 6523 + }, + { + "epoch": 4.511756569847856, + "grad_norm": 5.421164512634277, + "learning_rate": 3.049024127862302e-05, + "log_odds_chosen": 9.136701583862305, + "log_odds_ratio": -0.00031635037157684565, + "logits/chosen": -0.5312263369560242, + "logits/rejected": -0.5826066136360168, + "logps/chosen": -0.0030627246014773846, + "logps/rejected": -1.6864638328552246, + "loss": 1.5047, + "nll_loss": 0.3761472702026367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030627247178927064, + "rewards/margins": 0.16834010183811188, + "rewards/rejected": -0.16864638030529022, + "step": 6524 + }, + { + "epoch": 4.512448132780083, + "grad_norm": 6.3993706703186035, + "learning_rate": 3.0486399262332877e-05, + "log_odds_chosen": 9.904411315917969, + "log_odds_ratio": -0.00023630354553461075, + "logits/chosen": -0.5708715915679932, + "logits/rejected": -0.6928766965866089, + "logps/chosen": -0.0031272037886083126, + "logps/rejected": -2.286712408065796, + "loss": 1.4193, + "nll_loss": 0.354805588722229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031272039632312953, + "rewards/margins": 0.22835853695869446, + "rewards/rejected": -0.22867125272750854, + "step": 6525 + }, + { + "epoch": 4.51313969571231, + "grad_norm": 6.600321292877197, + "learning_rate": 3.0482557246042726e-05, + "log_odds_chosen": 9.932918548583984, + "log_odds_ratio": -0.00024182464403565973, + "logits/chosen": -0.40240278840065, + "logits/rejected": -0.5140780210494995, + "logps/chosen": -0.0005220412276685238, + "logps/rejected": -2.092681884765625, + "loss": 0.9942, + "nll_loss": 0.2485257387161255, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.220412276685238e-05, + "rewards/margins": 0.20921599864959717, + "rewards/rejected": -0.20926819741725922, + "step": 6526 + }, + { + "epoch": 4.513831258644537, + "grad_norm": 11.148608207702637, + "learning_rate": 3.0478715229752575e-05, + "log_odds_chosen": 10.128450393676758, + "log_odds_ratio": -5.4566891776630655e-05, + "logits/chosen": -0.49932363629341125, + "logits/rejected": -0.5284522771835327, + "logps/chosen": -0.00018085417104884982, + "logps/rejected": -1.2286375761032104, + "loss": 1.2895, + "nll_loss": 0.3223586082458496, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.80854167410871e-05, + "rewards/margins": 0.12284567952156067, + "rewards/rejected": -0.1228637546300888, + "step": 6527 + }, + { + "epoch": 4.514522821576763, + "grad_norm": 8.504036903381348, + "learning_rate": 3.0474873213462428e-05, + "log_odds_chosen": 7.822513103485107, + "log_odds_ratio": -0.05407997593283653, + "logits/chosen": -0.38704991340637207, + "logits/rejected": -0.42549827694892883, + "logps/chosen": -0.012933324091136456, + "logps/rejected": -1.0580989122390747, + "loss": 0.8781, + "nll_loss": 0.2141069769859314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00129333243239671, + "rewards/margins": 0.10451656579971313, + "rewards/rejected": -0.10580989718437195, + "step": 6528 + }, + { + "epoch": 4.51521438450899, + "grad_norm": 5.502275466918945, + "learning_rate": 3.0471031197172277e-05, + "log_odds_chosen": 8.165353775024414, + "log_odds_ratio": -0.015324725769460201, + "logits/chosen": -0.40444430708885193, + "logits/rejected": -0.43285876512527466, + "logps/chosen": -0.004935602191835642, + "logps/rejected": -1.6389837265014648, + "loss": 0.7102, + "nll_loss": 0.17601147294044495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004935602191835642, + "rewards/margins": 0.16340482234954834, + "rewards/rejected": -0.16389837861061096, + "step": 6529 + }, + { + "epoch": 4.515905947441217, + "grad_norm": 10.776494026184082, + "learning_rate": 3.0467189180882126e-05, + "log_odds_chosen": 9.77366828918457, + "log_odds_ratio": -0.0003069191880058497, + "logits/chosen": -0.17800062894821167, + "logits/rejected": -0.2688555121421814, + "logps/chosen": -0.0002740153868217021, + "logps/rejected": -1.6104249954223633, + "loss": 1.5437, + "nll_loss": 0.3858945965766907, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.740153831837233e-05, + "rewards/margins": 0.1610150933265686, + "rewards/rejected": -0.16104251146316528, + "step": 6530 + }, + { + "epoch": 4.516597510373444, + "grad_norm": 9.113598823547363, + "learning_rate": 3.0463347164591975e-05, + "log_odds_chosen": 9.05614948272705, + "log_odds_ratio": -0.0004940159851685166, + "logits/chosen": -0.371293842792511, + "logits/rejected": -0.45829761028289795, + "logps/chosen": -0.003316486719995737, + "logps/rejected": -1.857425332069397, + "loss": 0.8088, + "nll_loss": 0.20213846862316132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000331648625433445, + "rewards/margins": 0.18541088700294495, + "rewards/rejected": -0.1857425421476364, + "step": 6531 + }, + { + "epoch": 4.517289073305671, + "grad_norm": 9.714481353759766, + "learning_rate": 3.045950514830183e-05, + "log_odds_chosen": 10.28732681274414, + "log_odds_ratio": -0.0032634905073791742, + "logits/chosen": -0.5074493885040283, + "logits/rejected": -0.6017922759056091, + "logps/chosen": -0.014635481871664524, + "logps/rejected": -2.667628526687622, + "loss": 1.7365, + "nll_loss": 0.433795690536499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014635482802987099, + "rewards/margins": 0.26529932022094727, + "rewards/rejected": -0.2667628526687622, + "step": 6532 + }, + { + "epoch": 4.517980636237898, + "grad_norm": 8.554143905639648, + "learning_rate": 3.045566313201168e-05, + "log_odds_chosen": 10.311903953552246, + "log_odds_ratio": -9.492110257269815e-05, + "logits/chosen": -0.5861060619354248, + "logits/rejected": -0.6993334889411926, + "logps/chosen": -0.000430218962719664, + "logps/rejected": -2.0976133346557617, + "loss": 0.9784, + "nll_loss": 0.24457991123199463, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3021900637540966e-05, + "rewards/margins": 0.20971831679344177, + "rewards/rejected": -0.2097613364458084, + "step": 6533 + }, + { + "epoch": 4.518672199170124, + "grad_norm": 7.542330265045166, + "learning_rate": 3.045182111572153e-05, + "log_odds_chosen": 10.505435943603516, + "log_odds_ratio": -5.5059033911675215e-05, + "logits/chosen": -0.6798256635665894, + "logits/rejected": -0.8034828305244446, + "logps/chosen": -0.0003200229548383504, + "logps/rejected": -1.9811010360717773, + "loss": 0.6408, + "nll_loss": 0.1601899266242981, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.200229548383504e-05, + "rewards/margins": 0.19807809591293335, + "rewards/rejected": -0.19811010360717773, + "step": 6534 + }, + { + "epoch": 4.519363762102351, + "grad_norm": 14.879232406616211, + "learning_rate": 3.0447979099431385e-05, + "log_odds_chosen": 8.73969841003418, + "log_odds_ratio": -0.00043307646410539746, + "logits/chosen": -0.3079538941383362, + "logits/rejected": -0.3495687246322632, + "logps/chosen": -0.001202198676764965, + "logps/rejected": -1.1880905628204346, + "loss": 1.2734, + "nll_loss": 0.3183126151561737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012021987640764564, + "rewards/margins": 0.11868884414434433, + "rewards/rejected": -0.1188090592622757, + "step": 6535 + }, + { + "epoch": 4.520055325034578, + "grad_norm": 11.615407943725586, + "learning_rate": 3.0444137083141234e-05, + "log_odds_chosen": 10.744592666625977, + "log_odds_ratio": -4.009983967989683e-05, + "logits/chosen": -0.5666415095329285, + "logits/rejected": -0.5954309701919556, + "logps/chosen": -0.00016624416457489133, + "logps/rejected": -1.9238814115524292, + "loss": 0.9044, + "nll_loss": 0.22610604763031006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6624417185084894e-05, + "rewards/margins": 0.19237151741981506, + "rewards/rejected": -0.1923881471157074, + "step": 6536 + }, + { + "epoch": 4.520746887966805, + "grad_norm": 8.567818641662598, + "learning_rate": 3.0440295066851083e-05, + "log_odds_chosen": 10.016960144042969, + "log_odds_ratio": -0.000617867975961417, + "logits/chosen": -0.4099767208099365, + "logits/rejected": -0.5116313099861145, + "logps/chosen": -0.0036224813666194677, + "logps/rejected": -2.081411838531494, + "loss": 1.0917, + "nll_loss": 0.27285081148147583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036224815994501114, + "rewards/margins": 0.2077789306640625, + "rewards/rejected": -0.20814116299152374, + "step": 6537 + }, + { + "epoch": 4.521438450899032, + "grad_norm": 11.559277534484863, + "learning_rate": 3.0436453050560935e-05, + "log_odds_chosen": 10.627859115600586, + "log_odds_ratio": -3.2798125175759196e-05, + "logits/chosen": -0.46019408106803894, + "logits/rejected": -0.4336419403553009, + "logps/chosen": -0.0002833693288266659, + "logps/rejected": -1.8214410543441772, + "loss": 1.3351, + "nll_loss": 0.3337723910808563, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.833693361026235e-05, + "rewards/margins": 0.18211576342582703, + "rewards/rejected": -0.18214410543441772, + "step": 6538 + }, + { + "epoch": 4.522130013831259, + "grad_norm": 11.562444686889648, + "learning_rate": 3.0432611034270784e-05, + "log_odds_chosen": 10.475576400756836, + "log_odds_ratio": -0.00014733010903000832, + "logits/chosen": -0.6835733652114868, + "logits/rejected": -0.7739354372024536, + "logps/chosen": -0.0013565432745963335, + "logps/rejected": -2.290761947631836, + "loss": 1.5765, + "nll_loss": 0.3941211700439453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001356543361907825, + "rewards/margins": 0.22894051671028137, + "rewards/rejected": -0.22907620668411255, + "step": 6539 + }, + { + "epoch": 4.522821576763485, + "grad_norm": 7.830057621002197, + "learning_rate": 3.0428769017980633e-05, + "log_odds_chosen": 8.738075256347656, + "log_odds_ratio": -0.016228843480348587, + "logits/chosen": -0.6461495161056519, + "logits/rejected": -0.6525962352752686, + "logps/chosen": -0.024869779124855995, + "logps/rejected": -2.2403388023376465, + "loss": 1.2305, + "nll_loss": 0.30600258708000183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024869779590517282, + "rewards/margins": 0.22154691815376282, + "rewards/rejected": -0.2240338772535324, + "step": 6540 + }, + { + "epoch": 4.523513139695712, + "grad_norm": 5.756031513214111, + "learning_rate": 3.042492700169049e-05, + "log_odds_chosen": 9.648249626159668, + "log_odds_ratio": -0.0002620015002321452, + "logits/chosen": -0.3646390438079834, + "logits/rejected": -0.49688512086868286, + "logps/chosen": -0.0002835288178175688, + "logps/rejected": -1.7063332796096802, + "loss": 1.483, + "nll_loss": 0.37071168422698975, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.835288250935264e-05, + "rewards/margins": 0.17060495913028717, + "rewards/rejected": -0.17063331604003906, + "step": 6541 + }, + { + "epoch": 4.524204702627939, + "grad_norm": 5.895205497741699, + "learning_rate": 3.042108498540034e-05, + "log_odds_chosen": 9.715417861938477, + "log_odds_ratio": -0.00039025815203785896, + "logits/chosen": -0.6709186434745789, + "logits/rejected": -0.7191147208213806, + "logps/chosen": -0.00036088767228648067, + "logps/rejected": -1.4903755187988281, + "loss": 0.7793, + "nll_loss": 0.19479727745056152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.608876795624383e-05, + "rewards/margins": 0.14900147914886475, + "rewards/rejected": -0.14903756976127625, + "step": 6542 + }, + { + "epoch": 4.524896265560166, + "grad_norm": 14.820718765258789, + "learning_rate": 3.0417242969110187e-05, + "log_odds_chosen": 10.488899230957031, + "log_odds_ratio": -5.811005030409433e-05, + "logits/chosen": -0.25905176997184753, + "logits/rejected": -0.3118452727794647, + "logps/chosen": -0.00025490691768936813, + "logps/rejected": -2.0734028816223145, + "loss": 1.54, + "nll_loss": 0.3849845230579376, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5490691768936813e-05, + "rewards/margins": 0.20731480419635773, + "rewards/rejected": -0.2073403000831604, + "step": 6543 + }, + { + "epoch": 4.525587828492393, + "grad_norm": 10.09522533416748, + "learning_rate": 3.0413400952820043e-05, + "log_odds_chosen": 8.911540985107422, + "log_odds_ratio": -0.07205822318792343, + "logits/chosen": -0.7996432781219482, + "logits/rejected": -0.8874554634094238, + "logps/chosen": -0.20625396072864532, + "logps/rejected": -2.42454195022583, + "loss": 1.3134, + "nll_loss": 0.3211488425731659, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0206253994256258, + "rewards/margins": 0.22182880342006683, + "rewards/rejected": -0.2424541860818863, + "step": 6544 + }, + { + "epoch": 4.5262793914246195, + "grad_norm": 10.470040321350098, + "learning_rate": 3.0409558936529892e-05, + "log_odds_chosen": 9.751822471618652, + "log_odds_ratio": -0.00018719106446951628, + "logits/chosen": -0.43893668055534363, + "logits/rejected": -0.5430324077606201, + "logps/chosen": -0.0002558599226176739, + "logps/rejected": -1.4737515449523926, + "loss": 1.1636, + "nll_loss": 0.29087692499160767, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5585992261767387e-05, + "rewards/margins": 0.14734956622123718, + "rewards/rejected": -0.1473751664161682, + "step": 6545 + }, + { + "epoch": 4.526970954356846, + "grad_norm": 12.051141738891602, + "learning_rate": 3.040571692023974e-05, + "log_odds_chosen": 8.957733154296875, + "log_odds_ratio": -0.035229574888944626, + "logits/chosen": -0.2745705544948578, + "logits/rejected": -0.42236384749412537, + "logps/chosen": -0.00745656481012702, + "logps/rejected": -1.6540604829788208, + "loss": 0.9206, + "nll_loss": 0.22662924230098724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007456564926542342, + "rewards/margins": 0.16466039419174194, + "rewards/rejected": -0.16540606319904327, + "step": 6546 + }, + { + "epoch": 4.527662517289073, + "grad_norm": 8.523053169250488, + "learning_rate": 3.0401874903949594e-05, + "log_odds_chosen": 9.304712295532227, + "log_odds_ratio": -0.07581852376461029, + "logits/chosen": -0.41242682933807373, + "logits/rejected": -0.497077614068985, + "logps/chosen": -0.013863787986338139, + "logps/rejected": -2.0134148597717285, + "loss": 0.9989, + "nll_loss": 0.24213922023773193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013863787753507495, + "rewards/margins": 0.19995509088039398, + "rewards/rejected": -0.20134148001670837, + "step": 6547 + }, + { + "epoch": 4.5283540802213, + "grad_norm": 6.447927474975586, + "learning_rate": 3.0398032887659443e-05, + "log_odds_chosen": 8.518516540527344, + "log_odds_ratio": -0.004801702219992876, + "logits/chosen": -0.37904903292655945, + "logits/rejected": -0.4070984125137329, + "logps/chosen": -0.0038324713241308928, + "logps/rejected": -1.227960228919983, + "loss": 1.0774, + "nll_loss": 0.2688811123371124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038324715569615364, + "rewards/margins": 0.1224127858877182, + "rewards/rejected": -0.12279602885246277, + "step": 6548 + }, + { + "epoch": 4.529045643153527, + "grad_norm": 6.222280025482178, + "learning_rate": 3.0394190871369292e-05, + "log_odds_chosen": 11.532266616821289, + "log_odds_ratio": -2.348639100091532e-05, + "logits/chosen": -0.692290186882019, + "logits/rejected": -0.7823889255523682, + "logps/chosen": -0.00016611188766546547, + "logps/rejected": -2.6906960010528564, + "loss": 0.8388, + "nll_loss": 0.20968790352344513, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.661118949414231e-05, + "rewards/margins": 0.26905298233032227, + "rewards/rejected": -0.2690696120262146, + "step": 6549 + }, + { + "epoch": 4.529737206085754, + "grad_norm": 5.384705066680908, + "learning_rate": 3.0390348855079148e-05, + "log_odds_chosen": 9.603094100952148, + "log_odds_ratio": -0.00023756190785206854, + "logits/chosen": -0.7226128578186035, + "logits/rejected": -0.6962193846702576, + "logps/chosen": -0.0005082216230221093, + "logps/rejected": -1.7970054149627686, + "loss": 1.6197, + "nll_loss": 0.40490108728408813, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.082216011942364e-05, + "rewards/margins": 0.1796497106552124, + "rewards/rejected": -0.1797005534172058, + "step": 6550 + }, + { + "epoch": 4.5304287690179805, + "grad_norm": 12.002609252929688, + "learning_rate": 3.0386506838788997e-05, + "log_odds_chosen": 10.036380767822266, + "log_odds_ratio": -0.0004266396281309426, + "logits/chosen": -0.14297831058502197, + "logits/rejected": -0.21859431266784668, + "logps/chosen": -0.002509652404114604, + "logps/rejected": -2.0817997455596924, + "loss": 1.5089, + "nll_loss": 0.37718573212623596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025096521130762994, + "rewards/margins": 0.20792901515960693, + "rewards/rejected": -0.20817998051643372, + "step": 6551 + }, + { + "epoch": 4.531120331950207, + "grad_norm": 6.717325210571289, + "learning_rate": 3.0382664822498846e-05, + "log_odds_chosen": 11.133275985717773, + "log_odds_ratio": -2.4246892280643806e-05, + "logits/chosen": -0.5152009129524231, + "logits/rejected": -0.5383793711662292, + "logps/chosen": -0.0005616003763861954, + "logps/rejected": -2.65570330619812, + "loss": 0.9919, + "nll_loss": 0.24797604978084564, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.616003181785345e-05, + "rewards/margins": 0.26551416516304016, + "rewards/rejected": -0.2655703127384186, + "step": 6552 + }, + { + "epoch": 4.531811894882434, + "grad_norm": 8.466388702392578, + "learning_rate": 3.03788228062087e-05, + "log_odds_chosen": 9.45691204071045, + "log_odds_ratio": -0.0012083580950275064, + "logits/chosen": -0.7959653735160828, + "logits/rejected": -0.8534533977508545, + "logps/chosen": -0.0015747640281915665, + "logps/rejected": -1.6520307064056396, + "loss": 0.7995, + "nll_loss": 0.19975440204143524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015747641737107188, + "rewards/margins": 0.1650455892086029, + "rewards/rejected": -0.16520309448242188, + "step": 6553 + }, + { + "epoch": 4.532503457814661, + "grad_norm": 6.245697498321533, + "learning_rate": 3.037498078991855e-05, + "log_odds_chosen": 8.997611999511719, + "log_odds_ratio": -0.003799182828515768, + "logits/chosen": -0.1710015833377838, + "logits/rejected": -0.26420480012893677, + "logps/chosen": -0.002259556669741869, + "logps/rejected": -1.2888202667236328, + "loss": 1.653, + "nll_loss": 0.4128655791282654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022595567861571908, + "rewards/margins": 0.1286560595035553, + "rewards/rejected": -0.12888203561306, + "step": 6554 + }, + { + "epoch": 4.533195020746888, + "grad_norm": 9.199127197265625, + "learning_rate": 3.03711387736284e-05, + "log_odds_chosen": 11.216801643371582, + "log_odds_ratio": -5.718818283639848e-05, + "logits/chosen": -0.8356324434280396, + "logits/rejected": -0.9238957166671753, + "logps/chosen": -0.000302387576084584, + "logps/rejected": -2.171398162841797, + "loss": 1.0406, + "nll_loss": 0.2601334750652313, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0238759791245684e-05, + "rewards/margins": 0.2171095758676529, + "rewards/rejected": -0.2171398103237152, + "step": 6555 + }, + { + "epoch": 4.533886583679115, + "grad_norm": 6.731876373291016, + "learning_rate": 3.0367296757338252e-05, + "log_odds_chosen": 9.419376373291016, + "log_odds_ratio": -0.0002908431924879551, + "logits/chosen": -0.7840916514396667, + "logits/rejected": -0.7459971308708191, + "logps/chosen": -0.0008626552298665047, + "logps/rejected": -1.6166490316390991, + "loss": 0.8508, + "nll_loss": 0.212665855884552, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.626552880741656e-05, + "rewards/margins": 0.16157862544059753, + "rewards/rejected": -0.1616649031639099, + "step": 6556 + }, + { + "epoch": 4.5345781466113415, + "grad_norm": 11.93840217590332, + "learning_rate": 3.03634547410481e-05, + "log_odds_chosen": 10.537141799926758, + "log_odds_ratio": -3.9206992369145155e-05, + "logits/chosen": -0.7244991064071655, + "logits/rejected": -0.853850245475769, + "logps/chosen": -0.0005426603020168841, + "logps/rejected": -2.082249164581299, + "loss": 1.3752, + "nll_loss": 0.34379222989082336, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426602729130536e-05, + "rewards/margins": 0.20817065238952637, + "rewards/rejected": -0.20822492241859436, + "step": 6557 + }, + { + "epoch": 4.535269709543568, + "grad_norm": 7.598918914794922, + "learning_rate": 3.0359612724757954e-05, + "log_odds_chosen": 9.795328140258789, + "log_odds_ratio": -0.00011863884719787166, + "logits/chosen": -0.6790354251861572, + "logits/rejected": -0.747089147567749, + "logps/chosen": -0.0004407150554470718, + "logps/rejected": -1.7884000539779663, + "loss": 1.4007, + "nll_loss": 0.35017552971839905, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.407150481711142e-05, + "rewards/margins": 0.1787959486246109, + "rewards/rejected": -0.1788400113582611, + "step": 6558 + }, + { + "epoch": 4.535961272475795, + "grad_norm": 7.28376579284668, + "learning_rate": 3.0355770708467806e-05, + "log_odds_chosen": 10.144323348999023, + "log_odds_ratio": -0.00016057609173003584, + "logits/chosen": -0.8811751008033752, + "logits/rejected": -0.8111578226089478, + "logps/chosen": -0.00015959145093802363, + "logps/rejected": -1.4356104135513306, + "loss": 1.8257, + "nll_loss": 0.45640096068382263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5959145457600243e-05, + "rewards/margins": 0.14354507625102997, + "rewards/rejected": -0.14356103539466858, + "step": 6559 + }, + { + "epoch": 4.536652835408022, + "grad_norm": 8.035957336425781, + "learning_rate": 3.0351928692177655e-05, + "log_odds_chosen": 9.968259811401367, + "log_odds_ratio": -0.00022156504564918578, + "logits/chosen": -0.6383379697799683, + "logits/rejected": -0.6144269704818726, + "logps/chosen": -0.0004586254362948239, + "logps/rejected": -1.715651512145996, + "loss": 0.7176, + "nll_loss": 0.1793805956840515, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5862547267461196e-05, + "rewards/margins": 0.17151929438114166, + "rewards/rejected": -0.17156516015529633, + "step": 6560 + }, + { + "epoch": 4.537344398340249, + "grad_norm": 7.693199634552002, + "learning_rate": 3.0348086675887504e-05, + "log_odds_chosen": 10.411820411682129, + "log_odds_ratio": -0.00023046185378916562, + "logits/chosen": -0.6372178196907043, + "logits/rejected": -0.7216589450836182, + "logps/chosen": -0.00032094272319227457, + "logps/rejected": -1.8828961849212646, + "loss": 0.9919, + "nll_loss": 0.24796098470687866, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2094270864035934e-05, + "rewards/margins": 0.18825753033161163, + "rewards/rejected": -0.18828962743282318, + "step": 6561 + }, + { + "epoch": 4.538035961272476, + "grad_norm": 5.328170299530029, + "learning_rate": 3.034424465959736e-05, + "log_odds_chosen": 9.515695571899414, + "log_odds_ratio": -0.03716710954904556, + "logits/chosen": -0.6656532883644104, + "logits/rejected": -0.8682924509048462, + "logps/chosen": -0.019652245566248894, + "logps/rejected": -1.7537295818328857, + "loss": 0.8466, + "nll_loss": 0.20792116224765778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019652247428894043, + "rewards/margins": 0.17340771853923798, + "rewards/rejected": -0.17537295818328857, + "step": 6562 + }, + { + "epoch": 4.5387275242047025, + "grad_norm": 20.874311447143555, + "learning_rate": 3.034040264330721e-05, + "log_odds_chosen": 9.981575965881348, + "log_odds_ratio": -0.0005322595243342221, + "logits/chosen": -0.4351813495159149, + "logits/rejected": -0.5474547743797302, + "logps/chosen": -0.0009682047530077398, + "logps/rejected": -1.9328224658966064, + "loss": 1.3389, + "nll_loss": 0.3346773386001587, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.682048403192312e-05, + "rewards/margins": 0.1931854486465454, + "rewards/rejected": -0.19328224658966064, + "step": 6563 + }, + { + "epoch": 4.539419087136929, + "grad_norm": 14.790209770202637, + "learning_rate": 3.0336560627017058e-05, + "log_odds_chosen": 10.135683059692383, + "log_odds_ratio": -0.00022825025371275842, + "logits/chosen": -0.9268226027488708, + "logits/rejected": -1.0066332817077637, + "logps/chosen": -0.006441335193812847, + "logps/rejected": -2.714794635772705, + "loss": 1.5193, + "nll_loss": 0.3798065483570099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006441335426643491, + "rewards/margins": 0.27083534002304077, + "rewards/rejected": -0.27147945761680603, + "step": 6564 + }, + { + "epoch": 4.540110650069156, + "grad_norm": 13.425837516784668, + "learning_rate": 3.0332718610726914e-05, + "log_odds_chosen": 9.612133979797363, + "log_odds_ratio": -0.0017814553575590253, + "logits/chosen": -0.5331589579582214, + "logits/rejected": -0.5802890062332153, + "logps/chosen": -0.002197981346398592, + "logps/rejected": -1.5549582242965698, + "loss": 1.1525, + "nll_loss": 0.28794315457344055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021979815210215747, + "rewards/margins": 0.15527603030204773, + "rewards/rejected": -0.15549582242965698, + "step": 6565 + }, + { + "epoch": 4.540802213001383, + "grad_norm": 10.138154983520508, + "learning_rate": 3.0328876594436763e-05, + "log_odds_chosen": 10.31335735321045, + "log_odds_ratio": -5.144106762600131e-05, + "logits/chosen": -0.5672279596328735, + "logits/rejected": -0.5084483027458191, + "logps/chosen": -0.00024329419829882681, + "logps/rejected": -1.719193458557129, + "loss": 1.0099, + "nll_loss": 0.25245851278305054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.432941982988268e-05, + "rewards/margins": 0.17189502716064453, + "rewards/rejected": -0.1719193458557129, + "step": 6566 + }, + { + "epoch": 4.54149377593361, + "grad_norm": 7.8678789138793945, + "learning_rate": 3.0325034578146612e-05, + "log_odds_chosen": 10.099241256713867, + "log_odds_ratio": -5.9387311921454966e-05, + "logits/chosen": -0.49886399507522583, + "logits/rejected": -0.48791369795799255, + "logps/chosen": -0.0002885722788050771, + "logps/rejected": -1.4851117134094238, + "loss": 0.9878, + "nll_loss": 0.24695292115211487, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8857228244305588e-05, + "rewards/margins": 0.1484823226928711, + "rewards/rejected": -0.14851117134094238, + "step": 6567 + }, + { + "epoch": 4.542185338865837, + "grad_norm": 20.60369873046875, + "learning_rate": 3.0321192561856465e-05, + "log_odds_chosen": 9.15850830078125, + "log_odds_ratio": -0.0005096118547953665, + "logits/chosen": -0.37946510314941406, + "logits/rejected": -0.4805373251438141, + "logps/chosen": -0.0014331219717860222, + "logps/rejected": -1.675501823425293, + "loss": 1.1373, + "nll_loss": 0.28428125381469727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001433122088201344, + "rewards/margins": 0.1674068719148636, + "rewards/rejected": -0.167550191283226, + "step": 6568 + }, + { + "epoch": 4.5428769017980635, + "grad_norm": 7.6536736488342285, + "learning_rate": 3.0317350545566314e-05, + "log_odds_chosen": 10.412943840026855, + "log_odds_ratio": -0.0005767575348727405, + "logits/chosen": -0.6405270099639893, + "logits/rejected": -0.7021207809448242, + "logps/chosen": -0.00048545480240136385, + "logps/rejected": -2.3228001594543457, + "loss": 0.7183, + "nll_loss": 0.1795252412557602, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8545480240136385e-05, + "rewards/margins": 0.23223146796226501, + "rewards/rejected": -0.23228003084659576, + "step": 6569 + }, + { + "epoch": 4.54356846473029, + "grad_norm": 5.79603385925293, + "learning_rate": 3.0313508529276163e-05, + "log_odds_chosen": 8.871224403381348, + "log_odds_ratio": -0.001314416411332786, + "logits/chosen": -0.34749066829681396, + "logits/rejected": -0.3158177137374878, + "logps/chosen": -0.0010376510908827186, + "logps/rejected": -1.6415541172027588, + "loss": 1.1476, + "nll_loss": 0.28676947951316833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010376510908827186, + "rewards/margins": 0.16405165195465088, + "rewards/rejected": -0.16415540874004364, + "step": 6570 + }, + { + "epoch": 4.544260027662517, + "grad_norm": 13.5281343460083, + "learning_rate": 3.030966651298602e-05, + "log_odds_chosen": 8.49252986907959, + "log_odds_ratio": -0.041920870542526245, + "logits/chosen": -0.26285240054130554, + "logits/rejected": -0.41422227025032043, + "logps/chosen": -0.009888852015137672, + "logps/rejected": -1.6656150817871094, + "loss": 1.1532, + "nll_loss": 0.28410616517066956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009888852946460247, + "rewards/margins": 0.1655726581811905, + "rewards/rejected": -0.1665615290403366, + "step": 6571 + }, + { + "epoch": 4.544951590594744, + "grad_norm": 9.392267227172852, + "learning_rate": 3.0305824496695868e-05, + "log_odds_chosen": 9.859603881835938, + "log_odds_ratio": -0.00022053778229746968, + "logits/chosen": -0.2102208435535431, + "logits/rejected": -0.2464631199836731, + "logps/chosen": -0.0003437441191636026, + "logps/rejected": -1.7045176029205322, + "loss": 1.5048, + "nll_loss": 0.37617313861846924, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.437441409914754e-05, + "rewards/margins": 0.1704173982143402, + "rewards/rejected": -0.17045177519321442, + "step": 6572 + }, + { + "epoch": 4.545643153526971, + "grad_norm": 12.560931205749512, + "learning_rate": 3.0301982480405717e-05, + "log_odds_chosen": 10.292903900146484, + "log_odds_ratio": -5.359947317629121e-05, + "logits/chosen": -0.6326181292533875, + "logits/rejected": -0.6115179061889648, + "logps/chosen": -0.00027635638252831995, + "logps/rejected": -1.6791993379592896, + "loss": 1.0485, + "nll_loss": 0.2621276378631592, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7635642254608683e-05, + "rewards/margins": 0.16789229214191437, + "rewards/rejected": -0.16791994869709015, + "step": 6573 + }, + { + "epoch": 4.546334716459198, + "grad_norm": 12.031844139099121, + "learning_rate": 3.0298140464115572e-05, + "log_odds_chosen": 10.533636093139648, + "log_odds_ratio": -5.4832369642099366e-05, + "logits/chosen": -0.1512179672718048, + "logits/rejected": -0.2702445387840271, + "logps/chosen": -0.0006668913993053138, + "logps/rejected": -2.733823776245117, + "loss": 1.0078, + "nll_loss": 0.2519344985485077, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668914284091443e-05, + "rewards/margins": 0.2733156979084015, + "rewards/rejected": -0.27338239550590515, + "step": 6574 + }, + { + "epoch": 4.5470262793914245, + "grad_norm": 7.381800651550293, + "learning_rate": 3.029429844782542e-05, + "log_odds_chosen": 9.192039489746094, + "log_odds_ratio": -0.0007829120149835944, + "logits/chosen": -0.6911160945892334, + "logits/rejected": -0.6769638061523438, + "logps/chosen": -0.0007873540162108839, + "logps/rejected": -1.4364888668060303, + "loss": 1.0262, + "nll_loss": 0.25647637248039246, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.873540016589686e-05, + "rewards/margins": 0.1435701549053192, + "rewards/rejected": -0.1436488926410675, + "step": 6575 + }, + { + "epoch": 4.547717842323651, + "grad_norm": 8.626969337463379, + "learning_rate": 3.029045643153527e-05, + "log_odds_chosen": 10.28857707977295, + "log_odds_ratio": -0.00033702142536640167, + "logits/chosen": -0.5012588500976562, + "logits/rejected": -0.5341583490371704, + "logps/chosen": -0.00022805618937127292, + "logps/rejected": -1.5251436233520508, + "loss": 0.8619, + "nll_loss": 0.21544235944747925, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2805619664723054e-05, + "rewards/margins": 0.15249155461788177, + "rewards/rejected": -0.15251435339450836, + "step": 6576 + }, + { + "epoch": 4.548409405255878, + "grad_norm": 8.244384765625, + "learning_rate": 3.0286614415245123e-05, + "log_odds_chosen": 9.377249717712402, + "log_odds_ratio": -0.00011702909978339449, + "logits/chosen": -0.3914976119995117, + "logits/rejected": -0.46458885073661804, + "logps/chosen": -0.00035335979191586375, + "logps/rejected": -1.3842849731445312, + "loss": 0.9243, + "nll_loss": 0.23106145858764648, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5335979191586375e-05, + "rewards/margins": 0.13839316368103027, + "rewards/rejected": -0.13842850923538208, + "step": 6577 + }, + { + "epoch": 4.549100968188105, + "grad_norm": 9.749436378479004, + "learning_rate": 3.0282772398954972e-05, + "log_odds_chosen": 11.123355865478516, + "log_odds_ratio": -2.415657763776835e-05, + "logits/chosen": -0.37832289934158325, + "logits/rejected": -0.4357031583786011, + "logps/chosen": -0.00014345777162816375, + "logps/rejected": -2.0811164379119873, + "loss": 1.0, + "nll_loss": 0.25000274181365967, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4345778254210018e-05, + "rewards/margins": 0.20809729397296906, + "rewards/rejected": -0.20811164379119873, + "step": 6578 + }, + { + "epoch": 4.549792531120332, + "grad_norm": 9.854357719421387, + "learning_rate": 3.027893038266482e-05, + "log_odds_chosen": 10.580504417419434, + "log_odds_ratio": -0.00043930544052273035, + "logits/chosen": -0.7240288853645325, + "logits/rejected": -0.7891085147857666, + "logps/chosen": -0.0024362949188798666, + "logps/rejected": -2.18922758102417, + "loss": 1.3597, + "nll_loss": 0.339883416891098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024362948897760361, + "rewards/margins": 0.21867913007736206, + "rewards/rejected": -0.21892276406288147, + "step": 6579 + }, + { + "epoch": 4.550484094052559, + "grad_norm": 11.040898323059082, + "learning_rate": 3.0275088366374677e-05, + "log_odds_chosen": 10.449048042297363, + "log_odds_ratio": -5.974875239189714e-05, + "logits/chosen": -0.7696495652198792, + "logits/rejected": -0.8473649621009827, + "logps/chosen": -0.00033175962744280696, + "logps/rejected": -2.1812098026275635, + "loss": 1.4198, + "nll_loss": 0.35495179891586304, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.317596565466374e-05, + "rewards/margins": 0.2180878072977066, + "rewards/rejected": -0.2181209921836853, + "step": 6580 + }, + { + "epoch": 4.551175656984785, + "grad_norm": 9.304978370666504, + "learning_rate": 3.0271246350084526e-05, + "log_odds_chosen": 10.155534744262695, + "log_odds_ratio": -0.00018998852465301752, + "logits/chosen": -0.8298165798187256, + "logits/rejected": -0.8783495426177979, + "logps/chosen": -0.0010503771482035518, + "logps/rejected": -1.9879919290542603, + "loss": 1.5645, + "nll_loss": 0.3911066949367523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010503771773073822, + "rewards/margins": 0.1986941695213318, + "rewards/rejected": -0.19879920780658722, + "step": 6581 + }, + { + "epoch": 4.551867219917012, + "grad_norm": 7.36096715927124, + "learning_rate": 3.0267404333794375e-05, + "log_odds_chosen": 10.879387855529785, + "log_odds_ratio": -6.295489583862945e-05, + "logits/chosen": -0.4926462471485138, + "logits/rejected": -0.5912123322486877, + "logps/chosen": -0.0021177981980144978, + "logps/rejected": -2.5622596740722656, + "loss": 0.841, + "nll_loss": 0.21025186777114868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021177981398068368, + "rewards/margins": 0.25601422786712646, + "rewards/rejected": -0.2562260031700134, + "step": 6582 + }, + { + "epoch": 4.552558782849239, + "grad_norm": 7.8839898109436035, + "learning_rate": 3.026356231750423e-05, + "log_odds_chosen": 10.247995376586914, + "log_odds_ratio": -0.0001559545489726588, + "logits/chosen": -0.7979452013969421, + "logits/rejected": -0.9169696569442749, + "logps/chosen": -0.0002751094871200621, + "logps/rejected": -2.117699384689331, + "loss": 1.2424, + "nll_loss": 0.3105788230895996, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.751094871200621e-05, + "rewards/margins": 0.21174244582653046, + "rewards/rejected": -0.2117699384689331, + "step": 6583 + }, + { + "epoch": 4.553250345781466, + "grad_norm": 12.00692367553711, + "learning_rate": 3.025972030121408e-05, + "log_odds_chosen": 9.980520248413086, + "log_odds_ratio": -0.00025905261281877756, + "logits/chosen": -0.424941748380661, + "logits/rejected": -0.5358071327209473, + "logps/chosen": -0.0001751706877257675, + "logps/rejected": -1.4843164682388306, + "loss": 1.0665, + "nll_loss": 0.26660993695259094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.751706986397039e-05, + "rewards/margins": 0.14841413497924805, + "rewards/rejected": -0.148431658744812, + "step": 6584 + }, + { + "epoch": 4.553941908713693, + "grad_norm": 7.942290782928467, + "learning_rate": 3.025587828492393e-05, + "log_odds_chosen": 8.972213745117188, + "log_odds_ratio": -0.0004230959457345307, + "logits/chosen": -0.8286534547805786, + "logits/rejected": -0.8645509481430054, + "logps/chosen": -0.0003187672991771251, + "logps/rejected": -1.2250981330871582, + "loss": 1.1575, + "nll_loss": 0.28933191299438477, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.187673064530827e-05, + "rewards/margins": 0.1224779412150383, + "rewards/rejected": -0.12250981479883194, + "step": 6585 + }, + { + "epoch": 4.55463347164592, + "grad_norm": 11.324723243713379, + "learning_rate": 3.025203626863378e-05, + "log_odds_chosen": 9.58649730682373, + "log_odds_ratio": -0.012563646771013737, + "logits/chosen": -0.7872684597969055, + "logits/rejected": -0.8913931250572205, + "logps/chosen": -0.0046211532317101955, + "logps/rejected": -2.069636821746826, + "loss": 1.3229, + "nll_loss": 0.3294662833213806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046211533481255174, + "rewards/margins": 0.2065015733242035, + "rewards/rejected": -0.2069637030363083, + "step": 6586 + }, + { + "epoch": 4.555325034578146, + "grad_norm": 11.3200044631958, + "learning_rate": 3.024819425234363e-05, + "log_odds_chosen": 10.222295761108398, + "log_odds_ratio": -0.0001525928091723472, + "logits/chosen": -0.5663999915122986, + "logits/rejected": -0.6826832294464111, + "logps/chosen": -0.002781313844025135, + "logps/rejected": -2.424255847930908, + "loss": 0.9093, + "nll_loss": 0.2273210734128952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027813136694021523, + "rewards/margins": 0.24214744567871094, + "rewards/rejected": -0.2424255907535553, + "step": 6587 + }, + { + "epoch": 4.556016597510373, + "grad_norm": 11.659857749938965, + "learning_rate": 3.024435223605348e-05, + "log_odds_chosen": 10.368592262268066, + "log_odds_ratio": -0.000933436444029212, + "logits/chosen": -0.918364405632019, + "logits/rejected": -0.9740538597106934, + "logps/chosen": -0.009822577238082886, + "logps/rejected": -2.1933836936950684, + "loss": 1.291, + "nll_loss": 0.32265663146972656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009822577703744173, + "rewards/margins": 0.21835610270500183, + "rewards/rejected": -0.21933835744857788, + "step": 6588 + }, + { + "epoch": 4.5567081604426, + "grad_norm": 6.759733200073242, + "learning_rate": 3.0240510219763335e-05, + "log_odds_chosen": 9.884389877319336, + "log_odds_ratio": -0.0005357956397347152, + "logits/chosen": -0.5267442464828491, + "logits/rejected": -0.5922637581825256, + "logps/chosen": -0.004163261502981186, + "logps/rejected": -2.33642315864563, + "loss": 1.1374, + "nll_loss": 0.2843058705329895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004163261619396508, + "rewards/margins": 0.2332259863615036, + "rewards/rejected": -0.2336423099040985, + "step": 6589 + }, + { + "epoch": 4.557399723374827, + "grad_norm": 6.967859268188477, + "learning_rate": 3.0236668203473184e-05, + "log_odds_chosen": 9.72664737701416, + "log_odds_ratio": -0.0008469682070426643, + "logits/chosen": -0.24553634226322174, + "logits/rejected": -0.2926129996776581, + "logps/chosen": -0.0009131368133239448, + "logps/rejected": -1.4904640913009644, + "loss": 0.6963, + "nll_loss": 0.17398318648338318, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.1313682787586e-05, + "rewards/margins": 0.1489550918340683, + "rewards/rejected": -0.1490464061498642, + "step": 6590 + }, + { + "epoch": 4.558091286307054, + "grad_norm": 14.491275787353516, + "learning_rate": 3.0232826187183034e-05, + "log_odds_chosen": 9.976200103759766, + "log_odds_ratio": -0.00011364954843884334, + "logits/chosen": -0.7161533832550049, + "logits/rejected": -0.7391790747642517, + "logps/chosen": -0.00015934955445118248, + "logps/rejected": -1.5275839567184448, + "loss": 1.0246, + "nll_loss": 0.2561395764350891, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5934954717522487e-05, + "rewards/margins": 0.15274246037006378, + "rewards/rejected": -0.1527584046125412, + "step": 6591 + }, + { + "epoch": 4.558782849239281, + "grad_norm": 10.900084495544434, + "learning_rate": 3.022898417089289e-05, + "log_odds_chosen": 11.52933120727539, + "log_odds_ratio": -1.7895972632686608e-05, + "logits/chosen": -0.504792332649231, + "logits/rejected": -0.6362007856369019, + "logps/chosen": -0.00036690360866487026, + "logps/rejected": -3.0588414669036865, + "loss": 0.8552, + "nll_loss": 0.21380570530891418, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.669036232167855e-05, + "rewards/margins": 0.3058474659919739, + "rewards/rejected": -0.3058841824531555, + "step": 6592 + }, + { + "epoch": 4.559474412171507, + "grad_norm": 8.694879531860352, + "learning_rate": 3.022514215460274e-05, + "log_odds_chosen": 8.654937744140625, + "log_odds_ratio": -0.00045291121932677925, + "logits/chosen": -0.46551069617271423, + "logits/rejected": -0.520065426826477, + "logps/chosen": -0.0035642811562865973, + "logps/rejected": -1.6542596817016602, + "loss": 1.5287, + "nll_loss": 0.3821191191673279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003564281214494258, + "rewards/margins": 0.16506953537464142, + "rewards/rejected": -0.16542597115039825, + "step": 6593 + }, + { + "epoch": 4.560165975103734, + "grad_norm": 9.74400520324707, + "learning_rate": 3.0221300138312587e-05, + "log_odds_chosen": 9.620219230651855, + "log_odds_ratio": -0.00015013368101790547, + "logits/chosen": -0.6944003701210022, + "logits/rejected": -0.690482497215271, + "logps/chosen": -0.0019507486140355468, + "logps/rejected": -1.684833288192749, + "loss": 1.1181, + "nll_loss": 0.2795025706291199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001950748555827886, + "rewards/margins": 0.1682882457971573, + "rewards/rejected": -0.16848333179950714, + "step": 6594 + }, + { + "epoch": 4.560857538035961, + "grad_norm": 5.794257164001465, + "learning_rate": 3.021745812202244e-05, + "log_odds_chosen": 9.387247085571289, + "log_odds_ratio": -0.0009698733920231462, + "logits/chosen": -0.18023420870304108, + "logits/rejected": -0.19014205038547516, + "logps/chosen": -0.004922578576952219, + "logps/rejected": -2.4882915019989014, + "loss": 1.1872, + "nll_loss": 0.2966940402984619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004922578809782863, + "rewards/margins": 0.24833691120147705, + "rewards/rejected": -0.2488291710615158, + "step": 6595 + }, + { + "epoch": 4.561549100968188, + "grad_norm": 7.39327335357666, + "learning_rate": 3.021361610573229e-05, + "log_odds_chosen": 10.013833999633789, + "log_odds_ratio": -0.0006084858905524015, + "logits/chosen": -0.7817709445953369, + "logits/rejected": -0.7556931972503662, + "logps/chosen": -0.006435707677155733, + "logps/rejected": -1.9030237197875977, + "loss": 0.9846, + "nll_loss": 0.24608328938484192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006435707909986377, + "rewards/margins": 0.18965880572795868, + "rewards/rejected": -0.19030238687992096, + "step": 6596 + }, + { + "epoch": 4.562240663900415, + "grad_norm": 7.989497661590576, + "learning_rate": 3.0209774089442138e-05, + "log_odds_chosen": 10.590084075927734, + "log_odds_ratio": -0.0002257568994536996, + "logits/chosen": -0.6370759010314941, + "logits/rejected": -0.7055598497390747, + "logps/chosen": -0.0007681610295549035, + "logps/rejected": -2.212214708328247, + "loss": 0.8154, + "nll_loss": 0.2038293480873108, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.681611168663949e-05, + "rewards/margins": 0.2211446613073349, + "rewards/rejected": -0.22122147679328918, + "step": 6597 + }, + { + "epoch": 4.5629322268326415, + "grad_norm": 8.126029968261719, + "learning_rate": 3.0205932073151994e-05, + "log_odds_chosen": 10.009937286376953, + "log_odds_ratio": -9.851453796727583e-05, + "logits/chosen": -0.2403627336025238, + "logits/rejected": -0.2549085319042206, + "logps/chosen": -0.00030068133492022753, + "logps/rejected": -1.7907345294952393, + "loss": 1.3476, + "nll_loss": 0.33689039945602417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0068133128224872e-05, + "rewards/margins": 0.17904338240623474, + "rewards/rejected": -0.17907343804836273, + "step": 6598 + }, + { + "epoch": 4.563623789764868, + "grad_norm": 10.333507537841797, + "learning_rate": 3.0202090056861843e-05, + "log_odds_chosen": 10.019725799560547, + "log_odds_ratio": -0.00012765916471835226, + "logits/chosen": -0.33758291602134705, + "logits/rejected": -0.3801264464855194, + "logps/chosen": -0.00041800137842074037, + "logps/rejected": -2.058382987976074, + "loss": 0.921, + "nll_loss": 0.23023031651973724, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.180014366284013e-05, + "rewards/margins": 0.2057965099811554, + "rewards/rejected": -0.20583830773830414, + "step": 6599 + }, + { + "epoch": 4.564315352697095, + "grad_norm": 6.865717887878418, + "learning_rate": 3.0198248040571692e-05, + "log_odds_chosen": 9.156005859375, + "log_odds_ratio": -0.00019638205412775278, + "logits/chosen": -0.8517537713050842, + "logits/rejected": -0.8063177466392517, + "logps/chosen": -0.0004978242795914412, + "logps/rejected": -1.4446221590042114, + "loss": 1.0236, + "nll_loss": 0.25587552785873413, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.978242941433564e-05, + "rewards/margins": 0.14441244304180145, + "rewards/rejected": -0.1444622129201889, + "step": 6600 + }, + { + "epoch": 4.565006915629322, + "grad_norm": 14.249171257019043, + "learning_rate": 3.0194406024281548e-05, + "log_odds_chosen": 9.464423179626465, + "log_odds_ratio": -0.05572696775197983, + "logits/chosen": -0.488655686378479, + "logits/rejected": -0.6141406297683716, + "logps/chosen": -0.013542162254452705, + "logps/rejected": -2.2569494247436523, + "loss": 0.8959, + "nll_loss": 0.21839742362499237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001354216248728335, + "rewards/margins": 0.22434073686599731, + "rewards/rejected": -0.22569496929645538, + "step": 6601 + }, + { + "epoch": 4.565698478561549, + "grad_norm": 11.11478042602539, + "learning_rate": 3.0190564007991397e-05, + "log_odds_chosen": 11.521812438964844, + "log_odds_ratio": -1.6018464521039277e-05, + "logits/chosen": -0.3399466276168823, + "logits/rejected": -0.4136958718299866, + "logps/chosen": -8.431501191807911e-05, + "logps/rejected": -2.1025118827819824, + "loss": 1.0616, + "nll_loss": 0.2653941512107849, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.431501555605792e-06, + "rewards/margins": 0.21024277806282043, + "rewards/rejected": -0.21025118231773376, + "step": 6602 + }, + { + "epoch": 4.566390041493776, + "grad_norm": 12.986268043518066, + "learning_rate": 3.0186721991701246e-05, + "log_odds_chosen": 9.19881820678711, + "log_odds_ratio": -0.02131696231663227, + "logits/chosen": -0.4616313576698303, + "logits/rejected": -0.558713436126709, + "logps/chosen": -0.0075446078553795815, + "logps/rejected": -1.5928486585617065, + "loss": 0.8881, + "nll_loss": 0.21988779306411743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007544608088210225, + "rewards/margins": 0.15853041410446167, + "rewards/rejected": -0.15928487479686737, + "step": 6603 + }, + { + "epoch": 4.5670816044260025, + "grad_norm": 12.540678024291992, + "learning_rate": 3.01828799754111e-05, + "log_odds_chosen": 10.581785202026367, + "log_odds_ratio": -5.553047958528623e-05, + "logits/chosen": -0.42860329151153564, + "logits/rejected": -0.5122525095939636, + "logps/chosen": -0.0002509926271159202, + "logps/rejected": -2.055676221847534, + "loss": 1.0941, + "nll_loss": 0.27351298928260803, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.509926343918778e-05, + "rewards/margins": 0.20554250478744507, + "rewards/rejected": -0.2055675983428955, + "step": 6604 + }, + { + "epoch": 4.567773167358229, + "grad_norm": 7.179683685302734, + "learning_rate": 3.0179037959120947e-05, + "log_odds_chosen": 9.905824661254883, + "log_odds_ratio": -0.00036644781357608736, + "logits/chosen": -0.6355658173561096, + "logits/rejected": -0.6388107538223267, + "logps/chosen": -0.00045511574717238545, + "logps/rejected": -2.179323434829712, + "loss": 0.9415, + "nll_loss": 0.2353421449661255, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.551157326204702e-05, + "rewards/margins": 0.21788683533668518, + "rewards/rejected": -0.21793235838413239, + "step": 6605 + }, + { + "epoch": 4.568464730290456, + "grad_norm": 6.183163642883301, + "learning_rate": 3.0175195942830796e-05, + "log_odds_chosen": 10.373852729797363, + "log_odds_ratio": -0.00013491684512700886, + "logits/chosen": -0.4657822847366333, + "logits/rejected": -0.5364692807197571, + "logps/chosen": -0.01596393622457981, + "logps/rejected": -2.2673511505126953, + "loss": 0.8466, + "nll_loss": 0.21162846684455872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015963935293257236, + "rewards/margins": 0.22513870894908905, + "rewards/rejected": -0.22673510015010834, + "step": 6606 + }, + { + "epoch": 4.569156293222683, + "grad_norm": 10.276148796081543, + "learning_rate": 3.0171353926540652e-05, + "log_odds_chosen": 10.319684982299805, + "log_odds_ratio": -4.735357651952654e-05, + "logits/chosen": -0.5993685126304626, + "logits/rejected": -0.6914522647857666, + "logps/chosen": -0.0001445843663532287, + "logps/rejected": -1.6822584867477417, + "loss": 0.7713, + "nll_loss": 0.19281740486621857, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4458439181908034e-05, + "rewards/margins": 0.16821140050888062, + "rewards/rejected": -0.16822585463523865, + "step": 6607 + }, + { + "epoch": 4.56984785615491, + "grad_norm": 11.633027076721191, + "learning_rate": 3.01675119102505e-05, + "log_odds_chosen": 9.985593795776367, + "log_odds_ratio": -0.0004732572997454554, + "logits/chosen": -0.41848012804985046, + "logits/rejected": -0.4558772146701813, + "logps/chosen": -0.006857479456812143, + "logps/rejected": -2.1081955432891846, + "loss": 1.1383, + "nll_loss": 0.28452807664871216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006857480038888752, + "rewards/margins": 0.21013380587100983, + "rewards/rejected": -0.2108195573091507, + "step": 6608 + }, + { + "epoch": 4.570539419087137, + "grad_norm": 10.77037239074707, + "learning_rate": 3.016366989396035e-05, + "log_odds_chosen": 9.556285858154297, + "log_odds_ratio": -0.00019223152776248753, + "logits/chosen": -0.5291285514831543, + "logits/rejected": -0.5538058876991272, + "logps/chosen": -0.0005219366285018623, + "logps/rejected": -1.4201699495315552, + "loss": 1.3276, + "nll_loss": 0.33189016580581665, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21936672157608e-05, + "rewards/margins": 0.14196480810642242, + "rewards/rejected": -0.14201700687408447, + "step": 6609 + }, + { + "epoch": 4.5712309820193635, + "grad_norm": 6.790186882019043, + "learning_rate": 3.0159827877670206e-05, + "log_odds_chosen": 9.672239303588867, + "log_odds_ratio": -0.00028869410743936896, + "logits/chosen": -0.3598228991031647, + "logits/rejected": -0.4058411419391632, + "logps/chosen": -0.00029092555632814765, + "logps/rejected": -1.4371891021728516, + "loss": 1.1986, + "nll_loss": 0.29960960149765015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.909255272243172e-05, + "rewards/margins": 0.1436898112297058, + "rewards/rejected": -0.1437188982963562, + "step": 6610 + }, + { + "epoch": 4.57192254495159, + "grad_norm": 10.04366397857666, + "learning_rate": 3.0155985861380055e-05, + "log_odds_chosen": 10.897849082946777, + "log_odds_ratio": -4.32228043791838e-05, + "logits/chosen": -0.6647764444351196, + "logits/rejected": -0.8023943901062012, + "logps/chosen": -0.00021091816597618163, + "logps/rejected": -2.00454044342041, + "loss": 1.3433, + "nll_loss": 0.33581259846687317, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1091816961416043e-05, + "rewards/margins": 0.20043295621871948, + "rewards/rejected": -0.20045407116413116, + "step": 6611 + }, + { + "epoch": 4.572614107883817, + "grad_norm": 11.911702156066895, + "learning_rate": 3.0152143845089904e-05, + "log_odds_chosen": 11.172669410705566, + "log_odds_ratio": -0.0001469549461035058, + "logits/chosen": -0.6482466459274292, + "logits/rejected": -0.6237611770629883, + "logps/chosen": -0.00021537227439694107, + "logps/rejected": -2.3251004219055176, + "loss": 0.8104, + "nll_loss": 0.20259681344032288, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1537227439694107e-05, + "rewards/margins": 0.2324884831905365, + "rewards/rejected": -0.2325100302696228, + "step": 6612 + }, + { + "epoch": 4.573305670816044, + "grad_norm": 9.78806209564209, + "learning_rate": 3.0148301828799757e-05, + "log_odds_chosen": 9.983158111572266, + "log_odds_ratio": -0.00013181474059820175, + "logits/chosen": -0.7097947597503662, + "logits/rejected": -0.7412753105163574, + "logps/chosen": -0.003653676016256213, + "logps/rejected": -2.4717650413513184, + "loss": 1.0662, + "nll_loss": 0.2665305435657501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003653676249086857, + "rewards/margins": 0.2468111515045166, + "rewards/rejected": -0.24717652797698975, + "step": 6613 + }, + { + "epoch": 4.573997233748271, + "grad_norm": 10.576286315917969, + "learning_rate": 3.0144459812509606e-05, + "log_odds_chosen": 10.688820838928223, + "log_odds_ratio": -8.817969501251355e-05, + "logits/chosen": -0.5782037377357483, + "logits/rejected": -0.633255660533905, + "logps/chosen": -0.00026176689425483346, + "logps/rejected": -1.9208297729492188, + "loss": 1.1002, + "nll_loss": 0.2750369608402252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6176689061685465e-05, + "rewards/margins": 0.192056804895401, + "rewards/rejected": -0.1920829713344574, + "step": 6614 + }, + { + "epoch": 4.574688796680498, + "grad_norm": 12.326896667480469, + "learning_rate": 3.0140617796219455e-05, + "log_odds_chosen": 10.266992568969727, + "log_odds_ratio": -5.857451105839573e-05, + "logits/chosen": -0.19259458780288696, + "logits/rejected": -0.33749744296073914, + "logps/chosen": -0.0005202666507102549, + "logps/rejected": -2.267693281173706, + "loss": 1.2466, + "nll_loss": 0.31163233518600464, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.202666943660006e-05, + "rewards/margins": 0.22671732306480408, + "rewards/rejected": -0.2267693281173706, + "step": 6615 + }, + { + "epoch": 4.5753803596127245, + "grad_norm": 8.438218116760254, + "learning_rate": 3.013677577992931e-05, + "log_odds_chosen": 11.793233871459961, + "log_odds_ratio": -1.9513814550009556e-05, + "logits/chosen": -0.4520335793495178, + "logits/rejected": -0.5310763120651245, + "logps/chosen": -0.00013092244626022875, + "logps/rejected": -2.669154405593872, + "loss": 0.7859, + "nll_loss": 0.19648477435112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3092246263113339e-05, + "rewards/margins": 0.266902357339859, + "rewards/rejected": -0.2669154405593872, + "step": 6616 + }, + { + "epoch": 4.576071922544951, + "grad_norm": 5.844875335693359, + "learning_rate": 3.013293376363916e-05, + "log_odds_chosen": 9.786355972290039, + "log_odds_ratio": -0.003753600874915719, + "logits/chosen": -0.35187357664108276, + "logits/rejected": -0.44419077038764954, + "logps/chosen": -0.024547820910811424, + "logps/rejected": -2.0552241802215576, + "loss": 0.9076, + "nll_loss": 0.2265336960554123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002454782137647271, + "rewards/margins": 0.20306764543056488, + "rewards/rejected": -0.20552241802215576, + "step": 6617 + }, + { + "epoch": 4.576763485477178, + "grad_norm": 10.234806060791016, + "learning_rate": 3.012909174734901e-05, + "log_odds_chosen": 10.920913696289062, + "log_odds_ratio": -0.00010152783215744421, + "logits/chosen": -0.806179404258728, + "logits/rejected": -0.823478102684021, + "logps/chosen": -0.017593462020158768, + "logps/rejected": -2.7665202617645264, + "loss": 0.8564, + "nll_loss": 0.21409085392951965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017593461088836193, + "rewards/margins": 0.2748926877975464, + "rewards/rejected": -0.2766520082950592, + "step": 6618 + }, + { + "epoch": 4.577455048409405, + "grad_norm": 13.536564826965332, + "learning_rate": 3.0125249731058865e-05, + "log_odds_chosen": 9.243236541748047, + "log_odds_ratio": -0.00024905483587644994, + "logits/chosen": -0.6628894805908203, + "logits/rejected": -0.7527157664299011, + "logps/chosen": -0.0009462746093049645, + "logps/rejected": -1.8274426460266113, + "loss": 1.1826, + "nll_loss": 0.29562896490097046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.462745947530493e-05, + "rewards/margins": 0.1826496571302414, + "rewards/rejected": -0.18274426460266113, + "step": 6619 + }, + { + "epoch": 4.578146611341632, + "grad_norm": 10.473002433776855, + "learning_rate": 3.0121407714768714e-05, + "log_odds_chosen": 11.051270484924316, + "log_odds_ratio": -3.43362771673128e-05, + "logits/chosen": -0.38918614387512207, + "logits/rejected": -0.44833943247795105, + "logps/chosen": -0.00021262890368234366, + "logps/rejected": -2.373044013977051, + "loss": 0.8369, + "nll_loss": 0.2092183381319046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1262891095830128e-05, + "rewards/margins": 0.2372831404209137, + "rewards/rejected": -0.2373044192790985, + "step": 6620 + }, + { + "epoch": 4.578838174273859, + "grad_norm": 9.426041603088379, + "learning_rate": 3.0117565698478563e-05, + "log_odds_chosen": 9.773505210876465, + "log_odds_ratio": -0.010695736855268478, + "logits/chosen": -0.36876046657562256, + "logits/rejected": -0.4833451509475708, + "logps/chosen": -0.00378953549079597, + "logps/rejected": -1.9265997409820557, + "loss": 0.9171, + "nll_loss": 0.22820425033569336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003789535549003631, + "rewards/margins": 0.19228103756904602, + "rewards/rejected": -0.19265997409820557, + "step": 6621 + }, + { + "epoch": 4.5795297372060855, + "grad_norm": 7.291322708129883, + "learning_rate": 3.0113723682188415e-05, + "log_odds_chosen": 10.150293350219727, + "log_odds_ratio": -8.896956569515169e-05, + "logits/chosen": -0.40212830901145935, + "logits/rejected": -0.453365683555603, + "logps/chosen": -0.00020818223129026592, + "logps/rejected": -1.8073318004608154, + "loss": 0.8227, + "nll_loss": 0.20566749572753906, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0818220946239308e-05, + "rewards/margins": 0.18071237206459045, + "rewards/rejected": -0.18073318898677826, + "step": 6622 + }, + { + "epoch": 4.580221300138312, + "grad_norm": 7.882122039794922, + "learning_rate": 3.0109881665898264e-05, + "log_odds_chosen": 11.177387237548828, + "log_odds_ratio": -2.653186493262183e-05, + "logits/chosen": -0.40092629194259644, + "logits/rejected": -0.4798882007598877, + "logps/chosen": -0.00017878400103654712, + "logps/rejected": -2.480536699295044, + "loss": 1.1797, + "nll_loss": 0.29491573572158813, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7878401195048355e-05, + "rewards/margins": 0.24803580343723297, + "rewards/rejected": -0.2480536699295044, + "step": 6623 + }, + { + "epoch": 4.580912863070539, + "grad_norm": 14.837285041809082, + "learning_rate": 3.0106039649608113e-05, + "log_odds_chosen": 10.875441551208496, + "log_odds_ratio": -0.00019159108342137188, + "logits/chosen": -0.43085187673568726, + "logits/rejected": -0.4576089382171631, + "logps/chosen": -0.0002225075295427814, + "logps/rejected": -1.8764679431915283, + "loss": 1.2767, + "nll_loss": 0.31915417313575745, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2250755137065426e-05, + "rewards/margins": 0.18762452900409698, + "rewards/rejected": -0.1876468062400818, + "step": 6624 + }, + { + "epoch": 4.581604426002766, + "grad_norm": 7.431987762451172, + "learning_rate": 3.010219763331797e-05, + "log_odds_chosen": 11.103547096252441, + "log_odds_ratio": -3.522342740325257e-05, + "logits/chosen": -0.29567962884902954, + "logits/rejected": -0.3659539818763733, + "logps/chosen": -0.00013930797285865992, + "logps/rejected": -2.1855380535125732, + "loss": 0.7527, + "nll_loss": 0.18817052245140076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3930797649663873e-05, + "rewards/margins": 0.21853987872600555, + "rewards/rejected": -0.2185538113117218, + "step": 6625 + }, + { + "epoch": 4.582295988934993, + "grad_norm": 12.275331497192383, + "learning_rate": 3.0098355617027818e-05, + "log_odds_chosen": 8.130497932434082, + "log_odds_ratio": -0.25943076610565186, + "logits/chosen": -0.5913535952568054, + "logits/rejected": -0.6271120309829712, + "logps/chosen": -0.04674238711595535, + "logps/rejected": -1.2596242427825928, + "loss": 1.1692, + "nll_loss": 0.2663576006889343, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004674238618463278, + "rewards/margins": 0.12128818035125732, + "rewards/rejected": -0.12596242129802704, + "step": 6626 + }, + { + "epoch": 4.58298755186722, + "grad_norm": 11.490532875061035, + "learning_rate": 3.0094513600737667e-05, + "log_odds_chosen": 10.451854705810547, + "log_odds_ratio": -6.06868889008183e-05, + "logits/chosen": -0.5514692664146423, + "logits/rejected": -0.5786818265914917, + "logps/chosen": -0.00012749088637065142, + "logps/rejected": -1.7657053470611572, + "loss": 0.8133, + "nll_loss": 0.20331361889839172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2749089364660904e-05, + "rewards/margins": 0.1765577793121338, + "rewards/rejected": -0.17657053470611572, + "step": 6627 + }, + { + "epoch": 4.5836791147994465, + "grad_norm": 9.477652549743652, + "learning_rate": 3.0090671584447523e-05, + "log_odds_chosen": 10.639892578125, + "log_odds_ratio": -6.321460386971012e-05, + "logits/chosen": -0.5839773416519165, + "logits/rejected": -0.675246000289917, + "logps/chosen": -0.00020599793060682714, + "logps/rejected": -2.062002420425415, + "loss": 0.896, + "nll_loss": 0.22398124635219574, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0599793060682714e-05, + "rewards/margins": 0.2061796486377716, + "rewards/rejected": -0.2062002420425415, + "step": 6628 + }, + { + "epoch": 4.584370677731673, + "grad_norm": 9.168537139892578, + "learning_rate": 3.0086829568157372e-05, + "log_odds_chosen": 10.252494812011719, + "log_odds_ratio": -0.00018270703731104732, + "logits/chosen": -0.3839903771877289, + "logits/rejected": -0.43178194761276245, + "logps/chosen": -0.00047848522081039846, + "logps/rejected": -2.5032529830932617, + "loss": 0.9095, + "nll_loss": 0.22736404836177826, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.784852353623137e-05, + "rewards/margins": 0.25027742981910706, + "rewards/rejected": -0.2503252923488617, + "step": 6629 + }, + { + "epoch": 4.5850622406639, + "grad_norm": 6.157984733581543, + "learning_rate": 3.008298755186722e-05, + "log_odds_chosen": 10.422435760498047, + "log_odds_ratio": -0.00046115758595988154, + "logits/chosen": -0.4528391659259796, + "logits/rejected": -0.37236273288726807, + "logps/chosen": -0.0007764915935695171, + "logps/rejected": -2.314472198486328, + "loss": 0.6048, + "nll_loss": 0.15114331245422363, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.764915062580258e-05, + "rewards/margins": 0.23136958479881287, + "rewards/rejected": -0.2314472198486328, + "step": 6630 + }, + { + "epoch": 4.585753803596127, + "grad_norm": 8.872358322143555, + "learning_rate": 3.0079145535577074e-05, + "log_odds_chosen": 10.070481300354004, + "log_odds_ratio": -0.00012420877465046942, + "logits/chosen": -0.6568784117698669, + "logits/rejected": -0.7422617673873901, + "logps/chosen": -0.000309359427774325, + "logps/rejected": -1.989261507987976, + "loss": 1.4695, + "nll_loss": 0.367355614900589, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.093594204983674e-05, + "rewards/margins": 0.19889521598815918, + "rewards/rejected": -0.1989261358976364, + "step": 6631 + }, + { + "epoch": 4.586445366528354, + "grad_norm": 14.159653663635254, + "learning_rate": 3.0075303519286923e-05, + "log_odds_chosen": 9.802715301513672, + "log_odds_ratio": -0.00021838059183210135, + "logits/chosen": -0.6921088099479675, + "logits/rejected": -0.7150027751922607, + "logps/chosen": -0.0007769321673549712, + "logps/rejected": -1.8968806266784668, + "loss": 1.1374, + "nll_loss": 0.28433793783187866, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.769322110107169e-05, + "rewards/margins": 0.18961036205291748, + "rewards/rejected": -0.1896880567073822, + "step": 6632 + }, + { + "epoch": 4.587136929460581, + "grad_norm": 4.186121463775635, + "learning_rate": 3.0071461502996772e-05, + "log_odds_chosen": 9.516704559326172, + "log_odds_ratio": -0.0005803394014947116, + "logits/chosen": -0.5224634408950806, + "logits/rejected": -0.5306433439254761, + "logps/chosen": -0.0054843733087182045, + "logps/rejected": -1.9898731708526611, + "loss": 1.0931, + "nll_loss": 0.27322930097579956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005484373541548848, + "rewards/margins": 0.1984388828277588, + "rewards/rejected": -0.19898732006549835, + "step": 6633 + }, + { + "epoch": 4.587828492392807, + "grad_norm": 9.91395092010498, + "learning_rate": 3.0067619486706628e-05, + "log_odds_chosen": 10.172961235046387, + "log_odds_ratio": -0.00016539839270990342, + "logits/chosen": -0.8813390135765076, + "logits/rejected": -0.9066269397735596, + "logps/chosen": -0.0005005454295314848, + "logps/rejected": -1.9067983627319336, + "loss": 0.9538, + "nll_loss": 0.23842957615852356, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0054542953148484e-05, + "rewards/margins": 0.190629780292511, + "rewards/rejected": -0.19067983329296112, + "step": 6634 + }, + { + "epoch": 4.588520055325034, + "grad_norm": 8.857604026794434, + "learning_rate": 3.0063777470416477e-05, + "log_odds_chosen": 10.181537628173828, + "log_odds_ratio": -0.00013955699978396297, + "logits/chosen": -0.8722149133682251, + "logits/rejected": -0.8384766578674316, + "logps/chosen": -0.0005608652718365192, + "logps/rejected": -1.980457067489624, + "loss": 1.0769, + "nll_loss": 0.2692164480686188, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6086522818077356e-05, + "rewards/margins": 0.19798964262008667, + "rewards/rejected": -0.19804571568965912, + "step": 6635 + }, + { + "epoch": 4.589211618257261, + "grad_norm": 9.029706954956055, + "learning_rate": 3.0059935454126326e-05, + "log_odds_chosen": 10.915395736694336, + "log_odds_ratio": -3.5561828553909436e-05, + "logits/chosen": -0.16081270575523376, + "logits/rejected": -0.23908120393753052, + "logps/chosen": -0.0010874989675357938, + "logps/rejected": -2.6500515937805176, + "loss": 1.1949, + "nll_loss": 0.29872551560401917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010874988947762176, + "rewards/margins": 0.26489636301994324, + "rewards/rejected": -0.26500511169433594, + "step": 6636 + }, + { + "epoch": 4.589903181189488, + "grad_norm": 8.428266525268555, + "learning_rate": 3.005609343783618e-05, + "log_odds_chosen": 9.293031692504883, + "log_odds_ratio": -0.07677154242992401, + "logits/chosen": -0.26036933064460754, + "logits/rejected": -0.34439903497695923, + "logps/chosen": -0.013922506012022495, + "logps/rejected": -2.2738609313964844, + "loss": 1.1792, + "nll_loss": 0.2871167063713074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013922505313530564, + "rewards/margins": 0.22599384188652039, + "rewards/rejected": -0.22738608717918396, + "step": 6637 + }, + { + "epoch": 4.590594744121715, + "grad_norm": 11.015109062194824, + "learning_rate": 3.005225142154603e-05, + "log_odds_chosen": 9.079282760620117, + "log_odds_ratio": -0.0012578824535012245, + "logits/chosen": -0.577182412147522, + "logits/rejected": -0.6306379437446594, + "logps/chosen": -0.0030781887471675873, + "logps/rejected": -1.887145757675171, + "loss": 0.8156, + "nll_loss": 0.20376461744308472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030781884561292827, + "rewards/margins": 0.18840676546096802, + "rewards/rejected": -0.18871459364891052, + "step": 6638 + }, + { + "epoch": 4.591286307053942, + "grad_norm": 7.341928005218506, + "learning_rate": 3.004840940525588e-05, + "log_odds_chosen": 9.25500202178955, + "log_odds_ratio": -0.00026959230308420956, + "logits/chosen": -0.505585253238678, + "logits/rejected": -0.5270559787750244, + "logps/chosen": -0.0005544669693335891, + "logps/rejected": -1.2817394733428955, + "loss": 1.031, + "nll_loss": 0.2577342092990875, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.54466932953801e-05, + "rewards/margins": 0.12811851501464844, + "rewards/rejected": -0.12817394733428955, + "step": 6639 + }, + { + "epoch": 4.591977869986168, + "grad_norm": 6.349297046661377, + "learning_rate": 3.0044567388965732e-05, + "log_odds_chosen": 9.84512710571289, + "log_odds_ratio": -0.0002761775394901633, + "logits/chosen": -0.41043686866760254, + "logits/rejected": -0.4271596074104309, + "logps/chosen": -0.0007267515175044537, + "logps/rejected": -1.9643175601959229, + "loss": 0.921, + "nll_loss": 0.23021897673606873, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.267515320563689e-05, + "rewards/margins": 0.1963590681552887, + "rewards/rejected": -0.1964317411184311, + "step": 6640 + }, + { + "epoch": 4.592669432918395, + "grad_norm": 9.569697380065918, + "learning_rate": 3.004072537267558e-05, + "log_odds_chosen": 11.393840789794922, + "log_odds_ratio": -2.0055922504980117e-05, + "logits/chosen": -0.5695845484733582, + "logits/rejected": -0.600030243396759, + "logps/chosen": -9.230027353623882e-05, + "logps/rejected": -2.1485538482666016, + "loss": 1.2374, + "nll_loss": 0.30935972929000854, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.230027899320703e-06, + "rewards/margins": 0.2148461490869522, + "rewards/rejected": -0.2148553729057312, + "step": 6641 + }, + { + "epoch": 4.593360995850622, + "grad_norm": 7.704564094543457, + "learning_rate": 3.003688335638543e-05, + "log_odds_chosen": 10.57839584350586, + "log_odds_ratio": -0.00010343089525122195, + "logits/chosen": -0.5169810652732849, + "logits/rejected": -0.6554980874061584, + "logps/chosen": -0.000415709859225899, + "logps/rejected": -2.6744332313537598, + "loss": 1.0457, + "nll_loss": 0.2614179253578186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.157098737778142e-05, + "rewards/margins": 0.2674017548561096, + "rewards/rejected": -0.26744332909584045, + "step": 6642 + }, + { + "epoch": 4.594052558782849, + "grad_norm": 8.762954711914062, + "learning_rate": 3.0033041340095286e-05, + "log_odds_chosen": 10.157194137573242, + "log_odds_ratio": -0.0011035792995244265, + "logits/chosen": -0.33303841948509216, + "logits/rejected": -0.38343989849090576, + "logps/chosen": -0.0021737630013376474, + "logps/rejected": -1.5332502126693726, + "loss": 1.388, + "nll_loss": 0.3468846082687378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002173762914026156, + "rewards/margins": 0.1531076431274414, + "rewards/rejected": -0.15332502126693726, + "step": 6643 + }, + { + "epoch": 4.594744121715076, + "grad_norm": 8.015052795410156, + "learning_rate": 3.0029199323805135e-05, + "log_odds_chosen": 9.884615898132324, + "log_odds_ratio": -0.00012229596904944628, + "logits/chosen": -0.5854188799858093, + "logits/rejected": -0.6728988885879517, + "logps/chosen": -0.001457500271499157, + "logps/rejected": -2.1083319187164307, + "loss": 1.3311, + "nll_loss": 0.33276960253715515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001457500329706818, + "rewards/margins": 0.21068742871284485, + "rewards/rejected": -0.21083319187164307, + "step": 6644 + }, + { + "epoch": 4.595435684647303, + "grad_norm": 6.856884002685547, + "learning_rate": 3.0025357307514984e-05, + "log_odds_chosen": 9.551240921020508, + "log_odds_ratio": -0.00017050621681846678, + "logits/chosen": -0.8499241471290588, + "logits/rejected": -0.8596094250679016, + "logps/chosen": -0.002166606020182371, + "logps/rejected": -2.0629520416259766, + "loss": 1.1256, + "nll_loss": 0.2813722491264343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021666062821168453, + "rewards/margins": 0.20607852935791016, + "rewards/rejected": -0.2062952071428299, + "step": 6645 + }, + { + "epoch": 4.596127247579529, + "grad_norm": 8.078551292419434, + "learning_rate": 3.002151529122484e-05, + "log_odds_chosen": 10.179945945739746, + "log_odds_ratio": -8.246686775237322e-05, + "logits/chosen": -0.618579089641571, + "logits/rejected": -0.6546553373336792, + "logps/chosen": -0.0015918964054435492, + "logps/rejected": -2.2097814083099365, + "loss": 0.8485, + "nll_loss": 0.21211211383342743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015918964345473796, + "rewards/margins": 0.22081895172595978, + "rewards/rejected": -0.22097815573215485, + "step": 6646 + }, + { + "epoch": 4.596818810511756, + "grad_norm": 8.41762924194336, + "learning_rate": 3.001767327493469e-05, + "log_odds_chosen": 9.121322631835938, + "log_odds_ratio": -0.0009582031634636223, + "logits/chosen": -0.46929413080215454, + "logits/rejected": -0.4976672828197479, + "logps/chosen": -0.0007144063129089773, + "logps/rejected": -1.3871290683746338, + "loss": 1.5951, + "nll_loss": 0.39868098497390747, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.144063420128077e-05, + "rewards/margins": 0.13864146173000336, + "rewards/rejected": -0.13871291279792786, + "step": 6647 + }, + { + "epoch": 4.597510373443983, + "grad_norm": 12.470500946044922, + "learning_rate": 3.0013831258644538e-05, + "log_odds_chosen": 11.022992134094238, + "log_odds_ratio": -2.5863595510600135e-05, + "logits/chosen": -0.43043211102485657, + "logits/rejected": -0.5416529774665833, + "logps/chosen": -0.00022176577476784587, + "logps/rejected": -2.37802791595459, + "loss": 1.1301, + "nll_loss": 0.2825261056423187, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2176576749188825e-05, + "rewards/margins": 0.2377806156873703, + "rewards/rejected": -0.23780277371406555, + "step": 6648 + }, + { + "epoch": 4.59820193637621, + "grad_norm": 11.435942649841309, + "learning_rate": 3.000998924235439e-05, + "log_odds_chosen": 10.821590423583984, + "log_odds_ratio": -2.8062824640073813e-05, + "logits/chosen": -0.7152904272079468, + "logits/rejected": -0.7026421427726746, + "logps/chosen": -0.000827385054435581, + "logps/rejected": -2.3652238845825195, + "loss": 0.9629, + "nll_loss": 0.2407165765762329, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.273850107798353e-05, + "rewards/margins": 0.23643964529037476, + "rewards/rejected": -0.2365223914384842, + "step": 6649 + }, + { + "epoch": 4.598893499308437, + "grad_norm": 9.441259384155273, + "learning_rate": 3.000614722606424e-05, + "log_odds_chosen": 9.505755424499512, + "log_odds_ratio": -0.0006548656383529305, + "logits/chosen": -0.29557374119758606, + "logits/rejected": -0.3252328634262085, + "logps/chosen": -0.001687394455075264, + "logps/rejected": -1.5947422981262207, + "loss": 1.0515, + "nll_loss": 0.26281145215034485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001687394396867603, + "rewards/margins": 0.15930549800395966, + "rewards/rejected": -0.1594742387533188, + "step": 6650 + }, + { + "epoch": 4.5995850622406635, + "grad_norm": 6.911854267120361, + "learning_rate": 3.000230520977409e-05, + "log_odds_chosen": 9.908332824707031, + "log_odds_ratio": -0.0005658690934069455, + "logits/chosen": -0.3408907949924469, + "logits/rejected": -0.42232099175453186, + "logps/chosen": -0.00037419985164888203, + "logps/rejected": -1.9144506454467773, + "loss": 1.3619, + "nll_loss": 0.3404099941253662, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.741998807527125e-05, + "rewards/margins": 0.19140766561031342, + "rewards/rejected": -0.19144508242607117, + "step": 6651 + }, + { + "epoch": 4.60027662517289, + "grad_norm": 25.65146827697754, + "learning_rate": 2.9998463193483944e-05, + "log_odds_chosen": 10.697637557983398, + "log_odds_ratio": -6.30640861345455e-05, + "logits/chosen": -0.5637302398681641, + "logits/rejected": -0.6442798376083374, + "logps/chosen": -0.00011240709864068776, + "logps/rejected": -1.6637163162231445, + "loss": 0.9666, + "nll_loss": 0.24165429174900055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1240710591664538e-05, + "rewards/margins": 0.16636039316654205, + "rewards/rejected": -0.16637162864208221, + "step": 6652 + }, + { + "epoch": 4.600968188105117, + "grad_norm": 9.794515609741211, + "learning_rate": 2.9994621177193794e-05, + "log_odds_chosen": 9.300960540771484, + "log_odds_ratio": -0.0009341944241896272, + "logits/chosen": -0.3178715109825134, + "logits/rejected": -0.4060957133769989, + "logps/chosen": -0.0024310587905347347, + "logps/rejected": -1.4401997327804565, + "loss": 0.8431, + "nll_loss": 0.2106890082359314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002431058674119413, + "rewards/margins": 0.14377684891223907, + "rewards/rejected": -0.1440199613571167, + "step": 6653 + }, + { + "epoch": 4.601659751037344, + "grad_norm": 7.053846836090088, + "learning_rate": 2.9990779160903643e-05, + "log_odds_chosen": 8.772018432617188, + "log_odds_ratio": -0.0012893974781036377, + "logits/chosen": -0.3111286163330078, + "logits/rejected": -0.42209941148757935, + "logps/chosen": -0.0010461423080414534, + "logps/rejected": -1.2797952890396118, + "loss": 1.3091, + "nll_loss": 0.3271556794643402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001046142351697199, + "rewards/margins": 0.1278749257326126, + "rewards/rejected": -0.12797953188419342, + "step": 6654 + }, + { + "epoch": 4.602351313969571, + "grad_norm": 5.7435407638549805, + "learning_rate": 2.99869371446135e-05, + "log_odds_chosen": 9.842205047607422, + "log_odds_ratio": -0.010721296072006226, + "logits/chosen": -0.5009051561355591, + "logits/rejected": -0.5647544264793396, + "logps/chosen": -0.0034427782520651817, + "logps/rejected": -2.2130255699157715, + "loss": 1.0543, + "nll_loss": 0.2624974548816681, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003442778252065182, + "rewards/margins": 0.22095826268196106, + "rewards/rejected": -0.22130253911018372, + "step": 6655 + }, + { + "epoch": 4.603042876901798, + "grad_norm": 7.0054779052734375, + "learning_rate": 2.9983095128323347e-05, + "log_odds_chosen": 8.576118469238281, + "log_odds_ratio": -0.0006403782172128558, + "logits/chosen": -0.6368761658668518, + "logits/rejected": -0.7257099151611328, + "logps/chosen": -0.0013416606234386563, + "logps/rejected": -1.0327801704406738, + "loss": 0.9233, + "nll_loss": 0.2307559698820114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001341660536127165, + "rewards/margins": 0.10314386337995529, + "rewards/rejected": -0.1032780259847641, + "step": 6656 + }, + { + "epoch": 4.6037344398340245, + "grad_norm": 14.184807777404785, + "learning_rate": 2.9979253112033196e-05, + "log_odds_chosen": 8.543257713317871, + "log_odds_ratio": -0.002161895390599966, + "logits/chosen": -0.7052701711654663, + "logits/rejected": -0.6956675052642822, + "logps/chosen": -0.0020439941436052322, + "logps/rejected": -1.7696683406829834, + "loss": 1.2847, + "nll_loss": 0.3209618330001831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020439941727090627, + "rewards/margins": 0.17676246166229248, + "rewards/rejected": -0.1769668459892273, + "step": 6657 + }, + { + "epoch": 4.604426002766251, + "grad_norm": 15.939481735229492, + "learning_rate": 2.997541109574305e-05, + "log_odds_chosen": 10.017705917358398, + "log_odds_ratio": -0.00015626417007297277, + "logits/chosen": -1.0082440376281738, + "logits/rejected": -1.1075737476348877, + "logps/chosen": -0.0001465157838538289, + "logps/rejected": -1.5758423805236816, + "loss": 0.8355, + "nll_loss": 0.20884928107261658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4651579476776533e-05, + "rewards/margins": 0.15756958723068237, + "rewards/rejected": -0.15758424997329712, + "step": 6658 + }, + { + "epoch": 4.605117565698478, + "grad_norm": 5.694345951080322, + "learning_rate": 2.9971569079452898e-05, + "log_odds_chosen": 9.860860824584961, + "log_odds_ratio": -9.92153218248859e-05, + "logits/chosen": -0.67734694480896, + "logits/rejected": -0.6845455169677734, + "logps/chosen": -0.0003241387603338808, + "logps/rejected": -1.8322769403457642, + "loss": 0.8143, + "nll_loss": 0.20356862246990204, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.241387821617536e-05, + "rewards/margins": 0.1831952929496765, + "rewards/rejected": -0.18322770297527313, + "step": 6659 + }, + { + "epoch": 4.605809128630705, + "grad_norm": 18.043245315551758, + "learning_rate": 2.9967727063162747e-05, + "log_odds_chosen": 9.604475021362305, + "log_odds_ratio": -0.09748678654432297, + "logits/chosen": -0.9602915644645691, + "logits/rejected": -1.0716558694839478, + "logps/chosen": -0.015980206429958344, + "logps/rejected": -1.7664905786514282, + "loss": 1.471, + "nll_loss": 0.3579968810081482, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015980205498635769, + "rewards/margins": 0.17505104839801788, + "rewards/rejected": -0.1766490638256073, + "step": 6660 + }, + { + "epoch": 4.606500691562932, + "grad_norm": 14.086503028869629, + "learning_rate": 2.9963885046872603e-05, + "log_odds_chosen": 9.341814041137695, + "log_odds_ratio": -0.0008506190497428179, + "logits/chosen": -0.623991847038269, + "logits/rejected": -0.6753250360488892, + "logps/chosen": -0.10699453204870224, + "logps/rejected": -2.235908031463623, + "loss": 1.6336, + "nll_loss": 0.4083053469657898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010699453763663769, + "rewards/margins": 0.2128913700580597, + "rewards/rejected": -0.22359079122543335, + "step": 6661 + }, + { + "epoch": 4.607192254495159, + "grad_norm": 13.7391996383667, + "learning_rate": 2.9960043030582452e-05, + "log_odds_chosen": 10.506267547607422, + "log_odds_ratio": -0.00021020628628320992, + "logits/chosen": -1.1174015998840332, + "logits/rejected": -1.1321393251419067, + "logps/chosen": -0.00016034118016250432, + "logps/rejected": -1.6753671169281006, + "loss": 1.1868, + "nll_loss": 0.2966703772544861, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.603411692485679e-05, + "rewards/margins": 0.1675206869840622, + "rewards/rejected": -0.16753672063350677, + "step": 6662 + }, + { + "epoch": 4.6078838174273855, + "grad_norm": 7.0717267990112305, + "learning_rate": 2.99562010142923e-05, + "log_odds_chosen": 10.244396209716797, + "log_odds_ratio": -0.0005225987988524139, + "logits/chosen": -0.3503478765487671, + "logits/rejected": -0.42929190397262573, + "logps/chosen": -0.0022827768698334694, + "logps/rejected": -2.1327946186065674, + "loss": 0.9079, + "nll_loss": 0.2269156128168106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022827771317679435, + "rewards/margins": 0.21305117011070251, + "rewards/rejected": -0.21327945590019226, + "step": 6663 + }, + { + "epoch": 4.608575380359612, + "grad_norm": 8.944998741149902, + "learning_rate": 2.9952358998002157e-05, + "log_odds_chosen": 10.742843627929688, + "log_odds_ratio": -6.899197614984587e-05, + "logits/chosen": -0.6813872456550598, + "logits/rejected": -0.7148157358169556, + "logps/chosen": -0.00014297892630565912, + "logps/rejected": -1.8107256889343262, + "loss": 0.9531, + "nll_loss": 0.23827821016311646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4297892448666971e-05, + "rewards/margins": 0.18105828762054443, + "rewards/rejected": -0.18107259273529053, + "step": 6664 + }, + { + "epoch": 4.609266943291839, + "grad_norm": 7.837378978729248, + "learning_rate": 2.9948516981712006e-05, + "log_odds_chosen": 10.556222915649414, + "log_odds_ratio": -0.0005359348724596202, + "logits/chosen": -0.6345524787902832, + "logits/rejected": -0.6113138794898987, + "logps/chosen": -0.00039145839400589466, + "logps/rejected": -1.9433598518371582, + "loss": 1.4939, + "nll_loss": 0.37342697381973267, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.914583794539794e-05, + "rewards/margins": 0.19429683685302734, + "rewards/rejected": -0.1943359673023224, + "step": 6665 + }, + { + "epoch": 4.609958506224066, + "grad_norm": 7.503310203552246, + "learning_rate": 2.9944674965421855e-05, + "log_odds_chosen": 10.827353477478027, + "log_odds_ratio": -7.019040640443563e-05, + "logits/chosen": -0.9183793067932129, + "logits/rejected": -0.9392495155334473, + "logps/chosen": -0.00016982029774226248, + "logps/rejected": -1.765545129776001, + "loss": 0.7157, + "nll_loss": 0.17891749739646912, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.698203050182201e-05, + "rewards/margins": 0.17653754353523254, + "rewards/rejected": -0.17655451595783234, + "step": 6666 + }, + { + "epoch": 4.610650069156293, + "grad_norm": 7.25770378112793, + "learning_rate": 2.9940832949131704e-05, + "log_odds_chosen": 9.678262710571289, + "log_odds_ratio": -0.000425715115852654, + "logits/chosen": -0.500344455242157, + "logits/rejected": -0.5148862600326538, + "logps/chosen": -0.0018647005781531334, + "logps/rejected": -2.219193458557129, + "loss": 1.1087, + "nll_loss": 0.2771277129650116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018647006072569638, + "rewards/margins": 0.22173286974430084, + "rewards/rejected": -0.22191934287548065, + "step": 6667 + }, + { + "epoch": 4.61134163208852, + "grad_norm": 10.05141830444336, + "learning_rate": 2.9936990932841556e-05, + "log_odds_chosen": 10.017276763916016, + "log_odds_ratio": -0.0006272240425460041, + "logits/chosen": -0.5370017290115356, + "logits/rejected": -0.625502347946167, + "logps/chosen": -0.020561806857585907, + "logps/rejected": -2.392383098602295, + "loss": 1.302, + "nll_loss": 0.3254457712173462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020561807323247194, + "rewards/margins": 0.2371821403503418, + "rewards/rejected": -0.23923832178115845, + "step": 6668 + }, + { + "epoch": 4.6120331950207465, + "grad_norm": 8.749216079711914, + "learning_rate": 2.9933148916551405e-05, + "log_odds_chosen": 9.878754615783691, + "log_odds_ratio": -0.00017952104099094868, + "logits/chosen": -0.6044222116470337, + "logits/rejected": -0.6368842124938965, + "logps/chosen": -0.0011460966197773814, + "logps/rejected": -1.6823590993881226, + "loss": 0.9386, + "nll_loss": 0.2346310317516327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011460966197773814, + "rewards/margins": 0.1681213080883026, + "rewards/rejected": -0.1682359278202057, + "step": 6669 + }, + { + "epoch": 4.612724757952973, + "grad_norm": 8.973983764648438, + "learning_rate": 2.9929306900261255e-05, + "log_odds_chosen": 10.453866958618164, + "log_odds_ratio": -0.00011220773740205914, + "logits/chosen": -0.29473161697387695, + "logits/rejected": -0.3772168755531311, + "logps/chosen": -0.0007200624095275998, + "logps/rejected": -1.8716492652893066, + "loss": 1.2924, + "nll_loss": 0.32308429479599, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.200624531833455e-05, + "rewards/margins": 0.18709293007850647, + "rewards/rejected": -0.18716493248939514, + "step": 6670 + }, + { + "epoch": 4.6134163208852, + "grad_norm": 11.71799373626709, + "learning_rate": 2.992546488397111e-05, + "log_odds_chosen": 11.238321304321289, + "log_odds_ratio": -1.8379301764070988e-05, + "logits/chosen": -0.7530157566070557, + "logits/rejected": -0.7985197901725769, + "logps/chosen": -7.886964158387855e-05, + "logps/rejected": -1.859510898590088, + "loss": 0.8885, + "nll_loss": 0.22211512923240662, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.886964340286795e-06, + "rewards/margins": 0.18594320118427277, + "rewards/rejected": -0.1859510838985443, + "step": 6671 + }, + { + "epoch": 4.614107883817427, + "grad_norm": 11.972702980041504, + "learning_rate": 2.992162286768096e-05, + "log_odds_chosen": 10.402179718017578, + "log_odds_ratio": -4.1663435695227236e-05, + "logits/chosen": -0.7485939860343933, + "logits/rejected": -0.8353657126426697, + "logps/chosen": -0.0001987726427614689, + "logps/rejected": -1.8271870613098145, + "loss": 0.9217, + "nll_loss": 0.23042625188827515, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.987726500374265e-05, + "rewards/margins": 0.1826988309621811, + "rewards/rejected": -0.18271872401237488, + "step": 6672 + }, + { + "epoch": 4.614799446749654, + "grad_norm": 6.361045837402344, + "learning_rate": 2.991778085139081e-05, + "log_odds_chosen": 9.737350463867188, + "log_odds_ratio": -0.0010446513770148158, + "logits/chosen": -0.6925402283668518, + "logits/rejected": -0.7096436023712158, + "logps/chosen": -0.0007525760447606444, + "logps/rejected": -1.0624769926071167, + "loss": 1.0312, + "nll_loss": 0.2577068507671356, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.52576015656814e-05, + "rewards/margins": 0.10617244243621826, + "rewards/rejected": -0.10624770075082779, + "step": 6673 + }, + { + "epoch": 4.615491009681881, + "grad_norm": 8.369453430175781, + "learning_rate": 2.9913938835100664e-05, + "log_odds_chosen": 9.764873504638672, + "log_odds_ratio": -0.0006577305030077696, + "logits/chosen": -0.2709742486476898, + "logits/rejected": -0.31986692547798157, + "logps/chosen": -0.001359713263809681, + "logps/rejected": -1.6538102626800537, + "loss": 0.9001, + "nll_loss": 0.22496314346790314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000135971320560202, + "rewards/margins": 0.16524508595466614, + "rewards/rejected": -0.1653810441493988, + "step": 6674 + }, + { + "epoch": 4.6161825726141075, + "grad_norm": 9.031880378723145, + "learning_rate": 2.9910096818810513e-05, + "log_odds_chosen": 11.301053047180176, + "log_odds_ratio": -1.9644532585516572e-05, + "logits/chosen": -0.4663847088813782, + "logits/rejected": -0.5085259079933167, + "logps/chosen": -0.0001327487698290497, + "logps/rejected": -2.331967830657959, + "loss": 0.7171, + "nll_loss": 0.1792832463979721, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.327487643720815e-05, + "rewards/margins": 0.23318353295326233, + "rewards/rejected": -0.23319679498672485, + "step": 6675 + }, + { + "epoch": 4.616874135546334, + "grad_norm": 6.838178634643555, + "learning_rate": 2.9906254802520362e-05, + "log_odds_chosen": 10.975125312805176, + "log_odds_ratio": -0.0002458269300404936, + "logits/chosen": -0.6259713172912598, + "logits/rejected": -0.676510214805603, + "logps/chosen": -0.0003355609951540828, + "logps/rejected": -2.4494009017944336, + "loss": 0.725, + "nll_loss": 0.18123364448547363, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3556098060216755e-05, + "rewards/margins": 0.24490654468536377, + "rewards/rejected": -0.2449401170015335, + "step": 6676 + }, + { + "epoch": 4.617565698478561, + "grad_norm": 5.756682872772217, + "learning_rate": 2.9902412786230215e-05, + "log_odds_chosen": 10.777154922485352, + "log_odds_ratio": -7.546051347162575e-05, + "logits/chosen": -0.6829020380973816, + "logits/rejected": -0.7713437080383301, + "logps/chosen": -0.00017902077524922788, + "logps/rejected": -2.108368158340454, + "loss": 0.6858, + "nll_loss": 0.17143920063972473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.790207898011431e-05, + "rewards/margins": 0.21081890165805817, + "rewards/rejected": -0.21083681285381317, + "step": 6677 + }, + { + "epoch": 4.618257261410788, + "grad_norm": 11.32691764831543, + "learning_rate": 2.9898570769940064e-05, + "log_odds_chosen": 10.667409896850586, + "log_odds_ratio": -2.6573019567877054e-05, + "logits/chosen": -0.674767255783081, + "logits/rejected": -0.7759418487548828, + "logps/chosen": -0.00016158992366399616, + "logps/rejected": -1.9589648246765137, + "loss": 1.1013, + "nll_loss": 0.27531570196151733, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6158992366399616e-05, + "rewards/margins": 0.19588032364845276, + "rewards/rejected": -0.1958964765071869, + "step": 6678 + }, + { + "epoch": 4.618948824343015, + "grad_norm": 13.166940689086914, + "learning_rate": 2.9894728753649913e-05, + "log_odds_chosen": 10.21392822265625, + "log_odds_ratio": -0.000317515863571316, + "logits/chosen": -0.8075710535049438, + "logits/rejected": -0.8033132553100586, + "logps/chosen": -0.003124582814052701, + "logps/rejected": -2.2208189964294434, + "loss": 1.0702, + "nll_loss": 0.2675139307975769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003124582872260362, + "rewards/margins": 0.22176943719387054, + "rewards/rejected": -0.22208191454410553, + "step": 6679 + }, + { + "epoch": 4.619640387275242, + "grad_norm": 9.520648956298828, + "learning_rate": 2.989088673735977e-05, + "log_odds_chosen": 10.81861686706543, + "log_odds_ratio": -6.064638728275895e-05, + "logits/chosen": -0.5004931092262268, + "logits/rejected": -0.6433913707733154, + "logps/chosen": -0.0003089347155764699, + "logps/rejected": -2.1328179836273193, + "loss": 0.746, + "nll_loss": 0.1864846795797348, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.089347228524275e-05, + "rewards/margins": 0.21325090527534485, + "rewards/rejected": -0.2132818102836609, + "step": 6680 + }, + { + "epoch": 4.6203319502074685, + "grad_norm": 12.623364448547363, + "learning_rate": 2.9887044721069618e-05, + "log_odds_chosen": 8.48847770690918, + "log_odds_ratio": -0.19390060007572174, + "logits/chosen": -0.7172576189041138, + "logits/rejected": -0.793793261051178, + "logps/chosen": -0.027234375476837158, + "logps/rejected": -1.615464448928833, + "loss": 0.7798, + "nll_loss": 0.17555385828018188, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002723437501117587, + "rewards/margins": 0.15882301330566406, + "rewards/rejected": -0.16154645383358002, + "step": 6681 + }, + { + "epoch": 4.621023513139695, + "grad_norm": 16.15542221069336, + "learning_rate": 2.9883202704779467e-05, + "log_odds_chosen": 10.003597259521484, + "log_odds_ratio": -0.0015024865278974175, + "logits/chosen": -0.5204415917396545, + "logits/rejected": -0.625032901763916, + "logps/chosen": -0.0031819252762943506, + "logps/rejected": -2.1989731788635254, + "loss": 0.9172, + "nll_loss": 0.2291489839553833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031819252762943506, + "rewards/margins": 0.21957910060882568, + "rewards/rejected": -0.2198973149061203, + "step": 6682 + }, + { + "epoch": 4.621715076071922, + "grad_norm": 15.578567504882812, + "learning_rate": 2.9879360688489323e-05, + "log_odds_chosen": 11.986330032348633, + "log_odds_ratio": -2.9383349101408385e-05, + "logits/chosen": -0.6631225943565369, + "logits/rejected": -0.7706457376480103, + "logps/chosen": -0.0003626207762863487, + "logps/rejected": -3.311293125152588, + "loss": 1.02, + "nll_loss": 0.2550080120563507, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.626207762863487e-05, + "rewards/margins": 0.3310930132865906, + "rewards/rejected": -0.3311293125152588, + "step": 6683 + }, + { + "epoch": 4.622406639004149, + "grad_norm": 10.762539863586426, + "learning_rate": 2.9875518672199172e-05, + "log_odds_chosen": 9.830475807189941, + "log_odds_ratio": -0.0014509977772831917, + "logits/chosen": -0.36546534299850464, + "logits/rejected": -0.35679420828819275, + "logps/chosen": -0.002010797383263707, + "logps/rejected": -1.9924137592315674, + "loss": 0.9966, + "nll_loss": 0.2490108609199524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020107974705751985, + "rewards/margins": 0.19904029369354248, + "rewards/rejected": -0.19924138486385345, + "step": 6684 + }, + { + "epoch": 4.623098201936376, + "grad_norm": 8.75740909576416, + "learning_rate": 2.987167665590902e-05, + "log_odds_chosen": 9.379545211791992, + "log_odds_ratio": -0.0076197548769414425, + "logits/chosen": -0.4256450831890106, + "logits/rejected": -0.4568532109260559, + "logps/chosen": -0.0030377001967281103, + "logps/rejected": -1.3667492866516113, + "loss": 0.8778, + "nll_loss": 0.21868903934955597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030377000803127885, + "rewards/margins": 0.13637115061283112, + "rewards/rejected": -0.1366749256849289, + "step": 6685 + }, + { + "epoch": 4.623789764868603, + "grad_norm": 14.677732467651367, + "learning_rate": 2.9867834639618873e-05, + "log_odds_chosen": 10.845108032226562, + "log_odds_ratio": -0.001355968415737152, + "logits/chosen": -0.3229242265224457, + "logits/rejected": -0.4597609043121338, + "logps/chosen": -0.00671126926317811, + "logps/rejected": -2.8743770122528076, + "loss": 0.995, + "nll_loss": 0.2486252337694168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006711269379593432, + "rewards/margins": 0.28676658868789673, + "rewards/rejected": -0.2874377369880676, + "step": 6686 + }, + { + "epoch": 4.624481327800829, + "grad_norm": 15.354605674743652, + "learning_rate": 2.9863992623328722e-05, + "log_odds_chosen": 9.914780616760254, + "log_odds_ratio": -0.0001268537453142926, + "logits/chosen": -0.42282068729400635, + "logits/rejected": -0.5017947554588318, + "logps/chosen": -0.0005255751311779022, + "logps/rejected": -1.6391267776489258, + "loss": 0.8269, + "nll_loss": 0.20670977234840393, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255751602817327e-05, + "rewards/margins": 0.16386012732982635, + "rewards/rejected": -0.16391268372535706, + "step": 6687 + }, + { + "epoch": 4.625172890733056, + "grad_norm": 14.23159408569336, + "learning_rate": 2.986015060703857e-05, + "log_odds_chosen": 9.756415367126465, + "log_odds_ratio": -0.00016371147648897022, + "logits/chosen": -0.33216923475265503, + "logits/rejected": -0.42489707469940186, + "logps/chosen": -0.0005953738000243902, + "logps/rejected": -1.8340036869049072, + "loss": 0.952, + "nll_loss": 0.23797619342803955, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.95373785472475e-05, + "rewards/margins": 0.18334081768989563, + "rewards/rejected": -0.18340037763118744, + "step": 6688 + }, + { + "epoch": 4.625864453665283, + "grad_norm": 8.754727363586426, + "learning_rate": 2.9856308590748427e-05, + "log_odds_chosen": 10.663585662841797, + "log_odds_ratio": -0.00016630203754175454, + "logits/chosen": -0.46455100178718567, + "logits/rejected": -0.48470622301101685, + "logps/chosen": -0.00017484716954641044, + "logps/rejected": -1.9499876499176025, + "loss": 0.7495, + "nll_loss": 0.187362939119339, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7484717318438925e-05, + "rewards/margins": 0.19498127698898315, + "rewards/rejected": -0.19499877095222473, + "step": 6689 + }, + { + "epoch": 4.62655601659751, + "grad_norm": 16.4685115814209, + "learning_rate": 2.9852466574458276e-05, + "log_odds_chosen": 9.15806770324707, + "log_odds_ratio": -0.31617483496665955, + "logits/chosen": -0.44033369421958923, + "logits/rejected": -0.5191015601158142, + "logps/chosen": -0.040293145924806595, + "logps/rejected": -2.562631130218506, + "loss": 1.799, + "nll_loss": 0.418133020401001, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004029314499348402, + "rewards/margins": 0.2522338032722473, + "rewards/rejected": -0.2562631368637085, + "step": 6690 + }, + { + "epoch": 4.627247579529737, + "grad_norm": 13.75235366821289, + "learning_rate": 2.9848624558168125e-05, + "log_odds_chosen": 10.918336868286133, + "log_odds_ratio": -0.00023003183014225215, + "logits/chosen": -0.15504327416419983, + "logits/rejected": -0.15456047654151917, + "logps/chosen": -0.001960280817002058, + "logps/rejected": -2.830104351043701, + "loss": 1.0103, + "nll_loss": 0.25255751609802246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019602806423790753, + "rewards/margins": 0.2828144133090973, + "rewards/rejected": -0.28301042318344116, + "step": 6691 + }, + { + "epoch": 4.627939142461964, + "grad_norm": 6.367249965667725, + "learning_rate": 2.984478254187798e-05, + "log_odds_chosen": 10.774747848510742, + "log_odds_ratio": -0.00012701345258392394, + "logits/chosen": -0.5788582563400269, + "logits/rejected": -0.6444706916809082, + "logps/chosen": -0.00046742905396968126, + "logps/rejected": -2.019412040710449, + "loss": 1.008, + "nll_loss": 0.25199609994888306, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6742905396968126e-05, + "rewards/margins": 0.20189446210861206, + "rewards/rejected": -0.20194122195243835, + "step": 6692 + }, + { + "epoch": 4.62863070539419, + "grad_norm": 11.876404762268066, + "learning_rate": 2.984094052558783e-05, + "log_odds_chosen": 9.97553539276123, + "log_odds_ratio": -0.0002181089366786182, + "logits/chosen": 0.052924394607543945, + "logits/rejected": -0.01140899583697319, + "logps/chosen": -0.0006699280929751694, + "logps/rejected": -2.2521538734436035, + "loss": 1.353, + "nll_loss": 0.33823224902153015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.699281220789999e-05, + "rewards/margins": 0.22514837980270386, + "rewards/rejected": -0.2252153903245926, + "step": 6693 + }, + { + "epoch": 4.629322268326417, + "grad_norm": 8.747437477111816, + "learning_rate": 2.983709850929768e-05, + "log_odds_chosen": 9.873016357421875, + "log_odds_ratio": -0.0003475734847597778, + "logits/chosen": -0.8836798071861267, + "logits/rejected": -0.9364238977432251, + "logps/chosen": -0.0031481364276260138, + "logps/rejected": -2.0192229747772217, + "loss": 1.5869, + "nll_loss": 0.39668264985084534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003148136311210692, + "rewards/margins": 0.20160748064517975, + "rewards/rejected": -0.20192229747772217, + "step": 6694 + }, + { + "epoch": 4.630013831258644, + "grad_norm": 14.169708251953125, + "learning_rate": 2.9833256493007532e-05, + "log_odds_chosen": 10.764555931091309, + "log_odds_ratio": -2.6315743525628932e-05, + "logits/chosen": -0.5093426704406738, + "logits/rejected": -0.5909894108772278, + "logps/chosen": -0.0002890737378038466, + "logps/rejected": -2.246802806854248, + "loss": 0.9648, + "nll_loss": 0.24118672311306, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.890737414418254e-05, + "rewards/margins": 0.22465135157108307, + "rewards/rejected": -0.22468025982379913, + "step": 6695 + }, + { + "epoch": 4.630705394190871, + "grad_norm": 10.401350975036621, + "learning_rate": 2.982941447671738e-05, + "log_odds_chosen": 8.530662536621094, + "log_odds_ratio": -0.04185483232140541, + "logits/chosen": -0.09511050581932068, + "logits/rejected": -0.1275017112493515, + "logps/chosen": -0.013998882845044136, + "logps/rejected": -1.9299473762512207, + "loss": 0.9557, + "nll_loss": 0.23473045229911804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013998884242027998, + "rewards/margins": 0.19159486889839172, + "rewards/rejected": -0.19299474358558655, + "step": 6696 + }, + { + "epoch": 4.631396957123098, + "grad_norm": 12.37734603881836, + "learning_rate": 2.982557246042723e-05, + "log_odds_chosen": 9.830785751342773, + "log_odds_ratio": -0.0006041385349817574, + "logits/chosen": -0.5130643844604492, + "logits/rejected": -0.6958015561103821, + "logps/chosen": -0.0009830754715949297, + "logps/rejected": -1.7544587850570679, + "loss": 1.1146, + "nll_loss": 0.27858293056488037, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.830755152506754e-05, + "rewards/margins": 0.17534756660461426, + "rewards/rejected": -0.17544588446617126, + "step": 6697 + }, + { + "epoch": 4.632088520055325, + "grad_norm": 14.123680114746094, + "learning_rate": 2.9821730444137086e-05, + "log_odds_chosen": 10.47813606262207, + "log_odds_ratio": -5.120155037730001e-05, + "logits/chosen": -0.6141031980514526, + "logits/rejected": -0.6569243669509888, + "logps/chosen": -0.00016339441935997456, + "logps/rejected": -1.7974165678024292, + "loss": 1.1983, + "nll_loss": 0.29958146810531616, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6339441572199576e-05, + "rewards/margins": 0.17972531914710999, + "rewards/rejected": -0.17974165081977844, + "step": 6698 + }, + { + "epoch": 4.632780082987551, + "grad_norm": 9.06856918334961, + "learning_rate": 2.9817888427846935e-05, + "log_odds_chosen": 10.46054458618164, + "log_odds_ratio": -7.147344149416313e-05, + "logits/chosen": -0.7953734397888184, + "logits/rejected": -0.850338339805603, + "logps/chosen": -0.0003077928558923304, + "logps/rejected": -1.904348611831665, + "loss": 1.7438, + "nll_loss": 0.43594974279403687, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0779283406445757e-05, + "rewards/margins": 0.19040407240390778, + "rewards/rejected": -0.19043487310409546, + "step": 6699 + }, + { + "epoch": 4.633471645919778, + "grad_norm": 8.853693008422852, + "learning_rate": 2.9814046411556784e-05, + "log_odds_chosen": 9.68885326385498, + "log_odds_ratio": -0.0001513104361947626, + "logits/chosen": -0.6373350620269775, + "logits/rejected": -0.6955586671829224, + "logps/chosen": -0.008956330828368664, + "logps/rejected": -1.9807580709457397, + "loss": 1.4056, + "nll_loss": 0.35137683153152466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008956331294029951, + "rewards/margins": 0.19718018174171448, + "rewards/rejected": -0.1980758011341095, + "step": 6700 + }, + { + "epoch": 4.634163208852005, + "grad_norm": 6.464890480041504, + "learning_rate": 2.981020439526664e-05, + "log_odds_chosen": 9.772090911865234, + "log_odds_ratio": -0.000506377371493727, + "logits/chosen": -0.8277691602706909, + "logits/rejected": -0.8677041530609131, + "logps/chosen": -0.007026453502476215, + "logps/rejected": -1.6709896326065063, + "loss": 0.7766, + "nll_loss": 0.19409973919391632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007026453968137503, + "rewards/margins": 0.16639631986618042, + "rewards/rejected": -0.1670989692211151, + "step": 6701 + }, + { + "epoch": 4.634854771784232, + "grad_norm": 7.539913654327393, + "learning_rate": 2.980636237897649e-05, + "log_odds_chosen": 10.478494644165039, + "log_odds_ratio": -0.0006916585261933506, + "logits/chosen": -1.0056345462799072, + "logits/rejected": -0.9335618019104004, + "logps/chosen": -0.0024261826183646917, + "logps/rejected": -2.136960029602051, + "loss": 1.0764, + "nll_loss": 0.26902419328689575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024261824728455395, + "rewards/margins": 0.21345339715480804, + "rewards/rejected": -0.21369600296020508, + "step": 6702 + }, + { + "epoch": 4.635546334716459, + "grad_norm": 12.231609344482422, + "learning_rate": 2.9802520362686338e-05, + "log_odds_chosen": 10.67332649230957, + "log_odds_ratio": -3.185367677360773e-05, + "logits/chosen": -0.632622241973877, + "logits/rejected": -0.6896726489067078, + "logps/chosen": -0.00011290707334410399, + "logps/rejected": -1.5393511056900024, + "loss": 1.2231, + "nll_loss": 0.30576059222221375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1290708243905101e-05, + "rewards/margins": 0.15392382442951202, + "rewards/rejected": -0.15393511950969696, + "step": 6703 + }, + { + "epoch": 4.6362378976486855, + "grad_norm": 9.596137046813965, + "learning_rate": 2.979867834639619e-05, + "log_odds_chosen": 10.572694778442383, + "log_odds_ratio": -9.505627531325445e-05, + "logits/chosen": -0.7113038897514343, + "logits/rejected": -0.7726638317108154, + "logps/chosen": -0.0003162118955515325, + "logps/rejected": -2.251645088195801, + "loss": 1.2044, + "nll_loss": 0.30109548568725586, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.162118809996173e-05, + "rewards/margins": 0.22513288259506226, + "rewards/rejected": -0.2251645028591156, + "step": 6704 + }, + { + "epoch": 4.636929460580912, + "grad_norm": 7.570445537567139, + "learning_rate": 2.979483633010604e-05, + "log_odds_chosen": 10.802045822143555, + "log_odds_ratio": -4.337707287049852e-05, + "logits/chosen": -0.5815101861953735, + "logits/rejected": -0.5723499059677124, + "logps/chosen": -0.00032818655017763376, + "logps/rejected": -2.4690756797790527, + "loss": 1.1733, + "nll_loss": 0.29332929849624634, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2818654290167615e-05, + "rewards/margins": 0.24687474966049194, + "rewards/rejected": -0.2469075620174408, + "step": 6705 + }, + { + "epoch": 4.637621023513139, + "grad_norm": 11.220012664794922, + "learning_rate": 2.9790994313815888e-05, + "log_odds_chosen": 10.171663284301758, + "log_odds_ratio": -0.00024045373720582575, + "logits/chosen": -0.6686966419219971, + "logits/rejected": -0.7283098697662354, + "logps/chosen": -0.00045101437717676163, + "logps/rejected": -1.970219612121582, + "loss": 1.1927, + "nll_loss": 0.29814326763153076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5101438445271924e-05, + "rewards/margins": 0.19697685539722443, + "rewards/rejected": -0.1970219612121582, + "step": 6706 + }, + { + "epoch": 4.638312586445366, + "grad_norm": 10.200971603393555, + "learning_rate": 2.9787152297525744e-05, + "log_odds_chosen": 9.55989933013916, + "log_odds_ratio": -0.001099316868931055, + "logits/chosen": -0.6456737518310547, + "logits/rejected": -0.6532997488975525, + "logps/chosen": -0.0012371373595669866, + "logps/rejected": -2.002163887023926, + "loss": 1.0476, + "nll_loss": 0.26179665327072144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001237137330463156, + "rewards/margins": 0.20009265840053558, + "rewards/rejected": -0.2002163827419281, + "step": 6707 + }, + { + "epoch": 4.639004149377593, + "grad_norm": 7.387045860290527, + "learning_rate": 2.9783310281235593e-05, + "log_odds_chosen": 9.868995666503906, + "log_odds_ratio": -0.00022995221661403775, + "logits/chosen": -0.5058600306510925, + "logits/rejected": -0.5563660264015198, + "logps/chosen": -0.0004528115096036345, + "logps/rejected": -2.0728092193603516, + "loss": 0.7037, + "nll_loss": 0.17589400708675385, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528115096036345e-05, + "rewards/margins": 0.2072356641292572, + "rewards/rejected": -0.2072809487581253, + "step": 6708 + }, + { + "epoch": 4.63969571230982, + "grad_norm": 6.884265899658203, + "learning_rate": 2.9779468264945442e-05, + "log_odds_chosen": 9.497474670410156, + "log_odds_ratio": -0.006152651272714138, + "logits/chosen": -0.453260600566864, + "logits/rejected": -0.4585949778556824, + "logps/chosen": -0.015122218057513237, + "logps/rejected": -2.5086190700531006, + "loss": 1.8145, + "nll_loss": 0.4530009627342224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001512221759185195, + "rewards/margins": 0.2493496686220169, + "rewards/rejected": -0.25086188316345215, + "step": 6709 + }, + { + "epoch": 4.6403872752420465, + "grad_norm": 5.996611595153809, + "learning_rate": 2.9775626248655298e-05, + "log_odds_chosen": 9.754794120788574, + "log_odds_ratio": -0.0006676408229395747, + "logits/chosen": -0.34979021549224854, + "logits/rejected": -0.3468340039253235, + "logps/chosen": -0.0010450142435729504, + "logps/rejected": -2.0860064029693604, + "loss": 1.5725, + "nll_loss": 0.39306214451789856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010450142144691199, + "rewards/margins": 0.20849615335464478, + "rewards/rejected": -0.20860064029693604, + "step": 6710 + }, + { + "epoch": 4.641078838174274, + "grad_norm": 15.907814025878906, + "learning_rate": 2.9771784232365147e-05, + "log_odds_chosen": 10.96168327331543, + "log_odds_ratio": -6.310870230663568e-05, + "logits/chosen": -0.27718105912208557, + "logits/rejected": -0.4333638846874237, + "logps/chosen": -0.00034265409340150654, + "logps/rejected": -2.578684091567993, + "loss": 0.8577, + "nll_loss": 0.21442833542823792, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.426541297812946e-05, + "rewards/margins": 0.25783413648605347, + "rewards/rejected": -0.2578684091567993, + "step": 6711 + }, + { + "epoch": 4.641770401106501, + "grad_norm": 6.910033226013184, + "learning_rate": 2.9767942216074996e-05, + "log_odds_chosen": 10.08421516418457, + "log_odds_ratio": -0.00010271971405018121, + "logits/chosen": -0.7868772745132446, + "logits/rejected": -0.9348423480987549, + "logps/chosen": -0.00020229278015904129, + "logps/rejected": -1.6391842365264893, + "loss": 0.7629, + "nll_loss": 0.19071084260940552, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0229277652106248e-05, + "rewards/margins": 0.16389819979667664, + "rewards/rejected": -0.1639184206724167, + "step": 6712 + }, + { + "epoch": 4.642461964038728, + "grad_norm": 10.08545970916748, + "learning_rate": 2.976410019978485e-05, + "log_odds_chosen": 11.719478607177734, + "log_odds_ratio": -2.821564339683391e-05, + "logits/chosen": -0.612644612789154, + "logits/rejected": -0.666480541229248, + "logps/chosen": -0.00043208003626205027, + "logps/rejected": -3.3896830081939697, + "loss": 1.0204, + "nll_loss": 0.25510790944099426, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.320800144341774e-05, + "rewards/margins": 0.3389251232147217, + "rewards/rejected": -0.33896830677986145, + "step": 6713 + }, + { + "epoch": 4.643153526970955, + "grad_norm": 8.085092544555664, + "learning_rate": 2.9760258183494698e-05, + "log_odds_chosen": 9.24812126159668, + "log_odds_ratio": -0.00013786503404844552, + "logits/chosen": -0.44048011302948, + "logits/rejected": -0.4726550877094269, + "logps/chosen": -0.0008194476831704378, + "logps/rejected": -1.8271818161010742, + "loss": 1.387, + "nll_loss": 0.34672486782073975, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.194477413780987e-05, + "rewards/margins": 0.18263621628284454, + "rewards/rejected": -0.1827181577682495, + "step": 6714 + }, + { + "epoch": 4.643845089903182, + "grad_norm": 15.791669845581055, + "learning_rate": 2.9756416167204547e-05, + "log_odds_chosen": 10.043708801269531, + "log_odds_ratio": -0.00042942730942741036, + "logits/chosen": -0.10093079507350922, + "logits/rejected": -0.1990877091884613, + "logps/chosen": -0.0007182598346844316, + "logps/rejected": -1.9333560466766357, + "loss": 0.9672, + "nll_loss": 0.24174702167510986, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.182598346844316e-05, + "rewards/margins": 0.19326378405094147, + "rewards/rejected": -0.1933356076478958, + "step": 6715 + }, + { + "epoch": 4.644536652835408, + "grad_norm": 20.379234313964844, + "learning_rate": 2.9752574150914403e-05, + "log_odds_chosen": 10.522616386413574, + "log_odds_ratio": -0.0002614251570776105, + "logits/chosen": -0.4869431257247925, + "logits/rejected": -0.6128153800964355, + "logps/chosen": -0.00046562464558519423, + "logps/rejected": -2.1855411529541016, + "loss": 0.9109, + "nll_loss": 0.2276930809020996, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6562465286115184e-05, + "rewards/margins": 0.2185075581073761, + "rewards/rejected": -0.21855413913726807, + "step": 6716 + }, + { + "epoch": 4.645228215767635, + "grad_norm": 8.88293743133545, + "learning_rate": 2.974873213462425e-05, + "log_odds_chosen": 10.745494842529297, + "log_odds_ratio": -0.0006010960787534714, + "logits/chosen": -0.540441632270813, + "logits/rejected": -0.6040372252464294, + "logps/chosen": -0.0009351319749839604, + "logps/rejected": -1.978389859199524, + "loss": 0.9779, + "nll_loss": 0.24442104995250702, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.351320477435365e-05, + "rewards/margins": 0.19774548709392548, + "rewards/rejected": -0.19783899188041687, + "step": 6717 + }, + { + "epoch": 4.645919778699862, + "grad_norm": 9.662896156311035, + "learning_rate": 2.97448901183341e-05, + "log_odds_chosen": 10.538196563720703, + "log_odds_ratio": -0.03499438986182213, + "logits/chosen": -0.39672914147377014, + "logits/rejected": -0.4780488908290863, + "logps/chosen": -0.007500995881855488, + "logps/rejected": -3.2057886123657227, + "loss": 1.1877, + "nll_loss": 0.29342931509017944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000750099599827081, + "rewards/margins": 0.3198287785053253, + "rewards/rejected": -0.3205788731575012, + "step": 6718 + }, + { + "epoch": 4.646611341632089, + "grad_norm": 12.759986877441406, + "learning_rate": 2.9741048102043957e-05, + "log_odds_chosen": 9.300508499145508, + "log_odds_ratio": -0.0013926469255238771, + "logits/chosen": -0.0835866630077362, + "logits/rejected": -0.16006775200366974, + "logps/chosen": -0.002518139313906431, + "logps/rejected": -2.1676414012908936, + "loss": 1.5449, + "nll_loss": 0.3860914707183838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025181396631523967, + "rewards/margins": 0.21651235222816467, + "rewards/rejected": -0.2167641669511795, + "step": 6719 + }, + { + "epoch": 4.647302904564316, + "grad_norm": 14.2500581741333, + "learning_rate": 2.9737206085753806e-05, + "log_odds_chosen": 11.568772315979004, + "log_odds_ratio": -1.765798151609488e-05, + "logits/chosen": -0.4689595401287079, + "logits/rejected": -0.598329484462738, + "logps/chosen": -0.00032325286883860826, + "logps/rejected": -3.025010585784912, + "loss": 0.9417, + "nll_loss": 0.23542800545692444, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.232528979424387e-05, + "rewards/margins": 0.30246874690055847, + "rewards/rejected": -0.30250105261802673, + "step": 6720 + }, + { + "epoch": 4.6479944674965425, + "grad_norm": 6.891107082366943, + "learning_rate": 2.9733364069463655e-05, + "log_odds_chosen": 10.009754180908203, + "log_odds_ratio": -0.00011356735194567591, + "logits/chosen": -1.004388689994812, + "logits/rejected": -1.014186143875122, + "logps/chosen": -0.0022962328512221575, + "logps/rejected": -2.5089192390441895, + "loss": 1.0871, + "nll_loss": 0.2717720866203308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002296233142260462, + "rewards/margins": 0.25066232681274414, + "rewards/rejected": -0.25089192390441895, + "step": 6721 + }, + { + "epoch": 4.648686030428769, + "grad_norm": 15.071685791015625, + "learning_rate": 2.9729522053173507e-05, + "log_odds_chosen": 10.099405288696289, + "log_odds_ratio": -0.00013610887981485575, + "logits/chosen": -0.996317982673645, + "logits/rejected": -1.0054402351379395, + "logps/chosen": -0.00043816506513394415, + "logps/rejected": -1.9022955894470215, + "loss": 1.911, + "nll_loss": 0.47772806882858276, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.381650796858594e-05, + "rewards/margins": 0.19018574059009552, + "rewards/rejected": -0.19022956490516663, + "step": 6722 + }, + { + "epoch": 4.649377593360996, + "grad_norm": 10.62243938446045, + "learning_rate": 2.9725680036883356e-05, + "log_odds_chosen": 10.941905975341797, + "log_odds_ratio": -3.1653813493903726e-05, + "logits/chosen": -0.5696731209754944, + "logits/rejected": -0.5938451886177063, + "logps/chosen": -0.00010829799430212006, + "logps/rejected": -1.8097453117370605, + "loss": 0.9972, + "nll_loss": 0.24928641319274902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0829799066414125e-05, + "rewards/margins": 0.1809636950492859, + "rewards/rejected": -0.180974543094635, + "step": 6723 + }, + { + "epoch": 4.650069156293223, + "grad_norm": 6.662535667419434, + "learning_rate": 2.9721838020593205e-05, + "log_odds_chosen": 9.740900993347168, + "log_odds_ratio": -0.00012547167716547847, + "logits/chosen": -0.6915889978408813, + "logits/rejected": -0.6611911058425903, + "logps/chosen": -0.000395122857298702, + "logps/rejected": -1.763127088546753, + "loss": 1.0219, + "nll_loss": 0.2554568648338318, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9512287912657484e-05, + "rewards/margins": 0.1762731969356537, + "rewards/rejected": -0.17631271481513977, + "step": 6724 + }, + { + "epoch": 4.65076071922545, + "grad_norm": 8.516170501708984, + "learning_rate": 2.971799600430306e-05, + "log_odds_chosen": 9.447857856750488, + "log_odds_ratio": -0.0008436216157861054, + "logits/chosen": -0.9329970479011536, + "logits/rejected": -0.9244383573532104, + "logps/chosen": -0.002910643583163619, + "logps/rejected": -2.0838112831115723, + "loss": 1.6259, + "nll_loss": 0.40639549493789673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002910643524955958, + "rewards/margins": 0.20809006690979004, + "rewards/rejected": -0.20838113129138947, + "step": 6725 + }, + { + "epoch": 4.651452282157677, + "grad_norm": 7.389032363891602, + "learning_rate": 2.971415398801291e-05, + "log_odds_chosen": 10.360387802124023, + "log_odds_ratio": -4.4586155127035454e-05, + "logits/chosen": -0.48345616459846497, + "logits/rejected": -0.48519355058670044, + "logps/chosen": -0.0008797052432782948, + "logps/rejected": -2.5228095054626465, + "loss": 0.9653, + "nll_loss": 0.2413209080696106, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.7970525783021e-05, + "rewards/margins": 0.25219297409057617, + "rewards/rejected": -0.25228095054626465, + "step": 6726 + }, + { + "epoch": 4.6521438450899035, + "grad_norm": 10.816040992736816, + "learning_rate": 2.971031197172276e-05, + "log_odds_chosen": 11.398842811584473, + "log_odds_ratio": -1.864444675447885e-05, + "logits/chosen": -0.472339928150177, + "logits/rejected": -0.501425564289093, + "logps/chosen": -0.00012852560030296445, + "logps/rejected": -2.451239585876465, + "loss": 0.8833, + "nll_loss": 0.22082111239433289, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2852560757892206e-05, + "rewards/margins": 0.2451111227273941, + "rewards/rejected": -0.2451239675283432, + "step": 6727 + }, + { + "epoch": 4.65283540802213, + "grad_norm": 9.398346900939941, + "learning_rate": 2.9706469955432615e-05, + "log_odds_chosen": 9.201940536499023, + "log_odds_ratio": -0.0012506047496572137, + "logits/chosen": -0.456091046333313, + "logits/rejected": -0.5010344982147217, + "logps/chosen": -0.018052559345960617, + "logps/rejected": -2.53879451751709, + "loss": 1.0523, + "nll_loss": 0.26295000314712524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018052560044452548, + "rewards/margins": 0.2520741820335388, + "rewards/rejected": -0.2538794279098511, + "step": 6728 + }, + { + "epoch": 4.653526970954357, + "grad_norm": 9.349139213562012, + "learning_rate": 2.9702627939142464e-05, + "log_odds_chosen": 8.978045463562012, + "log_odds_ratio": -0.0026159649714827538, + "logits/chosen": -0.6020793914794922, + "logits/rejected": -0.6840202808380127, + "logps/chosen": -0.044595979154109955, + "logps/rejected": -2.194462776184082, + "loss": 1.3426, + "nll_loss": 0.3353860378265381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004459597636014223, + "rewards/margins": 0.2149866819381714, + "rewards/rejected": -0.2194463014602661, + "step": 6729 + }, + { + "epoch": 4.654218533886584, + "grad_norm": 6.017029285430908, + "learning_rate": 2.9698785922852313e-05, + "log_odds_chosen": 10.089128494262695, + "log_odds_ratio": -0.00041553491610102355, + "logits/chosen": -0.7684400081634521, + "logits/rejected": -0.7028689384460449, + "logps/chosen": -0.00038414757000282407, + "logps/rejected": -1.840576171875, + "loss": 0.5227, + "nll_loss": 0.13064205646514893, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.841475700028241e-05, + "rewards/margins": 0.18401920795440674, + "rewards/rejected": -0.18405762314796448, + "step": 6730 + }, + { + "epoch": 4.654910096818811, + "grad_norm": 22.103477478027344, + "learning_rate": 2.9694943906562165e-05, + "log_odds_chosen": 9.41515827178955, + "log_odds_ratio": -0.23566730320453644, + "logits/chosen": -0.9601256251335144, + "logits/rejected": -1.0031406879425049, + "logps/chosen": -0.037202395498752594, + "logps/rejected": -2.241128444671631, + "loss": 1.5142, + "nll_loss": 0.3549814224243164, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003720239968970418, + "rewards/margins": 0.2203925997018814, + "rewards/rejected": -0.2241128385066986, + "step": 6731 + }, + { + "epoch": 4.655601659751038, + "grad_norm": 8.17127799987793, + "learning_rate": 2.9691101890272015e-05, + "log_odds_chosen": 9.61920166015625, + "log_odds_ratio": -0.0006140960031189024, + "logits/chosen": -0.8896629810333252, + "logits/rejected": -0.9282146692276001, + "logps/chosen": -0.0010857881279662251, + "logps/rejected": -1.5764262676239014, + "loss": 0.6273, + "nll_loss": 0.1567581444978714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010857881716219708, + "rewards/margins": 0.15753406286239624, + "rewards/rejected": -0.1576426476240158, + "step": 6732 + }, + { + "epoch": 4.6562932226832645, + "grad_norm": 6.767904281616211, + "learning_rate": 2.9687259873981864e-05, + "log_odds_chosen": 11.021403312683105, + "log_odds_ratio": -7.743245805613697e-05, + "logits/chosen": -0.7251055836677551, + "logits/rejected": -0.6987432837486267, + "logps/chosen": -0.00013259478146210313, + "logps/rejected": -1.8017305135726929, + "loss": 0.5248, + "nll_loss": 0.13118384778499603, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3259479601401836e-05, + "rewards/margins": 0.1801597774028778, + "rewards/rejected": -0.18017305433750153, + "step": 6733 + }, + { + "epoch": 4.656984785615491, + "grad_norm": 14.025473594665527, + "learning_rate": 2.968341785769172e-05, + "log_odds_chosen": 9.692667007446289, + "log_odds_ratio": -0.005310252774506807, + "logits/chosen": -0.8718782067298889, + "logits/rejected": -0.9011925458908081, + "logps/chosen": -0.00242875749245286, + "logps/rejected": -1.8067294359207153, + "loss": 1.2001, + "nll_loss": 0.2994995713233948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002428757434245199, + "rewards/margins": 0.180430069565773, + "rewards/rejected": -0.18067294359207153, + "step": 6734 + }, + { + "epoch": 4.657676348547718, + "grad_norm": 10.400674819946289, + "learning_rate": 2.967957584140157e-05, + "log_odds_chosen": 10.835912704467773, + "log_odds_ratio": -0.00011656482820399106, + "logits/chosen": -0.751828134059906, + "logits/rejected": -0.7273005247116089, + "logps/chosen": -0.00023792957654222846, + "logps/rejected": -2.1163973808288574, + "loss": 1.4838, + "nll_loss": 0.3709476590156555, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3792958018020727e-05, + "rewards/margins": 0.21161596477031708, + "rewards/rejected": -0.21163977682590485, + "step": 6735 + }, + { + "epoch": 4.658367911479945, + "grad_norm": 9.04215145111084, + "learning_rate": 2.9675733825111418e-05, + "log_odds_chosen": 10.640592575073242, + "log_odds_ratio": -6.056567144696601e-05, + "logits/chosen": -0.47394564747810364, + "logits/rejected": -0.48224952816963196, + "logps/chosen": -0.00046629508142359555, + "logps/rejected": -2.1928296089172363, + "loss": 0.8759, + "nll_loss": 0.21896211802959442, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.662950959755108e-05, + "rewards/margins": 0.2192363440990448, + "rewards/rejected": -0.21928296983242035, + "step": 6736 + }, + { + "epoch": 4.659059474412172, + "grad_norm": 10.252370834350586, + "learning_rate": 2.9671891808821273e-05, + "log_odds_chosen": 9.582907676696777, + "log_odds_ratio": -0.0002878558880183846, + "logits/chosen": -0.41817861795425415, + "logits/rejected": -0.48817765712738037, + "logps/chosen": -0.013663535937666893, + "logps/rejected": -2.3412153720855713, + "loss": 1.0591, + "nll_loss": 0.2647481858730316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013663534773513675, + "rewards/margins": 0.23275518417358398, + "rewards/rejected": -0.23412156105041504, + "step": 6737 + }, + { + "epoch": 4.659751037344399, + "grad_norm": 8.530871391296387, + "learning_rate": 2.9668049792531122e-05, + "log_odds_chosen": 10.306585311889648, + "log_odds_ratio": -0.00010736883996287361, + "logits/chosen": -0.975071907043457, + "logits/rejected": -0.9711197018623352, + "logps/chosen": -0.00044086261186748743, + "logps/rejected": -1.8548561334609985, + "loss": 0.894, + "nll_loss": 0.22348177433013916, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.408626045915298e-05, + "rewards/margins": 0.1854415386915207, + "rewards/rejected": -0.1854856312274933, + "step": 6738 + }, + { + "epoch": 4.6604426002766255, + "grad_norm": 8.039578437805176, + "learning_rate": 2.966420777624097e-05, + "log_odds_chosen": 10.206424713134766, + "log_odds_ratio": -0.00015417771646752954, + "logits/chosen": -0.329103946685791, + "logits/rejected": -0.362267404794693, + "logps/chosen": -0.0006552881095558405, + "logps/rejected": -2.4694182872772217, + "loss": 0.8473, + "nll_loss": 0.21181637048721313, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.552880950039253e-05, + "rewards/margins": 0.2468762993812561, + "rewards/rejected": -0.24694183468818665, + "step": 6739 + }, + { + "epoch": 4.661134163208852, + "grad_norm": 25.52229118347168, + "learning_rate": 2.9660365759950824e-05, + "log_odds_chosen": 9.05695629119873, + "log_odds_ratio": -0.016237886622548103, + "logits/chosen": -0.40049076080322266, + "logits/rejected": -0.47549617290496826, + "logps/chosen": -0.036526232957839966, + "logps/rejected": -1.8961656093597412, + "loss": 1.2379, + "nll_loss": 0.3078601658344269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036526236217468977, + "rewards/margins": 0.1859639585018158, + "rewards/rejected": -0.18961656093597412, + "step": 6740 + }, + { + "epoch": 4.661825726141079, + "grad_norm": 9.990700721740723, + "learning_rate": 2.9656523743660673e-05, + "log_odds_chosen": 11.605825424194336, + "log_odds_ratio": -2.0440449588932097e-05, + "logits/chosen": -0.7656416296958923, + "logits/rejected": -0.8104311227798462, + "logps/chosen": -0.00011805635585915297, + "logps/rejected": -2.349353075027466, + "loss": 0.9251, + "nll_loss": 0.2312828004360199, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1805635949713178e-05, + "rewards/margins": 0.23492351174354553, + "rewards/rejected": -0.23493532836437225, + "step": 6741 + }, + { + "epoch": 4.662517289073306, + "grad_norm": 13.435784339904785, + "learning_rate": 2.9652681727370525e-05, + "log_odds_chosen": 10.331426620483398, + "log_odds_ratio": -0.0005047993618063629, + "logits/chosen": -0.8178121447563171, + "logits/rejected": -0.8129633665084839, + "logps/chosen": -0.0013931768480688334, + "logps/rejected": -2.429903984069824, + "loss": 1.2838, + "nll_loss": 0.3208959102630615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013931769353803247, + "rewards/margins": 0.2428511083126068, + "rewards/rejected": -0.2429904341697693, + "step": 6742 + }, + { + "epoch": 4.663208852005533, + "grad_norm": 15.896458625793457, + "learning_rate": 2.9648839711080378e-05, + "log_odds_chosen": 9.113409996032715, + "log_odds_ratio": -0.00859312154352665, + "logits/chosen": -0.47966164350509644, + "logits/rejected": -0.512639582157135, + "logps/chosen": -0.004731173627078533, + "logps/rejected": -2.3525338172912598, + "loss": 1.606, + "nll_loss": 0.4006463289260864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047311733942478895, + "rewards/margins": 0.23478025197982788, + "rewards/rejected": -0.23525336384773254, + "step": 6743 + }, + { + "epoch": 4.66390041493776, + "grad_norm": 22.674076080322266, + "learning_rate": 2.9644997694790227e-05, + "log_odds_chosen": 10.03700065612793, + "log_odds_ratio": -0.00355674815364182, + "logits/chosen": -0.22789748013019562, + "logits/rejected": -0.30582594871520996, + "logps/chosen": -0.02659655548632145, + "logps/rejected": -2.2190628051757812, + "loss": 1.1222, + "nll_loss": 0.2802049517631531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026596554089337587, + "rewards/margins": 0.21924662590026855, + "rewards/rejected": -0.22190627455711365, + "step": 6744 + }, + { + "epoch": 4.6645919778699865, + "grad_norm": 6.5764689445495605, + "learning_rate": 2.9641155678500076e-05, + "log_odds_chosen": 8.920831680297852, + "log_odds_ratio": -0.0003492921532597393, + "logits/chosen": -0.4340789318084717, + "logits/rejected": -0.47069665789604187, + "logps/chosen": -0.0005598999559879303, + "logps/rejected": -1.394984483718872, + "loss": 1.0783, + "nll_loss": 0.26954346895217896, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5989992688409984e-05, + "rewards/margins": 0.13944245874881744, + "rewards/rejected": -0.13949845731258392, + "step": 6745 + }, + { + "epoch": 4.665283540802213, + "grad_norm": 9.251626014709473, + "learning_rate": 2.9637313662209932e-05, + "log_odds_chosen": 9.983993530273438, + "log_odds_ratio": -0.00015731611347291619, + "logits/chosen": -0.758710503578186, + "logits/rejected": -0.8479531407356262, + "logps/chosen": -0.001559579512104392, + "logps/rejected": -2.231616973876953, + "loss": 1.3107, + "nll_loss": 0.3276580274105072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015595793956890702, + "rewards/margins": 0.2230057418346405, + "rewards/rejected": -0.2231617271900177, + "step": 6746 + }, + { + "epoch": 4.66597510373444, + "grad_norm": 14.86572551727295, + "learning_rate": 2.963347164591978e-05, + "log_odds_chosen": 9.572809219360352, + "log_odds_ratio": -0.005756652448326349, + "logits/chosen": -0.013816140592098236, + "logits/rejected": -0.1874159425497055, + "logps/chosen": -0.05472245439887047, + "logps/rejected": -1.966597318649292, + "loss": 1.0252, + "nll_loss": 0.2557242214679718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005472245626151562, + "rewards/margins": 0.19118750095367432, + "rewards/rejected": -0.19665974378585815, + "step": 6747 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 8.244834899902344, + "learning_rate": 2.962962962962963e-05, + "log_odds_chosen": 9.516754150390625, + "log_odds_ratio": -0.00035925908014178276, + "logits/chosen": -0.8049564361572266, + "logits/rejected": -0.857810914516449, + "logps/chosen": -0.0058831567876040936, + "logps/rejected": -2.215717315673828, + "loss": 0.9812, + "nll_loss": 0.24527311325073242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005883157136850059, + "rewards/margins": 0.22098343074321747, + "rewards/rejected": -0.22157174348831177, + "step": 6748 + }, + { + "epoch": 4.667358229598894, + "grad_norm": 7.945059299468994, + "learning_rate": 2.9625787613339482e-05, + "log_odds_chosen": 9.842604637145996, + "log_odds_ratio": -0.0027358822990208864, + "logits/chosen": -0.5420747995376587, + "logits/rejected": -0.5979031324386597, + "logps/chosen": -0.0017957030795514584, + "logps/rejected": -2.231954336166382, + "loss": 1.0751, + "nll_loss": 0.26851096749305725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017957028467208147, + "rewards/margins": 0.22301585972309113, + "rewards/rejected": -0.22319543361663818, + "step": 6749 + }, + { + "epoch": 4.668049792531121, + "grad_norm": 8.897541999816895, + "learning_rate": 2.9621945597049335e-05, + "log_odds_chosen": 10.37528133392334, + "log_odds_ratio": -0.0002767530968412757, + "logits/chosen": -0.3038465976715088, + "logits/rejected": -0.3836557865142822, + "logps/chosen": -0.0015654441667720675, + "logps/rejected": -2.045405626296997, + "loss": 0.9581, + "nll_loss": 0.23949073255062103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015654441085644066, + "rewards/margins": 0.20438402891159058, + "rewards/rejected": -0.20454056560993195, + "step": 6750 + }, + { + "epoch": 4.6687413554633475, + "grad_norm": 14.269709587097168, + "learning_rate": 2.9618103580759184e-05, + "log_odds_chosen": 9.976061820983887, + "log_odds_ratio": -0.003950594458729029, + "logits/chosen": -0.5603251457214355, + "logits/rejected": -0.6679450869560242, + "logps/chosen": -0.0018375938525423408, + "logps/rejected": -1.8488361835479736, + "loss": 0.9504, + "nll_loss": 0.23719294369220734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001837593736127019, + "rewards/margins": 0.1846998631954193, + "rewards/rejected": -0.18488362431526184, + "step": 6751 + }, + { + "epoch": 4.669432918395574, + "grad_norm": 9.75061321258545, + "learning_rate": 2.9614261564469036e-05, + "log_odds_chosen": 10.370887756347656, + "log_odds_ratio": -0.0001470722199883312, + "logits/chosen": -0.7315962910652161, + "logits/rejected": -0.8423492312431335, + "logps/chosen": -0.0003737437364179641, + "logps/rejected": -1.7472317218780518, + "loss": 0.6407, + "nll_loss": 0.16015413403511047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.737437509698793e-05, + "rewards/margins": 0.1746857762336731, + "rewards/rejected": -0.17472316324710846, + "step": 6752 + }, + { + "epoch": 4.670124481327801, + "grad_norm": 6.04262113571167, + "learning_rate": 2.9610419548178885e-05, + "log_odds_chosen": 9.649259567260742, + "log_odds_ratio": -0.00010987659334205091, + "logits/chosen": -0.5185578465461731, + "logits/rejected": -0.5149763822555542, + "logps/chosen": -0.00021657871548086405, + "logps/rejected": -1.3449153900146484, + "loss": 0.8995, + "nll_loss": 0.22486598789691925, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1657871911884286e-05, + "rewards/margins": 0.1344698965549469, + "rewards/rejected": -0.13449154794216156, + "step": 6753 + }, + { + "epoch": 4.670816044260028, + "grad_norm": 8.17538070678711, + "learning_rate": 2.9606577531888734e-05, + "log_odds_chosen": 10.649566650390625, + "log_odds_ratio": -5.23365379194729e-05, + "logits/chosen": -0.3867025673389435, + "logits/rejected": -0.5043997764587402, + "logps/chosen": -0.00031243887497112155, + "logps/rejected": -2.469163179397583, + "loss": 1.3914, + "nll_loss": 0.3478538990020752, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.124388604192063e-05, + "rewards/margins": 0.24688507616519928, + "rewards/rejected": -0.24691632390022278, + "step": 6754 + }, + { + "epoch": 4.671507607192255, + "grad_norm": 7.780572414398193, + "learning_rate": 2.960273551559859e-05, + "log_odds_chosen": 9.025650978088379, + "log_odds_ratio": -0.013080338016152382, + "logits/chosen": -0.8236268162727356, + "logits/rejected": -0.9438607692718506, + "logps/chosen": -0.008138904348015785, + "logps/rejected": -1.5952751636505127, + "loss": 0.9462, + "nll_loss": 0.2352454960346222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008138904813677073, + "rewards/margins": 0.15871362388134003, + "rewards/rejected": -0.1595275104045868, + "step": 6755 + }, + { + "epoch": 4.672199170124482, + "grad_norm": 10.750596046447754, + "learning_rate": 2.959889349930844e-05, + "log_odds_chosen": 10.115555763244629, + "log_odds_ratio": -0.00031970939016900957, + "logits/chosen": -0.7900500297546387, + "logits/rejected": -0.8546708822250366, + "logps/chosen": -0.00033075647661462426, + "logps/rejected": -1.9938645362854004, + "loss": 1.102, + "nll_loss": 0.27545589208602905, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.307564475107938e-05, + "rewards/margins": 0.1993533968925476, + "rewards/rejected": -0.19938646256923676, + "step": 6756 + }, + { + "epoch": 4.672890733056708, + "grad_norm": 7.898308277130127, + "learning_rate": 2.959505148301829e-05, + "log_odds_chosen": 9.88327693939209, + "log_odds_ratio": -0.00012817922106478363, + "logits/chosen": -0.6938830614089966, + "logits/rejected": -0.7637455463409424, + "logps/chosen": -0.00039965560426935554, + "logps/rejected": -1.8069370985031128, + "loss": 1.6343, + "nll_loss": 0.4085546135902405, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.996555824414827e-05, + "rewards/margins": 0.18065372109413147, + "rewards/rejected": -0.18069370090961456, + "step": 6757 + }, + { + "epoch": 4.673582295988935, + "grad_norm": 12.937272071838379, + "learning_rate": 2.9591209466728144e-05, + "log_odds_chosen": 11.028230667114258, + "log_odds_ratio": -1.9701441487995908e-05, + "logits/chosen": -0.4903797507286072, + "logits/rejected": -0.5154451131820679, + "logps/chosen": -0.00011774554150179029, + "logps/rejected": -2.0176382064819336, + "loss": 0.9532, + "nll_loss": 0.238307923078537, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.177455487777479e-05, + "rewards/margins": 0.20175206661224365, + "rewards/rejected": -0.2017638385295868, + "step": 6758 + }, + { + "epoch": 4.674273858921162, + "grad_norm": 9.832000732421875, + "learning_rate": 2.9587367450437993e-05, + "log_odds_chosen": 11.508541107177734, + "log_odds_ratio": -1.2472798516682815e-05, + "logits/chosen": -0.28365594148635864, + "logits/rejected": -0.37036430835723877, + "logps/chosen": -0.00010214448411716148, + "logps/rejected": -2.215914249420166, + "loss": 0.8492, + "nll_loss": 0.21230274438858032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0214447684120387e-05, + "rewards/margins": 0.22158122062683105, + "rewards/rejected": -0.22159142792224884, + "step": 6759 + }, + { + "epoch": 4.674965421853389, + "grad_norm": 18.991636276245117, + "learning_rate": 2.9583525434147842e-05, + "log_odds_chosen": 10.068572998046875, + "log_odds_ratio": -0.00012414647790137678, + "logits/chosen": -0.45053717494010925, + "logits/rejected": -0.5302484035491943, + "logps/chosen": -0.000786515069194138, + "logps/rejected": -2.0815036296844482, + "loss": 0.9152, + "nll_loss": 0.22878772020339966, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.865150837460533e-05, + "rewards/margins": 0.2080717235803604, + "rewards/rejected": -0.20815038681030273, + "step": 6760 + }, + { + "epoch": 4.675656984785616, + "grad_norm": 10.791906356811523, + "learning_rate": 2.9579683417857695e-05, + "log_odds_chosen": 10.770875930786133, + "log_odds_ratio": -4.5727851102128625e-05, + "logits/chosen": -0.692563533782959, + "logits/rejected": -0.785979151725769, + "logps/chosen": -0.00026987557066604495, + "logps/rejected": -2.103133201599121, + "loss": 0.9404, + "nll_loss": 0.23508614301681519, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6987556339008734e-05, + "rewards/margins": 0.21028634905815125, + "rewards/rejected": -0.2103133201599121, + "step": 6761 + }, + { + "epoch": 4.676348547717843, + "grad_norm": 5.9802565574646, + "learning_rate": 2.9575841401567544e-05, + "log_odds_chosen": 10.031012535095215, + "log_odds_ratio": -0.00016975995094981045, + "logits/chosen": -0.717975378036499, + "logits/rejected": -0.7597091197967529, + "logps/chosen": -0.0008287794189527631, + "logps/rejected": -2.2093329429626465, + "loss": 0.8268, + "nll_loss": 0.2066713571548462, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.287794480565935e-05, + "rewards/margins": 0.22085043787956238, + "rewards/rejected": -0.22093330323696136, + "step": 6762 + }, + { + "epoch": 4.677040110650069, + "grad_norm": 11.813547134399414, + "learning_rate": 2.9571999385277393e-05, + "log_odds_chosen": 10.200616836547852, + "log_odds_ratio": -0.0007247006869874895, + "logits/chosen": -0.20666056871414185, + "logits/rejected": -0.32452890276908875, + "logps/chosen": -0.0031705538276582956, + "logps/rejected": -2.181896209716797, + "loss": 0.8624, + "nll_loss": 0.21553808450698853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031705538276582956, + "rewards/margins": 0.21787258982658386, + "rewards/rejected": -0.21818962693214417, + "step": 6763 + }, + { + "epoch": 4.677731673582296, + "grad_norm": 13.046178817749023, + "learning_rate": 2.956815736898725e-05, + "log_odds_chosen": 9.904052734375, + "log_odds_ratio": -0.0009172922000288963, + "logits/chosen": -0.33849143981933594, + "logits/rejected": -0.4988017678260803, + "logps/chosen": -0.0004887538962066174, + "logps/rejected": -1.6745448112487793, + "loss": 1.2527, + "nll_loss": 0.31307268142700195, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887538671027869e-05, + "rewards/margins": 0.1674056053161621, + "rewards/rejected": -0.16745448112487793, + "step": 6764 + }, + { + "epoch": 4.678423236514523, + "grad_norm": 16.583633422851562, + "learning_rate": 2.9564315352697098e-05, + "log_odds_chosen": 11.349023818969727, + "log_odds_ratio": -2.5245026336051524e-05, + "logits/chosen": -0.42005637288093567, + "logits/rejected": -0.4679103493690491, + "logps/chosen": -0.0001921855000546202, + "logps/rejected": -2.653156280517578, + "loss": 0.9384, + "nll_loss": 0.23458930850028992, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9218550733057782e-05, + "rewards/margins": 0.26529639959335327, + "rewards/rejected": -0.2653156518936157, + "step": 6765 + }, + { + "epoch": 4.67911479944675, + "grad_norm": 9.391258239746094, + "learning_rate": 2.9560473336406947e-05, + "log_odds_chosen": 10.527746200561523, + "log_odds_ratio": -3.056988862226717e-05, + "logits/chosen": -0.516139566898346, + "logits/rejected": -0.5341812968254089, + "logps/chosen": -0.0001992563484236598, + "logps/rejected": -1.9834439754486084, + "loss": 0.8513, + "nll_loss": 0.21281671524047852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.992563556996174e-05, + "rewards/margins": 0.19832447171211243, + "rewards/rejected": -0.1983444094657898, + "step": 6766 + }, + { + "epoch": 4.679806362378977, + "grad_norm": 7.875655651092529, + "learning_rate": 2.9556631320116803e-05, + "log_odds_chosen": 9.65318489074707, + "log_odds_ratio": -0.001145646208897233, + "logits/chosen": -0.3034501075744629, + "logits/rejected": -0.3228107988834381, + "logps/chosen": -0.0034056699369102716, + "logps/rejected": -1.8219208717346191, + "loss": 0.8401, + "nll_loss": 0.2099209874868393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034056699951179326, + "rewards/margins": 0.18185152113437653, + "rewards/rejected": -0.18219208717346191, + "step": 6767 + }, + { + "epoch": 4.680497925311204, + "grad_norm": 15.802404403686523, + "learning_rate": 2.955278930382665e-05, + "log_odds_chosen": 10.450215339660645, + "log_odds_ratio": -0.00019292582874186337, + "logits/chosen": -0.48566311597824097, + "logits/rejected": -0.5424797534942627, + "logps/chosen": -0.0002574539976194501, + "logps/rejected": -2.1139955520629883, + "loss": 1.3705, + "nll_loss": 0.3426019549369812, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5745401217136532e-05, + "rewards/margins": 0.21137382090091705, + "rewards/rejected": -0.21139955520629883, + "step": 6768 + }, + { + "epoch": 4.68118948824343, + "grad_norm": 11.465133666992188, + "learning_rate": 2.95489472875365e-05, + "log_odds_chosen": 10.353599548339844, + "log_odds_ratio": -6.696392665617168e-05, + "logits/chosen": -0.6689953207969666, + "logits/rejected": -0.7008723020553589, + "logps/chosen": -0.0002801914815790951, + "logps/rejected": -1.9261640310287476, + "loss": 1.2188, + "nll_loss": 0.3047032356262207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8019148885505274e-05, + "rewards/margins": 0.19258840382099152, + "rewards/rejected": -0.19261643290519714, + "step": 6769 + }, + { + "epoch": 4.681881051175657, + "grad_norm": 5.23617696762085, + "learning_rate": 2.9545105271246353e-05, + "log_odds_chosen": 9.388136863708496, + "log_odds_ratio": -0.0003933067782782018, + "logits/chosen": -0.0530322790145874, + "logits/rejected": -0.10469657182693481, + "logps/chosen": -0.0008069298346526921, + "logps/rejected": -1.6773535013198853, + "loss": 0.7605, + "nll_loss": 0.19009101390838623, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.069298201007769e-05, + "rewards/margins": 0.16765466332435608, + "rewards/rejected": -0.16773535311222076, + "step": 6770 + }, + { + "epoch": 4.682572614107884, + "grad_norm": 6.162405490875244, + "learning_rate": 2.9541263254956202e-05, + "log_odds_chosen": 10.414556503295898, + "log_odds_ratio": -0.00015066277410369366, + "logits/chosen": -0.8306690454483032, + "logits/rejected": -0.8582077026367188, + "logps/chosen": -0.00023762512137182057, + "logps/rejected": -1.9316320419311523, + "loss": 0.9783, + "nll_loss": 0.24455897510051727, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.376251359237358e-05, + "rewards/margins": 0.1931394636631012, + "rewards/rejected": -0.1931632161140442, + "step": 6771 + }, + { + "epoch": 4.683264177040111, + "grad_norm": 8.889054298400879, + "learning_rate": 2.953742123866605e-05, + "log_odds_chosen": 10.634115219116211, + "log_odds_ratio": -5.0110269512515515e-05, + "logits/chosen": -0.12986613810062408, + "logits/rejected": -0.28570762276649475, + "logps/chosen": -0.0003423771704547107, + "logps/rejected": -2.379361391067505, + "loss": 1.523, + "nll_loss": 0.3807332515716553, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.423772068344988e-05, + "rewards/margins": 0.23790189623832703, + "rewards/rejected": -0.2379361242055893, + "step": 6772 + }, + { + "epoch": 4.683955739972338, + "grad_norm": 9.803380966186523, + "learning_rate": 2.9533579222375907e-05, + "log_odds_chosen": 9.352560043334961, + "log_odds_ratio": -0.04867429658770561, + "logits/chosen": -0.12249897420406342, + "logits/rejected": -0.13682352006435394, + "logps/chosen": -0.2188890129327774, + "logps/rejected": -1.6129618883132935, + "loss": 1.1042, + "nll_loss": 0.2711852788925171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02188890241086483, + "rewards/margins": 0.13940727710723877, + "rewards/rejected": -0.16129618883132935, + "step": 6773 + }, + { + "epoch": 4.6846473029045645, + "grad_norm": 9.887007713317871, + "learning_rate": 2.9529737206085756e-05, + "log_odds_chosen": 9.593646049499512, + "log_odds_ratio": -0.00030946338665671647, + "logits/chosen": -0.2552592158317566, + "logits/rejected": -0.3207557499408722, + "logps/chosen": -0.004805741831660271, + "logps/rejected": -1.8524577617645264, + "loss": 1.0016, + "nll_loss": 0.2503683865070343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000480574177345261, + "rewards/margins": 0.18476520478725433, + "rewards/rejected": -0.18524578213691711, + "step": 6774 + }, + { + "epoch": 4.685338865836791, + "grad_norm": 9.28144359588623, + "learning_rate": 2.9525895189795605e-05, + "log_odds_chosen": 8.273391723632812, + "log_odds_ratio": -0.039550162851810455, + "logits/chosen": -0.5619184970855713, + "logits/rejected": -0.6418176293373108, + "logps/chosen": -0.014480775222182274, + "logps/rejected": -1.4526318311691284, + "loss": 2.157, + "nll_loss": 0.5352879762649536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014480776153504848, + "rewards/margins": 0.14381510019302368, + "rewards/rejected": -0.1452631652355194, + "step": 6775 + }, + { + "epoch": 4.686030428769018, + "grad_norm": 11.07194995880127, + "learning_rate": 2.952205317350546e-05, + "log_odds_chosen": 9.83755111694336, + "log_odds_ratio": -0.0002198005822720006, + "logits/chosen": -0.5238659381866455, + "logits/rejected": -0.5820973515510559, + "logps/chosen": -0.00036986565100960433, + "logps/rejected": -1.439079999923706, + "loss": 1.0547, + "nll_loss": 0.263644814491272, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6986566556151956e-05, + "rewards/margins": 0.143871009349823, + "rewards/rejected": -0.14390799403190613, + "step": 6776 + }, + { + "epoch": 4.686721991701245, + "grad_norm": 5.638495445251465, + "learning_rate": 2.951821115721531e-05, + "log_odds_chosen": 10.339385986328125, + "log_odds_ratio": -8.466203871648759e-05, + "logits/chosen": -0.391963392496109, + "logits/rejected": -0.4621970057487488, + "logps/chosen": -0.00017859251238405704, + "logps/rejected": -1.751267910003662, + "loss": 1.2182, + "nll_loss": 0.30454888939857483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7859250874607824e-05, + "rewards/margins": 0.17510893940925598, + "rewards/rejected": -0.17512677609920502, + "step": 6777 + }, + { + "epoch": 4.687413554633472, + "grad_norm": 7.306526184082031, + "learning_rate": 2.951436914092516e-05, + "log_odds_chosen": 8.994571685791016, + "log_odds_ratio": -0.0004123014223296195, + "logits/chosen": -0.48963162302970886, + "logits/rejected": -0.5226364731788635, + "logps/chosen": -0.0007051755674183369, + "logps/rejected": -1.1294283866882324, + "loss": 0.9442, + "nll_loss": 0.23601235449314117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.051755528664216e-05, + "rewards/margins": 0.11287231743335724, + "rewards/rejected": -0.11294284462928772, + "step": 6778 + }, + { + "epoch": 4.688105117565699, + "grad_norm": 10.07761287689209, + "learning_rate": 2.951052712463501e-05, + "log_odds_chosen": 11.039661407470703, + "log_odds_ratio": -3.118627500953153e-05, + "logits/chosen": -0.7747355699539185, + "logits/rejected": -0.7683219909667969, + "logps/chosen": -0.0003158682957291603, + "logps/rejected": -2.298959493637085, + "loss": 1.2224, + "nll_loss": 0.3056063652038574, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1586831028107554e-05, + "rewards/margins": 0.22986435890197754, + "rewards/rejected": -0.2298959493637085, + "step": 6779 + }, + { + "epoch": 4.6887966804979255, + "grad_norm": 10.291252136230469, + "learning_rate": 2.950668510834486e-05, + "log_odds_chosen": 11.137717247009277, + "log_odds_ratio": -2.4501310690538958e-05, + "logits/chosen": -0.6697853207588196, + "logits/rejected": -0.7131739854812622, + "logps/chosen": -0.00019688297470565885, + "logps/rejected": -2.2473249435424805, + "loss": 0.7454, + "nll_loss": 0.18635433912277222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9688297470565885e-05, + "rewards/margins": 0.2247128188610077, + "rewards/rejected": -0.22473251819610596, + "step": 6780 + }, + { + "epoch": 4.689488243430152, + "grad_norm": 6.318768501281738, + "learning_rate": 2.950284309205471e-05, + "log_odds_chosen": 11.049291610717773, + "log_odds_ratio": -2.7139243684359826e-05, + "logits/chosen": -0.8393341302871704, + "logits/rejected": -0.8682998418807983, + "logps/chosen": -0.000242653870373033, + "logps/rejected": -2.561972141265869, + "loss": 0.9783, + "nll_loss": 0.24456657469272614, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4265385945909657e-05, + "rewards/margins": 0.25617295503616333, + "rewards/rejected": -0.2561972141265869, + "step": 6781 + }, + { + "epoch": 4.690179806362379, + "grad_norm": 12.883466720581055, + "learning_rate": 2.9499001075764566e-05, + "log_odds_chosen": 9.238396644592285, + "log_odds_ratio": -0.0039947787299752235, + "logits/chosen": -0.43368393182754517, + "logits/rejected": -0.5059782266616821, + "logps/chosen": -0.0029813749715685844, + "logps/rejected": -2.0180764198303223, + "loss": 0.9323, + "nll_loss": 0.23266473412513733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029813748551532626, + "rewards/margins": 0.2015094757080078, + "rewards/rejected": -0.2018076330423355, + "step": 6782 + }, + { + "epoch": 4.690871369294606, + "grad_norm": 5.375931262969971, + "learning_rate": 2.9495159059474415e-05, + "log_odds_chosen": 8.770668029785156, + "log_odds_ratio": -0.002029500436037779, + "logits/chosen": -0.3985563814640045, + "logits/rejected": -0.4183902144432068, + "logps/chosen": -0.011122825555503368, + "logps/rejected": -1.9470322132110596, + "loss": 1.6924, + "nll_loss": 0.4229055643081665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00111228262539953, + "rewards/margins": 0.1935909390449524, + "rewards/rejected": -0.19470323622226715, + "step": 6783 + }, + { + "epoch": 4.691562932226833, + "grad_norm": 9.022359848022461, + "learning_rate": 2.9491317043184264e-05, + "log_odds_chosen": 10.731500625610352, + "log_odds_ratio": -4.688445551437326e-05, + "logits/chosen": -0.31862491369247437, + "logits/rejected": -0.39162176847457886, + "logps/chosen": -0.0008599141729064286, + "logps/rejected": -3.046576738357544, + "loss": 1.2466, + "nll_loss": 0.3116372227668762, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.599141438025981e-05, + "rewards/margins": 0.30457165837287903, + "rewards/rejected": -0.3046576678752899, + "step": 6784 + }, + { + "epoch": 4.69225449515906, + "grad_norm": 14.344029426574707, + "learning_rate": 2.948747502689412e-05, + "log_odds_chosen": 11.06399917602539, + "log_odds_ratio": -3.6989782529417425e-05, + "logits/chosen": -0.3671875, + "logits/rejected": -0.46688222885131836, + "logps/chosen": -0.00018051578081212938, + "logps/rejected": -2.368514060974121, + "loss": 0.9802, + "nll_loss": 0.24503794312477112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8051578081212938e-05, + "rewards/margins": 0.2368333637714386, + "rewards/rejected": -0.23685140907764435, + "step": 6785 + }, + { + "epoch": 4.6929460580912865, + "grad_norm": 61.70487594604492, + "learning_rate": 2.948363301060397e-05, + "log_odds_chosen": 9.329105377197266, + "log_odds_ratio": -0.15523530542850494, + "logits/chosen": -0.5905463695526123, + "logits/rejected": -0.6490134000778198, + "logps/chosen": -0.009185466915369034, + "logps/rejected": -1.9207974672317505, + "loss": 1.0517, + "nll_loss": 0.2474137246608734, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0009185466915369034, + "rewards/margins": 0.19116121530532837, + "rewards/rejected": -0.19207975268363953, + "step": 6786 + }, + { + "epoch": 4.693637621023513, + "grad_norm": 15.269037246704102, + "learning_rate": 2.9479790994313818e-05, + "log_odds_chosen": 10.547459602355957, + "log_odds_ratio": -0.0004707665357273072, + "logits/chosen": -0.39600372314453125, + "logits/rejected": -0.43267565965652466, + "logps/chosen": -0.0004747907514683902, + "logps/rejected": -2.174017906188965, + "loss": 2.1666, + "nll_loss": 0.541592538356781, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.747907951241359e-05, + "rewards/margins": 0.21735432744026184, + "rewards/rejected": -0.21740183234214783, + "step": 6787 + }, + { + "epoch": 4.69432918395574, + "grad_norm": 7.543539047241211, + "learning_rate": 2.947594897802367e-05, + "log_odds_chosen": 9.116312026977539, + "log_odds_ratio": -0.00036397125222720206, + "logits/chosen": -0.35737016797065735, + "logits/rejected": -0.4064314067363739, + "logps/chosen": -0.0007826816872693598, + "logps/rejected": -1.6374640464782715, + "loss": 0.819, + "nll_loss": 0.20471778512001038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.826816727174446e-05, + "rewards/margins": 0.16366812586784363, + "rewards/rejected": -0.16374638676643372, + "step": 6788 + }, + { + "epoch": 4.695020746887967, + "grad_norm": 10.115254402160645, + "learning_rate": 2.947210696173352e-05, + "log_odds_chosen": 10.277286529541016, + "log_odds_ratio": -7.167382864281535e-05, + "logits/chosen": -0.6997770071029663, + "logits/rejected": -0.701981246471405, + "logps/chosen": -0.0008305530645884573, + "logps/rejected": -2.0024566650390625, + "loss": 0.7632, + "nll_loss": 0.19080215692520142, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.30553108244203e-05, + "rewards/margins": 0.2001626044511795, + "rewards/rejected": -0.20024564862251282, + "step": 6789 + }, + { + "epoch": 4.695712309820194, + "grad_norm": 12.732426643371582, + "learning_rate": 2.9468264945443368e-05, + "log_odds_chosen": 10.833272933959961, + "log_odds_ratio": -9.282723476644605e-05, + "logits/chosen": -0.6699355244636536, + "logits/rejected": -0.5854648351669312, + "logps/chosen": -0.0005036251386627555, + "logps/rejected": -2.8181521892547607, + "loss": 1.1987, + "nll_loss": 0.29966798424720764, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.036251968704164e-05, + "rewards/margins": 0.2817648649215698, + "rewards/rejected": -0.28181523084640503, + "step": 6790 + }, + { + "epoch": 4.696403872752421, + "grad_norm": 7.614867210388184, + "learning_rate": 2.9464422929153224e-05, + "log_odds_chosen": 9.232556343078613, + "log_odds_ratio": -0.0149430176243186, + "logits/chosen": -0.5365698337554932, + "logits/rejected": -0.542522132396698, + "logps/chosen": -0.021838007494807243, + "logps/rejected": -2.3092708587646484, + "loss": 0.9792, + "nll_loss": 0.2432941496372223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002183800796046853, + "rewards/margins": 0.228743314743042, + "rewards/rejected": -0.23092710971832275, + "step": 6791 + }, + { + "epoch": 4.6970954356846475, + "grad_norm": 11.789693832397461, + "learning_rate": 2.9460580912863073e-05, + "log_odds_chosen": 10.382354736328125, + "log_odds_ratio": -0.00018708905554376543, + "logits/chosen": -0.47791990637779236, + "logits/rejected": -0.5692132115364075, + "logps/chosen": -0.00045102040166966617, + "logps/rejected": -2.1589295864105225, + "loss": 0.9743, + "nll_loss": 0.24356132745742798, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.51020423497539e-05, + "rewards/margins": 0.21584787964820862, + "rewards/rejected": -0.2158929705619812, + "step": 6792 + }, + { + "epoch": 4.697786998616874, + "grad_norm": 15.505138397216797, + "learning_rate": 2.9456738896572922e-05, + "log_odds_chosen": 10.668023109436035, + "log_odds_ratio": -3.8151094486238435e-05, + "logits/chosen": -0.767189621925354, + "logits/rejected": -0.7929633855819702, + "logps/chosen": -0.00047995190834626555, + "logps/rejected": -2.420189380645752, + "loss": 1.1575, + "nll_loss": 0.28936582803726196, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.799519228981808e-05, + "rewards/margins": 0.241970956325531, + "rewards/rejected": -0.2420189529657364, + "step": 6793 + }, + { + "epoch": 4.698478561549101, + "grad_norm": 7.923763751983643, + "learning_rate": 2.9452896880282778e-05, + "log_odds_chosen": 10.526309967041016, + "log_odds_ratio": -6.047174974810332e-05, + "logits/chosen": -0.7259615659713745, + "logits/rejected": -0.8289102911949158, + "logps/chosen": -0.0004142590332776308, + "logps/rejected": -2.5150129795074463, + "loss": 0.9214, + "nll_loss": 0.23034964501857758, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1425904782954603e-05, + "rewards/margins": 0.25145989656448364, + "rewards/rejected": -0.25150129199028015, + "step": 6794 + }, + { + "epoch": 4.699170124481328, + "grad_norm": 8.245205879211426, + "learning_rate": 2.9449054863992627e-05, + "log_odds_chosen": 9.464591979980469, + "log_odds_ratio": -0.00016946755931712687, + "logits/chosen": -0.5645436644554138, + "logits/rejected": -0.6196568608283997, + "logps/chosen": -0.00036220205947756767, + "logps/rejected": -1.5313994884490967, + "loss": 0.859, + "nll_loss": 0.2147316336631775, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.622020813054405e-05, + "rewards/margins": 0.15310373902320862, + "rewards/rejected": -0.15313996374607086, + "step": 6795 + }, + { + "epoch": 4.699861687413555, + "grad_norm": 7.095120906829834, + "learning_rate": 2.9445212847702476e-05, + "log_odds_chosen": 10.993948936462402, + "log_odds_ratio": -0.0001342833274975419, + "logits/chosen": -0.5555762052536011, + "logits/rejected": -0.6703388094902039, + "logps/chosen": -0.0022787712514400482, + "logps/rejected": -2.9189109802246094, + "loss": 0.7338, + "nll_loss": 0.1834450513124466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002278771426063031, + "rewards/margins": 0.29166319966316223, + "rewards/rejected": -0.29189109802246094, + "step": 6796 + }, + { + "epoch": 4.700553250345782, + "grad_norm": 10.602357864379883, + "learning_rate": 2.944137083141233e-05, + "log_odds_chosen": 10.636397361755371, + "log_odds_ratio": -3.553461283445358e-05, + "logits/chosen": -0.6809238791465759, + "logits/rejected": -0.6949537396430969, + "logps/chosen": -0.0002272947458550334, + "logps/rejected": -1.9710800647735596, + "loss": 0.7617, + "nll_loss": 0.19042199850082397, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2729476768290624e-05, + "rewards/margins": 0.19708527624607086, + "rewards/rejected": -0.19710800051689148, + "step": 6797 + }, + { + "epoch": 4.7012448132780085, + "grad_norm": 7.388942718505859, + "learning_rate": 2.9437528815122178e-05, + "log_odds_chosen": 9.084571838378906, + "log_odds_ratio": -0.0004327888018451631, + "logits/chosen": -0.2653324604034424, + "logits/rejected": -0.25310999155044556, + "logps/chosen": -0.00107409933116287, + "logps/rejected": -1.4760522842407227, + "loss": 1.0311, + "nll_loss": 0.25771990418434143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001074099272955209, + "rewards/margins": 0.14749783277511597, + "rewards/rejected": -0.14760524034500122, + "step": 6798 + }, + { + "epoch": 4.701936376210235, + "grad_norm": 9.694303512573242, + "learning_rate": 2.9433686798832027e-05, + "log_odds_chosen": 10.724264144897461, + "log_odds_ratio": -9.620962373446673e-05, + "logits/chosen": -0.4596264362335205, + "logits/rejected": -0.6023463606834412, + "logps/chosen": -0.00062222481938079, + "logps/rejected": -2.387749195098877, + "loss": 0.8183, + "nll_loss": 0.20455913245677948, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222247611731291e-05, + "rewards/margins": 0.23871271312236786, + "rewards/rejected": -0.23877492547035217, + "step": 6799 + }, + { + "epoch": 4.702627939142462, + "grad_norm": 8.140204429626465, + "learning_rate": 2.9429844782541882e-05, + "log_odds_chosen": 9.990387916564941, + "log_odds_ratio": -6.377643148880452e-05, + "logits/chosen": -0.6972765326499939, + "logits/rejected": -0.6815809011459351, + "logps/chosen": -0.000211845021112822, + "logps/rejected": -1.403898000717163, + "loss": 0.6581, + "nll_loss": 0.1645159125328064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.118450174748432e-05, + "rewards/margins": 0.14036861062049866, + "rewards/rejected": -0.1403898000717163, + "step": 6800 + }, + { + "epoch": 4.703319502074689, + "grad_norm": 9.126114845275879, + "learning_rate": 2.942600276625173e-05, + "log_odds_chosen": 9.252166748046875, + "log_odds_ratio": -0.003878280520439148, + "logits/chosen": -0.42757394909858704, + "logits/rejected": -0.43310630321502686, + "logps/chosen": -0.002153117908164859, + "logps/rejected": -1.7966618537902832, + "loss": 0.9991, + "nll_loss": 0.2493833601474762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021531182574108243, + "rewards/margins": 0.1794508695602417, + "rewards/rejected": -0.1796661913394928, + "step": 6801 + }, + { + "epoch": 4.704011065006916, + "grad_norm": 10.610753059387207, + "learning_rate": 2.942216074996158e-05, + "log_odds_chosen": 10.365510940551758, + "log_odds_ratio": -6.0238788137212396e-05, + "logits/chosen": -0.6273984313011169, + "logits/rejected": -0.7365648746490479, + "logps/chosen": -0.0004422231577336788, + "logps/rejected": -1.8178184032440186, + "loss": 1.3457, + "nll_loss": 0.336431086063385, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4222317228559405e-05, + "rewards/margins": 0.18173763155937195, + "rewards/rejected": -0.1817818433046341, + "step": 6802 + }, + { + "epoch": 4.704702627939143, + "grad_norm": 7.150958061218262, + "learning_rate": 2.941831873367143e-05, + "log_odds_chosen": 9.965583801269531, + "log_odds_ratio": -0.00029022121452726424, + "logits/chosen": -0.0954047217965126, + "logits/rejected": -0.19729548692703247, + "logps/chosen": -0.0007090939325280488, + "logps/rejected": -1.9928028583526611, + "loss": 0.7298, + "nll_loss": 0.18241362273693085, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.09093947079964e-05, + "rewards/margins": 0.19920937716960907, + "rewards/rejected": -0.1992802768945694, + "step": 6803 + }, + { + "epoch": 4.7053941908713695, + "grad_norm": 11.659815788269043, + "learning_rate": 2.9414476717381285e-05, + "log_odds_chosen": 11.124892234802246, + "log_odds_ratio": -9.604761726222932e-05, + "logits/chosen": -0.016843080520629883, + "logits/rejected": -0.1653369814157486, + "logps/chosen": -0.0004137184005230665, + "logps/rejected": -2.80672025680542, + "loss": 1.8558, + "nll_loss": 0.4639298915863037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1371840779902413e-05, + "rewards/margins": 0.2806306481361389, + "rewards/rejected": -0.28067201375961304, + "step": 6804 + }, + { + "epoch": 4.706085753803596, + "grad_norm": 6.719081401824951, + "learning_rate": 2.9410634701091134e-05, + "log_odds_chosen": 9.30002498626709, + "log_odds_ratio": -0.0004191567131783813, + "logits/chosen": -0.4892222285270691, + "logits/rejected": -0.5243061780929565, + "logps/chosen": -0.0003415496030356735, + "logps/rejected": -1.4499791860580444, + "loss": 0.7867, + "nll_loss": 0.19663241505622864, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.415496030356735e-05, + "rewards/margins": 0.14496377110481262, + "rewards/rejected": -0.14499792456626892, + "step": 6805 + }, + { + "epoch": 4.706777316735823, + "grad_norm": 10.404929161071777, + "learning_rate": 2.9406792684800984e-05, + "log_odds_chosen": 9.235153198242188, + "log_odds_ratio": -0.01029971707612276, + "logits/chosen": -0.48685115575790405, + "logits/rejected": -0.5751073360443115, + "logps/chosen": -0.00509566580876708, + "logps/rejected": -2.1039867401123047, + "loss": 0.7998, + "nll_loss": 0.19892632961273193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005095665692351758, + "rewards/margins": 0.20988911390304565, + "rewards/rejected": -0.21039867401123047, + "step": 6806 + }, + { + "epoch": 4.70746887966805, + "grad_norm": 10.810821533203125, + "learning_rate": 2.9402950668510836e-05, + "log_odds_chosen": 8.567014694213867, + "log_odds_ratio": -0.16073843836784363, + "logits/chosen": -0.3772326707839966, + "logits/rejected": -0.3111382722854614, + "logps/chosen": -0.026545803993940353, + "logps/rejected": -1.2393962144851685, + "loss": 1.171, + "nll_loss": 0.2766638696193695, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0026545801665633917, + "rewards/margins": 0.12128505110740662, + "rewards/rejected": -0.1239396333694458, + "step": 6807 + }, + { + "epoch": 4.708160442600277, + "grad_norm": 9.465850830078125, + "learning_rate": 2.9399108652220685e-05, + "log_odds_chosen": 10.434772491455078, + "log_odds_ratio": -8.693186100572348e-05, + "logits/chosen": -0.27980881929397583, + "logits/rejected": -0.2941112518310547, + "logps/chosen": -0.0003163870715070516, + "logps/rejected": -1.8002769947052002, + "loss": 1.1756, + "nll_loss": 0.29389774799346924, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.163870860589668e-05, + "rewards/margins": 0.1799960732460022, + "rewards/rejected": -0.18002769351005554, + "step": 6808 + }, + { + "epoch": 4.708852005532504, + "grad_norm": 13.45466423034668, + "learning_rate": 2.9395266635930534e-05, + "log_odds_chosen": 9.926715850830078, + "log_odds_ratio": -0.00035792539711110294, + "logits/chosen": -0.47436320781707764, + "logits/rejected": -0.4171980321407318, + "logps/chosen": -0.00326165952719748, + "logps/rejected": -1.8485264778137207, + "loss": 1.2901, + "nll_loss": 0.32247692346572876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003261659585405141, + "rewards/margins": 0.1845264881849289, + "rewards/rejected": -0.18485264480113983, + "step": 6809 + }, + { + "epoch": 4.70954356846473, + "grad_norm": 7.703126430511475, + "learning_rate": 2.939142461964039e-05, + "log_odds_chosen": 11.158015251159668, + "log_odds_ratio": -2.2229780370253138e-05, + "logits/chosen": -0.27547189593315125, + "logits/rejected": -0.5276872515678406, + "logps/chosen": -8.773449371801689e-05, + "logps/rejected": -1.8417258262634277, + "loss": 0.9583, + "nll_loss": 0.23958033323287964, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.773448826104868e-06, + "rewards/margins": 0.18416382372379303, + "rewards/rejected": -0.18417257070541382, + "step": 6810 + }, + { + "epoch": 4.710235131396957, + "grad_norm": 12.61095142364502, + "learning_rate": 2.938758260335024e-05, + "log_odds_chosen": 9.955721855163574, + "log_odds_ratio": -0.00022129954595584422, + "logits/chosen": -0.5432331562042236, + "logits/rejected": -0.5477120876312256, + "logps/chosen": -0.0053403074853122234, + "logps/rejected": -2.3294897079467773, + "loss": 1.3859, + "nll_loss": 0.34646186232566833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005340307834558189, + "rewards/margins": 0.23241494596004486, + "rewards/rejected": -0.23294898867607117, + "step": 6811 + }, + { + "epoch": 4.710926694329184, + "grad_norm": 6.147278785705566, + "learning_rate": 2.9383740587060088e-05, + "log_odds_chosen": 7.364340305328369, + "log_odds_ratio": -0.009784827940165997, + "logits/chosen": -0.7493782639503479, + "logits/rejected": -0.7773865461349487, + "logps/chosen": -0.004916701465845108, + "logps/rejected": -0.7641756534576416, + "loss": 1.4489, + "nll_loss": 0.36123567819595337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004916701000183821, + "rewards/margins": 0.07592590153217316, + "rewards/rejected": -0.07641757279634476, + "step": 6812 + }, + { + "epoch": 4.711618257261411, + "grad_norm": 9.089730262756348, + "learning_rate": 2.9379898570769944e-05, + "log_odds_chosen": 9.9685640335083, + "log_odds_ratio": -0.00010080543870572001, + "logits/chosen": -0.4879155158996582, + "logits/rejected": -0.5909554362297058, + "logps/chosen": -0.00021607377857435495, + "logps/rejected": -1.5597730875015259, + "loss": 1.2522, + "nll_loss": 0.31304243206977844, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1607378585031256e-05, + "rewards/margins": 0.15595568716526031, + "rewards/rejected": -0.1559773087501526, + "step": 6813 + }, + { + "epoch": 4.712309820193638, + "grad_norm": 9.110308647155762, + "learning_rate": 2.9376056554479793e-05, + "log_odds_chosen": 9.04574203491211, + "log_odds_ratio": -0.0009579684119671583, + "logits/chosen": -0.40703973174095154, + "logits/rejected": -0.46325159072875977, + "logps/chosen": -0.0026330589316785336, + "logps/rejected": -1.9530531167984009, + "loss": 0.9536, + "nll_loss": 0.23829597234725952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026330590480938554, + "rewards/margins": 0.19504201412200928, + "rewards/rejected": -0.19530531764030457, + "step": 6814 + }, + { + "epoch": 4.713001383125865, + "grad_norm": 9.90314769744873, + "learning_rate": 2.9372214538189642e-05, + "log_odds_chosen": 9.989084243774414, + "log_odds_ratio": -0.0004183893615845591, + "logits/chosen": -0.8353537321090698, + "logits/rejected": -0.8668037056922913, + "logps/chosen": -0.0021147681400179863, + "logps/rejected": -1.9255058765411377, + "loss": 1.2717, + "nll_loss": 0.31787949800491333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002114767994498834, + "rewards/margins": 0.19233912229537964, + "rewards/rejected": -0.19255059957504272, + "step": 6815 + }, + { + "epoch": 4.713692946058091, + "grad_norm": 14.415225982666016, + "learning_rate": 2.9368372521899494e-05, + "log_odds_chosen": 10.086376190185547, + "log_odds_ratio": -0.0005009483429603279, + "logits/chosen": -0.4449045658111572, + "logits/rejected": -0.47074446082115173, + "logps/chosen": -0.0013042137725278735, + "logps/rejected": -2.4045162200927734, + "loss": 1.4341, + "nll_loss": 0.3584754467010498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001304213801631704, + "rewards/margins": 0.24032121896743774, + "rewards/rejected": -0.2404516339302063, + "step": 6816 + }, + { + "epoch": 4.714384508990318, + "grad_norm": 10.669299125671387, + "learning_rate": 2.9364530505609343e-05, + "log_odds_chosen": 11.092180252075195, + "log_odds_ratio": -3.602875949582085e-05, + "logits/chosen": -0.5740249156951904, + "logits/rejected": -0.7104464173316956, + "logps/chosen": -0.00017480396491009742, + "logps/rejected": -2.3945038318634033, + "loss": 0.8741, + "nll_loss": 0.2185106873512268, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7480397218605503e-05, + "rewards/margins": 0.2394329011440277, + "rewards/rejected": -0.2394503802061081, + "step": 6817 + }, + { + "epoch": 4.715076071922545, + "grad_norm": 12.476048469543457, + "learning_rate": 2.9360688489319193e-05, + "log_odds_chosen": 10.14078140258789, + "log_odds_ratio": -0.0008515632362104952, + "logits/chosen": -0.34306657314300537, + "logits/rejected": -0.4307641386985779, + "logps/chosen": -0.00048003694973886013, + "logps/rejected": -2.012887716293335, + "loss": 0.7408, + "nll_loss": 0.18511009216308594, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.800369788426906e-05, + "rewards/margins": 0.20124077796936035, + "rewards/rejected": -0.20128877460956573, + "step": 6818 + }, + { + "epoch": 4.715767634854772, + "grad_norm": 5.907188892364502, + "learning_rate": 2.935684647302905e-05, + "log_odds_chosen": 10.725056648254395, + "log_odds_ratio": -6.276977364905179e-05, + "logits/chosen": -0.5912138223648071, + "logits/rejected": -0.5068807601928711, + "logps/chosen": -0.00017866448615677655, + "logps/rejected": -2.0803234577178955, + "loss": 1.3981, + "nll_loss": 0.34951943159103394, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7866448615677655e-05, + "rewards/margins": 0.20801447331905365, + "rewards/rejected": -0.20803233981132507, + "step": 6819 + }, + { + "epoch": 4.716459197786999, + "grad_norm": 11.002961158752441, + "learning_rate": 2.9353004456738897e-05, + "log_odds_chosen": 10.004314422607422, + "log_odds_ratio": -0.0002287816460011527, + "logits/chosen": -0.515632152557373, + "logits/rejected": -0.7423189878463745, + "logps/chosen": -0.00037495637661777437, + "logps/rejected": -1.7557978630065918, + "loss": 1.0634, + "nll_loss": 0.26582497358322144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7495636206585914e-05, + "rewards/margins": 0.17554229497909546, + "rewards/rejected": -0.17557978630065918, + "step": 6820 + }, + { + "epoch": 4.717150760719226, + "grad_norm": 5.770873069763184, + "learning_rate": 2.9349162440448746e-05, + "log_odds_chosen": 9.757637023925781, + "log_odds_ratio": -0.00031008111545816064, + "logits/chosen": -0.37380141019821167, + "logits/rejected": -0.4303475320339203, + "logps/chosen": -0.00025031069526448846, + "logps/rejected": -1.5108685493469238, + "loss": 1.1455, + "nll_loss": 0.2863425016403198, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5031067707459442e-05, + "rewards/margins": 0.15106181800365448, + "rewards/rejected": -0.15108685195446014, + "step": 6821 + }, + { + "epoch": 4.717842323651452, + "grad_norm": 7.252089977264404, + "learning_rate": 2.9345320424158602e-05, + "log_odds_chosen": 10.901185989379883, + "log_odds_ratio": -4.147412983002141e-05, + "logits/chosen": -0.5456046462059021, + "logits/rejected": -0.6253412961959839, + "logps/chosen": -0.00021441985154524446, + "logps/rejected": -2.2036609649658203, + "loss": 0.9259, + "nll_loss": 0.2314816415309906, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.144198697351385e-05, + "rewards/margins": 0.2203446626663208, + "rewards/rejected": -0.22036609053611755, + "step": 6822 + }, + { + "epoch": 4.718533886583679, + "grad_norm": 15.433568954467773, + "learning_rate": 2.934147840786845e-05, + "log_odds_chosen": 10.816705703735352, + "log_odds_ratio": -6.392317300196737e-05, + "logits/chosen": -0.1447380781173706, + "logits/rejected": -0.2733813524246216, + "logps/chosen": -7.367720536421984e-05, + "logps/rejected": -1.4634238481521606, + "loss": 1.1584, + "nll_loss": 0.2896004319190979, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.367720172624104e-06, + "rewards/margins": 0.14633502066135406, + "rewards/rejected": -0.14634239673614502, + "step": 6823 + }, + { + "epoch": 4.719225449515906, + "grad_norm": 5.212158679962158, + "learning_rate": 2.93376363915783e-05, + "log_odds_chosen": 9.519055366516113, + "log_odds_ratio": -0.0023928459268063307, + "logits/chosen": -0.5571908950805664, + "logits/rejected": -0.5788753032684326, + "logps/chosen": -0.004182653967291117, + "logps/rejected": -1.5381381511688232, + "loss": 0.6618, + "nll_loss": 0.16522231698036194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004182653210591525, + "rewards/margins": 0.15339556336402893, + "rewards/rejected": -0.15381380915641785, + "step": 6824 + }, + { + "epoch": 4.719917012448133, + "grad_norm": 6.5578203201293945, + "learning_rate": 2.9333794375288153e-05, + "log_odds_chosen": 9.255062103271484, + "log_odds_ratio": -0.005615750327706337, + "logits/chosen": -0.46286746859550476, + "logits/rejected": -0.5253312587738037, + "logps/chosen": -0.004062678664922714, + "logps/rejected": -2.030621290206909, + "loss": 1.6068, + "nll_loss": 0.4011441767215729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040626790723763406, + "rewards/margins": 0.2026558518409729, + "rewards/rejected": -0.20306211709976196, + "step": 6825 + }, + { + "epoch": 4.72060857538036, + "grad_norm": 6.186581134796143, + "learning_rate": 2.9329952358998002e-05, + "log_odds_chosen": 10.776817321777344, + "log_odds_ratio": -3.6418459785636514e-05, + "logits/chosen": -0.2636483907699585, + "logits/rejected": -0.3309072256088257, + "logps/chosen": -0.0003143001231364906, + "logps/rejected": -2.2573060989379883, + "loss": 1.2208, + "nll_loss": 0.3051925003528595, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.143001231364906e-05, + "rewards/margins": 0.22569917142391205, + "rewards/rejected": -0.22573062777519226, + "step": 6826 + }, + { + "epoch": 4.7213001383125865, + "grad_norm": 14.888287544250488, + "learning_rate": 2.932611034270785e-05, + "log_odds_chosen": 7.935850620269775, + "log_odds_ratio": -0.18102018535137177, + "logits/chosen": -0.5066199898719788, + "logits/rejected": -0.3732776343822479, + "logps/chosen": -0.09034513682126999, + "logps/rejected": -2.89693021774292, + "loss": 1.7254, + "nll_loss": 0.41325053572654724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.009034513495862484, + "rewards/margins": 0.2806585133075714, + "rewards/rejected": -0.28969305753707886, + "step": 6827 + }, + { + "epoch": 4.721991701244813, + "grad_norm": 6.673293590545654, + "learning_rate": 2.9322268326417707e-05, + "log_odds_chosen": 9.484441757202148, + "log_odds_ratio": -0.00032633356750011444, + "logits/chosen": -0.3358270823955536, + "logits/rejected": -0.429345965385437, + "logps/chosen": -0.015161263756453991, + "logps/rejected": -2.182262420654297, + "loss": 0.8388, + "nll_loss": 0.20967671275138855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015161264454945922, + "rewards/margins": 0.21671010553836823, + "rewards/rejected": -0.21822622418403625, + "step": 6828 + }, + { + "epoch": 4.72268326417704, + "grad_norm": 11.503182411193848, + "learning_rate": 2.9318426310127556e-05, + "log_odds_chosen": 9.144638061523438, + "log_odds_ratio": -0.0009379127295687795, + "logits/chosen": -0.14240528643131256, + "logits/rejected": -0.28116291761398315, + "logps/chosen": -0.0031801690347492695, + "logps/rejected": -2.391510486602783, + "loss": 0.652, + "nll_loss": 0.16289681196212769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031801691511645913, + "rewards/margins": 0.23883303999900818, + "rewards/rejected": -0.23915104568004608, + "step": 6829 + }, + { + "epoch": 4.723374827109267, + "grad_norm": 12.40114974975586, + "learning_rate": 2.9314584293837405e-05, + "log_odds_chosen": 9.611428260803223, + "log_odds_ratio": -0.0002756261674221605, + "logits/chosen": -0.016961220651865005, + "logits/rejected": -0.09042391180992126, + "logps/chosen": -0.0017954304348677397, + "logps/rejected": -2.43148136138916, + "loss": 1.3062, + "nll_loss": 0.3265213966369629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017954302893485874, + "rewards/margins": 0.2429685890674591, + "rewards/rejected": -0.24314813315868378, + "step": 6830 + }, + { + "epoch": 4.724066390041494, + "grad_norm": 14.327370643615723, + "learning_rate": 2.931074227754726e-05, + "log_odds_chosen": 11.19952392578125, + "log_odds_ratio": -2.174938163079787e-05, + "logits/chosen": -0.6013371348381042, + "logits/rejected": -0.5689268708229065, + "logps/chosen": -0.00013965470134280622, + "logps/rejected": -2.163695812225342, + "loss": 0.7687, + "nll_loss": 0.1921716034412384, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.396546940668486e-05, + "rewards/margins": 0.21635562181472778, + "rewards/rejected": -0.2163695991039276, + "step": 6831 + }, + { + "epoch": 4.724757952973721, + "grad_norm": 6.135366439819336, + "learning_rate": 2.930690026125711e-05, + "log_odds_chosen": 9.640559196472168, + "log_odds_ratio": -9.722045069793239e-05, + "logits/chosen": -0.5914327502250671, + "logits/rejected": -0.5960256457328796, + "logps/chosen": -0.00017935251526068896, + "logps/rejected": -1.1278718709945679, + "loss": 0.82, + "nll_loss": 0.20498919486999512, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7935251889866777e-05, + "rewards/margins": 0.11276926100254059, + "rewards/rejected": -0.11278717964887619, + "step": 6832 + }, + { + "epoch": 4.7254495159059475, + "grad_norm": 9.616642951965332, + "learning_rate": 2.930305824496696e-05, + "log_odds_chosen": 10.35329818725586, + "log_odds_ratio": -0.00021150981774553657, + "logits/chosen": -0.3476685881614685, + "logits/rejected": -0.427687406539917, + "logps/chosen": -0.00041303792386315763, + "logps/rejected": -2.0688071250915527, + "loss": 1.236, + "nll_loss": 0.3089710474014282, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.130379602429457e-05, + "rewards/margins": 0.2068394124507904, + "rewards/rejected": -0.20688071846961975, + "step": 6833 + }, + { + "epoch": 4.726141078838174, + "grad_norm": 9.028099060058594, + "learning_rate": 2.929921622867681e-05, + "log_odds_chosen": 10.01650619506836, + "log_odds_ratio": -0.00020999395928811282, + "logits/chosen": -0.3519641160964966, + "logits/rejected": -0.47386786341667175, + "logps/chosen": -0.0007251882343553007, + "logps/rejected": -2.154243230819702, + "loss": 0.8524, + "nll_loss": 0.21308580040931702, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.251882925629616e-05, + "rewards/margins": 0.21535180509090424, + "rewards/rejected": -0.2154243290424347, + "step": 6834 + }, + { + "epoch": 4.726832641770401, + "grad_norm": 6.53804874420166, + "learning_rate": 2.929537421238666e-05, + "log_odds_chosen": 9.094326972961426, + "log_odds_ratio": -0.0007695475942455232, + "logits/chosen": -0.5255453586578369, + "logits/rejected": -0.4840846061706543, + "logps/chosen": -0.007395816966891289, + "logps/rejected": -2.5426483154296875, + "loss": 0.8317, + "nll_loss": 0.20785032212734222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000739581766538322, + "rewards/margins": 0.2535252869129181, + "rewards/rejected": -0.25426483154296875, + "step": 6835 + }, + { + "epoch": 4.727524204702628, + "grad_norm": 13.52680778503418, + "learning_rate": 2.929153219609651e-05, + "log_odds_chosen": 9.964470863342285, + "log_odds_ratio": -0.00014706116053275764, + "logits/chosen": -0.9494578838348389, + "logits/rejected": -0.9703213572502136, + "logps/chosen": -0.01268111914396286, + "logps/rejected": -2.3685264587402344, + "loss": 1.1147, + "nll_loss": 0.2786535620689392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001268112100660801, + "rewards/margins": 0.23558452725410461, + "rewards/rejected": -0.23685264587402344, + "step": 6836 + }, + { + "epoch": 4.728215767634855, + "grad_norm": 8.254740715026855, + "learning_rate": 2.9287690179806365e-05, + "log_odds_chosen": 9.981782913208008, + "log_odds_ratio": -0.00010545213444856927, + "logits/chosen": -0.5261760354042053, + "logits/rejected": -0.6000370979309082, + "logps/chosen": -0.001380457542836666, + "logps/rejected": -2.0302622318267822, + "loss": 0.9971, + "nll_loss": 0.24925638735294342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013804573973175138, + "rewards/margins": 0.20288817584514618, + "rewards/rejected": -0.20302622020244598, + "step": 6837 + }, + { + "epoch": 4.728907330567082, + "grad_norm": 14.098108291625977, + "learning_rate": 2.9283848163516214e-05, + "log_odds_chosen": 10.196685791015625, + "log_odds_ratio": -0.00651351734995842, + "logits/chosen": -0.5857874751091003, + "logits/rejected": -0.6654095649719238, + "logps/chosen": -0.002720482647418976, + "logps/rejected": -1.7905726432800293, + "loss": 0.8894, + "nll_loss": 0.22168682515621185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002720482589211315, + "rewards/margins": 0.17878523468971252, + "rewards/rejected": -0.1790572851896286, + "step": 6838 + }, + { + "epoch": 4.7295988934993085, + "grad_norm": 15.732999801635742, + "learning_rate": 2.9280006147226063e-05, + "log_odds_chosen": 10.79551887512207, + "log_odds_ratio": -0.00019581010565161705, + "logits/chosen": -0.34896230697631836, + "logits/rejected": -0.44549262523651123, + "logps/chosen": -0.0003223324310965836, + "logps/rejected": -2.4413161277770996, + "loss": 1.4394, + "nll_loss": 0.35984066128730774, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.223324165446684e-05, + "rewards/margins": 0.24409937858581543, + "rewards/rejected": -0.24413160979747772, + "step": 6839 + }, + { + "epoch": 4.730290456431535, + "grad_norm": 6.080704689025879, + "learning_rate": 2.927616413093592e-05, + "log_odds_chosen": 11.242830276489258, + "log_odds_ratio": -8.2915517850779e-05, + "logits/chosen": -0.5376768112182617, + "logits/rejected": -0.5856378078460693, + "logps/chosen": -0.0006905734771862626, + "logps/rejected": -3.2126030921936035, + "loss": 0.9296, + "nll_loss": 0.23237961530685425, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.90573506290093e-05, + "rewards/margins": 0.3211912512779236, + "rewards/rejected": -0.32126033306121826, + "step": 6840 + }, + { + "epoch": 4.730982019363762, + "grad_norm": 10.972521781921387, + "learning_rate": 2.9272322114645768e-05, + "log_odds_chosen": 10.36833381652832, + "log_odds_ratio": -4.3805652239825577e-05, + "logits/chosen": -0.86460942029953, + "logits/rejected": -0.9351431131362915, + "logps/chosen": -0.0007878030883148313, + "logps/rejected": -2.054579019546509, + "loss": 1.3458, + "nll_loss": 0.3364557921886444, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.87803073762916e-05, + "rewards/margins": 0.20537912845611572, + "rewards/rejected": -0.2054579108953476, + "step": 6841 + }, + { + "epoch": 4.731673582295989, + "grad_norm": 9.36708927154541, + "learning_rate": 2.9268480098355617e-05, + "log_odds_chosen": 8.909065246582031, + "log_odds_ratio": -0.009562905877828598, + "logits/chosen": -0.41860371828079224, + "logits/rejected": -0.43275922536849976, + "logps/chosen": -0.005152152851223946, + "logps/rejected": -1.9994559288024902, + "loss": 1.0685, + "nll_loss": 0.2661742568016052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005152152734808624, + "rewards/margins": 0.19943037629127502, + "rewards/rejected": -0.1999455988407135, + "step": 6842 + }, + { + "epoch": 4.732365145228216, + "grad_norm": 17.818878173828125, + "learning_rate": 2.926463808206547e-05, + "log_odds_chosen": 11.118095397949219, + "log_odds_ratio": -2.2907794118509628e-05, + "logits/chosen": -0.6833236813545227, + "logits/rejected": -0.7515619993209839, + "logps/chosen": -0.0003684722469188273, + "logps/rejected": -2.491759777069092, + "loss": 0.9781, + "nll_loss": 0.24452534317970276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.684722105390392e-05, + "rewards/margins": 0.24913913011550903, + "rewards/rejected": -0.24917598068714142, + "step": 6843 + }, + { + "epoch": 4.733056708160443, + "grad_norm": 9.49808406829834, + "learning_rate": 2.926079606577532e-05, + "log_odds_chosen": 10.65138053894043, + "log_odds_ratio": -4.175342837697826e-05, + "logits/chosen": -0.7912575006484985, + "logits/rejected": -0.7968187928199768, + "logps/chosen": -0.0003375337109901011, + "logps/rejected": -2.363999128341675, + "loss": 1.4575, + "nll_loss": 0.3643767833709717, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.375337109901011e-05, + "rewards/margins": 0.2363661676645279, + "rewards/rejected": -0.23639993369579315, + "step": 6844 + }, + { + "epoch": 4.7337482710926695, + "grad_norm": 5.025328159332275, + "learning_rate": 2.9256954049485168e-05, + "log_odds_chosen": 9.391399383544922, + "log_odds_ratio": -0.0013213662896305323, + "logits/chosen": -0.6365231871604919, + "logits/rejected": -0.6581612825393677, + "logps/chosen": -0.0017269393429160118, + "logps/rejected": -2.219834089279175, + "loss": 1.985, + "nll_loss": 0.4961143434047699, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017269395175389946, + "rewards/margins": 0.2218107283115387, + "rewards/rejected": -0.221983402967453, + "step": 6845 + }, + { + "epoch": 4.734439834024896, + "grad_norm": 9.549975395202637, + "learning_rate": 2.9253112033195024e-05, + "log_odds_chosen": 9.500479698181152, + "log_odds_ratio": -0.0008104207227006555, + "logits/chosen": -0.44024544954299927, + "logits/rejected": -0.5364531874656677, + "logps/chosen": -0.001147839822806418, + "logps/rejected": -1.707168459892273, + "loss": 1.1765, + "nll_loss": 0.2940320372581482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011478398664621636, + "rewards/margins": 0.17060205340385437, + "rewards/rejected": -0.17071683704853058, + "step": 6846 + }, + { + "epoch": 4.735131396957123, + "grad_norm": 7.652698993682861, + "learning_rate": 2.9249270016904873e-05, + "log_odds_chosen": 10.424861907958984, + "log_odds_ratio": -5.540785423363559e-05, + "logits/chosen": -0.8006737232208252, + "logits/rejected": -0.8251084089279175, + "logps/chosen": -0.0002055955264950171, + "logps/rejected": -1.7508656978607178, + "loss": 0.7046, + "nll_loss": 0.1761522740125656, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0559553377097473e-05, + "rewards/margins": 0.1750660091638565, + "rewards/rejected": -0.17508655786514282, + "step": 6847 + }, + { + "epoch": 4.73582295988935, + "grad_norm": 11.136614799499512, + "learning_rate": 2.9245428000614722e-05, + "log_odds_chosen": 11.116748809814453, + "log_odds_ratio": -1.792412695067469e-05, + "logits/chosen": -0.6997305154800415, + "logits/rejected": -0.7258143424987793, + "logps/chosen": -0.00017949882021639496, + "logps/rejected": -2.3606107234954834, + "loss": 1.0916, + "nll_loss": 0.27288857102394104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7949882021639496e-05, + "rewards/margins": 0.23604314029216766, + "rewards/rejected": -0.23606108129024506, + "step": 6848 + }, + { + "epoch": 4.736514522821577, + "grad_norm": 6.447893142700195, + "learning_rate": 2.9241585984324578e-05, + "log_odds_chosen": 9.89136791229248, + "log_odds_ratio": -0.00013746600598096848, + "logits/chosen": -0.31088072061538696, + "logits/rejected": -0.37981265783309937, + "logps/chosen": -0.0003857373376376927, + "logps/rejected": -1.7355481386184692, + "loss": 0.8318, + "nll_loss": 0.20794863998889923, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.857373303617351e-05, + "rewards/margins": 0.17351624369621277, + "rewards/rejected": -0.17355480790138245, + "step": 6849 + }, + { + "epoch": 4.737206085753804, + "grad_norm": 4.731841564178467, + "learning_rate": 2.9237743968034427e-05, + "log_odds_chosen": 10.951615333557129, + "log_odds_ratio": -6.95253474987112e-05, + "logits/chosen": -0.2506423592567444, + "logits/rejected": -0.26202479004859924, + "logps/chosen": -0.00010808245133375749, + "logps/rejected": -1.7184841632843018, + "loss": 0.5642, + "nll_loss": 0.14103692770004272, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.080824586097151e-05, + "rewards/margins": 0.17183761298656464, + "rewards/rejected": -0.17184841632843018, + "step": 6850 + }, + { + "epoch": 4.7378976486860305, + "grad_norm": 6.2841339111328125, + "learning_rate": 2.9233901951744276e-05, + "log_odds_chosen": 9.07808780670166, + "log_odds_ratio": -0.0015401438577100635, + "logits/chosen": -0.6515994071960449, + "logits/rejected": -0.670263409614563, + "logps/chosen": -0.0038971693720668554, + "logps/rejected": -1.562159776687622, + "loss": 0.9586, + "nll_loss": 0.23948949575424194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000389716966310516, + "rewards/margins": 0.15582627058029175, + "rewards/rejected": -0.15621599555015564, + "step": 6851 + }, + { + "epoch": 4.738589211618257, + "grad_norm": 14.31385612487793, + "learning_rate": 2.9230059935454128e-05, + "log_odds_chosen": 10.582845687866211, + "log_odds_ratio": -0.00012897816486656666, + "logits/chosen": -0.2380611151456833, + "logits/rejected": -0.2947745621204376, + "logps/chosen": -0.0022557827178388834, + "logps/rejected": -2.843648910522461, + "loss": 1.2785, + "nll_loss": 0.31961789727211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022557828924618661, + "rewards/margins": 0.28413933515548706, + "rewards/rejected": -0.28436487913131714, + "step": 6852 + }, + { + "epoch": 4.739280774550484, + "grad_norm": 18.392780303955078, + "learning_rate": 2.9226217919163977e-05, + "log_odds_chosen": 9.665806770324707, + "log_odds_ratio": -0.0036647897213697433, + "logits/chosen": 0.08682667464017868, + "logits/rejected": 0.012252740561962128, + "logps/chosen": -0.002794300438836217, + "logps/rejected": -1.483237624168396, + "loss": 0.7833, + "nll_loss": 0.19546152651309967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027943006716668606, + "rewards/margins": 0.14804433286190033, + "rewards/rejected": -0.14832375943660736, + "step": 6853 + }, + { + "epoch": 4.739972337482711, + "grad_norm": 8.896881103515625, + "learning_rate": 2.9222375902873826e-05, + "log_odds_chosen": 9.814126968383789, + "log_odds_ratio": -0.002193056046962738, + "logits/chosen": -0.3735635280609131, + "logits/rejected": -0.40650835633277893, + "logps/chosen": -0.016320127993822098, + "logps/rejected": -2.8269453048706055, + "loss": 0.9176, + "nll_loss": 0.22918462753295898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016320126596838236, + "rewards/margins": 0.2810625433921814, + "rewards/rejected": -0.2826945185661316, + "step": 6854 + }, + { + "epoch": 4.740663900414938, + "grad_norm": 12.259506225585938, + "learning_rate": 2.9218533886583682e-05, + "log_odds_chosen": 10.840656280517578, + "log_odds_ratio": -3.960132016800344e-05, + "logits/chosen": -0.402145117521286, + "logits/rejected": -0.424393892288208, + "logps/chosen": -0.00025384893524460495, + "logps/rejected": -1.978333592414856, + "loss": 1.0334, + "nll_loss": 0.25833389163017273, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5384893888258375e-05, + "rewards/margins": 0.19780796766281128, + "rewards/rejected": -0.1978333741426468, + "step": 6855 + }, + { + "epoch": 4.741355463347165, + "grad_norm": 10.188279151916504, + "learning_rate": 2.921469187029353e-05, + "log_odds_chosen": 10.682395935058594, + "log_odds_ratio": -4.144748891121708e-05, + "logits/chosen": -0.2968418598175049, + "logits/rejected": -0.28772151470184326, + "logps/chosen": -0.00025522822397761047, + "logps/rejected": -2.024625301361084, + "loss": 1.1162, + "nll_loss": 0.27904069423675537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5522822397761047e-05, + "rewards/margins": 0.20243701338768005, + "rewards/rejected": -0.20246252417564392, + "step": 6856 + }, + { + "epoch": 4.7420470262793915, + "grad_norm": 12.18539810180664, + "learning_rate": 2.921084985400338e-05, + "log_odds_chosen": 10.338611602783203, + "log_odds_ratio": -6.693832256132737e-05, + "logits/chosen": -0.431133896112442, + "logits/rejected": -0.46184462308883667, + "logps/chosen": -0.00026900594821199775, + "logps/rejected": -1.8532644510269165, + "loss": 0.7654, + "nll_loss": 0.1913406252861023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.690059773158282e-05, + "rewards/margins": 0.18529954552650452, + "rewards/rejected": -0.1853264570236206, + "step": 6857 + }, + { + "epoch": 4.742738589211618, + "grad_norm": 12.808045387268066, + "learning_rate": 2.9207007837713236e-05, + "log_odds_chosen": 7.228079795837402, + "log_odds_ratio": -0.03855385258793831, + "logits/chosen": -0.7129365801811218, + "logits/rejected": -0.6985089778900146, + "logps/chosen": -0.05296050012111664, + "logps/rejected": -1.012087106704712, + "loss": 0.9236, + "nll_loss": 0.2270495742559433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005296050570905209, + "rewards/margins": 0.09591265767812729, + "rewards/rejected": -0.10120870918035507, + "step": 6858 + }, + { + "epoch": 4.743430152143845, + "grad_norm": 12.793832778930664, + "learning_rate": 2.9203165821423085e-05, + "log_odds_chosen": 9.732256889343262, + "log_odds_ratio": -0.0052786460146307945, + "logits/chosen": -0.6305315494537354, + "logits/rejected": -0.7240175008773804, + "logps/chosen": -0.0033087388146668673, + "logps/rejected": -1.9252982139587402, + "loss": 1.2624, + "nll_loss": 0.31506380438804626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003308738814666867, + "rewards/margins": 0.1921989470720291, + "rewards/rejected": -0.1925298124551773, + "step": 6859 + }, + { + "epoch": 4.744121715076072, + "grad_norm": 12.654400825500488, + "learning_rate": 2.9199323805132934e-05, + "log_odds_chosen": 9.324658393859863, + "log_odds_ratio": -0.024602821096777916, + "logits/chosen": -0.8696132898330688, + "logits/rejected": -0.9861457347869873, + "logps/chosen": -0.0066756573505699635, + "logps/rejected": -2.0346033573150635, + "loss": 1.4154, + "nll_loss": 0.3513864278793335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006675656768493354, + "rewards/margins": 0.20279279351234436, + "rewards/rejected": -0.20346036553382874, + "step": 6860 + }, + { + "epoch": 4.744813278008299, + "grad_norm": 8.300000190734863, + "learning_rate": 2.9195481788842787e-05, + "log_odds_chosen": 10.4987154006958, + "log_odds_ratio": -5.7042387197725475e-05, + "logits/chosen": -0.3381150960922241, + "logits/rejected": -0.46609026193618774, + "logps/chosen": -0.00037316203815862536, + "logps/rejected": -1.864619493484497, + "loss": 0.7372, + "nll_loss": 0.18428348004817963, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7316207453841344e-05, + "rewards/margins": 0.1864246428012848, + "rewards/rejected": -0.18646195530891418, + "step": 6861 + }, + { + "epoch": 4.745504840940526, + "grad_norm": 7.251094341278076, + "learning_rate": 2.9191639772552636e-05, + "log_odds_chosen": 11.42138385772705, + "log_odds_ratio": -1.9719334886758588e-05, + "logits/chosen": -0.4525086283683777, + "logits/rejected": -0.44969817996025085, + "logps/chosen": -0.00018888995691668242, + "logps/rejected": -2.7225518226623535, + "loss": 0.6575, + "nll_loss": 0.16436880826950073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.888899532787036e-05, + "rewards/margins": 0.27223631739616394, + "rewards/rejected": -0.27225518226623535, + "step": 6862 + }, + { + "epoch": 4.746196403872752, + "grad_norm": 8.857154846191406, + "learning_rate": 2.9187797756262485e-05, + "log_odds_chosen": 9.985649108886719, + "log_odds_ratio": -5.7827353884931654e-05, + "logits/chosen": -0.5242704749107361, + "logits/rejected": -0.5169914364814758, + "logps/chosen": -0.00016215082723647356, + "logps/rejected": -1.2925277948379517, + "loss": 0.8603, + "nll_loss": 0.21506929397583008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6215082723647356e-05, + "rewards/margins": 0.12923656404018402, + "rewards/rejected": -0.12925279140472412, + "step": 6863 + }, + { + "epoch": 4.746887966804979, + "grad_norm": 9.788476943969727, + "learning_rate": 2.918395573997234e-05, + "log_odds_chosen": 10.543601036071777, + "log_odds_ratio": -6.150496483314782e-05, + "logits/chosen": -0.5418503880500793, + "logits/rejected": -0.599971354007721, + "logps/chosen": -0.00018318725051358342, + "logps/rejected": -1.932619333267212, + "loss": 0.7238, + "nll_loss": 0.1809331774711609, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8318725778954104e-05, + "rewards/margins": 0.193243607878685, + "rewards/rejected": -0.19326192140579224, + "step": 6864 + }, + { + "epoch": 4.747579529737206, + "grad_norm": 9.583395957946777, + "learning_rate": 2.918011372368219e-05, + "log_odds_chosen": 8.546165466308594, + "log_odds_ratio": -0.002676730277016759, + "logits/chosen": -0.3589290380477905, + "logits/rejected": -0.3195071220397949, + "logps/chosen": -0.0016474723815917969, + "logps/rejected": -1.278213620185852, + "loss": 1.1595, + "nll_loss": 0.28960418701171875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001647472381591797, + "rewards/margins": 0.12765660881996155, + "rewards/rejected": -0.12782135605812073, + "step": 6865 + }, + { + "epoch": 4.748271092669433, + "grad_norm": 9.593999862670898, + "learning_rate": 2.917627170739204e-05, + "log_odds_chosen": 10.382241249084473, + "log_odds_ratio": -0.00010884056246140972, + "logits/chosen": -0.20423482358455658, + "logits/rejected": -0.24997065961360931, + "logps/chosen": -0.0003937993897125125, + "logps/rejected": -2.322338581085205, + "loss": 1.2207, + "nll_loss": 0.3051546514034271, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.937993824365549e-05, + "rewards/margins": 0.23219448328018188, + "rewards/rejected": -0.23223388195037842, + "step": 6866 + }, + { + "epoch": 4.74896265560166, + "grad_norm": 5.929619789123535, + "learning_rate": 2.9172429691101894e-05, + "log_odds_chosen": 9.464195251464844, + "log_odds_ratio": -0.0007255043019540608, + "logits/chosen": -0.45147010684013367, + "logits/rejected": -0.62174391746521, + "logps/chosen": -0.006944280583411455, + "logps/rejected": -2.5819144248962402, + "loss": 1.4374, + "nll_loss": 0.35927289724349976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006944281049072742, + "rewards/margins": 0.25749704241752625, + "rewards/rejected": -0.25819146633148193, + "step": 6867 + }, + { + "epoch": 4.749654218533887, + "grad_norm": 6.857417583465576, + "learning_rate": 2.9168587674811744e-05, + "log_odds_chosen": 10.632462501525879, + "log_odds_ratio": -0.0001170021205325611, + "logits/chosen": -0.8040260076522827, + "logits/rejected": -0.8201639652252197, + "logps/chosen": -0.001678706961683929, + "logps/rejected": -2.558511734008789, + "loss": 1.5705, + "nll_loss": 0.3926093280315399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001678706903476268, + "rewards/margins": 0.2556833028793335, + "rewards/rejected": -0.255851149559021, + "step": 6868 + }, + { + "epoch": 4.750345781466113, + "grad_norm": 9.932305335998535, + "learning_rate": 2.9164745658521593e-05, + "log_odds_chosen": 10.164902687072754, + "log_odds_ratio": -0.000663359765894711, + "logits/chosen": -0.8776459693908691, + "logits/rejected": -0.9496436715126038, + "logps/chosen": -0.0014868256403133273, + "logps/rejected": -1.9420034885406494, + "loss": 1.1408, + "nll_loss": 0.2851316034793854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014868256403133273, + "rewards/margins": 0.19405166804790497, + "rewards/rejected": -0.194200336933136, + "step": 6869 + }, + { + "epoch": 4.75103734439834, + "grad_norm": 11.761977195739746, + "learning_rate": 2.9160903642231445e-05, + "log_odds_chosen": 11.273965835571289, + "log_odds_ratio": -1.961475209100172e-05, + "logits/chosen": -0.9739042520523071, + "logits/rejected": -1.0394816398620605, + "logps/chosen": -0.00017334794392809272, + "logps/rejected": -2.333815574645996, + "loss": 1.4907, + "nll_loss": 0.37267541885375977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7334794392809272e-05, + "rewards/margins": 0.23336420953273773, + "rewards/rejected": -0.23338152468204498, + "step": 6870 + }, + { + "epoch": 4.751728907330567, + "grad_norm": 4.890721321105957, + "learning_rate": 2.9157061625941294e-05, + "log_odds_chosen": 9.884302139282227, + "log_odds_ratio": -0.0005739243933930993, + "logits/chosen": -0.5567978620529175, + "logits/rejected": -0.5279030799865723, + "logps/chosen": -0.0034497843589633703, + "logps/rejected": -2.587547540664673, + "loss": 1.3542, + "nll_loss": 0.3385036587715149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003449784417171031, + "rewards/margins": 0.2584097981452942, + "rewards/rejected": -0.25875476002693176, + "step": 6871 + }, + { + "epoch": 4.752420470262794, + "grad_norm": 8.71142578125, + "learning_rate": 2.9153219609651143e-05, + "log_odds_chosen": 10.85763931274414, + "log_odds_ratio": -3.525703868945129e-05, + "logits/chosen": -0.7038207054138184, + "logits/rejected": -0.6762052178382874, + "logps/chosen": -0.0002038024686044082, + "logps/rejected": -2.1100528240203857, + "loss": 1.3561, + "nll_loss": 0.33902662992477417, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0380248315632343e-05, + "rewards/margins": 0.21098490059375763, + "rewards/rejected": -0.21100527048110962, + "step": 6872 + }, + { + "epoch": 4.753112033195021, + "grad_norm": 16.125587463378906, + "learning_rate": 2.9149377593361e-05, + "log_odds_chosen": 10.835062026977539, + "log_odds_ratio": -8.323652582475916e-05, + "logits/chosen": -0.7778855562210083, + "logits/rejected": -0.897720217704773, + "logps/chosen": -0.0006206457619555295, + "logps/rejected": -2.8107075691223145, + "loss": 1.0211, + "nll_loss": 0.255276620388031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.20645732851699e-05, + "rewards/margins": 0.28100869059562683, + "rewards/rejected": -0.281070739030838, + "step": 6873 + }, + { + "epoch": 4.753803596127248, + "grad_norm": 5.896915435791016, + "learning_rate": 2.9145535577070848e-05, + "log_odds_chosen": 10.160299301147461, + "log_odds_ratio": -5.5131924455054104e-05, + "logits/chosen": -0.7155916690826416, + "logits/rejected": -0.6823525428771973, + "logps/chosen": -0.0005040961550548673, + "logps/rejected": -2.3147830963134766, + "loss": 0.819, + "nll_loss": 0.20473623275756836, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.040961696067825e-05, + "rewards/margins": 0.23142790794372559, + "rewards/rejected": -0.23147833347320557, + "step": 6874 + }, + { + "epoch": 4.754495159059474, + "grad_norm": 10.174572944641113, + "learning_rate": 2.9141693560780697e-05, + "log_odds_chosen": 9.549734115600586, + "log_odds_ratio": -0.0004066435503773391, + "logits/chosen": -0.4515111446380615, + "logits/rejected": -0.5325211882591248, + "logps/chosen": -0.002241352340206504, + "logps/rejected": -1.8842123746871948, + "loss": 1.1395, + "nll_loss": 0.28482872247695923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022413526312448084, + "rewards/margins": 0.18819710612297058, + "rewards/rejected": -0.18842124938964844, + "step": 6875 + }, + { + "epoch": 4.755186721991701, + "grad_norm": 13.478486061096191, + "learning_rate": 2.9137851544490553e-05, + "log_odds_chosen": 11.715140342712402, + "log_odds_ratio": -1.260038152395282e-05, + "logits/chosen": -0.6951454281806946, + "logits/rejected": -0.664121687412262, + "logps/chosen": -0.00015733492909930646, + "logps/rejected": -2.7464029788970947, + "loss": 0.9162, + "nll_loss": 0.22904790937900543, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.573349436512217e-05, + "rewards/margins": 0.2746245563030243, + "rewards/rejected": -0.274640291929245, + "step": 6876 + }, + { + "epoch": 4.755878284923928, + "grad_norm": 9.419013977050781, + "learning_rate": 2.9134009528200402e-05, + "log_odds_chosen": 10.264875411987305, + "log_odds_ratio": -8.103143773041666e-05, + "logits/chosen": -0.9880549907684326, + "logits/rejected": -1.0210462808609009, + "logps/chosen": -0.0005025397404097021, + "logps/rejected": -2.2138586044311523, + "loss": 0.8977, + "nll_loss": 0.22442609071731567, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0253973313374445e-05, + "rewards/margins": 0.22133558988571167, + "rewards/rejected": -0.2213858664035797, + "step": 6877 + }, + { + "epoch": 4.756569847856155, + "grad_norm": 9.813071250915527, + "learning_rate": 2.913016751191025e-05, + "log_odds_chosen": 10.428069114685059, + "log_odds_ratio": -6.0980233683949336e-05, + "logits/chosen": -0.4846192002296448, + "logits/rejected": -0.5764331221580505, + "logps/chosen": -0.0006243964890018106, + "logps/rejected": -2.4788196086883545, + "loss": 0.7249, + "nll_loss": 0.1812150776386261, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.243964890018106e-05, + "rewards/margins": 0.24781951308250427, + "rewards/rejected": -0.2478819638490677, + "step": 6878 + }, + { + "epoch": 4.757261410788382, + "grad_norm": 8.915365219116211, + "learning_rate": 2.9126325495620103e-05, + "log_odds_chosen": 11.576354026794434, + "log_odds_ratio": -1.2781900295522064e-05, + "logits/chosen": -1.0454456806182861, + "logits/rejected": -1.0896204710006714, + "logps/chosen": -0.00016211725596804172, + "logps/rejected": -2.7902166843414307, + "loss": 0.8306, + "nll_loss": 0.20765334367752075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6211726688197814e-05, + "rewards/margins": 0.2790054678916931, + "rewards/rejected": -0.279021680355072, + "step": 6879 + }, + { + "epoch": 4.7579529737206085, + "grad_norm": 13.442499160766602, + "learning_rate": 2.9122483479329953e-05, + "log_odds_chosen": 9.946861267089844, + "log_odds_ratio": -0.0005052194464951754, + "logits/chosen": -0.7467355728149414, + "logits/rejected": -0.7708727121353149, + "logps/chosen": -0.0042512849904596806, + "logps/rejected": -2.125576972961426, + "loss": 1.4994, + "nll_loss": 0.3748067021369934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042512849904596806, + "rewards/margins": 0.21213257312774658, + "rewards/rejected": -0.21255768835544586, + "step": 6880 + }, + { + "epoch": 4.758644536652835, + "grad_norm": 4.847219944000244, + "learning_rate": 2.91186414630398e-05, + "log_odds_chosen": 9.050625801086426, + "log_odds_ratio": -0.017848612740635872, + "logits/chosen": -0.199097141623497, + "logits/rejected": -0.2752664089202881, + "logps/chosen": -0.006846034899353981, + "logps/rejected": -1.7001570463180542, + "loss": 1.1058, + "nll_loss": 0.2746548652648926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006846035248599946, + "rewards/margins": 0.16933110356330872, + "rewards/rejected": -0.17001570761203766, + "step": 6881 + }, + { + "epoch": 4.759336099585062, + "grad_norm": 13.729485511779785, + "learning_rate": 2.9114799446749657e-05, + "log_odds_chosen": 9.440155029296875, + "log_odds_ratio": -0.00013803632464259863, + "logits/chosen": -0.2779269814491272, + "logits/rejected": -0.3506007492542267, + "logps/chosen": -0.00039239716716110706, + "logps/rejected": -1.6433520317077637, + "loss": 0.9868, + "nll_loss": 0.2466796338558197, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9239715988514945e-05, + "rewards/margins": 0.164295956492424, + "rewards/rejected": -0.1643352061510086, + "step": 6882 + }, + { + "epoch": 4.760027662517289, + "grad_norm": 7.702147960662842, + "learning_rate": 2.9110957430459506e-05, + "log_odds_chosen": 10.948719024658203, + "log_odds_ratio": -7.656020170543343e-05, + "logits/chosen": -0.36923375725746155, + "logits/rejected": -0.3752695918083191, + "logps/chosen": -0.00018135455320589244, + "logps/rejected": -2.171945810317993, + "loss": 0.9153, + "nll_loss": 0.22881731390953064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8135455320589244e-05, + "rewards/margins": 0.2171764373779297, + "rewards/rejected": -0.2171945720911026, + "step": 6883 + }, + { + "epoch": 4.760719225449516, + "grad_norm": 9.405759811401367, + "learning_rate": 2.9107115414169356e-05, + "log_odds_chosen": 10.371192932128906, + "log_odds_ratio": -3.8315774872899055e-05, + "logits/chosen": -0.370733380317688, + "logits/rejected": -0.4181835949420929, + "logps/chosen": -0.00018985618953593075, + "logps/rejected": -1.7754912376403809, + "loss": 0.741, + "nll_loss": 0.1852462887763977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8985620044986717e-05, + "rewards/margins": 0.17753013968467712, + "rewards/rejected": -0.17754912376403809, + "step": 6884 + }, + { + "epoch": 4.761410788381743, + "grad_norm": 6.149433135986328, + "learning_rate": 2.910327339787921e-05, + "log_odds_chosen": 9.182944297790527, + "log_odds_ratio": -0.004304947331547737, + "logits/chosen": -0.48084306716918945, + "logits/rejected": -0.5079762935638428, + "logps/chosen": -0.004953990690410137, + "logps/rejected": -2.269893169403076, + "loss": 0.9361, + "nll_loss": 0.23360510170459747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004953990573994815, + "rewards/margins": 0.2264939248561859, + "rewards/rejected": -0.22698931396007538, + "step": 6885 + }, + { + "epoch": 4.7621023513139695, + "grad_norm": 10.778986930847168, + "learning_rate": 2.909943138158906e-05, + "log_odds_chosen": 10.12750244140625, + "log_odds_ratio": -0.0005071478663012385, + "logits/chosen": -0.8861774802207947, + "logits/rejected": -0.950569212436676, + "logps/chosen": -0.0003685950068756938, + "logps/rejected": -1.6489312648773193, + "loss": 1.4241, + "nll_loss": 0.35597312450408936, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.685949923237786e-05, + "rewards/margins": 0.16485625505447388, + "rewards/rejected": -0.16489310562610626, + "step": 6886 + }, + { + "epoch": 4.762793914246196, + "grad_norm": 11.243009567260742, + "learning_rate": 2.909558936529891e-05, + "log_odds_chosen": 9.722782135009766, + "log_odds_ratio": -0.002325496170669794, + "logits/chosen": -0.7638850212097168, + "logits/rejected": -0.7820266485214233, + "logps/chosen": -0.002243445487692952, + "logps/rejected": -2.244614362716675, + "loss": 1.1933, + "nll_loss": 0.2981026768684387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022434454876929522, + "rewards/margins": 0.2242370843887329, + "rewards/rejected": -0.2244614213705063, + "step": 6887 + }, + { + "epoch": 4.763485477178423, + "grad_norm": 7.690074920654297, + "learning_rate": 2.9091747349008762e-05, + "log_odds_chosen": 10.77735710144043, + "log_odds_ratio": -0.0010174752678722143, + "logits/chosen": -0.7401840090751648, + "logits/rejected": -0.8211042284965515, + "logps/chosen": -0.0016121247317641973, + "logps/rejected": -3.074896812438965, + "loss": 0.8382, + "nll_loss": 0.20945952832698822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016121247608680278, + "rewards/margins": 0.307328462600708, + "rewards/rejected": -0.30748969316482544, + "step": 6888 + }, + { + "epoch": 4.76417704011065, + "grad_norm": 9.203757286071777, + "learning_rate": 2.908790533271861e-05, + "log_odds_chosen": 9.383280754089355, + "log_odds_ratio": -0.004346000496298075, + "logits/chosen": -0.9272629022598267, + "logits/rejected": -0.9472211599349976, + "logps/chosen": -0.0026920849923044443, + "logps/rejected": -1.4084715843200684, + "loss": 1.5712, + "nll_loss": 0.39237409830093384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026920848176814616, + "rewards/margins": 0.14057794213294983, + "rewards/rejected": -0.14084716141223907, + "step": 6889 + }, + { + "epoch": 4.764868603042877, + "grad_norm": 10.183735847473145, + "learning_rate": 2.908406331642846e-05, + "log_odds_chosen": 9.105962753295898, + "log_odds_ratio": -0.0018381911795586348, + "logits/chosen": -0.7122400999069214, + "logits/rejected": -0.8362756371498108, + "logps/chosen": -0.0014686386566609144, + "logps/rejected": -1.8283908367156982, + "loss": 0.8191, + "nll_loss": 0.20458099246025085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014686388021800667, + "rewards/margins": 0.18269222974777222, + "rewards/rejected": -0.1828390657901764, + "step": 6890 + }, + { + "epoch": 4.765560165975104, + "grad_norm": 12.078200340270996, + "learning_rate": 2.9080221300138316e-05, + "log_odds_chosen": 8.823981285095215, + "log_odds_ratio": -0.014966276474297047, + "logits/chosen": -0.7389836311340332, + "logits/rejected": -0.7439814805984497, + "logps/chosen": -0.005864677019417286, + "logps/rejected": -2.1136207580566406, + "loss": 1.0892, + "nll_loss": 0.27079248428344727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005864677368663251, + "rewards/margins": 0.21077561378479004, + "rewards/rejected": -0.2113620638847351, + "step": 6891 + }, + { + "epoch": 4.7662517289073305, + "grad_norm": 9.881623268127441, + "learning_rate": 2.9076379283848165e-05, + "log_odds_chosen": 10.488458633422852, + "log_odds_ratio": -0.00015489206998609006, + "logits/chosen": -0.6555848121643066, + "logits/rejected": -0.6229965686798096, + "logps/chosen": -0.000846775365062058, + "logps/rejected": -2.6446585655212402, + "loss": 1.1214, + "nll_loss": 0.28033578395843506, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.46775365062058e-05, + "rewards/margins": 0.26438117027282715, + "rewards/rejected": -0.264465868473053, + "step": 6892 + }, + { + "epoch": 4.766943291839557, + "grad_norm": 10.037402153015137, + "learning_rate": 2.9072537267558014e-05, + "log_odds_chosen": 10.82370376586914, + "log_odds_ratio": -6.398136611096561e-05, + "logits/chosen": -0.12798471748828888, + "logits/rejected": -0.1659693568944931, + "logps/chosen": -0.0010921619832515717, + "logps/rejected": -2.795205593109131, + "loss": 1.1708, + "nll_loss": 0.2927056849002838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010921619832515717, + "rewards/margins": 0.27941134572029114, + "rewards/rejected": -0.27952057123184204, + "step": 6893 + }, + { + "epoch": 4.767634854771784, + "grad_norm": 6.187631607055664, + "learning_rate": 2.906869525126787e-05, + "log_odds_chosen": 10.588701248168945, + "log_odds_ratio": -6.957812001928687e-05, + "logits/chosen": -0.5498100519180298, + "logits/rejected": -0.6913700103759766, + "logps/chosen": -0.000816626416053623, + "logps/rejected": -2.9310169219970703, + "loss": 0.8164, + "nll_loss": 0.20408526062965393, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.166264888131991e-05, + "rewards/margins": 0.29302000999450684, + "rewards/rejected": -0.2931016981601715, + "step": 6894 + }, + { + "epoch": 4.768326417704011, + "grad_norm": 10.408868789672852, + "learning_rate": 2.906485323497772e-05, + "log_odds_chosen": 9.834101676940918, + "log_odds_ratio": -0.02988821268081665, + "logits/chosen": -0.3863828182220459, + "logits/rejected": -0.5003218054771423, + "logps/chosen": -0.008340238593518734, + "logps/rejected": -2.438636302947998, + "loss": 0.918, + "nll_loss": 0.2265024036169052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008340239292010665, + "rewards/margins": 0.2430296242237091, + "rewards/rejected": -0.24386364221572876, + "step": 6895 + }, + { + "epoch": 4.769017980636238, + "grad_norm": 9.838829040527344, + "learning_rate": 2.9061011218687568e-05, + "log_odds_chosen": 9.691939353942871, + "log_odds_ratio": -0.0018959257286041975, + "logits/chosen": -0.6795417070388794, + "logits/rejected": -0.7193290591239929, + "logps/chosen": -0.0024579009041190147, + "logps/rejected": -2.421403646469116, + "loss": 0.9841, + "nll_loss": 0.24582822620868683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024579011369496584, + "rewards/margins": 0.24189457297325134, + "rewards/rejected": -0.24214035272598267, + "step": 6896 + }, + { + "epoch": 4.769709543568465, + "grad_norm": 9.481804847717285, + "learning_rate": 2.905716920239742e-05, + "log_odds_chosen": 9.713380813598633, + "log_odds_ratio": -0.0004562270478345454, + "logits/chosen": -0.9557902812957764, + "logits/rejected": -0.9785552620887756, + "logps/chosen": -0.0007574007613584399, + "logps/rejected": -1.7303085327148438, + "loss": 1.0776, + "nll_loss": 0.26935750246047974, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.57400703150779e-05, + "rewards/margins": 0.17295509576797485, + "rewards/rejected": -0.17303083837032318, + "step": 6897 + }, + { + "epoch": 4.7704011065006915, + "grad_norm": 15.33812141418457, + "learning_rate": 2.905332718610727e-05, + "log_odds_chosen": 10.432807922363281, + "log_odds_ratio": -5.272179623716511e-05, + "logits/chosen": -0.8567076325416565, + "logits/rejected": -0.8398425579071045, + "logps/chosen": -0.00020101238624192774, + "logps/rejected": -1.5139999389648438, + "loss": 1.3735, + "nll_loss": 0.3433811366558075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.010124080698006e-05, + "rewards/margins": 0.15137988328933716, + "rewards/rejected": -0.15139998495578766, + "step": 6898 + }, + { + "epoch": 4.771092669432918, + "grad_norm": 8.230432510375977, + "learning_rate": 2.904948516981712e-05, + "log_odds_chosen": 10.600896835327148, + "log_odds_ratio": -0.00017199788999278098, + "logits/chosen": -0.5474926233291626, + "logits/rejected": -0.5394644737243652, + "logps/chosen": -0.00012897196575067937, + "logps/rejected": -1.8010598421096802, + "loss": 0.979, + "nll_loss": 0.24473422765731812, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2897196938865818e-05, + "rewards/margins": 0.18009309470653534, + "rewards/rejected": -0.18010598421096802, + "step": 6899 + }, + { + "epoch": 4.771784232365145, + "grad_norm": 6.7657151222229, + "learning_rate": 2.9045643153526974e-05, + "log_odds_chosen": 9.881768226623535, + "log_odds_ratio": -0.0007866210653446615, + "logits/chosen": -0.6773836016654968, + "logits/rejected": -0.7118434906005859, + "logps/chosen": -0.0003718239313457161, + "logps/rejected": -1.8470865488052368, + "loss": 1.406, + "nll_loss": 0.3514169752597809, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.718238804140128e-05, + "rewards/margins": 0.18467146158218384, + "rewards/rejected": -0.18470865488052368, + "step": 6900 + }, + { + "epoch": 4.772475795297372, + "grad_norm": 7.639023780822754, + "learning_rate": 2.9041801137236823e-05, + "log_odds_chosen": 10.500572204589844, + "log_odds_ratio": -3.654634565464221e-05, + "logits/chosen": -0.7020278573036194, + "logits/rejected": -0.7347521781921387, + "logps/chosen": -0.00024071653024293482, + "logps/rejected": -1.8748853206634521, + "loss": 0.8128, + "nll_loss": 0.20319299399852753, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4071654479485005e-05, + "rewards/margins": 0.18746446073055267, + "rewards/rejected": -0.18748852610588074, + "step": 6901 + }, + { + "epoch": 4.773167358229599, + "grad_norm": 17.48367691040039, + "learning_rate": 2.9037959120946672e-05, + "log_odds_chosen": 11.267285346984863, + "log_odds_ratio": -7.754185207886621e-05, + "logits/chosen": -0.3741925358772278, + "logits/rejected": -0.33184921741485596, + "logps/chosen": -0.0003230986767448485, + "logps/rejected": -2.374788284301758, + "loss": 0.8192, + "nll_loss": 0.20479440689086914, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.230986840208061e-05, + "rewards/margins": 0.23744650185108185, + "rewards/rejected": -0.2374788224697113, + "step": 6902 + }, + { + "epoch": 4.773858921161826, + "grad_norm": 12.380620002746582, + "learning_rate": 2.9034117104656528e-05, + "log_odds_chosen": 9.672462463378906, + "log_odds_ratio": -0.0028193434700369835, + "logits/chosen": -0.5997171401977539, + "logits/rejected": -0.651904284954071, + "logps/chosen": -0.019300207495689392, + "logps/rejected": -1.9906163215637207, + "loss": 1.003, + "nll_loss": 0.25047749280929565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019300205167382956, + "rewards/margins": 0.19713161885738373, + "rewards/rejected": -0.19906164705753326, + "step": 6903 + }, + { + "epoch": 4.7745504840940525, + "grad_norm": 11.972405433654785, + "learning_rate": 2.9030275088366377e-05, + "log_odds_chosen": 9.76927661895752, + "log_odds_ratio": -0.0007554758340120316, + "logits/chosen": -0.7512736320495605, + "logits/rejected": -0.8519138693809509, + "logps/chosen": -0.0012535990681499243, + "logps/rejected": -2.0746865272521973, + "loss": 0.9445, + "nll_loss": 0.23604007065296173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012535988935269415, + "rewards/margins": 0.20734326541423798, + "rewards/rejected": -0.20746862888336182, + "step": 6904 + }, + { + "epoch": 4.775242047026279, + "grad_norm": 8.559404373168945, + "learning_rate": 2.9026433072076226e-05, + "log_odds_chosen": 10.705414772033691, + "log_odds_ratio": -4.388433080748655e-05, + "logits/chosen": -0.17357826232910156, + "logits/rejected": -0.2473318874835968, + "logps/chosen": -0.00044088560389354825, + "logps/rejected": -2.56740403175354, + "loss": 1.0881, + "nll_loss": 0.27202650904655457, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.408856329973787e-05, + "rewards/margins": 0.25669634342193604, + "rewards/rejected": -0.25674039125442505, + "step": 6905 + }, + { + "epoch": 4.775933609958506, + "grad_norm": 9.186101913452148, + "learning_rate": 2.902259105578608e-05, + "log_odds_chosen": 9.613700866699219, + "log_odds_ratio": -0.00048775109462440014, + "logits/chosen": -0.17357760667800903, + "logits/rejected": -0.20531266927719116, + "logps/chosen": -0.0005673019913956523, + "logps/rejected": -1.8855153322219849, + "loss": 1.0829, + "nll_loss": 0.27068501710891724, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6730197684373707e-05, + "rewards/margins": 0.18849480152130127, + "rewards/rejected": -0.18855154514312744, + "step": 6906 + }, + { + "epoch": 4.776625172890733, + "grad_norm": 11.298944473266602, + "learning_rate": 2.9018749039495928e-05, + "log_odds_chosen": 11.490623474121094, + "log_odds_ratio": -3.84019294870086e-05, + "logits/chosen": -0.6738793253898621, + "logits/rejected": -0.7730412483215332, + "logps/chosen": -0.00020757513993885368, + "logps/rejected": -2.5061240196228027, + "loss": 1.0231, + "nll_loss": 0.25577008724212646, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0757517631864175e-05, + "rewards/margins": 0.2505916357040405, + "rewards/rejected": -0.25061243772506714, + "step": 6907 + }, + { + "epoch": 4.77731673582296, + "grad_norm": 5.515003204345703, + "learning_rate": 2.9014907023205777e-05, + "log_odds_chosen": 9.199296951293945, + "log_odds_ratio": -0.000681077188346535, + "logits/chosen": -0.410367488861084, + "logits/rejected": -0.46633514761924744, + "logps/chosen": -0.016598787158727646, + "logps/rejected": -2.2026970386505127, + "loss": 1.5174, + "nll_loss": 0.3792707920074463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016598786460235715, + "rewards/margins": 0.21860985457897186, + "rewards/rejected": -0.22026972472667694, + "step": 6908 + }, + { + "epoch": 4.778008298755187, + "grad_norm": 8.467878341674805, + "learning_rate": 2.9011065006915633e-05, + "log_odds_chosen": 10.931889533996582, + "log_odds_ratio": -0.00010012050188379362, + "logits/chosen": -0.6796239614486694, + "logits/rejected": -0.5677720308303833, + "logps/chosen": -0.00023513483756687492, + "logps/rejected": -2.034045934677124, + "loss": 0.8391, + "nll_loss": 0.20975741744041443, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3513486667070538e-05, + "rewards/margins": 0.20338107645511627, + "rewards/rejected": -0.20340459048748016, + "step": 6909 + }, + { + "epoch": 4.7786998616874135, + "grad_norm": 7.321889400482178, + "learning_rate": 2.9007222990625482e-05, + "log_odds_chosen": 9.465747833251953, + "log_odds_ratio": -0.00028930528787896037, + "logits/chosen": -0.7294983863830566, + "logits/rejected": -0.7918939590454102, + "logps/chosen": -0.000825887662358582, + "logps/rejected": -1.7185850143432617, + "loss": 0.8373, + "nll_loss": 0.2092876136302948, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.258876914624125e-05, + "rewards/margins": 0.1717759221792221, + "rewards/rejected": -0.1718585044145584, + "step": 6910 + }, + { + "epoch": 4.77939142461964, + "grad_norm": 5.821832656860352, + "learning_rate": 2.900338097433533e-05, + "log_odds_chosen": 9.919578552246094, + "log_odds_ratio": -0.00020514108473435044, + "logits/chosen": -0.3137022852897644, + "logits/rejected": -0.3225104808807373, + "logps/chosen": -0.013574248179793358, + "logps/rejected": -2.6222431659698486, + "loss": 1.6493, + "nll_loss": 0.4123116731643677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013574250042438507, + "rewards/margins": 0.2608668804168701, + "rewards/rejected": -0.26222431659698486, + "step": 6911 + }, + { + "epoch": 4.780082987551867, + "grad_norm": 5.551862716674805, + "learning_rate": 2.8999538958045187e-05, + "log_odds_chosen": 10.600739479064941, + "log_odds_ratio": -4.490656647249125e-05, + "logits/chosen": -0.9346702694892883, + "logits/rejected": -0.8946956992149353, + "logps/chosen": -9.617433534003794e-05, + "logps/rejected": -1.5342824459075928, + "loss": 0.8074, + "nll_loss": 0.20183630287647247, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.617434443498496e-06, + "rewards/margins": 0.1534186154603958, + "rewards/rejected": -0.15342822670936584, + "step": 6912 + }, + { + "epoch": 4.780774550484094, + "grad_norm": 7.531041622161865, + "learning_rate": 2.8995696941755036e-05, + "log_odds_chosen": 10.089245796203613, + "log_odds_ratio": -6.500841118395329e-05, + "logits/chosen": -0.8466919660568237, + "logits/rejected": -0.8212411403656006, + "logps/chosen": -0.0002608322538435459, + "logps/rejected": -1.7257875204086304, + "loss": 1.0531, + "nll_loss": 0.26327186822891235, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6083227567141876e-05, + "rewards/margins": 0.1725526750087738, + "rewards/rejected": -0.17257876694202423, + "step": 6913 + }, + { + "epoch": 4.781466113416321, + "grad_norm": 15.634546279907227, + "learning_rate": 2.8991854925464885e-05, + "log_odds_chosen": 11.55288314819336, + "log_odds_ratio": -1.260529097635299e-05, + "logits/chosen": -0.9646028876304626, + "logits/rejected": -0.9988402128219604, + "logps/chosen": -0.000123454665299505, + "logps/rejected": -2.4872028827667236, + "loss": 1.3473, + "nll_loss": 0.3368183672428131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2345464710961096e-05, + "rewards/margins": 0.24870797991752625, + "rewards/rejected": -0.24872031807899475, + "step": 6914 + }, + { + "epoch": 4.782157676348548, + "grad_norm": 6.2947916984558105, + "learning_rate": 2.8988012909174737e-05, + "log_odds_chosen": 10.003267288208008, + "log_odds_ratio": -7.919715426396579e-05, + "logits/chosen": -0.7195393443107605, + "logits/rejected": -0.7393893003463745, + "logps/chosen": -0.0005369444843381643, + "logps/rejected": -1.976283073425293, + "loss": 0.7504, + "nll_loss": 0.18759770691394806, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369445352698676e-05, + "rewards/margins": 0.19757461547851562, + "rewards/rejected": -0.19762831926345825, + "step": 6915 + }, + { + "epoch": 4.782849239280774, + "grad_norm": 9.371685981750488, + "learning_rate": 2.8984170892884586e-05, + "log_odds_chosen": 9.39436149597168, + "log_odds_ratio": -0.029092937707901, + "logits/chosen": -0.7665365934371948, + "logits/rejected": -0.7613022327423096, + "logps/chosen": -0.009415735490620136, + "logps/rejected": -1.9681740999221802, + "loss": 0.8, + "nll_loss": 0.197085440158844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009415735257789493, + "rewards/margins": 0.1958758533000946, + "rewards/rejected": -0.19681741297245026, + "step": 6916 + }, + { + "epoch": 4.783540802213001, + "grad_norm": 8.897584915161133, + "learning_rate": 2.8980328876594435e-05, + "log_odds_chosen": 8.432744026184082, + "log_odds_ratio": -0.006203706841915846, + "logits/chosen": -1.054870843887329, + "logits/rejected": -1.0128101110458374, + "logps/chosen": -0.0035966283176094294, + "logps/rejected": -1.6831315755844116, + "loss": 1.1094, + "nll_loss": 0.2767356038093567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035966283758170903, + "rewards/margins": 0.1679534912109375, + "rewards/rejected": -0.1683131456375122, + "step": 6917 + }, + { + "epoch": 4.784232365145228, + "grad_norm": 9.129132270812988, + "learning_rate": 2.897648686030429e-05, + "log_odds_chosen": 10.543561935424805, + "log_odds_ratio": -6.228917482076213e-05, + "logits/chosen": -0.9463077187538147, + "logits/rejected": -0.9844076037406921, + "logps/chosen": -0.00043040671152994037, + "logps/rejected": -2.390472412109375, + "loss": 0.8443, + "nll_loss": 0.2110660970211029, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.304067260818556e-05, + "rewards/margins": 0.23900417983531952, + "rewards/rejected": -0.23904724419116974, + "step": 6918 + }, + { + "epoch": 4.784923928077455, + "grad_norm": 12.14979362487793, + "learning_rate": 2.897264484401414e-05, + "log_odds_chosen": 9.436766624450684, + "log_odds_ratio": -0.002850313438102603, + "logits/chosen": -0.7446776628494263, + "logits/rejected": -0.7585545778274536, + "logps/chosen": -0.007610342465341091, + "logps/rejected": -1.6009125709533691, + "loss": 0.6803, + "nll_loss": 0.16978859901428223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007610342581756413, + "rewards/margins": 0.15933021903038025, + "rewards/rejected": -0.16009125113487244, + "step": 6919 + }, + { + "epoch": 4.785615491009682, + "grad_norm": 9.8521146774292, + "learning_rate": 2.896880282772399e-05, + "log_odds_chosen": 10.63294506072998, + "log_odds_ratio": -0.0001836131705204025, + "logits/chosen": -0.8492832183837891, + "logits/rejected": -0.9085828065872192, + "logps/chosen": -0.0003643471281975508, + "logps/rejected": -2.2177181243896484, + "loss": 1.1283, + "nll_loss": 0.2820499837398529, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.64347142749466e-05, + "rewards/margins": 0.2217353880405426, + "rewards/rejected": -0.22177180647850037, + "step": 6920 + }, + { + "epoch": 4.786307053941909, + "grad_norm": 6.861515998840332, + "learning_rate": 2.8964960811433845e-05, + "log_odds_chosen": 8.752782821655273, + "log_odds_ratio": -0.007573779206722975, + "logits/chosen": -0.6578083038330078, + "logits/rejected": -0.7704108357429504, + "logps/chosen": -0.006619012448936701, + "logps/rejected": -1.6753588914871216, + "loss": 0.8632, + "nll_loss": 0.2150411754846573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006619012565352023, + "rewards/margins": 0.1668739914894104, + "rewards/rejected": -0.16753588616847992, + "step": 6921 + }, + { + "epoch": 4.786998616874135, + "grad_norm": 7.481729030609131, + "learning_rate": 2.8961118795143694e-05, + "log_odds_chosen": 9.5519437789917, + "log_odds_ratio": -0.002999893156811595, + "logits/chosen": -0.6682575345039368, + "logits/rejected": -0.7227833867073059, + "logps/chosen": -0.0021522603929042816, + "logps/rejected": -1.8938777446746826, + "loss": 0.7787, + "nll_loss": 0.19438041746616364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021522602764889598, + "rewards/margins": 0.18917253613471985, + "rewards/rejected": -0.18938776850700378, + "step": 6922 + }, + { + "epoch": 4.787690179806362, + "grad_norm": 16.188194274902344, + "learning_rate": 2.8957276778853543e-05, + "log_odds_chosen": 10.199849128723145, + "log_odds_ratio": -0.00026228633942082524, + "logits/chosen": -0.32946816086769104, + "logits/rejected": -0.42485928535461426, + "logps/chosen": -0.0007974720792844892, + "logps/rejected": -2.426337242126465, + "loss": 1.249, + "nll_loss": 0.31223201751708984, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.974720938364044e-05, + "rewards/margins": 0.2425539791584015, + "rewards/rejected": -0.24263374507427216, + "step": 6923 + }, + { + "epoch": 4.788381742738589, + "grad_norm": 10.802689552307129, + "learning_rate": 2.8953434762563396e-05, + "log_odds_chosen": 9.829700469970703, + "log_odds_ratio": -0.002595470752567053, + "logits/chosen": -0.7074468731880188, + "logits/rejected": -0.7966371774673462, + "logps/chosen": -0.004519506823271513, + "logps/rejected": -2.0750341415405273, + "loss": 0.8556, + "nll_loss": 0.21363969147205353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004519507347140461, + "rewards/margins": 0.20705148577690125, + "rewards/rejected": -0.20750342309474945, + "step": 6924 + }, + { + "epoch": 4.789073305670816, + "grad_norm": 11.141003608703613, + "learning_rate": 2.8949592746273245e-05, + "log_odds_chosen": 10.795259475708008, + "log_odds_ratio": -5.5939930462045595e-05, + "logits/chosen": -0.5992904901504517, + "logits/rejected": -0.584691047668457, + "logps/chosen": -0.00029081429238431156, + "logps/rejected": -2.1754791736602783, + "loss": 1.3131, + "nll_loss": 0.3282589316368103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9081427783239633e-05, + "rewards/margins": 0.21751883625984192, + "rewards/rejected": -0.21754790842533112, + "step": 6925 + }, + { + "epoch": 4.789764868603043, + "grad_norm": 7.459900856018066, + "learning_rate": 2.8945750729983094e-05, + "log_odds_chosen": 9.909021377563477, + "log_odds_ratio": -0.0002982753503601998, + "logits/chosen": -0.25852513313293457, + "logits/rejected": -0.30407804250717163, + "logps/chosen": -0.0006219090428203344, + "logps/rejected": -2.053335666656494, + "loss": 0.9451, + "nll_loss": 0.236250102519989, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219090573722497e-05, + "rewards/margins": 0.2052713930606842, + "rewards/rejected": -0.20533359050750732, + "step": 6926 + }, + { + "epoch": 4.79045643153527, + "grad_norm": 11.572301864624023, + "learning_rate": 2.894190871369295e-05, + "log_odds_chosen": 10.061004638671875, + "log_odds_ratio": -0.0001310033112531528, + "logits/chosen": -0.1928636133670807, + "logits/rejected": -0.28308868408203125, + "logps/chosen": -0.002506204880774021, + "logps/rejected": -1.9154969453811646, + "loss": 1.1552, + "nll_loss": 0.2887880206108093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025062047643586993, + "rewards/margins": 0.19129906594753265, + "rewards/rejected": -0.19154968857765198, + "step": 6927 + }, + { + "epoch": 4.791147994467496, + "grad_norm": 9.191129684448242, + "learning_rate": 2.89380666974028e-05, + "log_odds_chosen": 9.518013000488281, + "log_odds_ratio": -0.0001670484634814784, + "logits/chosen": -0.460419237613678, + "logits/rejected": -0.42179930210113525, + "logps/chosen": -0.0162142775952816, + "logps/rejected": -2.1252405643463135, + "loss": 1.0023, + "nll_loss": 0.25056424736976624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016214278293773532, + "rewards/margins": 0.2109026312828064, + "rewards/rejected": -0.21252407133579254, + "step": 6928 + }, + { + "epoch": 4.791839557399723, + "grad_norm": 7.784974575042725, + "learning_rate": 2.8934224681112648e-05, + "log_odds_chosen": 9.323896408081055, + "log_odds_ratio": -0.0005712855490855873, + "logits/chosen": -0.7076651453971863, + "logits/rejected": -0.7251378297805786, + "logps/chosen": -0.004440257325768471, + "logps/rejected": -2.320218324661255, + "loss": 1.2093, + "nll_loss": 0.30227917432785034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004440257325768471, + "rewards/margins": 0.2315778136253357, + "rewards/rejected": -0.23202183842658997, + "step": 6929 + }, + { + "epoch": 4.79253112033195, + "grad_norm": 7.4357476234436035, + "learning_rate": 2.8930382664822504e-05, + "log_odds_chosen": 9.982955932617188, + "log_odds_ratio": -0.0002719922049436718, + "logits/chosen": -0.6078499555587769, + "logits/rejected": -0.6500560641288757, + "logps/chosen": -0.0003774300857912749, + "logps/rejected": -1.7039949893951416, + "loss": 1.2563, + "nll_loss": 0.3140376806259155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.774300785153173e-05, + "rewards/margins": 0.17036175727844238, + "rewards/rejected": -0.1703994870185852, + "step": 6930 + }, + { + "epoch": 4.793222683264177, + "grad_norm": 7.098720073699951, + "learning_rate": 2.8926540648532353e-05, + "log_odds_chosen": 10.781723022460938, + "log_odds_ratio": -2.407423744443804e-05, + "logits/chosen": -0.47065818309783936, + "logits/rejected": -0.5136620998382568, + "logps/chosen": -0.00033951684599742293, + "logps/rejected": -2.5275063514709473, + "loss": 0.654, + "nll_loss": 0.1635047197341919, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395168459974229e-05, + "rewards/margins": 0.25271666049957275, + "rewards/rejected": -0.2527506351470947, + "step": 6931 + }, + { + "epoch": 4.793914246196404, + "grad_norm": 8.527052879333496, + "learning_rate": 2.89226986322422e-05, + "log_odds_chosen": 10.775848388671875, + "log_odds_ratio": -3.8844307709950954e-05, + "logits/chosen": -0.12336946278810501, + "logits/rejected": -0.16141879558563232, + "logps/chosen": -0.0003509092202875763, + "logps/rejected": -2.6161699295043945, + "loss": 0.7901, + "nll_loss": 0.1975254863500595, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.509092493914068e-05, + "rewards/margins": 0.2615818977355957, + "rewards/rejected": -0.2616170048713684, + "step": 6932 + }, + { + "epoch": 4.7946058091286305, + "grad_norm": 9.076807975769043, + "learning_rate": 2.8918856615952054e-05, + "log_odds_chosen": 11.056121826171875, + "log_odds_ratio": -2.2091770006227307e-05, + "logits/chosen": -0.25333231687545776, + "logits/rejected": -0.30482399463653564, + "logps/chosen": -0.0003058099828194827, + "logps/rejected": -2.733250617980957, + "loss": 0.8876, + "nll_loss": 0.22190183401107788, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.058099900954403e-05, + "rewards/margins": 0.27329447865486145, + "rewards/rejected": -0.2733250856399536, + "step": 6933 + }, + { + "epoch": 4.795297372060857, + "grad_norm": 9.712004661560059, + "learning_rate": 2.8915014599661907e-05, + "log_odds_chosen": 10.782461166381836, + "log_odds_ratio": -4.8897571105044335e-05, + "logits/chosen": -0.5463672876358032, + "logits/rejected": -0.6378750801086426, + "logps/chosen": -0.0003280511009506881, + "logps/rejected": -2.362738609313965, + "loss": 1.1677, + "nll_loss": 0.29191526770591736, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.280511009506881e-05, + "rewards/margins": 0.23624107241630554, + "rewards/rejected": -0.2362738847732544, + "step": 6934 + }, + { + "epoch": 4.795988934993084, + "grad_norm": 10.277175903320312, + "learning_rate": 2.8911172583371756e-05, + "log_odds_chosen": 10.328908920288086, + "log_odds_ratio": -0.00012547847290989012, + "logits/chosen": -0.8211703896522522, + "logits/rejected": -0.8321343660354614, + "logps/chosen": -0.00041813915595412254, + "logps/rejected": -2.0395636558532715, + "loss": 0.8992, + "nll_loss": 0.22478783130645752, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181391341262497e-05, + "rewards/margins": 0.2039145529270172, + "rewards/rejected": -0.20395638048648834, + "step": 6935 + }, + { + "epoch": 4.796680497925311, + "grad_norm": 4.285028457641602, + "learning_rate": 2.8907330567081608e-05, + "log_odds_chosen": 9.490989685058594, + "log_odds_ratio": -0.0008424659026786685, + "logits/chosen": -0.1907077431678772, + "logits/rejected": -0.25019198656082153, + "logps/chosen": -0.0022601436357945204, + "logps/rejected": -2.04176664352417, + "loss": 1.5389, + "nll_loss": 0.38463160395622253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022601439559366554, + "rewards/margins": 0.2039506435394287, + "rewards/rejected": -0.204176664352417, + "step": 6936 + }, + { + "epoch": 4.797372060857538, + "grad_norm": 11.503093719482422, + "learning_rate": 2.8903488550791457e-05, + "log_odds_chosen": 10.554071426391602, + "log_odds_ratio": -0.0002274377184221521, + "logits/chosen": -0.6637147665023804, + "logits/rejected": -0.6363039016723633, + "logps/chosen": -0.00023640092695131898, + "logps/rejected": -2.3359031677246094, + "loss": 1.4716, + "nll_loss": 0.36787402629852295, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3640095605514944e-05, + "rewards/margins": 0.23356668651103973, + "rewards/rejected": -0.2335902899503708, + "step": 6937 + }, + { + "epoch": 4.798063623789765, + "grad_norm": 13.230957984924316, + "learning_rate": 2.8899646534501306e-05, + "log_odds_chosen": 9.819869041442871, + "log_odds_ratio": -0.0002255823346786201, + "logits/chosen": -0.4164173901081085, + "logits/rejected": -0.47124138474464417, + "logps/chosen": -0.00040291884215548635, + "logps/rejected": -1.3943617343902588, + "loss": 1.0964, + "nll_loss": 0.27407020330429077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.029188130516559e-05, + "rewards/margins": 0.13939589262008667, + "rewards/rejected": -0.13943618535995483, + "step": 6938 + }, + { + "epoch": 4.7987551867219915, + "grad_norm": 6.186459541320801, + "learning_rate": 2.8895804518211155e-05, + "log_odds_chosen": 10.776379585266113, + "log_odds_ratio": -2.662005317688454e-05, + "logits/chosen": -0.652151882648468, + "logits/rejected": -0.6157358288764954, + "logps/chosen": -0.0001764782064128667, + "logps/rejected": -1.6163636445999146, + "loss": 1.0078, + "nll_loss": 0.2519550919532776, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.764782064128667e-05, + "rewards/margins": 0.16161870956420898, + "rewards/rejected": -0.1616363674402237, + "step": 6939 + }, + { + "epoch": 4.799446749654218, + "grad_norm": 14.097229957580566, + "learning_rate": 2.889196250192101e-05, + "log_odds_chosen": 10.038839340209961, + "log_odds_ratio": -0.00015504976909141988, + "logits/chosen": -0.4597371518611908, + "logits/rejected": -0.544449508190155, + "logps/chosen": -0.0006131302798166871, + "logps/rejected": -1.6367748975753784, + "loss": 1.2847, + "nll_loss": 0.3211716413497925, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.131303234724328e-05, + "rewards/margins": 0.16361618041992188, + "rewards/rejected": -0.16367748379707336, + "step": 6940 + }, + { + "epoch": 4.800138312586445, + "grad_norm": 9.109824180603027, + "learning_rate": 2.888812048563086e-05, + "log_odds_chosen": 10.58295726776123, + "log_odds_ratio": -5.280112964101136e-05, + "logits/chosen": -0.5606398582458496, + "logits/rejected": -0.6050992012023926, + "logps/chosen": -0.0002038203674601391, + "logps/rejected": -1.8081471920013428, + "loss": 0.7582, + "nll_loss": 0.18954379856586456, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.038203638221603e-05, + "rewards/margins": 0.1807943433523178, + "rewards/rejected": -0.180814728140831, + "step": 6941 + }, + { + "epoch": 4.800829875518672, + "grad_norm": 9.375306129455566, + "learning_rate": 2.888427846934071e-05, + "log_odds_chosen": 9.825284004211426, + "log_odds_ratio": -0.006563317961990833, + "logits/chosen": -0.10774577409029007, + "logits/rejected": -0.20767997205257416, + "logps/chosen": -0.019267892464995384, + "logps/rejected": -2.3732481002807617, + "loss": 1.004, + "nll_loss": 0.250335156917572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019267891766503453, + "rewards/margins": 0.23539802432060242, + "rewards/rejected": -0.2373248189687729, + "step": 6942 + }, + { + "epoch": 4.801521438450899, + "grad_norm": 7.552305221557617, + "learning_rate": 2.8880436453050565e-05, + "log_odds_chosen": 10.752092361450195, + "log_odds_ratio": -2.9556467779912055e-05, + "logits/chosen": -0.6632587909698486, + "logits/rejected": -0.6243708729743958, + "logps/chosen": -0.00019705788872670382, + "logps/rejected": -2.006537914276123, + "loss": 0.8495, + "nll_loss": 0.21236670017242432, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9705788872670382e-05, + "rewards/margins": 0.20063409209251404, + "rewards/rejected": -0.2006537914276123, + "step": 6943 + }, + { + "epoch": 4.802213001383126, + "grad_norm": 12.195704460144043, + "learning_rate": 2.8876594436760414e-05, + "log_odds_chosen": 10.260480880737305, + "log_odds_ratio": -0.00013605033745989203, + "logits/chosen": -0.6474810242652893, + "logits/rejected": -0.7049196362495422, + "logps/chosen": -0.001023083459585905, + "logps/rejected": -2.2923169136047363, + "loss": 0.8756, + "nll_loss": 0.218886598944664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010230834595859051, + "rewards/margins": 0.2291293889284134, + "rewards/rejected": -0.22923171520233154, + "step": 6944 + }, + { + "epoch": 4.8029045643153525, + "grad_norm": 5.283492088317871, + "learning_rate": 2.8872752420470263e-05, + "log_odds_chosen": 10.04088020324707, + "log_odds_ratio": -6.29095738986507e-05, + "logits/chosen": -0.645579993724823, + "logits/rejected": -0.6827752590179443, + "logps/chosen": -0.00022650565369985998, + "logps/rejected": -1.4571340084075928, + "loss": 0.6205, + "nll_loss": 0.15512149035930634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2650565369985998e-05, + "rewards/margins": 0.14569075405597687, + "rewards/rejected": -0.14571340382099152, + "step": 6945 + }, + { + "epoch": 4.803596127247579, + "grad_norm": 6.904294967651367, + "learning_rate": 2.8868910404180116e-05, + "log_odds_chosen": 10.803171157836914, + "log_odds_ratio": -0.00027451833011582494, + "logits/chosen": -0.5428705215454102, + "logits/rejected": -0.5604684948921204, + "logps/chosen": -0.00022846259525977075, + "logps/rejected": -2.276235342025757, + "loss": 1.5928, + "nll_loss": 0.3981609046459198, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2846259525977075e-05, + "rewards/margins": 0.22760069370269775, + "rewards/rejected": -0.22762353718280792, + "step": 6946 + }, + { + "epoch": 4.804287690179806, + "grad_norm": 10.15185546875, + "learning_rate": 2.8865068387889965e-05, + "log_odds_chosen": 9.572251319885254, + "log_odds_ratio": -0.000584468012675643, + "logits/chosen": -0.568747341632843, + "logits/rejected": -0.5709191560745239, + "logps/chosen": -0.0007926247199065983, + "logps/rejected": -1.5048116445541382, + "loss": 1.4307, + "nll_loss": 0.3576134443283081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.926247781142592e-05, + "rewards/margins": 0.15040190517902374, + "rewards/rejected": -0.150481179356575, + "step": 6947 + }, + { + "epoch": 4.804979253112033, + "grad_norm": 7.460203170776367, + "learning_rate": 2.8861226371599814e-05, + "log_odds_chosen": 9.168164253234863, + "log_odds_ratio": -0.0009161671041510999, + "logits/chosen": -0.5154004096984863, + "logits/rejected": -0.5313372015953064, + "logps/chosen": -0.001504352898336947, + "logps/rejected": -1.4024771451950073, + "loss": 0.778, + "nll_loss": 0.19440346956253052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015043529856484383, + "rewards/margins": 0.14009729027748108, + "rewards/rejected": -0.14024771749973297, + "step": 6948 + }, + { + "epoch": 4.80567081604426, + "grad_norm": 8.82359504699707, + "learning_rate": 2.885738435530967e-05, + "log_odds_chosen": 10.764310836791992, + "log_odds_ratio": -3.1426825444214046e-05, + "logits/chosen": -0.5414690971374512, + "logits/rejected": -0.5535372495651245, + "logps/chosen": -0.0002997158153448254, + "logps/rejected": -2.4745771884918213, + "loss": 0.8529, + "nll_loss": 0.2132188379764557, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9971581170684658e-05, + "rewards/margins": 0.24742776155471802, + "rewards/rejected": -0.24745772778987885, + "step": 6949 + }, + { + "epoch": 4.806362378976487, + "grad_norm": 8.205737113952637, + "learning_rate": 2.885354233901952e-05, + "log_odds_chosen": 10.115606307983398, + "log_odds_ratio": -0.0003620981296990067, + "logits/chosen": -0.7475257515907288, + "logits/rejected": -0.8717905879020691, + "logps/chosen": -0.0007908196421340108, + "logps/rejected": -1.9689276218414307, + "loss": 1.127, + "nll_loss": 0.2817220389842987, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.908196130301803e-05, + "rewards/margins": 0.1968136727809906, + "rewards/rejected": -0.19689278304576874, + "step": 6950 + }, + { + "epoch": 4.8070539419087135, + "grad_norm": 8.313371658325195, + "learning_rate": 2.8849700322729368e-05, + "log_odds_chosen": 8.338306427001953, + "log_odds_ratio": -0.006208367180079222, + "logits/chosen": -0.356538861989975, + "logits/rejected": -0.37412649393081665, + "logps/chosen": -0.0012255026958882809, + "logps/rejected": -1.243715524673462, + "loss": 1.6415, + "nll_loss": 0.4097641706466675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012255027831997722, + "rewards/margins": 0.12424901127815247, + "rewards/rejected": -0.12437155842781067, + "step": 6951 + }, + { + "epoch": 4.80774550484094, + "grad_norm": 6.488125801086426, + "learning_rate": 2.8845858306439223e-05, + "log_odds_chosen": 9.322070121765137, + "log_odds_ratio": -0.0010448751272633672, + "logits/chosen": -0.4465481638908386, + "logits/rejected": -0.5063095688819885, + "logps/chosen": -0.001341567956842482, + "logps/rejected": -1.9834538698196411, + "loss": 0.856, + "nll_loss": 0.21390560269355774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013415678404271603, + "rewards/margins": 0.19821123778820038, + "rewards/rejected": -0.1983453780412674, + "step": 6952 + }, + { + "epoch": 4.808437067773167, + "grad_norm": 7.999490261077881, + "learning_rate": 2.8842016290149072e-05, + "log_odds_chosen": 11.270200729370117, + "log_odds_ratio": -2.683803359104786e-05, + "logits/chosen": -0.26797768473625183, + "logits/rejected": -0.31660839915275574, + "logps/chosen": -0.00011563336738618091, + "logps/rejected": -2.1397147178649902, + "loss": 1.0249, + "nll_loss": 0.25621429085731506, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1563337466213852e-05, + "rewards/margins": 0.2139599323272705, + "rewards/rejected": -0.21397149562835693, + "step": 6953 + }, + { + "epoch": 4.809128630705394, + "grad_norm": 8.063843727111816, + "learning_rate": 2.883817427385892e-05, + "log_odds_chosen": 8.964914321899414, + "log_odds_ratio": -0.04095021262764931, + "logits/chosen": -0.2575796842575073, + "logits/rejected": -0.3876993656158447, + "logps/chosen": -0.009480806067585945, + "logps/rejected": -2.020704746246338, + "loss": 1.1192, + "nll_loss": 0.27570194005966187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009480806766077876, + "rewards/margins": 0.20112240314483643, + "rewards/rejected": -0.2020704746246338, + "step": 6954 + }, + { + "epoch": 4.809820193637621, + "grad_norm": 4.980713367462158, + "learning_rate": 2.8834332257568774e-05, + "log_odds_chosen": 9.91356372833252, + "log_odds_ratio": -0.00010528671555221081, + "logits/chosen": -0.5045120120048523, + "logits/rejected": -0.5280041694641113, + "logps/chosen": -0.0001594717614352703, + "logps/rejected": -1.4369676113128662, + "loss": 0.914, + "nll_loss": 0.2284928560256958, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5947174688335508e-05, + "rewards/margins": 0.14368081092834473, + "rewards/rejected": -0.14369675517082214, + "step": 6955 + }, + { + "epoch": 4.810511756569848, + "grad_norm": 10.412702560424805, + "learning_rate": 2.8830490241278623e-05, + "log_odds_chosen": 10.395805358886719, + "log_odds_ratio": -7.902117067715153e-05, + "logits/chosen": -0.2385944426059723, + "logits/rejected": -0.3181019425392151, + "logps/chosen": -0.0003422585141379386, + "logps/rejected": -2.067488431930542, + "loss": 1.0646, + "nll_loss": 0.26614975929260254, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.422584995860234e-05, + "rewards/margins": 0.20671463012695312, + "rewards/rejected": -0.2067488580942154, + "step": 6956 + }, + { + "epoch": 4.8112033195020745, + "grad_norm": 6.461427211761475, + "learning_rate": 2.8826648224988472e-05, + "log_odds_chosen": 9.895060539245605, + "log_odds_ratio": -0.00010841433686437085, + "logits/chosen": -0.5785304307937622, + "logits/rejected": -0.6983880996704102, + "logps/chosen": -0.0001771124079823494, + "logps/rejected": -1.1903425455093384, + "loss": 1.2011, + "nll_loss": 0.3002629280090332, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.77112415258307e-05, + "rewards/margins": 0.11901654303073883, + "rewards/rejected": -0.11903425306081772, + "step": 6957 + }, + { + "epoch": 4.811894882434301, + "grad_norm": 10.47810173034668, + "learning_rate": 2.8822806208698328e-05, + "log_odds_chosen": 9.634604454040527, + "log_odds_ratio": -0.00022385548800230026, + "logits/chosen": -0.31653398275375366, + "logits/rejected": -0.3796745538711548, + "logps/chosen": -0.0011035995557904243, + "logps/rejected": -2.300333023071289, + "loss": 0.8245, + "nll_loss": 0.20609070360660553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011035994975827634, + "rewards/margins": 0.22992295026779175, + "rewards/rejected": -0.230033278465271, + "step": 6958 + }, + { + "epoch": 4.812586445366528, + "grad_norm": 10.434643745422363, + "learning_rate": 2.8818964192408177e-05, + "log_odds_chosen": 8.491510391235352, + "log_odds_ratio": -0.010704604908823967, + "logits/chosen": -0.6227741837501526, + "logits/rejected": -0.6373161673545837, + "logps/chosen": -0.00477764243260026, + "logps/rejected": -1.6983509063720703, + "loss": 1.2679, + "nll_loss": 0.31591036915779114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047776426072232425, + "rewards/margins": 0.1693573296070099, + "rewards/rejected": -0.16983510553836823, + "step": 6959 + }, + { + "epoch": 4.813278008298755, + "grad_norm": 17.066213607788086, + "learning_rate": 2.8815122176118026e-05, + "log_odds_chosen": 10.402935028076172, + "log_odds_ratio": -0.00013788833166472614, + "logits/chosen": -0.44745564460754395, + "logits/rejected": -0.5753912329673767, + "logps/chosen": -0.0005248216330073774, + "logps/rejected": -2.174628734588623, + "loss": 1.7491, + "nll_loss": 0.43725520372390747, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.248216257314198e-05, + "rewards/margins": 0.2174103856086731, + "rewards/rejected": -0.21746288239955902, + "step": 6960 + }, + { + "epoch": 4.813969571230982, + "grad_norm": 12.821651458740234, + "learning_rate": 2.8811280159827882e-05, + "log_odds_chosen": 10.219192504882812, + "log_odds_ratio": -0.0001237966789631173, + "logits/chosen": -0.627615213394165, + "logits/rejected": -0.6616013646125793, + "logps/chosen": -0.0002719200274441391, + "logps/rejected": -1.7700080871582031, + "loss": 0.9606, + "nll_loss": 0.24014118313789368, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.719200165302027e-05, + "rewards/margins": 0.1769736111164093, + "rewards/rejected": -0.17700082063674927, + "step": 6961 + }, + { + "epoch": 4.814661134163209, + "grad_norm": 11.982014656066895, + "learning_rate": 2.880743814353773e-05, + "log_odds_chosen": 8.90776538848877, + "log_odds_ratio": -0.05733673647046089, + "logits/chosen": -0.6063227653503418, + "logits/rejected": -0.7158474326133728, + "logps/chosen": -0.012450098991394043, + "logps/rejected": -1.8934953212738037, + "loss": 1.4886, + "nll_loss": 0.3664115071296692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001245009945705533, + "rewards/margins": 0.188104510307312, + "rewards/rejected": -0.18934953212738037, + "step": 6962 + }, + { + "epoch": 4.8153526970954355, + "grad_norm": 9.394391059875488, + "learning_rate": 2.880359612724758e-05, + "log_odds_chosen": 10.815286636352539, + "log_odds_ratio": -3.915661363862455e-05, + "logits/chosen": -0.7519272565841675, + "logits/rejected": -0.8650994300842285, + "logps/chosen": -0.00031088574905879796, + "logps/rejected": -2.595947742462158, + "loss": 0.7355, + "nll_loss": 0.18388155102729797, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.108857345068827e-05, + "rewards/margins": 0.259563684463501, + "rewards/rejected": -0.25959476828575134, + "step": 6963 + }, + { + "epoch": 4.816044260027662, + "grad_norm": 8.491909980773926, + "learning_rate": 2.8799754110957432e-05, + "log_odds_chosen": 10.1375732421875, + "log_odds_ratio": -0.00012776776566170156, + "logits/chosen": -0.4785597622394562, + "logits/rejected": -0.5305933952331543, + "logps/chosen": -0.000312845193548128, + "logps/rejected": -1.6996479034423828, + "loss": 0.8897, + "nll_loss": 0.2224000245332718, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.128451862721704e-05, + "rewards/margins": 0.1699334979057312, + "rewards/rejected": -0.16996479034423828, + "step": 6964 + }, + { + "epoch": 4.816735822959889, + "grad_norm": 7.3495073318481445, + "learning_rate": 2.879591209466728e-05, + "log_odds_chosen": 10.194000244140625, + "log_odds_ratio": -0.0005381208611652255, + "logits/chosen": -0.5459569096565247, + "logits/rejected": -0.5725046992301941, + "logps/chosen": -0.00019581487867981195, + "logps/rejected": -1.9669796228408813, + "loss": 1.2325, + "nll_loss": 0.30806687474250793, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9581486412789673e-05, + "rewards/margins": 0.19667838513851166, + "rewards/rejected": -0.19669798016548157, + "step": 6965 + }, + { + "epoch": 4.817427385892116, + "grad_norm": 10.577987670898438, + "learning_rate": 2.879207007837713e-05, + "log_odds_chosen": 9.530058860778809, + "log_odds_ratio": -0.0007562245009467006, + "logits/chosen": -0.7595070004463196, + "logits/rejected": -0.7329185009002686, + "logps/chosen": -0.01416083239018917, + "logps/rejected": -2.174567461013794, + "loss": 1.1374, + "nll_loss": 0.2842686176300049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014160831924527884, + "rewards/margins": 0.21604067087173462, + "rewards/rejected": -0.21745674312114716, + "step": 6966 + }, + { + "epoch": 4.818118948824343, + "grad_norm": 8.284407615661621, + "learning_rate": 2.8788228062086986e-05, + "log_odds_chosen": 9.523808479309082, + "log_odds_ratio": -0.00033556658308953047, + "logits/chosen": -0.4195675849914551, + "logits/rejected": -0.4756912291049957, + "logps/chosen": -0.0008030449389480054, + "logps/rejected": -1.5736851692199707, + "loss": 1.4564, + "nll_loss": 0.364074170589447, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.030448952922598e-05, + "rewards/margins": 0.15728820860385895, + "rewards/rejected": -0.1573685258626938, + "step": 6967 + }, + { + "epoch": 4.81881051175657, + "grad_norm": 8.860459327697754, + "learning_rate": 2.8784386045796835e-05, + "log_odds_chosen": 10.552122116088867, + "log_odds_ratio": -0.0001919109927257523, + "logits/chosen": -0.9970296025276184, + "logits/rejected": -1.0275627374649048, + "logps/chosen": -0.0004448512918315828, + "logps/rejected": -2.17071533203125, + "loss": 0.8643, + "nll_loss": 0.21605589985847473, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.448512481758371e-05, + "rewards/margins": 0.21702706813812256, + "rewards/rejected": -0.2170715630054474, + "step": 6968 + }, + { + "epoch": 4.819502074688796, + "grad_norm": 8.528804779052734, + "learning_rate": 2.8780544029506684e-05, + "log_odds_chosen": 10.231672286987305, + "log_odds_ratio": -0.00021445140009745955, + "logits/chosen": -0.7950515151023865, + "logits/rejected": -0.8399736881256104, + "logps/chosen": -0.0004185446014162153, + "logps/rejected": -1.7746440172195435, + "loss": 0.9838, + "nll_loss": 0.24592530727386475, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.185445868643001e-05, + "rewards/margins": 0.17742255330085754, + "rewards/rejected": -0.17746439576148987, + "step": 6969 + }, + { + "epoch": 4.820193637621023, + "grad_norm": 7.747142791748047, + "learning_rate": 2.877670201321654e-05, + "log_odds_chosen": 9.8212251663208, + "log_odds_ratio": -0.00026398696354590356, + "logits/chosen": -0.6424762010574341, + "logits/rejected": -0.7730410099029541, + "logps/chosen": -0.00431458605453372, + "logps/rejected": -2.3638572692871094, + "loss": 1.0091, + "nll_loss": 0.25225630402565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043145858217030764, + "rewards/margins": 0.23595428466796875, + "rewards/rejected": -0.23638573288917542, + "step": 6970 + }, + { + "epoch": 4.82088520055325, + "grad_norm": 7.259042739868164, + "learning_rate": 2.877285999692639e-05, + "log_odds_chosen": 10.099575996398926, + "log_odds_ratio": -0.0002996406110469252, + "logits/chosen": -0.4730404019355774, + "logits/rejected": -0.535588264465332, + "logps/chosen": -0.0002547122712712735, + "logps/rejected": -1.7693817615509033, + "loss": 1.0013, + "nll_loss": 0.25030577182769775, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.547122676332947e-05, + "rewards/margins": 0.17691272497177124, + "rewards/rejected": -0.17693819105625153, + "step": 6971 + }, + { + "epoch": 4.821576763485477, + "grad_norm": 6.7854509353637695, + "learning_rate": 2.876901798063624e-05, + "log_odds_chosen": 10.333219528198242, + "log_odds_ratio": -0.0025019964668899775, + "logits/chosen": -0.3313809037208557, + "logits/rejected": -0.3299313187599182, + "logps/chosen": -0.0035334054846316576, + "logps/rejected": -2.027862071990967, + "loss": 0.9505, + "nll_loss": 0.23737327754497528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003533405833877623, + "rewards/margins": 0.20243285596370697, + "rewards/rejected": -0.20278620719909668, + "step": 6972 + }, + { + "epoch": 4.822268326417704, + "grad_norm": 13.108231544494629, + "learning_rate": 2.876517596434609e-05, + "log_odds_chosen": 9.169116973876953, + "log_odds_ratio": -0.021218866109848022, + "logits/chosen": -0.5680206418037415, + "logits/rejected": -0.6453905701637268, + "logps/chosen": -0.006536668166518211, + "logps/rejected": -1.1700851917266846, + "loss": 0.6656, + "nll_loss": 0.16427476704120636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006536668515764177, + "rewards/margins": 0.11635485291481018, + "rewards/rejected": -0.1170085221529007, + "step": 6973 + }, + { + "epoch": 4.822959889349931, + "grad_norm": 6.058053970336914, + "learning_rate": 2.876133394805594e-05, + "log_odds_chosen": 9.656610488891602, + "log_odds_ratio": -0.00018438557162880898, + "logits/chosen": -0.2926297187805176, + "logits/rejected": -0.3582976758480072, + "logps/chosen": -0.007463652174919844, + "logps/rejected": -2.2228102684020996, + "loss": 0.9425, + "nll_loss": 0.2356175184249878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007463652291335166, + "rewards/margins": 0.2215346395969391, + "rewards/rejected": -0.22228102385997772, + "step": 6974 + }, + { + "epoch": 4.823651452282157, + "grad_norm": 8.221524238586426, + "learning_rate": 2.875749193176579e-05, + "log_odds_chosen": 8.097722053527832, + "log_odds_ratio": -0.012327348813414574, + "logits/chosen": -0.2094922661781311, + "logits/rejected": -0.2288666069507599, + "logps/chosen": -0.006490036379545927, + "logps/rejected": -1.4914829730987549, + "loss": 1.5915, + "nll_loss": 0.3966403007507324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006490036030299962, + "rewards/margins": 0.1484992951154709, + "rewards/rejected": -0.14914830029010773, + "step": 6975 + }, + { + "epoch": 4.824343015214384, + "grad_norm": 7.369786262512207, + "learning_rate": 2.8753649915475645e-05, + "log_odds_chosen": 11.036026954650879, + "log_odds_ratio": -5.5173979490064085e-05, + "logits/chosen": -0.25902676582336426, + "logits/rejected": -0.28121620416641235, + "logps/chosen": -0.0019505223026499152, + "logps/rejected": -2.6775636672973633, + "loss": 1.3716, + "nll_loss": 0.34289392828941345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019505222735460848, + "rewards/margins": 0.26756131649017334, + "rewards/rejected": -0.2677563726902008, + "step": 6976 + }, + { + "epoch": 4.825034578146611, + "grad_norm": 5.510984897613525, + "learning_rate": 2.8749807899185494e-05, + "log_odds_chosen": 9.41744613647461, + "log_odds_ratio": -0.0012238244526088238, + "logits/chosen": -0.10021279007196426, + "logits/rejected": -0.1837349683046341, + "logps/chosen": -0.0012109712697565556, + "logps/rejected": -1.9403990507125854, + "loss": 1.0165, + "nll_loss": 0.2540140151977539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012109712406527251, + "rewards/margins": 0.19391882419586182, + "rewards/rejected": -0.19403991103172302, + "step": 6977 + }, + { + "epoch": 4.825726141078838, + "grad_norm": 10.768878936767578, + "learning_rate": 2.8745965882895343e-05, + "log_odds_chosen": 11.214912414550781, + "log_odds_ratio": -4.51734995294828e-05, + "logits/chosen": -0.8881174325942993, + "logits/rejected": -0.8989272713661194, + "logps/chosen": -0.00020168480114080012, + "logps/rejected": -2.6242618560791016, + "loss": 1.1215, + "nll_loss": 0.28037723898887634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.016847975028213e-05, + "rewards/margins": 0.26240602135658264, + "rewards/rejected": -0.2624261975288391, + "step": 6978 + }, + { + "epoch": 4.826417704011065, + "grad_norm": 6.553099155426025, + "learning_rate": 2.87421238666052e-05, + "log_odds_chosen": 9.763384819030762, + "log_odds_ratio": -0.00017629990179557353, + "logits/chosen": -0.584071695804596, + "logits/rejected": -0.5996840000152588, + "logps/chosen": -0.0059030367992818356, + "logps/rejected": -1.842178463935852, + "loss": 1.1901, + "nll_loss": 0.29749614000320435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005903037963435054, + "rewards/margins": 0.18362754583358765, + "rewards/rejected": -0.18421784043312073, + "step": 6979 + }, + { + "epoch": 4.827109266943292, + "grad_norm": 9.622838973999023, + "learning_rate": 2.8738281850315048e-05, + "log_odds_chosen": 10.019664764404297, + "log_odds_ratio": -0.0002728485851548612, + "logits/chosen": -0.7304296493530273, + "logits/rejected": -0.7358352541923523, + "logps/chosen": -0.0003393233346287161, + "logps/rejected": -1.7413800954818726, + "loss": 1.1464, + "nll_loss": 0.28656476736068726, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.393233419046737e-05, + "rewards/margins": 0.17410407960414886, + "rewards/rejected": -0.17413800954818726, + "step": 6980 + }, + { + "epoch": 4.827800829875518, + "grad_norm": 7.335915565490723, + "learning_rate": 2.8734439834024897e-05, + "log_odds_chosen": 9.990732192993164, + "log_odds_ratio": -8.258214802481234e-05, + "logits/chosen": -0.3298628330230713, + "logits/rejected": -0.2266494482755661, + "logps/chosen": -0.0004717921547126025, + "logps/rejected": -1.9673501253128052, + "loss": 0.9213, + "nll_loss": 0.23031499981880188, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.717921547126025e-05, + "rewards/margins": 0.19668781757354736, + "rewards/rejected": -0.19673500955104828, + "step": 6981 + }, + { + "epoch": 4.828492392807745, + "grad_norm": 9.187235832214355, + "learning_rate": 2.873059781773475e-05, + "log_odds_chosen": 10.611069679260254, + "log_odds_ratio": -9.819894330576062e-05, + "logits/chosen": -0.44648605585098267, + "logits/rejected": -0.5582661032676697, + "logps/chosen": -0.00030931332730688155, + "logps/rejected": -2.1796324253082275, + "loss": 0.9753, + "nll_loss": 0.24380630254745483, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.093133636866696e-05, + "rewards/margins": 0.2179323136806488, + "rewards/rejected": -0.21796324849128723, + "step": 6982 + }, + { + "epoch": 4.829183955739972, + "grad_norm": 16.725278854370117, + "learning_rate": 2.87267558014446e-05, + "log_odds_chosen": 9.19306755065918, + "log_odds_ratio": -0.00032923344406299293, + "logits/chosen": -0.07299592345952988, + "logits/rejected": -0.12843886017799377, + "logps/chosen": -0.0005057539092376828, + "logps/rejected": -1.6225523948669434, + "loss": 1.0773, + "nll_loss": 0.269281268119812, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.057539601693861e-05, + "rewards/margins": 0.16220466792583466, + "rewards/rejected": -0.16225524246692657, + "step": 6983 + }, + { + "epoch": 4.829875518672199, + "grad_norm": 13.59363842010498, + "learning_rate": 2.8722913785154447e-05, + "log_odds_chosen": 10.44688606262207, + "log_odds_ratio": -0.000149241866893135, + "logits/chosen": -0.41675865650177, + "logits/rejected": -0.5150689482688904, + "logps/chosen": -0.0002905310539063066, + "logps/rejected": -1.9604620933532715, + "loss": 1.1596, + "nll_loss": 0.28988033533096313, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.905310429923702e-05, + "rewards/margins": 0.19601714611053467, + "rewards/rejected": -0.19604620337486267, + "step": 6984 + }, + { + "epoch": 4.830567081604426, + "grad_norm": 12.085798263549805, + "learning_rate": 2.8719071768864303e-05, + "log_odds_chosen": 11.489034652709961, + "log_odds_ratio": -1.973729013116099e-05, + "logits/chosen": -0.454725980758667, + "logits/rejected": -0.5726956725120544, + "logps/chosen": -0.00019914706354029477, + "logps/rejected": -2.6913857460021973, + "loss": 0.9174, + "nll_loss": 0.22934210300445557, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9914707081625238e-05, + "rewards/margins": 0.26911866664886475, + "rewards/rejected": -0.2691385746002197, + "step": 6985 + }, + { + "epoch": 4.8312586445366525, + "grad_norm": 12.423589706420898, + "learning_rate": 2.8715229752574152e-05, + "log_odds_chosen": 9.722612380981445, + "log_odds_ratio": -0.10177444666624069, + "logits/chosen": -0.38128212094306946, + "logits/rejected": -0.39975857734680176, + "logps/chosen": -0.25344493985176086, + "logps/rejected": -2.009124279022217, + "loss": 0.9418, + "nll_loss": 0.22527502477169037, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.025344498455524445, + "rewards/margins": 0.17556792497634888, + "rewards/rejected": -0.2009124457836151, + "step": 6986 + }, + { + "epoch": 4.831950207468879, + "grad_norm": 13.209919929504395, + "learning_rate": 2.8711387736284e-05, + "log_odds_chosen": 10.672974586486816, + "log_odds_ratio": -0.020712081342935562, + "logits/chosen": -0.2563137114048004, + "logits/rejected": -0.28360506892204285, + "logps/chosen": -0.00576416403055191, + "logps/rejected": -2.3452091217041016, + "loss": 1.2627, + "nll_loss": 0.3136104345321655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005764163797721267, + "rewards/margins": 0.23394452035427094, + "rewards/rejected": -0.23452092707157135, + "step": 6987 + }, + { + "epoch": 4.832641770401106, + "grad_norm": 8.65539264678955, + "learning_rate": 2.8707545719993857e-05, + "log_odds_chosen": 10.266307830810547, + "log_odds_ratio": -7.117915811249986e-05, + "logits/chosen": -0.08035125583410263, + "logits/rejected": -0.17950567603111267, + "logps/chosen": -0.0002787821867968887, + "logps/rejected": -2.0251896381378174, + "loss": 1.1863, + "nll_loss": 0.296577513217926, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.787821904348675e-05, + "rewards/margins": 0.20249108970165253, + "rewards/rejected": -0.20251896977424622, + "step": 6988 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 17.296316146850586, + "learning_rate": 2.8703703703703706e-05, + "log_odds_chosen": 9.091753005981445, + "log_odds_ratio": -0.048046279698610306, + "logits/chosen": -0.3633042275905609, + "logits/rejected": -0.3943461775779724, + "logps/chosen": -0.01235463097691536, + "logps/rejected": -2.001265048980713, + "loss": 1.1691, + "nll_loss": 0.2874664068222046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012354630744084716, + "rewards/margins": 0.19889101386070251, + "rewards/rejected": -0.2001264989376068, + "step": 6989 + }, + { + "epoch": 4.83402489626556, + "grad_norm": 12.84473705291748, + "learning_rate": 2.8699861687413555e-05, + "log_odds_chosen": 10.6940279006958, + "log_odds_ratio": -0.00028842902975156903, + "logits/chosen": -0.41434893012046814, + "logits/rejected": -0.48944592475891113, + "logps/chosen": -0.00044106371933594346, + "logps/rejected": -2.1125221252441406, + "loss": 2.0763, + "nll_loss": 0.519040584564209, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4106371205998585e-05, + "rewards/margins": 0.21120813488960266, + "rewards/rejected": -0.21125222742557526, + "step": 6990 + }, + { + "epoch": 4.834716459197787, + "grad_norm": 8.409330368041992, + "learning_rate": 2.8696019671123408e-05, + "log_odds_chosen": 10.525460243225098, + "log_odds_ratio": -0.0009910862427204847, + "logits/chosen": -0.2692156136035919, + "logits/rejected": -0.24852915108203888, + "logps/chosen": -0.015924440696835518, + "logps/rejected": -2.109683036804199, + "loss": 1.0572, + "nll_loss": 0.2641902565956116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001592444023117423, + "rewards/margins": 0.20937585830688477, + "rewards/rejected": -0.21096831560134888, + "step": 6991 + }, + { + "epoch": 4.8354080221300135, + "grad_norm": 8.460469245910645, + "learning_rate": 2.8692177654833257e-05, + "log_odds_chosen": 9.595645904541016, + "log_odds_ratio": -0.00011171397636644542, + "logits/chosen": -0.2434559166431427, + "logits/rejected": -0.3016476631164551, + "logps/chosen": -0.0008374938042834401, + "logps/rejected": -1.8347755670547485, + "loss": 1.1008, + "nll_loss": 0.2751849889755249, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37493862491101e-05, + "rewards/margins": 0.18339380621910095, + "rewards/rejected": -0.18347755074501038, + "step": 6992 + }, + { + "epoch": 4.83609958506224, + "grad_norm": 12.384178161621094, + "learning_rate": 2.8688335638543106e-05, + "log_odds_chosen": 10.073097229003906, + "log_odds_ratio": -0.00015708267164882272, + "logits/chosen": -0.8474388718605042, + "logits/rejected": -0.8515263795852661, + "logps/chosen": -0.00024788122391328216, + "logps/rejected": -1.6563420295715332, + "loss": 0.7981, + "nll_loss": 0.1995052993297577, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4788121663732454e-05, + "rewards/margins": 0.1656094193458557, + "rewards/rejected": -0.1656341850757599, + "step": 6993 + }, + { + "epoch": 4.836791147994467, + "grad_norm": 18.428924560546875, + "learning_rate": 2.868449362225296e-05, + "log_odds_chosen": 8.584789276123047, + "log_odds_ratio": -0.01099585834890604, + "logits/chosen": -0.6042527556419373, + "logits/rejected": -0.6270811557769775, + "logps/chosen": -0.10832807421684265, + "logps/rejected": -2.304208993911743, + "loss": 1.3723, + "nll_loss": 0.3419734239578247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01083280798047781, + "rewards/margins": 0.21958810091018677, + "rewards/rejected": -0.23042091727256775, + "step": 6994 + }, + { + "epoch": 4.837482710926694, + "grad_norm": 13.578597068786621, + "learning_rate": 2.868065160596281e-05, + "log_odds_chosen": 9.709571838378906, + "log_odds_ratio": -0.0009176249150186777, + "logits/chosen": -0.922393798828125, + "logits/rejected": -0.9200150966644287, + "logps/chosen": -0.012347464449703693, + "logps/rejected": -1.901907205581665, + "loss": 1.4535, + "nll_loss": 0.3632957339286804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001234746421687305, + "rewards/margins": 0.18895597755908966, + "rewards/rejected": -0.19019073247909546, + "step": 6995 + }, + { + "epoch": 4.838174273858921, + "grad_norm": 8.315958023071289, + "learning_rate": 2.867680958967266e-05, + "log_odds_chosen": 9.92038345336914, + "log_odds_ratio": -0.0030150411184877157, + "logits/chosen": -0.27385860681533813, + "logits/rejected": -0.32332712411880493, + "logps/chosen": -0.0008827511919662356, + "logps/rejected": -2.2805981636047363, + "loss": 1.1089, + "nll_loss": 0.27691400051116943, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.827511919662356e-05, + "rewards/margins": 0.22797155380249023, + "rewards/rejected": -0.2280598282814026, + "step": 6996 + }, + { + "epoch": 4.838865836791148, + "grad_norm": 7.787275791168213, + "learning_rate": 2.8672967573382516e-05, + "log_odds_chosen": 10.508706092834473, + "log_odds_ratio": -9.931164822774008e-05, + "logits/chosen": -0.5204631686210632, + "logits/rejected": -0.5042663812637329, + "logps/chosen": -0.0002793048042804003, + "logps/rejected": -2.2063889503479004, + "loss": 0.8487, + "nll_loss": 0.21217229962348938, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.793048224702943e-05, + "rewards/margins": 0.22061097621917725, + "rewards/rejected": -0.2206389158964157, + "step": 6997 + }, + { + "epoch": 4.8395573997233745, + "grad_norm": 9.41072940826416, + "learning_rate": 2.8669125557092365e-05, + "log_odds_chosen": 10.23240852355957, + "log_odds_ratio": -5.44110698683653e-05, + "logits/chosen": -0.5337792634963989, + "logits/rejected": -0.634337842464447, + "logps/chosen": -0.0015219207853078842, + "logps/rejected": -2.103792190551758, + "loss": 0.7162, + "nll_loss": 0.17903977632522583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001521920639788732, + "rewards/margins": 0.21022702753543854, + "rewards/rejected": -0.2103792130947113, + "step": 6998 + }, + { + "epoch": 4.840248962655601, + "grad_norm": 13.882169723510742, + "learning_rate": 2.8665283540802214e-05, + "log_odds_chosen": 10.442000389099121, + "log_odds_ratio": -0.00022704862931277603, + "logits/chosen": -0.5982738137245178, + "logits/rejected": -0.7945213317871094, + "logps/chosen": -0.003369520418345928, + "logps/rejected": -2.137873888015747, + "loss": 0.9619, + "nll_loss": 0.24045637249946594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003369520418345928, + "rewards/margins": 0.21345043182373047, + "rewards/rejected": -0.21378740668296814, + "step": 6999 + }, + { + "epoch": 4.840940525587828, + "grad_norm": 7.5849409103393555, + "learning_rate": 2.8661441524512066e-05, + "log_odds_chosen": 8.664499282836914, + "log_odds_ratio": -0.0005070245242677629, + "logits/chosen": -0.5758646130561829, + "logits/rejected": -0.5692422986030579, + "logps/chosen": -0.0009210213320329785, + "logps/rejected": -1.2004210948944092, + "loss": 0.7885, + "nll_loss": 0.19706769287586212, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.210212738253176e-05, + "rewards/margins": 0.11995001137256622, + "rewards/rejected": -0.1200421079993248, + "step": 7000 + }, + { + "epoch": 4.841632088520055, + "grad_norm": 6.227912902832031, + "learning_rate": 2.8657599508221915e-05, + "log_odds_chosen": 9.58493709564209, + "log_odds_ratio": -0.0006402077851817012, + "logits/chosen": -0.5111309885978699, + "logits/rejected": -0.5352218747138977, + "logps/chosen": -0.0005271884147077799, + "logps/rejected": -1.3354413509368896, + "loss": 0.7259, + "nll_loss": 0.1814233660697937, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.271883856039494e-05, + "rewards/margins": 0.1334914267063141, + "rewards/rejected": -0.13354414701461792, + "step": 7001 + }, + { + "epoch": 4.842323651452282, + "grad_norm": 11.530202865600586, + "learning_rate": 2.8653757491931764e-05, + "log_odds_chosen": 10.981690406799316, + "log_odds_ratio": -2.312998731213156e-05, + "logits/chosen": -0.8751830458641052, + "logits/rejected": -0.8583577871322632, + "logps/chosen": -0.00014386913971975446, + "logps/rejected": -2.095196008682251, + "loss": 1.1425, + "nll_loss": 0.2856108248233795, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4386915609065909e-05, + "rewards/margins": 0.20950521528720856, + "rewards/rejected": -0.20951959490776062, + "step": 7002 + }, + { + "epoch": 4.843015214384509, + "grad_norm": 5.708413124084473, + "learning_rate": 2.864991547564162e-05, + "log_odds_chosen": 9.564705848693848, + "log_odds_ratio": -0.000253094854997471, + "logits/chosen": -0.4983338713645935, + "logits/rejected": -0.4991118907928467, + "logps/chosen": -0.00042362918611615896, + "logps/rejected": -1.703924298286438, + "loss": 1.1459, + "nll_loss": 0.2864604890346527, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2362917156424373e-05, + "rewards/margins": 0.1703500598669052, + "rewards/rejected": -0.17039242386817932, + "step": 7003 + }, + { + "epoch": 4.8437067773167355, + "grad_norm": 6.260480880737305, + "learning_rate": 2.864607345935147e-05, + "log_odds_chosen": 9.71456527709961, + "log_odds_ratio": -0.00022481786436401308, + "logits/chosen": -0.38238197565078735, + "logits/rejected": -0.4862619638442993, + "logps/chosen": -0.006300437729805708, + "logps/rejected": -2.351069450378418, + "loss": 1.3251, + "nll_loss": 0.3312584161758423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006300437962636352, + "rewards/margins": 0.23447692394256592, + "rewards/rejected": -0.235106959939003, + "step": 7004 + }, + { + "epoch": 4.844398340248962, + "grad_norm": 10.423199653625488, + "learning_rate": 2.8642231443061318e-05, + "log_odds_chosen": 9.46685791015625, + "log_odds_ratio": -0.0017505851574242115, + "logits/chosen": -0.457302451133728, + "logits/rejected": -0.4845985770225525, + "logps/chosen": -0.0013079033233225346, + "logps/rejected": -1.8090081214904785, + "loss": 1.1001, + "nll_loss": 0.2748434543609619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001307903294218704, + "rewards/margins": 0.18077000975608826, + "rewards/rejected": -0.18090081214904785, + "step": 7005 + }, + { + "epoch": 4.845089903181189, + "grad_norm": 11.410719871520996, + "learning_rate": 2.8638389426771174e-05, + "log_odds_chosen": 10.684247970581055, + "log_odds_ratio": -0.00015776841610204428, + "logits/chosen": -1.0845293998718262, + "logits/rejected": -1.1410024166107178, + "logps/chosen": -0.0006103913183324039, + "logps/rejected": -2.401841640472412, + "loss": 1.2811, + "nll_loss": 0.32025855779647827, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103913619881496e-05, + "rewards/margins": 0.24012315273284912, + "rewards/rejected": -0.24018418788909912, + "step": 7006 + }, + { + "epoch": 4.845781466113416, + "grad_norm": 6.313530445098877, + "learning_rate": 2.8634547410481023e-05, + "log_odds_chosen": 10.980323791503906, + "log_odds_ratio": -0.0005783824599348009, + "logits/chosen": -0.41384977102279663, + "logits/rejected": -0.47831642627716064, + "logps/chosen": -0.00023087850422598422, + "logps/rejected": -2.7741446495056152, + "loss": 1.0061, + "nll_loss": 0.25145581364631653, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.308785042259842e-05, + "rewards/margins": 0.27739137411117554, + "rewards/rejected": -0.277414470911026, + "step": 7007 + }, + { + "epoch": 4.846473029045643, + "grad_norm": 11.239285469055176, + "learning_rate": 2.8630705394190872e-05, + "log_odds_chosen": 11.080429077148438, + "log_odds_ratio": -3.404112794669345e-05, + "logits/chosen": -0.07994014769792557, + "logits/rejected": -0.24010121822357178, + "logps/chosen": -0.0002886131114792079, + "logps/rejected": -2.016791582107544, + "loss": 0.9119, + "nll_loss": 0.2279655486345291, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8861313694505952e-05, + "rewards/margins": 0.20165029168128967, + "rewards/rejected": -0.20167915523052216, + "step": 7008 + }, + { + "epoch": 4.84716459197787, + "grad_norm": 10.09489917755127, + "learning_rate": 2.8626863377900725e-05, + "log_odds_chosen": 10.397052764892578, + "log_odds_ratio": -0.0014343769289553165, + "logits/chosen": -0.4876552224159241, + "logits/rejected": -0.5108673572540283, + "logps/chosen": -0.0017708453815430403, + "logps/rejected": -2.815434217453003, + "loss": 1.2702, + "nll_loss": 0.3173943758010864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017708452651277184, + "rewards/margins": 0.28136634826660156, + "rewards/rejected": -0.28154343366622925, + "step": 7009 + }, + { + "epoch": 4.8478561549100965, + "grad_norm": 5.118479251861572, + "learning_rate": 2.8623021361610574e-05, + "log_odds_chosen": 10.387958526611328, + "log_odds_ratio": -0.0003742785775102675, + "logits/chosen": -0.7877905368804932, + "logits/rejected": -0.8531047105789185, + "logps/chosen": -0.004792619496583939, + "logps/rejected": -2.6325814723968506, + "loss": 0.7409, + "nll_loss": 0.18518517911434174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004792619729414582, + "rewards/margins": 0.2627788782119751, + "rewards/rejected": -0.263258159160614, + "step": 7010 + }, + { + "epoch": 4.848547717842323, + "grad_norm": 8.308259010314941, + "learning_rate": 2.8619179345320423e-05, + "log_odds_chosen": 10.528495788574219, + "log_odds_ratio": -3.735262362170033e-05, + "logits/chosen": -0.07867129892110825, + "logits/rejected": -0.20108526945114136, + "logps/chosen": -0.0003321646945551038, + "logps/rejected": -2.288141965866089, + "loss": 0.8417, + "nll_loss": 0.2104172706604004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.32164709107019e-05, + "rewards/margins": 0.22878098487854004, + "rewards/rejected": -0.22881419956684113, + "step": 7011 + }, + { + "epoch": 4.84923928077455, + "grad_norm": 6.285458564758301, + "learning_rate": 2.861533732903028e-05, + "log_odds_chosen": 9.053383827209473, + "log_odds_ratio": -0.018818309530615807, + "logits/chosen": -0.09861317276954651, + "logits/rejected": -0.31000280380249023, + "logps/chosen": -0.0163175780326128, + "logps/rejected": -1.712219476699829, + "loss": 1.0068, + "nll_loss": 0.24982908368110657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016317577101290226, + "rewards/margins": 0.16959017515182495, + "rewards/rejected": -0.17122194170951843, + "step": 7012 + }, + { + "epoch": 4.849930843706777, + "grad_norm": 17.221817016601562, + "learning_rate": 2.8611495312740128e-05, + "log_odds_chosen": 10.633796691894531, + "log_odds_ratio": -4.5296914322534576e-05, + "logits/chosen": -0.3142685890197754, + "logits/rejected": -0.3037078380584717, + "logps/chosen": -0.00020182921434752643, + "logps/rejected": -1.8377907276153564, + "loss": 0.9146, + "nll_loss": 0.2286510318517685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.018292070715688e-05, + "rewards/margins": 0.18375888466835022, + "rewards/rejected": -0.1837790608406067, + "step": 7013 + }, + { + "epoch": 4.850622406639004, + "grad_norm": 5.6909990310668945, + "learning_rate": 2.8607653296449977e-05, + "log_odds_chosen": 9.910855293273926, + "log_odds_ratio": -0.0014734583673998713, + "logits/chosen": -0.3158652186393738, + "logits/rejected": -0.31831100583076477, + "logps/chosen": -0.0014183268649503589, + "logps/rejected": -2.021665096282959, + "loss": 1.088, + "nll_loss": 0.27186426520347595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001418326864950359, + "rewards/margins": 0.2020246684551239, + "rewards/rejected": -0.20216649770736694, + "step": 7014 + }, + { + "epoch": 4.851313969571231, + "grad_norm": 10.531902313232422, + "learning_rate": 2.8603811280159832e-05, + "log_odds_chosen": 11.14271354675293, + "log_odds_ratio": -3.343543357914314e-05, + "logits/chosen": -0.63079434633255, + "logits/rejected": -0.6695267558097839, + "logps/chosen": -9.733759361552075e-05, + "logps/rejected": -1.8637280464172363, + "loss": 0.9089, + "nll_loss": 0.22723162174224854, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.733759725349955e-06, + "rewards/margins": 0.1863630712032318, + "rewards/rejected": -0.1863728165626526, + "step": 7015 + }, + { + "epoch": 4.8520055325034575, + "grad_norm": 12.947765350341797, + "learning_rate": 2.859996926386968e-05, + "log_odds_chosen": 11.508096694946289, + "log_odds_ratio": -2.6764759240904823e-05, + "logits/chosen": -1.0222357511520386, + "logits/rejected": -1.0842270851135254, + "logps/chosen": -0.00017423175449948758, + "logps/rejected": -2.8178083896636963, + "loss": 1.558, + "nll_loss": 0.3895052671432495, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7423175449948758e-05, + "rewards/margins": 0.2817634344100952, + "rewards/rejected": -0.281780868768692, + "step": 7016 + }, + { + "epoch": 4.852697095435684, + "grad_norm": 20.07524871826172, + "learning_rate": 2.859612724757953e-05, + "log_odds_chosen": 11.379074096679688, + "log_odds_ratio": -1.8707838535192423e-05, + "logits/chosen": -0.13574762642383575, + "logits/rejected": -0.19763877987861633, + "logps/chosen": -0.0002729441621340811, + "logps/rejected": -2.996054172515869, + "loss": 1.7399, + "nll_loss": 0.4349702298641205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7294418032397516e-05, + "rewards/margins": 0.29957813024520874, + "rewards/rejected": -0.29960542917251587, + "step": 7017 + }, + { + "epoch": 4.853388658367911, + "grad_norm": 7.500363826751709, + "learning_rate": 2.8592285231289383e-05, + "log_odds_chosen": 9.627655982971191, + "log_odds_ratio": -0.0007912968285381794, + "logits/chosen": -0.3740471601486206, + "logits/rejected": -0.49173182249069214, + "logps/chosen": -0.0012759091332554817, + "logps/rejected": -2.257746696472168, + "loss": 1.0627, + "nll_loss": 0.2655911445617676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012759091623593122, + "rewards/margins": 0.22564706206321716, + "rewards/rejected": -0.22577466070652008, + "step": 7018 + }, + { + "epoch": 4.854080221300138, + "grad_norm": 8.293935775756836, + "learning_rate": 2.8588443214999232e-05, + "log_odds_chosen": 10.88177490234375, + "log_odds_ratio": -2.4730215955059975e-05, + "logits/chosen": -0.7868159413337708, + "logits/rejected": -0.8734317421913147, + "logps/chosen": -0.00011132473446195945, + "logps/rejected": -1.7708433866500854, + "loss": 0.6616, + "nll_loss": 0.16538910567760468, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1132473446195945e-05, + "rewards/margins": 0.17707321047782898, + "rewards/rejected": -0.17708435654640198, + "step": 7019 + }, + { + "epoch": 4.854771784232365, + "grad_norm": 8.555705070495605, + "learning_rate": 2.858460119870908e-05, + "log_odds_chosen": 10.491169929504395, + "log_odds_ratio": -5.717053500120528e-05, + "logits/chosen": -0.24532398581504822, + "logits/rejected": -0.32132548093795776, + "logps/chosen": -0.0002730230917222798, + "logps/rejected": -2.187023162841797, + "loss": 0.7002, + "nll_loss": 0.1750478297472, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7302312446408905e-05, + "rewards/margins": 0.21867501735687256, + "rewards/rejected": -0.2187023162841797, + "step": 7020 + }, + { + "epoch": 4.855463347164592, + "grad_norm": 10.897348403930664, + "learning_rate": 2.8580759182418937e-05, + "log_odds_chosen": 10.51218032836914, + "log_odds_ratio": -4.524239921011031e-05, + "logits/chosen": -0.3870241045951843, + "logits/rejected": -0.4795756936073303, + "logps/chosen": -0.00019211246399208903, + "logps/rejected": -1.650565505027771, + "loss": 0.9654, + "nll_loss": 0.24134519696235657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9211245671613142e-05, + "rewards/margins": 0.1650373339653015, + "rewards/rejected": -0.16505655646324158, + "step": 7021 + }, + { + "epoch": 4.856154910096818, + "grad_norm": 9.62659740447998, + "learning_rate": 2.8576917166128786e-05, + "log_odds_chosen": 10.597943305969238, + "log_odds_ratio": -9.808303730096668e-05, + "logits/chosen": -0.6156014204025269, + "logits/rejected": -0.6655142903327942, + "logps/chosen": -0.0015430478379130363, + "logps/rejected": -2.7433271408081055, + "loss": 1.0996, + "nll_loss": 0.2748914062976837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015430479834321886, + "rewards/margins": 0.2741783857345581, + "rewards/rejected": -0.2743327021598816, + "step": 7022 + }, + { + "epoch": 4.856846473029045, + "grad_norm": 16.081926345825195, + "learning_rate": 2.8573075149838635e-05, + "log_odds_chosen": 10.689600944519043, + "log_odds_ratio": -0.0007699825218878686, + "logits/chosen": -0.5941533446311951, + "logits/rejected": -0.5842257738113403, + "logps/chosen": -0.0030286216642707586, + "logps/rejected": -2.574476718902588, + "loss": 0.8608, + "nll_loss": 0.21511797606945038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030286217224784195, + "rewards/margins": 0.2571448087692261, + "rewards/rejected": -0.25744765996932983, + "step": 7023 + }, + { + "epoch": 4.857538035961272, + "grad_norm": 9.591567993164062, + "learning_rate": 2.856923313354849e-05, + "log_odds_chosen": 11.012365341186523, + "log_odds_ratio": -3.3873315260279924e-05, + "logits/chosen": -0.5489360094070435, + "logits/rejected": -0.6033146977424622, + "logps/chosen": -0.00011565893510123715, + "logps/rejected": -1.677449107170105, + "loss": 0.6449, + "nll_loss": 0.16123202443122864, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1565893146325834e-05, + "rewards/margins": 0.1677333414554596, + "rewards/rejected": -0.16774490475654602, + "step": 7024 + }, + { + "epoch": 4.858229598893499, + "grad_norm": 22.097078323364258, + "learning_rate": 2.856539111725834e-05, + "log_odds_chosen": 8.750368118286133, + "log_odds_ratio": -0.19398075342178345, + "logits/chosen": -0.41244447231292725, + "logits/rejected": -0.40756645798683167, + "logps/chosen": -0.022159744054079056, + "logps/rejected": -1.5988523960113525, + "loss": 0.9419, + "nll_loss": 0.21607756614685059, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002215974498540163, + "rewards/margins": 0.1576692759990692, + "rewards/rejected": -0.1598852425813675, + "step": 7025 + }, + { + "epoch": 4.858921161825726, + "grad_norm": 8.726022720336914, + "learning_rate": 2.856154910096819e-05, + "log_odds_chosen": 9.916749954223633, + "log_odds_ratio": -0.00024162212503142655, + "logits/chosen": -0.3288005590438843, + "logits/rejected": -0.5277249813079834, + "logps/chosen": -0.0008131344802677631, + "logps/rejected": -1.8410141468048096, + "loss": 1.2813, + "nll_loss": 0.3203001022338867, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13134538475424e-05, + "rewards/margins": 0.18402010202407837, + "rewards/rejected": -0.1841014176607132, + "step": 7026 + }, + { + "epoch": 4.8596127247579535, + "grad_norm": 5.5701704025268555, + "learning_rate": 2.855770708467804e-05, + "log_odds_chosen": 10.251022338867188, + "log_odds_ratio": -0.00010756327537819743, + "logits/chosen": -0.4105292558670044, + "logits/rejected": -0.47585058212280273, + "logps/chosen": -0.0001659254776313901, + "logps/rejected": -1.7069003582000732, + "loss": 0.636, + "nll_loss": 0.1589965969324112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.659254849073477e-05, + "rewards/margins": 0.1706734299659729, + "rewards/rejected": -0.17069002985954285, + "step": 7027 + }, + { + "epoch": 4.86030428769018, + "grad_norm": 8.971461296081543, + "learning_rate": 2.855386506838789e-05, + "log_odds_chosen": 11.052430152893066, + "log_odds_ratio": -3.381207716302015e-05, + "logits/chosen": -0.43431317806243896, + "logits/rejected": -0.46402910351753235, + "logps/chosen": -0.00028077964088879526, + "logps/rejected": -2.6134989261627197, + "loss": 0.7517, + "nll_loss": 0.18791866302490234, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.807796590786893e-05, + "rewards/margins": 0.2613218128681183, + "rewards/rejected": -0.2613498866558075, + "step": 7028 + }, + { + "epoch": 4.860995850622407, + "grad_norm": 8.809112548828125, + "learning_rate": 2.855002305209774e-05, + "log_odds_chosen": 8.960859298706055, + "log_odds_ratio": -0.0003623150405474007, + "logits/chosen": -0.8475053310394287, + "logits/rejected": -0.8835227489471436, + "logps/chosen": -0.0005206743371672928, + "logps/rejected": -1.0224153995513916, + "loss": 1.1576, + "nll_loss": 0.2893637418746948, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2067429351154715e-05, + "rewards/margins": 0.10218948125839233, + "rewards/rejected": -0.10224154591560364, + "step": 7029 + }, + { + "epoch": 4.861687413554634, + "grad_norm": 8.545720100402832, + "learning_rate": 2.8546181035807595e-05, + "log_odds_chosen": 9.107865333557129, + "log_odds_ratio": -0.004004120826721191, + "logits/chosen": -0.6395995616912842, + "logits/rejected": -0.6510448455810547, + "logps/chosen": -0.010273730382323265, + "logps/rejected": -2.113751173019409, + "loss": 1.369, + "nll_loss": 0.34184643626213074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010273730149492621, + "rewards/margins": 0.2103477418422699, + "rewards/rejected": -0.2113751322031021, + "step": 7030 + }, + { + "epoch": 4.862378976486861, + "grad_norm": 9.708702087402344, + "learning_rate": 2.8542339019517444e-05, + "log_odds_chosen": 10.770926475524902, + "log_odds_ratio": -3.156045568175614e-05, + "logits/chosen": -0.30541956424713135, + "logits/rejected": -0.33389005064964294, + "logps/chosen": -0.0002467721060384065, + "logps/rejected": -2.4047722816467285, + "loss": 1.1034, + "nll_loss": 0.27585557103157043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.467721060384065e-05, + "rewards/margins": 0.24045255780220032, + "rewards/rejected": -0.24047723412513733, + "step": 7031 + }, + { + "epoch": 4.863070539419088, + "grad_norm": 5.549326419830322, + "learning_rate": 2.8538497003227294e-05, + "log_odds_chosen": 10.177644729614258, + "log_odds_ratio": -7.75428197812289e-05, + "logits/chosen": -0.5677988529205322, + "logits/rejected": -0.5510009527206421, + "logps/chosen": -0.000404900754801929, + "logps/rejected": -1.6147857904434204, + "loss": 1.0492, + "nll_loss": 0.26228851079940796, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.049007475259714e-05, + "rewards/margins": 0.16143809258937836, + "rewards/rejected": -0.16147857904434204, + "step": 7032 + }, + { + "epoch": 4.8637621023513145, + "grad_norm": 7.651041030883789, + "learning_rate": 2.853465498693715e-05, + "log_odds_chosen": 9.282649993896484, + "log_odds_ratio": -0.0006824180600233376, + "logits/chosen": -0.8561673164367676, + "logits/rejected": -0.893168032169342, + "logps/chosen": -0.007127598859369755, + "logps/rejected": -1.5555087327957153, + "loss": 1.0692, + "nll_loss": 0.2672296166419983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007127599674277008, + "rewards/margins": 0.15483811497688293, + "rewards/rejected": -0.15555086731910706, + "step": 7033 + }, + { + "epoch": 4.864453665283541, + "grad_norm": 13.443490982055664, + "learning_rate": 2.8530812970647e-05, + "log_odds_chosen": 7.656833648681641, + "log_odds_ratio": -0.35919690132141113, + "logits/chosen": -0.3445935845375061, + "logits/rejected": -0.3648972809314728, + "logps/chosen": -0.051969386637210846, + "logps/rejected": -2.030459403991699, + "loss": 1.4184, + "nll_loss": 0.31868651509284973, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005196938756853342, + "rewards/margins": 0.19784902036190033, + "rewards/rejected": -0.20304596424102783, + "step": 7034 + }, + { + "epoch": 4.865145228215768, + "grad_norm": 7.0616278648376465, + "learning_rate": 2.8526970954356847e-05, + "log_odds_chosen": 9.736413955688477, + "log_odds_ratio": -0.00019674711802508682, + "logits/chosen": -0.4405166804790497, + "logits/rejected": -0.5529405474662781, + "logps/chosen": -0.0004194923967588693, + "logps/rejected": -1.604013204574585, + "loss": 1.2811, + "nll_loss": 0.32026198506355286, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.194923531031236e-05, + "rewards/margins": 0.16035938262939453, + "rewards/rejected": -0.16040131449699402, + "step": 7035 + }, + { + "epoch": 4.865836791147995, + "grad_norm": 6.439877986907959, + "learning_rate": 2.85231289380667e-05, + "log_odds_chosen": 9.398344039916992, + "log_odds_ratio": -0.0017098677344620228, + "logits/chosen": -0.5163373351097107, + "logits/rejected": -0.5247904062271118, + "logps/chosen": -0.001775981392711401, + "logps/rejected": -1.9271403551101685, + "loss": 0.9267, + "nll_loss": 0.23149679601192474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017759812180884182, + "rewards/margins": 0.19253644347190857, + "rewards/rejected": -0.19271403551101685, + "step": 7036 + }, + { + "epoch": 4.866528354080222, + "grad_norm": 16.124523162841797, + "learning_rate": 2.851928692177655e-05, + "log_odds_chosen": 10.64400863647461, + "log_odds_ratio": -6.0084301367169246e-05, + "logits/chosen": -0.8334964513778687, + "logits/rejected": -0.8010177612304688, + "logps/chosen": -0.0005602404708042741, + "logps/rejected": -2.1358814239501953, + "loss": 0.9616, + "nll_loss": 0.2403859794139862, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.602405144600198e-05, + "rewards/margins": 0.21353211998939514, + "rewards/rejected": -0.213588148355484, + "step": 7037 + }, + { + "epoch": 4.867219917012449, + "grad_norm": 6.622456073760986, + "learning_rate": 2.8515444905486398e-05, + "log_odds_chosen": 10.069990158081055, + "log_odds_ratio": -0.0001646141754463315, + "logits/chosen": -0.4626843333244324, + "logits/rejected": -0.5455853343009949, + "logps/chosen": -0.004076323471963406, + "logps/rejected": -2.6835875511169434, + "loss": 1.5953, + "nll_loss": 0.3988024592399597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040763235301710665, + "rewards/margins": 0.2679511308670044, + "rewards/rejected": -0.2683587670326233, + "step": 7038 + }, + { + "epoch": 4.867911479944675, + "grad_norm": 6.874902725219727, + "learning_rate": 2.8511602889196254e-05, + "log_odds_chosen": 10.539509773254395, + "log_odds_ratio": -4.3701493268599734e-05, + "logits/chosen": -0.6427075862884521, + "logits/rejected": -0.727043092250824, + "logps/chosen": -0.00018337485380470753, + "logps/rejected": -1.7754486799240112, + "loss": 0.85, + "nll_loss": 0.21248428523540497, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8337485016672872e-05, + "rewards/margins": 0.1775265336036682, + "rewards/rejected": -0.17754487693309784, + "step": 7039 + }, + { + "epoch": 4.868603042876902, + "grad_norm": 5.979666233062744, + "learning_rate": 2.8507760872906103e-05, + "log_odds_chosen": 10.752289772033691, + "log_odds_ratio": -6.954609852982685e-05, + "logits/chosen": -0.3465648889541626, + "logits/rejected": -0.4407429099082947, + "logps/chosen": -0.000171839288668707, + "logps/rejected": -1.8220527172088623, + "loss": 0.9089, + "nll_loss": 0.2272091805934906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7183931049657986e-05, + "rewards/margins": 0.18218809366226196, + "rewards/rejected": -0.18220525979995728, + "step": 7040 + }, + { + "epoch": 4.869294605809129, + "grad_norm": 3.938154935836792, + "learning_rate": 2.8503918856615952e-05, + "log_odds_chosen": 9.546857833862305, + "log_odds_ratio": -0.0006905286572873592, + "logits/chosen": -0.13503634929656982, + "logits/rejected": -0.14862604439258575, + "logps/chosen": -0.0007053675362840295, + "logps/rejected": -1.8098433017730713, + "loss": 0.9726, + "nll_loss": 0.24308504164218903, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.053676381474361e-05, + "rewards/margins": 0.1809138059616089, + "rewards/rejected": -0.18098433315753937, + "step": 7041 + }, + { + "epoch": 4.869986168741356, + "grad_norm": 8.758733749389648, + "learning_rate": 2.8500076840325808e-05, + "log_odds_chosen": 9.40017318725586, + "log_odds_ratio": -0.00036749555147252977, + "logits/chosen": -0.7903873920440674, + "logits/rejected": -0.8652528524398804, + "logps/chosen": -0.007027729880064726, + "logps/rejected": -2.663066864013672, + "loss": 1.3046, + "nll_loss": 0.32612505555152893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007027729880064726, + "rewards/margins": 0.2656038999557495, + "rewards/rejected": -0.26630666851997375, + "step": 7042 + }, + { + "epoch": 4.870677731673583, + "grad_norm": 12.399148941040039, + "learning_rate": 2.8496234824035657e-05, + "log_odds_chosen": 9.200675010681152, + "log_odds_ratio": -0.0005780202336609364, + "logits/chosen": -0.45060375332832336, + "logits/rejected": -0.4034438133239746, + "logps/chosen": -0.003487096168100834, + "logps/rejected": -1.8712232112884521, + "loss": 1.0474, + "nll_loss": 0.26178497076034546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003487096109893173, + "rewards/margins": 0.18677359819412231, + "rewards/rejected": -0.18712231516838074, + "step": 7043 + }, + { + "epoch": 4.87136929460581, + "grad_norm": 17.244375228881836, + "learning_rate": 2.8492392807745506e-05, + "log_odds_chosen": 10.586780548095703, + "log_odds_ratio": -0.0002845847629942, + "logits/chosen": -0.7591636776924133, + "logits/rejected": -0.7484526634216309, + "logps/chosen": -0.00022362990421243012, + "logps/rejected": -2.1559767723083496, + "loss": 1.7396, + "nll_loss": 0.4348672032356262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2362992240232415e-05, + "rewards/margins": 0.21557533740997314, + "rewards/rejected": -0.2155977040529251, + "step": 7044 + }, + { + "epoch": 4.872060857538036, + "grad_norm": 8.071002960205078, + "learning_rate": 2.848855079145536e-05, + "log_odds_chosen": 9.05749225616455, + "log_odds_ratio": -0.11127299070358276, + "logits/chosen": -0.3790587782859802, + "logits/rejected": -0.41623783111572266, + "logps/chosen": -0.021599093452095985, + "logps/rejected": -2.0281240940093994, + "loss": 1.1519, + "nll_loss": 0.27685868740081787, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0021599093452095985, + "rewards/margins": 0.20065252482891083, + "rewards/rejected": -0.20281243324279785, + "step": 7045 + }, + { + "epoch": 4.872752420470263, + "grad_norm": 6.992929935455322, + "learning_rate": 2.8484708775165207e-05, + "log_odds_chosen": 11.098962783813477, + "log_odds_ratio": -5.613052780972794e-05, + "logits/chosen": -0.5032197833061218, + "logits/rejected": -0.4811609387397766, + "logps/chosen": -0.00013649038737639785, + "logps/rejected": -2.3080101013183594, + "loss": 0.7479, + "nll_loss": 0.1869587004184723, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3649039829033427e-05, + "rewards/margins": 0.23078739643096924, + "rewards/rejected": -0.2308010309934616, + "step": 7046 + }, + { + "epoch": 4.87344398340249, + "grad_norm": 9.915785789489746, + "learning_rate": 2.8480866758875056e-05, + "log_odds_chosen": 9.496391296386719, + "log_odds_ratio": -0.02560707926750183, + "logits/chosen": -0.6899237036705017, + "logits/rejected": -0.7254764437675476, + "logps/chosen": -0.011045449413359165, + "logps/rejected": -1.974931240081787, + "loss": 1.4553, + "nll_loss": 0.3612610399723053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011045449646189809, + "rewards/margins": 0.19638857245445251, + "rewards/rejected": -0.19749312102794647, + "step": 7047 + }, + { + "epoch": 4.874135546334717, + "grad_norm": 7.999567031860352, + "learning_rate": 2.8477024742584912e-05, + "log_odds_chosen": 9.674551963806152, + "log_odds_ratio": -0.0001579619711264968, + "logits/chosen": -0.13578563928604126, + "logits/rejected": -0.24292123317718506, + "logps/chosen": -0.0016413903795182705, + "logps/rejected": -2.1390011310577393, + "loss": 0.7133, + "nll_loss": 0.1783016175031662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016413902631029487, + "rewards/margins": 0.21373598277568817, + "rewards/rejected": -0.2139001190662384, + "step": 7048 + }, + { + "epoch": 4.874827109266944, + "grad_norm": 11.47680950164795, + "learning_rate": 2.847318272629476e-05, + "log_odds_chosen": 9.738288879394531, + "log_odds_ratio": -0.005372277460992336, + "logits/chosen": -0.3812277615070343, + "logits/rejected": -0.4588232636451721, + "logps/chosen": -0.0028886208310723305, + "logps/rejected": -2.0673182010650635, + "loss": 0.9856, + "nll_loss": 0.24587345123291016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002888621238525957, + "rewards/margins": 0.2064429521560669, + "rewards/rejected": -0.20673182606697083, + "step": 7049 + }, + { + "epoch": 4.875518672199171, + "grad_norm": 7.6114935874938965, + "learning_rate": 2.846934071000461e-05, + "log_odds_chosen": 9.799309730529785, + "log_odds_ratio": -9.547019726596773e-05, + "logits/chosen": -0.5523931384086609, + "logits/rejected": -0.5366276502609253, + "logps/chosen": -0.00037644183612428606, + "logps/rejected": -1.6992120742797852, + "loss": 1.224, + "nll_loss": 0.30597805976867676, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.764418579521589e-05, + "rewards/margins": 0.1698835790157318, + "rewards/rejected": -0.16992120444774628, + "step": 7050 + }, + { + "epoch": 4.876210235131397, + "grad_norm": 9.410329818725586, + "learning_rate": 2.8465498693714466e-05, + "log_odds_chosen": 7.662788391113281, + "log_odds_ratio": -0.014153570868074894, + "logits/chosen": -0.4911881387233734, + "logits/rejected": -0.4591422975063324, + "logps/chosen": -0.07480232417583466, + "logps/rejected": -1.362932801246643, + "loss": 0.779, + "nll_loss": 0.19333398342132568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007480232045054436, + "rewards/margins": 0.12881305813789368, + "rewards/rejected": -0.13629327714443207, + "step": 7051 + }, + { + "epoch": 4.876901798063624, + "grad_norm": 11.054753303527832, + "learning_rate": 2.8461656677424315e-05, + "log_odds_chosen": 10.068439483642578, + "log_odds_ratio": -9.35841744649224e-05, + "logits/chosen": -0.5366771817207336, + "logits/rejected": -0.556289792060852, + "logps/chosen": -0.0003213782620150596, + "logps/rejected": -1.6700217723846436, + "loss": 1.0708, + "nll_loss": 0.2676818370819092, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2137824746314436e-05, + "rewards/margins": 0.16697004437446594, + "rewards/rejected": -0.16700220108032227, + "step": 7052 + }, + { + "epoch": 4.877593360995851, + "grad_norm": 23.047603607177734, + "learning_rate": 2.8457814661134164e-05, + "log_odds_chosen": 10.605345726013184, + "log_odds_ratio": -4.5849927118979394e-05, + "logits/chosen": -0.6093308925628662, + "logits/rejected": -0.5740828514099121, + "logps/chosen": -0.0003509022935759276, + "logps/rejected": -2.091120958328247, + "loss": 0.9871, + "nll_loss": 0.2467714101076126, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.509023008518852e-05, + "rewards/margins": 0.20907700061798096, + "rewards/rejected": -0.20911210775375366, + "step": 7053 + }, + { + "epoch": 4.878284923928078, + "grad_norm": 6.905877590179443, + "learning_rate": 2.8453972644844017e-05, + "log_odds_chosen": 9.971370697021484, + "log_odds_ratio": -0.0015021146973595023, + "logits/chosen": -0.6287491321563721, + "logits/rejected": -0.7699867486953735, + "logps/chosen": -0.002430099993944168, + "logps/rejected": -2.4299771785736084, + "loss": 0.7239, + "nll_loss": 0.18082213401794434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024300998484250158, + "rewards/margins": 0.24275469779968262, + "rewards/rejected": -0.24299770593643188, + "step": 7054 + }, + { + "epoch": 4.878976486860305, + "grad_norm": 16.925321578979492, + "learning_rate": 2.8450130628553866e-05, + "log_odds_chosen": 9.296889305114746, + "log_odds_ratio": -0.004414747469127178, + "logits/chosen": -0.6305980086326599, + "logits/rejected": -0.6567308306694031, + "logps/chosen": -0.003485491033643484, + "logps/rejected": -2.01444673538208, + "loss": 1.4225, + "nll_loss": 0.3551926016807556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003485491033643484, + "rewards/margins": 0.20109611749649048, + "rewards/rejected": -0.20144465565681458, + "step": 7055 + }, + { + "epoch": 4.8796680497925315, + "grad_norm": 7.701079368591309, + "learning_rate": 2.8446288612263715e-05, + "log_odds_chosen": 10.75440788269043, + "log_odds_ratio": -3.690827725222334e-05, + "logits/chosen": -0.5414289236068726, + "logits/rejected": -0.6197251677513123, + "logps/chosen": -0.0001932993473019451, + "logps/rejected": -2.0484259128570557, + "loss": 0.8056, + "nll_loss": 0.20138521492481232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9329936549183913e-05, + "rewards/margins": 0.20482327044010162, + "rewards/rejected": -0.20484259724617004, + "step": 7056 + }, + { + "epoch": 4.880359612724758, + "grad_norm": 7.884543418884277, + "learning_rate": 2.844244659597357e-05, + "log_odds_chosen": 10.570260047912598, + "log_odds_ratio": -4.935210745315999e-05, + "logits/chosen": -0.2323668897151947, + "logits/rejected": -0.3230237662792206, + "logps/chosen": -0.00016158647486008704, + "logps/rejected": -1.9191505908966064, + "loss": 0.8309, + "nll_loss": 0.207707941532135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6158646758412942e-05, + "rewards/margins": 0.191898912191391, + "rewards/rejected": -0.19191506505012512, + "step": 7057 + }, + { + "epoch": 4.881051175656985, + "grad_norm": 13.345209121704102, + "learning_rate": 2.843860457968342e-05, + "log_odds_chosen": 10.143343925476074, + "log_odds_ratio": -8.016329229576513e-05, + "logits/chosen": -0.8679466247558594, + "logits/rejected": -0.9242144227027893, + "logps/chosen": -0.0001684374874457717, + "logps/rejected": -1.3030248880386353, + "loss": 1.1419, + "nll_loss": 0.2854706943035126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.684374910837505e-05, + "rewards/margins": 0.13028565049171448, + "rewards/rejected": -0.13030248880386353, + "step": 7058 + }, + { + "epoch": 4.881742738589212, + "grad_norm": 9.145713806152344, + "learning_rate": 2.843476256339327e-05, + "log_odds_chosen": 10.114141464233398, + "log_odds_ratio": -0.0005261494661681354, + "logits/chosen": -0.6389458775520325, + "logits/rejected": -0.6943326592445374, + "logps/chosen": -0.000395385519368574, + "logps/rejected": -1.9835808277130127, + "loss": 0.7446, + "nll_loss": 0.18608568608760834, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9538554119644687e-05, + "rewards/margins": 0.19831854104995728, + "rewards/rejected": -0.19835807383060455, + "step": 7059 + }, + { + "epoch": 4.882434301521439, + "grad_norm": 9.99998950958252, + "learning_rate": 2.8430920547103125e-05, + "log_odds_chosen": 10.815114974975586, + "log_odds_ratio": -0.0009209688869304955, + "logits/chosen": -0.2506277561187744, + "logits/rejected": -0.2800312638282776, + "logps/chosen": -0.000968419888522476, + "logps/rejected": -2.3028392791748047, + "loss": 1.0574, + "nll_loss": 0.2642573118209839, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.684199176263064e-05, + "rewards/margins": 0.2301870882511139, + "rewards/rejected": -0.2302839159965515, + "step": 7060 + }, + { + "epoch": 4.883125864453666, + "grad_norm": 8.69649887084961, + "learning_rate": 2.8427078530812974e-05, + "log_odds_chosen": 10.826623916625977, + "log_odds_ratio": -0.00020552946079988033, + "logits/chosen": -0.5005373358726501, + "logits/rejected": -0.5175811052322388, + "logps/chosen": -0.00024266143736895174, + "logps/rejected": -2.551815986633301, + "loss": 0.9927, + "nll_loss": 0.24816061556339264, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.426614264550153e-05, + "rewards/margins": 0.25515735149383545, + "rewards/rejected": -0.25518161058425903, + "step": 7061 + }, + { + "epoch": 4.8838174273858925, + "grad_norm": 6.960662841796875, + "learning_rate": 2.8423236514522823e-05, + "log_odds_chosen": 10.739348411560059, + "log_odds_ratio": -0.00014458272198680788, + "logits/chosen": -0.3545234203338623, + "logits/rejected": -0.43811148405075073, + "logps/chosen": -0.0006923141772858799, + "logps/rejected": -2.5062310695648193, + "loss": 0.7574, + "nll_loss": 0.18933825194835663, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.923142063897103e-05, + "rewards/margins": 0.25055384635925293, + "rewards/rejected": -0.25062310695648193, + "step": 7062 + }, + { + "epoch": 4.884508990318119, + "grad_norm": 7.970198631286621, + "learning_rate": 2.8419394498232675e-05, + "log_odds_chosen": 10.426850318908691, + "log_odds_ratio": -6.352874333970249e-05, + "logits/chosen": -0.18800753355026245, + "logits/rejected": -0.37803998589515686, + "logps/chosen": -0.0001434234291082248, + "logps/rejected": -1.4915777444839478, + "loss": 0.55, + "nll_loss": 0.1375032365322113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4342344002216123e-05, + "rewards/margins": 0.14914342761039734, + "rewards/rejected": -0.14915776252746582, + "step": 7063 + }, + { + "epoch": 4.885200553250346, + "grad_norm": 10.392330169677734, + "learning_rate": 2.8415552481942524e-05, + "log_odds_chosen": 10.369099617004395, + "log_odds_ratio": -0.00018756282224785537, + "logits/chosen": -0.5978999733924866, + "logits/rejected": -0.6408947706222534, + "logps/chosen": -0.00031749578192830086, + "logps/rejected": -2.243783473968506, + "loss": 0.7938, + "nll_loss": 0.1984332650899887, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.174957964802161e-05, + "rewards/margins": 0.2243466079235077, + "rewards/rejected": -0.22437834739685059, + "step": 7064 + }, + { + "epoch": 4.885892116182573, + "grad_norm": 8.495529174804688, + "learning_rate": 2.8411710465652373e-05, + "log_odds_chosen": 10.161759376525879, + "log_odds_ratio": -9.85856240731664e-05, + "logits/chosen": -0.2775004804134369, + "logits/rejected": -0.3813784718513489, + "logps/chosen": -0.00040236441418528557, + "logps/rejected": -1.987892508506775, + "loss": 0.7487, + "nll_loss": 0.18717548251152039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0236445784103125e-05, + "rewards/margins": 0.19874900579452515, + "rewards/rejected": -0.19878923892974854, + "step": 7065 + }, + { + "epoch": 4.8865836791148, + "grad_norm": 7.843557357788086, + "learning_rate": 2.840786844936223e-05, + "log_odds_chosen": 9.858935356140137, + "log_odds_ratio": -0.0006947257206775248, + "logits/chosen": -0.4358893930912018, + "logits/rejected": -0.47982048988342285, + "logps/chosen": -0.001503008883446455, + "logps/rejected": -2.213164806365967, + "loss": 1.4627, + "nll_loss": 0.36561495065689087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015030089707579464, + "rewards/margins": 0.22116619348526, + "rewards/rejected": -0.22131648659706116, + "step": 7066 + }, + { + "epoch": 4.887275242047027, + "grad_norm": 8.010547637939453, + "learning_rate": 2.8404026433072078e-05, + "log_odds_chosen": 9.644980430603027, + "log_odds_ratio": -0.0008684393833391368, + "logits/chosen": -0.25602462887763977, + "logits/rejected": -0.3479723632335663, + "logps/chosen": -0.053071144968271255, + "logps/rejected": -2.9593915939331055, + "loss": 1.121, + "nll_loss": 0.2801644206047058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0053071146830916405, + "rewards/margins": 0.29063206911087036, + "rewards/rejected": -0.295939177274704, + "step": 7067 + }, + { + "epoch": 4.8879668049792535, + "grad_norm": 7.515931606292725, + "learning_rate": 2.8400184416781927e-05, + "log_odds_chosen": 9.98626708984375, + "log_odds_ratio": -8.94946715561673e-05, + "logits/chosen": -0.7496136426925659, + "logits/rejected": -0.7395554780960083, + "logps/chosen": -0.0012114938581362367, + "logps/rejected": -2.4176342487335205, + "loss": 0.9002, + "nll_loss": 0.22504302859306335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001211493945447728, + "rewards/margins": 0.24164226651191711, + "rewards/rejected": -0.2417634129524231, + "step": 7068 + }, + { + "epoch": 4.88865836791148, + "grad_norm": 7.007376670837402, + "learning_rate": 2.8396342400491783e-05, + "log_odds_chosen": 10.143765449523926, + "log_odds_ratio": -7.062454096740112e-05, + "logits/chosen": -0.6521151065826416, + "logits/rejected": -0.6742969751358032, + "logps/chosen": -0.0001779589947545901, + "logps/rejected": -1.59269380569458, + "loss": 0.5192, + "nll_loss": 0.12978312373161316, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7795900930650532e-05, + "rewards/margins": 0.15925158560276031, + "rewards/rejected": -0.15926937758922577, + "step": 7069 + }, + { + "epoch": 4.889349930843707, + "grad_norm": 7.883659362792969, + "learning_rate": 2.8392500384201632e-05, + "log_odds_chosen": 9.163580894470215, + "log_odds_ratio": -0.0011066696606576443, + "logits/chosen": -0.008930400013923645, + "logits/rejected": -0.0676572397351265, + "logps/chosen": -0.0009055061964318156, + "logps/rejected": -1.5239794254302979, + "loss": 1.0772, + "nll_loss": 0.2691981792449951, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.055061673279852e-05, + "rewards/margins": 0.1523074060678482, + "rewards/rejected": -0.15239794552326202, + "step": 7070 + }, + { + "epoch": 4.890041493775934, + "grad_norm": 9.622332572937012, + "learning_rate": 2.838865836791148e-05, + "log_odds_chosen": 10.254562377929688, + "log_odds_ratio": -0.0001300430449191481, + "logits/chosen": -0.43594586849212646, + "logits/rejected": -0.46360427141189575, + "logps/chosen": -0.0010361828608438373, + "logps/rejected": -2.300558567047119, + "loss": 0.848, + "nll_loss": 0.2119934856891632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010361829481553286, + "rewards/margins": 0.22995226085186005, + "rewards/rejected": -0.23005586862564087, + "step": 7071 + }, + { + "epoch": 4.890733056708161, + "grad_norm": 6.660597324371338, + "learning_rate": 2.838481635162133e-05, + "log_odds_chosen": 9.821388244628906, + "log_odds_ratio": -0.0002597160346340388, + "logits/chosen": -0.09285390377044678, + "logits/rejected": -0.23921740055084229, + "logps/chosen": -0.0010828624945133924, + "logps/rejected": -2.154656171798706, + "loss": 0.6899, + "nll_loss": 0.17244890332221985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010828624363057315, + "rewards/margins": 0.21535731852054596, + "rewards/rejected": -0.21546560525894165, + "step": 7072 + }, + { + "epoch": 4.891424619640388, + "grad_norm": 10.330202102661133, + "learning_rate": 2.8380974335331183e-05, + "log_odds_chosen": 11.040985107421875, + "log_odds_ratio": -2.5466662918915972e-05, + "logits/chosen": -0.39895886182785034, + "logits/rejected": -0.3868694007396698, + "logps/chosen": -0.00016861945914570242, + "logps/rejected": -2.319244146347046, + "loss": 1.0313, + "nll_loss": 0.2578234076499939, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6861946278368123e-05, + "rewards/margins": 0.23190754652023315, + "rewards/rejected": -0.2319244146347046, + "step": 7073 + }, + { + "epoch": 4.8921161825726145, + "grad_norm": 10.073043823242188, + "learning_rate": 2.8377132319041032e-05, + "log_odds_chosen": 9.917614936828613, + "log_odds_ratio": -0.06503642350435257, + "logits/chosen": -0.6255109906196594, + "logits/rejected": -0.6862890720367432, + "logps/chosen": -0.019769448786973953, + "logps/rejected": -2.653496503829956, + "loss": 1.3958, + "nll_loss": 0.34243640303611755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001976944738999009, + "rewards/margins": 0.2633727192878723, + "rewards/rejected": -0.2653496563434601, + "step": 7074 + }, + { + "epoch": 4.892807745504841, + "grad_norm": 9.930667877197266, + "learning_rate": 2.837329030275088e-05, + "log_odds_chosen": 10.876503944396973, + "log_odds_ratio": -2.9976836231071502e-05, + "logits/chosen": -0.6453677415847778, + "logits/rejected": -0.6829240322113037, + "logps/chosen": -8.378988422919065e-05, + "logps/rejected": -1.5633125305175781, + "loss": 0.7998, + "nll_loss": 0.19993676245212555, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.378989150514826e-06, + "rewards/margins": 0.15632286667823792, + "rewards/rejected": -0.15633124113082886, + "step": 7075 + }, + { + "epoch": 4.893499308437068, + "grad_norm": 8.007487297058105, + "learning_rate": 2.8369448286460737e-05, + "log_odds_chosen": 10.561995506286621, + "log_odds_ratio": -0.0045930189080536366, + "logits/chosen": -0.25250446796417236, + "logits/rejected": -0.4099150002002716, + "logps/chosen": -0.002251701895147562, + "logps/rejected": -2.5110809803009033, + "loss": 0.8776, + "nll_loss": 0.21893686056137085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022517020988743752, + "rewards/margins": 0.25088292360305786, + "rewards/rejected": -0.2511081099510193, + "step": 7076 + }, + { + "epoch": 4.894190871369295, + "grad_norm": 7.112217903137207, + "learning_rate": 2.8365606270170586e-05, + "log_odds_chosen": 10.102551460266113, + "log_odds_ratio": -0.0003846402687486261, + "logits/chosen": -0.4543187916278839, + "logits/rejected": -0.5494933128356934, + "logps/chosen": -0.0003145010559819639, + "logps/rejected": -1.8796675205230713, + "loss": 1.1046, + "nll_loss": 0.27610817551612854, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.145010850857943e-05, + "rewards/margins": 0.18793530762195587, + "rewards/rejected": -0.1879667490720749, + "step": 7077 + }, + { + "epoch": 4.894882434301522, + "grad_norm": 10.677894592285156, + "learning_rate": 2.8361764253880435e-05, + "log_odds_chosen": 9.235824584960938, + "log_odds_ratio": -0.0006279587978497148, + "logits/chosen": -0.7199358344078064, + "logits/rejected": -0.71857088804245, + "logps/chosen": -0.000889734597876668, + "logps/rejected": -1.353074312210083, + "loss": 1.3665, + "nll_loss": 0.34156861901283264, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.897345833247527e-05, + "rewards/margins": 0.13521845638751984, + "rewards/rejected": -0.1353074163198471, + "step": 7078 + }, + { + "epoch": 4.895573997233749, + "grad_norm": 5.909156322479248, + "learning_rate": 2.835792223759029e-05, + "log_odds_chosen": 9.649864196777344, + "log_odds_ratio": -0.0005108444020152092, + "logits/chosen": -0.32373201847076416, + "logits/rejected": -0.37446802854537964, + "logps/chosen": -0.0009714511688798666, + "logps/rejected": -1.7121704816818237, + "loss": 1.1864, + "nll_loss": 0.2965487241744995, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.714511543279514e-05, + "rewards/margins": 0.17111989855766296, + "rewards/rejected": -0.17121705412864685, + "step": 7079 + }, + { + "epoch": 4.8962655601659755, + "grad_norm": 7.765115737915039, + "learning_rate": 2.835408022130014e-05, + "log_odds_chosen": 11.179744720458984, + "log_odds_ratio": -6.359211693052202e-05, + "logits/chosen": -0.48739296197891235, + "logits/rejected": -0.5888567566871643, + "logps/chosen": -0.00020552228670567274, + "logps/rejected": -2.5004618167877197, + "loss": 0.647, + "nll_loss": 0.16173242032527924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0552230125758797e-05, + "rewards/margins": 0.2500256299972534, + "rewards/rejected": -0.2500461935997009, + "step": 7080 + }, + { + "epoch": 4.896957123098202, + "grad_norm": 6.932773590087891, + "learning_rate": 2.835023820500999e-05, + "log_odds_chosen": 10.954737663269043, + "log_odds_ratio": -8.48414929350838e-05, + "logits/chosen": -0.5642880201339722, + "logits/rejected": -0.6650699973106384, + "logps/chosen": -0.00023761890770401806, + "logps/rejected": -2.4510226249694824, + "loss": 0.9495, + "nll_loss": 0.23736077547073364, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3761891497997567e-05, + "rewards/margins": 0.24507847428321838, + "rewards/rejected": -0.24510225653648376, + "step": 7081 + }, + { + "epoch": 4.897648686030429, + "grad_norm": 7.493467330932617, + "learning_rate": 2.834639618871984e-05, + "log_odds_chosen": 10.005603790283203, + "log_odds_ratio": -0.0001240583078470081, + "logits/chosen": -0.7019184827804565, + "logits/rejected": -0.8387026786804199, + "logps/chosen": -0.00021465314785018563, + "logps/rejected": -1.8058335781097412, + "loss": 1.0316, + "nll_loss": 0.25787556171417236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1465315512614325e-05, + "rewards/margins": 0.1805618852376938, + "rewards/rejected": -0.18058335781097412, + "step": 7082 + }, + { + "epoch": 4.898340248962656, + "grad_norm": 8.680520057678223, + "learning_rate": 2.834255417242969e-05, + "log_odds_chosen": 9.830467224121094, + "log_odds_ratio": -0.00024964113254100084, + "logits/chosen": -0.6379505395889282, + "logits/rejected": -0.6919896006584167, + "logps/chosen": -0.0005304609076119959, + "logps/rejected": -1.8459217548370361, + "loss": 0.787, + "nll_loss": 0.1967170685529709, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.304608930600807e-05, + "rewards/margins": 0.18453910946846008, + "rewards/rejected": -0.18459217250347137, + "step": 7083 + }, + { + "epoch": 4.899031811894883, + "grad_norm": 12.539398193359375, + "learning_rate": 2.833871215613954e-05, + "log_odds_chosen": 9.541128158569336, + "log_odds_ratio": -0.0040365769527852535, + "logits/chosen": -0.973002016544342, + "logits/rejected": -0.9376418590545654, + "logps/chosen": -0.12223078310489655, + "logps/rejected": -2.7071738243103027, + "loss": 0.8637, + "nll_loss": 0.21553358435630798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012223077937960625, + "rewards/margins": 0.2584943175315857, + "rewards/rejected": -0.27071741223335266, + "step": 7084 + }, + { + "epoch": 4.89972337482711, + "grad_norm": 6.391757965087891, + "learning_rate": 2.8334870139849395e-05, + "log_odds_chosen": 9.40736198425293, + "log_odds_ratio": -0.0002655688440427184, + "logits/chosen": -0.6223936080932617, + "logits/rejected": -0.7707822322845459, + "logps/chosen": -0.006117125973105431, + "logps/rejected": -2.1625566482543945, + "loss": 0.8992, + "nll_loss": 0.22478394210338593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006117126322351396, + "rewards/margins": 0.21564392745494843, + "rewards/rejected": -0.21625563502311707, + "step": 7085 + }, + { + "epoch": 4.9004149377593365, + "grad_norm": 5.640289783477783, + "learning_rate": 2.8331028123559244e-05, + "log_odds_chosen": 9.61543083190918, + "log_odds_ratio": -0.00035600896808318794, + "logits/chosen": -0.4709721505641937, + "logits/rejected": -0.504892110824585, + "logps/chosen": -0.001349491416476667, + "logps/rejected": -1.618531346321106, + "loss": 1.3241, + "nll_loss": 0.3310004472732544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001349491358269006, + "rewards/margins": 0.1617181897163391, + "rewards/rejected": -0.1618531197309494, + "step": 7086 + }, + { + "epoch": 4.901106500691563, + "grad_norm": 18.874542236328125, + "learning_rate": 2.8327186107269093e-05, + "log_odds_chosen": 10.87725830078125, + "log_odds_ratio": -3.1916541047394276e-05, + "logits/chosen": -0.390527606010437, + "logits/rejected": -0.5419718027114868, + "logps/chosen": -0.00040930911200121045, + "logps/rejected": -2.722926378250122, + "loss": 1.035, + "nll_loss": 0.25874730944633484, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0930910472525284e-05, + "rewards/margins": 0.2722517251968384, + "rewards/rejected": -0.2722926437854767, + "step": 7087 + }, + { + "epoch": 4.90179806362379, + "grad_norm": 8.923735618591309, + "learning_rate": 2.832334409097895e-05, + "log_odds_chosen": 10.070110321044922, + "log_odds_ratio": -6.992067937972024e-05, + "logits/chosen": -0.4296715259552002, + "logits/rejected": -0.47074654698371887, + "logps/chosen": -0.00021902300068177283, + "logps/rejected": -1.5266903638839722, + "loss": 1.0921, + "nll_loss": 0.27300915122032166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.190229861298576e-05, + "rewards/margins": 0.15264713764190674, + "rewards/rejected": -0.1526690423488617, + "step": 7088 + }, + { + "epoch": 4.902489626556017, + "grad_norm": 12.43345832824707, + "learning_rate": 2.8319502074688798e-05, + "log_odds_chosen": 10.237215042114258, + "log_odds_ratio": -8.461821562377736e-05, + "logits/chosen": -0.7066891193389893, + "logits/rejected": -0.7382270097732544, + "logps/chosen": -0.0008181549492292106, + "logps/rejected": -2.758671283721924, + "loss": 1.1065, + "nll_loss": 0.2766094207763672, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.181549492292106e-05, + "rewards/margins": 0.27578532695770264, + "rewards/rejected": -0.27586713433265686, + "step": 7089 + }, + { + "epoch": 4.903181189488244, + "grad_norm": 9.602688789367676, + "learning_rate": 2.8315660058398647e-05, + "log_odds_chosen": 9.875539779663086, + "log_odds_ratio": -0.00012150348629802465, + "logits/chosen": -0.832602858543396, + "logits/rejected": -0.7552993297576904, + "logps/chosen": -0.0005037355585955083, + "logps/rejected": -1.9359135627746582, + "loss": 2.19, + "nll_loss": 0.5474786758422852, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0373557314742357e-05, + "rewards/margins": 0.19354099035263062, + "rewards/rejected": -0.19359135627746582, + "step": 7090 + }, + { + "epoch": 4.903872752420471, + "grad_norm": 8.904827117919922, + "learning_rate": 2.83118180421085e-05, + "log_odds_chosen": 9.818799018859863, + "log_odds_ratio": -0.0008873422048054636, + "logits/chosen": -0.8685452938079834, + "logits/rejected": -0.9092991352081299, + "logps/chosen": -0.0036431835032999516, + "logps/rejected": -1.6690925359725952, + "loss": 0.7524, + "nll_loss": 0.18801911175251007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003643183736130595, + "rewards/margins": 0.16654494404792786, + "rewards/rejected": -0.16690924763679504, + "step": 7091 + }, + { + "epoch": 4.904564315352697, + "grad_norm": 7.237812042236328, + "learning_rate": 2.830797602581835e-05, + "log_odds_chosen": 9.628220558166504, + "log_odds_ratio": -0.0002161394222639501, + "logits/chosen": -0.759405791759491, + "logits/rejected": -0.7982853055000305, + "logps/chosen": -0.0005565644823946059, + "logps/rejected": -1.7072293758392334, + "loss": 0.6136, + "nll_loss": 0.1533796787261963, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5656451877439395e-05, + "rewards/margins": 0.17066729068756104, + "rewards/rejected": -0.17072294652462006, + "step": 7092 + }, + { + "epoch": 4.905255878284924, + "grad_norm": 6.623456001281738, + "learning_rate": 2.8304134009528198e-05, + "log_odds_chosen": 9.786426544189453, + "log_odds_ratio": -0.0002996628754772246, + "logits/chosen": -0.399116575717926, + "logits/rejected": -0.4428660273551941, + "logps/chosen": -0.004113033413887024, + "logps/rejected": -2.5627689361572266, + "loss": 1.4367, + "nll_loss": 0.3591574728488922, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00041130336467176676, + "rewards/margins": 0.25586557388305664, + "rewards/rejected": -0.2562769055366516, + "step": 7093 + }, + { + "epoch": 4.905947441217151, + "grad_norm": 12.373395919799805, + "learning_rate": 2.8300291993238054e-05, + "log_odds_chosen": 9.527667045593262, + "log_odds_ratio": -0.0003717107174452394, + "logits/chosen": -0.9429394006729126, + "logits/rejected": -0.9072678089141846, + "logps/chosen": -0.0004049554408993572, + "logps/rejected": -1.8451400995254517, + "loss": 1.1802, + "nll_loss": 0.295009046792984, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0495546272723004e-05, + "rewards/margins": 0.18447351455688477, + "rewards/rejected": -0.18451401591300964, + "step": 7094 + }, + { + "epoch": 4.906639004149378, + "grad_norm": 6.210014820098877, + "learning_rate": 2.8296449976947903e-05, + "log_odds_chosen": 10.036115646362305, + "log_odds_ratio": -0.0001186348672490567, + "logits/chosen": -0.6889455318450928, + "logits/rejected": -0.7251041531562805, + "logps/chosen": -0.0006964325439184904, + "logps/rejected": -1.879245400428772, + "loss": 0.9805, + "nll_loss": 0.2451135814189911, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.964324711589143e-05, + "rewards/margins": 0.18785491585731506, + "rewards/rejected": -0.1879245489835739, + "step": 7095 + }, + { + "epoch": 4.907330567081605, + "grad_norm": 10.427117347717285, + "learning_rate": 2.829260796065775e-05, + "log_odds_chosen": 10.610797882080078, + "log_odds_ratio": -0.00011776221072068438, + "logits/chosen": -0.610815167427063, + "logits/rejected": -0.6836470365524292, + "logps/chosen": -0.0021078032441437244, + "logps/rejected": -2.2562201023101807, + "loss": 1.006, + "nll_loss": 0.2514980435371399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002107803156832233, + "rewards/margins": 0.22541120648384094, + "rewards/rejected": -0.2256219983100891, + "step": 7096 + }, + { + "epoch": 4.908022130013832, + "grad_norm": 8.188974380493164, + "learning_rate": 2.8288765944367607e-05, + "log_odds_chosen": 10.249313354492188, + "log_odds_ratio": -0.014942159876227379, + "logits/chosen": -0.2611965835094452, + "logits/rejected": -0.36121267080307007, + "logps/chosen": -0.0052610295824706554, + "logps/rejected": -2.3836238384246826, + "loss": 0.8521, + "nll_loss": 0.21152344346046448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005261029582470655, + "rewards/margins": 0.23783627152442932, + "rewards/rejected": -0.2383623719215393, + "step": 7097 + }, + { + "epoch": 4.908713692946058, + "grad_norm": 7.87142276763916, + "learning_rate": 2.8284923928077457e-05, + "log_odds_chosen": 10.834593772888184, + "log_odds_ratio": -0.0003307293518446386, + "logits/chosen": -0.7625287771224976, + "logits/rejected": -0.6767745018005371, + "logps/chosen": -0.0034907907247543335, + "logps/rejected": -3.2138097286224365, + "loss": 1.5473, + "nll_loss": 0.3867877721786499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003490790550131351, + "rewards/margins": 0.32103192806243896, + "rewards/rejected": -0.32138100266456604, + "step": 7098 + }, + { + "epoch": 4.909405255878285, + "grad_norm": 6.212900638580322, + "learning_rate": 2.8281081911787306e-05, + "log_odds_chosen": 8.325897216796875, + "log_odds_ratio": -0.005754491779953241, + "logits/chosen": -0.5125560164451599, + "logits/rejected": -0.4441109001636505, + "logps/chosen": -0.0019233720377087593, + "logps/rejected": -1.003936767578125, + "loss": 0.8757, + "nll_loss": 0.21836042404174805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019233721832279116, + "rewards/margins": 0.10020134598016739, + "rewards/rejected": -0.10039368271827698, + "step": 7099 + }, + { + "epoch": 4.910096818810512, + "grad_norm": 11.607251167297363, + "learning_rate": 2.8277239895497158e-05, + "log_odds_chosen": 9.61685848236084, + "log_odds_ratio": -0.056531526148319244, + "logits/chosen": -0.47347909212112427, + "logits/rejected": -0.487918883562088, + "logps/chosen": -0.08753280341625214, + "logps/rejected": -1.9314079284667969, + "loss": 1.0522, + "nll_loss": 0.25739336013793945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008753281086683273, + "rewards/margins": 0.18438750505447388, + "rewards/rejected": -0.19314078986644745, + "step": 7100 + }, + { + "epoch": 4.910788381742739, + "grad_norm": 8.894390106201172, + "learning_rate": 2.8273397879207007e-05, + "log_odds_chosen": 10.345115661621094, + "log_odds_ratio": -0.00022988113050814718, + "logits/chosen": -0.7872570157051086, + "logits/rejected": -0.8193588852882385, + "logps/chosen": -0.0006045059417374432, + "logps/rejected": -1.8319408893585205, + "loss": 0.8216, + "nll_loss": 0.20537596940994263, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0450591263361275e-05, + "rewards/margins": 0.18313364684581757, + "rewards/rejected": -0.183194100856781, + "step": 7101 + }, + { + "epoch": 4.911479944674966, + "grad_norm": 9.430028915405273, + "learning_rate": 2.8269555862916856e-05, + "log_odds_chosen": 10.861661911010742, + "log_odds_ratio": -5.2497900469461456e-05, + "logits/chosen": -0.6316702365875244, + "logits/rejected": -0.6554051041603088, + "logps/chosen": -0.00034378620330244303, + "logps/rejected": -2.400463819503784, + "loss": 0.7852, + "nll_loss": 0.19629782438278198, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.437861960264854e-05, + "rewards/margins": 0.2400120198726654, + "rewards/rejected": -0.2400463968515396, + "step": 7102 + }, + { + "epoch": 4.912171507607193, + "grad_norm": 9.566428184509277, + "learning_rate": 2.8265713846626712e-05, + "log_odds_chosen": 9.804618835449219, + "log_odds_ratio": -8.933767821872607e-05, + "logits/chosen": -0.5262328386306763, + "logits/rejected": -0.556152880191803, + "logps/chosen": -0.0026998610701411963, + "logps/rejected": -2.2227210998535156, + "loss": 1.2569, + "nll_loss": 0.31422197818756104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002699861070141196, + "rewards/margins": 0.22200213372707367, + "rewards/rejected": -0.2222720980644226, + "step": 7103 + }, + { + "epoch": 4.912863070539419, + "grad_norm": 7.35467529296875, + "learning_rate": 2.826187183033656e-05, + "log_odds_chosen": 10.32733154296875, + "log_odds_ratio": -0.000474480475531891, + "logits/chosen": -0.6774312257766724, + "logits/rejected": -0.7373085021972656, + "logps/chosen": -0.0009708892903290689, + "logps/rejected": -2.3438022136688232, + "loss": 1.1322, + "nll_loss": 0.2829919159412384, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.708893776405603e-05, + "rewards/margins": 0.23428313434123993, + "rewards/rejected": -0.23438023030757904, + "step": 7104 + }, + { + "epoch": 4.913554633471646, + "grad_norm": 4.576963901519775, + "learning_rate": 2.825802981404641e-05, + "log_odds_chosen": 9.78951358795166, + "log_odds_ratio": -0.0002390899317106232, + "logits/chosen": -0.5074937343597412, + "logits/rejected": -0.4873002767562866, + "logps/chosen": -0.00036632048431783915, + "logps/rejected": -1.5628546476364136, + "loss": 1.3782, + "nll_loss": 0.34453463554382324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.663205279735848e-05, + "rewards/margins": 0.15624882280826569, + "rewards/rejected": -0.15628546476364136, + "step": 7105 + }, + { + "epoch": 4.914246196403873, + "grad_norm": 10.732585906982422, + "learning_rate": 2.8254187797756266e-05, + "log_odds_chosen": 9.88197135925293, + "log_odds_ratio": -0.0027279232162982225, + "logits/chosen": -0.714326024055481, + "logits/rejected": -0.7508030533790588, + "logps/chosen": -0.0015103038167580962, + "logps/rejected": -1.6368227005004883, + "loss": 0.7312, + "nll_loss": 0.18253552913665771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001510303991381079, + "rewards/margins": 0.16353122889995575, + "rewards/rejected": -0.1636822670698166, + "step": 7106 + }, + { + "epoch": 4.9149377593361, + "grad_norm": 5.092597961425781, + "learning_rate": 2.8250345781466115e-05, + "log_odds_chosen": 8.184118270874023, + "log_odds_ratio": -0.002079986035823822, + "logits/chosen": -0.513596773147583, + "logits/rejected": -0.6206921339035034, + "logps/chosen": -0.0010421369224786758, + "logps/rejected": -1.033446192741394, + "loss": 1.1106, + "nll_loss": 0.27744877338409424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010421368642710149, + "rewards/margins": 0.10324040055274963, + "rewards/rejected": -0.10334461182355881, + "step": 7107 + }, + { + "epoch": 4.915629322268327, + "grad_norm": 9.54631519317627, + "learning_rate": 2.8246503765175964e-05, + "log_odds_chosen": 10.113319396972656, + "log_odds_ratio": -0.0001325900957453996, + "logits/chosen": -0.5567490458488464, + "logits/rejected": -0.6400761008262634, + "logps/chosen": -0.0008515854133293033, + "logps/rejected": -2.127261161804199, + "loss": 0.7234, + "nll_loss": 0.18084672093391418, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.515853551216424e-05, + "rewards/margins": 0.21264095604419708, + "rewards/rejected": -0.21272613108158112, + "step": 7108 + }, + { + "epoch": 4.9163208852005535, + "grad_norm": 5.1094560623168945, + "learning_rate": 2.8242661748885816e-05, + "log_odds_chosen": 10.541118621826172, + "log_odds_ratio": -0.00010706786997616291, + "logits/chosen": -0.7783686518669128, + "logits/rejected": -0.6153342723846436, + "logps/chosen": -0.0002766298421192914, + "logps/rejected": -1.9104892015457153, + "loss": 0.7444, + "nll_loss": 0.18608124554157257, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7662985303322785e-05, + "rewards/margins": 0.19102126359939575, + "rewards/rejected": -0.19104892015457153, + "step": 7109 + }, + { + "epoch": 4.91701244813278, + "grad_norm": 6.176060676574707, + "learning_rate": 2.8238819732595665e-05, + "log_odds_chosen": 9.143302917480469, + "log_odds_ratio": -0.002591161523014307, + "logits/chosen": -0.28634414076805115, + "logits/rejected": -0.4724721312522888, + "logps/chosen": -0.003382663941010833, + "logps/rejected": -1.5004411935806274, + "loss": 0.9143, + "nll_loss": 0.22832506895065308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033826639992184937, + "rewards/margins": 0.14970585703849792, + "rewards/rejected": -0.15004411339759827, + "step": 7110 + }, + { + "epoch": 4.917704011065007, + "grad_norm": 5.31650447845459, + "learning_rate": 2.8234977716305518e-05, + "log_odds_chosen": 9.15156364440918, + "log_odds_ratio": -0.000578921171836555, + "logits/chosen": -0.691253125667572, + "logits/rejected": -0.7496019005775452, + "logps/chosen": -0.0018787914887070656, + "logps/rejected": -1.509812831878662, + "loss": 1.1418, + "nll_loss": 0.28538262844085693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018787916633300483, + "rewards/margins": 0.1507934182882309, + "rewards/rejected": -0.15098129212856293, + "step": 7111 + }, + { + "epoch": 4.918395573997234, + "grad_norm": 5.6800856590271, + "learning_rate": 2.823113570001537e-05, + "log_odds_chosen": 8.849769592285156, + "log_odds_ratio": -0.000252558384090662, + "logits/chosen": -0.5067331790924072, + "logits/rejected": -0.6426399946212769, + "logps/chosen": -0.0010645565344020724, + "logps/rejected": -1.4535598754882812, + "loss": 0.9729, + "nll_loss": 0.24319294095039368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001064556636265479, + "rewards/margins": 0.14524953067302704, + "rewards/rejected": -0.1453559845685959, + "step": 7112 + }, + { + "epoch": 4.919087136929461, + "grad_norm": 17.448562622070312, + "learning_rate": 2.822729368372522e-05, + "log_odds_chosen": 10.552684783935547, + "log_odds_ratio": -0.00010548812861088663, + "logits/chosen": -0.7737633585929871, + "logits/rejected": -0.8090916275978088, + "logps/chosen": -0.0003632918233051896, + "logps/rejected": -1.9622200727462769, + "loss": 0.7682, + "nll_loss": 0.19204050302505493, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6329183785710484e-05, + "rewards/margins": 0.19618570804595947, + "rewards/rejected": -0.19622200727462769, + "step": 7113 + }, + { + "epoch": 4.919778699861688, + "grad_norm": 8.25586223602295, + "learning_rate": 2.822345166743507e-05, + "log_odds_chosen": 10.54365348815918, + "log_odds_ratio": -9.378491813549772e-05, + "logits/chosen": -0.5775281190872192, + "logits/rejected": -0.6037197113037109, + "logps/chosen": -0.0002865190908778459, + "logps/rejected": -1.636115312576294, + "loss": 0.8237, + "nll_loss": 0.2059212476015091, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8651907996390946e-05, + "rewards/margins": 0.16358289122581482, + "rewards/rejected": -0.1636115312576294, + "step": 7114 + }, + { + "epoch": 4.9204702627939145, + "grad_norm": 7.199887275695801, + "learning_rate": 2.8219609651144924e-05, + "log_odds_chosen": 10.180112838745117, + "log_odds_ratio": -0.00013731225044466555, + "logits/chosen": -0.2917217016220093, + "logits/rejected": -0.3498440384864807, + "logps/chosen": -0.00036871773772872984, + "logps/rejected": -2.0209054946899414, + "loss": 0.7018, + "nll_loss": 0.1754380762577057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6871773772872984e-05, + "rewards/margins": 0.20205369591712952, + "rewards/rejected": -0.2020905613899231, + "step": 7115 + }, + { + "epoch": 4.921161825726141, + "grad_norm": 9.046202659606934, + "learning_rate": 2.8215767634854773e-05, + "log_odds_chosen": 10.894472122192383, + "log_odds_ratio": -4.8574976972304285e-05, + "logits/chosen": -0.4562535285949707, + "logits/rejected": -0.4837872087955475, + "logps/chosen": -0.00042225071229040623, + "logps/rejected": -2.4725146293640137, + "loss": 1.2179, + "nll_loss": 0.30447250604629517, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.222507413942367e-05, + "rewards/margins": 0.24720925092697144, + "rewards/rejected": -0.2472514808177948, + "step": 7116 + }, + { + "epoch": 4.921853388658368, + "grad_norm": 11.569889068603516, + "learning_rate": 2.8211925618564622e-05, + "log_odds_chosen": 10.427947044372559, + "log_odds_ratio": -0.00016333235544152558, + "logits/chosen": -0.44238170981407166, + "logits/rejected": -0.5646301507949829, + "logps/chosen": -0.0005494217621162534, + "logps/rejected": -2.6457605361938477, + "loss": 1.1981, + "nll_loss": 0.2994979918003082, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.494217839441262e-05, + "rewards/margins": 0.26452112197875977, + "rewards/rejected": -0.2645760476589203, + "step": 7117 + }, + { + "epoch": 4.922544951590595, + "grad_norm": 8.835566520690918, + "learning_rate": 2.8208083602274475e-05, + "log_odds_chosen": 9.710332870483398, + "log_odds_ratio": -0.00014777285105083138, + "logits/chosen": -0.5706153512001038, + "logits/rejected": -0.6766719818115234, + "logps/chosen": -0.0003430528158787638, + "logps/rejected": -1.5136841535568237, + "loss": 1.0568, + "nll_loss": 0.2641783356666565, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.430528158787638e-05, + "rewards/margins": 0.15133410692214966, + "rewards/rejected": -0.1513684093952179, + "step": 7118 + }, + { + "epoch": 4.923236514522822, + "grad_norm": 11.772375106811523, + "learning_rate": 2.8204241585984327e-05, + "log_odds_chosen": 11.087120056152344, + "log_odds_ratio": -4.5395812776405364e-05, + "logits/chosen": -0.5636887550354004, + "logits/rejected": -0.5888187289237976, + "logps/chosen": -0.0004168281448073685, + "logps/rejected": -1.8479971885681152, + "loss": 0.8673, + "nll_loss": 0.21683260798454285, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1682811570353806e-05, + "rewards/margins": 0.1847580522298813, + "rewards/rejected": -0.18479973077774048, + "step": 7119 + }, + { + "epoch": 4.923928077455049, + "grad_norm": 7.783324718475342, + "learning_rate": 2.8200399569694176e-05, + "log_odds_chosen": 9.75548267364502, + "log_odds_ratio": -0.00065141316736117, + "logits/chosen": -0.3341536819934845, + "logits/rejected": -0.3753906488418579, + "logps/chosen": -0.0032086719293147326, + "logps/rejected": -2.3842759132385254, + "loss": 0.8875, + "nll_loss": 0.22179758548736572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032086719875223935, + "rewards/margins": 0.23810669779777527, + "rewards/rejected": -0.23842757940292358, + "step": 7120 + }, + { + "epoch": 4.9246196403872755, + "grad_norm": 8.361810684204102, + "learning_rate": 2.819655755340403e-05, + "log_odds_chosen": 10.507516860961914, + "log_odds_ratio": -3.8556561776204035e-05, + "logits/chosen": -0.3292539715766907, + "logits/rejected": -0.4065108299255371, + "logps/chosen": -0.0001679821580182761, + "logps/rejected": -1.817864179611206, + "loss": 0.7462, + "nll_loss": 0.1865575909614563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6798217984614894e-05, + "rewards/margins": 0.18176962435245514, + "rewards/rejected": -0.1817864179611206, + "step": 7121 + }, + { + "epoch": 4.925311203319502, + "grad_norm": 20.088041305541992, + "learning_rate": 2.8192715537113878e-05, + "log_odds_chosen": 12.001582145690918, + "log_odds_ratio": -1.2325194802542683e-05, + "logits/chosen": -0.48205146193504333, + "logits/rejected": -0.5669339299201965, + "logps/chosen": -0.00016101561777759343, + "logps/rejected": -2.9314229488372803, + "loss": 1.2894, + "nll_loss": 0.322337806224823, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6101563232950866e-05, + "rewards/margins": 0.2931261956691742, + "rewards/rejected": -0.29314231872558594, + "step": 7122 + }, + { + "epoch": 4.926002766251729, + "grad_norm": 46.864810943603516, + "learning_rate": 2.8188873520823727e-05, + "log_odds_chosen": 8.082642555236816, + "log_odds_ratio": -0.6128248572349548, + "logits/chosen": -0.5687606334686279, + "logits/rejected": -0.6862469911575317, + "logps/chosen": -0.0712505653500557, + "logps/rejected": -1.3998901844024658, + "loss": 1.241, + "nll_loss": 0.24896618723869324, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007125055883079767, + "rewards/margins": 0.13286396861076355, + "rewards/rejected": -0.13998901844024658, + "step": 7123 + }, + { + "epoch": 4.926694329183956, + "grad_norm": 10.988321304321289, + "learning_rate": 2.8185031504533583e-05, + "log_odds_chosen": 10.095243453979492, + "log_odds_ratio": -0.0001219494006363675, + "logits/chosen": -0.7098445892333984, + "logits/rejected": -0.8073045015335083, + "logps/chosen": -0.00027236994355916977, + "logps/rejected": -1.8411864042282104, + "loss": 1.0152, + "nll_loss": 0.25377851724624634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7236994355916977e-05, + "rewards/margins": 0.18409138917922974, + "rewards/rejected": -0.18411864340305328, + "step": 7124 + }, + { + "epoch": 4.927385892116183, + "grad_norm": 8.891566276550293, + "learning_rate": 2.8181189488243432e-05, + "log_odds_chosen": 9.694140434265137, + "log_odds_ratio": -0.0010962700471282005, + "logits/chosen": -0.39752835035324097, + "logits/rejected": -0.41798263788223267, + "logps/chosen": -0.0014211098896339536, + "logps/rejected": -1.951611876487732, + "loss": 0.8885, + "nll_loss": 0.22201602160930634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001421109918737784, + "rewards/margins": 0.19501908123493195, + "rewards/rejected": -0.19516119360923767, + "step": 7125 + }, + { + "epoch": 4.92807745504841, + "grad_norm": 16.277318954467773, + "learning_rate": 2.817734747195328e-05, + "log_odds_chosen": 10.069537162780762, + "log_odds_ratio": -0.0037255052011460066, + "logits/chosen": -0.7205549478530884, + "logits/rejected": -0.6914384961128235, + "logps/chosen": -0.006732581183314323, + "logps/rejected": -1.870539903640747, + "loss": 1.5036, + "nll_loss": 0.3755381405353546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006732581532560289, + "rewards/margins": 0.18638074398040771, + "rewards/rejected": -0.18705399334430695, + "step": 7126 + }, + { + "epoch": 4.9287690179806365, + "grad_norm": 7.239488124847412, + "learning_rate": 2.8173505455663137e-05, + "log_odds_chosen": 10.33989143371582, + "log_odds_ratio": -7.038117473712191e-05, + "logits/chosen": -0.11135803908109665, + "logits/rejected": -0.1713535189628601, + "logps/chosen": -0.00010937307524727657, + "logps/rejected": -1.5407695770263672, + "loss": 1.2532, + "nll_loss": 0.31329673528671265, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.09373086161213e-05, + "rewards/margins": 0.1540660262107849, + "rewards/rejected": -0.1540769636631012, + "step": 7127 + }, + { + "epoch": 4.929460580912863, + "grad_norm": 8.722808837890625, + "learning_rate": 2.8169663439372986e-05, + "log_odds_chosen": 10.938897132873535, + "log_odds_ratio": -4.421987250680104e-05, + "logits/chosen": -0.2911446690559387, + "logits/rejected": -0.4413534700870514, + "logps/chosen": -0.00021581347391474992, + "logps/rejected": -2.3797059059143066, + "loss": 0.9606, + "nll_loss": 0.24014431238174438, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1581348846666515e-05, + "rewards/margins": 0.23794902861118317, + "rewards/rejected": -0.23797062039375305, + "step": 7128 + }, + { + "epoch": 4.93015214384509, + "grad_norm": 12.4188814163208, + "learning_rate": 2.8165821423082835e-05, + "log_odds_chosen": 9.827089309692383, + "log_odds_ratio": -0.00031211768509820104, + "logits/chosen": -0.584121584892273, + "logits/rejected": -0.5907494425773621, + "logps/chosen": -0.0003768773749470711, + "logps/rejected": -1.5813324451446533, + "loss": 1.3272, + "nll_loss": 0.3317672908306122, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.768773967749439e-05, + "rewards/margins": 0.1580955684185028, + "rewards/rejected": -0.15813326835632324, + "step": 7129 + }, + { + "epoch": 4.930843706777317, + "grad_norm": 8.122848510742188, + "learning_rate": 2.8161979406792687e-05, + "log_odds_chosen": 10.707836151123047, + "log_odds_ratio": -4.415389776113443e-05, + "logits/chosen": -0.373961478471756, + "logits/rejected": -0.4320948123931885, + "logps/chosen": -0.0007988472352735698, + "logps/rejected": -2.38476300239563, + "loss": 0.7998, + "nll_loss": 0.1999528706073761, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.98847249825485e-05, + "rewards/margins": 0.23839640617370605, + "rewards/rejected": -0.23847629129886627, + "step": 7130 + }, + { + "epoch": 4.931535269709544, + "grad_norm": 5.815088272094727, + "learning_rate": 2.8158137390502536e-05, + "log_odds_chosen": 9.746646881103516, + "log_odds_ratio": -0.00011611563240876421, + "logits/chosen": -0.42615267634391785, + "logits/rejected": -0.4549277722835541, + "logps/chosen": -0.0003674745967146009, + "logps/rejected": -1.1377838850021362, + "loss": 1.0002, + "nll_loss": 0.2500423192977905, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6747456761077046e-05, + "rewards/margins": 0.11374164372682571, + "rewards/rejected": -0.11377838999032974, + "step": 7131 + }, + { + "epoch": 4.932226832641771, + "grad_norm": 8.254155158996582, + "learning_rate": 2.8154295374212385e-05, + "log_odds_chosen": 9.269465446472168, + "log_odds_ratio": -0.0017738983733579516, + "logits/chosen": -0.3891604542732239, + "logits/rejected": -0.3855813145637512, + "logps/chosen": -0.0017140365671366453, + "logps/rejected": -1.8060569763183594, + "loss": 1.0991, + "nll_loss": 0.27460891008377075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017140366253443062, + "rewards/margins": 0.18043428659439087, + "rewards/rejected": -0.1806056946516037, + "step": 7132 + }, + { + "epoch": 4.9329183955739975, + "grad_norm": 5.22756290435791, + "learning_rate": 2.815045335792224e-05, + "log_odds_chosen": 8.7532377243042, + "log_odds_ratio": -0.0049340082332491875, + "logits/chosen": -0.23282581567764282, + "logits/rejected": -0.3273771405220032, + "logps/chosen": -0.0010553733445703983, + "logps/rejected": -1.7135753631591797, + "loss": 1.1454, + "nll_loss": 0.2858678102493286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010553734318818897, + "rewards/margins": 0.17125201225280762, + "rewards/rejected": -0.17135754227638245, + "step": 7133 + }, + { + "epoch": 4.933609958506224, + "grad_norm": 11.229767799377441, + "learning_rate": 2.814661134163209e-05, + "log_odds_chosen": 11.21669864654541, + "log_odds_ratio": -2.4041275537456386e-05, + "logits/chosen": -0.3968821167945862, + "logits/rejected": -0.48639771342277527, + "logps/chosen": -0.00011265697685303167, + "logps/rejected": -2.149205207824707, + "loss": 1.2412, + "nll_loss": 0.31030285358428955, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1265698049101047e-05, + "rewards/margins": 0.21490925550460815, + "rewards/rejected": -0.2149205207824707, + "step": 7134 + }, + { + "epoch": 4.934301521438451, + "grad_norm": 5.015374660491943, + "learning_rate": 2.814276932534194e-05, + "log_odds_chosen": 9.667210578918457, + "log_odds_ratio": -0.001268291613087058, + "logits/chosen": -0.48809224367141724, + "logits/rejected": -0.6303143501281738, + "logps/chosen": -0.0014353214064612985, + "logps/rejected": -1.379345417022705, + "loss": 0.7382, + "nll_loss": 0.18442240357398987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014353214646689594, + "rewards/margins": 0.13779102265834808, + "rewards/rejected": -0.13793453574180603, + "step": 7135 + }, + { + "epoch": 4.934993084370678, + "grad_norm": 5.099924564361572, + "learning_rate": 2.8138927309051795e-05, + "log_odds_chosen": 11.092218399047852, + "log_odds_ratio": -4.954072937835008e-05, + "logits/chosen": -0.5482072234153748, + "logits/rejected": -0.5716742277145386, + "logps/chosen": -0.0003784211294259876, + "logps/rejected": -3.1070432662963867, + "loss": 0.7207, + "nll_loss": 0.18016774952411652, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7842110032215714e-05, + "rewards/margins": 0.3106665015220642, + "rewards/rejected": -0.3107043206691742, + "step": 7136 + }, + { + "epoch": 4.935684647302905, + "grad_norm": 6.366814136505127, + "learning_rate": 2.8135085292761644e-05, + "log_odds_chosen": 10.189170837402344, + "log_odds_ratio": -0.0005262996419332922, + "logits/chosen": -0.0720197930932045, + "logits/rejected": -0.11023354530334473, + "logps/chosen": -0.000608351023402065, + "logps/rejected": -2.044856548309326, + "loss": 0.9023, + "nll_loss": 0.22551508247852325, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.083510379539803e-05, + "rewards/margins": 0.20442481338977814, + "rewards/rejected": -0.20448563992977142, + "step": 7137 + }, + { + "epoch": 4.936376210235132, + "grad_norm": 11.162799835205078, + "learning_rate": 2.8131243276471493e-05, + "log_odds_chosen": 9.664872169494629, + "log_odds_ratio": -0.021280528977513313, + "logits/chosen": -0.3223462998867035, + "logits/rejected": -0.4159541428089142, + "logps/chosen": -0.006408995017409325, + "logps/rejected": -1.8025219440460205, + "loss": 1.3599, + "nll_loss": 0.3378509283065796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000640899408608675, + "rewards/margins": 0.17961131036281586, + "rewards/rejected": -0.18025220930576324, + "step": 7138 + }, + { + "epoch": 4.9370677731673585, + "grad_norm": 8.728205680847168, + "learning_rate": 2.8127401260181346e-05, + "log_odds_chosen": 10.00544548034668, + "log_odds_ratio": -0.00018134855781681836, + "logits/chosen": -0.6819294691085815, + "logits/rejected": -0.7730222940444946, + "logps/chosen": -0.0005425587296485901, + "logps/rejected": -1.7225030660629272, + "loss": 0.7433, + "nll_loss": 0.18581163883209229, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.425587005447596e-05, + "rewards/margins": 0.17219604551792145, + "rewards/rejected": -0.17225030064582825, + "step": 7139 + }, + { + "epoch": 4.937759336099585, + "grad_norm": 17.405189514160156, + "learning_rate": 2.8123559243891195e-05, + "log_odds_chosen": 10.889158248901367, + "log_odds_ratio": -2.6139588953810744e-05, + "logits/chosen": -0.37216049432754517, + "logits/rejected": -0.45730894804000854, + "logps/chosen": -0.00019618839723989367, + "logps/rejected": -2.3127663135528564, + "loss": 1.0295, + "nll_loss": 0.2573762536048889, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.961884117918089e-05, + "rewards/margins": 0.23125702142715454, + "rewards/rejected": -0.23127663135528564, + "step": 7140 + }, + { + "epoch": 4.938450899031812, + "grad_norm": 7.012986183166504, + "learning_rate": 2.8119717227601044e-05, + "log_odds_chosen": 10.775957107543945, + "log_odds_ratio": -3.96674768126104e-05, + "logits/chosen": -0.3651018738746643, + "logits/rejected": -0.4808062016963959, + "logps/chosen": -0.00010476561146788299, + "logps/rejected": -1.7233092784881592, + "loss": 0.7375, + "nll_loss": 0.18437014520168304, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0476562238181941e-05, + "rewards/margins": 0.17232045531272888, + "rewards/rejected": -0.17233094573020935, + "step": 7141 + }, + { + "epoch": 4.939142461964039, + "grad_norm": 7.365050315856934, + "learning_rate": 2.81158752113109e-05, + "log_odds_chosen": 10.282831192016602, + "log_odds_ratio": -6.569912511622533e-05, + "logits/chosen": -0.5295911431312561, + "logits/rejected": -0.49910950660705566, + "logps/chosen": -0.00039605889469385147, + "logps/rejected": -2.092613697052002, + "loss": 0.8776, + "nll_loss": 0.21940162777900696, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.960588946938515e-05, + "rewards/margins": 0.2092217653989792, + "rewards/rejected": -0.20926138758659363, + "step": 7142 + }, + { + "epoch": 4.939834024896266, + "grad_norm": 8.847102165222168, + "learning_rate": 2.811203319502075e-05, + "log_odds_chosen": 10.336320877075195, + "log_odds_ratio": -6.936783756827936e-05, + "logits/chosen": -0.3571697771549225, + "logits/rejected": -0.36846020817756653, + "logps/chosen": -0.00019183357653673738, + "logps/rejected": -1.95271897315979, + "loss": 0.8321, + "nll_loss": 0.20801444351673126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9183356926077977e-05, + "rewards/margins": 0.1952527016401291, + "rewards/rejected": -0.19527189433574677, + "step": 7143 + }, + { + "epoch": 4.940525587828493, + "grad_norm": 9.471915245056152, + "learning_rate": 2.8108191178730598e-05, + "log_odds_chosen": 11.665063858032227, + "log_odds_ratio": -1.2130387403885834e-05, + "logits/chosen": -0.4402073621749878, + "logits/rejected": -0.6375857591629028, + "logps/chosen": -9.851202776189893e-05, + "logps/rejected": -2.3184638023376465, + "loss": 0.9339, + "nll_loss": 0.23347574472427368, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.851202776189893e-06, + "rewards/margins": 0.23183652758598328, + "rewards/rejected": -0.2318463772535324, + "step": 7144 + }, + { + "epoch": 4.941217150760719, + "grad_norm": 10.781583786010742, + "learning_rate": 2.8104349162440454e-05, + "log_odds_chosen": 9.704204559326172, + "log_odds_ratio": -0.0012408954789862037, + "logits/chosen": -0.6811150908470154, + "logits/rejected": -0.6740538477897644, + "logps/chosen": -0.005846824496984482, + "logps/rejected": -2.4501309394836426, + "loss": 0.9965, + "nll_loss": 0.24899712204933167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005846824496984482, + "rewards/margins": 0.24442841112613678, + "rewards/rejected": -0.24501308798789978, + "step": 7145 + }, + { + "epoch": 4.941908713692946, + "grad_norm": 7.452633857727051, + "learning_rate": 2.8100507146150303e-05, + "log_odds_chosen": 9.428400039672852, + "log_odds_ratio": -0.0004272775840945542, + "logits/chosen": -0.3518384099006653, + "logits/rejected": -0.36135247349739075, + "logps/chosen": -0.0015641606878489256, + "logps/rejected": -1.492548942565918, + "loss": 1.0784, + "nll_loss": 0.26955974102020264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015641606296412647, + "rewards/margins": 0.1490984857082367, + "rewards/rejected": -0.1492549031972885, + "step": 7146 + }, + { + "epoch": 4.942600276625173, + "grad_norm": 10.414713859558105, + "learning_rate": 2.809666512986015e-05, + "log_odds_chosen": 11.029153823852539, + "log_odds_ratio": -0.0001740563748171553, + "logits/chosen": -0.7576574087142944, + "logits/rejected": -0.8908603191375732, + "logps/chosen": -0.00031457317527383566, + "logps/rejected": -2.333446502685547, + "loss": 1.0123, + "nll_loss": 0.25305071473121643, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.145731534459628e-05, + "rewards/margins": 0.2333131730556488, + "rewards/rejected": -0.2333446443080902, + "step": 7147 + }, + { + "epoch": 4.9432918395574, + "grad_norm": 7.546963691711426, + "learning_rate": 2.8092823113570004e-05, + "log_odds_chosen": 10.32200813293457, + "log_odds_ratio": -4.672615250456147e-05, + "logits/chosen": -1.0263385772705078, + "logits/rejected": -1.0607138872146606, + "logps/chosen": -0.00017116457456722856, + "logps/rejected": -1.5321038961410522, + "loss": 0.8777, + "nll_loss": 0.2194298803806305, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.711645927571226e-05, + "rewards/margins": 0.15319326519966125, + "rewards/rejected": -0.15321038663387299, + "step": 7148 + }, + { + "epoch": 4.943983402489627, + "grad_norm": 8.159418106079102, + "learning_rate": 2.8088981097279853e-05, + "log_odds_chosen": 9.63131046295166, + "log_odds_ratio": -0.012594000436365604, + "logits/chosen": -0.610599160194397, + "logits/rejected": -0.6731650233268738, + "logps/chosen": -0.007956145331263542, + "logps/rejected": -2.213127851486206, + "loss": 0.9725, + "nll_loss": 0.2418547123670578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007956145564094186, + "rewards/margins": 0.22051715850830078, + "rewards/rejected": -0.2213127762079239, + "step": 7149 + }, + { + "epoch": 4.944674965421854, + "grad_norm": 10.675069808959961, + "learning_rate": 2.8085139080989702e-05, + "log_odds_chosen": 8.94044017791748, + "log_odds_ratio": -0.001029696548357606, + "logits/chosen": -0.011049099266529083, + "logits/rejected": -0.12137185037136078, + "logps/chosen": -0.03985258564352989, + "logps/rejected": -2.9449570178985596, + "loss": 1.2629, + "nll_loss": 0.3156171143054962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003985258284956217, + "rewards/margins": 0.2905104458332062, + "rewards/rejected": -0.29449570178985596, + "step": 7150 + }, + { + "epoch": 4.94536652835408, + "grad_norm": 23.415855407714844, + "learning_rate": 2.8081297064699558e-05, + "log_odds_chosen": 9.784978866577148, + "log_odds_ratio": -0.12540185451507568, + "logits/chosen": -0.3491271436214447, + "logits/rejected": -0.4001074433326721, + "logps/chosen": -0.01915506273508072, + "logps/rejected": -2.304745674133301, + "loss": 0.9497, + "nll_loss": 0.22488565742969513, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019155063200742006, + "rewards/margins": 0.22855907678604126, + "rewards/rejected": -0.2304745614528656, + "step": 7151 + }, + { + "epoch": 4.946058091286307, + "grad_norm": 6.221928596496582, + "learning_rate": 2.8077455048409407e-05, + "log_odds_chosen": 10.982514381408691, + "log_odds_ratio": -2.903457971115131e-05, + "logits/chosen": -0.07943695038557053, + "logits/rejected": -0.1861354261636734, + "logps/chosen": -0.0002079754340229556, + "logps/rejected": -1.972883939743042, + "loss": 0.6171, + "nll_loss": 0.15427665412425995, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0797542674699798e-05, + "rewards/margins": 0.1972675919532776, + "rewards/rejected": -0.1972883939743042, + "step": 7152 + }, + { + "epoch": 4.946749654218534, + "grad_norm": 9.135435104370117, + "learning_rate": 2.8073613032119256e-05, + "log_odds_chosen": 10.792257308959961, + "log_odds_ratio": -8.120985148707405e-05, + "logits/chosen": -0.48409971594810486, + "logits/rejected": -0.4654289186000824, + "logps/chosen": -0.0002383202954661101, + "logps/rejected": -1.8510081768035889, + "loss": 1.2696, + "nll_loss": 0.31740251183509827, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3832026272430085e-05, + "rewards/margins": 0.1850769966840744, + "rewards/rejected": -0.18510082364082336, + "step": 7153 + }, + { + "epoch": 4.947441217150761, + "grad_norm": 4.866416931152344, + "learning_rate": 2.8069771015829112e-05, + "log_odds_chosen": 9.464195251464844, + "log_odds_ratio": -0.0020351430866867304, + "logits/chosen": -0.3395107388496399, + "logits/rejected": -0.2789302468299866, + "logps/chosen": -0.0017817820189520717, + "logps/rejected": -1.871229887008667, + "loss": 1.2115, + "nll_loss": 0.3026636838912964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017817817570175976, + "rewards/margins": 0.18694482743740082, + "rewards/rejected": -0.18712300062179565, + "step": 7154 + }, + { + "epoch": 4.948132780082988, + "grad_norm": 8.04308032989502, + "learning_rate": 2.806592899953896e-05, + "log_odds_chosen": 10.79904556274414, + "log_odds_ratio": -4.452460052561946e-05, + "logits/chosen": -0.4465823769569397, + "logits/rejected": -0.5061565637588501, + "logps/chosen": -0.00028760547866113484, + "logps/rejected": -1.9426701068878174, + "loss": 0.9098, + "nll_loss": 0.22744998335838318, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8760547138517722e-05, + "rewards/margins": 0.19423826038837433, + "rewards/rejected": -0.19426700472831726, + "step": 7155 + }, + { + "epoch": 4.948824343015215, + "grad_norm": 13.30815601348877, + "learning_rate": 2.806208698324881e-05, + "log_odds_chosen": 10.1293306350708, + "log_odds_ratio": -0.00010954002937069163, + "logits/chosen": -0.6125750541687012, + "logits/rejected": -0.6850671768188477, + "logps/chosen": -0.0003307849692646414, + "logps/rejected": -1.939996600151062, + "loss": 0.9422, + "nll_loss": 0.23554423451423645, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.307849692646414e-05, + "rewards/margins": 0.1939665675163269, + "rewards/rejected": -0.19399964809417725, + "step": 7156 + }, + { + "epoch": 4.949515905947441, + "grad_norm": 6.909346103668213, + "learning_rate": 2.8058244966958663e-05, + "log_odds_chosen": 9.113512992858887, + "log_odds_ratio": -0.0011480834800750017, + "logits/chosen": -0.3116508424282074, + "logits/rejected": -0.4106285572052002, + "logps/chosen": -0.001304534263908863, + "logps/rejected": -1.1426763534545898, + "loss": 1.0868, + "nll_loss": 0.2715791165828705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013045342348050326, + "rewards/margins": 0.1141371801495552, + "rewards/rejected": -0.11426763981580734, + "step": 7157 + }, + { + "epoch": 4.950207468879668, + "grad_norm": 15.718669891357422, + "learning_rate": 2.805440295066851e-05, + "log_odds_chosen": 6.992504596710205, + "log_odds_ratio": -0.10088855028152466, + "logits/chosen": -0.43080681562423706, + "logits/rejected": -0.49850064516067505, + "logps/chosen": -0.01973787322640419, + "logps/rejected": -1.4150389432907104, + "loss": 1.1032, + "nll_loss": 0.2657163739204407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001973787322640419, + "rewards/margins": 0.13953009247779846, + "rewards/rejected": -0.14150390028953552, + "step": 7158 + }, + { + "epoch": 4.950899031811895, + "grad_norm": 10.54096794128418, + "learning_rate": 2.805056093437836e-05, + "log_odds_chosen": 9.446221351623535, + "log_odds_ratio": -0.0005509358597919345, + "logits/chosen": -0.09963419288396835, + "logits/rejected": -0.15448901057243347, + "logps/chosen": -0.0333227813243866, + "logps/rejected": -1.847109317779541, + "loss": 0.9162, + "nll_loss": 0.22899924218654633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003332278225570917, + "rewards/margins": 0.18137866258621216, + "rewards/rejected": -0.18471094965934753, + "step": 7159 + }, + { + "epoch": 4.951590594744122, + "grad_norm": 9.546717643737793, + "learning_rate": 2.8046718918088217e-05, + "log_odds_chosen": 11.02538013458252, + "log_odds_ratio": -4.7218050895025954e-05, + "logits/chosen": -0.509639322757721, + "logits/rejected": -0.5242058038711548, + "logps/chosen": -0.0010804994963109493, + "logps/rejected": -2.4925341606140137, + "loss": 0.7729, + "nll_loss": 0.1932220757007599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010804997145896778, + "rewards/margins": 0.24914538860321045, + "rewards/rejected": -0.24925342202186584, + "step": 7160 + }, + { + "epoch": 4.952282157676349, + "grad_norm": 9.92019271850586, + "learning_rate": 2.8042876901798066e-05, + "log_odds_chosen": 11.617349624633789, + "log_odds_ratio": -1.9354396499693394e-05, + "logits/chosen": -0.3191656470298767, + "logits/rejected": -0.3231254816055298, + "logps/chosen": -0.00020129804033786058, + "logps/rejected": -2.650850296020508, + "loss": 1.3355, + "nll_loss": 0.33388522267341614, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0129802578594536e-05, + "rewards/margins": 0.26506489515304565, + "rewards/rejected": -0.26508504152297974, + "step": 7161 + }, + { + "epoch": 4.9529737206085755, + "grad_norm": 9.240811347961426, + "learning_rate": 2.8039034885507915e-05, + "log_odds_chosen": 10.988016128540039, + "log_odds_ratio": -4.003685899078846e-05, + "logits/chosen": -0.3881503939628601, + "logits/rejected": -0.32961368560791016, + "logps/chosen": -0.0013664222788065672, + "logps/rejected": -2.931913137435913, + "loss": 1.1138, + "nll_loss": 0.27844059467315674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013664223661180586, + "rewards/margins": 0.2930546700954437, + "rewards/rejected": -0.2931913137435913, + "step": 7162 + }, + { + "epoch": 4.953665283540802, + "grad_norm": 12.443624496459961, + "learning_rate": 2.803519286921777e-05, + "log_odds_chosen": 11.71902084350586, + "log_odds_ratio": -3.129677497781813e-05, + "logits/chosen": -0.5320737361907959, + "logits/rejected": -0.5102954506874084, + "logps/chosen": -0.0008184172911569476, + "logps/rejected": -3.300198554992676, + "loss": 0.7979, + "nll_loss": 0.19947592914104462, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.184173202607781e-05, + "rewards/margins": 0.32993799448013306, + "rewards/rejected": -0.33001986145973206, + "step": 7163 + }, + { + "epoch": 4.954356846473029, + "grad_norm": 19.79650115966797, + "learning_rate": 2.803135085292762e-05, + "log_odds_chosen": 7.7862958908081055, + "log_odds_ratio": -0.358162522315979, + "logits/chosen": -0.2744470238685608, + "logits/rejected": -0.352327823638916, + "logps/chosen": -0.06442242860794067, + "logps/rejected": -1.561842679977417, + "loss": 1.1889, + "nll_loss": 0.2614128291606903, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006442242302000523, + "rewards/margins": 0.1497420370578766, + "rewards/rejected": -0.15618427097797394, + "step": 7164 + }, + { + "epoch": 4.955048409405256, + "grad_norm": 8.847530364990234, + "learning_rate": 2.802750883663747e-05, + "log_odds_chosen": 9.146276473999023, + "log_odds_ratio": -0.0027183485217392445, + "logits/chosen": -0.18752719461917877, + "logits/rejected": -0.289531409740448, + "logps/chosen": -0.005841393955051899, + "logps/rejected": -1.2292550802230835, + "loss": 0.9267, + "nll_loss": 0.23141279816627502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005841394304297864, + "rewards/margins": 0.12234137952327728, + "rewards/rejected": -0.1229255199432373, + "step": 7165 + }, + { + "epoch": 4.955739972337483, + "grad_norm": 6.832130432128906, + "learning_rate": 2.802366682034732e-05, + "log_odds_chosen": 9.88257122039795, + "log_odds_ratio": -0.0002631518291309476, + "logits/chosen": -0.34477508068084717, + "logits/rejected": -0.43727973103523254, + "logps/chosen": -0.0010147679131478071, + "logps/rejected": -2.3121986389160156, + "loss": 0.9036, + "nll_loss": 0.22586682438850403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010147679859073833, + "rewards/margins": 0.23111838102340698, + "rewards/rejected": -0.2312198430299759, + "step": 7166 + }, + { + "epoch": 4.95643153526971, + "grad_norm": 11.183934211730957, + "learning_rate": 2.801982480405717e-05, + "log_odds_chosen": 9.253469467163086, + "log_odds_ratio": -0.0005922535201534629, + "logits/chosen": -0.5971043705940247, + "logits/rejected": -0.584150493144989, + "logps/chosen": -0.0020415710750967264, + "logps/rejected": -1.5619051456451416, + "loss": 1.2975, + "nll_loss": 0.3243168592453003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020415711333043873, + "rewards/margins": 0.15598636865615845, + "rewards/rejected": -0.15619052946567535, + "step": 7167 + }, + { + "epoch": 4.9571230982019365, + "grad_norm": 14.405940055847168, + "learning_rate": 2.801598278776702e-05, + "log_odds_chosen": 11.010702133178711, + "log_odds_ratio": -2.6105004508281127e-05, + "logits/chosen": -0.33093854784965515, + "logits/rejected": -0.40159279108047485, + "logps/chosen": -9.661898366175592e-05, + "logps/rejected": -1.8814085721969604, + "loss": 1.1622, + "nll_loss": 0.29055318236351013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.661898729973473e-06, + "rewards/margins": 0.1881311982870102, + "rewards/rejected": -0.1881408542394638, + "step": 7168 + }, + { + "epoch": 4.957814661134163, + "grad_norm": 10.01386547088623, + "learning_rate": 2.8012140771476875e-05, + "log_odds_chosen": 10.201593399047852, + "log_odds_ratio": -7.670342893106863e-05, + "logits/chosen": -0.613256573677063, + "logits/rejected": -0.6443556547164917, + "logps/chosen": -0.002180723240599036, + "logps/rejected": -2.1001086235046387, + "loss": 0.8454, + "nll_loss": 0.21134643256664276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002180723095079884, + "rewards/margins": 0.20979279279708862, + "rewards/rejected": -0.21001088619232178, + "step": 7169 + }, + { + "epoch": 4.95850622406639, + "grad_norm": 8.180740356445312, + "learning_rate": 2.8008298755186724e-05, + "log_odds_chosen": 9.803583145141602, + "log_odds_ratio": -0.00024330374435521662, + "logits/chosen": -0.5474072098731995, + "logits/rejected": -0.5633154511451721, + "logps/chosen": -0.0019682589918375015, + "logps/rejected": -2.279942750930786, + "loss": 1.1217, + "nll_loss": 0.28041279315948486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019682593119796365, + "rewards/margins": 0.22779744863510132, + "rewards/rejected": -0.22799426317214966, + "step": 7170 + }, + { + "epoch": 4.959197786998617, + "grad_norm": 9.358325958251953, + "learning_rate": 2.8004456738896573e-05, + "log_odds_chosen": 8.370705604553223, + "log_odds_ratio": -0.009722158312797546, + "logits/chosen": -0.43345358967781067, + "logits/rejected": -0.562606930732727, + "logps/chosen": -0.006823183968663216, + "logps/rejected": -1.5810699462890625, + "loss": 0.9671, + "nll_loss": 0.24081367254257202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006823184085078537, + "rewards/margins": 0.1574246734380722, + "rewards/rejected": -0.1581069827079773, + "step": 7171 + }, + { + "epoch": 4.959889349930844, + "grad_norm": 11.307122230529785, + "learning_rate": 2.800061472260643e-05, + "log_odds_chosen": 11.312531471252441, + "log_odds_ratio": -1.7610067516216077e-05, + "logits/chosen": -0.4398566484451294, + "logits/rejected": -0.49697765707969666, + "logps/chosen": -0.00024596997536718845, + "logps/rejected": -2.504828691482544, + "loss": 0.7498, + "nll_loss": 0.18744593858718872, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.459700044710189e-05, + "rewards/margins": 0.250458300113678, + "rewards/rejected": -0.2504828870296478, + "step": 7172 + }, + { + "epoch": 4.960580912863071, + "grad_norm": 11.591588973999023, + "learning_rate": 2.7996772706316278e-05, + "log_odds_chosen": 11.175124168395996, + "log_odds_ratio": -3.3854037610581145e-05, + "logits/chosen": -0.2384709119796753, + "logits/rejected": -0.3869898319244385, + "logps/chosen": -0.0001587655715411529, + "logps/rejected": -2.376006603240967, + "loss": 1.0801, + "nll_loss": 0.2700336277484894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.587655788171105e-05, + "rewards/margins": 0.23758478462696075, + "rewards/rejected": -0.2376006543636322, + "step": 7173 + }, + { + "epoch": 4.9612724757952975, + "grad_norm": 5.465503215789795, + "learning_rate": 2.7992930690026127e-05, + "log_odds_chosen": 10.007530212402344, + "log_odds_ratio": -8.857608190737665e-05, + "logits/chosen": -0.4582262337207794, + "logits/rejected": -0.506310224533081, + "logps/chosen": -0.000461061776150018, + "logps/rejected": -2.222295045852661, + "loss": 0.8939, + "nll_loss": 0.22345994412899017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.610617907019332e-05, + "rewards/margins": 0.22218340635299683, + "rewards/rejected": -0.2222295105457306, + "step": 7174 + }, + { + "epoch": 4.961964038727524, + "grad_norm": 6.3152289390563965, + "learning_rate": 2.798908867373598e-05, + "log_odds_chosen": 9.170822143554688, + "log_odds_ratio": -0.00022362380695994943, + "logits/chosen": -0.39062464237213135, + "logits/rejected": -0.4400513768196106, + "logps/chosen": -0.0003000005381181836, + "logps/rejected": -1.2168389558792114, + "loss": 1.2484, + "nll_loss": 0.31207793951034546, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0000055630807765e-05, + "rewards/margins": 0.12165389209985733, + "rewards/rejected": -0.12168390303850174, + "step": 7175 + }, + { + "epoch": 4.962655601659751, + "grad_norm": 7.906181335449219, + "learning_rate": 2.798524665744583e-05, + "log_odds_chosen": 10.254146575927734, + "log_odds_ratio": -0.00024489304632879794, + "logits/chosen": -0.5680465698242188, + "logits/rejected": -0.5180726647377014, + "logps/chosen": -0.00018432183424010873, + "logps/rejected": -1.705075979232788, + "loss": 1.3018, + "nll_loss": 0.3254315257072449, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8432185243000276e-05, + "rewards/margins": 0.17048917710781097, + "rewards/rejected": -0.17050760984420776, + "step": 7176 + }, + { + "epoch": 4.963347164591978, + "grad_norm": 7.250458240509033, + "learning_rate": 2.7981404641155678e-05, + "log_odds_chosen": 9.816474914550781, + "log_odds_ratio": -0.000194189342437312, + "logits/chosen": -0.23732808232307434, + "logits/rejected": -0.27680909633636475, + "logps/chosen": -0.00036069515044800937, + "logps/rejected": -1.7613012790679932, + "loss": 1.253, + "nll_loss": 0.31322553753852844, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.606951213441789e-05, + "rewards/margins": 0.17609405517578125, + "rewards/rejected": -0.17613013088703156, + "step": 7177 + }, + { + "epoch": 4.964038727524205, + "grad_norm": 8.668585777282715, + "learning_rate": 2.7977562624865533e-05, + "log_odds_chosen": 10.292119026184082, + "log_odds_ratio": -0.0008847196586430073, + "logits/chosen": -0.1949756145477295, + "logits/rejected": -0.23017552495002747, + "logps/chosen": -0.0005422401009127498, + "logps/rejected": -1.8730604648590088, + "loss": 0.7748, + "nll_loss": 0.19360435009002686, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.422401227406226e-05, + "rewards/margins": 0.18725183606147766, + "rewards/rejected": -0.18730604648590088, + "step": 7178 + }, + { + "epoch": 4.964730290456432, + "grad_norm": 7.899402141571045, + "learning_rate": 2.7973720608575382e-05, + "log_odds_chosen": 10.159967422485352, + "log_odds_ratio": -0.00010139452933799475, + "logits/chosen": -0.19844774901866913, + "logits/rejected": -0.2780728042125702, + "logps/chosen": -0.00024211732670664787, + "logps/rejected": -1.8791581392288208, + "loss": 0.7403, + "nll_loss": 0.1850677728652954, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4211733034462668e-05, + "rewards/margins": 0.1878916174173355, + "rewards/rejected": -0.1879158318042755, + "step": 7179 + }, + { + "epoch": 4.9654218533886585, + "grad_norm": 8.387503623962402, + "learning_rate": 2.796987859228523e-05, + "log_odds_chosen": 9.664134979248047, + "log_odds_ratio": -0.00022844914929009974, + "logits/chosen": -0.39181891083717346, + "logits/rejected": -0.3865174949169159, + "logps/chosen": -0.00030988542130216956, + "logps/rejected": -1.6006946563720703, + "loss": 0.8644, + "nll_loss": 0.21606658399105072, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.098854358540848e-05, + "rewards/margins": 0.16003848612308502, + "rewards/rejected": -0.16006948053836823, + "step": 7180 + }, + { + "epoch": 4.966113416320885, + "grad_norm": 8.570829391479492, + "learning_rate": 2.7966036575995087e-05, + "log_odds_chosen": 8.541007995605469, + "log_odds_ratio": -0.0035613575018942356, + "logits/chosen": -0.13719545304775238, + "logits/rejected": -0.14414338767528534, + "logps/chosen": -0.004820042755454779, + "logps/rejected": -1.8059543371200562, + "loss": 1.2728, + "nll_loss": 0.3178354799747467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048200428136624396, + "rewards/margins": 0.18011343479156494, + "rewards/rejected": -0.18059545755386353, + "step": 7181 + }, + { + "epoch": 4.966804979253112, + "grad_norm": 10.654635429382324, + "learning_rate": 2.7962194559704936e-05, + "log_odds_chosen": 10.080081939697266, + "log_odds_ratio": -0.00025363650638610125, + "logits/chosen": 0.14342381060123444, + "logits/rejected": 0.042509760707616806, + "logps/chosen": -0.0006070620147511363, + "logps/rejected": -1.8312735557556152, + "loss": 0.6985, + "nll_loss": 0.1746012568473816, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.070620656828396e-05, + "rewards/margins": 0.18306663632392883, + "rewards/rejected": -0.18312734365463257, + "step": 7182 + }, + { + "epoch": 4.967496542185339, + "grad_norm": 8.130351066589355, + "learning_rate": 2.7958352543414785e-05, + "log_odds_chosen": 10.405618667602539, + "log_odds_ratio": -5.621470700134523e-05, + "logits/chosen": -0.11633479595184326, + "logits/rejected": -0.1447855681180954, + "logps/chosen": -0.002370691392570734, + "logps/rejected": -1.6240513324737549, + "loss": 1.2587, + "nll_loss": 0.3146775960922241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023706913634669036, + "rewards/margins": 0.16216805577278137, + "rewards/rejected": -0.1624051332473755, + "step": 7183 + }, + { + "epoch": 4.968188105117566, + "grad_norm": 8.688759803771973, + "learning_rate": 2.7954510527124638e-05, + "log_odds_chosen": 10.065336227416992, + "log_odds_ratio": -0.00010105091496370733, + "logits/chosen": -0.4148634374141693, + "logits/rejected": -0.4110666513442993, + "logps/chosen": -0.00040934234857559204, + "logps/rejected": -1.5639256238937378, + "loss": 0.7909, + "nll_loss": 0.1977076232433319, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0934235585154966e-05, + "rewards/margins": 0.15635162591934204, + "rewards/rejected": -0.15639255940914154, + "step": 7184 + }, + { + "epoch": 4.968879668049793, + "grad_norm": 7.783679485321045, + "learning_rate": 2.7950668510834487e-05, + "log_odds_chosen": 10.572954177856445, + "log_odds_ratio": -5.103446892462671e-05, + "logits/chosen": -0.26260286569595337, + "logits/rejected": -0.28437915444374084, + "logps/chosen": -0.0011634384281933308, + "logps/rejected": -2.1006100177764893, + "loss": 0.8019, + "nll_loss": 0.20047153532505035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011634385009529069, + "rewards/margins": 0.20994466543197632, + "rewards/rejected": -0.21006101369857788, + "step": 7185 + }, + { + "epoch": 4.9695712309820195, + "grad_norm": 9.223774909973145, + "learning_rate": 2.7946826494544336e-05, + "log_odds_chosen": 10.326700210571289, + "log_odds_ratio": -0.00012215416063554585, + "logits/chosen": -0.22556382417678833, + "logits/rejected": -0.2984452545642853, + "logps/chosen": -0.00024488597409799695, + "logps/rejected": -2.0094027519226074, + "loss": 0.6946, + "nll_loss": 0.17364706099033356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4488595954608172e-05, + "rewards/margins": 0.20091575384140015, + "rewards/rejected": -0.20094025135040283, + "step": 7186 + }, + { + "epoch": 4.970262793914246, + "grad_norm": 11.973540306091309, + "learning_rate": 2.7942984478254192e-05, + "log_odds_chosen": 10.546344757080078, + "log_odds_ratio": -0.00012341087858658284, + "logits/chosen": -0.5267561674118042, + "logits/rejected": -0.5304883718490601, + "logps/chosen": -0.0005036446964368224, + "logps/rejected": -2.3137152194976807, + "loss": 1.2295, + "nll_loss": 0.3073546886444092, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.036446964368224e-05, + "rewards/margins": 0.23132115602493286, + "rewards/rejected": -0.23137152194976807, + "step": 7187 + }, + { + "epoch": 4.970954356846473, + "grad_norm": 11.056028366088867, + "learning_rate": 2.793914246196404e-05, + "log_odds_chosen": 10.640401840209961, + "log_odds_ratio": -3.979918619734235e-05, + "logits/chosen": -0.5876413583755493, + "logits/rejected": -0.5758023858070374, + "logps/chosen": -0.00012887921184301376, + "logps/rejected": -1.7237038612365723, + "loss": 0.9218, + "nll_loss": 0.23043809831142426, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2887921911897138e-05, + "rewards/margins": 0.1723574995994568, + "rewards/rejected": -0.17237038910388947, + "step": 7188 + }, + { + "epoch": 4.9716459197787, + "grad_norm": 5.960145950317383, + "learning_rate": 2.793530044567389e-05, + "log_odds_chosen": 11.051551818847656, + "log_odds_ratio": -2.116498217219487e-05, + "logits/chosen": -0.35925549268722534, + "logits/rejected": -0.4869048595428467, + "logps/chosen": -0.00010636688966769725, + "logps/rejected": -1.9271949529647827, + "loss": 0.6382, + "nll_loss": 0.1595507115125656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0636688784870785e-05, + "rewards/margins": 0.1927088499069214, + "rewards/rejected": -0.1927194893360138, + "step": 7189 + }, + { + "epoch": 4.972337482710927, + "grad_norm": 8.614933013916016, + "learning_rate": 2.7931458429383746e-05, + "log_odds_chosen": 11.913887977600098, + "log_odds_ratio": -8.357697879546322e-06, + "logits/chosen": -0.6220681071281433, + "logits/rejected": -0.646041214466095, + "logps/chosen": -9.512872929917648e-05, + "logps/rejected": -2.6356735229492188, + "loss": 0.9438, + "nll_loss": 0.23593756556510925, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.512872566119768e-06, + "rewards/margins": 0.2635578513145447, + "rewards/rejected": -0.26356735825538635, + "step": 7190 + }, + { + "epoch": 4.973029045643154, + "grad_norm": 12.957245826721191, + "learning_rate": 2.7927616413093595e-05, + "log_odds_chosen": 11.058793067932129, + "log_odds_ratio": -0.00019432292901910841, + "logits/chosen": -0.27864354848861694, + "logits/rejected": -0.3478084206581116, + "logps/chosen": -0.0017332588322460651, + "logps/rejected": -2.526738405227661, + "loss": 0.5722, + "nll_loss": 0.1430254876613617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017332589777652174, + "rewards/margins": 0.2525005340576172, + "rewards/rejected": -0.252673864364624, + "step": 7191 + }, + { + "epoch": 4.9737206085753805, + "grad_norm": 16.11777687072754, + "learning_rate": 2.7923774396803444e-05, + "log_odds_chosen": 10.152289390563965, + "log_odds_ratio": -0.00022718863328918815, + "logits/chosen": -0.40325024724006653, + "logits/rejected": -0.4796661138534546, + "logps/chosen": -0.003221320454031229, + "logps/rejected": -2.233243942260742, + "loss": 0.9004, + "nll_loss": 0.2250874638557434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000322132051223889, + "rewards/margins": 0.22300225496292114, + "rewards/rejected": -0.22332441806793213, + "step": 7192 + }, + { + "epoch": 4.974412171507607, + "grad_norm": 9.064397811889648, + "learning_rate": 2.7919932380513296e-05, + "log_odds_chosen": 10.586275100708008, + "log_odds_ratio": -5.1827355491695926e-05, + "logits/chosen": -0.28463008999824524, + "logits/rejected": -0.487646222114563, + "logps/chosen": -0.0004171731125097722, + "logps/rejected": -2.5234835147857666, + "loss": 0.8846, + "nll_loss": 0.2211383581161499, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.171731052338146e-05, + "rewards/margins": 0.25230664014816284, + "rewards/rejected": -0.2523483335971832, + "step": 7193 + }, + { + "epoch": 4.975103734439834, + "grad_norm": 12.817768096923828, + "learning_rate": 2.7916090364223145e-05, + "log_odds_chosen": 10.754351615905762, + "log_odds_ratio": -4.719572461908683e-05, + "logits/chosen": -0.7275233864784241, + "logits/rejected": -0.7562810778617859, + "logps/chosen": -0.00023445190163329244, + "logps/rejected": -2.0250232219696045, + "loss": 0.5522, + "nll_loss": 0.13805551826953888, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3445190890925005e-05, + "rewards/margins": 0.20247888565063477, + "rewards/rejected": -0.2025023102760315, + "step": 7194 + }, + { + "epoch": 4.975795297372061, + "grad_norm": 12.647860527038574, + "learning_rate": 2.7912248347932994e-05, + "log_odds_chosen": 7.500641822814941, + "log_odds_ratio": -0.31233054399490356, + "logits/chosen": -0.5068243741989136, + "logits/rejected": -0.5850129127502441, + "logps/chosen": -0.19046106934547424, + "logps/rejected": -1.045350432395935, + "loss": 1.1691, + "nll_loss": 0.26104840636253357, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.019046107307076454, + "rewards/margins": 0.0854889377951622, + "rewards/rejected": -0.1045350506901741, + "step": 7195 + }, + { + "epoch": 4.976486860304288, + "grad_norm": 11.317513465881348, + "learning_rate": 2.790840633164285e-05, + "log_odds_chosen": 10.024337768554688, + "log_odds_ratio": -0.0001522502425359562, + "logits/chosen": -0.4047994613647461, + "logits/rejected": -0.49443766474723816, + "logps/chosen": -0.00027306549600325525, + "logps/rejected": -1.7729493379592896, + "loss": 0.6732, + "nll_loss": 0.16828899085521698, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7306548872729763e-05, + "rewards/margins": 0.1772676259279251, + "rewards/rejected": -0.17729492485523224, + "step": 7196 + }, + { + "epoch": 4.977178423236515, + "grad_norm": 12.056559562683105, + "learning_rate": 2.79045643153527e-05, + "log_odds_chosen": 9.828668594360352, + "log_odds_ratio": -0.00015435523528140038, + "logits/chosen": -0.42752793431282043, + "logits/rejected": -0.5025291442871094, + "logps/chosen": -0.0008905132999643683, + "logps/rejected": -1.8689935207366943, + "loss": 0.9808, + "nll_loss": 0.24518075585365295, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.905133290681988e-05, + "rewards/margins": 0.18681031465530396, + "rewards/rejected": -0.1868993490934372, + "step": 7197 + }, + { + "epoch": 4.977869986168741, + "grad_norm": 7.273468017578125, + "learning_rate": 2.790072229906255e-05, + "log_odds_chosen": 10.339853286743164, + "log_odds_ratio": -0.0003497989382594824, + "logits/chosen": -0.5535563230514526, + "logits/rejected": -0.6326808333396912, + "logps/chosen": -0.0003252012247685343, + "logps/rejected": -2.080918788909912, + "loss": 0.9554, + "nll_loss": 0.2388230413198471, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.252012174925767e-05, + "rewards/margins": 0.2080593854188919, + "rewards/rejected": -0.20809191465377808, + "step": 7198 + }, + { + "epoch": 4.978561549100968, + "grad_norm": 7.87250280380249, + "learning_rate": 2.7896880282772404e-05, + "log_odds_chosen": 9.142486572265625, + "log_odds_ratio": -0.0016601777169853449, + "logits/chosen": -0.298819899559021, + "logits/rejected": -0.3006221055984497, + "logps/chosen": -0.00171962333843112, + "logps/rejected": -1.6306955814361572, + "loss": 1.2686, + "nll_loss": 0.31698286533355713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001719623542157933, + "rewards/margins": 0.16289760172367096, + "rewards/rejected": -0.16306956112384796, + "step": 7199 + }, + { + "epoch": 4.979253112033195, + "grad_norm": 7.517648220062256, + "learning_rate": 2.7893038266482253e-05, + "log_odds_chosen": 9.140575408935547, + "log_odds_ratio": -0.00042438553646206856, + "logits/chosen": -0.47512996196746826, + "logits/rejected": -0.43797892332077026, + "logps/chosen": -0.0017509328899905086, + "logps/rejected": -1.7444941997528076, + "loss": 0.7559, + "nll_loss": 0.18893952667713165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017509330064058304, + "rewards/margins": 0.17427432537078857, + "rewards/rejected": -0.17444941401481628, + "step": 7200 + }, + { + "epoch": 4.979944674965422, + "grad_norm": 14.481325149536133, + "learning_rate": 2.7889196250192102e-05, + "log_odds_chosen": 11.609492301940918, + "log_odds_ratio": -2.4778462829999626e-05, + "logits/chosen": -0.6213642358779907, + "logits/rejected": -0.6164488196372986, + "logps/chosen": -0.00010396288416814059, + "logps/rejected": -2.2234671115875244, + "loss": 0.9045, + "nll_loss": 0.22612226009368896, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0396288416814059e-05, + "rewards/margins": 0.2223363071680069, + "rewards/rejected": -0.2223467081785202, + "step": 7201 + }, + { + "epoch": 4.980636237897649, + "grad_norm": 10.874162673950195, + "learning_rate": 2.7885354233901955e-05, + "log_odds_chosen": 9.768011093139648, + "log_odds_ratio": -0.0003575875307433307, + "logits/chosen": -0.9017777442932129, + "logits/rejected": -0.8507659435272217, + "logps/chosen": -0.0015366828301921487, + "logps/rejected": -1.9805184602737427, + "loss": 1.0513, + "nll_loss": 0.26278311014175415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015366828301921487, + "rewards/margins": 0.19789819419384003, + "rewards/rejected": -0.1980518400669098, + "step": 7202 + }, + { + "epoch": 4.981327800829876, + "grad_norm": 13.416150093078613, + "learning_rate": 2.7881512217611804e-05, + "log_odds_chosen": 10.33548355102539, + "log_odds_ratio": -8.06988391559571e-05, + "logits/chosen": -0.5759831070899963, + "logits/rejected": -0.6761252284049988, + "logps/chosen": -0.000314599514240399, + "logps/rejected": -2.159302234649658, + "loss": 1.2363, + "nll_loss": 0.3090607225894928, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.145995287923142e-05, + "rewards/margins": 0.21589875221252441, + "rewards/rejected": -0.21593022346496582, + "step": 7203 + }, + { + "epoch": 4.982019363762102, + "grad_norm": 9.852493286132812, + "learning_rate": 2.7877670201321653e-05, + "log_odds_chosen": 11.337201118469238, + "log_odds_ratio": -2.104753366438672e-05, + "logits/chosen": -0.7978764772415161, + "logits/rejected": -0.9169949889183044, + "logps/chosen": -0.0002170755760744214, + "logps/rejected": -2.6007652282714844, + "loss": 0.8359, + "nll_loss": 0.20897330343723297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.170755760744214e-05, + "rewards/margins": 0.2600547671318054, + "rewards/rejected": -0.26007649302482605, + "step": 7204 + }, + { + "epoch": 4.982710926694329, + "grad_norm": 6.295581340789795, + "learning_rate": 2.787382818503151e-05, + "log_odds_chosen": 10.652413368225098, + "log_odds_ratio": -4.092457311344333e-05, + "logits/chosen": -0.35074669122695923, + "logits/rejected": -0.4243549406528473, + "logps/chosen": -0.0004147972504142672, + "logps/rejected": -2.2696712017059326, + "loss": 1.1565, + "nll_loss": 0.28911954164505005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.147972504142672e-05, + "rewards/margins": 0.22692564129829407, + "rewards/rejected": -0.22696711122989655, + "step": 7205 + }, + { + "epoch": 4.983402489626556, + "grad_norm": 6.3069329261779785, + "learning_rate": 2.7869986168741358e-05, + "log_odds_chosen": 10.849081993103027, + "log_odds_ratio": -6.578413012903184e-05, + "logits/chosen": -0.30424827337265015, + "logits/rejected": -0.5037944912910461, + "logps/chosen": -0.0002677696757018566, + "logps/rejected": -2.5074574947357178, + "loss": 1.0339, + "nll_loss": 0.2584582567214966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6776968297781423e-05, + "rewards/margins": 0.25071901082992554, + "rewards/rejected": -0.2507457733154297, + "step": 7206 + }, + { + "epoch": 4.984094052558783, + "grad_norm": 7.28257942199707, + "learning_rate": 2.7866144152451207e-05, + "log_odds_chosen": 10.359865188598633, + "log_odds_ratio": -8.066420559771359e-05, + "logits/chosen": -0.40999922156333923, + "logits/rejected": -0.4879832863807678, + "logps/chosen": -0.0004006815142929554, + "logps/rejected": -2.262136220932007, + "loss": 0.694, + "nll_loss": 0.17349769175052643, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.006815288448706e-05, + "rewards/margins": 0.22617356479167938, + "rewards/rejected": -0.22621361911296844, + "step": 7207 + }, + { + "epoch": 4.98478561549101, + "grad_norm": 15.563834190368652, + "learning_rate": 2.7862302136161056e-05, + "log_odds_chosen": 8.274723052978516, + "log_odds_ratio": -0.7375338077545166, + "logits/chosen": 0.01283140480518341, + "logits/rejected": -0.041081175208091736, + "logps/chosen": -0.1275419443845749, + "logps/rejected": -2.047375202178955, + "loss": 1.3067, + "nll_loss": 0.25292617082595825, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.012754194438457489, + "rewards/margins": 0.19198331236839294, + "rewards/rejected": -0.20473751425743103, + "step": 7208 + }, + { + "epoch": 4.985477178423237, + "grad_norm": 13.405542373657227, + "learning_rate": 2.785846011987091e-05, + "log_odds_chosen": 10.846002578735352, + "log_odds_ratio": -4.18073614127934e-05, + "logits/chosen": -0.6093755960464478, + "logits/rejected": -0.7273604273796082, + "logps/chosen": -0.00028661603573709726, + "logps/rejected": -2.459135055541992, + "loss": 0.7619, + "nll_loss": 0.1904742419719696, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8661603209911846e-05, + "rewards/margins": 0.24588486552238464, + "rewards/rejected": -0.2459135204553604, + "step": 7209 + }, + { + "epoch": 4.986168741355463, + "grad_norm": 6.826845169067383, + "learning_rate": 2.785461810358076e-05, + "log_odds_chosen": 9.772747993469238, + "log_odds_ratio": -0.0009639077470637858, + "logits/chosen": -0.49732786417007446, + "logits/rejected": -0.5413493514060974, + "logps/chosen": -0.001487374771386385, + "logps/rejected": -1.667379379272461, + "loss": 0.4762, + "nll_loss": 0.11894873529672623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001487374829594046, + "rewards/margins": 0.16658920049667358, + "rewards/rejected": -0.16673794388771057, + "step": 7210 + }, + { + "epoch": 4.98686030428769, + "grad_norm": 6.9307122230529785, + "learning_rate": 2.785077608729061e-05, + "log_odds_chosen": 9.978912353515625, + "log_odds_ratio": -0.00010862557246582583, + "logits/chosen": -0.4285060167312622, + "logits/rejected": -0.4519844651222229, + "logps/chosen": -0.000182077128556557, + "logps/rejected": -1.3714263439178467, + "loss": 0.7845, + "nll_loss": 0.19610899686813354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8207714674645104e-05, + "rewards/margins": 0.1371244341135025, + "rewards/rejected": -0.1371426284313202, + "step": 7211 + }, + { + "epoch": 4.987551867219917, + "grad_norm": 10.555225372314453, + "learning_rate": 2.7846934071000462e-05, + "log_odds_chosen": 10.284852027893066, + "log_odds_ratio": -0.04881151393055916, + "logits/chosen": -0.6675065159797668, + "logits/rejected": -0.7506628036499023, + "logps/chosen": -0.010504978708922863, + "logps/rejected": -2.603940010070801, + "loss": 1.0814, + "nll_loss": 0.26547157764434814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010504978708922863, + "rewards/margins": 0.2593434751033783, + "rewards/rejected": -0.26039397716522217, + "step": 7212 + }, + { + "epoch": 4.988243430152144, + "grad_norm": 8.400644302368164, + "learning_rate": 2.784309205471031e-05, + "log_odds_chosen": 10.498527526855469, + "log_odds_ratio": -5.62083805561997e-05, + "logits/chosen": -0.379863977432251, + "logits/rejected": -0.45190173387527466, + "logps/chosen": -0.0003514225536491722, + "logps/rejected": -2.075934886932373, + "loss": 1.04, + "nll_loss": 0.25999289751052856, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5142253182129934e-05, + "rewards/margins": 0.20755833387374878, + "rewards/rejected": -0.20759347081184387, + "step": 7213 + }, + { + "epoch": 4.988934993084371, + "grad_norm": 7.191450595855713, + "learning_rate": 2.783925003842016e-05, + "log_odds_chosen": 10.816597938537598, + "log_odds_ratio": -3.74543851648923e-05, + "logits/chosen": -0.23765279352664948, + "logits/rejected": -0.39486026763916016, + "logps/chosen": -0.0001370952813886106, + "logps/rejected": -1.9729348421096802, + "loss": 0.8655, + "nll_loss": 0.2163611650466919, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3709528502658941e-05, + "rewards/margins": 0.19727978110313416, + "rewards/rejected": -0.1972934901714325, + "step": 7214 + }, + { + "epoch": 4.9896265560165975, + "grad_norm": 6.2734246253967285, + "learning_rate": 2.7835408022130016e-05, + "log_odds_chosen": 8.643949508666992, + "log_odds_ratio": -0.0012646322138607502, + "logits/chosen": -0.25888389348983765, + "logits/rejected": -0.31428784132003784, + "logps/chosen": -0.003130083903670311, + "logps/rejected": -2.032175064086914, + "loss": 0.6893, + "nll_loss": 0.17218798398971558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031300840782932937, + "rewards/margins": 0.20290449261665344, + "rewards/rejected": -0.2032174915075302, + "step": 7215 + }, + { + "epoch": 4.990318118948824, + "grad_norm": 6.024848461151123, + "learning_rate": 2.7831566005839865e-05, + "log_odds_chosen": 10.480557441711426, + "log_odds_ratio": -0.00015379002434201539, + "logits/chosen": -0.3707646131515503, + "logits/rejected": -0.5105197429656982, + "logps/chosen": -0.010387999936938286, + "logps/rejected": -2.8377151489257812, + "loss": 0.935, + "nll_loss": 0.2337241768836975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010387999936938286, + "rewards/margins": 0.2827327251434326, + "rewards/rejected": -0.2837715148925781, + "step": 7216 + }, + { + "epoch": 4.991009681881051, + "grad_norm": 5.96161413192749, + "learning_rate": 2.7827723989549714e-05, + "log_odds_chosen": 8.475927352905273, + "log_odds_ratio": -0.01788167841732502, + "logits/chosen": -0.4176740050315857, + "logits/rejected": -0.5164976119995117, + "logps/chosen": -0.005372497718781233, + "logps/rejected": -1.046644687652588, + "loss": 0.6608, + "nll_loss": 0.16340121626853943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005372497835196555, + "rewards/margins": 0.10412721335887909, + "rewards/rejected": -0.10466445982456207, + "step": 7217 + }, + { + "epoch": 4.991701244813278, + "grad_norm": 10.724852561950684, + "learning_rate": 2.782388197325957e-05, + "log_odds_chosen": 10.809329986572266, + "log_odds_ratio": -3.2888256100704893e-05, + "logits/chosen": -0.26822346448898315, + "logits/rejected": -0.33375710248947144, + "logps/chosen": -0.00012892685481347144, + "logps/rejected": -1.8372243642807007, + "loss": 0.6983, + "nll_loss": 0.17457273602485657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2892686754639726e-05, + "rewards/margins": 0.1837095469236374, + "rewards/rejected": -0.18372243642807007, + "step": 7218 + }, + { + "epoch": 4.992392807745505, + "grad_norm": 15.549614906311035, + "learning_rate": 2.782003995696942e-05, + "log_odds_chosen": 9.113948822021484, + "log_odds_ratio": -0.008212381973862648, + "logits/chosen": -0.5484795570373535, + "logits/rejected": -0.6041221618652344, + "logps/chosen": -0.03184864670038223, + "logps/rejected": -1.857212781906128, + "loss": 0.8248, + "nll_loss": 0.20537768304347992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031848649960011244, + "rewards/margins": 0.18253639340400696, + "rewards/rejected": -0.1857212781906128, + "step": 7219 + }, + { + "epoch": 4.993084370677732, + "grad_norm": 9.258271217346191, + "learning_rate": 2.7816197940679268e-05, + "log_odds_chosen": 10.35129451751709, + "log_odds_ratio": -0.0004313408280722797, + "logits/chosen": -0.5563446879386902, + "logits/rejected": -0.4806095361709595, + "logps/chosen": -0.012187390588223934, + "logps/rejected": -2.4066896438598633, + "loss": 0.9017, + "nll_loss": 0.22538450360298157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012187390821054578, + "rewards/margins": 0.23945021629333496, + "rewards/rejected": -0.24066895246505737, + "step": 7220 + }, + { + "epoch": 4.9937759336099585, + "grad_norm": 8.295549392700195, + "learning_rate": 2.781235592438912e-05, + "log_odds_chosen": 10.136941909790039, + "log_odds_ratio": -0.00016842293553054333, + "logits/chosen": -0.7361425161361694, + "logits/rejected": -0.7589133381843567, + "logps/chosen": -0.000340710103046149, + "logps/rejected": -1.756840467453003, + "loss": 0.8442, + "nll_loss": 0.21102437376976013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.407101394259371e-05, + "rewards/margins": 0.1756500005722046, + "rewards/rejected": -0.17568406462669373, + "step": 7221 + }, + { + "epoch": 4.994467496542185, + "grad_norm": 7.772411346435547, + "learning_rate": 2.780851390809897e-05, + "log_odds_chosen": 8.843536376953125, + "log_odds_ratio": -0.002176887821406126, + "logits/chosen": -0.32384103536605835, + "logits/rejected": -0.4336056709289551, + "logps/chosen": -0.0019777941051870584, + "logps/rejected": -1.5082619190216064, + "loss": 1.6187, + "nll_loss": 0.4044612646102905, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019777943089138716, + "rewards/margins": 0.15062838792800903, + "rewards/rejected": -0.15082618594169617, + "step": 7222 + }, + { + "epoch": 4.995159059474412, + "grad_norm": 5.236146926879883, + "learning_rate": 2.780467189180882e-05, + "log_odds_chosen": 8.80754280090332, + "log_odds_ratio": -0.014589495956897736, + "logits/chosen": -0.2995303273200989, + "logits/rejected": -0.38191771507263184, + "logps/chosen": -0.00858377292752266, + "logps/rejected": -1.7309571504592896, + "loss": 1.0827, + "nll_loss": 0.26920682191848755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008583773742429912, + "rewards/margins": 0.1722373366355896, + "rewards/rejected": -0.1730957180261612, + "step": 7223 + }, + { + "epoch": 4.995850622406639, + "grad_norm": 7.186947822570801, + "learning_rate": 2.7800829875518675e-05, + "log_odds_chosen": 7.228179454803467, + "log_odds_ratio": -0.026927338913083076, + "logits/chosen": -0.7289636135101318, + "logits/rejected": -0.668282151222229, + "logps/chosen": -0.07573521882295609, + "logps/rejected": -1.5513174533843994, + "loss": 0.9258, + "nll_loss": 0.2287617325782776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007573522161692381, + "rewards/margins": 0.14755821228027344, + "rewards/rejected": -0.1551317423582077, + "step": 7224 + }, + { + "epoch": 4.996542185338866, + "grad_norm": 10.229729652404785, + "learning_rate": 2.7796987859228524e-05, + "log_odds_chosen": 9.654919624328613, + "log_odds_ratio": -0.0002468491729814559, + "logits/chosen": -0.253642201423645, + "logits/rejected": -0.3441639840602875, + "logps/chosen": -0.0013339454308152199, + "logps/rejected": -2.110731840133667, + "loss": 0.7524, + "nll_loss": 0.1880699098110199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013339455472305417, + "rewards/margins": 0.21093979477882385, + "rewards/rejected": -0.21107318997383118, + "step": 7225 + }, + { + "epoch": 4.997233748271093, + "grad_norm": 7.260266304016113, + "learning_rate": 2.7793145842938373e-05, + "log_odds_chosen": 9.03848934173584, + "log_odds_ratio": -0.0012857945403084159, + "logits/chosen": -0.6567116975784302, + "logits/rejected": -0.632990837097168, + "logps/chosen": -0.0010413994314149022, + "logps/rejected": -1.5133137702941895, + "loss": 0.9957, + "nll_loss": 0.24879108369350433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010413994459668174, + "rewards/margins": 0.15122725069522858, + "rewards/rejected": -0.15133139491081238, + "step": 7226 + }, + { + "epoch": 4.9979253112033195, + "grad_norm": 5.915309429168701, + "learning_rate": 2.778930382664823e-05, + "log_odds_chosen": 10.508499145507812, + "log_odds_ratio": -4.196947702439502e-05, + "logits/chosen": -0.5167663097381592, + "logits/rejected": -0.6766731142997742, + "logps/chosen": -0.002997783711180091, + "logps/rejected": -3.167526960372925, + "loss": 0.9541, + "nll_loss": 0.23851193487644196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002997783594764769, + "rewards/margins": 0.31645292043685913, + "rewards/rejected": -0.31675270199775696, + "step": 7227 + }, + { + "epoch": 4.998616874135546, + "grad_norm": 9.131293296813965, + "learning_rate": 2.7785461810358078e-05, + "log_odds_chosen": 9.421924591064453, + "log_odds_ratio": -0.00113877619151026, + "logits/chosen": -0.8295114040374756, + "logits/rejected": -0.8581159710884094, + "logps/chosen": -0.0010786966886371374, + "logps/rejected": -1.5355415344238281, + "loss": 1.6171, + "nll_loss": 0.404154896736145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001078696659533307, + "rewards/margins": 0.1534462869167328, + "rewards/rejected": -0.15355415642261505, + "step": 7228 + }, + { + "epoch": 4.999308437067773, + "grad_norm": 9.949240684509277, + "learning_rate": 2.7781619794067927e-05, + "log_odds_chosen": 9.65066146850586, + "log_odds_ratio": -0.0008199802250601351, + "logits/chosen": -0.8208500742912292, + "logits/rejected": -0.8325585126876831, + "logps/chosen": -0.004914685618132353, + "logps/rejected": -2.468127489089966, + "loss": 0.8896, + "nll_loss": 0.2223082333803177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004914685850962996, + "rewards/margins": 0.24632127583026886, + "rewards/rejected": -0.24681273102760315, + "step": 7229 + }, + { + "epoch": 5.0, + "grad_norm": 7.1928815841674805, + "learning_rate": 2.777777777777778e-05, + "log_odds_chosen": 10.277364730834961, + "log_odds_ratio": -0.0025406470522284508, + "logits/chosen": -0.13449889421463013, + "logits/rejected": -0.21222570538520813, + "logps/chosen": -0.0019070475827902555, + "logps/rejected": -2.5370426177978516, + "loss": 1.0403, + "nll_loss": 0.25981298089027405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019070478447247297, + "rewards/margins": 0.2535135746002197, + "rewards/rejected": -0.2537042796611786, + "step": 7230 + }, + { + "epoch": 5.000691562932227, + "grad_norm": 7.312842845916748, + "learning_rate": 2.7773935761487628e-05, + "log_odds_chosen": 11.099580764770508, + "log_odds_ratio": -5.0948812713613734e-05, + "logits/chosen": -0.7584883570671082, + "logits/rejected": -0.7560362815856934, + "logps/chosen": -0.0005695630679838359, + "logps/rejected": -2.5925183296203613, + "loss": 0.9966, + "nll_loss": 0.24914821982383728, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.695630898117088e-05, + "rewards/margins": 0.25919491052627563, + "rewards/rejected": -0.25925183296203613, + "step": 7231 + }, + { + "epoch": 5.001383125864454, + "grad_norm": 6.061283111572266, + "learning_rate": 2.7770093745197477e-05, + "log_odds_chosen": 11.408888816833496, + "log_odds_ratio": -4.97270593768917e-05, + "logits/chosen": -0.45496147871017456, + "logits/rejected": -0.5137842893600464, + "logps/chosen": -0.00019665747822728008, + "logps/rejected": -2.7457075119018555, + "loss": 0.6718, + "nll_loss": 0.1679365336894989, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.966574927791953e-05, + "rewards/margins": 0.2745510935783386, + "rewards/rejected": -0.2745707631111145, + "step": 7232 + }, + { + "epoch": 5.0020746887966805, + "grad_norm": 5.719961166381836, + "learning_rate": 2.7766251728907333e-05, + "log_odds_chosen": 10.571435928344727, + "log_odds_ratio": -8.010861347429454e-05, + "logits/chosen": -0.6030963659286499, + "logits/rejected": -0.5233713388442993, + "logps/chosen": -0.0022244458086788654, + "logps/rejected": -1.9987239837646484, + "loss": 0.6556, + "nll_loss": 0.16389495134353638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002224445779575035, + "rewards/margins": 0.19964995980262756, + "rewards/rejected": -0.19987240433692932, + "step": 7233 + }, + { + "epoch": 5.002766251728907, + "grad_norm": 12.926962852478027, + "learning_rate": 2.7762409712617182e-05, + "log_odds_chosen": 10.400911331176758, + "log_odds_ratio": -0.0005559362471103668, + "logits/chosen": -0.11768453568220139, + "logits/rejected": -0.13520941138267517, + "logps/chosen": -0.0016663175774738193, + "logps/rejected": -1.829831838607788, + "loss": 0.9972, + "nll_loss": 0.24924415349960327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016663174028508365, + "rewards/margins": 0.1828165352344513, + "rewards/rejected": -0.1829831600189209, + "step": 7234 + }, + { + "epoch": 5.003457814661134, + "grad_norm": 5.983175754547119, + "learning_rate": 2.775856769632703e-05, + "log_odds_chosen": 10.03680419921875, + "log_odds_ratio": -0.0002549725177232176, + "logits/chosen": -0.49738869071006775, + "logits/rejected": -0.5541632771492004, + "logps/chosen": -0.0003511386748868972, + "logps/rejected": -1.8507411479949951, + "loss": 0.4813, + "nll_loss": 0.12028904259204865, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.511387330945581e-05, + "rewards/margins": 0.1850389987230301, + "rewards/rejected": -0.185074120759964, + "step": 7235 + }, + { + "epoch": 5.004149377593361, + "grad_norm": 6.617901802062988, + "learning_rate": 2.7754725680036887e-05, + "log_odds_chosen": 10.194670677185059, + "log_odds_ratio": -5.910087929805741e-05, + "logits/chosen": -0.5459590554237366, + "logits/rejected": -0.5887160897254944, + "logps/chosen": -0.0002461395342834294, + "logps/rejected": -1.8155051469802856, + "loss": 0.5684, + "nll_loss": 0.14209213852882385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4613957066321746e-05, + "rewards/margins": 0.18152591586112976, + "rewards/rejected": -0.1815505027770996, + "step": 7236 + }, + { + "epoch": 5.004840940525588, + "grad_norm": 6.73017692565918, + "learning_rate": 2.7750883663746736e-05, + "log_odds_chosen": 9.60072135925293, + "log_odds_ratio": -0.0004344746412243694, + "logits/chosen": -0.5849573612213135, + "logits/rejected": -0.6097676753997803, + "logps/chosen": -0.0005806325352750719, + "logps/rejected": -1.6669625043869019, + "loss": 0.7656, + "nll_loss": 0.1913515031337738, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8063258620677516e-05, + "rewards/margins": 0.16663819551467896, + "rewards/rejected": -0.16669625043869019, + "step": 7237 + }, + { + "epoch": 5.005532503457815, + "grad_norm": 11.552806854248047, + "learning_rate": 2.7747041647456585e-05, + "log_odds_chosen": 11.124504089355469, + "log_odds_ratio": -6.782137643313035e-05, + "logits/chosen": -0.3950381577014923, + "logits/rejected": -0.577907383441925, + "logps/chosen": -0.00024894665693864226, + "logps/rejected": -2.787130117416382, + "loss": 0.6576, + "nll_loss": 0.16438151895999908, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.489466714905575e-05, + "rewards/margins": 0.27868813276290894, + "rewards/rejected": -0.2787129878997803, + "step": 7238 + }, + { + "epoch": 5.0062240663900415, + "grad_norm": 8.356595039367676, + "learning_rate": 2.7743199631166438e-05, + "log_odds_chosen": 10.580275535583496, + "log_odds_ratio": -4.7172503400361165e-05, + "logits/chosen": -0.29390430450439453, + "logits/rejected": -0.38561365008354187, + "logps/chosen": -0.0001928009296534583, + "logps/rejected": -2.058584213256836, + "loss": 0.4335, + "nll_loss": 0.10836201161146164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9280094420537353e-05, + "rewards/margins": 0.2058391571044922, + "rewards/rejected": -0.20585842430591583, + "step": 7239 + }, + { + "epoch": 5.006915629322268, + "grad_norm": 4.8350982666015625, + "learning_rate": 2.7739357614876287e-05, + "log_odds_chosen": 9.761199951171875, + "log_odds_ratio": -0.01201231125742197, + "logits/chosen": -0.42824968695640564, + "logits/rejected": -0.5191749930381775, + "logps/chosen": -0.009147546254098415, + "logps/rejected": -2.1493453979492188, + "loss": 0.6846, + "nll_loss": 0.1699400395154953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009147546952590346, + "rewards/margins": 0.2140198051929474, + "rewards/rejected": -0.2149345576763153, + "step": 7240 + }, + { + "epoch": 5.007607192254495, + "grad_norm": 12.261672973632812, + "learning_rate": 2.7735515598586136e-05, + "log_odds_chosen": 11.049886703491211, + "log_odds_ratio": -0.00011858274228870869, + "logits/chosen": -0.6273783445358276, + "logits/rejected": -0.633759617805481, + "logps/chosen": -0.00024194586148951203, + "logps/rejected": -2.5788331031799316, + "loss": 0.688, + "nll_loss": 0.17198488116264343, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.419458542135544e-05, + "rewards/margins": 0.25785911083221436, + "rewards/rejected": -0.25788331031799316, + "step": 7241 + }, + { + "epoch": 5.008298755186722, + "grad_norm": 16.330045700073242, + "learning_rate": 2.773167358229599e-05, + "log_odds_chosen": 11.262862205505371, + "log_odds_ratio": -7.536452176282182e-05, + "logits/chosen": -0.5741608142852783, + "logits/rejected": -0.6448097229003906, + "logps/chosen": -0.00036441374686546624, + "logps/rejected": -2.5372867584228516, + "loss": 0.7904, + "nll_loss": 0.19759222865104675, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.644137905212119e-05, + "rewards/margins": 0.25369223952293396, + "rewards/rejected": -0.2537286877632141, + "step": 7242 + }, + { + "epoch": 5.008990318118949, + "grad_norm": 7.2465386390686035, + "learning_rate": 2.772783156600584e-05, + "log_odds_chosen": 9.80888843536377, + "log_odds_ratio": -0.0011844683904200792, + "logits/chosen": -0.689521074295044, + "logits/rejected": -0.6528528332710266, + "logps/chosen": -0.001049173646606505, + "logps/rejected": -1.6593786478042603, + "loss": 0.6494, + "nll_loss": 0.16222906112670898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010491736611584201, + "rewards/margins": 0.16583296656608582, + "rewards/rejected": -0.1659378856420517, + "step": 7243 + }, + { + "epoch": 5.009681881051176, + "grad_norm": 10.320951461791992, + "learning_rate": 2.772398954971569e-05, + "log_odds_chosen": 10.070535659790039, + "log_odds_ratio": -0.00011470584286144003, + "logits/chosen": -0.40506500005722046, + "logits/rejected": -0.4523780643939972, + "logps/chosen": -0.0030764671973884106, + "logps/rejected": -2.1235923767089844, + "loss": 0.8448, + "nll_loss": 0.2111879140138626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003076466964557767, + "rewards/margins": 0.2120516151189804, + "rewards/rejected": -0.2123592644929886, + "step": 7244 + }, + { + "epoch": 5.0103734439834025, + "grad_norm": 14.852372169494629, + "learning_rate": 2.7720147533425545e-05, + "log_odds_chosen": 10.029454231262207, + "log_odds_ratio": -0.00010282750736223534, + "logits/chosen": -0.50398850440979, + "logits/rejected": -0.6273823380470276, + "logps/chosen": -0.0004358980804681778, + "logps/rejected": -2.150851011276245, + "loss": 0.8415, + "nll_loss": 0.21036122739315033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.358980731922202e-05, + "rewards/margins": 0.21504151821136475, + "rewards/rejected": -0.21508511900901794, + "step": 7245 + }, + { + "epoch": 5.011065006915629, + "grad_norm": 11.475605010986328, + "learning_rate": 2.7716305517135394e-05, + "log_odds_chosen": 10.059589385986328, + "log_odds_ratio": -0.00017879570077639073, + "logits/chosen": -0.5300588607788086, + "logits/rejected": -0.6242033243179321, + "logps/chosen": -0.0006024139001965523, + "logps/rejected": -1.965653419494629, + "loss": 0.6115, + "nll_loss": 0.15284880995750427, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0241389292059466e-05, + "rewards/margins": 0.1965051293373108, + "rewards/rejected": -0.19656535983085632, + "step": 7246 + }, + { + "epoch": 5.011756569847856, + "grad_norm": 4.2483062744140625, + "learning_rate": 2.7712463500845244e-05, + "log_odds_chosen": 10.718931198120117, + "log_odds_ratio": -5.807676279800944e-05, + "logits/chosen": -0.6688945889472961, + "logits/rejected": -0.6880615949630737, + "logps/chosen": -0.00013566450797952712, + "logps/rejected": -1.7550441026687622, + "loss": 0.3919, + "nll_loss": 0.09796933084726334, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3566450434154831e-05, + "rewards/margins": 0.1754908561706543, + "rewards/rejected": -0.1755044162273407, + "step": 7247 + }, + { + "epoch": 5.012448132780083, + "grad_norm": 7.51168966293335, + "learning_rate": 2.7708621484555096e-05, + "log_odds_chosen": 9.49942684173584, + "log_odds_ratio": -0.0002443839912302792, + "logits/chosen": -0.49505460262298584, + "logits/rejected": -0.5230669975280762, + "logps/chosen": -0.0012824471341446042, + "logps/rejected": -1.38538658618927, + "loss": 1.1477, + "nll_loss": 0.28689271211624146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001282447192352265, + "rewards/margins": 0.13841041922569275, + "rewards/rejected": -0.1385386735200882, + "step": 7248 + }, + { + "epoch": 5.01313969571231, + "grad_norm": 6.2242889404296875, + "learning_rate": 2.7704779468264945e-05, + "log_odds_chosen": 9.833250999450684, + "log_odds_ratio": -0.00011980785347986966, + "logits/chosen": -0.4105455279350281, + "logits/rejected": -0.45921966433525085, + "logps/chosen": -0.00029663456371054053, + "logps/rejected": -1.7124733924865723, + "loss": 0.5858, + "nll_loss": 0.14644891023635864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.966345527966041e-05, + "rewards/margins": 0.171217679977417, + "rewards/rejected": -0.17124733328819275, + "step": 7249 + }, + { + "epoch": 5.013831258644537, + "grad_norm": 8.974833488464355, + "learning_rate": 2.7700937451974794e-05, + "log_odds_chosen": 9.523042678833008, + "log_odds_ratio": -0.0032822166103869677, + "logits/chosen": -0.5493214130401611, + "logits/rejected": -0.5524061918258667, + "logps/chosen": -0.009554987773299217, + "logps/rejected": -2.449441432952881, + "loss": 0.964, + "nll_loss": 0.24066194891929626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009554987191222608, + "rewards/margins": 0.24398863315582275, + "rewards/rejected": -0.24494412541389465, + "step": 7250 + }, + { + "epoch": 5.014522821576763, + "grad_norm": 11.83836555480957, + "learning_rate": 2.769709543568465e-05, + "log_odds_chosen": 10.720413208007812, + "log_odds_ratio": -0.00014413942699320614, + "logits/chosen": -0.815521240234375, + "logits/rejected": -0.9365243911743164, + "logps/chosen": -0.0003172925498802215, + "logps/rejected": -2.3871865272521973, + "loss": 0.8637, + "nll_loss": 0.21590059995651245, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1729257898405194e-05, + "rewards/margins": 0.2386869490146637, + "rewards/rejected": -0.2387186884880066, + "step": 7251 + }, + { + "epoch": 5.01521438450899, + "grad_norm": 6.946929931640625, + "learning_rate": 2.76932534193945e-05, + "log_odds_chosen": 9.54383659362793, + "log_odds_ratio": -0.00016248160682152957, + "logits/chosen": -0.4637078642845154, + "logits/rejected": -0.5163708329200745, + "logps/chosen": -0.0003959203895647079, + "logps/rejected": -1.657344937324524, + "loss": 0.7034, + "nll_loss": 0.17582513391971588, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.959203968406655e-05, + "rewards/margins": 0.16569490730762482, + "rewards/rejected": -0.16573449969291687, + "step": 7252 + }, + { + "epoch": 5.015905947441217, + "grad_norm": 6.577551364898682, + "learning_rate": 2.7689411403104348e-05, + "log_odds_chosen": 9.708788871765137, + "log_odds_ratio": -0.0003885244659613818, + "logits/chosen": -0.1782318502664566, + "logits/rejected": -0.23638510704040527, + "logps/chosen": -0.0003596053284127265, + "logps/rejected": -1.6300208568572998, + "loss": 0.9062, + "nll_loss": 0.2265045940876007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.596053284127265e-05, + "rewards/margins": 0.16296613216400146, + "rewards/rejected": -0.16300208866596222, + "step": 7253 + }, + { + "epoch": 5.016597510373444, + "grad_norm": 7.003281593322754, + "learning_rate": 2.7685569386814204e-05, + "log_odds_chosen": 10.26560115814209, + "log_odds_ratio": -7.491197175113484e-05, + "logits/chosen": -0.275127112865448, + "logits/rejected": -0.3356919288635254, + "logps/chosen": -0.000538189138751477, + "logps/rejected": -2.156519889831543, + "loss": 0.7139, + "nll_loss": 0.1784665733575821, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3818916057934985e-05, + "rewards/margins": 0.21559816598892212, + "rewards/rejected": -0.2156520038843155, + "step": 7254 + }, + { + "epoch": 5.017289073305671, + "grad_norm": 8.421640396118164, + "learning_rate": 2.7681727370524053e-05, + "log_odds_chosen": 11.054540634155273, + "log_odds_ratio": -1.7887143258121796e-05, + "logits/chosen": 0.0565011166036129, + "logits/rejected": -0.08696407079696655, + "logps/chosen": -0.00017183017916977406, + "logps/rejected": -2.2747251987457275, + "loss": 0.7991, + "nll_loss": 0.19977036118507385, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7183017916977406e-05, + "rewards/margins": 0.22745534777641296, + "rewards/rejected": -0.22747252881526947, + "step": 7255 + }, + { + "epoch": 5.017980636237898, + "grad_norm": 9.712133407592773, + "learning_rate": 2.7677885354233902e-05, + "log_odds_chosen": 10.734992027282715, + "log_odds_ratio": -0.0001417091116309166, + "logits/chosen": -0.5213885307312012, + "logits/rejected": -0.534921407699585, + "logps/chosen": -0.0018052636878564954, + "logps/rejected": -2.5130221843719482, + "loss": 1.0096, + "nll_loss": 0.25238972902297974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018052638915833086, + "rewards/margins": 0.2511216998100281, + "rewards/rejected": -0.25130224227905273, + "step": 7256 + }, + { + "epoch": 5.018672199170124, + "grad_norm": 6.777373790740967, + "learning_rate": 2.7674043337943754e-05, + "log_odds_chosen": 9.396719932556152, + "log_odds_ratio": -0.00021104556799400598, + "logits/chosen": -0.6140926480293274, + "logits/rejected": -0.7252911925315857, + "logps/chosen": -0.0006496300920844078, + "logps/rejected": -1.5737005472183228, + "loss": 0.9829, + "nll_loss": 0.24570497870445251, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.496300920844078e-05, + "rewards/margins": 0.15730509161949158, + "rewards/rejected": -0.15737006068229675, + "step": 7257 + }, + { + "epoch": 5.019363762102351, + "grad_norm": 5.959966659545898, + "learning_rate": 2.7670201321653603e-05, + "log_odds_chosen": 9.696066856384277, + "log_odds_ratio": -0.00020695854618679732, + "logits/chosen": -0.46064212918281555, + "logits/rejected": -0.5616683959960938, + "logps/chosen": -0.0001567387516843155, + "logps/rejected": -1.2842741012573242, + "loss": 0.7888, + "nll_loss": 0.19716843962669373, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.567387516843155e-05, + "rewards/margins": 0.12841174006462097, + "rewards/rejected": -0.1284274160861969, + "step": 7258 + }, + { + "epoch": 5.020055325034578, + "grad_norm": 10.227088928222656, + "learning_rate": 2.7666359305363453e-05, + "log_odds_chosen": 10.79662036895752, + "log_odds_ratio": -0.0001424798829248175, + "logits/chosen": -0.46717405319213867, + "logits/rejected": -0.5267354846000671, + "logps/chosen": -0.00027820401010103524, + "logps/rejected": -1.9651286602020264, + "loss": 0.8254, + "nll_loss": 0.20633377134799957, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7820402465295047e-05, + "rewards/margins": 0.19648504257202148, + "rewards/rejected": -0.1965128481388092, + "step": 7259 + }, + { + "epoch": 5.020746887966805, + "grad_norm": 8.242298126220703, + "learning_rate": 2.766251728907331e-05, + "log_odds_chosen": 10.206131935119629, + "log_odds_ratio": -0.002142932265996933, + "logits/chosen": -0.7705875039100647, + "logits/rejected": -0.8422242999076843, + "logps/chosen": -0.0015574386343359947, + "logps/rejected": -2.130014657974243, + "loss": 0.8617, + "nll_loss": 0.21520650386810303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015574386634398252, + "rewards/margins": 0.21284572780132294, + "rewards/rejected": -0.21300145983695984, + "step": 7260 + }, + { + "epoch": 5.021438450899032, + "grad_norm": 9.300799369812012, + "learning_rate": 2.7658675272783157e-05, + "log_odds_chosen": 10.081828117370605, + "log_odds_ratio": -9.091423271456733e-05, + "logits/chosen": -0.43533748388290405, + "logits/rejected": -0.4441641867160797, + "logps/chosen": -0.000783787458203733, + "logps/rejected": -2.2557663917541504, + "loss": 0.6918, + "nll_loss": 0.1729460507631302, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.837874727556482e-05, + "rewards/margins": 0.2254982590675354, + "rewards/rejected": -0.22557662427425385, + "step": 7261 + }, + { + "epoch": 5.022130013831259, + "grad_norm": 7.336118221282959, + "learning_rate": 2.7654833256493006e-05, + "log_odds_chosen": 10.239450454711914, + "log_odds_ratio": -0.0003725987917277962, + "logits/chosen": -0.6500371694564819, + "logits/rejected": -0.6669209003448486, + "logps/chosen": -0.0006331615149974823, + "logps/rejected": -1.8425328731536865, + "loss": 0.7544, + "nll_loss": 0.1885695457458496, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331615441013128e-05, + "rewards/margins": 0.18418999016284943, + "rewards/rejected": -0.18425330519676208, + "step": 7262 + }, + { + "epoch": 5.022821576763485, + "grad_norm": 9.546539306640625, + "learning_rate": 2.7650991240202862e-05, + "log_odds_chosen": 10.925753593444824, + "log_odds_ratio": -4.614323552232236e-05, + "logits/chosen": -0.5992140769958496, + "logits/rejected": -0.65520840883255, + "logps/chosen": -0.0001765150809660554, + "logps/rejected": -2.0011026859283447, + "loss": 1.016, + "nll_loss": 0.2540021240711212, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7651509551797062e-05, + "rewards/margins": 0.200092613697052, + "rewards/rejected": -0.2001102864742279, + "step": 7263 + }, + { + "epoch": 5.023513139695712, + "grad_norm": 6.58445930480957, + "learning_rate": 2.764714922391271e-05, + "log_odds_chosen": 10.151383399963379, + "log_odds_ratio": -6.918309372849762e-05, + "logits/chosen": -0.6456372737884521, + "logits/rejected": -0.7023369073867798, + "logps/chosen": -0.0014546513557434082, + "logps/rejected": -1.9670419692993164, + "loss": 1.2457, + "nll_loss": 0.3114243149757385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014546513557434082, + "rewards/margins": 0.19655874371528625, + "rewards/rejected": -0.1967042088508606, + "step": 7264 + }, + { + "epoch": 5.024204702627939, + "grad_norm": 5.597912311553955, + "learning_rate": 2.764330720762256e-05, + "log_odds_chosen": 10.879961013793945, + "log_odds_ratio": -5.5353310017380863e-05, + "logits/chosen": -0.20097941160202026, + "logits/rejected": -0.3843633830547333, + "logps/chosen": -0.0003638151101768017, + "logps/rejected": -2.6702191829681396, + "loss": 0.5997, + "nll_loss": 0.14991742372512817, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.638151247287169e-05, + "rewards/margins": 0.26698553562164307, + "rewards/rejected": -0.26702192425727844, + "step": 7265 + }, + { + "epoch": 5.024896265560166, + "grad_norm": 6.466996669769287, + "learning_rate": 2.7639465191332413e-05, + "log_odds_chosen": 9.323081016540527, + "log_odds_ratio": -0.0008883294649422169, + "logits/chosen": -0.4426640570163727, + "logits/rejected": -0.47450733184814453, + "logps/chosen": -0.0012046220945194364, + "logps/rejected": -1.328416109085083, + "loss": 0.7256, + "nll_loss": 0.18131092190742493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012046222400385886, + "rewards/margins": 0.1327211558818817, + "rewards/rejected": -0.13284161686897278, + "step": 7266 + }, + { + "epoch": 5.025587828492393, + "grad_norm": 7.6741228103637695, + "learning_rate": 2.7635623175042262e-05, + "log_odds_chosen": 10.802885055541992, + "log_odds_ratio": -0.00011306915985187516, + "logits/chosen": -0.40469425916671753, + "logits/rejected": -0.47581946849823, + "logps/chosen": -0.00025673132040537894, + "logps/rejected": -2.3214166164398193, + "loss": 0.5633, + "nll_loss": 0.1408044546842575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5673132768133655e-05, + "rewards/margins": 0.23211601376533508, + "rewards/rejected": -0.2321416735649109, + "step": 7267 + }, + { + "epoch": 5.0262793914246195, + "grad_norm": 6.7040815353393555, + "learning_rate": 2.763178115875211e-05, + "log_odds_chosen": 10.767393112182617, + "log_odds_ratio": -3.190072311554104e-05, + "logits/chosen": -0.6101112365722656, + "logits/rejected": -0.6725625395774841, + "logps/chosen": -0.0001772954419720918, + "logps/rejected": -2.14335036277771, + "loss": 0.7389, + "nll_loss": 0.18472109735012054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.772954419720918e-05, + "rewards/margins": 0.21431732177734375, + "rewards/rejected": -0.21433503925800323, + "step": 7268 + }, + { + "epoch": 5.026970954356846, + "grad_norm": 11.36666488647461, + "learning_rate": 2.7627939142461967e-05, + "log_odds_chosen": 9.944623947143555, + "log_odds_ratio": -0.004825376905500889, + "logits/chosen": -0.14808431267738342, + "logits/rejected": -0.07526087760925293, + "logps/chosen": -0.019000336527824402, + "logps/rejected": -2.203162670135498, + "loss": 1.2546, + "nll_loss": 0.31317025423049927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019000339088961482, + "rewards/margins": 0.218416228890419, + "rewards/rejected": -0.22031627595424652, + "step": 7269 + }, + { + "epoch": 5.027662517289073, + "grad_norm": 8.129460334777832, + "learning_rate": 2.7624097126171816e-05, + "log_odds_chosen": 10.38253116607666, + "log_odds_ratio": -0.0021352546755224466, + "logits/chosen": -0.7980105876922607, + "logits/rejected": -0.8242998719215393, + "logps/chosen": -0.0009977391455322504, + "logps/rejected": -2.466899871826172, + "loss": 0.8465, + "nll_loss": 0.21141372621059418, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.977391891879961e-05, + "rewards/margins": 0.24659022688865662, + "rewards/rejected": -0.24668997526168823, + "step": 7270 + }, + { + "epoch": 5.0283540802213, + "grad_norm": 6.1578898429870605, + "learning_rate": 2.7620255109881665e-05, + "log_odds_chosen": 10.121634483337402, + "log_odds_ratio": -0.0015102961333468556, + "logits/chosen": -0.8386377096176147, + "logits/rejected": -0.8806687593460083, + "logps/chosen": -0.0004191715852357447, + "logps/rejected": -2.1044983863830566, + "loss": 0.9582, + "nll_loss": 0.23941028118133545, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.191716288914904e-05, + "rewards/margins": 0.21040791273117065, + "rewards/rejected": -0.21044982969760895, + "step": 7271 + }, + { + "epoch": 5.029045643153527, + "grad_norm": 8.820459365844727, + "learning_rate": 2.761641309359152e-05, + "log_odds_chosen": 10.972347259521484, + "log_odds_ratio": -2.266202136524953e-05, + "logits/chosen": -0.33121025562286377, + "logits/rejected": -0.3720765709877014, + "logps/chosen": -0.00029626936884596944, + "logps/rejected": -2.2669551372528076, + "loss": 0.6732, + "nll_loss": 0.16830147802829742, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.962693542940542e-05, + "rewards/margins": 0.2266658991575241, + "rewards/rejected": -0.22669553756713867, + "step": 7272 + }, + { + "epoch": 5.029737206085754, + "grad_norm": 11.16493034362793, + "learning_rate": 2.761257107730137e-05, + "log_odds_chosen": 11.278556823730469, + "log_odds_ratio": -2.037870217463933e-05, + "logits/chosen": -0.4210745096206665, + "logits/rejected": -0.4552077054977417, + "logps/chosen": -0.00014141926658339798, + "logps/rejected": -2.2942113876342773, + "loss": 0.9183, + "nll_loss": 0.229561448097229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4141928659228142e-05, + "rewards/margins": 0.22940698266029358, + "rewards/rejected": -0.22942113876342773, + "step": 7273 + }, + { + "epoch": 5.0304287690179805, + "grad_norm": 8.953804016113281, + "learning_rate": 2.760872906101122e-05, + "log_odds_chosen": 9.440807342529297, + "log_odds_ratio": -0.00022025183716323227, + "logits/chosen": -0.49695202708244324, + "logits/rejected": -0.5508847236633301, + "logps/chosen": -0.00028177339117974043, + "logps/rejected": -1.4282622337341309, + "loss": 0.8057, + "nll_loss": 0.20140813291072845, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8177339117974043e-05, + "rewards/margins": 0.14279805123806, + "rewards/rejected": -0.14282622933387756, + "step": 7274 + }, + { + "epoch": 5.031120331950207, + "grad_norm": 13.167290687561035, + "learning_rate": 2.760488704472107e-05, + "log_odds_chosen": 10.361078262329102, + "log_odds_ratio": -0.0012096577556803823, + "logits/chosen": -0.6894783973693848, + "logits/rejected": -0.7884131073951721, + "logps/chosen": -0.0008816584595479071, + "logps/rejected": -1.886361837387085, + "loss": 1.0739, + "nll_loss": 0.2683638632297516, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.816583431325853e-05, + "rewards/margins": 0.1885480284690857, + "rewards/rejected": -0.1886361837387085, + "step": 7275 + }, + { + "epoch": 5.031811894882434, + "grad_norm": 5.316054344177246, + "learning_rate": 2.760104502843092e-05, + "log_odds_chosen": 9.974425315856934, + "log_odds_ratio": -0.00026799910119734704, + "logits/chosen": -0.34158962965011597, + "logits/rejected": -0.3852129578590393, + "logps/chosen": -0.000451413361588493, + "logps/rejected": -1.9002914428710938, + "loss": 0.8266, + "nll_loss": 0.20661142468452454, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.51413361588493e-05, + "rewards/margins": 0.1899840086698532, + "rewards/rejected": -0.19002914428710938, + "step": 7276 + }, + { + "epoch": 5.032503457814661, + "grad_norm": 7.2751569747924805, + "learning_rate": 2.759720301214077e-05, + "log_odds_chosen": 10.29867172241211, + "log_odds_ratio": -5.476947262650356e-05, + "logits/chosen": -0.22670063376426697, + "logits/rejected": -0.23984383046627045, + "logps/chosen": -0.0004981955280527472, + "logps/rejected": -1.9702985286712646, + "loss": 0.6271, + "nll_loss": 0.15675778687000275, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.98195513500832e-05, + "rewards/margins": 0.1969800591468811, + "rewards/rejected": -0.19702985882759094, + "step": 7277 + }, + { + "epoch": 5.033195020746888, + "grad_norm": 8.760295867919922, + "learning_rate": 2.7593360995850625e-05, + "log_odds_chosen": 10.028202056884766, + "log_odds_ratio": -0.00024880870478227735, + "logits/chosen": -0.4461353123188019, + "logits/rejected": -0.5395488739013672, + "logps/chosen": -0.000239362838328816, + "logps/rejected": -1.6638593673706055, + "loss": 0.8082, + "nll_loss": 0.2020374834537506, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.393628346908372e-05, + "rewards/margins": 0.166362002491951, + "rewards/rejected": -0.1663859486579895, + "step": 7278 + }, + { + "epoch": 5.033886583679115, + "grad_norm": 8.667499542236328, + "learning_rate": 2.7589518979560474e-05, + "log_odds_chosen": 10.32560920715332, + "log_odds_ratio": -9.192282595904544e-05, + "logits/chosen": -0.7508265376091003, + "logits/rejected": -0.8265612125396729, + "logps/chosen": -0.00017297209706157446, + "logps/rejected": -1.863836407661438, + "loss": 0.7363, + "nll_loss": 0.18405523896217346, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7297210433753207e-05, + "rewards/margins": 0.18636634945869446, + "rewards/rejected": -0.18638364970684052, + "step": 7279 + }, + { + "epoch": 5.0345781466113415, + "grad_norm": 8.381052017211914, + "learning_rate": 2.7585676963270323e-05, + "log_odds_chosen": 10.612117767333984, + "log_odds_ratio": -9.46400105021894e-05, + "logits/chosen": -0.5738332867622375, + "logits/rejected": -0.6243719458580017, + "logps/chosen": -0.006455769296735525, + "logps/rejected": -2.590888500213623, + "loss": 0.6972, + "nll_loss": 0.1742965281009674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006455769180320203, + "rewards/margins": 0.25844329595565796, + "rewards/rejected": -0.2590888738632202, + "step": 7280 + }, + { + "epoch": 5.035269709543568, + "grad_norm": 5.367280960083008, + "learning_rate": 2.758183494698018e-05, + "log_odds_chosen": 11.281238555908203, + "log_odds_ratio": -1.8807790183927864e-05, + "logits/chosen": -0.22567470371723175, + "logits/rejected": -0.32583460211753845, + "logps/chosen": -0.0001243324513779953, + "logps/rejected": -2.2370567321777344, + "loss": 0.4467, + "nll_loss": 0.11168432235717773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2433246411092114e-05, + "rewards/margins": 0.22369323670864105, + "rewards/rejected": -0.22370567917823792, + "step": 7281 + }, + { + "epoch": 5.035961272475795, + "grad_norm": 8.441225051879883, + "learning_rate": 2.7577992930690028e-05, + "log_odds_chosen": 9.585426330566406, + "log_odds_ratio": -0.0005702337948605418, + "logits/chosen": -0.26939302682876587, + "logits/rejected": -0.31572502851486206, + "logps/chosen": -0.0015275696059688926, + "logps/rejected": -1.8812816143035889, + "loss": 0.8862, + "nll_loss": 0.2214984893798828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001527569693280384, + "rewards/margins": 0.18797540664672852, + "rewards/rejected": -0.18812817335128784, + "step": 7282 + }, + { + "epoch": 5.036652835408022, + "grad_norm": 7.499341011047363, + "learning_rate": 2.7574150914399877e-05, + "log_odds_chosen": 9.687578201293945, + "log_odds_ratio": -0.0005626450874842703, + "logits/chosen": -0.4453797936439514, + "logits/rejected": -0.4718892574310303, + "logps/chosen": -0.0005303797079250216, + "logps/rejected": -1.8079785108566284, + "loss": 0.9008, + "nll_loss": 0.22513696551322937, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3037976613268256e-05, + "rewards/margins": 0.18074482679367065, + "rewards/rejected": -0.18079787492752075, + "step": 7283 + }, + { + "epoch": 5.037344398340249, + "grad_norm": 7.906566143035889, + "learning_rate": 2.757030889810973e-05, + "log_odds_chosen": 11.452741622924805, + "log_odds_ratio": -2.4343857148778625e-05, + "logits/chosen": -0.21651677787303925, + "logits/rejected": -0.2888755798339844, + "logps/chosen": -0.00034281317493878305, + "logps/rejected": -3.1185402870178223, + "loss": 1.0789, + "nll_loss": 0.269711434841156, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4281318221474066e-05, + "rewards/margins": 0.31181973218917847, + "rewards/rejected": -0.3118540346622467, + "step": 7284 + }, + { + "epoch": 5.038035961272476, + "grad_norm": 7.8299384117126465, + "learning_rate": 2.756646688181958e-05, + "log_odds_chosen": 9.673845291137695, + "log_odds_ratio": -0.00026930117746815085, + "logits/chosen": -0.5036507844924927, + "logits/rejected": -0.6252288818359375, + "logps/chosen": -0.0005948683246970177, + "logps/rejected": -1.937495231628418, + "loss": 0.8598, + "nll_loss": 0.2149186134338379, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948682883172296e-05, + "rewards/margins": 0.19369004666805267, + "rewards/rejected": -0.1937495321035385, + "step": 7285 + }, + { + "epoch": 5.0387275242047025, + "grad_norm": 10.01733684539795, + "learning_rate": 2.7562624865529428e-05, + "log_odds_chosen": 10.357145309448242, + "log_odds_ratio": -0.0006056310376152396, + "logits/chosen": -0.21898457407951355, + "logits/rejected": -0.2727906405925751, + "logps/chosen": -0.0008674904238432646, + "logps/rejected": -2.3992435932159424, + "loss": 0.846, + "nll_loss": 0.21144171059131622, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.67490452947095e-05, + "rewards/margins": 0.23983760178089142, + "rewards/rejected": -0.2399243712425232, + "step": 7286 + }, + { + "epoch": 5.039419087136929, + "grad_norm": 7.567254066467285, + "learning_rate": 2.7558782849239284e-05, + "log_odds_chosen": 9.905765533447266, + "log_odds_ratio": -0.0006043082103133202, + "logits/chosen": -0.45671600103378296, + "logits/rejected": -0.5718494653701782, + "logps/chosen": -0.0013060432393103838, + "logps/rejected": -1.7504911422729492, + "loss": 0.6407, + "nll_loss": 0.16010862588882446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013060432684142143, + "rewards/margins": 0.1749185174703598, + "rewards/rejected": -0.17504912614822388, + "step": 7287 + }, + { + "epoch": 5.040110650069156, + "grad_norm": 9.728728294372559, + "learning_rate": 2.7554940832949133e-05, + "log_odds_chosen": 9.93949031829834, + "log_odds_ratio": -0.00012081762542948127, + "logits/chosen": -0.3270997405052185, + "logits/rejected": -0.40328624844551086, + "logps/chosen": -0.00031918910099193454, + "logps/rejected": -1.9016844034194946, + "loss": 0.6612, + "nll_loss": 0.16527943313121796, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1918905733618885e-05, + "rewards/margins": 0.19013653695583344, + "rewards/rejected": -0.19016844034194946, + "step": 7288 + }, + { + "epoch": 5.040802213001383, + "grad_norm": 6.479235649108887, + "learning_rate": 2.7551098816658982e-05, + "log_odds_chosen": 10.745867729187012, + "log_odds_ratio": -0.0001523199025541544, + "logits/chosen": -0.10512614250183105, + "logits/rejected": -0.025525301694869995, + "logps/chosen": -0.000199339963728562, + "logps/rejected": -2.0925867557525635, + "loss": 1.1551, + "nll_loss": 0.28876954317092896, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.993399564526044e-05, + "rewards/margins": 0.20923873782157898, + "rewards/rejected": -0.20925866067409515, + "step": 7289 + }, + { + "epoch": 5.04149377593361, + "grad_norm": 11.356147766113281, + "learning_rate": 2.7547256800368838e-05, + "log_odds_chosen": 10.036778450012207, + "log_odds_ratio": -8.606391202192754e-05, + "logits/chosen": -0.4650914669036865, + "logits/rejected": -0.6013932228088379, + "logps/chosen": -0.001222763443365693, + "logps/rejected": -1.8660969734191895, + "loss": 1.1791, + "nll_loss": 0.29475611448287964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012227633851580322, + "rewards/margins": 0.18648743629455566, + "rewards/rejected": -0.18660971522331238, + "step": 7290 + }, + { + "epoch": 5.042185338865837, + "grad_norm": 5.68522834777832, + "learning_rate": 2.7543414784078687e-05, + "log_odds_chosen": 9.280818939208984, + "log_odds_ratio": -0.001574191846884787, + "logits/chosen": -0.30058932304382324, + "logits/rejected": -0.3923916220664978, + "logps/chosen": -0.003772944677621126, + "logps/rejected": -2.097243547439575, + "loss": 0.7078, + "nll_loss": 0.17678092420101166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000377294491045177, + "rewards/margins": 0.20934706926345825, + "rewards/rejected": -0.20972435176372528, + "step": 7291 + }, + { + "epoch": 5.0428769017980635, + "grad_norm": 24.780282974243164, + "learning_rate": 2.7539572767788536e-05, + "log_odds_chosen": 7.424704551696777, + "log_odds_ratio": -0.23528823256492615, + "logits/chosen": -0.8567458391189575, + "logits/rejected": -0.8633086085319519, + "logps/chosen": -0.035740286111831665, + "logps/rejected": -1.280559778213501, + "loss": 0.8606, + "nll_loss": 0.19162975251674652, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00357402884401381, + "rewards/margins": 0.12448194622993469, + "rewards/rejected": -0.12805597484111786, + "step": 7292 + }, + { + "epoch": 5.04356846473029, + "grad_norm": 7.827977657318115, + "learning_rate": 2.7535730751498388e-05, + "log_odds_chosen": 10.188173294067383, + "log_odds_ratio": -0.00014951504999771714, + "logits/chosen": -0.6818506121635437, + "logits/rejected": -0.7837034463882446, + "logps/chosen": -0.0002506999298930168, + "logps/rejected": -1.8527156114578247, + "loss": 0.9025, + "nll_loss": 0.22561517357826233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.506999226170592e-05, + "rewards/margins": 0.1852465122938156, + "rewards/rejected": -0.18527157604694366, + "step": 7293 + }, + { + "epoch": 5.044260027662517, + "grad_norm": 7.4346842765808105, + "learning_rate": 2.7531888735208237e-05, + "log_odds_chosen": 9.61988639831543, + "log_odds_ratio": -0.015739865601062775, + "logits/chosen": -0.6515390872955322, + "logits/rejected": -0.7199310064315796, + "logps/chosen": -0.0055860369466245174, + "logps/rejected": -1.8236140012741089, + "loss": 0.6803, + "nll_loss": 0.168507918715477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005586037295870483, + "rewards/margins": 0.18180277943611145, + "rewards/rejected": -0.1823614090681076, + "step": 7294 + }, + { + "epoch": 5.044951590594744, + "grad_norm": 15.84222412109375, + "learning_rate": 2.7528046718918086e-05, + "log_odds_chosen": 11.02806282043457, + "log_odds_ratio": -2.5386010747752152e-05, + "logits/chosen": -0.5460261702537537, + "logits/rejected": -0.6166951060295105, + "logps/chosen": -0.000791882339399308, + "logps/rejected": -2.6014323234558105, + "loss": 1.221, + "nll_loss": 0.3052525818347931, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.91882339399308e-05, + "rewards/margins": 0.260064035654068, + "rewards/rejected": -0.2601432204246521, + "step": 7295 + }, + { + "epoch": 5.045643153526971, + "grad_norm": 7.974617958068848, + "learning_rate": 2.7524204702627942e-05, + "log_odds_chosen": 11.188053131103516, + "log_odds_ratio": -5.614932888420299e-05, + "logits/chosen": -0.27668648958206177, + "logits/rejected": -0.37699154019355774, + "logps/chosen": -0.00037966459058225155, + "logps/rejected": -2.7652714252471924, + "loss": 1.0089, + "nll_loss": 0.25221163034439087, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.796645614784211e-05, + "rewards/margins": 0.27648916840553284, + "rewards/rejected": -0.27652713656425476, + "step": 7296 + }, + { + "epoch": 5.046334716459198, + "grad_norm": 19.900930404663086, + "learning_rate": 2.752036268633779e-05, + "log_odds_chosen": 11.485966682434082, + "log_odds_ratio": -2.118528209393844e-05, + "logits/chosen": -0.5126395225524902, + "logits/rejected": -0.601915717124939, + "logps/chosen": -0.0002069872571155429, + "logps/rejected": -2.5941524505615234, + "loss": 0.9578, + "nll_loss": 0.23944054543972015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0698724256362766e-05, + "rewards/margins": 0.2593945264816284, + "rewards/rejected": -0.25941523909568787, + "step": 7297 + }, + { + "epoch": 5.0470262793914245, + "grad_norm": 7.200878620147705, + "learning_rate": 2.751652067004764e-05, + "log_odds_chosen": 10.318074226379395, + "log_odds_ratio": -0.0001252438232768327, + "logits/chosen": -0.71943199634552, + "logits/rejected": -0.792367160320282, + "logps/chosen": -0.0020204363390803337, + "logps/rejected": -2.4491629600524902, + "loss": 0.6531, + "nll_loss": 0.16327083110809326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002020436222665012, + "rewards/margins": 0.24471423029899597, + "rewards/rejected": -0.24491627514362335, + "step": 7298 + }, + { + "epoch": 5.047717842323651, + "grad_norm": 7.3060221672058105, + "learning_rate": 2.7512678653757496e-05, + "log_odds_chosen": 9.833172798156738, + "log_odds_ratio": -0.00016102934023365378, + "logits/chosen": -0.6357143521308899, + "logits/rejected": -0.6339840888977051, + "logps/chosen": -0.0005614220863208175, + "logps/rejected": -2.1665122509002686, + "loss": 0.8519, + "nll_loss": 0.21295638382434845, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.61422057216987e-05, + "rewards/margins": 0.2165950983762741, + "rewards/rejected": -0.21665123105049133, + "step": 7299 + }, + { + "epoch": 5.048409405255878, + "grad_norm": 9.484922409057617, + "learning_rate": 2.7508836637467345e-05, + "log_odds_chosen": 9.575933456420898, + "log_odds_ratio": -0.0013263956643640995, + "logits/chosen": -0.818328320980072, + "logits/rejected": -0.7978004813194275, + "logps/chosen": -0.0018352242186665535, + "logps/rejected": -2.237055778503418, + "loss": 0.7705, + "nll_loss": 0.1924881637096405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001835224247770384, + "rewards/margins": 0.22352203726768494, + "rewards/rejected": -0.22370555996894836, + "step": 7300 + }, + { + "epoch": 5.049100968188105, + "grad_norm": 5.502103328704834, + "learning_rate": 2.7504994621177194e-05, + "log_odds_chosen": 11.25271987915039, + "log_odds_ratio": -2.2424899725592695e-05, + "logits/chosen": -0.7001416683197021, + "logits/rejected": -0.7362242341041565, + "logps/chosen": -0.00014827025006525218, + "logps/rejected": -2.2779436111450195, + "loss": 1.0947, + "nll_loss": 0.27368372678756714, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4827023733232636e-05, + "rewards/margins": 0.22777950763702393, + "rewards/rejected": -0.227794349193573, + "step": 7301 + }, + { + "epoch": 5.049792531120332, + "grad_norm": 8.534310340881348, + "learning_rate": 2.7501152604887047e-05, + "log_odds_chosen": 10.193739891052246, + "log_odds_ratio": -7.256300887092948e-05, + "logits/chosen": -0.5930569767951965, + "logits/rejected": -0.5903542637825012, + "logps/chosen": -0.004373773001134396, + "logps/rejected": -1.9866235256195068, + "loss": 0.7531, + "nll_loss": 0.18827757239341736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043737731175497174, + "rewards/margins": 0.1982249766588211, + "rewards/rejected": -0.19866235554218292, + "step": 7302 + }, + { + "epoch": 5.050484094052559, + "grad_norm": 10.663469314575195, + "learning_rate": 2.7497310588596896e-05, + "log_odds_chosen": 11.076746940612793, + "log_odds_ratio": -3.118633685517125e-05, + "logits/chosen": -0.9300976991653442, + "logits/rejected": -0.972251296043396, + "logps/chosen": -0.00029384903609752655, + "logps/rejected": -2.5527446269989014, + "loss": 0.7093, + "nll_loss": 0.1773204654455185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9384904337348416e-05, + "rewards/margins": 0.2552450895309448, + "rewards/rejected": -0.2552744746208191, + "step": 7303 + }, + { + "epoch": 5.051175656984785, + "grad_norm": 10.101683616638184, + "learning_rate": 2.7493468572306748e-05, + "log_odds_chosen": 9.031973838806152, + "log_odds_ratio": -0.00035060991649515927, + "logits/chosen": -0.14871618151664734, + "logits/rejected": -0.2351362407207489, + "logps/chosen": -0.000733660242985934, + "logps/rejected": -1.4281890392303467, + "loss": 0.7831, + "nll_loss": 0.19575095176696777, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.336602720897645e-05, + "rewards/margins": 0.14274555444717407, + "rewards/rejected": -0.14281892776489258, + "step": 7304 + }, + { + "epoch": 5.051867219917012, + "grad_norm": 7.80236291885376, + "learning_rate": 2.74896265560166e-05, + "log_odds_chosen": 10.683979034423828, + "log_odds_ratio": -4.006546805612743e-05, + "logits/chosen": -0.5397990345954895, + "logits/rejected": -0.6449406743049622, + "logps/chosen": -0.0005074103828519583, + "logps/rejected": -2.488528251647949, + "loss": 0.9742, + "nll_loss": 0.24354231357574463, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.07410331920255e-05, + "rewards/margins": 0.2488020658493042, + "rewards/rejected": -0.24885281920433044, + "step": 7305 + }, + { + "epoch": 5.052558782849239, + "grad_norm": 15.392332077026367, + "learning_rate": 2.748578453972645e-05, + "log_odds_chosen": 9.010686874389648, + "log_odds_ratio": -0.0005412200698629022, + "logits/chosen": -0.5475982427597046, + "logits/rejected": -0.5727043747901917, + "logps/chosen": -0.0009371995693072677, + "logps/rejected": -1.937116265296936, + "loss": 0.8893, + "nll_loss": 0.2222771793603897, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.371995110996068e-05, + "rewards/margins": 0.19361791014671326, + "rewards/rejected": -0.19371163845062256, + "step": 7306 + }, + { + "epoch": 5.053250345781466, + "grad_norm": 4.404837131500244, + "learning_rate": 2.74819425234363e-05, + "log_odds_chosen": 10.56973648071289, + "log_odds_ratio": -0.00021840019326191396, + "logits/chosen": -0.4324110448360443, + "logits/rejected": -0.4252541661262512, + "logps/chosen": -0.0003612586879171431, + "logps/rejected": -2.1879324913024902, + "loss": 0.7681, + "nll_loss": 0.19199644029140472, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6125871702097356e-05, + "rewards/margins": 0.21875713765621185, + "rewards/rejected": -0.21879325807094574, + "step": 7307 + }, + { + "epoch": 5.053941908713693, + "grad_norm": 5.678922176361084, + "learning_rate": 2.7478100507146154e-05, + "log_odds_chosen": 9.939447402954102, + "log_odds_ratio": -0.0010102377273142338, + "logits/chosen": -0.25071343779563904, + "logits/rejected": -0.2884729504585266, + "logps/chosen": -0.001003112643957138, + "logps/rejected": -1.7373353242874146, + "loss": 0.8569, + "nll_loss": 0.2141135334968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001003112702164799, + "rewards/margins": 0.17363321781158447, + "rewards/rejected": -0.17373353242874146, + "step": 7308 + }, + { + "epoch": 5.05463347164592, + "grad_norm": 7.243196487426758, + "learning_rate": 2.7474258490856004e-05, + "log_odds_chosen": 10.330777168273926, + "log_odds_ratio": -0.00013779370055999607, + "logits/chosen": -0.38157716393470764, + "logits/rejected": -0.43057459592819214, + "logps/chosen": -0.0005845414707437158, + "logps/rejected": -2.229933977127075, + "loss": 0.7357, + "nll_loss": 0.18391013145446777, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.845414489158429e-05, + "rewards/margins": 0.22293496131896973, + "rewards/rejected": -0.222993403673172, + "step": 7309 + }, + { + "epoch": 5.055325034578146, + "grad_norm": 10.608085632324219, + "learning_rate": 2.7470416474565853e-05, + "log_odds_chosen": 10.949653625488281, + "log_odds_ratio": -0.00029080972308292985, + "logits/chosen": -0.2496720403432846, + "logits/rejected": -0.3969630002975464, + "logps/chosen": -0.0003734467609319836, + "logps/rejected": -2.2889750003814697, + "loss": 0.9614, + "nll_loss": 0.2403145283460617, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7344674638006836e-05, + "rewards/margins": 0.22886013984680176, + "rewards/rejected": -0.22889748215675354, + "step": 7310 + }, + { + "epoch": 5.056016597510373, + "grad_norm": 6.699407577514648, + "learning_rate": 2.746657445827571e-05, + "log_odds_chosen": 8.956399917602539, + "log_odds_ratio": -0.001024306402541697, + "logits/chosen": -0.3196716904640198, + "logits/rejected": -0.3115221858024597, + "logps/chosen": -0.0013646759325638413, + "logps/rejected": -1.663041591644287, + "loss": 0.7016, + "nll_loss": 0.17530988156795502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013646761362906545, + "rewards/margins": 0.16616767644882202, + "rewards/rejected": -0.16630415618419647, + "step": 7311 + }, + { + "epoch": 5.0567081604426, + "grad_norm": 7.288957118988037, + "learning_rate": 2.7462732441985557e-05, + "log_odds_chosen": 10.09207534790039, + "log_odds_ratio": -0.003282478777691722, + "logits/chosen": -0.40368348360061646, + "logits/rejected": -0.5203957557678223, + "logps/chosen": -0.0042105019092559814, + "logps/rejected": -1.6452527046203613, + "loss": 0.7136, + "nll_loss": 0.17807680368423462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042105026659555733, + "rewards/margins": 0.16410420835018158, + "rewards/rejected": -0.16452525556087494, + "step": 7312 + }, + { + "epoch": 5.057399723374827, + "grad_norm": 6.980625629425049, + "learning_rate": 2.7458890425695407e-05, + "log_odds_chosen": 9.844117164611816, + "log_odds_ratio": -0.00035047222627326846, + "logits/chosen": -0.41656243801116943, + "logits/rejected": -0.37486982345581055, + "logps/chosen": -0.00019175885245203972, + "logps/rejected": -1.1824655532836914, + "loss": 0.8217, + "nll_loss": 0.2053995132446289, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.917588451760821e-05, + "rewards/margins": 0.11822737753391266, + "rewards/rejected": -0.11824654787778854, + "step": 7313 + }, + { + "epoch": 5.058091286307054, + "grad_norm": 13.701227188110352, + "learning_rate": 2.745504840940526e-05, + "log_odds_chosen": 11.325311660766602, + "log_odds_ratio": -0.00020471982134040445, + "logits/chosen": -0.414910227060318, + "logits/rejected": -0.5247098803520203, + "logps/chosen": -0.0010233953362330794, + "logps/rejected": -2.807422399520874, + "loss": 0.7551, + "nll_loss": 0.1887424886226654, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001023395307129249, + "rewards/margins": 0.2806398868560791, + "rewards/rejected": -0.28074222803115845, + "step": 7314 + }, + { + "epoch": 5.058782849239281, + "grad_norm": 10.800251007080078, + "learning_rate": 2.7451206393115108e-05, + "log_odds_chosen": 12.077896118164062, + "log_odds_ratio": -9.185761882690713e-06, + "logits/chosen": -0.5681400299072266, + "logits/rejected": -0.5691365003585815, + "logps/chosen": -0.0001330829836660996, + "logps/rejected": -2.8732047080993652, + "loss": 1.2474, + "nll_loss": 0.31185391545295715, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3308298548508901e-05, + "rewards/margins": 0.28730714321136475, + "rewards/rejected": -0.28732046484947205, + "step": 7315 + }, + { + "epoch": 5.059474412171507, + "grad_norm": 14.8779935836792, + "learning_rate": 2.7447364376824957e-05, + "log_odds_chosen": 10.254196166992188, + "log_odds_ratio": -7.926978287287056e-05, + "logits/chosen": -0.1750495433807373, + "logits/rejected": -0.273913711309433, + "logps/chosen": -0.0005362760275602341, + "logps/rejected": -2.002532482147217, + "loss": 0.845, + "nll_loss": 0.21123018860816956, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362760566640645e-05, + "rewards/margins": 0.20019961893558502, + "rewards/rejected": -0.20025324821472168, + "step": 7316 + }, + { + "epoch": 5.060165975103734, + "grad_norm": 6.954501152038574, + "learning_rate": 2.7443522360534813e-05, + "log_odds_chosen": 8.149320602416992, + "log_odds_ratio": -0.000663488288410008, + "logits/chosen": -0.4530641436576843, + "logits/rejected": -0.469480037689209, + "logps/chosen": -0.0008364081149920821, + "logps/rejected": -1.3134746551513672, + "loss": 1.0815, + "nll_loss": 0.2703148126602173, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.364080713363364e-05, + "rewards/margins": 0.1312638372182846, + "rewards/rejected": -0.13134747743606567, + "step": 7317 + }, + { + "epoch": 5.060857538035961, + "grad_norm": 5.969472885131836, + "learning_rate": 2.7439680344244662e-05, + "log_odds_chosen": 10.451945304870605, + "log_odds_ratio": -0.0001245027524419129, + "logits/chosen": -0.5547382831573486, + "logits/rejected": -0.5758868455886841, + "logps/chosen": -0.005893957335501909, + "logps/rejected": -2.719212770462036, + "loss": 0.8003, + "nll_loss": 0.20006687939167023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005893956986255944, + "rewards/margins": 0.27133187651634216, + "rewards/rejected": -0.2719212770462036, + "step": 7318 + }, + { + "epoch": 5.061549100968188, + "grad_norm": 13.521698951721191, + "learning_rate": 2.743583832795451e-05, + "log_odds_chosen": 9.812695503234863, + "log_odds_ratio": -0.00014395485050044954, + "logits/chosen": -0.48720940947532654, + "logits/rejected": -0.6409310102462769, + "logps/chosen": -0.0002551696088630706, + "logps/rejected": -1.7317194938659668, + "loss": 0.6948, + "nll_loss": 0.17368364334106445, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.551695979491342e-05, + "rewards/margins": 0.17314641177654266, + "rewards/rejected": -0.17317193746566772, + "step": 7319 + }, + { + "epoch": 5.062240663900415, + "grad_norm": 9.973909378051758, + "learning_rate": 2.7431996311664367e-05, + "log_odds_chosen": 9.408581733703613, + "log_odds_ratio": -0.0003007837221957743, + "logits/chosen": -0.9688934087753296, + "logits/rejected": -0.948418378829956, + "logps/chosen": -0.001488229027017951, + "logps/rejected": -1.7405766248703003, + "loss": 1.0078, + "nll_loss": 0.2519216537475586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001488229027017951, + "rewards/margins": 0.17390884459018707, + "rewards/rejected": -0.17405766248703003, + "step": 7320 + }, + { + "epoch": 5.0629322268326415, + "grad_norm": 16.34572410583496, + "learning_rate": 2.7428154295374216e-05, + "log_odds_chosen": 10.63037109375, + "log_odds_ratio": -0.0007433656137436628, + "logits/chosen": -0.5187327861785889, + "logits/rejected": -0.5670936107635498, + "logps/chosen": -0.0008166545303538442, + "logps/rejected": -2.5092692375183105, + "loss": 1.3585, + "nll_loss": 0.33956095576286316, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.166545012500137e-05, + "rewards/margins": 0.2508452534675598, + "rewards/rejected": -0.2509269118309021, + "step": 7321 + }, + { + "epoch": 5.063623789764868, + "grad_norm": 7.8338212966918945, + "learning_rate": 2.7424312279084065e-05, + "log_odds_chosen": 10.2802734375, + "log_odds_ratio": -6.797789683332667e-05, + "logits/chosen": -0.6781834959983826, + "logits/rejected": -0.8167017102241516, + "logps/chosen": -0.0003164065128657967, + "logps/rejected": -2.061220645904541, + "loss": 0.7272, + "nll_loss": 0.18178972601890564, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.164065128657967e-05, + "rewards/margins": 0.20609039068222046, + "rewards/rejected": -0.206122025847435, + "step": 7322 + }, + { + "epoch": 5.064315352697095, + "grad_norm": 10.21096134185791, + "learning_rate": 2.7420470262793917e-05, + "log_odds_chosen": 10.725156784057617, + "log_odds_ratio": -5.2025687182322145e-05, + "logits/chosen": -0.781559944152832, + "logits/rejected": -0.8966841697692871, + "logps/chosen": -0.00017686965293250978, + "logps/rejected": -1.9730557203292847, + "loss": 0.5743, + "nll_loss": 0.14357338845729828, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7686963474261574e-05, + "rewards/margins": 0.19728787243366241, + "rewards/rejected": -0.1973055601119995, + "step": 7323 + }, + { + "epoch": 5.065006915629322, + "grad_norm": 10.335928916931152, + "learning_rate": 2.7416628246503766e-05, + "log_odds_chosen": 10.741065979003906, + "log_odds_ratio": -0.000119962845928967, + "logits/chosen": -0.5748330354690552, + "logits/rejected": -0.6753748655319214, + "logps/chosen": -0.0003316613147035241, + "logps/rejected": -2.3247146606445312, + "loss": 1.0588, + "nll_loss": 0.26468318700790405, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.316613219794817e-05, + "rewards/margins": 0.23243829607963562, + "rewards/rejected": -0.23247148096561432, + "step": 7324 + }, + { + "epoch": 5.065698478561549, + "grad_norm": 5.846102714538574, + "learning_rate": 2.7412786230213616e-05, + "log_odds_chosen": 11.197221755981445, + "log_odds_ratio": -5.849684021086432e-05, + "logits/chosen": -0.5642470717430115, + "logits/rejected": -0.5235009789466858, + "logps/chosen": -0.0014971166383475065, + "logps/rejected": -2.859696388244629, + "loss": 1.5394, + "nll_loss": 0.38484257459640503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001497116609243676, + "rewards/margins": 0.2858199179172516, + "rewards/rejected": -0.28596964478492737, + "step": 7325 + }, + { + "epoch": 5.066390041493776, + "grad_norm": 6.826424598693848, + "learning_rate": 2.740894421392347e-05, + "log_odds_chosen": 10.296684265136719, + "log_odds_ratio": -0.0001223309664055705, + "logits/chosen": -0.4076666235923767, + "logits/rejected": -0.46820512413978577, + "logps/chosen": -0.0005055164219811559, + "logps/rejected": -2.137256622314453, + "loss": 0.6483, + "nll_loss": 0.16206586360931396, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.055164729128592e-05, + "rewards/margins": 0.2136751115322113, + "rewards/rejected": -0.21372565627098083, + "step": 7326 + }, + { + "epoch": 5.0670816044260025, + "grad_norm": 7.7066450119018555, + "learning_rate": 2.740510219763332e-05, + "log_odds_chosen": 11.011165618896484, + "log_odds_ratio": -0.001046941615641117, + "logits/chosen": -0.7362217903137207, + "logits/rejected": -0.830572247505188, + "logps/chosen": -0.004729542415589094, + "logps/rejected": -3.1200973987579346, + "loss": 0.6127, + "nll_loss": 0.1530696600675583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047295421245507896, + "rewards/margins": 0.3115368187427521, + "rewards/rejected": -0.3120097517967224, + "step": 7327 + }, + { + "epoch": 5.067773167358229, + "grad_norm": 14.417914390563965, + "learning_rate": 2.740126018134317e-05, + "log_odds_chosen": 10.635137557983398, + "log_odds_ratio": -6.574903090950102e-05, + "logits/chosen": -0.5258181095123291, + "logits/rejected": -0.568513035774231, + "logps/chosen": -0.00028529533301480114, + "logps/rejected": -2.3477580547332764, + "loss": 0.7924, + "nll_loss": 0.1981056034564972, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8529530027299188e-05, + "rewards/margins": 0.23474730551242828, + "rewards/rejected": -0.23477581143379211, + "step": 7328 + }, + { + "epoch": 5.068464730290456, + "grad_norm": 10.248303413391113, + "learning_rate": 2.7397418165053025e-05, + "log_odds_chosen": 10.010307312011719, + "log_odds_ratio": -0.0003728670999407768, + "logits/chosen": -0.05163934826850891, + "logits/rejected": -0.262073278427124, + "logps/chosen": -0.0023998182732611895, + "logps/rejected": -2.346627712249756, + "loss": 0.9532, + "nll_loss": 0.23825368285179138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002399818185949698, + "rewards/margins": 0.23442280292510986, + "rewards/rejected": -0.23466277122497559, + "step": 7329 + }, + { + "epoch": 5.069156293222683, + "grad_norm": 8.726041793823242, + "learning_rate": 2.7393576148762874e-05, + "log_odds_chosen": 10.123150825500488, + "log_odds_ratio": -6.815577944507822e-05, + "logits/chosen": -0.5572763085365295, + "logits/rejected": -0.5811817646026611, + "logps/chosen": -0.00027071969816461205, + "logps/rejected": -1.9652814865112305, + "loss": 0.6151, + "nll_loss": 0.15378038585186005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.707197199924849e-05, + "rewards/margins": 0.19650107622146606, + "rewards/rejected": -0.19652815163135529, + "step": 7330 + }, + { + "epoch": 5.06984785615491, + "grad_norm": 6.076335430145264, + "learning_rate": 2.7389734132472723e-05, + "log_odds_chosen": 10.358654975891113, + "log_odds_ratio": -0.0001471362920710817, + "logits/chosen": -0.6471813917160034, + "logits/rejected": -0.7003373503684998, + "logps/chosen": -0.0006569478427991271, + "logps/rejected": -1.9787683486938477, + "loss": 0.9182, + "nll_loss": 0.22953800857067108, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.569478864548728e-05, + "rewards/margins": 0.19781114161014557, + "rewards/rejected": -0.19787684082984924, + "step": 7331 + }, + { + "epoch": 5.070539419087137, + "grad_norm": 8.848478317260742, + "learning_rate": 2.7385892116182576e-05, + "log_odds_chosen": 10.515992164611816, + "log_odds_ratio": -8.050731412367895e-05, + "logits/chosen": -0.7456662058830261, + "logits/rejected": -0.645315945148468, + "logps/chosen": -0.0002744827070273459, + "logps/rejected": -1.9360021352767944, + "loss": 1.0445, + "nll_loss": 0.2611117362976074, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7448269975138828e-05, + "rewards/margins": 0.1935727596282959, + "rewards/rejected": -0.19360020756721497, + "step": 7332 + }, + { + "epoch": 5.0712309820193635, + "grad_norm": 6.7651472091674805, + "learning_rate": 2.7382050099892425e-05, + "log_odds_chosen": 9.035691261291504, + "log_odds_ratio": -0.0013945155078545213, + "logits/chosen": -0.1447266787290573, + "logits/rejected": -0.22644981741905212, + "logps/chosen": -0.0022594670299440622, + "logps/rejected": -1.7893898487091064, + "loss": 0.7243, + "nll_loss": 0.18092525005340576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002259467146359384, + "rewards/margins": 0.17871303856372833, + "rewards/rejected": -0.17893898487091064, + "step": 7333 + }, + { + "epoch": 5.07192254495159, + "grad_norm": 9.09512996673584, + "learning_rate": 2.7378208083602274e-05, + "log_odds_chosen": 11.089442253112793, + "log_odds_ratio": -2.5343755623907782e-05, + "logits/chosen": -0.3498130440711975, + "logits/rejected": -0.3135882019996643, + "logps/chosen": -0.00015191901184152812, + "logps/rejected": -2.361804485321045, + "loss": 0.9629, + "nll_loss": 0.24072042107582092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5191902093647514e-05, + "rewards/margins": 0.23616525530815125, + "rewards/rejected": -0.23618043959140778, + "step": 7334 + }, + { + "epoch": 5.072614107883817, + "grad_norm": 8.32583999633789, + "learning_rate": 2.737436606731213e-05, + "log_odds_chosen": 9.538285255432129, + "log_odds_ratio": -0.0002937999670393765, + "logits/chosen": -0.7607426047325134, + "logits/rejected": -0.8624207973480225, + "logps/chosen": -0.0005625184276141226, + "logps/rejected": -1.5791518688201904, + "loss": 0.7533, + "nll_loss": 0.1882893443107605, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.625184712698683e-05, + "rewards/margins": 0.1578589528799057, + "rewards/rejected": -0.15791520476341248, + "step": 7335 + }, + { + "epoch": 5.073305670816044, + "grad_norm": 9.384604454040527, + "learning_rate": 2.737052405102198e-05, + "log_odds_chosen": 9.253545761108398, + "log_odds_ratio": -0.006250377744436264, + "logits/chosen": -0.2119435966014862, + "logits/rejected": -0.2176445722579956, + "logps/chosen": -0.004084007814526558, + "logps/rejected": -1.9495849609375, + "loss": 0.8884, + "nll_loss": 0.22148054838180542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00040840081055648625, + "rewards/margins": 0.19455008208751678, + "rewards/rejected": -0.19495847821235657, + "step": 7336 + }, + { + "epoch": 5.073997233748271, + "grad_norm": 6.155551433563232, + "learning_rate": 2.7366682034731828e-05, + "log_odds_chosen": 11.532613754272461, + "log_odds_ratio": -2.104714985762257e-05, + "logits/chosen": -0.16297248005867004, + "logits/rejected": -0.21947115659713745, + "logps/chosen": -8.627733041066676e-05, + "logps/rejected": -2.2461464405059814, + "loss": 0.843, + "nll_loss": 0.21074417233467102, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.627734132460319e-06, + "rewards/margins": 0.22460602223873138, + "rewards/rejected": -0.22461465001106262, + "step": 7337 + }, + { + "epoch": 5.074688796680498, + "grad_norm": 4.333896636962891, + "learning_rate": 2.7362840018441684e-05, + "log_odds_chosen": 11.221064567565918, + "log_odds_ratio": -4.2140080040553585e-05, + "logits/chosen": -0.5966606140136719, + "logits/rejected": -0.6740995049476624, + "logps/chosen": -0.00012577198504004627, + "logps/rejected": -2.4205164909362793, + "loss": 0.6636, + "nll_loss": 0.1658967137336731, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2577198504004627e-05, + "rewards/margins": 0.24203908443450928, + "rewards/rejected": -0.2420516312122345, + "step": 7338 + }, + { + "epoch": 5.0753803596127245, + "grad_norm": 7.258881092071533, + "learning_rate": 2.7358998002151533e-05, + "log_odds_chosen": 9.731328964233398, + "log_odds_ratio": -0.0002475330838933587, + "logits/chosen": -0.43540024757385254, + "logits/rejected": -0.541413426399231, + "logps/chosen": -0.0006951102986931801, + "logps/rejected": -1.5809580087661743, + "loss": 0.8467, + "nll_loss": 0.21164801716804504, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.951102841412649e-05, + "rewards/margins": 0.1580262929201126, + "rewards/rejected": -0.1580958068370819, + "step": 7339 + }, + { + "epoch": 5.076071922544951, + "grad_norm": 11.20781135559082, + "learning_rate": 2.7355155985861382e-05, + "log_odds_chosen": 10.96607780456543, + "log_odds_ratio": -5.328706538421102e-05, + "logits/chosen": -0.6485635638237, + "logits/rejected": -0.6323709487915039, + "logps/chosen": -0.0002674778224900365, + "logps/rejected": -2.192117691040039, + "loss": 0.9264, + "nll_loss": 0.23160181939601898, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6747784431790933e-05, + "rewards/margins": 0.2191850244998932, + "rewards/rejected": -0.21921177208423615, + "step": 7340 + }, + { + "epoch": 5.076763485477178, + "grad_norm": 8.58651351928711, + "learning_rate": 2.7351313969571234e-05, + "log_odds_chosen": 10.983704566955566, + "log_odds_ratio": -6.344070425257087e-05, + "logits/chosen": -0.5228769779205322, + "logits/rejected": -0.5102678537368774, + "logps/chosen": -0.0003842473088297993, + "logps/rejected": -2.586697816848755, + "loss": 0.5418, + "nll_loss": 0.1354333907365799, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.842473233817145e-05, + "rewards/margins": 0.2586313486099243, + "rewards/rejected": -0.25866979360580444, + "step": 7341 + }, + { + "epoch": 5.077455048409405, + "grad_norm": 7.6569695472717285, + "learning_rate": 2.7347471953281083e-05, + "log_odds_chosen": 10.862194061279297, + "log_odds_ratio": -2.9732314942521043e-05, + "logits/chosen": -0.5924553871154785, + "logits/rejected": -0.6317382454872131, + "logps/chosen": -0.0003253734321333468, + "logps/rejected": -2.4295787811279297, + "loss": 0.6985, + "nll_loss": 0.1746188998222351, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.253734394093044e-05, + "rewards/margins": 0.24292536079883575, + "rewards/rejected": -0.24295789003372192, + "step": 7342 + }, + { + "epoch": 5.078146611341632, + "grad_norm": 9.416374206542969, + "learning_rate": 2.7343629936990932e-05, + "log_odds_chosen": 9.677162170410156, + "log_odds_ratio": -0.001104559632949531, + "logits/chosen": -0.07008127868175507, + "logits/rejected": -0.10235333442687988, + "logps/chosen": -0.0016829818487167358, + "logps/rejected": -2.11376953125, + "loss": 0.9175, + "nll_loss": 0.22925275564193726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016829819651320577, + "rewards/margins": 0.21120867133140564, + "rewards/rejected": -0.21137696504592896, + "step": 7343 + }, + { + "epoch": 5.078838174273859, + "grad_norm": 6.895681858062744, + "learning_rate": 2.733978792070078e-05, + "log_odds_chosen": 8.807646751403809, + "log_odds_ratio": -0.03008407913148403, + "logits/chosen": -0.398873507976532, + "logits/rejected": -0.3212706446647644, + "logps/chosen": -0.008143252693116665, + "logps/rejected": -1.5997668504714966, + "loss": 0.7238, + "nll_loss": 0.1779380440711975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008143253508023918, + "rewards/margins": 0.15916235744953156, + "rewards/rejected": -0.15997669100761414, + "step": 7344 + }, + { + "epoch": 5.0795297372060855, + "grad_norm": 11.062602996826172, + "learning_rate": 2.7335945904410637e-05, + "log_odds_chosen": 10.790531158447266, + "log_odds_ratio": -5.3668307373300195e-05, + "logits/chosen": -0.5789155960083008, + "logits/rejected": -0.5793518424034119, + "logps/chosen": -0.00012543403136078268, + "logps/rejected": -1.6924831867218018, + "loss": 0.9506, + "nll_loss": 0.23764421045780182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2543401680886745e-05, + "rewards/margins": 0.16923576593399048, + "rewards/rejected": -0.1692483127117157, + "step": 7345 + }, + { + "epoch": 5.080221300138312, + "grad_norm": 9.413389205932617, + "learning_rate": 2.7332103888120486e-05, + "log_odds_chosen": 10.895193099975586, + "log_odds_ratio": -2.4430100893368945e-05, + "logits/chosen": -0.7501481771469116, + "logits/rejected": -0.772050142288208, + "logps/chosen": -0.00023057861835695803, + "logps/rejected": -2.3615636825561523, + "loss": 0.9952, + "nll_loss": 0.24878647923469543, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3057862563291565e-05, + "rewards/margins": 0.23613335192203522, + "rewards/rejected": -0.2361564040184021, + "step": 7346 + }, + { + "epoch": 5.080912863070539, + "grad_norm": 7.451417446136475, + "learning_rate": 2.7328261871830335e-05, + "log_odds_chosen": 11.018407821655273, + "log_odds_ratio": -3.6289315175963566e-05, + "logits/chosen": -0.28731396794319153, + "logits/rejected": -0.30185893177986145, + "logps/chosen": -0.00014567398466169834, + "logps/rejected": -2.0808353424072266, + "loss": 0.905, + "nll_loss": 0.22625084221363068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4567398466169834e-05, + "rewards/margins": 0.20806896686553955, + "rewards/rejected": -0.20808354020118713, + "step": 7347 + }, + { + "epoch": 5.081604426002766, + "grad_norm": 5.167357444763184, + "learning_rate": 2.732441985554019e-05, + "log_odds_chosen": 10.189430236816406, + "log_odds_ratio": -0.00752821983769536, + "logits/chosen": -0.004103332757949829, + "logits/rejected": -0.07965162396430969, + "logps/chosen": -0.005710722412914038, + "logps/rejected": -2.303321361541748, + "loss": 0.7793, + "nll_loss": 0.19407445192337036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005710722180083394, + "rewards/margins": 0.2297610640525818, + "rewards/rejected": -0.230332151055336, + "step": 7348 + }, + { + "epoch": 5.082295988934993, + "grad_norm": 7.182365417480469, + "learning_rate": 2.732057783925004e-05, + "log_odds_chosen": 11.068100929260254, + "log_odds_ratio": -8.026784053072333e-05, + "logits/chosen": -1.1031522750854492, + "logits/rejected": -1.079147458076477, + "logps/chosen": -0.00028857134748250246, + "logps/rejected": -2.3670783042907715, + "loss": 0.7401, + "nll_loss": 0.18502512574195862, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8857135475846007e-05, + "rewards/margins": 0.23667895793914795, + "rewards/rejected": -0.23670782148838043, + "step": 7349 + }, + { + "epoch": 5.08298755186722, + "grad_norm": 7.975959300994873, + "learning_rate": 2.731673582295989e-05, + "log_odds_chosen": 11.034915924072266, + "log_odds_ratio": -2.3781507479725406e-05, + "logits/chosen": -0.046159759163856506, + "logits/rejected": -0.14203815162181854, + "logps/chosen": -0.00022653871565125883, + "logps/rejected": -2.30277419090271, + "loss": 0.9624, + "nll_loss": 0.24060288071632385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.265387047373224e-05, + "rewards/margins": 0.23025476932525635, + "rewards/rejected": -0.230277419090271, + "step": 7350 + }, + { + "epoch": 5.0836791147994465, + "grad_norm": 6.001211643218994, + "learning_rate": 2.7312893806669742e-05, + "log_odds_chosen": 8.615678787231445, + "log_odds_ratio": -0.0018858063267543912, + "logits/chosen": -0.41074180603027344, + "logits/rejected": -0.4159555733203888, + "logps/chosen": -0.002324719214811921, + "logps/rejected": -1.4358938932418823, + "loss": 0.831, + "nll_loss": 0.20755891501903534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023247190983965993, + "rewards/margins": 0.14335691928863525, + "rewards/rejected": -0.14358939230442047, + "step": 7351 + }, + { + "epoch": 5.084370677731673, + "grad_norm": 8.391213417053223, + "learning_rate": 2.730905179037959e-05, + "log_odds_chosen": 11.10920524597168, + "log_odds_ratio": -9.447715274291113e-05, + "logits/chosen": -0.3689385652542114, + "logits/rejected": -0.40068185329437256, + "logps/chosen": -0.006074007600545883, + "logps/rejected": -3.171781063079834, + "loss": 0.7202, + "nll_loss": 0.18004505336284637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006074007251299918, + "rewards/margins": 0.3165706992149353, + "rewards/rejected": -0.31717807054519653, + "step": 7352 + }, + { + "epoch": 5.0850622406639, + "grad_norm": 11.358319282531738, + "learning_rate": 2.730520977408944e-05, + "log_odds_chosen": 10.263338088989258, + "log_odds_ratio": -0.0001083470560843125, + "logits/chosen": -0.030622661113739014, + "logits/rejected": -0.029181431978940964, + "logps/chosen": -0.00030826477450318635, + "logps/rejected": -1.6327803134918213, + "loss": 0.6405, + "nll_loss": 0.1601022183895111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.082647890551016e-05, + "rewards/margins": 0.16324719786643982, + "rewards/rejected": -0.1632780283689499, + "step": 7353 + }, + { + "epoch": 5.085753803596127, + "grad_norm": 11.445735931396484, + "learning_rate": 2.7301367757799296e-05, + "log_odds_chosen": 9.003986358642578, + "log_odds_ratio": -0.114555723965168, + "logits/chosen": -0.5085049271583557, + "logits/rejected": -0.3850405812263489, + "logps/chosen": -0.01812593825161457, + "logps/rejected": -2.1957643032073975, + "loss": 1.3575, + "nll_loss": 0.32793062925338745, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0018125936621800065, + "rewards/margins": 0.21776384115219116, + "rewards/rejected": -0.21957644820213318, + "step": 7354 + }, + { + "epoch": 5.086445366528354, + "grad_norm": 5.323635101318359, + "learning_rate": 2.7297525741509145e-05, + "log_odds_chosen": 9.547505378723145, + "log_odds_ratio": -0.00031736362143419683, + "logits/chosen": -0.5309441685676575, + "logits/rejected": -0.579699695110321, + "logps/chosen": -0.00028090961859561503, + "logps/rejected": -1.4435864686965942, + "loss": 0.7717, + "nll_loss": 0.19289781153202057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8090962587157264e-05, + "rewards/margins": 0.14433056116104126, + "rewards/rejected": -0.14435866475105286, + "step": 7355 + }, + { + "epoch": 5.087136929460581, + "grad_norm": 6.2045698165893555, + "learning_rate": 2.7293683725218994e-05, + "log_odds_chosen": 11.104674339294434, + "log_odds_ratio": -3.998466127086431e-05, + "logits/chosen": -0.5010840892791748, + "logits/rejected": -0.5729426145553589, + "logps/chosen": -0.00047244172310456634, + "logps/rejected": -2.5478477478027344, + "loss": 0.8783, + "nll_loss": 0.21957121789455414, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.724417158286087e-05, + "rewards/margins": 0.25473752617836, + "rewards/rejected": -0.2547847628593445, + "step": 7356 + }, + { + "epoch": 5.087828492392807, + "grad_norm": 10.404763221740723, + "learning_rate": 2.728984170892885e-05, + "log_odds_chosen": 8.978584289550781, + "log_odds_ratio": -0.023966118693351746, + "logits/chosen": -0.5867160558700562, + "logits/rejected": -0.536573588848114, + "logps/chosen": -0.010090148076415062, + "logps/rejected": -1.3400592803955078, + "loss": 0.7064, + "nll_loss": 0.17420047521591187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010090150171890855, + "rewards/margins": 0.13299691677093506, + "rewards/rejected": -0.13400593400001526, + "step": 7357 + }, + { + "epoch": 5.088520055325034, + "grad_norm": 5.697388172149658, + "learning_rate": 2.72859996926387e-05, + "log_odds_chosen": 10.12976360321045, + "log_odds_ratio": -0.0003553791902959347, + "logits/chosen": -0.4104516804218292, + "logits/rejected": -0.41303762793540955, + "logps/chosen": -0.0001722878951113671, + "logps/rejected": -1.5895068645477295, + "loss": 0.8449, + "nll_loss": 0.2111791968345642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.722878914733883e-05, + "rewards/margins": 0.15893347561359406, + "rewards/rejected": -0.15895068645477295, + "step": 7358 + }, + { + "epoch": 5.089211618257261, + "grad_norm": 7.838433265686035, + "learning_rate": 2.7282157676348548e-05, + "log_odds_chosen": 10.014269828796387, + "log_odds_ratio": -0.0005774472956545651, + "logits/chosen": -0.841597855091095, + "logits/rejected": -0.8463683128356934, + "logps/chosen": -0.00038939566002227366, + "logps/rejected": -1.616848111152649, + "loss": 1.211, + "nll_loss": 0.30269354581832886, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.893956454703584e-05, + "rewards/margins": 0.16164587438106537, + "rewards/rejected": -0.1616848260164261, + "step": 7359 + }, + { + "epoch": 5.089903181189488, + "grad_norm": 10.013843536376953, + "learning_rate": 2.72783156600584e-05, + "log_odds_chosen": 9.890030860900879, + "log_odds_ratio": -0.0004454090667422861, + "logits/chosen": -0.6855981945991516, + "logits/rejected": -0.7428559064865112, + "logps/chosen": -0.000614183722063899, + "logps/rejected": -1.476334571838379, + "loss": 0.8343, + "nll_loss": 0.20853474736213684, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.141837366158143e-05, + "rewards/margins": 0.14757204055786133, + "rewards/rejected": -0.14763344824314117, + "step": 7360 + }, + { + "epoch": 5.090594744121715, + "grad_norm": 11.054221153259277, + "learning_rate": 2.727447364376825e-05, + "log_odds_chosen": 9.4026517868042, + "log_odds_ratio": -0.00012418953701853752, + "logits/chosen": -0.2300967276096344, + "logits/rejected": -0.28243911266326904, + "logps/chosen": -0.0014360551722347736, + "logps/rejected": -1.9370076656341553, + "loss": 0.6715, + "nll_loss": 0.1678735762834549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001436055317753926, + "rewards/margins": 0.19355714321136475, + "rewards/rejected": -0.19370076060295105, + "step": 7361 + }, + { + "epoch": 5.091286307053942, + "grad_norm": 5.798583984375, + "learning_rate": 2.72706316274781e-05, + "log_odds_chosen": 10.562169075012207, + "log_odds_ratio": -0.0001344050106126815, + "logits/chosen": -0.6078465580940247, + "logits/rejected": -0.6434661149978638, + "logps/chosen": -0.000248884956818074, + "logps/rejected": -2.1479055881500244, + "loss": 0.6105, + "nll_loss": 0.15260586142539978, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4888498955988325e-05, + "rewards/margins": 0.21476566791534424, + "rewards/rejected": -0.21479055285453796, + "step": 7362 + }, + { + "epoch": 5.091977869986168, + "grad_norm": 17.576988220214844, + "learning_rate": 2.7266789611187954e-05, + "log_odds_chosen": 9.467916488647461, + "log_odds_ratio": -0.0028660153038799763, + "logits/chosen": -0.6397823691368103, + "logits/rejected": -0.6845981478691101, + "logps/chosen": -0.022505810484290123, + "logps/rejected": -2.307908773422241, + "loss": 0.9296, + "nll_loss": 0.23210205137729645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002250581281259656, + "rewards/margins": 0.228540301322937, + "rewards/rejected": -0.2307908833026886, + "step": 7363 + }, + { + "epoch": 5.092669432918395, + "grad_norm": 7.059711933135986, + "learning_rate": 2.7262947594897803e-05, + "log_odds_chosen": 11.390132904052734, + "log_odds_ratio": -3.355491207912564e-05, + "logits/chosen": -0.42600852251052856, + "logits/rejected": -0.4258362650871277, + "logps/chosen": -0.013769086450338364, + "logps/rejected": -3.572200298309326, + "loss": 0.6484, + "nll_loss": 0.16209310293197632, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013769087381660938, + "rewards/margins": 0.35584312677383423, + "rewards/rejected": -0.35722002387046814, + "step": 7364 + }, + { + "epoch": 5.093360995850622, + "grad_norm": 6.885730266571045, + "learning_rate": 2.7259105578607652e-05, + "log_odds_chosen": 11.165336608886719, + "log_odds_ratio": -2.7307323762215674e-05, + "logits/chosen": -0.5768116116523743, + "logits/rejected": -0.5612725019454956, + "logps/chosen": -0.00014302022464107722, + "logps/rejected": -2.2993600368499756, + "loss": 0.6686, + "nll_loss": 0.16714143753051758, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4302024283097126e-05, + "rewards/margins": 0.22992171347141266, + "rewards/rejected": -0.22993600368499756, + "step": 7365 + }, + { + "epoch": 5.094052558782849, + "grad_norm": 5.898820877075195, + "learning_rate": 2.7255263562317508e-05, + "log_odds_chosen": 9.465486526489258, + "log_odds_ratio": -0.00036768452264368534, + "logits/chosen": -0.23210309445858002, + "logits/rejected": -0.25932738184928894, + "logps/chosen": -0.0003661748196464032, + "logps/rejected": -1.7317314147949219, + "loss": 0.7547, + "nll_loss": 0.1886340081691742, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.661748269223608e-05, + "rewards/margins": 0.17313653230667114, + "rewards/rejected": -0.17317314445972443, + "step": 7366 + }, + { + "epoch": 5.094744121715076, + "grad_norm": 5.994511127471924, + "learning_rate": 2.7251421546027357e-05, + "log_odds_chosen": 10.102039337158203, + "log_odds_ratio": -0.00019134912872686982, + "logits/chosen": -0.47489118576049805, + "logits/rejected": -0.5151061415672302, + "logps/chosen": -0.0013110683066770434, + "logps/rejected": -2.2649006843566895, + "loss": 1.1379, + "nll_loss": 0.28446272015571594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013110681902617216, + "rewards/margins": 0.22635895013809204, + "rewards/rejected": -0.2264900803565979, + "step": 7367 + }, + { + "epoch": 5.095435684647303, + "grad_norm": 8.04350757598877, + "learning_rate": 2.7247579529737206e-05, + "log_odds_chosen": 11.359281539916992, + "log_odds_ratio": -4.83084877487272e-05, + "logits/chosen": -0.5122644901275635, + "logits/rejected": -0.5524216890335083, + "logps/chosen": -0.00036712043220177293, + "logps/rejected": -2.3471951484680176, + "loss": 1.5251, + "nll_loss": 0.38128161430358887, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6712044675368816e-05, + "rewards/margins": 0.23468279838562012, + "rewards/rejected": -0.23471948504447937, + "step": 7368 + }, + { + "epoch": 5.096127247579529, + "grad_norm": 5.575632095336914, + "learning_rate": 2.724373751344706e-05, + "log_odds_chosen": 10.116594314575195, + "log_odds_ratio": -0.0032851833384484053, + "logits/chosen": -0.2861647605895996, + "logits/rejected": -0.35802027583122253, + "logps/chosen": -0.002376972232013941, + "logps/rejected": -2.0754401683807373, + "loss": 1.079, + "nll_loss": 0.26942795515060425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002376972115598619, + "rewards/margins": 0.20730631053447723, + "rewards/rejected": -0.2075439989566803, + "step": 7369 + }, + { + "epoch": 5.096818810511756, + "grad_norm": 9.367115020751953, + "learning_rate": 2.7239895497156908e-05, + "log_odds_chosen": 10.316278457641602, + "log_odds_ratio": -0.00023210124345496297, + "logits/chosen": -0.25167056918144226, + "logits/rejected": -0.30842748284339905, + "logps/chosen": -0.0013250727206468582, + "logps/rejected": -2.0884361267089844, + "loss": 0.7695, + "nll_loss": 0.1923428624868393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013250726624391973, + "rewards/margins": 0.20871111750602722, + "rewards/rejected": -0.20884361863136292, + "step": 7370 + }, + { + "epoch": 5.097510373443983, + "grad_norm": 8.676201820373535, + "learning_rate": 2.7236053480866757e-05, + "log_odds_chosen": 10.798030853271484, + "log_odds_ratio": -2.836514249793254e-05, + "logits/chosen": -0.15103256702423096, + "logits/rejected": -0.24257981777191162, + "logps/chosen": -0.00027271793805994093, + "logps/rejected": -2.4594173431396484, + "loss": 0.9249, + "nll_loss": 0.2312135100364685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7271795261185616e-05, + "rewards/margins": 0.24591445922851562, + "rewards/rejected": -0.24594172835350037, + "step": 7371 + }, + { + "epoch": 5.09820193637621, + "grad_norm": 6.839383602142334, + "learning_rate": 2.7232211464576613e-05, + "log_odds_chosen": 10.274839401245117, + "log_odds_ratio": -7.13355912012048e-05, + "logits/chosen": -0.4008041322231293, + "logits/rejected": -0.5498058795928955, + "logps/chosen": -0.00037555681774392724, + "logps/rejected": -2.326822280883789, + "loss": 0.9025, + "nll_loss": 0.22562111914157867, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.755568468477577e-05, + "rewards/margins": 0.2326447069644928, + "rewards/rejected": -0.2326822429895401, + "step": 7372 + }, + { + "epoch": 5.098893499308437, + "grad_norm": 5.040561676025391, + "learning_rate": 2.722836944828646e-05, + "log_odds_chosen": 10.802556991577148, + "log_odds_ratio": -4.267587428330444e-05, + "logits/chosen": -0.7771573066711426, + "logits/rejected": -0.7587347030639648, + "logps/chosen": -0.0003114896244369447, + "logps/rejected": -2.4623873233795166, + "loss": 0.9528, + "nll_loss": 0.23819348216056824, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1148963898885995e-05, + "rewards/margins": 0.246207594871521, + "rewards/rejected": -0.24623876810073853, + "step": 7373 + }, + { + "epoch": 5.0995850622406635, + "grad_norm": 8.665498733520508, + "learning_rate": 2.722452743199631e-05, + "log_odds_chosen": 10.829631805419922, + "log_odds_ratio": -7.544008258264512e-05, + "logits/chosen": -0.11957578361034393, + "logits/rejected": -0.16547317802906036, + "logps/chosen": -0.00036867347080260515, + "logps/rejected": -2.133676052093506, + "loss": 0.7278, + "nll_loss": 0.181938037276268, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6867346352664754e-05, + "rewards/margins": 0.21333074569702148, + "rewards/rejected": -0.21336761116981506, + "step": 7374 + }, + { + "epoch": 5.10027662517289, + "grad_norm": 6.870170593261719, + "learning_rate": 2.7220685415706167e-05, + "log_odds_chosen": 11.019067764282227, + "log_odds_ratio": -4.180756150162779e-05, + "logits/chosen": -0.6265878081321716, + "logits/rejected": -0.6470605134963989, + "logps/chosen": -0.0006792093627154827, + "logps/rejected": -2.668718099594116, + "loss": 0.6031, + "nll_loss": 0.1507745087146759, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.792093336116523e-05, + "rewards/margins": 0.26680392026901245, + "rewards/rejected": -0.2668718099594116, + "step": 7375 + }, + { + "epoch": 5.100968188105117, + "grad_norm": 5.347779273986816, + "learning_rate": 2.7216843399416016e-05, + "log_odds_chosen": 9.326358795166016, + "log_odds_ratio": -0.000986828817985952, + "logits/chosen": -0.5040815472602844, + "logits/rejected": -0.5364580750465393, + "logps/chosen": -0.0005646024364978075, + "logps/rejected": -1.3842899799346924, + "loss": 0.74, + "nll_loss": 0.1848924309015274, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.646024510497227e-05, + "rewards/margins": 0.138372540473938, + "rewards/rejected": -0.13842900097370148, + "step": 7376 + }, + { + "epoch": 5.101659751037344, + "grad_norm": 7.426041126251221, + "learning_rate": 2.7213001383125865e-05, + "log_odds_chosen": 11.608118057250977, + "log_odds_ratio": -1.4023098628968e-05, + "logits/chosen": -0.6992658972740173, + "logits/rejected": -0.7811527252197266, + "logps/chosen": -0.0001724398462101817, + "logps/rejected": -2.5273571014404297, + "loss": 0.6181, + "nll_loss": 0.15453001856803894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7243986803805456e-05, + "rewards/margins": 0.252718448638916, + "rewards/rejected": -0.2527356743812561, + "step": 7377 + }, + { + "epoch": 5.102351313969571, + "grad_norm": 5.961256980895996, + "learning_rate": 2.7209159366835717e-05, + "log_odds_chosen": 9.610187530517578, + "log_odds_ratio": -0.0005559841520152986, + "logits/chosen": -0.3288503587245941, + "logits/rejected": -0.42061495780944824, + "logps/chosen": -0.0010864774230867624, + "logps/rejected": -2.516143321990967, + "loss": 0.7354, + "nll_loss": 0.1837823987007141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010864774230867624, + "rewards/margins": 0.25150567293167114, + "rewards/rejected": -0.2516143321990967, + "step": 7378 + }, + { + "epoch": 5.103042876901798, + "grad_norm": 7.212077617645264, + "learning_rate": 2.7205317350545566e-05, + "log_odds_chosen": 9.604665756225586, + "log_odds_ratio": -0.0002615238190628588, + "logits/chosen": -0.39613234996795654, + "logits/rejected": -0.3910067081451416, + "logps/chosen": -0.00038741217576898634, + "logps/rejected": -1.587263584136963, + "loss": 0.91, + "nll_loss": 0.2274717539548874, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.87412219424732e-05, + "rewards/margins": 0.15868760645389557, + "rewards/rejected": -0.15872636437416077, + "step": 7379 + }, + { + "epoch": 5.1037344398340245, + "grad_norm": 7.643638610839844, + "learning_rate": 2.7201475334255415e-05, + "log_odds_chosen": 10.92262077331543, + "log_odds_ratio": -8.597999112680554e-05, + "logits/chosen": -0.23542064428329468, + "logits/rejected": -0.249238058924675, + "logps/chosen": -0.00019922290812246501, + "logps/rejected": -2.349911689758301, + "loss": 0.6484, + "nll_loss": 0.1620965451002121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9922288629459217e-05, + "rewards/margins": 0.23497125506401062, + "rewards/rejected": -0.234991192817688, + "step": 7380 + }, + { + "epoch": 5.104426002766251, + "grad_norm": 9.340903282165527, + "learning_rate": 2.719763331796527e-05, + "log_odds_chosen": 12.061431884765625, + "log_odds_ratio": -2.306019450770691e-05, + "logits/chosen": -0.42079007625579834, + "logits/rejected": -0.5221748948097229, + "logps/chosen": -0.00023580492415931076, + "logps/rejected": -3.28064227104187, + "loss": 1.0093, + "nll_loss": 0.25232964754104614, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3580492779728957e-05, + "rewards/margins": 0.32804062962532043, + "rewards/rejected": -0.3280642330646515, + "step": 7381 + }, + { + "epoch": 5.105117565698478, + "grad_norm": 6.661879062652588, + "learning_rate": 2.719379130167512e-05, + "log_odds_chosen": 10.242103576660156, + "log_odds_ratio": -4.760442243423313e-05, + "logits/chosen": -0.4171237349510193, + "logits/rejected": -0.3926810622215271, + "logps/chosen": -0.00015924654144328088, + "logps/rejected": -1.5589709281921387, + "loss": 0.5406, + "nll_loss": 0.13514304161071777, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.592465559951961e-05, + "rewards/margins": 0.15588116645812988, + "rewards/rejected": -0.1558970957994461, + "step": 7382 + }, + { + "epoch": 5.105809128630705, + "grad_norm": 9.575935363769531, + "learning_rate": 2.718994928538497e-05, + "log_odds_chosen": 11.292200088500977, + "log_odds_ratio": -3.7497273297049105e-05, + "logits/chosen": -0.48665758967399597, + "logits/rejected": -0.5950102806091309, + "logps/chosen": -0.0019470170373097062, + "logps/rejected": -2.7821574211120605, + "loss": 1.4374, + "nll_loss": 0.35933566093444824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019470170082058758, + "rewards/margins": 0.27802103757858276, + "rewards/rejected": -0.2782157361507416, + "step": 7383 + }, + { + "epoch": 5.106500691562932, + "grad_norm": 6.445347785949707, + "learning_rate": 2.7186107269094825e-05, + "log_odds_chosen": 10.10672378540039, + "log_odds_ratio": -6.40135767753236e-05, + "logits/chosen": -0.3899781405925751, + "logits/rejected": -0.4565275311470032, + "logps/chosen": -0.00048476149095222354, + "logps/rejected": -2.0757288932800293, + "loss": 0.617, + "nll_loss": 0.15423867106437683, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.847614764003083e-05, + "rewards/margins": 0.20752441883087158, + "rewards/rejected": -0.20757289230823517, + "step": 7384 + }, + { + "epoch": 5.107192254495159, + "grad_norm": 7.290511131286621, + "learning_rate": 2.7182265252804674e-05, + "log_odds_chosen": 10.069574356079102, + "log_odds_ratio": -0.00016815030539873987, + "logits/chosen": -0.5361185073852539, + "logits/rejected": -0.5250629186630249, + "logps/chosen": -0.0002581964072305709, + "logps/rejected": -1.5850675106048584, + "loss": 0.5924, + "nll_loss": 0.14807942509651184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5819641450652853e-05, + "rewards/margins": 0.1584809273481369, + "rewards/rejected": -0.15850675106048584, + "step": 7385 + }, + { + "epoch": 5.1078838174273855, + "grad_norm": 7.795459270477295, + "learning_rate": 2.7178423236514523e-05, + "log_odds_chosen": 10.93010139465332, + "log_odds_ratio": -4.356444696895778e-05, + "logits/chosen": -0.19619879126548767, + "logits/rejected": -0.21324469149112701, + "logps/chosen": -0.0002850884629879147, + "logps/rejected": -2.712125539779663, + "loss": 1.3588, + "nll_loss": 0.33969220519065857, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8508844479802065e-05, + "rewards/margins": 0.2711840271949768, + "rewards/rejected": -0.2712125778198242, + "step": 7386 + }, + { + "epoch": 5.108575380359612, + "grad_norm": 5.70391845703125, + "learning_rate": 2.7174581220224376e-05, + "log_odds_chosen": 10.274075508117676, + "log_odds_ratio": -0.00015871970390435308, + "logits/chosen": -0.36832761764526367, + "logits/rejected": -0.3919983506202698, + "logps/chosen": -0.00020493712509050965, + "logps/rejected": -1.8412601947784424, + "loss": 0.4894, + "nll_loss": 0.12234241515398026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.049371505563613e-05, + "rewards/margins": 0.1841055452823639, + "rewards/rejected": -0.18412603437900543, + "step": 7387 + }, + { + "epoch": 5.109266943291839, + "grad_norm": 68.21056365966797, + "learning_rate": 2.7170739203934225e-05, + "log_odds_chosen": 8.288837432861328, + "log_odds_ratio": -0.08500274270772934, + "logits/chosen": -0.46375998854637146, + "logits/rejected": -0.4677826464176178, + "logps/chosen": -0.012799981981515884, + "logps/rejected": -1.4783885478973389, + "loss": 0.9068, + "nll_loss": 0.21818780899047852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012799982214346528, + "rewards/margins": 0.14655886590480804, + "rewards/rejected": -0.14783886075019836, + "step": 7388 + }, + { + "epoch": 5.109958506224066, + "grad_norm": 5.482029438018799, + "learning_rate": 2.7166897187644074e-05, + "log_odds_chosen": 10.815271377563477, + "log_odds_ratio": -4.882266148342751e-05, + "logits/chosen": -0.7241552472114563, + "logits/rejected": -0.8382655382156372, + "logps/chosen": -0.00020358421897981316, + "logps/rejected": -2.069589138031006, + "loss": 0.6462, + "nll_loss": 0.16153374314308167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0358422261779197e-05, + "rewards/margins": 0.20693853497505188, + "rewards/rejected": -0.20695888996124268, + "step": 7389 + }, + { + "epoch": 5.110650069156293, + "grad_norm": 6.103107929229736, + "learning_rate": 2.716305517135393e-05, + "log_odds_chosen": 9.955148696899414, + "log_odds_ratio": -0.0003933612897526473, + "logits/chosen": -0.18130913376808167, + "logits/rejected": -0.1627679169178009, + "logps/chosen": -0.0005892410408705473, + "logps/rejected": -1.9700955152511597, + "loss": 0.4972, + "nll_loss": 0.12425161898136139, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892410263186321e-05, + "rewards/margins": 0.19695061445236206, + "rewards/rejected": -0.19700954854488373, + "step": 7390 + }, + { + "epoch": 5.11134163208852, + "grad_norm": 11.400738716125488, + "learning_rate": 2.715921315506378e-05, + "log_odds_chosen": 10.500256538391113, + "log_odds_ratio": -8.448248263448477e-05, + "logits/chosen": -0.44659101963043213, + "logits/rejected": -0.5452315211296082, + "logps/chosen": -0.00030397262889891863, + "logps/rejected": -2.122675657272339, + "loss": 0.6142, + "nll_loss": 0.15353217720985413, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0397262889891863e-05, + "rewards/margins": 0.212237149477005, + "rewards/rejected": -0.21226757764816284, + "step": 7391 + }, + { + "epoch": 5.1120331950207465, + "grad_norm": 9.719880104064941, + "learning_rate": 2.7155371138773628e-05, + "log_odds_chosen": 10.642242431640625, + "log_odds_ratio": -3.789024049183354e-05, + "logits/chosen": -0.05491916835308075, + "logits/rejected": -0.18953341245651245, + "logps/chosen": -0.004866994917392731, + "logps/rejected": -3.1003847122192383, + "loss": 0.9004, + "nll_loss": 0.22509226202964783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004866994568146765, + "rewards/margins": 0.30955177545547485, + "rewards/rejected": -0.3100384771823883, + "step": 7392 + }, + { + "epoch": 5.112724757952973, + "grad_norm": 7.443755149841309, + "learning_rate": 2.7151529122483483e-05, + "log_odds_chosen": 10.646068572998047, + "log_odds_ratio": -4.681744030676782e-05, + "logits/chosen": -0.2412424087524414, + "logits/rejected": -0.2810656428337097, + "logps/chosen": -0.0002580955915618688, + "logps/rejected": -1.8962551355361938, + "loss": 0.7401, + "nll_loss": 0.185030996799469, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.58095606113784e-05, + "rewards/margins": 0.18959970772266388, + "rewards/rejected": -0.18962550163269043, + "step": 7393 + }, + { + "epoch": 5.1134163208852, + "grad_norm": 13.804279327392578, + "learning_rate": 2.7147687106193332e-05, + "log_odds_chosen": 9.380505561828613, + "log_odds_ratio": -0.003324545454233885, + "logits/chosen": -0.2428872287273407, + "logits/rejected": -0.3313140869140625, + "logps/chosen": -0.0022368980571627617, + "logps/rejected": -2.006077766418457, + "loss": 0.9655, + "nll_loss": 0.24104931950569153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002236898144474253, + "rewards/margins": 0.20038409531116486, + "rewards/rejected": -0.2006077766418457, + "step": 7394 + }, + { + "epoch": 5.114107883817427, + "grad_norm": 9.437492370605469, + "learning_rate": 2.714384508990318e-05, + "log_odds_chosen": 11.143658638000488, + "log_odds_ratio": -0.0001585199497640133, + "logits/chosen": -0.45222657918930054, + "logits/rejected": -0.5829071998596191, + "logps/chosen": -0.00032591738272458315, + "logps/rejected": -2.8368942737579346, + "loss": 0.6586, + "nll_loss": 0.16463643312454224, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2591739000054076e-05, + "rewards/margins": 0.2836568355560303, + "rewards/rejected": -0.2836894392967224, + "step": 7395 + }, + { + "epoch": 5.114799446749654, + "grad_norm": 6.6503753662109375, + "learning_rate": 2.7140003073613034e-05, + "log_odds_chosen": 9.678725242614746, + "log_odds_ratio": -0.00023803582007531077, + "logits/chosen": -0.3980298638343811, + "logits/rejected": -0.35562199354171753, + "logps/chosen": -0.0006691630696877837, + "logps/rejected": -1.5765386819839478, + "loss": 1.2738, + "nll_loss": 0.31843432784080505, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.691631278954446e-05, + "rewards/margins": 0.157586932182312, + "rewards/rejected": -0.15765386819839478, + "step": 7396 + }, + { + "epoch": 5.115491009681881, + "grad_norm": 5.985082626342773, + "learning_rate": 2.7136161057322883e-05, + "log_odds_chosen": 9.743041038513184, + "log_odds_ratio": -0.0058269198052585125, + "logits/chosen": -0.20725753903388977, + "logits/rejected": -0.20159000158309937, + "logps/chosen": -0.03767280653119087, + "logps/rejected": -1.575268030166626, + "loss": 0.8821, + "nll_loss": 0.2199416160583496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037672806065529585, + "rewards/margins": 0.15375953912734985, + "rewards/rejected": -0.15752682089805603, + "step": 7397 + }, + { + "epoch": 5.1161825726141075, + "grad_norm": 9.815032958984375, + "learning_rate": 2.7132319041032732e-05, + "log_odds_chosen": 9.699831008911133, + "log_odds_ratio": -0.0003538834862411022, + "logits/chosen": -0.39152687788009644, + "logits/rejected": -0.4046846032142639, + "logps/chosen": -0.0007243537111207843, + "logps/rejected": -1.5959980487823486, + "loss": 0.8071, + "nll_loss": 0.20174673199653625, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.243537402246147e-05, + "rewards/margins": 0.15952739119529724, + "rewards/rejected": -0.15959982573986053, + "step": 7398 + }, + { + "epoch": 5.116874135546334, + "grad_norm": 7.80418062210083, + "learning_rate": 2.7128477024742588e-05, + "log_odds_chosen": 9.415637969970703, + "log_odds_ratio": -0.00019955117022618651, + "logits/chosen": -0.3784105181694031, + "logits/rejected": -0.3838611841201782, + "logps/chosen": -0.00038098107324913144, + "logps/rejected": -1.6047699451446533, + "loss": 0.5059, + "nll_loss": 0.12646718323230743, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809811096289195e-05, + "rewards/margins": 0.1604388952255249, + "rewards/rejected": -0.16047699749469757, + "step": 7399 + }, + { + "epoch": 5.117565698478561, + "grad_norm": 6.374980926513672, + "learning_rate": 2.7124635008452437e-05, + "log_odds_chosen": 10.56325626373291, + "log_odds_ratio": -4.551166057353839e-05, + "logits/chosen": 0.02074243128299713, + "logits/rejected": -0.06978891789913177, + "logps/chosen": -0.0002741274074651301, + "logps/rejected": -2.0129191875457764, + "loss": 0.5533, + "nll_loss": 0.13831853866577148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7412745112087578e-05, + "rewards/margins": 0.20126450061798096, + "rewards/rejected": -0.20129193365573883, + "step": 7400 + }, + { + "epoch": 5.118257261410788, + "grad_norm": 11.832037925720215, + "learning_rate": 2.7120792992162286e-05, + "log_odds_chosen": 10.429668426513672, + "log_odds_ratio": -0.0001599583774805069, + "logits/chosen": -0.6950970888137817, + "logits/rejected": -0.7039766311645508, + "logps/chosen": -0.0002300713094882667, + "logps/rejected": -1.8066484928131104, + "loss": 0.5992, + "nll_loss": 0.14977310597896576, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.300713094882667e-05, + "rewards/margins": 0.18064185976982117, + "rewards/rejected": -0.18066485226154327, + "step": 7401 + }, + { + "epoch": 5.118948824343015, + "grad_norm": 6.376369476318359, + "learning_rate": 2.7116950975872142e-05, + "log_odds_chosen": 10.112187385559082, + "log_odds_ratio": -0.0005594309768639505, + "logits/chosen": -0.2071499228477478, + "logits/rejected": -0.26662901043891907, + "logps/chosen": -0.0005525099113583565, + "logps/rejected": -2.110633373260498, + "loss": 0.8324, + "nll_loss": 0.208037868142128, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.525099186343141e-05, + "rewards/margins": 0.21100810170173645, + "rewards/rejected": -0.21106334030628204, + "step": 7402 + }, + { + "epoch": 5.119640387275242, + "grad_norm": 9.686321258544922, + "learning_rate": 2.711310895958199e-05, + "log_odds_chosen": 10.839088439941406, + "log_odds_ratio": -0.00028093927539885044, + "logits/chosen": 0.02540695294737816, + "logits/rejected": -0.06891262531280518, + "logps/chosen": -0.0037065306678414345, + "logps/rejected": -3.1824331283569336, + "loss": 0.8034, + "nll_loss": 0.20082277059555054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003706530842464417, + "rewards/margins": 0.31787267327308655, + "rewards/rejected": -0.3182433247566223, + "step": 7403 + }, + { + "epoch": 5.1203319502074685, + "grad_norm": 8.942167282104492, + "learning_rate": 2.710926694329184e-05, + "log_odds_chosen": 10.840582847595215, + "log_odds_ratio": -8.063986024353653e-05, + "logits/chosen": -0.23275119066238403, + "logits/rejected": -0.25833365321159363, + "logps/chosen": -0.012653451412916183, + "logps/rejected": -2.892709970474243, + "loss": 1.3948, + "nll_loss": 0.34869325160980225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012653451412916183, + "rewards/margins": 0.28800565004348755, + "rewards/rejected": -0.2892709970474243, + "step": 7404 + }, + { + "epoch": 5.121023513139695, + "grad_norm": 8.973079681396484, + "learning_rate": 2.7105424927001692e-05, + "log_odds_chosen": 11.203123092651367, + "log_odds_ratio": -2.6533847631071694e-05, + "logits/chosen": -0.4045684337615967, + "logits/rejected": -0.4558801054954529, + "logps/chosen": -8.019142842385918e-05, + "logps/rejected": -1.9351506233215332, + "loss": 0.5621, + "nll_loss": 0.1405208706855774, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.019143024284858e-06, + "rewards/margins": 0.19350704550743103, + "rewards/rejected": -0.19351506233215332, + "step": 7405 + }, + { + "epoch": 5.121715076071922, + "grad_norm": 12.250910758972168, + "learning_rate": 2.710158291071154e-05, + "log_odds_chosen": 9.914047241210938, + "log_odds_ratio": -0.00020869742729701102, + "logits/chosen": -0.42722779512405396, + "logits/rejected": -0.46873965859413147, + "logps/chosen": -0.0006569348042830825, + "logps/rejected": -1.443729043006897, + "loss": 0.8885, + "nll_loss": 0.22209203243255615, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.569348624907434e-05, + "rewards/margins": 0.1443072259426117, + "rewards/rejected": -0.14437291026115417, + "step": 7406 + }, + { + "epoch": 5.122406639004149, + "grad_norm": 9.799444198608398, + "learning_rate": 2.709774089442139e-05, + "log_odds_chosen": 9.920234680175781, + "log_odds_ratio": -0.0002802509116008878, + "logits/chosen": -0.10245361179113388, + "logits/rejected": -0.1588355451822281, + "logps/chosen": -0.0006374450167641044, + "logps/rejected": -2.294321298599243, + "loss": 0.7491, + "nll_loss": 0.18724007904529572, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.374450458679348e-05, + "rewards/margins": 0.22936837375164032, + "rewards/rejected": -0.2294321358203888, + "step": 7407 + }, + { + "epoch": 5.123098201936376, + "grad_norm": 7.869354248046875, + "learning_rate": 2.7093898878131246e-05, + "log_odds_chosen": 10.49201774597168, + "log_odds_ratio": -3.221644146833569e-05, + "logits/chosen": -0.8095996975898743, + "logits/rejected": -0.8921639919281006, + "logps/chosen": -0.00012403872096911073, + "logps/rejected": -1.7007167339324951, + "loss": 0.8265, + "nll_loss": 0.20662826299667358, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2403872460708953e-05, + "rewards/margins": 0.17005929350852966, + "rewards/rejected": -0.17007167637348175, + "step": 7408 + }, + { + "epoch": 5.123789764868603, + "grad_norm": 13.822428703308105, + "learning_rate": 2.7090056861841095e-05, + "log_odds_chosen": 10.582257270812988, + "log_odds_ratio": -8.250211976701394e-05, + "logits/chosen": -0.2686958611011505, + "logits/rejected": -0.37263888120651245, + "logps/chosen": -0.00015262029774021357, + "logps/rejected": -1.6685211658477783, + "loss": 0.8688, + "nll_loss": 0.21719194948673248, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.526203050161712e-05, + "rewards/margins": 0.16683685779571533, + "rewards/rejected": -0.16685211658477783, + "step": 7409 + }, + { + "epoch": 5.124481327800829, + "grad_norm": 6.8588714599609375, + "learning_rate": 2.7086214845550944e-05, + "log_odds_chosen": 9.660439491271973, + "log_odds_ratio": -0.00010367112554376945, + "logits/chosen": -0.35745152831077576, + "logits/rejected": -0.3240693211555481, + "logps/chosen": -0.0005797590129077435, + "logps/rejected": -1.9160172939300537, + "loss": 0.8896, + "nll_loss": 0.22238633036613464, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7975896197604015e-05, + "rewards/margins": 0.19154374301433563, + "rewards/rejected": -0.1916017234325409, + "step": 7410 + }, + { + "epoch": 5.125172890733056, + "grad_norm": 6.4284138679504395, + "learning_rate": 2.70823728292608e-05, + "log_odds_chosen": 9.837509155273438, + "log_odds_ratio": -0.00026661824085749686, + "logits/chosen": -0.2379743456840515, + "logits/rejected": -0.26656097173690796, + "logps/chosen": -0.0002663254563231021, + "logps/rejected": -1.4803067445755005, + "loss": 0.812, + "nll_loss": 0.202966570854187, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6632545996108092e-05, + "rewards/margins": 0.14800405502319336, + "rewards/rejected": -0.14803066849708557, + "step": 7411 + }, + { + "epoch": 5.125864453665283, + "grad_norm": 4.05260705947876, + "learning_rate": 2.707853081297065e-05, + "log_odds_chosen": 9.609614372253418, + "log_odds_ratio": -0.00021065973851364106, + "logits/chosen": -0.36739036440849304, + "logits/rejected": -0.41593652963638306, + "logps/chosen": -0.0002198894217144698, + "logps/rejected": -1.3649433851242065, + "loss": 0.6855, + "nll_loss": 0.1713617742061615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1988940716255456e-05, + "rewards/margins": 0.13647235929965973, + "rewards/rejected": -0.13649435341358185, + "step": 7412 + }, + { + "epoch": 5.12655601659751, + "grad_norm": 14.065872192382812, + "learning_rate": 2.70746887966805e-05, + "log_odds_chosen": 10.061935424804688, + "log_odds_ratio": -0.00015091894601937383, + "logits/chosen": -0.4934547543525696, + "logits/rejected": -0.4658409357070923, + "logps/chosen": -0.00045286474050953984, + "logps/rejected": -1.942396879196167, + "loss": 0.5004, + "nll_loss": 0.12507638335227966, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528647332335822e-05, + "rewards/margins": 0.19419440627098083, + "rewards/rejected": -0.19423969089984894, + "step": 7413 + }, + { + "epoch": 5.127247579529737, + "grad_norm": 9.801753044128418, + "learning_rate": 2.707084678039035e-05, + "log_odds_chosen": 9.888916015625, + "log_odds_ratio": -0.0004568792355712503, + "logits/chosen": -0.6383622884750366, + "logits/rejected": -0.6947815418243408, + "logps/chosen": -0.00046679971273988485, + "logps/rejected": -1.9457203149795532, + "loss": 0.5612, + "nll_loss": 0.14025507867336273, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6679975639563054e-05, + "rewards/margins": 0.194525346159935, + "rewards/rejected": -0.19457201659679413, + "step": 7414 + }, + { + "epoch": 5.127939142461964, + "grad_norm": 6.193912982940674, + "learning_rate": 2.70670047641002e-05, + "log_odds_chosen": 9.266860961914062, + "log_odds_ratio": -0.0017042263643816113, + "logits/chosen": -0.3525208830833435, + "logits/rejected": -0.4817659258842468, + "logps/chosen": -0.001416980056092143, + "logps/rejected": -1.485273003578186, + "loss": 0.7362, + "nll_loss": 0.18388216197490692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014169800851959735, + "rewards/margins": 0.14838561415672302, + "rewards/rejected": -0.14852730929851532, + "step": 7415 + }, + { + "epoch": 5.12863070539419, + "grad_norm": 12.201619148254395, + "learning_rate": 2.706316274781005e-05, + "log_odds_chosen": 10.946922302246094, + "log_odds_ratio": -3.287233994342387e-05, + "logits/chosen": -0.041007209569215775, + "logits/rejected": -0.1741712987422943, + "logps/chosen": -0.00024743779795244336, + "logps/rejected": -2.4667646884918213, + "loss": 0.7509, + "nll_loss": 0.18771187961101532, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4743778340052813e-05, + "rewards/margins": 0.24665173888206482, + "rewards/rejected": -0.2466764748096466, + "step": 7416 + }, + { + "epoch": 5.129322268326418, + "grad_norm": 7.043701171875, + "learning_rate": 2.7059320731519905e-05, + "log_odds_chosen": 10.921808242797852, + "log_odds_ratio": -3.566598388715647e-05, + "logits/chosen": -0.6520669460296631, + "logits/rejected": -0.744756817817688, + "logps/chosen": -0.00022421804897021502, + "logps/rejected": -1.977895975112915, + "loss": 0.6564, + "nll_loss": 0.1640891134738922, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.242180380562786e-05, + "rewards/margins": 0.19776716828346252, + "rewards/rejected": -0.19778959453105927, + "step": 7417 + }, + { + "epoch": 5.130013831258645, + "grad_norm": 8.132904052734375, + "learning_rate": 2.7055478715229754e-05, + "log_odds_chosen": 9.147832870483398, + "log_odds_ratio": -0.0003267722495365888, + "logits/chosen": -0.30655428767204285, + "logits/rejected": -0.38167956471443176, + "logps/chosen": -0.0005435227649286389, + "logps/rejected": -1.5238276720046997, + "loss": 0.6515, + "nll_loss": 0.16284248232841492, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4352272854885086e-05, + "rewards/margins": 0.15232841670513153, + "rewards/rejected": -0.1523827612400055, + "step": 7418 + }, + { + "epoch": 5.130705394190872, + "grad_norm": 12.80761432647705, + "learning_rate": 2.7051636698939603e-05, + "log_odds_chosen": 10.520578384399414, + "log_odds_ratio": -0.0029854183085262775, + "logits/chosen": -0.4390121400356293, + "logits/rejected": -0.5808690786361694, + "logps/chosen": -0.0020407303236424923, + "logps/rejected": -3.0300936698913574, + "loss": 0.8269, + "nll_loss": 0.20642107725143433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020407306146807969, + "rewards/margins": 0.3028053045272827, + "rewards/rejected": -0.30300936102867126, + "step": 7419 + }, + { + "epoch": 5.131396957123099, + "grad_norm": 10.896114349365234, + "learning_rate": 2.704779468264946e-05, + "log_odds_chosen": 9.920369148254395, + "log_odds_ratio": -0.0003378927940502763, + "logits/chosen": -0.8067368865013123, + "logits/rejected": -0.8925114870071411, + "logps/chosen": -0.0007252587238326669, + "logps/rejected": -1.8626281023025513, + "loss": 1.059, + "nll_loss": 0.2647111713886261, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.252587238326669e-05, + "rewards/margins": 0.18619027733802795, + "rewards/rejected": -0.1862628161907196, + "step": 7420 + }, + { + "epoch": 5.1320885200553255, + "grad_norm": 9.972339630126953, + "learning_rate": 2.7043952666359308e-05, + "log_odds_chosen": 9.74928092956543, + "log_odds_ratio": -0.0004444057121872902, + "logits/chosen": -0.3922783136367798, + "logits/rejected": -0.4056027829647064, + "logps/chosen": -0.0012801011325791478, + "logps/rejected": -2.130465507507324, + "loss": 0.9444, + "nll_loss": 0.23606690764427185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012801012780983, + "rewards/margins": 0.21291851997375488, + "rewards/rejected": -0.21304653584957123, + "step": 7421 + }, + { + "epoch": 5.132780082987552, + "grad_norm": 12.015710830688477, + "learning_rate": 2.7040110650069157e-05, + "log_odds_chosen": 10.567075729370117, + "log_odds_ratio": -8.119967969832942e-05, + "logits/chosen": -0.2184918373823166, + "logits/rejected": -0.3011958599090576, + "logps/chosen": -0.008143655024468899, + "logps/rejected": -2.781130790710449, + "loss": 0.8808, + "nll_loss": 0.2202032208442688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000814365572296083, + "rewards/margins": 0.2772987484931946, + "rewards/rejected": -0.27811309695243835, + "step": 7422 + }, + { + "epoch": 5.133471645919779, + "grad_norm": 5.104509353637695, + "learning_rate": 2.703626863377901e-05, + "log_odds_chosen": 9.874157905578613, + "log_odds_ratio": -0.00014664784248452634, + "logits/chosen": -0.47639042139053345, + "logits/rejected": -0.47348520159721375, + "logps/chosen": -0.0013921656645834446, + "logps/rejected": -2.016475200653076, + "loss": 0.592, + "nll_loss": 0.14799407124519348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013921657227911055, + "rewards/margins": 0.2015082985162735, + "rewards/rejected": -0.20164752006530762, + "step": 7423 + }, + { + "epoch": 5.134163208852006, + "grad_norm": 7.860300540924072, + "learning_rate": 2.703242661748886e-05, + "log_odds_chosen": 9.348637580871582, + "log_odds_ratio": -0.001988054485991597, + "logits/chosen": -0.6254793405532837, + "logits/rejected": -0.6899417042732239, + "logps/chosen": -0.00868395809084177, + "logps/rejected": -1.6159273386001587, + "loss": 0.8476, + "nll_loss": 0.21169663965702057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008683958440087736, + "rewards/margins": 0.16072434186935425, + "rewards/rejected": -0.16159272193908691, + "step": 7424 + }, + { + "epoch": 5.134854771784233, + "grad_norm": 10.588888168334961, + "learning_rate": 2.7028584601198707e-05, + "log_odds_chosen": 10.115678787231445, + "log_odds_ratio": -0.0012466337066143751, + "logits/chosen": -0.4161772131919861, + "logits/rejected": -0.4731995463371277, + "logps/chosen": -0.0017289479728788137, + "logps/rejected": -1.8897874355316162, + "loss": 0.7414, + "nll_loss": 0.18522275984287262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017289479728788137, + "rewards/margins": 0.18880584836006165, + "rewards/rejected": -0.18897874653339386, + "step": 7425 + }, + { + "epoch": 5.13554633471646, + "grad_norm": 4.885624408721924, + "learning_rate": 2.7024742584908563e-05, + "log_odds_chosen": 10.32637882232666, + "log_odds_ratio": -0.00019028893439099193, + "logits/chosen": -0.4755975604057312, + "logits/rejected": -0.5734898447990417, + "logps/chosen": -0.00046299712266772985, + "logps/rejected": -2.3506150245666504, + "loss": 2.0045, + "nll_loss": 0.5011166930198669, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6299712266772985e-05, + "rewards/margins": 0.23501522839069366, + "rewards/rejected": -0.23506152629852295, + "step": 7426 + }, + { + "epoch": 5.136237897648686, + "grad_norm": 11.235721588134766, + "learning_rate": 2.7020900568618412e-05, + "log_odds_chosen": 11.286067962646484, + "log_odds_ratio": -1.9410845197853632e-05, + "logits/chosen": -0.1929822564125061, + "logits/rejected": -0.2506953179836273, + "logps/chosen": -0.00020312087144702673, + "logps/rejected": -2.4689719676971436, + "loss": 0.8238, + "nll_loss": 0.20594075322151184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0312087144702673e-05, + "rewards/margins": 0.24687688052654266, + "rewards/rejected": -0.24689719080924988, + "step": 7427 + }, + { + "epoch": 5.136929460580913, + "grad_norm": 9.159810066223145, + "learning_rate": 2.701705855232826e-05, + "log_odds_chosen": 9.450736999511719, + "log_odds_ratio": -0.002133977599442005, + "logits/chosen": -0.48191481828689575, + "logits/rejected": -0.49468135833740234, + "logps/chosen": -0.01258176565170288, + "logps/rejected": -1.7106521129608154, + "loss": 0.899, + "nll_loss": 0.22454796731472015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012581765186041594, + "rewards/margins": 0.1698070466518402, + "rewards/rejected": -0.17106521129608154, + "step": 7428 + }, + { + "epoch": 5.13762102351314, + "grad_norm": 11.671100616455078, + "learning_rate": 2.7013216536038117e-05, + "log_odds_chosen": 10.852087020874023, + "log_odds_ratio": -9.678960486780852e-05, + "logits/chosen": -0.39900654554367065, + "logits/rejected": -0.47518616914749146, + "logps/chosen": -0.00019505196542013437, + "logps/rejected": -2.3899760246276855, + "loss": 0.7959, + "nll_loss": 0.19896200299263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.950519799720496e-05, + "rewards/margins": 0.23897811770439148, + "rewards/rejected": -0.23899762332439423, + "step": 7429 + }, + { + "epoch": 5.138312586445367, + "grad_norm": 6.089036464691162, + "learning_rate": 2.7009374519747966e-05, + "log_odds_chosen": 10.770848274230957, + "log_odds_ratio": -2.904493157984689e-05, + "logits/chosen": -0.09202180802822113, + "logits/rejected": -0.08273860067129135, + "logps/chosen": -0.00025757448747754097, + "logps/rejected": -2.359013319015503, + "loss": 0.8615, + "nll_loss": 0.21537932753562927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5757448383956216e-05, + "rewards/margins": 0.23587557673454285, + "rewards/rejected": -0.23590132594108582, + "step": 7430 + }, + { + "epoch": 5.139004149377594, + "grad_norm": 12.126273155212402, + "learning_rate": 2.7005532503457815e-05, + "log_odds_chosen": 12.461263656616211, + "log_odds_ratio": -1.4111486052570399e-05, + "logits/chosen": -0.42405080795288086, + "logits/rejected": -0.38803666830062866, + "logps/chosen": -0.0001690139906713739, + "logps/rejected": -3.408078193664551, + "loss": 1.0464, + "nll_loss": 0.2615966796875, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.690139833954163e-05, + "rewards/margins": 0.34079092741012573, + "rewards/rejected": -0.34080779552459717, + "step": 7431 + }, + { + "epoch": 5.139695712309821, + "grad_norm": 6.1468305587768555, + "learning_rate": 2.7001690487167668e-05, + "log_odds_chosen": 9.392050743103027, + "log_odds_ratio": -0.0002480958355590701, + "logits/chosen": -0.47275733947753906, + "logits/rejected": -0.47395336627960205, + "logps/chosen": -0.0005006209248676896, + "logps/rejected": -1.3755102157592773, + "loss": 0.9054, + "nll_loss": 0.2263253629207611, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.006208812119439e-05, + "rewards/margins": 0.13750094175338745, + "rewards/rejected": -0.13755100965499878, + "step": 7432 + }, + { + "epoch": 5.140387275242047, + "grad_norm": 12.276866912841797, + "learning_rate": 2.6997848470877517e-05, + "log_odds_chosen": 10.621747970581055, + "log_odds_ratio": -0.020847471430897713, + "logits/chosen": -0.467585027217865, + "logits/rejected": -0.47685593366622925, + "logps/chosen": -0.0061169276013970375, + "logps/rejected": -2.9758238792419434, + "loss": 1.0375, + "nll_loss": 0.25729823112487793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000611692841630429, + "rewards/margins": 0.2969706952571869, + "rewards/rejected": -0.29758238792419434, + "step": 7433 + }, + { + "epoch": 5.141078838174274, + "grad_norm": 5.268993854522705, + "learning_rate": 2.6994006454587366e-05, + "log_odds_chosen": 9.679532051086426, + "log_odds_ratio": -0.0001878141483757645, + "logits/chosen": -0.28364884853363037, + "logits/rejected": -0.30598005652427673, + "logps/chosen": -0.0004549617297016084, + "logps/rejected": -2.0728020668029785, + "loss": 0.581, + "nll_loss": 0.14523589611053467, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5496170059777796e-05, + "rewards/margins": 0.2072347104549408, + "rewards/rejected": -0.2072802186012268, + "step": 7434 + }, + { + "epoch": 5.141770401106501, + "grad_norm": 5.956173896789551, + "learning_rate": 2.699016443829722e-05, + "log_odds_chosen": 9.59054183959961, + "log_odds_ratio": -0.00022657515364699066, + "logits/chosen": -0.17377327382564545, + "logits/rejected": -0.20633672177791595, + "logps/chosen": -0.0018869942286983132, + "logps/rejected": -1.6651039123535156, + "loss": 0.4865, + "nll_loss": 0.12160070985555649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018869942869059741, + "rewards/margins": 0.16632169485092163, + "rewards/rejected": -0.16651038825511932, + "step": 7435 + }, + { + "epoch": 5.142461964038728, + "grad_norm": 6.142175197601318, + "learning_rate": 2.698632242200707e-05, + "log_odds_chosen": 10.699289321899414, + "log_odds_ratio": -3.694228871609084e-05, + "logits/chosen": -0.7496898174285889, + "logits/rejected": -0.757624089717865, + "logps/chosen": -0.00015292735770344734, + "logps/rejected": -1.9251577854156494, + "loss": 0.6407, + "nll_loss": 0.16017428040504456, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5292735042748973e-05, + "rewards/margins": 0.192500501871109, + "rewards/rejected": -0.1925157755613327, + "step": 7436 + }, + { + "epoch": 5.143153526970955, + "grad_norm": 5.331760406494141, + "learning_rate": 2.698248040571692e-05, + "log_odds_chosen": 9.898192405700684, + "log_odds_ratio": -0.002496067201718688, + "logits/chosen": -0.3681006133556366, + "logits/rejected": -0.4699682891368866, + "logps/chosen": -0.0007896803435869515, + "logps/rejected": -1.9627407789230347, + "loss": 0.7278, + "nll_loss": 0.18171098828315735, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.896803435869515e-05, + "rewards/margins": 0.19619512557983398, + "rewards/rejected": -0.19627408683300018, + "step": 7437 + }, + { + "epoch": 5.143845089903182, + "grad_norm": 6.796148777008057, + "learning_rate": 2.6978638389426776e-05, + "log_odds_chosen": 10.226568222045898, + "log_odds_ratio": -0.00014895365166012198, + "logits/chosen": -0.4499202370643616, + "logits/rejected": -0.516467273235321, + "logps/chosen": -0.00055777991656214, + "logps/rejected": -2.1390910148620605, + "loss": 0.8184, + "nll_loss": 0.20458026230335236, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.577799311140552e-05, + "rewards/margins": 0.21385329961776733, + "rewards/rejected": -0.2139090895652771, + "step": 7438 + }, + { + "epoch": 5.144536652835408, + "grad_norm": 16.08154296875, + "learning_rate": 2.6974796373136625e-05, + "log_odds_chosen": 7.4634013175964355, + "log_odds_ratio": -0.14611497521400452, + "logits/chosen": -0.4200138449668884, + "logits/rejected": -0.5181282758712769, + "logps/chosen": -0.0243761595338583, + "logps/rejected": -1.3469064235687256, + "loss": 0.709, + "nll_loss": 0.16263723373413086, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024376162327826023, + "rewards/margins": 0.132253035902977, + "rewards/rejected": -0.13469064235687256, + "step": 7439 + }, + { + "epoch": 5.145228215767635, + "grad_norm": 8.133143424987793, + "learning_rate": 2.6970954356846474e-05, + "log_odds_chosen": 9.185934066772461, + "log_odds_ratio": -0.0035839800257235765, + "logits/chosen": -0.6846455931663513, + "logits/rejected": -0.700886607170105, + "logps/chosen": -0.0032714849803596735, + "logps/rejected": -2.566293716430664, + "loss": 0.928, + "nll_loss": 0.23163369297981262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032714850385673344, + "rewards/margins": 0.25630223751068115, + "rewards/rejected": -0.2566293776035309, + "step": 7440 + }, + { + "epoch": 5.145919778699862, + "grad_norm": 9.776525497436523, + "learning_rate": 2.6967112340556326e-05, + "log_odds_chosen": 10.732635498046875, + "log_odds_ratio": -5.480859545059502e-05, + "logits/chosen": -0.36197394132614136, + "logits/rejected": -0.3192319869995117, + "logps/chosen": -0.0002830323646776378, + "logps/rejected": -2.5188982486724854, + "loss": 0.8279, + "nll_loss": 0.2069777250289917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8303238650551066e-05, + "rewards/margins": 0.2518615126609802, + "rewards/rejected": -0.25188982486724854, + "step": 7441 + }, + { + "epoch": 5.146611341632089, + "grad_norm": 7.6851606369018555, + "learning_rate": 2.6963270324266175e-05, + "log_odds_chosen": 10.15788459777832, + "log_odds_ratio": -0.0003483621112536639, + "logits/chosen": -0.30846232175827026, + "logits/rejected": -0.4022744297981262, + "logps/chosen": -0.00043301653931848705, + "logps/rejected": -1.8087468147277832, + "loss": 0.6652, + "nll_loss": 0.16627439856529236, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.330165756982751e-05, + "rewards/margins": 0.1808314025402069, + "rewards/rejected": -0.18087470531463623, + "step": 7442 + }, + { + "epoch": 5.147302904564316, + "grad_norm": 9.387350082397461, + "learning_rate": 2.6959428307976024e-05, + "log_odds_chosen": 9.48453140258789, + "log_odds_ratio": -0.00023655460972804576, + "logits/chosen": -0.8036692142486572, + "logits/rejected": -0.7573479413986206, + "logps/chosen": -0.0005227526417002082, + "logps/rejected": -1.2725003957748413, + "loss": 0.6543, + "nll_loss": 0.16355383396148682, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227526344242506e-05, + "rewards/margins": 0.1271977722644806, + "rewards/rejected": -0.1272500455379486, + "step": 7443 + }, + { + "epoch": 5.1479944674965425, + "grad_norm": 7.64993143081665, + "learning_rate": 2.695558629168588e-05, + "log_odds_chosen": 10.165771484375, + "log_odds_ratio": -8.179282303899527e-05, + "logits/chosen": -0.6584855914115906, + "logits/rejected": -0.578942060470581, + "logps/chosen": -0.0001937196939252317, + "logps/rejected": -1.4448562860488892, + "loss": 0.798, + "nll_loss": 0.1994938999414444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.937196975632105e-05, + "rewards/margins": 0.14446625113487244, + "rewards/rejected": -0.14448562264442444, + "step": 7444 + }, + { + "epoch": 5.148686030428769, + "grad_norm": 7.556155681610107, + "learning_rate": 2.695174427539573e-05, + "log_odds_chosen": 9.396717071533203, + "log_odds_ratio": -0.0032166705932468176, + "logits/chosen": -0.920258641242981, + "logits/rejected": -0.89804607629776, + "logps/chosen": -0.0029146973975002766, + "logps/rejected": -1.8120362758636475, + "loss": 0.8244, + "nll_loss": 0.20578640699386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029146973975002766, + "rewards/margins": 0.18091216683387756, + "rewards/rejected": -0.18120363354682922, + "step": 7445 + }, + { + "epoch": 5.149377593360996, + "grad_norm": 9.94237995147705, + "learning_rate": 2.6947902259105578e-05, + "log_odds_chosen": 10.113234519958496, + "log_odds_ratio": -0.00012419956328812987, + "logits/chosen": -0.2607835531234741, + "logits/rejected": -0.2949237823486328, + "logps/chosen": -0.0004108600551262498, + "logps/rejected": -2.0484108924865723, + "loss": 0.6527, + "nll_loss": 0.16316281259059906, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1086008423008025e-05, + "rewards/margins": 0.20479997992515564, + "rewards/rejected": -0.20484107732772827, + "step": 7446 + }, + { + "epoch": 5.150069156293223, + "grad_norm": 8.69245719909668, + "learning_rate": 2.6944060242815434e-05, + "log_odds_chosen": 10.116579055786133, + "log_odds_ratio": -0.00012764372513629496, + "logits/chosen": -0.7028722763061523, + "logits/rejected": -0.7171040773391724, + "logps/chosen": -0.0006051872624084353, + "logps/rejected": -2.0514676570892334, + "loss": 0.4936, + "nll_loss": 0.12338022142648697, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051873424439691e-05, + "rewards/margins": 0.20508623123168945, + "rewards/rejected": -0.20514675974845886, + "step": 7447 + }, + { + "epoch": 5.15076071922545, + "grad_norm": 7.142855167388916, + "learning_rate": 2.6940218226525283e-05, + "log_odds_chosen": 10.812362670898438, + "log_odds_ratio": -5.71875243622344e-05, + "logits/chosen": -0.4539200961589813, + "logits/rejected": -0.47321540117263794, + "logps/chosen": -0.00028848316287621856, + "logps/rejected": -2.2997775077819824, + "loss": 0.9367, + "nll_loss": 0.23417231440544128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8848317015217617e-05, + "rewards/margins": 0.22994890809059143, + "rewards/rejected": -0.2299777716398239, + "step": 7448 + }, + { + "epoch": 5.151452282157677, + "grad_norm": 8.273560523986816, + "learning_rate": 2.6936376210235132e-05, + "log_odds_chosen": 9.92202377319336, + "log_odds_ratio": -0.00017694597772788256, + "logits/chosen": -0.35956692695617676, + "logits/rejected": -0.41068869829177856, + "logps/chosen": -0.0004035543533973396, + "logps/rejected": -1.659806251525879, + "loss": 0.4615, + "nll_loss": 0.11536431312561035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.035543679492548e-05, + "rewards/margins": 0.1659402698278427, + "rewards/rejected": -0.16598062217235565, + "step": 7449 + }, + { + "epoch": 5.1521438450899035, + "grad_norm": 6.735435962677002, + "learning_rate": 2.6932534193944985e-05, + "log_odds_chosen": 9.072761535644531, + "log_odds_ratio": -0.0005184119800105691, + "logits/chosen": -0.17102433741092682, + "logits/rejected": -0.24381959438323975, + "logps/chosen": -0.0023045637644827366, + "logps/rejected": -1.9304804801940918, + "loss": 0.8306, + "nll_loss": 0.20758703351020813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023045638226903975, + "rewards/margins": 0.1928175985813141, + "rewards/rejected": -0.19304805994033813, + "step": 7450 + }, + { + "epoch": 5.15283540802213, + "grad_norm": 7.467382431030273, + "learning_rate": 2.6928692177654834e-05, + "log_odds_chosen": 10.252504348754883, + "log_odds_ratio": -0.00010818125156220049, + "logits/chosen": -0.3678957521915436, + "logits/rejected": -0.4237433671951294, + "logps/chosen": -0.0002906073350459337, + "logps/rejected": -2.056698799133301, + "loss": 0.5276, + "nll_loss": 0.13188603520393372, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.90607367787743e-05, + "rewards/margins": 0.20564082264900208, + "rewards/rejected": -0.20566987991333008, + "step": 7451 + }, + { + "epoch": 5.153526970954357, + "grad_norm": 8.431239128112793, + "learning_rate": 2.6924850161364683e-05, + "log_odds_chosen": 10.633581161499023, + "log_odds_ratio": -8.072963100858033e-05, + "logits/chosen": -0.427499383687973, + "logits/rejected": -0.4580521285533905, + "logps/chosen": -0.00054318638285622, + "logps/rejected": -2.396068572998047, + "loss": 0.9553, + "nll_loss": 0.23881791532039642, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.431864701677114e-05, + "rewards/margins": 0.23955252766609192, + "rewards/rejected": -0.2396068572998047, + "step": 7452 + }, + { + "epoch": 5.154218533886584, + "grad_norm": 6.476319789886475, + "learning_rate": 2.692100814507454e-05, + "log_odds_chosen": 10.82221508026123, + "log_odds_ratio": -5.586762563325465e-05, + "logits/chosen": -0.3435792326927185, + "logits/rejected": -0.32957378029823303, + "logps/chosen": -0.0001679111155681312, + "logps/rejected": -1.8454334735870361, + "loss": 0.5417, + "nll_loss": 0.13541817665100098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6791113012004644e-05, + "rewards/margins": 0.18452654778957367, + "rewards/rejected": -0.18454334139823914, + "step": 7453 + }, + { + "epoch": 5.154910096818811, + "grad_norm": 6.970516681671143, + "learning_rate": 2.6917166128784388e-05, + "log_odds_chosen": 10.277645111083984, + "log_odds_ratio": -6.148001557448879e-05, + "logits/chosen": -0.5003536343574524, + "logits/rejected": -0.5715048909187317, + "logps/chosen": -0.00022070945124141872, + "logps/rejected": -2.006925106048584, + "loss": 0.9131, + "nll_loss": 0.22826507687568665, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2070948034524918e-05, + "rewards/margins": 0.20067042112350464, + "rewards/rejected": -0.20069250464439392, + "step": 7454 + }, + { + "epoch": 5.155601659751038, + "grad_norm": 11.107111930847168, + "learning_rate": 2.6913324112494237e-05, + "log_odds_chosen": 9.70268440246582, + "log_odds_ratio": -0.0004626112640835345, + "logits/chosen": -0.372115820646286, + "logits/rejected": -0.3687889575958252, + "logps/chosen": -0.001481476123444736, + "logps/rejected": -1.5637428760528564, + "loss": 0.7513, + "nll_loss": 0.18778753280639648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001481476065237075, + "rewards/margins": 0.15622614324092865, + "rewards/rejected": -0.15637429058551788, + "step": 7455 + }, + { + "epoch": 5.1562932226832645, + "grad_norm": 10.2018404006958, + "learning_rate": 2.6909482096204092e-05, + "log_odds_chosen": 11.103732109069824, + "log_odds_ratio": -2.6793713914230466e-05, + "logits/chosen": -0.4589177966117859, + "logits/rejected": -0.5208662152290344, + "logps/chosen": -0.0001817662123357877, + "logps/rejected": -2.4946489334106445, + "loss": 0.8262, + "nll_loss": 0.2065504789352417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.817662086978089e-05, + "rewards/margins": 0.24944674968719482, + "rewards/rejected": -0.24946492910385132, + "step": 7456 + }, + { + "epoch": 5.156984785615491, + "grad_norm": 11.086647033691406, + "learning_rate": 2.690564007991394e-05, + "log_odds_chosen": 10.889501571655273, + "log_odds_ratio": -3.276738425483927e-05, + "logits/chosen": -0.3176249861717224, + "logits/rejected": -0.41258126497268677, + "logps/chosen": -0.00030722259543836117, + "logps/rejected": -2.074706554412842, + "loss": 0.5755, + "nll_loss": 0.1438601016998291, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.07222617266234e-05, + "rewards/margins": 0.20743992924690247, + "rewards/rejected": -0.20747067034244537, + "step": 7457 + }, + { + "epoch": 5.157676348547718, + "grad_norm": 7.860065460205078, + "learning_rate": 2.690179806362379e-05, + "log_odds_chosen": 10.567296028137207, + "log_odds_ratio": -0.00013941126235295087, + "logits/chosen": -0.6222514510154724, + "logits/rejected": -0.7001794576644897, + "logps/chosen": -0.0014393426245078444, + "logps/rejected": -2.4629440307617188, + "loss": 0.7707, + "nll_loss": 0.19265933334827423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014393427409231663, + "rewards/margins": 0.24615047872066498, + "rewards/rejected": -0.24629440903663635, + "step": 7458 + }, + { + "epoch": 5.158367911479945, + "grad_norm": 9.500434875488281, + "learning_rate": 2.6897956047333643e-05, + "log_odds_chosen": 11.329741477966309, + "log_odds_ratio": -4.6019948058528826e-05, + "logits/chosen": -0.7677967548370361, + "logits/rejected": -0.7199459671974182, + "logps/chosen": -0.00035254136309958994, + "logps/rejected": -2.6514434814453125, + "loss": 0.6871, + "nll_loss": 0.17177699506282806, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5254131944384426e-05, + "rewards/margins": 0.2651090919971466, + "rewards/rejected": -0.26514434814453125, + "step": 7459 + }, + { + "epoch": 5.159059474412172, + "grad_norm": 5.759252548217773, + "learning_rate": 2.6894114031043492e-05, + "log_odds_chosen": 8.701586723327637, + "log_odds_ratio": -0.000857122300658375, + "logits/chosen": -0.5436146259307861, + "logits/rejected": -0.5152435302734375, + "logps/chosen": -0.001559797441586852, + "logps/rejected": -2.025024175643921, + "loss": 1.0732, + "nll_loss": 0.268216073513031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015597973833791912, + "rewards/margins": 0.20234644412994385, + "rewards/rejected": -0.20250242948532104, + "step": 7460 + }, + { + "epoch": 5.159751037344399, + "grad_norm": 8.084737777709961, + "learning_rate": 2.689027201475334e-05, + "log_odds_chosen": 10.72265625, + "log_odds_ratio": -3.390820711501874e-05, + "logits/chosen": -0.4589902460575104, + "logits/rejected": -0.464423805475235, + "logps/chosen": -0.00032347970409318805, + "logps/rejected": -2.2925033569335938, + "loss": 0.7002, + "nll_loss": 0.17503926157951355, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.234797259210609e-05, + "rewards/margins": 0.229217991232872, + "rewards/rejected": -0.22925034165382385, + "step": 7461 + }, + { + "epoch": 5.1604426002766255, + "grad_norm": 8.944840431213379, + "learning_rate": 2.6886429998463197e-05, + "log_odds_chosen": 9.330341339111328, + "log_odds_ratio": -0.0014703095657750964, + "logits/chosen": -0.4748300313949585, + "logits/rejected": -0.5044779777526855, + "logps/chosen": -0.0012496764538809657, + "logps/rejected": -1.0973037481307983, + "loss": 0.7171, + "nll_loss": 0.17911916971206665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012496765702962875, + "rewards/margins": 0.10960540920495987, + "rewards/rejected": -0.10973037779331207, + "step": 7462 + }, + { + "epoch": 5.161134163208852, + "grad_norm": 9.398916244506836, + "learning_rate": 2.6882587982173046e-05, + "log_odds_chosen": 10.321409225463867, + "log_odds_ratio": -0.00010626899893395603, + "logits/chosen": -0.6396389007568359, + "logits/rejected": -0.7592541575431824, + "logps/chosen": -0.0009459155262447894, + "logps/rejected": -1.6495240926742554, + "loss": 0.7927, + "nll_loss": 0.19815877079963684, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.459155262447894e-05, + "rewards/margins": 0.16485781967639923, + "rewards/rejected": -0.16495241224765778, + "step": 7463 + }, + { + "epoch": 5.161825726141079, + "grad_norm": 7.023201942443848, + "learning_rate": 2.6878745965882895e-05, + "log_odds_chosen": 10.7672119140625, + "log_odds_ratio": -6.182586366776377e-05, + "logits/chosen": -0.4947451651096344, + "logits/rejected": -0.5416525602340698, + "logps/chosen": -0.00021441548597067595, + "logps/rejected": -2.317390203475952, + "loss": 0.6882, + "nll_loss": 0.17205506563186646, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1441548597067595e-05, + "rewards/margins": 0.23171758651733398, + "rewards/rejected": -0.23173902928829193, + "step": 7464 + }, + { + "epoch": 5.162517289073306, + "grad_norm": 264.32476806640625, + "learning_rate": 2.687490394959275e-05, + "log_odds_chosen": 8.581357955932617, + "log_odds_ratio": -0.7932350635528564, + "logits/chosen": -0.44917917251586914, + "logits/rejected": -0.46375998854637146, + "logps/chosen": -0.05425819382071495, + "logps/rejected": -1.6769574880599976, + "loss": 0.99, + "nll_loss": 0.16817449033260345, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00542581919580698, + "rewards/margins": 0.16226992011070251, + "rewards/rejected": -0.16769574582576752, + "step": 7465 + }, + { + "epoch": 5.163208852005533, + "grad_norm": 14.085131645202637, + "learning_rate": 2.68710619333026e-05, + "log_odds_chosen": 11.090156555175781, + "log_odds_ratio": -5.3086037951288745e-05, + "logits/chosen": -0.36317867040634155, + "logits/rejected": -0.48448365926742554, + "logps/chosen": -0.00022466076188720763, + "logps/rejected": -2.407838821411133, + "loss": 0.978, + "nll_loss": 0.24448320269584656, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2466076188720763e-05, + "rewards/margins": 0.240761399269104, + "rewards/rejected": -0.24078388512134552, + "step": 7466 + }, + { + "epoch": 5.16390041493776, + "grad_norm": 6.401428699493408, + "learning_rate": 2.686721991701245e-05, + "log_odds_chosen": 10.487455368041992, + "log_odds_ratio": -7.464921509381384e-05, + "logits/chosen": -0.5477094054222107, + "logits/rejected": -0.5565400123596191, + "logps/chosen": -0.00031943750218488276, + "logps/rejected": -1.8109780550003052, + "loss": 0.8222, + "nll_loss": 0.2055395245552063, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.194374585291371e-05, + "rewards/margins": 0.1810658574104309, + "rewards/rejected": -0.18109779059886932, + "step": 7467 + }, + { + "epoch": 5.1645919778699865, + "grad_norm": 12.549816131591797, + "learning_rate": 2.68633779007223e-05, + "log_odds_chosen": 9.737430572509766, + "log_odds_ratio": -0.0002125693717971444, + "logits/chosen": -0.3117648661136627, + "logits/rejected": -0.41824281215667725, + "logps/chosen": -0.00019726053869817406, + "logps/rejected": -1.5196945667266846, + "loss": 1.9219, + "nll_loss": 0.4804571866989136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.972605605260469e-05, + "rewards/margins": 0.15194973349571228, + "rewards/rejected": -0.15196946263313293, + "step": 7468 + }, + { + "epoch": 5.165283540802213, + "grad_norm": 5.628894329071045, + "learning_rate": 2.685953588443215e-05, + "log_odds_chosen": 10.469507217407227, + "log_odds_ratio": -0.00010986346023855731, + "logits/chosen": -0.37138351798057556, + "logits/rejected": -0.4485434889793396, + "logps/chosen": -0.004168296232819557, + "logps/rejected": -2.2689802646636963, + "loss": 0.7922, + "nll_loss": 0.1980508416891098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004168296291027218, + "rewards/margins": 0.22648121416568756, + "rewards/rejected": -0.22689804434776306, + "step": 7469 + }, + { + "epoch": 5.16597510373444, + "grad_norm": 8.571297645568848, + "learning_rate": 2.6855693868142e-05, + "log_odds_chosen": 9.631418228149414, + "log_odds_ratio": -8.710901602171361e-05, + "logits/chosen": -0.3490288257598877, + "logits/rejected": -0.3959527611732483, + "logps/chosen": -0.00012631932622753084, + "logps/rejected": -0.9703894853591919, + "loss": 1.1691, + "nll_loss": 0.2922728955745697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2631931895157322e-05, + "rewards/margins": 0.09702632576227188, + "rewards/rejected": -0.09703895449638367, + "step": 7470 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 5.723106861114502, + "learning_rate": 2.6851851851851855e-05, + "log_odds_chosen": 10.555776596069336, + "log_odds_ratio": -5.606668128166348e-05, + "logits/chosen": -0.6238350868225098, + "logits/rejected": -0.6863256692886353, + "logps/chosen": -0.0005328550469130278, + "logps/rejected": -2.041658401489258, + "loss": 0.5854, + "nll_loss": 0.14635531604290009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3285504691302776e-05, + "rewards/margins": 0.20411255955696106, + "rewards/rejected": -0.20416586101055145, + "step": 7471 + }, + { + "epoch": 5.167358229598894, + "grad_norm": 7.153631210327148, + "learning_rate": 2.6848009835561704e-05, + "log_odds_chosen": 9.928325653076172, + "log_odds_ratio": -0.0010404024505987763, + "logits/chosen": -0.36878323554992676, + "logits/rejected": -0.4676334261894226, + "logps/chosen": -0.001209319569170475, + "logps/rejected": -2.2380104064941406, + "loss": 0.6111, + "nll_loss": 0.15266205370426178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012093195982743055, + "rewards/margins": 0.22368010878562927, + "rewards/rejected": -0.22380106151103973, + "step": 7472 + }, + { + "epoch": 5.168049792531121, + "grad_norm": 4.801445484161377, + "learning_rate": 2.6844167819271554e-05, + "log_odds_chosen": 9.519782066345215, + "log_odds_ratio": -0.0011550523340702057, + "logits/chosen": -0.4875209629535675, + "logits/rejected": -0.37438157200813293, + "logps/chosen": -0.002648913534358144, + "logps/rejected": -2.1521012783050537, + "loss": 0.5119, + "nll_loss": 0.12784883379936218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026489136507734656, + "rewards/margins": 0.2149452269077301, + "rewards/rejected": -0.21521010994911194, + "step": 7473 + }, + { + "epoch": 5.1687413554633475, + "grad_norm": 10.136579513549805, + "learning_rate": 2.684032580298141e-05, + "log_odds_chosen": 11.658432006835938, + "log_odds_ratio": -1.348641853837762e-05, + "logits/chosen": -0.07469991594552994, + "logits/rejected": -0.1592363864183426, + "logps/chosen": -0.00022109775454737246, + "logps/rejected": -3.093412160873413, + "loss": 1.05, + "nll_loss": 0.2624865174293518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2109776182333007e-05, + "rewards/margins": 0.3093191385269165, + "rewards/rejected": -0.3093412518501282, + "step": 7474 + }, + { + "epoch": 5.169432918395574, + "grad_norm": 8.295631408691406, + "learning_rate": 2.683648378669126e-05, + "log_odds_chosen": 11.123706817626953, + "log_odds_ratio": -2.043310450972058e-05, + "logits/chosen": -0.3312394917011261, + "logits/rejected": -0.36622753739356995, + "logps/chosen": -9.362811397295445e-05, + "logps/rejected": -1.9202853441238403, + "loss": 0.8433, + "nll_loss": 0.21081441640853882, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.362811397295445e-06, + "rewards/margins": 0.19201916456222534, + "rewards/rejected": -0.19202853739261627, + "step": 7475 + }, + { + "epoch": 5.170124481327801, + "grad_norm": 7.556997299194336, + "learning_rate": 2.6832641770401107e-05, + "log_odds_chosen": 9.98617172241211, + "log_odds_ratio": -8.398250793106854e-05, + "logits/chosen": -0.28513598442077637, + "logits/rejected": -0.35176780819892883, + "logps/chosen": -0.0001761521416483447, + "logps/rejected": -1.447932481765747, + "loss": 0.6559, + "nll_loss": 0.16395564377307892, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.761521525622811e-05, + "rewards/margins": 0.1447756290435791, + "rewards/rejected": -0.14479325711727142, + "step": 7476 + }, + { + "epoch": 5.170816044260028, + "grad_norm": 6.122884273529053, + "learning_rate": 2.682879975411096e-05, + "log_odds_chosen": 10.983901977539062, + "log_odds_ratio": -4.2492694774409756e-05, + "logits/chosen": -0.2037186324596405, + "logits/rejected": -0.3470858931541443, + "logps/chosen": -0.0003173601580783725, + "logps/rejected": -2.748629570007324, + "loss": 0.5459, + "nll_loss": 0.13647319376468658, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.173601726302877e-05, + "rewards/margins": 0.27483123540878296, + "rewards/rejected": -0.27486297488212585, + "step": 7477 + }, + { + "epoch": 5.171507607192255, + "grad_norm": 9.355021476745605, + "learning_rate": 2.682495773782081e-05, + "log_odds_chosen": 10.416082382202148, + "log_odds_ratio": -0.00011976615496678278, + "logits/chosen": -0.3434603810310364, + "logits/rejected": -0.34404170513153076, + "logps/chosen": -0.00027853366918861866, + "logps/rejected": -1.650794506072998, + "loss": 0.7857, + "nll_loss": 0.19641204178333282, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7853368010255508e-05, + "rewards/margins": 0.16505160927772522, + "rewards/rejected": -0.16507945954799652, + "step": 7478 + }, + { + "epoch": 5.172199170124482, + "grad_norm": 10.500981330871582, + "learning_rate": 2.6821115721530658e-05, + "log_odds_chosen": 8.71304702758789, + "log_odds_ratio": -0.1506773829460144, + "logits/chosen": -0.4835907816886902, + "logits/rejected": -0.5638684034347534, + "logps/chosen": -0.15316888689994812, + "logps/rejected": -1.914087176322937, + "loss": 1.1405, + "nll_loss": 0.27004915475845337, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.015316887758672237, + "rewards/margins": 0.17609182000160217, + "rewards/rejected": -0.19140870869159698, + "step": 7479 + }, + { + "epoch": 5.172890733056708, + "grad_norm": 10.053938865661621, + "learning_rate": 2.6817273705240507e-05, + "log_odds_chosen": 10.712471008300781, + "log_odds_ratio": -0.0001415243314113468, + "logits/chosen": -0.1618555337190628, + "logits/rejected": -0.18134653568267822, + "logps/chosen": -0.0003034502442460507, + "logps/rejected": -1.9955106973648071, + "loss": 0.755, + "nll_loss": 0.18873383104801178, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0345025152200833e-05, + "rewards/margins": 0.19952073693275452, + "rewards/rejected": -0.1995510756969452, + "step": 7480 + }, + { + "epoch": 5.173582295988935, + "grad_norm": 13.536983489990234, + "learning_rate": 2.6813431688950363e-05, + "log_odds_chosen": 10.499656677246094, + "log_odds_ratio": -0.00011437821376603097, + "logits/chosen": -0.613703727722168, + "logits/rejected": -0.7598463296890259, + "logps/chosen": -0.0006375666707754135, + "logps/rejected": -2.5778279304504395, + "loss": 0.8122, + "nll_loss": 0.20303978025913239, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.37566699879244e-05, + "rewards/margins": 0.2577190399169922, + "rewards/rejected": -0.25778278708457947, + "step": 7481 + }, + { + "epoch": 5.174273858921162, + "grad_norm": 7.290798187255859, + "learning_rate": 2.6809589672660212e-05, + "log_odds_chosen": 10.250022888183594, + "log_odds_ratio": -0.00485006021335721, + "logits/chosen": -0.1034635454416275, + "logits/rejected": -0.08264364302158356, + "logps/chosen": -0.002070134272798896, + "logps/rejected": -2.0911502838134766, + "loss": 0.6515, + "nll_loss": 0.1624019742012024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020701343601103872, + "rewards/margins": 0.20890799164772034, + "rewards/rejected": -0.20911502838134766, + "step": 7482 + }, + { + "epoch": 5.174965421853389, + "grad_norm": 6.1445841789245605, + "learning_rate": 2.680574765637006e-05, + "log_odds_chosen": 11.127788543701172, + "log_odds_ratio": -1.904199962154962e-05, + "logits/chosen": -0.28616607189178467, + "logits/rejected": -0.33013203740119934, + "logps/chosen": -0.0003823993029072881, + "logps/rejected": -2.642538070678711, + "loss": 0.9725, + "nll_loss": 0.24311384558677673, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.823993029072881e-05, + "rewards/margins": 0.2642155587673187, + "rewards/rejected": -0.26425379514694214, + "step": 7483 + }, + { + "epoch": 5.175656984785616, + "grad_norm": 8.26850414276123, + "learning_rate": 2.6801905640079917e-05, + "log_odds_chosen": 11.149333953857422, + "log_odds_ratio": -8.133323717629537e-05, + "logits/chosen": -0.40619760751724243, + "logits/rejected": -0.4585905969142914, + "logps/chosen": -0.0030611082911491394, + "logps/rejected": -2.522730588912964, + "loss": 0.744, + "nll_loss": 0.18599961698055267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003061108582187444, + "rewards/margins": 0.2519669532775879, + "rewards/rejected": -0.2522730529308319, + "step": 7484 + }, + { + "epoch": 5.176348547717843, + "grad_norm": 11.564437866210938, + "learning_rate": 2.6798063623789766e-05, + "log_odds_chosen": 9.528322219848633, + "log_odds_ratio": -0.0002743283985182643, + "logits/chosen": -0.33204129338264465, + "logits/rejected": -0.3911939263343811, + "logps/chosen": -0.0010037249885499477, + "logps/rejected": -1.6887990236282349, + "loss": 1.0054, + "nll_loss": 0.25131741166114807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010037249739980325, + "rewards/margins": 0.16877952218055725, + "rewards/rejected": -0.168879896402359, + "step": 7485 + }, + { + "epoch": 5.177040110650069, + "grad_norm": 7.996581077575684, + "learning_rate": 2.6794221607499615e-05, + "log_odds_chosen": 10.887833595275879, + "log_odds_ratio": -3.939437010558322e-05, + "logits/chosen": -0.38903307914733887, + "logits/rejected": -0.4599970877170563, + "logps/chosen": -0.0002921771665569395, + "logps/rejected": -2.234811305999756, + "loss": 0.562, + "nll_loss": 0.14050793647766113, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.921771738328971e-05, + "rewards/margins": 0.2234519124031067, + "rewards/rejected": -0.22348114848136902, + "step": 7486 + }, + { + "epoch": 5.177731673582296, + "grad_norm": 5.6479692459106445, + "learning_rate": 2.6790379591209467e-05, + "log_odds_chosen": 10.950244903564453, + "log_odds_ratio": -3.814305455307476e-05, + "logits/chosen": -0.6038317680358887, + "logits/rejected": -0.6398409605026245, + "logps/chosen": -0.00017186827608384192, + "logps/rejected": -2.3177053928375244, + "loss": 0.6308, + "nll_loss": 0.1576993316411972, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7186828699777834e-05, + "rewards/margins": 0.23175334930419922, + "rewards/rejected": -0.23177054524421692, + "step": 7487 + }, + { + "epoch": 5.178423236514523, + "grad_norm": 7.2999982833862305, + "learning_rate": 2.678653757491932e-05, + "log_odds_chosen": 10.90628719329834, + "log_odds_ratio": -0.00015409404295496643, + "logits/chosen": -0.3412237763404846, + "logits/rejected": -0.44108980894088745, + "logps/chosen": -0.00016340138972736895, + "logps/rejected": -2.2473998069763184, + "loss": 0.9075, + "nll_loss": 0.2268695831298828, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6340140064130537e-05, + "rewards/margins": 0.22472365200519562, + "rewards/rejected": -0.22473998367786407, + "step": 7488 + }, + { + "epoch": 5.17911479944675, + "grad_norm": 9.799997329711914, + "learning_rate": 2.678269555862917e-05, + "log_odds_chosen": 10.669675827026367, + "log_odds_ratio": -9.51956317294389e-05, + "logits/chosen": -0.427339643239975, + "logits/rejected": -0.4833088517189026, + "logps/chosen": -0.00027621022309176624, + "logps/rejected": -2.317042350769043, + "loss": 1.0254, + "nll_loss": 0.2563331127166748, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7621019398793578e-05, + "rewards/margins": 0.2316766232252121, + "rewards/rejected": -0.23170426487922668, + "step": 7489 + }, + { + "epoch": 5.179806362378977, + "grad_norm": 3.7129058837890625, + "learning_rate": 2.677885354233902e-05, + "log_odds_chosen": 10.49702262878418, + "log_odds_ratio": -5.258437158772722e-05, + "logits/chosen": -0.7177293300628662, + "logits/rejected": -0.6209688186645508, + "logps/chosen": -0.0001718494895612821, + "logps/rejected": -1.7512952089309692, + "loss": 0.4853, + "nll_loss": 0.12132173776626587, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.718494968372397e-05, + "rewards/margins": 0.17511233687400818, + "rewards/rejected": -0.17512951791286469, + "step": 7490 + }, + { + "epoch": 5.180497925311204, + "grad_norm": 11.894420623779297, + "learning_rate": 2.677501152604887e-05, + "log_odds_chosen": 11.11728286743164, + "log_odds_ratio": -2.9727882065344602e-05, + "logits/chosen": -0.5175437927246094, + "logits/rejected": -0.5285531878471375, + "logps/chosen": -0.0001054564054356888, + "logps/rejected": -2.058851957321167, + "loss": 0.9157, + "nll_loss": 0.22891131043434143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.05456410892657e-05, + "rewards/margins": 0.20587465167045593, + "rewards/rejected": -0.20588520169258118, + "step": 7491 + }, + { + "epoch": 5.18118948824343, + "grad_norm": 7.50294303894043, + "learning_rate": 2.677116950975872e-05, + "log_odds_chosen": 10.425357818603516, + "log_odds_ratio": -0.001152600278146565, + "logits/chosen": -0.45772436261177063, + "logits/rejected": -0.43121635913848877, + "logps/chosen": -0.0005207476206123829, + "logps/rejected": -1.9553520679473877, + "loss": 0.7615, + "nll_loss": 0.19025787711143494, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.207476351642981e-05, + "rewards/margins": 0.19548313319683075, + "rewards/rejected": -0.19553521275520325, + "step": 7492 + }, + { + "epoch": 5.181881051175657, + "grad_norm": 6.374143123626709, + "learning_rate": 2.6767327493468575e-05, + "log_odds_chosen": 8.970407485961914, + "log_odds_ratio": -0.00023207120830193162, + "logits/chosen": -0.2817067801952362, + "logits/rejected": -0.27195069193840027, + "logps/chosen": -0.00061708630528301, + "logps/rejected": -1.4450798034667969, + "loss": 0.8252, + "nll_loss": 0.2062731385231018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.170863343868405e-05, + "rewards/margins": 0.14444628357887268, + "rewards/rejected": -0.1445080041885376, + "step": 7493 + }, + { + "epoch": 5.182572614107884, + "grad_norm": 5.238719463348389, + "learning_rate": 2.6763485477178424e-05, + "log_odds_chosen": 9.787389755249023, + "log_odds_ratio": -0.002000561449676752, + "logits/chosen": -0.5416412353515625, + "logits/rejected": -0.5525435209274292, + "logps/chosen": -0.009776615537703037, + "logps/rejected": -2.6586475372314453, + "loss": 0.6396, + "nll_loss": 0.15969637036323547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000977661577053368, + "rewards/margins": 0.26488709449768066, + "rewards/rejected": -0.2658647894859314, + "step": 7494 + }, + { + "epoch": 5.183264177040111, + "grad_norm": 8.104238510131836, + "learning_rate": 2.6759643460888273e-05, + "log_odds_chosen": 9.921451568603516, + "log_odds_ratio": -0.00029444476240314543, + "logits/chosen": 0.10883663594722748, + "logits/rejected": -0.02936922013759613, + "logps/chosen": -0.0010627154260873795, + "logps/rejected": -1.91524076461792, + "loss": 1.0661, + "nll_loss": 0.2664946913719177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010627156007103622, + "rewards/margins": 0.19141781330108643, + "rewards/rejected": -0.19152405858039856, + "step": 7495 + }, + { + "epoch": 5.183955739972338, + "grad_norm": 11.126547813415527, + "learning_rate": 2.675580144459813e-05, + "log_odds_chosen": 10.28394889831543, + "log_odds_ratio": -0.00021344845299609005, + "logits/chosen": -0.46144601702690125, + "logits/rejected": -0.6137267351150513, + "logps/chosen": -0.0002671529073268175, + "logps/rejected": -1.7458523511886597, + "loss": 0.8458, + "nll_loss": 0.21143922209739685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6715293643064797e-05, + "rewards/margins": 0.17455853521823883, + "rewards/rejected": -0.1745852380990982, + "step": 7496 + }, + { + "epoch": 5.1846473029045645, + "grad_norm": 8.785880088806152, + "learning_rate": 2.6751959428307978e-05, + "log_odds_chosen": 9.732967376708984, + "log_odds_ratio": -0.00011404632095945999, + "logits/chosen": -0.4368380904197693, + "logits/rejected": -0.49852725863456726, + "logps/chosen": -0.0003329858591314405, + "logps/rejected": -1.4203979969024658, + "loss": 0.682, + "nll_loss": 0.17047792673110962, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.329858373035677e-05, + "rewards/margins": 0.142006516456604, + "rewards/rejected": -0.14203980565071106, + "step": 7497 + }, + { + "epoch": 5.185338865836791, + "grad_norm": 7.608089923858643, + "learning_rate": 2.6748117412017827e-05, + "log_odds_chosen": 10.018511772155762, + "log_odds_ratio": -0.00010477846080902964, + "logits/chosen": -0.33977967500686646, + "logits/rejected": -0.3529067635536194, + "logps/chosen": -0.0014785649254918098, + "logps/rejected": -1.9228357076644897, + "loss": 1.3271, + "nll_loss": 0.33175739645957947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001478564809076488, + "rewards/margins": 0.19213572144508362, + "rewards/rejected": -0.19228358566761017, + "step": 7498 + }, + { + "epoch": 5.186030428769018, + "grad_norm": 7.914434909820557, + "learning_rate": 2.674427539572768e-05, + "log_odds_chosen": 10.729510307312012, + "log_odds_ratio": -0.00024930204381234944, + "logits/chosen": -0.34463316202163696, + "logits/rejected": -0.36579248309135437, + "logps/chosen": -0.0005520581617020071, + "logps/rejected": -2.259552478790283, + "loss": 0.7027, + "nll_loss": 0.17564159631729126, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.520581908058375e-05, + "rewards/margins": 0.22590003907680511, + "rewards/rejected": -0.22595523297786713, + "step": 7499 + }, + { + "epoch": 5.186721991701245, + "grad_norm": 4.368546009063721, + "learning_rate": 2.674043337943753e-05, + "log_odds_chosen": 10.761625289916992, + "log_odds_ratio": -3.10266186716035e-05, + "logits/chosen": -0.5241531133651733, + "logits/rejected": -0.5674874782562256, + "logps/chosen": -0.00037843859172426164, + "logps/rejected": -2.337672710418701, + "loss": 0.5361, + "nll_loss": 0.13401782512664795, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7843859900021926e-05, + "rewards/margins": 0.23372939229011536, + "rewards/rejected": -0.23376727104187012, + "step": 7500 + }, + { + "epoch": 5.187413554633472, + "grad_norm": 9.344945907592773, + "learning_rate": 2.6736591363147378e-05, + "log_odds_chosen": 9.932015419006348, + "log_odds_ratio": -0.00025062222266569734, + "logits/chosen": -0.024730026721954346, + "logits/rejected": -0.1187935620546341, + "logps/chosen": -0.0003689700970426202, + "logps/rejected": -2.0399227142333984, + "loss": 0.5947, + "nll_loss": 0.1486581265926361, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.689701406983659e-05, + "rewards/margins": 0.20395538210868835, + "rewards/rejected": -0.20399229228496552, + "step": 7501 + }, + { + "epoch": 5.188105117565699, + "grad_norm": 6.984439373016357, + "learning_rate": 2.6732749346857234e-05, + "log_odds_chosen": 9.949422836303711, + "log_odds_ratio": -0.0003776532830670476, + "logits/chosen": -0.6317130327224731, + "logits/rejected": -0.6177940368652344, + "logps/chosen": -0.0006253727478906512, + "logps/rejected": -1.4834885597229004, + "loss": 0.5595, + "nll_loss": 0.13984926044940948, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.253727769944817e-05, + "rewards/margins": 0.14828631281852722, + "rewards/rejected": -0.1483488529920578, + "step": 7502 + }, + { + "epoch": 5.1887966804979255, + "grad_norm": 7.357310771942139, + "learning_rate": 2.6728907330567083e-05, + "log_odds_chosen": 10.67965316772461, + "log_odds_ratio": -5.9429581597214565e-05, + "logits/chosen": -0.7382419109344482, + "logits/rejected": -0.8030633330345154, + "logps/chosen": -0.00030407847953028977, + "logps/rejected": -2.4250638484954834, + "loss": 0.7071, + "nll_loss": 0.17677223682403564, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.04078494082205e-05, + "rewards/margins": 0.2424759864807129, + "rewards/rejected": -0.24250638484954834, + "step": 7503 + }, + { + "epoch": 5.189488243430152, + "grad_norm": 8.235445976257324, + "learning_rate": 2.6725065314276932e-05, + "log_odds_chosen": 9.837135314941406, + "log_odds_ratio": -0.00010966081754304469, + "logits/chosen": -0.4385377764701843, + "logits/rejected": -0.4710184633731842, + "logps/chosen": -0.0004885304369963706, + "logps/rejected": -2.0021209716796875, + "loss": 1.0324, + "nll_loss": 0.2580997943878174, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.885304588242434e-05, + "rewards/margins": 0.20016326010227203, + "rewards/rejected": -0.20021212100982666, + "step": 7504 + }, + { + "epoch": 5.190179806362379, + "grad_norm": 6.21942138671875, + "learning_rate": 2.6721223297986788e-05, + "log_odds_chosen": 10.279722213745117, + "log_odds_ratio": -0.00023209169739857316, + "logits/chosen": -0.28356489539146423, + "logits/rejected": -0.35319802165031433, + "logps/chosen": -0.0003053020918741822, + "logps/rejected": -1.986649513244629, + "loss": 0.7567, + "nll_loss": 0.18914204835891724, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.053020918741822e-05, + "rewards/margins": 0.19863444566726685, + "rewards/rejected": -0.19866496324539185, + "step": 7505 + }, + { + "epoch": 5.190871369294606, + "grad_norm": 5.215263366699219, + "learning_rate": 2.6717381281696637e-05, + "log_odds_chosen": 9.759660720825195, + "log_odds_ratio": -0.0009383500437252223, + "logits/chosen": -0.45862168073654175, + "logits/rejected": -0.5552151799201965, + "logps/chosen": -0.0033110720105469227, + "logps/rejected": -1.696666955947876, + "loss": 0.906, + "nll_loss": 0.22640550136566162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033110720687545836, + "rewards/margins": 0.16933558881282806, + "rewards/rejected": -0.16966669261455536, + "step": 7506 + }, + { + "epoch": 5.191562932226833, + "grad_norm": 13.544431686401367, + "learning_rate": 2.6713539265406486e-05, + "log_odds_chosen": 10.83315372467041, + "log_odds_ratio": -0.0009716550703160465, + "logits/chosen": -0.5825919508934021, + "logits/rejected": -0.6007131934165955, + "logps/chosen": -0.0005834702169522643, + "logps/rejected": -2.635345935821533, + "loss": 1.0757, + "nll_loss": 0.2688401937484741, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.834702096763067e-05, + "rewards/margins": 0.2634762227535248, + "rewards/rejected": -0.2635346055030823, + "step": 7507 + }, + { + "epoch": 5.19225449515906, + "grad_norm": 4.866628170013428, + "learning_rate": 2.6709697249116338e-05, + "log_odds_chosen": 8.859233856201172, + "log_odds_ratio": -0.0007630664622411132, + "logits/chosen": -0.362945556640625, + "logits/rejected": -0.17942696809768677, + "logps/chosen": -0.004629339091479778, + "logps/rejected": -2.050640821456909, + "loss": 1.2956, + "nll_loss": 0.3238285481929779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046293390914797783, + "rewards/margins": 0.20460115373134613, + "rewards/rejected": -0.20506411790847778, + "step": 7508 + }, + { + "epoch": 5.1929460580912865, + "grad_norm": 5.803537845611572, + "learning_rate": 2.6705855232826187e-05, + "log_odds_chosen": 9.54582405090332, + "log_odds_ratio": -0.0006485036574304104, + "logits/chosen": -0.5477535724639893, + "logits/rejected": -0.5572305917739868, + "logps/chosen": -0.007590239401906729, + "logps/rejected": -2.4489498138427734, + "loss": 0.9146, + "nll_loss": 0.22859731316566467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007590239401906729, + "rewards/margins": 0.24413597583770752, + "rewards/rejected": -0.24489499628543854, + "step": 7509 + }, + { + "epoch": 5.193637621023513, + "grad_norm": 5.946167945861816, + "learning_rate": 2.6702013216536036e-05, + "log_odds_chosen": 10.172664642333984, + "log_odds_ratio": -0.00047986849676817656, + "logits/chosen": -0.6982347369194031, + "logits/rejected": -0.703554093837738, + "logps/chosen": -0.0008831677259877324, + "logps/rejected": -2.2994911670684814, + "loss": 0.8376, + "nll_loss": 0.20934252440929413, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.831676677800715e-05, + "rewards/margins": 0.22986078262329102, + "rewards/rejected": -0.22994911670684814, + "step": 7510 + }, + { + "epoch": 5.19432918395574, + "grad_norm": 6.564667701721191, + "learning_rate": 2.6698171200245892e-05, + "log_odds_chosen": 10.04134464263916, + "log_odds_ratio": -9.779791434993967e-05, + "logits/chosen": -0.5647412538528442, + "logits/rejected": -0.5988651514053345, + "logps/chosen": -0.0001927485573105514, + "logps/rejected": -1.5764150619506836, + "loss": 1.0745, + "nll_loss": 0.26861101388931274, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.927485573105514e-05, + "rewards/margins": 0.15762223303318024, + "rewards/rejected": -0.15764150023460388, + "step": 7511 + }, + { + "epoch": 5.195020746887967, + "grad_norm": 6.056695938110352, + "learning_rate": 2.669432918395574e-05, + "log_odds_chosen": 9.677583694458008, + "log_odds_ratio": -0.003684965195134282, + "logits/chosen": -0.4713554084300995, + "logits/rejected": -0.5660151243209839, + "logps/chosen": -0.0013247056631371379, + "logps/rejected": -2.0641555786132812, + "loss": 1.2825, + "nll_loss": 0.320260226726532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001324705663137138, + "rewards/margins": 0.2062830775976181, + "rewards/rejected": -0.2064155638217926, + "step": 7512 + }, + { + "epoch": 5.195712309820194, + "grad_norm": 7.452434539794922, + "learning_rate": 2.669048716766559e-05, + "log_odds_chosen": 10.309123992919922, + "log_odds_ratio": -6.893956015119329e-05, + "logits/chosen": -0.45162904262542725, + "logits/rejected": -0.49090200662612915, + "logps/chosen": -0.00016838658484630287, + "logps/rejected": -1.5242056846618652, + "loss": 0.7136, + "nll_loss": 0.17838457226753235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6838657757034525e-05, + "rewards/margins": 0.15240372717380524, + "rewards/rejected": -0.15242056548595428, + "step": 7513 + }, + { + "epoch": 5.196403872752421, + "grad_norm": 6.046849250793457, + "learning_rate": 2.6686645151375446e-05, + "log_odds_chosen": 10.795588493347168, + "log_odds_ratio": -5.696194421034306e-05, + "logits/chosen": -0.31942465901374817, + "logits/rejected": -0.33498522639274597, + "logps/chosen": -0.0003312878543511033, + "logps/rejected": -2.5096311569213867, + "loss": 1.0985, + "nll_loss": 0.2746131122112274, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3128788345493376e-05, + "rewards/margins": 0.2509300112724304, + "rewards/rejected": -0.25096315145492554, + "step": 7514 + }, + { + "epoch": 5.1970954356846475, + "grad_norm": 7.981918811798096, + "learning_rate": 2.6682803135085295e-05, + "log_odds_chosen": 9.353009223937988, + "log_odds_ratio": -0.007541028317064047, + "logits/chosen": -0.14878655970096588, + "logits/rejected": -0.24716559052467346, + "logps/chosen": -0.05903133004903793, + "logps/rejected": -2.631260871887207, + "loss": 0.8076, + "nll_loss": 0.20113366842269897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005903133191168308, + "rewards/margins": 0.257222980260849, + "rewards/rejected": -0.26312610507011414, + "step": 7515 + }, + { + "epoch": 5.197786998616874, + "grad_norm": 9.73996639251709, + "learning_rate": 2.6678961118795144e-05, + "log_odds_chosen": 10.649823188781738, + "log_odds_ratio": -4.373151023173705e-05, + "logits/chosen": -0.6496425867080688, + "logits/rejected": -0.8041089773178101, + "logps/chosen": -0.00027097389101982117, + "logps/rejected": -2.0425217151641846, + "loss": 0.6071, + "nll_loss": 0.15177232027053833, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7097388738184236e-05, + "rewards/margins": 0.2042250782251358, + "rewards/rejected": -0.2042521834373474, + "step": 7516 + }, + { + "epoch": 5.198478561549101, + "grad_norm": 6.225884437561035, + "learning_rate": 2.6675119102504997e-05, + "log_odds_chosen": 10.766189575195312, + "log_odds_ratio": -0.0013470555422827601, + "logits/chosen": -0.20639252662658691, + "logits/rejected": -0.2310691624879837, + "logps/chosen": -0.000624267035163939, + "logps/rejected": -2.141397714614868, + "loss": 0.9381, + "nll_loss": 0.23438991606235504, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.242669769562781e-05, + "rewards/margins": 0.21407735347747803, + "rewards/rejected": -0.21413977444171906, + "step": 7517 + }, + { + "epoch": 5.199170124481328, + "grad_norm": 6.064988136291504, + "learning_rate": 2.6671277086214846e-05, + "log_odds_chosen": 9.96807861328125, + "log_odds_ratio": -0.00041285439510829747, + "logits/chosen": -0.6431621313095093, + "logits/rejected": -0.766636848449707, + "logps/chosen": -0.002066761488094926, + "logps/rejected": -2.4709391593933105, + "loss": 0.744, + "nll_loss": 0.185963436961174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020667616627179086, + "rewards/margins": 0.24688725173473358, + "rewards/rejected": -0.24709393084049225, + "step": 7518 + }, + { + "epoch": 5.199861687413555, + "grad_norm": 8.340907096862793, + "learning_rate": 2.6667435069924695e-05, + "log_odds_chosen": 8.846458435058594, + "log_odds_ratio": -0.0005865695420652628, + "logits/chosen": -0.24171032011508942, + "logits/rejected": -0.4122176766395569, + "logps/chosen": -0.0007772729732096195, + "logps/rejected": -1.002537488937378, + "loss": 1.177, + "nll_loss": 0.294181764125824, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.772730168653652e-05, + "rewards/margins": 0.10017602145671844, + "rewards/rejected": -0.10025375336408615, + "step": 7519 + }, + { + "epoch": 5.200553250345782, + "grad_norm": 10.035767555236816, + "learning_rate": 2.666359305363455e-05, + "log_odds_chosen": 10.078902244567871, + "log_odds_ratio": -0.000265401613432914, + "logits/chosen": -0.48366451263427734, + "logits/rejected": -0.5781936645507812, + "logps/chosen": -0.00022505372180603445, + "logps/rejected": -1.8617677688598633, + "loss": 0.9025, + "nll_loss": 0.22558824717998505, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2505371816805564e-05, + "rewards/margins": 0.18615427613258362, + "rewards/rejected": -0.18617677688598633, + "step": 7520 + }, + { + "epoch": 5.2012448132780085, + "grad_norm": 10.484762191772461, + "learning_rate": 2.66597510373444e-05, + "log_odds_chosen": 10.321274757385254, + "log_odds_ratio": -0.0001440600899513811, + "logits/chosen": -0.7540542483329773, + "logits/rejected": -0.7682251334190369, + "logps/chosen": -0.000749644823372364, + "logps/rejected": -2.1048970222473145, + "loss": 1.059, + "nll_loss": 0.26474690437316895, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.49644823372364e-05, + "rewards/margins": 0.21041473746299744, + "rewards/rejected": -0.21048972010612488, + "step": 7521 + }, + { + "epoch": 5.201936376210235, + "grad_norm": 10.707878112792969, + "learning_rate": 2.665590902105425e-05, + "log_odds_chosen": 10.353931427001953, + "log_odds_ratio": -0.00016276889073196799, + "logits/chosen": -0.08579882979393005, + "logits/rejected": -0.1549488604068756, + "logps/chosen": -0.00023727120424155146, + "logps/rejected": -1.4980394840240479, + "loss": 1.1418, + "nll_loss": 0.2854325771331787, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3727119696559384e-05, + "rewards/margins": 0.14978022873401642, + "rewards/rejected": -0.14980396628379822, + "step": 7522 + }, + { + "epoch": 5.202627939142462, + "grad_norm": 16.987855911254883, + "learning_rate": 2.6652067004764105e-05, + "log_odds_chosen": 11.349204063415527, + "log_odds_ratio": -6.56421689200215e-05, + "logits/chosen": -0.16829033195972443, + "logits/rejected": -0.2914985418319702, + "logps/chosen": -0.00012773709022440016, + "logps/rejected": -2.0962631702423096, + "loss": 0.7947, + "nll_loss": 0.19866439700126648, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2773710295732599e-05, + "rewards/margins": 0.20961356163024902, + "rewards/rejected": -0.20962633192539215, + "step": 7523 + }, + { + "epoch": 5.203319502074689, + "grad_norm": 12.814325332641602, + "learning_rate": 2.6648224988473954e-05, + "log_odds_chosen": 10.35975170135498, + "log_odds_ratio": -0.00010918633779510856, + "logits/chosen": -0.32887110114097595, + "logits/rejected": -0.4124990999698639, + "logps/chosen": -0.0007760451408103108, + "logps/rejected": -2.509739398956299, + "loss": 0.8109, + "nll_loss": 0.20270992815494537, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.760451262583956e-05, + "rewards/margins": 0.2508963346481323, + "rewards/rejected": -0.2509739100933075, + "step": 7524 + }, + { + "epoch": 5.204011065006916, + "grad_norm": 18.52150535583496, + "learning_rate": 2.6644382972183803e-05, + "log_odds_chosen": 10.428438186645508, + "log_odds_ratio": -5.791490548290312e-05, + "logits/chosen": -0.35513511300086975, + "logits/rejected": -0.3227297067642212, + "logps/chosen": -0.00031158284400589764, + "logps/rejected": -2.353970766067505, + "loss": 0.9356, + "nll_loss": 0.23389127850532532, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1158284400589764e-05, + "rewards/margins": 0.23536591231822968, + "rewards/rejected": -0.235397070646286, + "step": 7525 + }, + { + "epoch": 5.204702627939143, + "grad_norm": 7.042150974273682, + "learning_rate": 2.6640540955893655e-05, + "log_odds_chosen": 9.846081733703613, + "log_odds_ratio": -0.0005817461060360074, + "logits/chosen": -0.5193594098091125, + "logits/rejected": -0.5902177095413208, + "logps/chosen": -0.02698969841003418, + "logps/rejected": -2.1079354286193848, + "loss": 0.8825, + "nll_loss": 0.2205757051706314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002698970027267933, + "rewards/margins": 0.20809456706047058, + "rewards/rejected": -0.21079353988170624, + "step": 7526 + }, + { + "epoch": 5.2053941908713695, + "grad_norm": 7.628520488739014, + "learning_rate": 2.6636698939603504e-05, + "log_odds_chosen": 11.020240783691406, + "log_odds_ratio": -0.0005862127291038632, + "logits/chosen": -0.2743455171585083, + "logits/rejected": -0.2303268015384674, + "logps/chosen": -0.0004525255935732275, + "logps/rejected": -2.5413033962249756, + "loss": 1.0111, + "nll_loss": 0.2527076303958893, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5252560084918514e-05, + "rewards/margins": 0.25408506393432617, + "rewards/rejected": -0.2541303336620331, + "step": 7527 + }, + { + "epoch": 5.206085753803596, + "grad_norm": 7.396105766296387, + "learning_rate": 2.6632856923313353e-05, + "log_odds_chosen": 11.09078311920166, + "log_odds_ratio": -3.599739648052491e-05, + "logits/chosen": -0.522323727607727, + "logits/rejected": -0.5345219373703003, + "logps/chosen": -0.0001351584796793759, + "logps/rejected": -2.2688066959381104, + "loss": 0.6788, + "nll_loss": 0.16970443725585938, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3515847058442887e-05, + "rewards/margins": 0.22686713933944702, + "rewards/rejected": -0.22688066959381104, + "step": 7528 + }, + { + "epoch": 5.206777316735823, + "grad_norm": 8.117511749267578, + "learning_rate": 2.662901490702321e-05, + "log_odds_chosen": 10.91192626953125, + "log_odds_ratio": -3.3292169973719865e-05, + "logits/chosen": -0.2902049422264099, + "logits/rejected": -0.3024437427520752, + "logps/chosen": -0.00023253005929291248, + "logps/rejected": -2.238485336303711, + "loss": 0.9055, + "nll_loss": 0.22636531293392181, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3253005565493368e-05, + "rewards/margins": 0.22382529079914093, + "rewards/rejected": -0.22384853661060333, + "step": 7529 + }, + { + "epoch": 5.20746887966805, + "grad_norm": 8.098960876464844, + "learning_rate": 2.6625172890733058e-05, + "log_odds_chosen": 11.654924392700195, + "log_odds_ratio": -1.6672003766871057e-05, + "logits/chosen": -0.6740207672119141, + "logits/rejected": -0.7895724773406982, + "logps/chosen": -0.00012381460692267865, + "logps/rejected": -2.4390528202056885, + "loss": 0.7303, + "nll_loss": 0.18256288766860962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2381460692267865e-05, + "rewards/margins": 0.24389290809631348, + "rewards/rejected": -0.24390527606010437, + "step": 7530 + }, + { + "epoch": 5.208160442600277, + "grad_norm": 7.831466197967529, + "learning_rate": 2.6621330874442907e-05, + "log_odds_chosen": 11.02690601348877, + "log_odds_ratio": -2.8968894184799865e-05, + "logits/chosen": -0.7625277638435364, + "logits/rejected": -0.8582916259765625, + "logps/chosen": -0.00014673579426016659, + "logps/rejected": -2.1968586444854736, + "loss": 0.7601, + "nll_loss": 0.1900177001953125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4673579244117718e-05, + "rewards/margins": 0.21967121958732605, + "rewards/rejected": -0.2196858823299408, + "step": 7531 + }, + { + "epoch": 5.208852005532504, + "grad_norm": 9.437521934509277, + "learning_rate": 2.6617488858152763e-05, + "log_odds_chosen": 9.570180892944336, + "log_odds_ratio": -0.00027336826315149665, + "logits/chosen": -0.15018832683563232, + "logits/rejected": -0.3415522575378418, + "logps/chosen": -0.0005937953246757388, + "logps/rejected": -1.3931825160980225, + "loss": 0.8658, + "nll_loss": 0.21641241014003754, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9379533922765404e-05, + "rewards/margins": 0.13925886154174805, + "rewards/rejected": -0.13931825757026672, + "step": 7532 + }, + { + "epoch": 5.20954356846473, + "grad_norm": 10.934542655944824, + "learning_rate": 2.6613646841862612e-05, + "log_odds_chosen": 10.167157173156738, + "log_odds_ratio": -8.426292333751917e-05, + "logits/chosen": -0.5966573357582092, + "logits/rejected": -0.6782484650611877, + "logps/chosen": -0.000651887443382293, + "logps/rejected": -1.9049832820892334, + "loss": 1.2315, + "nll_loss": 0.3078649044036865, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518874579342082e-05, + "rewards/margins": 0.19043314456939697, + "rewards/rejected": -0.19049833714962006, + "step": 7533 + }, + { + "epoch": 5.210235131396957, + "grad_norm": 9.348286628723145, + "learning_rate": 2.660980482557246e-05, + "log_odds_chosen": 9.688858032226562, + "log_odds_ratio": -0.004896416794508696, + "logits/chosen": -0.4835703670978546, + "logits/rejected": -0.6681973934173584, + "logps/chosen": -0.002467024838551879, + "logps/rejected": -2.4683291912078857, + "loss": 0.7232, + "nll_loss": 0.18031813204288483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000246702489675954, + "rewards/margins": 0.24658623337745667, + "rewards/rejected": -0.2468329221010208, + "step": 7534 + }, + { + "epoch": 5.210926694329184, + "grad_norm": 8.002618789672852, + "learning_rate": 2.6605962809282314e-05, + "log_odds_chosen": 10.053289413452148, + "log_odds_ratio": -0.00027556309942156076, + "logits/chosen": -0.28471508622169495, + "logits/rejected": -0.2700623571872711, + "logps/chosen": -0.0002975426323246211, + "logps/rejected": -1.8266332149505615, + "loss": 1.0783, + "nll_loss": 0.2695525884628296, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9754261049674824e-05, + "rewards/margins": 0.18263356387615204, + "rewards/rejected": -0.18266333639621735, + "step": 7535 + }, + { + "epoch": 5.211618257261411, + "grad_norm": 9.897976875305176, + "learning_rate": 2.6602120792992163e-05, + "log_odds_chosen": 9.274949073791504, + "log_odds_ratio": -0.00024828262394294143, + "logits/chosen": 0.012322517111897469, + "logits/rejected": -0.06911614537239075, + "logps/chosen": -0.0011005365522578359, + "logps/rejected": -1.8486918210983276, + "loss": 1.2116, + "nll_loss": 0.30286920070648193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011005365377059206, + "rewards/margins": 0.18475914001464844, + "rewards/rejected": -0.1848691999912262, + "step": 7536 + }, + { + "epoch": 5.212309820193638, + "grad_norm": 5.6801934242248535, + "learning_rate": 2.659827877670201e-05, + "log_odds_chosen": 9.88988971710205, + "log_odds_ratio": -8.744581282371655e-05, + "logits/chosen": -0.3401045799255371, + "logits/rejected": -0.4230215847492218, + "logps/chosen": -0.000338320794980973, + "logps/rejected": -1.680631160736084, + "loss": 0.6281, + "nll_loss": 0.15702250599861145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.383207877050154e-05, + "rewards/margins": 0.1680292785167694, + "rewards/rejected": -0.16806311905384064, + "step": 7537 + }, + { + "epoch": 5.213001383125865, + "grad_norm": 7.96742582321167, + "learning_rate": 2.6594436760411867e-05, + "log_odds_chosen": 11.109058380126953, + "log_odds_ratio": -7.072191510815173e-05, + "logits/chosen": -0.35100793838500977, + "logits/rejected": -0.40755900740623474, + "logps/chosen": -0.00039556881529279053, + "logps/rejected": -2.5512094497680664, + "loss": 0.6497, + "nll_loss": 0.16241194307804108, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9556882256874815e-05, + "rewards/margins": 0.2550814151763916, + "rewards/rejected": -0.2551209628582001, + "step": 7538 + }, + { + "epoch": 5.213692946058091, + "grad_norm": 8.653180122375488, + "learning_rate": 2.6590594744121717e-05, + "log_odds_chosen": 10.56446361541748, + "log_odds_ratio": -0.0005890832981094718, + "logits/chosen": -0.39483940601348877, + "logits/rejected": -0.4310857355594635, + "logps/chosen": -0.0010279006091877818, + "logps/rejected": -2.5635743141174316, + "loss": 0.9006, + "nll_loss": 0.22507987916469574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010279006528435275, + "rewards/margins": 0.256254643201828, + "rewards/rejected": -0.25635743141174316, + "step": 7539 + }, + { + "epoch": 5.214384508990318, + "grad_norm": 7.798567295074463, + "learning_rate": 2.6586752727831566e-05, + "log_odds_chosen": 11.562299728393555, + "log_odds_ratio": -3.5191998904338107e-05, + "logits/chosen": -0.5837400555610657, + "logits/rejected": -0.5905430912971497, + "logps/chosen": -0.00020522023260127753, + "logps/rejected": -2.3719401359558105, + "loss": 0.7627, + "nll_loss": 0.19066068530082703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052202216873411e-05, + "rewards/margins": 0.23717349767684937, + "rewards/rejected": -0.2371940165758133, + "step": 7540 + }, + { + "epoch": 5.215076071922545, + "grad_norm": 14.764293670654297, + "learning_rate": 2.658291071154142e-05, + "log_odds_chosen": 10.847089767456055, + "log_odds_ratio": -3.9235426811501384e-05, + "logits/chosen": -0.2103194147348404, + "logits/rejected": -0.24678084254264832, + "logps/chosen": -0.0003935906570404768, + "logps/rejected": -2.3666625022888184, + "loss": 0.7798, + "nll_loss": 0.1949402391910553, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9359063521260396e-05, + "rewards/margins": 0.23662689328193665, + "rewards/rejected": -0.2366662621498108, + "step": 7541 + }, + { + "epoch": 5.215767634854772, + "grad_norm": 7.255589485168457, + "learning_rate": 2.657906869525127e-05, + "log_odds_chosen": 10.368997573852539, + "log_odds_ratio": -0.0005649956874549389, + "logits/chosen": -0.33564862608909607, + "logits/rejected": -0.42874041199684143, + "logps/chosen": -0.0008114329539239407, + "logps/rejected": -2.0210976600646973, + "loss": 0.6028, + "nll_loss": 0.15064597129821777, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.11432910268195e-05, + "rewards/margins": 0.20202863216400146, + "rewards/rejected": -0.20210978388786316, + "step": 7542 + }, + { + "epoch": 5.216459197786999, + "grad_norm": 6.3288350105285645, + "learning_rate": 2.657522667896112e-05, + "log_odds_chosen": 10.309673309326172, + "log_odds_ratio": -0.0007493701996281743, + "logits/chosen": -0.528795599937439, + "logits/rejected": -0.4613361358642578, + "logps/chosen": -0.0005491769406944513, + "logps/rejected": -1.9666938781738281, + "loss": 0.7648, + "nll_loss": 0.19112075865268707, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4917694797040895e-05, + "rewards/margins": 0.19661447405815125, + "rewards/rejected": -0.19666936993598938, + "step": 7543 + }, + { + "epoch": 5.217150760719226, + "grad_norm": 4.966573715209961, + "learning_rate": 2.6571384662670972e-05, + "log_odds_chosen": 10.328919410705566, + "log_odds_ratio": -4.260972491465509e-05, + "logits/chosen": -0.5825825929641724, + "logits/rejected": -0.6369431614875793, + "logps/chosen": -0.00013902317732572556, + "logps/rejected": -1.5345927476882935, + "loss": 0.5713, + "nll_loss": 0.14283086359500885, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3902318642067257e-05, + "rewards/margins": 0.15344536304473877, + "rewards/rejected": -0.15345928072929382, + "step": 7544 + }, + { + "epoch": 5.217842323651452, + "grad_norm": 8.431014060974121, + "learning_rate": 2.656754264638082e-05, + "log_odds_chosen": 10.66819953918457, + "log_odds_ratio": -4.8884499847190455e-05, + "logits/chosen": -0.7278584241867065, + "logits/rejected": -0.7446757555007935, + "logps/chosen": -0.0006141972844488919, + "logps/rejected": -2.559722423553467, + "loss": 0.7858, + "nll_loss": 0.1964486688375473, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.141973426565528e-05, + "rewards/margins": 0.25591087341308594, + "rewards/rejected": -0.2559722661972046, + "step": 7545 + }, + { + "epoch": 5.218533886583679, + "grad_norm": 19.670764923095703, + "learning_rate": 2.656370063009067e-05, + "log_odds_chosen": 10.58185863494873, + "log_odds_ratio": -7.996035856194794e-05, + "logits/chosen": -0.1423773467540741, + "logits/rejected": -0.23043018579483032, + "logps/chosen": -0.00021502267918549478, + "logps/rejected": -2.1016366481781006, + "loss": 1.1633, + "nll_loss": 0.29081207513809204, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.150226828234736e-05, + "rewards/margins": 0.21014216542243958, + "rewards/rejected": -0.2101636528968811, + "step": 7546 + }, + { + "epoch": 5.219225449515906, + "grad_norm": 22.079612731933594, + "learning_rate": 2.6559858613800526e-05, + "log_odds_chosen": 9.88987922668457, + "log_odds_ratio": -0.0003320075338706374, + "logits/chosen": -0.7029349207878113, + "logits/rejected": -0.8374977111816406, + "logps/chosen": -0.0006211322615854442, + "logps/rejected": -1.8706246614456177, + "loss": 0.6639, + "nll_loss": 0.16593071818351746, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.211322033777833e-05, + "rewards/margins": 0.1870003342628479, + "rewards/rejected": -0.18706245720386505, + "step": 7547 + }, + { + "epoch": 5.219917012448133, + "grad_norm": 8.58415699005127, + "learning_rate": 2.6556016597510375e-05, + "log_odds_chosen": 10.443418502807617, + "log_odds_ratio": -4.11863875342533e-05, + "logits/chosen": -0.5435079336166382, + "logits/rejected": -0.5880762338638306, + "logps/chosen": -0.0008465655264444649, + "logps/rejected": -2.0463433265686035, + "loss": 0.979, + "nll_loss": 0.2447490394115448, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.46565599204041e-05, + "rewards/margins": 0.2045496702194214, + "rewards/rejected": -0.20463432371616364, + "step": 7548 + }, + { + "epoch": 5.22060857538036, + "grad_norm": 19.33316421508789, + "learning_rate": 2.6552174581220224e-05, + "log_odds_chosen": 9.962740898132324, + "log_odds_ratio": -0.0001467976690037176, + "logits/chosen": -0.22345323860645294, + "logits/rejected": -0.23257654905319214, + "logps/chosen": -0.0007146099815145135, + "logps/rejected": -1.6609793901443481, + "loss": 1.134, + "nll_loss": 0.28347694873809814, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.146099233068526e-05, + "rewards/margins": 0.16602648794651031, + "rewards/rejected": -0.16609793901443481, + "step": 7549 + }, + { + "epoch": 5.2213001383125865, + "grad_norm": 7.09939432144165, + "learning_rate": 2.654833256493008e-05, + "log_odds_chosen": 9.59273910522461, + "log_odds_ratio": -0.0005566454492509365, + "logits/chosen": -0.44439682364463806, + "logits/rejected": -0.3926330804824829, + "logps/chosen": -0.0005894859787076712, + "logps/rejected": -1.5625590085983276, + "loss": 0.9567, + "nll_loss": 0.2391187995672226, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8948597143171355e-05, + "rewards/margins": 0.1561969518661499, + "rewards/rejected": -0.15625590085983276, + "step": 7550 + }, + { + "epoch": 5.221991701244813, + "grad_norm": 23.660127639770508, + "learning_rate": 2.654449054863993e-05, + "log_odds_chosen": 10.068212509155273, + "log_odds_ratio": -0.00010939614730887115, + "logits/chosen": -0.6090282797813416, + "logits/rejected": -0.6502014994621277, + "logps/chosen": -0.0002628647198434919, + "logps/rejected": -1.9712339639663696, + "loss": 0.8848, + "nll_loss": 0.22119513154029846, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6286472348147072e-05, + "rewards/margins": 0.19709712266921997, + "rewards/rejected": -0.19712340831756592, + "step": 7551 + }, + { + "epoch": 5.22268326417704, + "grad_norm": 8.631831169128418, + "learning_rate": 2.6540648532349778e-05, + "log_odds_chosen": 9.057621955871582, + "log_odds_ratio": -0.0005410752492025495, + "logits/chosen": -0.7084760665893555, + "logits/rejected": -0.5085403919219971, + "logps/chosen": -0.0005544874002225697, + "logps/rejected": -1.2910645008087158, + "loss": 1.2109, + "nll_loss": 0.3026607632637024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.54487451154273e-05, + "rewards/margins": 0.12905099987983704, + "rewards/rejected": -0.12910646200180054, + "step": 7552 + }, + { + "epoch": 5.223374827109267, + "grad_norm": 10.814403533935547, + "learning_rate": 2.653680651605963e-05, + "log_odds_chosen": 11.15772819519043, + "log_odds_ratio": -3.051651037822012e-05, + "logits/chosen": -0.7379977107048035, + "logits/rejected": -0.7769466638565063, + "logps/chosen": -0.00015586796507705003, + "logps/rejected": -1.9965726137161255, + "loss": 1.3236, + "nll_loss": 0.330906480550766, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5586796507705003e-05, + "rewards/margins": 0.19964167475700378, + "rewards/rejected": -0.19965726137161255, + "step": 7553 + }, + { + "epoch": 5.224066390041494, + "grad_norm": 10.185839653015137, + "learning_rate": 2.653296449976948e-05, + "log_odds_chosen": 10.938926696777344, + "log_odds_ratio": -5.6243785365950316e-05, + "logits/chosen": -0.8716825246810913, + "logits/rejected": -0.8651461601257324, + "logps/chosen": -0.0005617404822260141, + "logps/rejected": -2.5082576274871826, + "loss": 0.7537, + "nll_loss": 0.18841758370399475, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617404531221837e-05, + "rewards/margins": 0.25076958537101746, + "rewards/rejected": -0.25082576274871826, + "step": 7554 + }, + { + "epoch": 5.224757952973721, + "grad_norm": 10.396666526794434, + "learning_rate": 2.652912248347933e-05, + "log_odds_chosen": 10.43869686126709, + "log_odds_ratio": -7.070177525747567e-05, + "logits/chosen": -0.36217403411865234, + "logits/rejected": -0.410467267036438, + "logps/chosen": -0.00030194493592716753, + "logps/rejected": -1.7846906185150146, + "loss": 1.4544, + "nll_loss": 0.3636040687561035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.01944965030998e-05, + "rewards/margins": 0.17843888700008392, + "rewards/rejected": -0.17846906185150146, + "step": 7555 + }, + { + "epoch": 5.2254495159059475, + "grad_norm": 7.918328285217285, + "learning_rate": 2.6525280467189184e-05, + "log_odds_chosen": 10.289556503295898, + "log_odds_ratio": -0.00011836655903607607, + "logits/chosen": -0.7381885647773743, + "logits/rejected": -0.7799155712127686, + "logps/chosen": -0.0009419352281838655, + "logps/rejected": -2.351292133331299, + "loss": 0.7091, + "nll_loss": 0.1772594451904297, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.419352136319503e-05, + "rewards/margins": 0.2350350320339203, + "rewards/rejected": -0.2351292073726654, + "step": 7556 + }, + { + "epoch": 5.226141078838174, + "grad_norm": 11.101275444030762, + "learning_rate": 2.6521438450899033e-05, + "log_odds_chosen": 11.636258125305176, + "log_odds_ratio": -1.3528469025914092e-05, + "logits/chosen": -0.5852770209312439, + "logits/rejected": -0.699225127696991, + "logps/chosen": -0.00014542852295562625, + "logps/rejected": -2.403111457824707, + "loss": 0.6832, + "nll_loss": 0.17079591751098633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4542852113663685e-05, + "rewards/margins": 0.2402966022491455, + "rewards/rejected": -0.2403111457824707, + "step": 7557 + }, + { + "epoch": 5.226832641770401, + "grad_norm": 6.832596302032471, + "learning_rate": 2.6517596434608882e-05, + "log_odds_chosen": 9.619921684265137, + "log_odds_ratio": -0.0004081670194864273, + "logits/chosen": -0.6781374216079712, + "logits/rejected": -0.6394006013870239, + "logps/chosen": -0.00045663962373510003, + "logps/rejected": -1.9987691640853882, + "loss": 1.0768, + "nll_loss": 0.2691575586795807, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.566396091831848e-05, + "rewards/margins": 0.1998312622308731, + "rewards/rejected": -0.19987693428993225, + "step": 7558 + }, + { + "epoch": 5.227524204702628, + "grad_norm": 15.910563468933105, + "learning_rate": 2.6513754418318738e-05, + "log_odds_chosen": 11.075983047485352, + "log_odds_ratio": -3.4982949728146195e-05, + "logits/chosen": -0.7346349358558655, + "logits/rejected": -0.8451979756355286, + "logps/chosen": -0.00029519235249608755, + "logps/rejected": -2.4787511825561523, + "loss": 0.5291, + "nll_loss": 0.13227322697639465, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9519236704800278e-05, + "rewards/margins": 0.2478456050157547, + "rewards/rejected": -0.2478751242160797, + "step": 7559 + }, + { + "epoch": 5.228215767634855, + "grad_norm": 4.949207305908203, + "learning_rate": 2.6509912402028587e-05, + "log_odds_chosen": 10.036555290222168, + "log_odds_ratio": -7.955866021802649e-05, + "logits/chosen": -0.5480470657348633, + "logits/rejected": -0.4952780604362488, + "logps/chosen": -0.000600629486143589, + "logps/rejected": -2.0421175956726074, + "loss": 1.3625, + "nll_loss": 0.34061944484710693, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0062950069550425e-05, + "rewards/margins": 0.2041517198085785, + "rewards/rejected": -0.2042117714881897, + "step": 7560 + }, + { + "epoch": 5.228907330567082, + "grad_norm": 7.809098720550537, + "learning_rate": 2.6506070385738436e-05, + "log_odds_chosen": 10.17052936553955, + "log_odds_ratio": -0.00016491710266564041, + "logits/chosen": -0.6219543814659119, + "logits/rejected": -0.6439845561981201, + "logps/chosen": -0.00039262970676645637, + "logps/rejected": -2.2306129932403564, + "loss": 0.7724, + "nll_loss": 0.19308488070964813, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9262969949049875e-05, + "rewards/margins": 0.22302204370498657, + "rewards/rejected": -0.22306130826473236, + "step": 7561 + }, + { + "epoch": 5.2295988934993085, + "grad_norm": 6.184190273284912, + "learning_rate": 2.650222836944829e-05, + "log_odds_chosen": 10.522407531738281, + "log_odds_ratio": -0.0001425845839548856, + "logits/chosen": -0.6801462769508362, + "logits/rejected": -0.6958547234535217, + "logps/chosen": -0.005194925703108311, + "logps/rejected": -2.2992043495178223, + "loss": 0.6993, + "nll_loss": 0.1748044192790985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005194926052354276, + "rewards/margins": 0.22940094769001007, + "rewards/rejected": -0.22992043197155, + "step": 7562 + }, + { + "epoch": 5.230290456431535, + "grad_norm": 6.153726100921631, + "learning_rate": 2.6498386353158138e-05, + "log_odds_chosen": 10.447622299194336, + "log_odds_ratio": -0.0019986850675195456, + "logits/chosen": -0.2738765478134155, + "logits/rejected": -0.29188835620880127, + "logps/chosen": -0.00196833279915154, + "logps/rejected": -2.315131664276123, + "loss": 0.8464, + "nll_loss": 0.21140965819358826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001968332944670692, + "rewards/margins": 0.23131635785102844, + "rewards/rejected": -0.23151318728923798, + "step": 7563 + }, + { + "epoch": 5.230982019363762, + "grad_norm": 8.70680046081543, + "learning_rate": 2.6494544336867987e-05, + "log_odds_chosen": 9.893153190612793, + "log_odds_ratio": -0.0008131297072395682, + "logits/chosen": -0.281112939119339, + "logits/rejected": -0.32066017389297485, + "logps/chosen": -0.0007761477027088404, + "logps/rejected": -1.8777573108673096, + "loss": 1.1029, + "nll_loss": 0.2756534218788147, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.761476445011795e-05, + "rewards/margins": 0.1876981258392334, + "rewards/rejected": -0.18777573108673096, + "step": 7564 + }, + { + "epoch": 5.231673582295989, + "grad_norm": 12.37846851348877, + "learning_rate": 2.6490702320577843e-05, + "log_odds_chosen": 12.095224380493164, + "log_odds_ratio": -1.5422276192111894e-05, + "logits/chosen": -0.6123469471931458, + "logits/rejected": -0.6750447750091553, + "logps/chosen": -7.237125828396529e-05, + "logps/rejected": -2.5945072174072266, + "loss": 0.7965, + "nll_loss": 0.19912873208522797, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.23712628314388e-06, + "rewards/margins": 0.259443461894989, + "rewards/rejected": -0.2594507038593292, + "step": 7565 + }, + { + "epoch": 5.232365145228216, + "grad_norm": 6.599029541015625, + "learning_rate": 2.6486860304287692e-05, + "log_odds_chosen": 9.663243293762207, + "log_odds_ratio": -0.00901144091039896, + "logits/chosen": -0.5348978042602539, + "logits/rejected": -0.5369443893432617, + "logps/chosen": -0.002337898127734661, + "logps/rejected": -2.131387710571289, + "loss": 0.6999, + "nll_loss": 0.17407315969467163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023378983314614743, + "rewards/margins": 0.21290498971939087, + "rewards/rejected": -0.21313877403736115, + "step": 7566 + }, + { + "epoch": 5.233056708160443, + "grad_norm": 17.254377365112305, + "learning_rate": 2.648301828799754e-05, + "log_odds_chosen": 10.758286476135254, + "log_odds_ratio": -8.570626232540235e-05, + "logits/chosen": -0.32616153359413147, + "logits/rejected": -0.43125247955322266, + "logps/chosen": -0.0003337445668876171, + "logps/rejected": -2.5154995918273926, + "loss": 0.9858, + "nll_loss": 0.24644559621810913, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3374453778378665e-05, + "rewards/margins": 0.2515166103839874, + "rewards/rejected": -0.25154998898506165, + "step": 7567 + }, + { + "epoch": 5.2337482710926695, + "grad_norm": 8.67546558380127, + "learning_rate": 2.6479176271707397e-05, + "log_odds_chosen": 10.991742134094238, + "log_odds_ratio": -5.405677075032145e-05, + "logits/chosen": -0.7275859117507935, + "logits/rejected": -0.7737497091293335, + "logps/chosen": -8.817527850624174e-05, + "logps/rejected": -1.8147327899932861, + "loss": 0.8772, + "nll_loss": 0.21929170191287994, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.817527486826293e-06, + "rewards/margins": 0.18146446347236633, + "rewards/rejected": -0.18147329986095428, + "step": 7568 + }, + { + "epoch": 5.234439834024896, + "grad_norm": 11.16156005859375, + "learning_rate": 2.6475334255417246e-05, + "log_odds_chosen": 10.978002548217773, + "log_odds_ratio": -3.5411394492257386e-05, + "logits/chosen": -0.5314689874649048, + "logits/rejected": -0.5226581692695618, + "logps/chosen": -0.00018502044258639216, + "logps/rejected": -2.1848983764648438, + "loss": 0.8016, + "nll_loss": 0.20040425658226013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.850204535003286e-05, + "rewards/margins": 0.21847134828567505, + "rewards/rejected": -0.2184898406267166, + "step": 7569 + }, + { + "epoch": 5.235131396957123, + "grad_norm": 4.604856491088867, + "learning_rate": 2.6471492239127095e-05, + "log_odds_chosen": 10.077240943908691, + "log_odds_ratio": -0.0003931020328309387, + "logits/chosen": -0.3274500370025635, + "logits/rejected": -0.30237939953804016, + "logps/chosen": -0.00028508197283372283, + "logps/rejected": -1.58601713180542, + "loss": 0.5674, + "nll_loss": 0.14182157814502716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8508195100585e-05, + "rewards/margins": 0.1585732102394104, + "rewards/rejected": -0.15860171616077423, + "step": 7570 + }, + { + "epoch": 5.23582295988935, + "grad_norm": 9.677464485168457, + "learning_rate": 2.6467650222836947e-05, + "log_odds_chosen": 11.102912902832031, + "log_odds_ratio": -5.850612433278002e-05, + "logits/chosen": -0.3548862338066101, + "logits/rejected": -0.38852009177207947, + "logps/chosen": -0.00034190929727628827, + "logps/rejected": -3.049135684967041, + "loss": 0.7139, + "nll_loss": 0.17846885323524475, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4190929000033066e-05, + "rewards/margins": 0.304879367351532, + "rewards/rejected": -0.30491358041763306, + "step": 7571 + }, + { + "epoch": 5.236514522821577, + "grad_norm": 6.259531497955322, + "learning_rate": 2.6463808206546796e-05, + "log_odds_chosen": 10.08462142944336, + "log_odds_ratio": -0.00012527560465969145, + "logits/chosen": -0.8586251735687256, + "logits/rejected": -0.914375901222229, + "logps/chosen": -0.0004578919615596533, + "logps/rejected": -1.9007015228271484, + "loss": 0.6399, + "nll_loss": 0.1599629819393158, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5789194700773805e-05, + "rewards/margins": 0.19002437591552734, + "rewards/rejected": -0.19007018208503723, + "step": 7572 + }, + { + "epoch": 5.237206085753804, + "grad_norm": 9.60282039642334, + "learning_rate": 2.6459966190256645e-05, + "log_odds_chosen": 10.403770446777344, + "log_odds_ratio": -0.00023783154028933495, + "logits/chosen": -0.27836769819259644, + "logits/rejected": -0.3159245252609253, + "logps/chosen": -0.0008468222804367542, + "logps/rejected": -2.479321002960205, + "loss": 1.2405, + "nll_loss": 0.3101038932800293, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.468222949886695e-05, + "rewards/margins": 0.24784742295742035, + "rewards/rejected": -0.24793210625648499, + "step": 7573 + }, + { + "epoch": 5.2378976486860305, + "grad_norm": 10.499727249145508, + "learning_rate": 2.64561241739665e-05, + "log_odds_chosen": 11.214341163635254, + "log_odds_ratio": -5.0808059313567355e-05, + "logits/chosen": -0.6644982099533081, + "logits/rejected": -0.6907986998558044, + "logps/chosen": -0.00016903187497518957, + "logps/rejected": -2.415891408920288, + "loss": 0.7948, + "nll_loss": 0.19869248569011688, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6903188225114718e-05, + "rewards/margins": 0.24157223105430603, + "rewards/rejected": -0.24158914387226105, + "step": 7574 + }, + { + "epoch": 5.238589211618257, + "grad_norm": 19.77359962463379, + "learning_rate": 2.645228215767635e-05, + "log_odds_chosen": 7.320486068725586, + "log_odds_ratio": -0.5365301966667175, + "logits/chosen": -0.5406480431556702, + "logits/rejected": -0.6060658693313599, + "logps/chosen": -0.06338723003864288, + "logps/rejected": -1.2642097473144531, + "loss": 0.8243, + "nll_loss": 0.15242083370685577, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.006338723469525576, + "rewards/margins": 0.12008225172758102, + "rewards/rejected": -0.1264209747314453, + "step": 7575 + }, + { + "epoch": 5.239280774550484, + "grad_norm": 9.19914436340332, + "learning_rate": 2.64484401413862e-05, + "log_odds_chosen": 10.021259307861328, + "log_odds_ratio": -0.0002777604095172137, + "logits/chosen": -0.5509160757064819, + "logits/rejected": -0.5600121021270752, + "logps/chosen": -0.0009588706307113171, + "logps/rejected": -2.2768490314483643, + "loss": 2.0222, + "nll_loss": 0.5055317878723145, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.588706598151475e-05, + "rewards/margins": 0.22758901119232178, + "rewards/rejected": -0.22768491506576538, + "step": 7576 + }, + { + "epoch": 5.239972337482711, + "grad_norm": 9.574323654174805, + "learning_rate": 2.6444598125096055e-05, + "log_odds_chosen": 9.933778762817383, + "log_odds_ratio": -0.00023947448062244803, + "logits/chosen": -0.6763154864311218, + "logits/rejected": -0.6849706172943115, + "logps/chosen": -0.00020450836746022105, + "logps/rejected": -1.3118788003921509, + "loss": 0.9615, + "nll_loss": 0.24034947156906128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0450837837415747e-05, + "rewards/margins": 0.1311674416065216, + "rewards/rejected": -0.13118787109851837, + "step": 7577 + }, + { + "epoch": 5.240663900414938, + "grad_norm": 8.033780097961426, + "learning_rate": 2.6440756108805904e-05, + "log_odds_chosen": 9.859085083007812, + "log_odds_ratio": -0.00016901653725653887, + "logits/chosen": -0.3645017743110657, + "logits/rejected": -0.5088723301887512, + "logps/chosen": -0.00046245334669947624, + "logps/rejected": -1.7504345178604126, + "loss": 0.9783, + "nll_loss": 0.24455362558364868, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.624533903552219e-05, + "rewards/margins": 0.1749972254037857, + "rewards/rejected": -0.17504346370697021, + "step": 7578 + }, + { + "epoch": 5.241355463347165, + "grad_norm": 11.642791748046875, + "learning_rate": 2.6436914092515753e-05, + "log_odds_chosen": 10.092130661010742, + "log_odds_ratio": -7.946189725771546e-05, + "logits/chosen": -0.39425206184387207, + "logits/rejected": -0.43070298433303833, + "logps/chosen": -0.0004464397206902504, + "logps/rejected": -1.8897475004196167, + "loss": 1.096, + "nll_loss": 0.27399927377700806, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.464397352421656e-05, + "rewards/margins": 0.18893010914325714, + "rewards/rejected": -0.1889747530221939, + "step": 7579 + }, + { + "epoch": 5.2420470262793915, + "grad_norm": 12.233444213867188, + "learning_rate": 2.6433072076225606e-05, + "log_odds_chosen": 10.869236946105957, + "log_odds_ratio": -4.346840432845056e-05, + "logits/chosen": -0.49191814661026, + "logits/rejected": -0.5358452796936035, + "logps/chosen": -0.0001388008677167818, + "logps/rejected": -1.8795957565307617, + "loss": 0.7285, + "nll_loss": 0.18211990594863892, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3880086044082418e-05, + "rewards/margins": 0.18794569373130798, + "rewards/rejected": -0.18795958161354065, + "step": 7580 + }, + { + "epoch": 5.242738589211618, + "grad_norm": 8.853819847106934, + "learning_rate": 2.6429230059935455e-05, + "log_odds_chosen": 10.513277053833008, + "log_odds_ratio": -0.0003534628194756806, + "logits/chosen": -0.4861745536327362, + "logits/rejected": -0.5915360450744629, + "logps/chosen": -0.0008426437852904201, + "logps/rejected": -2.690995693206787, + "loss": 1.1245, + "nll_loss": 0.2810984253883362, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4264378529042e-05, + "rewards/margins": 0.2690153121948242, + "rewards/rejected": -0.2690995931625366, + "step": 7581 + }, + { + "epoch": 5.243430152143845, + "grad_norm": 6.510776996612549, + "learning_rate": 2.6425388043645304e-05, + "log_odds_chosen": 8.518279075622559, + "log_odds_ratio": -0.000700904696714133, + "logits/chosen": -0.6766083240509033, + "logits/rejected": -0.7357896566390991, + "logps/chosen": -0.0011913528433069587, + "logps/rejected": -1.1983290910720825, + "loss": 1.104, + "nll_loss": 0.2759358286857605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011913527850992978, + "rewards/margins": 0.11971378326416016, + "rewards/rejected": -0.11983291804790497, + "step": 7582 + }, + { + "epoch": 5.244121715076072, + "grad_norm": 7.311406135559082, + "learning_rate": 2.642154602735516e-05, + "log_odds_chosen": 10.2103271484375, + "log_odds_ratio": -0.0016313835512846708, + "logits/chosen": 0.12058839201927185, + "logits/rejected": 0.015313982963562012, + "logps/chosen": -0.005035826470702887, + "logps/rejected": -2.050142526626587, + "loss": 1.1695, + "nll_loss": 0.2922120690345764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005035826470702887, + "rewards/margins": 0.2045106738805771, + "rewards/rejected": -0.20501424372196198, + "step": 7583 + }, + { + "epoch": 5.244813278008299, + "grad_norm": 6.708953857421875, + "learning_rate": 2.641770401106501e-05, + "log_odds_chosen": 10.654993057250977, + "log_odds_ratio": -6.913335528224707e-05, + "logits/chosen": -0.2991570234298706, + "logits/rejected": -0.3160810172557831, + "logps/chosen": -0.0002667972003109753, + "logps/rejected": -2.0909149646759033, + "loss": 0.691, + "nll_loss": 0.172745019197464, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.667971966729965e-05, + "rewards/margins": 0.2090648114681244, + "rewards/rejected": -0.20909149944782257, + "step": 7584 + }, + { + "epoch": 5.245504840940526, + "grad_norm": 5.9692559242248535, + "learning_rate": 2.6413861994774858e-05, + "log_odds_chosen": 10.247108459472656, + "log_odds_ratio": -0.00015192184946499765, + "logits/chosen": -0.4084897041320801, + "logits/rejected": -0.46908169984817505, + "logps/chosen": -0.0007824224303476512, + "logps/rejected": -2.4277756214141846, + "loss": 0.6644, + "nll_loss": 0.1660842001438141, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.824225031072274e-05, + "rewards/margins": 0.24269933998584747, + "rewards/rejected": -0.24277758598327637, + "step": 7585 + }, + { + "epoch": 5.246196403872752, + "grad_norm": 9.228209495544434, + "learning_rate": 2.6410019978484714e-05, + "log_odds_chosen": 10.015410423278809, + "log_odds_ratio": -0.002055589109659195, + "logits/chosen": -0.5356808304786682, + "logits/rejected": -0.6216482520103455, + "logps/chosen": -0.007103268522769213, + "logps/rejected": -1.934248685836792, + "loss": 0.9288, + "nll_loss": 0.2319985330104828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007103268289938569, + "rewards/margins": 0.19271454215049744, + "rewards/rejected": -0.19342486560344696, + "step": 7586 + }, + { + "epoch": 5.246887966804979, + "grad_norm": 7.3460917472839355, + "learning_rate": 2.6406177962194563e-05, + "log_odds_chosen": 10.928125381469727, + "log_odds_ratio": -0.0001309488870901987, + "logits/chosen": -0.6458200216293335, + "logits/rejected": -0.7437701225280762, + "logps/chosen": -0.000363502447726205, + "logps/rejected": -2.271428108215332, + "loss": 0.6664, + "nll_loss": 0.16659240424633026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6350247683003545e-05, + "rewards/margins": 0.2271064817905426, + "rewards/rejected": -0.2271428257226944, + "step": 7587 + }, + { + "epoch": 5.247579529737206, + "grad_norm": 7.9204511642456055, + "learning_rate": 2.640233594590441e-05, + "log_odds_chosen": 11.812535285949707, + "log_odds_ratio": -1.5402521967189386e-05, + "logits/chosen": -0.6082082390785217, + "logits/rejected": -0.641418993473053, + "logps/chosen": -0.00013046017556916922, + "logps/rejected": -2.6230921745300293, + "loss": 0.5233, + "nll_loss": 0.13082760572433472, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3046018466411624e-05, + "rewards/margins": 0.2622961401939392, + "rewards/rejected": -0.262309193611145, + "step": 7588 + }, + { + "epoch": 5.248271092669433, + "grad_norm": 9.620582580566406, + "learning_rate": 2.6398493929614264e-05, + "log_odds_chosen": 10.287154197692871, + "log_odds_ratio": -5.5621800129301846e-05, + "logits/chosen": -0.5223538279533386, + "logits/rejected": -0.5681127309799194, + "logps/chosen": -0.0006379556725732982, + "logps/rejected": -2.0750598907470703, + "loss": 0.7044, + "nll_loss": 0.1761023998260498, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.379557453328744e-05, + "rewards/margins": 0.20744217932224274, + "rewards/rejected": -0.2075059860944748, + "step": 7589 + }, + { + "epoch": 5.24896265560166, + "grad_norm": 12.833014488220215, + "learning_rate": 2.6394651913324113e-05, + "log_odds_chosen": 10.22948169708252, + "log_odds_ratio": -7.96320236986503e-05, + "logits/chosen": -0.8496423959732056, + "logits/rejected": -0.8584446907043457, + "logps/chosen": -0.0002447866427246481, + "logps/rejected": -2.0432004928588867, + "loss": 0.6975, + "nll_loss": 0.17437176406383514, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4478662453475408e-05, + "rewards/margins": 0.2042955756187439, + "rewards/rejected": -0.204320028424263, + "step": 7590 + }, + { + "epoch": 5.249654218533887, + "grad_norm": 8.8342924118042, + "learning_rate": 2.6390809897033962e-05, + "log_odds_chosen": 9.66745662689209, + "log_odds_ratio": -8.37321495055221e-05, + "logits/chosen": -0.5921209454536438, + "logits/rejected": -0.7187113761901855, + "logps/chosen": -0.00023703300394117832, + "logps/rejected": -1.3444901704788208, + "loss": 0.7808, + "nll_loss": 0.1951880007982254, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3703301849309355e-05, + "rewards/margins": 0.1344253122806549, + "rewards/rejected": -0.13444900512695312, + "step": 7591 + }, + { + "epoch": 5.250345781466113, + "grad_norm": 8.780054092407227, + "learning_rate": 2.6386967880743818e-05, + "log_odds_chosen": 10.155247688293457, + "log_odds_ratio": -0.00027468675398267806, + "logits/chosen": -0.6420718431472778, + "logits/rejected": -0.6700660586357117, + "logps/chosen": -0.001097345957532525, + "logps/rejected": -2.030292510986328, + "loss": 0.5962, + "nll_loss": 0.14901979267597198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001097345957532525, + "rewards/margins": 0.20291951298713684, + "rewards/rejected": -0.20302924513816833, + "step": 7592 + }, + { + "epoch": 5.25103734439834, + "grad_norm": 8.005521774291992, + "learning_rate": 2.6383125864453667e-05, + "log_odds_chosen": 10.611947059631348, + "log_odds_ratio": -0.00013667433813679963, + "logits/chosen": -0.7551765441894531, + "logits/rejected": -0.7871332168579102, + "logps/chosen": -0.005232673604041338, + "logps/rejected": -2.3519179821014404, + "loss": 0.6701, + "nll_loss": 0.16752111911773682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005232674302533269, + "rewards/margins": 0.2346685528755188, + "rewards/rejected": -0.23519182205200195, + "step": 7593 + }, + { + "epoch": 5.251728907330567, + "grad_norm": 6.8082685470581055, + "learning_rate": 2.6379283848163516e-05, + "log_odds_chosen": 10.906329154968262, + "log_odds_ratio": -3.830662171822041e-05, + "logits/chosen": -1.0006284713745117, + "logits/rejected": -0.8602535724639893, + "logps/chosen": -0.000224283488932997, + "logps/rejected": -2.3183956146240234, + "loss": 0.8172, + "nll_loss": 0.20428450405597687, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.242835034849122e-05, + "rewards/margins": 0.23181715607643127, + "rewards/rejected": -0.23183956742286682, + "step": 7594 + }, + { + "epoch": 5.252420470262794, + "grad_norm": 8.90805435180664, + "learning_rate": 2.6375441831873372e-05, + "log_odds_chosen": 10.537378311157227, + "log_odds_ratio": -0.00014934049977455288, + "logits/chosen": -0.6920749545097351, + "logits/rejected": -0.8484786748886108, + "logps/chosen": -0.0003269157896284014, + "logps/rejected": -1.866767406463623, + "loss": 0.9527, + "nll_loss": 0.23815563321113586, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.26915796904359e-05, + "rewards/margins": 0.1866440623998642, + "rewards/rejected": -0.1866767406463623, + "step": 7595 + }, + { + "epoch": 5.253112033195021, + "grad_norm": 6.233585834503174, + "learning_rate": 2.637159981558322e-05, + "log_odds_chosen": 10.72461986541748, + "log_odds_ratio": -2.351926923438441e-05, + "logits/chosen": -0.2917868494987488, + "logits/rejected": -0.38088148832321167, + "logps/chosen": -0.00023079040693119168, + "logps/rejected": -2.118159770965576, + "loss": 0.8197, + "nll_loss": 0.20491938292980194, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3079042875906453e-05, + "rewards/margins": 0.21179290115833282, + "rewards/rejected": -0.2118159681558609, + "step": 7596 + }, + { + "epoch": 5.253803596127248, + "grad_norm": 9.819358825683594, + "learning_rate": 2.636775779929307e-05, + "log_odds_chosen": 9.54849624633789, + "log_odds_ratio": -0.0014317891327664256, + "logits/chosen": -0.7229546308517456, + "logits/rejected": -0.8251336216926575, + "logps/chosen": -0.008922244422137737, + "logps/rejected": -1.8008694648742676, + "loss": 1.1143, + "nll_loss": 0.27843788266181946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008922244887799025, + "rewards/margins": 0.17919471859931946, + "rewards/rejected": -0.18008697032928467, + "step": 7597 + }, + { + "epoch": 5.254495159059474, + "grad_norm": 11.097025871276855, + "learning_rate": 2.6363915783002923e-05, + "log_odds_chosen": 10.830201148986816, + "log_odds_ratio": -6.067188223823905e-05, + "logits/chosen": -0.41811248660087585, + "logits/rejected": -0.5653706789016724, + "logps/chosen": -0.00023369134578388184, + "logps/rejected": -2.0545437335968018, + "loss": 0.7246, + "nll_loss": 0.18113973736763, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3369135305983946e-05, + "rewards/margins": 0.2054310142993927, + "rewards/rejected": -0.20545436441898346, + "step": 7598 + }, + { + "epoch": 5.255186721991701, + "grad_norm": 17.628812789916992, + "learning_rate": 2.636007376671277e-05, + "log_odds_chosen": 10.451534271240234, + "log_odds_ratio": -0.000129957843455486, + "logits/chosen": -0.6208571791648865, + "logits/rejected": -0.7254254221916199, + "logps/chosen": -0.00016725575551390648, + "logps/rejected": -1.9300652742385864, + "loss": 0.8079, + "nll_loss": 0.20194993913173676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6725574823794886e-05, + "rewards/margins": 0.1929897964000702, + "rewards/rejected": -0.1930065155029297, + "step": 7599 + }, + { + "epoch": 5.255878284923928, + "grad_norm": 7.975822448730469, + "learning_rate": 2.635623175042262e-05, + "log_odds_chosen": 10.203317642211914, + "log_odds_ratio": -0.00014215914416126907, + "logits/chosen": -0.25304776430130005, + "logits/rejected": -0.3041943907737732, + "logps/chosen": -0.000454009510576725, + "logps/rejected": -1.9212093353271484, + "loss": 0.7088, + "nll_loss": 0.1771775186061859, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.540094960248098e-05, + "rewards/margins": 0.19207553565502167, + "rewards/rejected": -0.19212093949317932, + "step": 7600 + }, + { + "epoch": 5.256569847856155, + "grad_norm": 14.046150207519531, + "learning_rate": 2.6352389734132477e-05, + "log_odds_chosen": 10.884321212768555, + "log_odds_ratio": -5.141493966220878e-05, + "logits/chosen": -0.62176513671875, + "logits/rejected": -0.5501907467842102, + "logps/chosen": -0.00033163276384584606, + "logps/rejected": -2.3565614223480225, + "loss": 1.0363, + "nll_loss": 0.25907301902770996, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3163276384584606e-05, + "rewards/margins": 0.23562298715114594, + "rewards/rejected": -0.23565614223480225, + "step": 7601 + }, + { + "epoch": 5.257261410788382, + "grad_norm": 8.92452335357666, + "learning_rate": 2.6348547717842326e-05, + "log_odds_chosen": 11.022520065307617, + "log_odds_ratio": -0.000125913429656066, + "logits/chosen": -0.35074371099472046, + "logits/rejected": -0.3500364422798157, + "logps/chosen": -0.0002775251923594624, + "logps/rejected": -2.63907527923584, + "loss": 0.7324, + "nll_loss": 0.18308815360069275, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7752517780754715e-05, + "rewards/margins": 0.26387977600097656, + "rewards/rejected": -0.2639075219631195, + "step": 7602 + }, + { + "epoch": 5.2579529737206085, + "grad_norm": 9.57458782196045, + "learning_rate": 2.6344705701552175e-05, + "log_odds_chosen": 9.637293815612793, + "log_odds_ratio": -0.0007488789851777256, + "logits/chosen": -0.3129688799381256, + "logits/rejected": -0.338850736618042, + "logps/chosen": -0.0013760539004579186, + "logps/rejected": -1.7696377038955688, + "loss": 0.6114, + "nll_loss": 0.15276853740215302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013760538422502577, + "rewards/margins": 0.17682614922523499, + "rewards/rejected": -0.17696376144886017, + "step": 7603 + }, + { + "epoch": 5.258644536652835, + "grad_norm": 9.156579971313477, + "learning_rate": 2.634086368526203e-05, + "log_odds_chosen": 10.068866729736328, + "log_odds_ratio": -0.00020604400197044015, + "logits/chosen": -0.17679022252559662, + "logits/rejected": -0.2397395372390747, + "logps/chosen": -0.00029306631768122315, + "logps/rejected": -1.9695006608963013, + "loss": 0.571, + "nll_loss": 0.14272595942020416, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9306633223313838e-05, + "rewards/margins": 0.19692076742649078, + "rewards/rejected": -0.1969500631093979, + "step": 7604 + }, + { + "epoch": 5.259336099585062, + "grad_norm": 6.802250862121582, + "learning_rate": 2.633702166897188e-05, + "log_odds_chosen": 9.829263687133789, + "log_odds_ratio": -0.0001831715926527977, + "logits/chosen": -0.2670425474643707, + "logits/rejected": -0.39818575978279114, + "logps/chosen": -0.0004503652162384242, + "logps/rejected": -1.573431134223938, + "loss": 0.9043, + "nll_loss": 0.22605912387371063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.503652235143818e-05, + "rewards/margins": 0.15729807317256927, + "rewards/rejected": -0.15734311938285828, + "step": 7605 + }, + { + "epoch": 5.260027662517289, + "grad_norm": 7.160187721252441, + "learning_rate": 2.633317965268173e-05, + "log_odds_chosen": 10.128305435180664, + "log_odds_ratio": -0.00012072191020706668, + "logits/chosen": -0.4220190644264221, + "logits/rejected": -0.54075688123703, + "logps/chosen": -0.0007584138656966388, + "logps/rejected": -1.8679054975509644, + "loss": 0.8598, + "nll_loss": 0.21494436264038086, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.58413880248554e-05, + "rewards/margins": 0.18671470880508423, + "rewards/rejected": -0.1867905557155609, + "step": 7606 + }, + { + "epoch": 5.260719225449516, + "grad_norm": 7.017876148223877, + "learning_rate": 2.632933763639158e-05, + "log_odds_chosen": 9.915639877319336, + "log_odds_ratio": -0.0001403828791808337, + "logits/chosen": -0.32453134655952454, + "logits/rejected": -0.42478859424591064, + "logps/chosen": -0.00033312488812953234, + "logps/rejected": -1.84895658493042, + "loss": 0.587, + "nll_loss": 0.14674820005893707, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.331249172333628e-05, + "rewards/margins": 0.18486234545707703, + "rewards/rejected": -0.18489566445350647, + "step": 7607 + }, + { + "epoch": 5.261410788381743, + "grad_norm": 5.297868251800537, + "learning_rate": 2.632549562010143e-05, + "log_odds_chosen": 9.418892860412598, + "log_odds_ratio": -0.00041185441659763455, + "logits/chosen": -0.6980939507484436, + "logits/rejected": -0.6271117925643921, + "logps/chosen": -0.0027880629058927298, + "logps/rejected": -2.140965223312378, + "loss": 1.0844, + "nll_loss": 0.27104663848876953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027880631387233734, + "rewards/margins": 0.21381771564483643, + "rewards/rejected": -0.21409651637077332, + "step": 7608 + }, + { + "epoch": 5.2621023513139695, + "grad_norm": 8.745232582092285, + "learning_rate": 2.632165360381128e-05, + "log_odds_chosen": 10.277860641479492, + "log_odds_ratio": -0.00029809505213052034, + "logits/chosen": -0.7317168712615967, + "logits/rejected": -0.8170226812362671, + "logps/chosen": -0.0035285335034132004, + "logps/rejected": -2.215935230255127, + "loss": 0.7868, + "nll_loss": 0.19666241109371185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035285335616208613, + "rewards/margins": 0.22124065458774567, + "rewards/rejected": -0.22159351408481598, + "step": 7609 + }, + { + "epoch": 5.262793914246196, + "grad_norm": 7.425640106201172, + "learning_rate": 2.6317811587521135e-05, + "log_odds_chosen": 10.117514610290527, + "log_odds_ratio": -5.87234681006521e-05, + "logits/chosen": -0.2286212295293808, + "logits/rejected": -0.2650706171989441, + "logps/chosen": -0.000267807423369959, + "logps/rejected": -1.8272347450256348, + "loss": 1.0774, + "nll_loss": 0.26933613419532776, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.678074270079378e-05, + "rewards/margins": 0.18269670009613037, + "rewards/rejected": -0.18272347748279572, + "step": 7610 + }, + { + "epoch": 5.263485477178423, + "grad_norm": 5.914295196533203, + "learning_rate": 2.6313969571230984e-05, + "log_odds_chosen": 9.658827781677246, + "log_odds_ratio": -0.0001679986744420603, + "logits/chosen": -0.4364164471626282, + "logits/rejected": -0.510414719581604, + "logps/chosen": -0.0007456679595634341, + "logps/rejected": -1.650540828704834, + "loss": 0.7927, + "nll_loss": 0.1981654018163681, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.456679304596037e-05, + "rewards/margins": 0.16497951745986938, + "rewards/rejected": -0.1650540977716446, + "step": 7611 + }, + { + "epoch": 5.26417704011065, + "grad_norm": 12.017693519592285, + "learning_rate": 2.6310127554940833e-05, + "log_odds_chosen": 11.355411529541016, + "log_odds_ratio": -2.2519958292832598e-05, + "logits/chosen": -0.6643273830413818, + "logits/rejected": -0.6930115222930908, + "logps/chosen": -8.34651873447001e-05, + "logps/rejected": -1.9710543155670166, + "loss": 0.5848, + "nll_loss": 0.14619022607803345, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.346518370672129e-06, + "rewards/margins": 0.19709710776805878, + "rewards/rejected": -0.19710545241832733, + "step": 7612 + }, + { + "epoch": 5.264868603042877, + "grad_norm": 9.172362327575684, + "learning_rate": 2.630628553865069e-05, + "log_odds_chosen": 9.445535659790039, + "log_odds_ratio": -0.0003532869159244001, + "logits/chosen": -0.44796112179756165, + "logits/rejected": -0.5494440197944641, + "logps/chosen": -0.000467819394543767, + "logps/rejected": -1.4349019527435303, + "loss": 0.8907, + "nll_loss": 0.22263102233409882, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.678194090956822e-05, + "rewards/margins": 0.14344340562820435, + "rewards/rejected": -0.14349019527435303, + "step": 7613 + }, + { + "epoch": 5.265560165975104, + "grad_norm": 5.42615270614624, + "learning_rate": 2.6302443522360538e-05, + "log_odds_chosen": 10.002979278564453, + "log_odds_ratio": -0.0002709925174713135, + "logits/chosen": -0.5812947154045105, + "logits/rejected": -0.4607744514942169, + "logps/chosen": -0.0012366706505417824, + "logps/rejected": -1.862508773803711, + "loss": 0.875, + "nll_loss": 0.21871274709701538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012366706505417824, + "rewards/margins": 0.1861272156238556, + "rewards/rejected": -0.18625088036060333, + "step": 7614 + }, + { + "epoch": 5.2662517289073305, + "grad_norm": 13.714395523071289, + "learning_rate": 2.6298601506070387e-05, + "log_odds_chosen": 10.179404258728027, + "log_odds_ratio": -6.211637810338289e-05, + "logits/chosen": -0.911484956741333, + "logits/rejected": -0.9424377679824829, + "logps/chosen": -0.00022070165141485631, + "logps/rejected": -1.6600805521011353, + "loss": 0.7411, + "nll_loss": 0.18527226150035858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2070165869081393e-05, + "rewards/margins": 0.16598597168922424, + "rewards/rejected": -0.16600805521011353, + "step": 7615 + }, + { + "epoch": 5.266943291839557, + "grad_norm": 8.581096649169922, + "learning_rate": 2.6294759489780236e-05, + "log_odds_chosen": 9.999594688415527, + "log_odds_ratio": -0.00010349383228458464, + "logits/chosen": -0.5014063119888306, + "logits/rejected": -0.4965861141681671, + "logps/chosen": -0.000183465686859563, + "logps/rejected": -1.4133416414260864, + "loss": 0.6778, + "nll_loss": 0.16944223642349243, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8346567230764776e-05, + "rewards/margins": 0.14131581783294678, + "rewards/rejected": -0.1413341611623764, + "step": 7616 + }, + { + "epoch": 5.267634854771784, + "grad_norm": 10.020563125610352, + "learning_rate": 2.629091747349009e-05, + "log_odds_chosen": 11.29733657836914, + "log_odds_ratio": -2.104317354678642e-05, + "logits/chosen": -0.7738044857978821, + "logits/rejected": -0.8598242402076721, + "logps/chosen": -0.00018091258243657649, + "logps/rejected": -2.586355686187744, + "loss": 1.2626, + "nll_loss": 0.3156498074531555, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8091257516061887e-05, + "rewards/margins": 0.25861749053001404, + "rewards/rejected": -0.25863558053970337, + "step": 7617 + }, + { + "epoch": 5.268326417704011, + "grad_norm": 15.1841459274292, + "learning_rate": 2.6287075457199938e-05, + "log_odds_chosen": 11.176735877990723, + "log_odds_ratio": -4.5400127419270575e-05, + "logits/chosen": -0.12970086932182312, + "logits/rejected": -0.3043827414512634, + "logps/chosen": -0.0007847197121009231, + "logps/rejected": -3.0710134506225586, + "loss": 1.185, + "nll_loss": 0.29624485969543457, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.847197412047535e-05, + "rewards/margins": 0.3070228695869446, + "rewards/rejected": -0.3071013391017914, + "step": 7618 + }, + { + "epoch": 5.269017980636238, + "grad_norm": 9.741006851196289, + "learning_rate": 2.6283233440909787e-05, + "log_odds_chosen": 11.197010040283203, + "log_odds_ratio": -7.193641067715362e-05, + "logits/chosen": -0.18809190392494202, + "logits/rejected": -0.3500503897666931, + "logps/chosen": -0.0011983285658061504, + "logps/rejected": -2.8563578128814697, + "loss": 1.0726, + "nll_loss": 0.26814720034599304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011983286822214723, + "rewards/margins": 0.2855159640312195, + "rewards/rejected": -0.2856357991695404, + "step": 7619 + }, + { + "epoch": 5.269709543568465, + "grad_norm": 12.667189598083496, + "learning_rate": 2.6279391424619642e-05, + "log_odds_chosen": 11.548139572143555, + "log_odds_ratio": -1.4151154573482927e-05, + "logits/chosen": -0.6309748291969299, + "logits/rejected": -0.8380830883979797, + "logps/chosen": -0.0001395690196659416, + "logps/rejected": -2.490206003189087, + "loss": 1.1593, + "nll_loss": 0.28981447219848633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3956902876088861e-05, + "rewards/margins": 0.24900665879249573, + "rewards/rejected": -0.24902060627937317, + "step": 7620 + }, + { + "epoch": 5.2704011065006915, + "grad_norm": 5.913382053375244, + "learning_rate": 2.627554940832949e-05, + "log_odds_chosen": 9.914276123046875, + "log_odds_ratio": -0.0004736467672046274, + "logits/chosen": -0.16976572573184967, + "logits/rejected": -0.2518896758556366, + "logps/chosen": -0.001111007179133594, + "logps/rejected": -2.4549412727355957, + "loss": 0.806, + "nll_loss": 0.20144861936569214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011110070045106113, + "rewards/margins": 0.24538302421569824, + "rewards/rejected": -0.24549412727355957, + "step": 7621 + }, + { + "epoch": 5.271092669432918, + "grad_norm": 10.705941200256348, + "learning_rate": 2.627170739203934e-05, + "log_odds_chosen": 10.679733276367188, + "log_odds_ratio": -0.000957089476287365, + "logits/chosen": -0.9041181206703186, + "logits/rejected": -1.0080416202545166, + "logps/chosen": -0.0002205181517638266, + "logps/rejected": -2.086775779724121, + "loss": 0.9697, + "nll_loss": 0.24233216047286987, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2051815903978422e-05, + "rewards/margins": 0.20865552127361298, + "rewards/rejected": -0.20867758989334106, + "step": 7622 + }, + { + "epoch": 5.271784232365145, + "grad_norm": 9.783222198486328, + "learning_rate": 2.6267865375749196e-05, + "log_odds_chosen": 10.436159133911133, + "log_odds_ratio": -0.0005258338060230017, + "logits/chosen": -0.5189238786697388, + "logits/rejected": -0.537483811378479, + "logps/chosen": -0.0305920522660017, + "logps/rejected": -2.636291742324829, + "loss": 1.1168, + "nll_loss": 0.2791406512260437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030592053662985563, + "rewards/margins": 0.2605699896812439, + "rewards/rejected": -0.26362916827201843, + "step": 7623 + }, + { + "epoch": 5.272475795297372, + "grad_norm": 10.855855941772461, + "learning_rate": 2.6264023359459045e-05, + "log_odds_chosen": 10.203224182128906, + "log_odds_ratio": -0.0001932304003275931, + "logits/chosen": -0.32453393936157227, + "logits/rejected": -0.3398202061653137, + "logps/chosen": -0.0005054904613643885, + "logps/rejected": -2.162875175476074, + "loss": 0.8921, + "nll_loss": 0.22300416231155396, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0549046136438847e-05, + "rewards/margins": 0.21623697876930237, + "rewards/rejected": -0.2162875235080719, + "step": 7624 + }, + { + "epoch": 5.273167358229599, + "grad_norm": 8.04382038116455, + "learning_rate": 2.6260181343168894e-05, + "log_odds_chosen": 9.691407203674316, + "log_odds_ratio": -0.00022404029732570052, + "logits/chosen": -0.36626651883125305, + "logits/rejected": -0.39901232719421387, + "logps/chosen": -0.00020038278307765722, + "logps/rejected": -1.4379754066467285, + "loss": 0.8116, + "nll_loss": 0.20287036895751953, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0038278307765722e-05, + "rewards/margins": 0.1437775194644928, + "rewards/rejected": -0.14379754662513733, + "step": 7625 + }, + { + "epoch": 5.273858921161826, + "grad_norm": 8.791499137878418, + "learning_rate": 2.6256339326878747e-05, + "log_odds_chosen": 10.086090087890625, + "log_odds_ratio": -6.677798228338361e-05, + "logits/chosen": -0.6222400665283203, + "logits/rejected": -0.6852139234542847, + "logps/chosen": -0.00019318165141157806, + "logps/rejected": -1.5650521516799927, + "loss": 1.1248, + "nll_loss": 0.28118351101875305, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9318165868753567e-05, + "rewards/margins": 0.1564859002828598, + "rewards/rejected": -0.15650522708892822, + "step": 7626 + }, + { + "epoch": 5.2745504840940525, + "grad_norm": 13.350688934326172, + "learning_rate": 2.6252497310588596e-05, + "log_odds_chosen": 10.088001251220703, + "log_odds_ratio": -0.00010822910553542897, + "logits/chosen": -0.3409346342086792, + "logits/rejected": -0.35213860869407654, + "logps/chosen": -0.0001726550399325788, + "logps/rejected": -1.3348233699798584, + "loss": 0.9251, + "nll_loss": 0.2312626987695694, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.726550362946e-05, + "rewards/margins": 0.13346508145332336, + "rewards/rejected": -0.13348235189914703, + "step": 7627 + }, + { + "epoch": 5.275242047026279, + "grad_norm": 18.314821243286133, + "learning_rate": 2.6248655294298445e-05, + "log_odds_chosen": 11.440445899963379, + "log_odds_ratio": -3.44728869094979e-05, + "logits/chosen": -0.0827302485704422, + "logits/rejected": -0.22736823558807373, + "logps/chosen": -0.00017195713007822633, + "logps/rejected": -2.4652082920074463, + "loss": 1.3187, + "nll_loss": 0.32966870069503784, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7195712644024752e-05, + "rewards/margins": 0.24650365114212036, + "rewards/rejected": -0.24652084708213806, + "step": 7628 + }, + { + "epoch": 5.275933609958506, + "grad_norm": 8.562300682067871, + "learning_rate": 2.62448132780083e-05, + "log_odds_chosen": 12.402145385742188, + "log_odds_ratio": -1.9039196558878757e-05, + "logits/chosen": -0.12419766187667847, + "logits/rejected": -0.24297639727592468, + "logps/chosen": -9.086302452487871e-05, + "logps/rejected": -3.0359091758728027, + "loss": 0.928, + "nll_loss": 0.23200340569019318, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.086303180083632e-06, + "rewards/margins": 0.3035818636417389, + "rewards/rejected": -0.30359092354774475, + "step": 7629 + }, + { + "epoch": 5.276625172890733, + "grad_norm": 12.37198257446289, + "learning_rate": 2.624097126171815e-05, + "log_odds_chosen": 10.71961784362793, + "log_odds_ratio": -3.985036164522171e-05, + "logits/chosen": -0.6938871145248413, + "logits/rejected": -0.6587687134742737, + "logps/chosen": -0.00035433750599622726, + "logps/rejected": -2.0899598598480225, + "loss": 0.841, + "nll_loss": 0.21023572981357574, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.543375351000577e-05, + "rewards/margins": 0.20896054804325104, + "rewards/rejected": -0.20899598300457, + "step": 7630 + }, + { + "epoch": 5.27731673582296, + "grad_norm": 9.847604751586914, + "learning_rate": 2.6237129245428e-05, + "log_odds_chosen": 10.25374984741211, + "log_odds_ratio": -0.00017980107804760337, + "logits/chosen": -0.2884005904197693, + "logits/rejected": -0.314953088760376, + "logps/chosen": -0.00047901368816383183, + "logps/rejected": -2.042052984237671, + "loss": 0.8726, + "nll_loss": 0.21812944114208221, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.790136881638318e-05, + "rewards/margins": 0.20415738224983215, + "rewards/rejected": -0.20420530438423157, + "step": 7631 + }, + { + "epoch": 5.278008298755187, + "grad_norm": 5.961386680603027, + "learning_rate": 2.6233287229137855e-05, + "log_odds_chosen": 10.006340026855469, + "log_odds_ratio": -8.721081394469365e-05, + "logits/chosen": -0.18717193603515625, + "logits/rejected": -0.266289621591568, + "logps/chosen": -0.00018941918096970767, + "logps/rejected": -1.5128713846206665, + "loss": 0.8028, + "nll_loss": 0.2007022500038147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8941918824566528e-05, + "rewards/margins": 0.15126819908618927, + "rewards/rejected": -0.15128713846206665, + "step": 7632 + }, + { + "epoch": 5.2786998616874135, + "grad_norm": 8.070467948913574, + "learning_rate": 2.6229445212847704e-05, + "log_odds_chosen": 9.738497734069824, + "log_odds_ratio": -0.00011974801600445062, + "logits/chosen": -0.25868844985961914, + "logits/rejected": -0.3139762282371521, + "logps/chosen": -0.002189639024436474, + "logps/rejected": -2.289876937866211, + "loss": 1.0836, + "nll_loss": 0.27089813351631165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002189638908021152, + "rewards/margins": 0.2287687510251999, + "rewards/rejected": -0.22898772358894348, + "step": 7633 + }, + { + "epoch": 5.27939142461964, + "grad_norm": 4.927910327911377, + "learning_rate": 2.6225603196557553e-05, + "log_odds_chosen": 9.209070205688477, + "log_odds_ratio": -0.0003817295946646482, + "logits/chosen": -0.19250504672527313, + "logits/rejected": -0.22361023724079132, + "logps/chosen": -0.00035569886676967144, + "logps/rejected": -1.3613628149032593, + "loss": 0.8083, + "nll_loss": 0.20204555988311768, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5569886676967144e-05, + "rewards/margins": 0.13610070943832397, + "rewards/rejected": -0.1361362785100937, + "step": 7634 + }, + { + "epoch": 5.280082987551867, + "grad_norm": 6.918251991271973, + "learning_rate": 2.6221761180267405e-05, + "log_odds_chosen": 9.60433292388916, + "log_odds_ratio": -0.0013130803126841784, + "logits/chosen": -0.3779219388961792, + "logits/rejected": -0.43389493227005005, + "logps/chosen": -0.0008963979780673981, + "logps/rejected": -1.7923884391784668, + "loss": 0.7408, + "nll_loss": 0.18507538735866547, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.963979780673981e-05, + "rewards/margins": 0.17914921045303345, + "rewards/rejected": -0.17923887073993683, + "step": 7635 + }, + { + "epoch": 5.280774550484094, + "grad_norm": 11.273725509643555, + "learning_rate": 2.6217919163977254e-05, + "log_odds_chosen": 10.878364562988281, + "log_odds_ratio": -9.522255277261138e-05, + "logits/chosen": -0.7540746927261353, + "logits/rejected": -0.7857592105865479, + "logps/chosen": -0.00027051271172240376, + "logps/rejected": -1.8876264095306396, + "loss": 0.7663, + "nll_loss": 0.19157616794109344, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7051271899836138e-05, + "rewards/margins": 0.18873558938503265, + "rewards/rejected": -0.1887626349925995, + "step": 7636 + }, + { + "epoch": 5.281466113416321, + "grad_norm": 8.270740509033203, + "learning_rate": 2.6214077147687103e-05, + "log_odds_chosen": 10.152772903442383, + "log_odds_ratio": -0.0005649271188303828, + "logits/chosen": -0.48451852798461914, + "logits/rejected": -0.49426236748695374, + "logps/chosen": -0.0005245262291282415, + "logps/rejected": -2.122857093811035, + "loss": 0.9035, + "nll_loss": 0.22582755982875824, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245262946118601e-05, + "rewards/margins": 0.2122332602739334, + "rewards/rejected": -0.21228571236133575, + "step": 7637 + }, + { + "epoch": 5.282157676348548, + "grad_norm": 10.418851852416992, + "learning_rate": 2.621023513139696e-05, + "log_odds_chosen": 10.892932891845703, + "log_odds_ratio": -9.03589534573257e-05, + "logits/chosen": -0.2918834686279297, + "logits/rejected": -0.41665488481521606, + "logps/chosen": -0.0004215552762616426, + "logps/rejected": -2.230041027069092, + "loss": 0.6557, + "nll_loss": 0.16391390562057495, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.21555305365473e-05, + "rewards/margins": 0.22296196222305298, + "rewards/rejected": -0.22300411760807037, + "step": 7638 + }, + { + "epoch": 5.282849239280774, + "grad_norm": 10.69952392578125, + "learning_rate": 2.620639311510681e-05, + "log_odds_chosen": 10.622076034545898, + "log_odds_ratio": -7.079998613335192e-05, + "logits/chosen": -0.5640475153923035, + "logits/rejected": -0.605728030204773, + "logps/chosen": -0.0003474515688139945, + "logps/rejected": -2.118351936340332, + "loss": 1.0377, + "nll_loss": 0.2594177722930908, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.474515688139945e-05, + "rewards/margins": 0.2118004560470581, + "rewards/rejected": -0.21183519065380096, + "step": 7639 + }, + { + "epoch": 5.283540802213001, + "grad_norm": 3.728268623352051, + "learning_rate": 2.6202551098816657e-05, + "log_odds_chosen": 10.98335075378418, + "log_odds_ratio": -2.5416817152290605e-05, + "logits/chosen": -0.40420278906822205, + "logits/rejected": -0.43859580159187317, + "logps/chosen": -0.0003488770453259349, + "logps/rejected": -2.3899548053741455, + "loss": 0.5625, + "nll_loss": 0.14062948524951935, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.488770380499773e-05, + "rewards/margins": 0.23896059393882751, + "rewards/rejected": -0.2389954924583435, + "step": 7640 + }, + { + "epoch": 5.284232365145228, + "grad_norm": 8.159320831298828, + "learning_rate": 2.6198709082526513e-05, + "log_odds_chosen": 10.255733489990234, + "log_odds_ratio": -0.00036923857987858355, + "logits/chosen": 0.03266778588294983, + "logits/rejected": 0.07152489572763443, + "logps/chosen": -0.022674523293972015, + "logps/rejected": -2.343322277069092, + "loss": 1.1866, + "nll_loss": 0.2966251075267792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002267452422529459, + "rewards/margins": 0.23206476867198944, + "rewards/rejected": -0.23433223366737366, + "step": 7641 + }, + { + "epoch": 5.284923928077455, + "grad_norm": 13.458017349243164, + "learning_rate": 2.6194867066236362e-05, + "log_odds_chosen": 9.971641540527344, + "log_odds_ratio": -0.0003074409323744476, + "logits/chosen": -0.7888079881668091, + "logits/rejected": -0.8920333981513977, + "logps/chosen": -0.0006763875717297196, + "logps/rejected": -2.1958963871002197, + "loss": 1.1482, + "nll_loss": 0.2870239019393921, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.763875717297196e-05, + "rewards/margins": 0.21952198445796967, + "rewards/rejected": -0.21958963572978973, + "step": 7642 + }, + { + "epoch": 5.285615491009682, + "grad_norm": 8.60274887084961, + "learning_rate": 2.619102504994621e-05, + "log_odds_chosen": 10.84687614440918, + "log_odds_ratio": -4.1135343053610995e-05, + "logits/chosen": -0.19892117381095886, + "logits/rejected": -0.26732897758483887, + "logps/chosen": -0.0014369181590154767, + "logps/rejected": -2.524735689163208, + "loss": 1.2549, + "nll_loss": 0.3137156665325165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001436918246326968, + "rewards/margins": 0.25232988595962524, + "rewards/rejected": -0.2524735927581787, + "step": 7643 + }, + { + "epoch": 5.286307053941909, + "grad_norm": 6.520380973815918, + "learning_rate": 2.6187183033656064e-05, + "log_odds_chosen": 11.032618522644043, + "log_odds_ratio": -5.6647313613211736e-05, + "logits/chosen": -0.40544480085372925, + "logits/rejected": -0.5566476583480835, + "logps/chosen": -0.0011669609230011702, + "logps/rejected": -2.725712299346924, + "loss": 1.3036, + "nll_loss": 0.3258954882621765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011669609375530854, + "rewards/margins": 0.27245455980300903, + "rewards/rejected": -0.27257123589515686, + "step": 7644 + }, + { + "epoch": 5.286998616874135, + "grad_norm": 3.9610953330993652, + "learning_rate": 2.6183341017365913e-05, + "log_odds_chosen": 9.115592002868652, + "log_odds_ratio": -0.0002037350641330704, + "logits/chosen": -0.02345031499862671, + "logits/rejected": -0.0046147629618644714, + "logps/chosen": -0.00043547700624912977, + "logps/rejected": -1.3381257057189941, + "loss": 0.6619, + "nll_loss": 0.16544975340366364, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354770135250874e-05, + "rewards/margins": 0.13376902043819427, + "rewards/rejected": -0.1338125616312027, + "step": 7645 + }, + { + "epoch": 5.287690179806362, + "grad_norm": 7.417776107788086, + "learning_rate": 2.6179499001075762e-05, + "log_odds_chosen": 9.859108924865723, + "log_odds_ratio": -0.00011395730689400807, + "logits/chosen": -0.11562471091747284, + "logits/rejected": -0.15001115202903748, + "logps/chosen": -0.0002473094209562987, + "logps/rejected": -1.3960421085357666, + "loss": 0.6926, + "nll_loss": 0.17313078045845032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4730943550821394e-05, + "rewards/margins": 0.13957948982715607, + "rewards/rejected": -0.13960421085357666, + "step": 7646 + }, + { + "epoch": 5.288381742738589, + "grad_norm": 7.190175533294678, + "learning_rate": 2.6175656984785618e-05, + "log_odds_chosen": 9.759132385253906, + "log_odds_ratio": -0.0004232939681969583, + "logits/chosen": -0.1860746145248413, + "logits/rejected": -0.2614567279815674, + "logps/chosen": -0.0029081744141876698, + "logps/rejected": -2.041231632232666, + "loss": 0.842, + "nll_loss": 0.21046897768974304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002908174355980009, + "rewards/margins": 0.20383237302303314, + "rewards/rejected": -0.20412318408489227, + "step": 7647 + }, + { + "epoch": 5.289073305670816, + "grad_norm": 6.9479498863220215, + "learning_rate": 2.6171814968495467e-05, + "log_odds_chosen": 9.591582298278809, + "log_odds_ratio": -0.00018219949561171234, + "logits/chosen": -0.2017437219619751, + "logits/rejected": -0.20229317247867584, + "logps/chosen": -0.0020538540557026863, + "logps/rejected": -2.25608491897583, + "loss": 0.6829, + "nll_loss": 0.17071056365966797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002053853968391195, + "rewards/margins": 0.2254031002521515, + "rewards/rejected": -0.2256084829568863, + "step": 7648 + }, + { + "epoch": 5.289764868603043, + "grad_norm": 12.096253395080566, + "learning_rate": 2.6167972952205316e-05, + "log_odds_chosen": 10.28390884399414, + "log_odds_ratio": -0.0001337239664280787, + "logits/chosen": 0.2107926309108734, + "logits/rejected": 0.14508889615535736, + "logps/chosen": -0.0010782841127365828, + "logps/rejected": -2.0446832180023193, + "loss": 0.799, + "nll_loss": 0.19973695278167725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010782841127365828, + "rewards/margins": 0.2043604850769043, + "rewards/rejected": -0.20446830987930298, + "step": 7649 + }, + { + "epoch": 5.29045643153527, + "grad_norm": 14.159560203552246, + "learning_rate": 2.616413093591517e-05, + "log_odds_chosen": 10.812664031982422, + "log_odds_ratio": -2.9879665817134082e-05, + "logits/chosen": -0.6208158731460571, + "logits/rejected": -0.6005766987800598, + "logps/chosen": -0.00011174961400683969, + "logps/rejected": -1.6413708925247192, + "loss": 0.926, + "nll_loss": 0.23150420188903809, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1174963219673373e-05, + "rewards/margins": 0.16412591934204102, + "rewards/rejected": -0.1641370952129364, + "step": 7650 + }, + { + "epoch": 5.291147994467496, + "grad_norm": 17.602048873901367, + "learning_rate": 2.616028891962502e-05, + "log_odds_chosen": 10.549786567687988, + "log_odds_ratio": -8.301087655127048e-05, + "logits/chosen": -0.6232462525367737, + "logits/rejected": -0.6550066471099854, + "logps/chosen": -0.000313351396471262, + "logps/rejected": -2.012857675552368, + "loss": 0.9327, + "nll_loss": 0.23317351937294006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.133514110231772e-05, + "rewards/margins": 0.20125442743301392, + "rewards/rejected": -0.20128576457500458, + "step": 7651 + }, + { + "epoch": 5.291839557399723, + "grad_norm": 5.173848628997803, + "learning_rate": 2.615644690333487e-05, + "log_odds_chosen": 10.600738525390625, + "log_odds_ratio": -6.898707943037152e-05, + "logits/chosen": -0.22983574867248535, + "logits/rejected": -0.23931768536567688, + "logps/chosen": -0.0003584368387237191, + "logps/rejected": -2.2960362434387207, + "loss": 0.5126, + "nll_loss": 0.1281490921974182, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5843684599967673e-05, + "rewards/margins": 0.22956779599189758, + "rewards/rejected": -0.22960364818572998, + "step": 7652 + }, + { + "epoch": 5.29253112033195, + "grad_norm": 11.524662017822266, + "learning_rate": 2.6152604887044722e-05, + "log_odds_chosen": 10.889970779418945, + "log_odds_ratio": -5.783965752925724e-05, + "logits/chosen": -0.4245738685131073, + "logits/rejected": -0.6069134473800659, + "logps/chosen": -0.0002389351575402543, + "logps/rejected": -2.2320656776428223, + "loss": 0.6865, + "nll_loss": 0.1716289222240448, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.389351539022755e-05, + "rewards/margins": 0.22318267822265625, + "rewards/rejected": -0.22320657968521118, + "step": 7653 + }, + { + "epoch": 5.293222683264177, + "grad_norm": 11.817529678344727, + "learning_rate": 2.614876287075457e-05, + "log_odds_chosen": 10.150485038757324, + "log_odds_ratio": -0.0003673167375382036, + "logits/chosen": -0.4654601216316223, + "logits/rejected": -0.4384010434150696, + "logps/chosen": -0.00017272391414735466, + "logps/rejected": -1.4797208309173584, + "loss": 1.7016, + "nll_loss": 0.4253644645214081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7272392142331228e-05, + "rewards/margins": 0.1479548066854477, + "rewards/rejected": -0.14797207713127136, + "step": 7654 + }, + { + "epoch": 5.293914246196404, + "grad_norm": 8.556180000305176, + "learning_rate": 2.614492085446442e-05, + "log_odds_chosen": 9.928142547607422, + "log_odds_ratio": -0.0005739845219068229, + "logits/chosen": 0.10154339671134949, + "logits/rejected": -0.16080422699451447, + "logps/chosen": -0.0033231317065656185, + "logps/rejected": -2.4263432025909424, + "loss": 1.3841, + "nll_loss": 0.3459640145301819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003323132114019245, + "rewards/margins": 0.24230200052261353, + "rewards/rejected": -0.24263429641723633, + "step": 7655 + }, + { + "epoch": 5.2946058091286305, + "grad_norm": 6.121419906616211, + "learning_rate": 2.6141078838174276e-05, + "log_odds_chosen": 10.052181243896484, + "log_odds_ratio": -6.283888797042891e-05, + "logits/chosen": -0.30488666892051697, + "logits/rejected": -0.36098307371139526, + "logps/chosen": -0.0031215278431773186, + "logps/rejected": -2.2946300506591797, + "loss": 0.5584, + "nll_loss": 0.1395900547504425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031215278431773186, + "rewards/margins": 0.22915084660053253, + "rewards/rejected": -0.22946301102638245, + "step": 7656 + }, + { + "epoch": 5.295297372060857, + "grad_norm": 5.950735569000244, + "learning_rate": 2.6137236821884125e-05, + "log_odds_chosen": 10.034515380859375, + "log_odds_ratio": -0.00034096435410901904, + "logits/chosen": -0.06915253400802612, + "logits/rejected": -0.15333367884159088, + "logps/chosen": -0.00016374155529774725, + "logps/rejected": -1.260401964187622, + "loss": 0.92, + "nll_loss": 0.22996269166469574, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6374155165976845e-05, + "rewards/margins": 0.1260238140821457, + "rewards/rejected": -0.12604019045829773, + "step": 7657 + }, + { + "epoch": 5.295988934993084, + "grad_norm": 6.171073913574219, + "learning_rate": 2.6133394805593974e-05, + "log_odds_chosen": 7.855501174926758, + "log_odds_ratio": -0.046969350427389145, + "logits/chosen": -0.45245859026908875, + "logits/rejected": -0.44178035855293274, + "logps/chosen": -0.012497956864535809, + "logps/rejected": -1.1013507843017578, + "loss": 0.9854, + "nll_loss": 0.2416542023420334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012497956631705165, + "rewards/margins": 0.10888528823852539, + "rewards/rejected": -0.11013508588075638, + "step": 7658 + }, + { + "epoch": 5.296680497925311, + "grad_norm": 6.538420677185059, + "learning_rate": 2.612955278930383e-05, + "log_odds_chosen": 9.587251663208008, + "log_odds_ratio": -0.006791528780013323, + "logits/chosen": -0.4349149763584137, + "logits/rejected": -0.5419043302536011, + "logps/chosen": -0.003675490617752075, + "logps/rejected": -2.00207781791687, + "loss": 1.0787, + "nll_loss": 0.2690025269985199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003675490734167397, + "rewards/margins": 0.1998402178287506, + "rewards/rejected": -0.20020776987075806, + "step": 7659 + }, + { + "epoch": 5.297372060857538, + "grad_norm": 6.733508110046387, + "learning_rate": 2.612571077301368e-05, + "log_odds_chosen": 9.530517578125, + "log_odds_ratio": -0.00025011191610246897, + "logits/chosen": -0.4764153063297272, + "logits/rejected": -0.5095908641815186, + "logps/chosen": -0.00014602337614633143, + "logps/rejected": -1.0838871002197266, + "loss": 0.7234, + "nll_loss": 0.1808272898197174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4602336705138441e-05, + "rewards/margins": 0.10837408900260925, + "rewards/rejected": -0.10838870704174042, + "step": 7660 + }, + { + "epoch": 5.298063623789765, + "grad_norm": 8.761924743652344, + "learning_rate": 2.6121868756723528e-05, + "log_odds_chosen": 9.830541610717773, + "log_odds_ratio": -0.0003514336422085762, + "logits/chosen": -0.43194514513015747, + "logits/rejected": -0.5670047402381897, + "logps/chosen": -0.001400099485181272, + "logps/rejected": -2.193282127380371, + "loss": 0.9972, + "nll_loss": 0.24927306175231934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001400099426973611, + "rewards/margins": 0.21918819844722748, + "rewards/rejected": -0.21932819485664368, + "step": 7661 + }, + { + "epoch": 5.2987551867219915, + "grad_norm": 8.479170799255371, + "learning_rate": 2.611802674043338e-05, + "log_odds_chosen": 9.956790924072266, + "log_odds_ratio": -0.0007199362153187394, + "logits/chosen": -0.38241422176361084, + "logits/rejected": -0.513526439666748, + "logps/chosen": -0.004810015205293894, + "logps/rejected": -2.448887825012207, + "loss": 0.9497, + "nll_loss": 0.23735857009887695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048100153799168766, + "rewards/margins": 0.2444078028202057, + "rewards/rejected": -0.2448887974023819, + "step": 7662 + }, + { + "epoch": 5.299446749654218, + "grad_norm": 7.522983551025391, + "learning_rate": 2.611418472414323e-05, + "log_odds_chosen": 10.109183311462402, + "log_odds_ratio": -0.00017479847883805633, + "logits/chosen": -0.39235997200012207, + "logits/rejected": -0.3930438458919525, + "logps/chosen": -0.00037281305412761867, + "logps/rejected": -1.6620938777923584, + "loss": 0.5012, + "nll_loss": 0.12528148293495178, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.728130832314491e-05, + "rewards/margins": 0.1661721169948578, + "rewards/rejected": -0.1662093997001648, + "step": 7663 + }, + { + "epoch": 5.300138312586445, + "grad_norm": 6.93427848815918, + "learning_rate": 2.611034270785308e-05, + "log_odds_chosen": 9.354527473449707, + "log_odds_ratio": -0.0008370226132683456, + "logits/chosen": -0.37404438853263855, + "logits/rejected": -0.3976355195045471, + "logps/chosen": -0.0009613793808966875, + "logps/rejected": -1.5531013011932373, + "loss": 0.9309, + "nll_loss": 0.23264700174331665, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.61379410000518e-05, + "rewards/margins": 0.15521399676799774, + "rewards/rejected": -0.15531013906002045, + "step": 7664 + }, + { + "epoch": 5.300829875518672, + "grad_norm": 6.72873592376709, + "learning_rate": 2.6106500691562935e-05, + "log_odds_chosen": 11.079107284545898, + "log_odds_ratio": -0.0003697759239003062, + "logits/chosen": -0.592350959777832, + "logits/rejected": -0.5782504081726074, + "logps/chosen": -0.00016440243052784353, + "logps/rejected": -2.4741718769073486, + "loss": 0.6636, + "nll_loss": 0.16586294770240784, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6440242688986473e-05, + "rewards/margins": 0.24740076065063477, + "rewards/rejected": -0.24741718173027039, + "step": 7665 + }, + { + "epoch": 5.301521438450899, + "grad_norm": 7.152824878692627, + "learning_rate": 2.6102658675272784e-05, + "log_odds_chosen": 10.747650146484375, + "log_odds_ratio": -3.955703868996352e-05, + "logits/chosen": -0.44400927424430847, + "logits/rejected": -0.5418944358825684, + "logps/chosen": -0.0003771585179492831, + "logps/rejected": -2.3978171348571777, + "loss": 0.8891, + "nll_loss": 0.22226016223430634, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.771585033973679e-05, + "rewards/margins": 0.23974400758743286, + "rewards/rejected": -0.2397817224264145, + "step": 7666 + }, + { + "epoch": 5.302213001383126, + "grad_norm": 8.8451566696167, + "learning_rate": 2.6098816658982633e-05, + "log_odds_chosen": 10.385505676269531, + "log_odds_ratio": -0.00013336195843294263, + "logits/chosen": -0.21721792221069336, + "logits/rejected": -0.16161830723285675, + "logps/chosen": -0.0003452481469139457, + "logps/rejected": -2.1876940727233887, + "loss": 0.8405, + "nll_loss": 0.21011018753051758, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4524819056969136e-05, + "rewards/margins": 0.21873487532138824, + "rewards/rejected": -0.21876941621303558, + "step": 7667 + }, + { + "epoch": 5.3029045643153525, + "grad_norm": 8.409830093383789, + "learning_rate": 2.609497464269249e-05, + "log_odds_chosen": 9.87546443939209, + "log_odds_ratio": -0.00024321695673279464, + "logits/chosen": -0.7340776324272156, + "logits/rejected": -0.8499897718429565, + "logps/chosen": -0.0005925848963670433, + "logps/rejected": -1.650931477546692, + "loss": 0.7627, + "nll_loss": 0.19064688682556152, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.925849109189585e-05, + "rewards/margins": 0.16503389179706573, + "rewards/rejected": -0.16509315371513367, + "step": 7668 + }, + { + "epoch": 5.303596127247579, + "grad_norm": 12.800469398498535, + "learning_rate": 2.6091132626402338e-05, + "log_odds_chosen": 10.688926696777344, + "log_odds_ratio": -3.9617676520720124e-05, + "logits/chosen": -0.5846214890480042, + "logits/rejected": -0.5911470055580139, + "logps/chosen": -0.00027569776284508407, + "logps/rejected": -2.2901434898376465, + "loss": 0.8275, + "nll_loss": 0.20687484741210938, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.756977846729569e-05, + "rewards/margins": 0.22898676991462708, + "rewards/rejected": -0.2290143519639969, + "step": 7669 + }, + { + "epoch": 5.304287690179806, + "grad_norm": 11.251484870910645, + "learning_rate": 2.6087290610112187e-05, + "log_odds_chosen": 9.43034553527832, + "log_odds_ratio": -0.00011557983089005575, + "logits/chosen": -0.6348074674606323, + "logits/rejected": -0.62444669008255, + "logps/chosen": -0.000787832832429558, + "logps/rejected": -1.6137120723724365, + "loss": 0.7808, + "nll_loss": 0.19517762959003448, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.878329051891342e-05, + "rewards/margins": 0.1612924337387085, + "rewards/rejected": -0.16137121617794037, + "step": 7670 + }, + { + "epoch": 5.304979253112033, + "grad_norm": 7.849981307983398, + "learning_rate": 2.608344859382204e-05, + "log_odds_chosen": 8.77843952178955, + "log_odds_ratio": -0.01902155764400959, + "logits/chosen": -0.5401623845100403, + "logits/rejected": -0.3866721987724304, + "logps/chosen": -0.008708938956260681, + "logps/rejected": -1.9028129577636719, + "loss": 1.5558, + "nll_loss": 0.38705021142959595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008708939421921968, + "rewards/margins": 0.18941040337085724, + "rewards/rejected": -0.19028130173683167, + "step": 7671 + }, + { + "epoch": 5.30567081604426, + "grad_norm": 12.56816291809082, + "learning_rate": 2.6079606577531888e-05, + "log_odds_chosen": 9.128028869628906, + "log_odds_ratio": -0.00227135862223804, + "logits/chosen": -0.6871810555458069, + "logits/rejected": -0.6655930280685425, + "logps/chosen": -0.001776855206117034, + "logps/rejected": -1.6447970867156982, + "loss": 2.39, + "nll_loss": 0.5972785949707031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001776855206117034, + "rewards/margins": 0.1643020212650299, + "rewards/rejected": -0.16447971761226654, + "step": 7672 + }, + { + "epoch": 5.306362378976487, + "grad_norm": 3.994396448135376, + "learning_rate": 2.607576456124174e-05, + "log_odds_chosen": 10.866761207580566, + "log_odds_ratio": -2.0611427316907793e-05, + "logits/chosen": -0.6379934549331665, + "logits/rejected": -0.6666987538337708, + "logps/chosen": -0.00013821777247358114, + "logps/rejected": -1.997601866722107, + "loss": 0.3953, + "nll_loss": 0.0988292247056961, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3821777429257054e-05, + "rewards/margins": 0.19974635541439056, + "rewards/rejected": -0.19976019859313965, + "step": 7673 + }, + { + "epoch": 5.3070539419087135, + "grad_norm": 8.11078929901123, + "learning_rate": 2.6071922544951593e-05, + "log_odds_chosen": 9.82305908203125, + "log_odds_ratio": -0.00041845531086437404, + "logits/chosen": -0.6439027786254883, + "logits/rejected": -0.6215129494667053, + "logps/chosen": -0.000763900694437325, + "logps/rejected": -1.9046980142593384, + "loss": 0.9606, + "nll_loss": 0.24010974168777466, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.639007526449859e-05, + "rewards/margins": 0.19039341807365417, + "rewards/rejected": -0.19046981632709503, + "step": 7674 + }, + { + "epoch": 5.30774550484094, + "grad_norm": 9.360323905944824, + "learning_rate": 2.6068080528661442e-05, + "log_odds_chosen": 10.177734375, + "log_odds_ratio": -0.0011329721892252564, + "logits/chosen": -0.3226807415485382, + "logits/rejected": -0.4182065427303314, + "logps/chosen": -0.006573481019586325, + "logps/rejected": -1.8262217044830322, + "loss": 0.6999, + "nll_loss": 0.17485645413398743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006573480786755681, + "rewards/margins": 0.18196482956409454, + "rewards/rejected": -0.18262219429016113, + "step": 7675 + }, + { + "epoch": 5.308437067773167, + "grad_norm": 7.601252555847168, + "learning_rate": 2.606423851237129e-05, + "log_odds_chosen": 8.919883728027344, + "log_odds_ratio": -0.006886746268719435, + "logits/chosen": -0.41399550437927246, + "logits/rejected": -0.4040910303592682, + "logps/chosen": -0.004403387662023306, + "logps/rejected": -1.3676029443740845, + "loss": 0.5948, + "nll_loss": 0.14801070094108582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004403387720230967, + "rewards/margins": 0.13631996512413025, + "rewards/rejected": -0.13676030933856964, + "step": 7676 + }, + { + "epoch": 5.309128630705394, + "grad_norm": 11.422032356262207, + "learning_rate": 2.6060396496081147e-05, + "log_odds_chosen": 10.98823070526123, + "log_odds_ratio": -3.098735760431737e-05, + "logits/chosen": -0.5396237373352051, + "logits/rejected": -0.6037777066230774, + "logps/chosen": -0.00012345438881311566, + "logps/rejected": -1.9171665906906128, + "loss": 0.7916, + "nll_loss": 0.19789397716522217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2345439245109446e-05, + "rewards/margins": 0.19170431792736053, + "rewards/rejected": -0.19171667098999023, + "step": 7677 + }, + { + "epoch": 5.309820193637621, + "grad_norm": 7.421292304992676, + "learning_rate": 2.6056554479790996e-05, + "log_odds_chosen": 9.832866668701172, + "log_odds_ratio": -0.0001473495940444991, + "logits/chosen": -0.4560951590538025, + "logits/rejected": -0.554593563079834, + "logps/chosen": -0.00036930685746483505, + "logps/rejected": -1.706931471824646, + "loss": 0.8516, + "nll_loss": 0.2128933072090149, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.693068720167503e-05, + "rewards/margins": 0.1706562340259552, + "rewards/rejected": -0.17069315910339355, + "step": 7678 + }, + { + "epoch": 5.310511756569848, + "grad_norm": 5.684186935424805, + "learning_rate": 2.6052712463500845e-05, + "log_odds_chosen": 10.158199310302734, + "log_odds_ratio": -0.00011456047650426626, + "logits/chosen": -0.30376410484313965, + "logits/rejected": -0.34192806482315063, + "logps/chosen": -0.0003021803859155625, + "logps/rejected": -1.7264450788497925, + "loss": 0.7734, + "nll_loss": 0.19334980845451355, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.021803786396049e-05, + "rewards/margins": 0.17261429131031036, + "rewards/rejected": -0.1726444959640503, + "step": 7679 + }, + { + "epoch": 5.3112033195020745, + "grad_norm": 8.601361274719238, + "learning_rate": 2.60488704472107e-05, + "log_odds_chosen": 10.844970703125, + "log_odds_ratio": -3.83031765522901e-05, + "logits/chosen": -0.14833518862724304, + "logits/rejected": -0.11479371786117554, + "logps/chosen": -0.00024344338453374803, + "logps/rejected": -2.3909902572631836, + "loss": 0.9726, + "nll_loss": 0.2431459277868271, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4344339180970564e-05, + "rewards/margins": 0.23907466232776642, + "rewards/rejected": -0.23909902572631836, + "step": 7680 + }, + { + "epoch": 5.311894882434301, + "grad_norm": 5.738802433013916, + "learning_rate": 2.604502843092055e-05, + "log_odds_chosen": 10.435100555419922, + "log_odds_ratio": -7.32573025743477e-05, + "logits/chosen": -0.5592867136001587, + "logits/rejected": -0.5970466136932373, + "logps/chosen": -0.0003621687355916947, + "logps/rejected": -2.1743826866149902, + "loss": 0.691, + "nll_loss": 0.17275381088256836, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6216875741956756e-05, + "rewards/margins": 0.21740205585956573, + "rewards/rejected": -0.21743828058242798, + "step": 7681 + }, + { + "epoch": 5.312586445366528, + "grad_norm": 6.579929828643799, + "learning_rate": 2.60411864146304e-05, + "log_odds_chosen": 10.444208145141602, + "log_odds_ratio": -0.0007665945449844003, + "logits/chosen": -0.6170370578765869, + "logits/rejected": -0.5970430970191956, + "logps/chosen": -0.0007927232072688639, + "logps/rejected": -2.231851100921631, + "loss": 0.7417, + "nll_loss": 0.18535269796848297, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.927232218207791e-05, + "rewards/margins": 0.22310584783554077, + "rewards/rejected": -0.22318512201309204, + "step": 7682 + }, + { + "epoch": 5.313278008298755, + "grad_norm": 8.819731712341309, + "learning_rate": 2.603734439834025e-05, + "log_odds_chosen": 10.286642074584961, + "log_odds_ratio": -6.957343430258334e-05, + "logits/chosen": -0.7522540092468262, + "logits/rejected": -0.8117721080780029, + "logps/chosen": -0.00026084392447955906, + "logps/rejected": -1.9778194427490234, + "loss": 0.575, + "nll_loss": 0.14375334978103638, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6084395358338952e-05, + "rewards/margins": 0.1977558732032776, + "rewards/rejected": -0.19778196513652802, + "step": 7683 + }, + { + "epoch": 5.313969571230982, + "grad_norm": 8.924551010131836, + "learning_rate": 2.60335023820501e-05, + "log_odds_chosen": 10.201484680175781, + "log_odds_ratio": -0.0005286703235469759, + "logits/chosen": -0.8851473927497864, + "logits/rejected": -0.8970750570297241, + "logps/chosen": -0.0006871483637951314, + "logps/rejected": -2.151824474334717, + "loss": 0.9841, + "nll_loss": 0.2459702491760254, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.871483492432162e-05, + "rewards/margins": 0.21511372923851013, + "rewards/rejected": -0.21518243849277496, + "step": 7684 + }, + { + "epoch": 5.314661134163209, + "grad_norm": 9.595136642456055, + "learning_rate": 2.602966036575995e-05, + "log_odds_chosen": 8.468236923217773, + "log_odds_ratio": -0.010804870165884495, + "logits/chosen": -0.4198509752750397, + "logits/rejected": -0.37303659319877625, + "logps/chosen": -0.01878383569419384, + "logps/rejected": -1.6183393001556396, + "loss": 0.7602, + "nll_loss": 0.18898017704486847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018783833365887403, + "rewards/margins": 0.15995556116104126, + "rewards/rejected": -0.16183394193649292, + "step": 7685 + }, + { + "epoch": 5.3153526970954355, + "grad_norm": 11.497410774230957, + "learning_rate": 2.6025818349469805e-05, + "log_odds_chosen": 10.909662246704102, + "log_odds_ratio": -2.5831925086094998e-05, + "logits/chosen": -0.6138488054275513, + "logits/rejected": -0.6145757436752319, + "logps/chosen": -0.00014651940728072077, + "logps/rejected": -1.8224164247512817, + "loss": 0.9949, + "nll_loss": 0.2487286627292633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4651941455667838e-05, + "rewards/margins": 0.1822269856929779, + "rewards/rejected": -0.18224164843559265, + "step": 7686 + }, + { + "epoch": 5.316044260027662, + "grad_norm": 5.951731204986572, + "learning_rate": 2.6021976333179654e-05, + "log_odds_chosen": 10.142402648925781, + "log_odds_ratio": -0.00015327542496379465, + "logits/chosen": -0.4024023711681366, + "logits/rejected": -0.5670183300971985, + "logps/chosen": -0.000891193572897464, + "logps/rejected": -2.0417609214782715, + "loss": 0.6139, + "nll_loss": 0.15346777439117432, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.91193631105125e-05, + "rewards/margins": 0.20408698916435242, + "rewards/rejected": -0.204176127910614, + "step": 7687 + }, + { + "epoch": 5.316735822959889, + "grad_norm": 6.343304634094238, + "learning_rate": 2.6018134316889504e-05, + "log_odds_chosen": 10.47935676574707, + "log_odds_ratio": -0.00012434548989403993, + "logits/chosen": -0.5548829436302185, + "logits/rejected": -0.5281838774681091, + "logps/chosen": -0.0002495267253834754, + "logps/rejected": -1.912071943283081, + "loss": 0.5336, + "nll_loss": 0.1333903819322586, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4952674721134827e-05, + "rewards/margins": 0.19118225574493408, + "rewards/rejected": -0.19120720028877258, + "step": 7688 + }, + { + "epoch": 5.317427385892116, + "grad_norm": 7.945797443389893, + "learning_rate": 2.601429230059936e-05, + "log_odds_chosen": 10.331096649169922, + "log_odds_ratio": -6.510312960017473e-05, + "logits/chosen": -0.15395867824554443, + "logits/rejected": -0.3068086802959442, + "logps/chosen": -0.0003511592512950301, + "logps/rejected": -2.1909170150756836, + "loss": 0.6705, + "nll_loss": 0.16760849952697754, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.511592512950301e-05, + "rewards/margins": 0.2190566062927246, + "rewards/rejected": -0.2190917283296585, + "step": 7689 + }, + { + "epoch": 5.318118948824343, + "grad_norm": 7.558305740356445, + "learning_rate": 2.601045028430921e-05, + "log_odds_chosen": 9.520155906677246, + "log_odds_ratio": -0.00021912460215389729, + "logits/chosen": -0.7176843285560608, + "logits/rejected": -0.7478047609329224, + "logps/chosen": -0.000420411117374897, + "logps/rejected": -1.4949798583984375, + "loss": 0.538, + "nll_loss": 0.134473979473114, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.204111246508546e-05, + "rewards/margins": 0.1494559645652771, + "rewards/rejected": -0.14949798583984375, + "step": 7690 + }, + { + "epoch": 5.31881051175657, + "grad_norm": 8.577528953552246, + "learning_rate": 2.6006608268019057e-05, + "log_odds_chosen": 10.675228118896484, + "log_odds_ratio": -0.00035642407601699233, + "logits/chosen": -0.5709743499755859, + "logits/rejected": -0.5321433544158936, + "logps/chosen": -0.0003995354054495692, + "logps/rejected": -2.767270088195801, + "loss": 0.6596, + "nll_loss": 0.16486123204231262, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.995353836216964e-05, + "rewards/margins": 0.27668705582618713, + "rewards/rejected": -0.2767270505428314, + "step": 7691 + }, + { + "epoch": 5.319502074688796, + "grad_norm": 6.864575386047363, + "learning_rate": 2.600276625172891e-05, + "log_odds_chosen": 9.34201431274414, + "log_odds_ratio": -0.0005491009214892983, + "logits/chosen": -0.36394619941711426, + "logits/rejected": -0.4056907296180725, + "logps/chosen": -0.001376256812363863, + "logps/rejected": -1.977386713027954, + "loss": 0.6747, + "nll_loss": 0.1686277985572815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013762569869868457, + "rewards/margins": 0.1976010501384735, + "rewards/rejected": -0.1977386772632599, + "step": 7692 + }, + { + "epoch": 5.320193637621023, + "grad_norm": 8.476661682128906, + "learning_rate": 2.599892423543876e-05, + "log_odds_chosen": 10.700014114379883, + "log_odds_ratio": -7.971821469254792e-05, + "logits/chosen": -0.47770819067955017, + "logits/rejected": -0.5215426683425903, + "logps/chosen": -0.005434725899249315, + "logps/rejected": -2.4658029079437256, + "loss": 1.0269, + "nll_loss": 0.25670719146728516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005434725899249315, + "rewards/margins": 0.2460368126630783, + "rewards/rejected": -0.24658028781414032, + "step": 7693 + }, + { + "epoch": 5.32088520055325, + "grad_norm": 6.076938629150391, + "learning_rate": 2.5995082219148608e-05, + "log_odds_chosen": 9.699786186218262, + "log_odds_ratio": -0.0005650972598232329, + "logits/chosen": -0.43687766790390015, + "logits/rejected": -0.5232003331184387, + "logps/chosen": -0.001041522016748786, + "logps/rejected": -1.5237171649932861, + "loss": 1.6113, + "nll_loss": 0.40276864171028137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010415221186121926, + "rewards/margins": 0.15226754546165466, + "rewards/rejected": -0.15237170457839966, + "step": 7694 + }, + { + "epoch": 5.321576763485477, + "grad_norm": 6.3537187576293945, + "learning_rate": 2.5991240202858464e-05, + "log_odds_chosen": 9.630485534667969, + "log_odds_ratio": -0.00017862251843325794, + "logits/chosen": -0.4959850609302521, + "logits/rejected": -0.4872666895389557, + "logps/chosen": -0.001289563486352563, + "logps/rejected": -2.0892245769500732, + "loss": 0.7614, + "nll_loss": 0.19032001495361328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012895635154563934, + "rewards/margins": 0.208793506026268, + "rewards/rejected": -0.20892247557640076, + "step": 7695 + }, + { + "epoch": 5.322268326417704, + "grad_norm": 6.292471885681152, + "learning_rate": 2.5987398186568313e-05, + "log_odds_chosen": 11.091445922851562, + "log_odds_ratio": -3.2769057725090533e-05, + "logits/chosen": -0.3431626558303833, + "logits/rejected": -0.43052709102630615, + "logps/chosen": -0.00016557855997234583, + "logps/rejected": -2.2294929027557373, + "loss": 0.5896, + "nll_loss": 0.1474042385816574, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6557856724830344e-05, + "rewards/margins": 0.22293275594711304, + "rewards/rejected": -0.2229493111371994, + "step": 7696 + }, + { + "epoch": 5.322959889349931, + "grad_norm": 6.6983723640441895, + "learning_rate": 2.5983556170278162e-05, + "log_odds_chosen": 10.316198348999023, + "log_odds_ratio": -0.00027626199880614877, + "logits/chosen": -0.4761522114276886, + "logits/rejected": -0.43214714527130127, + "logps/chosen": -0.000250997458351776, + "logps/rejected": -1.6024224758148193, + "loss": 0.6304, + "nll_loss": 0.15757060050964355, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5099743652390316e-05, + "rewards/margins": 0.16021715104579926, + "rewards/rejected": -0.1602422595024109, + "step": 7697 + }, + { + "epoch": 5.323651452282157, + "grad_norm": 7.137821674346924, + "learning_rate": 2.5979714153988018e-05, + "log_odds_chosen": 8.657133102416992, + "log_odds_ratio": -0.005025691352784634, + "logits/chosen": -0.5021120309829712, + "logits/rejected": -0.5828627943992615, + "logps/chosen": -0.002383360406383872, + "logps/rejected": -1.3815230131149292, + "loss": 1.4318, + "nll_loss": 0.3574507534503937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023833605519030243, + "rewards/margins": 0.13791395723819733, + "rewards/rejected": -0.13815230131149292, + "step": 7698 + }, + { + "epoch": 5.324343015214384, + "grad_norm": 10.63414192199707, + "learning_rate": 2.5975872137697867e-05, + "log_odds_chosen": 10.593761444091797, + "log_odds_ratio": -0.00010258699330734089, + "logits/chosen": -0.7671674489974976, + "logits/rejected": -0.883109986782074, + "logps/chosen": -0.00022933653963264078, + "logps/rejected": -1.8197206258773804, + "loss": 0.804, + "nll_loss": 0.2009904384613037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2933651052881032e-05, + "rewards/margins": 0.18194912374019623, + "rewards/rejected": -0.18197205662727356, + "step": 7699 + }, + { + "epoch": 5.325034578146611, + "grad_norm": 6.8750152587890625, + "learning_rate": 2.5972030121407716e-05, + "log_odds_chosen": 11.195669174194336, + "log_odds_ratio": -0.00011681480100378394, + "logits/chosen": 0.256054162979126, + "logits/rejected": 0.06311047077178955, + "logps/chosen": -0.0006633030134253204, + "logps/rejected": -2.8899636268615723, + "loss": 0.7191, + "nll_loss": 0.17976562678813934, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.633029988734052e-05, + "rewards/margins": 0.2889300286769867, + "rewards/rejected": -0.2889963388442993, + "step": 7700 + }, + { + "epoch": 5.325726141078838, + "grad_norm": 9.112584114074707, + "learning_rate": 2.596818810511757e-05, + "log_odds_chosen": 8.883886337280273, + "log_odds_ratio": -0.0013188114389777184, + "logits/chosen": -0.27402448654174805, + "logits/rejected": -0.3972470760345459, + "logps/chosen": -0.0013482251670211554, + "logps/rejected": -1.3245115280151367, + "loss": 1.1057, + "nll_loss": 0.2763024568557739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013482251961249858, + "rewards/margins": 0.13231633603572845, + "rewards/rejected": -0.1324511468410492, + "step": 7701 + }, + { + "epoch": 5.326417704011065, + "grad_norm": 6.682565212249756, + "learning_rate": 2.5964346088827417e-05, + "log_odds_chosen": 10.133316993713379, + "log_odds_ratio": -0.00011569785419851542, + "logits/chosen": -0.41242727637290955, + "logits/rejected": -0.42250245809555054, + "logps/chosen": -0.00038382125785574317, + "logps/rejected": -2.057230234146118, + "loss": 0.5205, + "nll_loss": 0.13012166321277618, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8382124330382794e-05, + "rewards/margins": 0.205684632062912, + "rewards/rejected": -0.20572303235530853, + "step": 7702 + }, + { + "epoch": 5.327109266943292, + "grad_norm": 7.458148002624512, + "learning_rate": 2.5960504072537266e-05, + "log_odds_chosen": 10.32491683959961, + "log_odds_ratio": -6.329259485937655e-05, + "logits/chosen": -0.15579473972320557, + "logits/rejected": -0.2219676375389099, + "logps/chosen": -0.0004390804679132998, + "logps/rejected": -2.2576074600219727, + "loss": 0.9177, + "nll_loss": 0.22940751910209656, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.390804679132998e-05, + "rewards/margins": 0.22571685910224915, + "rewards/rejected": -0.22576075792312622, + "step": 7703 + }, + { + "epoch": 5.327800829875518, + "grad_norm": 6.013815402984619, + "learning_rate": 2.5956662056247122e-05, + "log_odds_chosen": 11.237785339355469, + "log_odds_ratio": -2.3372362193185836e-05, + "logits/chosen": -0.17733371257781982, + "logits/rejected": -0.2612563967704773, + "logps/chosen": -0.00018692499725148082, + "logps/rejected": -2.361201763153076, + "loss": 0.6259, + "nll_loss": 0.1564616858959198, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.869249899755232e-05, + "rewards/margins": 0.23610147833824158, + "rewards/rejected": -0.23612019419670105, + "step": 7704 + }, + { + "epoch": 5.328492392807745, + "grad_norm": 9.530840873718262, + "learning_rate": 2.595282003995697e-05, + "log_odds_chosen": 10.308015823364258, + "log_odds_ratio": -0.0008663894259370863, + "logits/chosen": -0.428272008895874, + "logits/rejected": -0.5346933603286743, + "logps/chosen": -0.00248519005253911, + "logps/rejected": -2.5558435916900635, + "loss": 0.8536, + "nll_loss": 0.21330133080482483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000248519005253911, + "rewards/margins": 0.25533586740493774, + "rewards/rejected": -0.25558435916900635, + "step": 7705 + }, + { + "epoch": 5.329183955739972, + "grad_norm": 11.138049125671387, + "learning_rate": 2.594897802366682e-05, + "log_odds_chosen": 9.924549102783203, + "log_odds_ratio": -0.014975260011851788, + "logits/chosen": -0.5903857946395874, + "logits/rejected": -0.49607712030410767, + "logps/chosen": -0.004796158522367477, + "logps/rejected": -2.1003305912017822, + "loss": 0.6667, + "nll_loss": 0.1651792675256729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004796158755198121, + "rewards/margins": 0.20955345034599304, + "rewards/rejected": -0.21003307402133942, + "step": 7706 + }, + { + "epoch": 5.329875518672199, + "grad_norm": 12.312597274780273, + "learning_rate": 2.5945136007376676e-05, + "log_odds_chosen": 10.369390487670898, + "log_odds_ratio": -0.00017965630104299635, + "logits/chosen": -0.5108599662780762, + "logits/rejected": -0.6373491883277893, + "logps/chosen": -0.00028219789965078235, + "logps/rejected": -2.1200790405273438, + "loss": 0.8651, + "nll_loss": 0.21625776588916779, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8219790692673996e-05, + "rewards/margins": 0.2119797021150589, + "rewards/rejected": -0.21200791001319885, + "step": 7707 + }, + { + "epoch": 5.330567081604426, + "grad_norm": 9.67041301727295, + "learning_rate": 2.5941293991086525e-05, + "log_odds_chosen": 10.308876037597656, + "log_odds_ratio": -0.00018406190793029964, + "logits/chosen": -0.6033331751823425, + "logits/rejected": -0.6974876523017883, + "logps/chosen": -0.00024687196128070354, + "logps/rejected": -1.9381176233291626, + "loss": 0.8257, + "nll_loss": 0.20640414953231812, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4687196855666116e-05, + "rewards/margins": 0.193787083029747, + "rewards/rejected": -0.19381175935268402, + "step": 7708 + }, + { + "epoch": 5.3312586445366525, + "grad_norm": 10.314027786254883, + "learning_rate": 2.5937451974796374e-05, + "log_odds_chosen": 10.448393821716309, + "log_odds_ratio": -0.0001520294463261962, + "logits/chosen": -0.3476250171661377, + "logits/rejected": -0.4068318009376526, + "logps/chosen": -0.00025387192727066576, + "logps/rejected": -2.4243698120117188, + "loss": 0.9056, + "nll_loss": 0.22639591991901398, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5387191271875054e-05, + "rewards/margins": 0.24241161346435547, + "rewards/rejected": -0.24243700504302979, + "step": 7709 + }, + { + "epoch": 5.331950207468879, + "grad_norm": 4.918290615081787, + "learning_rate": 2.5933609958506227e-05, + "log_odds_chosen": 10.350467681884766, + "log_odds_ratio": -0.00013474108709488064, + "logits/chosen": -0.4226555824279785, + "logits/rejected": -0.4367438554763794, + "logps/chosen": -0.0006977645098231733, + "logps/rejected": -2.2893643379211426, + "loss": 0.5785, + "nll_loss": 0.14459995925426483, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.977644807193428e-05, + "rewards/margins": 0.22886666655540466, + "rewards/rejected": -0.22893644869327545, + "step": 7710 + }, + { + "epoch": 5.332641770401106, + "grad_norm": 5.445563793182373, + "learning_rate": 2.5929767942216076e-05, + "log_odds_chosen": 10.40871810913086, + "log_odds_ratio": -0.00029386027017608285, + "logits/chosen": -0.039409562945365906, + "logits/rejected": -0.16003747284412384, + "logps/chosen": -0.0002922165731433779, + "logps/rejected": -2.41654634475708, + "loss": 1.0912, + "nll_loss": 0.27277228236198425, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.922165731433779e-05, + "rewards/margins": 0.24162541329860687, + "rewards/rejected": -0.24165461957454681, + "step": 7711 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 5.648160457611084, + "learning_rate": 2.5925925925925925e-05, + "log_odds_chosen": 10.892804145812988, + "log_odds_ratio": -5.025168866268359e-05, + "logits/chosen": -0.28121262788772583, + "logits/rejected": -0.25881797075271606, + "logps/chosen": -0.00030297267949208617, + "logps/rejected": -2.5124268531799316, + "loss": 0.4989, + "nll_loss": 0.12472567707300186, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.029726940440014e-05, + "rewards/margins": 0.25121238827705383, + "rewards/rejected": -0.2512426972389221, + "step": 7712 + }, + { + "epoch": 5.33402489626556, + "grad_norm": 4.784796714782715, + "learning_rate": 2.592208390963578e-05, + "log_odds_chosen": 8.724172592163086, + "log_odds_ratio": -0.004657501820474863, + "logits/chosen": -0.30892983078956604, + "logits/rejected": -0.35402122139930725, + "logps/chosen": -0.00851732399314642, + "logps/rejected": -1.452544927597046, + "loss": 1.1248, + "nll_loss": 0.28072503209114075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008517323876731098, + "rewards/margins": 0.14440277218818665, + "rewards/rejected": -0.14525450766086578, + "step": 7713 + }, + { + "epoch": 5.334716459197787, + "grad_norm": 6.670574188232422, + "learning_rate": 2.591824189334563e-05, + "log_odds_chosen": 9.750624656677246, + "log_odds_ratio": -0.00017071192269213498, + "logits/chosen": -0.28526899218559265, + "logits/rejected": -0.36482855677604675, + "logps/chosen": -0.00034535228041931987, + "logps/rejected": -1.7057344913482666, + "loss": 0.8604, + "nll_loss": 0.21508340537548065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4535227314336225e-05, + "rewards/margins": 0.17053891718387604, + "rewards/rejected": -0.17057345807552338, + "step": 7714 + }, + { + "epoch": 5.3354080221300135, + "grad_norm": 18.297407150268555, + "learning_rate": 2.591439987705548e-05, + "log_odds_chosen": 10.761435508728027, + "log_odds_ratio": -6.893004319863394e-05, + "logits/chosen": -0.8565076589584351, + "logits/rejected": -0.8568417429924011, + "logps/chosen": -0.0004628521273843944, + "logps/rejected": -2.4840097427368164, + "loss": 0.785, + "nll_loss": 0.19624441862106323, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6285214921226725e-05, + "rewards/margins": 0.2483547031879425, + "rewards/rejected": -0.2484009861946106, + "step": 7715 + }, + { + "epoch": 5.33609958506224, + "grad_norm": 10.90261459350586, + "learning_rate": 2.5910557860765335e-05, + "log_odds_chosen": 12.301939964294434, + "log_odds_ratio": -5.705363946617581e-06, + "logits/chosen": -0.38555872440338135, + "logits/rejected": -0.37881430983543396, + "logps/chosen": -0.00010718798876041546, + "logps/rejected": -3.048621654510498, + "loss": 0.9846, + "nll_loss": 0.24614211916923523, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0718798876041546e-05, + "rewards/margins": 0.3048514723777771, + "rewards/rejected": -0.30486220121383667, + "step": 7716 + }, + { + "epoch": 5.336791147994467, + "grad_norm": 10.90868091583252, + "learning_rate": 2.5906715844475184e-05, + "log_odds_chosen": 9.353933334350586, + "log_odds_ratio": -0.00036395888309925795, + "logits/chosen": -0.5651636719703674, + "logits/rejected": -0.6099424958229065, + "logps/chosen": -0.0007730339420959353, + "logps/rejected": -1.5617212057113647, + "loss": 0.9819, + "nll_loss": 0.24544121325016022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.730339711997658e-05, + "rewards/margins": 0.15609481930732727, + "rewards/rejected": -0.15617212653160095, + "step": 7717 + }, + { + "epoch": 5.337482710926694, + "grad_norm": 5.436877250671387, + "learning_rate": 2.5902873828185033e-05, + "log_odds_chosen": 10.302694320678711, + "log_odds_ratio": -0.00012022980081383139, + "logits/chosen": -0.3647574484348297, + "logits/rejected": -0.4723901152610779, + "logps/chosen": -0.0003554256691131741, + "logps/rejected": -1.8892847299575806, + "loss": 0.7578, + "nll_loss": 0.1894378662109375, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5542565456125885e-05, + "rewards/margins": 0.1888929307460785, + "rewards/rejected": -0.18892847001552582, + "step": 7718 + }, + { + "epoch": 5.338174273858921, + "grad_norm": 11.403399467468262, + "learning_rate": 2.5899031811894885e-05, + "log_odds_chosen": 11.144704818725586, + "log_odds_ratio": -2.1073818061267957e-05, + "logits/chosen": -0.6162378191947937, + "logits/rejected": -0.6216781139373779, + "logps/chosen": -0.00015070113295223564, + "logps/rejected": -2.228614330291748, + "loss": 0.6812, + "nll_loss": 0.17030274868011475, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5070114386617206e-05, + "rewards/margins": 0.2228463739156723, + "rewards/rejected": -0.2228614240884781, + "step": 7719 + }, + { + "epoch": 5.338865836791148, + "grad_norm": 9.804717063903809, + "learning_rate": 2.5895189795604734e-05, + "log_odds_chosen": 11.019902229309082, + "log_odds_ratio": -3.0428185709752142e-05, + "logits/chosen": -0.879685640335083, + "logits/rejected": -0.9076402187347412, + "logps/chosen": -0.0002764484379440546, + "logps/rejected": -1.8999154567718506, + "loss": 0.6518, + "nll_loss": 0.1629561185836792, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7644842703011818e-05, + "rewards/margins": 0.18996389210224152, + "rewards/rejected": -0.1899915337562561, + "step": 7720 + }, + { + "epoch": 5.3395573997233745, + "grad_norm": 9.187143325805664, + "learning_rate": 2.5891347779314583e-05, + "log_odds_chosen": 10.921390533447266, + "log_odds_ratio": -2.9327873562579043e-05, + "logits/chosen": -0.517184853553772, + "logits/rejected": -0.597442626953125, + "logps/chosen": -0.00017777850735001266, + "logps/rejected": -2.066096782684326, + "loss": 0.7333, + "nll_loss": 0.18332186341285706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.777785291778855e-05, + "rewards/margins": 0.2065919041633606, + "rewards/rejected": -0.20660969614982605, + "step": 7721 + }, + { + "epoch": 5.340248962655601, + "grad_norm": 5.599289894104004, + "learning_rate": 2.588750576302444e-05, + "log_odds_chosen": 10.52072525024414, + "log_odds_ratio": -3.783634747378528e-05, + "logits/chosen": -0.5504370927810669, + "logits/rejected": -0.5719676613807678, + "logps/chosen": -0.00017955718794837594, + "logps/rejected": -1.7687039375305176, + "loss": 0.6403, + "nll_loss": 0.16006891429424286, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7955719158635475e-05, + "rewards/margins": 0.17685243487358093, + "rewards/rejected": -0.17687039077281952, + "step": 7722 + }, + { + "epoch": 5.340940525587828, + "grad_norm": 6.724042892456055, + "learning_rate": 2.5883663746734288e-05, + "log_odds_chosen": 10.5478515625, + "log_odds_ratio": -6.430500070564449e-05, + "logits/chosen": -0.7374582886695862, + "logits/rejected": -0.7794753909111023, + "logps/chosen": -0.0006023314199410379, + "logps/rejected": -2.344709634780884, + "loss": 1.372, + "nll_loss": 0.342988520860672, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0233145632082596e-05, + "rewards/margins": 0.23441073298454285, + "rewards/rejected": -0.23447097837924957, + "step": 7723 + }, + { + "epoch": 5.341632088520055, + "grad_norm": 8.0201416015625, + "learning_rate": 2.5879821730444137e-05, + "log_odds_chosen": 11.215263366699219, + "log_odds_ratio": -1.7243946786038578e-05, + "logits/chosen": -0.5369030237197876, + "logits/rejected": -0.5722092390060425, + "logps/chosen": -0.0001140009262599051, + "logps/rejected": -2.092883586883545, + "loss": 0.7565, + "nll_loss": 0.18912063539028168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1400094081182033e-05, + "rewards/margins": 0.2092769593000412, + "rewards/rejected": -0.2092883586883545, + "step": 7724 + }, + { + "epoch": 5.342323651452282, + "grad_norm": 5.6271209716796875, + "learning_rate": 2.5875979714153993e-05, + "log_odds_chosen": 9.305371284484863, + "log_odds_ratio": -0.00024319568183273077, + "logits/chosen": -0.46023258566856384, + "logits/rejected": -0.4424994885921478, + "logps/chosen": -0.0005978612462058663, + "logps/rejected": -1.4785716533660889, + "loss": 0.6067, + "nll_loss": 0.15165482461452484, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97861289861612e-05, + "rewards/margins": 0.1477973759174347, + "rewards/rejected": -0.1478571742773056, + "step": 7725 + }, + { + "epoch": 5.343015214384509, + "grad_norm": 8.000357627868652, + "learning_rate": 2.5872137697863842e-05, + "log_odds_chosen": 10.568313598632812, + "log_odds_ratio": -6.980830221436918e-05, + "logits/chosen": -0.7129371762275696, + "logits/rejected": -0.7479875087738037, + "logps/chosen": -0.0003442858287598938, + "logps/rejected": -2.159754991531372, + "loss": 0.5499, + "nll_loss": 0.13745707273483276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.442858360358514e-05, + "rewards/margins": 0.21594107151031494, + "rewards/rejected": -0.21597550809383392, + "step": 7726 + }, + { + "epoch": 5.3437067773167355, + "grad_norm": 11.518319129943848, + "learning_rate": 2.586829568157369e-05, + "log_odds_chosen": 11.02501392364502, + "log_odds_ratio": -3.7570040149148554e-05, + "logits/chosen": -0.9000644683837891, + "logits/rejected": -1.0022200345993042, + "logps/chosen": -0.00020582509750965983, + "logps/rejected": -2.083603620529175, + "loss": 1.2189, + "nll_loss": 0.30470970273017883, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058250902337022e-05, + "rewards/margins": 0.20833978056907654, + "rewards/rejected": -0.20836035907268524, + "step": 7727 + }, + { + "epoch": 5.344398340248962, + "grad_norm": 7.7212982177734375, + "learning_rate": 2.5864453665283544e-05, + "log_odds_chosen": 10.258532524108887, + "log_odds_ratio": -0.00015420767886098474, + "logits/chosen": -0.5833723545074463, + "logits/rejected": -0.5658838152885437, + "logps/chosen": -0.0007139868102967739, + "logps/rejected": -2.0901403427124023, + "loss": 0.9233, + "nll_loss": 0.23081792891025543, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.139868102967739e-05, + "rewards/margins": 0.20894262194633484, + "rewards/rejected": -0.20901402831077576, + "step": 7728 + }, + { + "epoch": 5.345089903181189, + "grad_norm": 7.630891799926758, + "learning_rate": 2.5860611648993393e-05, + "log_odds_chosen": 11.153766632080078, + "log_odds_ratio": -3.3938224078156054e-05, + "logits/chosen": -0.83609539270401, + "logits/rejected": -0.9128667116165161, + "logps/chosen": -0.00012093692203052342, + "logps/rejected": -2.11665415763855, + "loss": 0.9034, + "nll_loss": 0.22585429251194, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2093692021153402e-05, + "rewards/margins": 0.21165332198143005, + "rewards/rejected": -0.21166542172431946, + "step": 7729 + }, + { + "epoch": 5.345781466113416, + "grad_norm": 4.917819976806641, + "learning_rate": 2.5856769632703242e-05, + "log_odds_chosen": 11.009265899658203, + "log_odds_ratio": -0.0003466054331511259, + "logits/chosen": -0.9085547924041748, + "logits/rejected": -0.9079838395118713, + "logps/chosen": -0.0005740531487390399, + "logps/rejected": -2.7738754749298096, + "loss": 0.4157, + "nll_loss": 0.10390262305736542, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.740531923947856e-05, + "rewards/margins": 0.2773301601409912, + "rewards/rejected": -0.2773875594139099, + "step": 7730 + }, + { + "epoch": 5.346473029045643, + "grad_norm": 7.660555839538574, + "learning_rate": 2.5852927616413098e-05, + "log_odds_chosen": 10.09063720703125, + "log_odds_ratio": -0.00017584474699106067, + "logits/chosen": -0.5640292763710022, + "logits/rejected": -0.5959721207618713, + "logps/chosen": -0.0001486010878579691, + "logps/rejected": -1.3079724311828613, + "loss": 0.5152, + "nll_loss": 0.12878504395484924, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.486010842199903e-05, + "rewards/margins": 0.1307823657989502, + "rewards/rejected": -0.13079723715782166, + "step": 7731 + }, + { + "epoch": 5.34716459197787, + "grad_norm": 9.647659301757812, + "learning_rate": 2.5849085600122947e-05, + "log_odds_chosen": 10.084922790527344, + "log_odds_ratio": -0.0005560660501942039, + "logits/chosen": -0.3433188796043396, + "logits/rejected": -0.3567750155925751, + "logps/chosen": -0.016425875946879387, + "logps/rejected": -3.150747537612915, + "loss": 0.8037, + "nll_loss": 0.20086315274238586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016425875946879387, + "rewards/margins": 0.31343215703964233, + "rewards/rejected": -0.31507474184036255, + "step": 7732 + }, + { + "epoch": 5.3478561549100965, + "grad_norm": 8.467803955078125, + "learning_rate": 2.5845243583832796e-05, + "log_odds_chosen": 9.264129638671875, + "log_odds_ratio": -0.0006215933244675398, + "logits/chosen": -0.7157605290412903, + "logits/rejected": -0.8103622794151306, + "logps/chosen": -0.0010265086311846972, + "logps/rejected": -1.4580941200256348, + "loss": 0.7277, + "nll_loss": 0.18185096979141235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010265086166327819, + "rewards/margins": 0.14570675790309906, + "rewards/rejected": -0.14580941200256348, + "step": 7733 + }, + { + "epoch": 5.348547717842323, + "grad_norm": 5.992339611053467, + "learning_rate": 2.584140156754265e-05, + "log_odds_chosen": 9.315893173217773, + "log_odds_ratio": -0.0001914792082970962, + "logits/chosen": -0.5987723469734192, + "logits/rejected": -0.6892966628074646, + "logps/chosen": -0.0003230468137189746, + "logps/rejected": -1.2014394998550415, + "loss": 0.4648, + "nll_loss": 0.11617596447467804, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.23046806443017e-05, + "rewards/margins": 0.12011165916919708, + "rewards/rejected": -0.12014396488666534, + "step": 7734 + }, + { + "epoch": 5.34923928077455, + "grad_norm": 6.232851982116699, + "learning_rate": 2.58375595512525e-05, + "log_odds_chosen": 10.405027389526367, + "log_odds_ratio": -0.00011137073306599632, + "logits/chosen": -0.4917852282524109, + "logits/rejected": -0.48989009857177734, + "logps/chosen": -0.0002072631032206118, + "logps/rejected": -1.7466228008270264, + "loss": 0.8998, + "nll_loss": 0.224933922290802, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0726312868646346e-05, + "rewards/margins": 0.17464156448841095, + "rewards/rejected": -0.1746622771024704, + "step": 7735 + }, + { + "epoch": 5.349930843706777, + "grad_norm": 15.150169372558594, + "learning_rate": 2.583371753496235e-05, + "log_odds_chosen": 9.73324203491211, + "log_odds_ratio": -0.00026712685939855874, + "logits/chosen": -0.8724220395088196, + "logits/rejected": -0.7735554575920105, + "logps/chosen": -0.00018509995425119996, + "logps/rejected": -1.3555516004562378, + "loss": 0.9373, + "nll_loss": 0.23430290818214417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8509996152715757e-05, + "rewards/margins": 0.13553665578365326, + "rewards/rejected": -0.1355551779270172, + "step": 7736 + }, + { + "epoch": 5.350622406639004, + "grad_norm": 45.932159423828125, + "learning_rate": 2.5829875518672202e-05, + "log_odds_chosen": 9.237135887145996, + "log_odds_ratio": -0.13657166063785553, + "logits/chosen": -0.49428650736808777, + "logits/rejected": -0.5516210794448853, + "logps/chosen": -0.03275838866829872, + "logps/rejected": -2.303534984588623, + "loss": 1.0329, + "nll_loss": 0.24456636607646942, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0032758391462266445, + "rewards/margins": 0.2270776629447937, + "rewards/rejected": -0.23035350441932678, + "step": 7737 + }, + { + "epoch": 5.351313969571231, + "grad_norm": 10.989821434020996, + "learning_rate": 2.582603350238205e-05, + "log_odds_chosen": 10.879453659057617, + "log_odds_ratio": -9.712464816402644e-05, + "logits/chosen": -0.32066982984542847, + "logits/rejected": -0.4816090166568756, + "logps/chosen": -0.00041363947093486786, + "logps/rejected": -2.4920613765716553, + "loss": 0.6973, + "nll_loss": 0.17431020736694336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.136395000386983e-05, + "rewards/margins": 0.24916478991508484, + "rewards/rejected": -0.24920612573623657, + "step": 7738 + }, + { + "epoch": 5.3520055325034575, + "grad_norm": 7.630071640014648, + "learning_rate": 2.58221914860919e-05, + "log_odds_chosen": 10.645535469055176, + "log_odds_ratio": -0.00012921505549456924, + "logits/chosen": -0.3419490456581116, + "logits/rejected": -0.4747019410133362, + "logps/chosen": -0.0005621293094009161, + "logps/rejected": -2.0010368824005127, + "loss": 0.7174, + "nll_loss": 0.17933741211891174, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.621293530566618e-05, + "rewards/margins": 0.20004747807979584, + "rewards/rejected": -0.20010370016098022, + "step": 7739 + }, + { + "epoch": 5.352697095435684, + "grad_norm": 11.037864685058594, + "learning_rate": 2.5818349469801756e-05, + "log_odds_chosen": 10.154804229736328, + "log_odds_ratio": -8.114433148875833e-05, + "logits/chosen": -0.5941439867019653, + "logits/rejected": -0.7260634303092957, + "logps/chosen": -0.00017833770834840834, + "logps/rejected": -1.7538602352142334, + "loss": 0.6191, + "nll_loss": 0.1547648012638092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7833772290032357e-05, + "rewards/margins": 0.17536818981170654, + "rewards/rejected": -0.17538602650165558, + "step": 7740 + }, + { + "epoch": 5.353388658367911, + "grad_norm": 7.120156764984131, + "learning_rate": 2.5814507453511605e-05, + "log_odds_chosen": 10.758832931518555, + "log_odds_ratio": -9.678566129878163e-05, + "logits/chosen": -0.42007553577423096, + "logits/rejected": -0.56074458360672, + "logps/chosen": -0.00023292946571018547, + "logps/rejected": -2.15881609916687, + "loss": 0.7465, + "nll_loss": 0.18660315871238708, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3292946934816428e-05, + "rewards/margins": 0.2158583104610443, + "rewards/rejected": -0.2158816009759903, + "step": 7741 + }, + { + "epoch": 5.354080221300138, + "grad_norm": 5.136490345001221, + "learning_rate": 2.5810665437221454e-05, + "log_odds_chosen": 10.323856353759766, + "log_odds_ratio": -9.061383025255054e-05, + "logits/chosen": -0.5882817506790161, + "logits/rejected": -0.6239702701568604, + "logps/chosen": -0.001000424730591476, + "logps/rejected": -2.1509175300598145, + "loss": 0.7211, + "nll_loss": 0.18025416135787964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010004247451433912, + "rewards/margins": 0.2149917185306549, + "rewards/rejected": -0.2150917649269104, + "step": 7742 + }, + { + "epoch": 5.354771784232365, + "grad_norm": 11.205146789550781, + "learning_rate": 2.580682342093131e-05, + "log_odds_chosen": 9.922714233398438, + "log_odds_ratio": -0.00022928789258003235, + "logits/chosen": -0.7218578457832336, + "logits/rejected": -0.817878007888794, + "logps/chosen": -0.0009287429274991155, + "logps/rejected": -2.1084470748901367, + "loss": 1.1835, + "nll_loss": 0.29585108160972595, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.287429566029459e-05, + "rewards/margins": 0.21075184643268585, + "rewards/rejected": -0.2108447253704071, + "step": 7743 + }, + { + "epoch": 5.355463347164592, + "grad_norm": 14.219990730285645, + "learning_rate": 2.580298140464116e-05, + "log_odds_chosen": 10.053268432617188, + "log_odds_ratio": -0.0001390389952575788, + "logits/chosen": -0.5563762187957764, + "logits/rejected": -0.5396439433097839, + "logps/chosen": -0.0015786489238962531, + "logps/rejected": -2.2421164512634277, + "loss": 0.9543, + "nll_loss": 0.23855391144752502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015786488074809313, + "rewards/margins": 0.2240537703037262, + "rewards/rejected": -0.22421163320541382, + "step": 7744 + }, + { + "epoch": 5.356154910096818, + "grad_norm": 7.2350358963012695, + "learning_rate": 2.5799139388351008e-05, + "log_odds_chosen": 9.452655792236328, + "log_odds_ratio": -0.0013515216996893287, + "logits/chosen": -0.33016490936279297, + "logits/rejected": -0.45986154675483704, + "logps/chosen": -0.004266166128218174, + "logps/rejected": -1.9477782249450684, + "loss": 0.6906, + "nll_loss": 0.1725178062915802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004266166070010513, + "rewards/margins": 0.1943511962890625, + "rewards/rejected": -0.19477781653404236, + "step": 7745 + }, + { + "epoch": 5.356846473029045, + "grad_norm": 6.503514289855957, + "learning_rate": 2.579529737206086e-05, + "log_odds_chosen": 9.286174774169922, + "log_odds_ratio": -0.0001534043112769723, + "logits/chosen": -0.6407766342163086, + "logits/rejected": -0.6709181666374207, + "logps/chosen": -0.00034943222999572754, + "logps/rejected": -1.4314804077148438, + "loss": 0.6638, + "nll_loss": 0.16594256460666656, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4943222999572754e-05, + "rewards/margins": 0.14311309158802032, + "rewards/rejected": -0.1431480348110199, + "step": 7746 + }, + { + "epoch": 5.357538035961272, + "grad_norm": 5.2143120765686035, + "learning_rate": 2.579145535577071e-05, + "log_odds_chosen": 10.401906967163086, + "log_odds_ratio": -0.0001116981657105498, + "logits/chosen": -0.5172398686408997, + "logits/rejected": -0.5327882170677185, + "logps/chosen": -0.00015428580809384584, + "logps/rejected": -1.7058733701705933, + "loss": 0.7849, + "nll_loss": 0.19621866941452026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5428580809384584e-05, + "rewards/margins": 0.17057189345359802, + "rewards/rejected": -0.17058733105659485, + "step": 7747 + }, + { + "epoch": 5.358229598893499, + "grad_norm": 9.994341850280762, + "learning_rate": 2.578761333948056e-05, + "log_odds_chosen": 10.679950714111328, + "log_odds_ratio": -4.546328273136169e-05, + "logits/chosen": -0.8168638944625854, + "logits/rejected": -0.8961727023124695, + "logps/chosen": -0.00011033992632292211, + "logps/rejected": -1.660325527191162, + "loss": 0.6641, + "nll_loss": 0.16603140532970428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1033993359887972e-05, + "rewards/margins": 0.16602152585983276, + "rewards/rejected": -0.1660325527191162, + "step": 7748 + }, + { + "epoch": 5.358921161825726, + "grad_norm": 5.2905168533325195, + "learning_rate": 2.5783771323190414e-05, + "log_odds_chosen": 10.401647567749023, + "log_odds_ratio": -6.03256412432529e-05, + "logits/chosen": -0.7885475158691406, + "logits/rejected": -0.825690746307373, + "logps/chosen": -0.0001715484686428681, + "logps/rejected": -1.7799302339553833, + "loss": 0.633, + "nll_loss": 0.1582336723804474, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.715484722808469e-05, + "rewards/margins": 0.1779758632183075, + "rewards/rejected": -0.1779930293560028, + "step": 7749 + }, + { + "epoch": 5.359612724757953, + "grad_norm": 8.384246826171875, + "learning_rate": 2.5779929306900264e-05, + "log_odds_chosen": 9.969493865966797, + "log_odds_ratio": -0.0010337287094444036, + "logits/chosen": -0.5449033379554749, + "logits/rejected": -0.5999699831008911, + "logps/chosen": -0.003291376167908311, + "logps/rejected": -1.331317663192749, + "loss": 0.7406, + "nll_loss": 0.1850452423095703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003291376051492989, + "rewards/margins": 0.13280263543128967, + "rewards/rejected": -0.13313177227973938, + "step": 7750 + }, + { + "epoch": 5.360304287690179, + "grad_norm": 11.273735046386719, + "learning_rate": 2.5776087290610113e-05, + "log_odds_chosen": 9.195895195007324, + "log_odds_ratio": -0.009233876131474972, + "logits/chosen": -0.07361237704753876, + "logits/rejected": -0.27805620431900024, + "logps/chosen": -0.0033656263258308172, + "logps/rejected": -1.2893497943878174, + "loss": 1.0303, + "nll_loss": 0.25664210319519043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003365626325830817, + "rewards/margins": 0.1285984218120575, + "rewards/rejected": -0.12893497943878174, + "step": 7751 + }, + { + "epoch": 5.360995850622406, + "grad_norm": 5.421497344970703, + "learning_rate": 2.577224527431996e-05, + "log_odds_chosen": 10.941364288330078, + "log_odds_ratio": -0.00016141105152200907, + "logits/chosen": -0.5322350263595581, + "logits/rejected": -0.5843270421028137, + "logps/chosen": -0.00016066545504145324, + "logps/rejected": -2.216162919998169, + "loss": 0.6924, + "nll_loss": 0.173092782497406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6066545867943205e-05, + "rewards/margins": 0.22160020470619202, + "rewards/rejected": -0.22161628305912018, + "step": 7752 + }, + { + "epoch": 5.361687413554633, + "grad_norm": 15.62678337097168, + "learning_rate": 2.5768403258029817e-05, + "log_odds_chosen": 10.776522636413574, + "log_odds_ratio": -0.0001856583112385124, + "logits/chosen": -0.1284799724817276, + "logits/rejected": -0.24484391510486603, + "logps/chosen": -0.00035133378696627915, + "logps/rejected": -2.2453131675720215, + "loss": 0.9448, + "nll_loss": 0.23618479073047638, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.513338015181944e-05, + "rewards/margins": 0.22449618577957153, + "rewards/rejected": -0.22453130781650543, + "step": 7753 + }, + { + "epoch": 5.36237897648686, + "grad_norm": 7.678762912750244, + "learning_rate": 2.5764561241739667e-05, + "log_odds_chosen": 9.862929344177246, + "log_odds_ratio": -7.821543113095686e-05, + "logits/chosen": -0.69775390625, + "logits/rejected": -0.7313964366912842, + "logps/chosen": -0.0011458772933110595, + "logps/rejected": -1.9824557304382324, + "loss": 0.8888, + "nll_loss": 0.22219723463058472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011458773224148899, + "rewards/margins": 0.19813098013401031, + "rewards/rejected": -0.198245570063591, + "step": 7754 + }, + { + "epoch": 5.363070539419087, + "grad_norm": 5.978641986846924, + "learning_rate": 2.5760719225449516e-05, + "log_odds_chosen": 9.407076835632324, + "log_odds_ratio": -0.001449758536182344, + "logits/chosen": -0.5870097875595093, + "logits/rejected": -0.5762468576431274, + "logps/chosen": -0.0018696343759074807, + "logps/rejected": -1.773676872253418, + "loss": 0.8295, + "nll_loss": 0.20723845064640045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001869634579634294, + "rewards/margins": 0.17718073725700378, + "rewards/rejected": -0.1773676872253418, + "step": 7755 + }, + { + "epoch": 5.363762102351314, + "grad_norm": 10.33115291595459, + "learning_rate": 2.5756877209159368e-05, + "log_odds_chosen": 10.442926406860352, + "log_odds_ratio": -0.00020072895858902484, + "logits/chosen": -0.6961485147476196, + "logits/rejected": -0.6396393775939941, + "logps/chosen": -0.0009271060116589069, + "logps/rejected": -2.2403993606567383, + "loss": 0.8586, + "nll_loss": 0.2146410495042801, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.27106011658907e-05, + "rewards/margins": 0.22394722700119019, + "rewards/rejected": -0.2240399271249771, + "step": 7756 + }, + { + "epoch": 5.36445366528354, + "grad_norm": 7.273455619812012, + "learning_rate": 2.5753035192869217e-05, + "log_odds_chosen": 9.373745918273926, + "log_odds_ratio": -0.0005260632606223226, + "logits/chosen": -0.5656231045722961, + "logits/rejected": -0.5640733242034912, + "logps/chosen": -0.0008249300881288946, + "logps/rejected": -1.4367049932479858, + "loss": 0.8845, + "nll_loss": 0.22107894718647003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.24930175440386e-05, + "rewards/margins": 0.14358802139759064, + "rewards/rejected": -0.14367049932479858, + "step": 7757 + }, + { + "epoch": 5.365145228215767, + "grad_norm": 6.712815761566162, + "learning_rate": 2.5749193176579066e-05, + "log_odds_chosen": 10.585709571838379, + "log_odds_ratio": -8.377588528674096e-05, + "logits/chosen": -0.11127348244190216, + "logits/rejected": -0.2184278815984726, + "logps/chosen": -0.0002512220526114106, + "logps/rejected": -2.0447144508361816, + "loss": 0.7724, + "nll_loss": 0.19309033453464508, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.51222045335453e-05, + "rewards/margins": 0.20444634556770325, + "rewards/rejected": -0.20447146892547607, + "step": 7758 + }, + { + "epoch": 5.365836791147994, + "grad_norm": 7.707136154174805, + "learning_rate": 2.5745351160288922e-05, + "log_odds_chosen": 10.819369316101074, + "log_odds_ratio": -4.1901796066667885e-05, + "logits/chosen": -0.35066330432891846, + "logits/rejected": -0.33344167470932007, + "logps/chosen": -0.0001883797231130302, + "logps/rejected": -1.9678874015808105, + "loss": 0.6437, + "nll_loss": 0.1609291285276413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.88379726751009e-05, + "rewards/margins": 0.19676992297172546, + "rewards/rejected": -0.1967887580394745, + "step": 7759 + }, + { + "epoch": 5.366528354080221, + "grad_norm": 10.013188362121582, + "learning_rate": 2.574150914399877e-05, + "log_odds_chosen": 10.439899444580078, + "log_odds_ratio": -9.335255163023248e-05, + "logits/chosen": -0.6593368053436279, + "logits/rejected": -0.7114875316619873, + "logps/chosen": -0.00029753416310995817, + "logps/rejected": -1.846423625946045, + "loss": 1.0063, + "nll_loss": 0.25155696272850037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975341703859158e-05, + "rewards/margins": 0.18461261689662933, + "rewards/rejected": -0.18464237451553345, + "step": 7760 + }, + { + "epoch": 5.367219917012449, + "grad_norm": 12.000226020812988, + "learning_rate": 2.573766712770862e-05, + "log_odds_chosen": 9.719730377197266, + "log_odds_ratio": -0.0006029088981449604, + "logits/chosen": -0.8230023980140686, + "logits/rejected": -0.8593472242355347, + "logps/chosen": -0.0007920662756077945, + "logps/rejected": -1.9467694759368896, + "loss": 0.8142, + "nll_loss": 0.20348705351352692, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.920662756077945e-05, + "rewards/margins": 0.1945977509021759, + "rewards/rejected": -0.1946769654750824, + "step": 7761 + }, + { + "epoch": 5.367911479944675, + "grad_norm": 8.159753799438477, + "learning_rate": 2.5733825111418476e-05, + "log_odds_chosen": 9.705583572387695, + "log_odds_ratio": -0.00017643548198975623, + "logits/chosen": -0.550877571105957, + "logits/rejected": -0.6108225584030151, + "logps/chosen": -0.0006110378890298307, + "logps/rejected": -1.8862289190292358, + "loss": 0.6646, + "nll_loss": 0.16613613069057465, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110379035817459e-05, + "rewards/margins": 0.18856178224086761, + "rewards/rejected": -0.18862289190292358, + "step": 7762 + }, + { + "epoch": 5.368603042876902, + "grad_norm": 7.820406436920166, + "learning_rate": 2.5729983095128325e-05, + "log_odds_chosen": 9.865856170654297, + "log_odds_ratio": -0.0018542808247730136, + "logits/chosen": -0.38348299264907837, + "logits/rejected": -0.452808141708374, + "logps/chosen": -0.0012067710049450397, + "logps/rejected": -2.1156463623046875, + "loss": 0.8963, + "nll_loss": 0.2238771766424179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012067711213603616, + "rewards/margins": 0.2114439606666565, + "rewards/rejected": -0.21156466007232666, + "step": 7763 + }, + { + "epoch": 5.369294605809129, + "grad_norm": 14.397409439086914, + "learning_rate": 2.5726141078838174e-05, + "log_odds_chosen": 10.043509483337402, + "log_odds_ratio": -0.00018490861111786216, + "logits/chosen": -0.34730908274650574, + "logits/rejected": -0.4017433524131775, + "logps/chosen": -0.0008323953370563686, + "logps/rejected": -2.294076919555664, + "loss": 1.3105, + "nll_loss": 0.3275982439517975, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.323953079525381e-05, + "rewards/margins": 0.22932444512844086, + "rewards/rejected": -0.2294076830148697, + "step": 7764 + }, + { + "epoch": 5.369986168741356, + "grad_norm": 6.62226676940918, + "learning_rate": 2.5722299062548026e-05, + "log_odds_chosen": 9.916425704956055, + "log_odds_ratio": -0.0005616866401396692, + "logits/chosen": -0.20786112546920776, + "logits/rejected": -0.32927942276000977, + "logps/chosen": -0.0004251671489328146, + "logps/rejected": -1.9319912195205688, + "loss": 1.6643, + "nll_loss": 0.4160114526748657, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.251671634847298e-05, + "rewards/margins": 0.19315659999847412, + "rewards/rejected": -0.19319912791252136, + "step": 7765 + }, + { + "epoch": 5.370677731673583, + "grad_norm": 11.139594078063965, + "learning_rate": 2.5718457046257876e-05, + "log_odds_chosen": 10.382858276367188, + "log_odds_ratio": -0.00011644057667581365, + "logits/chosen": -0.2646900415420532, + "logits/rejected": -0.3348369300365448, + "logps/chosen": -0.0034151228610426188, + "logps/rejected": -2.536651134490967, + "loss": 1.7052, + "nll_loss": 0.4263002276420593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003415122628211975, + "rewards/margins": 0.25332361459732056, + "rewards/rejected": -0.25366511940956116, + "step": 7766 + }, + { + "epoch": 5.37136929460581, + "grad_norm": 7.636089324951172, + "learning_rate": 2.5714615029967725e-05, + "log_odds_chosen": 10.559492111206055, + "log_odds_ratio": -0.00017273104458581656, + "logits/chosen": -0.10843844711780548, + "logits/rejected": -0.2018280327320099, + "logps/chosen": -0.000510864017996937, + "logps/rejected": -2.173729658126831, + "loss": 0.6438, + "nll_loss": 0.1609428972005844, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1086408348055556e-05, + "rewards/margins": 0.21732190251350403, + "rewards/rejected": -0.21737296879291534, + "step": 7767 + }, + { + "epoch": 5.372060857538036, + "grad_norm": 8.192671775817871, + "learning_rate": 2.571077301367758e-05, + "log_odds_chosen": 10.112607955932617, + "log_odds_ratio": -0.00016352730744984, + "logits/chosen": -0.7211679220199585, + "logits/rejected": -0.739108681678772, + "logps/chosen": -0.0004298785061109811, + "logps/rejected": -1.8741475343704224, + "loss": 0.918, + "nll_loss": 0.22947227954864502, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.298784915590659e-05, + "rewards/margins": 0.18737177550792694, + "rewards/rejected": -0.18741475045681, + "step": 7768 + }, + { + "epoch": 5.372752420470263, + "grad_norm": 9.19978141784668, + "learning_rate": 2.570693099738743e-05, + "log_odds_chosen": 9.75778865814209, + "log_odds_ratio": -0.0023856342304497957, + "logits/chosen": -0.2091895490884781, + "logits/rejected": -0.2485579550266266, + "logps/chosen": -0.0016922859940677881, + "logps/rejected": -2.306070327758789, + "loss": 0.7422, + "nll_loss": 0.18530671298503876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016922858776524663, + "rewards/margins": 0.23043778538703918, + "rewards/rejected": -0.2306070327758789, + "step": 7769 + }, + { + "epoch": 5.37344398340249, + "grad_norm": 10.220325469970703, + "learning_rate": 2.570308898109728e-05, + "log_odds_chosen": 10.165271759033203, + "log_odds_ratio": -0.00017575306992512196, + "logits/chosen": -0.8035585880279541, + "logits/rejected": -0.88808673620224, + "logps/chosen": -0.0012654714519158006, + "logps/rejected": -2.365483283996582, + "loss": 0.783, + "nll_loss": 0.19573785364627838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012654715101234615, + "rewards/margins": 0.23642179369926453, + "rewards/rejected": -0.23654834926128387, + "step": 7770 + }, + { + "epoch": 5.374135546334717, + "grad_norm": 5.651019096374512, + "learning_rate": 2.5699246964807134e-05, + "log_odds_chosen": 11.098930358886719, + "log_odds_ratio": -2.716020208026748e-05, + "logits/chosen": -0.5472127199172974, + "logits/rejected": -0.569158673286438, + "logps/chosen": -0.0001069469508365728, + "logps/rejected": -1.6667897701263428, + "loss": 0.9309, + "nll_loss": 0.2327098548412323, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.069469544745516e-05, + "rewards/margins": 0.16666828095912933, + "rewards/rejected": -0.16667896509170532, + "step": 7771 + }, + { + "epoch": 5.374827109266944, + "grad_norm": 7.243309497833252, + "learning_rate": 2.5695404948516983e-05, + "log_odds_chosen": 10.71410083770752, + "log_odds_ratio": -0.00019144202815368772, + "logits/chosen": -0.5720975399017334, + "logits/rejected": -0.6623126864433289, + "logps/chosen": -0.0006748468731530011, + "logps/rejected": -2.124738931655884, + "loss": 0.6301, + "nll_loss": 0.15749889612197876, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.748468149453402e-05, + "rewards/margins": 0.21240642666816711, + "rewards/rejected": -0.21247389912605286, + "step": 7772 + }, + { + "epoch": 5.375518672199171, + "grad_norm": 7.8964924812316895, + "learning_rate": 2.5691562932226832e-05, + "log_odds_chosen": 10.098054885864258, + "log_odds_ratio": -0.00017532003403175622, + "logits/chosen": -0.8081597089767456, + "logits/rejected": -0.803846001625061, + "logps/chosen": -0.0003364419681020081, + "logps/rejected": -1.8087891340255737, + "loss": 1.2479, + "nll_loss": 0.3119489550590515, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.364419535500929e-05, + "rewards/margins": 0.1808452606201172, + "rewards/rejected": -0.1808789074420929, + "step": 7773 + }, + { + "epoch": 5.376210235131397, + "grad_norm": 7.9021782875061035, + "learning_rate": 2.5687720915936685e-05, + "log_odds_chosen": 10.178018569946289, + "log_odds_ratio": -0.00031960944761522114, + "logits/chosen": -0.47578150033950806, + "logits/rejected": -0.4738832116127014, + "logps/chosen": -0.00027857403620146215, + "logps/rejected": -2.0880024433135986, + "loss": 1.0168, + "nll_loss": 0.2541689872741699, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7857404347741976e-05, + "rewards/margins": 0.20877239108085632, + "rewards/rejected": -0.20880025625228882, + "step": 7774 + }, + { + "epoch": 5.376901798063624, + "grad_norm": 7.102752685546875, + "learning_rate": 2.5683878899646534e-05, + "log_odds_chosen": 11.256190299987793, + "log_odds_ratio": -5.332418731995858e-05, + "logits/chosen": -0.4997277855873108, + "logits/rejected": -0.6343384981155396, + "logps/chosen": -0.000341162143740803, + "logps/rejected": -2.9551258087158203, + "loss": 0.9257, + "nll_loss": 0.23142942786216736, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.411621582927182e-05, + "rewards/margins": 0.295478492975235, + "rewards/rejected": -0.2955126166343689, + "step": 7775 + }, + { + "epoch": 5.377593360995851, + "grad_norm": 8.244155883789062, + "learning_rate": 2.5680036883356383e-05, + "log_odds_chosen": 10.558370590209961, + "log_odds_ratio": -6.264346302486956e-05, + "logits/chosen": -0.0697668194770813, + "logits/rejected": -0.22308999300003052, + "logps/chosen": -0.01101109478622675, + "logps/rejected": -2.8141188621520996, + "loss": 0.9669, + "nll_loss": 0.2417076975107193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001101109548471868, + "rewards/margins": 0.28031080961227417, + "rewards/rejected": -0.28141191601753235, + "step": 7776 + }, + { + "epoch": 5.378284923928078, + "grad_norm": 7.367611408233643, + "learning_rate": 2.567619486706624e-05, + "log_odds_chosen": 9.765209197998047, + "log_odds_ratio": -0.0051423623226583, + "logits/chosen": -0.8637277483940125, + "logits/rejected": -0.8014044761657715, + "logps/chosen": -0.0039863623678684235, + "logps/rejected": -2.177565336227417, + "loss": 0.7636, + "nll_loss": 0.19038967788219452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003986362717114389, + "rewards/margins": 0.21735788881778717, + "rewards/rejected": -0.21775653958320618, + "step": 7777 + }, + { + "epoch": 5.378976486860305, + "grad_norm": 8.670293807983398, + "learning_rate": 2.5672352850776088e-05, + "log_odds_chosen": 10.498734474182129, + "log_odds_ratio": -0.001384987379424274, + "logits/chosen": -0.26026180386543274, + "logits/rejected": -0.41038990020751953, + "logps/chosen": -0.0014264382189139724, + "logps/rejected": -2.3605895042419434, + "loss": 0.9921, + "nll_loss": 0.2478928565979004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014264382480178028, + "rewards/margins": 0.23591631650924683, + "rewards/rejected": -0.23605896532535553, + "step": 7778 + }, + { + "epoch": 5.3796680497925315, + "grad_norm": 4.998514652252197, + "learning_rate": 2.5668510834485937e-05, + "log_odds_chosen": 10.203685760498047, + "log_odds_ratio": -0.03531178459525108, + "logits/chosen": -0.11789742857217789, + "logits/rejected": -0.19914287328720093, + "logps/chosen": -0.008793053217232227, + "logps/rejected": -2.459259510040283, + "loss": 0.6583, + "nll_loss": 0.16104447841644287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008793053566478193, + "rewards/margins": 0.24504666030406952, + "rewards/rejected": -0.24592596292495728, + "step": 7779 + }, + { + "epoch": 5.380359612724758, + "grad_norm": 9.771400451660156, + "learning_rate": 2.5664668818195793e-05, + "log_odds_chosen": 10.093647956848145, + "log_odds_ratio": -0.0003640882787294686, + "logits/chosen": -0.3619929552078247, + "logits/rejected": -0.4933694899082184, + "logps/chosen": -0.003856194205582142, + "logps/rejected": -2.7831075191497803, + "loss": 0.9292, + "nll_loss": 0.2322622537612915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000385619408916682, + "rewards/margins": 0.27792513370513916, + "rewards/rejected": -0.27831077575683594, + "step": 7780 + }, + { + "epoch": 5.381051175656985, + "grad_norm": 5.72971773147583, + "learning_rate": 2.5660826801905642e-05, + "log_odds_chosen": 10.461682319641113, + "log_odds_ratio": -0.0001288391649723053, + "logits/chosen": -0.6851260662078857, + "logits/rejected": -0.5563321709632874, + "logps/chosen": -0.00017066244618035853, + "logps/rejected": -1.617461919784546, + "loss": 0.4388, + "nll_loss": 0.10969004780054092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7066246073227376e-05, + "rewards/margins": 0.161729097366333, + "rewards/rejected": -0.16174617409706116, + "step": 7781 + }, + { + "epoch": 5.381742738589212, + "grad_norm": 6.927793025970459, + "learning_rate": 2.565698478561549e-05, + "log_odds_chosen": 10.049091339111328, + "log_odds_ratio": -0.0001307661586906761, + "logits/chosen": -0.46505558490753174, + "logits/rejected": -0.5025767087936401, + "logps/chosen": -0.0006050034426152706, + "logps/rejected": -1.6383215188980103, + "loss": 0.5501, + "nll_loss": 0.13750982284545898, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050034789950587e-05, + "rewards/margins": 0.1637716442346573, + "rewards/rejected": -0.1638321429491043, + "step": 7782 + }, + { + "epoch": 5.382434301521439, + "grad_norm": 7.09966516494751, + "learning_rate": 2.5653142769325343e-05, + "log_odds_chosen": 9.871310234069824, + "log_odds_ratio": -0.00041894649621099234, + "logits/chosen": -0.6862342357635498, + "logits/rejected": -0.7124555706977844, + "logps/chosen": -0.0006207457045093179, + "logps/rejected": -2.264399766921997, + "loss": 0.7967, + "nll_loss": 0.19914031028747559, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.207457045093179e-05, + "rewards/margins": 0.22637790441513062, + "rewards/rejected": -0.226439967751503, + "step": 7783 + }, + { + "epoch": 5.383125864453666, + "grad_norm": 10.186842918395996, + "learning_rate": 2.5649300753035192e-05, + "log_odds_chosen": 11.019811630249023, + "log_odds_ratio": -0.00015502631140407175, + "logits/chosen": -0.20022788643836975, + "logits/rejected": -0.2286909967660904, + "logps/chosen": -0.000486434088088572, + "logps/rejected": -2.827481508255005, + "loss": 1.0997, + "nll_loss": 0.2749202251434326, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.864340735366568e-05, + "rewards/margins": 0.2826995253562927, + "rewards/rejected": -0.28274816274642944, + "step": 7784 + }, + { + "epoch": 5.3838174273858925, + "grad_norm": 11.512709617614746, + "learning_rate": 2.564545873674504e-05, + "log_odds_chosen": 11.191658020019531, + "log_odds_ratio": -2.234236671938561e-05, + "logits/chosen": -0.6829967498779297, + "logits/rejected": -0.7049492001533508, + "logps/chosen": -0.00013569237489718944, + "logps/rejected": -2.2471554279327393, + "loss": 0.743, + "nll_loss": 0.1857379525899887, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3569238944910467e-05, + "rewards/margins": 0.22470197081565857, + "rewards/rejected": -0.22471553087234497, + "step": 7785 + }, + { + "epoch": 5.384508990318119, + "grad_norm": 8.908424377441406, + "learning_rate": 2.5641616720454897e-05, + "log_odds_chosen": 8.431398391723633, + "log_odds_ratio": -0.018461300060153008, + "logits/chosen": -0.46144843101501465, + "logits/rejected": -0.542721688747406, + "logps/chosen": -0.006432386115193367, + "logps/rejected": -1.5983421802520752, + "loss": 1.1868, + "nll_loss": 0.2948574721813202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006432385998778045, + "rewards/margins": 0.15919098258018494, + "rewards/rejected": -0.15983420610427856, + "step": 7786 + }, + { + "epoch": 5.385200553250346, + "grad_norm": 10.535396575927734, + "learning_rate": 2.5637774704164746e-05, + "log_odds_chosen": 11.600221633911133, + "log_odds_ratio": -2.1177609596634284e-05, + "logits/chosen": -0.837311863899231, + "logits/rejected": -0.8921162486076355, + "logps/chosen": -0.0001190628536278382, + "logps/rejected": -2.426147937774658, + "loss": 0.8309, + "nll_loss": 0.2077161967754364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1906286090379581e-05, + "rewards/margins": 0.24260291457176208, + "rewards/rejected": -0.24261482059955597, + "step": 7787 + }, + { + "epoch": 5.385892116182573, + "grad_norm": 7.593047618865967, + "learning_rate": 2.5633932687874595e-05, + "log_odds_chosen": 9.907499313354492, + "log_odds_ratio": -0.00018442222790326923, + "logits/chosen": -0.50394207239151, + "logits/rejected": -0.508385956287384, + "logps/chosen": -0.00045533262891694903, + "logps/rejected": -1.5362834930419922, + "loss": 0.6708, + "nll_loss": 0.1676906943321228, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.55332628916949e-05, + "rewards/margins": 0.15358281135559082, + "rewards/rejected": -0.15362833440303802, + "step": 7788 + }, + { + "epoch": 5.3865836791148, + "grad_norm": 6.909388542175293, + "learning_rate": 2.563009067158445e-05, + "log_odds_chosen": 10.73196029663086, + "log_odds_ratio": -8.174381946446374e-05, + "logits/chosen": 0.10891541838645935, + "logits/rejected": 0.029914073646068573, + "logps/chosen": -0.00043416203698143363, + "logps/rejected": -2.2666399478912354, + "loss": 0.6813, + "nll_loss": 0.1703110784292221, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.341620660852641e-05, + "rewards/margins": 0.2266205996274948, + "rewards/rejected": -0.2266639769077301, + "step": 7789 + }, + { + "epoch": 5.387275242047027, + "grad_norm": 5.53429651260376, + "learning_rate": 2.56262486552943e-05, + "log_odds_chosen": 9.661088943481445, + "log_odds_ratio": -0.0002739218180067837, + "logits/chosen": 0.0766930878162384, + "logits/rejected": 0.019603468477725983, + "logps/chosen": -0.0010696876561269164, + "logps/rejected": -2.4559895992279053, + "loss": 0.8723, + "nll_loss": 0.2180538773536682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010696877870941535, + "rewards/margins": 0.24549199640750885, + "rewards/rejected": -0.24559897184371948, + "step": 7790 + }, + { + "epoch": 5.3879668049792535, + "grad_norm": 6.531269073486328, + "learning_rate": 2.562240663900415e-05, + "log_odds_chosen": 9.580755233764648, + "log_odds_ratio": -0.00013173968181945384, + "logits/chosen": -0.1554318070411682, + "logits/rejected": -0.27889397740364075, + "logps/chosen": -0.0011233543045818806, + "logps/rejected": -1.9057151079177856, + "loss": 0.742, + "nll_loss": 0.1854807585477829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011233543045818806, + "rewards/margins": 0.19045919179916382, + "rewards/rejected": -0.19057151675224304, + "step": 7791 + }, + { + "epoch": 5.38865836791148, + "grad_norm": 9.96691608428955, + "learning_rate": 2.5618564622714002e-05, + "log_odds_chosen": 10.92404842376709, + "log_odds_ratio": -5.517835961654782e-05, + "logits/chosen": -0.2640071511268616, + "logits/rejected": -0.3610392212867737, + "logps/chosen": -0.00020617686095647514, + "logps/rejected": -2.3490943908691406, + "loss": 0.7292, + "nll_loss": 0.18229231238365173, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0617686459445395e-05, + "rewards/margins": 0.23488885164260864, + "rewards/rejected": -0.23490947484970093, + "step": 7792 + }, + { + "epoch": 5.389349930843707, + "grad_norm": 10.167924880981445, + "learning_rate": 2.561472260642385e-05, + "log_odds_chosen": 10.396146774291992, + "log_odds_ratio": -0.0002577454433776438, + "logits/chosen": -0.1070237010717392, + "logits/rejected": -0.10790125280618668, + "logps/chosen": -0.0011379396310076118, + "logps/rejected": -2.7146997451782227, + "loss": 0.5973, + "nll_loss": 0.14929774403572083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011379396892152727, + "rewards/margins": 0.2713561952114105, + "rewards/rejected": -0.27146998047828674, + "step": 7793 + }, + { + "epoch": 5.390041493775934, + "grad_norm": 8.253470420837402, + "learning_rate": 2.56108805901337e-05, + "log_odds_chosen": 10.578184127807617, + "log_odds_ratio": -6.63495811750181e-05, + "logits/chosen": -0.11157053709030151, + "logits/rejected": -0.1310674250125885, + "logps/chosen": -0.000345208594808355, + "logps/rejected": -1.878300428390503, + "loss": 0.7129, + "nll_loss": 0.17821325361728668, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.452086093602702e-05, + "rewards/margins": 0.18779553472995758, + "rewards/rejected": -0.18783004581928253, + "step": 7794 + }, + { + "epoch": 5.390733056708161, + "grad_norm": 7.623827934265137, + "learning_rate": 2.5607038573843556e-05, + "log_odds_chosen": 9.883403778076172, + "log_odds_ratio": -0.00019677457748912275, + "logits/chosen": -0.6082909107208252, + "logits/rejected": -0.6662979125976562, + "logps/chosen": -0.0005204399349167943, + "logps/rejected": -2.115126848220825, + "loss": 0.8763, + "nll_loss": 0.2190508097410202, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.204399349167943e-05, + "rewards/margins": 0.2114606499671936, + "rewards/rejected": -0.21151268482208252, + "step": 7795 + }, + { + "epoch": 5.391424619640388, + "grad_norm": 12.549150466918945, + "learning_rate": 2.5603196557553405e-05, + "log_odds_chosen": 10.353691101074219, + "log_odds_ratio": -6.125812797108665e-05, + "logits/chosen": -0.1614176332950592, + "logits/rejected": -0.20764128863811493, + "logps/chosen": -0.00044496028567664325, + "logps/rejected": -2.1650960445404053, + "loss": 0.9377, + "nll_loss": 0.23442500829696655, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.449603147804737e-05, + "rewards/margins": 0.21646510064601898, + "rewards/rejected": -0.21650958061218262, + "step": 7796 + }, + { + "epoch": 5.3921161825726145, + "grad_norm": 11.16163158416748, + "learning_rate": 2.5599354541263254e-05, + "log_odds_chosen": 10.391372680664062, + "log_odds_ratio": -0.0002313799923285842, + "logits/chosen": -0.6086483597755432, + "logits/rejected": -0.7471935749053955, + "logps/chosen": -0.0004083913518115878, + "logps/rejected": -1.9547507762908936, + "loss": 0.7049, + "nll_loss": 0.1762014776468277, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.083913518115878e-05, + "rewards/margins": 0.19543424248695374, + "rewards/rejected": -0.19547508656978607, + "step": 7797 + }, + { + "epoch": 5.392807745504841, + "grad_norm": 4.64284610748291, + "learning_rate": 2.559551252497311e-05, + "log_odds_chosen": 10.95727825164795, + "log_odds_ratio": -3.449685391387902e-05, + "logits/chosen": -0.517857551574707, + "logits/rejected": -0.5917432904243469, + "logps/chosen": -0.00013936441973783076, + "logps/rejected": -2.081711769104004, + "loss": 0.6214, + "nll_loss": 0.1553504914045334, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3936441973783076e-05, + "rewards/margins": 0.2081572711467743, + "rewards/rejected": -0.20817118883132935, + "step": 7798 + }, + { + "epoch": 5.393499308437068, + "grad_norm": 6.72576379776001, + "learning_rate": 2.559167050868296e-05, + "log_odds_chosen": 11.150545120239258, + "log_odds_ratio": -3.067057696171105e-05, + "logits/chosen": -0.11584608256816864, + "logits/rejected": -0.23737332224845886, + "logps/chosen": -0.003040261333808303, + "logps/rejected": -2.970217704772949, + "loss": 0.8037, + "nll_loss": 0.2009156048297882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030402615084312856, + "rewards/margins": 0.2967177629470825, + "rewards/rejected": -0.2970217764377594, + "step": 7799 + }, + { + "epoch": 5.394190871369295, + "grad_norm": 6.495701789855957, + "learning_rate": 2.5587828492392808e-05, + "log_odds_chosen": 10.355062484741211, + "log_odds_ratio": -8.241965406341478e-05, + "logits/chosen": -0.7206138372421265, + "logits/rejected": -0.7242903709411621, + "logps/chosen": -0.0006676479242742062, + "logps/rejected": -2.488015651702881, + "loss": 0.6441, + "nll_loss": 0.16100604832172394, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.67647909722291e-05, + "rewards/margins": 0.24873481690883636, + "rewards/rejected": -0.2488015741109848, + "step": 7800 + }, + { + "epoch": 5.394882434301522, + "grad_norm": 7.106627941131592, + "learning_rate": 2.558398647610266e-05, + "log_odds_chosen": 9.96081829071045, + "log_odds_ratio": -0.000366711406968534, + "logits/chosen": -0.4516948461532593, + "logits/rejected": -0.522978663444519, + "logps/chosen": -0.00035521230893209577, + "logps/rejected": -1.8816453218460083, + "loss": 0.5956, + "nll_loss": 0.14885476231575012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.55212323484011e-05, + "rewards/margins": 0.1881290078163147, + "rewards/rejected": -0.18816451728343964, + "step": 7801 + }, + { + "epoch": 5.395573997233749, + "grad_norm": 7.80407190322876, + "learning_rate": 2.558014445981251e-05, + "log_odds_chosen": 10.894344329833984, + "log_odds_ratio": -3.5703680623555556e-05, + "logits/chosen": -0.4218696653842926, + "logits/rejected": -0.5361448526382446, + "logps/chosen": -0.0003464347682893276, + "logps/rejected": -2.5303425788879395, + "loss": 0.76, + "nll_loss": 0.19000676274299622, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4643479011720046e-05, + "rewards/margins": 0.25299960374832153, + "rewards/rejected": -0.25303423404693604, + "step": 7802 + }, + { + "epoch": 5.3962655601659755, + "grad_norm": 10.6310396194458, + "learning_rate": 2.557630244352236e-05, + "log_odds_chosen": 11.177322387695312, + "log_odds_ratio": -5.937780952081084e-05, + "logits/chosen": -0.3804362416267395, + "logits/rejected": -0.47340330481529236, + "logps/chosen": -0.0002439522068016231, + "logps/rejected": -2.295778274536133, + "loss": 0.7703, + "nll_loss": 0.19257938861846924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4395221771555953e-05, + "rewards/margins": 0.2295534312725067, + "rewards/rejected": -0.22957783937454224, + "step": 7803 + }, + { + "epoch": 5.396957123098202, + "grad_norm": 6.042616367340088, + "learning_rate": 2.5572460427232214e-05, + "log_odds_chosen": 10.209222793579102, + "log_odds_ratio": -0.00022868410451337695, + "logits/chosen": -0.3529431223869324, + "logits/rejected": -0.3970162868499756, + "logps/chosen": -0.0002680581819731742, + "logps/rejected": -1.7455283403396606, + "loss": 1.1763, + "nll_loss": 0.29406148195266724, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6805821107700467e-05, + "rewards/margins": 0.17452603578567505, + "rewards/rejected": -0.17455284297466278, + "step": 7804 + }, + { + "epoch": 5.397648686030429, + "grad_norm": 8.033905982971191, + "learning_rate": 2.5568618410942063e-05, + "log_odds_chosen": 9.660449981689453, + "log_odds_ratio": -0.00011870273010572419, + "logits/chosen": -0.0120609812438488, + "logits/rejected": -0.07766593247652054, + "logps/chosen": -0.0003880371223203838, + "logps/rejected": -1.8114503622055054, + "loss": 0.7018, + "nll_loss": 0.17542925477027893, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.880371150444262e-05, + "rewards/margins": 0.18110623955726624, + "rewards/rejected": -0.18114504218101501, + "step": 7805 + }, + { + "epoch": 5.398340248962656, + "grad_norm": 11.184370040893555, + "learning_rate": 2.5564776394651912e-05, + "log_odds_chosen": 9.650315284729004, + "log_odds_ratio": -0.000734238070435822, + "logits/chosen": -0.34173810482025146, + "logits/rejected": -0.34528568387031555, + "logps/chosen": -0.0028360248543322086, + "logps/rejected": -2.4724929332733154, + "loss": 0.8162, + "nll_loss": 0.20397043228149414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002836025378201157, + "rewards/margins": 0.246965691447258, + "rewards/rejected": -0.2472492754459381, + "step": 7806 + }, + { + "epoch": 5.399031811894883, + "grad_norm": 6.66445779800415, + "learning_rate": 2.5560934378361768e-05, + "log_odds_chosen": 9.032700538635254, + "log_odds_ratio": -0.0004254723316989839, + "logits/chosen": -0.38099467754364014, + "logits/rejected": -0.3853090703487396, + "logps/chosen": -0.0005114672239869833, + "logps/rejected": -1.1772360801696777, + "loss": 0.764, + "nll_loss": 0.19094544649124146, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.114672603667714e-05, + "rewards/margins": 0.11767245829105377, + "rewards/rejected": -0.11772359907627106, + "step": 7807 + }, + { + "epoch": 5.39972337482711, + "grad_norm": 5.572554111480713, + "learning_rate": 2.5557092362071617e-05, + "log_odds_chosen": 10.41073226928711, + "log_odds_ratio": -0.00024940905859693885, + "logits/chosen": -0.3622361123561859, + "logits/rejected": -0.4100070297718048, + "logps/chosen": -0.00029475893825292587, + "logps/rejected": -1.9671523571014404, + "loss": 0.5987, + "nll_loss": 0.1496485322713852, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9475893825292587e-05, + "rewards/margins": 0.19668574631214142, + "rewards/rejected": -0.19671523571014404, + "step": 7808 + }, + { + "epoch": 5.4004149377593365, + "grad_norm": 8.154836654663086, + "learning_rate": 2.5553250345781466e-05, + "log_odds_chosen": 9.927988052368164, + "log_odds_ratio": -0.0009140261099673808, + "logits/chosen": -0.1996062695980072, + "logits/rejected": -0.30821940302848816, + "logps/chosen": -0.003140796907246113, + "logps/rejected": -2.3840131759643555, + "loss": 0.5726, + "nll_loss": 0.14306655526161194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003140796907246113, + "rewards/margins": 0.2380872368812561, + "rewards/rejected": -0.23840132355690002, + "step": 7809 + }, + { + "epoch": 5.401106500691563, + "grad_norm": 9.915299415588379, + "learning_rate": 2.554940832949132e-05, + "log_odds_chosen": 9.086884498596191, + "log_odds_ratio": -0.0034978806506842375, + "logits/chosen": -0.4298107922077179, + "logits/rejected": -0.47236326336860657, + "logps/chosen": -0.022841552272439003, + "logps/rejected": -1.7702233791351318, + "loss": 1.1822, + "nll_loss": 0.29521122574806213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022841552272439003, + "rewards/margins": 0.17473816871643066, + "rewards/rejected": -0.17702233791351318, + "step": 7810 + }, + { + "epoch": 5.40179806362379, + "grad_norm": 5.638696670532227, + "learning_rate": 2.5545566313201168e-05, + "log_odds_chosen": 9.845707893371582, + "log_odds_ratio": -0.0009368436876684427, + "logits/chosen": -0.3803238272666931, + "logits/rejected": -0.28517425060272217, + "logps/chosen": -0.0005764992092736065, + "logps/rejected": -1.737668752670288, + "loss": 0.523, + "nll_loss": 0.13065117597579956, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764992238255218e-05, + "rewards/margins": 0.1737092286348343, + "rewards/rejected": -0.1737668663263321, + "step": 7811 + }, + { + "epoch": 5.402489626556017, + "grad_norm": 5.462301254272461, + "learning_rate": 2.5541724296911017e-05, + "log_odds_chosen": 11.049300193786621, + "log_odds_ratio": -3.35049771820195e-05, + "logits/chosen": -0.4539565443992615, + "logits/rejected": -0.527484655380249, + "logps/chosen": -0.00025346927577629685, + "logps/rejected": -2.4482388496398926, + "loss": 0.5746, + "nll_loss": 0.1436401754617691, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.534692976041697e-05, + "rewards/margins": 0.24479854106903076, + "rewards/rejected": -0.2448238879442215, + "step": 7812 + }, + { + "epoch": 5.403181189488244, + "grad_norm": 8.376514434814453, + "learning_rate": 2.5537882280620873e-05, + "log_odds_chosen": 9.106565475463867, + "log_odds_ratio": -0.0010129621950909495, + "logits/chosen": -0.5715546011924744, + "logits/rejected": -0.5758917331695557, + "logps/chosen": -0.0011959555558860302, + "logps/rejected": -1.408020257949829, + "loss": 0.6462, + "nll_loss": 0.16143743693828583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011959556286456063, + "rewards/margins": 0.1406824290752411, + "rewards/rejected": -0.1408020406961441, + "step": 7813 + }, + { + "epoch": 5.403872752420471, + "grad_norm": 8.575206756591797, + "learning_rate": 2.553404026433072e-05, + "log_odds_chosen": 9.173189163208008, + "log_odds_ratio": -0.0004603645938914269, + "logits/chosen": -0.23822470009326935, + "logits/rejected": -0.33198216557502747, + "logps/chosen": -0.0019623057451099157, + "logps/rejected": -2.119011163711548, + "loss": 1.0927, + "nll_loss": 0.2731224596500397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001962305832421407, + "rewards/margins": 0.21170490980148315, + "rewards/rejected": -0.21190112829208374, + "step": 7814 + }, + { + "epoch": 5.404564315352697, + "grad_norm": 7.439986705780029, + "learning_rate": 2.553019824804057e-05, + "log_odds_chosen": 11.508347511291504, + "log_odds_ratio": -2.3410131689161062e-05, + "logits/chosen": -0.23676235973834991, + "logits/rejected": -0.39286863803863525, + "logps/chosen": -0.0002546714968048036, + "logps/rejected": -2.7765936851501465, + "loss": 1.0065, + "nll_loss": 0.25163280963897705, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5467150408076122e-05, + "rewards/margins": 0.2776338756084442, + "rewards/rejected": -0.2776593565940857, + "step": 7815 + }, + { + "epoch": 5.405255878284924, + "grad_norm": 5.230106353759766, + "learning_rate": 2.5526356231750427e-05, + "log_odds_chosen": 9.520034790039062, + "log_odds_ratio": -0.00026278512086719275, + "logits/chosen": -0.4936428666114807, + "logits/rejected": -0.5037153363227844, + "logps/chosen": -0.00033657433232292533, + "logps/rejected": -1.4637000560760498, + "loss": 0.679, + "nll_loss": 0.16972285509109497, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3657433959888294e-05, + "rewards/margins": 0.14633634686470032, + "rewards/rejected": -0.1463700234889984, + "step": 7816 + }, + { + "epoch": 5.405947441217151, + "grad_norm": 7.410196781158447, + "learning_rate": 2.5522514215460276e-05, + "log_odds_chosen": 10.929586410522461, + "log_odds_ratio": -5.880877506569959e-05, + "logits/chosen": -0.26691627502441406, + "logits/rejected": -0.3176085352897644, + "logps/chosen": -0.0001894081215141341, + "logps/rejected": -2.1916801929473877, + "loss": 0.4472, + "nll_loss": 0.11179463565349579, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.894081106001977e-05, + "rewards/margins": 0.21914908289909363, + "rewards/rejected": -0.2191680371761322, + "step": 7817 + }, + { + "epoch": 5.406639004149378, + "grad_norm": 10.585952758789062, + "learning_rate": 2.5518672199170125e-05, + "log_odds_chosen": 9.789392471313477, + "log_odds_ratio": -9.345363650936633e-05, + "logits/chosen": -0.5864452123641968, + "logits/rejected": -0.47814223170280457, + "logps/chosen": -0.0005826476262882352, + "logps/rejected": -2.074592113494873, + "loss": 0.6702, + "nll_loss": 0.1675419807434082, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826475899084471e-05, + "rewards/margins": 0.20740094780921936, + "rewards/rejected": -0.2074592113494873, + "step": 7818 + }, + { + "epoch": 5.407330567081605, + "grad_norm": 11.783663749694824, + "learning_rate": 2.5514830182879977e-05, + "log_odds_chosen": 10.423219680786133, + "log_odds_ratio": -0.00042906455928459764, + "logits/chosen": -0.22940194606781006, + "logits/rejected": -0.24586878716945648, + "logps/chosen": -0.0012691940646618605, + "logps/rejected": -2.0153493881225586, + "loss": 1.043, + "nll_loss": 0.2606947720050812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000126919403555803, + "rewards/margins": 0.2014080137014389, + "rewards/rejected": -0.2015349417924881, + "step": 7819 + }, + { + "epoch": 5.408022130013832, + "grad_norm": 8.057897567749023, + "learning_rate": 2.5510988166589826e-05, + "log_odds_chosen": 10.591058731079102, + "log_odds_ratio": -5.7730518165044487e-05, + "logits/chosen": -0.3266027569770813, + "logits/rejected": -0.33674854040145874, + "logps/chosen": -0.00021130419918335974, + "logps/rejected": -2.0841257572174072, + "loss": 0.9355, + "nll_loss": 0.23385721445083618, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1130421373527497e-05, + "rewards/margins": 0.20839142799377441, + "rewards/rejected": -0.2084125578403473, + "step": 7820 + }, + { + "epoch": 5.408713692946058, + "grad_norm": 6.1873579025268555, + "learning_rate": 2.5507146150299675e-05, + "log_odds_chosen": 9.934715270996094, + "log_odds_ratio": -0.0001489290443714708, + "logits/chosen": -0.4274921417236328, + "logits/rejected": -0.41432029008865356, + "logps/chosen": -0.013626918196678162, + "logps/rejected": -2.4014713764190674, + "loss": 0.7978, + "nll_loss": 0.19942784309387207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001362691866233945, + "rewards/margins": 0.23878444731235504, + "rewards/rejected": -0.24014714360237122, + "step": 7821 + }, + { + "epoch": 5.409405255878285, + "grad_norm": 7.516190528869629, + "learning_rate": 2.550330413400953e-05, + "log_odds_chosen": 10.188470840454102, + "log_odds_ratio": -0.00020180402498226613, + "logits/chosen": -0.3321394920349121, + "logits/rejected": -0.39233285188674927, + "logps/chosen": -0.0009289323934353888, + "logps/rejected": -2.192671537399292, + "loss": 0.8631, + "nll_loss": 0.2157595008611679, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.289323497796431e-05, + "rewards/margins": 0.21917426586151123, + "rewards/rejected": -0.21926715970039368, + "step": 7822 + }, + { + "epoch": 5.410096818810512, + "grad_norm": 11.857353210449219, + "learning_rate": 2.549946211771938e-05, + "log_odds_chosen": 9.52438735961914, + "log_odds_ratio": -0.0005739558837376535, + "logits/chosen": -0.26063770055770874, + "logits/rejected": -0.2853577733039856, + "logps/chosen": -0.0016212889458984137, + "logps/rejected": -1.7387852668762207, + "loss": 0.6358, + "nll_loss": 0.15889419615268707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001621288975002244, + "rewards/margins": 0.17371639609336853, + "rewards/rejected": -0.1738785207271576, + "step": 7823 + }, + { + "epoch": 5.410788381742739, + "grad_norm": 8.935547828674316, + "learning_rate": 2.549562010142923e-05, + "log_odds_chosen": 10.768202781677246, + "log_odds_ratio": -3.928116348106414e-05, + "logits/chosen": -0.6923472285270691, + "logits/rejected": -0.6736399531364441, + "logps/chosen": -0.00015976907161530107, + "logps/rejected": -1.725914716720581, + "loss": 0.6046, + "nll_loss": 0.15114957094192505, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.597690788912587e-05, + "rewards/margins": 0.17257550358772278, + "rewards/rejected": -0.17259149253368378, + "step": 7824 + }, + { + "epoch": 5.411479944674966, + "grad_norm": 6.82484769821167, + "learning_rate": 2.5491778085139085e-05, + "log_odds_chosen": 10.331005096435547, + "log_odds_ratio": -6.685660628136247e-05, + "logits/chosen": -0.06196293607354164, + "logits/rejected": -0.18631382286548615, + "logps/chosen": -0.0006202163640409708, + "logps/rejected": -2.1389901638031006, + "loss": 0.8618, + "nll_loss": 0.21543405950069427, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.20216378592886e-05, + "rewards/margins": 0.21383699774742126, + "rewards/rejected": -0.21389901638031006, + "step": 7825 + }, + { + "epoch": 5.412171507607193, + "grad_norm": 5.5454583168029785, + "learning_rate": 2.5487936068848934e-05, + "log_odds_chosen": 10.644842147827148, + "log_odds_ratio": -7.072136213537306e-05, + "logits/chosen": -0.3575887084007263, + "logits/rejected": -0.3973062038421631, + "logps/chosen": -0.0005669151432812214, + "logps/rejected": -2.4327518939971924, + "loss": 0.821, + "nll_loss": 0.20524708926677704, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.669151869369671e-05, + "rewards/margins": 0.24321848154067993, + "rewards/rejected": -0.24327519536018372, + "step": 7826 + }, + { + "epoch": 5.412863070539419, + "grad_norm": 9.592449188232422, + "learning_rate": 2.5484094052558783e-05, + "log_odds_chosen": 8.673015594482422, + "log_odds_ratio": -0.013189301826059818, + "logits/chosen": -0.21795159578323364, + "logits/rejected": -0.34566575288772583, + "logps/chosen": -0.011491509154438972, + "logps/rejected": -1.5653188228607178, + "loss": 1.0418, + "nll_loss": 0.2591352164745331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011491508921608329, + "rewards/margins": 0.15538272261619568, + "rewards/rejected": -0.15653188526630402, + "step": 7827 + }, + { + "epoch": 5.413554633471646, + "grad_norm": 8.43200397491455, + "learning_rate": 2.5480252036268636e-05, + "log_odds_chosen": 9.651618957519531, + "log_odds_ratio": -0.00014595997345168144, + "logits/chosen": -0.795384407043457, + "logits/rejected": -0.8214473724365234, + "logps/chosen": -0.0004256881948094815, + "logps/rejected": -1.5659555196762085, + "loss": 0.5753, + "nll_loss": 0.14379899203777313, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.256881948094815e-05, + "rewards/margins": 0.1565529853105545, + "rewards/rejected": -0.15659555792808533, + "step": 7828 + }, + { + "epoch": 5.414246196403873, + "grad_norm": 6.174905776977539, + "learning_rate": 2.5476410019978485e-05, + "log_odds_chosen": 10.615562438964844, + "log_odds_ratio": -9.659776696935296e-05, + "logits/chosen": -0.12677879631519318, + "logits/rejected": -0.31755977869033813, + "logps/chosen": -0.002218089997768402, + "logps/rejected": -2.367126941680908, + "loss": 0.6761, + "nll_loss": 0.1690124273300171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022180900850798935, + "rewards/margins": 0.2364909052848816, + "rewards/rejected": -0.2367127239704132, + "step": 7829 + }, + { + "epoch": 5.4149377593361, + "grad_norm": 5.8143439292907715, + "learning_rate": 2.5472568003688334e-05, + "log_odds_chosen": 10.541342735290527, + "log_odds_ratio": -0.00013124076940584928, + "logits/chosen": -0.693324089050293, + "logits/rejected": -0.8504449725151062, + "logps/chosen": -0.00027568236691877246, + "logps/rejected": -1.851475715637207, + "loss": 0.4963, + "nll_loss": 0.12405158579349518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.756823414529208e-05, + "rewards/margins": 0.18512000143527985, + "rewards/rejected": -0.18514756858348846, + "step": 7830 + }, + { + "epoch": 5.415629322268327, + "grad_norm": 8.409075736999512, + "learning_rate": 2.546872598739819e-05, + "log_odds_chosen": 10.672347068786621, + "log_odds_ratio": -6.198248593136668e-05, + "logits/chosen": -0.43167707324028015, + "logits/rejected": -0.536747395992279, + "logps/chosen": -0.00020294116984587163, + "logps/rejected": -2.2104036808013916, + "loss": 0.6968, + "nll_loss": 0.1741933524608612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0294119167374447e-05, + "rewards/margins": 0.22102010250091553, + "rewards/rejected": -0.22104039788246155, + "step": 7831 + }, + { + "epoch": 5.4163208852005535, + "grad_norm": 6.756588935852051, + "learning_rate": 2.546488397110804e-05, + "log_odds_chosen": 9.484219551086426, + "log_odds_ratio": -0.003364234697073698, + "logits/chosen": -0.5741379261016846, + "logits/rejected": -0.5597481727600098, + "logps/chosen": -0.00293903099372983, + "logps/rejected": -2.191380500793457, + "loss": 1.3862, + "nll_loss": 0.3462083637714386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029390311101451516, + "rewards/margins": 0.21884416043758392, + "rewards/rejected": -0.21913805603981018, + "step": 7832 + }, + { + "epoch": 5.41701244813278, + "grad_norm": 5.635895729064941, + "learning_rate": 2.5461041954817888e-05, + "log_odds_chosen": 10.222651481628418, + "log_odds_ratio": -0.0007151153404265642, + "logits/chosen": -0.44129547476768494, + "logits/rejected": -0.5282646417617798, + "logps/chosen": -0.0005037469090893865, + "logps/rejected": -2.230571746826172, + "loss": 1.3282, + "nll_loss": 0.3319754898548126, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.037469236413017e-05, + "rewards/margins": 0.22300681471824646, + "rewards/rejected": -0.22305719554424286, + "step": 7833 + }, + { + "epoch": 5.417704011065007, + "grad_norm": 6.464162349700928, + "learning_rate": 2.5457199938527743e-05, + "log_odds_chosen": 10.352190017700195, + "log_odds_ratio": -6.672489689663053e-05, + "logits/chosen": -0.5189627408981323, + "logits/rejected": -0.5931606292724609, + "logps/chosen": -0.0005168755888007581, + "logps/rejected": -2.325748920440674, + "loss": 0.7537, + "nll_loss": 0.18841908872127533, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.168755888007581e-05, + "rewards/margins": 0.23252321779727936, + "rewards/rejected": -0.23257490992546082, + "step": 7834 + }, + { + "epoch": 5.418395573997234, + "grad_norm": 9.047159194946289, + "learning_rate": 2.5453357922237592e-05, + "log_odds_chosen": 9.300627708435059, + "log_odds_ratio": -0.0006379535770975053, + "logits/chosen": -0.1021367758512497, + "logits/rejected": -0.06073558330535889, + "logps/chosen": -0.0013150572776794434, + "logps/rejected": -2.060987949371338, + "loss": 0.7732, + "nll_loss": 0.19324207305908203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013150573067832738, + "rewards/margins": 0.20596730709075928, + "rewards/rejected": -0.2060987949371338, + "step": 7835 + }, + { + "epoch": 5.419087136929461, + "grad_norm": 4.95590353012085, + "learning_rate": 2.544951590594744e-05, + "log_odds_chosen": 9.185993194580078, + "log_odds_ratio": -0.000421573146013543, + "logits/chosen": -0.27182701230049133, + "logits/rejected": -0.27344971895217896, + "logps/chosen": -0.0005388180143199861, + "logps/rejected": -1.6429342031478882, + "loss": 0.747, + "nll_loss": 0.1866980791091919, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.388180579757318e-05, + "rewards/margins": 0.1642395406961441, + "rewards/rejected": -0.16429343819618225, + "step": 7836 + }, + { + "epoch": 5.419778699861688, + "grad_norm": 7.58992862701416, + "learning_rate": 2.5445673889657294e-05, + "log_odds_chosen": 10.388230323791504, + "log_odds_ratio": -0.00028462830232456326, + "logits/chosen": -0.27032992243766785, + "logits/rejected": -0.3360634446144104, + "logps/chosen": -0.014370308257639408, + "logps/rejected": -2.8050293922424316, + "loss": 0.5867, + "nll_loss": 0.14663758873939514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014370307326316833, + "rewards/margins": 0.27906593680381775, + "rewards/rejected": -0.28050294518470764, + "step": 7837 + }, + { + "epoch": 5.4204702627939145, + "grad_norm": 8.065587043762207, + "learning_rate": 2.5441831873367143e-05, + "log_odds_chosen": 12.526787757873535, + "log_odds_ratio": -7.872034984757192e-06, + "logits/chosen": -0.7101952433586121, + "logits/rejected": -0.7076274752616882, + "logps/chosen": -4.7266785259125754e-05, + "logps/rejected": -2.2666127681732178, + "loss": 0.6991, + "nll_loss": 0.17478647828102112, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7266785259125754e-06, + "rewards/margins": 0.22665655612945557, + "rewards/rejected": -0.2266612946987152, + "step": 7838 + }, + { + "epoch": 5.421161825726141, + "grad_norm": 6.4186787605285645, + "learning_rate": 2.5437989857076992e-05, + "log_odds_chosen": 10.256253242492676, + "log_odds_ratio": -0.00030515273101627827, + "logits/chosen": -0.3902437090873718, + "logits/rejected": -0.510823667049408, + "logps/chosen": -0.0005199067527428269, + "logps/rejected": -2.0400826930999756, + "loss": 0.5576, + "nll_loss": 0.13937097787857056, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.19906789122615e-05, + "rewards/margins": 0.20395630598068237, + "rewards/rejected": -0.20400826632976532, + "step": 7839 + }, + { + "epoch": 5.421853388658368, + "grad_norm": 5.648606300354004, + "learning_rate": 2.5434147840786848e-05, + "log_odds_chosen": 9.406319618225098, + "log_odds_ratio": -0.00028232004842720926, + "logits/chosen": -0.4357467591762543, + "logits/rejected": -0.4736112058162689, + "logps/chosen": -0.0003766193403862417, + "logps/rejected": -1.1548511981964111, + "loss": 1.2206, + "nll_loss": 0.3051202893257141, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.766193549381569e-05, + "rewards/margins": 0.11544745415449142, + "rewards/rejected": -0.11548513174057007, + "step": 7840 + }, + { + "epoch": 5.422544951590595, + "grad_norm": 6.648719310760498, + "learning_rate": 2.5430305824496697e-05, + "log_odds_chosen": 9.577982902526855, + "log_odds_ratio": -0.007578455377370119, + "logits/chosen": -0.40829968452453613, + "logits/rejected": -0.46084004640579224, + "logps/chosen": -0.0030157307628542185, + "logps/rejected": -1.722627878189087, + "loss": 1.2203, + "nll_loss": 0.30430811643600464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030157307628542185, + "rewards/margins": 0.1719612181186676, + "rewards/rejected": -0.1722627878189087, + "step": 7841 + }, + { + "epoch": 5.423236514522822, + "grad_norm": 6.960811138153076, + "learning_rate": 2.5426463808206546e-05, + "log_odds_chosen": 11.941719055175781, + "log_odds_ratio": -1.1457333130238112e-05, + "logits/chosen": -0.8144433498382568, + "logits/rejected": -0.9371423125267029, + "logps/chosen": -8.856329077389091e-05, + "logps/rejected": -2.627164363861084, + "loss": 0.4427, + "nll_loss": 0.11068376898765564, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.856329259288032e-06, + "rewards/margins": 0.26270759105682373, + "rewards/rejected": -0.2627164423465729, + "step": 7842 + }, + { + "epoch": 5.423928077455049, + "grad_norm": 5.834754943847656, + "learning_rate": 2.5422621791916402e-05, + "log_odds_chosen": 9.961302757263184, + "log_odds_ratio": -8.535663073416799e-05, + "logits/chosen": -0.6488773226737976, + "logits/rejected": -0.695549726486206, + "logps/chosen": -0.0005070774932391942, + "logps/rejected": -1.7984031438827515, + "loss": 0.6351, + "nll_loss": 0.1587558388710022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.070775296189822e-05, + "rewards/margins": 0.17978960275650024, + "rewards/rejected": -0.1798403263092041, + "step": 7843 + }, + { + "epoch": 5.4246196403872755, + "grad_norm": 4.278073310852051, + "learning_rate": 2.541877977562625e-05, + "log_odds_chosen": 9.657689094543457, + "log_odds_ratio": -0.0005896209622733295, + "logits/chosen": -0.5128522515296936, + "logits/rejected": -0.5324857234954834, + "logps/chosen": -0.009259654209017754, + "logps/rejected": -2.4090094566345215, + "loss": 0.7183, + "nll_loss": 0.17950932681560516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009259654907509685, + "rewards/margins": 0.2399749755859375, + "rewards/rejected": -0.24090096354484558, + "step": 7844 + }, + { + "epoch": 5.425311203319502, + "grad_norm": 7.503888130187988, + "learning_rate": 2.54149377593361e-05, + "log_odds_chosen": 9.949324607849121, + "log_odds_ratio": -0.00013019111065659672, + "logits/chosen": -0.16912402212619781, + "logits/rejected": -0.2177458107471466, + "logps/chosen": -0.0010880143381655216, + "logps/rejected": -2.2526049613952637, + "loss": 0.8531, + "nll_loss": 0.2132684886455536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010880143236136064, + "rewards/margins": 0.22515171766281128, + "rewards/rejected": -0.22526051104068756, + "step": 7845 + }, + { + "epoch": 5.426002766251729, + "grad_norm": 10.34386920928955, + "learning_rate": 2.5411095743045952e-05, + "log_odds_chosen": 11.487712860107422, + "log_odds_ratio": -0.00019205230637453496, + "logits/chosen": -0.636671245098114, + "logits/rejected": -0.6072683930397034, + "logps/chosen": -0.0009793839417397976, + "logps/rejected": -3.500851631164551, + "loss": 0.8111, + "nll_loss": 0.20274467766284943, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.793839126359671e-05, + "rewards/margins": 0.3499872088432312, + "rewards/rejected": -0.35008513927459717, + "step": 7846 + }, + { + "epoch": 5.426694329183956, + "grad_norm": 5.985348701477051, + "learning_rate": 2.54072537267558e-05, + "log_odds_chosen": 10.38613510131836, + "log_odds_ratio": -0.00016703951405361295, + "logits/chosen": -0.29771584272384644, + "logits/rejected": -0.3260590434074402, + "logps/chosen": -0.0007466238457709551, + "logps/rejected": -2.1024794578552246, + "loss": 0.5324, + "nll_loss": 0.13309422135353088, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.466238457709551e-05, + "rewards/margins": 0.21017327904701233, + "rewards/rejected": -0.2102479487657547, + "step": 7847 + }, + { + "epoch": 5.427385892116183, + "grad_norm": 11.137646675109863, + "learning_rate": 2.540341171046565e-05, + "log_odds_chosen": 10.203710556030273, + "log_odds_ratio": -0.0006535428110510111, + "logits/chosen": -0.46439170837402344, + "logits/rejected": -0.4716670513153076, + "logps/chosen": -0.0006691356538794935, + "logps/rejected": -2.228005886077881, + "loss": 0.7658, + "nll_loss": 0.19138406217098236, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.69135624775663e-05, + "rewards/margins": 0.22273366153240204, + "rewards/rejected": -0.2228005826473236, + "step": 7848 + }, + { + "epoch": 5.42807745504841, + "grad_norm": 9.139507293701172, + "learning_rate": 2.5399569694175506e-05, + "log_odds_chosen": 11.50609302520752, + "log_odds_ratio": -2.8523911169031635e-05, + "logits/chosen": -0.2181396782398224, + "logits/rejected": -0.2987217903137207, + "logps/chosen": -0.00016175236669369042, + "logps/rejected": -2.743394136428833, + "loss": 0.8292, + "nll_loss": 0.2072906345129013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.617523594177328e-05, + "rewards/margins": 0.2743232548236847, + "rewards/rejected": -0.2743394374847412, + "step": 7849 + }, + { + "epoch": 5.4287690179806365, + "grad_norm": 9.508968353271484, + "learning_rate": 2.5395727677885355e-05, + "log_odds_chosen": 10.997293472290039, + "log_odds_ratio": -4.560018714983016e-05, + "logits/chosen": -0.3897295892238617, + "logits/rejected": -0.3831002116203308, + "logps/chosen": -0.00018428656039759517, + "logps/rejected": -2.2266499996185303, + "loss": 0.6922, + "nll_loss": 0.17304514348506927, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8428656403557397e-05, + "rewards/margins": 0.222646564245224, + "rewards/rejected": -0.2226649969816208, + "step": 7850 + }, + { + "epoch": 5.429460580912863, + "grad_norm": 7.224493026733398, + "learning_rate": 2.5391885661595204e-05, + "log_odds_chosen": 11.01947021484375, + "log_odds_ratio": -8.331074059242383e-05, + "logits/chosen": -0.1935908943414688, + "logits/rejected": -0.2113337516784668, + "logps/chosen": -0.00044694929965771735, + "logps/rejected": -2.6879687309265137, + "loss": 0.8935, + "nll_loss": 0.22337090969085693, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.469492705538869e-05, + "rewards/margins": 0.26875215768814087, + "rewards/rejected": -0.2687968909740448, + "step": 7851 + }, + { + "epoch": 5.43015214384509, + "grad_norm": 9.00788688659668, + "learning_rate": 2.538804364530506e-05, + "log_odds_chosen": 9.374788284301758, + "log_odds_ratio": -0.05502600222826004, + "logits/chosen": -0.4989396929740906, + "logits/rejected": -0.6203451156616211, + "logps/chosen": -0.010878579691052437, + "logps/rejected": -1.516574740409851, + "loss": 0.7964, + "nll_loss": 0.19360893964767456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010878578759729862, + "rewards/margins": 0.1505696177482605, + "rewards/rejected": -0.15165749192237854, + "step": 7852 + }, + { + "epoch": 5.430843706777317, + "grad_norm": 7.597384452819824, + "learning_rate": 2.538420162901491e-05, + "log_odds_chosen": 10.482349395751953, + "log_odds_ratio": -0.0003454138641245663, + "logits/chosen": -0.0877026915550232, + "logits/rejected": -0.19539429247379303, + "logps/chosen": -0.0005087396712042391, + "logps/rejected": -2.267861843109131, + "loss": 0.8494, + "nll_loss": 0.21232616901397705, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0873968575615436e-05, + "rewards/margins": 0.22673532366752625, + "rewards/rejected": -0.22678618133068085, + "step": 7853 + }, + { + "epoch": 5.431535269709544, + "grad_norm": 9.940882682800293, + "learning_rate": 2.538035961272476e-05, + "log_odds_chosen": 11.412620544433594, + "log_odds_ratio": -0.0010527351405471563, + "logits/chosen": -0.6566622257232666, + "logits/rejected": -0.7543309926986694, + "logps/chosen": -0.0007822321495041251, + "logps/rejected": -2.9906747341156006, + "loss": 0.7132, + "nll_loss": 0.17820365726947784, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.822322368156165e-05, + "rewards/margins": 0.29898926615715027, + "rewards/rejected": -0.2990674674510956, + "step": 7854 + }, + { + "epoch": 5.432226832641771, + "grad_norm": 8.040964126586914, + "learning_rate": 2.537651759643461e-05, + "log_odds_chosen": 10.689407348632812, + "log_odds_ratio": -4.262772563379258e-05, + "logits/chosen": -0.6748466491699219, + "logits/rejected": -0.7293582558631897, + "logps/chosen": -0.0002847913419827819, + "logps/rejected": -2.2315211296081543, + "loss": 0.6307, + "nll_loss": 0.1576675921678543, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.847913492587395e-05, + "rewards/margins": 0.22312362492084503, + "rewards/rejected": -0.22315210103988647, + "step": 7855 + }, + { + "epoch": 5.4329183955739975, + "grad_norm": 4.731971263885498, + "learning_rate": 2.537267558014446e-05, + "log_odds_chosen": 10.669794082641602, + "log_odds_ratio": -0.00023917089856695384, + "logits/chosen": -0.5203498005867004, + "logits/rejected": -0.4933302402496338, + "logps/chosen": -0.0003398106200620532, + "logps/rejected": -2.070086717605591, + "loss": 0.6356, + "nll_loss": 0.15886807441711426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.398106127860956e-05, + "rewards/margins": 0.20697468519210815, + "rewards/rejected": -0.20700865983963013, + "step": 7856 + }, + { + "epoch": 5.433609958506224, + "grad_norm": 5.539132118225098, + "learning_rate": 2.5368833563854312e-05, + "log_odds_chosen": 10.369670867919922, + "log_odds_ratio": -0.000260966713540256, + "logits/chosen": -0.24483546614646912, + "logits/rejected": -0.3387295603752136, + "logps/chosen": -0.00038009221316315234, + "logps/rejected": -2.261167049407959, + "loss": 0.83, + "nll_loss": 0.20746630430221558, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.800922058871947e-05, + "rewards/margins": 0.22607870399951935, + "rewards/rejected": -0.22611671686172485, + "step": 7857 + }, + { + "epoch": 5.434301521438451, + "grad_norm": 6.399085998535156, + "learning_rate": 2.5364991547564165e-05, + "log_odds_chosen": 10.487403869628906, + "log_odds_ratio": -0.0002748219412751496, + "logits/chosen": -0.4126533567905426, + "logits/rejected": -0.4159373342990875, + "logps/chosen": -0.0016300861025229096, + "logps/rejected": -2.318737268447876, + "loss": 0.8799, + "nll_loss": 0.2199430912733078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016300860443152487, + "rewards/margins": 0.23171071708202362, + "rewards/rejected": -0.23187373578548431, + "step": 7858 + }, + { + "epoch": 5.434993084370678, + "grad_norm": 6.040285110473633, + "learning_rate": 2.5361149531274014e-05, + "log_odds_chosen": 11.001068115234375, + "log_odds_ratio": -2.4839646357577294e-05, + "logits/chosen": -0.692878007888794, + "logits/rejected": -0.7236531972885132, + "logps/chosen": -0.0001058193010976538, + "logps/rejected": -1.721745252609253, + "loss": 0.5957, + "nll_loss": 0.1489114761352539, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.058192992786644e-05, + "rewards/margins": 0.1721639335155487, + "rewards/rejected": -0.17217451333999634, + "step": 7859 + }, + { + "epoch": 5.435684647302905, + "grad_norm": 8.008295059204102, + "learning_rate": 2.5357307514983863e-05, + "log_odds_chosen": 9.715714454650879, + "log_odds_ratio": -0.0013938343618065119, + "logits/chosen": -0.24811391532421112, + "logits/rejected": -0.3216937780380249, + "logps/chosen": -0.0010375329293310642, + "logps/rejected": -1.7235355377197266, + "loss": 0.8535, + "nll_loss": 0.2132408618927002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010375330020906404, + "rewards/margins": 0.17224982380867004, + "rewards/rejected": -0.17235355079174042, + "step": 7860 + }, + { + "epoch": 5.436376210235132, + "grad_norm": 7.474475383758545, + "learning_rate": 2.535346549869372e-05, + "log_odds_chosen": 9.584784507751465, + "log_odds_ratio": -0.0005862210527993739, + "logits/chosen": -0.5556076765060425, + "logits/rejected": -0.557613730430603, + "logps/chosen": -0.0021440701093524694, + "logps/rejected": -1.8439087867736816, + "loss": 0.8209, + "nll_loss": 0.20515908300876617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002144070458598435, + "rewards/margins": 0.1841764748096466, + "rewards/rejected": -0.18439088761806488, + "step": 7861 + }, + { + "epoch": 5.4370677731673585, + "grad_norm": 7.606071472167969, + "learning_rate": 2.5349623482403568e-05, + "log_odds_chosen": 9.713556289672852, + "log_odds_ratio": -0.0002427960280328989, + "logits/chosen": -0.11545206606388092, + "logits/rejected": -0.1437520831823349, + "logps/chosen": -0.0006739971577189863, + "logps/rejected": -1.5161678791046143, + "loss": 0.8651, + "nll_loss": 0.2162489891052246, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.739972741343081e-05, + "rewards/margins": 0.15154938399791718, + "rewards/rejected": -0.15161678194999695, + "step": 7862 + }, + { + "epoch": 5.437759336099585, + "grad_norm": 8.389718055725098, + "learning_rate": 2.5345781466113417e-05, + "log_odds_chosen": 10.20302963256836, + "log_odds_ratio": -4.7732421080581844e-05, + "logits/chosen": -0.5267475843429565, + "logits/rejected": -0.5469604730606079, + "logps/chosen": -0.00023220572620630264, + "logps/rejected": -1.7363343238830566, + "loss": 1.0272, + "nll_loss": 0.25679296255111694, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.322057480341755e-05, + "rewards/margins": 0.17361021041870117, + "rewards/rejected": -0.1736334264278412, + "step": 7863 + }, + { + "epoch": 5.438450899031812, + "grad_norm": 13.0947847366333, + "learning_rate": 2.534193944982327e-05, + "log_odds_chosen": 9.859249114990234, + "log_odds_ratio": -0.00023818403133191168, + "logits/chosen": -0.2789401710033417, + "logits/rejected": -0.3713934123516083, + "logps/chosen": -0.0011656444985419512, + "logps/rejected": -1.5852174758911133, + "loss": 0.7659, + "nll_loss": 0.19143958389759064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001165644425782375, + "rewards/margins": 0.15840518474578857, + "rewards/rejected": -0.15852174162864685, + "step": 7864 + }, + { + "epoch": 5.439142461964039, + "grad_norm": 6.816270351409912, + "learning_rate": 2.5338097433533122e-05, + "log_odds_chosen": 10.205053329467773, + "log_odds_ratio": -0.00020811142167076468, + "logits/chosen": -0.780189037322998, + "logits/rejected": -0.7887230515480042, + "logps/chosen": -0.007137411739677191, + "logps/rejected": -2.1368024349212646, + "loss": 0.8766, + "nll_loss": 0.219136044383049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00071374123217538, + "rewards/margins": 0.21296651661396027, + "rewards/rejected": -0.21368026733398438, + "step": 7865 + }, + { + "epoch": 5.439834024896266, + "grad_norm": 8.272232055664062, + "learning_rate": 2.533425541724297e-05, + "log_odds_chosen": 8.850973129272461, + "log_odds_ratio": -0.0006840628921054304, + "logits/chosen": -0.629523515701294, + "logits/rejected": -0.6524733304977417, + "logps/chosen": -0.0012333383783698082, + "logps/rejected": -1.3839800357818604, + "loss": 0.9089, + "nll_loss": 0.2271665334701538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012333384074736387, + "rewards/margins": 0.1382746696472168, + "rewards/rejected": -0.13839800655841827, + "step": 7866 + }, + { + "epoch": 5.440525587828493, + "grad_norm": 7.405073165893555, + "learning_rate": 2.5330413400952823e-05, + "log_odds_chosen": 10.685062408447266, + "log_odds_ratio": -2.844108166755177e-05, + "logits/chosen": -0.437447190284729, + "logits/rejected": -0.5611270070075989, + "logps/chosen": -0.00013399166346061975, + "logps/rejected": -1.8456788063049316, + "loss": 0.4605, + "nll_loss": 0.11511488258838654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3399166164163034e-05, + "rewards/margins": 0.18455448746681213, + "rewards/rejected": -0.1845678687095642, + "step": 7867 + }, + { + "epoch": 5.441217150760719, + "grad_norm": 6.375338554382324, + "learning_rate": 2.5326571384662672e-05, + "log_odds_chosen": 10.531484603881836, + "log_odds_ratio": -0.00015703555254731327, + "logits/chosen": -0.2637036144733429, + "logits/rejected": -0.17880676686763763, + "logps/chosen": -0.00035209517227485776, + "logps/rejected": -1.7615535259246826, + "loss": 0.923, + "nll_loss": 0.23073497414588928, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.520952304825187e-05, + "rewards/margins": 0.17612014710903168, + "rewards/rejected": -0.17615535855293274, + "step": 7868 + }, + { + "epoch": 5.441908713692946, + "grad_norm": 12.731097221374512, + "learning_rate": 2.532272936837252e-05, + "log_odds_chosen": 10.44438362121582, + "log_odds_ratio": -5.5176606110762805e-05, + "logits/chosen": -1.0070915222167969, + "logits/rejected": -1.0459587574005127, + "logps/chosen": -0.00043362006545066833, + "logps/rejected": -2.1274542808532715, + "loss": 1.1053, + "nll_loss": 0.2763287425041199, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3362008000258356e-05, + "rewards/margins": 0.21270209550857544, + "rewards/rejected": -0.21274544298648834, + "step": 7869 + }, + { + "epoch": 5.442600276625173, + "grad_norm": 18.94190216064453, + "learning_rate": 2.5318887352082377e-05, + "log_odds_chosen": 11.627334594726562, + "log_odds_ratio": -2.15392756217625e-05, + "logits/chosen": -0.7026960849761963, + "logits/rejected": -0.7298768162727356, + "logps/chosen": -0.00012318039080128074, + "logps/rejected": -2.5806596279144287, + "loss": 0.8875, + "nll_loss": 0.22187533974647522, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2318038898229133e-05, + "rewards/margins": 0.25805363059043884, + "rewards/rejected": -0.25806596875190735, + "step": 7870 + }, + { + "epoch": 5.4432918395574, + "grad_norm": 12.605120658874512, + "learning_rate": 2.5315045335792226e-05, + "log_odds_chosen": 10.456953048706055, + "log_odds_ratio": -0.0006705054547637701, + "logits/chosen": -0.6541532874107361, + "logits/rejected": -0.7368367910385132, + "logps/chosen": -0.0007939254865050316, + "logps/rejected": -2.2013332843780518, + "loss": 0.6147, + "nll_loss": 0.15360420942306519, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.939254282973707e-05, + "rewards/margins": 0.22005394101142883, + "rewards/rejected": -0.22013333439826965, + "step": 7871 + }, + { + "epoch": 5.443983402489627, + "grad_norm": 5.563952922821045, + "learning_rate": 2.5311203319502075e-05, + "log_odds_chosen": 10.210861206054688, + "log_odds_ratio": -0.00012154671276221052, + "logits/chosen": -0.2771844267845154, + "logits/rejected": -0.2716318666934967, + "logps/chosen": -0.00019371393136680126, + "logps/rejected": -1.819549798965454, + "loss": 0.7497, + "nll_loss": 0.18740403652191162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9371393136680126e-05, + "rewards/margins": 0.1819356083869934, + "rewards/rejected": -0.1819549798965454, + "step": 7872 + }, + { + "epoch": 5.444674965421854, + "grad_norm": 8.425066947937012, + "learning_rate": 2.530736130321193e-05, + "log_odds_chosen": 11.025166511535645, + "log_odds_ratio": -4.0440820157527924e-05, + "logits/chosen": -0.34361937642097473, + "logits/rejected": -0.40224599838256836, + "logps/chosen": -0.00013009503891225904, + "logps/rejected": -2.163750648498535, + "loss": 0.5686, + "nll_loss": 0.14215293526649475, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3009504073124845e-05, + "rewards/margins": 0.21636205911636353, + "rewards/rejected": -0.21637505292892456, + "step": 7873 + }, + { + "epoch": 5.44536652835408, + "grad_norm": 3.8120367527008057, + "learning_rate": 2.530351928692178e-05, + "log_odds_chosen": 10.694634437561035, + "log_odds_ratio": -4.566337156575173e-05, + "logits/chosen": -0.42301592230796814, + "logits/rejected": -0.48795831203460693, + "logps/chosen": -0.00040328531758859754, + "logps/rejected": -2.773578405380249, + "loss": 0.6298, + "nll_loss": 0.15745729207992554, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.032853394164704e-05, + "rewards/margins": 0.27731749415397644, + "rewards/rejected": -0.277357816696167, + "step": 7874 + }, + { + "epoch": 5.446058091286307, + "grad_norm": 4.726737022399902, + "learning_rate": 2.529967727063163e-05, + "log_odds_chosen": 10.373005867004395, + "log_odds_ratio": -8.296048326883465e-05, + "logits/chosen": -0.2723003029823303, + "logits/rejected": -0.2989640235900879, + "logps/chosen": -0.0003351265622768551, + "logps/rejected": -2.3543386459350586, + "loss": 0.562, + "nll_loss": 0.14049817621707916, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3512653317302465e-05, + "rewards/margins": 0.23540034890174866, + "rewards/rejected": -0.23543386161327362, + "step": 7875 + }, + { + "epoch": 5.446749654218534, + "grad_norm": 8.28689956665039, + "learning_rate": 2.529583525434148e-05, + "log_odds_chosen": 9.111331939697266, + "log_odds_ratio": -0.0001572294277139008, + "logits/chosen": -0.2043474316596985, + "logits/rejected": -0.28499382734298706, + "logps/chosen": -0.0006082933978177607, + "logps/rejected": -1.5229251384735107, + "loss": 1.0661, + "nll_loss": 0.26651203632354736, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0829341236967593e-05, + "rewards/margins": 0.15223167836666107, + "rewards/rejected": -0.15229250490665436, + "step": 7876 + }, + { + "epoch": 5.447441217150761, + "grad_norm": 6.75670862197876, + "learning_rate": 2.529199323805133e-05, + "log_odds_chosen": 10.351332664489746, + "log_odds_ratio": -0.0002016138460021466, + "logits/chosen": -0.27136003971099854, + "logits/rejected": -0.29065850377082825, + "logps/chosen": -0.0005154769751243293, + "logps/rejected": -2.0906341075897217, + "loss": 0.6432, + "nll_loss": 0.16078633069992065, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.154770042281598e-05, + "rewards/margins": 0.20901186764240265, + "rewards/rejected": -0.20906341075897217, + "step": 7877 + }, + { + "epoch": 5.448132780082988, + "grad_norm": 8.170028686523438, + "learning_rate": 2.528815122176118e-05, + "log_odds_chosen": 10.666536331176758, + "log_odds_ratio": -3.752233897102997e-05, + "logits/chosen": -0.25186508893966675, + "logits/rejected": -0.37342870235443115, + "logps/chosen": -0.0002312668802915141, + "logps/rejected": -2.112480640411377, + "loss": 0.7806, + "nll_loss": 0.1951507329940796, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.312668766535353e-05, + "rewards/margins": 0.21122492849826813, + "rewards/rejected": -0.21124804019927979, + "step": 7878 + }, + { + "epoch": 5.448824343015215, + "grad_norm": 12.511398315429688, + "learning_rate": 2.5284309205471036e-05, + "log_odds_chosen": 10.229702949523926, + "log_odds_ratio": -0.00016912652063183486, + "logits/chosen": -0.4138595461845398, + "logits/rejected": -0.3729810416698456, + "logps/chosen": -0.0001521167578175664, + "logps/rejected": -1.7475824356079102, + "loss": 0.792, + "nll_loss": 0.1979808211326599, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.521167632745346e-05, + "rewards/margins": 0.17474302649497986, + "rewards/rejected": -0.17475822567939758, + "step": 7879 + }, + { + "epoch": 5.449515905947441, + "grad_norm": 7.0651116371154785, + "learning_rate": 2.5280467189180885e-05, + "log_odds_chosen": 10.682268142700195, + "log_odds_ratio": -0.017535412684082985, + "logits/chosen": -0.3367564082145691, + "logits/rejected": -0.3902648985385895, + "logps/chosen": -0.006567737087607384, + "logps/rejected": -2.294995069503784, + "loss": 0.7196, + "nll_loss": 0.17814846336841583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006567737436853349, + "rewards/margins": 0.22884273529052734, + "rewards/rejected": -0.22949950397014618, + "step": 7880 + }, + { + "epoch": 5.450207468879668, + "grad_norm": 6.419574737548828, + "learning_rate": 2.5276625172890734e-05, + "log_odds_chosen": 10.409414291381836, + "log_odds_ratio": -0.00017506652511656284, + "logits/chosen": -0.5267450213432312, + "logits/rejected": -0.5546387434005737, + "logps/chosen": -0.0007220894913189113, + "logps/rejected": -2.721928596496582, + "loss": 0.7508, + "nll_loss": 0.18768687546253204, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220894622150809e-05, + "rewards/margins": 0.2721206843852997, + "rewards/rejected": -0.2721928656101227, + "step": 7881 + }, + { + "epoch": 5.450899031811895, + "grad_norm": 5.396580219268799, + "learning_rate": 2.527278315660059e-05, + "log_odds_chosen": 10.267948150634766, + "log_odds_ratio": -0.0004927387926727533, + "logits/chosen": -0.31401973962783813, + "logits/rejected": -0.41773149371147156, + "logps/chosen": -0.0007646388257853687, + "logps/rejected": -2.140446424484253, + "loss": 0.9892, + "nll_loss": 0.24724820256233215, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.646388257853687e-05, + "rewards/margins": 0.2139681726694107, + "rewards/rejected": -0.21404464542865753, + "step": 7882 + }, + { + "epoch": 5.451590594744122, + "grad_norm": 7.157034873962402, + "learning_rate": 2.526894114031044e-05, + "log_odds_chosen": 10.70358657836914, + "log_odds_ratio": -0.00021359114907681942, + "logits/chosen": -0.3688841760158539, + "logits/rejected": -0.40278881788253784, + "logps/chosen": -0.00044239018461667, + "logps/rejected": -2.261518716812134, + "loss": 0.7828, + "nll_loss": 0.19567003846168518, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.423902282724157e-05, + "rewards/margins": 0.2261076420545578, + "rewards/rejected": -0.22615188360214233, + "step": 7883 + }, + { + "epoch": 5.452282157676349, + "grad_norm": 7.7425432205200195, + "learning_rate": 2.5265099124020288e-05, + "log_odds_chosen": 10.416945457458496, + "log_odds_ratio": -0.00010456659219926223, + "logits/chosen": -0.07597717642784119, + "logits/rejected": -0.18601390719413757, + "logps/chosen": -0.0006473706453107297, + "logps/rejected": -2.18353271484375, + "loss": 0.4489, + "nll_loss": 0.11221115291118622, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.473706162068993e-05, + "rewards/margins": 0.21828854084014893, + "rewards/rejected": -0.2183532863855362, + "step": 7884 + }, + { + "epoch": 5.4529737206085755, + "grad_norm": 6.269292831420898, + "learning_rate": 2.526125710773014e-05, + "log_odds_chosen": 10.223175048828125, + "log_odds_ratio": -0.0005372378509491682, + "logits/chosen": -0.2822244167327881, + "logits/rejected": -0.3845008313655853, + "logps/chosen": -0.0014499751850962639, + "logps/rejected": -2.3291826248168945, + "loss": 0.615, + "nll_loss": 0.15368527173995972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014499750977847725, + "rewards/margins": 0.23277327418327332, + "rewards/rejected": -0.23291826248168945, + "step": 7885 + }, + { + "epoch": 5.453665283540802, + "grad_norm": 7.0991387367248535, + "learning_rate": 2.525741509143999e-05, + "log_odds_chosen": 10.426545143127441, + "log_odds_ratio": -0.00014646339695900679, + "logits/chosen": -0.6692524552345276, + "logits/rejected": -0.6565045118331909, + "logps/chosen": -0.000264812697423622, + "logps/rejected": -2.0045952796936035, + "loss": 0.8776, + "nll_loss": 0.21939438581466675, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.64812697423622e-05, + "rewards/margins": 0.20043303072452545, + "rewards/rejected": -0.20045951008796692, + "step": 7886 + }, + { + "epoch": 5.454356846473029, + "grad_norm": 9.188990592956543, + "learning_rate": 2.5253573075149838e-05, + "log_odds_chosen": 11.181268692016602, + "log_odds_ratio": -0.0002525774762034416, + "logits/chosen": -0.7016425132751465, + "logits/rejected": -0.7341064214706421, + "logps/chosen": -0.0011649903608486056, + "logps/rejected": -2.7058682441711426, + "loss": 0.6038, + "nll_loss": 0.15092355012893677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011649904627120122, + "rewards/margins": 0.2704703211784363, + "rewards/rejected": -0.27058684825897217, + "step": 7887 + }, + { + "epoch": 5.455048409405256, + "grad_norm": 6.652552604675293, + "learning_rate": 2.5249731058859687e-05, + "log_odds_chosen": 10.284914016723633, + "log_odds_ratio": -0.00010503552039153874, + "logits/chosen": -0.14076370000839233, + "logits/rejected": -0.18022692203521729, + "logps/chosen": -0.0003460772568359971, + "logps/rejected": -2.018402576446533, + "loss": 0.697, + "nll_loss": 0.17424136400222778, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4607728593982756e-05, + "rewards/margins": 0.20180565118789673, + "rewards/rejected": -0.20184025168418884, + "step": 7888 + }, + { + "epoch": 5.455739972337483, + "grad_norm": 5.636088848114014, + "learning_rate": 2.5245889042569543e-05, + "log_odds_chosen": 10.133207321166992, + "log_odds_ratio": -0.00039851610199548304, + "logits/chosen": -0.4633323550224304, + "logits/rejected": -0.46017763018608093, + "logps/chosen": -0.0005366819095797837, + "logps/rejected": -2.1516566276550293, + "loss": 1.1516, + "nll_loss": 0.2878515124320984, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.366818731999956e-05, + "rewards/margins": 0.21511197090148926, + "rewards/rejected": -0.21516567468643188, + "step": 7889 + }, + { + "epoch": 5.45643153526971, + "grad_norm": 5.4103498458862305, + "learning_rate": 2.5242047026279392e-05, + "log_odds_chosen": 10.09362506866455, + "log_odds_ratio": -0.00035184432636015117, + "logits/chosen": -0.4552658200263977, + "logits/rejected": -0.5115972757339478, + "logps/chosen": -0.0001713481906335801, + "logps/rejected": -1.8507417440414429, + "loss": 1.1351, + "nll_loss": 0.28374433517456055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7134818335762247e-05, + "rewards/margins": 0.18505704402923584, + "rewards/rejected": -0.18507418036460876, + "step": 7890 + }, + { + "epoch": 5.4571230982019365, + "grad_norm": 14.919554710388184, + "learning_rate": 2.523820500998924e-05, + "log_odds_chosen": 10.883527755737305, + "log_odds_ratio": -0.0007609869935549796, + "logits/chosen": -0.5898886919021606, + "logits/rejected": -0.566375195980072, + "logps/chosen": -0.005747266113758087, + "logps/rejected": -2.3063547611236572, + "loss": 0.9042, + "nll_loss": 0.22596535086631775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005747266695834696, + "rewards/margins": 0.23006075620651245, + "rewards/rejected": -0.23063546419143677, + "step": 7891 + }, + { + "epoch": 5.457814661134163, + "grad_norm": 10.642051696777344, + "learning_rate": 2.5234362993699097e-05, + "log_odds_chosen": 9.415760040283203, + "log_odds_ratio": -0.000675417308229953, + "logits/chosen": -0.38749903440475464, + "logits/rejected": -0.5239227414131165, + "logps/chosen": -0.0054013486951589584, + "logps/rejected": -1.73267662525177, + "loss": 1.5004, + "nll_loss": 0.37502381205558777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005401347880251706, + "rewards/margins": 0.1727275252342224, + "rewards/rejected": -0.1732676774263382, + "step": 7892 + }, + { + "epoch": 5.45850622406639, + "grad_norm": 17.498640060424805, + "learning_rate": 2.5230520977408946e-05, + "log_odds_chosen": 10.213619232177734, + "log_odds_ratio": -7.920338248368353e-05, + "logits/chosen": -0.36259281635284424, + "logits/rejected": -0.41285136342048645, + "logps/chosen": -0.0004386402724776417, + "logps/rejected": -1.7193783521652222, + "loss": 1.2039, + "nll_loss": 0.3009600341320038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.386402724776417e-05, + "rewards/margins": 0.17189398407936096, + "rewards/rejected": -0.17193783819675446, + "step": 7893 + }, + { + "epoch": 5.459197786998617, + "grad_norm": 8.615107536315918, + "learning_rate": 2.5226678961118795e-05, + "log_odds_chosen": 9.743375778198242, + "log_odds_ratio": -0.00016872762353159487, + "logits/chosen": -0.6855210065841675, + "logits/rejected": -0.7205241322517395, + "logps/chosen": -0.0006472010281868279, + "logps/rejected": -2.015528440475464, + "loss": 0.8585, + "nll_loss": 0.214613139629364, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.472010136349127e-05, + "rewards/margins": 0.20148813724517822, + "rewards/rejected": -0.2015528529882431, + "step": 7894 + }, + { + "epoch": 5.459889349930844, + "grad_norm": 8.549186706542969, + "learning_rate": 2.5222836944828648e-05, + "log_odds_chosen": 11.554786682128906, + "log_odds_ratio": -2.5646073481766507e-05, + "logits/chosen": -0.9103915691375732, + "logits/rejected": -0.9481276273727417, + "logps/chosen": -0.00018213970179203898, + "logps/rejected": -2.588318347930908, + "loss": 1.7569, + "nll_loss": 0.43922534584999084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.82139719981933e-05, + "rewards/margins": 0.25881361961364746, + "rewards/rejected": -0.25883182883262634, + "step": 7895 + }, + { + "epoch": 5.460580912863071, + "grad_norm": 11.719233512878418, + "learning_rate": 2.5218994928538497e-05, + "log_odds_chosen": 10.113941192626953, + "log_odds_ratio": -0.00013782066525891423, + "logits/chosen": -0.4043325185775757, + "logits/rejected": -0.47121208906173706, + "logps/chosen": -0.0033909042831510305, + "logps/rejected": -2.3903987407684326, + "loss": 0.6943, + "nll_loss": 0.17356136441230774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033909041667357087, + "rewards/margins": 0.2387007772922516, + "rewards/rejected": -0.23903986811637878, + "step": 7896 + }, + { + "epoch": 5.4612724757952975, + "grad_norm": 8.379502296447754, + "learning_rate": 2.5215152912248346e-05, + "log_odds_chosen": 9.217211723327637, + "log_odds_ratio": -0.0010226225713267922, + "logits/chosen": -0.4326026439666748, + "logits/rejected": -0.5274725556373596, + "logps/chosen": -0.004476041067391634, + "logps/rejected": -2.342529535293579, + "loss": 0.9241, + "nll_loss": 0.2309216856956482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004476041067391634, + "rewards/margins": 0.2338053584098816, + "rewards/rejected": -0.2342529594898224, + "step": 7897 + }, + { + "epoch": 5.461964038727524, + "grad_norm": 7.358403205871582, + "learning_rate": 2.52113108959582e-05, + "log_odds_chosen": 11.019567489624023, + "log_odds_ratio": -2.6093295673490502e-05, + "logits/chosen": -0.5179281830787659, + "logits/rejected": -0.6355370283126831, + "logps/chosen": -0.00020142251742072403, + "logps/rejected": -2.1744496822357178, + "loss": 0.6061, + "nll_loss": 0.15153148770332336, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0142253561061807e-05, + "rewards/margins": 0.21742482483386993, + "rewards/rejected": -0.21744495630264282, + "step": 7898 + }, + { + "epoch": 5.462655601659751, + "grad_norm": 15.733071327209473, + "learning_rate": 2.520746887966805e-05, + "log_odds_chosen": 10.189199447631836, + "log_odds_ratio": -0.0006109835230745375, + "logits/chosen": -0.6819102764129639, + "logits/rejected": -0.7697743773460388, + "logps/chosen": -0.0003226564731448889, + "logps/rejected": -2.198263168334961, + "loss": 1.0409, + "nll_loss": 0.260160356760025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.226565240765922e-05, + "rewards/margins": 0.21979409456253052, + "rewards/rejected": -0.219826340675354, + "step": 7899 + }, + { + "epoch": 5.463347164591978, + "grad_norm": 13.659838676452637, + "learning_rate": 2.52036268633779e-05, + "log_odds_chosen": 10.806734085083008, + "log_odds_ratio": -0.00038712259265594184, + "logits/chosen": -0.5743763446807861, + "logits/rejected": -0.7685285806655884, + "logps/chosen": -0.0010596952633932233, + "logps/rejected": -2.2519259452819824, + "loss": 0.7574, + "nll_loss": 0.18931885063648224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010596953507047147, + "rewards/margins": 0.22508664429187775, + "rewards/rejected": -0.2251926213502884, + "step": 7900 + }, + { + "epoch": 5.464038727524205, + "grad_norm": 9.067700386047363, + "learning_rate": 2.5199784847087755e-05, + "log_odds_chosen": 10.845809936523438, + "log_odds_ratio": -0.0001876501482911408, + "logits/chosen": -0.37300464510917664, + "logits/rejected": -0.43012484908103943, + "logps/chosen": -0.0001886295503936708, + "logps/rejected": -2.392216205596924, + "loss": 0.8364, + "nll_loss": 0.20909173786640167, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8862956494558603e-05, + "rewards/margins": 0.23920276761054993, + "rewards/rejected": -0.23922163248062134, + "step": 7901 + }, + { + "epoch": 5.464730290456432, + "grad_norm": 11.004956245422363, + "learning_rate": 2.5195942830797605e-05, + "log_odds_chosen": 9.733002662658691, + "log_odds_ratio": -0.0004925908870063722, + "logits/chosen": -0.7571265697479248, + "logits/rejected": -0.8239815831184387, + "logps/chosen": -0.00042578810825943947, + "logps/rejected": -1.9153045415878296, + "loss": 0.7991, + "nll_loss": 0.19973230361938477, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.257881300873123e-05, + "rewards/margins": 0.1914878785610199, + "rewards/rejected": -0.19153046607971191, + "step": 7902 + }, + { + "epoch": 5.4654218533886585, + "grad_norm": 9.545635223388672, + "learning_rate": 2.5192100814507454e-05, + "log_odds_chosen": 9.390292167663574, + "log_odds_ratio": -0.03421199694275856, + "logits/chosen": 0.020793870091438293, + "logits/rejected": -0.06223713606595993, + "logps/chosen": -0.007960923947393894, + "logps/rejected": -1.421905279159546, + "loss": 0.7789, + "nll_loss": 0.19130827486515045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00079609255772084, + "rewards/margins": 0.14139443635940552, + "rewards/rejected": -0.14219053089618683, + "step": 7903 + }, + { + "epoch": 5.466113416320885, + "grad_norm": 10.32170295715332, + "learning_rate": 2.5188258798217306e-05, + "log_odds_chosen": 9.949746131896973, + "log_odds_ratio": -0.25190046429634094, + "logits/chosen": -0.5395863652229309, + "logits/rejected": -0.49859222769737244, + "logps/chosen": -0.04036932438611984, + "logps/rejected": -3.2632665634155273, + "loss": 1.3503, + "nll_loss": 0.31238770484924316, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004036932718008757, + "rewards/margins": 0.32228973507881165, + "rewards/rejected": -0.3263266682624817, + "step": 7904 + }, + { + "epoch": 5.466804979253112, + "grad_norm": 10.649049758911133, + "learning_rate": 2.5184416781927155e-05, + "log_odds_chosen": 9.888246536254883, + "log_odds_ratio": -0.00021169688261579722, + "logits/chosen": -0.7000361084938049, + "logits/rejected": -0.668064534664154, + "logps/chosen": -0.0006827355246059597, + "logps/rejected": -1.8547999858856201, + "loss": 0.9118, + "nll_loss": 0.22792655229568481, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.827355537097901e-05, + "rewards/margins": 0.1854117214679718, + "rewards/rejected": -0.185479998588562, + "step": 7905 + }, + { + "epoch": 5.467496542185339, + "grad_norm": 6.560869216918945, + "learning_rate": 2.5180574765637004e-05, + "log_odds_chosen": 11.247020721435547, + "log_odds_ratio": -6.718430813634768e-05, + "logits/chosen": -0.39155805110931396, + "logits/rejected": -0.431242436170578, + "logps/chosen": -0.0002432766486890614, + "logps/rejected": -2.5565779209136963, + "loss": 0.6568, + "nll_loss": 0.16420210897922516, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.432766450510826e-05, + "rewards/margins": 0.25563347339630127, + "rewards/rejected": -0.25565779209136963, + "step": 7906 + }, + { + "epoch": 5.468188105117566, + "grad_norm": 49.997371673583984, + "learning_rate": 2.517673274934686e-05, + "log_odds_chosen": 9.113655090332031, + "log_odds_ratio": -0.06486863642930984, + "logits/chosen": -0.48760855197906494, + "logits/rejected": -0.5602211952209473, + "logps/chosen": -0.01464058831334114, + "logps/rejected": -1.5333609580993652, + "loss": 0.653, + "nll_loss": 0.15675517916679382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014640586450695992, + "rewards/margins": 0.15187203884124756, + "rewards/rejected": -0.15333609282970428, + "step": 7907 + }, + { + "epoch": 5.468879668049793, + "grad_norm": 7.984647274017334, + "learning_rate": 2.517289073305671e-05, + "log_odds_chosen": 11.545648574829102, + "log_odds_ratio": -1.5041207916510757e-05, + "logits/chosen": -0.33951878547668457, + "logits/rejected": -0.40240412950515747, + "logps/chosen": -0.00020837641204707325, + "logps/rejected": -2.9692435264587402, + "loss": 0.5283, + "nll_loss": 0.13208217918872833, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0837640477111563e-05, + "rewards/margins": 0.29690349102020264, + "rewards/rejected": -0.296924352645874, + "step": 7908 + }, + { + "epoch": 5.4695712309820195, + "grad_norm": 7.5629425048828125, + "learning_rate": 2.5169048716766558e-05, + "log_odds_chosen": 8.79336166381836, + "log_odds_ratio": -0.0004576348583213985, + "logits/chosen": -0.48704391717910767, + "logits/rejected": -0.5239378809928894, + "logps/chosen": -0.0007803332409821451, + "logps/rejected": -1.5541610717773438, + "loss": 0.627, + "nll_loss": 0.15671171247959137, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.803332846378908e-05, + "rewards/margins": 0.1553380787372589, + "rewards/rejected": -0.1554161012172699, + "step": 7909 + }, + { + "epoch": 5.470262793914246, + "grad_norm": 6.970637321472168, + "learning_rate": 2.5165206700476414e-05, + "log_odds_chosen": 9.41334342956543, + "log_odds_ratio": -0.0017763872165232897, + "logits/chosen": -0.6047480702400208, + "logits/rejected": -0.6281265616416931, + "logps/chosen": -0.008626021444797516, + "logps/rejected": -1.9706454277038574, + "loss": 1.2315, + "nll_loss": 0.30770647525787354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008626022026874125, + "rewards/margins": 0.19620195031166077, + "rewards/rejected": -0.19706454873085022, + "step": 7910 + }, + { + "epoch": 5.470954356846473, + "grad_norm": 9.45356273651123, + "learning_rate": 2.5161364684186263e-05, + "log_odds_chosen": 9.752571105957031, + "log_odds_ratio": -0.00014655350241810083, + "logits/chosen": -0.5641658306121826, + "logits/rejected": -0.6220308542251587, + "logps/chosen": -0.0004932170268148184, + "logps/rejected": -1.6327717304229736, + "loss": 0.6847, + "nll_loss": 0.17116187512874603, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9321701226290315e-05, + "rewards/margins": 0.1632278561592102, + "rewards/rejected": -0.16327717900276184, + "step": 7911 + }, + { + "epoch": 5.4716459197787, + "grad_norm": 6.888040065765381, + "learning_rate": 2.5157522667896112e-05, + "log_odds_chosen": 11.008320808410645, + "log_odds_ratio": -2.1486168407136574e-05, + "logits/chosen": -0.8454128503799438, + "logits/rejected": -0.849215030670166, + "logps/chosen": -0.00010806650971062481, + "logps/rejected": -1.9455052614212036, + "loss": 0.515, + "nll_loss": 0.12873561680316925, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.08066506072646e-05, + "rewards/margins": 0.19453972578048706, + "rewards/rejected": -0.1945505440235138, + "step": 7912 + }, + { + "epoch": 5.472337482710927, + "grad_norm": 7.035167217254639, + "learning_rate": 2.5153680651605964e-05, + "log_odds_chosen": 9.783241271972656, + "log_odds_ratio": -0.0006064789486117661, + "logits/chosen": -0.18023613095283508, + "logits/rejected": -0.41178882122039795, + "logps/chosen": -0.0021076889242976904, + "logps/rejected": -2.3890411853790283, + "loss": 1.0119, + "nll_loss": 0.25290894508361816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021076889242976904, + "rewards/margins": 0.23869335651397705, + "rewards/rejected": -0.23890413343906403, + "step": 7913 + }, + { + "epoch": 5.473029045643154, + "grad_norm": 5.894155025482178, + "learning_rate": 2.5149838635315814e-05, + "log_odds_chosen": 9.392170906066895, + "log_odds_ratio": -0.00023966014850884676, + "logits/chosen": -0.40104061365127563, + "logits/rejected": -0.29543566703796387, + "logps/chosen": -0.0006486907368525863, + "logps/rejected": -1.4715025424957275, + "loss": 0.4484, + "nll_loss": 0.11208631843328476, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.486906931968406e-05, + "rewards/margins": 0.14708539843559265, + "rewards/rejected": -0.14715026319026947, + "step": 7914 + }, + { + "epoch": 5.4737206085753805, + "grad_norm": 8.190710067749023, + "learning_rate": 2.5145996619025663e-05, + "log_odds_chosen": 10.513279914855957, + "log_odds_ratio": -8.20760615170002e-05, + "logits/chosen": -0.7377473711967468, + "logits/rejected": -0.8403040170669556, + "logps/chosen": -0.00011504338181111962, + "logps/rejected": -1.5232871770858765, + "loss": 0.8404, + "nll_loss": 0.21009854972362518, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.150433763541514e-05, + "rewards/margins": 0.15231722593307495, + "rewards/rejected": -0.1523287296295166, + "step": 7915 + }, + { + "epoch": 5.474412171507607, + "grad_norm": 5.56616735458374, + "learning_rate": 2.514215460273552e-05, + "log_odds_chosen": 10.981778144836426, + "log_odds_ratio": -6.831878272350878e-05, + "logits/chosen": -0.657960057258606, + "logits/rejected": -0.801915168762207, + "logps/chosen": -0.0005254566203802824, + "logps/rejected": -2.9949893951416016, + "loss": 0.7411, + "nll_loss": 0.18527188897132874, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2545659855240956e-05, + "rewards/margins": 0.2994464039802551, + "rewards/rejected": -0.29949894547462463, + "step": 7916 + }, + { + "epoch": 5.475103734439834, + "grad_norm": 6.962458610534668, + "learning_rate": 2.5138312586445367e-05, + "log_odds_chosen": 10.380294799804688, + "log_odds_ratio": -0.0003663065726868808, + "logits/chosen": -0.727442741394043, + "logits/rejected": -0.8181726932525635, + "logps/chosen": -0.0008164530154317617, + "logps/rejected": -2.5967469215393066, + "loss": 0.8706, + "nll_loss": 0.21762053668498993, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.164531027432531e-05, + "rewards/margins": 0.25959306955337524, + "rewards/rejected": -0.25967469811439514, + "step": 7917 + }, + { + "epoch": 5.475795297372061, + "grad_norm": 5.841886043548584, + "learning_rate": 2.5134470570155217e-05, + "log_odds_chosen": 11.438337326049805, + "log_odds_ratio": -1.5561568943667226e-05, + "logits/chosen": -0.18600064516067505, + "logits/rejected": -0.32965904474258423, + "logps/chosen": -0.00011055743379984051, + "logps/rejected": -2.105381727218628, + "loss": 0.6327, + "nll_loss": 0.15817975997924805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1055743925680872e-05, + "rewards/margins": 0.21052710711956024, + "rewards/rejected": -0.21053817868232727, + "step": 7918 + }, + { + "epoch": 5.476486860304288, + "grad_norm": 5.203618049621582, + "learning_rate": 2.5130628553865072e-05, + "log_odds_chosen": 9.663179397583008, + "log_odds_ratio": -0.000736804970074445, + "logits/chosen": -0.05351455509662628, + "logits/rejected": -0.050800621509552, + "logps/chosen": -0.0022640221286565065, + "logps/rejected": -2.2554354667663574, + "loss": 0.5911, + "nll_loss": 0.1477089375257492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022640220413450152, + "rewards/margins": 0.225317120552063, + "rewards/rejected": -0.2255435287952423, + "step": 7919 + }, + { + "epoch": 5.477178423236515, + "grad_norm": 8.263466835021973, + "learning_rate": 2.512678653757492e-05, + "log_odds_chosen": 10.013599395751953, + "log_odds_ratio": -0.00011523398279678077, + "logits/chosen": -0.7380114197731018, + "logits/rejected": -0.6945633292198181, + "logps/chosen": -0.0003955452120862901, + "logps/rejected": -1.6594562530517578, + "loss": 0.8181, + "nll_loss": 0.20452088117599487, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.955452120862901e-05, + "rewards/margins": 0.16590608656406403, + "rewards/rejected": -0.1659456342458725, + "step": 7920 + }, + { + "epoch": 5.477869986168741, + "grad_norm": 5.862675189971924, + "learning_rate": 2.512294452128477e-05, + "log_odds_chosen": 10.199264526367188, + "log_odds_ratio": -0.00010353358811698854, + "logits/chosen": -0.6615251302719116, + "logits/rejected": -0.6607353091239929, + "logps/chosen": -0.0004281606525182724, + "logps/rejected": -1.998523473739624, + "loss": 1.0327, + "nll_loss": 0.2581551671028137, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.281606015865691e-05, + "rewards/margins": 0.19980952143669128, + "rewards/rejected": -0.1998523622751236, + "step": 7921 + }, + { + "epoch": 5.478561549100968, + "grad_norm": 9.82187557220459, + "learning_rate": 2.5119102504994623e-05, + "log_odds_chosen": 10.85956859588623, + "log_odds_ratio": -2.9763910788460635e-05, + "logits/chosen": -0.6906986832618713, + "logits/rejected": -0.7038851380348206, + "logps/chosen": -0.0005827648565173149, + "logps/rejected": -2.5121445655822754, + "loss": 0.7615, + "nll_loss": 0.19037176668643951, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.827648419653997e-05, + "rewards/margins": 0.25115618109703064, + "rewards/rejected": -0.2512144446372986, + "step": 7922 + }, + { + "epoch": 5.479253112033195, + "grad_norm": 7.418650150299072, + "learning_rate": 2.5115260488704472e-05, + "log_odds_chosen": 10.056888580322266, + "log_odds_ratio": -0.00015551802061963826, + "logits/chosen": -0.8092662692070007, + "logits/rejected": -0.8078435659408569, + "logps/chosen": -0.0003808286564890295, + "logps/rejected": -2.0566279888153076, + "loss": 0.7577, + "nll_loss": 0.18939712643623352, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.808286419371143e-05, + "rewards/margins": 0.20562469959259033, + "rewards/rejected": -0.205662801861763, + "step": 7923 + }, + { + "epoch": 5.479944674965422, + "grad_norm": 6.861764907836914, + "learning_rate": 2.511141847241432e-05, + "log_odds_chosen": 10.926447868347168, + "log_odds_ratio": -9.362271521240473e-05, + "logits/chosen": -0.466507226228714, + "logits/rejected": -0.4524589776992798, + "logps/chosen": -0.0002040699328063056, + "logps/rejected": -2.489025831222534, + "loss": 0.8015, + "nll_loss": 0.20037364959716797, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0406996554811485e-05, + "rewards/margins": 0.2488822042942047, + "rewards/rejected": -0.2489026039838791, + "step": 7924 + }, + { + "epoch": 5.480636237897649, + "grad_norm": 10.226140975952148, + "learning_rate": 2.5107576456124177e-05, + "log_odds_chosen": 10.256708145141602, + "log_odds_ratio": -0.00020014113397337496, + "logits/chosen": -0.9860405325889587, + "logits/rejected": -0.9984095096588135, + "logps/chosen": -0.0006605676026083529, + "logps/rejected": -1.8433809280395508, + "loss": 0.8948, + "nll_loss": 0.22368690371513367, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605676026083529e-05, + "rewards/margins": 0.18427203595638275, + "rewards/rejected": -0.18433809280395508, + "step": 7925 + }, + { + "epoch": 5.481327800829876, + "grad_norm": 8.794382095336914, + "learning_rate": 2.5103734439834026e-05, + "log_odds_chosen": 11.23933219909668, + "log_odds_ratio": -2.205545206379611e-05, + "logits/chosen": -0.6451046466827393, + "logits/rejected": -0.7187870740890503, + "logps/chosen": -0.0001990120654227212, + "logps/rejected": -2.49930477142334, + "loss": 0.8034, + "nll_loss": 0.20085430145263672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.990120654227212e-05, + "rewards/margins": 0.24991059303283691, + "rewards/rejected": -0.2499305009841919, + "step": 7926 + }, + { + "epoch": 5.482019363762102, + "grad_norm": 9.740113258361816, + "learning_rate": 2.5099892423543875e-05, + "log_odds_chosen": 10.139154434204102, + "log_odds_ratio": -0.0001262970909010619, + "logits/chosen": -0.7651572227478027, + "logits/rejected": -0.8166838884353638, + "logps/chosen": -0.0006389489863067865, + "logps/rejected": -1.9527884721755981, + "loss": 0.8119, + "nll_loss": 0.2029644101858139, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.389489863067865e-05, + "rewards/margins": 0.19521497189998627, + "rewards/rejected": -0.1952788531780243, + "step": 7927 + }, + { + "epoch": 5.482710926694329, + "grad_norm": 15.382346153259277, + "learning_rate": 2.509605040725373e-05, + "log_odds_chosen": 10.098079681396484, + "log_odds_ratio": -8.8360087829642e-05, + "logits/chosen": -0.3574230372905731, + "logits/rejected": -0.33704087138175964, + "logps/chosen": -0.0006936791469343007, + "logps/rejected": -2.5140581130981445, + "loss": 1.2812, + "nll_loss": 0.3202976882457733, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.93679103278555e-05, + "rewards/margins": 0.2513364553451538, + "rewards/rejected": -0.25140583515167236, + "step": 7928 + }, + { + "epoch": 5.483402489626556, + "grad_norm": 9.151076316833496, + "learning_rate": 2.509220839096358e-05, + "log_odds_chosen": 10.143035888671875, + "log_odds_ratio": -0.00013026421947870404, + "logits/chosen": -0.2792324125766754, + "logits/rejected": -0.39061158895492554, + "logps/chosen": -0.000424693338572979, + "logps/rejected": -1.8352653980255127, + "loss": 1.1901, + "nll_loss": 0.2975192666053772, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.246933531248942e-05, + "rewards/margins": 0.18348407745361328, + "rewards/rejected": -0.18352654576301575, + "step": 7929 + }, + { + "epoch": 5.484094052558783, + "grad_norm": 15.15778636932373, + "learning_rate": 2.508836637467343e-05, + "log_odds_chosen": 11.682640075683594, + "log_odds_ratio": -1.2657160368689802e-05, + "logits/chosen": -0.23138004541397095, + "logits/rejected": -0.32260462641716003, + "logps/chosen": -0.00038684680475853384, + "logps/rejected": -3.3264083862304688, + "loss": 0.6705, + "nll_loss": 0.16762800514698029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868468411383219e-05, + "rewards/margins": 0.3326021432876587, + "rewards/rejected": -0.3326408565044403, + "step": 7930 + }, + { + "epoch": 5.48478561549101, + "grad_norm": 7.947671413421631, + "learning_rate": 2.508452435838328e-05, + "log_odds_chosen": 9.699936866760254, + "log_odds_ratio": -0.0006529848906211555, + "logits/chosen": -0.5883873701095581, + "logits/rejected": -0.7178964614868164, + "logps/chosen": -0.001084399176761508, + "logps/rejected": -1.9630234241485596, + "loss": 0.9102, + "nll_loss": 0.22749494016170502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010843992640729994, + "rewards/margins": 0.1961939036846161, + "rewards/rejected": -0.1963023543357849, + "step": 7931 + }, + { + "epoch": 5.485477178423237, + "grad_norm": 6.651211261749268, + "learning_rate": 2.508068234209313e-05, + "log_odds_chosen": 10.307214736938477, + "log_odds_ratio": -0.0004254367668181658, + "logits/chosen": -0.8200116157531738, + "logits/rejected": -0.7713389992713928, + "logps/chosen": -0.00029836269095540047, + "logps/rejected": -1.8111873865127563, + "loss": 0.6126, + "nll_loss": 0.1531156450510025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.983627018693369e-05, + "rewards/margins": 0.18108892440795898, + "rewards/rejected": -0.18111875653266907, + "step": 7932 + }, + { + "epoch": 5.486168741355463, + "grad_norm": 5.8201704025268555, + "learning_rate": 2.507684032580298e-05, + "log_odds_chosen": 10.988911628723145, + "log_odds_ratio": -0.0004914018791168928, + "logits/chosen": -0.3212338387966156, + "logits/rejected": -0.4956795573234558, + "logps/chosen": -0.0008269093232229352, + "logps/rejected": -2.6963746547698975, + "loss": 0.7461, + "nll_loss": 0.1864856332540512, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.269093814305961e-05, + "rewards/margins": 0.2695547938346863, + "rewards/rejected": -0.26963746547698975, + "step": 7933 + }, + { + "epoch": 5.48686030428769, + "grad_norm": 7.1769938468933105, + "learning_rate": 2.5072998309512835e-05, + "log_odds_chosen": 11.084684371948242, + "log_odds_ratio": -2.3583517759107053e-05, + "logits/chosen": -0.574668824672699, + "logits/rejected": -0.7622121572494507, + "logps/chosen": -0.0001388926466461271, + "logps/rejected": -2.050570249557495, + "loss": 0.5676, + "nll_loss": 0.1418919414281845, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.388926466461271e-05, + "rewards/margins": 0.20504313707351685, + "rewards/rejected": -0.2050570249557495, + "step": 7934 + }, + { + "epoch": 5.487551867219917, + "grad_norm": 9.00568675994873, + "learning_rate": 2.5069156293222684e-05, + "log_odds_chosen": 10.233102798461914, + "log_odds_ratio": -8.496645023114979e-05, + "logits/chosen": -0.5563417077064514, + "logits/rejected": -0.588212788105011, + "logps/chosen": -0.0002815087791532278, + "logps/rejected": -2.1203181743621826, + "loss": 0.579, + "nll_loss": 0.1447538137435913, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8150876460131258e-05, + "rewards/margins": 0.2120036780834198, + "rewards/rejected": -0.21203184127807617, + "step": 7935 + }, + { + "epoch": 5.488243430152144, + "grad_norm": 17.79596519470215, + "learning_rate": 2.5065314276932533e-05, + "log_odds_chosen": 9.840686798095703, + "log_odds_ratio": -0.00029457185883075, + "logits/chosen": -0.20178522169589996, + "logits/rejected": -0.3377896547317505, + "logps/chosen": -0.00044397261808626354, + "logps/rejected": -1.8732333183288574, + "loss": 0.7773, + "nll_loss": 0.19428730010986328, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439726035343483e-05, + "rewards/margins": 0.18727895617485046, + "rewards/rejected": -0.18732333183288574, + "step": 7936 + }, + { + "epoch": 5.488934993084371, + "grad_norm": 9.293213844299316, + "learning_rate": 2.506147226064239e-05, + "log_odds_chosen": 10.284903526306152, + "log_odds_ratio": -6.364515866152942e-05, + "logits/chosen": -0.3089835047721863, + "logits/rejected": -0.39305680990219116, + "logps/chosen": -0.0027302736416459084, + "logps/rejected": -2.2307722568511963, + "loss": 0.7385, + "nll_loss": 0.18461675941944122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002730273699853569, + "rewards/margins": 0.22280417382717133, + "rewards/rejected": -0.2230772227048874, + "step": 7937 + }, + { + "epoch": 5.4896265560165975, + "grad_norm": 5.33021879196167, + "learning_rate": 2.5057630244352238e-05, + "log_odds_chosen": 9.169672966003418, + "log_odds_ratio": -0.000621531275101006, + "logits/chosen": -0.2677467465400696, + "logits/rejected": -0.3390297293663025, + "logps/chosen": -0.0015468818601220846, + "logps/rejected": -1.889085054397583, + "loss": 0.8048, + "nll_loss": 0.20113667845726013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015468819765374064, + "rewards/margins": 0.18875382840633392, + "rewards/rejected": -0.18890851736068726, + "step": 7938 + }, + { + "epoch": 5.490318118948824, + "grad_norm": 5.521744251251221, + "learning_rate": 2.5053788228062087e-05, + "log_odds_chosen": 9.12801456451416, + "log_odds_ratio": -0.0012813995126634836, + "logits/chosen": -0.7456037402153015, + "logits/rejected": -0.750789999961853, + "logps/chosen": -0.0036856280639767647, + "logps/rejected": -1.3551989793777466, + "loss": 1.1409, + "nll_loss": 0.2850930094718933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00036856281803920865, + "rewards/margins": 0.13515134155750275, + "rewards/rejected": -0.13551990687847137, + "step": 7939 + }, + { + "epoch": 5.491009681881051, + "grad_norm": 8.740033149719238, + "learning_rate": 2.504994621177194e-05, + "log_odds_chosen": 8.394597053527832, + "log_odds_ratio": -0.0022611194290220737, + "logits/chosen": -0.43547797203063965, + "logits/rejected": -0.5125408172607422, + "logps/chosen": -0.005125186871737242, + "logps/rejected": -1.7335705757141113, + "loss": 1.0796, + "nll_loss": 0.2696824371814728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005125186871737242, + "rewards/margins": 0.1728445589542389, + "rewards/rejected": -0.1733570694923401, + "step": 7940 + }, + { + "epoch": 5.491701244813278, + "grad_norm": 5.807063102722168, + "learning_rate": 2.504610419548179e-05, + "log_odds_chosen": 10.93294906616211, + "log_odds_ratio": -6.808589387219399e-05, + "logits/chosen": -0.6323756575584412, + "logits/rejected": -0.6467557549476624, + "logps/chosen": -0.00018673163140192628, + "logps/rejected": -1.8192808628082275, + "loss": 0.4817, + "nll_loss": 0.12041807919740677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8673163140192628e-05, + "rewards/margins": 0.18190941214561462, + "rewards/rejected": -0.18192808330059052, + "step": 7941 + }, + { + "epoch": 5.492392807745505, + "grad_norm": 9.008139610290527, + "learning_rate": 2.5042262179191638e-05, + "log_odds_chosen": 10.336362838745117, + "log_odds_ratio": -0.00031754738301970065, + "logits/chosen": -0.75423264503479, + "logits/rejected": -0.7876052856445312, + "logps/chosen": -0.0003842760343104601, + "logps/rejected": -1.8387316465377808, + "loss": 0.5582, + "nll_loss": 0.13952915370464325, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.842760270345025e-05, + "rewards/margins": 0.1838347315788269, + "rewards/rejected": -0.18387316167354584, + "step": 7942 + }, + { + "epoch": 5.493084370677732, + "grad_norm": 7.10367488861084, + "learning_rate": 2.5038420162901494e-05, + "log_odds_chosen": 10.97952651977539, + "log_odds_ratio": -3.581916098482907e-05, + "logits/chosen": -0.8699790239334106, + "logits/rejected": -0.8966847658157349, + "logps/chosen": -0.00038829189725220203, + "logps/rejected": -2.5607616901397705, + "loss": 0.6365, + "nll_loss": 0.1591133177280426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.882919190800749e-05, + "rewards/margins": 0.25603732466697693, + "rewards/rejected": -0.2560761570930481, + "step": 7943 + }, + { + "epoch": 5.4937759336099585, + "grad_norm": 5.826725959777832, + "learning_rate": 2.5034578146611343e-05, + "log_odds_chosen": 10.67078971862793, + "log_odds_ratio": -8.722233178559691e-05, + "logits/chosen": -0.6411142349243164, + "logits/rejected": -0.7242069244384766, + "logps/chosen": -0.0007470400887541473, + "logps/rejected": -2.562070608139038, + "loss": 0.6689, + "nll_loss": 0.16722771525382996, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.470400305464864e-05, + "rewards/margins": 0.2561323642730713, + "rewards/rejected": -0.25620707869529724, + "step": 7944 + }, + { + "epoch": 5.494467496542185, + "grad_norm": 9.011191368103027, + "learning_rate": 2.5030736130321192e-05, + "log_odds_chosen": 10.33845043182373, + "log_odds_ratio": -0.0010209310567006469, + "logits/chosen": -0.8604280948638916, + "logits/rejected": -0.891929030418396, + "logps/chosen": -0.0017238704022020102, + "logps/rejected": -2.7034897804260254, + "loss": 0.6467, + "nll_loss": 0.16158509254455566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017238703730981797, + "rewards/margins": 0.27017658948898315, + "rewards/rejected": -0.2703489661216736, + "step": 7945 + }, + { + "epoch": 5.495159059474412, + "grad_norm": 6.513817310333252, + "learning_rate": 2.5026894114031048e-05, + "log_odds_chosen": 11.323633193969727, + "log_odds_ratio": -1.6800902812974527e-05, + "logits/chosen": 0.04846089333295822, + "logits/rejected": -0.07541124522686005, + "logps/chosen": -0.00011946099402848631, + "logps/rejected": -2.3040506839752197, + "loss": 0.7299, + "nll_loss": 0.18246236443519592, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1946100130444393e-05, + "rewards/margins": 0.23039312660694122, + "rewards/rejected": -0.2304050773382187, + "step": 7946 + }, + { + "epoch": 5.495850622406639, + "grad_norm": 10.008655548095703, + "learning_rate": 2.5023052097740897e-05, + "log_odds_chosen": 10.430981636047363, + "log_odds_ratio": -4.921500658383593e-05, + "logits/chosen": -0.12481583654880524, + "logits/rejected": -0.24347051978111267, + "logps/chosen": -0.00031723693246021867, + "logps/rejected": -2.288741111755371, + "loss": 0.7818, + "nll_loss": 0.19544847309589386, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.172369542880915e-05, + "rewards/margins": 0.22884240746498108, + "rewards/rejected": -0.2288741171360016, + "step": 7947 + }, + { + "epoch": 5.496542185338866, + "grad_norm": 15.212750434875488, + "learning_rate": 2.5019210081450746e-05, + "log_odds_chosen": 10.807697296142578, + "log_odds_ratio": -5.2383267757249996e-05, + "logits/chosen": -0.460726261138916, + "logits/rejected": -0.5388174057006836, + "logps/chosen": -0.00018959477893076837, + "logps/rejected": -1.999808669090271, + "loss": 1.155, + "nll_loss": 0.28874483704566956, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.895947934826836e-05, + "rewards/margins": 0.19996190071105957, + "rewards/rejected": -0.19998085498809814, + "step": 7948 + }, + { + "epoch": 5.497233748271093, + "grad_norm": 10.610461235046387, + "learning_rate": 2.5015368065160598e-05, + "log_odds_chosen": 10.641895294189453, + "log_odds_ratio": -0.0001160370884463191, + "logits/chosen": -0.6269339919090271, + "logits/rejected": -0.6266019344329834, + "logps/chosen": -0.00038971190224401653, + "logps/rejected": -2.3446550369262695, + "loss": 1.2292, + "nll_loss": 0.307287335395813, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.89711931347847e-05, + "rewards/margins": 0.23442654311656952, + "rewards/rejected": -0.23446550965309143, + "step": 7949 + }, + { + "epoch": 5.4979253112033195, + "grad_norm": 10.49134635925293, + "learning_rate": 2.5011526048870447e-05, + "log_odds_chosen": 10.224824905395508, + "log_odds_ratio": -0.000180298593477346, + "logits/chosen": -0.6823133826255798, + "logits/rejected": -0.756210446357727, + "logps/chosen": -0.010154692456126213, + "logps/rejected": -3.193361759185791, + "loss": 0.6622, + "nll_loss": 0.1655343770980835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010154691990464926, + "rewards/margins": 0.31832069158554077, + "rewards/rejected": -0.3193361759185791, + "step": 7950 + }, + { + "epoch": 5.498616874135546, + "grad_norm": 8.006492614746094, + "learning_rate": 2.5007684032580296e-05, + "log_odds_chosen": 10.028341293334961, + "log_odds_ratio": -0.00015695212641730905, + "logits/chosen": -0.12197916209697723, + "logits/rejected": -0.190192312002182, + "logps/chosen": -0.00036239047767594457, + "logps/rejected": -1.8425757884979248, + "loss": 0.7822, + "nll_loss": 0.1955282837152481, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.623904922278598e-05, + "rewards/margins": 0.18422134220600128, + "rewards/rejected": -0.18425756692886353, + "step": 7951 + }, + { + "epoch": 5.499308437067773, + "grad_norm": 6.36076545715332, + "learning_rate": 2.5003842016290152e-05, + "log_odds_chosen": 10.715845108032227, + "log_odds_ratio": -5.2525760111166164e-05, + "logits/chosen": -0.1273401528596878, + "logits/rejected": -0.13696612417697906, + "logps/chosen": -0.0002070648333756253, + "logps/rejected": -2.113837718963623, + "loss": 0.9156, + "nll_loss": 0.2289057821035385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0706484065158293e-05, + "rewards/margins": 0.2113630771636963, + "rewards/rejected": -0.21138378977775574, + "step": 7952 + }, + { + "epoch": 5.5, + "grad_norm": 10.263921737670898, + "learning_rate": 2.5e-05, + "log_odds_chosen": 10.461770057678223, + "log_odds_ratio": -0.00011564150190679356, + "logits/chosen": -0.663473904132843, + "logits/rejected": -0.5993514657020569, + "logps/chosen": -0.000487415527459234, + "logps/rejected": -1.8845086097717285, + "loss": 0.6197, + "nll_loss": 0.15491798520088196, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.874155638390221e-05, + "rewards/margins": 0.18840211629867554, + "rewards/rejected": -0.1884508728981018, + "step": 7953 + }, + { + "epoch": 5.500691562932227, + "grad_norm": 8.521260261535645, + "learning_rate": 2.4996157983709854e-05, + "log_odds_chosen": 9.991600036621094, + "log_odds_ratio": -0.0003730976313818246, + "logits/chosen": 0.06851185858249664, + "logits/rejected": 0.07069035619497299, + "logps/chosen": -0.00043773557990789413, + "logps/rejected": -1.9327276945114136, + "loss": 1.147, + "nll_loss": 0.2867090106010437, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3773557990789413e-05, + "rewards/margins": 0.19322898983955383, + "rewards/rejected": -0.19327276945114136, + "step": 7954 + }, + { + "epoch": 5.501383125864454, + "grad_norm": 7.410879135131836, + "learning_rate": 2.4992315967419703e-05, + "log_odds_chosen": 11.399124145507812, + "log_odds_ratio": -3.132876372546889e-05, + "logits/chosen": -0.29479166865348816, + "logits/rejected": -0.32007062435150146, + "logps/chosen": -0.00012927822535857558, + "logps/rejected": -2.465428590774536, + "loss": 0.6375, + "nll_loss": 0.15937091410160065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2927822353958618e-05, + "rewards/margins": 0.2465299367904663, + "rewards/rejected": -0.24654288589954376, + "step": 7955 + }, + { + "epoch": 5.5020746887966805, + "grad_norm": 6.070524215698242, + "learning_rate": 2.4988473951129555e-05, + "log_odds_chosen": 10.64219856262207, + "log_odds_ratio": -4.545422052615322e-05, + "logits/chosen": -0.4476628601551056, + "logits/rejected": -0.507516086101532, + "logps/chosen": -0.0005662592011503875, + "logps/rejected": -2.5192782878875732, + "loss": 0.746, + "nll_loss": 0.18649649620056152, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.662592229782604e-05, + "rewards/margins": 0.2518712282180786, + "rewards/rejected": -0.25192785263061523, + "step": 7956 + }, + { + "epoch": 5.502766251728907, + "grad_norm": 9.197659492492676, + "learning_rate": 2.4984631934839408e-05, + "log_odds_chosen": 10.960630416870117, + "log_odds_ratio": -5.631362000713125e-05, + "logits/chosen": -0.7002097368240356, + "logits/rejected": -0.6325714588165283, + "logps/chosen": -0.0001856803719419986, + "logps/rejected": -2.307868003845215, + "loss": 1.0023, + "nll_loss": 0.2505730390548706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.85680364666041e-05, + "rewards/margins": 0.23076826333999634, + "rewards/rejected": -0.23078681528568268, + "step": 7957 + }, + { + "epoch": 5.503457814661134, + "grad_norm": 6.928858280181885, + "learning_rate": 2.4980789918549257e-05, + "log_odds_chosen": 10.45057487487793, + "log_odds_ratio": -6.252497405512258e-05, + "logits/chosen": -0.41174808144569397, + "logits/rejected": -0.4137668311595917, + "logps/chosen": -0.00021452132205013186, + "logps/rejected": -1.9052519798278809, + "loss": 0.7878, + "nll_loss": 0.19695553183555603, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1452131477417424e-05, + "rewards/margins": 0.19050373136997223, + "rewards/rejected": -0.19052520394325256, + "step": 7958 + }, + { + "epoch": 5.504149377593361, + "grad_norm": 6.849351406097412, + "learning_rate": 2.4976947902259106e-05, + "log_odds_chosen": 10.479252815246582, + "log_odds_ratio": -8.19520719232969e-05, + "logits/chosen": -0.41458866000175476, + "logits/rejected": -0.5210939049720764, + "logps/chosen": -0.0002766270481515676, + "logps/rejected": -1.998887538909912, + "loss": 0.7945, + "nll_loss": 0.19861416518688202, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.766270517895464e-05, + "rewards/margins": 0.19986110925674438, + "rewards/rejected": -0.19988876581192017, + "step": 7959 + }, + { + "epoch": 5.504840940525588, + "grad_norm": 9.421629905700684, + "learning_rate": 2.4973105885968958e-05, + "log_odds_chosen": 10.660806655883789, + "log_odds_ratio": -3.9099551941035315e-05, + "logits/chosen": -0.23674964904785156, + "logits/rejected": -0.33446353673934937, + "logps/chosen": -0.00019968458218500018, + "logps/rejected": -1.9553388357162476, + "loss": 0.762, + "nll_loss": 0.19048884510993958, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9968458218500018e-05, + "rewards/margins": 0.19551391899585724, + "rewards/rejected": -0.195533886551857, + "step": 7960 + }, + { + "epoch": 5.505532503457815, + "grad_norm": 5.590500831604004, + "learning_rate": 2.4969263869678807e-05, + "log_odds_chosen": 10.218223571777344, + "log_odds_ratio": -7.917553011793643e-05, + "logits/chosen": -0.07494551688432693, + "logits/rejected": -0.1157187670469284, + "logps/chosen": -0.00028149288846179843, + "logps/rejected": -1.8711258172988892, + "loss": 0.8577, + "nll_loss": 0.21441730856895447, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.814928666339256e-05, + "rewards/margins": 0.18708443641662598, + "rewards/rejected": -0.18711256980895996, + "step": 7961 + }, + { + "epoch": 5.5062240663900415, + "grad_norm": 12.93747615814209, + "learning_rate": 2.496542185338866e-05, + "log_odds_chosen": 9.34193229675293, + "log_odds_ratio": -0.011150677688419819, + "logits/chosen": -0.5461189150810242, + "logits/rejected": -0.4774158000946045, + "logps/chosen": -0.04011606052517891, + "logps/rejected": -1.7543762922286987, + "loss": 0.703, + "nll_loss": 0.17463496327400208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004011606797575951, + "rewards/margins": 0.17142602801322937, + "rewards/rejected": -0.17543762922286987, + "step": 7962 + }, + { + "epoch": 5.506915629322268, + "grad_norm": 16.010683059692383, + "learning_rate": 2.4961579837098512e-05, + "log_odds_chosen": 11.137772560119629, + "log_odds_ratio": -1.815756331779994e-05, + "logits/chosen": -0.25531166791915894, + "logits/rejected": -0.24413050711154938, + "logps/chosen": -0.00016972131561487913, + "logps/rejected": -2.17885422706604, + "loss": 0.7363, + "nll_loss": 0.18407143652439117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6972131561487913e-05, + "rewards/margins": 0.21786844730377197, + "rewards/rejected": -0.21788541972637177, + "step": 7963 + }, + { + "epoch": 5.507607192254495, + "grad_norm": 11.521442413330078, + "learning_rate": 2.495773782080836e-05, + "log_odds_chosen": 9.831197738647461, + "log_odds_ratio": -0.00015496321429964155, + "logits/chosen": -0.6120339035987854, + "logits/rejected": -0.727606475353241, + "logps/chosen": -0.0003907257050741464, + "logps/rejected": -1.8259804248809814, + "loss": 0.8695, + "nll_loss": 0.21735124289989471, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.907257269020192e-05, + "rewards/margins": 0.18255898356437683, + "rewards/rejected": -0.1825980544090271, + "step": 7964 + }, + { + "epoch": 5.508298755186722, + "grad_norm": 8.496864318847656, + "learning_rate": 2.4953895804518214e-05, + "log_odds_chosen": 11.090476036071777, + "log_odds_ratio": -0.00013023380597587675, + "logits/chosen": -0.1817966103553772, + "logits/rejected": -0.22730708122253418, + "logps/chosen": -0.00041753993718884885, + "logps/rejected": -2.8603012561798096, + "loss": 0.7038, + "nll_loss": 0.1759316325187683, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.175399590167217e-05, + "rewards/margins": 0.28598839044570923, + "rewards/rejected": -0.2860301434993744, + "step": 7965 + }, + { + "epoch": 5.508990318118949, + "grad_norm": 5.603028297424316, + "learning_rate": 2.4950053788228066e-05, + "log_odds_chosen": 10.303237915039062, + "log_odds_ratio": -0.0003845428582280874, + "logits/chosen": -0.7924565672874451, + "logits/rejected": -0.8252934813499451, + "logps/chosen": -0.000411234941566363, + "logps/rejected": -1.761689305305481, + "loss": 0.5459, + "nll_loss": 0.1364300549030304, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.11234941566363e-05, + "rewards/margins": 0.1761278212070465, + "rewards/rejected": -0.17616893351078033, + "step": 7966 + }, + { + "epoch": 5.509681881051176, + "grad_norm": 8.586468696594238, + "learning_rate": 2.4946211771937915e-05, + "log_odds_chosen": 10.925909042358398, + "log_odds_ratio": -9.190478158416227e-05, + "logits/chosen": -0.11336120218038559, + "logits/rejected": -0.14280270040035248, + "logps/chosen": -0.00026982746203429997, + "logps/rejected": -2.5007822513580322, + "loss": 0.9702, + "nll_loss": 0.24254389107227325, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6982745112036355e-05, + "rewards/margins": 0.25005125999450684, + "rewards/rejected": -0.2500782310962677, + "step": 7967 + }, + { + "epoch": 5.5103734439834025, + "grad_norm": 5.238374710083008, + "learning_rate": 2.4942369755647764e-05, + "log_odds_chosen": 10.48822021484375, + "log_odds_ratio": -9.922584285959601e-05, + "logits/chosen": -0.5154739618301392, + "logits/rejected": -0.5209592580795288, + "logps/chosen": -0.00020251476962585002, + "logps/rejected": -2.038828134536743, + "loss": 0.8264, + "nll_loss": 0.2065943479537964, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.025147659878712e-05, + "rewards/margins": 0.20386257767677307, + "rewards/rejected": -0.20388279855251312, + "step": 7968 + }, + { + "epoch": 5.511065006915629, + "grad_norm": 7.684095859527588, + "learning_rate": 2.4938527739357617e-05, + "log_odds_chosen": 9.44205093383789, + "log_odds_ratio": -0.0006889720680192113, + "logits/chosen": -0.6913365721702576, + "logits/rejected": -0.6879432797431946, + "logps/chosen": -0.0010971655137836933, + "logps/rejected": -1.7808010578155518, + "loss": 1.0883, + "nll_loss": 0.2720167338848114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010971655137836933, + "rewards/margins": 0.17797040939331055, + "rewards/rejected": -0.17808012664318085, + "step": 7969 + }, + { + "epoch": 5.511756569847856, + "grad_norm": 12.717248916625977, + "learning_rate": 2.4934685723067466e-05, + "log_odds_chosen": 9.68883991241455, + "log_odds_ratio": -0.00018611655104905367, + "logits/chosen": -0.4414333701133728, + "logits/rejected": -0.5569271445274353, + "logps/chosen": -0.00047959090443328023, + "logps/rejected": -1.9319571256637573, + "loss": 0.7711, + "nll_loss": 0.19275924563407898, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.795909262611531e-05, + "rewards/margins": 0.19314777851104736, + "rewards/rejected": -0.19319573044776917, + "step": 7970 + }, + { + "epoch": 5.512448132780083, + "grad_norm": 6.287275314331055, + "learning_rate": 2.4930843706777318e-05, + "log_odds_chosen": 10.674774169921875, + "log_odds_ratio": -9.911719826050103e-05, + "logits/chosen": -0.6898698806762695, + "logits/rejected": -0.5760419368743896, + "logps/chosen": -0.0015942247118800879, + "logps/rejected": -2.917684555053711, + "loss": 0.6086, + "nll_loss": 0.15215209126472473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015942247409839183, + "rewards/margins": 0.2916090488433838, + "rewards/rejected": -0.29176849126815796, + "step": 7971 + }, + { + "epoch": 5.51313969571231, + "grad_norm": 8.057446479797363, + "learning_rate": 2.492700169048717e-05, + "log_odds_chosen": 9.769332885742188, + "log_odds_ratio": -0.002421770943328738, + "logits/chosen": -0.9956812858581543, + "logits/rejected": -0.9617546796798706, + "logps/chosen": -0.0010418831370770931, + "logps/rejected": -1.563226580619812, + "loss": 0.8939, + "nll_loss": 0.22323346138000488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010418832243885845, + "rewards/margins": 0.15621846914291382, + "rewards/rejected": -0.1563226580619812, + "step": 7972 + }, + { + "epoch": 5.513831258644537, + "grad_norm": 7.7207512855529785, + "learning_rate": 2.492315967419702e-05, + "log_odds_chosen": 9.700448989868164, + "log_odds_ratio": -0.001893568434752524, + "logits/chosen": -0.9668991565704346, + "logits/rejected": -0.9769569635391235, + "logps/chosen": -0.001259871176443994, + "logps/rejected": -1.7321535348892212, + "loss": 1.0863, + "nll_loss": 0.2713871896266937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012598712055478245, + "rewards/margins": 0.1730893850326538, + "rewards/rejected": -0.1732153594493866, + "step": 7973 + }, + { + "epoch": 5.514522821576763, + "grad_norm": 7.019569396972656, + "learning_rate": 2.4919317657906872e-05, + "log_odds_chosen": 9.588005065917969, + "log_odds_ratio": -0.00033628003438934684, + "logits/chosen": -0.6726071834564209, + "logits/rejected": -0.6700811982154846, + "logps/chosen": -0.0003584343066904694, + "logps/rejected": -1.4035160541534424, + "loss": 0.9721, + "nll_loss": 0.24299171566963196, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.584342994145118e-05, + "rewards/margins": 0.14031577110290527, + "rewards/rejected": -0.14035160839557648, + "step": 7974 + }, + { + "epoch": 5.51521438450899, + "grad_norm": 8.323095321655273, + "learning_rate": 2.4915475641616724e-05, + "log_odds_chosen": 10.26290512084961, + "log_odds_ratio": -0.00018353183986619115, + "logits/chosen": -0.5947859287261963, + "logits/rejected": -0.6203582286834717, + "logps/chosen": -0.0007037975010462105, + "logps/rejected": -1.9799339771270752, + "loss": 0.6118, + "nll_loss": 0.15292108058929443, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.037974864942953e-05, + "rewards/margins": 0.19792300462722778, + "rewards/rejected": -0.19799339771270752, + "step": 7975 + }, + { + "epoch": 5.515905947441217, + "grad_norm": 6.35712194442749, + "learning_rate": 2.4911633625326574e-05, + "log_odds_chosen": 10.924556732177734, + "log_odds_ratio": -3.232357630622573e-05, + "logits/chosen": -0.9577485918998718, + "logits/rejected": -0.9427499175071716, + "logps/chosen": -0.00012175530719105154, + "logps/rejected": -1.7347506284713745, + "loss": 0.6837, + "nll_loss": 0.17091642320156097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2175531082903035e-05, + "rewards/margins": 0.1734628826379776, + "rewards/rejected": -0.17347505688667297, + "step": 7976 + }, + { + "epoch": 5.516597510373444, + "grad_norm": 10.328906059265137, + "learning_rate": 2.4907791609036423e-05, + "log_odds_chosen": 10.467013359069824, + "log_odds_ratio": -3.795954035012983e-05, + "logits/chosen": -0.6780118942260742, + "logits/rejected": -0.7425776124000549, + "logps/chosen": -0.00019371267990209162, + "logps/rejected": -1.8615496158599854, + "loss": 0.582, + "nll_loss": 0.14549127221107483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.937126762641128e-05, + "rewards/margins": 0.18613559007644653, + "rewards/rejected": -0.18615497648715973, + "step": 7977 + }, + { + "epoch": 5.517289073305671, + "grad_norm": 10.159599304199219, + "learning_rate": 2.4903949592746275e-05, + "log_odds_chosen": 10.206741333007812, + "log_odds_ratio": -0.00017410822329111397, + "logits/chosen": -0.7243431210517883, + "logits/rejected": -0.6271113157272339, + "logps/chosen": -0.00040067086229100823, + "logps/rejected": -2.076680898666382, + "loss": 0.9364, + "nll_loss": 0.23409080505371094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.006709059467539e-05, + "rewards/margins": 0.2076280117034912, + "rewards/rejected": -0.20766809582710266, + "step": 7978 + }, + { + "epoch": 5.517980636237898, + "grad_norm": 9.997716903686523, + "learning_rate": 2.4900107576456124e-05, + "log_odds_chosen": 10.515052795410156, + "log_odds_ratio": -6.538545130752027e-05, + "logits/chosen": -0.13770362734794617, + "logits/rejected": -0.2249658703804016, + "logps/chosen": -0.000640476297121495, + "logps/rejected": -2.286221981048584, + "loss": 0.9074, + "nll_loss": 0.22684696316719055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.404763553291559e-05, + "rewards/margins": 0.22855816781520844, + "rewards/rejected": -0.2286222130060196, + "step": 7979 + }, + { + "epoch": 5.518672199170124, + "grad_norm": 4.5267333984375, + "learning_rate": 2.4896265560165977e-05, + "log_odds_chosen": 10.762895584106445, + "log_odds_ratio": -5.7566088798921555e-05, + "logits/chosen": -0.5054532885551453, + "logits/rejected": -0.5565618872642517, + "logps/chosen": -0.00048600853187963367, + "logps/rejected": -2.163853168487549, + "loss": 0.473, + "nll_loss": 0.11825583875179291, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8600857553537935e-05, + "rewards/margins": 0.2163367122411728, + "rewards/rejected": -0.21638531982898712, + "step": 7980 + }, + { + "epoch": 5.519363762102351, + "grad_norm": 6.196623802185059, + "learning_rate": 2.489242354387583e-05, + "log_odds_chosen": 9.516175270080566, + "log_odds_ratio": -0.000546960742212832, + "logits/chosen": -0.4045857787132263, + "logits/rejected": -0.4056215286254883, + "logps/chosen": -0.0007767517236061394, + "logps/rejected": -1.6184048652648926, + "loss": 0.5242, + "nll_loss": 0.1309964507818222, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.767517672618851e-05, + "rewards/margins": 0.16176281869411469, + "rewards/rejected": -0.1618404984474182, + "step": 7981 + }, + { + "epoch": 5.520055325034578, + "grad_norm": 10.751822471618652, + "learning_rate": 2.4888581527585678e-05, + "log_odds_chosen": 8.485633850097656, + "log_odds_ratio": -0.002912183292210102, + "logits/chosen": -0.6579504013061523, + "logits/rejected": -0.7018341422080994, + "logps/chosen": -0.0013555031036958098, + "logps/rejected": -1.6597306728363037, + "loss": 0.8041, + "nll_loss": 0.20074039697647095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001355503045488149, + "rewards/margins": 0.16583752632141113, + "rewards/rejected": -0.16597306728363037, + "step": 7982 + }, + { + "epoch": 5.520746887966805, + "grad_norm": 7.2890238761901855, + "learning_rate": 2.488473951129553e-05, + "log_odds_chosen": 10.734992980957031, + "log_odds_ratio": -0.00011404085671529174, + "logits/chosen": -0.47404229640960693, + "logits/rejected": -0.5771169662475586, + "logps/chosen": -0.00018186001398134977, + "logps/rejected": -1.957323431968689, + "loss": 0.787, + "nll_loss": 0.1967388391494751, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8186001398134977e-05, + "rewards/margins": 0.19571414589881897, + "rewards/rejected": -0.19573235511779785, + "step": 7983 + }, + { + "epoch": 5.521438450899032, + "grad_norm": 10.685153007507324, + "learning_rate": 2.4880897495005383e-05, + "log_odds_chosen": 11.264336585998535, + "log_odds_ratio": -2.648890222189948e-05, + "logits/chosen": -0.4374861419200897, + "logits/rejected": -0.4763266444206238, + "logps/chosen": -0.00012154224532423541, + "logps/rejected": -2.124026298522949, + "loss": 1.0, + "nll_loss": 0.25000786781311035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.21542243505246e-05, + "rewards/margins": 0.2123904675245285, + "rewards/rejected": -0.21240262687206268, + "step": 7984 + }, + { + "epoch": 5.522130013831259, + "grad_norm": 7.698362827301025, + "learning_rate": 2.4877055478715232e-05, + "log_odds_chosen": 9.903738021850586, + "log_odds_ratio": -0.00037517695454880595, + "logits/chosen": -0.3922664523124695, + "logits/rejected": -0.4175853729248047, + "logps/chosen": -0.0004097001510672271, + "logps/rejected": -1.7772531509399414, + "loss": 1.0402, + "nll_loss": 0.26001253724098206, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.097001510672271e-05, + "rewards/margins": 0.17768435180187225, + "rewards/rejected": -0.17772531509399414, + "step": 7985 + }, + { + "epoch": 5.522821576763485, + "grad_norm": 7.038385391235352, + "learning_rate": 2.487321346242508e-05, + "log_odds_chosen": 10.166360855102539, + "log_odds_ratio": -0.0013821757165715098, + "logits/chosen": -0.6075230240821838, + "logits/rejected": -0.6644953489303589, + "logps/chosen": -0.0011727921664714813, + "logps/rejected": -1.869389533996582, + "loss": 0.8812, + "nll_loss": 0.22015592455863953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011727922537829727, + "rewards/margins": 0.18682168424129486, + "rewards/rejected": -0.18693895637989044, + "step": 7986 + }, + { + "epoch": 5.523513139695712, + "grad_norm": 23.00648307800293, + "learning_rate": 2.486937144613493e-05, + "log_odds_chosen": 11.663230895996094, + "log_odds_ratio": -3.127233503619209e-05, + "logits/chosen": -0.5745376944541931, + "logits/rejected": -0.5894800424575806, + "logps/chosen": -0.00014439536607824266, + "logps/rejected": -2.702629327774048, + "loss": 0.8924, + "nll_loss": 0.22308455407619476, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4439538063015789e-05, + "rewards/margins": 0.27024850249290466, + "rewards/rejected": -0.2702629566192627, + "step": 7987 + }, + { + "epoch": 5.524204702627939, + "grad_norm": 9.516694068908691, + "learning_rate": 2.4865529429844783e-05, + "log_odds_chosen": 10.172452926635742, + "log_odds_ratio": -0.00201642164029181, + "logits/chosen": -0.44605493545532227, + "logits/rejected": -0.4708332419395447, + "logps/chosen": -0.000771133229136467, + "logps/rejected": -2.4048895835876465, + "loss": 0.9287, + "nll_loss": 0.231984481215477, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.711332000326365e-05, + "rewards/margins": 0.24041184782981873, + "rewards/rejected": -0.24048897624015808, + "step": 7988 + }, + { + "epoch": 5.524896265560166, + "grad_norm": 6.859710216522217, + "learning_rate": 2.4861687413554635e-05, + "log_odds_chosen": 10.065300941467285, + "log_odds_ratio": -0.0002263882925035432, + "logits/chosen": -0.42298412322998047, + "logits/rejected": -0.4986242651939392, + "logps/chosen": -0.0020351733546704054, + "logps/rejected": -2.262023448944092, + "loss": 0.7278, + "nll_loss": 0.18191887438297272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020351732382550836, + "rewards/margins": 0.22599883377552032, + "rewards/rejected": -0.2262023538351059, + "step": 7989 + }, + { + "epoch": 5.525587828492393, + "grad_norm": 6.190978050231934, + "learning_rate": 2.4857845397264484e-05, + "log_odds_chosen": 10.284591674804688, + "log_odds_ratio": -8.91397285158746e-05, + "logits/chosen": -0.26648449897766113, + "logits/rejected": -0.4036068320274353, + "logps/chosen": -0.00047370928223244846, + "logps/rejected": -2.147756338119507, + "loss": 0.7173, + "nll_loss": 0.17931276559829712, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.737092967843637e-05, + "rewards/margins": 0.2147282510995865, + "rewards/rejected": -0.21477562189102173, + "step": 7990 + }, + { + "epoch": 5.5262793914246195, + "grad_norm": 10.940460205078125, + "learning_rate": 2.4854003380974336e-05, + "log_odds_chosen": 10.07880973815918, + "log_odds_ratio": -0.00037312853964976966, + "logits/chosen": -0.18514373898506165, + "logits/rejected": -0.27846235036849976, + "logps/chosen": -0.0007444091606885195, + "logps/rejected": -1.682682991027832, + "loss": 1.0049, + "nll_loss": 0.2511962652206421, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.444091170327738e-05, + "rewards/margins": 0.16819386184215546, + "rewards/rejected": -0.16826829314231873, + "step": 7991 + }, + { + "epoch": 5.526970954356846, + "grad_norm": 5.758509635925293, + "learning_rate": 2.485016136468419e-05, + "log_odds_chosen": 9.71127700805664, + "log_odds_ratio": -0.0002266637166030705, + "logits/chosen": -0.5368736386299133, + "logits/rejected": -0.5184175968170166, + "logps/chosen": -0.0008136904216371477, + "logps/rejected": -2.2877635955810547, + "loss": 0.7681, + "nll_loss": 0.1920141577720642, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.136904216371477e-05, + "rewards/margins": 0.22869496047496796, + "rewards/rejected": -0.22877633571624756, + "step": 7992 + }, + { + "epoch": 5.527662517289073, + "grad_norm": 6.862176418304443, + "learning_rate": 2.4846319348394038e-05, + "log_odds_chosen": 10.76097583770752, + "log_odds_ratio": -4.926729525323026e-05, + "logits/chosen": -0.7725235223770142, + "logits/rejected": -0.7454330325126648, + "logps/chosen": -0.00017461538664065301, + "logps/rejected": -2.0369513034820557, + "loss": 0.5055, + "nll_loss": 0.12636780738830566, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7461539755458944e-05, + "rewards/margins": 0.20367765426635742, + "rewards/rejected": -0.2036951184272766, + "step": 7993 + }, + { + "epoch": 5.5283540802213, + "grad_norm": 5.5887556076049805, + "learning_rate": 2.484247733210389e-05, + "log_odds_chosen": 9.259298324584961, + "log_odds_ratio": -0.0001776816789060831, + "logits/chosen": -0.32677367329597473, + "logits/rejected": -0.3950062096118927, + "logps/chosen": -0.00048437563236802816, + "logps/rejected": -1.6748768091201782, + "loss": 0.6943, + "nll_loss": 0.17354771494865417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8437563236802816e-05, + "rewards/margins": 0.16743925213813782, + "rewards/rejected": -0.16748769581317902, + "step": 7994 + }, + { + "epoch": 5.529045643153527, + "grad_norm": 7.050952911376953, + "learning_rate": 2.483863531581374e-05, + "log_odds_chosen": 10.473284721374512, + "log_odds_ratio": -0.00017908669542521238, + "logits/chosen": -0.49899330735206604, + "logits/rejected": -0.49205830693244934, + "logps/chosen": -0.00046343091526068747, + "logps/rejected": -1.8278075456619263, + "loss": 0.5457, + "nll_loss": 0.13640612363815308, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.634309152606875e-05, + "rewards/margins": 0.182734414935112, + "rewards/rejected": -0.18278075754642487, + "step": 7995 + }, + { + "epoch": 5.529737206085754, + "grad_norm": 9.834431648254395, + "learning_rate": 2.483479329952359e-05, + "log_odds_chosen": 10.868444442749023, + "log_odds_ratio": -5.240093742031604e-05, + "logits/chosen": -0.4045601189136505, + "logits/rejected": -0.42231157422065735, + "logps/chosen": -0.0003390312194824219, + "logps/rejected": -2.556269407272339, + "loss": 0.4876, + "nll_loss": 0.1218939870595932, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3903121220646426e-05, + "rewards/margins": 0.25559306144714355, + "rewards/rejected": -0.25562694668769836, + "step": 7996 + }, + { + "epoch": 5.5304287690179805, + "grad_norm": 7.154923439025879, + "learning_rate": 2.483095128323344e-05, + "log_odds_chosen": 9.480171203613281, + "log_odds_ratio": -0.0006049070507287979, + "logits/chosen": -0.34929412603378296, + "logits/rejected": -0.3010343015193939, + "logps/chosen": -0.0012120280880481005, + "logps/rejected": -2.1136090755462646, + "loss": 0.9824, + "nll_loss": 0.24554985761642456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012120280007366091, + "rewards/margins": 0.21123971045017242, + "rewards/rejected": -0.211360901594162, + "step": 7997 + }, + { + "epoch": 5.531120331950207, + "grad_norm": 7.135680675506592, + "learning_rate": 2.4827109266943293e-05, + "log_odds_chosen": 9.703398704528809, + "log_odds_ratio": -0.0005645658238790929, + "logits/chosen": -0.6417530179023743, + "logits/rejected": -0.698527455329895, + "logps/chosen": -0.0014950187178328633, + "logps/rejected": -2.188197135925293, + "loss": 1.2605, + "nll_loss": 0.3150603771209717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001495018950663507, + "rewards/margins": 0.21867021918296814, + "rewards/rejected": -0.21881970763206482, + "step": 7998 + }, + { + "epoch": 5.531811894882434, + "grad_norm": 6.10271692276001, + "learning_rate": 2.4823267250653142e-05, + "log_odds_chosen": 9.906564712524414, + "log_odds_ratio": -0.00015931145753711462, + "logits/chosen": -0.23830081522464752, + "logits/rejected": -0.39299190044403076, + "logps/chosen": -0.00039393879706040025, + "logps/rejected": -1.9749970436096191, + "loss": 1.171, + "nll_loss": 0.2927406430244446, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.939388625440188e-05, + "rewards/margins": 0.1974603235721588, + "rewards/rejected": -0.19749972224235535, + "step": 7999 + }, + { + "epoch": 5.532503457814661, + "grad_norm": 5.048730850219727, + "learning_rate": 2.4819425234362995e-05, + "log_odds_chosen": 11.018048286437988, + "log_odds_ratio": -2.6650952349882573e-05, + "logits/chosen": -0.6066503524780273, + "logits/rejected": -0.6664042472839355, + "logps/chosen": -7.129425648599863e-05, + "logps/rejected": -1.5724921226501465, + "loss": 0.7817, + "nll_loss": 0.19543345272541046, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129426194296684e-06, + "rewards/margins": 0.157242089509964, + "rewards/rejected": -0.15724921226501465, + "step": 8000 + }, + { + "epoch": 5.533195020746888, + "grad_norm": 10.095208168029785, + "learning_rate": 2.4815583218072847e-05, + "log_odds_chosen": 11.383611679077148, + "log_odds_ratio": -2.3711505491519347e-05, + "logits/chosen": -0.2679492235183716, + "logits/rejected": -0.3012941777706146, + "logps/chosen": -0.0003241963859181851, + "logps/rejected": -3.117649793624878, + "loss": 0.6962, + "nll_loss": 0.17404018342494965, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.241963713662699e-05, + "rewards/margins": 0.31173259019851685, + "rewards/rejected": -0.31176501512527466, + "step": 8001 + }, + { + "epoch": 5.533886583679115, + "grad_norm": 7.097454071044922, + "learning_rate": 2.4811741201782696e-05, + "log_odds_chosen": 10.319755554199219, + "log_odds_ratio": -0.00022292044013738632, + "logits/chosen": -0.2506617605686188, + "logits/rejected": -0.3690025210380554, + "logps/chosen": -0.0026361849159002304, + "logps/rejected": -2.5132508277893066, + "loss": 0.9051, + "nll_loss": 0.22624385356903076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026361847994849086, + "rewards/margins": 0.25106143951416016, + "rewards/rejected": -0.2513250708580017, + "step": 8002 + }, + { + "epoch": 5.5345781466113415, + "grad_norm": 9.05012035369873, + "learning_rate": 2.480789918549255e-05, + "log_odds_chosen": 11.596423149108887, + "log_odds_ratio": -2.853182377293706e-05, + "logits/chosen": -0.5594992637634277, + "logits/rejected": -0.7029032707214355, + "logps/chosen": -0.00015006544708739966, + "logps/rejected": -2.5829501152038574, + "loss": 0.8926, + "nll_loss": 0.22315490245819092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5006546163931489e-05, + "rewards/margins": 0.25828003883361816, + "rewards/rejected": -0.2582949995994568, + "step": 8003 + }, + { + "epoch": 5.535269709543568, + "grad_norm": 13.613221168518066, + "learning_rate": 2.4804057169202398e-05, + "log_odds_chosen": 11.267099380493164, + "log_odds_ratio": -2.884993955376558e-05, + "logits/chosen": -0.017788421362638474, + "logits/rejected": -0.1763039231300354, + "logps/chosen": -0.0003624186501838267, + "logps/rejected": -2.790835380554199, + "loss": 0.8132, + "nll_loss": 0.20329706370830536, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6241868656361476e-05, + "rewards/margins": 0.27904731035232544, + "rewards/rejected": -0.2790835499763489, + "step": 8004 + }, + { + "epoch": 5.535961272475795, + "grad_norm": 7.166762351989746, + "learning_rate": 2.4800215152912247e-05, + "log_odds_chosen": 9.902912139892578, + "log_odds_ratio": -0.0005022470140829682, + "logits/chosen": -0.49830174446105957, + "logits/rejected": -0.5488074421882629, + "logps/chosen": -0.00040694093331694603, + "logps/rejected": -1.8442189693450928, + "loss": 0.5014, + "nll_loss": 0.12530532479286194, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.069409624207765e-05, + "rewards/margins": 0.18438121676445007, + "rewards/rejected": -0.18442192673683167, + "step": 8005 + }, + { + "epoch": 5.536652835408022, + "grad_norm": 7.92393684387207, + "learning_rate": 2.47963731366221e-05, + "log_odds_chosen": 11.132036209106445, + "log_odds_ratio": -2.9587336030090228e-05, + "logits/chosen": 0.26792770624160767, + "logits/rejected": 0.16624970734119415, + "logps/chosen": -0.0002462788834236562, + "logps/rejected": -2.610889196395874, + "loss": 0.7446, + "nll_loss": 0.18615420162677765, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4627888706163503e-05, + "rewards/margins": 0.2610642910003662, + "rewards/rejected": -0.26108893752098083, + "step": 8006 + }, + { + "epoch": 5.537344398340249, + "grad_norm": 12.280511856079102, + "learning_rate": 2.4792531120331952e-05, + "log_odds_chosen": 10.223596572875977, + "log_odds_ratio": -4.838874883716926e-05, + "logits/chosen": -0.27258747816085815, + "logits/rejected": -0.316699743270874, + "logps/chosen": -0.00025811471277847886, + "logps/rejected": -1.9246526956558228, + "loss": 0.743, + "nll_loss": 0.18575647473335266, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5811470550252125e-05, + "rewards/margins": 0.19243945181369781, + "rewards/rejected": -0.19246527552604675, + "step": 8007 + }, + { + "epoch": 5.538035961272476, + "grad_norm": 7.187430381774902, + "learning_rate": 2.47886891040418e-05, + "log_odds_chosen": 10.459199905395508, + "log_odds_ratio": -0.0001508681889390573, + "logits/chosen": -0.23558424413204193, + "logits/rejected": -0.32377859950065613, + "logps/chosen": -0.00045977221452631056, + "logps/rejected": -1.9837918281555176, + "loss": 0.6488, + "nll_loss": 0.16219037771224976, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.597722363541834e-05, + "rewards/margins": 0.19833320379257202, + "rewards/rejected": -0.19837918877601624, + "step": 8008 + }, + { + "epoch": 5.5387275242047025, + "grad_norm": 8.177022933959961, + "learning_rate": 2.4784847087751653e-05, + "log_odds_chosen": 9.602645874023438, + "log_odds_ratio": -0.00019704359874594957, + "logits/chosen": -0.11307889968156815, + "logits/rejected": -0.11641175299882889, + "logps/chosen": -0.0002687516971491277, + "logps/rejected": -1.5583631992340088, + "loss": 1.0271, + "nll_loss": 0.2567659020423889, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6875170078710653e-05, + "rewards/margins": 0.1558094471693039, + "rewards/rejected": -0.1558363288640976, + "step": 8009 + }, + { + "epoch": 5.539419087136929, + "grad_norm": 6.553852558135986, + "learning_rate": 2.4781005071461506e-05, + "log_odds_chosen": 9.845769882202148, + "log_odds_ratio": -0.01491259504109621, + "logits/chosen": -0.4053501486778259, + "logits/rejected": -0.48310720920562744, + "logps/chosen": -0.08075077831745148, + "logps/rejected": -1.9933180809020996, + "loss": 0.6495, + "nll_loss": 0.1608942449092865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008075077086687088, + "rewards/margins": 0.19125673174858093, + "rewards/rejected": -0.19933182001113892, + "step": 8010 + }, + { + "epoch": 5.540110650069156, + "grad_norm": 4.823723793029785, + "learning_rate": 2.4777163055171355e-05, + "log_odds_chosen": 9.852107048034668, + "log_odds_ratio": -0.00047949200961738825, + "logits/chosen": -0.5386756658554077, + "logits/rejected": -0.5551636815071106, + "logps/chosen": -0.00034617114579305053, + "logps/rejected": -1.7934048175811768, + "loss": 0.6276, + "nll_loss": 0.1568397879600525, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4617110941326246e-05, + "rewards/margins": 0.17930585145950317, + "rewards/rejected": -0.17934048175811768, + "step": 8011 + }, + { + "epoch": 5.540802213001383, + "grad_norm": 6.76782751083374, + "learning_rate": 2.4773321038881207e-05, + "log_odds_chosen": 11.475250244140625, + "log_odds_ratio": -2.2427797375712544e-05, + "logits/chosen": -0.5119101405143738, + "logits/rejected": -0.5618435740470886, + "logps/chosen": -0.0002971701032947749, + "logps/rejected": -2.7157227993011475, + "loss": 0.7586, + "nll_loss": 0.18965360522270203, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9717009965679608e-05, + "rewards/margins": 0.27154257893562317, + "rewards/rejected": -0.2715722918510437, + "step": 8012 + }, + { + "epoch": 5.54149377593361, + "grad_norm": 6.336251258850098, + "learning_rate": 2.4769479022591056e-05, + "log_odds_chosen": 10.323335647583008, + "log_odds_ratio": -8.404036634601653e-05, + "logits/chosen": -0.17968213558197021, + "logits/rejected": -0.1371138095855713, + "logps/chosen": -0.00026224643806926906, + "logps/rejected": -2.03709077835083, + "loss": 0.914, + "nll_loss": 0.22849002480506897, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6224643079331145e-05, + "rewards/margins": 0.20368286967277527, + "rewards/rejected": -0.20370909571647644, + "step": 8013 + }, + { + "epoch": 5.542185338865837, + "grad_norm": 5.572230815887451, + "learning_rate": 2.4765637006300905e-05, + "log_odds_chosen": 10.898839950561523, + "log_odds_ratio": -0.00018718911451287568, + "logits/chosen": -0.6403128504753113, + "logits/rejected": -0.6781945824623108, + "logps/chosen": -0.0006806739838793874, + "logps/rejected": -1.9784750938415527, + "loss": 0.5278, + "nll_loss": 0.13193657994270325, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.806739838793874e-05, + "rewards/margins": 0.19777946174144745, + "rewards/rejected": -0.19784751534461975, + "step": 8014 + }, + { + "epoch": 5.5428769017980635, + "grad_norm": 7.013886451721191, + "learning_rate": 2.4761794990010758e-05, + "log_odds_chosen": 10.597729682922363, + "log_odds_ratio": -4.4307264033704996e-05, + "logits/chosen": -0.4737222194671631, + "logits/rejected": -0.6006163358688354, + "logps/chosen": -0.0001434629812138155, + "logps/rejected": -1.7592902183532715, + "loss": 0.7723, + "nll_loss": 0.1930730640888214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4346298485179432e-05, + "rewards/margins": 0.1759146749973297, + "rewards/rejected": -0.1759290248155594, + "step": 8015 + }, + { + "epoch": 5.54356846473029, + "grad_norm": 9.871222496032715, + "learning_rate": 2.475795297372061e-05, + "log_odds_chosen": 10.386920928955078, + "log_odds_ratio": -7.108970748959109e-05, + "logits/chosen": -0.12642526626586914, + "logits/rejected": -0.1811293661594391, + "logps/chosen": -0.00018795863434206694, + "logps/rejected": -2.037794589996338, + "loss": 1.0485, + "nll_loss": 0.2621132433414459, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8795864889398217e-05, + "rewards/margins": 0.20376065373420715, + "rewards/rejected": -0.2037794589996338, + "step": 8016 + }, + { + "epoch": 5.544260027662517, + "grad_norm": 8.90102767944336, + "learning_rate": 2.475411095743046e-05, + "log_odds_chosen": 10.551826477050781, + "log_odds_ratio": -0.0010956140467897058, + "logits/chosen": -0.4647817611694336, + "logits/rejected": -0.46479907631874084, + "logps/chosen": -0.0002952416252810508, + "logps/rejected": -2.0322060585021973, + "loss": 0.983, + "nll_loss": 0.24563650786876678, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.952416252810508e-05, + "rewards/margins": 0.20319105684757233, + "rewards/rejected": -0.20322057604789734, + "step": 8017 + }, + { + "epoch": 5.544951590594744, + "grad_norm": 10.191629409790039, + "learning_rate": 2.4750268941140312e-05, + "log_odds_chosen": 10.009177207946777, + "log_odds_ratio": -0.00033783228718675673, + "logits/chosen": -0.4004913866519928, + "logits/rejected": -0.42859768867492676, + "logps/chosen": -0.00042483455035835505, + "logps/rejected": -1.8237648010253906, + "loss": 0.5796, + "nll_loss": 0.14487770199775696, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.248345430823974e-05, + "rewards/margins": 0.18233400583267212, + "rewards/rejected": -0.18237647414207458, + "step": 8018 + }, + { + "epoch": 5.545643153526971, + "grad_norm": 4.802680492401123, + "learning_rate": 2.4746426924850164e-05, + "log_odds_chosen": 10.210495948791504, + "log_odds_ratio": -0.00011646961502265185, + "logits/chosen": -0.39877617359161377, + "logits/rejected": -0.42844846844673157, + "logps/chosen": -0.0003031216620001942, + "logps/rejected": -1.4773494005203247, + "loss": 0.581, + "nll_loss": 0.14524339139461517, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.031216692761518e-05, + "rewards/margins": 0.14770463109016418, + "rewards/rejected": -0.14773495495319366, + "step": 8019 + }, + { + "epoch": 5.546334716459198, + "grad_norm": 20.15610694885254, + "learning_rate": 2.4742584908560013e-05, + "log_odds_chosen": 11.423358917236328, + "log_odds_ratio": -0.00012067429634043947, + "logits/chosen": -0.6458957195281982, + "logits/rejected": -0.7440388202667236, + "logps/chosen": -0.00017651362577453256, + "logps/rejected": -1.9940600395202637, + "loss": 0.715, + "nll_loss": 0.17874941229820251, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.765136403264478e-05, + "rewards/margins": 0.19938836991786957, + "rewards/rejected": -0.19940602779388428, + "step": 8020 + }, + { + "epoch": 5.5470262793914245, + "grad_norm": 8.5133695602417, + "learning_rate": 2.4738742892269866e-05, + "log_odds_chosen": 10.65363883972168, + "log_odds_ratio": -6.911178934387863e-05, + "logits/chosen": -0.026836829259991646, + "logits/rejected": -0.21755240857601166, + "logps/chosen": -0.000281484768493101, + "logps/rejected": -2.1441502571105957, + "loss": 0.7558, + "nll_loss": 0.18893516063690186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8148479032097384e-05, + "rewards/margins": 0.21438689529895782, + "rewards/rejected": -0.214415043592453, + "step": 8021 + }, + { + "epoch": 5.547717842323651, + "grad_norm": 6.64901065826416, + "learning_rate": 2.4734900875979715e-05, + "log_odds_chosen": 10.427520751953125, + "log_odds_ratio": -0.00015523642650805414, + "logits/chosen": -0.5124070048332214, + "logits/rejected": -0.7381057143211365, + "logps/chosen": -0.0002842023386619985, + "logps/rejected": -2.0599887371063232, + "loss": 0.7097, + "nll_loss": 0.17740392684936523, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8420234229997732e-05, + "rewards/margins": 0.20597046613693237, + "rewards/rejected": -0.20599888265132904, + "step": 8022 + }, + { + "epoch": 5.548409405255878, + "grad_norm": 15.249797821044922, + "learning_rate": 2.4731058859689564e-05, + "log_odds_chosen": 10.686328887939453, + "log_odds_ratio": -5.325684469426051e-05, + "logits/chosen": -0.36337143182754517, + "logits/rejected": -0.3869422376155853, + "logps/chosen": -0.00022905900550540537, + "logps/rejected": -2.438955545425415, + "loss": 0.997, + "nll_loss": 0.24923908710479736, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2905900550540537e-05, + "rewards/margins": 0.24387264251708984, + "rewards/rejected": -0.2438955456018448, + "step": 8023 + }, + { + "epoch": 5.549100968188105, + "grad_norm": 11.313993453979492, + "learning_rate": 2.4727216843399416e-05, + "log_odds_chosen": 9.633194923400879, + "log_odds_ratio": -0.00018383633869234473, + "logits/chosen": -0.22703269124031067, + "logits/rejected": -0.29582494497299194, + "logps/chosen": -0.00045589538058266044, + "logps/rejected": -1.6704440116882324, + "loss": 0.6371, + "nll_loss": 0.15925177931785583, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5589538785861805e-05, + "rewards/margins": 0.16699880361557007, + "rewards/rejected": -0.16704440116882324, + "step": 8024 + }, + { + "epoch": 5.549792531120332, + "grad_norm": 11.974188804626465, + "learning_rate": 2.472337482710927e-05, + "log_odds_chosen": 10.81893539428711, + "log_odds_ratio": -0.0012266793055459857, + "logits/chosen": -0.4919191598892212, + "logits/rejected": -0.5107102394104004, + "logps/chosen": -0.0006843972951173782, + "logps/rejected": -2.7139744758605957, + "loss": 0.9274, + "nll_loss": 0.23171493411064148, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.843973824288696e-05, + "rewards/margins": 0.27132901549339294, + "rewards/rejected": -0.2713974714279175, + "step": 8025 + }, + { + "epoch": 5.550484094052559, + "grad_norm": 6.510634422302246, + "learning_rate": 2.4719532810819118e-05, + "log_odds_chosen": 10.264190673828125, + "log_odds_ratio": -0.0001600035757292062, + "logits/chosen": -0.5555983781814575, + "logits/rejected": -0.6701858043670654, + "logps/chosen": -0.0001303990138694644, + "logps/rejected": -1.360192894935608, + "loss": 1.1732, + "nll_loss": 0.2932942509651184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3039902114542201e-05, + "rewards/margins": 0.13600623607635498, + "rewards/rejected": -0.1360192894935608, + "step": 8026 + }, + { + "epoch": 5.551175656984785, + "grad_norm": 13.490435600280762, + "learning_rate": 2.471569079452897e-05, + "log_odds_chosen": 11.536421775817871, + "log_odds_ratio": -2.0468776710913517e-05, + "logits/chosen": -0.7267881631851196, + "logits/rejected": -0.7671307325363159, + "logps/chosen": -0.00020429975120350718, + "logps/rejected": -2.4622702598571777, + "loss": 0.646, + "nll_loss": 0.1615058183670044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.042997584794648e-05, + "rewards/margins": 0.246206596493721, + "rewards/rejected": -0.24622704088687897, + "step": 8027 + }, + { + "epoch": 5.551867219917012, + "grad_norm": 8.800284385681152, + "learning_rate": 2.4711848778238823e-05, + "log_odds_chosen": 9.855973243713379, + "log_odds_ratio": -0.00014719019236508757, + "logits/chosen": -0.531029224395752, + "logits/rejected": -0.5820093750953674, + "logps/chosen": -0.00027673933072946966, + "logps/rejected": -1.5422172546386719, + "loss": 0.6295, + "nll_loss": 0.1573716104030609, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7673933800542727e-05, + "rewards/margins": 0.15419405698776245, + "rewards/rejected": -0.15422172844409943, + "step": 8028 + }, + { + "epoch": 5.552558782849239, + "grad_norm": 5.500239372253418, + "learning_rate": 2.470800676194867e-05, + "log_odds_chosen": 10.604162216186523, + "log_odds_ratio": -3.668230419862084e-05, + "logits/chosen": -0.3131680488586426, + "logits/rejected": -0.3498471677303314, + "logps/chosen": -0.00015500406152568758, + "logps/rejected": -1.7472177743911743, + "loss": 0.6977, + "nll_loss": 0.17442475259304047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.55004072439624e-05, + "rewards/margins": 0.17470628023147583, + "rewards/rejected": -0.17472177743911743, + "step": 8029 + }, + { + "epoch": 5.553250345781466, + "grad_norm": 9.993817329406738, + "learning_rate": 2.4704164745658524e-05, + "log_odds_chosen": 11.406761169433594, + "log_odds_ratio": -2.9171018468332477e-05, + "logits/chosen": -0.45108741521835327, + "logits/rejected": -0.48169055581092834, + "logps/chosen": -0.00019415847782511264, + "logps/rejected": -2.8017964363098145, + "loss": 0.8608, + "nll_loss": 0.21519702672958374, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9415847418713383e-05, + "rewards/margins": 0.28016021847724915, + "rewards/rejected": -0.28017961978912354, + "step": 8030 + }, + { + "epoch": 5.553941908713693, + "grad_norm": 6.946387767791748, + "learning_rate": 2.4700322729368373e-05, + "log_odds_chosen": 11.587224960327148, + "log_odds_ratio": -2.163035787816625e-05, + "logits/chosen": -0.49010270833969116, + "logits/rejected": -0.4291403591632843, + "logps/chosen": -0.0002493963693268597, + "logps/rejected": -3.0701684951782227, + "loss": 0.7018, + "nll_loss": 0.1754557192325592, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.493963620509021e-05, + "rewards/margins": 0.30699190497398376, + "rewards/rejected": -0.30701684951782227, + "step": 8031 + }, + { + "epoch": 5.55463347164592, + "grad_norm": 7.238589763641357, + "learning_rate": 2.4696480713078222e-05, + "log_odds_chosen": 9.71479606628418, + "log_odds_ratio": -0.0008989711641333997, + "logits/chosen": -0.19155974686145782, + "logits/rejected": -0.2716420292854309, + "logps/chosen": -0.0017191801453009248, + "logps/rejected": -1.540884017944336, + "loss": 0.6655, + "nll_loss": 0.16629430651664734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017191801453009248, + "rewards/margins": 0.15391649305820465, + "rewards/rejected": -0.15408840775489807, + "step": 8032 + }, + { + "epoch": 5.555325034578146, + "grad_norm": 9.536120414733887, + "learning_rate": 2.4692638696788075e-05, + "log_odds_chosen": 10.171594619750977, + "log_odds_ratio": -0.0008030205499380827, + "logits/chosen": -0.537517786026001, + "logits/rejected": -0.603646993637085, + "logps/chosen": -0.0011483978014439344, + "logps/rejected": -2.086796998977661, + "loss": 1.2286, + "nll_loss": 0.30706262588500977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011483977141324431, + "rewards/margins": 0.2085648626089096, + "rewards/rejected": -0.2086797058582306, + "step": 8033 + }, + { + "epoch": 5.556016597510373, + "grad_norm": 8.908292770385742, + "learning_rate": 2.4688796680497927e-05, + "log_odds_chosen": 8.988523483276367, + "log_odds_ratio": -0.00041150639299303293, + "logits/chosen": -0.355111300945282, + "logits/rejected": -0.3641355633735657, + "logps/chosen": -0.0014900579117238522, + "logps/rejected": -1.5867036581039429, + "loss": 0.6441, + "nll_loss": 0.16098034381866455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014900577662047, + "rewards/margins": 0.1585213541984558, + "rewards/rejected": -0.1586703658103943, + "step": 8034 + }, + { + "epoch": 5.5567081604426, + "grad_norm": 6.447203636169434, + "learning_rate": 2.4684954664207776e-05, + "log_odds_chosen": 10.461267471313477, + "log_odds_ratio": -4.435352821019478e-05, + "logits/chosen": -0.19314274191856384, + "logits/rejected": -0.2793979048728943, + "logps/chosen": -0.0004317377461120486, + "logps/rejected": -2.267042875289917, + "loss": 0.753, + "nll_loss": 0.18823593854904175, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.317377170082182e-05, + "rewards/margins": 0.22666111588478088, + "rewards/rejected": -0.22670429944992065, + "step": 8035 + }, + { + "epoch": 5.557399723374827, + "grad_norm": 9.434931755065918, + "learning_rate": 2.468111264791763e-05, + "log_odds_chosen": 10.66131591796875, + "log_odds_ratio": -0.00020637440320570022, + "logits/chosen": 0.12125441431999207, + "logits/rejected": 0.10425134003162384, + "logps/chosen": -0.0005198074504733086, + "logps/rejected": -2.5221619606018066, + "loss": 0.7512, + "nll_loss": 0.18778873980045319, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.198074723011814e-05, + "rewards/margins": 0.25216418504714966, + "rewards/rejected": -0.2522161900997162, + "step": 8036 + }, + { + "epoch": 5.558091286307054, + "grad_norm": 7.9632768630981445, + "learning_rate": 2.467727063162748e-05, + "log_odds_chosen": 10.371261596679688, + "log_odds_ratio": -0.00016782450256869197, + "logits/chosen": -0.43128275871276855, + "logits/rejected": -0.4339713752269745, + "logps/chosen": -0.00034248243900947273, + "logps/rejected": -1.950977087020874, + "loss": 0.7265, + "nll_loss": 0.1816154569387436, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4248245356138796e-05, + "rewards/margins": 0.19506347179412842, + "rewards/rejected": -0.19509771466255188, + "step": 8037 + }, + { + "epoch": 5.558782849239281, + "grad_norm": 12.128067016601562, + "learning_rate": 2.467342861533733e-05, + "log_odds_chosen": 11.665319442749023, + "log_odds_ratio": -1.4122078027867246e-05, + "logits/chosen": -0.3573598861694336, + "logits/rejected": -0.5253264904022217, + "logps/chosen": -0.00014026931603439152, + "logps/rejected": -2.598935127258301, + "loss": 0.7445, + "nll_loss": 0.18612857162952423, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4026931239641272e-05, + "rewards/margins": 0.2598794996738434, + "rewards/rejected": -0.2598935067653656, + "step": 8038 + }, + { + "epoch": 5.559474412171507, + "grad_norm": 4.508486747741699, + "learning_rate": 2.4669586599047183e-05, + "log_odds_chosen": 10.604143142700195, + "log_odds_ratio": -0.00021092304086778313, + "logits/chosen": -0.2617264688014984, + "logits/rejected": -0.3032621145248413, + "logps/chosen": -0.00018225214444100857, + "logps/rejected": -2.072490692138672, + "loss": 0.8293, + "nll_loss": 0.2073136270046234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8225213352707215e-05, + "rewards/margins": 0.2072308361530304, + "rewards/rejected": -0.20724907517433167, + "step": 8039 + }, + { + "epoch": 5.560165975103734, + "grad_norm": 7.761422634124756, + "learning_rate": 2.466574458275703e-05, + "log_odds_chosen": 10.363710403442383, + "log_odds_ratio": -0.00037738552782684565, + "logits/chosen": -0.173141211271286, + "logits/rejected": -0.13690513372421265, + "logps/chosen": -0.0004011451092083007, + "logps/rejected": -2.5756120681762695, + "loss": 0.827, + "nll_loss": 0.20670370757579803, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011451164842583e-05, + "rewards/margins": 0.2575211226940155, + "rewards/rejected": -0.25756123661994934, + "step": 8040 + }, + { + "epoch": 5.560857538035961, + "grad_norm": 8.741007804870605, + "learning_rate": 2.466190256646688e-05, + "log_odds_chosen": 10.194473266601562, + "log_odds_ratio": -0.0012418972328305244, + "logits/chosen": -0.004908490926027298, + "logits/rejected": -0.020960234105587006, + "logps/chosen": -0.0012074820697307587, + "logps/rejected": -2.2647933959960938, + "loss": 1.0754, + "nll_loss": 0.26873573660850525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012074820551788434, + "rewards/margins": 0.2263585925102234, + "rewards/rejected": -0.22647933661937714, + "step": 8041 + }, + { + "epoch": 5.561549100968188, + "grad_norm": 5.996640682220459, + "learning_rate": 2.4658060550176733e-05, + "log_odds_chosen": 9.534639358520508, + "log_odds_ratio": -0.0032207153271883726, + "logits/chosen": -0.05922443047165871, + "logits/rejected": -0.16529792547225952, + "logps/chosen": -0.0024670644197613, + "logps/rejected": -2.0751233100891113, + "loss": 0.9233, + "nll_loss": 0.23050865530967712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024670641869306564, + "rewards/margins": 0.20726561546325684, + "rewards/rejected": -0.20751231908798218, + "step": 8042 + }, + { + "epoch": 5.562240663900415, + "grad_norm": 7.457037448883057, + "learning_rate": 2.4654218533886586e-05, + "log_odds_chosen": 10.855881690979004, + "log_odds_ratio": -3.0194203645805828e-05, + "logits/chosen": -0.41566288471221924, + "logits/rejected": -0.40151041746139526, + "logps/chosen": -0.00016004889039322734, + "logps/rejected": -1.8453633785247803, + "loss": 0.6157, + "nll_loss": 0.15391522645950317, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.600488758413121e-05, + "rewards/margins": 0.18452034890651703, + "rewards/rejected": -0.18453636765480042, + "step": 8043 + }, + { + "epoch": 5.5629322268326415, + "grad_norm": 7.482526779174805, + "learning_rate": 2.4650376517596435e-05, + "log_odds_chosen": 10.949602127075195, + "log_odds_ratio": -3.0145914934109896e-05, + "logits/chosen": -0.474301278591156, + "logits/rejected": -0.4571562111377716, + "logps/chosen": -0.0003064905758947134, + "logps/rejected": -2.0776028633117676, + "loss": 1.0127, + "nll_loss": 0.2531779408454895, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.064906195504591e-05, + "rewards/margins": 0.20772963762283325, + "rewards/rejected": -0.2077602744102478, + "step": 8044 + }, + { + "epoch": 5.563623789764868, + "grad_norm": 8.855326652526855, + "learning_rate": 2.4646534501306287e-05, + "log_odds_chosen": 11.166003227233887, + "log_odds_ratio": -2.6106763471034355e-05, + "logits/chosen": -0.6545218229293823, + "logits/rejected": -0.6752309799194336, + "logps/chosen": -0.00016588937432970852, + "logps/rejected": -2.249631643295288, + "loss": 0.5967, + "nll_loss": 0.1491810381412506, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6588937796768732e-05, + "rewards/margins": 0.22494655847549438, + "rewards/rejected": -0.22496315836906433, + "step": 8045 + }, + { + "epoch": 5.564315352697095, + "grad_norm": 14.56978988647461, + "learning_rate": 2.464269248501614e-05, + "log_odds_chosen": 10.544530868530273, + "log_odds_ratio": -0.000425957259722054, + "logits/chosen": -0.20443707704544067, + "logits/rejected": -0.1254967600107193, + "logps/chosen": -0.00042200577445328236, + "logps/rejected": -2.852787494659424, + "loss": 1.0257, + "nll_loss": 0.2563808560371399, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.220057962811552e-05, + "rewards/margins": 0.2852365970611572, + "rewards/rejected": -0.2852787673473358, + "step": 8046 + }, + { + "epoch": 5.565006915629322, + "grad_norm": 10.46658706665039, + "learning_rate": 2.463885046872599e-05, + "log_odds_chosen": 10.141487121582031, + "log_odds_ratio": -0.0002209717349614948, + "logits/chosen": -0.49858924746513367, + "logits/rejected": -0.6654733419418335, + "logps/chosen": -0.0006808140315115452, + "logps/rejected": -2.273078680038452, + "loss": 0.6522, + "nll_loss": 0.16302204132080078, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.808139733038843e-05, + "rewards/margins": 0.22723978757858276, + "rewards/rejected": -0.22730787098407745, + "step": 8047 + }, + { + "epoch": 5.565698478561549, + "grad_norm": 6.561436653137207, + "learning_rate": 2.463500845243584e-05, + "log_odds_chosen": 10.76079273223877, + "log_odds_ratio": -4.994025221094489e-05, + "logits/chosen": -0.3768499493598938, + "logits/rejected": -0.5194529294967651, + "logps/chosen": -0.00028103572549298406, + "logps/rejected": -2.0566999912261963, + "loss": 0.6791, + "nll_loss": 0.16976627707481384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8103571821702644e-05, + "rewards/margins": 0.20564191043376923, + "rewards/rejected": -0.20566999912261963, + "step": 8048 + }, + { + "epoch": 5.566390041493776, + "grad_norm": 6.7705488204956055, + "learning_rate": 2.463116643614569e-05, + "log_odds_chosen": 9.857308387756348, + "log_odds_ratio": -0.0005722575588151813, + "logits/chosen": -0.5434191823005676, + "logits/rejected": -0.5727492570877075, + "logps/chosen": -0.0007818934391252697, + "logps/rejected": -1.9093397855758667, + "loss": 0.8424, + "nll_loss": 0.21055403351783752, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.818934682291001e-05, + "rewards/margins": 0.19085580110549927, + "rewards/rejected": -0.1909339725971222, + "step": 8049 + }, + { + "epoch": 5.5670816044260025, + "grad_norm": 9.348870277404785, + "learning_rate": 2.4627324419855543e-05, + "log_odds_chosen": 10.814437866210938, + "log_odds_ratio": -9.064251207746565e-05, + "logits/chosen": -0.3218635320663452, + "logits/rejected": -0.3573150038719177, + "logps/chosen": -0.0009795472724363208, + "logps/rejected": -2.4565610885620117, + "loss": 0.6158, + "nll_loss": 0.15393370389938354, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.795472578844056e-05, + "rewards/margins": 0.24555814266204834, + "rewards/rejected": -0.2456560879945755, + "step": 8050 + }, + { + "epoch": 5.567773167358229, + "grad_norm": 10.63403034210205, + "learning_rate": 2.462348240356539e-05, + "log_odds_chosen": 10.409517288208008, + "log_odds_ratio": -0.0005241333856247365, + "logits/chosen": -0.7085733413696289, + "logits/rejected": -0.8216226100921631, + "logps/chosen": -0.0009803158463910222, + "logps/rejected": -2.1244659423828125, + "loss": 0.9471, + "nll_loss": 0.2367316335439682, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.803157445276156e-05, + "rewards/margins": 0.2123485803604126, + "rewards/rejected": -0.21244660019874573, + "step": 8051 + }, + { + "epoch": 5.568464730290456, + "grad_norm": 10.331066131591797, + "learning_rate": 2.4619640387275244e-05, + "log_odds_chosen": 10.168068885803223, + "log_odds_ratio": -0.0004346623900346458, + "logits/chosen": -0.7611827850341797, + "logits/rejected": -0.6889389157295227, + "logps/chosen": -0.0005256303120404482, + "logps/rejected": -1.8769570589065552, + "loss": 1.1829, + "nll_loss": 0.29568618535995483, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.256303120404482e-05, + "rewards/margins": 0.1876431405544281, + "rewards/rejected": -0.1876957267522812, + "step": 8052 + }, + { + "epoch": 5.569156293222683, + "grad_norm": 6.900964736938477, + "learning_rate": 2.4615798370985093e-05, + "log_odds_chosen": 9.281791687011719, + "log_odds_ratio": -0.007029072381556034, + "logits/chosen": -0.28015413880348206, + "logits/rejected": -0.38101398944854736, + "logps/chosen": -0.0018885629251599312, + "logps/rejected": -1.5742026567459106, + "loss": 0.5586, + "nll_loss": 0.13894297182559967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001888562983367592, + "rewards/margins": 0.1572314202785492, + "rewards/rejected": -0.15742027759552002, + "step": 8053 + }, + { + "epoch": 5.56984785615491, + "grad_norm": 7.115659713745117, + "learning_rate": 2.4611956354694945e-05, + "log_odds_chosen": 11.122929573059082, + "log_odds_ratio": -5.1810442528221756e-05, + "logits/chosen": -0.014425862580537796, + "logits/rejected": -0.17043359577655792, + "logps/chosen": -0.000720279582310468, + "logps/rejected": -2.988020420074463, + "loss": 0.7804, + "nll_loss": 0.195082426071167, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.202795677585527e-05, + "rewards/margins": 0.2987300157546997, + "rewards/rejected": -0.29880204796791077, + "step": 8054 + }, + { + "epoch": 5.570539419087137, + "grad_norm": 7.839625358581543, + "learning_rate": 2.4608114338404795e-05, + "log_odds_chosen": 8.786584854125977, + "log_odds_ratio": -0.0005181143060326576, + "logits/chosen": 0.19618989527225494, + "logits/rejected": 0.024336382746696472, + "logps/chosen": -0.001151248230598867, + "logps/rejected": -1.499411702156067, + "loss": 0.6895, + "nll_loss": 0.17232772707939148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011512481432873756, + "rewards/margins": 0.1498260498046875, + "rewards/rejected": -0.14994117617607117, + "step": 8055 + }, + { + "epoch": 5.5712309820193635, + "grad_norm": 6.1973772048950195, + "learning_rate": 2.4604272322114647e-05, + "log_odds_chosen": 11.382923126220703, + "log_odds_ratio": -2.2351225197780877e-05, + "logits/chosen": -0.7432894110679626, + "logits/rejected": -0.838476300239563, + "logps/chosen": -0.00017028761794790626, + "logps/rejected": -2.575474977493286, + "loss": 0.6824, + "nll_loss": 0.17060251533985138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.702876397757791e-05, + "rewards/margins": 0.25753045082092285, + "rewards/rejected": -0.2575474977493286, + "step": 8056 + }, + { + "epoch": 5.57192254495159, + "grad_norm": 7.494674205780029, + "learning_rate": 2.46004303058245e-05, + "log_odds_chosen": 10.927709579467773, + "log_odds_ratio": -3.457109414739534e-05, + "logits/chosen": -0.8123071193695068, + "logits/rejected": -0.8477652072906494, + "logps/chosen": -0.00015166602679528296, + "logps/rejected": -2.0031211376190186, + "loss": 0.4584, + "nll_loss": 0.11458532512187958, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.51666044985177e-05, + "rewards/margins": 0.20029696822166443, + "rewards/rejected": -0.20031213760375977, + "step": 8057 + }, + { + "epoch": 5.572614107883817, + "grad_norm": 9.052752494812012, + "learning_rate": 2.459658828953435e-05, + "log_odds_chosen": 11.031610488891602, + "log_odds_ratio": -2.8391477826517075e-05, + "logits/chosen": -0.11535578966140747, + "logits/rejected": -0.1948067545890808, + "logps/chosen": -0.00014932632620912045, + "logps/rejected": -2.1253697872161865, + "loss": 0.932, + "nll_loss": 0.23300620913505554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4932633348507807e-05, + "rewards/margins": 0.21252202987670898, + "rewards/rejected": -0.21253696084022522, + "step": 8058 + }, + { + "epoch": 5.573305670816044, + "grad_norm": 8.160527229309082, + "learning_rate": 2.45927462732442e-05, + "log_odds_chosen": 10.030344009399414, + "log_odds_ratio": -0.0001031822175718844, + "logits/chosen": -0.7391003370285034, + "logits/rejected": -0.8036350607872009, + "logps/chosen": -0.00034203121322207153, + "logps/rejected": -1.551897644996643, + "loss": 0.6604, + "nll_loss": 0.1650814414024353, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.420311986701563e-05, + "rewards/margins": 0.15515556931495667, + "rewards/rejected": -0.15518975257873535, + "step": 8059 + }, + { + "epoch": 5.573997233748271, + "grad_norm": 9.76441764831543, + "learning_rate": 2.458890425695405e-05, + "log_odds_chosen": 10.707071304321289, + "log_odds_ratio": -7.481992361135781e-05, + "logits/chosen": -0.37856489419937134, + "logits/rejected": -0.5111147165298462, + "logps/chosen": -0.0011090300977230072, + "logps/rejected": -2.6076529026031494, + "loss": 1.0368, + "nll_loss": 0.25918781757354736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011090300540672615, + "rewards/margins": 0.26065438985824585, + "rewards/rejected": -0.26076528429985046, + "step": 8060 + }, + { + "epoch": 5.574688796680498, + "grad_norm": 7.49188232421875, + "learning_rate": 2.45850622406639e-05, + "log_odds_chosen": 9.97515869140625, + "log_odds_ratio": -0.0002881538530346006, + "logits/chosen": -0.33292925357818604, + "logits/rejected": -0.47597840428352356, + "logps/chosen": -0.0007897530449554324, + "logps/rejected": -2.464226722717285, + "loss": 0.7122, + "nll_loss": 0.1780133843421936, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.89752957643941e-05, + "rewards/margins": 0.2463436722755432, + "rewards/rejected": -0.2464226335287094, + "step": 8061 + }, + { + "epoch": 5.5753803596127245, + "grad_norm": 10.136432647705078, + "learning_rate": 2.458122022437375e-05, + "log_odds_chosen": 10.503422737121582, + "log_odds_ratio": -7.251178612932563e-05, + "logits/chosen": -0.33820295333862305, + "logits/rejected": -0.2976325750350952, + "logps/chosen": -0.0005803690291941166, + "logps/rejected": -2.2288050651550293, + "loss": 0.6395, + "nll_loss": 0.15985599160194397, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8036905102198943e-05, + "rewards/margins": 0.22282248735427856, + "rewards/rejected": -0.2228805124759674, + "step": 8062 + }, + { + "epoch": 5.576071922544951, + "grad_norm": 20.0115966796875, + "learning_rate": 2.4577378208083604e-05, + "log_odds_chosen": 10.732137680053711, + "log_odds_ratio": -7.273490336956456e-05, + "logits/chosen": -0.15554040670394897, + "logits/rejected": -0.23485851287841797, + "logps/chosen": -0.0004803851479664445, + "logps/rejected": -2.2087879180908203, + "loss": 1.1601, + "nll_loss": 0.29002463817596436, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.803850970347412e-05, + "rewards/margins": 0.2208307683467865, + "rewards/rejected": -0.22087879478931427, + "step": 8063 + }, + { + "epoch": 5.576763485477178, + "grad_norm": 6.817218780517578, + "learning_rate": 2.4573536191793453e-05, + "log_odds_chosen": 11.50283145904541, + "log_odds_ratio": -1.474907730880659e-05, + "logits/chosen": -0.3653714954853058, + "logits/rejected": -0.3387664258480072, + "logps/chosen": -0.00011848987196572125, + "logps/rejected": -2.3012490272521973, + "loss": 0.5957, + "nll_loss": 0.14892232418060303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1848987014673185e-05, + "rewards/margins": 0.23011307418346405, + "rewards/rejected": -0.23012492060661316, + "step": 8064 + }, + { + "epoch": 5.577455048409405, + "grad_norm": 8.593660354614258, + "learning_rate": 2.4569694175503305e-05, + "log_odds_chosen": 10.678092002868652, + "log_odds_ratio": -0.00027599811437539756, + "logits/chosen": -0.37883827090263367, + "logits/rejected": -0.4286655783653259, + "logps/chosen": -0.00045390904415398836, + "logps/rejected": -2.3745663166046143, + "loss": 0.8728, + "nll_loss": 0.2181757539510727, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.53909051429946e-05, + "rewards/margins": 0.2374112457036972, + "rewards/rejected": -0.23745664954185486, + "step": 8065 + }, + { + "epoch": 5.578146611341632, + "grad_norm": 9.433917999267578, + "learning_rate": 2.4565852159213158e-05, + "log_odds_chosen": 11.234939575195312, + "log_odds_ratio": -0.00016438095190096647, + "logits/chosen": -0.3239496946334839, + "logits/rejected": -0.45599889755249023, + "logps/chosen": -0.0002022422559093684, + "logps/rejected": -2.838594913482666, + "loss": 1.1254, + "nll_loss": 0.28134027123451233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0224228137522005e-05, + "rewards/margins": 0.28383928537368774, + "rewards/rejected": -0.283859521150589, + "step": 8066 + }, + { + "epoch": 5.578838174273859, + "grad_norm": 5.817116737365723, + "learning_rate": 2.4562010142923007e-05, + "log_odds_chosen": 11.082422256469727, + "log_odds_ratio": -0.0002813279570546001, + "logits/chosen": -0.47998160123825073, + "logits/rejected": -0.5466978549957275, + "logps/chosen": -0.001538085751235485, + "logps/rejected": -3.1759636402130127, + "loss": 0.9083, + "nll_loss": 0.22704046964645386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015380859258584678, + "rewards/margins": 0.3174425959587097, + "rewards/rejected": -0.3175963759422302, + "step": 8067 + }, + { + "epoch": 5.5795297372060855, + "grad_norm": 11.252291679382324, + "learning_rate": 2.455816812663286e-05, + "log_odds_chosen": 11.147133827209473, + "log_odds_ratio": -4.1483330278424546e-05, + "logits/chosen": -0.722510814666748, + "logits/rejected": -0.667180597782135, + "logps/chosen": -0.0001893240405479446, + "logps/rejected": -2.4593329429626465, + "loss": 1.4768, + "nll_loss": 0.3691995143890381, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.893240369099658e-05, + "rewards/margins": 0.24591434001922607, + "rewards/rejected": -0.24593327939510345, + "step": 8068 + }, + { + "epoch": 5.580221300138312, + "grad_norm": 6.8589582443237305, + "learning_rate": 2.455432611034271e-05, + "log_odds_chosen": 9.747818946838379, + "log_odds_ratio": -0.0002661866310518235, + "logits/chosen": -0.017276108264923096, + "logits/rejected": -0.12848007678985596, + "logps/chosen": -0.0009187893010675907, + "logps/rejected": -1.8754096031188965, + "loss": 0.768, + "nll_loss": 0.191975936293602, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.187893010675907e-05, + "rewards/margins": 0.18744908273220062, + "rewards/rejected": -0.1875409632921219, + "step": 8069 + }, + { + "epoch": 5.580912863070539, + "grad_norm": 5.451083660125732, + "learning_rate": 2.4550484094052557e-05, + "log_odds_chosen": 9.641411781311035, + "log_odds_ratio": -0.000142205273732543, + "logits/chosen": -0.6447650790214539, + "logits/rejected": -0.639355480670929, + "logps/chosen": -0.0003977498272433877, + "logps/rejected": -1.4326138496398926, + "loss": 0.6992, + "nll_loss": 0.17477968335151672, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.977498272433877e-05, + "rewards/margins": 0.14322161674499512, + "rewards/rejected": -0.1432614028453827, + "step": 8070 + }, + { + "epoch": 5.581604426002766, + "grad_norm": 11.98173999786377, + "learning_rate": 2.454664207776241e-05, + "log_odds_chosen": 8.304815292358398, + "log_odds_ratio": -0.3241727948188782, + "logits/chosen": -0.649227499961853, + "logits/rejected": -0.7476930618286133, + "logps/chosen": -0.05667625367641449, + "logps/rejected": -1.2627215385437012, + "loss": 0.9664, + "nll_loss": 0.20918306708335876, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005667625926434994, + "rewards/margins": 0.12060452997684479, + "rewards/rejected": -0.12627214193344116, + "step": 8071 + }, + { + "epoch": 5.582295988934993, + "grad_norm": 6.674464225769043, + "learning_rate": 2.4542800061472262e-05, + "log_odds_chosen": 11.908063888549805, + "log_odds_ratio": -1.3823148037772626e-05, + "logits/chosen": -0.11755906790494919, + "logits/rejected": -0.20479317009449005, + "logps/chosen": -0.00016248153406195343, + "logps/rejected": -3.069481611251831, + "loss": 0.8086, + "nll_loss": 0.2021464854478836, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6248153769993223e-05, + "rewards/margins": 0.30693191289901733, + "rewards/rejected": -0.306948184967041, + "step": 8072 + }, + { + "epoch": 5.58298755186722, + "grad_norm": 9.36412239074707, + "learning_rate": 2.453895804518211e-05, + "log_odds_chosen": 11.2120361328125, + "log_odds_ratio": -2.773918276943732e-05, + "logits/chosen": -0.6709299087524414, + "logits/rejected": -0.7922207713127136, + "logps/chosen": -0.00016998070350382477, + "logps/rejected": -2.340764045715332, + "loss": 0.6088, + "nll_loss": 0.15218935906887054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6998070350382477e-05, + "rewards/margins": 0.2340594232082367, + "rewards/rejected": -0.23407642543315887, + "step": 8073 + }, + { + "epoch": 5.5836791147994465, + "grad_norm": 5.475508689880371, + "learning_rate": 2.4535116028891964e-05, + "log_odds_chosen": 11.978597640991211, + "log_odds_ratio": -6.813578238507034e-06, + "logits/chosen": -0.49684345722198486, + "logits/rejected": -0.4248015284538269, + "logps/chosen": -9.586186206433922e-05, + "logps/rejected": -2.6206562519073486, + "loss": 0.4513, + "nll_loss": 0.11282727867364883, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.586186934029683e-06, + "rewards/margins": 0.26205605268478394, + "rewards/rejected": -0.2620656192302704, + "step": 8074 + }, + { + "epoch": 5.584370677731673, + "grad_norm": 10.361490249633789, + "learning_rate": 2.4531274012601816e-05, + "log_odds_chosen": 9.624015808105469, + "log_odds_ratio": -0.00036905109300278127, + "logits/chosen": -0.8618177175521851, + "logits/rejected": -0.8672319054603577, + "logps/chosen": -0.0006858786218799651, + "logps/rejected": -2.188358783721924, + "loss": 1.0405, + "nll_loss": 0.26007890701293945, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.858785491203889e-05, + "rewards/margins": 0.21876728534698486, + "rewards/rejected": -0.21883587539196014, + "step": 8075 + }, + { + "epoch": 5.5850622406639, + "grad_norm": 7.7408246994018555, + "learning_rate": 2.4527431996311665e-05, + "log_odds_chosen": 10.29294204711914, + "log_odds_ratio": -0.00012540553871076554, + "logits/chosen": -0.3908573389053345, + "logits/rejected": -0.5217314958572388, + "logps/chosen": -0.001282507088035345, + "logps/rejected": -1.9427863359451294, + "loss": 0.537, + "nll_loss": 0.134227454662323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001282507146243006, + "rewards/margins": 0.1941503882408142, + "rewards/rejected": -0.19427862763404846, + "step": 8076 + }, + { + "epoch": 5.585753803596127, + "grad_norm": 11.349954605102539, + "learning_rate": 2.4523589980021518e-05, + "log_odds_chosen": 11.313045501708984, + "log_odds_ratio": -1.8136362996301614e-05, + "logits/chosen": -0.5860576033592224, + "logits/rejected": -0.5836232304573059, + "logps/chosen": -0.00015536148566752672, + "logps/rejected": -2.554374933242798, + "loss": 0.7355, + "nll_loss": 0.18388128280639648, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5536148566752672e-05, + "rewards/margins": 0.2554219365119934, + "rewards/rejected": -0.2554374933242798, + "step": 8077 + }, + { + "epoch": 5.586445366528354, + "grad_norm": 9.04966926574707, + "learning_rate": 2.4519747963731367e-05, + "log_odds_chosen": 10.225482940673828, + "log_odds_ratio": -0.00024371693143621087, + "logits/chosen": -0.7126716375350952, + "logits/rejected": -0.6592588424682617, + "logps/chosen": -0.0001922544906847179, + "logps/rejected": -1.6873338222503662, + "loss": 1.5855, + "nll_loss": 0.3963471055030823, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9225448340876028e-05, + "rewards/margins": 0.16871415078639984, + "rewards/rejected": -0.1687333732843399, + "step": 8078 + }, + { + "epoch": 5.587136929460581, + "grad_norm": 9.121757507324219, + "learning_rate": 2.4515905947441216e-05, + "log_odds_chosen": 10.379926681518555, + "log_odds_ratio": -0.0003530043177306652, + "logits/chosen": -0.5633220076560974, + "logits/rejected": -0.5946686267852783, + "logps/chosen": -0.0005274852155707777, + "logps/rejected": -1.8887830972671509, + "loss": 0.6953, + "nll_loss": 0.17379090189933777, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.274852446746081e-05, + "rewards/margins": 0.1888255774974823, + "rewards/rejected": -0.18887831270694733, + "step": 8079 + }, + { + "epoch": 5.587828492392807, + "grad_norm": 25.334081649780273, + "learning_rate": 2.451206393115107e-05, + "log_odds_chosen": 9.471872329711914, + "log_odds_ratio": -0.03043685294687748, + "logits/chosen": -0.5243476033210754, + "logits/rejected": -0.5439872741699219, + "logps/chosen": -0.0027094304095953703, + "logps/rejected": -1.7234371900558472, + "loss": 0.6609, + "nll_loss": 0.16217103600502014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027094304095953703, + "rewards/margins": 0.17207276821136475, + "rewards/rejected": -0.17234373092651367, + "step": 8080 + }, + { + "epoch": 5.588520055325034, + "grad_norm": 19.014925003051758, + "learning_rate": 2.450822191486092e-05, + "log_odds_chosen": 10.891995429992676, + "log_odds_ratio": -0.00017133046640083194, + "logits/chosen": -0.5247626900672913, + "logits/rejected": -0.5682399868965149, + "logps/chosen": -0.0002994315873365849, + "logps/rejected": -1.8587620258331299, + "loss": 0.8665, + "nll_loss": 0.21661463379859924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9943159461254254e-05, + "rewards/margins": 0.1858462542295456, + "rewards/rejected": -0.18587620556354523, + "step": 8081 + }, + { + "epoch": 5.589211618257261, + "grad_norm": 19.9434757232666, + "learning_rate": 2.450437989857077e-05, + "log_odds_chosen": 8.52181339263916, + "log_odds_ratio": -0.08861760050058365, + "logits/chosen": -0.6639509201049805, + "logits/rejected": -0.7798012495040894, + "logps/chosen": -0.01545221172273159, + "logps/rejected": -1.6898785829544067, + "loss": 1.7577, + "nll_loss": 0.4305575489997864, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0015452211955562234, + "rewards/margins": 0.16744264960289001, + "rewards/rejected": -0.16898785531520844, + "step": 8082 + }, + { + "epoch": 5.589903181189488, + "grad_norm": 6.002845287322998, + "learning_rate": 2.4500537882280622e-05, + "log_odds_chosen": 9.620186805725098, + "log_odds_ratio": -0.00030638324096798897, + "logits/chosen": -0.7766826748847961, + "logits/rejected": -0.7715072631835938, + "logps/chosen": -0.00021891409414820373, + "logps/rejected": -1.4006415605545044, + "loss": 0.7964, + "nll_loss": 0.1990627646446228, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1891410142416134e-05, + "rewards/margins": 0.1400422751903534, + "rewards/rejected": -0.14006415009498596, + "step": 8083 + }, + { + "epoch": 5.590594744121715, + "grad_norm": 6.572117805480957, + "learning_rate": 2.4496695865990475e-05, + "log_odds_chosen": 9.993795394897461, + "log_odds_ratio": -0.0008166706538759172, + "logits/chosen": -0.8760631084442139, + "logits/rejected": -0.8676169514656067, + "logps/chosen": -0.005875871051102877, + "logps/rejected": -2.340808629989624, + "loss": 0.7076, + "nll_loss": 0.1768127679824829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005875870701856911, + "rewards/margins": 0.23349328339099884, + "rewards/rejected": -0.23408088088035583, + "step": 8084 + }, + { + "epoch": 5.591286307053942, + "grad_norm": 4.220310688018799, + "learning_rate": 2.4492853849700324e-05, + "log_odds_chosen": 9.948683738708496, + "log_odds_ratio": -0.00011734232248272747, + "logits/chosen": -0.276368111371994, + "logits/rejected": -0.278658002614975, + "logps/chosen": -0.0008274052524939179, + "logps/rejected": -2.1559712886810303, + "loss": 0.6354, + "nll_loss": 0.1588326096534729, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.274052379420027e-05, + "rewards/margins": 0.2155143916606903, + "rewards/rejected": -0.21559712290763855, + "step": 8085 + }, + { + "epoch": 5.591977869986168, + "grad_norm": 8.20447063446045, + "learning_rate": 2.4489011833410176e-05, + "log_odds_chosen": 10.30103874206543, + "log_odds_ratio": -0.00018550232925917953, + "logits/chosen": -0.1794845461845398, + "logits/rejected": -0.23964820802211761, + "logps/chosen": -0.0002999446587637067, + "logps/rejected": -1.9575010538101196, + "loss": 0.668, + "nll_loss": 0.1669834852218628, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9994469514349476e-05, + "rewards/margins": 0.1957201063632965, + "rewards/rejected": -0.19575008749961853, + "step": 8086 + }, + { + "epoch": 5.592669432918395, + "grad_norm": 6.297646522521973, + "learning_rate": 2.4485169817120025e-05, + "log_odds_chosen": 9.927389144897461, + "log_odds_ratio": -0.0001191746341646649, + "logits/chosen": -0.42461729049682617, + "logits/rejected": -0.46698299050331116, + "logps/chosen": -0.000342499086400494, + "logps/rejected": -1.638321876525879, + "loss": 0.7794, + "nll_loss": 0.19483135640621185, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4249907912453637e-05, + "rewards/margins": 0.16379794478416443, + "rewards/rejected": -0.1638321876525879, + "step": 8087 + }, + { + "epoch": 5.593360995850622, + "grad_norm": 6.690411567687988, + "learning_rate": 2.4481327800829874e-05, + "log_odds_chosen": 10.245779991149902, + "log_odds_ratio": -0.00010225032747257501, + "logits/chosen": -0.5880808234214783, + "logits/rejected": -0.6006646156311035, + "logps/chosen": -0.00046540662879124284, + "logps/rejected": -2.1198089122772217, + "loss": 0.7397, + "nll_loss": 0.18490742146968842, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6540662879124284e-05, + "rewards/margins": 0.21193435788154602, + "rewards/rejected": -0.2119809091091156, + "step": 8088 + }, + { + "epoch": 5.594052558782849, + "grad_norm": 4.878581523895264, + "learning_rate": 2.4477485784539727e-05, + "log_odds_chosen": 10.35411548614502, + "log_odds_ratio": -4.369155431049876e-05, + "logits/chosen": -0.6042921543121338, + "logits/rejected": -0.6532620191574097, + "logps/chosen": -0.00029760473989881575, + "logps/rejected": -2.0455756187438965, + "loss": 0.5802, + "nll_loss": 0.14505022764205933, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9760474717477337e-05, + "rewards/margins": 0.20452779531478882, + "rewards/rejected": -0.20455756783485413, + "step": 8089 + }, + { + "epoch": 5.594744121715076, + "grad_norm": 6.3988752365112305, + "learning_rate": 2.447364376824958e-05, + "log_odds_chosen": 10.794388771057129, + "log_odds_ratio": -0.0006846464239060879, + "logits/chosen": -0.2714294195175171, + "logits/rejected": -0.3008936643600464, + "logps/chosen": -0.000839495100080967, + "logps/rejected": -2.3594367504119873, + "loss": 0.7923, + "nll_loss": 0.19801412522792816, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.394951873924583e-05, + "rewards/margins": 0.2358597218990326, + "rewards/rejected": -0.23594367504119873, + "step": 8090 + }, + { + "epoch": 5.595435684647303, + "grad_norm": 11.49247932434082, + "learning_rate": 2.4469801751959428e-05, + "log_odds_chosen": 9.74952507019043, + "log_odds_ratio": -0.0001429698895663023, + "logits/chosen": -0.4237690567970276, + "logits/rejected": -0.4887728691101074, + "logps/chosen": -0.000436846079537645, + "logps/rejected": -1.6562985181808472, + "loss": 0.6642, + "nll_loss": 0.16604474186897278, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3684605770977214e-05, + "rewards/margins": 0.1655861735343933, + "rewards/rejected": -0.16562986373901367, + "step": 8091 + }, + { + "epoch": 5.596127247579529, + "grad_norm": 8.297944068908691, + "learning_rate": 2.446595973566928e-05, + "log_odds_chosen": 9.776349067687988, + "log_odds_ratio": -0.0016647065058350563, + "logits/chosen": -0.1512414813041687, + "logits/rejected": -0.22371408343315125, + "logps/chosen": -0.0034234109334647655, + "logps/rejected": -1.9482197761535645, + "loss": 1.1818, + "nll_loss": 0.29528945684432983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034234108170494437, + "rewards/margins": 0.19447962939739227, + "rewards/rejected": -0.19482198357582092, + "step": 8092 + }, + { + "epoch": 5.596818810511756, + "grad_norm": 7.137149333953857, + "learning_rate": 2.4462117719379133e-05, + "log_odds_chosen": 10.748970031738281, + "log_odds_ratio": -9.89637992461212e-05, + "logits/chosen": -0.22393687069416046, + "logits/rejected": -0.3681349456310272, + "logps/chosen": -0.00027920937282033265, + "logps/rejected": -2.4082417488098145, + "loss": 0.9565, + "nll_loss": 0.2391146719455719, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7920936190639623e-05, + "rewards/margins": 0.2407962530851364, + "rewards/rejected": -0.24082417786121368, + "step": 8093 + }, + { + "epoch": 5.597510373443983, + "grad_norm": 10.234183311462402, + "learning_rate": 2.4458275703088982e-05, + "log_odds_chosen": 10.590971946716309, + "log_odds_ratio": -0.00014004560944158584, + "logits/chosen": -0.488928884267807, + "logits/rejected": -0.5948966145515442, + "logps/chosen": -0.0004322922322899103, + "logps/rejected": -2.103107452392578, + "loss": 0.6438, + "nll_loss": 0.16093306243419647, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3229225411778316e-05, + "rewards/margins": 0.21026751399040222, + "rewards/rejected": -0.21031074225902557, + "step": 8094 + }, + { + "epoch": 5.59820193637621, + "grad_norm": 5.393209457397461, + "learning_rate": 2.4454433686798835e-05, + "log_odds_chosen": 10.531394004821777, + "log_odds_ratio": -0.00012310939200688154, + "logits/chosen": -0.3937787711620331, + "logits/rejected": -0.35000520944595337, + "logps/chosen": -0.00021687900880351663, + "logps/rejected": -2.040586471557617, + "loss": 0.7389, + "nll_loss": 0.18470054864883423, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1687901607947424e-05, + "rewards/margins": 0.20403696596622467, + "rewards/rejected": -0.20405864715576172, + "step": 8095 + }, + { + "epoch": 5.598893499308437, + "grad_norm": 7.336994171142578, + "learning_rate": 2.4450591670508684e-05, + "log_odds_chosen": 10.63749885559082, + "log_odds_ratio": -0.00015697650087531656, + "logits/chosen": -0.22074763476848602, + "logits/rejected": -0.3110903203487396, + "logps/chosen": -0.000745423894841224, + "logps/rejected": -2.6639904975891113, + "loss": 0.8247, + "nll_loss": 0.20615951716899872, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.45423894841224e-05, + "rewards/margins": 0.266324520111084, + "rewards/rejected": -0.2663990557193756, + "step": 8096 + }, + { + "epoch": 5.5995850622406635, + "grad_norm": 7.200364589691162, + "learning_rate": 2.4446749654218533e-05, + "log_odds_chosen": 10.061790466308594, + "log_odds_ratio": -0.00010069488052977249, + "logits/chosen": -0.04585893079638481, + "logits/rejected": -0.07798229157924652, + "logps/chosen": -0.00022760449792258441, + "logps/rejected": -1.6503199338912964, + "loss": 0.4874, + "nll_loss": 0.12184731662273407, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2760450519854203e-05, + "rewards/margins": 0.16500923037528992, + "rewards/rejected": -0.16503199934959412, + "step": 8097 + }, + { + "epoch": 5.60027662517289, + "grad_norm": 12.905299186706543, + "learning_rate": 2.4442907637928385e-05, + "log_odds_chosen": 10.574941635131836, + "log_odds_ratio": -4.7053843445610255e-05, + "logits/chosen": -0.5514289140701294, + "logits/rejected": -0.5167350769042969, + "logps/chosen": -0.0002096430107485503, + "logps/rejected": -2.0648980140686035, + "loss": 1.3079, + "nll_loss": 0.32696911692619324, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0964303985238075e-05, + "rewards/margins": 0.2064688503742218, + "rewards/rejected": -0.20648980140686035, + "step": 8098 + }, + { + "epoch": 5.600968188105117, + "grad_norm": 10.966777801513672, + "learning_rate": 2.4439065621638238e-05, + "log_odds_chosen": 10.464959144592285, + "log_odds_ratio": -4.483927841647528e-05, + "logits/chosen": -0.3929779827594757, + "logits/rejected": -0.4391102194786072, + "logps/chosen": -0.00016973260790109634, + "logps/rejected": -1.95558500289917, + "loss": 0.9996, + "nll_loss": 0.2498941719532013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6973261153907515e-05, + "rewards/margins": 0.19554153084754944, + "rewards/rejected": -0.19555850327014923, + "step": 8099 + }, + { + "epoch": 5.601659751037344, + "grad_norm": 4.830338001251221, + "learning_rate": 2.4435223605348087e-05, + "log_odds_chosen": 9.880056381225586, + "log_odds_ratio": -0.00016596855130046606, + "logits/chosen": -0.3157750964164734, + "logits/rejected": -0.29224520921707153, + "logps/chosen": -0.0003259595832787454, + "logps/rejected": -1.9344159364700317, + "loss": 0.3731, + "nll_loss": 0.09325173497200012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.259596269344911e-05, + "rewards/margins": 0.1934089958667755, + "rewards/rejected": -0.19344159960746765, + "step": 8100 + }, + { + "epoch": 5.602351313969571, + "grad_norm": 7.5184454917907715, + "learning_rate": 2.443138158905794e-05, + "log_odds_chosen": 12.033928871154785, + "log_odds_ratio": -7.474442099919543e-06, + "logits/chosen": -0.6768447756767273, + "logits/rejected": -0.6699569821357727, + "logps/chosen": -0.0002758900518529117, + "logps/rejected": -3.3420660495758057, + "loss": 0.5762, + "nll_loss": 0.1440436989068985, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7589003366301768e-05, + "rewards/margins": 0.3341790437698364, + "rewards/rejected": -0.33420661091804504, + "step": 8101 + }, + { + "epoch": 5.603042876901798, + "grad_norm": 7.3457350730896, + "learning_rate": 2.442753957276779e-05, + "log_odds_chosen": 10.256988525390625, + "log_odds_ratio": -0.00010181563993683085, + "logits/chosen": -0.43973827362060547, + "logits/rejected": -0.5459225177764893, + "logps/chosen": -0.00029382610227912664, + "logps/rejected": -2.0015015602111816, + "loss": 0.8933, + "nll_loss": 0.22332513332366943, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9382606953731738e-05, + "rewards/margins": 0.20012077689170837, + "rewards/rejected": -0.20015016198158264, + "step": 8102 + }, + { + "epoch": 5.6037344398340245, + "grad_norm": 12.629637718200684, + "learning_rate": 2.442369755647764e-05, + "log_odds_chosen": 9.829996109008789, + "log_odds_ratio": -0.00035466509871184826, + "logits/chosen": -0.3330022990703583, + "logits/rejected": -0.402560293674469, + "logps/chosen": -0.005177430808544159, + "logps/rejected": -1.6692194938659668, + "loss": 0.8688, + "nll_loss": 0.21716631948947906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005177431157790124, + "rewards/margins": 0.16640420258045197, + "rewards/rejected": -0.1669219434261322, + "step": 8103 + }, + { + "epoch": 5.604426002766251, + "grad_norm": 11.547345161437988, + "learning_rate": 2.4419855540187493e-05, + "log_odds_chosen": 9.086764335632324, + "log_odds_ratio": -0.14109911024570465, + "logits/chosen": -0.4889862537384033, + "logits/rejected": -0.4533969759941101, + "logps/chosen": -0.021350393071770668, + "logps/rejected": -1.995664358139038, + "loss": 0.9185, + "nll_loss": 0.21551468968391418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002135039307177067, + "rewards/margins": 0.19743140041828156, + "rewards/rejected": -0.19956645369529724, + "step": 8104 + }, + { + "epoch": 5.605117565698478, + "grad_norm": 10.75623893737793, + "learning_rate": 2.4416013523897342e-05, + "log_odds_chosen": 10.713278770446777, + "log_odds_ratio": -6.872645462863147e-05, + "logits/chosen": -0.4476158618927002, + "logits/rejected": -0.3338536322116852, + "logps/chosen": -0.00020456750644370914, + "logps/rejected": -1.8309910297393799, + "loss": 0.7068, + "nll_loss": 0.17670387029647827, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.045674955297727e-05, + "rewards/margins": 0.18307864665985107, + "rewards/rejected": -0.18309910595417023, + "step": 8105 + }, + { + "epoch": 5.605809128630705, + "grad_norm": 8.851183891296387, + "learning_rate": 2.441217150760719e-05, + "log_odds_chosen": 9.805652618408203, + "log_odds_ratio": -0.00024620784097351134, + "logits/chosen": -0.5062170624732971, + "logits/rejected": -0.5972744822502136, + "logps/chosen": -0.0004057588812429458, + "logps/rejected": -1.4951839447021484, + "loss": 1.1216, + "nll_loss": 0.28038015961647034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.057588739669882e-05, + "rewards/margins": 0.14947780966758728, + "rewards/rejected": -0.14951838552951813, + "step": 8106 + }, + { + "epoch": 5.606500691562932, + "grad_norm": 9.78542423248291, + "learning_rate": 2.4408329491317044e-05, + "log_odds_chosen": 10.101738929748535, + "log_odds_ratio": -0.00014487582666333765, + "logits/chosen": -0.38510605692863464, + "logits/rejected": -0.43797144293785095, + "logps/chosen": -0.00026860134676098824, + "logps/rejected": -1.763177752494812, + "loss": 0.8307, + "nll_loss": 0.2076645791530609, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6860136131290346e-05, + "rewards/margins": 0.17629091441631317, + "rewards/rejected": -0.17631778120994568, + "step": 8107 + }, + { + "epoch": 5.607192254495159, + "grad_norm": 9.541324615478516, + "learning_rate": 2.4404487475026896e-05, + "log_odds_chosen": 11.488207817077637, + "log_odds_ratio": -2.1646897948812693e-05, + "logits/chosen": -0.32397058606147766, + "logits/rejected": -0.3785797953605652, + "logps/chosen": -0.0003754164499696344, + "logps/rejected": -2.5622153282165527, + "loss": 0.7442, + "nll_loss": 0.1860416829586029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.75416457245592e-05, + "rewards/margins": 0.25618401169776917, + "rewards/rejected": -0.25622156262397766, + "step": 8108 + }, + { + "epoch": 5.6078838174273855, + "grad_norm": 12.605222702026367, + "learning_rate": 2.4400645458736745e-05, + "log_odds_chosen": 11.033609390258789, + "log_odds_ratio": -4.730310320155695e-05, + "logits/chosen": -0.603103518486023, + "logits/rejected": -0.6246871948242188, + "logps/chosen": -0.000758894719183445, + "logps/rejected": -2.748445749282837, + "loss": 1.1526, + "nll_loss": 0.28813713788986206, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.588947482872754e-05, + "rewards/margins": 0.2747687101364136, + "rewards/rejected": -0.27484458684921265, + "step": 8109 + }, + { + "epoch": 5.608575380359612, + "grad_norm": 15.241668701171875, + "learning_rate": 2.4396803442446598e-05, + "log_odds_chosen": 10.998993873596191, + "log_odds_ratio": -2.9695545890717767e-05, + "logits/chosen": -0.374109148979187, + "logits/rejected": -0.5247286558151245, + "logps/chosen": -0.0002444365236442536, + "logps/rejected": -2.5976760387420654, + "loss": 1.0804, + "nll_loss": 0.27008968591690063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.444365236442536e-05, + "rewards/margins": 0.2597431540489197, + "rewards/rejected": -0.2597675919532776, + "step": 8110 + }, + { + "epoch": 5.609266943291839, + "grad_norm": 6.355210781097412, + "learning_rate": 2.439296142615645e-05, + "log_odds_chosen": 10.260903358459473, + "log_odds_ratio": -0.0003000001597683877, + "logits/chosen": -0.5140043497085571, + "logits/rejected": -0.5887265205383301, + "logps/chosen": -0.00032185198506340384, + "logps/rejected": -1.8163726329803467, + "loss": 0.7539, + "nll_loss": 0.18843594193458557, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.218520214431919e-05, + "rewards/margins": 0.18160510063171387, + "rewards/rejected": -0.18163727223873138, + "step": 8111 + }, + { + "epoch": 5.609958506224066, + "grad_norm": 6.719587802886963, + "learning_rate": 2.43891194098663e-05, + "log_odds_chosen": 11.945234298706055, + "log_odds_ratio": -8.13683436717838e-06, + "logits/chosen": -0.4414019286632538, + "logits/rejected": -0.47166669368743896, + "logps/chosen": -0.00016639998648315668, + "logps/rejected": -2.941502571105957, + "loss": 0.7043, + "nll_loss": 0.17606189846992493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6639998648315668e-05, + "rewards/margins": 0.29413360357284546, + "rewards/rejected": -0.2941502630710602, + "step": 8112 + }, + { + "epoch": 5.610650069156293, + "grad_norm": 10.019978523254395, + "learning_rate": 2.438527739357615e-05, + "log_odds_chosen": 8.934112548828125, + "log_odds_ratio": -0.004202369134873152, + "logits/chosen": -0.646615743637085, + "logits/rejected": -0.5332063436508179, + "logps/chosen": -0.03276941925287247, + "logps/rejected": -1.848158836364746, + "loss": 0.5842, + "nll_loss": 0.14562909305095673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003276942064985633, + "rewards/margins": 0.18153896927833557, + "rewards/rejected": -0.1848158836364746, + "step": 8113 + }, + { + "epoch": 5.61134163208852, + "grad_norm": 8.241288185119629, + "learning_rate": 2.4381435377286e-05, + "log_odds_chosen": 10.67772388458252, + "log_odds_ratio": -9.114733984461054e-05, + "logits/chosen": -0.32889315485954285, + "logits/rejected": -0.4711022675037384, + "logps/chosen": -0.000362161808880046, + "logps/rejected": -2.067312717437744, + "loss": 0.9802, + "nll_loss": 0.24502873420715332, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6216177250025794e-05, + "rewards/margins": 0.2066950649023056, + "rewards/rejected": -0.20673127472400665, + "step": 8114 + }, + { + "epoch": 5.6120331950207465, + "grad_norm": 10.448286056518555, + "learning_rate": 2.437759336099585e-05, + "log_odds_chosen": 10.521625518798828, + "log_odds_ratio": -0.0002287977113155648, + "logits/chosen": -0.395813912153244, + "logits/rejected": -0.3993910551071167, + "logps/chosen": -0.0018196626333519816, + "logps/rejected": -2.648771286010742, + "loss": 0.8287, + "nll_loss": 0.20715875923633575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018196628661826253, + "rewards/margins": 0.2646951675415039, + "rewards/rejected": -0.2648771405220032, + "step": 8115 + }, + { + "epoch": 5.612724757952973, + "grad_norm": 6.530020236968994, + "learning_rate": 2.4373751344705702e-05, + "log_odds_chosen": 9.301420211791992, + "log_odds_ratio": -0.000275790982414037, + "logits/chosen": -0.31070441007614136, + "logits/rejected": -0.37882116436958313, + "logps/chosen": -0.000820478075183928, + "logps/rejected": -2.0555219650268555, + "loss": 0.8836, + "nll_loss": 0.2208763211965561, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.204780897358432e-05, + "rewards/margins": 0.20547014474868774, + "rewards/rejected": -0.20555220544338226, + "step": 8116 + }, + { + "epoch": 5.6134163208852, + "grad_norm": 9.40421199798584, + "learning_rate": 2.4369909328415555e-05, + "log_odds_chosen": 11.07664966583252, + "log_odds_ratio": -4.299749343772419e-05, + "logits/chosen": -0.7671568393707275, + "logits/rejected": -0.845366358757019, + "logps/chosen": -0.00028596227639354765, + "logps/rejected": -2.3645904064178467, + "loss": 1.0057, + "nll_loss": 0.25142791867256165, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8596226911759004e-05, + "rewards/margins": 0.23643043637275696, + "rewards/rejected": -0.23645903170108795, + "step": 8117 + }, + { + "epoch": 5.614107883817427, + "grad_norm": 7.280426502227783, + "learning_rate": 2.4366067312125404e-05, + "log_odds_chosen": 10.041878700256348, + "log_odds_ratio": -0.0001619046670384705, + "logits/chosen": -0.46100568771362305, + "logits/rejected": -0.5172097086906433, + "logps/chosen": -0.0005833308678120375, + "logps/rejected": -1.877671480178833, + "loss": 1.0205, + "nll_loss": 0.2551140785217285, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.833308750879951e-05, + "rewards/margins": 0.1877087950706482, + "rewards/rejected": -0.1877671331167221, + "step": 8118 + }, + { + "epoch": 5.614799446749654, + "grad_norm": 7.466595649719238, + "learning_rate": 2.4362225295835256e-05, + "log_odds_chosen": 10.243062973022461, + "log_odds_ratio": -0.00020536058582365513, + "logits/chosen": -0.8286471962928772, + "logits/rejected": -0.8110532164573669, + "logps/chosen": -0.00029074729536660016, + "logps/rejected": -1.445319414138794, + "loss": 0.5988, + "nll_loss": 0.1496695578098297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.907473208324518e-05, + "rewards/margins": 0.14450286328792572, + "rewards/rejected": -0.14453193545341492, + "step": 8119 + }, + { + "epoch": 5.615491009681881, + "grad_norm": 8.413311004638672, + "learning_rate": 2.435838327954511e-05, + "log_odds_chosen": 9.944595336914062, + "log_odds_ratio": -0.000541605637408793, + "logits/chosen": -0.521904706954956, + "logits/rejected": -0.5767476558685303, + "logps/chosen": -0.00025783380260691047, + "logps/rejected": -1.7686951160430908, + "loss": 1.2197, + "nll_loss": 0.3048711121082306, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.578338171588257e-05, + "rewards/margins": 0.17684374749660492, + "rewards/rejected": -0.17686951160430908, + "step": 8120 + }, + { + "epoch": 5.6161825726141075, + "grad_norm": 15.419757843017578, + "learning_rate": 2.4354541263254958e-05, + "log_odds_chosen": 9.785442352294922, + "log_odds_ratio": -0.005669338628649712, + "logits/chosen": -0.20900648832321167, + "logits/rejected": -0.3056734800338745, + "logps/chosen": -0.004202909301966429, + "logps/rejected": -2.0534253120422363, + "loss": 0.8243, + "nll_loss": 0.20550069212913513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004202909185551107, + "rewards/margins": 0.20492225885391235, + "rewards/rejected": -0.20534256100654602, + "step": 8121 + }, + { + "epoch": 5.616874135546334, + "grad_norm": 9.012137413024902, + "learning_rate": 2.435069924696481e-05, + "log_odds_chosen": 10.545869827270508, + "log_odds_ratio": -3.9361391827696934e-05, + "logits/chosen": -0.28159162402153015, + "logits/rejected": -0.3681046962738037, + "logps/chosen": -0.00032194817322306335, + "logps/rejected": -1.9497275352478027, + "loss": 0.6193, + "nll_loss": 0.15483029186725616, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.219482096028514e-05, + "rewards/margins": 0.19494055211544037, + "rewards/rejected": -0.19497275352478027, + "step": 8122 + }, + { + "epoch": 5.617565698478561, + "grad_norm": 4.492483139038086, + "learning_rate": 2.434685723067466e-05, + "log_odds_chosen": 10.464592933654785, + "log_odds_ratio": -8.870554302120581e-05, + "logits/chosen": -0.3013230562210083, + "logits/rejected": -0.35164928436279297, + "logps/chosen": -0.00027655542362481356, + "logps/rejected": -2.1586222648620605, + "loss": 0.543, + "nll_loss": 0.13574101030826569, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7655545636662282e-05, + "rewards/margins": 0.2158345729112625, + "rewards/rejected": -0.2158622443675995, + "step": 8123 + }, + { + "epoch": 5.618257261410788, + "grad_norm": 5.90981912612915, + "learning_rate": 2.4343015214384508e-05, + "log_odds_chosen": 10.498150825500488, + "log_odds_ratio": -0.0004647416644729674, + "logits/chosen": -0.26439282298088074, + "logits/rejected": -0.31085023283958435, + "logps/chosen": -0.0028593679890036583, + "logps/rejected": -2.0355193614959717, + "loss": 0.7325, + "nll_loss": 0.1830860823392868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000285936810541898, + "rewards/margins": 0.2032659947872162, + "rewards/rejected": -0.20355194807052612, + "step": 8124 + }, + { + "epoch": 5.618948824343015, + "grad_norm": 14.861536026000977, + "learning_rate": 2.433917319809436e-05, + "log_odds_chosen": 9.945615768432617, + "log_odds_ratio": -0.00018102941976394504, + "logits/chosen": 0.029468819499015808, + "logits/rejected": -0.10923825204372406, + "logps/chosen": -0.000667485233861953, + "logps/rejected": -2.044044017791748, + "loss": 0.9351, + "nll_loss": 0.23375779390335083, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.67485292069614e-05, + "rewards/margins": 0.20433764159679413, + "rewards/rejected": -0.20440438389778137, + "step": 8125 + }, + { + "epoch": 5.619640387275242, + "grad_norm": 11.529810905456543, + "learning_rate": 2.433533118180421e-05, + "log_odds_chosen": 9.585453987121582, + "log_odds_ratio": -0.004293727222830057, + "logits/chosen": -0.5194653272628784, + "logits/rejected": -0.5726852416992188, + "logps/chosen": -0.0029624204616993666, + "logps/rejected": -1.9861929416656494, + "loss": 0.9043, + "nll_loss": 0.22564168274402618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029624204034917057, + "rewards/margins": 0.198323056101799, + "rewards/rejected": -0.1986193060874939, + "step": 8126 + }, + { + "epoch": 5.6203319502074685, + "grad_norm": 6.89424467086792, + "learning_rate": 2.4331489165514062e-05, + "log_odds_chosen": 10.434301376342773, + "log_odds_ratio": -7.22405529813841e-05, + "logits/chosen": -0.5234087705612183, + "logits/rejected": -0.48809075355529785, + "logps/chosen": -0.0002671776164788753, + "logps/rejected": -1.732217788696289, + "loss": 0.7066, + "nll_loss": 0.17664460837841034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6717760192696005e-05, + "rewards/margins": 0.17319506406784058, + "rewards/rejected": -0.17322179675102234, + "step": 8127 + }, + { + "epoch": 5.621023513139695, + "grad_norm": 5.283146381378174, + "learning_rate": 2.4327647149223914e-05, + "log_odds_chosen": 10.077682495117188, + "log_odds_ratio": -0.0005050359759479761, + "logits/chosen": -0.4807586967945099, + "logits/rejected": -0.4631732106208801, + "logps/chosen": -0.0011460325913503766, + "logps/rejected": -2.214632034301758, + "loss": 0.8259, + "nll_loss": 0.20641423761844635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011460327368695289, + "rewards/margins": 0.2213486135005951, + "rewards/rejected": -0.22146320343017578, + "step": 8128 + }, + { + "epoch": 5.621715076071922, + "grad_norm": 3.413188934326172, + "learning_rate": 2.4323805132933764e-05, + "log_odds_chosen": 10.446636199951172, + "log_odds_ratio": -0.010576541535556316, + "logits/chosen": -0.3370027542114258, + "logits/rejected": -0.30192655324935913, + "logps/chosen": -0.0044626230373978615, + "logps/rejected": -2.717942476272583, + "loss": 0.8401, + "nll_loss": 0.20897215604782104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044626230373978615, + "rewards/margins": 0.2713479995727539, + "rewards/rejected": -0.27179425954818726, + "step": 8129 + }, + { + "epoch": 5.622406639004149, + "grad_norm": 4.8018269538879395, + "learning_rate": 2.4319963116643616e-05, + "log_odds_chosen": 10.509857177734375, + "log_odds_ratio": -8.940868428908288e-05, + "logits/chosen": -0.34783679246902466, + "logits/rejected": -0.45641839504241943, + "logps/chosen": -0.000150371779454872, + "logps/rejected": -2.042797565460205, + "loss": 0.4591, + "nll_loss": 0.11477848887443542, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5037178854981903e-05, + "rewards/margins": 0.20426471531391144, + "rewards/rejected": -0.20427973568439484, + "step": 8130 + }, + { + "epoch": 5.623098201936376, + "grad_norm": 7.478186130523682, + "learning_rate": 2.431612110035347e-05, + "log_odds_chosen": 10.237602233886719, + "log_odds_ratio": -9.592981223249808e-05, + "logits/chosen": -0.6704153418540955, + "logits/rejected": -0.6934966444969177, + "logps/chosen": -0.0002596440608613193, + "logps/rejected": -1.833032488822937, + "loss": 0.6939, + "nll_loss": 0.17345938086509705, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5964407541323453e-05, + "rewards/margins": 0.18327729403972626, + "rewards/rejected": -0.18330325186252594, + "step": 8131 + }, + { + "epoch": 5.623789764868603, + "grad_norm": 8.362983703613281, + "learning_rate": 2.4312279084063317e-05, + "log_odds_chosen": 9.720096588134766, + "log_odds_ratio": -0.029921630397439003, + "logits/chosen": -0.3918308615684509, + "logits/rejected": -0.4415450692176819, + "logps/chosen": -0.007875367067754269, + "logps/rejected": -2.011821746826172, + "loss": 0.7132, + "nll_loss": 0.1753058284521103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007875366136431694, + "rewards/margins": 0.2003946602344513, + "rewards/rejected": -0.20118218660354614, + "step": 8132 + }, + { + "epoch": 5.624481327800829, + "grad_norm": 5.304823875427246, + "learning_rate": 2.4308437067773167e-05, + "log_odds_chosen": 10.959489822387695, + "log_odds_ratio": -4.239015470375307e-05, + "logits/chosen": -0.517112672328949, + "logits/rejected": -0.6122016906738281, + "logps/chosen": -0.007303733378648758, + "logps/rejected": -2.872983455657959, + "loss": 0.7552, + "nll_loss": 0.1887841820716858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007303733727894723, + "rewards/margins": 0.2865679860115051, + "rewards/rejected": -0.287298321723938, + "step": 8133 + }, + { + "epoch": 5.625172890733056, + "grad_norm": 8.693367958068848, + "learning_rate": 2.430459505148302e-05, + "log_odds_chosen": 10.738483428955078, + "log_odds_ratio": -6.522463081637397e-05, + "logits/chosen": -0.42843499779701233, + "logits/rejected": -0.4606199860572815, + "logps/chosen": -0.00020155473612248898, + "logps/rejected": -1.9086081981658936, + "loss": 0.6322, + "nll_loss": 0.15804407000541687, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0155472157057375e-05, + "rewards/margins": 0.19084066152572632, + "rewards/rejected": -0.1908608227968216, + "step": 8134 + }, + { + "epoch": 5.625864453665283, + "grad_norm": 5.344371318817139, + "learning_rate": 2.4300753035192868e-05, + "log_odds_chosen": 9.759532928466797, + "log_odds_ratio": -0.0003999543550889939, + "logits/chosen": -0.32385164499282837, + "logits/rejected": -0.38953524827957153, + "logps/chosen": -0.005059612449258566, + "logps/rejected": -2.5394933223724365, + "loss": 0.6739, + "nll_loss": 0.1684274673461914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005059613031335175, + "rewards/margins": 0.2534433603286743, + "rewards/rejected": -0.2539493441581726, + "step": 8135 + }, + { + "epoch": 5.62655601659751, + "grad_norm": 11.509753227233887, + "learning_rate": 2.429691101890272e-05, + "log_odds_chosen": 10.994524002075195, + "log_odds_ratio": -2.6269664886058308e-05, + "logits/chosen": -0.5848830342292786, + "logits/rejected": -0.606230616569519, + "logps/chosen": -0.00015633998555131257, + "logps/rejected": -1.9623017311096191, + "loss": 0.9263, + "nll_loss": 0.23158368468284607, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5633999282727018e-05, + "rewards/margins": 0.19621454179286957, + "rewards/rejected": -0.1962301880121231, + "step": 8136 + }, + { + "epoch": 5.627247579529737, + "grad_norm": 7.090087413787842, + "learning_rate": 2.4293069002612573e-05, + "log_odds_chosen": 10.250383377075195, + "log_odds_ratio": -0.0006723024416714907, + "logits/chosen": -0.14481787383556366, + "logits/rejected": -0.26430898904800415, + "logps/chosen": -0.0007214234792627394, + "logps/rejected": -1.9152991771697998, + "loss": 0.8962, + "nll_loss": 0.2239772081375122, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.214234938146546e-05, + "rewards/margins": 0.19145777821540833, + "rewards/rejected": -0.19152991473674774, + "step": 8137 + }, + { + "epoch": 5.627939142461964, + "grad_norm": 7.703522682189941, + "learning_rate": 2.4289226986322422e-05, + "log_odds_chosen": 9.376312255859375, + "log_odds_ratio": -0.0018738940125331283, + "logits/chosen": -0.5013612508773804, + "logits/rejected": -0.541246771812439, + "logps/chosen": -0.006598047912120819, + "logps/rejected": -2.103334426879883, + "loss": 0.8793, + "nll_loss": 0.21962577104568481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000659804733004421, + "rewards/margins": 0.20967364311218262, + "rewards/rejected": -0.2103334367275238, + "step": 8138 + }, + { + "epoch": 5.62863070539419, + "grad_norm": 8.062605857849121, + "learning_rate": 2.4285384970032274e-05, + "log_odds_chosen": 10.10983657836914, + "log_odds_ratio": -0.003539876313880086, + "logits/chosen": -0.43089261651039124, + "logits/rejected": -0.5064984560012817, + "logps/chosen": -0.0018464041640982032, + "logps/rejected": -2.3213188648223877, + "loss": 0.6593, + "nll_loss": 0.1644791215658188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018464041932020336, + "rewards/margins": 0.23194727301597595, + "rewards/rejected": -0.23213189840316772, + "step": 8139 + }, + { + "epoch": 5.629322268326417, + "grad_norm": 8.136240005493164, + "learning_rate": 2.4281542953742127e-05, + "log_odds_chosen": 10.913151741027832, + "log_odds_ratio": -5.67370698263403e-05, + "logits/chosen": -0.5349059104919434, + "logits/rejected": -0.5520635843276978, + "logps/chosen": -0.00027292617596685886, + "logps/rejected": -2.168395519256592, + "loss": 0.5939, + "nll_loss": 0.14847087860107422, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7292615413898602e-05, + "rewards/margins": 0.21681226789951324, + "rewards/rejected": -0.21683958172798157, + "step": 8140 + }, + { + "epoch": 5.630013831258644, + "grad_norm": 8.027983665466309, + "learning_rate": 2.4277700937451976e-05, + "log_odds_chosen": 9.770769119262695, + "log_odds_ratio": -0.0002399134391453117, + "logits/chosen": -0.5105567574501038, + "logits/rejected": -0.5048942565917969, + "logps/chosen": -0.0008296390878967941, + "logps/rejected": -2.3705339431762695, + "loss": 1.3542, + "nll_loss": 0.33851632475852966, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.296391752082855e-05, + "rewards/margins": 0.2369704246520996, + "rewards/rejected": -0.23705337941646576, + "step": 8141 + }, + { + "epoch": 5.630705394190871, + "grad_norm": 9.292349815368652, + "learning_rate": 2.427385892116183e-05, + "log_odds_chosen": 11.085734367370605, + "log_odds_ratio": -0.0005495705408975482, + "logits/chosen": -0.3071434795856476, + "logits/rejected": -0.34941795468330383, + "logps/chosen": -0.0024962667375802994, + "logps/rejected": -2.7942733764648438, + "loss": 0.6718, + "nll_loss": 0.1678960621356964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024962666793726385, + "rewards/margins": 0.279177725315094, + "rewards/rejected": -0.27942734956741333, + "step": 8142 + }, + { + "epoch": 5.631396957123098, + "grad_norm": 10.542267799377441, + "learning_rate": 2.4270016904871677e-05, + "log_odds_chosen": 10.499855995178223, + "log_odds_ratio": -0.0001602061529411003, + "logits/chosen": -0.4694616198539734, + "logits/rejected": -0.4889953136444092, + "logps/chosen": -0.0002664076164364815, + "logps/rejected": -2.212634563446045, + "loss": 0.6688, + "nll_loss": 0.16717346012592316, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.664076237124391e-05, + "rewards/margins": 0.22123682498931885, + "rewards/rejected": -0.22126345336437225, + "step": 8143 + }, + { + "epoch": 5.632088520055325, + "grad_norm": 15.375788688659668, + "learning_rate": 2.4266174888581526e-05, + "log_odds_chosen": 7.971059322357178, + "log_odds_ratio": -0.06600493937730789, + "logits/chosen": -0.3449121117591858, + "logits/rejected": -0.39886587858200073, + "logps/chosen": -0.15883807837963104, + "logps/rejected": -1.4472795724868774, + "loss": 1.1476, + "nll_loss": 0.2803100347518921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015883808955550194, + "rewards/margins": 0.12884415686130524, + "rewards/rejected": -0.14472796022891998, + "step": 8144 + }, + { + "epoch": 5.632780082987551, + "grad_norm": 6.314674377441406, + "learning_rate": 2.426233287229138e-05, + "log_odds_chosen": 9.617402076721191, + "log_odds_ratio": -0.024739326909184456, + "logits/chosen": -0.5911489725112915, + "logits/rejected": -0.5527961254119873, + "logps/chosen": -0.006679967511445284, + "logps/rejected": -2.2330830097198486, + "loss": 0.7778, + "nll_loss": 0.19198457896709442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006679967627860606, + "rewards/margins": 0.22264030575752258, + "rewards/rejected": -0.22330829501152039, + "step": 8145 + }, + { + "epoch": 5.633471645919778, + "grad_norm": 6.1393022537231445, + "learning_rate": 2.425849085600123e-05, + "log_odds_chosen": 8.443960189819336, + "log_odds_ratio": -0.0038430807180702686, + "logits/chosen": -0.5928643345832825, + "logits/rejected": -0.6108096837997437, + "logps/chosen": -0.003301118966192007, + "logps/rejected": -1.4561421871185303, + "loss": 0.9987, + "nll_loss": 0.249294251203537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000330111914081499, + "rewards/margins": 0.14528410136699677, + "rewards/rejected": -0.14561422169208527, + "step": 8146 + }, + { + "epoch": 5.634163208852005, + "grad_norm": 6.993804931640625, + "learning_rate": 2.425464883971108e-05, + "log_odds_chosen": 11.066886901855469, + "log_odds_ratio": -3.2031512091634795e-05, + "logits/chosen": -0.5116040706634521, + "logits/rejected": -0.4701327085494995, + "logps/chosen": -0.00016026015509851277, + "logps/rejected": -1.5866247415542603, + "loss": 1.1816, + "nll_loss": 0.2954084575176239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6026015146053396e-05, + "rewards/margins": 0.15864643454551697, + "rewards/rejected": -0.15866246819496155, + "step": 8147 + }, + { + "epoch": 5.634854771784232, + "grad_norm": 12.311450004577637, + "learning_rate": 2.4250806823420933e-05, + "log_odds_chosen": 9.662727355957031, + "log_odds_ratio": -0.0002951676433440298, + "logits/chosen": -0.4856759011745453, + "logits/rejected": -0.667373538017273, + "logps/chosen": -0.014049791730940342, + "logps/rejected": -2.104684829711914, + "loss": 0.6139, + "nll_loss": 0.15344290435314178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014049792662262917, + "rewards/margins": 0.20906350016593933, + "rewards/rejected": -0.21046847105026245, + "step": 8148 + }, + { + "epoch": 5.635546334716459, + "grad_norm": 12.116528511047363, + "learning_rate": 2.4246964807130785e-05, + "log_odds_chosen": 9.53546142578125, + "log_odds_ratio": -0.0001511829177616164, + "logits/chosen": -0.5348033308982849, + "logits/rejected": -0.59422767162323, + "logps/chosen": -0.00030178771703504026, + "logps/rejected": -1.5057218074798584, + "loss": 1.1536, + "nll_loss": 0.2883729040622711, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0178771339706145e-05, + "rewards/margins": 0.1505420058965683, + "rewards/rejected": -0.15057218074798584, + "step": 8149 + }, + { + "epoch": 5.6362378976486855, + "grad_norm": 6.730164527893066, + "learning_rate": 2.4243122790840634e-05, + "log_odds_chosen": 10.801505088806152, + "log_odds_ratio": -6.559064786415547e-05, + "logits/chosen": -0.3491261899471283, + "logits/rejected": -0.5007361173629761, + "logps/chosen": -0.00025117339100688696, + "logps/rejected": -1.964350938796997, + "loss": 0.9513, + "nll_loss": 0.23780661821365356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5117338736890815e-05, + "rewards/margins": 0.19640997052192688, + "rewards/rejected": -0.1964350789785385, + "step": 8150 + }, + { + "epoch": 5.636929460580912, + "grad_norm": 5.858931541442871, + "learning_rate": 2.4239280774550487e-05, + "log_odds_chosen": 9.36701488494873, + "log_odds_ratio": -0.00018923338211607188, + "logits/chosen": -0.22614255547523499, + "logits/rejected": -0.21489690244197845, + "logps/chosen": -0.0004743195604532957, + "logps/rejected": -1.5576481819152832, + "loss": 0.5359, + "nll_loss": 0.1339491903781891, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7431953134946525e-05, + "rewards/margins": 0.1557173877954483, + "rewards/rejected": -0.15576481819152832, + "step": 8151 + }, + { + "epoch": 5.637621023513139, + "grad_norm": 6.291555404663086, + "learning_rate": 2.4235438758260336e-05, + "log_odds_chosen": 9.558046340942383, + "log_odds_ratio": -0.0004121177480556071, + "logits/chosen": -0.5385869145393372, + "logits/rejected": -0.6188297867774963, + "logps/chosen": -0.0006326594157144427, + "logps/rejected": -1.8800450563430786, + "loss": 0.7444, + "nll_loss": 0.18606965243816376, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.326595030259341e-05, + "rewards/margins": 0.18794125318527222, + "rewards/rejected": -0.1880044937133789, + "step": 8152 + }, + { + "epoch": 5.638312586445366, + "grad_norm": 13.652946472167969, + "learning_rate": 2.4231596741970185e-05, + "log_odds_chosen": 10.601496696472168, + "log_odds_ratio": -6.46712287561968e-05, + "logits/chosen": -0.8333589434623718, + "logits/rejected": -0.8563560247421265, + "logps/chosen": -0.00015697132039349526, + "logps/rejected": -1.7056736946105957, + "loss": 0.69, + "nll_loss": 0.17250311374664307, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5697132766945288e-05, + "rewards/margins": 0.17055165767669678, + "rewards/rejected": -0.1705673635005951, + "step": 8153 + }, + { + "epoch": 5.639004149377593, + "grad_norm": 20.094688415527344, + "learning_rate": 2.4227754725680037e-05, + "log_odds_chosen": 11.596328735351562, + "log_odds_ratio": -2.2379195797839202e-05, + "logits/chosen": -0.296434223651886, + "logits/rejected": -0.39348912239074707, + "logps/chosen": -0.0016546223778277636, + "logps/rejected": -2.9314146041870117, + "loss": 1.1227, + "nll_loss": 0.28066858649253845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001654622465139255, + "rewards/margins": 0.2929759621620178, + "rewards/rejected": -0.2931414544582367, + "step": 8154 + }, + { + "epoch": 5.63969571230982, + "grad_norm": 7.812084674835205, + "learning_rate": 2.422391270938989e-05, + "log_odds_chosen": 10.527837753295898, + "log_odds_ratio": -7.580976671306416e-05, + "logits/chosen": -0.3038010895252228, + "logits/rejected": -0.37021446228027344, + "logps/chosen": -0.0003315204521641135, + "logps/rejected": -2.5236568450927734, + "loss": 0.7297, + "nll_loss": 0.1824055016040802, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3152045944007114e-05, + "rewards/margins": 0.25233250856399536, + "rewards/rejected": -0.2523656487464905, + "step": 8155 + }, + { + "epoch": 5.6403872752420465, + "grad_norm": 7.516757011413574, + "learning_rate": 2.422007069309974e-05, + "log_odds_chosen": 11.295770645141602, + "log_odds_ratio": -1.5275883924914524e-05, + "logits/chosen": -0.44369572401046753, + "logits/rejected": -0.5033272504806519, + "logps/chosen": -0.00013083986414130777, + "logps/rejected": -2.203603982925415, + "loss": 0.7065, + "nll_loss": 0.17663389444351196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3083986232231837e-05, + "rewards/margins": 0.2203473150730133, + "rewards/rejected": -0.22036036849021912, + "step": 8156 + }, + { + "epoch": 5.641078838174274, + "grad_norm": 5.2864885330200195, + "learning_rate": 2.421622867680959e-05, + "log_odds_chosen": 9.633533477783203, + "log_odds_ratio": -0.00018607992387842387, + "logits/chosen": -0.41946959495544434, + "logits/rejected": -0.34882277250289917, + "logps/chosen": -0.0006838082917965949, + "logps/rejected": -1.833829641342163, + "loss": 1.038, + "nll_loss": 0.2594764530658722, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.838083209004253e-05, + "rewards/margins": 0.18331459164619446, + "rewards/rejected": -0.18338295817375183, + "step": 8157 + }, + { + "epoch": 5.641770401106501, + "grad_norm": 4.267787456512451, + "learning_rate": 2.4212386660519444e-05, + "log_odds_chosen": 10.599780082702637, + "log_odds_ratio": -5.783616870758124e-05, + "logits/chosen": -0.6049816012382507, + "logits/rejected": -0.5448726415634155, + "logps/chosen": -0.000193988045793958, + "logps/rejected": -1.547009825706482, + "loss": 0.4527, + "nll_loss": 0.11316224932670593, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9398805306991562e-05, + "rewards/margins": 0.15468159317970276, + "rewards/rejected": -0.15470099449157715, + "step": 8158 + }, + { + "epoch": 5.642461964038728, + "grad_norm": 27.239734649658203, + "learning_rate": 2.4208544644229293e-05, + "log_odds_chosen": 9.927078247070312, + "log_odds_ratio": -0.00020649611542467028, + "logits/chosen": -0.5524391531944275, + "logits/rejected": -0.6222734451293945, + "logps/chosen": -0.005761809181421995, + "logps/rejected": -2.021272659301758, + "loss": 1.2179, + "nll_loss": 0.30445796251296997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005761808715760708, + "rewards/margins": 0.20155107975006104, + "rewards/rejected": -0.20212724804878235, + "step": 8159 + }, + { + "epoch": 5.643153526970955, + "grad_norm": 9.34630012512207, + "learning_rate": 2.4204702627939145e-05, + "log_odds_chosen": 10.594392776489258, + "log_odds_ratio": -5.7553992519387975e-05, + "logits/chosen": -0.5342981815338135, + "logits/rejected": -0.5811108350753784, + "logps/chosen": -0.0016168851871043444, + "logps/rejected": -2.659160614013672, + "loss": 0.7788, + "nll_loss": 0.19469034671783447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016168851288966835, + "rewards/margins": 0.265754371881485, + "rewards/rejected": -0.2659160792827606, + "step": 8160 + }, + { + "epoch": 5.643845089903182, + "grad_norm": 13.244446754455566, + "learning_rate": 2.4200860611648994e-05, + "log_odds_chosen": 10.221586227416992, + "log_odds_ratio": -8.015262574190274e-05, + "logits/chosen": -0.3327646851539612, + "logits/rejected": -0.35911014676094055, + "logps/chosen": -0.0005617007846012712, + "logps/rejected": -2.2518398761749268, + "loss": 0.815, + "nll_loss": 0.20373176038265228, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617008719127625e-05, + "rewards/margins": 0.22512781620025635, + "rewards/rejected": -0.22518399357795715, + "step": 8161 + }, + { + "epoch": 5.644536652835408, + "grad_norm": 5.6324639320373535, + "learning_rate": 2.4197018595358843e-05, + "log_odds_chosen": 10.490815162658691, + "log_odds_ratio": -3.7659163353964686e-05, + "logits/chosen": -0.3393840789794922, + "logits/rejected": -0.383233904838562, + "logps/chosen": -0.00018157096928916872, + "logps/rejected": -1.9173825979232788, + "loss": 0.805, + "nll_loss": 0.20124170184135437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.815709583752323e-05, + "rewards/margins": 0.191720113158226, + "rewards/rejected": -0.19173826277256012, + "step": 8162 + }, + { + "epoch": 5.645228215767635, + "grad_norm": 12.69321060180664, + "learning_rate": 2.4193176579068696e-05, + "log_odds_chosen": 10.602964401245117, + "log_odds_ratio": -4.337333666626364e-05, + "logits/chosen": -0.27991557121276855, + "logits/rejected": -0.3347739577293396, + "logps/chosen": -0.00010848429519683123, + "logps/rejected": -1.7074754238128662, + "loss": 0.6757, + "nll_loss": 0.16892284154891968, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0848429155885242e-05, + "rewards/margins": 0.17073668539524078, + "rewards/rejected": -0.1707475334405899, + "step": 8163 + }, + { + "epoch": 5.645919778699862, + "grad_norm": 8.432408332824707, + "learning_rate": 2.4189334562778548e-05, + "log_odds_chosen": 10.296133041381836, + "log_odds_ratio": -8.676405559526756e-05, + "logits/chosen": -0.3905819356441498, + "logits/rejected": -0.5043076872825623, + "logps/chosen": -0.00029500541859306395, + "logps/rejected": -1.99964439868927, + "loss": 0.7268, + "nll_loss": 0.181697279214859, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9500542950700037e-05, + "rewards/margins": 0.1999349594116211, + "rewards/rejected": -0.19996444880962372, + "step": 8164 + }, + { + "epoch": 5.646611341632089, + "grad_norm": 6.604633808135986, + "learning_rate": 2.4185492546488397e-05, + "log_odds_chosen": 10.595582962036133, + "log_odds_ratio": -0.00013819042942486703, + "logits/chosen": -0.05515572428703308, + "logits/rejected": -0.09381835162639618, + "logps/chosen": -0.0016885169316083193, + "logps/rejected": -3.0337023735046387, + "loss": 0.8599, + "nll_loss": 0.214948832988739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016885170771274716, + "rewards/margins": 0.3032013773918152, + "rewards/rejected": -0.30337023735046387, + "step": 8165 + }, + { + "epoch": 5.647302904564316, + "grad_norm": 11.411890983581543, + "learning_rate": 2.418165053019825e-05, + "log_odds_chosen": 11.249796867370605, + "log_odds_ratio": -2.3611090000486e-05, + "logits/chosen": -0.3377041220664978, + "logits/rejected": -0.35702985525131226, + "logps/chosen": -0.0001584252022439614, + "logps/rejected": -2.2942593097686768, + "loss": 0.6065, + "nll_loss": 0.15161898732185364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5842519133002497e-05, + "rewards/margins": 0.2294101119041443, + "rewards/rejected": -0.22942596673965454, + "step": 8166 + }, + { + "epoch": 5.6479944674965425, + "grad_norm": 7.92568826675415, + "learning_rate": 2.4177808513908102e-05, + "log_odds_chosen": 10.211087226867676, + "log_odds_ratio": -9.13483600015752e-05, + "logits/chosen": -0.5297777056694031, + "logits/rejected": -0.485273152589798, + "logps/chosen": -0.00019397379946894944, + "logps/rejected": -1.8004730939865112, + "loss": 0.5342, + "nll_loss": 0.13353081047534943, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9397381038288586e-05, + "rewards/margins": 0.18002791702747345, + "rewards/rejected": -0.18004733324050903, + "step": 8167 + }, + { + "epoch": 5.648686030428769, + "grad_norm": 12.89731502532959, + "learning_rate": 2.417396649761795e-05, + "log_odds_chosen": 8.56032943725586, + "log_odds_ratio": -0.00191538967192173, + "logits/chosen": -0.3405977785587311, + "logits/rejected": -0.3541436791419983, + "logps/chosen": -0.002999624703079462, + "logps/rejected": -1.421423316001892, + "loss": 0.6859, + "nll_loss": 0.17127352952957153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002999624703079462, + "rewards/margins": 0.14184238016605377, + "rewards/rejected": -0.14214232563972473, + "step": 8168 + }, + { + "epoch": 5.649377593360996, + "grad_norm": 5.056807518005371, + "learning_rate": 2.4170124481327804e-05, + "log_odds_chosen": 9.410223007202148, + "log_odds_ratio": -0.00013891287380829453, + "logits/chosen": -0.030084922909736633, + "logits/rejected": -0.058445125818252563, + "logps/chosen": -0.002498415531590581, + "logps/rejected": -1.902077555656433, + "loss": 0.6037, + "nll_loss": 0.15090909600257874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024984157062135637, + "rewards/margins": 0.18995791673660278, + "rewards/rejected": -0.19020774960517883, + "step": 8169 + }, + { + "epoch": 5.650069156293223, + "grad_norm": 13.368850708007812, + "learning_rate": 2.4166282465037653e-05, + "log_odds_chosen": 11.036503791809082, + "log_odds_ratio": -3.4123506338801235e-05, + "logits/chosen": -0.49376970529556274, + "logits/rejected": -0.48467227816581726, + "logps/chosen": -0.00012498226715251803, + "logps/rejected": -1.9384177923202515, + "loss": 0.8761, + "nll_loss": 0.2190292477607727, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2498228898039088e-05, + "rewards/margins": 0.19382928311824799, + "rewards/rejected": -0.19384178519248962, + "step": 8170 + }, + { + "epoch": 5.65076071922545, + "grad_norm": 16.7672061920166, + "learning_rate": 2.4162440448747502e-05, + "log_odds_chosen": 10.526063919067383, + "log_odds_ratio": -0.00012096527643734589, + "logits/chosen": -0.7498366832733154, + "logits/rejected": -0.8215652704238892, + "logps/chosen": -0.00040370371425524354, + "logps/rejected": -2.3306567668914795, + "loss": 0.8662, + "nll_loss": 0.21654112637043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.037037069792859e-05, + "rewards/margins": 0.23302532732486725, + "rewards/rejected": -0.23306570947170258, + "step": 8171 + }, + { + "epoch": 5.651452282157677, + "grad_norm": 8.703173637390137, + "learning_rate": 2.4158598432457354e-05, + "log_odds_chosen": 11.517191886901855, + "log_odds_ratio": -8.211346721509472e-05, + "logits/chosen": -0.12739884853363037, + "logits/rejected": -0.23411113023757935, + "logps/chosen": -0.0003812021459452808, + "logps/rejected": -2.476501703262329, + "loss": 0.8198, + "nll_loss": 0.2049528807401657, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.812021896010265e-05, + "rewards/margins": 0.24761205911636353, + "rewards/rejected": -0.24765019118785858, + "step": 8172 + }, + { + "epoch": 5.6521438450899035, + "grad_norm": 6.470282554626465, + "learning_rate": 2.4154756416167207e-05, + "log_odds_chosen": 11.416342735290527, + "log_odds_ratio": -3.046138590434566e-05, + "logits/chosen": -0.05274605005979538, + "logits/rejected": -0.18470771610736847, + "logps/chosen": -0.0006778776296414435, + "logps/rejected": -3.102085828781128, + "loss": 0.7421, + "nll_loss": 0.18551874160766602, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.778776878491044e-05, + "rewards/margins": 0.31014078855514526, + "rewards/rejected": -0.31020858883857727, + "step": 8173 + }, + { + "epoch": 5.65283540802213, + "grad_norm": 8.105291366577148, + "learning_rate": 2.4150914399877056e-05, + "log_odds_chosen": 10.079261779785156, + "log_odds_ratio": -0.0001657334651099518, + "logits/chosen": -0.2964015007019043, + "logits/rejected": -0.39093565940856934, + "logps/chosen": -0.0011884081177413464, + "logps/rejected": -2.2544288635253906, + "loss": 0.7485, + "nll_loss": 0.18709859251976013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011884080595336854, + "rewards/margins": 0.22532406449317932, + "rewards/rejected": -0.22544291615486145, + "step": 8174 + }, + { + "epoch": 5.653526970954357, + "grad_norm": 10.007908821105957, + "learning_rate": 2.4147072383586908e-05, + "log_odds_chosen": 11.506814956665039, + "log_odds_ratio": -2.823519753292203e-05, + "logits/chosen": -0.42628705501556396, + "logits/rejected": -0.4609462022781372, + "logps/chosen": -0.002497493987902999, + "logps/rejected": -3.4516782760620117, + "loss": 0.6041, + "nll_loss": 0.15103143453598022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002497493987902999, + "rewards/margins": 0.3449181020259857, + "rewards/rejected": -0.3451678454875946, + "step": 8175 + }, + { + "epoch": 5.654218533886584, + "grad_norm": 5.072340965270996, + "learning_rate": 2.414323036729676e-05, + "log_odds_chosen": 11.129064559936523, + "log_odds_ratio": -2.1794527128804475e-05, + "logits/chosen": 0.02416606992483139, + "logits/rejected": -0.08635728806257248, + "logps/chosen": -0.00016831500397529453, + "logps/rejected": -2.3612351417541504, + "loss": 0.6541, + "nll_loss": 0.1635279506444931, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6831501852720976e-05, + "rewards/margins": 0.23610667884349823, + "rewards/rejected": -0.23612351715564728, + "step": 8176 + }, + { + "epoch": 5.654910096818811, + "grad_norm": 8.00340461730957, + "learning_rate": 2.413938835100661e-05, + "log_odds_chosen": 10.177129745483398, + "log_odds_ratio": -0.00021432647190522403, + "logits/chosen": -0.37250402569770813, + "logits/rejected": -0.48559921979904175, + "logps/chosen": -0.0009563150233589113, + "logps/rejected": -2.075915813446045, + "loss": 1.0559, + "nll_loss": 0.26395127177238464, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.563150524627417e-05, + "rewards/margins": 0.20749595761299133, + "rewards/rejected": -0.20759157836437225, + "step": 8177 + }, + { + "epoch": 5.655601659751038, + "grad_norm": 7.277695178985596, + "learning_rate": 2.4135546334716462e-05, + "log_odds_chosen": 10.896219253540039, + "log_odds_ratio": -3.1958810723153874e-05, + "logits/chosen": -0.5712587833404541, + "logits/rejected": -0.5515532493591309, + "logps/chosen": -0.0001701847359072417, + "logps/rejected": -1.8836162090301514, + "loss": 0.9981, + "nll_loss": 0.24951490759849548, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.701847395452205e-05, + "rewards/margins": 0.18834459781646729, + "rewards/rejected": -0.18836161494255066, + "step": 8178 + }, + { + "epoch": 5.6562932226832645, + "grad_norm": 13.938727378845215, + "learning_rate": 2.413170431842631e-05, + "log_odds_chosen": 11.479532241821289, + "log_odds_ratio": -8.935096411732957e-05, + "logits/chosen": -0.624695897102356, + "logits/rejected": -0.625409722328186, + "logps/chosen": -0.0005521889543160796, + "logps/rejected": -3.179570436477661, + "loss": 0.626, + "nll_loss": 0.15649336576461792, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5218897614395246e-05, + "rewards/margins": 0.3179018497467041, + "rewards/rejected": -0.3179570436477661, + "step": 8179 + }, + { + "epoch": 5.656984785615491, + "grad_norm": 10.801321029663086, + "learning_rate": 2.412786230213616e-05, + "log_odds_chosen": 10.582452774047852, + "log_odds_ratio": -3.776304583880119e-05, + "logits/chosen": -0.6883202791213989, + "logits/rejected": -0.7378414869308472, + "logps/chosen": -0.001248332904651761, + "logps/rejected": -2.509331703186035, + "loss": 0.7602, + "nll_loss": 0.19005697965621948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012483329919632524, + "rewards/margins": 0.25080832839012146, + "rewards/rejected": -0.2509331703186035, + "step": 8180 + }, + { + "epoch": 5.657676348547718, + "grad_norm": 8.140530586242676, + "learning_rate": 2.4124020285846013e-05, + "log_odds_chosen": 11.58713150024414, + "log_odds_ratio": -4.277328844182193e-05, + "logits/chosen": -0.4091223478317261, + "logits/rejected": -0.49574363231658936, + "logps/chosen": -0.0004341943422332406, + "logps/rejected": -2.724029064178467, + "loss": 0.7815, + "nll_loss": 0.1953831911087036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.341944077168591e-05, + "rewards/margins": 0.2723594903945923, + "rewards/rejected": -0.27240291237831116, + "step": 8181 + }, + { + "epoch": 5.658367911479945, + "grad_norm": 10.485898971557617, + "learning_rate": 2.4120178269555865e-05, + "log_odds_chosen": 10.399928092956543, + "log_odds_ratio": -8.76165067893453e-05, + "logits/chosen": -0.24331486225128174, + "logits/rejected": -0.3691559433937073, + "logps/chosen": -0.0004011784621980041, + "logps/rejected": -2.289734363555908, + "loss": 0.8195, + "nll_loss": 0.20486295223236084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011784403701313e-05, + "rewards/margins": 0.22893333435058594, + "rewards/rejected": -0.22897344827651978, + "step": 8182 + }, + { + "epoch": 5.659059474412172, + "grad_norm": 5.636170864105225, + "learning_rate": 2.4116336253265714e-05, + "log_odds_chosen": 10.645550727844238, + "log_odds_ratio": -4.1069572034757584e-05, + "logits/chosen": -0.39882680773735046, + "logits/rejected": -0.4428432583808899, + "logps/chosen": -0.0010795921552926302, + "logps/rejected": -2.0664827823638916, + "loss": 0.7889, + "nll_loss": 0.19723322987556458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010795921116368845, + "rewards/margins": 0.2065403163433075, + "rewards/rejected": -0.20664829015731812, + "step": 8183 + }, + { + "epoch": 5.659751037344399, + "grad_norm": 8.679171562194824, + "learning_rate": 2.4112494236975567e-05, + "log_odds_chosen": 11.246646881103516, + "log_odds_ratio": -9.160442277789116e-05, + "logits/chosen": -0.30560818314552307, + "logits/rejected": -0.3293704092502594, + "logps/chosen": -0.0003360637929290533, + "logps/rejected": -3.327373504638672, + "loss": 0.7894, + "nll_loss": 0.19733411073684692, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3606382203288376e-05, + "rewards/margins": 0.33270376920700073, + "rewards/rejected": -0.33273738622665405, + "step": 8184 + }, + { + "epoch": 5.6604426002766255, + "grad_norm": 17.666276931762695, + "learning_rate": 2.410865222068542e-05, + "log_odds_chosen": 10.4041166305542, + "log_odds_ratio": -6.354991637635976e-05, + "logits/chosen": -0.48255521059036255, + "logits/rejected": -0.5338969230651855, + "logps/chosen": -0.0003337644156999886, + "logps/rejected": -2.2116596698760986, + "loss": 0.9727, + "nll_loss": 0.24315854907035828, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3376443752786145e-05, + "rewards/margins": 0.22113259136676788, + "rewards/rejected": -0.2211659699678421, + "step": 8185 + }, + { + "epoch": 5.661134163208852, + "grad_norm": 11.990691184997559, + "learning_rate": 2.4104810204395268e-05, + "log_odds_chosen": 10.493095397949219, + "log_odds_ratio": -8.417104254476726e-05, + "logits/chosen": -0.14247873425483704, + "logits/rejected": -0.22682204842567444, + "logps/chosen": -0.002159011783078313, + "logps/rejected": -2.218174457550049, + "loss": 0.8291, + "nll_loss": 0.20726041495800018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021590119285974652, + "rewards/margins": 0.22160154581069946, + "rewards/rejected": -0.22181746363639832, + "step": 8186 + }, + { + "epoch": 5.661825726141079, + "grad_norm": 9.079957962036133, + "learning_rate": 2.410096818810512e-05, + "log_odds_chosen": 10.605042457580566, + "log_odds_ratio": -0.0003826943866442889, + "logits/chosen": -0.6163223385810852, + "logits/rejected": -0.7284992933273315, + "logps/chosen": -0.0005659597227349877, + "logps/rejected": -2.3373608589172363, + "loss": 0.9552, + "nll_loss": 0.2387738823890686, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6595963542349637e-05, + "rewards/margins": 0.23367950320243835, + "rewards/rejected": -0.2337360978126526, + "step": 8187 + }, + { + "epoch": 5.662517289073306, + "grad_norm": 10.778707504272461, + "learning_rate": 2.409712617181497e-05, + "log_odds_chosen": 11.336727142333984, + "log_odds_ratio": -3.515938078635372e-05, + "logits/chosen": -0.5271604061126709, + "logits/rejected": -0.6426557898521423, + "logps/chosen": -0.0004096640623174608, + "logps/rejected": -2.7991998195648193, + "loss": 0.7284, + "nll_loss": 0.18209651112556458, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096640623174608e-05, + "rewards/margins": 0.27987903356552124, + "rewards/rejected": -0.27991998195648193, + "step": 8188 + }, + { + "epoch": 5.663208852005533, + "grad_norm": 11.966964721679688, + "learning_rate": 2.409328415552482e-05, + "log_odds_chosen": 11.431020736694336, + "log_odds_ratio": -3.573830326786265e-05, + "logits/chosen": -0.8616727590560913, + "logits/rejected": -0.8452792167663574, + "logps/chosen": -9.532944386592135e-05, + "logps/rejected": -2.1406638622283936, + "loss": 0.7947, + "nll_loss": 0.19867615401744843, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.532944204693194e-06, + "rewards/margins": 0.2140568494796753, + "rewards/rejected": -0.21406638622283936, + "step": 8189 + }, + { + "epoch": 5.66390041493776, + "grad_norm": 7.023293495178223, + "learning_rate": 2.408944213923467e-05, + "log_odds_chosen": 9.780372619628906, + "log_odds_ratio": -0.0003126821538899094, + "logits/chosen": -0.4998539984226227, + "logits/rejected": -0.5834687948226929, + "logps/chosen": -0.001264077378436923, + "logps/rejected": -1.733214259147644, + "loss": 0.7783, + "nll_loss": 0.19453226029872894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001264077436644584, + "rewards/margins": 0.173195019364357, + "rewards/rejected": -0.1733214110136032, + "step": 8190 + }, + { + "epoch": 5.6645919778699865, + "grad_norm": 7.998936653137207, + "learning_rate": 2.408560012294452e-05, + "log_odds_chosen": 9.439737319946289, + "log_odds_ratio": -0.0011585770407691598, + "logits/chosen": -0.6191846132278442, + "logits/rejected": -0.7086983323097229, + "logps/chosen": -0.0018918003188446164, + "logps/rejected": -1.6204073429107666, + "loss": 0.8352, + "nll_loss": 0.20867592096328735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018918004934675992, + "rewards/margins": 0.16185154020786285, + "rewards/rejected": -0.16204072535037994, + "step": 8191 + }, + { + "epoch": 5.665283540802213, + "grad_norm": 8.45009994506836, + "learning_rate": 2.4081758106654373e-05, + "log_odds_chosen": 10.186800956726074, + "log_odds_ratio": -6.632292206631973e-05, + "logits/chosen": -0.5729277729988098, + "logits/rejected": -0.6806793808937073, + "logps/chosen": -0.0007412948179990053, + "logps/rejected": -2.046152114868164, + "loss": 0.7685, + "nll_loss": 0.19211547076702118, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.41294861654751e-05, + "rewards/margins": 0.20454107224941254, + "rewards/rejected": -0.20461520552635193, + "step": 8192 + }, + { + "epoch": 5.66597510373444, + "grad_norm": 8.021780967712402, + "learning_rate": 2.4077916090364225e-05, + "log_odds_chosen": 9.615251541137695, + "log_odds_ratio": -0.000610757211688906, + "logits/chosen": -0.6830639243125916, + "logits/rejected": -0.7671900391578674, + "logps/chosen": -0.001944072311744094, + "logps/rejected": -2.287820816040039, + "loss": 1.549, + "nll_loss": 0.3871961236000061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019440722826402634, + "rewards/margins": 0.22858770191669464, + "rewards/rejected": -0.22878210246562958, + "step": 8193 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 14.318559646606445, + "learning_rate": 2.4074074074074074e-05, + "log_odds_chosen": 11.765233039855957, + "log_odds_ratio": -1.4429515431402251e-05, + "logits/chosen": -0.749580979347229, + "logits/rejected": -0.7655578851699829, + "logps/chosen": -0.00037591400905512273, + "logps/rejected": -3.43573260307312, + "loss": 0.7071, + "nll_loss": 0.17676472663879395, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7591402360703796e-05, + "rewards/margins": 0.3435356914997101, + "rewards/rejected": -0.34357327222824097, + "step": 8194 + }, + { + "epoch": 5.667358229598894, + "grad_norm": 9.44308853149414, + "learning_rate": 2.4070232057783927e-05, + "log_odds_chosen": 9.825872421264648, + "log_odds_ratio": -0.0001476307079428807, + "logits/chosen": -0.5155048370361328, + "logits/rejected": -0.6302860975265503, + "logps/chosen": -0.0007964313263073564, + "logps/rejected": -1.5924385786056519, + "loss": 0.6867, + "nll_loss": 0.17166659235954285, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.964312680996954e-05, + "rewards/margins": 0.1591642200946808, + "rewards/rejected": -0.1592438519001007, + "step": 8195 + }, + { + "epoch": 5.668049792531121, + "grad_norm": 7.392195701599121, + "learning_rate": 2.406639004149378e-05, + "log_odds_chosen": 9.350540161132812, + "log_odds_ratio": -0.0004433426365721971, + "logits/chosen": -0.7200472950935364, + "logits/rejected": -0.722077488899231, + "logps/chosen": -0.00044103802065365016, + "logps/rejected": -1.473872184753418, + "loss": 0.7089, + "nll_loss": 0.17717213928699493, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410380279296078e-05, + "rewards/margins": 0.14734309911727905, + "rewards/rejected": -0.14738722145557404, + "step": 8196 + }, + { + "epoch": 5.6687413554633475, + "grad_norm": 7.103992938995361, + "learning_rate": 2.4062548025203628e-05, + "log_odds_chosen": 9.922134399414062, + "log_odds_ratio": -0.00039362561074085534, + "logits/chosen": -0.593850314617157, + "logits/rejected": -0.6985044479370117, + "logps/chosen": -0.000883720291312784, + "logps/rejected": -2.1663904190063477, + "loss": 0.6011, + "nll_loss": 0.15024788677692413, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.837203495204449e-05, + "rewards/margins": 0.21655067801475525, + "rewards/rejected": -0.21663904190063477, + "step": 8197 + }, + { + "epoch": 5.669432918395574, + "grad_norm": 7.624302864074707, + "learning_rate": 2.4058706008913477e-05, + "log_odds_chosen": 10.729737281799316, + "log_odds_ratio": -3.654578540590592e-05, + "logits/chosen": -0.7158301472663879, + "logits/rejected": -0.7326931357383728, + "logps/chosen": -0.00021854013903066516, + "logps/rejected": -1.8644185066223145, + "loss": 0.8006, + "nll_loss": 0.20013828575611115, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.185401535825804e-05, + "rewards/margins": 0.1864199936389923, + "rewards/rejected": -0.18644185364246368, + "step": 8198 + }, + { + "epoch": 5.670124481327801, + "grad_norm": 7.759194374084473, + "learning_rate": 2.405486399262333e-05, + "log_odds_chosen": 9.836369514465332, + "log_odds_ratio": -0.00013164187839720398, + "logits/chosen": -0.2198760211467743, + "logits/rejected": -0.27906516194343567, + "logps/chosen": -0.00028999499045312405, + "logps/rejected": -1.5851798057556152, + "loss": 0.6143, + "nll_loss": 0.1535702645778656, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.899950231949333e-05, + "rewards/margins": 0.15848898887634277, + "rewards/rejected": -0.158517986536026, + "step": 8199 + }, + { + "epoch": 5.670816044260028, + "grad_norm": 9.081013679504395, + "learning_rate": 2.405102197633318e-05, + "log_odds_chosen": 11.407434463500977, + "log_odds_ratio": -4.3108979298267514e-05, + "logits/chosen": -0.48705050349235535, + "logits/rejected": -0.5852835178375244, + "logps/chosen": -0.00021016478422097862, + "logps/rejected": -2.620225667953491, + "loss": 0.7386, + "nll_loss": 0.18464510142803192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1016479877289385e-05, + "rewards/margins": 0.26200154423713684, + "rewards/rejected": -0.26202255487442017, + "step": 8200 + }, + { + "epoch": 5.671507607192255, + "grad_norm": 6.312060356140137, + "learning_rate": 2.404717996004303e-05, + "log_odds_chosen": 10.28929328918457, + "log_odds_ratio": -4.510658254730515e-05, + "logits/chosen": -0.25805893540382385, + "logits/rejected": -0.3633671998977661, + "logps/chosen": -0.0004313248791731894, + "logps/rejected": -1.6498507261276245, + "loss": 0.4647, + "nll_loss": 0.1161627247929573, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.313248791731894e-05, + "rewards/margins": 0.1649419367313385, + "rewards/rejected": -0.16498509049415588, + "step": 8201 + }, + { + "epoch": 5.672199170124482, + "grad_norm": 5.8020734786987305, + "learning_rate": 2.4043337943752883e-05, + "log_odds_chosen": 10.381085395812988, + "log_odds_ratio": -8.950331539381295e-05, + "logits/chosen": -0.6993118524551392, + "logits/rejected": -0.7206007838249207, + "logps/chosen": -0.000795810017734766, + "logps/rejected": -2.066084146499634, + "loss": 0.4755, + "nll_loss": 0.11886313557624817, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.958101195981726e-05, + "rewards/margins": 0.20652884244918823, + "rewards/rejected": -0.20660841464996338, + "step": 8202 + }, + { + "epoch": 5.672890733056708, + "grad_norm": 6.812307834625244, + "learning_rate": 2.4039495927462733e-05, + "log_odds_chosen": 11.133984565734863, + "log_odds_ratio": -3.28252644976601e-05, + "logits/chosen": -0.28554531931877136, + "logits/rejected": -0.43246108293533325, + "logps/chosen": -0.0005199001170694828, + "logps/rejected": -2.497281789779663, + "loss": 0.8954, + "nll_loss": 0.22383980453014374, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.199001680011861e-05, + "rewards/margins": 0.24967621266841888, + "rewards/rejected": -0.24972817301750183, + "step": 8203 + }, + { + "epoch": 5.673582295988935, + "grad_norm": 9.337517738342285, + "learning_rate": 2.4035653911172585e-05, + "log_odds_chosen": 10.392212867736816, + "log_odds_ratio": -0.0002654620911926031, + "logits/chosen": -0.16084915399551392, + "logits/rejected": -0.23642441630363464, + "logps/chosen": -0.001370608457364142, + "logps/rejected": -2.3316516876220703, + "loss": 0.6279, + "nll_loss": 0.15694421529769897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013706083700526506, + "rewards/margins": 0.2330280989408493, + "rewards/rejected": -0.23316514492034912, + "step": 8204 + }, + { + "epoch": 5.674273858921162, + "grad_norm": 8.094923973083496, + "learning_rate": 2.4031811894882437e-05, + "log_odds_chosen": 9.047698974609375, + "log_odds_ratio": -0.0011066325241699815, + "logits/chosen": -0.4470326900482178, + "logits/rejected": -0.4333973824977875, + "logps/chosen": -0.0011469994205981493, + "logps/rejected": -1.259209156036377, + "loss": 0.735, + "nll_loss": 0.18363803625106812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011469994933577254, + "rewards/margins": 0.12580621242523193, + "rewards/rejected": -0.12592092156410217, + "step": 8205 + }, + { + "epoch": 5.674965421853389, + "grad_norm": 38.148014068603516, + "learning_rate": 2.4027969878592286e-05, + "log_odds_chosen": 10.174893379211426, + "log_odds_ratio": -9.261524974135682e-05, + "logits/chosen": -0.3130311965942383, + "logits/rejected": -0.3069872260093689, + "logps/chosen": -0.00035005720565095544, + "logps/rejected": -1.4493703842163086, + "loss": 0.7833, + "nll_loss": 0.19582557678222656, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.500572347547859e-05, + "rewards/margins": 0.14490202069282532, + "rewards/rejected": -0.14493703842163086, + "step": 8206 + }, + { + "epoch": 5.675656984785616, + "grad_norm": 10.254268646240234, + "learning_rate": 2.4024127862302136e-05, + "log_odds_chosen": 10.973251342773438, + "log_odds_ratio": -0.00014097840175963938, + "logits/chosen": -0.5504222512245178, + "logits/rejected": -0.6547200083732605, + "logps/chosen": -0.00023056677309796214, + "logps/rejected": -2.453969955444336, + "loss": 0.6504, + "nll_loss": 0.16258879005908966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3056678401189856e-05, + "rewards/margins": 0.24537393450737, + "rewards/rejected": -0.24539700150489807, + "step": 8207 + }, + { + "epoch": 5.676348547717843, + "grad_norm": 8.336179733276367, + "learning_rate": 2.4020285846011988e-05, + "log_odds_chosen": 9.055521965026855, + "log_odds_ratio": -0.0005176339182071388, + "logits/chosen": -0.435626745223999, + "logits/rejected": -0.360824853181839, + "logps/chosen": -0.0011783160734921694, + "logps/rejected": -1.8434216976165771, + "loss": 0.7676, + "nll_loss": 0.19185706973075867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011783160152845085, + "rewards/margins": 0.18422432243824005, + "rewards/rejected": -0.1843421757221222, + "step": 8208 + }, + { + "epoch": 5.677040110650069, + "grad_norm": 14.023844718933105, + "learning_rate": 2.4016443829721837e-05, + "log_odds_chosen": 10.210799217224121, + "log_odds_ratio": -0.14266715943813324, + "logits/chosen": -0.8347749710083008, + "logits/rejected": -0.8222837448120117, + "logps/chosen": -0.020706988871097565, + "logps/rejected": -2.22721266746521, + "loss": 0.7187, + "nll_loss": 0.16540297865867615, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0020706988871097565, + "rewards/margins": 0.2206505537033081, + "rewards/rejected": -0.22272126376628876, + "step": 8209 + }, + { + "epoch": 5.677731673582296, + "grad_norm": 8.138181686401367, + "learning_rate": 2.401260181343169e-05, + "log_odds_chosen": 9.880661964416504, + "log_odds_ratio": -0.0003724874695762992, + "logits/chosen": -0.6499980092048645, + "logits/rejected": -0.6886671185493469, + "logps/chosen": -0.003275522030889988, + "logps/rejected": -2.115614175796509, + "loss": 0.8486, + "nll_loss": 0.2121048867702484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032755223219282925, + "rewards/margins": 0.21123388409614563, + "rewards/rejected": -0.2115614414215088, + "step": 8210 + }, + { + "epoch": 5.678423236514523, + "grad_norm": 6.953858852386475, + "learning_rate": 2.4008759797141542e-05, + "log_odds_chosen": 10.449197769165039, + "log_odds_ratio": -5.541814607568085e-05, + "logits/chosen": -0.3932764530181885, + "logits/rejected": -0.3913537561893463, + "logps/chosen": -0.0018914844840765, + "logps/rejected": -2.5113325119018555, + "loss": 0.7375, + "nll_loss": 0.18436847627162933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018914842803496867, + "rewards/margins": 0.2509440779685974, + "rewards/rejected": -0.2511332333087921, + "step": 8211 + }, + { + "epoch": 5.67911479944675, + "grad_norm": 4.819328308105469, + "learning_rate": 2.400491778085139e-05, + "log_odds_chosen": 10.556605339050293, + "log_odds_ratio": -0.00014853657921776175, + "logits/chosen": -0.684946596622467, + "logits/rejected": -0.7343550324440002, + "logps/chosen": -0.00017646751075517386, + "logps/rejected": -1.8235328197479248, + "loss": 0.7751, + "nll_loss": 0.19377098977565765, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7646751075517386e-05, + "rewards/margins": 0.18233563005924225, + "rewards/rejected": -0.18235328793525696, + "step": 8212 + }, + { + "epoch": 5.679806362378977, + "grad_norm": 6.816978454589844, + "learning_rate": 2.4001075764561243e-05, + "log_odds_chosen": 9.348443031311035, + "log_odds_ratio": -0.0002749827108345926, + "logits/chosen": -0.8403669595718384, + "logits/rejected": -0.8626226186752319, + "logps/chosen": -0.0004887762479484081, + "logps/rejected": -1.6191821098327637, + "loss": 0.7655, + "nll_loss": 0.19135594367980957, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887762406724505e-05, + "rewards/margins": 0.1618693470954895, + "rewards/rejected": -0.16191820800304413, + "step": 8213 + }, + { + "epoch": 5.680497925311204, + "grad_norm": 8.590983390808105, + "learning_rate": 2.3997233748271096e-05, + "log_odds_chosen": 9.74236011505127, + "log_odds_ratio": -0.002336436416953802, + "logits/chosen": -0.39976224303245544, + "logits/rejected": -0.4997917413711548, + "logps/chosen": -0.001402735011652112, + "logps/rejected": -1.6303997039794922, + "loss": 0.9188, + "nll_loss": 0.22947055101394653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001402735069859773, + "rewards/margins": 0.1628997027873993, + "rewards/rejected": -0.16303998231887817, + "step": 8214 + }, + { + "epoch": 5.68118948824343, + "grad_norm": 4.530670166015625, + "learning_rate": 2.3993391731980945e-05, + "log_odds_chosen": 9.391895294189453, + "log_odds_ratio": -0.00036676725721918046, + "logits/chosen": -0.7029848098754883, + "logits/rejected": -0.7017989158630371, + "logps/chosen": -0.00031811019289307296, + "logps/rejected": -1.4524152278900146, + "loss": 0.5978, + "nll_loss": 0.14941361546516418, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1811017834115773e-05, + "rewards/margins": 0.14520972967147827, + "rewards/rejected": -0.14524152874946594, + "step": 8215 + }, + { + "epoch": 5.681881051175657, + "grad_norm": 12.992388725280762, + "learning_rate": 2.3989549715690794e-05, + "log_odds_chosen": 9.962270736694336, + "log_odds_ratio": -7.181529508670792e-05, + "logits/chosen": -0.7905853986740112, + "logits/rejected": -0.7792447209358215, + "logps/chosen": -0.00018229984561912715, + "logps/rejected": -1.4809046983718872, + "loss": 0.903, + "nll_loss": 0.2257525473833084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8229984561912715e-05, + "rewards/margins": 0.1480722278356552, + "rewards/rejected": -0.14809046685695648, + "step": 8216 + }, + { + "epoch": 5.682572614107884, + "grad_norm": 10.572747230529785, + "learning_rate": 2.3985707699400646e-05, + "log_odds_chosen": 10.466547012329102, + "log_odds_ratio": -0.00023784548102412373, + "logits/chosen": -0.533780574798584, + "logits/rejected": -0.5297945737838745, + "logps/chosen": -0.00042985755135305226, + "logps/rejected": -2.4228312969207764, + "loss": 0.9092, + "nll_loss": 0.2272777557373047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.298575731809251e-05, + "rewards/margins": 0.24224016070365906, + "rewards/rejected": -0.24228313565254211, + "step": 8217 + }, + { + "epoch": 5.683264177040111, + "grad_norm": 24.881248474121094, + "learning_rate": 2.3981865683110495e-05, + "log_odds_chosen": 10.501619338989258, + "log_odds_ratio": -0.00023195492394734174, + "logits/chosen": -0.6462554335594177, + "logits/rejected": -0.7131510376930237, + "logps/chosen": -0.005770097486674786, + "logps/rejected": -2.762514114379883, + "loss": 1.5618, + "nll_loss": 0.3904319107532501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005770098650828004, + "rewards/margins": 0.275674432516098, + "rewards/rejected": -0.2762514352798462, + "step": 8218 + }, + { + "epoch": 5.683955739972338, + "grad_norm": 8.164917945861816, + "learning_rate": 2.3978023666820348e-05, + "log_odds_chosen": 10.335380554199219, + "log_odds_ratio": -0.00032323403866030276, + "logits/chosen": -0.3388156592845917, + "logits/rejected": -0.4354027211666107, + "logps/chosen": -0.0008530388004146516, + "logps/rejected": -2.367396354675293, + "loss": 0.7178, + "nll_loss": 0.17940829694271088, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.530388004146516e-05, + "rewards/margins": 0.2366543412208557, + "rewards/rejected": -0.2367396354675293, + "step": 8219 + }, + { + "epoch": 5.6846473029045645, + "grad_norm": 16.99516487121582, + "learning_rate": 2.39741816505302e-05, + "log_odds_chosen": 10.967035293579102, + "log_odds_ratio": -9.04684275155887e-05, + "logits/chosen": -0.31332796812057495, + "logits/rejected": -0.46520695090293884, + "logps/chosen": -0.0002737205068115145, + "logps/rejected": -2.4684104919433594, + "loss": 0.9361, + "nll_loss": 0.23400893807411194, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7372052500140853e-05, + "rewards/margins": 0.24681369960308075, + "rewards/rejected": -0.24684105813503265, + "step": 8220 + }, + { + "epoch": 5.685338865836791, + "grad_norm": 7.044379234313965, + "learning_rate": 2.397033963424005e-05, + "log_odds_chosen": 9.70529556274414, + "log_odds_ratio": -0.0028542920481413603, + "logits/chosen": -0.5774523019790649, + "logits/rejected": -0.5707895755767822, + "logps/chosen": -0.0018815601943060756, + "logps/rejected": -2.0981202125549316, + "loss": 0.7339, + "nll_loss": 0.18318060040473938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001881560165202245, + "rewards/margins": 0.20962387323379517, + "rewards/rejected": -0.2098120152950287, + "step": 8221 + }, + { + "epoch": 5.686030428769018, + "grad_norm": 5.558027267456055, + "learning_rate": 2.3966497617949902e-05, + "log_odds_chosen": 11.028006553649902, + "log_odds_ratio": -3.5399825719650835e-05, + "logits/chosen": -0.2784597873687744, + "logits/rejected": -0.24438714981079102, + "logps/chosen": -0.00037131988210603595, + "logps/rejected": -2.662822723388672, + "loss": 1.0955, + "nll_loss": 0.2738625109195709, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.713199112098664e-05, + "rewards/margins": 0.26624512672424316, + "rewards/rejected": -0.26628226041793823, + "step": 8222 + }, + { + "epoch": 5.686721991701245, + "grad_norm": 8.467771530151367, + "learning_rate": 2.3962655601659754e-05, + "log_odds_chosen": 10.364076614379883, + "log_odds_ratio": -0.00023806243552826345, + "logits/chosen": -0.7695846557617188, + "logits/rejected": -0.7961927652359009, + "logps/chosen": -0.00040172922308556736, + "logps/rejected": -2.285043716430664, + "loss": 0.5618, + "nll_loss": 0.14042823016643524, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0172926674131304e-05, + "rewards/margins": 0.22846421599388123, + "rewards/rejected": -0.22850438952445984, + "step": 8223 + }, + { + "epoch": 5.687413554633472, + "grad_norm": 5.40841007232666, + "learning_rate": 2.3958813585369603e-05, + "log_odds_chosen": 9.848953247070312, + "log_odds_ratio": -0.00010321783338440582, + "logits/chosen": -0.3513679802417755, + "logits/rejected": -0.4765387177467346, + "logps/chosen": -0.0004271346260793507, + "logps/rejected": -1.7631869316101074, + "loss": 0.7031, + "nll_loss": 0.17576465010643005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.271346551831812e-05, + "rewards/margins": 0.17627596855163574, + "rewards/rejected": -0.1763186901807785, + "step": 8224 + }, + { + "epoch": 5.688105117565699, + "grad_norm": 4.302876949310303, + "learning_rate": 2.3954971569079452e-05, + "log_odds_chosen": 10.70728874206543, + "log_odds_ratio": -5.083745054434985e-05, + "logits/chosen": -0.33189859986305237, + "logits/rejected": -0.4616236984729767, + "logps/chosen": -0.00015526746574323624, + "logps/rejected": -2.0939488410949707, + "loss": 0.5166, + "nll_loss": 0.1291552633047104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5526748029515147e-05, + "rewards/margins": 0.20937936007976532, + "rewards/rejected": -0.2093949019908905, + "step": 8225 + }, + { + "epoch": 5.6887966804979255, + "grad_norm": 5.944268703460693, + "learning_rate": 2.3951129552789305e-05, + "log_odds_chosen": 10.232353210449219, + "log_odds_ratio": -0.0002500133996363729, + "logits/chosen": -0.46743786334991455, + "logits/rejected": -0.5347243547439575, + "logps/chosen": -0.00020009730360470712, + "logps/rejected": -1.7333909273147583, + "loss": 0.7712, + "nll_loss": 0.19276997447013855, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.000972926907707e-05, + "rewards/margins": 0.17331908643245697, + "rewards/rejected": -0.1733390986919403, + "step": 8226 + }, + { + "epoch": 5.689488243430152, + "grad_norm": 10.213455200195312, + "learning_rate": 2.3947287536499154e-05, + "log_odds_chosen": 9.59089469909668, + "log_odds_ratio": -0.0028409319929778576, + "logits/chosen": -0.5655418038368225, + "logits/rejected": -0.6786340475082397, + "logps/chosen": -0.0021009810734540224, + "logps/rejected": -1.8002028465270996, + "loss": 0.6108, + "nll_loss": 0.15240830183029175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021009810734540224, + "rewards/margins": 0.17981019616127014, + "rewards/rejected": -0.1800202876329422, + "step": 8227 + }, + { + "epoch": 5.690179806362379, + "grad_norm": 9.121550559997559, + "learning_rate": 2.3943445520209006e-05, + "log_odds_chosen": 10.495368957519531, + "log_odds_ratio": -4.995317794964649e-05, + "logits/chosen": -0.7145277261734009, + "logits/rejected": -0.744437038898468, + "logps/chosen": -0.0002535338862799108, + "logps/rejected": -1.7618240118026733, + "loss": 0.5839, + "nll_loss": 0.14596140384674072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.535338899178896e-05, + "rewards/margins": 0.17615705728530884, + "rewards/rejected": -0.17618238925933838, + "step": 8228 + }, + { + "epoch": 5.690871369294606, + "grad_norm": 6.346843242645264, + "learning_rate": 2.393960350391886e-05, + "log_odds_chosen": 10.27423095703125, + "log_odds_ratio": -7.538552745245397e-05, + "logits/chosen": -0.4880000352859497, + "logits/rejected": -0.5067594647407532, + "logps/chosen": -0.00036941375583410263, + "logps/rejected": -1.9501086473464966, + "loss": 0.6427, + "nll_loss": 0.16067828238010406, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.694137558341026e-05, + "rewards/margins": 0.1949739158153534, + "rewards/rejected": -0.19501087069511414, + "step": 8229 + }, + { + "epoch": 5.691562932226833, + "grad_norm": 7.4739484786987305, + "learning_rate": 2.3935761487628708e-05, + "log_odds_chosen": 10.826186180114746, + "log_odds_ratio": -3.0438441172009334e-05, + "logits/chosen": -0.5610729455947876, + "logits/rejected": -0.6000049710273743, + "logps/chosen": -0.0002225643111160025, + "logps/rejected": -2.1735219955444336, + "loss": 0.5268, + "nll_loss": 0.1317044347524643, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225643038400449e-05, + "rewards/margins": 0.2173299342393875, + "rewards/rejected": -0.21735221147537231, + "step": 8230 + }, + { + "epoch": 5.69225449515906, + "grad_norm": 8.88032341003418, + "learning_rate": 2.393191947133856e-05, + "log_odds_chosen": 9.630340576171875, + "log_odds_ratio": -0.00022998328495305032, + "logits/chosen": -0.8728919625282288, + "logits/rejected": -0.9178202152252197, + "logps/chosen": -0.00024191971169784665, + "logps/rejected": -1.42867112159729, + "loss": 0.9837, + "nll_loss": 0.24591073393821716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4191969714593142e-05, + "rewards/margins": 0.14284291863441467, + "rewards/rejected": -0.14286711812019348, + "step": 8231 + }, + { + "epoch": 5.6929460580912865, + "grad_norm": 8.377534866333008, + "learning_rate": 2.3928077455048413e-05, + "log_odds_chosen": 10.767512321472168, + "log_odds_ratio": -2.962576581921894e-05, + "logits/chosen": -0.4958113431930542, + "logits/rejected": -0.6360245943069458, + "logps/chosen": -0.00018656565225683153, + "logps/rejected": -2.044919729232788, + "loss": 1.079, + "nll_loss": 0.2697392702102661, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8656564861885272e-05, + "rewards/margins": 0.2044733464717865, + "rewards/rejected": -0.2044919729232788, + "step": 8232 + }, + { + "epoch": 5.693637621023513, + "grad_norm": 8.898459434509277, + "learning_rate": 2.3924235438758262e-05, + "log_odds_chosen": 11.142088890075684, + "log_odds_ratio": -8.624765905551612e-05, + "logits/chosen": -0.6141859889030457, + "logits/rejected": -0.613359272480011, + "logps/chosen": -0.00017005293921101838, + "logps/rejected": -2.2443342208862305, + "loss": 0.6842, + "nll_loss": 0.1710437536239624, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.700529537629336e-05, + "rewards/margins": 0.22441640496253967, + "rewards/rejected": -0.22443342208862305, + "step": 8233 + }, + { + "epoch": 5.69432918395574, + "grad_norm": 8.689697265625, + "learning_rate": 2.3920393422468114e-05, + "log_odds_chosen": 9.910636901855469, + "log_odds_ratio": -0.00013076815230306238, + "logits/chosen": -0.5209373235702515, + "logits/rejected": -0.7035219669342041, + "logps/chosen": -0.0003667376295197755, + "logps/rejected": -1.636942744255066, + "loss": 0.6499, + "nll_loss": 0.16246803104877472, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.667376222438179e-05, + "rewards/margins": 0.16365760564804077, + "rewards/rejected": -0.16369427740573883, + "step": 8234 + }, + { + "epoch": 5.695020746887967, + "grad_norm": 11.052765846252441, + "learning_rate": 2.3916551406177963e-05, + "log_odds_chosen": 10.682456970214844, + "log_odds_ratio": -4.524372343439609e-05, + "logits/chosen": -0.676239550113678, + "logits/rejected": -0.8159259557723999, + "logps/chosen": -0.00013625116844195873, + "logps/rejected": -1.5824081897735596, + "loss": 0.8183, + "nll_loss": 0.20456570386886597, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3625117389892694e-05, + "rewards/margins": 0.15822717547416687, + "rewards/rejected": -0.15824081003665924, + "step": 8235 + }, + { + "epoch": 5.695712309820194, + "grad_norm": 9.405180931091309, + "learning_rate": 2.3912709389887812e-05, + "log_odds_chosen": 11.243492126464844, + "log_odds_ratio": -1.532965325168334e-05, + "logits/chosen": -0.4327881336212158, + "logits/rejected": -0.6123930811882019, + "logps/chosen": -0.0012353898491710424, + "logps/rejected": -2.936528205871582, + "loss": 0.87, + "nll_loss": 0.21749062836170197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012353899364825338, + "rewards/margins": 0.29352930188179016, + "rewards/rejected": -0.29365283250808716, + "step": 8236 + }, + { + "epoch": 5.696403872752421, + "grad_norm": 12.553094863891602, + "learning_rate": 2.3908867373597665e-05, + "log_odds_chosen": 6.747748851776123, + "log_odds_ratio": -0.231939896941185, + "logits/chosen": -0.6761006116867065, + "logits/rejected": -0.734649658203125, + "logps/chosen": -0.037190742790699005, + "logps/rejected": -1.02030611038208, + "loss": 1.3833, + "nll_loss": 0.32261955738067627, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003719074185937643, + "rewards/margins": 0.09831153601408005, + "rewards/rejected": -0.10203061252832413, + "step": 8237 + }, + { + "epoch": 5.6970954356846475, + "grad_norm": 5.807753086090088, + "learning_rate": 2.3905025357307517e-05, + "log_odds_chosen": 10.674144744873047, + "log_odds_ratio": -0.00039669463876634836, + "logits/chosen": -0.3693387508392334, + "logits/rejected": -0.5821853280067444, + "logps/chosen": -0.0008466947474516928, + "logps/rejected": -3.156538724899292, + "loss": 1.0424, + "nll_loss": 0.2605629563331604, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.466947474516928e-05, + "rewards/margins": 0.3155692219734192, + "rewards/rejected": -0.31565389037132263, + "step": 8238 + }, + { + "epoch": 5.697786998616874, + "grad_norm": 9.770577430725098, + "learning_rate": 2.3901183341017366e-05, + "log_odds_chosen": 10.510368347167969, + "log_odds_ratio": -4.034125231555663e-05, + "logits/chosen": -0.5765312910079956, + "logits/rejected": -0.7340801954269409, + "logps/chosen": -0.0008137425757013261, + "logps/rejected": -2.114612102508545, + "loss": 0.5254, + "nll_loss": 0.1313486248254776, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.137425174936652e-05, + "rewards/margins": 0.21137984097003937, + "rewards/rejected": -0.21146121621131897, + "step": 8239 + }, + { + "epoch": 5.698478561549101, + "grad_norm": 7.401946544647217, + "learning_rate": 2.389734132472722e-05, + "log_odds_chosen": 9.060220718383789, + "log_odds_ratio": -0.00040474371053278446, + "logits/chosen": -0.37978866696357727, + "logits/rejected": -0.4097916781902313, + "logps/chosen": -0.0008790887659415603, + "logps/rejected": -1.4040875434875488, + "loss": 0.8951, + "nll_loss": 0.22374194860458374, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.790888387011364e-05, + "rewards/margins": 0.14032085239887238, + "rewards/rejected": -0.14040875434875488, + "step": 8240 + }, + { + "epoch": 5.699170124481328, + "grad_norm": 7.6191277503967285, + "learning_rate": 2.389349930843707e-05, + "log_odds_chosen": 11.199530601501465, + "log_odds_ratio": -3.895882036886178e-05, + "logits/chosen": -0.3387885093688965, + "logits/rejected": -0.5780792236328125, + "logps/chosen": -0.00025989252026192844, + "logps/rejected": -2.5432190895080566, + "loss": 1.016, + "nll_loss": 0.2539976239204407, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5989251298597082e-05, + "rewards/margins": 0.2542959451675415, + "rewards/rejected": -0.2543219327926636, + "step": 8241 + }, + { + "epoch": 5.699861687413555, + "grad_norm": 5.339159965515137, + "learning_rate": 2.388965729214692e-05, + "log_odds_chosen": 11.320333480834961, + "log_odds_ratio": -7.026216917438433e-05, + "logits/chosen": -0.19970078766345978, + "logits/rejected": -0.21898649632930756, + "logps/chosen": -0.00022263993741944432, + "logps/rejected": -2.338035821914673, + "loss": 0.5952, + "nll_loss": 0.1487809121608734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2263993741944432e-05, + "rewards/margins": 0.23378130793571472, + "rewards/rejected": -0.23380357027053833, + "step": 8242 + }, + { + "epoch": 5.700553250345782, + "grad_norm": 10.900066375732422, + "learning_rate": 2.3885815275856773e-05, + "log_odds_chosen": 10.417686462402344, + "log_odds_ratio": -8.494217763654888e-05, + "logits/chosen": -0.42949870228767395, + "logits/rejected": -0.45903027057647705, + "logps/chosen": -0.00041172222699970007, + "logps/rejected": -2.4149889945983887, + "loss": 0.875, + "nll_loss": 0.21874701976776123, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1172221244778484e-05, + "rewards/margins": 0.2414577305316925, + "rewards/rejected": -0.2414989024400711, + "step": 8243 + }, + { + "epoch": 5.7012448132780085, + "grad_norm": 8.390003204345703, + "learning_rate": 2.3881973259566622e-05, + "log_odds_chosen": 11.345580101013184, + "log_odds_ratio": -0.0001110144512495026, + "logits/chosen": -0.23835702240467072, + "logits/rejected": -0.4055521786212921, + "logps/chosen": -0.00022283539874479175, + "logps/rejected": -2.9258501529693604, + "loss": 1.1082, + "nll_loss": 0.27704259753227234, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2283540602074936e-05, + "rewards/margins": 0.29256272315979004, + "rewards/rejected": -0.29258501529693604, + "step": 8244 + }, + { + "epoch": 5.701936376210235, + "grad_norm": 8.030402183532715, + "learning_rate": 2.387813124327647e-05, + "log_odds_chosen": 9.59226131439209, + "log_odds_ratio": -0.0007807939546182752, + "logits/chosen": -0.3640380799770355, + "logits/rejected": -0.3463056683540344, + "logps/chosen": -0.0004926139954477549, + "logps/rejected": -1.7567813396453857, + "loss": 0.7978, + "nll_loss": 0.19936777651309967, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.926140536554158e-05, + "rewards/margins": 0.1756288707256317, + "rewards/rejected": -0.17567814886569977, + "step": 8245 + }, + { + "epoch": 5.702627939142462, + "grad_norm": 11.18429946899414, + "learning_rate": 2.3874289226986323e-05, + "log_odds_chosen": 10.842290878295898, + "log_odds_ratio": -3.7047873775009066e-05, + "logits/chosen": -0.0423475056886673, + "logits/rejected": -0.15324100852012634, + "logps/chosen": -0.00022486448870040476, + "logps/rejected": -2.397068977355957, + "loss": 0.7768, + "nll_loss": 0.19419454038143158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2486448870040476e-05, + "rewards/margins": 0.23968440294265747, + "rewards/rejected": -0.23970690369606018, + "step": 8246 + }, + { + "epoch": 5.703319502074689, + "grad_norm": 12.94444751739502, + "learning_rate": 2.3870447210696176e-05, + "log_odds_chosen": 10.377115249633789, + "log_odds_ratio": -0.0001268679043278098, + "logits/chosen": -0.14172455668449402, + "logits/rejected": -0.20555134117603302, + "logps/chosen": -0.00018239016935694963, + "logps/rejected": -1.9219377040863037, + "loss": 0.5551, + "nll_loss": 0.138749897480011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8239019482280128e-05, + "rewards/margins": 0.19217553734779358, + "rewards/rejected": -0.19219377636909485, + "step": 8247 + }, + { + "epoch": 5.704011065006916, + "grad_norm": 7.755146503448486, + "learning_rate": 2.3866605194406025e-05, + "log_odds_chosen": 10.609502792358398, + "log_odds_ratio": -0.0001665048039285466, + "logits/chosen": -0.09565743803977966, + "logits/rejected": -0.1093120276927948, + "logps/chosen": -0.00014979354455135763, + "logps/rejected": -1.856912612915039, + "loss": 0.8389, + "nll_loss": 0.2096976339817047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4979355000832584e-05, + "rewards/margins": 0.18567627668380737, + "rewards/rejected": -0.18569126725196838, + "step": 8248 + }, + { + "epoch": 5.704702627939143, + "grad_norm": 4.313599586486816, + "learning_rate": 2.3862763178115877e-05, + "log_odds_chosen": 10.78399658203125, + "log_odds_ratio": -8.890665776561946e-05, + "logits/chosen": -0.40093475580215454, + "logits/rejected": -0.5203821063041687, + "logps/chosen": -0.0001348076475551352, + "logps/rejected": -1.8851039409637451, + "loss": 0.4644, + "nll_loss": 0.116102434694767, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.34807651193114e-05, + "rewards/margins": 0.1884969174861908, + "rewards/rejected": -0.18851038813591003, + "step": 8249 + }, + { + "epoch": 5.7053941908713695, + "grad_norm": 4.284151554107666, + "learning_rate": 2.385892116182573e-05, + "log_odds_chosen": 9.743794441223145, + "log_odds_ratio": -0.00013424563803710043, + "logits/chosen": -0.0006545856595039368, + "logits/rejected": 0.04994429275393486, + "logps/chosen": -0.0003481783205643296, + "logps/rejected": -1.907433271408081, + "loss": 0.9276, + "nll_loss": 0.23188096284866333, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.481783642200753e-05, + "rewards/margins": 0.19070850312709808, + "rewards/rejected": -0.1907433271408081, + "step": 8250 + }, + { + "epoch": 5.706085753803596, + "grad_norm": 10.36178970336914, + "learning_rate": 2.385507914553558e-05, + "log_odds_chosen": 9.836285591125488, + "log_odds_ratio": -0.00011164380703121424, + "logits/chosen": -0.49485906958580017, + "logits/rejected": -0.6330108642578125, + "logps/chosen": -0.00021051692601758987, + "logps/rejected": -1.2412182092666626, + "loss": 0.7839, + "nll_loss": 0.19596289098262787, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.105169369315263e-05, + "rewards/margins": 0.12410077452659607, + "rewards/rejected": -0.12412182241678238, + "step": 8251 + }, + { + "epoch": 5.706777316735823, + "grad_norm": 6.942699909210205, + "learning_rate": 2.385123712924543e-05, + "log_odds_chosen": 8.719355583190918, + "log_odds_ratio": -0.00041597173549234867, + "logits/chosen": -0.5135871171951294, + "logits/rejected": -0.5049526691436768, + "logps/chosen": -0.0006854881066828966, + "logps/rejected": -1.5492440462112427, + "loss": 1.1859, + "nll_loss": 0.29642635583877563, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.854880484752357e-05, + "rewards/margins": 0.1548558473587036, + "rewards/rejected": -0.1549244076013565, + "step": 8252 + }, + { + "epoch": 5.70746887966805, + "grad_norm": 6.982730865478516, + "learning_rate": 2.384739511295528e-05, + "log_odds_chosen": 10.45567798614502, + "log_odds_ratio": -0.00011374377936590463, + "logits/chosen": -0.31738218665122986, + "logits/rejected": -0.371385395526886, + "logps/chosen": -0.00034341655555181205, + "logps/rejected": -2.1118595600128174, + "loss": 0.7938, + "nll_loss": 0.1984386444091797, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.434165409998968e-05, + "rewards/margins": 0.2111515998840332, + "rewards/rejected": -0.21118594706058502, + "step": 8253 + }, + { + "epoch": 5.708160442600277, + "grad_norm": 9.220938682556152, + "learning_rate": 2.384355309666513e-05, + "log_odds_chosen": 10.828641891479492, + "log_odds_ratio": -4.158323281444609e-05, + "logits/chosen": -0.21871013939380646, + "logits/rejected": -0.33926716446876526, + "logps/chosen": -0.000680235680192709, + "logps/rejected": -2.519893169403076, + "loss": 0.8333, + "nll_loss": 0.20831838250160217, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.802357529522851e-05, + "rewards/margins": 0.25192129611968994, + "rewards/rejected": -0.25198930501937866, + "step": 8254 + }, + { + "epoch": 5.708852005532504, + "grad_norm": 7.605830669403076, + "learning_rate": 2.383971108037498e-05, + "log_odds_chosen": 9.904296875, + "log_odds_ratio": -0.0009204599191434681, + "logits/chosen": -0.8972567319869995, + "logits/rejected": -0.9149336814880371, + "logps/chosen": -0.001136539620347321, + "logps/rejected": -1.9943983554840088, + "loss": 0.7697, + "nll_loss": 0.19232803583145142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011365396494511515, + "rewards/margins": 0.19932620227336884, + "rewards/rejected": -0.1994398534297943, + "step": 8255 + }, + { + "epoch": 5.70954356846473, + "grad_norm": 11.07079792022705, + "learning_rate": 2.3835869064084834e-05, + "log_odds_chosen": 8.659165382385254, + "log_odds_ratio": -0.12604594230651855, + "logits/chosen": -0.6595401763916016, + "logits/rejected": -0.7154828310012817, + "logps/chosen": -0.019718142226338387, + "logps/rejected": -1.4792919158935547, + "loss": 0.9043, + "nll_loss": 0.2134617567062378, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0019718140829354525, + "rewards/margins": 0.14595738053321838, + "rewards/rejected": -0.14792919158935547, + "step": 8256 + }, + { + "epoch": 5.710235131396957, + "grad_norm": 8.167619705200195, + "learning_rate": 2.3832027047794683e-05, + "log_odds_chosen": 9.714550018310547, + "log_odds_ratio": -0.00034440302988514304, + "logits/chosen": -0.48485106229782104, + "logits/rejected": -0.5268182158470154, + "logps/chosen": -0.0009396661771461368, + "logps/rejected": -1.8895883560180664, + "loss": 0.7271, + "nll_loss": 0.18174050748348236, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.396662062499672e-05, + "rewards/margins": 0.18886488676071167, + "rewards/rejected": -0.18895885348320007, + "step": 8257 + }, + { + "epoch": 5.710926694329184, + "grad_norm": 15.704752922058105, + "learning_rate": 2.3828185031504536e-05, + "log_odds_chosen": 11.482129096984863, + "log_odds_ratio": -2.9630047720274888e-05, + "logits/chosen": -0.26579806208610535, + "logits/rejected": -0.2275182008743286, + "logps/chosen": -0.00010128448047908023, + "logps/rejected": -2.2797024250030518, + "loss": 1.0707, + "nll_loss": 0.2676713764667511, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0128448593604844e-05, + "rewards/margins": 0.22796010971069336, + "rewards/rejected": -0.22797025740146637, + "step": 8258 + }, + { + "epoch": 5.711618257261411, + "grad_norm": 5.285425662994385, + "learning_rate": 2.3824343015214385e-05, + "log_odds_chosen": 9.573975563049316, + "log_odds_ratio": -0.0006210988503880799, + "logits/chosen": -0.48438677191734314, + "logits/rejected": -0.5001848936080933, + "logps/chosen": -0.0025780226569622755, + "logps/rejected": -1.3919250965118408, + "loss": 0.6277, + "nll_loss": 0.15686482191085815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002578022249508649, + "rewards/margins": 0.13893470168113708, + "rewards/rejected": -0.13919252157211304, + "step": 8259 + }, + { + "epoch": 5.712309820193638, + "grad_norm": 6.764977931976318, + "learning_rate": 2.3820500998924237e-05, + "log_odds_chosen": 10.443593978881836, + "log_odds_ratio": -7.994850602699444e-05, + "logits/chosen": -0.5136489868164062, + "logits/rejected": -0.5443819165229797, + "logps/chosen": -0.00024153507547453046, + "logps/rejected": -1.9305315017700195, + "loss": 0.9562, + "nll_loss": 0.23904123902320862, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.415350536466576e-05, + "rewards/margins": 0.19302898645401, + "rewards/rejected": -0.19305315613746643, + "step": 8260 + }, + { + "epoch": 5.713001383125865, + "grad_norm": 5.804108619689941, + "learning_rate": 2.381665898263409e-05, + "log_odds_chosen": 9.328962326049805, + "log_odds_ratio": -0.0003930005186703056, + "logits/chosen": -0.5408710837364197, + "logits/rejected": -0.6241236329078674, + "logps/chosen": -0.0005625694757327437, + "logps/rejected": -1.4583903551101685, + "loss": 0.7266, + "nll_loss": 0.1816195398569107, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.625695121125318e-05, + "rewards/margins": 0.14578276872634888, + "rewards/rejected": -0.14583903551101685, + "step": 8261 + }, + { + "epoch": 5.713692946058091, + "grad_norm": 8.310803413391113, + "learning_rate": 2.381281696634394e-05, + "log_odds_chosen": 8.8518648147583, + "log_odds_ratio": -0.0004088030837010592, + "logits/chosen": -0.7182442545890808, + "logits/rejected": -0.7691875100135803, + "logps/chosen": -0.0009055974660441279, + "logps/rejected": -1.6568962335586548, + "loss": 0.7927, + "nll_loss": 0.19813647866249084, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.055974805960432e-05, + "rewards/margins": 0.16559907793998718, + "rewards/rejected": -0.1656896471977234, + "step": 8262 + }, + { + "epoch": 5.714384508990318, + "grad_norm": 5.538098335266113, + "learning_rate": 2.3808974950053788e-05, + "log_odds_chosen": 9.040349960327148, + "log_odds_ratio": -0.00048364241956733167, + "logits/chosen": -0.5108079314231873, + "logits/rejected": -0.5672467947006226, + "logps/chosen": -0.0017460084054619074, + "logps/rejected": -1.8536648750305176, + "loss": 0.7697, + "nll_loss": 0.1923697143793106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017460084927733988, + "rewards/margins": 0.18519188463687897, + "rewards/rejected": -0.18536648154258728, + "step": 8263 + }, + { + "epoch": 5.715076071922545, + "grad_norm": 4.487985134124756, + "learning_rate": 2.380513293376364e-05, + "log_odds_chosen": 9.942371368408203, + "log_odds_ratio": -0.0001654111547395587, + "logits/chosen": -0.5469893217086792, + "logits/rejected": -0.5883113145828247, + "logps/chosen": -0.000214441679418087, + "logps/rejected": -1.191213846206665, + "loss": 0.7364, + "nll_loss": 0.18408462405204773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.14441679418087e-05, + "rewards/margins": 0.1190999448299408, + "rewards/rejected": -0.11912138015031815, + "step": 8264 + }, + { + "epoch": 5.715767634854772, + "grad_norm": 5.776425361633301, + "learning_rate": 2.380129091747349e-05, + "log_odds_chosen": 11.487066268920898, + "log_odds_ratio": -7.581858517369255e-05, + "logits/chosen": -0.38149914145469666, + "logits/rejected": -0.47518178820610046, + "logps/chosen": -0.00017071723414119333, + "logps/rejected": -2.243966579437256, + "loss": 0.73, + "nll_loss": 0.18249055743217468, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7071723050321452e-05, + "rewards/margins": 0.22437959909439087, + "rewards/rejected": -0.22439667582511902, + "step": 8265 + }, + { + "epoch": 5.716459197786999, + "grad_norm": 9.092817306518555, + "learning_rate": 2.379744890118334e-05, + "log_odds_chosen": 10.952812194824219, + "log_odds_ratio": -4.257060936652124e-05, + "logits/chosen": -0.6757196187973022, + "logits/rejected": -0.6657350063323975, + "logps/chosen": -0.001055280677974224, + "logps/rejected": -2.6038944721221924, + "loss": 0.964, + "nll_loss": 0.24098624289035797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010552808089414611, + "rewards/margins": 0.2602839469909668, + "rewards/rejected": -0.2603894770145416, + "step": 8266 + }, + { + "epoch": 5.717150760719226, + "grad_norm": 5.761097431182861, + "learning_rate": 2.3793606884893194e-05, + "log_odds_chosen": 10.911550521850586, + "log_odds_ratio": -4.369751695776358e-05, + "logits/chosen": -0.5501573085784912, + "logits/rejected": -0.622515857219696, + "logps/chosen": -0.00017440738156437874, + "logps/rejected": -2.022120237350464, + "loss": 0.6955, + "nll_loss": 0.17388296127319336, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7440739611629397e-05, + "rewards/margins": 0.20219460129737854, + "rewards/rejected": -0.20221203565597534, + "step": 8267 + }, + { + "epoch": 5.717842323651452, + "grad_norm": 7.399377822875977, + "learning_rate": 2.3789764868603043e-05, + "log_odds_chosen": 8.654727935791016, + "log_odds_ratio": -0.0005356417968869209, + "logits/chosen": -0.3415067791938782, + "logits/rejected": -0.369626522064209, + "logps/chosen": -0.004428997170180082, + "logps/rejected": -1.865692138671875, + "loss": 0.8491, + "nll_loss": 0.21222692728042603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044289970537647605, + "rewards/margins": 0.18612630665302277, + "rewards/rejected": -0.1865692138671875, + "step": 8268 + }, + { + "epoch": 5.718533886583679, + "grad_norm": 8.111486434936523, + "learning_rate": 2.3785922852312896e-05, + "log_odds_chosen": 10.670412063598633, + "log_odds_ratio": -6.3931496697478e-05, + "logits/chosen": -0.624998152256012, + "logits/rejected": -0.6532515287399292, + "logps/chosen": -0.0002908039605244994, + "logps/rejected": -1.9806416034698486, + "loss": 0.8571, + "nll_loss": 0.21425631642341614, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9080396416247822e-05, + "rewards/margins": 0.1980350911617279, + "rewards/rejected": -0.1980641782283783, + "step": 8269 + }, + { + "epoch": 5.719225449515906, + "grad_norm": 8.084224700927734, + "learning_rate": 2.3782080836022748e-05, + "log_odds_chosen": 11.551338195800781, + "log_odds_ratio": -1.5403145880554803e-05, + "logits/chosen": -0.40487638115882874, + "logits/rejected": -0.5136086344718933, + "logps/chosen": -0.00013520754873752594, + "logps/rejected": -2.4729225635528564, + "loss": 0.6435, + "nll_loss": 0.16087886691093445, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3520754691853654e-05, + "rewards/margins": 0.24727874994277954, + "rewards/rejected": -0.24729228019714355, + "step": 8270 + }, + { + "epoch": 5.719917012448133, + "grad_norm": 7.687830924987793, + "learning_rate": 2.3778238819732597e-05, + "log_odds_chosen": 10.32638168334961, + "log_odds_ratio": -0.0001268651831196621, + "logits/chosen": -0.4124564230442047, + "logits/rejected": -0.46572574973106384, + "logps/chosen": -0.00041066654375754297, + "logps/rejected": -2.4294347763061523, + "loss": 0.5519, + "nll_loss": 0.13796168565750122, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1066658013733104e-05, + "rewards/margins": 0.24290242791175842, + "rewards/rejected": -0.24294348061084747, + "step": 8271 + }, + { + "epoch": 5.72060857538036, + "grad_norm": 6.698635578155518, + "learning_rate": 2.3774396803442446e-05, + "log_odds_chosen": 9.576118469238281, + "log_odds_ratio": -0.0013523140223696828, + "logits/chosen": -0.2256212830543518, + "logits/rejected": -0.3111118972301483, + "logps/chosen": -0.005982575472444296, + "logps/rejected": -2.017214775085449, + "loss": 0.5258, + "nll_loss": 0.13131293654441833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005982575239613652, + "rewards/margins": 0.2011232078075409, + "rewards/rejected": -0.20172148942947388, + "step": 8272 + }, + { + "epoch": 5.7213001383125865, + "grad_norm": 8.149397850036621, + "learning_rate": 2.37705547871523e-05, + "log_odds_chosen": 10.184762954711914, + "log_odds_ratio": -0.00050318893045187, + "logits/chosen": -0.5884162187576294, + "logits/rejected": -0.6161515712738037, + "logps/chosen": -0.0006764706922695041, + "logps/rejected": -2.1969189643859863, + "loss": 0.6321, + "nll_loss": 0.15796729922294617, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.764707359252498e-05, + "rewards/margins": 0.21962425112724304, + "rewards/rejected": -0.2196919023990631, + "step": 8273 + }, + { + "epoch": 5.721991701244813, + "grad_norm": 5.989152431488037, + "learning_rate": 2.3766712770862148e-05, + "log_odds_chosen": 10.977209091186523, + "log_odds_ratio": -0.00010491662396816537, + "logits/chosen": -0.5030311346054077, + "logits/rejected": -0.5460888147354126, + "logps/chosen": -0.00025720163830555975, + "logps/rejected": -2.2601795196533203, + "loss": 0.5065, + "nll_loss": 0.12660832703113556, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5720162739162333e-05, + "rewards/margins": 0.22599226236343384, + "rewards/rejected": -0.22601798176765442, + "step": 8274 + }, + { + "epoch": 5.72268326417704, + "grad_norm": 7.687798023223877, + "learning_rate": 2.3762870754572e-05, + "log_odds_chosen": 11.152847290039062, + "log_odds_ratio": -8.137941040331498e-05, + "logits/chosen": -0.33489474654197693, + "logits/rejected": -0.3867482542991638, + "logps/chosen": -0.00017074895731639117, + "logps/rejected": -2.54575777053833, + "loss": 1.1304, + "nll_loss": 0.28258955478668213, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7074895367841236e-05, + "rewards/margins": 0.2545586824417114, + "rewards/rejected": -0.2545757591724396, + "step": 8275 + }, + { + "epoch": 5.723374827109267, + "grad_norm": 8.79166316986084, + "learning_rate": 2.3759028738281852e-05, + "log_odds_chosen": 10.480096817016602, + "log_odds_ratio": -7.668240141356364e-05, + "logits/chosen": -0.5375388264656067, + "logits/rejected": -0.5943214893341064, + "logps/chosen": -0.0003251215966884047, + "logps/rejected": -2.195033073425293, + "loss": 0.6961, + "nll_loss": 0.17401918768882751, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2512158213648945e-05, + "rewards/margins": 0.2194707840681076, + "rewards/rejected": -0.21950331330299377, + "step": 8276 + }, + { + "epoch": 5.724066390041494, + "grad_norm": 11.357304573059082, + "learning_rate": 2.37551867219917e-05, + "log_odds_chosen": 10.792548179626465, + "log_odds_ratio": -4.351916868472472e-05, + "logits/chosen": -0.5698350071907043, + "logits/rejected": -0.6279833316802979, + "logps/chosen": -0.0003448599891271442, + "logps/rejected": -2.2444381713867188, + "loss": 0.6254, + "nll_loss": 0.15633320808410645, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.448599818511866e-05, + "rewards/margins": 0.2244093418121338, + "rewards/rejected": -0.22444380819797516, + "step": 8277 + }, + { + "epoch": 5.724757952973721, + "grad_norm": 11.965065002441406, + "learning_rate": 2.3751344705701554e-05, + "log_odds_chosen": 10.649494171142578, + "log_odds_ratio": -3.594794179662131e-05, + "logits/chosen": -0.3620974123477936, + "logits/rejected": -0.45498672127723694, + "logps/chosen": -0.0004868964897468686, + "logps/rejected": -2.1574292182922363, + "loss": 0.4775, + "nll_loss": 0.11938130855560303, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.868964970228262e-05, + "rewards/margins": 0.21569424867630005, + "rewards/rejected": -0.21574293076992035, + "step": 8278 + }, + { + "epoch": 5.7254495159059475, + "grad_norm": 7.889781951904297, + "learning_rate": 2.3747502689411406e-05, + "log_odds_chosen": 9.396888732910156, + "log_odds_ratio": -0.0008098037214949727, + "logits/chosen": -0.6873564720153809, + "logits/rejected": -0.6436290144920349, + "logps/chosen": -0.0009592488058842719, + "logps/rejected": -1.727072834968567, + "loss": 0.8465, + "nll_loss": 0.21155577898025513, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.592487913323566e-05, + "rewards/margins": 0.17261135578155518, + "rewards/rejected": -0.17270728945732117, + "step": 8279 + }, + { + "epoch": 5.726141078838174, + "grad_norm": 14.263683319091797, + "learning_rate": 2.3743660673121255e-05, + "log_odds_chosen": 10.230692863464355, + "log_odds_ratio": -0.00189583795145154, + "logits/chosen": -0.6234297752380371, + "logits/rejected": -0.7451103925704956, + "logps/chosen": -0.013375808484852314, + "logps/rejected": -2.628518581390381, + "loss": 0.4403, + "nll_loss": 0.10988589376211166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013375808484852314, + "rewards/margins": 0.261514276266098, + "rewards/rejected": -0.2628518342971802, + "step": 8280 + }, + { + "epoch": 5.726832641770401, + "grad_norm": 7.551321983337402, + "learning_rate": 2.3739818656831105e-05, + "log_odds_chosen": 9.866402626037598, + "log_odds_ratio": -0.0008807494305074215, + "logits/chosen": -0.5730447173118591, + "logits/rejected": -0.5540003776550293, + "logps/chosen": -0.000991776934824884, + "logps/rejected": -1.3968546390533447, + "loss": 0.439, + "nll_loss": 0.10965709388256073, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.917769057210535e-05, + "rewards/margins": 0.13958629965782166, + "rewards/rejected": -0.1396854668855667, + "step": 8281 + }, + { + "epoch": 5.727524204702628, + "grad_norm": 8.65972900390625, + "learning_rate": 2.3735976640540957e-05, + "log_odds_chosen": 10.859101295471191, + "log_odds_ratio": -6.727507570758462e-05, + "logits/chosen": -0.6840611696243286, + "logits/rejected": -0.6930698752403259, + "logps/chosen": -0.0003340707626193762, + "logps/rejected": -2.5840611457824707, + "loss": 0.6857, + "nll_loss": 0.17142435908317566, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3407079172320664e-05, + "rewards/margins": 0.2583727240562439, + "rewards/rejected": -0.2584061026573181, + "step": 8282 + }, + { + "epoch": 5.728215767634855, + "grad_norm": 15.318198204040527, + "learning_rate": 2.3732134624250806e-05, + "log_odds_chosen": 9.745034217834473, + "log_odds_ratio": -0.000523479888215661, + "logits/chosen": -0.7020419836044312, + "logits/rejected": -0.69991135597229, + "logps/chosen": -0.0028948565013706684, + "logps/rejected": -2.178514003753662, + "loss": 0.6398, + "nll_loss": 0.15990720689296722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028948564431630075, + "rewards/margins": 0.21756193041801453, + "rewards/rejected": -0.2178514152765274, + "step": 8283 + }, + { + "epoch": 5.728907330567082, + "grad_norm": 10.038471221923828, + "learning_rate": 2.372829260796066e-05, + "log_odds_chosen": 10.421679496765137, + "log_odds_ratio": -4.225273005431518e-05, + "logits/chosen": -0.6500394940376282, + "logits/rejected": -0.5979303121566772, + "logps/chosen": -0.00040729602915234864, + "logps/rejected": -2.4221601486206055, + "loss": 1.2355, + "nll_loss": 0.3088797926902771, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072960655321367e-05, + "rewards/margins": 0.24217528104782104, + "rewards/rejected": -0.24221599102020264, + "step": 8284 + }, + { + "epoch": 5.7295988934993085, + "grad_norm": 7.296390533447266, + "learning_rate": 2.372445059167051e-05, + "log_odds_chosen": 10.552610397338867, + "log_odds_ratio": -4.067786721861921e-05, + "logits/chosen": -0.259600967168808, + "logits/rejected": -0.38736289739608765, + "logps/chosen": -0.00021202483912929893, + "logps/rejected": -2.1145904064178467, + "loss": 0.828, + "nll_loss": 0.20700691640377045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1202484276727773e-05, + "rewards/margins": 0.21143783628940582, + "rewards/rejected": -0.21145904064178467, + "step": 8285 + }, + { + "epoch": 5.730290456431535, + "grad_norm": 9.419576644897461, + "learning_rate": 2.372060857538036e-05, + "log_odds_chosen": 9.820661544799805, + "log_odds_ratio": -0.0002437801013002172, + "logits/chosen": -0.41803741455078125, + "logits/rejected": -0.5837389826774597, + "logps/chosen": -0.0005225567147135735, + "logps/rejected": -1.7315198183059692, + "loss": 0.6794, + "nll_loss": 0.16982489824295044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.225568020250648e-05, + "rewards/margins": 0.17309972643852234, + "rewards/rejected": -0.17315198481082916, + "step": 8286 + }, + { + "epoch": 5.730982019363762, + "grad_norm": 9.923052787780762, + "learning_rate": 2.3716766559090212e-05, + "log_odds_chosen": 11.267467498779297, + "log_odds_ratio": -2.611328636703547e-05, + "logits/chosen": -0.2683943212032318, + "logits/rejected": -0.3119713068008423, + "logps/chosen": -0.0002064039435936138, + "logps/rejected": -2.690495252609253, + "loss": 0.721, + "nll_loss": 0.1802428811788559, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.064039472315926e-05, + "rewards/margins": 0.269028902053833, + "rewards/rejected": -0.2690495252609253, + "step": 8287 + }, + { + "epoch": 5.731673582295989, + "grad_norm": 6.981366157531738, + "learning_rate": 2.3712924542800065e-05, + "log_odds_chosen": 10.127792358398438, + "log_odds_ratio": -0.0002459138340782374, + "logits/chosen": -0.10545951128005981, + "logits/rejected": -0.11636913567781448, + "logps/chosen": -0.0017972186906263232, + "logps/rejected": -2.027972459793091, + "loss": 0.7241, + "nll_loss": 0.18100659549236298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017972187197301537, + "rewards/margins": 0.20261752605438232, + "rewards/rejected": -0.20279724895954132, + "step": 8288 + }, + { + "epoch": 5.732365145228216, + "grad_norm": 8.613247871398926, + "learning_rate": 2.3709082526509914e-05, + "log_odds_chosen": 10.950531959533691, + "log_odds_ratio": -6.212839798536152e-05, + "logits/chosen": -0.49585795402526855, + "logits/rejected": -0.5744040012359619, + "logps/chosen": -0.0002617794962134212, + "logps/rejected": -2.5418455600738525, + "loss": 1.2651, + "nll_loss": 0.3162631690502167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6177949621342123e-05, + "rewards/margins": 0.2541584074497223, + "rewards/rejected": -0.2541845738887787, + "step": 8289 + }, + { + "epoch": 5.733056708160443, + "grad_norm": 5.971242427825928, + "learning_rate": 2.3705240510219763e-05, + "log_odds_chosen": 9.750470161437988, + "log_odds_ratio": -0.00015549581439699978, + "logits/chosen": -0.6675084829330444, + "logits/rejected": -0.6928970813751221, + "logps/chosen": -0.000278493738733232, + "logps/rejected": -1.3103184700012207, + "loss": 0.5449, + "nll_loss": 0.13620929419994354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7849375328514725e-05, + "rewards/margins": 0.13100400567054749, + "rewards/rejected": -0.1310318410396576, + "step": 8290 + }, + { + "epoch": 5.7337482710926695, + "grad_norm": 9.22486400604248, + "learning_rate": 2.3701398493929615e-05, + "log_odds_chosen": 10.807173728942871, + "log_odds_ratio": -4.7663743316661566e-05, + "logits/chosen": -0.46042823791503906, + "logits/rejected": -0.4756005108356476, + "logps/chosen": -0.0004922206280753016, + "logps/rejected": -2.3511030673980713, + "loss": 0.9725, + "nll_loss": 0.24313212931156158, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.922206426272169e-05, + "rewards/margins": 0.23506109416484833, + "rewards/rejected": -0.2351103127002716, + "step": 8291 + }, + { + "epoch": 5.734439834024896, + "grad_norm": 7.93418550491333, + "learning_rate": 2.3697556477639464e-05, + "log_odds_chosen": 10.170130729675293, + "log_odds_ratio": -0.00011982273281319067, + "logits/chosen": -0.5709888935089111, + "logits/rejected": -0.5875977277755737, + "logps/chosen": -0.0004376860451884568, + "logps/rejected": -1.9982610940933228, + "loss": 0.6622, + "nll_loss": 0.16554518043994904, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3768603063654155e-05, + "rewards/margins": 0.1997823566198349, + "rewards/rejected": -0.19982610642910004, + "step": 8292 + }, + { + "epoch": 5.735131396957123, + "grad_norm": 10.620789527893066, + "learning_rate": 2.3693714461349317e-05, + "log_odds_chosen": 10.807987213134766, + "log_odds_ratio": -0.00010772298264782876, + "logits/chosen": -0.4360056519508362, + "logits/rejected": -0.5167050361633301, + "logps/chosen": -0.0010975584154948592, + "logps/rejected": -2.4652209281921387, + "loss": 0.5281, + "nll_loss": 0.13200679421424866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010975583427352831, + "rewards/margins": 0.24641233682632446, + "rewards/rejected": -0.24652206897735596, + "step": 8293 + }, + { + "epoch": 5.73582295988935, + "grad_norm": 8.644923210144043, + "learning_rate": 2.368987244505917e-05, + "log_odds_chosen": 10.958145141601562, + "log_odds_ratio": -3.0786028219154105e-05, + "logits/chosen": -0.44990062713623047, + "logits/rejected": -0.5274561047554016, + "logps/chosen": -0.00019371551752556115, + "logps/rejected": -2.090069055557251, + "loss": 0.7833, + "nll_loss": 0.19581928849220276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9371555026737042e-05, + "rewards/margins": 0.2089875340461731, + "rewards/rejected": -0.2090069055557251, + "step": 8294 + }, + { + "epoch": 5.736514522821577, + "grad_norm": 4.456380367279053, + "learning_rate": 2.368603042876902e-05, + "log_odds_chosen": 9.164673805236816, + "log_odds_ratio": -0.006682587321847677, + "logits/chosen": -0.15113608539104462, + "logits/rejected": -0.227360799908638, + "logps/chosen": -0.0032797495368868113, + "logps/rejected": -2.0092923641204834, + "loss": 0.9032, + "nll_loss": 0.22512412071228027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032797493622638285, + "rewards/margins": 0.2006012499332428, + "rewards/rejected": -0.20092923939228058, + "step": 8295 + }, + { + "epoch": 5.737206085753804, + "grad_norm": 7.534974098205566, + "learning_rate": 2.368218841247887e-05, + "log_odds_chosen": 10.547529220581055, + "log_odds_ratio": -0.0002148733037756756, + "logits/chosen": -0.44200295209884644, + "logits/rejected": -0.5098553895950317, + "logps/chosen": -0.0008662916370667517, + "logps/rejected": -2.1029608249664307, + "loss": 0.5069, + "nll_loss": 0.1267036497592926, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.662916661705822e-05, + "rewards/margins": 0.210209459066391, + "rewards/rejected": -0.21029609441757202, + "step": 8296 + }, + { + "epoch": 5.7378976486860305, + "grad_norm": 9.358753204345703, + "learning_rate": 2.3678346396188723e-05, + "log_odds_chosen": 10.221595764160156, + "log_odds_ratio": -0.0011085877195000648, + "logits/chosen": -0.2000870555639267, + "logits/rejected": -0.22448882460594177, + "logps/chosen": -0.0008927193703129888, + "logps/rejected": -1.9040881395339966, + "loss": 1.1827, + "nll_loss": 0.29555743932724, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.927193994168192e-05, + "rewards/margins": 0.19031953811645508, + "rewards/rejected": -0.19040879607200623, + "step": 8297 + }, + { + "epoch": 5.738589211618257, + "grad_norm": 7.413073539733887, + "learning_rate": 2.3674504379898572e-05, + "log_odds_chosen": 9.21097469329834, + "log_odds_ratio": -0.00043296560761518776, + "logits/chosen": -0.333681583404541, + "logits/rejected": -0.4281213879585266, + "logps/chosen": -0.0006029916112311184, + "logps/rejected": -1.3785467147827148, + "loss": 0.6211, + "nll_loss": 0.15522557497024536, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.029916039551608e-05, + "rewards/margins": 0.1377943903207779, + "rewards/rejected": -0.1378546804189682, + "step": 8298 + }, + { + "epoch": 5.739280774550484, + "grad_norm": 6.0836992263793945, + "learning_rate": 2.367066236360842e-05, + "log_odds_chosen": 10.23131275177002, + "log_odds_ratio": -0.00010292407387169078, + "logits/chosen": -0.3584192395210266, + "logits/rejected": -0.46633875370025635, + "logps/chosen": -0.0003847597981803119, + "logps/rejected": -1.8952624797821045, + "loss": 0.5546, + "nll_loss": 0.13863810896873474, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.847598054562695e-05, + "rewards/margins": 0.1894877552986145, + "rewards/rejected": -0.18952623009681702, + "step": 8299 + }, + { + "epoch": 5.739972337482711, + "grad_norm": 7.007051467895508, + "learning_rate": 2.3666820347318274e-05, + "log_odds_chosen": 9.6890869140625, + "log_odds_ratio": -0.00029695930425077677, + "logits/chosen": -0.7456952333450317, + "logits/rejected": -0.7626982927322388, + "logps/chosen": -0.011276169680058956, + "logps/rejected": -2.1853461265563965, + "loss": 0.7108, + "nll_loss": 0.17767959833145142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011276170844212174, + "rewards/margins": 0.2174069881439209, + "rewards/rejected": -0.21853460371494293, + "step": 8300 + }, + { + "epoch": 5.740663900414938, + "grad_norm": 7.881728172302246, + "learning_rate": 2.3662978331028123e-05, + "log_odds_chosen": 10.861671447753906, + "log_odds_ratio": -8.425668784184381e-05, + "logits/chosen": -0.25384053587913513, + "logits/rejected": -0.31320399045944214, + "logps/chosen": -0.0002361613733228296, + "logps/rejected": -2.5008726119995117, + "loss": 0.601, + "nll_loss": 0.15024977922439575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.361613769608084e-05, + "rewards/margins": 0.2500636577606201, + "rewards/rejected": -0.25008726119995117, + "step": 8301 + }, + { + "epoch": 5.741355463347165, + "grad_norm": 5.908812999725342, + "learning_rate": 2.3659136314737975e-05, + "log_odds_chosen": 11.111687660217285, + "log_odds_ratio": -3.3626787626417354e-05, + "logits/chosen": -0.4688529372215271, + "logits/rejected": -0.41200196743011475, + "logps/chosen": -0.00020868267165496945, + "logps/rejected": -2.5753095149993896, + "loss": 0.8032, + "nll_loss": 0.20080536603927612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0868266801699065e-05, + "rewards/margins": 0.25751006603240967, + "rewards/rejected": -0.25753095746040344, + "step": 8302 + }, + { + "epoch": 5.7420470262793915, + "grad_norm": 8.06871509552002, + "learning_rate": 2.3655294298447828e-05, + "log_odds_chosen": 9.761555671691895, + "log_odds_ratio": -0.00042566165211610496, + "logits/chosen": -0.6165575981140137, + "logits/rejected": -0.6146994829177856, + "logps/chosen": -0.00041026706458069384, + "logps/rejected": -1.7203035354614258, + "loss": 0.7944, + "nll_loss": 0.19855532050132751, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1026702092494816e-05, + "rewards/margins": 0.1719893217086792, + "rewards/rejected": -0.17203034460544586, + "step": 8303 + }, + { + "epoch": 5.742738589211618, + "grad_norm": 8.098464012145996, + "learning_rate": 2.3651452282157677e-05, + "log_odds_chosen": 10.305068016052246, + "log_odds_ratio": -0.00018514314433559775, + "logits/chosen": -0.7303239107131958, + "logits/rejected": -0.6914010047912598, + "logps/chosen": -0.0005240870523266494, + "logps/rejected": -1.8838518857955933, + "loss": 0.5746, + "nll_loss": 0.14362111687660217, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.240870814304799e-05, + "rewards/margins": 0.18833279609680176, + "rewards/rejected": -0.18838520348072052, + "step": 8304 + }, + { + "epoch": 5.743430152143845, + "grad_norm": 8.639131546020508, + "learning_rate": 2.364761026586753e-05, + "log_odds_chosen": 11.053112983703613, + "log_odds_ratio": -3.3354688639519736e-05, + "logits/chosen": -0.3220217823982239, + "logits/rejected": -0.3667411208152771, + "logps/chosen": -0.00018558189913164824, + "logps/rejected": -2.0954384803771973, + "loss": 0.6619, + "nll_loss": 0.1654653400182724, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8558190276962705e-05, + "rewards/margins": 0.20952528715133667, + "rewards/rejected": -0.2095438539981842, + "step": 8305 + }, + { + "epoch": 5.744121715076072, + "grad_norm": 9.16308879852295, + "learning_rate": 2.3643768249577382e-05, + "log_odds_chosen": 9.564486503601074, + "log_odds_ratio": -0.00025545209064148366, + "logits/chosen": -0.4439648985862732, + "logits/rejected": -0.4105756878852844, + "logps/chosen": -0.0007857171585783362, + "logps/rejected": -1.612653136253357, + "loss": 1.0796, + "nll_loss": 0.2698758542537689, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.857171294745058e-05, + "rewards/margins": 0.16118673980236053, + "rewards/rejected": -0.1612653136253357, + "step": 8306 + }, + { + "epoch": 5.744813278008299, + "grad_norm": 9.436599731445312, + "learning_rate": 2.363992623328723e-05, + "log_odds_chosen": 10.715935707092285, + "log_odds_ratio": -3.4208103897981346e-05, + "logits/chosen": -0.6278020143508911, + "logits/rejected": -0.7652894258499146, + "logps/chosen": -0.0002282711793668568, + "logps/rejected": -1.8947447538375854, + "loss": 0.7806, + "nll_loss": 0.19513654708862305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.282711648149416e-05, + "rewards/margins": 0.1894516497850418, + "rewards/rejected": -0.18947447836399078, + "step": 8307 + }, + { + "epoch": 5.745504840940526, + "grad_norm": 10.356780052185059, + "learning_rate": 2.363608421699708e-05, + "log_odds_chosen": 10.806076049804688, + "log_odds_ratio": -2.597077764221467e-05, + "logits/chosen": -0.5891835689544678, + "logits/rejected": -0.6955010294914246, + "logps/chosen": -0.00022716508829034865, + "logps/rejected": -2.125861167907715, + "loss": 1.0314, + "nll_loss": 0.2578585147857666, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2716509192832746e-05, + "rewards/margins": 0.2125634104013443, + "rewards/rejected": -0.2125861495733261, + "step": 8308 + }, + { + "epoch": 5.746196403872752, + "grad_norm": 6.441484451293945, + "learning_rate": 2.3632242200706932e-05, + "log_odds_chosen": 10.06994342803955, + "log_odds_ratio": -0.00023832148872315884, + "logits/chosen": -0.2060949206352234, + "logits/rejected": -0.24644814431667328, + "logps/chosen": -0.001094201346859336, + "logps/rejected": -2.2449793815612793, + "loss": 0.6008, + "nll_loss": 0.15017682313919067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010942013614112511, + "rewards/margins": 0.2243885099887848, + "rewards/rejected": -0.2244979292154312, + "step": 8309 + }, + { + "epoch": 5.746887966804979, + "grad_norm": 6.610924243927002, + "learning_rate": 2.362840018441678e-05, + "log_odds_chosen": 10.011903762817383, + "log_odds_ratio": -7.450394332408905e-05, + "logits/chosen": -0.5421202182769775, + "logits/rejected": -0.48485058546066284, + "logps/chosen": -0.0002872304758056998, + "logps/rejected": -1.7974812984466553, + "loss": 0.9211, + "nll_loss": 0.23026490211486816, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8723048671963625e-05, + "rewards/margins": 0.17971941828727722, + "rewards/rejected": -0.17974814772605896, + "step": 8310 + }, + { + "epoch": 5.747579529737206, + "grad_norm": 7.050563812255859, + "learning_rate": 2.3624558168126634e-05, + "log_odds_chosen": 12.180794715881348, + "log_odds_ratio": -6.411512913473416e-06, + "logits/chosen": -0.4151165783405304, + "logits/rejected": -0.5160906910896301, + "logps/chosen": -0.0001374170824419707, + "logps/rejected": -2.9264304637908936, + "loss": 0.5877, + "nll_loss": 0.14693555235862732, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3741709153691772e-05, + "rewards/margins": 0.29262930154800415, + "rewards/rejected": -0.29264307022094727, + "step": 8311 + }, + { + "epoch": 5.748271092669433, + "grad_norm": 6.463709354400635, + "learning_rate": 2.3620716151836486e-05, + "log_odds_chosen": 9.185981750488281, + "log_odds_ratio": -0.010154437273740768, + "logits/chosen": -0.40779149532318115, + "logits/rejected": -0.3674390912055969, + "logps/chosen": -0.003874379675835371, + "logps/rejected": -1.2728424072265625, + "loss": 0.7317, + "nll_loss": 0.18190543353557587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000387437961762771, + "rewards/margins": 0.12689681351184845, + "rewards/rejected": -0.12728425860404968, + "step": 8312 + }, + { + "epoch": 5.74896265560166, + "grad_norm": 6.199563980102539, + "learning_rate": 2.3616874135546335e-05, + "log_odds_chosen": 10.591329574584961, + "log_odds_ratio": -9.259778744308278e-05, + "logits/chosen": -0.5904867053031921, + "logits/rejected": -0.6726003885269165, + "logps/chosen": -0.0002847728901542723, + "logps/rejected": -2.216808557510376, + "loss": 0.5315, + "nll_loss": 0.13287466764450073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8477286832639948e-05, + "rewards/margins": 0.22165238857269287, + "rewards/rejected": -0.22168086469173431, + "step": 8313 + }, + { + "epoch": 5.749654218533887, + "grad_norm": 7.432782173156738, + "learning_rate": 2.3613032119256188e-05, + "log_odds_chosen": 9.365463256835938, + "log_odds_ratio": -0.00235193083062768, + "logits/chosen": -0.6185011267662048, + "logits/rejected": -0.6692292094230652, + "logps/chosen": -0.0015238788910210133, + "logps/rejected": -1.8370863199234009, + "loss": 0.7764, + "nll_loss": 0.19387227296829224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001523878745501861, + "rewards/margins": 0.18355624377727509, + "rewards/rejected": -0.18370862305164337, + "step": 8314 + }, + { + "epoch": 5.750345781466113, + "grad_norm": 9.817612648010254, + "learning_rate": 2.360919010296604e-05, + "log_odds_chosen": 10.95273494720459, + "log_odds_ratio": -6.1401427956298e-05, + "logits/chosen": -0.7160186767578125, + "logits/rejected": -0.801353931427002, + "logps/chosen": -0.00029561432893387973, + "logps/rejected": -2.404662847518921, + "loss": 0.7411, + "nll_loss": 0.18526718020439148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9561435439973138e-05, + "rewards/margins": 0.24043673276901245, + "rewards/rejected": -0.24046629667282104, + "step": 8315 + }, + { + "epoch": 5.75103734439834, + "grad_norm": 6.344141960144043, + "learning_rate": 2.360534808667589e-05, + "log_odds_chosen": 9.526826858520508, + "log_odds_ratio": -0.00014862409443594515, + "logits/chosen": -0.5527998208999634, + "logits/rejected": -0.6022185683250427, + "logps/chosen": -0.0007526268600486219, + "logps/rejected": -1.5950901508331299, + "loss": 0.9076, + "nll_loss": 0.22688600420951843, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.526268746005371e-05, + "rewards/margins": 0.1594337522983551, + "rewards/rejected": -0.15950901806354523, + "step": 8316 + }, + { + "epoch": 5.751728907330567, + "grad_norm": 9.710481643676758, + "learning_rate": 2.3601506070385738e-05, + "log_odds_chosen": 10.60885238647461, + "log_odds_ratio": -4.971360249328427e-05, + "logits/chosen": -0.39266735315322876, + "logits/rejected": -0.5182227492332458, + "logps/chosen": -0.00034685549326241016, + "logps/rejected": -2.6334073543548584, + "loss": 0.7297, + "nll_loss": 0.18243081867694855, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.468555223662406e-05, + "rewards/margins": 0.2633060812950134, + "rewards/rejected": -0.2633407413959503, + "step": 8317 + }, + { + "epoch": 5.752420470262794, + "grad_norm": 6.257876396179199, + "learning_rate": 2.359766405409559e-05, + "log_odds_chosen": 9.356744766235352, + "log_odds_ratio": -0.0002441833494231105, + "logits/chosen": -0.6548734307289124, + "logits/rejected": -0.7048206925392151, + "logps/chosen": -0.0006509888335131109, + "logps/rejected": -1.4409332275390625, + "loss": 0.4796, + "nll_loss": 0.11986897885799408, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.509888771688566e-05, + "rewards/margins": 0.14402823150157928, + "rewards/rejected": -0.1440933346748352, + "step": 8318 + }, + { + "epoch": 5.753112033195021, + "grad_norm": 5.713196754455566, + "learning_rate": 2.359382203780544e-05, + "log_odds_chosen": 10.923623085021973, + "log_odds_ratio": -4.0328122850041837e-05, + "logits/chosen": 0.05549601465463638, + "logits/rejected": 0.0023378878831863403, + "logps/chosen": -0.0001473123993491754, + "logps/rejected": -2.0698325634002686, + "loss": 1.0181, + "nll_loss": 0.25452303886413574, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4731239389220718e-05, + "rewards/margins": 0.2069685459136963, + "rewards/rejected": -0.2069832682609558, + "step": 8319 + }, + { + "epoch": 5.753803596127248, + "grad_norm": 9.718740463256836, + "learning_rate": 2.3589980021515292e-05, + "log_odds_chosen": 9.84650993347168, + "log_odds_ratio": -0.00020443143148440868, + "logits/chosen": -0.5786601305007935, + "logits/rejected": -0.5279185771942139, + "logps/chosen": -0.0007319730357266963, + "logps/rejected": -1.9794467687606812, + "loss": 1.0748, + "nll_loss": 0.2686849534511566, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.319729775190353e-05, + "rewards/margins": 0.19787146151065826, + "rewards/rejected": -0.19794468581676483, + "step": 8320 + }, + { + "epoch": 5.754495159059474, + "grad_norm": 7.701869964599609, + "learning_rate": 2.3586138005225145e-05, + "log_odds_chosen": 10.897615432739258, + "log_odds_ratio": -5.009109008824453e-05, + "logits/chosen": -0.20346534252166748, + "logits/rejected": -0.18375641107559204, + "logps/chosen": -0.00037128583062440157, + "logps/rejected": -2.7812089920043945, + "loss": 1.3457, + "nll_loss": 0.33642303943634033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.71285859728232e-05, + "rewards/margins": 0.27808377146720886, + "rewards/rejected": -0.27812087535858154, + "step": 8321 + }, + { + "epoch": 5.755186721991701, + "grad_norm": 9.211677551269531, + "learning_rate": 2.3582295988934994e-05, + "log_odds_chosen": 9.959250450134277, + "log_odds_ratio": -0.00010035712330136448, + "logits/chosen": -0.5219177007675171, + "logits/rejected": -0.46816885471343994, + "logps/chosen": -0.0002476215595379472, + "logps/rejected": -1.614699363708496, + "loss": 0.9682, + "nll_loss": 0.2420455813407898, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4762153771007434e-05, + "rewards/margins": 0.16144520044326782, + "rewards/rejected": -0.161469966173172, + "step": 8322 + }, + { + "epoch": 5.755878284923928, + "grad_norm": 8.527338981628418, + "learning_rate": 2.3578453972644846e-05, + "log_odds_chosen": 10.829414367675781, + "log_odds_ratio": -2.493345527909696e-05, + "logits/chosen": -0.22919800877571106, + "logits/rejected": -0.3545800447463989, + "logps/chosen": -0.00017396353359799832, + "logps/rejected": -2.0452499389648438, + "loss": 0.6785, + "nll_loss": 0.16962985694408417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7396354451193474e-05, + "rewards/margins": 0.20450758934020996, + "rewards/rejected": -0.20452499389648438, + "step": 8323 + }, + { + "epoch": 5.756569847856155, + "grad_norm": 14.861743927001953, + "learning_rate": 2.35746119563547e-05, + "log_odds_chosen": 9.110204696655273, + "log_odds_ratio": -0.0011775546008720994, + "logits/chosen": -0.42979809641838074, + "logits/rejected": -0.4159172773361206, + "logps/chosen": -0.0021905952598899603, + "logps/rejected": -1.6844552755355835, + "loss": 0.8767, + "nll_loss": 0.21905720233917236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021905952598899603, + "rewards/margins": 0.1682264804840088, + "rewards/rejected": -0.16844552755355835, + "step": 8324 + }, + { + "epoch": 5.757261410788382, + "grad_norm": 7.052018642425537, + "learning_rate": 2.3570769940064548e-05, + "log_odds_chosen": 10.287298202514648, + "log_odds_ratio": -0.0001158514860435389, + "logits/chosen": -0.0011706706136465073, + "logits/rejected": -0.10996723920106888, + "logps/chosen": -0.0002825699921231717, + "logps/rejected": -2.024381399154663, + "loss": 0.8779, + "nll_loss": 0.2194562554359436, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8256998120923527e-05, + "rewards/margins": 0.20240987837314606, + "rewards/rejected": -0.2024381309747696, + "step": 8325 + }, + { + "epoch": 5.7579529737206085, + "grad_norm": 6.528735160827637, + "learning_rate": 2.3566927923774397e-05, + "log_odds_chosen": 10.121028900146484, + "log_odds_ratio": -0.0001131187891587615, + "logits/chosen": -0.5546838641166687, + "logits/rejected": -0.48821526765823364, + "logps/chosen": -0.0006948888767510653, + "logps/rejected": -2.0356459617614746, + "loss": 0.3869, + "nll_loss": 0.09670525789260864, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.948888767510653e-05, + "rewards/margins": 0.2034951150417328, + "rewards/rejected": -0.2035646140575409, + "step": 8326 + }, + { + "epoch": 5.758644536652835, + "grad_norm": 9.359241485595703, + "learning_rate": 2.356308590748425e-05, + "log_odds_chosen": 10.889639854431152, + "log_odds_ratio": -4.8169935325859115e-05, + "logits/chosen": -0.1331821084022522, + "logits/rejected": -0.2709449529647827, + "logps/chosen": -0.0002415928611299023, + "logps/rejected": -2.5324788093566895, + "loss": 0.877, + "nll_loss": 0.21924549341201782, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4159287931979634e-05, + "rewards/margins": 0.253223717212677, + "rewards/rejected": -0.25324785709381104, + "step": 8327 + }, + { + "epoch": 5.759336099585062, + "grad_norm": 10.904271125793457, + "learning_rate": 2.3559243891194098e-05, + "log_odds_chosen": 11.361100196838379, + "log_odds_ratio": -2.6015641196863726e-05, + "logits/chosen": -0.5011187195777893, + "logits/rejected": -0.5023812055587769, + "logps/chosen": -0.0006036162376403809, + "logps/rejected": -3.1411614418029785, + "loss": 0.9301, + "nll_loss": 0.2325301468372345, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036162449163385e-05, + "rewards/margins": 0.31405580043792725, + "rewards/rejected": -0.31411615014076233, + "step": 8328 + }, + { + "epoch": 5.760027662517289, + "grad_norm": 5.806125164031982, + "learning_rate": 2.355540187490395e-05, + "log_odds_chosen": 9.143113136291504, + "log_odds_ratio": -0.00015040890139061958, + "logits/chosen": -0.3005862534046173, + "logits/rejected": -0.32919326424598694, + "logps/chosen": -0.0002952023351099342, + "logps/rejected": -1.053991436958313, + "loss": 0.842, + "nll_loss": 0.21049299836158752, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.952023351099342e-05, + "rewards/margins": 0.10536962747573853, + "rewards/rejected": -0.10539913922548294, + "step": 8329 + }, + { + "epoch": 5.760719225449516, + "grad_norm": 6.955360412597656, + "learning_rate": 2.35515598586138e-05, + "log_odds_chosen": 9.990994453430176, + "log_odds_ratio": -0.00024183614004869014, + "logits/chosen": -0.36879050731658936, + "logits/rejected": -0.40905478596687317, + "logps/chosen": -0.0004077651828993112, + "logps/rejected": -1.8470849990844727, + "loss": 1.0752, + "nll_loss": 0.268771231174469, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0776521927909926e-05, + "rewards/margins": 0.18466772139072418, + "rewards/rejected": -0.18470850586891174, + "step": 8330 + }, + { + "epoch": 5.761410788381743, + "grad_norm": 5.922750473022461, + "learning_rate": 2.3547717842323652e-05, + "log_odds_chosen": 11.753957748413086, + "log_odds_ratio": -2.4064009267021902e-05, + "logits/chosen": -0.8286944031715393, + "logits/rejected": -0.8924739360809326, + "logps/chosen": -0.00015205403906293213, + "logps/rejected": -2.853480577468872, + "loss": 0.4743, + "nll_loss": 0.11856278777122498, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5205403542495333e-05, + "rewards/margins": 0.2853328287601471, + "rewards/rejected": -0.2853480577468872, + "step": 8331 + }, + { + "epoch": 5.7621023513139695, + "grad_norm": 7.735132217407227, + "learning_rate": 2.3543875826033505e-05, + "log_odds_chosen": 11.60269546508789, + "log_odds_ratio": -4.3830594222527e-05, + "logits/chosen": -0.26348862051963806, + "logits/rejected": -0.32002776861190796, + "logps/chosen": -0.00020181875152047724, + "logps/rejected": -2.8781893253326416, + "loss": 0.6063, + "nll_loss": 0.15156348049640656, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0181880245218053e-05, + "rewards/margins": 0.28779876232147217, + "rewards/rejected": -0.28781893849372864, + "step": 8332 + }, + { + "epoch": 5.762793914246196, + "grad_norm": 10.662870407104492, + "learning_rate": 2.3540033809743354e-05, + "log_odds_chosen": 9.17403507232666, + "log_odds_ratio": -0.006309689488261938, + "logits/chosen": -0.21587823331356049, + "logits/rejected": -0.20806483924388885, + "logps/chosen": -0.0029637019615620375, + "logps/rejected": -1.7408331632614136, + "loss": 0.7234, + "nll_loss": 0.18022409081459045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029637018451467156, + "rewards/margins": 0.1737869530916214, + "rewards/rejected": -0.17408332228660583, + "step": 8333 + }, + { + "epoch": 5.763485477178423, + "grad_norm": 7.98430871963501, + "learning_rate": 2.3536191793453206e-05, + "log_odds_chosen": 10.42989730834961, + "log_odds_ratio": -0.00014048349112272263, + "logits/chosen": -0.5470676422119141, + "logits/rejected": -0.7323271632194519, + "logps/chosen": -0.00022380598238669336, + "logps/rejected": -1.730640172958374, + "loss": 1.0507, + "nll_loss": 0.26266804337501526, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2380598238669336e-05, + "rewards/margins": 0.17304162681102753, + "rewards/rejected": -0.17306400835514069, + "step": 8334 + }, + { + "epoch": 5.76417704011065, + "grad_norm": 8.684687614440918, + "learning_rate": 2.353234977716306e-05, + "log_odds_chosen": 10.68388557434082, + "log_odds_ratio": -6.952737021492794e-05, + "logits/chosen": -0.6682871580123901, + "logits/rejected": -0.6703358888626099, + "logps/chosen": -0.0003899956354871392, + "logps/rejected": -2.3728811740875244, + "loss": 0.8976, + "nll_loss": 0.22438865900039673, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.899956573150121e-05, + "rewards/margins": 0.23724913597106934, + "rewards/rejected": -0.23728811740875244, + "step": 8335 + }, + { + "epoch": 5.764868603042877, + "grad_norm": 5.415703773498535, + "learning_rate": 2.3528507760872908e-05, + "log_odds_chosen": 9.89802360534668, + "log_odds_ratio": -0.0012788517633453012, + "logits/chosen": -0.927123486995697, + "logits/rejected": -0.8529543280601501, + "logps/chosen": -0.0020643535535782576, + "logps/rejected": -1.9083293676376343, + "loss": 1.323, + "nll_loss": 0.33061519265174866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020643536117859185, + "rewards/margins": 0.19062651693820953, + "rewards/rejected": -0.1908329427242279, + "step": 8336 + }, + { + "epoch": 5.765560165975104, + "grad_norm": 14.877506256103516, + "learning_rate": 2.3524665744582757e-05, + "log_odds_chosen": 11.388561248779297, + "log_odds_ratio": -2.0636227418435737e-05, + "logits/chosen": -1.0710829496383667, + "logits/rejected": -1.0672526359558105, + "logps/chosen": -0.00018354799249209464, + "logps/rejected": -2.684891700744629, + "loss": 0.9869, + "nll_loss": 0.24672679603099823, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8354799976805225e-05, + "rewards/margins": 0.268470823764801, + "rewards/rejected": -0.26848918199539185, + "step": 8337 + }, + { + "epoch": 5.7662517289073305, + "grad_norm": 6.486876964569092, + "learning_rate": 2.352082372829261e-05, + "log_odds_chosen": 10.3418550491333, + "log_odds_ratio": -4.532843013294041e-05, + "logits/chosen": -0.5621716976165771, + "logits/rejected": -0.5832915306091309, + "logps/chosen": -0.0004285993636585772, + "logps/rejected": -2.2560110092163086, + "loss": 0.5395, + "nll_loss": 0.13485826551914215, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2859934183070436e-05, + "rewards/margins": 0.22555822134017944, + "rewards/rejected": -0.22560109198093414, + "step": 8338 + }, + { + "epoch": 5.766943291839557, + "grad_norm": 8.831564903259277, + "learning_rate": 2.3516981712002458e-05, + "log_odds_chosen": 9.910039901733398, + "log_odds_ratio": -0.0002273163408972323, + "logits/chosen": -0.9114158153533936, + "logits/rejected": -0.9666690826416016, + "logps/chosen": -0.010054264217615128, + "logps/rejected": -2.3426384925842285, + "loss": 0.7889, + "nll_loss": 0.1972012221813202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010054263984784484, + "rewards/margins": 0.2332584261894226, + "rewards/rejected": -0.2342638522386551, + "step": 8339 + }, + { + "epoch": 5.767634854771784, + "grad_norm": 12.703185081481934, + "learning_rate": 2.351313969571231e-05, + "log_odds_chosen": 10.88131332397461, + "log_odds_ratio": -0.00015934542170725763, + "logits/chosen": -0.9918793439865112, + "logits/rejected": -1.0424768924713135, + "logps/chosen": -0.00022023438941687346, + "logps/rejected": -2.2530806064605713, + "loss": 0.983, + "nll_loss": 0.2457437366247177, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2023437850293703e-05, + "rewards/margins": 0.22528605163097382, + "rewards/rejected": -0.22530806064605713, + "step": 8340 + }, + { + "epoch": 5.768326417704011, + "grad_norm": 11.430049896240234, + "learning_rate": 2.3509297679422163e-05, + "log_odds_chosen": 10.878573417663574, + "log_odds_ratio": -3.2517738873139024e-05, + "logits/chosen": -0.5243549942970276, + "logits/rejected": -0.507844090461731, + "logps/chosen": -0.00019536682520993054, + "logps/rejected": -2.2270255088806152, + "loss": 0.7372, + "nll_loss": 0.18429552018642426, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9536684703780338e-05, + "rewards/margins": 0.22268301248550415, + "rewards/rejected": -0.22270256280899048, + "step": 8341 + }, + { + "epoch": 5.769017980636238, + "grad_norm": 7.633252143859863, + "learning_rate": 2.3505455663132012e-05, + "log_odds_chosen": 10.390115737915039, + "log_odds_ratio": -0.000257474574027583, + "logits/chosen": -0.5723543763160706, + "logits/rejected": -0.537143349647522, + "logps/chosen": -0.0002853441401384771, + "logps/rejected": -2.1401093006134033, + "loss": 1.1687, + "nll_loss": 0.29213932156562805, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8534417651826516e-05, + "rewards/margins": 0.21398240327835083, + "rewards/rejected": -0.21401095390319824, + "step": 8342 + }, + { + "epoch": 5.769709543568465, + "grad_norm": 5.9149169921875, + "learning_rate": 2.3501613646841865e-05, + "log_odds_chosen": 10.216432571411133, + "log_odds_ratio": -6.506162026198581e-05, + "logits/chosen": -0.3109501898288727, + "logits/rejected": -0.35374629497528076, + "logps/chosen": -0.00036072812508791685, + "logps/rejected": -1.8220794200897217, + "loss": 0.5343, + "nll_loss": 0.13356661796569824, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.607281541917473e-05, + "rewards/margins": 0.18217185139656067, + "rewards/rejected": -0.18220794200897217, + "step": 8343 + }, + { + "epoch": 5.7704011065006915, + "grad_norm": 6.765550136566162, + "learning_rate": 2.3497771630551717e-05, + "log_odds_chosen": 10.58395767211914, + "log_odds_ratio": -0.00010712833318393677, + "logits/chosen": -0.22515490651130676, + "logits/rejected": -0.4198504090309143, + "logps/chosen": -0.00018988562806043774, + "logps/rejected": -1.9974274635314941, + "loss": 0.6781, + "nll_loss": 0.16950182616710663, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8988564988831058e-05, + "rewards/margins": 0.1997237652540207, + "rewards/rejected": -0.19974276423454285, + "step": 8344 + }, + { + "epoch": 5.771092669432918, + "grad_norm": 6.1564154624938965, + "learning_rate": 2.3493929614261566e-05, + "log_odds_chosen": 9.40694522857666, + "log_odds_ratio": -0.00018372457998339087, + "logits/chosen": -0.5075855851173401, + "logits/rejected": -0.5839619040489197, + "logps/chosen": -0.000857932900544256, + "logps/rejected": -1.6731497049331665, + "loss": 0.7596, + "nll_loss": 0.1898796111345291, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.579329733038321e-05, + "rewards/margins": 0.16722919046878815, + "rewards/rejected": -0.16731497645378113, + "step": 8345 + }, + { + "epoch": 5.771784232365145, + "grad_norm": 13.756538391113281, + "learning_rate": 2.3490087597971415e-05, + "log_odds_chosen": 10.903304100036621, + "log_odds_ratio": -6.926347123226151e-05, + "logits/chosen": -0.8468804359436035, + "logits/rejected": -0.880153238773346, + "logps/chosen": -0.00027108131325803697, + "logps/rejected": -2.4493632316589355, + "loss": 0.8235, + "nll_loss": 0.2058658003807068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7108131689601578e-05, + "rewards/margins": 0.24490921199321747, + "rewards/rejected": -0.24493633210659027, + "step": 8346 + }, + { + "epoch": 5.772475795297372, + "grad_norm": 5.244549751281738, + "learning_rate": 2.3486245581681268e-05, + "log_odds_chosen": 8.939018249511719, + "log_odds_ratio": -0.0004187691956758499, + "logits/chosen": -0.5194487571716309, + "logits/rejected": -0.6112475395202637, + "logps/chosen": -0.0007814795826561749, + "logps/rejected": -1.6546971797943115, + "loss": 0.8936, + "nll_loss": 0.2233474999666214, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.814796117600054e-05, + "rewards/margins": 0.16539156436920166, + "rewards/rejected": -0.1654697060585022, + "step": 8347 + }, + { + "epoch": 5.773167358229599, + "grad_norm": 6.745221138000488, + "learning_rate": 2.3482403565391117e-05, + "log_odds_chosen": 10.619478225708008, + "log_odds_ratio": -7.211588672362268e-05, + "logits/chosen": -0.386459618806839, + "logits/rejected": -0.5498947501182556, + "logps/chosen": -0.00022541302314493805, + "logps/rejected": -1.8751081228256226, + "loss": 0.9235, + "nll_loss": 0.23086822032928467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.25413004955044e-05, + "rewards/margins": 0.18748828768730164, + "rewards/rejected": -0.18751080334186554, + "step": 8348 + }, + { + "epoch": 5.773858921161826, + "grad_norm": 8.773056030273438, + "learning_rate": 2.347856154910097e-05, + "log_odds_chosen": 11.348367691040039, + "log_odds_ratio": -5.642771429847926e-05, + "logits/chosen": 0.013702712953090668, + "logits/rejected": -0.09357339143753052, + "logps/chosen": -0.00038130112807266414, + "logps/rejected": -2.4233832359313965, + "loss": 1.7573, + "nll_loss": 0.43933171033859253, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.813011062447913e-05, + "rewards/margins": 0.24230018258094788, + "rewards/rejected": -0.24233832955360413, + "step": 8349 + }, + { + "epoch": 5.7745504840940525, + "grad_norm": 7.7265143394470215, + "learning_rate": 2.347471953281082e-05, + "log_odds_chosen": 10.363457679748535, + "log_odds_ratio": -0.00023995987430680543, + "logits/chosen": -0.232026606798172, + "logits/rejected": -0.28352096676826477, + "logps/chosen": -0.005336123518645763, + "logps/rejected": -2.446638584136963, + "loss": 0.6538, + "nll_loss": 0.16343380510807037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005336122703738511, + "rewards/margins": 0.24413025379180908, + "rewards/rejected": -0.24466384947299957, + "step": 8350 + }, + { + "epoch": 5.775242047026279, + "grad_norm": 9.928838729858398, + "learning_rate": 2.347087751652067e-05, + "log_odds_chosen": 9.737348556518555, + "log_odds_ratio": -0.00023867032723501325, + "logits/chosen": -0.06464526057243347, + "logits/rejected": -0.08520027995109558, + "logps/chosen": -0.0003639034694060683, + "logps/rejected": -1.6743927001953125, + "loss": 0.6192, + "nll_loss": 0.15477892756462097, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.639034548541531e-05, + "rewards/margins": 0.16740287840366364, + "rewards/rejected": -0.167439267039299, + "step": 8351 + }, + { + "epoch": 5.775933609958506, + "grad_norm": 10.34325885772705, + "learning_rate": 2.3467035500230523e-05, + "log_odds_chosen": 11.383646011352539, + "log_odds_ratio": -2.3865615730755962e-05, + "logits/chosen": -0.4863215684890747, + "logits/rejected": -0.634797990322113, + "logps/chosen": -0.00019501753558870405, + "logps/rejected": -2.7335824966430664, + "loss": 0.6773, + "nll_loss": 0.16931481659412384, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9501752831274644e-05, + "rewards/margins": 0.2733387351036072, + "rewards/rejected": -0.27335822582244873, + "step": 8352 + }, + { + "epoch": 5.776625172890733, + "grad_norm": 6.895632266998291, + "learning_rate": 2.3463193483940375e-05, + "log_odds_chosen": 10.131932258605957, + "log_odds_ratio": -7.790548261255026e-05, + "logits/chosen": -0.478166788816452, + "logits/rejected": -0.4578874707221985, + "logps/chosen": -0.00029212809749878943, + "logps/rejected": -1.9623818397521973, + "loss": 0.4387, + "nll_loss": 0.10965972393751144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.921280793088954e-05, + "rewards/margins": 0.19620898365974426, + "rewards/rejected": -0.1962381899356842, + "step": 8353 + }, + { + "epoch": 5.77731673582296, + "grad_norm": 25.984107971191406, + "learning_rate": 2.3459351467650224e-05, + "log_odds_chosen": 8.111963272094727, + "log_odds_ratio": -0.6164579391479492, + "logits/chosen": -0.5472182035446167, + "logits/rejected": -0.5658654570579529, + "logps/chosen": -0.07941090315580368, + "logps/rejected": -1.5097594261169434, + "loss": 1.2514, + "nll_loss": 0.25121480226516724, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.007941090501844883, + "rewards/margins": 0.14303484559059143, + "rewards/rejected": -0.15097594261169434, + "step": 8354 + }, + { + "epoch": 5.778008298755187, + "grad_norm": 7.861454963684082, + "learning_rate": 2.3455509451360074e-05, + "log_odds_chosen": 11.025238037109375, + "log_odds_ratio": -4.6407567424466833e-05, + "logits/chosen": -0.7130393981933594, + "logits/rejected": -0.7995960712432861, + "logps/chosen": -0.0007563336985185742, + "logps/rejected": -2.4245080947875977, + "loss": 0.5134, + "nll_loss": 0.1283499151468277, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.563337567262352e-05, + "rewards/margins": 0.2423751950263977, + "rewards/rejected": -0.24245081841945648, + "step": 8355 + }, + { + "epoch": 5.7786998616874135, + "grad_norm": 6.390862941741943, + "learning_rate": 2.3451667435069926e-05, + "log_odds_chosen": 9.716100692749023, + "log_odds_ratio": -0.00018891274521593004, + "logits/chosen": -0.27408623695373535, + "logits/rejected": -0.37599632143974304, + "logps/chosen": -0.0005538674304261804, + "logps/rejected": -1.9498720169067383, + "loss": 0.4609, + "nll_loss": 0.11519482731819153, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.538674668059684e-05, + "rewards/margins": 0.19493183493614197, + "rewards/rejected": -0.1949872076511383, + "step": 8356 + }, + { + "epoch": 5.77939142461964, + "grad_norm": 7.093240737915039, + "learning_rate": 2.3447825418779775e-05, + "log_odds_chosen": 9.832173347473145, + "log_odds_ratio": -0.00016536461771465838, + "logits/chosen": -0.534382700920105, + "logits/rejected": -0.6079778075218201, + "logps/chosen": -0.00042304108501411974, + "logps/rejected": -1.752963662147522, + "loss": 0.5897, + "nll_loss": 0.14741525053977966, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.230411286698654e-05, + "rewards/margins": 0.17525407671928406, + "rewards/rejected": -0.1752963811159134, + "step": 8357 + }, + { + "epoch": 5.780082987551867, + "grad_norm": 4.863102436065674, + "learning_rate": 2.3443983402489627e-05, + "log_odds_chosen": 10.297584533691406, + "log_odds_ratio": -9.968863741960377e-05, + "logits/chosen": -0.21967944502830505, + "logits/rejected": -0.31562548875808716, + "logps/chosen": -0.00019475305452942848, + "logps/rejected": -1.7844955921173096, + "loss": 0.44, + "nll_loss": 0.10999009013175964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9475304725347087e-05, + "rewards/margins": 0.17843008041381836, + "rewards/rejected": -0.1784495711326599, + "step": 8358 + }, + { + "epoch": 5.780774550484094, + "grad_norm": 5.4476752281188965, + "learning_rate": 2.344014138619948e-05, + "log_odds_chosen": 10.860854148864746, + "log_odds_ratio": -3.658882633317262e-05, + "logits/chosen": -0.47734540700912476, + "logits/rejected": -0.4496386647224426, + "logps/chosen": -0.00026548956520855427, + "logps/rejected": -2.257863759994507, + "loss": 1.2608, + "nll_loss": 0.3152031898498535, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.654895797604695e-05, + "rewards/margins": 0.22575980424880981, + "rewards/rejected": -0.22578637301921844, + "step": 8359 + }, + { + "epoch": 5.781466113416321, + "grad_norm": 5.5818705558776855, + "learning_rate": 2.343629936990933e-05, + "log_odds_chosen": 10.45132064819336, + "log_odds_ratio": -0.0004984838888049126, + "logits/chosen": -0.627118706703186, + "logits/rejected": -0.6624408960342407, + "logps/chosen": -0.0015973912086337805, + "logps/rejected": -2.520289421081543, + "loss": 0.7724, + "nll_loss": 0.19305981695652008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015973913832567632, + "rewards/margins": 0.25186920166015625, + "rewards/rejected": -0.2520289421081543, + "step": 8360 + }, + { + "epoch": 5.782157676348548, + "grad_norm": 10.007711410522461, + "learning_rate": 2.343245735361918e-05, + "log_odds_chosen": 10.941266059875488, + "log_odds_ratio": -6.014830432832241e-05, + "logits/chosen": -0.5361988544464111, + "logits/rejected": -0.5562364459037781, + "logps/chosen": -0.0003670321893878281, + "logps/rejected": -2.7064778804779053, + "loss": 0.6319, + "nll_loss": 0.15796193480491638, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.670322257676162e-05, + "rewards/margins": 0.27061107754707336, + "rewards/rejected": -0.2706477642059326, + "step": 8361 + }, + { + "epoch": 5.782849239280774, + "grad_norm": 7.794462203979492, + "learning_rate": 2.3428615337329034e-05, + "log_odds_chosen": 10.351293563842773, + "log_odds_ratio": -0.0006059914594516158, + "logits/chosen": -0.6464826464653015, + "logits/rejected": -0.7297243475914001, + "logps/chosen": -0.00021089179790578783, + "logps/rejected": -1.6023046970367432, + "loss": 1.1555, + "nll_loss": 0.28881415724754333, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1089181245770305e-05, + "rewards/margins": 0.16020938754081726, + "rewards/rejected": -0.16023047268390656, + "step": 8362 + }, + { + "epoch": 5.783540802213001, + "grad_norm": 6.460762023925781, + "learning_rate": 2.3424773321038883e-05, + "log_odds_chosen": 10.685081481933594, + "log_odds_ratio": -0.00012143061030656099, + "logits/chosen": -0.7190501689910889, + "logits/rejected": -0.7460399866104126, + "logps/chosen": -0.00029704137705266476, + "logps/rejected": -2.504964828491211, + "loss": 0.4641, + "nll_loss": 0.11600157618522644, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.970413515868131e-05, + "rewards/margins": 0.25046679377555847, + "rewards/rejected": -0.2504964768886566, + "step": 8363 + }, + { + "epoch": 5.784232365145228, + "grad_norm": 6.547046184539795, + "learning_rate": 2.3420931304748732e-05, + "log_odds_chosen": 10.335028648376465, + "log_odds_ratio": -0.0002512763603590429, + "logits/chosen": -0.5884988903999329, + "logits/rejected": -0.6217831373214722, + "logps/chosen": -0.0014175032265484333, + "logps/rejected": -2.6600277423858643, + "loss": 0.9214, + "nll_loss": 0.23032043874263763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014175032265484333, + "rewards/margins": 0.26586103439331055, + "rewards/rejected": -0.2660028040409088, + "step": 8364 + }, + { + "epoch": 5.784923928077455, + "grad_norm": 7.295699596405029, + "learning_rate": 2.3417089288458584e-05, + "log_odds_chosen": 9.605745315551758, + "log_odds_ratio": -0.0009946682257577777, + "logits/chosen": 0.16253028810024261, + "logits/rejected": 0.00862811878323555, + "logps/chosen": -0.0010614326456561685, + "logps/rejected": -1.5691893100738525, + "loss": 0.7776, + "nll_loss": 0.19429834187030792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010614325583446771, + "rewards/margins": 0.15681278705596924, + "rewards/rejected": -0.1569189429283142, + "step": 8365 + }, + { + "epoch": 5.785615491009682, + "grad_norm": 11.699624061584473, + "learning_rate": 2.3413247272168433e-05, + "log_odds_chosen": 9.368026733398438, + "log_odds_ratio": -0.0008493943023495376, + "logits/chosen": -0.38463887572288513, + "logits/rejected": -0.34834763407707214, + "logps/chosen": -0.00212243665009737, + "logps/rejected": -2.0978848934173584, + "loss": 0.7825, + "nll_loss": 0.19554492831230164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021224364172667265, + "rewards/margins": 0.20957626402378082, + "rewards/rejected": -0.2097885012626648, + "step": 8366 + }, + { + "epoch": 5.786307053941909, + "grad_norm": 6.0530500411987305, + "learning_rate": 2.3409405255878286e-05, + "log_odds_chosen": 11.640003204345703, + "log_odds_ratio": -2.191989915445447e-05, + "logits/chosen": -0.32898950576782227, + "logits/rejected": -0.30571579933166504, + "logps/chosen": -0.0001186039880849421, + "logps/rejected": -2.5280473232269287, + "loss": 0.4237, + "nll_loss": 0.10592895746231079, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.186039844469633e-05, + "rewards/margins": 0.2527928650379181, + "rewards/rejected": -0.2528047263622284, + "step": 8367 + }, + { + "epoch": 5.786998616874135, + "grad_norm": 5.655771732330322, + "learning_rate": 2.340556323958814e-05, + "log_odds_chosen": 10.02695369720459, + "log_odds_ratio": -0.0017944334540516138, + "logits/chosen": -0.2847994267940521, + "logits/rejected": -0.4468628466129303, + "logps/chosen": -0.0016070208512246609, + "logps/rejected": -1.8412233591079712, + "loss": 0.6046, + "nll_loss": 0.15095919370651245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016070209676399827, + "rewards/margins": 0.1839616298675537, + "rewards/rejected": -0.18412232398986816, + "step": 8368 + }, + { + "epoch": 5.787690179806362, + "grad_norm": 8.640703201293945, + "learning_rate": 2.3401721223297987e-05, + "log_odds_chosen": 9.762921333312988, + "log_odds_ratio": -0.00027630673139356077, + "logits/chosen": -0.21005013585090637, + "logits/rejected": -0.32674640417099, + "logps/chosen": -0.0015161462360993028, + "logps/rejected": -1.6819090843200684, + "loss": 0.6451, + "nll_loss": 0.16124506294727325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015161462943069637, + "rewards/margins": 0.16803930699825287, + "rewards/rejected": -0.16819091141223907, + "step": 8369 + }, + { + "epoch": 5.788381742738589, + "grad_norm": 7.583675384521484, + "learning_rate": 2.339787920700784e-05, + "log_odds_chosen": 11.343730926513672, + "log_odds_ratio": -1.8906674085883424e-05, + "logits/chosen": -0.6724814176559448, + "logits/rejected": -0.7422870397567749, + "logps/chosen": -0.00013959617353975773, + "logps/rejected": -2.027846574783325, + "loss": 0.4644, + "nll_loss": 0.11610476672649384, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3959616808278952e-05, + "rewards/margins": 0.20277069509029388, + "rewards/rejected": -0.20278465747833252, + "step": 8370 + }, + { + "epoch": 5.789073305670816, + "grad_norm": 9.348284721374512, + "learning_rate": 2.3394037190717692e-05, + "log_odds_chosen": 9.624040603637695, + "log_odds_ratio": -0.0005165559705346823, + "logits/chosen": -0.32689768075942993, + "logits/rejected": -0.353707492351532, + "logps/chosen": -0.00031788769410923123, + "logps/rejected": -1.819197654724121, + "loss": 0.9245, + "nll_loss": 0.23106229305267334, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.178877159371041e-05, + "rewards/margins": 0.18188798427581787, + "rewards/rejected": -0.18191976845264435, + "step": 8371 + }, + { + "epoch": 5.789764868603043, + "grad_norm": 6.802423000335693, + "learning_rate": 2.339019517442754e-05, + "log_odds_chosen": 10.92711067199707, + "log_odds_ratio": -4.961782178725116e-05, + "logits/chosen": -0.48822087049484253, + "logits/rejected": -0.5076096653938293, + "logps/chosen": -0.00014275651483330876, + "logps/rejected": -1.920109510421753, + "loss": 0.5638, + "nll_loss": 0.14093977212905884, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4275651665229816e-05, + "rewards/margins": 0.19199667870998383, + "rewards/rejected": -0.19201095402240753, + "step": 8372 + }, + { + "epoch": 5.79045643153527, + "grad_norm": 6.595789432525635, + "learning_rate": 2.338635315813739e-05, + "log_odds_chosen": 10.505535125732422, + "log_odds_ratio": -9.082785982172936e-05, + "logits/chosen": -0.8474116325378418, + "logits/rejected": -0.8347305059432983, + "logps/chosen": -0.0002792773593682796, + "logps/rejected": -1.8048241138458252, + "loss": 0.7605, + "nll_loss": 0.19012302160263062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.792773739201948e-05, + "rewards/margins": 0.18045447766780853, + "rewards/rejected": -0.1804824024438858, + "step": 8373 + }, + { + "epoch": 5.791147994467496, + "grad_norm": 8.353302955627441, + "learning_rate": 2.3382511141847243e-05, + "log_odds_chosen": 10.95888900756836, + "log_odds_ratio": -7.381556497421116e-05, + "logits/chosen": -0.043954428285360336, + "logits/rejected": -0.19600719213485718, + "logps/chosen": -0.0006513033295050263, + "logps/rejected": -2.3824691772460938, + "loss": 0.7341, + "nll_loss": 0.18351265788078308, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.513033440569416e-05, + "rewards/margins": 0.23818179965019226, + "rewards/rejected": -0.23824693262577057, + "step": 8374 + }, + { + "epoch": 5.791839557399723, + "grad_norm": 6.531155586242676, + "learning_rate": 2.3378669125557092e-05, + "log_odds_chosen": 8.23431396484375, + "log_odds_ratio": -0.009316143579781055, + "logits/chosen": -0.7907726168632507, + "logits/rejected": -0.8477673530578613, + "logps/chosen": -0.004934323951601982, + "logps/rejected": -1.4430415630340576, + "loss": 0.7477, + "nll_loss": 0.18599727749824524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004934323951601982, + "rewards/margins": 0.1438107192516327, + "rewards/rejected": -0.14430415630340576, + "step": 8375 + }, + { + "epoch": 5.79253112033195, + "grad_norm": 11.56829833984375, + "learning_rate": 2.3374827109266944e-05, + "log_odds_chosen": 9.17873764038086, + "log_odds_ratio": -0.0017092173220589757, + "logits/chosen": -0.5897649526596069, + "logits/rejected": -0.669125497341156, + "logps/chosen": -0.0021838941611349583, + "logps/rejected": -1.4593961238861084, + "loss": 0.6345, + "nll_loss": 0.1584663838148117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021838942484464496, + "rewards/margins": 0.1457212269306183, + "rewards/rejected": -0.14593960344791412, + "step": 8376 + }, + { + "epoch": 5.793222683264177, + "grad_norm": 10.690628051757812, + "learning_rate": 2.3370985092976797e-05, + "log_odds_chosen": 11.137996673583984, + "log_odds_ratio": -1.9885355868609622e-05, + "logits/chosen": -0.21123374998569489, + "logits/rejected": -0.23481081426143646, + "logps/chosen": -0.00025429227389395237, + "logps/rejected": -2.4706366062164307, + "loss": 1.1716, + "nll_loss": 0.2928870916366577, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5429229935980402e-05, + "rewards/margins": 0.24703821539878845, + "rewards/rejected": -0.24706363677978516, + "step": 8377 + }, + { + "epoch": 5.793914246196404, + "grad_norm": 5.095751762390137, + "learning_rate": 2.3367143076686646e-05, + "log_odds_chosen": 10.155561447143555, + "log_odds_ratio": -0.0005075469962321222, + "logits/chosen": -0.2100965529680252, + "logits/rejected": -0.24129444360733032, + "logps/chosen": -0.0019794083200395107, + "logps/rejected": -2.64514422416687, + "loss": 0.7971, + "nll_loss": 0.19922536611557007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001979408407351002, + "rewards/margins": 0.26431649923324585, + "rewards/rejected": -0.2645144462585449, + "step": 8378 + }, + { + "epoch": 5.7946058091286305, + "grad_norm": 9.237796783447266, + "learning_rate": 2.3363301060396498e-05, + "log_odds_chosen": 11.54646110534668, + "log_odds_ratio": -3.1261472031474113e-05, + "logits/chosen": -0.5789388418197632, + "logits/rejected": -0.6498785614967346, + "logps/chosen": -0.00018026272300630808, + "logps/rejected": -2.9494409561157227, + "loss": 1.1371, + "nll_loss": 0.28427499532699585, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8026272300630808e-05, + "rewards/margins": 0.2949260473251343, + "rewards/rejected": -0.29494407773017883, + "step": 8379 + }, + { + "epoch": 5.795297372060857, + "grad_norm": 9.438192367553711, + "learning_rate": 2.335945904410635e-05, + "log_odds_chosen": 10.218551635742188, + "log_odds_ratio": -0.00047488469863310456, + "logits/chosen": -0.473160058259964, + "logits/rejected": -0.4454137682914734, + "logps/chosen": -0.0006715701892971992, + "logps/rejected": -1.9091229438781738, + "loss": 0.6901, + "nll_loss": 0.17248502373695374, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.715701601933688e-05, + "rewards/margins": 0.19084513187408447, + "rewards/rejected": -0.19091227650642395, + "step": 8380 + }, + { + "epoch": 5.795988934993084, + "grad_norm": 6.330174922943115, + "learning_rate": 2.33556170278162e-05, + "log_odds_chosen": 10.507976531982422, + "log_odds_ratio": -5.8574034483172e-05, + "logits/chosen": -0.6390625834465027, + "logits/rejected": -0.7654403448104858, + "logps/chosen": -0.00023662333842366934, + "logps/rejected": -1.9224880933761597, + "loss": 0.608, + "nll_loss": 0.15200158953666687, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3662332750973292e-05, + "rewards/margins": 0.1922251582145691, + "rewards/rejected": -0.19224882125854492, + "step": 8381 + }, + { + "epoch": 5.796680497925311, + "grad_norm": 10.470353126525879, + "learning_rate": 2.335177501152605e-05, + "log_odds_chosen": 9.854674339294434, + "log_odds_ratio": -9.551268158247694e-05, + "logits/chosen": -0.5654609799385071, + "logits/rejected": -0.7642248272895813, + "logps/chosen": -0.00030991079984232783, + "logps/rejected": -1.416645884513855, + "loss": 0.8086, + "nll_loss": 0.2021281123161316, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.099108289461583e-05, + "rewards/margins": 0.14163358509540558, + "rewards/rejected": -0.14166459441184998, + "step": 8382 + }, + { + "epoch": 5.797372060857538, + "grad_norm": 6.718166351318359, + "learning_rate": 2.33479329952359e-05, + "log_odds_chosen": 8.857773780822754, + "log_odds_ratio": -0.0011921023251488805, + "logits/chosen": -0.36666426062583923, + "logits/rejected": -0.33660316467285156, + "logps/chosen": -0.002264556474983692, + "logps/rejected": -1.3710294961929321, + "loss": 0.7864, + "nll_loss": 0.19647863507270813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002264556533191353, + "rewards/margins": 0.13687649369239807, + "rewards/rejected": -0.13710294663906097, + "step": 8383 + }, + { + "epoch": 5.798063623789765, + "grad_norm": 6.389466285705566, + "learning_rate": 2.334409097894575e-05, + "log_odds_chosen": 10.509028434753418, + "log_odds_ratio": -5.556520045502111e-05, + "logits/chosen": -0.4471544027328491, + "logits/rejected": -0.5482914447784424, + "logps/chosen": -0.0002809996949508786, + "logps/rejected": -2.2582454681396484, + "loss": 0.5929, + "nll_loss": 0.14821313321590424, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8099970222683623e-05, + "rewards/margins": 0.22579646110534668, + "rewards/rejected": -0.22582454979419708, + "step": 8384 + }, + { + "epoch": 5.7987551867219915, + "grad_norm": 8.72469425201416, + "learning_rate": 2.3340248962655603e-05, + "log_odds_chosen": 8.631034851074219, + "log_odds_ratio": -0.002588872332125902, + "logits/chosen": -0.4895836114883423, + "logits/rejected": -0.4251309931278229, + "logps/chosen": -0.004463810473680496, + "logps/rejected": -1.896122694015503, + "loss": 1.116, + "nll_loss": 0.27873343229293823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004463810764718801, + "rewards/margins": 0.1891658902168274, + "rewards/rejected": -0.1896122843027115, + "step": 8385 + }, + { + "epoch": 5.799446749654218, + "grad_norm": 8.63010025024414, + "learning_rate": 2.3336406946365455e-05, + "log_odds_chosen": 10.331777572631836, + "log_odds_ratio": -6.749943713657558e-05, + "logits/chosen": -0.1513824462890625, + "logits/rejected": -0.1929640769958496, + "logps/chosen": -0.0006063561886548996, + "logps/rejected": -1.767856240272522, + "loss": 0.5939, + "nll_loss": 0.14847230911254883, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.063562250346877e-05, + "rewards/margins": 0.17672500014305115, + "rewards/rejected": -0.1767856478691101, + "step": 8386 + }, + { + "epoch": 5.800138312586445, + "grad_norm": 6.971400737762451, + "learning_rate": 2.3332564930075304e-05, + "log_odds_chosen": 10.246747970581055, + "log_odds_ratio": -0.0002245830837637186, + "logits/chosen": -0.6562935709953308, + "logits/rejected": -0.7062948942184448, + "logps/chosen": -0.0005609599174931645, + "logps/rejected": -2.1932871341705322, + "loss": 0.92, + "nll_loss": 0.22997446358203888, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.609598883893341e-05, + "rewards/margins": 0.21927259862422943, + "rewards/rejected": -0.21932871639728546, + "step": 8387 + }, + { + "epoch": 5.800829875518672, + "grad_norm": 12.3055419921875, + "learning_rate": 2.3328722913785157e-05, + "log_odds_chosen": 9.929397583007812, + "log_odds_ratio": -0.0008865576237440109, + "logits/chosen": -0.4087953567504883, + "logits/rejected": -0.3798828125, + "logps/chosen": -0.0010064990492537618, + "logps/rejected": -2.1022868156433105, + "loss": 0.7364, + "nll_loss": 0.1840183436870575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010064992238767445, + "rewards/margins": 0.2101280391216278, + "rewards/rejected": -0.21022869646549225, + "step": 8388 + }, + { + "epoch": 5.801521438450899, + "grad_norm": 8.019001007080078, + "learning_rate": 2.332488089749501e-05, + "log_odds_chosen": 10.398614883422852, + "log_odds_ratio": -5.484620851348154e-05, + "logits/chosen": -0.4774879515171051, + "logits/rejected": -0.5907200574874878, + "logps/chosen": -0.0003679244837258011, + "logps/rejected": -2.316585063934326, + "loss": 0.7131, + "nll_loss": 0.17826199531555176, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.679245128296316e-05, + "rewards/margins": 0.23162171244621277, + "rewards/rejected": -0.23165848851203918, + "step": 8389 + }, + { + "epoch": 5.802213001383126, + "grad_norm": 6.085483551025391, + "learning_rate": 2.3321038881204858e-05, + "log_odds_chosen": 11.327494621276855, + "log_odds_ratio": -2.2759963030694053e-05, + "logits/chosen": -0.22526970505714417, + "logits/rejected": -0.23279313743114471, + "logps/chosen": -0.00042107899207621813, + "logps/rejected": -3.112298011779785, + "loss": 0.9084, + "nll_loss": 0.22709017992019653, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.210790211800486e-05, + "rewards/margins": 0.3111876845359802, + "rewards/rejected": -0.3112298250198364, + "step": 8390 + }, + { + "epoch": 5.8029045643153525, + "grad_norm": 6.497158527374268, + "learning_rate": 2.3317196864914707e-05, + "log_odds_chosen": 9.713647842407227, + "log_odds_ratio": -0.0035168626345694065, + "logits/chosen": -0.6375054121017456, + "logits/rejected": -0.6635434031486511, + "logps/chosen": -0.0017270109383389354, + "logps/rejected": -1.250663161277771, + "loss": 0.9453, + "nll_loss": 0.23597703874111176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001727011112961918, + "rewards/margins": 0.12489361315965652, + "rewards/rejected": -0.12506631016731262, + "step": 8391 + }, + { + "epoch": 5.803596127247579, + "grad_norm": 8.719135284423828, + "learning_rate": 2.331335484862456e-05, + "log_odds_chosen": 10.795459747314453, + "log_odds_ratio": -4.9957008741330355e-05, + "logits/chosen": 0.13198542594909668, + "logits/rejected": -0.0006924569606781006, + "logps/chosen": -0.0006249711732380092, + "logps/rejected": -2.9145002365112305, + "loss": 0.7778, + "nll_loss": 0.19443432986736298, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249711441341788e-05, + "rewards/margins": 0.29138755798339844, + "rewards/rejected": -0.29145002365112305, + "step": 8392 + }, + { + "epoch": 5.804287690179806, + "grad_norm": 7.647108554840088, + "learning_rate": 2.330951283233441e-05, + "log_odds_chosen": 10.523514747619629, + "log_odds_ratio": -7.726266630925238e-05, + "logits/chosen": -0.607114851474762, + "logits/rejected": -0.6458864808082581, + "logps/chosen": -0.00021323721739463508, + "logps/rejected": -2.1598801612854004, + "loss": 0.5307, + "nll_loss": 0.13266785442829132, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1323721739463508e-05, + "rewards/margins": 0.21596670150756836, + "rewards/rejected": -0.21598801016807556, + "step": 8393 + }, + { + "epoch": 5.804979253112033, + "grad_norm": 9.371009826660156, + "learning_rate": 2.330567081604426e-05, + "log_odds_chosen": 11.301060676574707, + "log_odds_ratio": -4.154463022132404e-05, + "logits/chosen": -0.39531388878822327, + "logits/rejected": -0.5437592267990112, + "logps/chosen": -0.00044395128497853875, + "logps/rejected": -2.742654323577881, + "loss": 0.7487, + "nll_loss": 0.18716083467006683, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439512485987507e-05, + "rewards/margins": 0.2742210626602173, + "rewards/rejected": -0.27426543831825256, + "step": 8394 + }, + { + "epoch": 5.80567081604426, + "grad_norm": 6.178443908691406, + "learning_rate": 2.330182879975411e-05, + "log_odds_chosen": 9.948263168334961, + "log_odds_ratio": -0.0005975314998067915, + "logits/chosen": -0.48438113927841187, + "logits/rejected": -0.4877588152885437, + "logps/chosen": -0.001075923559255898, + "logps/rejected": -2.2288899421691895, + "loss": 0.8015, + "nll_loss": 0.20030581951141357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001075923428288661, + "rewards/margins": 0.22278141975402832, + "rewards/rejected": -0.2228890061378479, + "step": 8395 + }, + { + "epoch": 5.806362378976487, + "grad_norm": 6.832706928253174, + "learning_rate": 2.3297986783463963e-05, + "log_odds_chosen": 8.992963790893555, + "log_odds_ratio": -0.001389929442666471, + "logits/chosen": -0.5586123466491699, + "logits/rejected": -0.6410993337631226, + "logps/chosen": -0.007598080672323704, + "logps/rejected": -2.2602124214172363, + "loss": 1.4837, + "nll_loss": 0.3707854151725769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007598081137984991, + "rewards/margins": 0.22526142001152039, + "rewards/rejected": -0.22602123022079468, + "step": 8396 + }, + { + "epoch": 5.8070539419087135, + "grad_norm": 6.229443073272705, + "learning_rate": 2.3294144767173815e-05, + "log_odds_chosen": 10.032766342163086, + "log_odds_ratio": -0.00092526082880795, + "logits/chosen": -0.6621145606040955, + "logits/rejected": -0.7454342246055603, + "logps/chosen": -0.011643901467323303, + "logps/rejected": -2.058879852294922, + "loss": 0.7335, + "nll_loss": 0.18328972160816193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011643902398645878, + "rewards/margins": 0.20472361147403717, + "rewards/rejected": -0.20588800311088562, + "step": 8397 + }, + { + "epoch": 5.80774550484094, + "grad_norm": 7.834509372711182, + "learning_rate": 2.3290302750883664e-05, + "log_odds_chosen": 10.701813697814941, + "log_odds_ratio": -0.0012211342109367251, + "logits/chosen": -0.31548330187797546, + "logits/rejected": -0.366915225982666, + "logps/chosen": -0.003109922166913748, + "logps/rejected": -2.5836844444274902, + "loss": 0.5284, + "nll_loss": 0.13197240233421326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003109922108706087, + "rewards/margins": 0.25805744528770447, + "rewards/rejected": -0.25836843252182007, + "step": 8398 + }, + { + "epoch": 5.808437067773167, + "grad_norm": 8.876883506774902, + "learning_rate": 2.3286460734593517e-05, + "log_odds_chosen": 9.514881134033203, + "log_odds_ratio": -0.021531281992793083, + "logits/chosen": -0.385174036026001, + "logits/rejected": -0.4180731177330017, + "logps/chosen": -0.0053784530609846115, + "logps/rejected": -1.6292874813079834, + "loss": 0.555, + "nll_loss": 0.13659876585006714, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000537845422513783, + "rewards/margins": 0.16239090263843536, + "rewards/rejected": -0.1629287600517273, + "step": 8399 + }, + { + "epoch": 5.809128630705394, + "grad_norm": 7.484838962554932, + "learning_rate": 2.3282618718303366e-05, + "log_odds_chosen": 10.81995964050293, + "log_odds_ratio": -3.512339389999397e-05, + "logits/chosen": -0.687035083770752, + "logits/rejected": -0.7899414300918579, + "logps/chosen": -0.00020911633328069001, + "logps/rejected": -2.0662503242492676, + "loss": 0.6707, + "nll_loss": 0.16767863929271698, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0911633328069e-05, + "rewards/margins": 0.20660412311553955, + "rewards/rejected": -0.2066250443458557, + "step": 8400 + }, + { + "epoch": 5.809820193637621, + "grad_norm": 18.49502182006836, + "learning_rate": 2.3278776702013215e-05, + "log_odds_chosen": 9.66842269897461, + "log_odds_ratio": -0.2323729246854782, + "logits/chosen": -0.5169225335121155, + "logits/rejected": -0.574622392654419, + "logps/chosen": -0.025232313200831413, + "logps/rejected": -1.8740395307540894, + "loss": 1.1176, + "nll_loss": 0.2561640739440918, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025232313200831413, + "rewards/margins": 0.18488073348999023, + "rewards/rejected": -0.18740394711494446, + "step": 8401 + }, + { + "epoch": 5.810511756569848, + "grad_norm": 9.298235893249512, + "learning_rate": 2.3274934685723067e-05, + "log_odds_chosen": 10.226746559143066, + "log_odds_ratio": -0.00023452314781025052, + "logits/chosen": -0.4971470236778259, + "logits/rejected": -0.48891282081604004, + "logps/chosen": -0.0006599174812436104, + "logps/rejected": -1.9210774898529053, + "loss": 0.6072, + "nll_loss": 0.15176746249198914, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.599174957955256e-05, + "rewards/margins": 0.19204173982143402, + "rewards/rejected": -0.19210773706436157, + "step": 8402 + }, + { + "epoch": 5.8112033195020745, + "grad_norm": 6.863306045532227, + "learning_rate": 2.327109266943292e-05, + "log_odds_chosen": 9.36366081237793, + "log_odds_ratio": -0.000508427619934082, + "logits/chosen": -0.5794307589530945, + "logits/rejected": -0.6291723251342773, + "logps/chosen": -0.0017660510493442416, + "logps/rejected": -1.9207453727722168, + "loss": 1.1281, + "nll_loss": 0.281978964805603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017660509911365807, + "rewards/margins": 0.1918979436159134, + "rewards/rejected": -0.19207455217838287, + "step": 8403 + }, + { + "epoch": 5.811894882434301, + "grad_norm": 9.127928733825684, + "learning_rate": 2.326725065314277e-05, + "log_odds_chosen": 11.82565689086914, + "log_odds_ratio": -1.8272336092195474e-05, + "logits/chosen": -0.3629467189311981, + "logits/rejected": -0.43086594343185425, + "logps/chosen": -0.00016442629566881806, + "logps/rejected": -2.7578787803649902, + "loss": 0.6727, + "nll_loss": 0.16816404461860657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.644263102207333e-05, + "rewards/margins": 0.27577143907546997, + "rewards/rejected": -0.275787889957428, + "step": 8404 + }, + { + "epoch": 5.812586445366528, + "grad_norm": 14.229921340942383, + "learning_rate": 2.326340863685262e-05, + "log_odds_chosen": 10.126533508300781, + "log_odds_ratio": -8.251607505371794e-05, + "logits/chosen": -0.7604619264602661, + "logits/rejected": -0.8193801641464233, + "logps/chosen": -0.00019167909340467304, + "logps/rejected": -1.3991739749908447, + "loss": 0.6961, + "nll_loss": 0.1740119755268097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9167910068063065e-05, + "rewards/margins": 0.13989822566509247, + "rewards/rejected": -0.13991738855838776, + "step": 8405 + }, + { + "epoch": 5.813278008298755, + "grad_norm": 7.9956374168396, + "learning_rate": 2.3259566620562474e-05, + "log_odds_chosen": 8.889430046081543, + "log_odds_ratio": -0.04909144714474678, + "logits/chosen": -0.560508131980896, + "logits/rejected": -0.6312997341156006, + "logps/chosen": -0.010527387261390686, + "logps/rejected": -1.4551292657852173, + "loss": 1.1541, + "nll_loss": 0.28362512588500977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010527388658374548, + "rewards/margins": 0.14446018636226654, + "rewards/rejected": -0.1455129235982895, + "step": 8406 + }, + { + "epoch": 5.813969571230982, + "grad_norm": 8.152767181396484, + "learning_rate": 2.3255724604272323e-05, + "log_odds_chosen": 10.798035621643066, + "log_odds_ratio": -4.214973887428641e-05, + "logits/chosen": -0.13847284018993378, + "logits/rejected": -0.19017915427684784, + "logps/chosen": -0.00013657697127200663, + "logps/rejected": -1.9986261129379272, + "loss": 0.6238, + "nll_loss": 0.1559559553861618, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3657696399604902e-05, + "rewards/margins": 0.1998489648103714, + "rewards/rejected": -0.19986259937286377, + "step": 8407 + }, + { + "epoch": 5.814661134163209, + "grad_norm": 5.268293380737305, + "learning_rate": 2.3251882587982175e-05, + "log_odds_chosen": 10.488828659057617, + "log_odds_ratio": -0.00019341889128554612, + "logits/chosen": -0.3686334192752838, + "logits/rejected": -0.41108012199401855, + "logps/chosen": -0.0010957105550915003, + "logps/rejected": -2.32481050491333, + "loss": 0.7747, + "nll_loss": 0.19365090131759644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001095710540539585, + "rewards/margins": 0.2323714792728424, + "rewards/rejected": -0.23248103260993958, + "step": 8408 + }, + { + "epoch": 5.8153526970954355, + "grad_norm": 8.308907508850098, + "learning_rate": 2.3248040571692024e-05, + "log_odds_chosen": 8.83240032196045, + "log_odds_ratio": -0.0071347374469041824, + "logits/chosen": -0.38757187128067017, + "logits/rejected": -0.47507134079933167, + "logps/chosen": -0.009843516163527966, + "logps/rejected": -1.6178112030029297, + "loss": 0.8248, + "nll_loss": 0.20549696683883667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009843516163527966, + "rewards/margins": 0.16079676151275635, + "rewards/rejected": -0.16178111732006073, + "step": 8409 + }, + { + "epoch": 5.816044260027662, + "grad_norm": 7.885484218597412, + "learning_rate": 2.3244198555401873e-05, + "log_odds_chosen": 10.769173622131348, + "log_odds_ratio": -5.721321213059127e-05, + "logits/chosen": -0.28124937415122986, + "logits/rejected": -0.4066743552684784, + "logps/chosen": -0.00032837747130542994, + "logps/rejected": -1.9929379224777222, + "loss": 0.648, + "nll_loss": 0.1619967371225357, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.283774640294723e-05, + "rewards/margins": 0.19926095008850098, + "rewards/rejected": -0.19929379224777222, + "step": 8410 + }, + { + "epoch": 5.816735822959889, + "grad_norm": 6.666642665863037, + "learning_rate": 2.3240356539111726e-05, + "log_odds_chosen": 10.48222827911377, + "log_odds_ratio": -0.0004556652274914086, + "logits/chosen": -0.1470799446105957, + "logits/rejected": -0.25690242648124695, + "logps/chosen": -0.0005777716287411749, + "logps/rejected": -2.4178152084350586, + "loss": 0.7361, + "nll_loss": 0.18398535251617432, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7777167967287824e-05, + "rewards/margins": 0.24172374606132507, + "rewards/rejected": -0.24178150296211243, + "step": 8411 + }, + { + "epoch": 5.817427385892116, + "grad_norm": 6.081725597381592, + "learning_rate": 2.3236514522821578e-05, + "log_odds_chosen": 10.020877838134766, + "log_odds_ratio": -0.0009894585236907005, + "logits/chosen": -0.49546316266059875, + "logits/rejected": -0.48959237337112427, + "logps/chosen": -0.0033304309472441673, + "logps/rejected": -1.8964627981185913, + "loss": 0.5553, + "nll_loss": 0.13872453570365906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003330431063659489, + "rewards/margins": 0.18931323289871216, + "rewards/rejected": -0.18964628875255585, + "step": 8412 + }, + { + "epoch": 5.818118948824343, + "grad_norm": 5.866261959075928, + "learning_rate": 2.3232672506531427e-05, + "log_odds_chosen": 10.42142391204834, + "log_odds_ratio": -6.498560833279043e-05, + "logits/chosen": -0.15770825743675232, + "logits/rejected": -0.13760429620742798, + "logps/chosen": -0.0002847444557119161, + "logps/rejected": -1.9210240840911865, + "loss": 0.6556, + "nll_loss": 0.16390159726142883, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8474447390181012e-05, + "rewards/margins": 0.19207392632961273, + "rewards/rejected": -0.19210243225097656, + "step": 8413 + }, + { + "epoch": 5.81881051175657, + "grad_norm": 5.274764060974121, + "learning_rate": 2.322883049024128e-05, + "log_odds_chosen": 10.923952102661133, + "log_odds_ratio": -0.00017226138152182102, + "logits/chosen": -0.13174067437648773, + "logits/rejected": -0.2675361633300781, + "logps/chosen": -0.00036066051688976586, + "logps/rejected": -2.3805484771728516, + "loss": 0.7038, + "nll_loss": 0.17594024538993835, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.606605241657235e-05, + "rewards/margins": 0.23801881074905396, + "rewards/rejected": -0.23805485665798187, + "step": 8414 + }, + { + "epoch": 5.819502074688796, + "grad_norm": 11.084059715270996, + "learning_rate": 2.3224988473951132e-05, + "log_odds_chosen": 9.231254577636719, + "log_odds_ratio": -0.0006525219068862498, + "logits/chosen": -0.06093751639127731, + "logits/rejected": -0.19099873304367065, + "logps/chosen": -0.001553456182591617, + "logps/rejected": -1.725694179534912, + "loss": 1.0024, + "nll_loss": 0.2505434453487396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015534560952801257, + "rewards/margins": 0.17241407930850983, + "rewards/rejected": -0.1725694239139557, + "step": 8415 + }, + { + "epoch": 5.820193637621023, + "grad_norm": 7.154449462890625, + "learning_rate": 2.322114645766098e-05, + "log_odds_chosen": 10.379135131835938, + "log_odds_ratio": -9.017589763971046e-05, + "logits/chosen": -0.49093595147132874, + "logits/rejected": -0.5859791040420532, + "logps/chosen": -0.0006343638524413109, + "logps/rejected": -2.3570706844329834, + "loss": 0.6112, + "nll_loss": 0.1527821123600006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.343638233374804e-05, + "rewards/margins": 0.2356436401605606, + "rewards/rejected": -0.23570707440376282, + "step": 8416 + }, + { + "epoch": 5.82088520055325, + "grad_norm": 6.823001861572266, + "learning_rate": 2.3217304441370834e-05, + "log_odds_chosen": 10.937118530273438, + "log_odds_ratio": -3.171800926793367e-05, + "logits/chosen": -0.024569761008024216, + "logits/rejected": -0.06036039814352989, + "logps/chosen": -0.00010773000394692644, + "logps/rejected": -1.862146258354187, + "loss": 0.5502, + "nll_loss": 0.137550950050354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0773001122288406e-05, + "rewards/margins": 0.18620386719703674, + "rewards/rejected": -0.1862146258354187, + "step": 8417 + }, + { + "epoch": 5.821576763485477, + "grad_norm": 10.990660667419434, + "learning_rate": 2.3213462425080683e-05, + "log_odds_chosen": 10.83702278137207, + "log_odds_ratio": -0.0001108981596189551, + "logits/chosen": -0.432903528213501, + "logits/rejected": -0.4672655761241913, + "logps/chosen": -0.00029598127002827823, + "logps/rejected": -2.406498432159424, + "loss": 0.793, + "nll_loss": 0.19824771583080292, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9598128094221465e-05, + "rewards/margins": 0.24062024056911469, + "rewards/rejected": -0.24064984917640686, + "step": 8418 + }, + { + "epoch": 5.822268326417704, + "grad_norm": 8.261746406555176, + "learning_rate": 2.3209620408790535e-05, + "log_odds_chosen": 10.909168243408203, + "log_odds_ratio": -4.1487171984044835e-05, + "logits/chosen": -0.3953424394130707, + "logits/rejected": -0.41297727823257446, + "logps/chosen": -0.0001901828363770619, + "logps/rejected": -1.8639588356018066, + "loss": 0.481, + "nll_loss": 0.1202574297785759, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.901828363770619e-05, + "rewards/margins": 0.18637686967849731, + "rewards/rejected": -0.18639588356018066, + "step": 8419 + }, + { + "epoch": 5.822959889349931, + "grad_norm": 6.4954142570495605, + "learning_rate": 2.3205778392500384e-05, + "log_odds_chosen": 11.166680335998535, + "log_odds_ratio": -0.00025789288338273764, + "logits/chosen": -0.6210584044456482, + "logits/rejected": -0.7248342633247375, + "logps/chosen": -0.00025950977578759193, + "logps/rejected": -2.8732190132141113, + "loss": 0.7263, + "nll_loss": 0.18155349791049957, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.595098158053588e-05, + "rewards/margins": 0.287295937538147, + "rewards/rejected": -0.28732186555862427, + "step": 8420 + }, + { + "epoch": 5.823651452282157, + "grad_norm": 10.566604614257812, + "learning_rate": 2.3201936376210237e-05, + "log_odds_chosen": 11.431151390075684, + "log_odds_ratio": -1.713056735752616e-05, + "logits/chosen": -0.5161327123641968, + "logits/rejected": -0.5762264728546143, + "logps/chosen": -0.00018493003153707832, + "logps/rejected": -2.3237714767456055, + "loss": 0.6122, + "nll_loss": 0.15303756296634674, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.849300133471843e-05, + "rewards/margins": 0.2323586344718933, + "rewards/rejected": -0.23237714171409607, + "step": 8421 + }, + { + "epoch": 5.824343015214384, + "grad_norm": 7.167019844055176, + "learning_rate": 2.3198094359920086e-05, + "log_odds_chosen": 11.25281047821045, + "log_odds_ratio": -2.5544428353896365e-05, + "logits/chosen": -0.38715946674346924, + "logits/rejected": -0.45937803387641907, + "logps/chosen": -0.00012793140194844455, + "logps/rejected": -2.1478474140167236, + "loss": 0.7055, + "nll_loss": 0.1763816773891449, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2793139831046574e-05, + "rewards/margins": 0.21477195620536804, + "rewards/rejected": -0.21478475630283356, + "step": 8422 + }, + { + "epoch": 5.825034578146611, + "grad_norm": 8.895520210266113, + "learning_rate": 2.3194252343629938e-05, + "log_odds_chosen": 10.265355110168457, + "log_odds_ratio": -0.00013882019266020507, + "logits/chosen": -0.25306057929992676, + "logits/rejected": -0.3418146073818207, + "logps/chosen": -0.0015197532484307885, + "logps/rejected": -2.4995551109313965, + "loss": 0.8472, + "nll_loss": 0.21178866922855377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001519753277534619, + "rewards/margins": 0.2498035430908203, + "rewards/rejected": -0.24995550513267517, + "step": 8423 + }, + { + "epoch": 5.825726141078838, + "grad_norm": 9.67374038696289, + "learning_rate": 2.319041032733979e-05, + "log_odds_chosen": 11.97836685180664, + "log_odds_ratio": -1.1517046914377715e-05, + "logits/chosen": -0.3377426862716675, + "logits/rejected": -0.48516958951950073, + "logps/chosen": -0.0001647754106670618, + "logps/rejected": -2.926840305328369, + "loss": 0.885, + "nll_loss": 0.2212529331445694, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.647754106670618e-05, + "rewards/margins": 0.2926675081253052, + "rewards/rejected": -0.29268401861190796, + "step": 8424 + }, + { + "epoch": 5.826417704011065, + "grad_norm": 3.828767776489258, + "learning_rate": 2.318656831104964e-05, + "log_odds_chosen": 10.650819778442383, + "log_odds_ratio": -0.00011364149395376444, + "logits/chosen": -0.7182785272598267, + "logits/rejected": -0.7033266425132751, + "logps/chosen": -0.0006292496109381318, + "logps/rejected": -2.3739876747131348, + "loss": 0.8316, + "nll_loss": 0.207891583442688, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.29249625490047e-05, + "rewards/margins": 0.23733583092689514, + "rewards/rejected": -0.23739874362945557, + "step": 8425 + }, + { + "epoch": 5.827109266943292, + "grad_norm": 8.507972717285156, + "learning_rate": 2.3182726294759492e-05, + "log_odds_chosen": 10.733532905578613, + "log_odds_ratio": -4.510982762440108e-05, + "logits/chosen": -0.49858397245407104, + "logits/rejected": -0.5925998687744141, + "logps/chosen": -0.005823322106152773, + "logps/rejected": -2.722111701965332, + "loss": 1.0338, + "nll_loss": 0.25843608379364014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000582332257181406, + "rewards/margins": 0.27162885665893555, + "rewards/rejected": -0.2722111940383911, + "step": 8426 + }, + { + "epoch": 5.827800829875518, + "grad_norm": 9.715168952941895, + "learning_rate": 2.3178884278469344e-05, + "log_odds_chosen": 10.892751693725586, + "log_odds_ratio": -2.8781119908671826e-05, + "logits/chosen": -0.36615926027297974, + "logits/rejected": -0.417987585067749, + "logps/chosen": -0.0003425172180868685, + "logps/rejected": -2.6365199089050293, + "loss": 0.6392, + "nll_loss": 0.15979956090450287, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4251723263878375e-05, + "rewards/margins": 0.26361775398254395, + "rewards/rejected": -0.2636519968509674, + "step": 8427 + }, + { + "epoch": 5.828492392807745, + "grad_norm": 12.601354598999023, + "learning_rate": 2.3175042262179193e-05, + "log_odds_chosen": 10.926346778869629, + "log_odds_ratio": -3.957270018872805e-05, + "logits/chosen": -0.5197044610977173, + "logits/rejected": -0.5732940435409546, + "logps/chosen": -0.0004308591887820512, + "logps/rejected": -2.6111674308776855, + "loss": 0.7898, + "nll_loss": 0.1974467784166336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3085921788588166e-05, + "rewards/margins": 0.26107367873191833, + "rewards/rejected": -0.26111674308776855, + "step": 8428 + }, + { + "epoch": 5.829183955739972, + "grad_norm": 12.75105094909668, + "learning_rate": 2.3171200245889043e-05, + "log_odds_chosen": 10.6773681640625, + "log_odds_ratio": -4.829808676731773e-05, + "logits/chosen": -0.23225167393684387, + "logits/rejected": -0.31541967391967773, + "logps/chosen": -0.00018530743545852602, + "logps/rejected": -2.179605484008789, + "loss": 0.775, + "nll_loss": 0.19375410676002502, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.85307435458526e-05, + "rewards/margins": 0.21794205904006958, + "rewards/rejected": -0.21796056628227234, + "step": 8429 + }, + { + "epoch": 5.829875518672199, + "grad_norm": 7.408595561981201, + "learning_rate": 2.3167358229598895e-05, + "log_odds_chosen": 9.79969596862793, + "log_odds_ratio": -0.0001708334602881223, + "logits/chosen": -0.47640442848205566, + "logits/rejected": -0.5898884534835815, + "logps/chosen": -0.0005523377330973744, + "logps/rejected": -2.0489134788513184, + "loss": 0.8691, + "nll_loss": 0.21726754307746887, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.523378058569506e-05, + "rewards/margins": 0.20483613014221191, + "rewards/rejected": -0.2048913538455963, + "step": 8430 + }, + { + "epoch": 5.830567081604426, + "grad_norm": 7.56455135345459, + "learning_rate": 2.3163516213308744e-05, + "log_odds_chosen": 10.20605182647705, + "log_odds_ratio": -6.57220371067524e-05, + "logits/chosen": -0.8905206918716431, + "logits/rejected": -0.9065886735916138, + "logps/chosen": -0.00035831076093018055, + "logps/rejected": -1.6489109992980957, + "loss": 0.7195, + "nll_loss": 0.17987284064292908, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5831071727443486e-05, + "rewards/margins": 0.16485527157783508, + "rewards/rejected": -0.1648910939693451, + "step": 8431 + }, + { + "epoch": 5.8312586445366525, + "grad_norm": 6.3840012550354, + "learning_rate": 2.3159674197018596e-05, + "log_odds_chosen": 9.805391311645508, + "log_odds_ratio": -0.0001165514113381505, + "logits/chosen": -0.5324653387069702, + "logits/rejected": -0.4614141583442688, + "logps/chosen": -0.0006516007706522942, + "logps/rejected": -1.4721413850784302, + "loss": 0.9642, + "nll_loss": 0.24104109406471252, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.516007124446332e-05, + "rewards/margins": 0.1471489667892456, + "rewards/rejected": -0.1472141444683075, + "step": 8432 + }, + { + "epoch": 5.831950207468879, + "grad_norm": 8.392349243164062, + "learning_rate": 2.315583218072845e-05, + "log_odds_chosen": 9.441329956054688, + "log_odds_ratio": -0.0006476533599197865, + "logits/chosen": -0.6239942908287048, + "logits/rejected": -0.6620289087295532, + "logps/chosen": -0.0011929383035749197, + "logps/rejected": -2.1252613067626953, + "loss": 0.8785, + "nll_loss": 0.21956561505794525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011929382890230045, + "rewards/margins": 0.21240685880184174, + "rewards/rejected": -0.2125261425971985, + "step": 8433 + }, + { + "epoch": 5.832641770401106, + "grad_norm": 7.291143894195557, + "learning_rate": 2.3151990164438298e-05, + "log_odds_chosen": 10.459549903869629, + "log_odds_ratio": -8.78508435562253e-05, + "logits/chosen": -0.43349847197532654, + "logits/rejected": -0.5388727784156799, + "logps/chosen": -0.0004004551446996629, + "logps/rejected": -2.0080487728118896, + "loss": 0.7219, + "nll_loss": 0.18047361075878143, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.004551374237053e-05, + "rewards/margins": 0.20076483488082886, + "rewards/rejected": -0.20080485939979553, + "step": 8434 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 5.595971584320068, + "learning_rate": 2.314814814814815e-05, + "log_odds_chosen": 11.08672046661377, + "log_odds_ratio": -2.201294591941405e-05, + "logits/chosen": -0.39860230684280396, + "logits/rejected": -0.45757389068603516, + "logps/chosen": -9.895279799820855e-05, + "logps/rejected": -1.784722924232483, + "loss": 0.5515, + "nll_loss": 0.1378733068704605, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.895280527416617e-06, + "rewards/margins": 0.17846240103244781, + "rewards/rejected": -0.17847228050231934, + "step": 8435 + }, + { + "epoch": 5.83402489626556, + "grad_norm": 5.454908847808838, + "learning_rate": 2.3144306131858003e-05, + "log_odds_chosen": 9.911537170410156, + "log_odds_ratio": -0.0003854777605738491, + "logits/chosen": -0.33123037219047546, + "logits/rejected": -0.4306354224681854, + "logps/chosen": -0.0004738108837045729, + "logps/rejected": -1.5103071928024292, + "loss": 0.6818, + "nll_loss": 0.17041829228401184, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.738109419122338e-05, + "rewards/margins": 0.15098334848880768, + "rewards/rejected": -0.15103071928024292, + "step": 8436 + }, + { + "epoch": 5.834716459197787, + "grad_norm": 6.055264472961426, + "learning_rate": 2.3140464115567852e-05, + "log_odds_chosen": 9.830744743347168, + "log_odds_ratio": -0.00022968296252656728, + "logits/chosen": -0.39797988533973694, + "logits/rejected": -0.44242775440216064, + "logps/chosen": -0.0002832105674315244, + "logps/rejected": -1.6076643466949463, + "loss": 0.5417, + "nll_loss": 0.1353919506072998, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8321059289737605e-05, + "rewards/margins": 0.16073811054229736, + "rewards/rejected": -0.16076642274856567, + "step": 8437 + }, + { + "epoch": 5.8354080221300135, + "grad_norm": 6.931327819824219, + "learning_rate": 2.31366220992777e-05, + "log_odds_chosen": 10.32223892211914, + "log_odds_ratio": -0.0001639589318074286, + "logits/chosen": -0.196787491440773, + "logits/rejected": -0.25322869420051575, + "logps/chosen": -0.00022969470592215657, + "logps/rejected": -1.8739651441574097, + "loss": 0.7697, + "nll_loss": 0.19240155816078186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2969470592215657e-05, + "rewards/margins": 0.187373548746109, + "rewards/rejected": -0.18739651143550873, + "step": 8438 + }, + { + "epoch": 5.83609958506224, + "grad_norm": 7.6645588874816895, + "learning_rate": 2.3132780082987553e-05, + "log_odds_chosen": 10.39122200012207, + "log_odds_ratio": -0.00011932729103136808, + "logits/chosen": -0.17069807648658752, + "logits/rejected": -0.25050681829452515, + "logps/chosen": -0.0007452214485965669, + "logps/rejected": -2.2647957801818848, + "loss": 1.1396, + "nll_loss": 0.2848907709121704, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.452214049408212e-05, + "rewards/margins": 0.2264050841331482, + "rewards/rejected": -0.22647960484027863, + "step": 8439 + }, + { + "epoch": 5.836791147994467, + "grad_norm": 8.279850959777832, + "learning_rate": 2.3128938066697402e-05, + "log_odds_chosen": 10.228134155273438, + "log_odds_ratio": -6.32415249128826e-05, + "logits/chosen": -0.4481656551361084, + "logits/rejected": -0.5319783687591553, + "logps/chosen": -0.0004316547710914165, + "logps/rejected": -1.857574701309204, + "loss": 0.592, + "nll_loss": 0.14799726009368896, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.316547710914165e-05, + "rewards/margins": 0.18571430444717407, + "rewards/rejected": -0.18575747311115265, + "step": 8440 + }, + { + "epoch": 5.837482710926694, + "grad_norm": 6.741942405700684, + "learning_rate": 2.3125096050407255e-05, + "log_odds_chosen": 10.6365327835083, + "log_odds_ratio": -4.813177292817272e-05, + "logits/chosen": -0.09993970394134521, + "logits/rejected": -0.25238558650016785, + "logps/chosen": -0.00024143440532498062, + "logps/rejected": -2.303591728210449, + "loss": 0.8639, + "nll_loss": 0.21596278250217438, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.414343907730654e-05, + "rewards/margins": 0.2303350269794464, + "rewards/rejected": -0.23035918176174164, + "step": 8441 + }, + { + "epoch": 5.838174273858921, + "grad_norm": 7.0618181228637695, + "learning_rate": 2.3121254034117107e-05, + "log_odds_chosen": 11.856544494628906, + "log_odds_ratio": -1.502203394920798e-05, + "logits/chosen": -0.34077247977256775, + "logits/rejected": -0.3906419277191162, + "logps/chosen": -0.00016831861285027117, + "logps/rejected": -2.935901165008545, + "loss": 0.7305, + "nll_loss": 0.18261991441249847, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6831862012622878e-05, + "rewards/margins": 0.2935732901096344, + "rewards/rejected": -0.29359012842178345, + "step": 8442 + }, + { + "epoch": 5.838865836791148, + "grad_norm": 6.268857002258301, + "learning_rate": 2.3117412017826956e-05, + "log_odds_chosen": 11.079063415527344, + "log_odds_ratio": -0.00011699595052050427, + "logits/chosen": 0.002030055969953537, + "logits/rejected": -0.13568267226219177, + "logps/chosen": -0.00037257670192047954, + "logps/rejected": -2.717937707901001, + "loss": 0.651, + "nll_loss": 0.16273711621761322, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.725767237483524e-05, + "rewards/margins": 0.27175652980804443, + "rewards/rejected": -0.27179378271102905, + "step": 8443 + }, + { + "epoch": 5.8395573997233745, + "grad_norm": 6.871936321258545, + "learning_rate": 2.311357000153681e-05, + "log_odds_chosen": 10.119974136352539, + "log_odds_ratio": -0.00012443287414498627, + "logits/chosen": -0.23293182253837585, + "logits/rejected": -0.2058962881565094, + "logps/chosen": -0.0004133795737288892, + "logps/rejected": -2.0336642265319824, + "loss": 0.8331, + "nll_loss": 0.2082645148038864, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.133796028327197e-05, + "rewards/margins": 0.203325092792511, + "rewards/rejected": -0.20336642861366272, + "step": 8444 + }, + { + "epoch": 5.840248962655601, + "grad_norm": 6.8992719650268555, + "learning_rate": 2.310972798524666e-05, + "log_odds_chosen": 9.559707641601562, + "log_odds_ratio": -0.0006719698430970311, + "logits/chosen": -0.2077520489692688, + "logits/rejected": -0.28626370429992676, + "logps/chosen": -0.0007084297831170261, + "logps/rejected": -1.8092963695526123, + "loss": 1.4427, + "nll_loss": 0.3606107532978058, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.084297976689413e-05, + "rewards/margins": 0.1808588206768036, + "rewards/rejected": -0.18092964589595795, + "step": 8445 + }, + { + "epoch": 5.840940525587828, + "grad_norm": 6.642013072967529, + "learning_rate": 2.310588596895651e-05, + "log_odds_chosen": 11.043392181396484, + "log_odds_ratio": -4.813253690372221e-05, + "logits/chosen": -0.478056937456131, + "logits/rejected": -0.43620866537094116, + "logps/chosen": -0.00011894687486346811, + "logps/rejected": -1.8128962516784668, + "loss": 1.0769, + "nll_loss": 0.2692229747772217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1894688213942572e-05, + "rewards/margins": 0.18127773702144623, + "rewards/rejected": -0.1812896430492401, + "step": 8446 + }, + { + "epoch": 5.841632088520055, + "grad_norm": 8.432222366333008, + "learning_rate": 2.310204395266636e-05, + "log_odds_chosen": 9.543682098388672, + "log_odds_ratio": -0.00022593011090066284, + "logits/chosen": -0.5476033687591553, + "logits/rejected": -0.4643722176551819, + "logps/chosen": -0.00032802880741655827, + "logps/rejected": -1.440566897392273, + "loss": 0.7975, + "nll_loss": 0.1993521898984909, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2802880014060065e-05, + "rewards/margins": 0.14402389526367188, + "rewards/rejected": -0.14405669271945953, + "step": 8447 + }, + { + "epoch": 5.842323651452282, + "grad_norm": 9.15583610534668, + "learning_rate": 2.3098201936376212e-05, + "log_odds_chosen": 10.55355453491211, + "log_odds_ratio": -4.6456440031761304e-05, + "logits/chosen": -0.14537620544433594, + "logits/rejected": -0.12754163146018982, + "logps/chosen": -0.0002685963409021497, + "logps/rejected": -1.9353593587875366, + "loss": 0.6313, + "nll_loss": 0.1578291952610016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6859634090214968e-05, + "rewards/margins": 0.19350910186767578, + "rewards/rejected": -0.1935359537601471, + "step": 8448 + }, + { + "epoch": 5.843015214384509, + "grad_norm": 7.750082492828369, + "learning_rate": 2.309435992008606e-05, + "log_odds_chosen": 10.369003295898438, + "log_odds_ratio": -9.04241023818031e-05, + "logits/chosen": -0.43339040875434875, + "logits/rejected": -0.41327208280563354, + "logps/chosen": -0.0021025664173066616, + "logps/rejected": -2.554117441177368, + "loss": 0.5103, + "nll_loss": 0.1275673806667328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021025663590990007, + "rewards/margins": 0.2552014887332916, + "rewards/rejected": -0.2554117441177368, + "step": 8449 + }, + { + "epoch": 5.8437067773167355, + "grad_norm": 8.610663414001465, + "learning_rate": 2.3090517903795913e-05, + "log_odds_chosen": 10.283554077148438, + "log_odds_ratio": -6.271598249441013e-05, + "logits/chosen": -0.20354630053043365, + "logits/rejected": -0.2865172028541565, + "logps/chosen": -0.0012952906545251608, + "logps/rejected": -2.354818105697632, + "loss": 0.953, + "nll_loss": 0.23823662102222443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012952905672136694, + "rewards/margins": 0.2353522777557373, + "rewards/rejected": -0.23548182845115662, + "step": 8450 + }, + { + "epoch": 5.844398340248962, + "grad_norm": 14.888447761535645, + "learning_rate": 2.3086675887505766e-05, + "log_odds_chosen": 10.123775482177734, + "log_odds_ratio": -0.00026565161533653736, + "logits/chosen": -0.7436781525611877, + "logits/rejected": -0.876754879951477, + "logps/chosen": -0.0005735972663387656, + "logps/rejected": -2.3147449493408203, + "loss": 1.1257, + "nll_loss": 0.2813999056816101, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.735972081311047e-05, + "rewards/margins": 0.23141713440418243, + "rewards/rejected": -0.23147448897361755, + "step": 8451 + }, + { + "epoch": 5.845089903181189, + "grad_norm": 9.175409317016602, + "learning_rate": 2.3082833871215615e-05, + "log_odds_chosen": 10.095325469970703, + "log_odds_ratio": -0.002730314154177904, + "logits/chosen": -0.44304752349853516, + "logits/rejected": -0.567151665687561, + "logps/chosen": -0.021072175353765488, + "logps/rejected": -2.3713250160217285, + "loss": 0.7359, + "nll_loss": 0.18370933830738068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002107217675074935, + "rewards/margins": 0.2350253015756607, + "rewards/rejected": -0.23713251948356628, + "step": 8452 + }, + { + "epoch": 5.845781466113416, + "grad_norm": 9.673622131347656, + "learning_rate": 2.3078991854925467e-05, + "log_odds_chosen": 10.642251014709473, + "log_odds_ratio": -9.646148100728169e-05, + "logits/chosen": -0.7856646776199341, + "logits/rejected": -0.8123069405555725, + "logps/chosen": -0.000291735224891454, + "logps/rejected": -2.3011388778686523, + "loss": 0.8692, + "nll_loss": 0.21730202436447144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.917352321674116e-05, + "rewards/margins": 0.23008470237255096, + "rewards/rejected": -0.23011387884616852, + "step": 8453 + }, + { + "epoch": 5.846473029045643, + "grad_norm": 8.762003898620605, + "learning_rate": 2.307514983863532e-05, + "log_odds_chosen": 10.296168327331543, + "log_odds_ratio": -0.00019660551333799958, + "logits/chosen": -0.41896605491638184, + "logits/rejected": -0.4700847268104553, + "logps/chosen": -0.00015691027510911226, + "logps/rejected": -1.820425271987915, + "loss": 0.8769, + "nll_loss": 0.21919672191143036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5691028238506988e-05, + "rewards/margins": 0.18202683329582214, + "rewards/rejected": -0.18204253911972046, + "step": 8454 + }, + { + "epoch": 5.84716459197787, + "grad_norm": 4.631357669830322, + "learning_rate": 2.307130782234517e-05, + "log_odds_chosen": 9.847299575805664, + "log_odds_ratio": -0.0001940663787536323, + "logits/chosen": -0.48244351148605347, + "logits/rejected": -0.5233024954795837, + "logps/chosen": -0.0012303382391110063, + "logps/rejected": -1.9997535943984985, + "loss": 0.7227, + "nll_loss": 0.18065957725048065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012303382391110063, + "rewards/margins": 0.1998523324728012, + "rewards/rejected": -0.1999753713607788, + "step": 8455 + }, + { + "epoch": 5.8478561549100965, + "grad_norm": 5.259738445281982, + "learning_rate": 2.3067465806055018e-05, + "log_odds_chosen": 11.067523956298828, + "log_odds_ratio": -8.703301136847585e-05, + "logits/chosen": -0.2891809940338135, + "logits/rejected": -0.3176378309726715, + "logps/chosen": -0.00046851314255036414, + "logps/rejected": -3.0254478454589844, + "loss": 0.7031, + "nll_loss": 0.1757761538028717, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.685131716541946e-05, + "rewards/margins": 0.302497923374176, + "rewards/rejected": -0.30254480242729187, + "step": 8456 + }, + { + "epoch": 5.848547717842323, + "grad_norm": 5.415672779083252, + "learning_rate": 2.306362378976487e-05, + "log_odds_chosen": 10.416397094726562, + "log_odds_ratio": -8.140011050272733e-05, + "logits/chosen": -0.6477643847465515, + "logits/rejected": -0.7383682727813721, + "logps/chosen": -0.00015809826436452568, + "logps/rejected": -1.8434655666351318, + "loss": 0.6798, + "nll_loss": 0.16995397210121155, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5809826436452568e-05, + "rewards/margins": 0.1843307614326477, + "rewards/rejected": -0.18434655666351318, + "step": 8457 + }, + { + "epoch": 5.84923928077455, + "grad_norm": 9.015101432800293, + "learning_rate": 2.305978177347472e-05, + "log_odds_chosen": 9.226083755493164, + "log_odds_ratio": -0.048988040536642075, + "logits/chosen": -0.7032791376113892, + "logits/rejected": -0.7187217473983765, + "logps/chosen": -0.01049938052892685, + "logps/rejected": -1.548127293586731, + "loss": 1.0571, + "nll_loss": 0.2593871057033539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010499380296096206, + "rewards/margins": 0.1537627875804901, + "rewards/rejected": -0.15481272339820862, + "step": 8458 + }, + { + "epoch": 5.849930843706777, + "grad_norm": 6.82561731338501, + "learning_rate": 2.3055939757184572e-05, + "log_odds_chosen": 10.352500915527344, + "log_odds_ratio": -0.00015991966938599944, + "logits/chosen": -0.7856279015541077, + "logits/rejected": -0.8276427388191223, + "logps/chosen": -0.0003398554399609566, + "logps/rejected": -2.0386099815368652, + "loss": 0.9899, + "nll_loss": 0.2474551498889923, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.398553963052109e-05, + "rewards/margins": 0.20382700860500336, + "rewards/rejected": -0.20386099815368652, + "step": 8459 + }, + { + "epoch": 5.850622406639004, + "grad_norm": 5.151954650878906, + "learning_rate": 2.3052097740894424e-05, + "log_odds_chosen": 10.417545318603516, + "log_odds_ratio": -0.0009327520965598524, + "logits/chosen": -0.5495296120643616, + "logits/rejected": -0.5445981025695801, + "logps/chosen": -0.0010982422390952706, + "logps/rejected": -2.3092572689056396, + "loss": 0.718, + "nll_loss": 0.1794186234474182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010982423555105925, + "rewards/margins": 0.23081590235233307, + "rewards/rejected": -0.23092572391033173, + "step": 8460 + }, + { + "epoch": 5.851313969571231, + "grad_norm": 8.871502876281738, + "learning_rate": 2.3048255724604273e-05, + "log_odds_chosen": 11.5017728805542, + "log_odds_ratio": -1.7553564248373732e-05, + "logits/chosen": -0.6545240879058838, + "logits/rejected": -0.648597776889801, + "logps/chosen": -0.000455363595392555, + "logps/rejected": -2.6390507221221924, + "loss": 0.5919, + "nll_loss": 0.14797112345695496, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.553635881165974e-05, + "rewards/margins": 0.2638595402240753, + "rewards/rejected": -0.2639050781726837, + "step": 8461 + }, + { + "epoch": 5.8520055325034575, + "grad_norm": 13.104969024658203, + "learning_rate": 2.3044413708314126e-05, + "log_odds_chosen": 9.82265567779541, + "log_odds_ratio": -0.0008504237048327923, + "logits/chosen": -0.2843659520149231, + "logits/rejected": -0.33131715655326843, + "logps/chosen": -0.0010565564734861255, + "logps/rejected": -1.8024723529815674, + "loss": 0.7141, + "nll_loss": 0.17844170331954956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010565565753495321, + "rewards/margins": 0.1801415979862213, + "rewards/rejected": -0.1802472323179245, + "step": 8462 + }, + { + "epoch": 5.852697095435684, + "grad_norm": 7.004133701324463, + "learning_rate": 2.3040571692023975e-05, + "log_odds_chosen": 10.034337997436523, + "log_odds_ratio": -4.633550997823477e-05, + "logits/chosen": -0.47206172347068787, + "logits/rejected": -0.4350772500038147, + "logps/chosen": -0.00018470632494427264, + "logps/rejected": -1.5162315368652344, + "loss": 0.535, + "nll_loss": 0.13374532759189606, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.847063140303362e-05, + "rewards/margins": 0.15160468220710754, + "rewards/rejected": -0.15162315964698792, + "step": 8463 + }, + { + "epoch": 5.853388658367911, + "grad_norm": 4.642582893371582, + "learning_rate": 2.3036729675733827e-05, + "log_odds_chosen": 9.964178085327148, + "log_odds_ratio": -0.0001939919893629849, + "logits/chosen": -0.43271350860595703, + "logits/rejected": -0.43284517526626587, + "logps/chosen": -0.0008543849689885974, + "logps/rejected": -1.6528716087341309, + "loss": 0.3916, + "nll_loss": 0.09787382185459137, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.543848525732756e-05, + "rewards/margins": 0.16520172357559204, + "rewards/rejected": -0.16528716683387756, + "step": 8464 + }, + { + "epoch": 5.854080221300138, + "grad_norm": 7.169305801391602, + "learning_rate": 2.3032887659443676e-05, + "log_odds_chosen": 11.115387916564941, + "log_odds_ratio": -2.8890877729281783e-05, + "logits/chosen": -0.10302860289812088, + "logits/rejected": -0.27272582054138184, + "logps/chosen": -0.00022147822892293334, + "logps/rejected": -2.375821590423584, + "loss": 0.8242, + "nll_loss": 0.20604780316352844, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2147822164697573e-05, + "rewards/margins": 0.23756001889705658, + "rewards/rejected": -0.23758217692375183, + "step": 8465 + }, + { + "epoch": 5.854771784232365, + "grad_norm": 11.38037395477295, + "learning_rate": 2.3029045643153525e-05, + "log_odds_chosen": 11.165085792541504, + "log_odds_ratio": -4.8637983127264306e-05, + "logits/chosen": -0.23498114943504333, + "logits/rejected": -0.31281086802482605, + "logps/chosen": -0.00021896889666095376, + "logps/rejected": -2.051191806793213, + "loss": 0.7301, + "nll_loss": 0.18253003060817719, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1896890757489018e-05, + "rewards/margins": 0.2050972878932953, + "rewards/rejected": -0.20511919260025024, + "step": 8466 + }, + { + "epoch": 5.855463347164592, + "grad_norm": 7.457212924957275, + "learning_rate": 2.3025203626863378e-05, + "log_odds_chosen": 10.323047637939453, + "log_odds_ratio": -0.00025923640350811183, + "logits/chosen": -0.6115937829017639, + "logits/rejected": -0.5882790684700012, + "logps/chosen": -0.0003199709753971547, + "logps/rejected": -2.251497745513916, + "loss": 0.7397, + "nll_loss": 0.18489199876785278, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1997100450098515e-05, + "rewards/margins": 0.2251177728176117, + "rewards/rejected": -0.22514976561069489, + "step": 8467 + }, + { + "epoch": 5.856154910096818, + "grad_norm": 15.208779335021973, + "learning_rate": 2.302136161057323e-05, + "log_odds_chosen": 11.957347869873047, + "log_odds_ratio": -8.310971679748036e-06, + "logits/chosen": -0.6097729206085205, + "logits/rejected": -0.7365176677703857, + "logps/chosen": -7.011348498053849e-05, + "logps/rejected": -2.17049503326416, + "loss": 0.7336, + "nll_loss": 0.1834029257297516, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01134922564961e-06, + "rewards/margins": 0.21704250574111938, + "rewards/rejected": -0.2170495092868805, + "step": 8468 + }, + { + "epoch": 5.856846473029045, + "grad_norm": 7.276225566864014, + "learning_rate": 2.301751959428308e-05, + "log_odds_chosen": 10.8690824508667, + "log_odds_ratio": -0.00017169021884910762, + "logits/chosen": -0.36788827180862427, + "logits/rejected": -0.4121812880039215, + "logps/chosen": -0.0006079672602936625, + "logps/rejected": -2.42014741897583, + "loss": 0.6196, + "nll_loss": 0.1548839658498764, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079672311898321e-05, + "rewards/margins": 0.24195393919944763, + "rewards/rejected": -0.24201476573944092, + "step": 8469 + }, + { + "epoch": 5.857538035961272, + "grad_norm": 7.84228515625, + "learning_rate": 2.301367757799293e-05, + "log_odds_chosen": 8.987767219543457, + "log_odds_ratio": -0.0005815964541397989, + "logits/chosen": -0.4216848611831665, + "logits/rejected": -0.46568408608436584, + "logps/chosen": -0.000269006792223081, + "logps/rejected": -1.2155593633651733, + "loss": 0.7729, + "nll_loss": 0.19317123293876648, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6900681405095384e-05, + "rewards/margins": 0.12152904272079468, + "rewards/rejected": -0.12155595421791077, + "step": 8470 + }, + { + "epoch": 5.858229598893499, + "grad_norm": 11.462625503540039, + "learning_rate": 2.3009835561702784e-05, + "log_odds_chosen": 9.116348266601562, + "log_odds_ratio": -0.0001660315610934049, + "logits/chosen": -0.7824064493179321, + "logits/rejected": -0.8699042201042175, + "logps/chosen": -0.00036246480885893106, + "logps/rejected": -1.1887366771697998, + "loss": 0.7694, + "nll_loss": 0.19234436750411987, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.624648161348887e-05, + "rewards/margins": 0.11883742362260818, + "rewards/rejected": -0.11887366324663162, + "step": 8471 + }, + { + "epoch": 5.858921161825726, + "grad_norm": 7.8624067306518555, + "learning_rate": 2.3005993545412633e-05, + "log_odds_chosen": 9.661598205566406, + "log_odds_ratio": -0.00036091357469558716, + "logits/chosen": -0.0834280401468277, + "logits/rejected": 0.008592572063207626, + "logps/chosen": -0.0011003809049725533, + "logps/rejected": -2.2244157791137695, + "loss": 0.8687, + "nll_loss": 0.21713170409202576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011003809777321294, + "rewards/margins": 0.22233150899410248, + "rewards/rejected": -0.22244155406951904, + "step": 8472 + }, + { + "epoch": 5.8596127247579535, + "grad_norm": 5.414401054382324, + "learning_rate": 2.3002151529122486e-05, + "log_odds_chosen": 10.450469970703125, + "log_odds_ratio": -0.0006526052602566779, + "logits/chosen": -0.48692911863327026, + "logits/rejected": -0.5583130121231079, + "logps/chosen": -0.0008224752964451909, + "logps/rejected": -2.2898762226104736, + "loss": 0.5195, + "nll_loss": 0.12981073558330536, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.224751945817843e-05, + "rewards/margins": 0.22890537977218628, + "rewards/rejected": -0.22898761928081512, + "step": 8473 + }, + { + "epoch": 5.86030428769018, + "grad_norm": 4.766345500946045, + "learning_rate": 2.2998309512832335e-05, + "log_odds_chosen": 10.06740951538086, + "log_odds_ratio": -0.00011042284313589334, + "logits/chosen": -0.5878250598907471, + "logits/rejected": -0.6204366087913513, + "logps/chosen": -0.00036290791467763484, + "logps/rejected": -1.7573728561401367, + "loss": 0.6145, + "nll_loss": 0.1536146104335785, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.629079583333805e-05, + "rewards/margins": 0.17570099234580994, + "rewards/rejected": -0.17573729157447815, + "step": 8474 + }, + { + "epoch": 5.860995850622407, + "grad_norm": 7.62743616104126, + "learning_rate": 2.2994467496542184e-05, + "log_odds_chosen": 10.28510856628418, + "log_odds_ratio": -0.00019185362907592207, + "logits/chosen": -0.22833538055419922, + "logits/rejected": -0.2660457491874695, + "logps/chosen": -0.0003476017154753208, + "logps/rejected": -1.9011214971542358, + "loss": 0.6462, + "nll_loss": 0.16153304278850555, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.476017445791513e-05, + "rewards/margins": 0.19007739424705505, + "rewards/rejected": -0.1901121437549591, + "step": 8475 + }, + { + "epoch": 5.861687413554634, + "grad_norm": 10.680320739746094, + "learning_rate": 2.2990625480252036e-05, + "log_odds_chosen": 11.026237487792969, + "log_odds_ratio": -0.0001369381498079747, + "logits/chosen": -0.4240252375602722, + "logits/rejected": -0.43849751353263855, + "logps/chosen": -0.000934754207264632, + "logps/rejected": -2.4850332736968994, + "loss": 0.6348, + "nll_loss": 0.15868297219276428, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.347541345050558e-05, + "rewards/margins": 0.24840986728668213, + "rewards/rejected": -0.24850334227085114, + "step": 8476 + }, + { + "epoch": 5.862378976486861, + "grad_norm": 5.56638765335083, + "learning_rate": 2.298678346396189e-05, + "log_odds_chosen": 10.3011474609375, + "log_odds_ratio": -0.00010999199002981186, + "logits/chosen": -0.46072691679000854, + "logits/rejected": -0.5030690431594849, + "logps/chosen": -0.00021592274424619973, + "logps/rejected": -1.7911337614059448, + "loss": 0.6528, + "nll_loss": 0.1632012128829956, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1592275516013615e-05, + "rewards/margins": 0.17909179627895355, + "rewards/rejected": -0.17911338806152344, + "step": 8477 + }, + { + "epoch": 5.863070539419088, + "grad_norm": 6.392146587371826, + "learning_rate": 2.2982941447671738e-05, + "log_odds_chosen": 8.475443840026855, + "log_odds_ratio": -0.000854872923810035, + "logits/chosen": -0.28882694244384766, + "logits/rejected": -0.32335835695266724, + "logps/chosen": -0.0007878682808950543, + "logps/rejected": -1.0920076370239258, + "loss": 0.6953, + "nll_loss": 0.1737397462129593, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.878682663431391e-05, + "rewards/margins": 0.10912197828292847, + "rewards/rejected": -0.10920076072216034, + "step": 8478 + }, + { + "epoch": 5.8637621023513145, + "grad_norm": 9.73110294342041, + "learning_rate": 2.297909943138159e-05, + "log_odds_chosen": 9.462718963623047, + "log_odds_ratio": -0.00021635452867485583, + "logits/chosen": -0.6231610774993896, + "logits/rejected": -0.7080822587013245, + "logps/chosen": -0.0003653976018540561, + "logps/rejected": -1.4073103666305542, + "loss": 0.6395, + "nll_loss": 0.15986159443855286, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6539760913001373e-05, + "rewards/margins": 0.1406944990158081, + "rewards/rejected": -0.14073103666305542, + "step": 8479 + }, + { + "epoch": 5.864453665283541, + "grad_norm": 7.435009479522705, + "learning_rate": 2.2975257415091443e-05, + "log_odds_chosen": 9.775884628295898, + "log_odds_ratio": -0.0010301289148628712, + "logits/chosen": -0.3054084777832031, + "logits/rejected": -0.41487932205200195, + "logps/chosen": -0.0004547960124909878, + "logps/rejected": -1.4178364276885986, + "loss": 0.7897, + "nll_loss": 0.19732072949409485, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.54796027042903e-05, + "rewards/margins": 0.141738161444664, + "rewards/rejected": -0.14178363978862762, + "step": 8480 + }, + { + "epoch": 5.865145228215768, + "grad_norm": 8.806510925292969, + "learning_rate": 2.297141539880129e-05, + "log_odds_chosen": 10.327197074890137, + "log_odds_ratio": -0.00014382670633494854, + "logits/chosen": -0.3441201150417328, + "logits/rejected": -0.33835649490356445, + "logps/chosen": -0.00020382092043291777, + "logps/rejected": -1.5859711170196533, + "loss": 0.8675, + "nll_loss": 0.21685351431369781, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0382089132908732e-05, + "rewards/margins": 0.15857672691345215, + "rewards/rejected": -0.15859711170196533, + "step": 8481 + }, + { + "epoch": 5.865836791147995, + "grad_norm": 7.617472171783447, + "learning_rate": 2.2967573382511144e-05, + "log_odds_chosen": 10.687834739685059, + "log_odds_ratio": -6.997709715506062e-05, + "logits/chosen": -0.09275079518556595, + "logits/rejected": -0.2638387680053711, + "logps/chosen": -0.00020644580945372581, + "logps/rejected": -1.818613052368164, + "loss": 0.6018, + "nll_loss": 0.15043112635612488, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0644582036766224e-05, + "rewards/margins": 0.1818406581878662, + "rewards/rejected": -0.1818612813949585, + "step": 8482 + }, + { + "epoch": 5.866528354080222, + "grad_norm": 10.971071243286133, + "learning_rate": 2.2963731366220993e-05, + "log_odds_chosen": 9.51954460144043, + "log_odds_ratio": -0.0009347121231257915, + "logits/chosen": -0.49512526392936707, + "logits/rejected": -0.43952494859695435, + "logps/chosen": -0.0011396382469683886, + "logps/rejected": -1.9709765911102295, + "loss": 0.83, + "nll_loss": 0.2073979377746582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011396382615203038, + "rewards/margins": 0.1969837099313736, + "rewards/rejected": -0.19709765911102295, + "step": 8483 + }, + { + "epoch": 5.867219917012449, + "grad_norm": 14.948137283325195, + "learning_rate": 2.2959889349930842e-05, + "log_odds_chosen": 10.836645126342773, + "log_odds_ratio": -4.2490642954362556e-05, + "logits/chosen": -0.5271219611167908, + "logits/rejected": -0.587436318397522, + "logps/chosen": -0.00012515847629401833, + "logps/rejected": -1.9035587310791016, + "loss": 0.8411, + "nll_loss": 0.21027767658233643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2515848538896535e-05, + "rewards/margins": 0.19034337997436523, + "rewards/rejected": -0.19035589694976807, + "step": 8484 + }, + { + "epoch": 5.867911479944675, + "grad_norm": 13.141921043395996, + "learning_rate": 2.2956047333640695e-05, + "log_odds_chosen": 11.41067886352539, + "log_odds_ratio": -5.523320214706473e-05, + "logits/chosen": -1.1916627883911133, + "logits/rejected": -1.240216612815857, + "logps/chosen": -0.00011970465129707009, + "logps/rejected": -2.4381299018859863, + "loss": 0.7998, + "nll_loss": 0.1999463587999344, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.197046549350489e-05, + "rewards/margins": 0.2438010424375534, + "rewards/rejected": -0.24381300806999207, + "step": 8485 + }, + { + "epoch": 5.868603042876902, + "grad_norm": 5.228637218475342, + "learning_rate": 2.2952205317350547e-05, + "log_odds_chosen": 8.226043701171875, + "log_odds_ratio": -0.0019224716816097498, + "logits/chosen": -0.5227464437484741, + "logits/rejected": -0.5976300239562988, + "logps/chosen": -0.0015107663348317146, + "logps/rejected": -1.1950490474700928, + "loss": 0.6394, + "nll_loss": 0.15966679155826569, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015107664512470365, + "rewards/margins": 0.11935383081436157, + "rewards/rejected": -0.1195048987865448, + "step": 8486 + }, + { + "epoch": 5.869294605809129, + "grad_norm": 5.083609104156494, + "learning_rate": 2.2948363301060396e-05, + "log_odds_chosen": 10.229864120483398, + "log_odds_ratio": -6.585160735994577e-05, + "logits/chosen": -0.5051541328430176, + "logits/rejected": -0.5046700835227966, + "logps/chosen": -0.0003552216512616724, + "logps/rejected": -1.795076608657837, + "loss": 0.5168, + "nll_loss": 0.1291866898536682, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.552216730895452e-05, + "rewards/margins": 0.17947213351726532, + "rewards/rejected": -0.17950765788555145, + "step": 8487 + }, + { + "epoch": 5.869986168741356, + "grad_norm": 5.455687999725342, + "learning_rate": 2.294452128477025e-05, + "log_odds_chosen": 10.526873588562012, + "log_odds_ratio": -0.0004474801244214177, + "logits/chosen": -0.5565546751022339, + "logits/rejected": -0.6399465799331665, + "logps/chosen": -0.0018108977237716317, + "logps/rejected": -2.827514886856079, + "loss": 0.9766, + "nll_loss": 0.24409957230091095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018108976655639708, + "rewards/margins": 0.28257042169570923, + "rewards/rejected": -0.28275150060653687, + "step": 8488 + }, + { + "epoch": 5.870677731673583, + "grad_norm": 6.853804111480713, + "learning_rate": 2.29406792684801e-05, + "log_odds_chosen": 10.368824005126953, + "log_odds_ratio": -5.3320181905291975e-05, + "logits/chosen": -0.259101927280426, + "logits/rejected": -0.3078291714191437, + "logps/chosen": -0.0004884085501544178, + "logps/rejected": -2.398629665374756, + "loss": 0.6442, + "nll_loss": 0.16105058789253235, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8840855015441775e-05, + "rewards/margins": 0.23981411755084991, + "rewards/rejected": -0.23986297845840454, + "step": 8489 + }, + { + "epoch": 5.87136929460581, + "grad_norm": 7.719622611999512, + "learning_rate": 2.293683725218995e-05, + "log_odds_chosen": 10.203547477722168, + "log_odds_ratio": -0.0001927811244968325, + "logits/chosen": -0.5728868842124939, + "logits/rejected": -0.5559225082397461, + "logps/chosen": -0.0007595854112878442, + "logps/rejected": -2.2591919898986816, + "loss": 0.7431, + "nll_loss": 0.18574857711791992, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.595854549435899e-05, + "rewards/margins": 0.22584323585033417, + "rewards/rejected": -0.2259191870689392, + "step": 8490 + }, + { + "epoch": 5.872060857538036, + "grad_norm": 10.853219985961914, + "learning_rate": 2.2932995235899803e-05, + "log_odds_chosen": 10.449564933776855, + "log_odds_ratio": -8.658177102915943e-05, + "logits/chosen": -0.4404873549938202, + "logits/rejected": -0.49861884117126465, + "logps/chosen": -0.0005348151898942888, + "logps/rejected": -1.8154845237731934, + "loss": 1.0555, + "nll_loss": 0.263855516910553, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3481522627407685e-05, + "rewards/margins": 0.18149496614933014, + "rewards/rejected": -0.18154844641685486, + "step": 8491 + }, + { + "epoch": 5.872752420470263, + "grad_norm": 10.383162498474121, + "learning_rate": 2.292915321960965e-05, + "log_odds_chosen": 9.47683048248291, + "log_odds_ratio": -0.0003712968318723142, + "logits/chosen": -0.6891586780548096, + "logits/rejected": -0.7439630627632141, + "logps/chosen": -0.0007887138053774834, + "logps/rejected": -2.0626349449157715, + "loss": 0.996, + "nll_loss": 0.24896374344825745, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.887138781370595e-05, + "rewards/margins": 0.20618462562561035, + "rewards/rejected": -0.2062634974718094, + "step": 8492 + }, + { + "epoch": 5.87344398340249, + "grad_norm": 5.884782314300537, + "learning_rate": 2.29253112033195e-05, + "log_odds_chosen": 10.168281555175781, + "log_odds_ratio": -0.0001475010794820264, + "logits/chosen": -0.8397963047027588, + "logits/rejected": -0.9277424216270447, + "logps/chosen": -0.0025121637154370546, + "logps/rejected": -2.0754339694976807, + "loss": 0.5356, + "nll_loss": 0.1338898241519928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025121637736447155, + "rewards/margins": 0.20729219913482666, + "rewards/rejected": -0.20754341781139374, + "step": 8493 + }, + { + "epoch": 5.874135546334717, + "grad_norm": 7.1727213859558105, + "learning_rate": 2.2921469187029353e-05, + "log_odds_chosen": 11.261905670166016, + "log_odds_ratio": -2.215427957708016e-05, + "logits/chosen": -0.2568546533584595, + "logits/rejected": -0.34491297602653503, + "logps/chosen": -6.840811693109572e-05, + "logps/rejected": -1.8215889930725098, + "loss": 0.5514, + "nll_loss": 0.1378553807735443, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.840811693109572e-06, + "rewards/margins": 0.18215206265449524, + "rewards/rejected": -0.1821589171886444, + "step": 8494 + }, + { + "epoch": 5.874827109266944, + "grad_norm": 8.279706954956055, + "learning_rate": 2.2917627170739206e-05, + "log_odds_chosen": 10.762760162353516, + "log_odds_ratio": -0.00012183685612399131, + "logits/chosen": -0.5639219284057617, + "logits/rejected": -0.5642800331115723, + "logps/chosen": -0.00018160277977585793, + "logps/rejected": -2.2311253547668457, + "loss": 0.6445, + "nll_loss": 0.16111090779304504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8160279068979435e-05, + "rewards/margins": 0.2230944037437439, + "rewards/rejected": -0.223112553358078, + "step": 8495 + }, + { + "epoch": 5.875518672199171, + "grad_norm": 6.084949970245361, + "learning_rate": 2.2913785154449055e-05, + "log_odds_chosen": 11.30678939819336, + "log_odds_ratio": -2.2296328097581863e-05, + "logits/chosen": -0.5212001800537109, + "logits/rejected": -0.5491906404495239, + "logps/chosen": -0.000233797516557388, + "logps/rejected": -2.737969398498535, + "loss": 0.5014, + "nll_loss": 0.12534253299236298, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3379752747132443e-05, + "rewards/margins": 0.27377355098724365, + "rewards/rejected": -0.2737969160079956, + "step": 8496 + }, + { + "epoch": 5.876210235131397, + "grad_norm": 7.121036052703857, + "learning_rate": 2.2909943138158907e-05, + "log_odds_chosen": 10.899856567382812, + "log_odds_ratio": -3.422797817620449e-05, + "logits/chosen": -0.7306734323501587, + "logits/rejected": -0.8067958354949951, + "logps/chosen": -0.0001387650117976591, + "logps/rejected": -2.044618606567383, + "loss": 0.5315, + "nll_loss": 0.13288140296936035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3876499906473327e-05, + "rewards/margins": 0.20444798469543457, + "rewards/rejected": -0.20446187257766724, + "step": 8497 + }, + { + "epoch": 5.876901798063624, + "grad_norm": 9.996922492980957, + "learning_rate": 2.290610112186876e-05, + "log_odds_chosen": 10.232194900512695, + "log_odds_ratio": -0.0001114396145567298, + "logits/chosen": -0.5728435516357422, + "logits/rejected": -0.629746675491333, + "logps/chosen": -0.0003327956947032362, + "logps/rejected": -1.8851649761199951, + "loss": 0.8212, + "nll_loss": 0.20528879761695862, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3279571653110906e-05, + "rewards/margins": 0.18848322331905365, + "rewards/rejected": -0.1885165125131607, + "step": 8498 + }, + { + "epoch": 5.877593360995851, + "grad_norm": 7.2498579025268555, + "learning_rate": 2.290225910557861e-05, + "log_odds_chosen": 10.74060344696045, + "log_odds_ratio": -6.614306039409712e-05, + "logits/chosen": -0.4745241701602936, + "logits/rejected": -0.5490915179252625, + "logps/chosen": -0.00032557419035583735, + "logps/rejected": -2.3486926555633545, + "loss": 0.635, + "nll_loss": 0.15873301029205322, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.255742194596678e-05, + "rewards/margins": 0.23483669757843018, + "rewards/rejected": -0.23486925661563873, + "step": 8499 + }, + { + "epoch": 5.878284923928078, + "grad_norm": 5.14827299118042, + "learning_rate": 2.289841708928846e-05, + "log_odds_chosen": 10.924230575561523, + "log_odds_ratio": -2.3871401936048642e-05, + "logits/chosen": -0.4564250707626343, + "logits/rejected": -0.4732181131839752, + "logps/chosen": -0.00027109961956739426, + "logps/rejected": -2.2624363899230957, + "loss": 0.5444, + "nll_loss": 0.13610848784446716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7109967049909756e-05, + "rewards/margins": 0.22621655464172363, + "rewards/rejected": -0.22624364495277405, + "step": 8500 + }, + { + "epoch": 5.878976486860305, + "grad_norm": 5.078951835632324, + "learning_rate": 2.289457507299831e-05, + "log_odds_chosen": 9.571325302124023, + "log_odds_ratio": -0.00027377932565286756, + "logits/chosen": -0.7607353925704956, + "logits/rejected": -0.7960146069526672, + "logps/chosen": -0.000566540053114295, + "logps/rejected": -1.4324028491973877, + "loss": 0.5346, + "nll_loss": 0.13362522423267365, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.665401113219559e-05, + "rewards/margins": 0.1431836485862732, + "rewards/rejected": -0.143240287899971, + "step": 8501 + }, + { + "epoch": 5.8796680497925315, + "grad_norm": 5.351574420928955, + "learning_rate": 2.289073305670816e-05, + "log_odds_chosen": 10.164881706237793, + "log_odds_ratio": -9.443954331800342e-05, + "logits/chosen": -0.7071594595909119, + "logits/rejected": -0.6985030770301819, + "logps/chosen": -0.0006836583488620818, + "logps/rejected": -2.3781793117523193, + "loss": 0.9594, + "nll_loss": 0.2398291975259781, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.836582906544209e-05, + "rewards/margins": 0.23774956166744232, + "rewards/rejected": -0.2378179430961609, + "step": 8502 + }, + { + "epoch": 5.880359612724758, + "grad_norm": 7.638421058654785, + "learning_rate": 2.288689104041801e-05, + "log_odds_chosen": 9.617386817932129, + "log_odds_ratio": -0.0010062733199447393, + "logits/chosen": -0.7072057127952576, + "logits/rejected": -0.7967448234558105, + "logps/chosen": -0.0010916423052549362, + "logps/rejected": -2.1913113594055176, + "loss": 1.4487, + "nll_loss": 0.36207252740859985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010916422615991905, + "rewards/margins": 0.2190219610929489, + "rewards/rejected": -0.21913112699985504, + "step": 8503 + }, + { + "epoch": 5.881051175656985, + "grad_norm": 19.64476776123047, + "learning_rate": 2.2883049024127864e-05, + "log_odds_chosen": 11.248300552368164, + "log_odds_ratio": -2.4759872758295387e-05, + "logits/chosen": -0.17140576243400574, + "logits/rejected": -0.29898712038993835, + "logps/chosen": -0.00019525880634319037, + "logps/rejected": -2.35072922706604, + "loss": 0.8572, + "nll_loss": 0.21430210769176483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9525879906723276e-05, + "rewards/margins": 0.2350533902645111, + "rewards/rejected": -0.23507292568683624, + "step": 8504 + }, + { + "epoch": 5.881742738589212, + "grad_norm": 5.020126819610596, + "learning_rate": 2.2879207007837713e-05, + "log_odds_chosen": 11.25761604309082, + "log_odds_ratio": -0.00011057972005801275, + "logits/chosen": -0.24836036562919617, + "logits/rejected": -0.3698427975177765, + "logps/chosen": -7.968972204253078e-05, + "logps/rejected": -2.058882713317871, + "loss": 1.0456, + "nll_loss": 0.26139941811561584, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.968971658556256e-06, + "rewards/margins": 0.2058803141117096, + "rewards/rejected": -0.2058882713317871, + "step": 8505 + }, + { + "epoch": 5.882434301521439, + "grad_norm": 9.156305313110352, + "learning_rate": 2.2875364991547565e-05, + "log_odds_chosen": 11.667762756347656, + "log_odds_ratio": -1.598697417648509e-05, + "logits/chosen": -0.29898375272750854, + "logits/rejected": -0.36718064546585083, + "logps/chosen": -0.0001733689132379368, + "logps/rejected": -2.839731216430664, + "loss": 0.8435, + "nll_loss": 0.21087408065795898, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7336889868602157e-05, + "rewards/margins": 0.28395578265190125, + "rewards/rejected": -0.2839730978012085, + "step": 8506 + }, + { + "epoch": 5.883125864453666, + "grad_norm": 16.775108337402344, + "learning_rate": 2.2871522975257418e-05, + "log_odds_chosen": 10.418400764465332, + "log_odds_ratio": -5.1155384426238015e-05, + "logits/chosen": -0.3068312108516693, + "logits/rejected": -0.30754444003105164, + "logps/chosen": -0.000983749981969595, + "logps/rejected": -2.654691457748413, + "loss": 0.7028, + "nll_loss": 0.17570620775222778, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.837499965215102e-05, + "rewards/margins": 0.26537075638771057, + "rewards/rejected": -0.26546913385391235, + "step": 8507 + }, + { + "epoch": 5.8838174273858925, + "grad_norm": 23.888675689697266, + "learning_rate": 2.2867680958967267e-05, + "log_odds_chosen": 9.692110061645508, + "log_odds_ratio": -0.026897268369793892, + "logits/chosen": -0.5336145758628845, + "logits/rejected": -0.5951070785522461, + "logps/chosen": -0.05457576364278793, + "logps/rejected": -2.692565679550171, + "loss": 0.6847, + "nll_loss": 0.16849052906036377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005457576364278793, + "rewards/margins": 0.2637990117073059, + "rewards/rejected": -0.2692565619945526, + "step": 8508 + }, + { + "epoch": 5.884508990318119, + "grad_norm": 3.8887486457824707, + "learning_rate": 2.286383894267712e-05, + "log_odds_chosen": 10.369190216064453, + "log_odds_ratio": -4.303463356336579e-05, + "logits/chosen": -0.3863868713378906, + "logits/rejected": -0.4776327908039093, + "logps/chosen": -0.0003454264369793236, + "logps/rejected": -1.9126297235488892, + "loss": 0.4677, + "nll_loss": 0.11691690981388092, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4542645153123885e-05, + "rewards/margins": 0.191228449344635, + "rewards/rejected": -0.19126299023628235, + "step": 8509 + }, + { + "epoch": 5.885200553250346, + "grad_norm": 11.780563354492188, + "learning_rate": 2.285999692638697e-05, + "log_odds_chosen": 10.295709609985352, + "log_odds_ratio": -0.00017475405184086412, + "logits/chosen": -0.3894461691379547, + "logits/rejected": -0.4314771890640259, + "logps/chosen": -0.00022790816728956997, + "logps/rejected": -2.005774974822998, + "loss": 0.8555, + "nll_loss": 0.21384881436824799, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2790816728956997e-05, + "rewards/margins": 0.20055469870567322, + "rewards/rejected": -0.2005774825811386, + "step": 8510 + }, + { + "epoch": 5.885892116182573, + "grad_norm": 8.197593688964844, + "learning_rate": 2.285615491009682e-05, + "log_odds_chosen": 10.112564086914062, + "log_odds_ratio": -0.0003686411655507982, + "logits/chosen": -0.5047045946121216, + "logits/rejected": -0.4849182963371277, + "logps/chosen": -0.0006123175262473524, + "logps/rejected": -1.7913522720336914, + "loss": 0.8222, + "nll_loss": 0.20551498234272003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.123175990069285e-05, + "rewards/margins": 0.1790739893913269, + "rewards/rejected": -0.17913523316383362, + "step": 8511 + }, + { + "epoch": 5.8865836791148, + "grad_norm": 4.952797889709473, + "learning_rate": 2.285231289380667e-05, + "log_odds_chosen": 11.156974792480469, + "log_odds_ratio": -0.00033142950269393623, + "logits/chosen": -0.6084975600242615, + "logits/rejected": -0.6908661723136902, + "logps/chosen": -0.0005794093594886363, + "logps/rejected": -2.6508893966674805, + "loss": 0.7902, + "nll_loss": 0.197504460811615, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7940935221267864e-05, + "rewards/margins": 0.26503100991249084, + "rewards/rejected": -0.2650889456272125, + "step": 8512 + }, + { + "epoch": 5.887275242047027, + "grad_norm": 8.416805267333984, + "learning_rate": 2.2848470877516522e-05, + "log_odds_chosen": 10.48718547821045, + "log_odds_ratio": -4.281369183445349e-05, + "logits/chosen": -0.2424652874469757, + "logits/rejected": -0.38805943727493286, + "logps/chosen": -0.0007869750843383372, + "logps/rejected": -2.3322014808654785, + "loss": 0.767, + "nll_loss": 0.19175630807876587, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.869752153055742e-05, + "rewards/margins": 0.2331414371728897, + "rewards/rejected": -0.2332201451063156, + "step": 8513 + }, + { + "epoch": 5.8879668049792535, + "grad_norm": 6.996028423309326, + "learning_rate": 2.284462886122637e-05, + "log_odds_chosen": 10.321379661560059, + "log_odds_ratio": -5.5568940297234803e-05, + "logits/chosen": -0.36068078875541687, + "logits/rejected": -0.49022531509399414, + "logps/chosen": -0.00025417341385036707, + "logps/rejected": -1.7573280334472656, + "loss": 0.5471, + "nll_loss": 0.13676485419273376, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5417339202249423e-05, + "rewards/margins": 0.1757073849439621, + "rewards/rejected": -0.1757328063249588, + "step": 8514 + }, + { + "epoch": 5.88865836791148, + "grad_norm": 7.647250175476074, + "learning_rate": 2.2840786844936224e-05, + "log_odds_chosen": 11.126952171325684, + "log_odds_ratio": -3.1055500585353e-05, + "logits/chosen": 0.01863679103553295, + "logits/rejected": -0.0044981446117162704, + "logps/chosen": -0.00016973679885268211, + "logps/rejected": -2.32952880859375, + "loss": 0.9461, + "nll_loss": 0.2365235984325409, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6973681340459734e-05, + "rewards/margins": 0.23293590545654297, + "rewards/rejected": -0.23295289278030396, + "step": 8515 + }, + { + "epoch": 5.889349930843707, + "grad_norm": 7.024383068084717, + "learning_rate": 2.2836944828646076e-05, + "log_odds_chosen": 10.434898376464844, + "log_odds_ratio": -7.665654993616045e-05, + "logits/chosen": -0.5486772656440735, + "logits/rejected": -0.6262264251708984, + "logps/chosen": -0.00029988729511387646, + "logps/rejected": -1.8727209568023682, + "loss": 0.7843, + "nll_loss": 0.1960609257221222, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.998873060278129e-05, + "rewards/margins": 0.18724212050437927, + "rewards/rejected": -0.1872721016407013, + "step": 8516 + }, + { + "epoch": 5.890041493775934, + "grad_norm": 9.35839557647705, + "learning_rate": 2.2833102812355925e-05, + "log_odds_chosen": 11.336438179016113, + "log_odds_ratio": -3.796366945607588e-05, + "logits/chosen": -0.04211053624749184, + "logits/rejected": -0.026684284210205078, + "logps/chosen": -0.00012434335076250136, + "logps/rejected": -2.3859617710113525, + "loss": 1.1471, + "nll_loss": 0.28675931692123413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2434335076250136e-05, + "rewards/margins": 0.2385837435722351, + "rewards/rejected": -0.23859617114067078, + "step": 8517 + }, + { + "epoch": 5.890733056708161, + "grad_norm": 5.5423197746276855, + "learning_rate": 2.2829260796065778e-05, + "log_odds_chosen": 10.303956985473633, + "log_odds_ratio": -0.00018886705220211297, + "logits/chosen": -0.3222675621509552, + "logits/rejected": -0.38316380977630615, + "logps/chosen": -0.0010156576754525304, + "logps/rejected": -1.6832990646362305, + "loss": 0.4919, + "nll_loss": 0.12294995039701462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010156577627640218, + "rewards/margins": 0.16822832822799683, + "rewards/rejected": -0.16832990944385529, + "step": 8518 + }, + { + "epoch": 5.891424619640388, + "grad_norm": 5.728291034698486, + "learning_rate": 2.282541877977563e-05, + "log_odds_chosen": 9.790092468261719, + "log_odds_ratio": -0.0003914476837962866, + "logits/chosen": -0.2519989013671875, + "logits/rejected": -0.24132508039474487, + "logps/chosen": -0.0002517211833037436, + "logps/rejected": -1.394624948501587, + "loss": 0.6545, + "nll_loss": 0.16358324885368347, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.51721176027786e-05, + "rewards/margins": 0.13943731784820557, + "rewards/rejected": -0.13946250081062317, + "step": 8519 + }, + { + "epoch": 5.8921161825726145, + "grad_norm": 9.926775932312012, + "learning_rate": 2.282157676348548e-05, + "log_odds_chosen": 9.664847373962402, + "log_odds_ratio": -0.0005159341380931437, + "logits/chosen": -0.5023306012153625, + "logits/rejected": -0.516615629196167, + "logps/chosen": -0.0005643821787089109, + "logps/rejected": -1.4718093872070312, + "loss": 0.787, + "nll_loss": 0.19668985903263092, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.643822078127414e-05, + "rewards/margins": 0.1471245139837265, + "rewards/rejected": -0.1471809446811676, + "step": 8520 + }, + { + "epoch": 5.892807745504841, + "grad_norm": 6.766112327575684, + "learning_rate": 2.281773474719533e-05, + "log_odds_chosen": 10.125080108642578, + "log_odds_ratio": -0.0006498443544842303, + "logits/chosen": -0.47156214714050293, + "logits/rejected": -0.49221134185791016, + "logps/chosen": -0.0007708219927735627, + "logps/rejected": -1.8949339389801025, + "loss": 0.5459, + "nll_loss": 0.13641297817230225, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.708220073254779e-05, + "rewards/margins": 0.1894163191318512, + "rewards/rejected": -0.18949341773986816, + "step": 8521 + }, + { + "epoch": 5.893499308437068, + "grad_norm": 6.54794979095459, + "learning_rate": 2.281389273090518e-05, + "log_odds_chosen": 10.456282615661621, + "log_odds_ratio": -0.00014503316197078675, + "logits/chosen": -0.5151886940002441, + "logits/rejected": -0.6777184009552002, + "logps/chosen": -0.0003617224283516407, + "logps/rejected": -2.0949783325195312, + "loss": 0.9168, + "nll_loss": 0.22918158769607544, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6172245017951354e-05, + "rewards/margins": 0.20946168899536133, + "rewards/rejected": -0.2094978541135788, + "step": 8522 + }, + { + "epoch": 5.894190871369295, + "grad_norm": 4.836280822753906, + "learning_rate": 2.281005071461503e-05, + "log_odds_chosen": 10.79955005645752, + "log_odds_ratio": -7.796000136295334e-05, + "logits/chosen": -0.6844509840011597, + "logits/rejected": -0.7414740920066833, + "logps/chosen": -0.0005077764508314431, + "logps/rejected": -2.5114388465881348, + "loss": 0.6021, + "nll_loss": 0.1505274474620819, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0777649448718876e-05, + "rewards/margins": 0.2510930895805359, + "rewards/rejected": -0.2511438727378845, + "step": 8523 + }, + { + "epoch": 5.894882434301522, + "grad_norm": 7.654256820678711, + "learning_rate": 2.2806208698324882e-05, + "log_odds_chosen": 10.270405769348145, + "log_odds_ratio": -0.000742147967685014, + "logits/chosen": -0.23735153675079346, + "logits/rejected": -0.312753289937973, + "logps/chosen": -0.000335223157890141, + "logps/rejected": -1.793253779411316, + "loss": 0.5804, + "nll_loss": 0.14502355456352234, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.35223157890141e-05, + "rewards/margins": 0.17929185926914215, + "rewards/rejected": -0.1793254017829895, + "step": 8524 + }, + { + "epoch": 5.895573997233749, + "grad_norm": 8.170629501342773, + "learning_rate": 2.2802366682034735e-05, + "log_odds_chosen": 10.913341522216797, + "log_odds_ratio": -7.966715202201158e-05, + "logits/chosen": -0.7726643681526184, + "logits/rejected": -0.8593516945838928, + "logps/chosen": -0.0004093981988262385, + "logps/rejected": -2.7591519355773926, + "loss": 0.9395, + "nll_loss": 0.2348717749118805, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.093982352060266e-05, + "rewards/margins": 0.2758742570877075, + "rewards/rejected": -0.2759152054786682, + "step": 8525 + }, + { + "epoch": 5.8962655601659755, + "grad_norm": 5.694158554077148, + "learning_rate": 2.2798524665744584e-05, + "log_odds_chosen": 10.442919731140137, + "log_odds_ratio": -7.781775639159605e-05, + "logits/chosen": -0.2508584260940552, + "logits/rejected": -0.4683656692504883, + "logps/chosen": -0.00016275026428047568, + "logps/rejected": -1.475663423538208, + "loss": 0.9346, + "nll_loss": 0.23365084826946259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.627502751944121e-05, + "rewards/margins": 0.1475500762462616, + "rewards/rejected": -0.14756636321544647, + "step": 8526 + }, + { + "epoch": 5.896957123098202, + "grad_norm": 6.7789177894592285, + "learning_rate": 2.2794682649454436e-05, + "log_odds_chosen": 9.294227600097656, + "log_odds_ratio": -0.024507587775588036, + "logits/chosen": -0.6571269035339355, + "logits/rejected": -0.7861886620521545, + "logps/chosen": -0.008058538660407066, + "logps/rejected": -1.9478745460510254, + "loss": 0.8478, + "nll_loss": 0.20949037373065948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008058538078330457, + "rewards/margins": 0.1939816027879715, + "rewards/rejected": -0.19478744268417358, + "step": 8527 + }, + { + "epoch": 5.897648686030429, + "grad_norm": 6.08666467666626, + "learning_rate": 2.279084063316429e-05, + "log_odds_chosen": 10.613787651062012, + "log_odds_ratio": -9.840642451308668e-05, + "logits/chosen": -0.7829938530921936, + "logits/rejected": -0.8247061967849731, + "logps/chosen": -0.0008157877600751817, + "logps/rejected": -2.3705320358276367, + "loss": 0.7853, + "nll_loss": 0.19631502032279968, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.15787716419436e-05, + "rewards/margins": 0.2369716465473175, + "rewards/rejected": -0.23705321550369263, + "step": 8528 + }, + { + "epoch": 5.898340248962656, + "grad_norm": 7.659102439880371, + "learning_rate": 2.2786998616874138e-05, + "log_odds_chosen": 11.211624145507812, + "log_odds_ratio": -0.0016679400578141212, + "logits/chosen": -0.029983580112457275, + "logits/rejected": -0.10546886920928955, + "logps/chosen": -0.002418810036033392, + "logps/rejected": -2.425703287124634, + "loss": 0.994, + "nll_loss": 0.2483421117067337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024188101815525442, + "rewards/margins": 0.2423284649848938, + "rewards/rejected": -0.24257034063339233, + "step": 8529 + }, + { + "epoch": 5.899031811894883, + "grad_norm": 8.744182586669922, + "learning_rate": 2.2783156600583987e-05, + "log_odds_chosen": 10.308116912841797, + "log_odds_ratio": -0.00011377524060662836, + "logits/chosen": -0.3487590551376343, + "logits/rejected": -0.458489328622818, + "logps/chosen": -0.0003950122045353055, + "logps/rejected": -2.2673113346099854, + "loss": 0.6696, + "nll_loss": 0.16739189624786377, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.950122118112631e-05, + "rewards/margins": 0.22669163346290588, + "rewards/rejected": -0.22673112154006958, + "step": 8530 + }, + { + "epoch": 5.89972337482711, + "grad_norm": 6.020569324493408, + "learning_rate": 2.2779314584293836e-05, + "log_odds_chosen": 10.968600273132324, + "log_odds_ratio": -3.5992925404571e-05, + "logits/chosen": -0.5196901559829712, + "logits/rejected": -0.446166068315506, + "logps/chosen": -0.0002649162197485566, + "logps/rejected": -2.243401050567627, + "loss": 0.5834, + "nll_loss": 0.14584845304489136, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6491623430047184e-05, + "rewards/margins": 0.22431360185146332, + "rewards/rejected": -0.22434011101722717, + "step": 8531 + }, + { + "epoch": 5.9004149377593365, + "grad_norm": 6.1547675132751465, + "learning_rate": 2.2775472568003688e-05, + "log_odds_chosen": 10.011276245117188, + "log_odds_ratio": -0.0003909708757419139, + "logits/chosen": -0.654976487159729, + "logits/rejected": -0.6612088680267334, + "logps/chosen": -0.00025521713541820645, + "logps/rejected": -1.1476695537567139, + "loss": 0.6228, + "nll_loss": 0.15565167367458344, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5521714633214287e-05, + "rewards/margins": 0.11474142968654633, + "rewards/rejected": -0.11476695537567139, + "step": 8532 + }, + { + "epoch": 5.901106500691563, + "grad_norm": 12.199445724487305, + "learning_rate": 2.277163055171354e-05, + "log_odds_chosen": 9.943747520446777, + "log_odds_ratio": -0.0003530299582052976, + "logits/chosen": -0.8974602818489075, + "logits/rejected": -0.8420943021774292, + "logps/chosen": -0.0015661438228562474, + "logps/rejected": -2.1415584087371826, + "loss": 0.6201, + "nll_loss": 0.15499332547187805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015661439101677388, + "rewards/margins": 0.2139992117881775, + "rewards/rejected": -0.21415582299232483, + "step": 8533 + }, + { + "epoch": 5.90179806362379, + "grad_norm": 11.708760261535645, + "learning_rate": 2.276778853542339e-05, + "log_odds_chosen": 10.785688400268555, + "log_odds_ratio": -3.1557647162117064e-05, + "logits/chosen": -0.6444445252418518, + "logits/rejected": -0.6951552629470825, + "logps/chosen": -0.0002593470271676779, + "logps/rejected": -2.308858871459961, + "loss": 1.0258, + "nll_loss": 0.25645360350608826, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.593470344436355e-05, + "rewards/margins": 0.23085996508598328, + "rewards/rejected": -0.23088589310646057, + "step": 8534 + }, + { + "epoch": 5.902489626556017, + "grad_norm": 6.170323848724365, + "learning_rate": 2.2763946519133242e-05, + "log_odds_chosen": 11.332568168640137, + "log_odds_ratio": -0.0004851693520322442, + "logits/chosen": -0.5559641122817993, + "logits/rejected": -0.5363360047340393, + "logps/chosen": -0.0006352619384415448, + "logps/rejected": -2.3492929935455322, + "loss": 0.6353, + "nll_loss": 0.158772274851799, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.352619675453752e-05, + "rewards/margins": 0.23486578464508057, + "rewards/rejected": -0.23492930829524994, + "step": 8535 + }, + { + "epoch": 5.903181189488244, + "grad_norm": 5.993259429931641, + "learning_rate": 2.2760104502843095e-05, + "log_odds_chosen": 9.582369804382324, + "log_odds_ratio": -0.02115216664969921, + "logits/chosen": -0.2557261288166046, + "logits/rejected": -0.2677662670612335, + "logps/chosen": -0.005261305253952742, + "logps/rejected": -1.5212640762329102, + "loss": 0.9547, + "nll_loss": 0.2365698367357254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005261304322630167, + "rewards/margins": 0.15160028636455536, + "rewards/rejected": -0.15212641656398773, + "step": 8536 + }, + { + "epoch": 5.903872752420471, + "grad_norm": 6.829483509063721, + "learning_rate": 2.2756262486552944e-05, + "log_odds_chosen": 11.096105575561523, + "log_odds_ratio": -3.8061079976614565e-05, + "logits/chosen": -0.7347974181175232, + "logits/rejected": -0.7576143741607666, + "logps/chosen": -0.00016978240455500782, + "logps/rejected": -2.360802173614502, + "loss": 0.739, + "nll_loss": 0.18474218249320984, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6978241546894424e-05, + "rewards/margins": 0.23606324195861816, + "rewards/rejected": -0.23608021438121796, + "step": 8537 + }, + { + "epoch": 5.904564315352697, + "grad_norm": 6.303608417510986, + "learning_rate": 2.2752420470262796e-05, + "log_odds_chosen": 10.607667922973633, + "log_odds_ratio": -0.00042963639134541154, + "logits/chosen": -0.5105722546577454, + "logits/rejected": -0.6554079651832581, + "logps/chosen": -0.0006353295175358653, + "logps/rejected": -2.2140278816223145, + "loss": 0.5539, + "nll_loss": 0.13843461871147156, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.353294156724587e-05, + "rewards/margins": 0.22133925557136536, + "rewards/rejected": -0.22140279412269592, + "step": 8538 + }, + { + "epoch": 5.905255878284924, + "grad_norm": 11.24542236328125, + "learning_rate": 2.2748578453972645e-05, + "log_odds_chosen": 7.889625549316406, + "log_odds_ratio": -0.3360826373100281, + "logits/chosen": -0.5862733125686646, + "logits/rejected": -0.6579238176345825, + "logps/chosen": -0.056968431919813156, + "logps/rejected": -2.314674139022827, + "loss": 0.9672, + "nll_loss": 0.2081890106201172, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005696842912584543, + "rewards/margins": 0.22577057778835297, + "rewards/rejected": -0.23146742582321167, + "step": 8539 + }, + { + "epoch": 5.905947441217151, + "grad_norm": 8.91585636138916, + "learning_rate": 2.2744736437682494e-05, + "log_odds_chosen": 9.990187644958496, + "log_odds_ratio": -0.0002307224931428209, + "logits/chosen": -0.5497514009475708, + "logits/rejected": -0.5800761580467224, + "logps/chosen": -0.0003133144346065819, + "logps/rejected": -1.416618824005127, + "loss": 0.7085, + "nll_loss": 0.1771116703748703, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1331444915849715e-05, + "rewards/margins": 0.14163057506084442, + "rewards/rejected": -0.1416618973016739, + "step": 8540 + }, + { + "epoch": 5.906639004149378, + "grad_norm": 16.768951416015625, + "learning_rate": 2.2740894421392347e-05, + "log_odds_chosen": 11.527608871459961, + "log_odds_ratio": -1.8020944480667822e-05, + "logits/chosen": -0.24016182124614716, + "logits/rejected": -0.2821471095085144, + "logps/chosen": -0.00040088381501846015, + "logps/rejected": -2.8893532752990723, + "loss": 0.6619, + "nll_loss": 0.16548146307468414, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.00883836846333e-05, + "rewards/margins": 0.2888951897621155, + "rewards/rejected": -0.2889353036880493, + "step": 8541 + }, + { + "epoch": 5.907330567081605, + "grad_norm": 6.422243118286133, + "learning_rate": 2.27370524051022e-05, + "log_odds_chosen": 10.793885231018066, + "log_odds_ratio": -0.0001332794490735978, + "logits/chosen": -0.20492669939994812, + "logits/rejected": -0.28338170051574707, + "logps/chosen": -0.0005102384602651, + "logps/rejected": -2.7783327102661133, + "loss": 0.6223, + "nll_loss": 0.15556910634040833, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.102384602651e-05, + "rewards/margins": 0.27778226137161255, + "rewards/rejected": -0.2778332531452179, + "step": 8542 + }, + { + "epoch": 5.908022130013832, + "grad_norm": 8.342117309570312, + "learning_rate": 2.2733210388812048e-05, + "log_odds_chosen": 8.009577751159668, + "log_odds_ratio": -0.004809632431715727, + "logits/chosen": -0.4651964008808136, + "logits/rejected": -0.5181519985198975, + "logps/chosen": -0.0025818380527198315, + "logps/rejected": -1.4038352966308594, + "loss": 0.8466, + "nll_loss": 0.21116048097610474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025818380527198315, + "rewards/margins": 0.1401253491640091, + "rewards/rejected": -0.1403835266828537, + "step": 8543 + }, + { + "epoch": 5.908713692946058, + "grad_norm": 7.213745594024658, + "learning_rate": 2.27293683725219e-05, + "log_odds_chosen": 10.365843772888184, + "log_odds_ratio": -0.00014006158744450659, + "logits/chosen": -0.24567002058029175, + "logits/rejected": -0.297152042388916, + "logps/chosen": -0.0009655110188759863, + "logps/rejected": -2.6441903114318848, + "loss": 0.6859, + "nll_loss": 0.17147107422351837, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.65511062531732e-05, + "rewards/margins": 0.26432251930236816, + "rewards/rejected": -0.2644190490245819, + "step": 8544 + }, + { + "epoch": 5.909405255878285, + "grad_norm": 9.711751937866211, + "learning_rate": 2.2725526356231753e-05, + "log_odds_chosen": 10.276808738708496, + "log_odds_ratio": -8.432636968791485e-05, + "logits/chosen": -0.6451094150543213, + "logits/rejected": -0.7156064510345459, + "logps/chosen": -0.0004483590309973806, + "logps/rejected": -1.7663764953613281, + "loss": 1.1487, + "nll_loss": 0.2871660590171814, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.483590601012111e-05, + "rewards/margins": 0.17659281194210052, + "rewards/rejected": -0.1766376495361328, + "step": 8545 + }, + { + "epoch": 5.910096818810512, + "grad_norm": 8.632865905761719, + "learning_rate": 2.2721684339941602e-05, + "log_odds_chosen": 10.930590629577637, + "log_odds_ratio": -0.0001606600999366492, + "logits/chosen": -0.5400450825691223, + "logits/rejected": -0.6316708326339722, + "logps/chosen": -0.00023115398653317243, + "logps/rejected": -2.519530773162842, + "loss": 0.9548, + "nll_loss": 0.23867672681808472, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3115399017115124e-05, + "rewards/margins": 0.2519299387931824, + "rewards/rejected": -0.2519530653953552, + "step": 8546 + }, + { + "epoch": 5.910788381742739, + "grad_norm": 9.776907920837402, + "learning_rate": 2.2717842323651455e-05, + "log_odds_chosen": 10.466167449951172, + "log_odds_ratio": -0.00017565919551998377, + "logits/chosen": -0.25509530305862427, + "logits/rejected": -0.4272596538066864, + "logps/chosen": -0.0004573424521367997, + "logps/rejected": -2.5414772033691406, + "loss": 0.9307, + "nll_loss": 0.23266099393367767, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.573424666887149e-05, + "rewards/margins": 0.25410202145576477, + "rewards/rejected": -0.2541477382183075, + "step": 8547 + }, + { + "epoch": 5.911479944674966, + "grad_norm": 4.618690490722656, + "learning_rate": 2.2714000307361304e-05, + "log_odds_chosen": 10.580927848815918, + "log_odds_ratio": -5.235819116933271e-05, + "logits/chosen": -0.23213829100131989, + "logits/rejected": -0.3192077875137329, + "logps/chosen": -0.008321152068674564, + "logps/rejected": -3.3467907905578613, + "loss": 0.6374, + "nll_loss": 0.15933555364608765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008321151835843921, + "rewards/margins": 0.33384695649147034, + "rewards/rejected": -0.33467909693717957, + "step": 8548 + }, + { + "epoch": 5.912171507607193, + "grad_norm": 5.7036333084106445, + "learning_rate": 2.2710158291071153e-05, + "log_odds_chosen": 10.605021476745605, + "log_odds_ratio": -4.936326513416134e-05, + "logits/chosen": -0.41037696599960327, + "logits/rejected": -0.5515607595443726, + "logps/chosen": -0.00024102001043502241, + "logps/rejected": -2.1665050983428955, + "loss": 0.7821, + "nll_loss": 0.19552463293075562, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4102002498693764e-05, + "rewards/margins": 0.21662640571594238, + "rewards/rejected": -0.21665051579475403, + "step": 8549 + }, + { + "epoch": 5.912863070539419, + "grad_norm": 13.772993087768555, + "learning_rate": 2.2706316274781005e-05, + "log_odds_chosen": 12.158016204833984, + "log_odds_ratio": -1.6254996808129363e-05, + "logits/chosen": -0.5372356176376343, + "logits/rejected": -0.6390924453735352, + "logps/chosen": -9.869621135294437e-05, + "logps/rejected": -2.9592461585998535, + "loss": 0.846, + "nll_loss": 0.21148821711540222, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.869620953395497e-06, + "rewards/margins": 0.29591473937034607, + "rewards/rejected": -0.2959246039390564, + "step": 8550 + }, + { + "epoch": 5.913554633471646, + "grad_norm": 8.666646003723145, + "learning_rate": 2.2702474258490858e-05, + "log_odds_chosen": 10.557194709777832, + "log_odds_ratio": -7.307058695005253e-05, + "logits/chosen": -0.8025202751159668, + "logits/rejected": -0.8761454820632935, + "logps/chosen": -0.00041048714774660766, + "logps/rejected": -2.3817849159240723, + "loss": 0.7585, + "nll_loss": 0.1896216869354248, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1048719140235335e-05, + "rewards/margins": 0.23813745379447937, + "rewards/rejected": -0.23817849159240723, + "step": 8551 + }, + { + "epoch": 5.914246196403873, + "grad_norm": 8.621912956237793, + "learning_rate": 2.2698632242200707e-05, + "log_odds_chosen": 10.435504913330078, + "log_odds_ratio": -0.00020912522450089455, + "logits/chosen": -0.304024338722229, + "logits/rejected": -0.3682836890220642, + "logps/chosen": -0.00018781126709654927, + "logps/rejected": -2.1270718574523926, + "loss": 0.6206, + "nll_loss": 0.15513885021209717, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8781129256240092e-05, + "rewards/margins": 0.2126884013414383, + "rewards/rejected": -0.21270719170570374, + "step": 8552 + }, + { + "epoch": 5.9149377593361, + "grad_norm": 10.987290382385254, + "learning_rate": 2.269479022591056e-05, + "log_odds_chosen": 10.077252388000488, + "log_odds_ratio": -0.00014401637599803507, + "logits/chosen": -0.4053186774253845, + "logits/rejected": -0.4016092121601105, + "logps/chosen": -0.0008381298393942416, + "logps/rejected": -2.265294313430786, + "loss": 0.6591, + "nll_loss": 0.16474927961826324, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.381298539461568e-05, + "rewards/margins": 0.22644563019275665, + "rewards/rejected": -0.22652943432331085, + "step": 8553 + }, + { + "epoch": 5.915629322268327, + "grad_norm": 9.601272583007812, + "learning_rate": 2.269094820962041e-05, + "log_odds_chosen": 10.813383102416992, + "log_odds_ratio": -4.799844464287162e-05, + "logits/chosen": -0.583682656288147, + "logits/rejected": -0.6282103657722473, + "logps/chosen": -0.00023382306972052902, + "logps/rejected": -2.446737289428711, + "loss": 0.5298, + "nll_loss": 0.13243836164474487, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3382304789265618e-05, + "rewards/margins": 0.24465036392211914, + "rewards/rejected": -0.2446737438440323, + "step": 8554 + }, + { + "epoch": 5.9163208852005535, + "grad_norm": 7.350333213806152, + "learning_rate": 2.268710619333026e-05, + "log_odds_chosen": 9.566350936889648, + "log_odds_ratio": -0.00035029981518164277, + "logits/chosen": 0.14597120881080627, + "logits/rejected": -0.05483380705118179, + "logps/chosen": -0.0013699035625904799, + "logps/rejected": -2.373587131500244, + "loss": 0.8311, + "nll_loss": 0.20774847269058228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013699036207981408, + "rewards/margins": 0.23722171783447266, + "rewards/rejected": -0.2373587191104889, + "step": 8555 + }, + { + "epoch": 5.91701244813278, + "grad_norm": 17.94904136657715, + "learning_rate": 2.2683264177040113e-05, + "log_odds_chosen": 10.428934097290039, + "log_odds_ratio": -9.59420285653323e-05, + "logits/chosen": -0.45303452014923096, + "logits/rejected": -0.5170224905014038, + "logps/chosen": -0.0003410226199775934, + "logps/rejected": -2.2718138694763184, + "loss": 0.9655, + "nll_loss": 0.24137672781944275, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.410226054256782e-05, + "rewards/margins": 0.22714729607105255, + "rewards/rejected": -0.22718140482902527, + "step": 8556 + }, + { + "epoch": 5.917704011065007, + "grad_norm": 4.4098734855651855, + "learning_rate": 2.2679422160749962e-05, + "log_odds_chosen": 10.864945411682129, + "log_odds_ratio": -3.9907470636535436e-05, + "logits/chosen": -0.256070613861084, + "logits/rejected": -0.2606242597103119, + "logps/chosen": -0.0019068530527874827, + "logps/rejected": -2.8022260665893555, + "loss": 0.7057, + "nll_loss": 0.1764180064201355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019068529945798218, + "rewards/margins": 0.2800319194793701, + "rewards/rejected": -0.2802225947380066, + "step": 8557 + }, + { + "epoch": 5.918395573997234, + "grad_norm": 7.403103351593018, + "learning_rate": 2.267558014445981e-05, + "log_odds_chosen": 10.689541816711426, + "log_odds_ratio": -3.504283449728973e-05, + "logits/chosen": -0.4196911156177521, + "logits/rejected": -0.4435195028781891, + "logps/chosen": -0.00029301928589120507, + "logps/rejected": -2.470460891723633, + "loss": 0.7607, + "nll_loss": 0.19015933573246002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9301927497726865e-05, + "rewards/margins": 0.2470167875289917, + "rewards/rejected": -0.2470460832118988, + "step": 8558 + }, + { + "epoch": 5.919087136929461, + "grad_norm": 7.533710479736328, + "learning_rate": 2.2671738128169664e-05, + "log_odds_chosen": 9.999994277954102, + "log_odds_ratio": -0.0002365948457736522, + "logits/chosen": -0.36014026403427124, + "logits/rejected": -0.3944398760795593, + "logps/chosen": -0.0003829544293694198, + "logps/rejected": -1.3881903886795044, + "loss": 0.7457, + "nll_loss": 0.18641208112239838, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.829544584732503e-05, + "rewards/margins": 0.13878074288368225, + "rewards/rejected": -0.13881903886795044, + "step": 8559 + }, + { + "epoch": 5.919778699861688, + "grad_norm": 10.651721000671387, + "learning_rate": 2.2667896111879516e-05, + "log_odds_chosen": 10.4109525680542, + "log_odds_ratio": -0.00021536195708904415, + "logits/chosen": -0.20203912258148193, + "logits/rejected": -0.3034498989582062, + "logps/chosen": -0.00043156338506378233, + "logps/rejected": -1.706121802330017, + "loss": 0.9793, + "nll_loss": 0.244802325963974, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3156334868399426e-05, + "rewards/margins": 0.1705690175294876, + "rewards/rejected": -0.1706121861934662, + "step": 8560 + }, + { + "epoch": 5.9204702627939145, + "grad_norm": 6.417027473449707, + "learning_rate": 2.2664054095589365e-05, + "log_odds_chosen": 7.905770778656006, + "log_odds_ratio": -0.002830528188496828, + "logits/chosen": -0.5475532412528992, + "logits/rejected": -0.5872865915298462, + "logps/chosen": -0.027408741414546967, + "logps/rejected": -1.390294075012207, + "loss": 1.1341, + "nll_loss": 0.28323599696159363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027408739551901817, + "rewards/margins": 0.13628853857517242, + "rewards/rejected": -0.13902941346168518, + "step": 8561 + }, + { + "epoch": 5.921161825726141, + "grad_norm": 7.8787007331848145, + "learning_rate": 2.2660212079299218e-05, + "log_odds_chosen": 11.101816177368164, + "log_odds_ratio": -0.00010772953828563914, + "logits/chosen": -0.5102895498275757, + "logits/rejected": -0.5560255646705627, + "logps/chosen": -0.00044660046114586294, + "logps/rejected": -2.1967179775238037, + "loss": 0.6121, + "nll_loss": 0.15301674604415894, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4660046114586294e-05, + "rewards/margins": 0.21962714195251465, + "rewards/rejected": -0.2196718156337738, + "step": 8562 + }, + { + "epoch": 5.921853388658368, + "grad_norm": 7.844832420349121, + "learning_rate": 2.265637006300907e-05, + "log_odds_chosen": 9.944887161254883, + "log_odds_ratio": -9.000200225273147e-05, + "logits/chosen": -0.30578577518463135, + "logits/rejected": -0.36093151569366455, + "logps/chosen": -0.0006033809040673077, + "logps/rejected": -1.9198884963989258, + "loss": 0.6007, + "nll_loss": 0.15015468001365662, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0338086768751964e-05, + "rewards/margins": 0.19192850589752197, + "rewards/rejected": -0.19198885560035706, + "step": 8563 + }, + { + "epoch": 5.922544951590595, + "grad_norm": 6.397083759307861, + "learning_rate": 2.265252804671892e-05, + "log_odds_chosen": 11.030065536499023, + "log_odds_ratio": -5.464674177346751e-05, + "logits/chosen": -0.08422800898551941, + "logits/rejected": -0.22156718373298645, + "logps/chosen": -0.0006563407951034606, + "logps/rejected": -2.902289867401123, + "loss": 1.1569, + "nll_loss": 0.28922930359840393, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.563407805515453e-05, + "rewards/margins": 0.2901633679866791, + "rewards/rejected": -0.2902289927005768, + "step": 8564 + }, + { + "epoch": 5.923236514522822, + "grad_norm": 5.906310558319092, + "learning_rate": 2.264868603042877e-05, + "log_odds_chosen": 9.578790664672852, + "log_odds_ratio": -0.0021116340067237616, + "logits/chosen": -0.46797680854797363, + "logits/rejected": -0.4816071391105652, + "logps/chosen": -0.0012455761898308992, + "logps/rejected": -1.300618052482605, + "loss": 0.8921, + "nll_loss": 0.22281301021575928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012455761316232383, + "rewards/margins": 0.12993724644184113, + "rewards/rejected": -0.1300618052482605, + "step": 8565 + }, + { + "epoch": 5.923928077455049, + "grad_norm": 17.54301643371582, + "learning_rate": 2.264484401413862e-05, + "log_odds_chosen": 9.373865127563477, + "log_odds_ratio": -0.0004120336670894176, + "logits/chosen": -0.37062132358551025, + "logits/rejected": -0.48371198773384094, + "logps/chosen": -0.0012714089825749397, + "logps/rejected": -2.1493399143218994, + "loss": 0.6719, + "nll_loss": 0.1679382026195526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012714089825749397, + "rewards/margins": 0.21480685472488403, + "rewards/rejected": -0.21493399143218994, + "step": 8566 + }, + { + "epoch": 5.9246196403872755, + "grad_norm": 6.1621222496032715, + "learning_rate": 2.264100199784847e-05, + "log_odds_chosen": 10.586554527282715, + "log_odds_ratio": -0.00022484929650090635, + "logits/chosen": -0.6475090980529785, + "logits/rejected": -0.742377519607544, + "logps/chosen": -0.0005424355622380972, + "logps/rejected": -1.4557613134384155, + "loss": 1.078, + "nll_loss": 0.2694748342037201, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.424355185823515e-05, + "rewards/margins": 0.14552189409732819, + "rewards/rejected": -0.14557614922523499, + "step": 8567 + }, + { + "epoch": 5.925311203319502, + "grad_norm": 10.520523071289062, + "learning_rate": 2.2637159981558322e-05, + "log_odds_chosen": 11.213796615600586, + "log_odds_ratio": -5.063842036179267e-05, + "logits/chosen": -0.15347711741924286, + "logits/rejected": -0.2412378191947937, + "logps/chosen": -0.001006375066936016, + "logps/rejected": -3.2527801990509033, + "loss": 0.8013, + "nll_loss": 0.20032110810279846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010063750960398465, + "rewards/margins": 0.3251773715019226, + "rewards/rejected": -0.32527804374694824, + "step": 8568 + }, + { + "epoch": 5.926002766251729, + "grad_norm": 15.545186042785645, + "learning_rate": 2.2633317965268174e-05, + "log_odds_chosen": 11.705305099487305, + "log_odds_ratio": -0.00015831185737624764, + "logits/chosen": -0.3981912434101105, + "logits/rejected": -0.46448180079460144, + "logps/chosen": -0.00022738243569619954, + "logps/rejected": -2.729987621307373, + "loss": 0.6592, + "nll_loss": 0.16478851437568665, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2738244297215715e-05, + "rewards/margins": 0.2729760408401489, + "rewards/rejected": -0.27299875020980835, + "step": 8569 + }, + { + "epoch": 5.926694329183956, + "grad_norm": 10.800206184387207, + "learning_rate": 2.2629475948978024e-05, + "log_odds_chosen": 9.136209487915039, + "log_odds_ratio": -0.00038616295205429196, + "logits/chosen": -0.8798481225967407, + "logits/rejected": -0.9394698143005371, + "logps/chosen": -0.0006298995576798916, + "logps/rejected": -1.1048842668533325, + "loss": 0.5879, + "nll_loss": 0.14694133400917053, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.29899586783722e-05, + "rewards/margins": 0.1104254275560379, + "rewards/rejected": -0.1104884222149849, + "step": 8570 + }, + { + "epoch": 5.927385892116183, + "grad_norm": 8.502237319946289, + "learning_rate": 2.2625633932687876e-05, + "log_odds_chosen": 10.115253448486328, + "log_odds_ratio": -0.00011375232134014368, + "logits/chosen": -0.6215211749076843, + "logits/rejected": -0.6475383639335632, + "logps/chosen": -0.00029889732832089067, + "logps/rejected": -1.6586815118789673, + "loss": 0.6581, + "nll_loss": 0.16451016068458557, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.988973392348271e-05, + "rewards/margins": 0.16583827137947083, + "rewards/rejected": -0.16586816310882568, + "step": 8571 + }, + { + "epoch": 5.92807745504841, + "grad_norm": 15.530472755432129, + "learning_rate": 2.262179191639773e-05, + "log_odds_chosen": 8.37136459350586, + "log_odds_ratio": -0.005694496911019087, + "logits/chosen": -0.20429344475269318, + "logits/rejected": -0.2544511556625366, + "logps/chosen": -0.0032351193949580193, + "logps/rejected": -1.765426516532898, + "loss": 0.6276, + "nll_loss": 0.15632155537605286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003235119511373341, + "rewards/margins": 0.1762191355228424, + "rewards/rejected": -0.17654263973236084, + "step": 8572 + }, + { + "epoch": 5.9287690179806365, + "grad_norm": 20.958740234375, + "learning_rate": 2.2617949900107577e-05, + "log_odds_chosen": 10.967823028564453, + "log_odds_ratio": -5.087364843348041e-05, + "logits/chosen": -0.7277958989143372, + "logits/rejected": -0.8081362247467041, + "logps/chosen": -0.0001713030505925417, + "logps/rejected": -1.981994867324829, + "loss": 0.9123, + "nll_loss": 0.22807423770427704, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7130303604062647e-05, + "rewards/margins": 0.1981823593378067, + "rewards/rejected": -0.19819949567317963, + "step": 8573 + }, + { + "epoch": 5.929460580912863, + "grad_norm": 10.149932861328125, + "learning_rate": 2.261410788381743e-05, + "log_odds_chosen": 9.296614646911621, + "log_odds_ratio": -0.0015210387064144015, + "logits/chosen": -0.5005256533622742, + "logits/rejected": -0.4993587136268616, + "logps/chosen": -0.0005473347846418619, + "logps/rejected": -1.6416828632354736, + "loss": 1.1707, + "nll_loss": 0.29251858592033386, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4733485740143806e-05, + "rewards/margins": 0.16411355137825012, + "rewards/rejected": -0.16416828334331512, + "step": 8574 + }, + { + "epoch": 5.93015214384509, + "grad_norm": 9.455828666687012, + "learning_rate": 2.261026586752728e-05, + "log_odds_chosen": 9.926673889160156, + "log_odds_ratio": -0.010275032371282578, + "logits/chosen": -0.8462824821472168, + "logits/rejected": -0.8726486563682556, + "logps/chosen": -0.004735386930406094, + "logps/rejected": -2.1909990310668945, + "loss": 0.6947, + "nll_loss": 0.17265570163726807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00047353864647448063, + "rewards/margins": 0.21862637996673584, + "rewards/rejected": -0.21909990906715393, + "step": 8575 + }, + { + "epoch": 5.930843706777317, + "grad_norm": 7.079444885253906, + "learning_rate": 2.2606423851237128e-05, + "log_odds_chosen": 10.440970420837402, + "log_odds_ratio": -0.00572149408981204, + "logits/chosen": -0.4378744959831238, + "logits/rejected": -0.5360420942306519, + "logps/chosen": -0.035686738789081573, + "logps/rejected": -2.1300199031829834, + "loss": 0.5094, + "nll_loss": 0.12678956985473633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003568673972040415, + "rewards/margins": 0.20943331718444824, + "rewards/rejected": -0.21300198137760162, + "step": 8576 + }, + { + "epoch": 5.931535269709544, + "grad_norm": 8.993355751037598, + "learning_rate": 2.260258183494698e-05, + "log_odds_chosen": 10.129745483398438, + "log_odds_ratio": -0.006981295999139547, + "logits/chosen": -0.5487630367279053, + "logits/rejected": -0.5710721611976624, + "logps/chosen": -0.003039892530068755, + "logps/rejected": -2.049140214920044, + "loss": 0.5986, + "nll_loss": 0.148961141705513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030398921808227897, + "rewards/margins": 0.20461003482341766, + "rewards/rejected": -0.20491401851177216, + "step": 8577 + }, + { + "epoch": 5.932226832641771, + "grad_norm": 5.472675323486328, + "learning_rate": 2.2598739818656833e-05, + "log_odds_chosen": 9.776227951049805, + "log_odds_ratio": -0.00033155985875055194, + "logits/chosen": -0.5276475548744202, + "logits/rejected": -0.5645096302032471, + "logps/chosen": -0.0007022687932476401, + "logps/rejected": -1.7329561710357666, + "loss": 0.9177, + "nll_loss": 0.22938492894172668, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.022687350399792e-05, + "rewards/margins": 0.17322540283203125, + "rewards/rejected": -0.17329561710357666, + "step": 8578 + }, + { + "epoch": 5.9329183955739975, + "grad_norm": 5.772761821746826, + "learning_rate": 2.2594897802366682e-05, + "log_odds_chosen": 9.596529006958008, + "log_odds_ratio": -0.0009786165319383144, + "logits/chosen": -0.4244062900543213, + "logits/rejected": -0.4867440462112427, + "logps/chosen": -0.018765205517411232, + "logps/rejected": -2.093818187713623, + "loss": 0.8859, + "nll_loss": 0.2213878631591797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018765205750241876, + "rewards/margins": 0.20750531554222107, + "rewards/rejected": -0.2093818336725235, + "step": 8579 + }, + { + "epoch": 5.933609958506224, + "grad_norm": 12.897007942199707, + "learning_rate": 2.2591055786076534e-05, + "log_odds_chosen": 10.668493270874023, + "log_odds_ratio": -6.173766450956464e-05, + "logits/chosen": -0.47161316871643066, + "logits/rejected": -0.5060651302337646, + "logps/chosen": -0.00015352212358266115, + "logps/rejected": -1.8575775623321533, + "loss": 0.5795, + "nll_loss": 0.14486020803451538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5352212358266115e-05, + "rewards/margins": 0.18574243783950806, + "rewards/rejected": -0.18575777113437653, + "step": 8580 + }, + { + "epoch": 5.934301521438451, + "grad_norm": 8.834507942199707, + "learning_rate": 2.2587213769786387e-05, + "log_odds_chosen": 10.817688941955566, + "log_odds_ratio": -0.00029401585925370455, + "logits/chosen": -0.7078501582145691, + "logits/rejected": -0.6817142963409424, + "logps/chosen": -0.00031072754063643515, + "logps/rejected": -2.6172075271606445, + "loss": 0.9104, + "nll_loss": 0.2275734394788742, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.107275551883504e-05, + "rewards/margins": 0.261689692735672, + "rewards/rejected": -0.26172077655792236, + "step": 8581 + }, + { + "epoch": 5.934993084370678, + "grad_norm": 5.395383358001709, + "learning_rate": 2.2583371753496236e-05, + "log_odds_chosen": 10.347330093383789, + "log_odds_ratio": -0.00010833951091626659, + "logits/chosen": -0.9370718002319336, + "logits/rejected": -0.9664212465286255, + "logps/chosen": -0.00035284709883853793, + "logps/rejected": -1.7800090312957764, + "loss": 0.6464, + "nll_loss": 0.16159787774085999, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.528471279423684e-05, + "rewards/margins": 0.17796562612056732, + "rewards/rejected": -0.17800092697143555, + "step": 8582 + }, + { + "epoch": 5.935684647302905, + "grad_norm": 8.83195972442627, + "learning_rate": 2.257952973720609e-05, + "log_odds_chosen": 10.438563346862793, + "log_odds_ratio": -6.224818935152143e-05, + "logits/chosen": -0.510218620300293, + "logits/rejected": -0.44954901933670044, + "logps/chosen": -0.0002455156354699284, + "logps/rejected": -1.6129556894302368, + "loss": 0.4977, + "nll_loss": 0.12440832704305649, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4551565729780123e-05, + "rewards/margins": 0.16127102077007294, + "rewards/rejected": -0.1612955629825592, + "step": 8583 + }, + { + "epoch": 5.936376210235132, + "grad_norm": 6.737086772918701, + "learning_rate": 2.2575687720915937e-05, + "log_odds_chosen": 10.558185577392578, + "log_odds_ratio": -0.00012676040933001786, + "logits/chosen": -0.4822555184364319, + "logits/rejected": -0.48588335514068604, + "logps/chosen": -0.00015993253327906132, + "logps/rejected": -1.8099101781845093, + "loss": 0.5725, + "nll_loss": 0.14312425255775452, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5993253327906132e-05, + "rewards/margins": 0.1809750348329544, + "rewards/rejected": -0.1809910237789154, + "step": 8584 + }, + { + "epoch": 5.9370677731673585, + "grad_norm": 6.972037315368652, + "learning_rate": 2.2571845704625786e-05, + "log_odds_chosen": 10.596516609191895, + "log_odds_ratio": -3.5987286537420005e-05, + "logits/chosen": -0.1407221257686615, + "logits/rejected": -0.32239216566085815, + "logps/chosen": -0.0002259848261019215, + "logps/rejected": -1.8885389566421509, + "loss": 0.6527, + "nll_loss": 0.16316595673561096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2598484065383673e-05, + "rewards/margins": 0.18883128464221954, + "rewards/rejected": -0.1888538897037506, + "step": 8585 + }, + { + "epoch": 5.937759336099585, + "grad_norm": 11.993995666503906, + "learning_rate": 2.256800368833564e-05, + "log_odds_chosen": 8.331764221191406, + "log_odds_ratio": -0.4614397883415222, + "logits/chosen": -0.4517919421195984, + "logits/rejected": -0.5015581846237183, + "logps/chosen": -0.04650157317519188, + "logps/rejected": -1.8373360633850098, + "loss": 0.8105, + "nll_loss": 0.15647102892398834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.004650157410651445, + "rewards/margins": 0.179083451628685, + "rewards/rejected": -0.18373361229896545, + "step": 8586 + }, + { + "epoch": 5.938450899031812, + "grad_norm": 7.184019088745117, + "learning_rate": 2.256416167204549e-05, + "log_odds_chosen": 9.58578872680664, + "log_odds_ratio": -0.0004617223748937249, + "logits/chosen": -0.3178104758262634, + "logits/rejected": -0.4234858453273773, + "logps/chosen": -0.00038188480539247394, + "logps/rejected": -1.509856939315796, + "loss": 0.58, + "nll_loss": 0.14495989680290222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8188481994438916e-05, + "rewards/margins": 0.15094749629497528, + "rewards/rejected": -0.1509856879711151, + "step": 8587 + }, + { + "epoch": 5.939142461964039, + "grad_norm": 6.3074140548706055, + "learning_rate": 2.256031965575534e-05, + "log_odds_chosen": 9.820735931396484, + "log_odds_ratio": -0.0016581026138737798, + "logits/chosen": -0.4913124144077301, + "logits/rejected": -0.3220345377922058, + "logps/chosen": -0.0011917630909010768, + "logps/rejected": -1.8636376857757568, + "loss": 0.5282, + "nll_loss": 0.13187173008918762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011917632218683138, + "rewards/margins": 0.18624460697174072, + "rewards/rejected": -0.18636377155780792, + "step": 8588 + }, + { + "epoch": 5.939834024896266, + "grad_norm": 5.304167747497559, + "learning_rate": 2.2556477639465193e-05, + "log_odds_chosen": 9.788444519042969, + "log_odds_ratio": -0.000702496210578829, + "logits/chosen": -0.5710271000862122, + "logits/rejected": -0.5970733165740967, + "logps/chosen": -0.0012307936558499932, + "logps/rejected": -2.3701136112213135, + "loss": 1.469, + "nll_loss": 0.3671877980232239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012307935685385019, + "rewards/margins": 0.23688827455043793, + "rewards/rejected": -0.2370113730430603, + "step": 8589 + }, + { + "epoch": 5.940525587828493, + "grad_norm": 7.113852500915527, + "learning_rate": 2.2552635623175045e-05, + "log_odds_chosen": 10.384570121765137, + "log_odds_ratio": -0.00011506390001159161, + "logits/chosen": -0.6606665849685669, + "logits/rejected": -0.6632811427116394, + "logps/chosen": -0.0007222711574286222, + "logps/rejected": -2.103870391845703, + "loss": 0.9292, + "nll_loss": 0.23229539394378662, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.222710701171309e-05, + "rewards/margins": 0.2103148102760315, + "rewards/rejected": -0.21038705110549927, + "step": 8590 + }, + { + "epoch": 5.941217150760719, + "grad_norm": 11.421980857849121, + "learning_rate": 2.2548793606884894e-05, + "log_odds_chosen": 10.75039291381836, + "log_odds_ratio": -0.00022272029309533536, + "logits/chosen": -0.3645836412906647, + "logits/rejected": -0.5092456936836243, + "logps/chosen": -0.0002936196979135275, + "logps/rejected": -2.463463068008423, + "loss": 0.6619, + "nll_loss": 0.1654607355594635, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9361966880969703e-05, + "rewards/margins": 0.24631696939468384, + "rewards/rejected": -0.24634632468223572, + "step": 8591 + }, + { + "epoch": 5.941908713692946, + "grad_norm": 5.813602447509766, + "learning_rate": 2.2544951590594747e-05, + "log_odds_chosen": 11.08498477935791, + "log_odds_ratio": -3.317241498734802e-05, + "logits/chosen": -0.29458001255989075, + "logits/rejected": -0.34045854210853577, + "logps/chosen": -0.0003151995479129255, + "logps/rejected": -2.317119836807251, + "loss": 0.6591, + "nll_loss": 0.16476860642433167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.151995406369679e-05, + "rewards/margins": 0.23168045282363892, + "rewards/rejected": -0.2317119836807251, + "step": 8592 + }, + { + "epoch": 5.942600276625173, + "grad_norm": 5.240687847137451, + "learning_rate": 2.2541109574304596e-05, + "log_odds_chosen": 10.848184585571289, + "log_odds_ratio": -0.0002010673051699996, + "logits/chosen": -0.7847503423690796, + "logits/rejected": -0.8887568116188049, + "logps/chosen": -0.000890504801645875, + "logps/rejected": -2.394976854324341, + "loss": 1.0431, + "nll_loss": 0.2607666254043579, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.905048161977902e-05, + "rewards/margins": 0.23940865695476532, + "rewards/rejected": -0.23949770629405975, + "step": 8593 + }, + { + "epoch": 5.9432918395574, + "grad_norm": 7.4959716796875, + "learning_rate": 2.2537267558014445e-05, + "log_odds_chosen": 11.396228790283203, + "log_odds_ratio": -1.6502690414199606e-05, + "logits/chosen": -0.25917848944664, + "logits/rejected": -0.3350893259048462, + "logps/chosen": -0.0001408563693985343, + "logps/rejected": -2.543832540512085, + "loss": 0.5539, + "nll_loss": 0.13848382234573364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.408563730365131e-05, + "rewards/margins": 0.25436916947364807, + "rewards/rejected": -0.25438326597213745, + "step": 8594 + }, + { + "epoch": 5.943983402489627, + "grad_norm": 4.75478982925415, + "learning_rate": 2.2533425541724297e-05, + "log_odds_chosen": 11.02970027923584, + "log_odds_ratio": -3.782029671128839e-05, + "logits/chosen": -0.7902827262878418, + "logits/rejected": -0.83676677942276, + "logps/chosen": -0.00020545838924590498, + "logps/rejected": -2.285313129425049, + "loss": 0.4411, + "nll_loss": 0.11028016358613968, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0545838196994737e-05, + "rewards/margins": 0.2285107672214508, + "rewards/rejected": -0.22853130102157593, + "step": 8595 + }, + { + "epoch": 5.944674965421854, + "grad_norm": 9.864461898803711, + "learning_rate": 2.252958352543415e-05, + "log_odds_chosen": 11.542867660522461, + "log_odds_ratio": -1.895393324957695e-05, + "logits/chosen": -0.022642409428954124, + "logits/rejected": -0.0558503232896328, + "logps/chosen": -0.00012987994705326855, + "logps/rejected": -2.4602818489074707, + "loss": 0.7878, + "nll_loss": 0.1969459503889084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2987994523427915e-05, + "rewards/margins": 0.24601522088050842, + "rewards/rejected": -0.24602821469306946, + "step": 8596 + }, + { + "epoch": 5.94536652835408, + "grad_norm": 5.678092002868652, + "learning_rate": 2.2525741509144e-05, + "log_odds_chosen": 8.817580223083496, + "log_odds_ratio": -0.0009618825279176235, + "logits/chosen": -0.4064343571662903, + "logits/rejected": -0.4465927481651306, + "logps/chosen": -0.0013117840280756354, + "logps/rejected": -1.1665174961090088, + "loss": 0.5906, + "nll_loss": 0.1475575864315033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001311783998971805, + "rewards/margins": 0.11652056127786636, + "rewards/rejected": -0.1166517361998558, + "step": 8597 + }, + { + "epoch": 5.946058091286307, + "grad_norm": 8.367502212524414, + "learning_rate": 2.252189949285385e-05, + "log_odds_chosen": 11.335031509399414, + "log_odds_ratio": -3.1897066946839914e-05, + "logits/chosen": -0.8433002233505249, + "logits/rejected": -0.8570671081542969, + "logps/chosen": -0.00025656697107478976, + "logps/rejected": -2.491272211074829, + "loss": 1.2058, + "nll_loss": 0.30144941806793213, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5656696379883215e-05, + "rewards/margins": 0.24910154938697815, + "rewards/rejected": -0.24912720918655396, + "step": 8598 + }, + { + "epoch": 5.946749654218534, + "grad_norm": 7.0525803565979, + "learning_rate": 2.25180574765637e-05, + "log_odds_chosen": 9.210527420043945, + "log_odds_ratio": -0.0020838002674281597, + "logits/chosen": -0.15819773077964783, + "logits/rejected": -0.19214797019958496, + "logps/chosen": -0.005358480848371983, + "logps/rejected": -1.5282800197601318, + "loss": 1.1511, + "nll_loss": 0.28755632042884827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005358480848371983, + "rewards/margins": 0.1522921621799469, + "rewards/rejected": -0.15282802283763885, + "step": 8599 + }, + { + "epoch": 5.947441217150761, + "grad_norm": 20.070941925048828, + "learning_rate": 2.2514215460273553e-05, + "log_odds_chosen": 10.128929138183594, + "log_odds_ratio": -9.235789184458554e-05, + "logits/chosen": -0.43759238719940186, + "logits/rejected": -0.4546014964580536, + "logps/chosen": -0.0004204876022413373, + "logps/rejected": -2.027372121810913, + "loss": 0.7217, + "nll_loss": 0.18042320013046265, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.204875949653797e-05, + "rewards/margins": 0.20269516110420227, + "rewards/rejected": -0.2027372121810913, + "step": 8600 + }, + { + "epoch": 5.948132780082988, + "grad_norm": 9.916879653930664, + "learning_rate": 2.2510373443983405e-05, + "log_odds_chosen": 11.070162773132324, + "log_odds_ratio": -4.012341742054559e-05, + "logits/chosen": -1.012428641319275, + "logits/rejected": -1.0315272808074951, + "logps/chosen": -0.0001498181081842631, + "logps/rejected": -1.9242411851882935, + "loss": 0.5297, + "nll_loss": 0.13243013620376587, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4981809727032669e-05, + "rewards/margins": 0.19240912795066833, + "rewards/rejected": -0.19242411851882935, + "step": 8601 + }, + { + "epoch": 5.948824343015215, + "grad_norm": 12.090327262878418, + "learning_rate": 2.2506531427693254e-05, + "log_odds_chosen": 10.315339088439941, + "log_odds_ratio": -0.0001418688625562936, + "logits/chosen": -0.34358614683151245, + "logits/rejected": -0.4839419722557068, + "logps/chosen": -0.0007913429872132838, + "logps/rejected": -1.9713385105133057, + "loss": 0.6987, + "nll_loss": 0.17466667294502258, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.913430454209447e-05, + "rewards/margins": 0.19705472886562347, + "rewards/rejected": -0.197133868932724, + "step": 8602 + }, + { + "epoch": 5.949515905947441, + "grad_norm": 5.3857808113098145, + "learning_rate": 2.2502689411403107e-05, + "log_odds_chosen": 10.192558288574219, + "log_odds_ratio": -0.00026585307205095887, + "logits/chosen": -0.8433107733726501, + "logits/rejected": -0.9449383020401001, + "logps/chosen": -0.0006162350182421505, + "logps/rejected": -1.9687180519104004, + "loss": 0.4978, + "nll_loss": 0.12442828714847565, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.16235047345981e-05, + "rewards/margins": 0.19681017100811005, + "rewards/rejected": -0.1968718022108078, + "step": 8603 + }, + { + "epoch": 5.950207468879668, + "grad_norm": 6.468098163604736, + "learning_rate": 2.2498847395112956e-05, + "log_odds_chosen": 9.801401138305664, + "log_odds_ratio": -0.00025009317323565483, + "logits/chosen": -0.6675868034362793, + "logits/rejected": -0.6527258157730103, + "logps/chosen": -0.0007151628378778696, + "logps/rejected": -2.322625160217285, + "loss": 0.7434, + "nll_loss": 0.18583112955093384, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.151628960855305e-05, + "rewards/margins": 0.23219099640846252, + "rewards/rejected": -0.232262521982193, + "step": 8604 + }, + { + "epoch": 5.950899031811895, + "grad_norm": 5.987737655639648, + "learning_rate": 2.2495005378822805e-05, + "log_odds_chosen": 10.477818489074707, + "log_odds_ratio": -7.430124969687313e-05, + "logits/chosen": -0.05224364250898361, + "logits/rejected": -0.051796793937683105, + "logps/chosen": -0.0001971510355360806, + "logps/rejected": -2.1585772037506104, + "loss": 1.3965, + "nll_loss": 0.3491280972957611, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9715105736395344e-05, + "rewards/margins": 0.2158380150794983, + "rewards/rejected": -0.21585772931575775, + "step": 8605 + }, + { + "epoch": 5.951590594744122, + "grad_norm": 6.161578178405762, + "learning_rate": 2.2491163362532657e-05, + "log_odds_chosen": 10.42709732055664, + "log_odds_ratio": -6.25356042291969e-05, + "logits/chosen": 0.113109290599823, + "logits/rejected": 0.06976966559886932, + "logps/chosen": -0.0025051303673535585, + "logps/rejected": -2.5732247829437256, + "loss": 0.6081, + "nll_loss": 0.15200835466384888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025051305419765413, + "rewards/margins": 0.25707194209098816, + "rewards/rejected": -0.2573224604129791, + "step": 8606 + }, + { + "epoch": 5.952282157676349, + "grad_norm": 15.835515975952148, + "learning_rate": 2.248732134624251e-05, + "log_odds_chosen": 9.979249000549316, + "log_odds_ratio": -0.0002866145805455744, + "logits/chosen": -0.7599179148674011, + "logits/rejected": -0.7421848773956299, + "logps/chosen": -0.000998812378384173, + "logps/rejected": -1.7596694231033325, + "loss": 1.1156, + "nll_loss": 0.27886295318603516, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.98812320176512e-05, + "rewards/margins": 0.17586706578731537, + "rewards/rejected": -0.17596694827079773, + "step": 8607 + }, + { + "epoch": 5.9529737206085755, + "grad_norm": 6.893677711486816, + "learning_rate": 2.248347932995236e-05, + "log_odds_chosen": 11.070934295654297, + "log_odds_ratio": -0.00021689744608011097, + "logits/chosen": -0.5308154821395874, + "logits/rejected": -0.5692165493965149, + "logps/chosen": -0.0005209331284277141, + "logps/rejected": -1.9994919300079346, + "loss": 0.6829, + "nll_loss": 0.17069843411445618, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2093309932388365e-05, + "rewards/margins": 0.19989712536334991, + "rewards/rejected": -0.1999492198228836, + "step": 8608 + }, + { + "epoch": 5.953665283540802, + "grad_norm": 9.771283149719238, + "learning_rate": 2.247963731366221e-05, + "log_odds_chosen": 7.985379695892334, + "log_odds_ratio": -0.13277308642864227, + "logits/chosen": -0.8190820217132568, + "logits/rejected": -0.7509307861328125, + "logps/chosen": -0.026225317269563675, + "logps/rejected": -1.2286534309387207, + "loss": 0.8013, + "nll_loss": 0.1870512068271637, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002622531494125724, + "rewards/margins": 0.12024280428886414, + "rewards/rejected": -0.12286533415317535, + "step": 8609 + }, + { + "epoch": 5.954356846473029, + "grad_norm": 9.570266723632812, + "learning_rate": 2.2475795297372064e-05, + "log_odds_chosen": 11.255922317504883, + "log_odds_ratio": -3.132349957013503e-05, + "logits/chosen": -0.6426629424095154, + "logits/rejected": -0.7214614152908325, + "logps/chosen": -0.00036300989449955523, + "logps/rejected": -2.28373646736145, + "loss": 0.6506, + "nll_loss": 0.16263887286186218, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.630098944995552e-05, + "rewards/margins": 0.228337362408638, + "rewards/rejected": -0.22837364673614502, + "step": 8610 + }, + { + "epoch": 5.955048409405256, + "grad_norm": 4.853519439697266, + "learning_rate": 2.2471953281081913e-05, + "log_odds_chosen": 9.830463409423828, + "log_odds_ratio": -0.00016724743181839585, + "logits/chosen": -0.3163721561431885, + "logits/rejected": -0.33467578887939453, + "logps/chosen": -0.0005570831708610058, + "logps/rejected": -1.6250646114349365, + "loss": 0.7881, + "nll_loss": 0.19701674580574036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.57083185412921e-05, + "rewards/margins": 0.16245076060295105, + "rewards/rejected": -0.16250646114349365, + "step": 8611 + }, + { + "epoch": 5.955739972337483, + "grad_norm": 7.442193031311035, + "learning_rate": 2.2468111264791765e-05, + "log_odds_chosen": 10.984609603881836, + "log_odds_ratio": -3.6467157769948244e-05, + "logits/chosen": -0.8650991916656494, + "logits/rejected": -0.8918881416320801, + "logps/chosen": -0.0008259970345534384, + "logps/rejected": -2.5060737133026123, + "loss": 0.8518, + "nll_loss": 0.21293659508228302, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.259969763457775e-05, + "rewards/margins": 0.2505247890949249, + "rewards/rejected": -0.2506074011325836, + "step": 8612 + }, + { + "epoch": 5.95643153526971, + "grad_norm": 6.5263543128967285, + "learning_rate": 2.2464269248501614e-05, + "log_odds_chosen": 10.611418724060059, + "log_odds_ratio": -8.558265108149499e-05, + "logits/chosen": -0.4117690920829773, + "logits/rejected": -0.4803375005722046, + "logps/chosen": -0.00030033683287911117, + "logps/rejected": -2.3147926330566406, + "loss": 0.5673, + "nll_loss": 0.14182326197624207, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0033686925889924e-05, + "rewards/margins": 0.23144923150539398, + "rewards/rejected": -0.23147927224636078, + "step": 8613 + }, + { + "epoch": 5.9571230982019365, + "grad_norm": 5.585168361663818, + "learning_rate": 2.2460427232211463e-05, + "log_odds_chosen": 10.020788192749023, + "log_odds_ratio": -0.0020185827743262053, + "logits/chosen": -0.7417982220649719, + "logits/rejected": -0.8316002488136292, + "logps/chosen": -0.0019587883725762367, + "logps/rejected": -1.7383875846862793, + "loss": 0.9549, + "nll_loss": 0.23852689564228058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001958788780029863, + "rewards/margins": 0.17364290356636047, + "rewards/rejected": -0.1738387793302536, + "step": 8614 + }, + { + "epoch": 5.957814661134163, + "grad_norm": 6.802754878997803, + "learning_rate": 2.2456585215921316e-05, + "log_odds_chosen": 10.667003631591797, + "log_odds_ratio": -9.732814942253754e-05, + "logits/chosen": -0.502316951751709, + "logits/rejected": -0.5313187837600708, + "logps/chosen": -0.0002029087336268276, + "logps/rejected": -2.169607639312744, + "loss": 0.9616, + "nll_loss": 0.24038337171077728, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.029087409027852e-05, + "rewards/margins": 0.2169404774904251, + "rewards/rejected": -0.21696075797080994, + "step": 8615 + }, + { + "epoch": 5.95850622406639, + "grad_norm": 4.8081183433532715, + "learning_rate": 2.2452743199631168e-05, + "log_odds_chosen": 10.98234748840332, + "log_odds_ratio": -0.00042538848356343806, + "logits/chosen": -0.437279611825943, + "logits/rejected": -0.5226849317550659, + "logps/chosen": -0.0008870043675415218, + "logps/rejected": -2.479304790496826, + "loss": 0.5971, + "nll_loss": 0.14922332763671875, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.870044257491827e-05, + "rewards/margins": 0.24784177541732788, + "rewards/rejected": -0.24793048202991486, + "step": 8616 + }, + { + "epoch": 5.959197786998617, + "grad_norm": 4.302028656005859, + "learning_rate": 2.2448901183341017e-05, + "log_odds_chosen": 9.503775596618652, + "log_odds_ratio": -0.00013842491898685694, + "logits/chosen": -0.5067854523658752, + "logits/rejected": -0.5400323867797852, + "logps/chosen": -0.00017005865811370313, + "logps/rejected": -1.234816074371338, + "loss": 0.5709, + "nll_loss": 0.1427188664674759, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7005866538966075e-05, + "rewards/margins": 0.12346460670232773, + "rewards/rejected": -0.12348160147666931, + "step": 8617 + }, + { + "epoch": 5.959889349930844, + "grad_norm": 9.732542991638184, + "learning_rate": 2.244505916705087e-05, + "log_odds_chosen": 10.742938041687012, + "log_odds_ratio": -0.0018327643629163504, + "logits/chosen": -0.1278042197227478, + "logits/rejected": -0.18076513707637787, + "logps/chosen": -0.0014679968589916825, + "logps/rejected": -2.591768264770508, + "loss": 0.7851, + "nll_loss": 0.19609123468399048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014679969171993434, + "rewards/margins": 0.2590300738811493, + "rewards/rejected": -0.2591768503189087, + "step": 8618 + }, + { + "epoch": 5.960580912863071, + "grad_norm": 6.502620697021484, + "learning_rate": 2.2441217150760722e-05, + "log_odds_chosen": 10.331536293029785, + "log_odds_ratio": -0.00011660806922009215, + "logits/chosen": -0.10579995810985565, + "logits/rejected": -0.11191463470458984, + "logps/chosen": -0.0003946416836697608, + "logps/rejected": -1.7601673603057861, + "loss": 0.8607, + "nll_loss": 0.21515217423439026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.946416836697608e-05, + "rewards/margins": 0.17597728967666626, + "rewards/rejected": -0.17601674795150757, + "step": 8619 + }, + { + "epoch": 5.9612724757952975, + "grad_norm": 17.422819137573242, + "learning_rate": 2.243737513447057e-05, + "log_odds_chosen": 11.365373611450195, + "log_odds_ratio": -2.036244040937163e-05, + "logits/chosen": 0.17082726955413818, + "logits/rejected": 0.10050006210803986, + "logps/chosen": -0.0002604515175335109, + "logps/rejected": -2.61152982711792, + "loss": 0.8965, + "nll_loss": 0.22412513196468353, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6045154299936257e-05, + "rewards/margins": 0.26112696528434753, + "rewards/rejected": -0.2611530125141144, + "step": 8620 + }, + { + "epoch": 5.961964038727524, + "grad_norm": 6.855748176574707, + "learning_rate": 2.2433533118180424e-05, + "log_odds_chosen": 10.539216041564941, + "log_odds_ratio": -9.504984336672351e-05, + "logits/chosen": -0.2191634476184845, + "logits/rejected": -0.2327946424484253, + "logps/chosen": -0.00025539161288179457, + "logps/rejected": -1.910869836807251, + "loss": 0.6591, + "nll_loss": 0.16477471590042114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5539160560583696e-05, + "rewards/margins": 0.19106145203113556, + "rewards/rejected": -0.19108697772026062, + "step": 8621 + }, + { + "epoch": 5.962655601659751, + "grad_norm": 9.039917945861816, + "learning_rate": 2.2429691101890273e-05, + "log_odds_chosen": 10.876553535461426, + "log_odds_ratio": -4.704829188995063e-05, + "logits/chosen": -0.6791223883628845, + "logits/rejected": -0.7270498871803284, + "logps/chosen": -0.00022510235430672765, + "logps/rejected": -2.25089168548584, + "loss": 0.7708, + "nll_loss": 0.19269677996635437, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2510233975481242e-05, + "rewards/margins": 0.2250666469335556, + "rewards/rejected": -0.2250891625881195, + "step": 8622 + }, + { + "epoch": 5.963347164591978, + "grad_norm": 16.28369903564453, + "learning_rate": 2.2425849085600122e-05, + "log_odds_chosen": 11.484891891479492, + "log_odds_ratio": -2.6043024263344705e-05, + "logits/chosen": -0.1581682711839676, + "logits/rejected": -0.216511532664299, + "logps/chosen": -0.0003339290269650519, + "logps/rejected": -3.420424699783325, + "loss": 1.0881, + "nll_loss": 0.27203014492988586, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3392905606888235e-05, + "rewards/margins": 0.3420090675354004, + "rewards/rejected": -0.342042475938797, + "step": 8623 + }, + { + "epoch": 5.964038727524205, + "grad_norm": 7.26689338684082, + "learning_rate": 2.2422007069309974e-05, + "log_odds_chosen": 10.805521011352539, + "log_odds_ratio": -4.978823562851176e-05, + "logits/chosen": -0.5056122541427612, + "logits/rejected": -0.6053757667541504, + "logps/chosen": -0.00015571873518638313, + "logps/rejected": -2.0780341625213623, + "loss": 0.8315, + "nll_loss": 0.2078588306903839, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5571875337627716e-05, + "rewards/margins": 0.20778784155845642, + "rewards/rejected": -0.20780342817306519, + "step": 8624 + }, + { + "epoch": 5.964730290456432, + "grad_norm": 8.424686431884766, + "learning_rate": 2.2418165053019827e-05, + "log_odds_chosen": 10.124658584594727, + "log_odds_ratio": -0.0005912402411922812, + "logits/chosen": -0.28676581382751465, + "logits/rejected": -0.39113837480545044, + "logps/chosen": -0.006734848488122225, + "logps/rejected": -2.4362435340881348, + "loss": 0.6915, + "nll_loss": 0.17280808091163635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006734849303029478, + "rewards/margins": 0.24295085668563843, + "rewards/rejected": -0.24362435936927795, + "step": 8625 + }, + { + "epoch": 5.9654218533886585, + "grad_norm": 8.816883087158203, + "learning_rate": 2.2414323036729676e-05, + "log_odds_chosen": 10.74892807006836, + "log_odds_ratio": -9.71397093962878e-05, + "logits/chosen": -0.7456105947494507, + "logits/rejected": -0.7920002937316895, + "logps/chosen": -0.0003633729356806725, + "logps/rejected": -2.1066083908081055, + "loss": 0.4359, + "nll_loss": 0.10895287245512009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.633729284047149e-05, + "rewards/margins": 0.21062450110912323, + "rewards/rejected": -0.21066084504127502, + "step": 8626 + }, + { + "epoch": 5.966113416320885, + "grad_norm": 9.672571182250977, + "learning_rate": 2.2410481020439528e-05, + "log_odds_chosen": 10.556832313537598, + "log_odds_ratio": -0.00010228557221125811, + "logits/chosen": -0.39597249031066895, + "logits/rejected": -0.4569196105003357, + "logps/chosen": -0.0004282575682736933, + "logps/rejected": -2.1169612407684326, + "loss": 0.648, + "nll_loss": 0.1619986593723297, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.282575537217781e-05, + "rewards/margins": 0.21165330708026886, + "rewards/rejected": -0.21169611811637878, + "step": 8627 + }, + { + "epoch": 5.966804979253112, + "grad_norm": 9.692134857177734, + "learning_rate": 2.240663900414938e-05, + "log_odds_chosen": 10.527505874633789, + "log_odds_ratio": -8.708895620657131e-05, + "logits/chosen": -0.09428629279136658, + "logits/rejected": -0.1848604679107666, + "logps/chosen": -0.0017188250785693526, + "logps/rejected": -1.9022977352142334, + "loss": 0.9876, + "nll_loss": 0.24690304696559906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017188250785693526, + "rewards/margins": 0.1900579035282135, + "rewards/rejected": -0.19022977352142334, + "step": 8628 + }, + { + "epoch": 5.967496542185339, + "grad_norm": 7.546328067779541, + "learning_rate": 2.240279698785923e-05, + "log_odds_chosen": 10.628252983093262, + "log_odds_ratio": -6.79682198096998e-05, + "logits/chosen": -0.28366631269454956, + "logits/rejected": -0.3606613874435425, + "logps/chosen": -0.0003197102341800928, + "logps/rejected": -1.9171504974365234, + "loss": 1.0352, + "nll_loss": 0.2587844133377075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.197102341800928e-05, + "rewards/margins": 0.1916830837726593, + "rewards/rejected": -0.1917150467634201, + "step": 8629 + }, + { + "epoch": 5.968188105117566, + "grad_norm": 8.36005973815918, + "learning_rate": 2.2398954971569082e-05, + "log_odds_chosen": 9.56314468383789, + "log_odds_ratio": -0.0005237284349277616, + "logits/chosen": -0.3528299033641815, + "logits/rejected": -0.39573559165000916, + "logps/chosen": -0.001121058943681419, + "logps/rejected": -1.7783137559890747, + "loss": 0.9561, + "nll_loss": 0.2389650046825409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011210589582333341, + "rewards/margins": 0.17771926522254944, + "rewards/rejected": -0.17783138155937195, + "step": 8630 + }, + { + "epoch": 5.968879668049793, + "grad_norm": 8.814260482788086, + "learning_rate": 2.239511295527893e-05, + "log_odds_chosen": 10.795074462890625, + "log_odds_ratio": -4.484070450416766e-05, + "logits/chosen": -0.7871134281158447, + "logits/rejected": -0.8398634791374207, + "logps/chosen": -0.0004096375487279147, + "logps/rejected": -2.22101092338562, + "loss": 0.6136, + "nll_loss": 0.15338875353336334, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096375414519571e-05, + "rewards/margins": 0.22206011414527893, + "rewards/rejected": -0.22210107743740082, + "step": 8631 + }, + { + "epoch": 5.9695712309820195, + "grad_norm": 7.860897064208984, + "learning_rate": 2.239127093898878e-05, + "log_odds_chosen": 11.818281173706055, + "log_odds_ratio": -4.5246742956805974e-05, + "logits/chosen": -0.0748319998383522, + "logits/rejected": -0.21310384571552277, + "logps/chosen": -0.00032474740874022245, + "logps/rejected": -2.9677517414093018, + "loss": 0.7533, + "nll_loss": 0.1883193850517273, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.247474160161801e-05, + "rewards/margins": 0.29674267768859863, + "rewards/rejected": -0.2967751622200012, + "step": 8632 + }, + { + "epoch": 5.970262793914246, + "grad_norm": 5.752249240875244, + "learning_rate": 2.2387428922698633e-05, + "log_odds_chosen": 9.994537353515625, + "log_odds_ratio": -0.0003835707320831716, + "logits/chosen": -0.5630497932434082, + "logits/rejected": -0.6039668917655945, + "logps/chosen": -0.0008304682560265064, + "logps/rejected": -1.9116523265838623, + "loss": 0.6675, + "nll_loss": 0.16682936251163483, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.304682705784217e-05, + "rewards/margins": 0.1910821944475174, + "rewards/rejected": -0.1911652386188507, + "step": 8633 + }, + { + "epoch": 5.970954356846473, + "grad_norm": 6.303492546081543, + "learning_rate": 2.2383586906408485e-05, + "log_odds_chosen": 11.492966651916504, + "log_odds_ratio": -2.2081108909333125e-05, + "logits/chosen": -0.4860258102416992, + "logits/rejected": -0.5048642754554749, + "logps/chosen": -0.00012483232421800494, + "logps/rejected": -2.422050952911377, + "loss": 0.6797, + "nll_loss": 0.1699298918247223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2483233149396256e-05, + "rewards/margins": 0.2421925961971283, + "rewards/rejected": -0.24220508337020874, + "step": 8634 + }, + { + "epoch": 5.9716459197787, + "grad_norm": 7.249591827392578, + "learning_rate": 2.2379744890118334e-05, + "log_odds_chosen": 8.761195182800293, + "log_odds_ratio": -0.004018676467239857, + "logits/chosen": -0.49295467138290405, + "logits/rejected": -0.5738300681114197, + "logps/chosen": -0.002777382265776396, + "logps/rejected": -1.8959558010101318, + "loss": 0.6219, + "nll_loss": 0.15507788956165314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027773823239840567, + "rewards/margins": 0.18931785225868225, + "rewards/rejected": -0.18959558010101318, + "step": 8635 + }, + { + "epoch": 5.972337482710927, + "grad_norm": 10.255823135375977, + "learning_rate": 2.2375902873828187e-05, + "log_odds_chosen": 10.179533004760742, + "log_odds_ratio": -0.00010110568109666929, + "logits/chosen": -0.43774309754371643, + "logits/rejected": -0.5146663188934326, + "logps/chosen": -0.0007115602493286133, + "logps/rejected": -2.2837624549865723, + "loss": 1.0529, + "nll_loss": 0.26320332288742065, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.115602056728676e-05, + "rewards/margins": 0.22830507159233093, + "rewards/rejected": -0.22837623953819275, + "step": 8636 + }, + { + "epoch": 5.973029045643154, + "grad_norm": 8.049625396728516, + "learning_rate": 2.237206085753804e-05, + "log_odds_chosen": 10.132574081420898, + "log_odds_ratio": -0.00020164766465313733, + "logits/chosen": -0.7841938734054565, + "logits/rejected": -0.8315725922584534, + "logps/chosen": -0.0002748143160715699, + "logps/rejected": -1.7135040760040283, + "loss": 0.5968, + "nll_loss": 0.14917722344398499, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7481431970954873e-05, + "rewards/margins": 0.17132292687892914, + "rewards/rejected": -0.1713503897190094, + "step": 8637 + }, + { + "epoch": 5.9737206085753805, + "grad_norm": 7.535101413726807, + "learning_rate": 2.2368218841247888e-05, + "log_odds_chosen": 9.183361053466797, + "log_odds_ratio": -0.00047606491716578603, + "logits/chosen": -0.4759877622127533, + "logits/rejected": -0.47154396772384644, + "logps/chosen": -0.0007837703451514244, + "logps/rejected": -1.6742212772369385, + "loss": 0.6127, + "nll_loss": 0.15313656628131866, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.837703742552549e-05, + "rewards/margins": 0.16734375059604645, + "rewards/rejected": -0.1674221307039261, + "step": 8638 + }, + { + "epoch": 5.974412171507607, + "grad_norm": 7.383305072784424, + "learning_rate": 2.236437682495774e-05, + "log_odds_chosen": 11.191994667053223, + "log_odds_ratio": -2.7318410502630286e-05, + "logits/chosen": -0.5382693409919739, + "logits/rejected": -0.5746403932571411, + "logps/chosen": -0.00014897863729856908, + "logps/rejected": -2.3866891860961914, + "loss": 0.5678, + "nll_loss": 0.14194512367248535, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4897865185048431e-05, + "rewards/margins": 0.23865404725074768, + "rewards/rejected": -0.23866893351078033, + "step": 8639 + }, + { + "epoch": 5.975103734439834, + "grad_norm": 9.034134864807129, + "learning_rate": 2.236053480866759e-05, + "log_odds_chosen": 9.865920066833496, + "log_odds_ratio": -0.05563132092356682, + "logits/chosen": -0.5075576305389404, + "logits/rejected": -0.5447198152542114, + "logps/chosen": -0.01322740875184536, + "logps/rejected": -2.043813467025757, + "loss": 0.9815, + "nll_loss": 0.23980922996997833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013227409217506647, + "rewards/margins": 0.20305860042572021, + "rewards/rejected": -0.2043813318014145, + "step": 8640 + }, + { + "epoch": 5.975795297372061, + "grad_norm": 7.827591896057129, + "learning_rate": 2.235669279237744e-05, + "log_odds_chosen": 9.875395774841309, + "log_odds_ratio": -0.002394024282693863, + "logits/chosen": -0.35297128558158875, + "logits/rejected": -0.3840131163597107, + "logps/chosen": -0.0015253536403179169, + "logps/rejected": -1.8439840078353882, + "loss": 0.5606, + "nll_loss": 0.1399049460887909, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015253537276294082, + "rewards/margins": 0.18424585461616516, + "rewards/rejected": -0.18439838290214539, + "step": 8641 + }, + { + "epoch": 5.976486860304288, + "grad_norm": 7.682641983032227, + "learning_rate": 2.235285077608729e-05, + "log_odds_chosen": 10.980426788330078, + "log_odds_ratio": -3.709265365614556e-05, + "logits/chosen": -0.13334612548351288, + "logits/rejected": -0.23983854055404663, + "logps/chosen": -0.0006211751606315374, + "logps/rejected": -2.8142642974853516, + "loss": 0.8566, + "nll_loss": 0.21414557099342346, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.211752042872831e-05, + "rewards/margins": 0.2813643217086792, + "rewards/rejected": -0.28142642974853516, + "step": 8642 + }, + { + "epoch": 5.977178423236515, + "grad_norm": 8.214534759521484, + "learning_rate": 2.2349008759797143e-05, + "log_odds_chosen": 9.371925354003906, + "log_odds_ratio": -0.002760364906862378, + "logits/chosen": -0.04601850360631943, + "logits/rejected": -0.13053913414478302, + "logps/chosen": -0.0032472298480570316, + "logps/rejected": -1.5316357612609863, + "loss": 1.272, + "nll_loss": 0.31772580742836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003247229615226388, + "rewards/margins": 0.15283885598182678, + "rewards/rejected": -0.1531635820865631, + "step": 8643 + }, + { + "epoch": 5.977869986168741, + "grad_norm": 9.129252433776855, + "learning_rate": 2.2345166743506993e-05, + "log_odds_chosen": 10.828535079956055, + "log_odds_ratio": -0.00030270888237282634, + "logits/chosen": -0.5784143209457397, + "logits/rejected": -0.5499647855758667, + "logps/chosen": -0.0003934805281460285, + "logps/rejected": -2.299145221710205, + "loss": 0.7022, + "nll_loss": 0.17550931870937347, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.934805135941133e-05, + "rewards/margins": 0.22987514734268188, + "rewards/rejected": -0.22991451621055603, + "step": 8644 + }, + { + "epoch": 5.978561549100968, + "grad_norm": 7.754681587219238, + "learning_rate": 2.2341324727216845e-05, + "log_odds_chosen": 10.025257110595703, + "log_odds_ratio": -0.00018732018361333758, + "logits/chosen": -0.36683574318885803, + "logits/rejected": -0.5633817315101624, + "logps/chosen": -0.0003875857510138303, + "logps/rejected": -1.5009148120880127, + "loss": 0.6898, + "nll_loss": 0.1724189668893814, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.875857873936184e-05, + "rewards/margins": 0.1500527262687683, + "rewards/rejected": -0.1500914990901947, + "step": 8645 + }, + { + "epoch": 5.979253112033195, + "grad_norm": 9.99053955078125, + "learning_rate": 2.2337482710926697e-05, + "log_odds_chosen": 10.781888008117676, + "log_odds_ratio": -5.4689018725184724e-05, + "logits/chosen": -0.21867603063583374, + "logits/rejected": -0.24089495837688446, + "logps/chosen": -0.00023485012934543192, + "logps/rejected": -2.4182658195495605, + "loss": 0.7284, + "nll_loss": 0.1821037083864212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.348501220694743e-05, + "rewards/margins": 0.2418031096458435, + "rewards/rejected": -0.241826593875885, + "step": 8646 + }, + { + "epoch": 5.979944674965422, + "grad_norm": 5.023019313812256, + "learning_rate": 2.2333640694636546e-05, + "log_odds_chosen": 10.844634056091309, + "log_odds_ratio": -3.536961230565794e-05, + "logits/chosen": -0.45662564039230347, + "logits/rejected": -0.47854191064834595, + "logps/chosen": -0.00011072782217524946, + "logps/rejected": -1.7490148544311523, + "loss": 0.9009, + "nll_loss": 0.22522106766700745, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1072783308918588e-05, + "rewards/margins": 0.1748904138803482, + "rewards/rejected": -0.17490148544311523, + "step": 8647 + }, + { + "epoch": 5.980636237897649, + "grad_norm": 6.765304088592529, + "learning_rate": 2.23297986783464e-05, + "log_odds_chosen": 10.448127746582031, + "log_odds_ratio": -0.00030654840520583093, + "logits/chosen": -0.7699148058891296, + "logits/rejected": -0.8414617776870728, + "logps/chosen": -0.0004278847191017121, + "logps/rejected": -1.927390217781067, + "loss": 1.3203, + "nll_loss": 0.3300439715385437, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2788473365362734e-05, + "rewards/margins": 0.1926962435245514, + "rewards/rejected": -0.19273902475833893, + "step": 8648 + }, + { + "epoch": 5.981327800829876, + "grad_norm": 13.171281814575195, + "learning_rate": 2.2325956662056248e-05, + "log_odds_chosen": 9.756038665771484, + "log_odds_ratio": -0.00038166638114489615, + "logits/chosen": -0.5013332962989807, + "logits/rejected": -0.5654415488243103, + "logps/chosen": -0.0009114966378547251, + "logps/rejected": -2.246232032775879, + "loss": 1.1029, + "nll_loss": 0.27567875385284424, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.114966815104708e-05, + "rewards/margins": 0.22453203797340393, + "rewards/rejected": -0.2246231883764267, + "step": 8649 + }, + { + "epoch": 5.982019363762102, + "grad_norm": 7.266759872436523, + "learning_rate": 2.2322114645766097e-05, + "log_odds_chosen": 10.222375869750977, + "log_odds_ratio": -0.0001875264715636149, + "logits/chosen": -0.2591787874698639, + "logits/rejected": -0.2960703372955322, + "logps/chosen": -0.0010339757427573204, + "logps/rejected": -2.102975606918335, + "loss": 0.5865, + "nll_loss": 0.14661476016044617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000103397571365349, + "rewards/margins": 0.2101941555738449, + "rewards/rejected": -0.2102975696325302, + "step": 8650 + }, + { + "epoch": 5.982710926694329, + "grad_norm": 9.63925838470459, + "learning_rate": 2.231827262947595e-05, + "log_odds_chosen": 10.470054626464844, + "log_odds_ratio": -0.00010331722296541557, + "logits/chosen": -0.034905824810266495, + "logits/rejected": -0.030192043632268906, + "logps/chosen": -0.0003535951836965978, + "logps/rejected": -1.8167335987091064, + "loss": 0.5858, + "nll_loss": 0.1464492231607437, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5359520552447066e-05, + "rewards/margins": 0.18163800239562988, + "rewards/rejected": -0.1816733479499817, + "step": 8651 + }, + { + "epoch": 5.983402489626556, + "grad_norm": 6.006370544433594, + "learning_rate": 2.2314430613185802e-05, + "log_odds_chosen": 9.1875638961792, + "log_odds_ratio": -0.00034047331428155303, + "logits/chosen": -0.3792960047721863, + "logits/rejected": -0.41480278968811035, + "logps/chosen": -0.006437941920012236, + "logps/rejected": -2.0599706172943115, + "loss": 0.8695, + "nll_loss": 0.21734338998794556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006437941920012236, + "rewards/margins": 0.2053532749414444, + "rewards/rejected": -0.2059970647096634, + "step": 8652 + }, + { + "epoch": 5.984094052558783, + "grad_norm": 4.718745708465576, + "learning_rate": 2.231058859689565e-05, + "log_odds_chosen": 9.810354232788086, + "log_odds_ratio": -0.00027896571555174887, + "logits/chosen": -0.6763026714324951, + "logits/rejected": -0.6438024044036865, + "logps/chosen": -0.0017682433826848865, + "logps/rejected": -1.700201392173767, + "loss": 0.4415, + "nll_loss": 0.11035287380218506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017682435282040387, + "rewards/margins": 0.16984331607818604, + "rewards/rejected": -0.17002014815807343, + "step": 8653 + }, + { + "epoch": 5.98478561549101, + "grad_norm": 9.258583068847656, + "learning_rate": 2.2306746580605503e-05, + "log_odds_chosen": 11.272083282470703, + "log_odds_ratio": -2.6522073312662542e-05, + "logits/chosen": -0.2892472445964813, + "logits/rejected": -0.37034428119659424, + "logps/chosen": -9.99235053313896e-05, + "logps/rejected": -2.1363444328308105, + "loss": 0.7271, + "nll_loss": 0.18177568912506104, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.99234998744214e-06, + "rewards/margins": 0.21362446248531342, + "rewards/rejected": -0.2136344462633133, + "step": 8654 + }, + { + "epoch": 5.985477178423237, + "grad_norm": 6.793190956115723, + "learning_rate": 2.2302904564315356e-05, + "log_odds_chosen": 10.94711971282959, + "log_odds_ratio": -0.0002915470104198903, + "logits/chosen": 0.11246512830257416, + "logits/rejected": -0.04465283453464508, + "logps/chosen": -0.0031524840742349625, + "logps/rejected": -3.0115573406219482, + "loss": 0.7859, + "nll_loss": 0.1964433640241623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00031524847145192325, + "rewards/margins": 0.30084046721458435, + "rewards/rejected": -0.3011557459831238, + "step": 8655 + }, + { + "epoch": 5.986168741355463, + "grad_norm": 5.1810150146484375, + "learning_rate": 2.2299062548025205e-05, + "log_odds_chosen": 10.347426414489746, + "log_odds_ratio": -0.00036247429670765996, + "logits/chosen": -0.21458715200424194, + "logits/rejected": -0.24404297769069672, + "logps/chosen": -0.00046909027150832117, + "logps/rejected": -2.0660440921783447, + "loss": 0.6473, + "nll_loss": 0.16179302334785461, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.690902278525755e-05, + "rewards/margins": 0.2065575122833252, + "rewards/rejected": -0.20660439133644104, + "step": 8656 + }, + { + "epoch": 5.98686030428769, + "grad_norm": 7.241073131561279, + "learning_rate": 2.2295220531735057e-05, + "log_odds_chosen": 10.880849838256836, + "log_odds_ratio": -0.00020453293109312654, + "logits/chosen": -0.7550759315490723, + "logits/rejected": -0.7828030586242676, + "logps/chosen": -0.00047937879571691155, + "logps/rejected": -2.152524709701538, + "loss": 0.8083, + "nll_loss": 0.20206305384635925, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7937879571691155e-05, + "rewards/margins": 0.21520453691482544, + "rewards/rejected": -0.21525248885154724, + "step": 8657 + }, + { + "epoch": 5.987551867219917, + "grad_norm": 7.950253009796143, + "learning_rate": 2.2291378515444906e-05, + "log_odds_chosen": 10.849205017089844, + "log_odds_ratio": -4.903499939246103e-05, + "logits/chosen": -0.5559477210044861, + "logits/rejected": -0.5406090021133423, + "logps/chosen": -0.007074021268635988, + "logps/rejected": -2.487555503845215, + "loss": 1.2176, + "nll_loss": 0.3043956458568573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007074021268635988, + "rewards/margins": 0.24804814159870148, + "rewards/rejected": -0.248755544424057, + "step": 8658 + }, + { + "epoch": 5.988243430152144, + "grad_norm": 6.710446357727051, + "learning_rate": 2.2287536499154755e-05, + "log_odds_chosen": 10.762208938598633, + "log_odds_ratio": -0.0006994472933001816, + "logits/chosen": -0.3678176999092102, + "logits/rejected": -0.47703123092651367, + "logps/chosen": -0.0007495337049476802, + "logps/rejected": -2.196031332015991, + "loss": 1.1679, + "nll_loss": 0.291897714138031, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.495337194995955e-05, + "rewards/margins": 0.2195281684398651, + "rewards/rejected": -0.21960312128067017, + "step": 8659 + }, + { + "epoch": 5.988934993084371, + "grad_norm": 7.118763446807861, + "learning_rate": 2.2283694482864608e-05, + "log_odds_chosen": 11.913274765014648, + "log_odds_ratio": -4.843656643060967e-05, + "logits/chosen": -0.5528740882873535, + "logits/rejected": -0.6186320781707764, + "logps/chosen": -0.0005820526275783777, + "logps/rejected": -3.5821590423583984, + "loss": 0.6879, + "nll_loss": 0.17196498811244965, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.820525984745473e-05, + "rewards/margins": 0.3581577241420746, + "rewards/rejected": -0.35821592807769775, + "step": 8660 + }, + { + "epoch": 5.9896265560165975, + "grad_norm": 10.976981163024902, + "learning_rate": 2.227985246657446e-05, + "log_odds_chosen": 10.413000106811523, + "log_odds_ratio": -0.00011384957906557247, + "logits/chosen": -0.6883463859558105, + "logits/rejected": -0.7387396693229675, + "logps/chosen": -0.00041092990431934595, + "logps/rejected": -2.2846686840057373, + "loss": 1.2261, + "nll_loss": 0.3065022826194763, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1092989704338834e-05, + "rewards/margins": 0.2284257709980011, + "rewards/rejected": -0.22846688330173492, + "step": 8661 + }, + { + "epoch": 5.990318118948824, + "grad_norm": 7.272087574005127, + "learning_rate": 2.227601045028431e-05, + "log_odds_chosen": 10.620426177978516, + "log_odds_ratio": -0.00024088645295705646, + "logits/chosen": -0.8818700909614563, + "logits/rejected": -0.8350770473480225, + "logps/chosen": -0.0008133258670568466, + "logps/rejected": -2.7097411155700684, + "loss": 0.7731, + "nll_loss": 0.1932595819234848, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13325823401101e-05, + "rewards/margins": 0.27089282870292664, + "rewards/rejected": -0.27097412943840027, + "step": 8662 + }, + { + "epoch": 5.991009681881051, + "grad_norm": 8.038209915161133, + "learning_rate": 2.2272168433994162e-05, + "log_odds_chosen": 10.276552200317383, + "log_odds_ratio": -0.00010404939530417323, + "logits/chosen": -0.33884933590888977, + "logits/rejected": -0.34993690252304077, + "logps/chosen": -0.0004283892922103405, + "logps/rejected": -2.4142346382141113, + "loss": 0.6032, + "nll_loss": 0.15079672634601593, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2838932131417096e-05, + "rewards/margins": 0.24138060212135315, + "rewards/rejected": -0.24142345786094666, + "step": 8663 + }, + { + "epoch": 5.991701244813278, + "grad_norm": 6.9717698097229, + "learning_rate": 2.2268326417704014e-05, + "log_odds_chosen": 10.098555564880371, + "log_odds_ratio": -0.00010227275197394192, + "logits/chosen": -0.3164041042327881, + "logits/rejected": -0.28061607480049133, + "logps/chosen": -0.00019917808822356164, + "logps/rejected": -1.6113238334655762, + "loss": 0.6207, + "nll_loss": 0.15516464412212372, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9917810277547687e-05, + "rewards/margins": 0.1611124575138092, + "rewards/rejected": -0.16113239526748657, + "step": 8664 + }, + { + "epoch": 5.992392807745505, + "grad_norm": 7.463654518127441, + "learning_rate": 2.2264484401413863e-05, + "log_odds_chosen": 11.0711030960083, + "log_odds_ratio": -4.571516183204949e-05, + "logits/chosen": -0.8618670701980591, + "logits/rejected": -0.9495669603347778, + "logps/chosen": -0.00015691615408286452, + "logps/rejected": -2.068732976913452, + "loss": 0.6994, + "nll_loss": 0.17484234273433685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5691615772084333e-05, + "rewards/margins": 0.2068575918674469, + "rewards/rejected": -0.2068733125925064, + "step": 8665 + }, + { + "epoch": 5.993084370677732, + "grad_norm": 5.811643600463867, + "learning_rate": 2.2260642385123716e-05, + "log_odds_chosen": 10.059645652770996, + "log_odds_ratio": -0.00012689466529991478, + "logits/chosen": -0.6054131388664246, + "logits/rejected": -0.5192132592201233, + "logps/chosen": -0.00026542056002654135, + "logps/rejected": -1.6344331502914429, + "loss": 1.132, + "nll_loss": 0.2829943895339966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6542056730249897e-05, + "rewards/margins": 0.1634167730808258, + "rewards/rejected": -0.16344329714775085, + "step": 8666 + }, + { + "epoch": 5.9937759336099585, + "grad_norm": 7.299609661102295, + "learning_rate": 2.2256800368833565e-05, + "log_odds_chosen": 9.224020004272461, + "log_odds_ratio": -0.0012528158258646727, + "logits/chosen": -0.5629249811172485, + "logits/rejected": -0.6724428534507751, + "logps/chosen": -0.0006475116824731231, + "logps/rejected": -1.4131289720535278, + "loss": 0.8529, + "nll_loss": 0.21310065686702728, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.475116970250383e-05, + "rewards/margins": 0.14124815165996552, + "rewards/rejected": -0.14131289720535278, + "step": 8667 + }, + { + "epoch": 5.994467496542185, + "grad_norm": 6.194040775299072, + "learning_rate": 2.2252958352543414e-05, + "log_odds_chosen": 11.240839004516602, + "log_odds_ratio": -3.617586480686441e-05, + "logits/chosen": -0.3279075026512146, + "logits/rejected": -0.3418920636177063, + "logps/chosen": -0.0007275803945958614, + "logps/rejected": -2.8612427711486816, + "loss": 1.0579, + "nll_loss": 0.2644652724266052, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.275804819073528e-05, + "rewards/margins": 0.28605151176452637, + "rewards/rejected": -0.2861242890357971, + "step": 8668 + }, + { + "epoch": 5.995159059474412, + "grad_norm": 6.6501851081848145, + "learning_rate": 2.2249116336253266e-05, + "log_odds_chosen": 10.770421981811523, + "log_odds_ratio": -5.30408215126954e-05, + "logits/chosen": -0.2276163250207901, + "logits/rejected": -0.3236449062824249, + "logps/chosen": -0.00018166887457482517, + "logps/rejected": -2.0231595039367676, + "loss": 0.4534, + "nll_loss": 0.11334690451622009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8166887457482517e-05, + "rewards/margins": 0.20229777693748474, + "rewards/rejected": -0.20231595635414124, + "step": 8669 + }, + { + "epoch": 5.995850622406639, + "grad_norm": 13.294677734375, + "learning_rate": 2.2245274319963115e-05, + "log_odds_chosen": 11.529056549072266, + "log_odds_ratio": -1.5711140804341994e-05, + "logits/chosen": -0.2797544300556183, + "logits/rejected": -0.37466806173324585, + "logps/chosen": -0.0009607851970940828, + "logps/rejected": -2.869312286376953, + "loss": 0.7845, + "nll_loss": 0.19612446427345276, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.607851097825915e-05, + "rewards/margins": 0.2868351638317108, + "rewards/rejected": -0.28693124651908875, + "step": 8670 + }, + { + "epoch": 5.996542185338866, + "grad_norm": 9.357288360595703, + "learning_rate": 2.2241432303672968e-05, + "log_odds_chosen": 10.64914608001709, + "log_odds_ratio": -8.049822645261884e-05, + "logits/chosen": -0.45057523250579834, + "logits/rejected": -0.4166865944862366, + "logps/chosen": -0.0009274584008380771, + "logps/rejected": -2.585829019546509, + "loss": 0.8917, + "nll_loss": 0.22291788458824158, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.27458459045738e-05, + "rewards/margins": 0.2584901750087738, + "rewards/rejected": -0.2585829198360443, + "step": 8671 + }, + { + "epoch": 5.997233748271093, + "grad_norm": 8.706218719482422, + "learning_rate": 2.223759028738282e-05, + "log_odds_chosen": 11.269906997680664, + "log_odds_ratio": -1.8488583009457216e-05, + "logits/chosen": -0.28220856189727783, + "logits/rejected": -0.3012881875038147, + "logps/chosen": -0.00011813380115199834, + "logps/rejected": -1.8137805461883545, + "loss": 0.5725, + "nll_loss": 0.14313194155693054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1813381206593476e-05, + "rewards/margins": 0.18136624991893768, + "rewards/rejected": -0.1813780665397644, + "step": 8672 + }, + { + "epoch": 5.9979253112033195, + "grad_norm": 11.165410995483398, + "learning_rate": 2.223374827109267e-05, + "log_odds_chosen": 8.817961692810059, + "log_odds_ratio": -0.04084470868110657, + "logits/chosen": -0.30266138911247253, + "logits/rejected": -0.37050777673721313, + "logps/chosen": -0.01773255318403244, + "logps/rejected": -1.998016357421875, + "loss": 1.1478, + "nll_loss": 0.2828754186630249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017732552951201797, + "rewards/margins": 0.19802838563919067, + "rewards/rejected": -0.19980162382125854, + "step": 8673 + }, + { + "epoch": 5.998616874135546, + "grad_norm": 15.032378196716309, + "learning_rate": 2.2229906254802522e-05, + "log_odds_chosen": 10.327550888061523, + "log_odds_ratio": -0.002341375919058919, + "logits/chosen": -0.28076595067977905, + "logits/rejected": -0.20058351755142212, + "logps/chosen": -0.001488571404479444, + "logps/rejected": -2.425849437713623, + "loss": 1.0028, + "nll_loss": 0.25046506524086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014885715791024268, + "rewards/margins": 0.24243608117103577, + "rewards/rejected": -0.2425849437713623, + "step": 8674 + }, + { + "epoch": 5.999308437067773, + "grad_norm": 5.518479347229004, + "learning_rate": 2.2226064238512374e-05, + "log_odds_chosen": 10.280984878540039, + "log_odds_ratio": -0.001351931132376194, + "logits/chosen": -0.2739599943161011, + "logits/rejected": -0.3183516263961792, + "logps/chosen": -0.0012864520540460944, + "logps/rejected": -2.3341145515441895, + "loss": 0.5981, + "nll_loss": 0.14940090477466583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012864521704614162, + "rewards/margins": 0.23328280448913574, + "rewards/rejected": -0.23341144621372223, + "step": 8675 + }, + { + "epoch": 6.0, + "grad_norm": 15.6314697265625, + "learning_rate": 2.2222222222222223e-05, + "log_odds_chosen": 10.472728729248047, + "log_odds_ratio": -6.75845512887463e-05, + "logits/chosen": -0.5962976217269897, + "logits/rejected": -0.6523298621177673, + "logps/chosen": -0.0005146162584424019, + "logps/rejected": -2.0234107971191406, + "loss": 0.6844, + "nll_loss": 0.17108887434005737, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1461629482218996e-05, + "rewards/margins": 0.2022896260023117, + "rewards/rejected": -0.20234109461307526, + "step": 8676 + }, + { + "epoch": 6.000691562932227, + "grad_norm": 6.434847831726074, + "learning_rate": 2.2218380205932072e-05, + "log_odds_chosen": 11.39303970336914, + "log_odds_ratio": -0.00013875612057745457, + "logits/chosen": -0.5068904161453247, + "logits/rejected": -0.46730056405067444, + "logps/chosen": -0.00022783351596444845, + "logps/rejected": -2.217576026916504, + "loss": 0.6635, + "nll_loss": 0.16585785150527954, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2783351596444845e-05, + "rewards/margins": 0.22173485159873962, + "rewards/rejected": -0.22175762057304382, + "step": 8677 + }, + { + "epoch": 6.001383125864454, + "grad_norm": 6.79045295715332, + "learning_rate": 2.2214538189641925e-05, + "log_odds_chosen": 11.089578628540039, + "log_odds_ratio": -0.00026866502594202757, + "logits/chosen": -0.40105462074279785, + "logits/rejected": -0.40405669808387756, + "logps/chosen": -0.0005820757942274213, + "logps/rejected": -2.961902618408203, + "loss": 0.6948, + "nll_loss": 0.17367303371429443, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.820758451591246e-05, + "rewards/margins": 0.29613208770751953, + "rewards/rejected": -0.2961902618408203, + "step": 8678 + }, + { + "epoch": 6.0020746887966805, + "grad_norm": 5.017109394073486, + "learning_rate": 2.2210696173351774e-05, + "log_odds_chosen": 9.893177032470703, + "log_odds_ratio": -0.00048073488869704306, + "logits/chosen": -0.5683239102363586, + "logits/rejected": -0.625013530254364, + "logps/chosen": -0.0008165045292116702, + "logps/rejected": -2.0497570037841797, + "loss": 0.8591, + "nll_loss": 0.214732825756073, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.165045437635854e-05, + "rewards/margins": 0.2048940360546112, + "rewards/rejected": -0.2049756795167923, + "step": 8679 + }, + { + "epoch": 6.002766251728907, + "grad_norm": 5.027901649475098, + "learning_rate": 2.2206854157061626e-05, + "log_odds_chosen": 10.607897758483887, + "log_odds_ratio": -0.00017300539184361696, + "logits/chosen": -0.35090476274490356, + "logits/rejected": -0.4269883334636688, + "logps/chosen": -0.0007968850550241768, + "logps/rejected": -2.3614661693573, + "loss": 0.46, + "nll_loss": 0.11498267948627472, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.968849968165159e-05, + "rewards/margins": 0.23606693744659424, + "rewards/rejected": -0.23614662885665894, + "step": 8680 + }, + { + "epoch": 6.003457814661134, + "grad_norm": 8.310800552368164, + "learning_rate": 2.220301214077148e-05, + "log_odds_chosen": 12.149660110473633, + "log_odds_ratio": -1.4518486750603188e-05, + "logits/chosen": -0.24523666501045227, + "logits/rejected": -0.3247970938682556, + "logps/chosen": -0.00015250536671373993, + "logps/rejected": -3.321934700012207, + "loss": 0.6412, + "nll_loss": 0.1602870225906372, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5250537217070814e-05, + "rewards/margins": 0.3321782350540161, + "rewards/rejected": -0.3321934938430786, + "step": 8681 + }, + { + "epoch": 6.004149377593361, + "grad_norm": 7.237411022186279, + "learning_rate": 2.2199170124481328e-05, + "log_odds_chosen": 9.43208122253418, + "log_odds_ratio": -0.00025389096117578447, + "logits/chosen": -0.5209689140319824, + "logits/rejected": -0.5669733881950378, + "logps/chosen": -0.0007305705803446472, + "logps/rejected": -1.4171273708343506, + "loss": 0.8169, + "nll_loss": 0.2042027860879898, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.305705366889015e-05, + "rewards/margins": 0.14163967967033386, + "rewards/rejected": -0.1417127400636673, + "step": 8682 + }, + { + "epoch": 6.004840940525588, + "grad_norm": 7.599490165710449, + "learning_rate": 2.219532810819118e-05, + "log_odds_chosen": 10.09867000579834, + "log_odds_ratio": -0.0008778284536674619, + "logits/chosen": -0.882388174533844, + "logits/rejected": -0.7930755615234375, + "logps/chosen": -0.0010563363321125507, + "logps/rejected": -2.356621742248535, + "loss": 0.5612, + "nll_loss": 0.14019975066184998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010563363321125507, + "rewards/margins": 0.23555654287338257, + "rewards/rejected": -0.23566217720508575, + "step": 8683 + }, + { + "epoch": 6.005532503457815, + "grad_norm": 7.226995468139648, + "learning_rate": 2.2191486091901033e-05, + "log_odds_chosen": 10.630027770996094, + "log_odds_ratio": -0.00019700817938428372, + "logits/chosen": -0.4471282958984375, + "logits/rejected": -0.523328959941864, + "logps/chosen": -0.0018144343048334122, + "logps/rejected": -2.698294162750244, + "loss": 0.818, + "nll_loss": 0.20448969304561615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001814434363041073, + "rewards/margins": 0.269648015499115, + "rewards/rejected": -0.2698294520378113, + "step": 8684 + }, + { + "epoch": 6.0062240663900415, + "grad_norm": 8.176135063171387, + "learning_rate": 2.2187644075610882e-05, + "log_odds_chosen": 10.905904769897461, + "log_odds_ratio": -3.206159090041183e-05, + "logits/chosen": -0.6330730319023132, + "logits/rejected": -0.6118956804275513, + "logps/chosen": -0.00013914890587329865, + "logps/rejected": -1.9043033123016357, + "loss": 0.613, + "nll_loss": 0.15325427055358887, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3914891496824566e-05, + "rewards/margins": 0.19041642546653748, + "rewards/rejected": -0.19043034315109253, + "step": 8685 + }, + { + "epoch": 6.006915629322268, + "grad_norm": 6.383642673492432, + "learning_rate": 2.218380205932073e-05, + "log_odds_chosen": 10.754993438720703, + "log_odds_ratio": -0.0014159767888486385, + "logits/chosen": -0.33393335342407227, + "logits/rejected": -0.4243951141834259, + "logps/chosen": -0.0009645427926443517, + "logps/rejected": -2.005563259124756, + "loss": 0.8443, + "nll_loss": 0.2109401524066925, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.645428508520126e-05, + "rewards/margins": 0.20045988261699677, + "rewards/rejected": -0.20055633783340454, + "step": 8686 + }, + { + "epoch": 6.007607192254495, + "grad_norm": 7.030446529388428, + "learning_rate": 2.2179960043030583e-05, + "log_odds_chosen": 10.016925811767578, + "log_odds_ratio": -0.00023798673646524549, + "logits/chosen": -0.22667162120342255, + "logits/rejected": -0.2796691358089447, + "logps/chosen": -0.0003618993505369872, + "logps/rejected": -1.9764633178710938, + "loss": 0.5404, + "nll_loss": 0.13506531715393066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.618993650889024e-05, + "rewards/margins": 0.19761013984680176, + "rewards/rejected": -0.1976463347673416, + "step": 8687 + }, + { + "epoch": 6.008298755186722, + "grad_norm": 5.065103054046631, + "learning_rate": 2.2176118026740432e-05, + "log_odds_chosen": 10.794071197509766, + "log_odds_ratio": -5.74659243284259e-05, + "logits/chosen": -0.3765200674533844, + "logits/rejected": -0.446674644947052, + "logps/chosen": -0.00041642412543296814, + "logps/rejected": -2.614743232727051, + "loss": 0.7369, + "nll_loss": 0.18422549962997437, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.164241545367986e-05, + "rewards/margins": 0.2614326775074005, + "rewards/rejected": -0.2614743113517761, + "step": 8688 + }, + { + "epoch": 6.008990318118949, + "grad_norm": 4.796487331390381, + "learning_rate": 2.2172276010450285e-05, + "log_odds_chosen": 10.9295072555542, + "log_odds_ratio": -7.72800121922046e-05, + "logits/chosen": -0.05144000053405762, + "logits/rejected": -0.18354326486587524, + "logps/chosen": -0.0022706189192831516, + "logps/rejected": -3.0933749675750732, + "loss": 0.6238, + "nll_loss": 0.1559487283229828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022706188610754907, + "rewards/margins": 0.3091104328632355, + "rewards/rejected": -0.3093374967575073, + "step": 8689 + }, + { + "epoch": 6.009681881051176, + "grad_norm": 6.887716770172119, + "learning_rate": 2.2168433994160137e-05, + "log_odds_chosen": 10.934017181396484, + "log_odds_ratio": -3.159624611726031e-05, + "logits/chosen": -0.3636074662208557, + "logits/rejected": -0.3056112229824066, + "logps/chosen": -9.977620356949046e-05, + "logps/rejected": -1.744262933731079, + "loss": 0.4337, + "nll_loss": 0.10841409862041473, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.977620720746927e-06, + "rewards/margins": 0.17441630363464355, + "rewards/rejected": -0.17442628741264343, + "step": 8690 + }, + { + "epoch": 6.0103734439834025, + "grad_norm": 7.19215202331543, + "learning_rate": 2.2164591977869986e-05, + "log_odds_chosen": 11.126794815063477, + "log_odds_ratio": -4.7908208216540515e-05, + "logits/chosen": -0.503757655620575, + "logits/rejected": -0.5169774889945984, + "logps/chosen": -0.0005810896982438862, + "logps/rejected": -2.8171472549438477, + "loss": 0.5578, + "nll_loss": 0.13944345712661743, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.810897710034624e-05, + "rewards/margins": 0.2816566228866577, + "rewards/rejected": -0.2817147374153137, + "step": 8691 + }, + { + "epoch": 6.011065006915629, + "grad_norm": 5.968571186065674, + "learning_rate": 2.216074996157984e-05, + "log_odds_chosen": 10.023983001708984, + "log_odds_ratio": -0.0001322976895608008, + "logits/chosen": -0.5502274036407471, + "logits/rejected": -0.5863975286483765, + "logps/chosen": -0.00030613134731538594, + "logps/rejected": -1.6076347827911377, + "loss": 1.0732, + "nll_loss": 0.26829004287719727, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.061313327634707e-05, + "rewards/margins": 0.16073286533355713, + "rewards/rejected": -0.1607634723186493, + "step": 8692 + }, + { + "epoch": 6.011756569847856, + "grad_norm": 5.204310894012451, + "learning_rate": 2.215690794528969e-05, + "log_odds_chosen": 9.940327644348145, + "log_odds_ratio": -0.0021992912515997887, + "logits/chosen": -0.4144933521747589, + "logits/rejected": -0.3945091962814331, + "logps/chosen": -0.002976007293909788, + "logps/rejected": -1.6367212533950806, + "loss": 0.4361, + "nll_loss": 0.10879306495189667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029760069446638227, + "rewards/margins": 0.16337452828884125, + "rewards/rejected": -0.16367211937904358, + "step": 8693 + }, + { + "epoch": 6.012448132780083, + "grad_norm": 5.4716644287109375, + "learning_rate": 2.215306592899954e-05, + "log_odds_chosen": 10.381438255310059, + "log_odds_ratio": -0.00014213169924914837, + "logits/chosen": -0.2392970472574234, + "logits/rejected": -0.22555887699127197, + "logps/chosen": -0.00031407614005729556, + "logps/rejected": -1.8977861404418945, + "loss": 0.4822, + "nll_loss": 0.12054598331451416, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1407613278133795e-05, + "rewards/margins": 0.18974719941616058, + "rewards/rejected": -0.1897786259651184, + "step": 8694 + }, + { + "epoch": 6.01313969571231, + "grad_norm": 9.384466171264648, + "learning_rate": 2.214922391270939e-05, + "log_odds_chosen": 9.672860145568848, + "log_odds_ratio": -0.0006632217555306852, + "logits/chosen": -0.7125736474990845, + "logits/rejected": -0.7020600438117981, + "logps/chosen": -0.002472150605171919, + "logps/rejected": -2.1107208728790283, + "loss": 0.7561, + "nll_loss": 0.188960000872612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000247215066337958, + "rewards/margins": 0.2108248770236969, + "rewards/rejected": -0.21107208728790283, + "step": 8695 + }, + { + "epoch": 6.013831258644537, + "grad_norm": 6.288478851318359, + "learning_rate": 2.214538189641924e-05, + "log_odds_chosen": 11.169961929321289, + "log_odds_ratio": -5.973395309410989e-05, + "logits/chosen": -0.3781644105911255, + "logits/rejected": -0.4019641876220703, + "logps/chosen": -0.0001706377079244703, + "logps/rejected": -2.1489315032958984, + "loss": 0.4614, + "nll_loss": 0.11533622443675995, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.706377042864915e-05, + "rewards/margins": 0.21487608551979065, + "rewards/rejected": -0.2148931622505188, + "step": 8696 + }, + { + "epoch": 6.014522821576763, + "grad_norm": 4.725882053375244, + "learning_rate": 2.214153988012909e-05, + "log_odds_chosen": 9.371036529541016, + "log_odds_ratio": -0.0006971032125875354, + "logits/chosen": -0.40736350417137146, + "logits/rejected": -0.5088552236557007, + "logps/chosen": -0.0007990387384779751, + "logps/rejected": -1.4437347650527954, + "loss": 0.4459, + "nll_loss": 0.11140558868646622, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.990387530298904e-05, + "rewards/margins": 0.14429357647895813, + "rewards/rejected": -0.14437347650527954, + "step": 8697 + }, + { + "epoch": 6.01521438450899, + "grad_norm": 4.899136543273926, + "learning_rate": 2.2137697863838943e-05, + "log_odds_chosen": 11.269777297973633, + "log_odds_ratio": -0.0002260785986436531, + "logits/chosen": -0.45080870389938354, + "logits/rejected": -0.5041922330856323, + "logps/chosen": -0.00014511129120364785, + "logps/rejected": -2.7046732902526855, + "loss": 0.7203, + "nll_loss": 0.180058091878891, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4511128028971143e-05, + "rewards/margins": 0.2704527974128723, + "rewards/rejected": -0.2704673111438751, + "step": 8698 + }, + { + "epoch": 6.015905947441217, + "grad_norm": 8.853839874267578, + "learning_rate": 2.2133855847548796e-05, + "log_odds_chosen": 10.970938682556152, + "log_odds_ratio": -5.5123193305917084e-05, + "logits/chosen": -0.610616147518158, + "logits/rejected": -0.6522665023803711, + "logps/chosen": -0.00042205723002552986, + "logps/rejected": -2.1785030364990234, + "loss": 0.6792, + "nll_loss": 0.16979989409446716, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2205727368127555e-05, + "rewards/margins": 0.2178080976009369, + "rewards/rejected": -0.21785031259059906, + "step": 8699 + }, + { + "epoch": 6.016597510373444, + "grad_norm": 5.846555709838867, + "learning_rate": 2.2130013831258645e-05, + "log_odds_chosen": 10.732621192932129, + "log_odds_ratio": -0.00025087303947657347, + "logits/chosen": -0.06174376606941223, + "logits/rejected": -0.09032157808542252, + "logps/chosen": -0.014207985252141953, + "logps/rejected": -2.8097808361053467, + "loss": 0.561, + "nll_loss": 0.14022380113601685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014207985950633883, + "rewards/margins": 0.2795572578907013, + "rewards/rejected": -0.28097808361053467, + "step": 8700 + }, + { + "epoch": 6.017289073305671, + "grad_norm": 5.967756748199463, + "learning_rate": 2.2126171814968497e-05, + "log_odds_chosen": 9.416215896606445, + "log_odds_ratio": -0.0009794974466785789, + "logits/chosen": -0.6006361842155457, + "logits/rejected": -0.6846101880073547, + "logps/chosen": -0.007930763997137547, + "logps/rejected": -1.8586578369140625, + "loss": 0.4852, + "nll_loss": 0.12120527774095535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007930764113552868, + "rewards/margins": 0.18507272005081177, + "rewards/rejected": -0.18586578965187073, + "step": 8701 + }, + { + "epoch": 6.017980636237898, + "grad_norm": 6.094478130340576, + "learning_rate": 2.212232979867835e-05, + "log_odds_chosen": 10.967626571655273, + "log_odds_ratio": -5.0005532102659345e-05, + "logits/chosen": -0.5818835496902466, + "logits/rejected": -0.7001234292984009, + "logps/chosen": -0.00010855847358470783, + "logps/rejected": -1.7564148902893066, + "loss": 0.4468, + "nll_loss": 0.11169680953025818, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0855848813662305e-05, + "rewards/margins": 0.1756306290626526, + "rewards/rejected": -0.1756414920091629, + "step": 8702 + }, + { + "epoch": 6.018672199170124, + "grad_norm": 4.230981349945068, + "learning_rate": 2.21184877823882e-05, + "log_odds_chosen": 9.438599586486816, + "log_odds_ratio": -0.01306835561990738, + "logits/chosen": -0.26618629693984985, + "logits/rejected": -0.3190756142139435, + "logps/chosen": -0.00445225415751338, + "logps/rejected": -1.745501160621643, + "loss": 0.7029, + "nll_loss": 0.1744271218776703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00044522545067593455, + "rewards/margins": 0.17410489916801453, + "rewards/rejected": -0.1745501160621643, + "step": 8703 + }, + { + "epoch": 6.019363762102351, + "grad_norm": 6.9316301345825195, + "learning_rate": 2.211464576609805e-05, + "log_odds_chosen": 10.875043869018555, + "log_odds_ratio": -0.00011402039672248065, + "logits/chosen": -0.577836275100708, + "logits/rejected": -0.5895098447799683, + "logps/chosen": -0.0004857161547988653, + "logps/rejected": -2.3168389797210693, + "loss": 0.5247, + "nll_loss": 0.13116057217121124, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.857161547988653e-05, + "rewards/margins": 0.23163533210754395, + "rewards/rejected": -0.2316839098930359, + "step": 8704 + }, + { + "epoch": 6.020055325034578, + "grad_norm": 4.984665870666504, + "learning_rate": 2.21108037498079e-05, + "log_odds_chosen": 10.290657997131348, + "log_odds_ratio": -7.351344538619742e-05, + "logits/chosen": -0.3882635235786438, + "logits/rejected": -0.42681241035461426, + "logps/chosen": -0.00023238833819050342, + "logps/rejected": -1.7680834531784058, + "loss": 0.5464, + "nll_loss": 0.13659122586250305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3238833819050342e-05, + "rewards/margins": 0.17678511142730713, + "rewards/rejected": -0.17680835723876953, + "step": 8705 + }, + { + "epoch": 6.020746887966805, + "grad_norm": 4.9300031661987305, + "learning_rate": 2.210696173351775e-05, + "log_odds_chosen": 11.626920700073242, + "log_odds_ratio": -3.644917524070479e-05, + "logits/chosen": -0.5183431506156921, + "logits/rejected": -0.5448989868164062, + "logps/chosen": -6.581601337529719e-05, + "logps/rejected": -2.022371292114258, + "loss": 0.4968, + "nll_loss": 0.1241886168718338, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5816011556307785e-06, + "rewards/margins": 0.2022305577993393, + "rewards/rejected": -0.20223712921142578, + "step": 8706 + }, + { + "epoch": 6.021438450899032, + "grad_norm": 9.541595458984375, + "learning_rate": 2.21031197172276e-05, + "log_odds_chosen": 10.2780179977417, + "log_odds_ratio": -0.00016216814401559532, + "logits/chosen": -0.6901393532752991, + "logits/rejected": -0.6745829582214355, + "logps/chosen": -0.0006011630175635219, + "logps/rejected": -2.878037452697754, + "loss": 0.6064, + "nll_loss": 0.15159110724925995, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0116301028756425e-05, + "rewards/margins": 0.28774362802505493, + "rewards/rejected": -0.2878037393093109, + "step": 8707 + }, + { + "epoch": 6.022130013831259, + "grad_norm": 7.746224880218506, + "learning_rate": 2.2099277700937454e-05, + "log_odds_chosen": 10.702656745910645, + "log_odds_ratio": -0.00015088812506292015, + "logits/chosen": -0.5035496950149536, + "logits/rejected": -0.5546244978904724, + "logps/chosen": -0.0008898228988982737, + "logps/rejected": -2.4310531616210938, + "loss": 0.602, + "nll_loss": 0.15048100054264069, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.89822913450189e-05, + "rewards/margins": 0.2430163472890854, + "rewards/rejected": -0.24310533702373505, + "step": 8708 + }, + { + "epoch": 6.022821576763485, + "grad_norm": 8.530742645263672, + "learning_rate": 2.2095435684647303e-05, + "log_odds_chosen": 12.953808784484863, + "log_odds_ratio": -2.9884475225117058e-05, + "logits/chosen": -0.26880186796188354, + "logits/rejected": -0.374568372964859, + "logps/chosen": -0.00034990967833437026, + "logps/rejected": -4.05475378036499, + "loss": 0.9199, + "nll_loss": 0.22996152937412262, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4990967833437026e-05, + "rewards/margins": 0.40544039011001587, + "rewards/rejected": -0.405475378036499, + "step": 8709 + }, + { + "epoch": 6.023513139695712, + "grad_norm": 5.546627044677734, + "learning_rate": 2.2091593668357156e-05, + "log_odds_chosen": 10.592414855957031, + "log_odds_ratio": -7.655217632418498e-05, + "logits/chosen": -0.37299686670303345, + "logits/rejected": -0.4346255660057068, + "logps/chosen": -0.0001668974873609841, + "logps/rejected": -1.760309100151062, + "loss": 0.4839, + "nll_loss": 0.12096671760082245, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.668974982749205e-05, + "rewards/margins": 0.17601421475410461, + "rewards/rejected": -0.17603090405464172, + "step": 8710 + }, + { + "epoch": 6.024204702627939, + "grad_norm": 5.520246982574463, + "learning_rate": 2.2087751652067008e-05, + "log_odds_chosen": 10.64315414428711, + "log_odds_ratio": -7.172457117121667e-05, + "logits/chosen": -0.214762344956398, + "logits/rejected": -0.4369995594024658, + "logps/chosen": -0.0005153071833774447, + "logps/rejected": -2.5201823711395264, + "loss": 0.7639, + "nll_loss": 0.1909710019826889, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.153072561370209e-05, + "rewards/margins": 0.2519667148590088, + "rewards/rejected": -0.2520182430744171, + "step": 8711 + }, + { + "epoch": 6.024896265560166, + "grad_norm": 6.77101469039917, + "learning_rate": 2.2083909635776857e-05, + "log_odds_chosen": 10.253266334533691, + "log_odds_ratio": -5.588552448898554e-05, + "logits/chosen": -0.5703224539756775, + "logits/rejected": -0.5802868008613586, + "logps/chosen": -0.00020519075042102486, + "logps/rejected": -1.7386521100997925, + "loss": 0.6399, + "nll_loss": 0.1599694937467575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0519073586910963e-05, + "rewards/margins": 0.17384469509124756, + "rewards/rejected": -0.1738651990890503, + "step": 8712 + }, + { + "epoch": 6.025587828492393, + "grad_norm": 6.571026802062988, + "learning_rate": 2.208006761948671e-05, + "log_odds_chosen": 10.851655006408691, + "log_odds_ratio": -0.0003473691758699715, + "logits/chosen": -0.479988157749176, + "logits/rejected": -0.528286337852478, + "logps/chosen": -0.00018631343846209347, + "logps/rejected": -2.036672830581665, + "loss": 0.8061, + "nll_loss": 0.20148473978042603, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.863134457380511e-05, + "rewards/margins": 0.2036486715078354, + "rewards/rejected": -0.2036673128604889, + "step": 8713 + }, + { + "epoch": 6.0262793914246195, + "grad_norm": 7.67877197265625, + "learning_rate": 2.207622560319656e-05, + "log_odds_chosen": 11.414480209350586, + "log_odds_ratio": -1.969960067071952e-05, + "logits/chosen": -0.6675363183021545, + "logits/rejected": -0.6894694566726685, + "logps/chosen": -0.00010665694571798667, + "logps/rejected": -2.019965171813965, + "loss": 0.8034, + "nll_loss": 0.20084403455257416, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0665693480405025e-05, + "rewards/margins": 0.20198586583137512, + "rewards/rejected": -0.20199653506278992, + "step": 8714 + }, + { + "epoch": 6.026970954356846, + "grad_norm": 6.10966682434082, + "learning_rate": 2.2072383586906408e-05, + "log_odds_chosen": 10.402027130126953, + "log_odds_ratio": -6.45701729808934e-05, + "logits/chosen": -0.6677394509315491, + "logits/rejected": -0.7106789350509644, + "logps/chosen": -0.00032101484248414636, + "logps/rejected": -2.0806641578674316, + "loss": 0.4961, + "nll_loss": 0.12400933355093002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.21014849760104e-05, + "rewards/margins": 0.20803432166576385, + "rewards/rejected": -0.2080664187669754, + "step": 8715 + }, + { + "epoch": 6.027662517289073, + "grad_norm": 6.375890731811523, + "learning_rate": 2.206854157061626e-05, + "log_odds_chosen": 10.099254608154297, + "log_odds_ratio": -0.00010230734187643975, + "logits/chosen": -0.8636232018470764, + "logits/rejected": -0.8537338376045227, + "logps/chosen": -0.0005410996964201331, + "logps/rejected": -1.9590046405792236, + "loss": 0.5508, + "nll_loss": 0.13769523799419403, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.410997255239636e-05, + "rewards/margins": 0.1958463490009308, + "rewards/rejected": -0.19590047001838684, + "step": 8716 + }, + { + "epoch": 6.0283540802213, + "grad_norm": 5.492069244384766, + "learning_rate": 2.2064699554326112e-05, + "log_odds_chosen": 9.815468788146973, + "log_odds_ratio": -0.00042597288847900927, + "logits/chosen": -0.47465819120407104, + "logits/rejected": -0.614909827709198, + "logps/chosen": -0.001235145260579884, + "logps/rejected": -2.1695032119750977, + "loss": 0.9836, + "nll_loss": 0.24586890637874603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001235145318787545, + "rewards/margins": 0.21682682633399963, + "rewards/rejected": -0.21695034205913544, + "step": 8717 + }, + { + "epoch": 6.029045643153527, + "grad_norm": 5.251602649688721, + "learning_rate": 2.206085753803596e-05, + "log_odds_chosen": 10.349724769592285, + "log_odds_ratio": -6.653775926679373e-05, + "logits/chosen": -0.5225825905799866, + "logits/rejected": -0.4800230860710144, + "logps/chosen": -0.0003861216246150434, + "logps/rejected": -2.0060806274414062, + "loss": 0.441, + "nll_loss": 0.11023493111133575, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.861216100631282e-05, + "rewards/margins": 0.20056945085525513, + "rewards/rejected": -0.2006080448627472, + "step": 8718 + }, + { + "epoch": 6.029737206085754, + "grad_norm": 7.304520606994629, + "learning_rate": 2.2057015521745814e-05, + "log_odds_chosen": 10.213470458984375, + "log_odds_ratio": -0.00011162283772137016, + "logits/chosen": -0.45237404108047485, + "logits/rejected": -0.5312992930412292, + "logps/chosen": -0.0010302782757207751, + "logps/rejected": -2.3630874156951904, + "loss": 0.6137, + "nll_loss": 0.1534142792224884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010302782175131142, + "rewards/margins": 0.23620572686195374, + "rewards/rejected": -0.236308753490448, + "step": 8719 + }, + { + "epoch": 6.0304287690179805, + "grad_norm": 4.732924461364746, + "learning_rate": 2.2053173505455666e-05, + "log_odds_chosen": 11.044392585754395, + "log_odds_ratio": -3.0344037440954708e-05, + "logits/chosen": -0.6467368006706238, + "logits/rejected": -0.6115586757659912, + "logps/chosen": -0.0001271823130082339, + "logps/rejected": -2.0162088871002197, + "loss": 0.5966, + "nll_loss": 0.14914806187152863, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2718230209429748e-05, + "rewards/margins": 0.20160816609859467, + "rewards/rejected": -0.20162087678909302, + "step": 8720 + }, + { + "epoch": 6.031120331950207, + "grad_norm": 8.279495239257812, + "learning_rate": 2.2049331489165515e-05, + "log_odds_chosen": 10.505151748657227, + "log_odds_ratio": -8.098056423477829e-05, + "logits/chosen": -0.2747512757778168, + "logits/rejected": -0.38079598546028137, + "logps/chosen": -0.00027252238942310214, + "logps/rejected": -2.2139976024627686, + "loss": 0.7138, + "nll_loss": 0.17843583226203918, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.725223748711869e-05, + "rewards/margins": 0.22137251496315002, + "rewards/rejected": -0.22139976918697357, + "step": 8721 + }, + { + "epoch": 6.031811894882434, + "grad_norm": 5.9185471534729, + "learning_rate": 2.2045489472875368e-05, + "log_odds_chosen": 9.572835922241211, + "log_odds_ratio": -0.0009964063065126538, + "logits/chosen": -0.6413549780845642, + "logits/rejected": -0.7216004133224487, + "logps/chosen": -0.0012645921669900417, + "logps/rejected": -1.7656288146972656, + "loss": 0.702, + "nll_loss": 0.17540577054023743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012645922834053636, + "rewards/margins": 0.1764364242553711, + "rewards/rejected": -0.17656287550926208, + "step": 8722 + }, + { + "epoch": 6.032503457814661, + "grad_norm": 12.007911682128906, + "learning_rate": 2.2041647456585217e-05, + "log_odds_chosen": 11.140923500061035, + "log_odds_ratio": -5.497312668012455e-05, + "logits/chosen": -0.6097493171691895, + "logits/rejected": -0.6834452152252197, + "logps/chosen": -0.0003111382247880101, + "logps/rejected": -2.289703845977783, + "loss": 0.6628, + "nll_loss": 0.1656874418258667, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1113824661588296e-05, + "rewards/margins": 0.22893927991390228, + "rewards/rejected": -0.22897037863731384, + "step": 8723 + }, + { + "epoch": 6.033195020746888, + "grad_norm": 6.161701202392578, + "learning_rate": 2.2037805440295066e-05, + "log_odds_chosen": 10.174674034118652, + "log_odds_ratio": -0.0007106483681127429, + "logits/chosen": -0.013030372560024261, + "logits/rejected": -0.07829655706882477, + "logps/chosen": -0.002556081395596266, + "logps/rejected": -2.195359706878662, + "loss": 1.0238, + "nll_loss": 0.2558753192424774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025560817448422313, + "rewards/margins": 0.21928036212921143, + "rewards/rejected": -0.2195359766483307, + "step": 8724 + }, + { + "epoch": 6.033886583679115, + "grad_norm": 12.640791893005371, + "learning_rate": 2.203396342400492e-05, + "log_odds_chosen": 10.730382919311523, + "log_odds_ratio": -4.421206904225983e-05, + "logits/chosen": -0.054626476019620895, + "logits/rejected": -0.17611365020275116, + "logps/chosen": -0.0003003279271069914, + "logps/rejected": -2.2857675552368164, + "loss": 0.8732, + "nll_loss": 0.218306303024292, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0032793802092783e-05, + "rewards/margins": 0.22854672372341156, + "rewards/rejected": -0.22857676446437836, + "step": 8725 + }, + { + "epoch": 6.0345781466113415, + "grad_norm": 5.11686372756958, + "learning_rate": 2.203012140771477e-05, + "log_odds_chosen": 11.164871215820312, + "log_odds_ratio": -5.090639751870185e-05, + "logits/chosen": -0.6329345107078552, + "logits/rejected": -0.7006338238716125, + "logps/chosen": -0.00026425038231536746, + "logps/rejected": -2.590266227722168, + "loss": 0.6781, + "nll_loss": 0.1695106029510498, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6425037503940985e-05, + "rewards/margins": 0.25900018215179443, + "rewards/rejected": -0.2590266168117523, + "step": 8726 + }, + { + "epoch": 6.035269709543568, + "grad_norm": 7.614181041717529, + "learning_rate": 2.202627939142462e-05, + "log_odds_chosen": 10.693385124206543, + "log_odds_ratio": -4.799907037522644e-05, + "logits/chosen": -0.26990771293640137, + "logits/rejected": -0.31676316261291504, + "logps/chosen": -0.0005678210873156786, + "logps/rejected": -2.929535388946533, + "loss": 0.7391, + "nll_loss": 0.18476933240890503, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6782111641950905e-05, + "rewards/margins": 0.29289674758911133, + "rewards/rejected": -0.2929535508155823, + "step": 8727 + }, + { + "epoch": 6.035961272475795, + "grad_norm": 5.707152366638184, + "learning_rate": 2.2022437375134472e-05, + "log_odds_chosen": 10.58765983581543, + "log_odds_ratio": -0.0001340707967756316, + "logits/chosen": -0.7809407711029053, + "logits/rejected": -0.8220765590667725, + "logps/chosen": -0.0005716230371035635, + "logps/rejected": -1.914036512374878, + "loss": 0.5423, + "nll_loss": 0.13555540144443512, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.716230589314364e-05, + "rewards/margins": 0.19134649634361267, + "rewards/rejected": -0.19140365719795227, + "step": 8728 + }, + { + "epoch": 6.036652835408022, + "grad_norm": 4.701399326324463, + "learning_rate": 2.2018595358844325e-05, + "log_odds_chosen": 10.997509002685547, + "log_odds_ratio": -5.641685493174009e-05, + "logits/chosen": -0.6777703166007996, + "logits/rejected": -0.7699306011199951, + "logps/chosen": -0.0004010865814052522, + "logps/rejected": -2.258298873901367, + "loss": 0.8187, + "nll_loss": 0.2046590894460678, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0108665416482836e-05, + "rewards/margins": 0.22578977048397064, + "rewards/rejected": -0.2258298695087433, + "step": 8729 + }, + { + "epoch": 6.037344398340249, + "grad_norm": 6.020839691162109, + "learning_rate": 2.2014753342554174e-05, + "log_odds_chosen": 10.484159469604492, + "log_odds_ratio": -0.0008418535580858588, + "logits/chosen": -0.5254714488983154, + "logits/rejected": -0.5732161998748779, + "logps/chosen": -0.0012820654083043337, + "logps/rejected": -2.5081441402435303, + "loss": 0.5102, + "nll_loss": 0.12746252119541168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001282065495615825, + "rewards/margins": 0.2506862282752991, + "rewards/rejected": -0.25081440806388855, + "step": 8730 + }, + { + "epoch": 6.038035961272476, + "grad_norm": 6.0337982177734375, + "learning_rate": 2.2010911326264026e-05, + "log_odds_chosen": 10.774333000183105, + "log_odds_ratio": -2.366080298088491e-05, + "logits/chosen": -0.4981067180633545, + "logits/rejected": -0.5781898498535156, + "logps/chosen": -0.00015158558380790055, + "logps/rejected": -1.8667584657669067, + "loss": 0.3905, + "nll_loss": 0.09761685132980347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5158560017880518e-05, + "rewards/margins": 0.18666070699691772, + "rewards/rejected": -0.18667584657669067, + "step": 8731 + }, + { + "epoch": 6.0387275242047025, + "grad_norm": 11.3192777633667, + "learning_rate": 2.2007069309973875e-05, + "log_odds_chosen": 11.054316520690918, + "log_odds_ratio": -9.583937207935378e-05, + "logits/chosen": -0.5879403352737427, + "logits/rejected": -0.6534625291824341, + "logps/chosen": -0.00024015655799303204, + "logps/rejected": -1.997798204421997, + "loss": 0.6967, + "nll_loss": 0.1741618514060974, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4015656890696846e-05, + "rewards/margins": 0.19975581765174866, + "rewards/rejected": -0.19977982342243195, + "step": 8732 + }, + { + "epoch": 6.039419087136929, + "grad_norm": 5.469114303588867, + "learning_rate": 2.2003227293683724e-05, + "log_odds_chosen": 10.51624584197998, + "log_odds_ratio": -4.1717499698279426e-05, + "logits/chosen": -0.2299852818250656, + "logits/rejected": -0.33670753240585327, + "logps/chosen": -0.00031217903597280383, + "logps/rejected": -2.0699970722198486, + "loss": 0.543, + "nll_loss": 0.1357382833957672, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.121790359728038e-05, + "rewards/margins": 0.2069685161113739, + "rewards/rejected": -0.20699971914291382, + "step": 8733 + }, + { + "epoch": 6.040110650069156, + "grad_norm": 6.060972690582275, + "learning_rate": 2.1999385277393577e-05, + "log_odds_chosen": 10.410504341125488, + "log_odds_ratio": -7.131236634450033e-05, + "logits/chosen": -0.4340049624443054, + "logits/rejected": -0.3281274437904358, + "logps/chosen": -0.0002894492354243994, + "logps/rejected": -2.016857624053955, + "loss": 0.5125, + "nll_loss": 0.12811097502708435, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8944923542439938e-05, + "rewards/margins": 0.20165681838989258, + "rewards/rejected": -0.20168575644493103, + "step": 8734 + }, + { + "epoch": 6.040802213001383, + "grad_norm": 10.327274322509766, + "learning_rate": 2.1995543261103426e-05, + "log_odds_chosen": 10.888412475585938, + "log_odds_ratio": -2.7028379918192513e-05, + "logits/chosen": -0.7649399042129517, + "logits/rejected": -0.7249663472175598, + "logps/chosen": -0.0001005790545605123, + "logps/rejected": -1.711350679397583, + "loss": 0.533, + "nll_loss": 0.13324396312236786, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.005790545605123e-05, + "rewards/margins": 0.17112502455711365, + "rewards/rejected": -0.1711350828409195, + "step": 8735 + }, + { + "epoch": 6.04149377593361, + "grad_norm": 6.163614273071289, + "learning_rate": 2.199170124481328e-05, + "log_odds_chosen": 11.648406028747559, + "log_odds_ratio": -1.5283943866961636e-05, + "logits/chosen": -0.784024715423584, + "logits/rejected": -0.7108168601989746, + "logps/chosen": -0.00010706988541642204, + "logps/rejected": -2.4180727005004883, + "loss": 0.6191, + "nll_loss": 0.15476897358894348, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0706989087339025e-05, + "rewards/margins": 0.2417965829372406, + "rewards/rejected": -0.24180728197097778, + "step": 8736 + }, + { + "epoch": 6.042185338865837, + "grad_norm": 5.836353302001953, + "learning_rate": 2.198785922852313e-05, + "log_odds_chosen": 11.407360076904297, + "log_odds_ratio": -6.587072857655585e-05, + "logits/chosen": -0.5999523401260376, + "logits/rejected": -0.6171562075614929, + "logps/chosen": -0.0004752510867547244, + "logps/rejected": -3.2661917209625244, + "loss": 1.0002, + "nll_loss": 0.2500498294830322, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.752511085825972e-05, + "rewards/margins": 0.32657167315483093, + "rewards/rejected": -0.32661914825439453, + "step": 8737 + }, + { + "epoch": 6.0428769017980635, + "grad_norm": 6.33279275894165, + "learning_rate": 2.198401721223298e-05, + "log_odds_chosen": 11.425430297851562, + "log_odds_ratio": -1.806885666155722e-05, + "logits/chosen": -0.6484169363975525, + "logits/rejected": -0.4264671802520752, + "logps/chosen": -0.000468467827886343, + "logps/rejected": -2.840762138366699, + "loss": 0.5571, + "nll_loss": 0.13927598297595978, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.684678788180463e-05, + "rewards/margins": 0.28402936458587646, + "rewards/rejected": -0.2840762138366699, + "step": 8738 + }, + { + "epoch": 6.04356846473029, + "grad_norm": 6.053421497344971, + "learning_rate": 2.1980175195942832e-05, + "log_odds_chosen": 10.433453559875488, + "log_odds_ratio": -0.00021078737336210907, + "logits/chosen": -0.27353599667549133, + "logits/rejected": -0.380341112613678, + "logps/chosen": -0.0008415814954787493, + "logps/rejected": -2.3961219787597656, + "loss": 0.6963, + "nll_loss": 0.17405669391155243, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.415814954787493e-05, + "rewards/margins": 0.23952805995941162, + "rewards/rejected": -0.23961222171783447, + "step": 8739 + }, + { + "epoch": 6.044260027662517, + "grad_norm": 5.677506923675537, + "learning_rate": 2.1976333179652685e-05, + "log_odds_chosen": 11.112959861755371, + "log_odds_ratio": -4.309674113756046e-05, + "logits/chosen": 0.1854228973388672, + "logits/rejected": 0.0214783176779747, + "logps/chosen": -0.00015564000932499766, + "logps/rejected": -2.153843641281128, + "loss": 0.739, + "nll_loss": 0.18473802506923676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5564000932499766e-05, + "rewards/margins": 0.21536879241466522, + "rewards/rejected": -0.2153843641281128, + "step": 8740 + }, + { + "epoch": 6.044951590594744, + "grad_norm": 10.937490463256836, + "learning_rate": 2.1972491163362534e-05, + "log_odds_chosen": 9.786539077758789, + "log_odds_ratio": -0.00013552032760344446, + "logits/chosen": -0.7398344874382019, + "logits/rejected": -0.7491805553436279, + "logps/chosen": -0.0013153355102986097, + "logps/rejected": -1.9373672008514404, + "loss": 0.6691, + "nll_loss": 0.1672721803188324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013153356849215925, + "rewards/margins": 0.1936051845550537, + "rewards/rejected": -0.1937367171049118, + "step": 8741 + }, + { + "epoch": 6.045643153526971, + "grad_norm": 7.335566997528076, + "learning_rate": 2.1968649147072383e-05, + "log_odds_chosen": 9.627555847167969, + "log_odds_ratio": -0.0001948888530023396, + "logits/chosen": -0.3438924551010132, + "logits/rejected": -0.501375675201416, + "logps/chosen": -0.00024168803065549582, + "logps/rejected": -1.2390199899673462, + "loss": 0.8358, + "nll_loss": 0.20894263684749603, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.416880124656018e-05, + "rewards/margins": 0.12387783080339432, + "rewards/rejected": -0.12390200793743134, + "step": 8742 + }, + { + "epoch": 6.046334716459198, + "grad_norm": 4.566446781158447, + "learning_rate": 2.1964807130782235e-05, + "log_odds_chosen": 10.265748023986816, + "log_odds_ratio": -9.862089063972235e-05, + "logits/chosen": -0.625510036945343, + "logits/rejected": -0.5897167325019836, + "logps/chosen": -0.00043656807974912226, + "logps/rejected": -1.9950671195983887, + "loss": 1.0323, + "nll_loss": 0.2580625116825104, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3656807974912226e-05, + "rewards/margins": 0.19946306943893433, + "rewards/rejected": -0.1995067149400711, + "step": 8743 + }, + { + "epoch": 6.0470262793914245, + "grad_norm": 5.270570278167725, + "learning_rate": 2.1960965114492084e-05, + "log_odds_chosen": 11.466952323913574, + "log_odds_ratio": -3.146990275126882e-05, + "logits/chosen": -0.5958943367004395, + "logits/rejected": -0.6558508276939392, + "logps/chosen": -0.00024944625329226255, + "logps/rejected": -2.6926231384277344, + "loss": 0.7491, + "nll_loss": 0.18726211786270142, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.494462751201354e-05, + "rewards/margins": 0.26923736929893494, + "rewards/rejected": -0.26926231384277344, + "step": 8744 + }, + { + "epoch": 6.047717842323651, + "grad_norm": 6.721274375915527, + "learning_rate": 2.1957123098201937e-05, + "log_odds_chosen": 10.474918365478516, + "log_odds_ratio": -0.0010723298182711005, + "logits/chosen": -0.36522969603538513, + "logits/rejected": -0.42422711849212646, + "logps/chosen": -0.0009068144718185067, + "logps/rejected": -2.2234137058258057, + "loss": 0.5801, + "nll_loss": 0.14491480588912964, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.068144572665915e-05, + "rewards/margins": 0.22225069999694824, + "rewards/rejected": -0.2223413735628128, + "step": 8745 + }, + { + "epoch": 6.048409405255878, + "grad_norm": 9.751940727233887, + "learning_rate": 2.195328108191179e-05, + "log_odds_chosen": 10.69310188293457, + "log_odds_ratio": -0.00014623063907492906, + "logits/chosen": -0.4476546049118042, + "logits/rejected": -0.47708776593208313, + "logps/chosen": -0.001096154097467661, + "logps/rejected": -2.643292188644409, + "loss": 0.5479, + "nll_loss": 0.1369646042585373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010961541556753218, + "rewards/margins": 0.26421958208084106, + "rewards/rejected": -0.2643292248249054, + "step": 8746 + }, + { + "epoch": 6.049100968188105, + "grad_norm": 4.666476726531982, + "learning_rate": 2.194943906562164e-05, + "log_odds_chosen": 9.971809387207031, + "log_odds_ratio": -0.00015414562949445099, + "logits/chosen": -0.5992689728736877, + "logits/rejected": -0.6264042258262634, + "logps/chosen": -0.0009661355288699269, + "logps/rejected": -1.293394684791565, + "loss": 0.4689, + "nll_loss": 0.11721152067184448, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.661355579737574e-05, + "rewards/margins": 0.12924285233020782, + "rewards/rejected": -0.12933948636054993, + "step": 8747 + }, + { + "epoch": 6.049792531120332, + "grad_norm": 9.599535942077637, + "learning_rate": 2.194559704933149e-05, + "log_odds_chosen": 11.24213981628418, + "log_odds_ratio": -2.586483969935216e-05, + "logits/chosen": -0.49608927965164185, + "logits/rejected": -0.6723200678825378, + "logps/chosen": -0.00033675608574412763, + "logps/rejected": -2.715348720550537, + "loss": 0.7347, + "nll_loss": 0.18367436528205872, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3675609302008525e-05, + "rewards/margins": 0.27150118350982666, + "rewards/rejected": -0.27153486013412476, + "step": 8748 + }, + { + "epoch": 6.050484094052559, + "grad_norm": 5.931811332702637, + "learning_rate": 2.1941755033041343e-05, + "log_odds_chosen": 11.314300537109375, + "log_odds_ratio": -3.0137358407955617e-05, + "logits/chosen": -0.2631280720233917, + "logits/rejected": -0.15379023551940918, + "logps/chosen": -0.00013942176883574575, + "logps/rejected": -2.585228204727173, + "loss": 0.6119, + "nll_loss": 0.15297843515872955, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3942176337877754e-05, + "rewards/margins": 0.2585088610649109, + "rewards/rejected": -0.25852280855178833, + "step": 8749 + }, + { + "epoch": 6.051175656984785, + "grad_norm": 6.247043132781982, + "learning_rate": 2.1937913016751192e-05, + "log_odds_chosen": 10.999902725219727, + "log_odds_ratio": -1.8577326045488007e-05, + "logits/chosen": -0.564647912979126, + "logits/rejected": -0.6719080209732056, + "logps/chosen": -6.438637501560152e-05, + "logps/rejected": -1.439497709274292, + "loss": 0.4286, + "nll_loss": 0.10715620219707489, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.438637683459092e-06, + "rewards/margins": 0.14394333958625793, + "rewards/rejected": -0.14394977688789368, + "step": 8750 + }, + { + "epoch": 6.051867219917012, + "grad_norm": 7.51104736328125, + "learning_rate": 2.193407100046104e-05, + "log_odds_chosen": 9.899858474731445, + "log_odds_ratio": -0.00010276660032104701, + "logits/chosen": -0.40770965814590454, + "logits/rejected": -0.48507434129714966, + "logps/chosen": -0.0003978637687396258, + "logps/rejected": -1.6586893796920776, + "loss": 0.5209, + "nll_loss": 0.13021023571491241, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.978637687396258e-05, + "rewards/margins": 0.1658291518688202, + "rewards/rejected": -0.16586895287036896, + "step": 8751 + }, + { + "epoch": 6.052558782849239, + "grad_norm": 11.98574447631836, + "learning_rate": 2.1930228984170894e-05, + "log_odds_chosen": 10.0965576171875, + "log_odds_ratio": -0.0003631242143455893, + "logits/chosen": -0.3025035858154297, + "logits/rejected": -0.44557201862335205, + "logps/chosen": -0.0006092924159020185, + "logps/rejected": -1.8992372751235962, + "loss": 0.6847, + "nll_loss": 0.17114058136940002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092924377298914e-05, + "rewards/margins": 0.18986281752586365, + "rewards/rejected": -0.1899237334728241, + "step": 8752 + }, + { + "epoch": 6.053250345781466, + "grad_norm": 6.399974822998047, + "learning_rate": 2.1926386967880743e-05, + "log_odds_chosen": 11.87247085571289, + "log_odds_ratio": -1.9874878489645198e-05, + "logits/chosen": -0.4109629988670349, + "logits/rejected": -0.34680724143981934, + "logps/chosen": -0.000150459905853495, + "logps/rejected": -2.954695463180542, + "loss": 0.5667, + "nll_loss": 0.14166241884231567, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5045991858642083e-05, + "rewards/margins": 0.2954545021057129, + "rewards/rejected": -0.2954695224761963, + "step": 8753 + }, + { + "epoch": 6.053941908713693, + "grad_norm": 3.7555723190307617, + "learning_rate": 2.1922544951590595e-05, + "log_odds_chosen": 11.044259071350098, + "log_odds_ratio": -0.00011946244194405153, + "logits/chosen": -0.8748759627342224, + "logits/rejected": -0.8916746973991394, + "logps/chosen": -0.00022297601390164346, + "logps/rejected": -2.106964349746704, + "loss": 0.5291, + "nll_loss": 0.13227327167987823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.229760320915375e-05, + "rewards/margins": 0.21067410707473755, + "rewards/rejected": -0.21069641411304474, + "step": 8754 + }, + { + "epoch": 6.05463347164592, + "grad_norm": 6.178657054901123, + "learning_rate": 2.1918702935300448e-05, + "log_odds_chosen": 10.054718017578125, + "log_odds_ratio": -6.976965232752264e-05, + "logits/chosen": -0.617241621017456, + "logits/rejected": -0.643979012966156, + "logps/chosen": -0.00029691471718251705, + "logps/rejected": -1.5716367959976196, + "loss": 0.4473, + "nll_loss": 0.11182103306055069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9691473173443228e-05, + "rewards/margins": 0.15713398158550262, + "rewards/rejected": -0.15716367959976196, + "step": 8755 + }, + { + "epoch": 6.055325034578146, + "grad_norm": 5.386208534240723, + "learning_rate": 2.1914860919010297e-05, + "log_odds_chosen": 10.863911628723145, + "log_odds_ratio": -4.7492121666437015e-05, + "logits/chosen": -0.40668195486068726, + "logits/rejected": -0.4869404435157776, + "logps/chosen": -0.00022119340428616852, + "logps/rejected": -2.426424026489258, + "loss": 0.536, + "nll_loss": 0.13398811221122742, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.211934042861685e-05, + "rewards/margins": 0.2426202893257141, + "rewards/rejected": -0.24264241755008698, + "step": 8756 + }, + { + "epoch": 6.056016597510373, + "grad_norm": 10.927085876464844, + "learning_rate": 2.191101890272015e-05, + "log_odds_chosen": 10.562910079956055, + "log_odds_ratio": -3.968338933191262e-05, + "logits/chosen": -0.40782853960990906, + "logits/rejected": -0.4667756259441376, + "logps/chosen": -0.00017079540702980012, + "logps/rejected": -1.7969799041748047, + "loss": 0.633, + "nll_loss": 0.15824900567531586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7079541066777892e-05, + "rewards/margins": 0.17968091368675232, + "rewards/rejected": -0.17969800531864166, + "step": 8757 + }, + { + "epoch": 6.0567081604426, + "grad_norm": 4.373627662658691, + "learning_rate": 2.190717688643e-05, + "log_odds_chosen": 11.044089317321777, + "log_odds_ratio": -2.8408252546796575e-05, + "logits/chosen": -0.47975000739097595, + "logits/rejected": -0.5330215096473694, + "logps/chosen": -0.000502656155731529, + "logps/rejected": -2.2571511268615723, + "loss": 0.4773, + "nll_loss": 0.119330994784832, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0265618483535945e-05, + "rewards/margins": 0.22566482424736023, + "rewards/rejected": -0.22571511566638947, + "step": 8758 + }, + { + "epoch": 6.057399723374827, + "grad_norm": 5.878119468688965, + "learning_rate": 2.190333487013985e-05, + "log_odds_chosen": 10.509617805480957, + "log_odds_ratio": -0.00012422242434695363, + "logits/chosen": -0.5751892328262329, + "logits/rejected": -0.7009394764900208, + "logps/chosen": -0.00025830554659478366, + "logps/rejected": -2.0686752796173096, + "loss": 0.5925, + "nll_loss": 0.1481102705001831, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5830553568084724e-05, + "rewards/margins": 0.20684170722961426, + "rewards/rejected": -0.2068675458431244, + "step": 8759 + }, + { + "epoch": 6.058091286307054, + "grad_norm": 6.645744323730469, + "learning_rate": 2.18994928538497e-05, + "log_odds_chosen": 9.95135498046875, + "log_odds_ratio": -0.00012155869626440108, + "logits/chosen": -0.23222099244594574, + "logits/rejected": -0.3660616874694824, + "logps/chosen": -0.0002634202828630805, + "logps/rejected": -1.813197135925293, + "loss": 0.6914, + "nll_loss": 0.17283284664154053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6342029741499573e-05, + "rewards/margins": 0.18129336833953857, + "rewards/rejected": -0.1813197135925293, + "step": 8760 + }, + { + "epoch": 6.058782849239281, + "grad_norm": 5.354851245880127, + "learning_rate": 2.1895650837559552e-05, + "log_odds_chosen": 10.4942626953125, + "log_odds_ratio": -0.0005334573797881603, + "logits/chosen": -0.8777337074279785, + "logits/rejected": -0.8068556785583496, + "logps/chosen": -0.00047751181409694254, + "logps/rejected": -2.2734134197235107, + "loss": 0.5328, + "nll_loss": 0.13314048945903778, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7751182137290016e-05, + "rewards/margins": 0.22729359567165375, + "rewards/rejected": -0.22734133899211884, + "step": 8761 + }, + { + "epoch": 6.059474412171507, + "grad_norm": 5.62773323059082, + "learning_rate": 2.18918088212694e-05, + "log_odds_chosen": 10.236888885498047, + "log_odds_ratio": -0.0005244009662419558, + "logits/chosen": -0.4014491140842438, + "logits/rejected": -0.46045809984207153, + "logps/chosen": -0.00047710942453704774, + "logps/rejected": -2.1145997047424316, + "loss": 0.8807, + "nll_loss": 0.2201147973537445, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.771094609168358e-05, + "rewards/margins": 0.211412250995636, + "rewards/rejected": -0.2114599496126175, + "step": 8762 + }, + { + "epoch": 6.060165975103734, + "grad_norm": 7.158164024353027, + "learning_rate": 2.1887966804979254e-05, + "log_odds_chosen": 10.382726669311523, + "log_odds_ratio": -0.0002478898677509278, + "logits/chosen": -0.5366454124450684, + "logits/rejected": -0.5365896821022034, + "logps/chosen": -0.003330084728077054, + "logps/rejected": -2.4704389572143555, + "loss": 0.593, + "nll_loss": 0.14822663366794586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00033300850191153586, + "rewards/margins": 0.2467108964920044, + "rewards/rejected": -0.2470439076423645, + "step": 8763 + }, + { + "epoch": 6.060857538035961, + "grad_norm": 6.579989910125732, + "learning_rate": 2.1884124788689106e-05, + "log_odds_chosen": 10.012670516967773, + "log_odds_ratio": -0.00017603966989554465, + "logits/chosen": -0.5483078360557556, + "logits/rejected": -0.6120085716247559, + "logps/chosen": -0.0004534853796940297, + "logps/rejected": -1.733701229095459, + "loss": 0.583, + "nll_loss": 0.14573873579502106, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.534853724180721e-05, + "rewards/margins": 0.17332476377487183, + "rewards/rejected": -0.1733701229095459, + "step": 8764 + }, + { + "epoch": 6.061549100968188, + "grad_norm": 5.654554843902588, + "learning_rate": 2.1880282772398955e-05, + "log_odds_chosen": 9.292821884155273, + "log_odds_ratio": -0.0005053141503594816, + "logits/chosen": -0.6640750765800476, + "logits/rejected": -0.6798596978187561, + "logps/chosen": -0.0007134718471206725, + "logps/rejected": -1.5490200519561768, + "loss": 0.6347, + "nll_loss": 0.15863104164600372, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.134718907764181e-05, + "rewards/margins": 0.154830664396286, + "rewards/rejected": -0.15490201115608215, + "step": 8765 + }, + { + "epoch": 6.062240663900415, + "grad_norm": 5.678080081939697, + "learning_rate": 2.1876440756108808e-05, + "log_odds_chosen": 8.85152816772461, + "log_odds_ratio": -0.0004262386355549097, + "logits/chosen": -0.519461989402771, + "logits/rejected": -0.6006239652633667, + "logps/chosen": -0.0010829487582668662, + "logps/rejected": -1.4057854413986206, + "loss": 0.5594, + "nll_loss": 0.13980016112327576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001082948874682188, + "rewards/margins": 0.1404702514410019, + "rewards/rejected": -0.14057855308055878, + "step": 8766 + }, + { + "epoch": 6.0629322268326415, + "grad_norm": 9.73702621459961, + "learning_rate": 2.187259873981866e-05, + "log_odds_chosen": 10.37993049621582, + "log_odds_ratio": -4.671388160204515e-05, + "logits/chosen": -0.5082290172576904, + "logits/rejected": -0.5737982988357544, + "logps/chosen": -0.00029075262136757374, + "logps/rejected": -2.170318841934204, + "loss": 0.6437, + "nll_loss": 0.16090813279151917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.907525959017221e-05, + "rewards/margins": 0.21700282394886017, + "rewards/rejected": -0.21703189611434937, + "step": 8767 + }, + { + "epoch": 6.063623789764868, + "grad_norm": 8.46346664428711, + "learning_rate": 2.186875672352851e-05, + "log_odds_chosen": 9.255306243896484, + "log_odds_ratio": -0.000526092597283423, + "logits/chosen": -0.3945017158985138, + "logits/rejected": -0.47494441270828247, + "logps/chosen": -0.0019078406039625406, + "logps/rejected": -1.9639620780944824, + "loss": 0.5382, + "nll_loss": 0.1345093846321106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019078404875472188, + "rewards/margins": 0.19620543718338013, + "rewards/rejected": -0.19639620184898376, + "step": 8768 + }, + { + "epoch": 6.064315352697095, + "grad_norm": 6.009820938110352, + "learning_rate": 2.1864914707238358e-05, + "log_odds_chosen": 10.627840042114258, + "log_odds_ratio": -0.00033696723403409123, + "logits/chosen": -0.2139442265033722, + "logits/rejected": -0.12539441883563995, + "logps/chosen": -0.0006009953212924302, + "logps/rejected": -2.532836675643921, + "loss": 0.5323, + "nll_loss": 0.1330462396144867, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009953358443454e-05, + "rewards/margins": 0.25322356820106506, + "rewards/rejected": -0.25328364968299866, + "step": 8769 + }, + { + "epoch": 6.065006915629322, + "grad_norm": 5.412619590759277, + "learning_rate": 2.186107269094821e-05, + "log_odds_chosen": 10.109893798828125, + "log_odds_ratio": -0.00016745369066484272, + "logits/chosen": -0.5539190769195557, + "logits/rejected": -0.6296762824058533, + "logps/chosen": -0.00047056650510057807, + "logps/rejected": -1.8397618532180786, + "loss": 0.5788, + "nll_loss": 0.14467471837997437, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.705665196524933e-05, + "rewards/margins": 0.18392911553382874, + "rewards/rejected": -0.1839761734008789, + "step": 8770 + }, + { + "epoch": 6.065698478561549, + "grad_norm": 6.335086822509766, + "learning_rate": 2.185723067465806e-05, + "log_odds_chosen": 9.797880172729492, + "log_odds_ratio": -0.000998196774162352, + "logits/chosen": -0.8057728409767151, + "logits/rejected": -0.7389850616455078, + "logps/chosen": -0.0006941133178770542, + "logps/rejected": -2.1998682022094727, + "loss": 0.7095, + "nll_loss": 0.17727643251419067, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.941134051885456e-05, + "rewards/margins": 0.2199174016714096, + "rewards/rejected": -0.21998681128025055, + "step": 8771 + }, + { + "epoch": 6.066390041493776, + "grad_norm": 6.116896629333496, + "learning_rate": 2.1853388658367912e-05, + "log_odds_chosen": 10.272491455078125, + "log_odds_ratio": -0.00020923007105011493, + "logits/chosen": -0.30253416299819946, + "logits/rejected": -0.30119574069976807, + "logps/chosen": -0.0007446683594025671, + "logps/rejected": -1.7595021724700928, + "loss": 0.5707, + "nll_loss": 0.14265257120132446, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.446683594025671e-05, + "rewards/margins": 0.17587575316429138, + "rewards/rejected": -0.17595022916793823, + "step": 8772 + }, + { + "epoch": 6.0670816044260025, + "grad_norm": 5.565364360809326, + "learning_rate": 2.1849546642077765e-05, + "log_odds_chosen": 10.232695579528809, + "log_odds_ratio": -0.00010011553240474313, + "logits/chosen": -0.6119440793991089, + "logits/rejected": -0.6406114101409912, + "logps/chosen": -0.00029022886883467436, + "logps/rejected": -1.9426348209381104, + "loss": 0.7586, + "nll_loss": 0.18964678049087524, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9022887247265317e-05, + "rewards/margins": 0.1942344605922699, + "rewards/rejected": -0.1942634880542755, + "step": 8773 + }, + { + "epoch": 6.067773167358229, + "grad_norm": 4.671210765838623, + "learning_rate": 2.1845704625787614e-05, + "log_odds_chosen": 10.74302864074707, + "log_odds_ratio": -3.786133311223239e-05, + "logits/chosen": -0.643653392791748, + "logits/rejected": -0.7036978006362915, + "logps/chosen": -0.00018377033120486885, + "logps/rejected": -2.0579030513763428, + "loss": 0.4593, + "nll_loss": 0.11483326554298401, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8377033484284766e-05, + "rewards/margins": 0.20577193796634674, + "rewards/rejected": -0.20579031109809875, + "step": 8774 + }, + { + "epoch": 6.068464730290456, + "grad_norm": 9.357017517089844, + "learning_rate": 2.1841862609497466e-05, + "log_odds_chosen": 11.394503593444824, + "log_odds_ratio": -4.771078238263726e-05, + "logits/chosen": -0.53241366147995, + "logits/rejected": -0.6427210569381714, + "logps/chosen": -0.00017528921307530254, + "logps/rejected": -2.24078369140625, + "loss": 0.7093, + "nll_loss": 0.17733241617679596, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7528920579934493e-05, + "rewards/margins": 0.2240608185529709, + "rewards/rejected": -0.22407834231853485, + "step": 8775 + }, + { + "epoch": 6.069156293222683, + "grad_norm": 5.8118205070495605, + "learning_rate": 2.183802059320732e-05, + "log_odds_chosen": 10.930205345153809, + "log_odds_ratio": -3.402809306862764e-05, + "logits/chosen": -0.2228926718235016, + "logits/rejected": -0.3147223889827728, + "logps/chosen": -0.00012596958549693227, + "logps/rejected": -1.8366320133209229, + "loss": 0.6962, + "nll_loss": 0.17405246198177338, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2596959095390048e-05, + "rewards/margins": 0.18365059792995453, + "rewards/rejected": -0.18366320431232452, + "step": 8776 + }, + { + "epoch": 6.06984785615491, + "grad_norm": 7.497174263000488, + "learning_rate": 2.1834178576917168e-05, + "log_odds_chosen": 10.006776809692383, + "log_odds_ratio": -0.0003789706388488412, + "logits/chosen": -0.40487217903137207, + "logits/rejected": -0.34448304772377014, + "logps/chosen": -0.0007712909136898816, + "logps/rejected": -1.9380466938018799, + "loss": 0.8276, + "nll_loss": 0.2068547010421753, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.71290942793712e-05, + "rewards/margins": 0.1937275528907776, + "rewards/rejected": -0.19380466639995575, + "step": 8777 + }, + { + "epoch": 6.070539419087137, + "grad_norm": 6.315959930419922, + "learning_rate": 2.1830336560627017e-05, + "log_odds_chosen": 11.265578269958496, + "log_odds_ratio": -1.984885057026986e-05, + "logits/chosen": -0.29897958040237427, + "logits/rejected": -0.42221328616142273, + "logps/chosen": -0.00015299460210371763, + "logps/rejected": -2.132373809814453, + "loss": 0.5165, + "nll_loss": 0.12912456691265106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5299459846573882e-05, + "rewards/margins": 0.21322208642959595, + "rewards/rejected": -0.21323740482330322, + "step": 8778 + }, + { + "epoch": 6.0712309820193635, + "grad_norm": 5.4421539306640625, + "learning_rate": 2.182649454433687e-05, + "log_odds_chosen": 10.107906341552734, + "log_odds_ratio": -7.581239333376288e-05, + "logits/chosen": -0.13145671784877777, + "logits/rejected": -0.1686045080423355, + "logps/chosen": -0.0011641870951279998, + "logps/rejected": -1.9716193675994873, + "loss": 0.7435, + "nll_loss": 0.18586105108261108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011641871969914064, + "rewards/margins": 0.19704553484916687, + "rewards/rejected": -0.1971619725227356, + "step": 8779 + }, + { + "epoch": 6.07192254495159, + "grad_norm": 6.829347610473633, + "learning_rate": 2.1822652528046718e-05, + "log_odds_chosen": 10.207975387573242, + "log_odds_ratio": -0.00010395667050033808, + "logits/chosen": -0.7461816072463989, + "logits/rejected": -0.9530609250068665, + "logps/chosen": -0.0006818310357630253, + "logps/rejected": -1.8421964645385742, + "loss": 0.5781, + "nll_loss": 0.14451363682746887, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.81831079418771e-05, + "rewards/margins": 0.18415147066116333, + "rewards/rejected": -0.18421964347362518, + "step": 8780 + }, + { + "epoch": 6.072614107883817, + "grad_norm": 7.78087043762207, + "learning_rate": 2.181881051175657e-05, + "log_odds_chosen": 11.349825859069824, + "log_odds_ratio": -0.0001022371870931238, + "logits/chosen": -0.37964165210723877, + "logits/rejected": -0.4040919244289398, + "logps/chosen": -0.0001565673155710101, + "logps/rejected": -2.2509853839874268, + "loss": 0.5442, + "nll_loss": 0.1360395848751068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.565673119330313e-05, + "rewards/margins": 0.2250829041004181, + "rewards/rejected": -0.22509855031967163, + "step": 8781 + }, + { + "epoch": 6.073305670816044, + "grad_norm": 11.672771453857422, + "learning_rate": 2.1814968495466423e-05, + "log_odds_chosen": 10.513250350952148, + "log_odds_ratio": -7.949629798531532e-05, + "logits/chosen": -0.1163056492805481, + "logits/rejected": -0.2008170783519745, + "logps/chosen": -0.00033721962245181203, + "logps/rejected": -2.001417636871338, + "loss": 0.6344, + "nll_loss": 0.15858982503414154, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.372196078998968e-05, + "rewards/margins": 0.20010803639888763, + "rewards/rejected": -0.20014174282550812, + "step": 8782 + }, + { + "epoch": 6.073997233748271, + "grad_norm": 5.708601474761963, + "learning_rate": 2.1811126479176272e-05, + "log_odds_chosen": 9.723910331726074, + "log_odds_ratio": -0.0002251253608847037, + "logits/chosen": -0.009398063644766808, + "logits/rejected": -0.16576920449733734, + "logps/chosen": -0.001370785990729928, + "logps/rejected": -2.355374574661255, + "loss": 0.9146, + "nll_loss": 0.22861604392528534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001370786048937589, + "rewards/margins": 0.23540037870407104, + "rewards/rejected": -0.23553743958473206, + "step": 8783 + }, + { + "epoch": 6.074688796680498, + "grad_norm": 9.264809608459473, + "learning_rate": 2.1807284462886125e-05, + "log_odds_chosen": 10.570226669311523, + "log_odds_ratio": -0.0011507862946018577, + "logits/chosen": -0.425912469625473, + "logits/rejected": -0.580479621887207, + "logps/chosen": -0.005178863648325205, + "logps/rejected": -2.450840711593628, + "loss": 0.7519, + "nll_loss": 0.187865749001503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005178863066248596, + "rewards/margins": 0.2445661872625351, + "rewards/rejected": -0.24508407711982727, + "step": 8784 + }, + { + "epoch": 6.0753803596127245, + "grad_norm": 5.949010848999023, + "learning_rate": 2.1803442446595977e-05, + "log_odds_chosen": 10.58531379699707, + "log_odds_ratio": -4.488856211537495e-05, + "logits/chosen": -0.4791228175163269, + "logits/rejected": -0.4790971875190735, + "logps/chosen": -0.00041575790964998305, + "logps/rejected": -2.0088014602661133, + "loss": 0.5197, + "nll_loss": 0.12990805506706238, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.157579314778559e-05, + "rewards/margins": 0.2008385807275772, + "rewards/rejected": -0.20088014006614685, + "step": 8785 + }, + { + "epoch": 6.076071922544951, + "grad_norm": 8.873502731323242, + "learning_rate": 2.1799600430305826e-05, + "log_odds_chosen": 9.660669326782227, + "log_odds_ratio": -0.000820180110167712, + "logits/chosen": -0.17997047305107117, + "logits/rejected": -0.2867315411567688, + "logps/chosen": -0.0021463148295879364, + "logps/rejected": -1.7114827632904053, + "loss": 0.5846, + "nll_loss": 0.1460685431957245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021463149460032582, + "rewards/margins": 0.17093363404273987, + "rewards/rejected": -0.17114827036857605, + "step": 8786 + }, + { + "epoch": 6.076763485477178, + "grad_norm": 3.9984657764434814, + "learning_rate": 2.1795758414015675e-05, + "log_odds_chosen": 10.248716354370117, + "log_odds_ratio": -0.00011280793842161074, + "logits/chosen": -0.40966886281967163, + "logits/rejected": -0.4978000819683075, + "logps/chosen": -0.0002598642313387245, + "logps/rejected": -2.1092262268066406, + "loss": 0.4102, + "nll_loss": 0.10254928469657898, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.598642277007457e-05, + "rewards/margins": 0.21089664101600647, + "rewards/rejected": -0.21092264354228973, + "step": 8787 + }, + { + "epoch": 6.077455048409405, + "grad_norm": 3.56330943107605, + "learning_rate": 2.1791916397725528e-05, + "log_odds_chosen": 10.921337127685547, + "log_odds_ratio": -0.0005771245341747999, + "logits/chosen": 0.020429208874702454, + "logits/rejected": -0.06128193438053131, + "logps/chosen": -0.0022159842774271965, + "logps/rejected": -2.977935314178467, + "loss": 0.758, + "nll_loss": 0.18944081664085388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002215984568465501, + "rewards/margins": 0.29757189750671387, + "rewards/rejected": -0.29779350757598877, + "step": 8788 + }, + { + "epoch": 6.078146611341632, + "grad_norm": 7.409214973449707, + "learning_rate": 2.1788074381435377e-05, + "log_odds_chosen": 10.565449714660645, + "log_odds_ratio": -0.00011608524073380977, + "logits/chosen": -0.3189489543437958, + "logits/rejected": -0.2393302619457245, + "logps/chosen": -0.00021485527395270765, + "logps/rejected": -2.161606788635254, + "loss": 0.7909, + "nll_loss": 0.19770371913909912, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1485526303877123e-05, + "rewards/margins": 0.21613919734954834, + "rewards/rejected": -0.21616066992282867, + "step": 8789 + }, + { + "epoch": 6.078838174273859, + "grad_norm": 7.98007869720459, + "learning_rate": 2.178423236514523e-05, + "log_odds_chosen": 11.489389419555664, + "log_odds_ratio": -3.7924979551462457e-05, + "logits/chosen": -0.30156710743904114, + "logits/rejected": -0.31472352147102356, + "logps/chosen": -0.0001984165864996612, + "logps/rejected": -2.3415443897247314, + "loss": 0.646, + "nll_loss": 0.16149157285690308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9841660105157644e-05, + "rewards/margins": 0.23413459956645966, + "rewards/rejected": -0.23415443301200867, + "step": 8790 + }, + { + "epoch": 6.0795297372060855, + "grad_norm": 11.55526065826416, + "learning_rate": 2.178039034885508e-05, + "log_odds_chosen": 10.707229614257812, + "log_odds_ratio": -0.000218723711441271, + "logits/chosen": -0.49764618277549744, + "logits/rejected": -0.5057958960533142, + "logps/chosen": -0.0008697055745869875, + "logps/rejected": -2.173175096511841, + "loss": 0.5207, + "nll_loss": 0.13016489148139954, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.69705545483157e-05, + "rewards/margins": 0.21723055839538574, + "rewards/rejected": -0.21731753647327423, + "step": 8791 + }, + { + "epoch": 6.080221300138312, + "grad_norm": 7.144685745239258, + "learning_rate": 2.177654833256493e-05, + "log_odds_chosen": 10.159027099609375, + "log_odds_ratio": -0.0007074850727804005, + "logits/chosen": -0.42714568972587585, + "logits/rejected": -0.45183366537094116, + "logps/chosen": -0.0006984758656471968, + "logps/rejected": -2.197754144668579, + "loss": 0.5004, + "nll_loss": 0.1250331550836563, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.984759238548577e-05, + "rewards/margins": 0.21970558166503906, + "rewards/rejected": -0.21977542340755463, + "step": 8792 + }, + { + "epoch": 6.080912863070539, + "grad_norm": 4.701704502105713, + "learning_rate": 2.1772706316274783e-05, + "log_odds_chosen": 10.532032012939453, + "log_odds_ratio": -4.7948538849595934e-05, + "logits/chosen": -0.30077916383743286, + "logits/rejected": -0.35092857480049133, + "logps/chosen": -0.00040165867540054023, + "logps/rejected": -2.3476030826568604, + "loss": 0.4936, + "nll_loss": 0.12340079247951508, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0165868995245546e-05, + "rewards/margins": 0.2347201704978943, + "rewards/rejected": -0.2347603291273117, + "step": 8793 + }, + { + "epoch": 6.081604426002766, + "grad_norm": 5.7320051193237305, + "learning_rate": 2.1768864299984635e-05, + "log_odds_chosen": 10.813372611999512, + "log_odds_ratio": -0.00021087832283228636, + "logits/chosen": -0.36254051327705383, + "logits/rejected": -0.48160022497177124, + "logps/chosen": -0.0005631643580272794, + "logps/rejected": -2.1880125999450684, + "loss": 0.657, + "nll_loss": 0.16422507166862488, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6316428526770324e-05, + "rewards/margins": 0.2187449336051941, + "rewards/rejected": -0.21880124509334564, + "step": 8794 + }, + { + "epoch": 6.082295988934993, + "grad_norm": 9.378345489501953, + "learning_rate": 2.1765022283694484e-05, + "log_odds_chosen": 10.610244750976562, + "log_odds_ratio": -5.6305994803551584e-05, + "logits/chosen": -0.02063523232936859, + "logits/rejected": -0.09921707957983017, + "logps/chosen": -0.0004355312848929316, + "logps/rejected": -2.193181037902832, + "loss": 0.6431, + "nll_loss": 0.16076835989952087, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.355312557891011e-05, + "rewards/margins": 0.21927456557750702, + "rewards/rejected": -0.21931812167167664, + "step": 8795 + }, + { + "epoch": 6.08298755186722, + "grad_norm": 12.209285736083984, + "learning_rate": 2.1761180267404337e-05, + "log_odds_chosen": 10.893821716308594, + "log_odds_ratio": -0.0001389766694046557, + "logits/chosen": -0.7187768816947937, + "logits/rejected": -0.7721809148788452, + "logps/chosen": -0.0013256346574053168, + "logps/rejected": -2.5911355018615723, + "loss": 0.5633, + "nll_loss": 0.14081165194511414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013256346574053168, + "rewards/margins": 0.25898098945617676, + "rewards/rejected": -0.2591135501861572, + "step": 8796 + }, + { + "epoch": 6.0836791147994465, + "grad_norm": 5.005368709564209, + "learning_rate": 2.1757338251114186e-05, + "log_odds_chosen": 10.6502685546875, + "log_odds_ratio": -0.00034274725476279855, + "logits/chosen": -0.7452360391616821, + "logits/rejected": -0.7825686931610107, + "logps/chosen": -0.0019323653541505337, + "logps/rejected": -2.6518256664276123, + "loss": 0.5366, + "nll_loss": 0.13411401212215424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019323652668390423, + "rewards/margins": 0.26498931646347046, + "rewards/rejected": -0.2651825547218323, + "step": 8797 + }, + { + "epoch": 6.084370677731673, + "grad_norm": 7.144959449768066, + "learning_rate": 2.1753496234824035e-05, + "log_odds_chosen": 9.655604362487793, + "log_odds_ratio": -0.000444319739472121, + "logits/chosen": -0.45785990357398987, + "logits/rejected": -0.474082350730896, + "logps/chosen": -0.00023596035316586494, + "logps/rejected": -1.6099621057510376, + "loss": 0.8545, + "nll_loss": 0.2135741412639618, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3596036044182256e-05, + "rewards/margins": 0.16097262501716614, + "rewards/rejected": -0.160996213555336, + "step": 8798 + }, + { + "epoch": 6.0850622406639, + "grad_norm": 6.444210529327393, + "learning_rate": 2.1749654218533887e-05, + "log_odds_chosen": 11.038360595703125, + "log_odds_ratio": -2.1456851754919626e-05, + "logits/chosen": -0.3064062297344208, + "logits/rejected": -0.40747886896133423, + "logps/chosen": -0.00010762877354864031, + "logps/rejected": -1.856105089187622, + "loss": 0.5418, + "nll_loss": 0.13545329868793488, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0762878446257673e-05, + "rewards/margins": 0.18559975922107697, + "rewards/rejected": -0.18561053276062012, + "step": 8799 + }, + { + "epoch": 6.085753803596127, + "grad_norm": 7.325039863586426, + "learning_rate": 2.174581220224374e-05, + "log_odds_chosen": 8.8878173828125, + "log_odds_ratio": -0.0006561190239153802, + "logits/chosen": -0.16303503513336182, + "logits/rejected": -0.23362107574939728, + "logps/chosen": -0.0007630744366906583, + "logps/rejected": -1.7544726133346558, + "loss": 0.6446, + "nll_loss": 0.16107672452926636, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630744221387431e-05, + "rewards/margins": 0.1753709614276886, + "rewards/rejected": -0.1754472553730011, + "step": 8800 + }, + { + "epoch": 6.086445366528354, + "grad_norm": 6.830489635467529, + "learning_rate": 2.174197018595359e-05, + "log_odds_chosen": 11.936038970947266, + "log_odds_ratio": -3.0060902645345777e-05, + "logits/chosen": -0.29758238792419434, + "logits/rejected": -0.3780736029148102, + "logps/chosen": -0.00016458773461636156, + "logps/rejected": -2.9928178787231445, + "loss": 0.6726, + "nll_loss": 0.16814972460269928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6458772734040394e-05, + "rewards/margins": 0.2992653250694275, + "rewards/rejected": -0.2992818057537079, + "step": 8801 + }, + { + "epoch": 6.087136929460581, + "grad_norm": 7.6381049156188965, + "learning_rate": 2.173812816966344e-05, + "log_odds_chosen": 11.096879959106445, + "log_odds_ratio": -5.57570529053919e-05, + "logits/chosen": -0.19633033871650696, + "logits/rejected": -0.25721418857574463, + "logps/chosen": -0.00017020636005327106, + "logps/rejected": -2.2261905670166016, + "loss": 0.5965, + "nll_loss": 0.1491204798221588, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7020636732922867e-05, + "rewards/margins": 0.22260203957557678, + "rewards/rejected": -0.22261905670166016, + "step": 8802 + }, + { + "epoch": 6.087828492392807, + "grad_norm": 14.14664077758789, + "learning_rate": 2.173428615337329e-05, + "log_odds_chosen": 11.166595458984375, + "log_odds_ratio": -0.00020692782709375024, + "logits/chosen": -0.11803440749645233, + "logits/rejected": -0.23896290361881256, + "logps/chosen": -0.0009996149456128478, + "logps/rejected": -2.9760303497314453, + "loss": 0.7754, + "nll_loss": 0.19382460415363312, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.996150038205087e-05, + "rewards/margins": 0.29750311374664307, + "rewards/rejected": -0.2976030707359314, + "step": 8803 + }, + { + "epoch": 6.088520055325034, + "grad_norm": 5.6617207527160645, + "learning_rate": 2.1730444137083143e-05, + "log_odds_chosen": 10.668686866760254, + "log_odds_ratio": -8.660917228553444e-05, + "logits/chosen": -0.5297747850418091, + "logits/rejected": -0.49632179737091064, + "logps/chosen": -0.0001935886830324307, + "logps/rejected": -2.0004067420959473, + "loss": 0.5958, + "nll_loss": 0.14893387258052826, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9358869394636713e-05, + "rewards/margins": 0.20002131164073944, + "rewards/rejected": -0.20004066824913025, + "step": 8804 + }, + { + "epoch": 6.089211618257261, + "grad_norm": 8.393978118896484, + "learning_rate": 2.1726602120792995e-05, + "log_odds_chosen": 9.278707504272461, + "log_odds_ratio": -0.003602417418733239, + "logits/chosen": -0.8583751916885376, + "logits/rejected": -0.8760848045349121, + "logps/chosen": -0.0023618321865797043, + "logps/rejected": -1.3024115562438965, + "loss": 0.5269, + "nll_loss": 0.1313529908657074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023618324485141784, + "rewards/margins": 0.13000498712062836, + "rewards/rejected": -0.13024115562438965, + "step": 8805 + }, + { + "epoch": 6.089903181189488, + "grad_norm": 6.89979887008667, + "learning_rate": 2.1722760104502844e-05, + "log_odds_chosen": 11.741314888000488, + "log_odds_ratio": -2.223522460553795e-05, + "logits/chosen": -0.4017873704433441, + "logits/rejected": -0.43786537647247314, + "logps/chosen": -0.00020560878328979015, + "logps/rejected": -3.0209012031555176, + "loss": 0.5789, + "nll_loss": 0.14472922682762146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0560877601383254e-05, + "rewards/margins": 0.3020695447921753, + "rewards/rejected": -0.3020901381969452, + "step": 8806 + }, + { + "epoch": 6.090594744121715, + "grad_norm": 5.095583915710449, + "learning_rate": 2.1718918088212693e-05, + "log_odds_chosen": 11.15652847290039, + "log_odds_ratio": -1.8469172573531978e-05, + "logits/chosen": -0.8193786144256592, + "logits/rejected": -0.7967087030410767, + "logps/chosen": -7.29345265426673e-05, + "logps/rejected": -1.5476975440979004, + "loss": 0.3783, + "nll_loss": 0.09456189721822739, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.29345265426673e-06, + "rewards/margins": 0.15476244688034058, + "rewards/rejected": -0.15476974844932556, + "step": 8807 + }, + { + "epoch": 6.091286307053942, + "grad_norm": 10.117960929870605, + "learning_rate": 2.1715076071922546e-05, + "log_odds_chosen": 11.16279411315918, + "log_odds_ratio": -2.1533323888434097e-05, + "logits/chosen": -0.6959891319274902, + "logits/rejected": -0.7757497429847717, + "logps/chosen": -0.00027285193209536374, + "logps/rejected": -2.445639133453369, + "loss": 0.7058, + "nll_loss": 0.1764417141675949, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.728519575612154e-05, + "rewards/margins": 0.2445366233587265, + "rewards/rejected": -0.24456390738487244, + "step": 8808 + }, + { + "epoch": 6.091977869986168, + "grad_norm": 8.009075164794922, + "learning_rate": 2.1711234055632395e-05, + "log_odds_chosen": 10.137691497802734, + "log_odds_ratio": -0.00022952201834414154, + "logits/chosen": -0.6261448264122009, + "logits/rejected": -0.5945342183113098, + "logps/chosen": -0.0013284524902701378, + "logps/rejected": -2.2858855724334717, + "loss": 0.777, + "nll_loss": 0.19423788785934448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013284524902701378, + "rewards/margins": 0.22845572233200073, + "rewards/rejected": -0.2285885512828827, + "step": 8809 + }, + { + "epoch": 6.092669432918395, + "grad_norm": 6.640091419219971, + "learning_rate": 2.1707392039342247e-05, + "log_odds_chosen": 11.143184661865234, + "log_odds_ratio": -2.3205455363495275e-05, + "logits/chosen": -0.3292912244796753, + "logits/rejected": -0.4862833619117737, + "logps/chosen": -0.00011166653712280095, + "logps/rejected": -1.7538467645645142, + "loss": 0.472, + "nll_loss": 0.11800672113895416, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1166652257088572e-05, + "rewards/margins": 0.17537352442741394, + "rewards/rejected": -0.17538467049598694, + "step": 8810 + }, + { + "epoch": 6.093360995850622, + "grad_norm": 10.416664123535156, + "learning_rate": 2.17035500230521e-05, + "log_odds_chosen": 10.741606712341309, + "log_odds_ratio": -0.0006154798902571201, + "logits/chosen": -0.605117917060852, + "logits/rejected": -0.5834625959396362, + "logps/chosen": -0.0005302996141836047, + "logps/rejected": -2.474724531173706, + "loss": 0.4787, + "nll_loss": 0.11962580680847168, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302996578393504e-05, + "rewards/margins": 0.24741944670677185, + "rewards/rejected": -0.24747246503829956, + "step": 8811 + }, + { + "epoch": 6.094052558782849, + "grad_norm": 10.211325645446777, + "learning_rate": 2.169970800676195e-05, + "log_odds_chosen": 11.239428520202637, + "log_odds_ratio": -9.101382602239028e-05, + "logits/chosen": 0.054155007004737854, + "logits/rejected": 0.031840741634368896, + "logps/chosen": -0.0006743489066138864, + "logps/rejected": -3.3643722534179688, + "loss": 0.8705, + "nll_loss": 0.21762073040008545, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743489211658016e-05, + "rewards/margins": 0.3363697826862335, + "rewards/rejected": -0.3364371955394745, + "step": 8812 + }, + { + "epoch": 6.094744121715076, + "grad_norm": 6.392689228057861, + "learning_rate": 2.16958659904718e-05, + "log_odds_chosen": 9.836763381958008, + "log_odds_ratio": -0.00022950033599045128, + "logits/chosen": -0.05819656699895859, + "logits/rejected": -0.19960781931877136, + "logps/chosen": -0.0003322213888168335, + "logps/rejected": -2.0401153564453125, + "loss": 0.8009, + "nll_loss": 0.20020034909248352, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.322213888168335e-05, + "rewards/margins": 0.20397831499576569, + "rewards/rejected": -0.20401152968406677, + "step": 8813 + }, + { + "epoch": 6.095435684647303, + "grad_norm": 3.7710330486297607, + "learning_rate": 2.1692023974181654e-05, + "log_odds_chosen": 9.871589660644531, + "log_odds_ratio": -0.00019951784634031355, + "logits/chosen": -0.00703035295009613, + "logits/rejected": -0.11229774355888367, + "logps/chosen": -0.0009015482501126826, + "logps/rejected": -1.984623670578003, + "loss": 0.796, + "nll_loss": 0.19897326827049255, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.01548337424174e-05, + "rewards/margins": 0.19837221503257751, + "rewards/rejected": -0.1984623670578003, + "step": 8814 + }, + { + "epoch": 6.096127247579529, + "grad_norm": 5.55619478225708, + "learning_rate": 2.1688181957891503e-05, + "log_odds_chosen": 9.764547348022461, + "log_odds_ratio": -0.00041928552673198283, + "logits/chosen": -0.7127132415771484, + "logits/rejected": -0.8955925107002258, + "logps/chosen": -0.0008278897730633616, + "logps/rejected": -1.8823670148849487, + "loss": 0.9079, + "nll_loss": 0.22692245244979858, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.278897439595312e-05, + "rewards/margins": 0.1881539225578308, + "rewards/rejected": -0.18823671340942383, + "step": 8815 + }, + { + "epoch": 6.096818810511756, + "grad_norm": 5.305004596710205, + "learning_rate": 2.1684339941601352e-05, + "log_odds_chosen": 10.194826126098633, + "log_odds_ratio": -0.0031981952488422394, + "logits/chosen": -0.43302011489868164, + "logits/rejected": -0.4673667550086975, + "logps/chosen": -0.0015352818882092834, + "logps/rejected": -1.9649176597595215, + "loss": 0.4772, + "nll_loss": 0.11898045241832733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001535282062832266, + "rewards/margins": 0.1963382363319397, + "rewards/rejected": -0.19649174809455872, + "step": 8816 + }, + { + "epoch": 6.097510373443983, + "grad_norm": 6.293792724609375, + "learning_rate": 2.1680497925311204e-05, + "log_odds_chosen": 11.738176345825195, + "log_odds_ratio": -1.4964467482059263e-05, + "logits/chosen": -0.21480712294578552, + "logits/rejected": -0.2954432964324951, + "logps/chosen": -0.00013515262980945408, + "logps/rejected": -2.606994390487671, + "loss": 0.5252, + "nll_loss": 0.13129648566246033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3515262253349647e-05, + "rewards/margins": 0.26068592071533203, + "rewards/rejected": -0.26069945096969604, + "step": 8817 + }, + { + "epoch": 6.09820193637621, + "grad_norm": 6.480541706085205, + "learning_rate": 2.1676655909021053e-05, + "log_odds_chosen": 9.735299110412598, + "log_odds_ratio": -0.0001797816512407735, + "logits/chosen": -0.051631614565849304, + "logits/rejected": -0.10556840896606445, + "logps/chosen": -0.0004435731389094144, + "logps/rejected": -1.998798131942749, + "loss": 0.7467, + "nll_loss": 0.18666529655456543, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.435731170815416e-05, + "rewards/margins": 0.19983544945716858, + "rewards/rejected": -0.19987979531288147, + "step": 8818 + }, + { + "epoch": 6.098893499308437, + "grad_norm": 4.939790725708008, + "learning_rate": 2.1672813892730906e-05, + "log_odds_chosen": 9.56667423248291, + "log_odds_ratio": -0.00015915413678158075, + "logits/chosen": -0.09113126248121262, + "logits/rejected": -0.1315183788537979, + "logps/chosen": -0.00048493000213056803, + "logps/rejected": -1.30069100856781, + "loss": 0.6208, + "nll_loss": 0.15519554913043976, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.849300239584409e-05, + "rewards/margins": 0.1300206184387207, + "rewards/rejected": -0.13006910681724548, + "step": 8819 + }, + { + "epoch": 6.0995850622406635, + "grad_norm": 6.470208168029785, + "learning_rate": 2.1668971876440758e-05, + "log_odds_chosen": 11.104886054992676, + "log_odds_ratio": -7.311101944651455e-05, + "logits/chosen": -0.5827865600585938, + "logits/rejected": -0.6194196343421936, + "logps/chosen": -0.0005881582037545741, + "logps/rejected": -2.9203414916992188, + "loss": 1.1226, + "nll_loss": 0.2806398868560791, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.881581819267012e-05, + "rewards/margins": 0.29197531938552856, + "rewards/rejected": -0.2920341491699219, + "step": 8820 + }, + { + "epoch": 6.10027662517289, + "grad_norm": 5.361146450042725, + "learning_rate": 2.1665129860150607e-05, + "log_odds_chosen": 11.236394882202148, + "log_odds_ratio": -0.00015473456005565822, + "logits/chosen": -0.7091895341873169, + "logits/rejected": -0.7002542018890381, + "logps/chosen": -0.000265885260887444, + "logps/rejected": -2.7120513916015625, + "loss": 0.5708, + "nll_loss": 0.1426815241575241, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.658852463355288e-05, + "rewards/margins": 0.27117854356765747, + "rewards/rejected": -0.2712051272392273, + "step": 8821 + }, + { + "epoch": 6.100968188105117, + "grad_norm": 6.503119945526123, + "learning_rate": 2.166128784386046e-05, + "log_odds_chosen": 11.223774909973145, + "log_odds_ratio": -8.994392555905506e-05, + "logits/chosen": -0.3564401865005493, + "logits/rejected": -0.36347508430480957, + "logps/chosen": -0.0007731412770226598, + "logps/rejected": -3.339472770690918, + "loss": 0.7328, + "nll_loss": 0.18319301307201385, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.731413643341511e-05, + "rewards/margins": 0.33386993408203125, + "rewards/rejected": -0.3339473009109497, + "step": 8822 + }, + { + "epoch": 6.101659751037344, + "grad_norm": 7.759904861450195, + "learning_rate": 2.1657445827570312e-05, + "log_odds_chosen": 9.283439636230469, + "log_odds_ratio": -0.000780658156145364, + "logits/chosen": -0.0106724314391613, + "logits/rejected": 0.050531066954135895, + "logps/chosen": -0.0009825469460338354, + "logps/rejected": -1.320664882659912, + "loss": 0.8748, + "nll_loss": 0.2186199277639389, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.82546916930005e-05, + "rewards/margins": 0.13196823000907898, + "rewards/rejected": -0.1320664882659912, + "step": 8823 + }, + { + "epoch": 6.102351313969571, + "grad_norm": 6.8154988288879395, + "learning_rate": 2.165360381128016e-05, + "log_odds_chosen": 10.798100471496582, + "log_odds_ratio": -3.55045085598249e-05, + "logits/chosen": -0.27733373641967773, + "logits/rejected": -0.40890154242515564, + "logps/chosen": -0.00019498051551636308, + "logps/rejected": -2.014132022857666, + "loss": 0.5225, + "nll_loss": 0.13062188029289246, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.949805300682783e-05, + "rewards/margins": 0.201393723487854, + "rewards/rejected": -0.20141321420669556, + "step": 8824 + }, + { + "epoch": 6.103042876901798, + "grad_norm": 9.223984718322754, + "learning_rate": 2.164976179499001e-05, + "log_odds_chosen": 11.027979850769043, + "log_odds_ratio": -0.0001737778220558539, + "logits/chosen": -0.4153405427932739, + "logits/rejected": -0.494740754365921, + "logps/chosen": -0.000494759005960077, + "logps/rejected": -3.088369369506836, + "loss": 0.5605, + "nll_loss": 0.14010044932365417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9475900596007705e-05, + "rewards/margins": 0.30878746509552, + "rewards/rejected": -0.308836966753006, + "step": 8825 + }, + { + "epoch": 6.1037344398340245, + "grad_norm": 5.566494464874268, + "learning_rate": 2.1645919778699863e-05, + "log_odds_chosen": 9.569103240966797, + "log_odds_ratio": -0.0007949695573188365, + "logits/chosen": -0.2678110599517822, + "logits/rejected": -0.33686572313308716, + "logps/chosen": -0.001850008382461965, + "logps/rejected": -1.621332049369812, + "loss": 0.5076, + "nll_loss": 0.1268259584903717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018500084115657955, + "rewards/margins": 0.16194820404052734, + "rewards/rejected": -0.16213320195674896, + "step": 8826 + }, + { + "epoch": 6.104426002766251, + "grad_norm": 7.433922290802002, + "learning_rate": 2.1642077762409712e-05, + "log_odds_chosen": 11.172834396362305, + "log_odds_ratio": -5.931320629315451e-05, + "logits/chosen": -0.10432031005620956, + "logits/rejected": -0.16316169500350952, + "logps/chosen": -0.00039796033524908125, + "logps/rejected": -2.6650447845458984, + "loss": 0.7373, + "nll_loss": 0.18432670831680298, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.97960320697166e-05, + "rewards/margins": 0.2664646804332733, + "rewards/rejected": -0.2665044963359833, + "step": 8827 + }, + { + "epoch": 6.105117565698478, + "grad_norm": 12.175658226013184, + "learning_rate": 2.1638235746119564e-05, + "log_odds_chosen": 11.484321594238281, + "log_odds_ratio": -3.6780231312150136e-05, + "logits/chosen": -0.14946818351745605, + "logits/rejected": -0.25240346789360046, + "logps/chosen": -0.0003283719124738127, + "logps/rejected": -3.119140386581421, + "loss": 0.6463, + "nll_loss": 0.1615663468837738, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.283718979218975e-05, + "rewards/margins": 0.3118812143802643, + "rewards/rejected": -0.3119140565395355, + "step": 8828 + }, + { + "epoch": 6.105809128630705, + "grad_norm": 6.868575096130371, + "learning_rate": 2.1634393729829417e-05, + "log_odds_chosen": 10.50800609588623, + "log_odds_ratio": -0.0007316919509321451, + "logits/chosen": 0.09018150717020035, + "logits/rejected": -0.14269495010375977, + "logps/chosen": -0.0008097183890640736, + "logps/rejected": -1.760002851486206, + "loss": 0.6243, + "nll_loss": 0.15601056814193726, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.097184036159888e-05, + "rewards/margins": 0.1759193241596222, + "rewards/rejected": -0.17600028216838837, + "step": 8829 + }, + { + "epoch": 6.106500691562932, + "grad_norm": 3.782043933868408, + "learning_rate": 2.1630551713539266e-05, + "log_odds_chosen": 10.608339309692383, + "log_odds_ratio": -8.497351518599316e-05, + "logits/chosen": -0.2754320502281189, + "logits/rejected": -0.29661867022514343, + "logps/chosen": -0.00019873691780958325, + "logps/rejected": -2.0929758548736572, + "loss": 0.4986, + "nll_loss": 0.12464545667171478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9873692508554086e-05, + "rewards/margins": 0.2092777043581009, + "rewards/rejected": -0.20929758250713348, + "step": 8830 + }, + { + "epoch": 6.107192254495159, + "grad_norm": 5.824345588684082, + "learning_rate": 2.1626709697249118e-05, + "log_odds_chosen": 10.269322395324707, + "log_odds_ratio": -6.437697447836399e-05, + "logits/chosen": -0.6419390439987183, + "logits/rejected": -0.6782790422439575, + "logps/chosen": -0.0003181939828209579, + "logps/rejected": -1.8846712112426758, + "loss": 0.6367, + "nll_loss": 0.1591646373271942, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.181939973728731e-05, + "rewards/margins": 0.18843530118465424, + "rewards/rejected": -0.1884671151638031, + "step": 8831 + }, + { + "epoch": 6.1078838174273855, + "grad_norm": 5.414685249328613, + "learning_rate": 2.162286768095897e-05, + "log_odds_chosen": 10.8403959274292, + "log_odds_ratio": -2.654375566635281e-05, + "logits/chosen": -0.5415429472923279, + "logits/rejected": -0.5043303966522217, + "logps/chosen": -0.0001397305604768917, + "logps/rejected": -1.906392216682434, + "loss": 0.465, + "nll_loss": 0.11623533070087433, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.397305641148705e-05, + "rewards/margins": 0.19062525033950806, + "rewards/rejected": -0.19063922762870789, + "step": 8832 + }, + { + "epoch": 6.108575380359612, + "grad_norm": 9.278594970703125, + "learning_rate": 2.161902566466882e-05, + "log_odds_chosen": 11.402810096740723, + "log_odds_ratio": -1.8366235963185318e-05, + "logits/chosen": -0.18955518305301666, + "logits/rejected": -0.2610347867012024, + "logps/chosen": -0.00014559032570105046, + "logps/rejected": -2.5812301635742188, + "loss": 0.6596, + "nll_loss": 0.1648978888988495, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4559032933902927e-05, + "rewards/margins": 0.2581084668636322, + "rewards/rejected": -0.2581230401992798, + "step": 8833 + }, + { + "epoch": 6.109266943291839, + "grad_norm": 4.596074104309082, + "learning_rate": 2.161518364837867e-05, + "log_odds_chosen": 11.463423728942871, + "log_odds_ratio": -3.30315378960222e-05, + "logits/chosen": -0.4086986780166626, + "logits/rejected": -0.449398934841156, + "logps/chosen": -0.00018414505757391453, + "logps/rejected": -2.8123068809509277, + "loss": 0.3211, + "nll_loss": 0.08027203381061554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.841450466599781e-05, + "rewards/margins": 0.2812122702598572, + "rewards/rejected": -0.2812306880950928, + "step": 8834 + }, + { + "epoch": 6.109958506224066, + "grad_norm": 4.561694622039795, + "learning_rate": 2.161134163208852e-05, + "log_odds_chosen": 10.875974655151367, + "log_odds_ratio": -6.157255120342597e-05, + "logits/chosen": -0.3669860363006592, + "logits/rejected": -0.43035003542900085, + "logps/chosen": -0.0002957833930850029, + "logps/rejected": -2.4708425998687744, + "loss": 0.4244, + "nll_loss": 0.10609880834817886, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.957833930850029e-05, + "rewards/margins": 0.24705468118190765, + "rewards/rejected": -0.24708425998687744, + "step": 8835 + }, + { + "epoch": 6.110650069156293, + "grad_norm": 5.778451442718506, + "learning_rate": 2.160749961579837e-05, + "log_odds_chosen": 11.576051712036133, + "log_odds_ratio": -6.129697430878878e-05, + "logits/chosen": -0.8564388751983643, + "logits/rejected": -0.9177031517028809, + "logps/chosen": -0.0002084274310618639, + "logps/rejected": -2.860732078552246, + "loss": 0.8458, + "nll_loss": 0.21144109964370728, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.084274274238851e-05, + "rewards/margins": 0.2860523760318756, + "rewards/rejected": -0.286073237657547, + "step": 8836 + }, + { + "epoch": 6.11134163208852, + "grad_norm": 4.546443939208984, + "learning_rate": 2.1603657599508223e-05, + "log_odds_chosen": 11.850934982299805, + "log_odds_ratio": -3.854755777865648e-05, + "logits/chosen": -0.09234938025474548, + "logits/rejected": -0.14182928204536438, + "logps/chosen": -0.0004462672513909638, + "logps/rejected": -2.16272234916687, + "loss": 0.4516, + "nll_loss": 0.11288943886756897, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.462672950467095e-05, + "rewards/margins": 0.21622759103775024, + "rewards/rejected": -0.21627222001552582, + "step": 8837 + }, + { + "epoch": 6.1120331950207465, + "grad_norm": 11.113327026367188, + "learning_rate": 2.1599815583218075e-05, + "log_odds_chosen": 9.901022911071777, + "log_odds_ratio": -0.024493100121617317, + "logits/chosen": -0.29056257009506226, + "logits/rejected": -0.41285592317581177, + "logps/chosen": -0.008077163249254227, + "logps/rejected": -2.225210428237915, + "loss": 0.97, + "nll_loss": 0.24004806578159332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008077163947746158, + "rewards/margins": 0.2217133492231369, + "rewards/rejected": -0.22252105176448822, + "step": 8838 + }, + { + "epoch": 6.112724757952973, + "grad_norm": 9.41685962677002, + "learning_rate": 2.1595973566927924e-05, + "log_odds_chosen": 11.145782470703125, + "log_odds_ratio": -3.246070264140144e-05, + "logits/chosen": -0.5988461971282959, + "logits/rejected": -0.5849972367286682, + "logps/chosen": -0.0006508535007014871, + "logps/rejected": -2.710702419281006, + "loss": 0.6633, + "nll_loss": 0.1658124029636383, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.508534715976566e-05, + "rewards/margins": 0.27100518345832825, + "rewards/rejected": -0.271070271730423, + "step": 8839 + }, + { + "epoch": 6.1134163208852, + "grad_norm": 5.831615924835205, + "learning_rate": 2.1592131550637777e-05, + "log_odds_chosen": 11.677581787109375, + "log_odds_ratio": -2.2064965378376655e-05, + "logits/chosen": -0.07563771307468414, + "logits/rejected": -0.1285375952720642, + "logps/chosen": -0.000272395700449124, + "logps/rejected": -3.220499277114868, + "loss": 0.7135, + "nll_loss": 0.17837318778038025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7239570044912398e-05, + "rewards/margins": 0.3220227062702179, + "rewards/rejected": -0.32204994559288025, + "step": 8840 + }, + { + "epoch": 6.114107883817427, + "grad_norm": 6.619877338409424, + "learning_rate": 2.158828953434763e-05, + "log_odds_chosen": 11.94092845916748, + "log_odds_ratio": -1.0732926057244185e-05, + "logits/chosen": -0.5719197392463684, + "logits/rejected": -0.581974983215332, + "logps/chosen": -8.917129889596254e-05, + "logps/rejected": -2.4263858795166016, + "loss": 0.4402, + "nll_loss": 0.11005041003227234, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.917129889596254e-06, + "rewards/margins": 0.24262967705726624, + "rewards/rejected": -0.24263860285282135, + "step": 8841 + }, + { + "epoch": 6.114799446749654, + "grad_norm": 5.583203315734863, + "learning_rate": 2.1584447518057478e-05, + "log_odds_chosen": 9.578554153442383, + "log_odds_ratio": -0.0002196382120018825, + "logits/chosen": -0.5063522458076477, + "logits/rejected": -0.6370638608932495, + "logps/chosen": -0.0006920626619830728, + "logps/rejected": -1.7157485485076904, + "loss": 0.7513, + "nll_loss": 0.18780651688575745, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920627492945641e-05, + "rewards/margins": 0.1715056449174881, + "rewards/rejected": -0.17157486081123352, + "step": 8842 + }, + { + "epoch": 6.115491009681881, + "grad_norm": 7.913309097290039, + "learning_rate": 2.1580605501767327e-05, + "log_odds_chosen": 11.05504035949707, + "log_odds_ratio": -4.723056190414354e-05, + "logits/chosen": -0.6872215270996094, + "logits/rejected": -0.7240106463432312, + "logps/chosen": -0.00019065033120568842, + "logps/rejected": -2.20306658744812, + "loss": 0.5783, + "nll_loss": 0.1445586234331131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9065033484366722e-05, + "rewards/margins": 0.22028759121894836, + "rewards/rejected": -0.2203066647052765, + "step": 8843 + }, + { + "epoch": 6.1161825726141075, + "grad_norm": 4.464817523956299, + "learning_rate": 2.157676348547718e-05, + "log_odds_chosen": 10.496665954589844, + "log_odds_ratio": -6.723314436385408e-05, + "logits/chosen": -0.6100011467933655, + "logits/rejected": -0.5485386252403259, + "logps/chosen": -0.0005118567496538162, + "logps/rejected": -1.8350151777267456, + "loss": 0.6321, + "nll_loss": 0.15802812576293945, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.118567787576467e-05, + "rewards/margins": 0.1834503412246704, + "rewards/rejected": -0.18350151181221008, + "step": 8844 + }, + { + "epoch": 6.116874135546334, + "grad_norm": 7.142628192901611, + "learning_rate": 2.157292146918703e-05, + "log_odds_chosen": 10.981527328491211, + "log_odds_ratio": -0.00034269201569259167, + "logits/chosen": -0.34676387906074524, + "logits/rejected": -0.35570889711380005, + "logps/chosen": -0.0019361183512955904, + "logps/rejected": -2.876324415206909, + "loss": 0.6613, + "nll_loss": 0.1652963012456894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019361183512955904, + "rewards/margins": 0.2874388098716736, + "rewards/rejected": -0.28763246536254883, + "step": 8845 + }, + { + "epoch": 6.117565698478561, + "grad_norm": 4.98019552230835, + "learning_rate": 2.156907945289688e-05, + "log_odds_chosen": 10.531964302062988, + "log_odds_ratio": -4.4222902943147346e-05, + "logits/chosen": -0.2760690450668335, + "logits/rejected": -0.3222053349018097, + "logps/chosen": -0.00015717542555648834, + "logps/rejected": -1.7313332557678223, + "loss": 0.5804, + "nll_loss": 0.1450836956501007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5717543647042476e-05, + "rewards/margins": 0.17311760783195496, + "rewards/rejected": -0.17313331365585327, + "step": 8846 + }, + { + "epoch": 6.118257261410788, + "grad_norm": 7.392937660217285, + "learning_rate": 2.1565237436606734e-05, + "log_odds_chosen": 9.921597480773926, + "log_odds_ratio": -0.00011697213631123304, + "logits/chosen": -0.24065059423446655, + "logits/rejected": -0.28834766149520874, + "logps/chosen": -0.0005189216462895274, + "logps/rejected": -1.8165524005889893, + "loss": 0.5459, + "nll_loss": 0.1364704966545105, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1892166084144264e-05, + "rewards/margins": 0.1816033571958542, + "rewards/rejected": -0.18165524303913116, + "step": 8847 + }, + { + "epoch": 6.118948824343015, + "grad_norm": 11.480717658996582, + "learning_rate": 2.1561395420316583e-05, + "log_odds_chosen": 10.543256759643555, + "log_odds_ratio": -0.00019431406690273434, + "logits/chosen": 0.16179290413856506, + "logits/rejected": 0.02488519996404648, + "logps/chosen": -0.0006095264106988907, + "logps/rejected": -3.026254177093506, + "loss": 0.6545, + "nll_loss": 0.1636168211698532, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0952639614697546e-05, + "rewards/margins": 0.30256450176239014, + "rewards/rejected": -0.302625447511673, + "step": 8848 + }, + { + "epoch": 6.119640387275242, + "grad_norm": 6.340550422668457, + "learning_rate": 2.1557553404026435e-05, + "log_odds_chosen": 10.822004318237305, + "log_odds_ratio": -5.153457459527999e-05, + "logits/chosen": -0.5803370475769043, + "logits/rejected": -0.571527898311615, + "logps/chosen": -0.0002886800211854279, + "logps/rejected": -2.354701519012451, + "loss": 0.5035, + "nll_loss": 0.1258649379014969, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8868005756521598e-05, + "rewards/margins": 0.23544129729270935, + "rewards/rejected": -0.23547017574310303, + "step": 8849 + }, + { + "epoch": 6.1203319502074685, + "grad_norm": 7.232970714569092, + "learning_rate": 2.1553711387736288e-05, + "log_odds_chosen": 9.102911949157715, + "log_odds_ratio": -0.00019699697440955788, + "logits/chosen": -0.34530875086784363, + "logits/rejected": -0.4146209955215454, + "logps/chosen": -0.0025119970086961985, + "logps/rejected": -1.6312192678451538, + "loss": 0.561, + "nll_loss": 0.14022156596183777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025119970086961985, + "rewards/margins": 0.16287073493003845, + "rewards/rejected": -0.16312193870544434, + "step": 8850 + }, + { + "epoch": 6.121023513139695, + "grad_norm": 7.151753902435303, + "learning_rate": 2.1549869371446137e-05, + "log_odds_chosen": 10.662192344665527, + "log_odds_ratio": -0.00024196658341679722, + "logits/chosen": -0.7194679379463196, + "logits/rejected": -0.7703452110290527, + "logps/chosen": -0.00021683350496459752, + "logps/rejected": -1.6515040397644043, + "loss": 0.508, + "nll_loss": 0.12698546051979065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1683352315449156e-05, + "rewards/margins": 0.1651287078857422, + "rewards/rejected": -0.16515041887760162, + "step": 8851 + }, + { + "epoch": 6.121715076071922, + "grad_norm": 6.731082916259766, + "learning_rate": 2.1546027355155986e-05, + "log_odds_chosen": 10.459026336669922, + "log_odds_ratio": -5.2345916628837585e-05, + "logits/chosen": -0.4643927812576294, + "logits/rejected": -0.45928698778152466, + "logps/chosen": -0.00017969627515412867, + "logps/rejected": -1.7573070526123047, + "loss": 0.4461, + "nll_loss": 0.11151199042797089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.796962897060439e-05, + "rewards/margins": 0.17571274936199188, + "rewards/rejected": -0.17573070526123047, + "step": 8852 + }, + { + "epoch": 6.122406639004149, + "grad_norm": 9.097981452941895, + "learning_rate": 2.1542185338865838e-05, + "log_odds_chosen": 11.558393478393555, + "log_odds_ratio": -2.5802919481066056e-05, + "logits/chosen": -0.31933721899986267, + "logits/rejected": -0.3849840462207794, + "logps/chosen": -0.00011497936066007242, + "logps/rejected": -2.0980782508850098, + "loss": 0.5145, + "nll_loss": 0.12861578166484833, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1497935702209361e-05, + "rewards/margins": 0.20979633927345276, + "rewards/rejected": -0.2098078429698944, + "step": 8853 + }, + { + "epoch": 6.123098201936376, + "grad_norm": 5.605348110198975, + "learning_rate": 2.1538343322575687e-05, + "log_odds_chosen": 10.877555847167969, + "log_odds_ratio": -3.869025385938585e-05, + "logits/chosen": -0.3823351562023163, + "logits/rejected": -0.5414446592330933, + "logps/chosen": -0.00019438430899754167, + "logps/rejected": -2.1862130165100098, + "loss": 0.6517, + "nll_loss": 0.16291458904743195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9438430172158405e-05, + "rewards/margins": 0.21860186755657196, + "rewards/rejected": -0.21862132847309113, + "step": 8854 + }, + { + "epoch": 6.123789764868603, + "grad_norm": 4.647186756134033, + "learning_rate": 2.153450130628554e-05, + "log_odds_chosen": 9.800394058227539, + "log_odds_ratio": -0.0005086607998237014, + "logits/chosen": -0.21515649557113647, + "logits/rejected": -0.26546669006347656, + "logps/chosen": -0.0007469278643839061, + "logps/rejected": -1.615663766860962, + "loss": 0.4185, + "nll_loss": 0.10456429421901703, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469279080396518e-05, + "rewards/margins": 0.16149169206619263, + "rewards/rejected": -0.16156639158725739, + "step": 8855 + }, + { + "epoch": 6.124481327800829, + "grad_norm": 4.1204915046691895, + "learning_rate": 2.1530659289995392e-05, + "log_odds_chosen": 10.329703330993652, + "log_odds_ratio": -0.00016737988335080445, + "logits/chosen": -0.47069692611694336, + "logits/rejected": -0.48261600732803345, + "logps/chosen": -0.0007168280426412821, + "logps/rejected": -2.0946145057678223, + "loss": 0.5077, + "nll_loss": 0.12690681219100952, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.168280717451125e-05, + "rewards/margins": 0.209389790892601, + "rewards/rejected": -0.20946148037910461, + "step": 8856 + }, + { + "epoch": 6.125172890733056, + "grad_norm": 9.45824146270752, + "learning_rate": 2.152681727370524e-05, + "log_odds_chosen": 11.265548706054688, + "log_odds_ratio": -2.638051228132099e-05, + "logits/chosen": -0.44578057527542114, + "logits/rejected": -0.5971443057060242, + "logps/chosen": -0.0002542410511523485, + "logps/rejected": -2.679877281188965, + "loss": 1.5033, + "nll_loss": 0.3758128881454468, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5424105842830613e-05, + "rewards/margins": 0.2679622769355774, + "rewards/rejected": -0.2679877281188965, + "step": 8857 + }, + { + "epoch": 6.125864453665283, + "grad_norm": 11.375960350036621, + "learning_rate": 2.1522975257415094e-05, + "log_odds_chosen": 10.551385879516602, + "log_odds_ratio": -0.006846227683126926, + "logits/chosen": -0.25975847244262695, + "logits/rejected": -0.37072598934173584, + "logps/chosen": -0.04536538943648338, + "logps/rejected": -2.7914230823516846, + "loss": 0.6618, + "nll_loss": 0.16475853323936462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004536538850516081, + "rewards/margins": 0.27460581064224243, + "rewards/rejected": -0.2791423499584198, + "step": 8858 + }, + { + "epoch": 6.12655601659751, + "grad_norm": 6.898074150085449, + "learning_rate": 2.1519133241124946e-05, + "log_odds_chosen": 11.68839168548584, + "log_odds_ratio": -2.754458910203539e-05, + "logits/chosen": -0.7259312868118286, + "logits/rejected": -0.8036313056945801, + "logps/chosen": -0.00017395266331732273, + "logps/rejected": -2.473479747772217, + "loss": 0.7248, + "nll_loss": 0.18118774890899658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7395266695530154e-05, + "rewards/margins": 0.24733059108257294, + "rewards/rejected": -0.24734798073768616, + "step": 8859 + }, + { + "epoch": 6.127247579529737, + "grad_norm": 7.808147430419922, + "learning_rate": 2.1515291224834795e-05, + "log_odds_chosen": 9.726302146911621, + "log_odds_ratio": -0.00056614656932652, + "logits/chosen": -0.267952024936676, + "logits/rejected": -0.21720397472381592, + "logps/chosen": -0.00065661157714203, + "logps/rejected": -1.7407177686691284, + "loss": 0.8449, + "nll_loss": 0.21116022765636444, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.566115189343691e-05, + "rewards/margins": 0.1740061342716217, + "rewards/rejected": -0.1740717738866806, + "step": 8860 + }, + { + "epoch": 6.127939142461964, + "grad_norm": 6.377797603607178, + "learning_rate": 2.1511449208544644e-05, + "log_odds_chosen": 10.847086906433105, + "log_odds_ratio": -0.0001203061401611194, + "logits/chosen": -0.6302061676979065, + "logits/rejected": -0.6396503448486328, + "logps/chosen": -0.00020166633476037532, + "logps/rejected": -1.7068842649459839, + "loss": 0.6304, + "nll_loss": 0.1575794219970703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.016663165704813e-05, + "rewards/margins": 0.17066825926303864, + "rewards/rejected": -0.1706884205341339, + "step": 8861 + }, + { + "epoch": 6.12863070539419, + "grad_norm": 9.449673652648926, + "learning_rate": 2.1507607192254497e-05, + "log_odds_chosen": 10.45321273803711, + "log_odds_ratio": -5.564562525250949e-05, + "logits/chosen": -0.11017411947250366, + "logits/rejected": -0.21915608644485474, + "logps/chosen": -0.0007298594573512673, + "logps/rejected": -2.18619966506958, + "loss": 0.4913, + "nll_loss": 0.12282784283161163, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.298595301108435e-05, + "rewards/margins": 0.21854698657989502, + "rewards/rejected": -0.21861997246742249, + "step": 8862 + }, + { + "epoch": 6.129322268326418, + "grad_norm": 6.794439315795898, + "learning_rate": 2.1503765175964346e-05, + "log_odds_chosen": 9.2752103805542, + "log_odds_ratio": -0.007447497453540564, + "logits/chosen": -0.42011135816574097, + "logits/rejected": -0.4615238606929779, + "logps/chosen": -0.003412168473005295, + "logps/rejected": -1.340742826461792, + "loss": 0.5112, + "nll_loss": 0.127059668302536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00034121685894206166, + "rewards/margins": 0.13373306393623352, + "rewards/rejected": -0.13407427072525024, + "step": 8863 + }, + { + "epoch": 6.130013831258645, + "grad_norm": 6.756525039672852, + "learning_rate": 2.1499923159674198e-05, + "log_odds_chosen": 10.04014778137207, + "log_odds_ratio": -0.0001670851925155148, + "logits/chosen": -0.8022230863571167, + "logits/rejected": -0.8228714466094971, + "logps/chosen": -0.0008743289508856833, + "logps/rejected": -1.7404072284698486, + "loss": 0.6535, + "nll_loss": 0.16336099803447723, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.743289799895138e-05, + "rewards/margins": 0.17395329475402832, + "rewards/rejected": -0.17404071986675262, + "step": 8864 + }, + { + "epoch": 6.130705394190872, + "grad_norm": 4.477063179016113, + "learning_rate": 2.149608114338405e-05, + "log_odds_chosen": 10.5889253616333, + "log_odds_ratio": -0.00010871638369280845, + "logits/chosen": -0.7607325315475464, + "logits/rejected": -0.8645247220993042, + "logps/chosen": -0.00038288458017632365, + "logps/rejected": -2.266031265258789, + "loss": 0.5362, + "nll_loss": 0.1340354084968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.828846092801541e-05, + "rewards/margins": 0.22656482458114624, + "rewards/rejected": -0.22660312056541443, + "step": 8865 + }, + { + "epoch": 6.131396957123099, + "grad_norm": 6.994044303894043, + "learning_rate": 2.14922391270939e-05, + "log_odds_chosen": 10.54684829711914, + "log_odds_ratio": -5.5355423683067784e-05, + "logits/chosen": -0.7427743673324585, + "logits/rejected": -0.6657612919807434, + "logps/chosen": -0.00029647996416315436, + "logps/rejected": -1.9989583492279053, + "loss": 0.5749, + "nll_loss": 0.1437259316444397, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.964799750770908e-05, + "rewards/margins": 0.1998661756515503, + "rewards/rejected": -0.19989581406116486, + "step": 8866 + }, + { + "epoch": 6.1320885200553255, + "grad_norm": 6.7296671867370605, + "learning_rate": 2.1488397110803752e-05, + "log_odds_chosen": 10.42095947265625, + "log_odds_ratio": -8.404959953622892e-05, + "logits/chosen": -0.23315714299678802, + "logits/rejected": -0.3014877736568451, + "logps/chosen": -0.0001869620755314827, + "logps/rejected": -1.8798887729644775, + "loss": 0.6162, + "nll_loss": 0.15404073894023895, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8696206097956747e-05, + "rewards/margins": 0.18797020614147186, + "rewards/rejected": -0.18798887729644775, + "step": 8867 + }, + { + "epoch": 6.132780082987552, + "grad_norm": 5.581561088562012, + "learning_rate": 2.1484555094513604e-05, + "log_odds_chosen": 10.633293151855469, + "log_odds_ratio": -0.0003565740189515054, + "logits/chosen": -0.2947113513946533, + "logits/rejected": -0.3295404613018036, + "logps/chosen": -0.0009969068923965096, + "logps/rejected": -2.78654146194458, + "loss": 0.6332, + "nll_loss": 0.15827523171901703, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.969068923965096e-05, + "rewards/margins": 0.2785544693470001, + "rewards/rejected": -0.2786541283130646, + "step": 8868 + }, + { + "epoch": 6.133471645919779, + "grad_norm": 9.132095336914062, + "learning_rate": 2.1480713078223453e-05, + "log_odds_chosen": 10.777402877807617, + "log_odds_ratio": -6.423494778573513e-05, + "logits/chosen": -0.22323840856552124, + "logits/rejected": -0.3039165139198303, + "logps/chosen": -0.0002668887027539313, + "logps/rejected": -2.220047950744629, + "loss": 0.5278, + "nll_loss": 0.13193866610527039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.668887100298889e-05, + "rewards/margins": 0.2219781130552292, + "rewards/rejected": -0.22200478613376617, + "step": 8869 + }, + { + "epoch": 6.134163208852006, + "grad_norm": 7.941714763641357, + "learning_rate": 2.1476871061933303e-05, + "log_odds_chosen": 10.74931526184082, + "log_odds_ratio": -0.00013049867993686348, + "logits/chosen": -0.3583360016345978, + "logits/rejected": -0.34637540578842163, + "logps/chosen": -0.0002213473489973694, + "logps/rejected": -2.156782627105713, + "loss": 0.5839, + "nll_loss": 0.1459662914276123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.213473453593906e-05, + "rewards/margins": 0.2156561315059662, + "rewards/rejected": -0.21567825973033905, + "step": 8870 + }, + { + "epoch": 6.134854771784233, + "grad_norm": 5.933559894561768, + "learning_rate": 2.147302904564315e-05, + "log_odds_chosen": 10.360872268676758, + "log_odds_ratio": -3.608822953538038e-05, + "logits/chosen": -0.39610740542411804, + "logits/rejected": -0.4380618929862976, + "logps/chosen": -0.00028375934925861657, + "logps/rejected": -1.6616911888122559, + "loss": 0.4978, + "nll_loss": 0.124452605843544, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.837593638105318e-05, + "rewards/margins": 0.16614075005054474, + "rewards/rejected": -0.16616912186145782, + "step": 8871 + }, + { + "epoch": 6.13554633471646, + "grad_norm": 7.322857856750488, + "learning_rate": 2.1469187029353004e-05, + "log_odds_chosen": 9.55153751373291, + "log_odds_ratio": -0.0011159204877912998, + "logits/chosen": -0.08747230470180511, + "logits/rejected": -0.0702173113822937, + "logps/chosen": -0.0011271832045167685, + "logps/rejected": -1.8637242317199707, + "loss": 0.5021, + "nll_loss": 0.12542061507701874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011271832045167685, + "rewards/margins": 0.18625971674919128, + "rewards/rejected": -0.18637242913246155, + "step": 8872 + }, + { + "epoch": 6.136237897648686, + "grad_norm": 3.4511232376098633, + "learning_rate": 2.1465345013062856e-05, + "log_odds_chosen": 9.75981616973877, + "log_odds_ratio": -0.00038340777973644435, + "logits/chosen": -0.4628656804561615, + "logits/rejected": -0.5771580934524536, + "logps/chosen": -0.0003525334468577057, + "logps/rejected": -1.7929129600524902, + "loss": 0.5708, + "nll_loss": 0.14265519380569458, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5253346140962094e-05, + "rewards/margins": 0.17925603687763214, + "rewards/rejected": -0.17929129302501678, + "step": 8873 + }, + { + "epoch": 6.136929460580913, + "grad_norm": 6.297825336456299, + "learning_rate": 2.1461502996772706e-05, + "log_odds_chosen": 9.935726165771484, + "log_odds_ratio": -0.00023815446184016764, + "logits/chosen": -0.35457730293273926, + "logits/rejected": -0.4085862338542938, + "logps/chosen": -0.0007369728991761804, + "logps/rejected": -1.723237156867981, + "loss": 0.5132, + "nll_loss": 0.12827615439891815, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.369727973127738e-05, + "rewards/margins": 0.17225001752376556, + "rewards/rejected": -0.17232371866703033, + "step": 8874 + }, + { + "epoch": 6.13762102351314, + "grad_norm": 8.161831855773926, + "learning_rate": 2.1457660980482558e-05, + "log_odds_chosen": 10.419249534606934, + "log_odds_ratio": -0.00042370465234853327, + "logits/chosen": -0.538821280002594, + "logits/rejected": -0.49489909410476685, + "logps/chosen": -0.0037653190083801746, + "logps/rejected": -2.5501937866210938, + "loss": 0.6721, + "nll_loss": 0.16797395050525665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003765319415833801, + "rewards/margins": 0.25464287400245667, + "rewards/rejected": -0.2550193965435028, + "step": 8875 + }, + { + "epoch": 6.138312586445367, + "grad_norm": 6.556025505065918, + "learning_rate": 2.145381896419241e-05, + "log_odds_chosen": 11.062671661376953, + "log_odds_ratio": -3.0943039746489376e-05, + "logits/chosen": -0.2883804440498352, + "logits/rejected": -0.2896476686000824, + "logps/chosen": -0.0001753466494847089, + "logps/rejected": -2.1570873260498047, + "loss": 0.6934, + "nll_loss": 0.17334787547588348, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.753466494847089e-05, + "rewards/margins": 0.21569117903709412, + "rewards/rejected": -0.21570873260498047, + "step": 8876 + }, + { + "epoch": 6.139004149377594, + "grad_norm": 7.59773063659668, + "learning_rate": 2.144997694790226e-05, + "log_odds_chosen": 9.487346649169922, + "log_odds_ratio": -0.00012805784353986382, + "logits/chosen": -0.7149614691734314, + "logits/rejected": -0.7343244552612305, + "logps/chosen": -0.0005941563285887241, + "logps/rejected": -1.285873532295227, + "loss": 0.6787, + "nll_loss": 0.1696619689464569, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.941562994848937e-05, + "rewards/margins": 0.1285279393196106, + "rewards/rejected": -0.12858736515045166, + "step": 8877 + }, + { + "epoch": 6.139695712309821, + "grad_norm": 10.843826293945312, + "learning_rate": 2.1446134931612112e-05, + "log_odds_chosen": 10.153909683227539, + "log_odds_ratio": -0.00013622343249153346, + "logits/chosen": -0.4070616364479065, + "logits/rejected": -0.42122650146484375, + "logps/chosen": -0.00043185707181692123, + "logps/rejected": -1.9670538902282715, + "loss": 0.7594, + "nll_loss": 0.18982651829719543, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3185707909287885e-05, + "rewards/margins": 0.19666221737861633, + "rewards/rejected": -0.1967054009437561, + "step": 8878 + }, + { + "epoch": 6.140387275242047, + "grad_norm": 7.272959232330322, + "learning_rate": 2.144229291532196e-05, + "log_odds_chosen": 10.480998039245605, + "log_odds_ratio": -0.000361002457793802, + "logits/chosen": -0.2630500793457031, + "logits/rejected": -0.3229910731315613, + "logps/chosen": -0.000636959564872086, + "logps/rejected": -2.146204948425293, + "loss": 0.709, + "nll_loss": 0.17721214890480042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.369595212163404e-05, + "rewards/margins": 0.21455681324005127, + "rewards/rejected": -0.21462051570415497, + "step": 8879 + }, + { + "epoch": 6.141078838174274, + "grad_norm": 10.42378044128418, + "learning_rate": 2.1438450899031813e-05, + "log_odds_chosen": 10.40891170501709, + "log_odds_ratio": -0.00029213528614491224, + "logits/chosen": -0.3548663854598999, + "logits/rejected": -0.32809650897979736, + "logps/chosen": -0.0004571013560052961, + "logps/rejected": -1.7706871032714844, + "loss": 0.8349, + "nll_loss": 0.20870471000671387, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5710137783316895e-05, + "rewards/margins": 0.1770229935646057, + "rewards/rejected": -0.17706872522830963, + "step": 8880 + }, + { + "epoch": 6.141770401106501, + "grad_norm": 9.024090766906738, + "learning_rate": 2.1434608882741662e-05, + "log_odds_chosen": 9.728191375732422, + "log_odds_ratio": -0.0002653269039001316, + "logits/chosen": -0.20413929224014282, + "logits/rejected": -0.2840261459350586, + "logps/chosen": -0.0007140958332456648, + "logps/rejected": -1.912898302078247, + "loss": 0.8936, + "nll_loss": 0.22338436543941498, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.140958041418344e-05, + "rewards/margins": 0.19121842086315155, + "rewards/rejected": -0.19128982722759247, + "step": 8881 + }, + { + "epoch": 6.142461964038728, + "grad_norm": 6.755743503570557, + "learning_rate": 2.1430766866451515e-05, + "log_odds_chosen": 10.81404972076416, + "log_odds_ratio": -0.0001417073654010892, + "logits/chosen": -0.343039870262146, + "logits/rejected": -0.28404492139816284, + "logps/chosen": -0.0005510105402208865, + "logps/rejected": -2.346940517425537, + "loss": 0.7738, + "nll_loss": 0.19343721866607666, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.510105620487593e-05, + "rewards/margins": 0.23463895916938782, + "rewards/rejected": -0.23469404876232147, + "step": 8882 + }, + { + "epoch": 6.143153526970955, + "grad_norm": 5.670907974243164, + "learning_rate": 2.1426924850161364e-05, + "log_odds_chosen": 10.498161315917969, + "log_odds_ratio": -0.00018444280431140214, + "logits/chosen": -0.24533668160438538, + "logits/rejected": -0.26434126496315, + "logps/chosen": -0.0005119394045323133, + "logps/rejected": -2.2618327140808105, + "loss": 0.4762, + "nll_loss": 0.11902209371328354, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.119394336361438e-05, + "rewards/margins": 0.2261320799589157, + "rewards/rejected": -0.22618328034877777, + "step": 8883 + }, + { + "epoch": 6.143845089903182, + "grad_norm": 4.66843318939209, + "learning_rate": 2.1423082833871216e-05, + "log_odds_chosen": 10.099947929382324, + "log_odds_ratio": -0.00010283004667144269, + "logits/chosen": -0.21136608719825745, + "logits/rejected": -0.33208179473876953, + "logps/chosen": -0.0003027537022717297, + "logps/rejected": -1.6289397478103638, + "loss": 0.4111, + "nll_loss": 0.10277269780635834, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0275368771981448e-05, + "rewards/margins": 0.16286370158195496, + "rewards/rejected": -0.16289398074150085, + "step": 8884 + }, + { + "epoch": 6.144536652835408, + "grad_norm": 6.934423923492432, + "learning_rate": 2.141924081758107e-05, + "log_odds_chosen": 10.522510528564453, + "log_odds_ratio": -0.00021211769490037113, + "logits/chosen": -0.11151409894227982, + "logits/rejected": -0.2589746117591858, + "logps/chosen": -0.0003004320606123656, + "logps/rejected": -2.039461612701416, + "loss": 0.6197, + "nll_loss": 0.15489232540130615, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0043207516428083e-05, + "rewards/margins": 0.20391613245010376, + "rewards/rejected": -0.20394617319107056, + "step": 8885 + }, + { + "epoch": 6.145228215767635, + "grad_norm": 6.154320240020752, + "learning_rate": 2.1415398801290918e-05, + "log_odds_chosen": 10.276860237121582, + "log_odds_ratio": -0.00023392810544464737, + "logits/chosen": -0.30178678035736084, + "logits/rejected": -0.3277810215950012, + "logps/chosen": -0.00093194650253281, + "logps/rejected": -2.405626058578491, + "loss": 0.6904, + "nll_loss": 0.1725805401802063, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.319464879808947e-05, + "rewards/margins": 0.24046942591667175, + "rewards/rejected": -0.24056261777877808, + "step": 8886 + }, + { + "epoch": 6.145919778699862, + "grad_norm": 4.306264400482178, + "learning_rate": 2.141155678500077e-05, + "log_odds_chosen": 10.523026466369629, + "log_odds_ratio": -0.00015434774104505777, + "logits/chosen": -0.4924680292606354, + "logits/rejected": -0.5587255954742432, + "logps/chosen": -0.0001923997770063579, + "logps/rejected": -1.9562420845031738, + "loss": 0.4092, + "nll_loss": 0.10229238867759705, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9239978428231552e-05, + "rewards/margins": 0.19560497999191284, + "rewards/rejected": -0.1956242024898529, + "step": 8887 + }, + { + "epoch": 6.146611341632089, + "grad_norm": 4.622827529907227, + "learning_rate": 2.1407714768710623e-05, + "log_odds_chosen": 9.643756866455078, + "log_odds_ratio": -0.00021520015434361994, + "logits/chosen": -0.06347708404064178, + "logits/rejected": -0.053321439772844315, + "logps/chosen": -0.0004967688000760972, + "logps/rejected": -1.9408763647079468, + "loss": 0.6166, + "nll_loss": 0.15413503348827362, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9676880735205486e-05, + "rewards/margins": 0.19403798878192902, + "rewards/rejected": -0.1940876543521881, + "step": 8888 + }, + { + "epoch": 6.147302904564316, + "grad_norm": 6.192646026611328, + "learning_rate": 2.1403872752420472e-05, + "log_odds_chosen": 10.072959899902344, + "log_odds_ratio": -0.0002504573785699904, + "logits/chosen": -0.3247324824333191, + "logits/rejected": -0.39910486340522766, + "logps/chosen": -0.0004856810555793345, + "logps/rejected": -1.7511827945709229, + "loss": 0.5946, + "nll_loss": 0.14862856268882751, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8568108468316495e-05, + "rewards/margins": 0.17506971955299377, + "rewards/rejected": -0.17511829733848572, + "step": 8889 + }, + { + "epoch": 6.1479944674965425, + "grad_norm": 4.0776543617248535, + "learning_rate": 2.140003073613032e-05, + "log_odds_chosen": 11.533493041992188, + "log_odds_ratio": -4.676056050811894e-05, + "logits/chosen": -0.2515339255332947, + "logits/rejected": -0.274416983127594, + "logps/chosen": -0.00019514464656822383, + "logps/rejected": -2.8244099617004395, + "loss": 0.9146, + "nll_loss": 0.2286345511674881, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9514465748216026e-05, + "rewards/margins": 0.2824214696884155, + "rewards/rejected": -0.28244102001190186, + "step": 8890 + }, + { + "epoch": 6.148686030428769, + "grad_norm": 5.191386699676514, + "learning_rate": 2.1396188719840173e-05, + "log_odds_chosen": 11.381552696228027, + "log_odds_ratio": -1.6261165001196787e-05, + "logits/chosen": -0.19675296545028687, + "logits/rejected": -0.3034244179725647, + "logps/chosen": -0.0002262248599436134, + "logps/rejected": -2.7013492584228516, + "loss": 0.475, + "nll_loss": 0.11874811351299286, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.262248563056346e-05, + "rewards/margins": 0.2701122760772705, + "rewards/rejected": -0.27013492584228516, + "step": 8891 + }, + { + "epoch": 6.149377593360996, + "grad_norm": 5.449078559875488, + "learning_rate": 2.1392346703550022e-05, + "log_odds_chosen": 12.33166217803955, + "log_odds_ratio": -1.2560204595502e-05, + "logits/chosen": -0.5773420929908752, + "logits/rejected": -0.6253620386123657, + "logps/chosen": -0.00010516971087781712, + "logps/rejected": -2.8040151596069336, + "loss": 0.8042, + "nll_loss": 0.20104244351387024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0516971997276414e-05, + "rewards/margins": 0.28039100766181946, + "rewards/rejected": -0.2804015278816223, + "step": 8892 + }, + { + "epoch": 6.150069156293223, + "grad_norm": 6.951034069061279, + "learning_rate": 2.1388504687259875e-05, + "log_odds_chosen": 10.728132247924805, + "log_odds_ratio": -5.662855619448237e-05, + "logits/chosen": -0.44142085313796997, + "logits/rejected": -0.5342628955841064, + "logps/chosen": -0.000431107881013304, + "logps/rejected": -2.6346278190612793, + "loss": 0.5627, + "nll_loss": 0.14066919684410095, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.311078737373464e-05, + "rewards/margins": 0.26341962814331055, + "rewards/rejected": -0.26346278190612793, + "step": 8893 + }, + { + "epoch": 6.15076071922545, + "grad_norm": 5.496524810791016, + "learning_rate": 2.1384662670969727e-05, + "log_odds_chosen": 10.09290599822998, + "log_odds_ratio": -0.00015022409206721932, + "logits/chosen": -0.41256386041641235, + "logits/rejected": -0.4960746169090271, + "logps/chosen": -0.00129657459910959, + "logps/rejected": -2.2923073768615723, + "loss": 0.4069, + "nll_loss": 0.1017158180475235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012965746282134205, + "rewards/margins": 0.2291010618209839, + "rewards/rejected": -0.22923073172569275, + "step": 8894 + }, + { + "epoch": 6.151452282157677, + "grad_norm": 4.6340651512146, + "learning_rate": 2.1380820654679576e-05, + "log_odds_chosen": 9.381574630737305, + "log_odds_ratio": -0.00721169076859951, + "logits/chosen": -0.3064397871494293, + "logits/rejected": -0.21976573765277863, + "logps/chosen": -0.004331896547228098, + "logps/rejected": -1.9964240789413452, + "loss": 0.4484, + "nll_loss": 0.11139068752527237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004331897071097046, + "rewards/margins": 0.19920922815799713, + "rewards/rejected": -0.19964241981506348, + "step": 8895 + }, + { + "epoch": 6.1521438450899035, + "grad_norm": 5.038140296936035, + "learning_rate": 2.137697863838943e-05, + "log_odds_chosen": 10.987846374511719, + "log_odds_ratio": -4.962710227118805e-05, + "logits/chosen": -0.20362409949302673, + "logits/rejected": -0.23671866953372955, + "logps/chosen": -0.00026456365594640374, + "logps/rejected": -2.0418331623077393, + "loss": 0.5224, + "nll_loss": 0.13059036433696747, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6456365958438255e-05, + "rewards/margins": 0.20415686070919037, + "rewards/rejected": -0.20418329536914825, + "step": 8896 + }, + { + "epoch": 6.15283540802213, + "grad_norm": 7.108769416809082, + "learning_rate": 2.137313662209928e-05, + "log_odds_chosen": 9.597886085510254, + "log_odds_ratio": -0.0005740196211263537, + "logits/chosen": -0.19212770462036133, + "logits/rejected": -0.33647072315216064, + "logps/chosen": -0.0010434961877763271, + "logps/rejected": -1.7020900249481201, + "loss": 0.6459, + "nll_loss": 0.16142773628234863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010434961586724967, + "rewards/margins": 0.17010465264320374, + "rewards/rejected": -0.17020899057388306, + "step": 8897 + }, + { + "epoch": 6.153526970954357, + "grad_norm": 5.511011600494385, + "learning_rate": 2.136929460580913e-05, + "log_odds_chosen": 10.08315658569336, + "log_odds_ratio": -0.0014666010392829776, + "logits/chosen": -0.0493057519197464, + "logits/rejected": -0.06691646575927734, + "logps/chosen": -0.0014571938663721085, + "logps/rejected": -1.961158037185669, + "loss": 0.7142, + "nll_loss": 0.17840927839279175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014571940118912607, + "rewards/margins": 0.1959700882434845, + "rewards/rejected": -0.19611582159996033, + "step": 8898 + }, + { + "epoch": 6.154218533886584, + "grad_norm": 7.098496913909912, + "learning_rate": 2.136545258951898e-05, + "log_odds_chosen": 10.316756248474121, + "log_odds_ratio": -0.00021556735737249255, + "logits/chosen": -0.46447503566741943, + "logits/rejected": -0.5707484483718872, + "logps/chosen": -0.00018599169561639428, + "logps/rejected": -1.7750988006591797, + "loss": 0.8972, + "nll_loss": 0.2242662012577057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.859917028923519e-05, + "rewards/margins": 0.17749127745628357, + "rewards/rejected": -0.1775098741054535, + "step": 8899 + }, + { + "epoch": 6.154910096818811, + "grad_norm": 11.79477596282959, + "learning_rate": 2.1361610573228832e-05, + "log_odds_chosen": 11.65049934387207, + "log_odds_ratio": -1.220466674567433e-05, + "logits/chosen": -0.15692803263664246, + "logits/rejected": -0.1678299605846405, + "logps/chosen": -0.00012023936142213643, + "logps/rejected": -2.5337300300598145, + "loss": 0.7389, + "nll_loss": 0.18471355736255646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2023936506011523e-05, + "rewards/margins": 0.2533610165119171, + "rewards/rejected": -0.25337302684783936, + "step": 8900 + }, + { + "epoch": 6.155601659751038, + "grad_norm": 5.2032318115234375, + "learning_rate": 2.135776855693868e-05, + "log_odds_chosen": 11.28200912475586, + "log_odds_ratio": -6.505651253974065e-05, + "logits/chosen": -0.3721749484539032, + "logits/rejected": -0.4769337773323059, + "logps/chosen": -0.0004726629122160375, + "logps/rejected": -3.120828151702881, + "loss": 0.508, + "nll_loss": 0.12698745727539062, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.72662941319868e-05, + "rewards/margins": 0.31203556060791016, + "rewards/rejected": -0.31208279728889465, + "step": 8901 + }, + { + "epoch": 6.1562932226832645, + "grad_norm": 7.075010776519775, + "learning_rate": 2.1353926540648533e-05, + "log_odds_chosen": 10.750005722045898, + "log_odds_ratio": -7.514657045248896e-05, + "logits/chosen": -0.44335147738456726, + "logits/rejected": -0.45919889211654663, + "logps/chosen": -0.0005746853421442211, + "logps/rejected": -2.118549346923828, + "loss": 0.5021, + "nll_loss": 0.125524640083313, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.74685400351882e-05, + "rewards/margins": 0.21179747581481934, + "rewards/rejected": -0.211854949593544, + "step": 8902 + }, + { + "epoch": 6.156984785615491, + "grad_norm": 6.851710796356201, + "learning_rate": 2.1350084524358386e-05, + "log_odds_chosen": 10.875438690185547, + "log_odds_ratio": -7.797985745128244e-05, + "logits/chosen": -0.32115495204925537, + "logits/rejected": -0.40007078647613525, + "logps/chosen": -0.0004854054714087397, + "logps/rejected": -2.01396107673645, + "loss": 0.5387, + "nll_loss": 0.13466191291809082, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.854054714087397e-05, + "rewards/margins": 0.20134755969047546, + "rewards/rejected": -0.20139610767364502, + "step": 8903 + }, + { + "epoch": 6.157676348547718, + "grad_norm": 10.241081237792969, + "learning_rate": 2.1346242508068235e-05, + "log_odds_chosen": 10.226788520812988, + "log_odds_ratio": -0.00022474183060694486, + "logits/chosen": -0.7302020192146301, + "logits/rejected": -0.7752872109413147, + "logps/chosen": -0.00029912887839600444, + "logps/rejected": -1.4040298461914062, + "loss": 0.4306, + "nll_loss": 0.10763468593358994, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.991289147757925e-05, + "rewards/margins": 0.1403730809688568, + "rewards/rejected": -0.14040298759937286, + "step": 8904 + }, + { + "epoch": 6.158367911479945, + "grad_norm": 5.993839263916016, + "learning_rate": 2.1342400491778087e-05, + "log_odds_chosen": 10.475533485412598, + "log_odds_ratio": -7.221288979053497e-05, + "logits/chosen": -0.3128795027732849, + "logits/rejected": -0.3192360997200012, + "logps/chosen": -0.00036443519638851285, + "logps/rejected": -2.456841468811035, + "loss": 0.4818, + "nll_loss": 0.12044843286275864, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.644351818365976e-05, + "rewards/margins": 0.24564771354198456, + "rewards/rejected": -0.24568414688110352, + "step": 8905 + }, + { + "epoch": 6.159059474412172, + "grad_norm": 7.920623302459717, + "learning_rate": 2.133855847548794e-05, + "log_odds_chosen": 9.961012840270996, + "log_odds_ratio": -0.00036133910180069506, + "logits/chosen": -0.31349772214889526, + "logits/rejected": -0.35793423652648926, + "logps/chosen": -0.0008964145672507584, + "logps/rejected": -1.9681870937347412, + "loss": 0.4843, + "nll_loss": 0.12103552371263504, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.964145672507584e-05, + "rewards/margins": 0.19672906398773193, + "rewards/rejected": -0.19681870937347412, + "step": 8906 + }, + { + "epoch": 6.159751037344399, + "grad_norm": 5.189927101135254, + "learning_rate": 2.133471645919779e-05, + "log_odds_chosen": 10.290582656860352, + "log_odds_ratio": -5.6040276831481606e-05, + "logits/chosen": -0.4642256498336792, + "logits/rejected": -0.5244139432907104, + "logps/chosen": -0.0004029185511171818, + "logps/rejected": -2.1179988384246826, + "loss": 0.4601, + "nll_loss": 0.11502990871667862, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0291859477292746e-05, + "rewards/margins": 0.21175959706306458, + "rewards/rejected": -0.21179988980293274, + "step": 8907 + }, + { + "epoch": 6.1604426002766255, + "grad_norm": 7.469080924987793, + "learning_rate": 2.1330874442907638e-05, + "log_odds_chosen": 10.109557151794434, + "log_odds_ratio": -6.607791874557734e-05, + "logits/chosen": -0.637088418006897, + "logits/rejected": -0.6275466680526733, + "logps/chosen": -0.0007182995905168355, + "logps/rejected": -1.944756269454956, + "loss": 0.5628, + "nll_loss": 0.14069898426532745, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.18299561413005e-05, + "rewards/margins": 0.19440379738807678, + "rewards/rejected": -0.19447562098503113, + "step": 8908 + }, + { + "epoch": 6.161134163208852, + "grad_norm": 5.130368232727051, + "learning_rate": 2.132703242661749e-05, + "log_odds_chosen": 11.349884033203125, + "log_odds_ratio": -2.6829929993255064e-05, + "logits/chosen": -0.1395387351512909, + "logits/rejected": -0.07001563906669617, + "logps/chosen": -0.00030773127218708396, + "logps/rejected": -2.8125007152557373, + "loss": 1.1286, + "nll_loss": 0.28215882182121277, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.077312794630416e-05, + "rewards/margins": 0.2812193036079407, + "rewards/rejected": -0.28125008940696716, + "step": 8909 + }, + { + "epoch": 6.161825726141079, + "grad_norm": 5.060723781585693, + "learning_rate": 2.132319041032734e-05, + "log_odds_chosen": 10.80126953125, + "log_odds_ratio": -3.217908306396566e-05, + "logits/chosen": -0.5110337138175964, + "logits/rejected": -0.5838327407836914, + "logps/chosen": -0.00011997703404631466, + "logps/rejected": -1.7408264875411987, + "loss": 1.1244, + "nll_loss": 0.28110799193382263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1997703040833585e-05, + "rewards/margins": 0.17407065629959106, + "rewards/rejected": -0.1740826517343521, + "step": 8910 + }, + { + "epoch": 6.162517289073306, + "grad_norm": 8.353875160217285, + "learning_rate": 2.131934839403719e-05, + "log_odds_chosen": 10.599270820617676, + "log_odds_ratio": -0.0002233553968835622, + "logits/chosen": -0.5864130258560181, + "logits/rejected": -0.667081356048584, + "logps/chosen": -0.00040467121289111674, + "logps/rejected": -2.2740707397460938, + "loss": 0.737, + "nll_loss": 0.18422240018844604, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.04671227443032e-05, + "rewards/margins": 0.2273666113615036, + "rewards/rejected": -0.2274070680141449, + "step": 8911 + }, + { + "epoch": 6.163208852005533, + "grad_norm": 7.310943603515625, + "learning_rate": 2.1315506377747044e-05, + "log_odds_chosen": 9.501506805419922, + "log_odds_ratio": -0.000398534961277619, + "logits/chosen": -0.3367373049259186, + "logits/rejected": -0.38235363364219666, + "logps/chosen": -0.0006585284136235714, + "logps/rejected": -1.6440269947052002, + "loss": 0.5572, + "nll_loss": 0.13925069570541382, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.585283699678257e-05, + "rewards/margins": 0.16433684527873993, + "rewards/rejected": -0.16440270841121674, + "step": 8912 + }, + { + "epoch": 6.16390041493776, + "grad_norm": 5.013652324676514, + "learning_rate": 2.1311664361456893e-05, + "log_odds_chosen": 11.695579528808594, + "log_odds_ratio": -1.577230796101503e-05, + "logits/chosen": -0.5814932584762573, + "logits/rejected": -0.5627788305282593, + "logps/chosen": -0.00018127662769984454, + "logps/rejected": -2.960634231567383, + "loss": 0.6214, + "nll_loss": 0.155350923538208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8127662769984454e-05, + "rewards/margins": 0.29604530334472656, + "rewards/rejected": -0.29606345295906067, + "step": 8913 + }, + { + "epoch": 6.1645919778699865, + "grad_norm": 9.031291007995605, + "learning_rate": 2.1307822345166746e-05, + "log_odds_chosen": 11.528639793395996, + "log_odds_ratio": -2.0978211978217587e-05, + "logits/chosen": -0.34808099269866943, + "logits/rejected": -0.4555736482143402, + "logps/chosen": -0.00028040894540026784, + "logps/rejected": -2.9485039710998535, + "loss": 1.3101, + "nll_loss": 0.3275301158428192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.804089308483526e-05, + "rewards/margins": 0.2948223352432251, + "rewards/rejected": -0.2948504090309143, + "step": 8914 + }, + { + "epoch": 6.165283540802213, + "grad_norm": 5.164880752563477, + "learning_rate": 2.1303980328876598e-05, + "log_odds_chosen": 10.544919967651367, + "log_odds_ratio": -0.00013954663882032037, + "logits/chosen": 0.003693707287311554, + "logits/rejected": -0.03545793890953064, + "logps/chosen": -0.00025245817960239947, + "logps/rejected": -2.0628929138183594, + "loss": 0.462, + "nll_loss": 0.11548123508691788, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.524581941543147e-05, + "rewards/margins": 0.20626406371593475, + "rewards/rejected": -0.20628932118415833, + "step": 8915 + }, + { + "epoch": 6.16597510373444, + "grad_norm": 4.7397942543029785, + "learning_rate": 2.1300138312586447e-05, + "log_odds_chosen": 10.250405311584473, + "log_odds_ratio": -8.911825716495514e-05, + "logits/chosen": -0.23642602562904358, + "logits/rejected": -0.3013024628162384, + "logps/chosen": -0.0003032445383723825, + "logps/rejected": -1.7590906620025635, + "loss": 0.5796, + "nll_loss": 0.14488404989242554, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0324456020025536e-05, + "rewards/margins": 0.17587874829769135, + "rewards/rejected": -0.17590907216072083, + "step": 8916 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 5.335708141326904, + "learning_rate": 2.1296296296296296e-05, + "log_odds_chosen": 10.717148780822754, + "log_odds_ratio": -7.697472028667107e-05, + "logits/chosen": -0.5630709528923035, + "logits/rejected": -0.6455743312835693, + "logps/chosen": -0.0003123174246866256, + "logps/rejected": -2.522401809692383, + "loss": 0.587, + "nll_loss": 0.14673317968845367, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.123174246866256e-05, + "rewards/margins": 0.252208948135376, + "rewards/rejected": -0.2522401809692383, + "step": 8917 + }, + { + "epoch": 6.167358229598894, + "grad_norm": 6.6418867111206055, + "learning_rate": 2.129245428000615e-05, + "log_odds_chosen": 10.8656587600708, + "log_odds_ratio": -0.00013898345059715211, + "logits/chosen": -0.466498464345932, + "logits/rejected": -0.48642832040786743, + "logps/chosen": -0.0003943238698411733, + "logps/rejected": -2.5864686965942383, + "loss": 0.8249, + "nll_loss": 0.20621107518672943, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9432390622096136e-05, + "rewards/margins": 0.2586074471473694, + "rewards/rejected": -0.2586469054222107, + "step": 8918 + }, + { + "epoch": 6.168049792531121, + "grad_norm": 7.2308526039123535, + "learning_rate": 2.1288612263715998e-05, + "log_odds_chosen": 10.071611404418945, + "log_odds_ratio": -0.0009543601772747934, + "logits/chosen": 0.01656036078929901, + "logits/rejected": -0.1201089546084404, + "logps/chosen": -0.0030811289325356483, + "logps/rejected": -2.486435651779175, + "loss": 0.8668, + "nll_loss": 0.2165965735912323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030811288161203265, + "rewards/margins": 0.24833545088768005, + "rewards/rejected": -0.24864357709884644, + "step": 8919 + }, + { + "epoch": 6.1687413554633475, + "grad_norm": 4.748799800872803, + "learning_rate": 2.128477024742585e-05, + "log_odds_chosen": 11.16125202178955, + "log_odds_ratio": -5.2723400585819036e-05, + "logits/chosen": -0.24413493275642395, + "logits/rejected": -0.3363977074623108, + "logps/chosen": -0.0002226789656560868, + "logps/rejected": -2.519413948059082, + "loss": 0.6246, + "nll_loss": 0.15613305568695068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.226789729320444e-05, + "rewards/margins": 0.251919150352478, + "rewards/rejected": -0.25194138288497925, + "step": 8920 + }, + { + "epoch": 6.169432918395574, + "grad_norm": 12.29122257232666, + "learning_rate": 2.1280928231135703e-05, + "log_odds_chosen": 9.678821563720703, + "log_odds_ratio": -6.596092134714127e-05, + "logits/chosen": -0.59602952003479, + "logits/rejected": -0.5715896487236023, + "logps/chosen": -0.00031758646946400404, + "logps/rejected": -1.6542768478393555, + "loss": 0.4999, + "nll_loss": 0.12496354430913925, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.175864912918769e-05, + "rewards/margins": 0.16539591550827026, + "rewards/rejected": -0.16542768478393555, + "step": 8921 + }, + { + "epoch": 6.170124481327801, + "grad_norm": 6.445894718170166, + "learning_rate": 2.127708621484555e-05, + "log_odds_chosen": 11.29629898071289, + "log_odds_ratio": -2.9858012567274272e-05, + "logits/chosen": -0.3435760736465454, + "logits/rejected": -0.36505529284477234, + "logps/chosen": -0.0001979438675334677, + "logps/rejected": -2.4094908237457275, + "loss": 0.6538, + "nll_loss": 0.16345354914665222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9794388208538294e-05, + "rewards/margins": 0.24092930555343628, + "rewards/rejected": -0.2409490942955017, + "step": 8922 + }, + { + "epoch": 6.170816044260028, + "grad_norm": 5.647541522979736, + "learning_rate": 2.1273244198555404e-05, + "log_odds_chosen": 11.45530891418457, + "log_odds_ratio": -1.723444256640505e-05, + "logits/chosen": -0.5780686736106873, + "logits/rejected": -0.6104984283447266, + "logps/chosen": -0.0002975270035676658, + "logps/rejected": -2.274785280227661, + "loss": 0.5488, + "nll_loss": 0.13718825578689575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9752698537777178e-05, + "rewards/margins": 0.22744877636432648, + "rewards/rejected": -0.2274785190820694, + "step": 8923 + }, + { + "epoch": 6.171507607192255, + "grad_norm": 6.984696388244629, + "learning_rate": 2.1269402182265257e-05, + "log_odds_chosen": 10.067432403564453, + "log_odds_ratio": -0.0020822491496801376, + "logits/chosen": -0.58390873670578, + "logits/rejected": -0.6317079067230225, + "logps/chosen": -0.00180349953006953, + "logps/rejected": -1.6590774059295654, + "loss": 0.7266, + "nll_loss": 0.18145343661308289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001803499471861869, + "rewards/margins": 0.1657274067401886, + "rewards/rejected": -0.16590775549411774, + "step": 8924 + }, + { + "epoch": 6.172199170124482, + "grad_norm": 6.403304576873779, + "learning_rate": 2.1265560165975106e-05, + "log_odds_chosen": 9.484448432922363, + "log_odds_ratio": -0.00029352630372159183, + "logits/chosen": -0.7287478446960449, + "logits/rejected": -0.7109928131103516, + "logps/chosen": -0.00045713261351920664, + "logps/rejected": -1.874360203742981, + "loss": 1.3442, + "nll_loss": 0.3360257148742676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.571326280711219e-05, + "rewards/margins": 0.1873903125524521, + "rewards/rejected": -0.18743601441383362, + "step": 8925 + }, + { + "epoch": 6.172890733056708, + "grad_norm": 6.790854454040527, + "learning_rate": 2.1261718149684955e-05, + "log_odds_chosen": 11.074459075927734, + "log_odds_ratio": -3.4488293749745935e-05, + "logits/chosen": -0.1194690614938736, + "logits/rejected": -0.39182788133621216, + "logps/chosen": -0.00013691597268916667, + "logps/rejected": -2.0365371704101562, + "loss": 0.6756, + "nll_loss": 0.16889050602912903, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.369159872410819e-05, + "rewards/margins": 0.20364001393318176, + "rewards/rejected": -0.20365369319915771, + "step": 8926 + }, + { + "epoch": 6.173582295988935, + "grad_norm": 5.842673301696777, + "learning_rate": 2.1257876133394807e-05, + "log_odds_chosen": 11.106877326965332, + "log_odds_ratio": -0.00012637543841265142, + "logits/chosen": -0.42743033170700073, + "logits/rejected": -0.4144943952560425, + "logps/chosen": -0.00023457163479179144, + "logps/rejected": -2.5342657566070557, + "loss": 1.133, + "nll_loss": 0.28323131799697876, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3457163479179144e-05, + "rewards/margins": 0.2534031271934509, + "rewards/rejected": -0.25342658162117004, + "step": 8927 + }, + { + "epoch": 6.174273858921162, + "grad_norm": 5.868600845336914, + "learning_rate": 2.1254034117104656e-05, + "log_odds_chosen": 10.483613967895508, + "log_odds_ratio": -3.841664147330448e-05, + "logits/chosen": -0.6506422162055969, + "logits/rejected": -0.6948019862174988, + "logps/chosen": -0.0002064492437057197, + "logps/rejected": -1.6701231002807617, + "loss": 0.5622, + "nll_loss": 0.14053946733474731, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0644925825763494e-05, + "rewards/margins": 0.1669916808605194, + "rewards/rejected": -0.1670123189687729, + "step": 8928 + }, + { + "epoch": 6.174965421853389, + "grad_norm": 6.216475009918213, + "learning_rate": 2.125019210081451e-05, + "log_odds_chosen": 10.1587495803833, + "log_odds_ratio": -8.018967491807416e-05, + "logits/chosen": -0.1432342678308487, + "logits/rejected": -0.23333218693733215, + "logps/chosen": -0.000549984397366643, + "logps/rejected": -2.5280392169952393, + "loss": 0.9891, + "nll_loss": 0.24727122485637665, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4998439736664295e-05, + "rewards/margins": 0.25274893641471863, + "rewards/rejected": -0.2528039216995239, + "step": 8929 + }, + { + "epoch": 6.175656984785616, + "grad_norm": 9.768843650817871, + "learning_rate": 2.124635008452436e-05, + "log_odds_chosen": 12.131757736206055, + "log_odds_ratio": -7.5639613896782976e-06, + "logits/chosen": -0.8124226331710815, + "logits/rejected": -0.8450635671615601, + "logps/chosen": -9.760225657373667e-05, + "logps/rejected": -2.8064818382263184, + "loss": 0.7664, + "nll_loss": 0.19160333275794983, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.760226021171547e-06, + "rewards/margins": 0.2806384265422821, + "rewards/rejected": -0.28064820170402527, + "step": 8930 + }, + { + "epoch": 6.176348547717843, + "grad_norm": 4.7237629890441895, + "learning_rate": 2.124250806823421e-05, + "log_odds_chosen": 10.60220718383789, + "log_odds_ratio": -0.0011569132329896092, + "logits/chosen": 0.03953489661216736, + "logits/rejected": 0.026873737573623657, + "logps/chosen": -0.0003797906683757901, + "logps/rejected": -2.4322993755340576, + "loss": 0.7953, + "nll_loss": 0.19870004057884216, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.797906538238749e-05, + "rewards/margins": 0.24319197237491608, + "rewards/rejected": -0.2432299554347992, + "step": 8931 + }, + { + "epoch": 6.177040110650069, + "grad_norm": 11.197449684143066, + "learning_rate": 2.1238666051944063e-05, + "log_odds_chosen": 10.218611717224121, + "log_odds_ratio": -4.907567927148193e-05, + "logits/chosen": -0.0748477429151535, + "logits/rejected": -0.015666451305150986, + "logps/chosen": -0.0007873535505495965, + "logps/rejected": -2.362804889678955, + "loss": 0.6111, + "nll_loss": 0.1527731865644455, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.873535651015118e-05, + "rewards/margins": 0.23620177805423737, + "rewards/rejected": -0.23628050088882446, + "step": 8932 + }, + { + "epoch": 6.177731673582296, + "grad_norm": 4.892848491668701, + "learning_rate": 2.1234824035653915e-05, + "log_odds_chosen": 9.225906372070312, + "log_odds_ratio": -0.0011509408941492438, + "logits/chosen": -0.2720785140991211, + "logits/rejected": -0.1349216103553772, + "logps/chosen": -0.001067000557668507, + "logps/rejected": -1.2327617406845093, + "loss": 0.7489, + "nll_loss": 0.18709851801395416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010670005576685071, + "rewards/margins": 0.12316948175430298, + "rewards/rejected": -0.12327618151903152, + "step": 8933 + }, + { + "epoch": 6.178423236514523, + "grad_norm": 4.760075569152832, + "learning_rate": 2.1230982019363764e-05, + "log_odds_chosen": 10.438307762145996, + "log_odds_ratio": -0.00014227713108994067, + "logits/chosen": -0.4747689962387085, + "logits/rejected": -0.5722922086715698, + "logps/chosen": -0.0007225346053019166, + "logps/rejected": -2.717395067214966, + "loss": 0.5699, + "nll_loss": 0.14247281849384308, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.225346780614927e-05, + "rewards/margins": 0.2716672420501709, + "rewards/rejected": -0.27173948287963867, + "step": 8934 + }, + { + "epoch": 6.17911479944675, + "grad_norm": 5.34441614151001, + "learning_rate": 2.1227140003073613e-05, + "log_odds_chosen": 10.103221893310547, + "log_odds_ratio": -0.00013670921907760203, + "logits/chosen": -0.5896474123001099, + "logits/rejected": -0.6704520583152771, + "logps/chosen": -0.0009348234161734581, + "logps/rejected": -1.964598536491394, + "loss": 0.6346, + "nll_loss": 0.1586480289697647, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.34823474381119e-05, + "rewards/margins": 0.19636636972427368, + "rewards/rejected": -0.19645985960960388, + "step": 8935 + }, + { + "epoch": 6.179806362378977, + "grad_norm": 7.0526933670043945, + "learning_rate": 2.1223297986783462e-05, + "log_odds_chosen": 11.831838607788086, + "log_odds_ratio": -1.501597034803126e-05, + "logits/chosen": -0.7381616830825806, + "logits/rejected": -0.796602725982666, + "logps/chosen": -0.00020313140703365207, + "logps/rejected": -2.7845511436462402, + "loss": 0.858, + "nll_loss": 0.21448718011379242, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.031314215855673e-05, + "rewards/margins": 0.2784348130226135, + "rewards/rejected": -0.27845510840415955, + "step": 8936 + }, + { + "epoch": 6.180497925311204, + "grad_norm": 10.067313194274902, + "learning_rate": 2.1219455970493315e-05, + "log_odds_chosen": 10.108768463134766, + "log_odds_ratio": -0.00024934698012657464, + "logits/chosen": -0.5302776098251343, + "logits/rejected": -0.5117073059082031, + "logps/chosen": -0.0007634575595147908, + "logps/rejected": -1.7113847732543945, + "loss": 0.6449, + "nll_loss": 0.16119928658008575, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.634575013071299e-05, + "rewards/margins": 0.1710621416568756, + "rewards/rejected": -0.1711384654045105, + "step": 8937 + }, + { + "epoch": 6.18118948824343, + "grad_norm": 9.377707481384277, + "learning_rate": 2.1215613954203167e-05, + "log_odds_chosen": 9.925009727478027, + "log_odds_ratio": -0.0011858758516609669, + "logits/chosen": -0.3536968231201172, + "logits/rejected": -0.45212236046791077, + "logps/chosen": -0.0005519436672329903, + "logps/rejected": -2.1006224155426025, + "loss": 1.1264, + "nll_loss": 0.2814798951148987, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.519437399925664e-05, + "rewards/margins": 0.21000702679157257, + "rewards/rejected": -0.21006223559379578, + "step": 8938 + }, + { + "epoch": 6.181881051175657, + "grad_norm": 7.41147518157959, + "learning_rate": 2.1211771937913016e-05, + "log_odds_chosen": 10.760738372802734, + "log_odds_ratio": -3.743385968846269e-05, + "logits/chosen": -0.44082483649253845, + "logits/rejected": -0.5385691523551941, + "logps/chosen": -0.00021795716020278633, + "logps/rejected": -2.0892586708068848, + "loss": 0.615, + "nll_loss": 0.15374669432640076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1795716747874394e-05, + "rewards/margins": 0.20890408754348755, + "rewards/rejected": -0.20892588794231415, + "step": 8939 + }, + { + "epoch": 6.182572614107884, + "grad_norm": 5.886544227600098, + "learning_rate": 2.120792992162287e-05, + "log_odds_chosen": 9.831256866455078, + "log_odds_ratio": -0.000369185465388, + "logits/chosen": -0.6895782351493835, + "logits/rejected": -0.7876423597335815, + "logps/chosen": -0.0003805963206104934, + "logps/rejected": -1.545591115951538, + "loss": 0.6755, + "nll_loss": 0.16883057355880737, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.805962478509173e-05, + "rewards/margins": 0.15452106297016144, + "rewards/rejected": -0.15455910563468933, + "step": 8940 + }, + { + "epoch": 6.183264177040111, + "grad_norm": 7.385894298553467, + "learning_rate": 2.120408790533272e-05, + "log_odds_chosen": 11.309425354003906, + "log_odds_ratio": -2.0229526853654534e-05, + "logits/chosen": 0.1541682481765747, + "logits/rejected": 0.07405371963977814, + "logps/chosen": -0.00015169157995842397, + "logps/rejected": -2.2509653568267822, + "loss": 1.0316, + "nll_loss": 0.25790539383888245, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5169158359640278e-05, + "rewards/margins": 0.22508138418197632, + "rewards/rejected": -0.22509652376174927, + "step": 8941 + }, + { + "epoch": 6.183955739972338, + "grad_norm": 22.69285774230957, + "learning_rate": 2.120024588904257e-05, + "log_odds_chosen": 11.588238716125488, + "log_odds_ratio": -4.003260255558416e-05, + "logits/chosen": -0.0912555530667305, + "logits/rejected": -0.20591235160827637, + "logps/chosen": -0.00015075062401592731, + "logps/rejected": -2.546660900115967, + "loss": 0.6722, + "nll_loss": 0.16805234551429749, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5075062947289553e-05, + "rewards/margins": 0.2546510100364685, + "rewards/rejected": -0.2546660602092743, + "step": 8942 + }, + { + "epoch": 6.1846473029045645, + "grad_norm": 5.821830749511719, + "learning_rate": 2.1196403872752422e-05, + "log_odds_chosen": 11.451372146606445, + "log_odds_ratio": -2.8443888368201442e-05, + "logits/chosen": -0.44433489441871643, + "logits/rejected": -0.5518810153007507, + "logps/chosen": -0.00010296277469024062, + "logps/rejected": -2.2659056186676025, + "loss": 0.6998, + "nll_loss": 0.17494189739227295, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.02962767414283e-05, + "rewards/margins": 0.22658026218414307, + "rewards/rejected": -0.22659055888652802, + "step": 8943 + }, + { + "epoch": 6.185338865836791, + "grad_norm": 5.573258876800537, + "learning_rate": 2.119256185646227e-05, + "log_odds_chosen": 10.505191802978516, + "log_odds_ratio": -8.956159581430256e-05, + "logits/chosen": -0.5057865381240845, + "logits/rejected": -0.5452699661254883, + "logps/chosen": -0.00032347580417990685, + "logps/rejected": -2.196836471557617, + "loss": 0.4975, + "nll_loss": 0.12436286360025406, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.234758332837373e-05, + "rewards/margins": 0.21965131163597107, + "rewards/rejected": -0.21968364715576172, + "step": 8944 + }, + { + "epoch": 6.186030428769018, + "grad_norm": 8.397379875183105, + "learning_rate": 2.118871984017212e-05, + "log_odds_chosen": 10.539834976196289, + "log_odds_ratio": -6.860620487714186e-05, + "logits/chosen": -0.0445815809071064, + "logits/rejected": -0.060250017791986465, + "logps/chosen": -0.00033489393536001444, + "logps/rejected": -2.3347554206848145, + "loss": 0.6105, + "nll_loss": 0.15261012315750122, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3489399356767535e-05, + "rewards/margins": 0.2334420382976532, + "rewards/rejected": -0.23347553610801697, + "step": 8945 + }, + { + "epoch": 6.186721991701245, + "grad_norm": 25.599557876586914, + "learning_rate": 2.1184877823881973e-05, + "log_odds_chosen": 10.395254135131836, + "log_odds_ratio": -7.127891149139032e-05, + "logits/chosen": -0.8309057950973511, + "logits/rejected": -0.9776581525802612, + "logps/chosen": -0.00036339106736704707, + "logps/rejected": -1.8712868690490723, + "loss": 0.6645, + "nll_loss": 0.16611744463443756, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.633910819189623e-05, + "rewards/margins": 0.1870923489332199, + "rewards/rejected": -0.1871286928653717, + "step": 8946 + }, + { + "epoch": 6.187413554633472, + "grad_norm": 8.522847175598145, + "learning_rate": 2.1181035807591825e-05, + "log_odds_chosen": 11.368314743041992, + "log_odds_ratio": -2.3690898160566576e-05, + "logits/chosen": -0.7033950090408325, + "logits/rejected": -0.5988879203796387, + "logps/chosen": -0.00023206671176012605, + "logps/rejected": -2.6347384452819824, + "loss": 0.6913, + "nll_loss": 0.17283479869365692, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3206672267406248e-05, + "rewards/margins": 0.26345065236091614, + "rewards/rejected": -0.26347386837005615, + "step": 8947 + }, + { + "epoch": 6.188105117565699, + "grad_norm": 6.687798976898193, + "learning_rate": 2.1177193791301674e-05, + "log_odds_chosen": 10.457347869873047, + "log_odds_ratio": -9.949246305041015e-05, + "logits/chosen": -0.6181915402412415, + "logits/rejected": -0.5568039417266846, + "logps/chosen": -0.0002464308054186404, + "logps/rejected": -1.782206654548645, + "loss": 1.0987, + "nll_loss": 0.2746756970882416, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4643079086672515e-05, + "rewards/margins": 0.1781960129737854, + "rewards/rejected": -0.17822065949440002, + "step": 8948 + }, + { + "epoch": 6.1887966804979255, + "grad_norm": 5.763555526733398, + "learning_rate": 2.1173351775011527e-05, + "log_odds_chosen": 9.946380615234375, + "log_odds_ratio": -7.830550021026284e-05, + "logits/chosen": -0.42581579089164734, + "logits/rejected": -0.44059014320373535, + "logps/chosen": -0.001672828570008278, + "logps/rejected": -1.8614200353622437, + "loss": 0.7887, + "nll_loss": 0.1971677839756012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001672828511800617, + "rewards/margins": 0.18597471714019775, + "rewards/rejected": -0.1861419975757599, + "step": 8949 + }, + { + "epoch": 6.189488243430152, + "grad_norm": 5.339982509613037, + "learning_rate": 2.116950975872138e-05, + "log_odds_chosen": 10.638383865356445, + "log_odds_ratio": -0.0004364659544080496, + "logits/chosen": -0.5033034086227417, + "logits/rejected": -0.5210574269294739, + "logps/chosen": -0.00019398736185394228, + "logps/rejected": -2.3503217697143555, + "loss": 0.6866, + "nll_loss": 0.1716083437204361, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9398736185394228e-05, + "rewards/margins": 0.23501276969909668, + "rewards/rejected": -0.23503217101097107, + "step": 8950 + }, + { + "epoch": 6.190179806362379, + "grad_norm": 5.419443130493164, + "learning_rate": 2.116566774243123e-05, + "log_odds_chosen": 11.213875770568848, + "log_odds_ratio": -2.873012635973282e-05, + "logits/chosen": -1.0778733491897583, + "logits/rejected": -1.0934827327728271, + "logps/chosen": -0.0009469868382439017, + "logps/rejected": -2.755531072616577, + "loss": 0.6279, + "nll_loss": 0.15698230266571045, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.469868382439017e-05, + "rewards/margins": 0.275458425283432, + "rewards/rejected": -0.2755531072616577, + "step": 8951 + }, + { + "epoch": 6.190871369294606, + "grad_norm": 9.115525245666504, + "learning_rate": 2.116182572614108e-05, + "log_odds_chosen": 11.05650806427002, + "log_odds_ratio": -0.00020289364329073578, + "logits/chosen": -0.9041266441345215, + "logits/rejected": -0.829599916934967, + "logps/chosen": -0.00024038890842348337, + "logps/rejected": -2.259377956390381, + "loss": 0.847, + "nll_loss": 0.21172687411308289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4038890842348337e-05, + "rewards/margins": 0.22591374814510345, + "rewards/rejected": -0.22593779861927032, + "step": 8952 + }, + { + "epoch": 6.191562932226833, + "grad_norm": 6.934108257293701, + "learning_rate": 2.115798370985093e-05, + "log_odds_chosen": 9.981086730957031, + "log_odds_ratio": -0.0009031962836161256, + "logits/chosen": -0.5584157705307007, + "logits/rejected": -0.6889655590057373, + "logps/chosen": -0.0022818795405328274, + "logps/rejected": -1.761110782623291, + "loss": 0.7491, + "nll_loss": 0.18718641996383667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000228187971515581, + "rewards/margins": 0.17588287591934204, + "rewards/rejected": -0.17611107230186462, + "step": 8953 + }, + { + "epoch": 6.19225449515906, + "grad_norm": 5.375219821929932, + "learning_rate": 2.115414169356078e-05, + "log_odds_chosen": 10.997356414794922, + "log_odds_ratio": -3.314020068501122e-05, + "logits/chosen": -0.620721697807312, + "logits/rejected": -0.6426808834075928, + "logps/chosen": -0.00013396151189226657, + "logps/rejected": -1.832876443862915, + "loss": 0.4577, + "nll_loss": 0.11442224681377411, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3396151189226657e-05, + "rewards/margins": 0.1832742542028427, + "rewards/rejected": -0.1832876354455948, + "step": 8954 + }, + { + "epoch": 6.1929460580912865, + "grad_norm": 5.435145378112793, + "learning_rate": 2.115029967727063e-05, + "log_odds_chosen": 10.449785232543945, + "log_odds_ratio": -5.887317456654273e-05, + "logits/chosen": -0.18803539872169495, + "logits/rejected": -0.3863428831100464, + "logps/chosen": -0.00021623042994178832, + "logps/rejected": -1.649173378944397, + "loss": 0.881, + "nll_loss": 0.22024330496788025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1623043721774593e-05, + "rewards/margins": 0.16489571332931519, + "rewards/rejected": -0.16491734981536865, + "step": 8955 + }, + { + "epoch": 6.193637621023513, + "grad_norm": 7.4757399559021, + "learning_rate": 2.1146457660980484e-05, + "log_odds_chosen": 11.041566848754883, + "log_odds_ratio": -0.00036361149977892637, + "logits/chosen": -0.3361101746559143, + "logits/rejected": -0.3597390055656433, + "logps/chosen": -0.000462493859231472, + "logps/rejected": -2.7006239891052246, + "loss": 0.6115, + "nll_loss": 0.1528266966342926, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.624938446795568e-05, + "rewards/margins": 0.2700161635875702, + "rewards/rejected": -0.2700624167919159, + "step": 8956 + }, + { + "epoch": 6.19432918395574, + "grad_norm": 6.892910957336426, + "learning_rate": 2.1142615644690333e-05, + "log_odds_chosen": 10.666043281555176, + "log_odds_ratio": -9.158872853731737e-05, + "logits/chosen": -0.44387686252593994, + "logits/rejected": -0.589409589767456, + "logps/chosen": -0.0003314261557534337, + "logps/rejected": -2.5575907230377197, + "loss": 0.8809, + "nll_loss": 0.22020521759986877, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.314261994091794e-05, + "rewards/margins": 0.2557259202003479, + "rewards/rejected": -0.255759060382843, + "step": 8957 + }, + { + "epoch": 6.195020746887967, + "grad_norm": 4.018488883972168, + "learning_rate": 2.1138773628400185e-05, + "log_odds_chosen": 11.348909378051758, + "log_odds_ratio": -3.268062573624775e-05, + "logits/chosen": -0.5060147643089294, + "logits/rejected": -0.6009031534194946, + "logps/chosen": -0.00012915278784930706, + "logps/rejected": -2.4707422256469727, + "loss": 0.336, + "nll_loss": 0.08399759232997894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2915278603031766e-05, + "rewards/margins": 0.24706131219863892, + "rewards/rejected": -0.2470742166042328, + "step": 8958 + }, + { + "epoch": 6.195712309820194, + "grad_norm": 9.816084861755371, + "learning_rate": 2.1134931612110038e-05, + "log_odds_chosen": 10.38204574584961, + "log_odds_ratio": -0.00014280746108852327, + "logits/chosen": -0.5085594058036804, + "logits/rejected": -0.5785856246948242, + "logps/chosen": -0.0004231697239447385, + "logps/rejected": -2.2826638221740723, + "loss": 0.8793, + "nll_loss": 0.2198125571012497, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2316969484090805e-05, + "rewards/margins": 0.22822408378124237, + "rewards/rejected": -0.2282664030790329, + "step": 8959 + }, + { + "epoch": 6.196403872752421, + "grad_norm": 6.990175724029541, + "learning_rate": 2.1131089595819887e-05, + "log_odds_chosen": 10.67991828918457, + "log_odds_ratio": -5.843305916641839e-05, + "logits/chosen": -0.2339039295911789, + "logits/rejected": -0.31213295459747314, + "logps/chosen": -0.00019437081937212497, + "logps/rejected": -2.1106085777282715, + "loss": 0.6733, + "nll_loss": 0.16832423210144043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9437082301010378e-05, + "rewards/margins": 0.2110414206981659, + "rewards/rejected": -0.21106085181236267, + "step": 8960 + }, + { + "epoch": 6.1970954356846475, + "grad_norm": 5.311736583709717, + "learning_rate": 2.112724757952974e-05, + "log_odds_chosen": 10.271112442016602, + "log_odds_ratio": -0.00021918118000030518, + "logits/chosen": -0.5048193335533142, + "logits/rejected": -0.5501112937927246, + "logps/chosen": -0.00027754349866881967, + "logps/rejected": -2.03511643409729, + "loss": 0.5808, + "nll_loss": 0.14516626298427582, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.775435132207349e-05, + "rewards/margins": 0.20348387956619263, + "rewards/rejected": -0.20351164042949677, + "step": 8961 + }, + { + "epoch": 6.197786998616874, + "grad_norm": 6.911527156829834, + "learning_rate": 2.112340556323959e-05, + "log_odds_chosen": 10.46664810180664, + "log_odds_ratio": -3.2967760489555076e-05, + "logits/chosen": -0.678046464920044, + "logits/rejected": -0.6764953136444092, + "logps/chosen": -0.0002369165886193514, + "logps/rejected": -1.7433416843414307, + "loss": 0.7735, + "nll_loss": 0.19336023926734924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.369166031712666e-05, + "rewards/margins": 0.17431046068668365, + "rewards/rejected": -0.17433416843414307, + "step": 8962 + }, + { + "epoch": 6.198478561549101, + "grad_norm": 12.984915733337402, + "learning_rate": 2.1119563546949437e-05, + "log_odds_chosen": 10.639669418334961, + "log_odds_ratio": -0.00014092384662944824, + "logits/chosen": -0.5126928687095642, + "logits/rejected": -0.6322792768478394, + "logps/chosen": -0.0002208442019764334, + "logps/rejected": -1.886051893234253, + "loss": 1.7345, + "nll_loss": 0.4336107075214386, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2084421289036982e-05, + "rewards/margins": 0.188583105802536, + "rewards/rejected": -0.1886051893234253, + "step": 8963 + }, + { + "epoch": 6.199170124481328, + "grad_norm": 5.063332557678223, + "learning_rate": 2.111572153065929e-05, + "log_odds_chosen": 11.39436149597168, + "log_odds_ratio": -5.77733080717735e-05, + "logits/chosen": -0.39494332671165466, + "logits/rejected": -0.5832082629203796, + "logps/chosen": -0.00023869842698331922, + "logps/rejected": -2.9476852416992188, + "loss": 0.429, + "nll_loss": 0.10723817348480225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3869844881119207e-05, + "rewards/margins": 0.2947446405887604, + "rewards/rejected": -0.2947685122489929, + "step": 8964 + }, + { + "epoch": 6.199861687413555, + "grad_norm": 8.189932823181152, + "learning_rate": 2.1111879514369142e-05, + "log_odds_chosen": 10.68580150604248, + "log_odds_ratio": -0.00010645938164088875, + "logits/chosen": -0.6137702465057373, + "logits/rejected": -0.6555781364440918, + "logps/chosen": -0.0002459251263644546, + "logps/rejected": -2.07014799118042, + "loss": 0.4549, + "nll_loss": 0.11371321231126785, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4592513000243343e-05, + "rewards/margins": 0.20699021220207214, + "rewards/rejected": -0.207014799118042, + "step": 8965 + }, + { + "epoch": 6.200553250345782, + "grad_norm": 5.458976745605469, + "learning_rate": 2.110803749807899e-05, + "log_odds_chosen": 9.840924263000488, + "log_odds_ratio": -0.0001311297673964873, + "logits/chosen": -0.5520514249801636, + "logits/rejected": -0.5865632891654968, + "logps/chosen": -0.0007088709389790893, + "logps/rejected": -1.6935031414031982, + "loss": 0.6038, + "nll_loss": 0.15092788636684418, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.088709389790893e-05, + "rewards/margins": 0.16927942633628845, + "rewards/rejected": -0.16935031116008759, + "step": 8966 + }, + { + "epoch": 6.2012448132780085, + "grad_norm": 5.70952033996582, + "learning_rate": 2.1104195481788844e-05, + "log_odds_chosen": 8.883814811706543, + "log_odds_ratio": -0.0009645810350775719, + "logits/chosen": -0.1132664605975151, + "logits/rejected": -0.18594291806221008, + "logps/chosen": -0.0007536731427535415, + "logps/rejected": -1.6175205707550049, + "loss": 0.6865, + "nll_loss": 0.17151714861392975, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.536730845458806e-05, + "rewards/margins": 0.16167670488357544, + "rewards/rejected": -0.16175207495689392, + "step": 8967 + }, + { + "epoch": 6.201936376210235, + "grad_norm": 4.402467250823975, + "learning_rate": 2.1100353465498696e-05, + "log_odds_chosen": 9.699943542480469, + "log_odds_ratio": -0.003861078992486, + "logits/chosen": -0.5733868479728699, + "logits/rejected": -0.6014716625213623, + "logps/chosen": -0.002426671562716365, + "logps/rejected": -1.5587421655654907, + "loss": 0.5523, + "nll_loss": 0.13767722249031067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00024266715627163649, + "rewards/margins": 0.15563154220581055, + "rewards/rejected": -0.15587422251701355, + "step": 8968 + }, + { + "epoch": 6.202627939142462, + "grad_norm": 8.179895401000977, + "learning_rate": 2.1096511449208545e-05, + "log_odds_chosen": 11.282783508300781, + "log_odds_ratio": -2.2567839550902136e-05, + "logits/chosen": -0.29590824246406555, + "logits/rejected": -0.33641281723976135, + "logps/chosen": -0.0003832974180113524, + "logps/rejected": -2.8411865234375, + "loss": 0.5058, + "nll_loss": 0.12644313275814056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.832974107353948e-05, + "rewards/margins": 0.2840803265571594, + "rewards/rejected": -0.28411865234375, + "step": 8969 + }, + { + "epoch": 6.203319502074689, + "grad_norm": 6.13981819152832, + "learning_rate": 2.1092669432918398e-05, + "log_odds_chosen": 10.735755920410156, + "log_odds_ratio": -4.915258614346385e-05, + "logits/chosen": -0.25060558319091797, + "logits/rejected": -0.3601216673851013, + "logps/chosen": -0.000244683149503544, + "logps/rejected": -2.069368600845337, + "loss": 0.5323, + "nll_loss": 0.1330815702676773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.446831058477983e-05, + "rewards/margins": 0.20691239833831787, + "rewards/rejected": -0.20693686604499817, + "step": 8970 + }, + { + "epoch": 6.204011065006916, + "grad_norm": 5.527590751647949, + "learning_rate": 2.1088827416628247e-05, + "log_odds_chosen": 10.561314582824707, + "log_odds_ratio": -7.101245137164369e-05, + "logits/chosen": 0.0811053216457367, + "logits/rejected": -0.15598750114440918, + "logps/chosen": -0.000390547385904938, + "logps/rejected": -2.09092378616333, + "loss": 1.0619, + "nll_loss": 0.2654751241207123, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.905473931808956e-05, + "rewards/margins": 0.20905332267284393, + "rewards/rejected": -0.209092378616333, + "step": 8971 + }, + { + "epoch": 6.204702627939143, + "grad_norm": 10.170967102050781, + "learning_rate": 2.1084985400338096e-05, + "log_odds_chosen": 10.451860427856445, + "log_odds_ratio": -0.00031936095911078155, + "logits/chosen": -0.2732451856136322, + "logits/rejected": -0.36096101999282837, + "logps/chosen": -0.0005754973972216249, + "logps/rejected": -2.381434440612793, + "loss": 0.5891, + "nll_loss": 0.14723119139671326, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7549739722162485e-05, + "rewards/margins": 0.23808589577674866, + "rewards/rejected": -0.2381434589624405, + "step": 8972 + }, + { + "epoch": 6.2053941908713695, + "grad_norm": 4.112435817718506, + "learning_rate": 2.1081143384047948e-05, + "log_odds_chosen": 9.853161811828613, + "log_odds_ratio": -0.00031163229141384363, + "logits/chosen": -0.5074654221534729, + "logits/rejected": -0.43558061122894287, + "logps/chosen": -0.0003113880520686507, + "logps/rejected": -1.5286929607391357, + "loss": 0.6685, + "nll_loss": 0.16709870100021362, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1138806662056595e-05, + "rewards/margins": 0.15283815562725067, + "rewards/rejected": -0.15286928415298462, + "step": 8973 + }, + { + "epoch": 6.206085753803596, + "grad_norm": 4.837614059448242, + "learning_rate": 2.10773013677578e-05, + "log_odds_chosen": 9.713627815246582, + "log_odds_ratio": -0.0007801411557011306, + "logits/chosen": -0.37643712759017944, + "logits/rejected": -0.4500851631164551, + "logps/chosen": -0.00041137493099085987, + "logps/rejected": -2.01298189163208, + "loss": 0.6282, + "nll_loss": 0.15697824954986572, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.113749309908599e-05, + "rewards/margins": 0.20125705003738403, + "rewards/rejected": -0.20129819214344025, + "step": 8974 + }, + { + "epoch": 6.206777316735823, + "grad_norm": 5.911776065826416, + "learning_rate": 2.107345935146765e-05, + "log_odds_chosen": 10.234687805175781, + "log_odds_ratio": -0.0010891073616221547, + "logits/chosen": -0.4068930447101593, + "logits/rejected": -0.47032034397125244, + "logps/chosen": -0.008021952584385872, + "logps/rejected": -2.307753562927246, + "loss": 0.6201, + "nll_loss": 0.15490767359733582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008021953399293125, + "rewards/margins": 0.22997316718101501, + "rewards/rejected": -0.2307753562927246, + "step": 8975 + }, + { + "epoch": 6.20746887966805, + "grad_norm": 9.897012710571289, + "learning_rate": 2.1069617335177502e-05, + "log_odds_chosen": 10.247870445251465, + "log_odds_ratio": -0.00021053629461675882, + "logits/chosen": -0.21756838262081146, + "logits/rejected": -0.35750874876976013, + "logps/chosen": -0.0004833069397136569, + "logps/rejected": -2.3169853687286377, + "loss": 0.8337, + "nll_loss": 0.2084033489227295, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.833069397136569e-05, + "rewards/margins": 0.2316502332687378, + "rewards/rejected": -0.23169854283332825, + "step": 8976 + }, + { + "epoch": 6.208160442600277, + "grad_norm": 6.505767345428467, + "learning_rate": 2.1065775318887355e-05, + "log_odds_chosen": 10.968399047851562, + "log_odds_ratio": -0.00010166892025154084, + "logits/chosen": -0.1739257425069809, + "logits/rejected": -0.3365812301635742, + "logps/chosen": -0.00034486784716136754, + "logps/rejected": -2.477846145629883, + "loss": 0.4064, + "nll_loss": 0.10159460455179214, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.44867876265198e-05, + "rewards/margins": 0.2477501481771469, + "rewards/rejected": -0.24778464436531067, + "step": 8977 + }, + { + "epoch": 6.208852005532504, + "grad_norm": 5.0093674659729, + "learning_rate": 2.1061933302597204e-05, + "log_odds_chosen": 10.247230529785156, + "log_odds_ratio": -0.00017742854834068567, + "logits/chosen": -0.27696970105171204, + "logits/rejected": -0.384748637676239, + "logps/chosen": -0.0005147414631210268, + "logps/rejected": -2.2932639122009277, + "loss": 0.5645, + "nll_loss": 0.14109958708286285, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.14741477672942e-05, + "rewards/margins": 0.2292748987674713, + "rewards/rejected": -0.22932636737823486, + "step": 8978 + }, + { + "epoch": 6.20954356846473, + "grad_norm": 11.902543067932129, + "learning_rate": 2.1058091286307056e-05, + "log_odds_chosen": 10.680965423583984, + "log_odds_ratio": -0.00011109773186035454, + "logits/chosen": -0.16623735427856445, + "logits/rejected": -0.2329682856798172, + "logps/chosen": -0.001161636901088059, + "logps/rejected": -3.0028746128082275, + "loss": 0.6431, + "nll_loss": 0.16075670719146729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011616369738476351, + "rewards/margins": 0.30017128586769104, + "rewards/rejected": -0.30028748512268066, + "step": 8979 + }, + { + "epoch": 6.210235131396957, + "grad_norm": 8.642373085021973, + "learning_rate": 2.105424927001691e-05, + "log_odds_chosen": 10.992445945739746, + "log_odds_ratio": -0.0002544039161875844, + "logits/chosen": -0.2466789186000824, + "logits/rejected": -0.2625923752784729, + "logps/chosen": -0.0005904433783143759, + "logps/rejected": -2.307201385498047, + "loss": 0.5631, + "nll_loss": 0.1407500058412552, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.904433419345878e-05, + "rewards/margins": 0.23066109418869019, + "rewards/rejected": -0.2307201474905014, + "step": 8980 + }, + { + "epoch": 6.210926694329184, + "grad_norm": 5.6907172203063965, + "learning_rate": 2.1050407253726758e-05, + "log_odds_chosen": 10.472320556640625, + "log_odds_ratio": -0.00016914596199057996, + "logits/chosen": -0.727494478225708, + "logits/rejected": -0.7286410331726074, + "logps/chosen": -0.0006767577142454684, + "logps/rejected": -2.395904779434204, + "loss": 0.6677, + "nll_loss": 0.16691061854362488, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.767576996935531e-05, + "rewards/margins": 0.2395228147506714, + "rewards/rejected": -0.23959049582481384, + "step": 8981 + }, + { + "epoch": 6.211618257261411, + "grad_norm": 6.281667232513428, + "learning_rate": 2.1046565237436607e-05, + "log_odds_chosen": 11.036178588867188, + "log_odds_ratio": -5.01195972901769e-05, + "logits/chosen": -0.2629799544811249, + "logits/rejected": -0.3964104950428009, + "logps/chosen": -0.0003912192478310317, + "logps/rejected": -2.7341060638427734, + "loss": 0.6457, + "nll_loss": 0.16142527759075165, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.912192187272012e-05, + "rewards/margins": 0.27337148785591125, + "rewards/rejected": -0.2734106183052063, + "step": 8982 + }, + { + "epoch": 6.212309820193638, + "grad_norm": 7.363375186920166, + "learning_rate": 2.104272322114646e-05, + "log_odds_chosen": 11.015447616577148, + "log_odds_ratio": -3.7999296182533726e-05, + "logits/chosen": -0.6886452436447144, + "logits/rejected": -0.7008345127105713, + "logps/chosen": -0.0001938598434207961, + "logps/rejected": -2.3993420600891113, + "loss": 0.7438, + "nll_loss": 0.18593871593475342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.938598506967537e-05, + "rewards/margins": 0.23991483449935913, + "rewards/rejected": -0.23993422091007233, + "step": 8983 + }, + { + "epoch": 6.213001383125865, + "grad_norm": 4.0122971534729, + "learning_rate": 2.1038881204856308e-05, + "log_odds_chosen": 10.397682189941406, + "log_odds_ratio": -0.00024126411881297827, + "logits/chosen": -0.46353641152381897, + "logits/rejected": -0.4780619740486145, + "logps/chosen": -0.00030690658604726195, + "logps/rejected": -2.0208168029785156, + "loss": 0.5212, + "nll_loss": 0.13026946783065796, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.069065496674739e-05, + "rewards/margins": 0.20205096900463104, + "rewards/rejected": -0.20208168029785156, + "step": 8984 + }, + { + "epoch": 6.213692946058091, + "grad_norm": 5.429798126220703, + "learning_rate": 2.103503918856616e-05, + "log_odds_chosen": 11.59724235534668, + "log_odds_ratio": -1.5820773114683107e-05, + "logits/chosen": -0.38226318359375, + "logits/rejected": -0.5154070258140564, + "logps/chosen": -0.0003342315030749887, + "logps/rejected": -2.710653305053711, + "loss": 0.4582, + "nll_loss": 0.11453854292631149, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3423151762690395e-05, + "rewards/margins": 0.2710318863391876, + "rewards/rejected": -0.2710653245449066, + "step": 8985 + }, + { + "epoch": 6.214384508990318, + "grad_norm": 6.813840866088867, + "learning_rate": 2.1031197172276013e-05, + "log_odds_chosen": 9.728269577026367, + "log_odds_ratio": -0.0004124910046812147, + "logits/chosen": -0.5749183297157288, + "logits/rejected": -0.5533679723739624, + "logps/chosen": -0.004407223779708147, + "logps/rejected": -1.9411263465881348, + "loss": 0.5482, + "nll_loss": 0.13700014352798462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004407223896123469, + "rewards/margins": 0.19367189705371857, + "rewards/rejected": -0.194112628698349, + "step": 8986 + }, + { + "epoch": 6.215076071922545, + "grad_norm": 10.740469932556152, + "learning_rate": 2.1027355155985862e-05, + "log_odds_chosen": 9.602537155151367, + "log_odds_ratio": -0.014514979906380177, + "logits/chosen": -0.5719044208526611, + "logits/rejected": -0.700533390045166, + "logps/chosen": -0.010756929405033588, + "logps/rejected": -1.8883540630340576, + "loss": 1.2422, + "nll_loss": 0.309101939201355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010756929405033588, + "rewards/margins": 0.18775972723960876, + "rewards/rejected": -0.18883541226387024, + "step": 8987 + }, + { + "epoch": 6.215767634854772, + "grad_norm": 5.412166118621826, + "learning_rate": 2.1023513139695715e-05, + "log_odds_chosen": 10.697986602783203, + "log_odds_ratio": -4.1972598410211504e-05, + "logits/chosen": -0.3933907449245453, + "logits/rejected": -0.3781155049800873, + "logps/chosen": -0.0002012891782214865, + "logps/rejected": -2.0933737754821777, + "loss": 0.5938, + "nll_loss": 0.14844883978366852, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.012891673075501e-05, + "rewards/margins": 0.20931726694107056, + "rewards/rejected": -0.20933738350868225, + "step": 8988 + }, + { + "epoch": 6.216459197786999, + "grad_norm": 6.379223346710205, + "learning_rate": 2.1019671123405567e-05, + "log_odds_chosen": 10.370619773864746, + "log_odds_ratio": -0.00026047302526421845, + "logits/chosen": -0.5298944711685181, + "logits/rejected": -0.5091612339019775, + "logps/chosen": -0.00016024197975639254, + "logps/rejected": -1.7236460447311401, + "loss": 0.9999, + "nll_loss": 0.24995876848697662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6024197975639254e-05, + "rewards/margins": 0.17234858870506287, + "rewards/rejected": -0.17236460745334625, + "step": 8989 + }, + { + "epoch": 6.217150760719226, + "grad_norm": 6.144903659820557, + "learning_rate": 2.1015829107115416e-05, + "log_odds_chosen": 10.349257469177246, + "log_odds_ratio": -0.00021842276328243315, + "logits/chosen": -0.8314346075057983, + "logits/rejected": -0.7988418936729431, + "logps/chosen": -0.000315680488711223, + "logps/rejected": -2.2892818450927734, + "loss": 0.614, + "nll_loss": 0.1534736156463623, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1568051781505346e-05, + "rewards/margins": 0.22889664769172668, + "rewards/rejected": -0.22892819344997406, + "step": 8990 + }, + { + "epoch": 6.217842323651452, + "grad_norm": 7.017372131347656, + "learning_rate": 2.1011987090825265e-05, + "log_odds_chosen": 10.174150466918945, + "log_odds_ratio": -0.00014968191680964082, + "logits/chosen": -0.7089210748672485, + "logits/rejected": -0.7370721697807312, + "logps/chosen": -0.000289241987047717, + "logps/rejected": -1.7138595581054688, + "loss": 0.332, + "nll_loss": 0.08299598097801208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8924197977175936e-05, + "rewards/margins": 0.17135705053806305, + "rewards/rejected": -0.1713859736919403, + "step": 8991 + }, + { + "epoch": 6.218533886583679, + "grad_norm": 6.155914783477783, + "learning_rate": 2.1008145074535118e-05, + "log_odds_chosen": 10.125147819519043, + "log_odds_ratio": -9.33370174607262e-05, + "logits/chosen": -0.5350738763809204, + "logits/rejected": -0.6198149919509888, + "logps/chosen": -0.000489847909193486, + "logps/rejected": -1.8168946504592896, + "loss": 0.7029, + "nll_loss": 0.17571145296096802, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.898479164694436e-05, + "rewards/margins": 0.18164049088954926, + "rewards/rejected": -0.18168947100639343, + "step": 8992 + }, + { + "epoch": 6.219225449515906, + "grad_norm": 8.826537132263184, + "learning_rate": 2.1004303058244967e-05, + "log_odds_chosen": 10.819112777709961, + "log_odds_ratio": -3.925442433683202e-05, + "logits/chosen": -0.8494081497192383, + "logits/rejected": -0.8767988681793213, + "logps/chosen": -0.0004170122556388378, + "logps/rejected": -2.2485222816467285, + "loss": 0.608, + "nll_loss": 0.15199565887451172, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1701227019075304e-05, + "rewards/margins": 0.22481051087379456, + "rewards/rejected": -0.22485221922397614, + "step": 8993 + }, + { + "epoch": 6.219917012448133, + "grad_norm": 11.54941463470459, + "learning_rate": 2.100046104195482e-05, + "log_odds_chosen": 10.90658187866211, + "log_odds_ratio": -9.246320405509323e-05, + "logits/chosen": -0.5866506099700928, + "logits/rejected": -0.6891856789588928, + "logps/chosen": -0.0002085501328110695, + "logps/rejected": -2.233708143234253, + "loss": 0.5723, + "nll_loss": 0.1430549919605255, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0855015463894233e-05, + "rewards/margins": 0.22334995865821838, + "rewards/rejected": -0.22337080538272858, + "step": 8994 + }, + { + "epoch": 6.22060857538036, + "grad_norm": 8.5200834274292, + "learning_rate": 2.099661902566467e-05, + "log_odds_chosen": 11.007604598999023, + "log_odds_ratio": -5.738082472817041e-05, + "logits/chosen": -0.5865336060523987, + "logits/rejected": -0.53379225730896, + "logps/chosen": -0.00028456421568989754, + "logps/rejected": -2.431556463241577, + "loss": 0.9227, + "nll_loss": 0.2306625097990036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8456421205191873e-05, + "rewards/margins": 0.24312719702720642, + "rewards/rejected": -0.24315565824508667, + "step": 8995 + }, + { + "epoch": 6.2213001383125865, + "grad_norm": 7.333250045776367, + "learning_rate": 2.099277700937452e-05, + "log_odds_chosen": 10.954893112182617, + "log_odds_ratio": -6.844123709015548e-05, + "logits/chosen": -0.2322230190038681, + "logits/rejected": -0.3255404829978943, + "logps/chosen": -0.00023944003623910248, + "logps/rejected": -2.2728569507598877, + "loss": 0.6644, + "nll_loss": 0.16609756648540497, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.394400507910177e-05, + "rewards/margins": 0.2272617518901825, + "rewards/rejected": -0.227285698056221, + "step": 8996 + }, + { + "epoch": 6.221991701244813, + "grad_norm": 7.575857639312744, + "learning_rate": 2.0988934993084373e-05, + "log_odds_chosen": 11.492612838745117, + "log_odds_ratio": -4.526223710854538e-05, + "logits/chosen": 0.041964687407016754, + "logits/rejected": -0.07933655381202698, + "logps/chosen": -0.0002207772631663829, + "logps/rejected": -2.7611167430877686, + "loss": 0.7763, + "nll_loss": 0.19406726956367493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.207772558904253e-05, + "rewards/margins": 0.2760895788669586, + "rewards/rejected": -0.2761116623878479, + "step": 8997 + }, + { + "epoch": 6.22268326417704, + "grad_norm": 7.768023490905762, + "learning_rate": 2.0985092976794226e-05, + "log_odds_chosen": 10.354554176330566, + "log_odds_ratio": -5.161076478543691e-05, + "logits/chosen": -0.37766382098197937, + "logits/rejected": -0.38012662529945374, + "logps/chosen": -0.00051166454795748, + "logps/rejected": -2.2143630981445312, + "loss": 0.5399, + "nll_loss": 0.13496245443820953, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.116645115776919e-05, + "rewards/margins": 0.2213851511478424, + "rewards/rejected": -0.2214363068342209, + "step": 8998 + }, + { + "epoch": 6.223374827109267, + "grad_norm": 6.647977352142334, + "learning_rate": 2.0981250960504075e-05, + "log_odds_chosen": 11.430723190307617, + "log_odds_ratio": -2.3654045435250737e-05, + "logits/chosen": -0.4360831677913666, + "logits/rejected": -0.4695732295513153, + "logps/chosen": -0.00035060528898611665, + "logps/rejected": -2.913233757019043, + "loss": 0.6416, + "nll_loss": 0.16040118038654327, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5060529626207426e-05, + "rewards/margins": 0.2912883162498474, + "rewards/rejected": -0.29132339358329773, + "step": 8999 + }, + { + "epoch": 6.224066390041494, + "grad_norm": 7.017353057861328, + "learning_rate": 2.0977408944213924e-05, + "log_odds_chosen": 11.734591484069824, + "log_odds_ratio": -3.7382968002930284e-05, + "logits/chosen": -0.06758692860603333, + "logits/rejected": -0.17713436484336853, + "logps/chosen": -0.0004616577934939414, + "logps/rejected": -2.8184022903442383, + "loss": 0.4697, + "nll_loss": 0.11741077154874802, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6165780076989904e-05, + "rewards/margins": 0.2817940413951874, + "rewards/rejected": -0.2818402051925659, + "step": 9000 + }, + { + "epoch": 6.224757952973721, + "grad_norm": 3.9933974742889404, + "learning_rate": 2.0973566927923776e-05, + "log_odds_chosen": 9.483848571777344, + "log_odds_ratio": -0.0003084187919739634, + "logits/chosen": -0.24222630262374878, + "logits/rejected": -0.286294162273407, + "logps/chosen": -0.0029962025582790375, + "logps/rejected": -2.703573703765869, + "loss": 0.5122, + "nll_loss": 0.1280231475830078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002996202383656055, + "rewards/margins": 0.2700577676296234, + "rewards/rejected": -0.2703574001789093, + "step": 9001 + }, + { + "epoch": 6.2254495159059475, + "grad_norm": 7.665703773498535, + "learning_rate": 2.0969724911633625e-05, + "log_odds_chosen": 10.602262496948242, + "log_odds_ratio": -0.0001078636123565957, + "logits/chosen": -0.24498625099658966, + "logits/rejected": -0.3056301176548004, + "logps/chosen": -0.0003808321198448539, + "logps/rejected": -2.0292539596557617, + "loss": 0.6004, + "nll_loss": 0.1500934660434723, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8083209801698104e-05, + "rewards/margins": 0.20288731157779694, + "rewards/rejected": -0.2029254138469696, + "step": 9002 + }, + { + "epoch": 6.226141078838174, + "grad_norm": 8.88949966430664, + "learning_rate": 2.0965882895343478e-05, + "log_odds_chosen": 10.602922439575195, + "log_odds_ratio": -4.7081877710297704e-05, + "logits/chosen": -0.28342053294181824, + "logits/rejected": -0.29611077904701233, + "logps/chosen": -0.00026970237377099693, + "logps/rejected": -2.155287504196167, + "loss": 0.7502, + "nll_loss": 0.18754449486732483, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6970237740897574e-05, + "rewards/margins": 0.2155017852783203, + "rewards/rejected": -0.21552875638008118, + "step": 9003 + }, + { + "epoch": 6.226832641770401, + "grad_norm": 6.602673530578613, + "learning_rate": 2.0962040879053327e-05, + "log_odds_chosen": 10.173442840576172, + "log_odds_ratio": -0.00119855219963938, + "logits/chosen": -0.18686045706272125, + "logits/rejected": -0.2736320197582245, + "logps/chosen": -0.0023301991168409586, + "logps/rejected": -1.9063091278076172, + "loss": 0.6251, + "nll_loss": 0.15616220235824585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023301989131141454, + "rewards/margins": 0.19039788842201233, + "rewards/rejected": -0.19063091278076172, + "step": 9004 + }, + { + "epoch": 6.227524204702628, + "grad_norm": 10.806591033935547, + "learning_rate": 2.095819886276318e-05, + "log_odds_chosen": 9.640810012817383, + "log_odds_ratio": -0.00026288797380402684, + "logits/chosen": -0.46420085430145264, + "logits/rejected": -0.5561163425445557, + "logps/chosen": -0.0003273919865023345, + "logps/rejected": -1.186840534210205, + "loss": 0.438, + "nll_loss": 0.1094648689031601, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2739197195041925e-05, + "rewards/margins": 0.11865131556987762, + "rewards/rejected": -0.1186840608716011, + "step": 9005 + }, + { + "epoch": 6.228215767634855, + "grad_norm": 6.025399684906006, + "learning_rate": 2.095435684647303e-05, + "log_odds_chosen": 9.861456871032715, + "log_odds_ratio": -0.0004261066787876189, + "logits/chosen": -0.07645013928413391, + "logits/rejected": -0.15051329135894775, + "logps/chosen": -0.0006179987103678286, + "logps/rejected": -2.166393280029297, + "loss": 0.5465, + "nll_loss": 0.13658836483955383, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179986667120829e-05, + "rewards/margins": 0.21657755970954895, + "rewards/rejected": -0.21663935482501984, + "step": 9006 + }, + { + "epoch": 6.228907330567082, + "grad_norm": 23.894134521484375, + "learning_rate": 2.095051483018288e-05, + "log_odds_chosen": 7.401577949523926, + "log_odds_ratio": -0.05275609716773033, + "logits/chosen": -0.36621835827827454, + "logits/rejected": -0.30994993448257446, + "logps/chosen": -0.03477298468351364, + "logps/rejected": -1.45652437210083, + "loss": 0.6965, + "nll_loss": 0.16883717477321625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003477298654615879, + "rewards/margins": 0.14217513799667358, + "rewards/rejected": -0.1456524282693863, + "step": 9007 + }, + { + "epoch": 6.2295988934993085, + "grad_norm": 5.496852874755859, + "learning_rate": 2.0946672813892733e-05, + "log_odds_chosen": 10.890434265136719, + "log_odds_ratio": -4.913666271022521e-05, + "logits/chosen": -0.5789802074432373, + "logits/rejected": -0.6446264982223511, + "logps/chosen": -0.0001830903347581625, + "logps/rejected": -1.9237865209579468, + "loss": 0.4705, + "nll_loss": 0.11761998385190964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.830903420341201e-05, + "rewards/margins": 0.19236035645008087, + "rewards/rejected": -0.19237865507602692, + "step": 9008 + }, + { + "epoch": 6.230290456431535, + "grad_norm": 20.32415008544922, + "learning_rate": 2.0942830797602582e-05, + "log_odds_chosen": 11.691722869873047, + "log_odds_ratio": -1.3160077287466265e-05, + "logits/chosen": -0.30656173825263977, + "logits/rejected": -0.3084481656551361, + "logps/chosen": -0.00013962341472506523, + "logps/rejected": -2.690067768096924, + "loss": 0.685, + "nll_loss": 0.1712568998336792, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3962340744910762e-05, + "rewards/margins": 0.2689928412437439, + "rewards/rejected": -0.26900678873062134, + "step": 9009 + }, + { + "epoch": 6.230982019363762, + "grad_norm": 14.121626853942871, + "learning_rate": 2.093898878131243e-05, + "log_odds_chosen": 10.256575584411621, + "log_odds_ratio": -0.00016809267981443554, + "logits/chosen": -0.6957007646560669, + "logits/rejected": -0.6951332688331604, + "logps/chosen": -0.0032358954194933176, + "logps/rejected": -2.291188955307007, + "loss": 0.6839, + "nll_loss": 0.170955628156662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003235895128455013, + "rewards/margins": 0.22879531979560852, + "rewards/rejected": -0.22911891341209412, + "step": 9010 + }, + { + "epoch": 6.231673582295989, + "grad_norm": 6.468430995941162, + "learning_rate": 2.0935146765022284e-05, + "log_odds_chosen": 10.054919242858887, + "log_odds_ratio": -0.0005386160919442773, + "logits/chosen": -0.3988853991031647, + "logits/rejected": -0.5644351243972778, + "logps/chosen": -0.0008596985717304051, + "logps/rejected": -2.17459774017334, + "loss": 0.979, + "nll_loss": 0.24469514191150665, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.59698629938066e-05, + "rewards/margins": 0.2173738181591034, + "rewards/rejected": -0.2174597978591919, + "step": 9011 + }, + { + "epoch": 6.232365145228216, + "grad_norm": 5.70004415512085, + "learning_rate": 2.0931304748732136e-05, + "log_odds_chosen": 10.633837699890137, + "log_odds_ratio": -5.87763061048463e-05, + "logits/chosen": -0.38955414295196533, + "logits/rejected": -0.5138880610466003, + "logps/chosen": -0.00017273437697440386, + "logps/rejected": -1.9499411582946777, + "loss": 0.7007, + "nll_loss": 0.17516255378723145, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.727343988022767e-05, + "rewards/margins": 0.19497685134410858, + "rewards/rejected": -0.19499412178993225, + "step": 9012 + }, + { + "epoch": 6.233056708160443, + "grad_norm": 5.867102146148682, + "learning_rate": 2.0927462732441985e-05, + "log_odds_chosen": 9.976856231689453, + "log_odds_ratio": -0.0003477151913102716, + "logits/chosen": -0.1553748995065689, + "logits/rejected": -0.2037595510482788, + "logps/chosen": -0.0010640517575666308, + "logps/rejected": -2.2231345176696777, + "loss": 0.6212, + "nll_loss": 0.15527181327342987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010640516120474786, + "rewards/margins": 0.22220705449581146, + "rewards/rejected": -0.22231344878673553, + "step": 9013 + }, + { + "epoch": 6.2337482710926695, + "grad_norm": 6.256630897521973, + "learning_rate": 2.0923620716151837e-05, + "log_odds_chosen": 11.773627281188965, + "log_odds_ratio": -2.0270506865927018e-05, + "logits/chosen": -0.41896963119506836, + "logits/rejected": -0.6450668573379517, + "logps/chosen": -0.00016775909170974046, + "logps/rejected": -2.4986515045166016, + "loss": 0.5745, + "nll_loss": 0.14361341297626495, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6775908079580404e-05, + "rewards/margins": 0.2498483806848526, + "rewards/rejected": -0.24986517429351807, + "step": 9014 + }, + { + "epoch": 6.234439834024896, + "grad_norm": 12.794068336486816, + "learning_rate": 2.091977869986169e-05, + "log_odds_chosen": 11.137877464294434, + "log_odds_ratio": -2.7645883164950646e-05, + "logits/chosen": -0.4125073254108429, + "logits/rejected": -0.45649489760398865, + "logps/chosen": -0.00030240084743127227, + "logps/rejected": -2.5896098613739014, + "loss": 0.6423, + "nll_loss": 0.16056138277053833, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0240084015531465e-05, + "rewards/margins": 0.2589307427406311, + "rewards/rejected": -0.2589609920978546, + "step": 9015 + }, + { + "epoch": 6.235131396957123, + "grad_norm": 6.285947799682617, + "learning_rate": 2.091593668357154e-05, + "log_odds_chosen": 10.62628173828125, + "log_odds_ratio": -5.577644697041251e-05, + "logits/chosen": -0.3475406765937805, + "logits/rejected": -0.41657719016075134, + "logps/chosen": -0.000300862651783973, + "logps/rejected": -2.3320021629333496, + "loss": 0.6059, + "nll_loss": 0.15147507190704346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0086270271567628e-05, + "rewards/margins": 0.23317012190818787, + "rewards/rejected": -0.23320019245147705, + "step": 9016 + }, + { + "epoch": 6.23582295988935, + "grad_norm": 6.353825092315674, + "learning_rate": 2.091209466728139e-05, + "log_odds_chosen": 9.51252555847168, + "log_odds_ratio": -0.0012284711701795459, + "logits/chosen": -0.6180807948112488, + "logits/rejected": -0.6229330897331238, + "logps/chosen": -0.001876621157862246, + "logps/rejected": -2.276731491088867, + "loss": 0.438, + "nll_loss": 0.10938158631324768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018766212451737374, + "rewards/margins": 0.22748547792434692, + "rewards/rejected": -0.22767315804958344, + "step": 9017 + }, + { + "epoch": 6.236514522821577, + "grad_norm": 7.255704402923584, + "learning_rate": 2.090825265099124e-05, + "log_odds_chosen": 10.664867401123047, + "log_odds_ratio": -3.9949351048562676e-05, + "logits/chosen": -0.4201251268386841, + "logits/rejected": -0.4479733407497406, + "logps/chosen": -0.00033521311706863344, + "logps/rejected": -2.4541258811950684, + "loss": 1.0342, + "nll_loss": 0.2585402727127075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3521311706863344e-05, + "rewards/margins": 0.24537906050682068, + "rewards/rejected": -0.24541258811950684, + "step": 9018 + }, + { + "epoch": 6.237206085753804, + "grad_norm": 13.467849731445312, + "learning_rate": 2.090441063470109e-05, + "log_odds_chosen": 11.327585220336914, + "log_odds_ratio": -2.2701206034980714e-05, + "logits/chosen": -0.8228195905685425, + "logits/rejected": -0.897856593132019, + "logps/chosen": -0.00030445802258327603, + "logps/rejected": -1.9246413707733154, + "loss": 0.5952, + "nll_loss": 0.14880092442035675, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0445804441114888e-05, + "rewards/margins": 0.19243371486663818, + "rewards/rejected": -0.19246414303779602, + "step": 9019 + }, + { + "epoch": 6.2378976486860305, + "grad_norm": 7.673033714294434, + "learning_rate": 2.0900568618410942e-05, + "log_odds_chosen": 11.0219144821167, + "log_odds_ratio": -2.651966133271344e-05, + "logits/chosen": -0.431251585483551, + "logits/rejected": -0.5808360576629639, + "logps/chosen": -0.00018394278595224023, + "logps/rejected": -2.3105428218841553, + "loss": 0.5675, + "nll_loss": 0.14187981188297272, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8394279322819784e-05, + "rewards/margins": 0.23103588819503784, + "rewards/rejected": -0.23105429112911224, + "step": 9020 + }, + { + "epoch": 6.238589211618257, + "grad_norm": 7.586302280426025, + "learning_rate": 2.0896726602120794e-05, + "log_odds_chosen": 10.934219360351562, + "log_odds_ratio": -5.194777259021066e-05, + "logits/chosen": -0.42324450612068176, + "logits/rejected": -0.5293049812316895, + "logps/chosen": -0.0002458833623677492, + "logps/rejected": -2.190702438354492, + "loss": 0.6002, + "nll_loss": 0.15005594491958618, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4588336600572802e-05, + "rewards/margins": 0.21904563903808594, + "rewards/rejected": -0.21907024085521698, + "step": 9021 + }, + { + "epoch": 6.239280774550484, + "grad_norm": 7.884757995605469, + "learning_rate": 2.0892884585830643e-05, + "log_odds_chosen": 11.067387580871582, + "log_odds_ratio": -0.00040776049718260765, + "logits/chosen": -0.9919091463088989, + "logits/rejected": -0.9518996477127075, + "logps/chosen": -0.0002284134243382141, + "logps/rejected": -1.9842913150787354, + "loss": 0.5262, + "nll_loss": 0.13151274621486664, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.284134279761929e-05, + "rewards/margins": 0.19840630888938904, + "rewards/rejected": -0.198429137468338, + "step": 9022 + }, + { + "epoch": 6.239972337482711, + "grad_norm": 4.72267484664917, + "learning_rate": 2.0889042569540496e-05, + "log_odds_chosen": 11.019242286682129, + "log_odds_ratio": -0.0008485190337523818, + "logits/chosen": 0.0482051819562912, + "logits/rejected": -0.14590582251548767, + "logps/chosen": -0.0014960195403546095, + "logps/rejected": -2.0413918495178223, + "loss": 0.4531, + "nll_loss": 0.11318106949329376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014960193948354572, + "rewards/margins": 0.20398959517478943, + "rewards/rejected": -0.20413920283317566, + "step": 9023 + }, + { + "epoch": 6.240663900414938, + "grad_norm": 6.891628265380859, + "learning_rate": 2.088520055325035e-05, + "log_odds_chosen": 10.24162483215332, + "log_odds_ratio": -0.00018210129928775132, + "logits/chosen": -0.6122728586196899, + "logits/rejected": -0.6851514577865601, + "logps/chosen": -0.000291764794383198, + "logps/rejected": -1.8662919998168945, + "loss": 0.589, + "nll_loss": 0.14722611010074615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9176480893511325e-05, + "rewards/margins": 0.18660002946853638, + "rewards/rejected": -0.18662920594215393, + "step": 9024 + }, + { + "epoch": 6.241355463347165, + "grad_norm": 10.785980224609375, + "learning_rate": 2.0881358536960197e-05, + "log_odds_chosen": 10.858931541442871, + "log_odds_ratio": -0.00032134572393260896, + "logits/chosen": -0.2621542811393738, + "logits/rejected": -0.37139779329299927, + "logps/chosen": -0.0008505442528985441, + "logps/rejected": -2.503044605255127, + "loss": 0.7347, + "nll_loss": 0.1836353838443756, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.50544311106205e-05, + "rewards/margins": 0.2502194046974182, + "rewards/rejected": -0.2503044605255127, + "step": 9025 + }, + { + "epoch": 6.2420470262793915, + "grad_norm": 8.544774055480957, + "learning_rate": 2.087751652067005e-05, + "log_odds_chosen": 9.95460033416748, + "log_odds_ratio": -0.00020327308448031545, + "logits/chosen": -0.3229715824127197, + "logits/rejected": -0.3416019678115845, + "logps/chosen": -0.0009239742066711187, + "logps/rejected": -2.1367297172546387, + "loss": 0.5462, + "nll_loss": 0.13653871417045593, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.239741484634578e-05, + "rewards/margins": 0.21358059346675873, + "rewards/rejected": -0.21367299556732178, + "step": 9026 + }, + { + "epoch": 6.242738589211618, + "grad_norm": 4.830820083618164, + "learning_rate": 2.08736745043799e-05, + "log_odds_chosen": 10.288642883300781, + "log_odds_ratio": -7.357189315371215e-05, + "logits/chosen": 0.009236622601747513, + "logits/rejected": -0.19707392156124115, + "logps/chosen": -0.0003716089413501322, + "logps/rejected": -1.879990577697754, + "loss": 0.5735, + "nll_loss": 0.14336419105529785, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7160894862608984e-05, + "rewards/margins": 0.1879618912935257, + "rewards/rejected": -0.18799905478954315, + "step": 9027 + }, + { + "epoch": 6.243430152143845, + "grad_norm": 5.889484405517578, + "learning_rate": 2.0869832488089748e-05, + "log_odds_chosen": 9.802091598510742, + "log_odds_ratio": -0.0006548008532263339, + "logits/chosen": -0.6355588436126709, + "logits/rejected": -0.6946972608566284, + "logps/chosen": -0.0002756410976871848, + "logps/rejected": -1.7165451049804688, + "loss": 0.5623, + "nll_loss": 0.1405138224363327, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7564110496314242e-05, + "rewards/margins": 0.1716269552707672, + "rewards/rejected": -0.17165450751781464, + "step": 9028 + }, + { + "epoch": 6.244121715076072, + "grad_norm": 3.881678819656372, + "learning_rate": 2.08659904717996e-05, + "log_odds_chosen": 11.143312454223633, + "log_odds_ratio": -9.366253652842715e-05, + "logits/chosen": -0.4110240340232849, + "logits/rejected": -0.4805094301700592, + "logps/chosen": -0.00028160365764051676, + "logps/rejected": -2.812816619873047, + "loss": 0.4031, + "nll_loss": 0.10075541585683823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8160364308860153e-05, + "rewards/margins": 0.28125351667404175, + "rewards/rejected": -0.2812816798686981, + "step": 9029 + }, + { + "epoch": 6.244813278008299, + "grad_norm": 4.964913845062256, + "learning_rate": 2.0862148455509453e-05, + "log_odds_chosen": 10.30183219909668, + "log_odds_ratio": -0.00011942970013478771, + "logits/chosen": -0.4194273352622986, + "logits/rejected": -0.4900840222835541, + "logps/chosen": -0.0014646524796262383, + "logps/rejected": -2.252519369125366, + "loss": 0.5851, + "nll_loss": 0.14625787734985352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001464652450522408, + "rewards/margins": 0.22510546445846558, + "rewards/rejected": -0.2252519428730011, + "step": 9030 + }, + { + "epoch": 6.245504840940526, + "grad_norm": 5.751460552215576, + "learning_rate": 2.0858306439219302e-05, + "log_odds_chosen": 9.793046951293945, + "log_odds_ratio": -0.00017207200289703906, + "logits/chosen": -0.2726150453090668, + "logits/rejected": -0.30392637848854065, + "logps/chosen": -0.00022600665397476405, + "logps/rejected": -1.6625508069992065, + "loss": 0.8965, + "nll_loss": 0.22410230338573456, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2600665033678524e-05, + "rewards/margins": 0.16623248159885406, + "rewards/rejected": -0.16625507175922394, + "step": 9031 + }, + { + "epoch": 6.246196403872752, + "grad_norm": 6.082136631011963, + "learning_rate": 2.0854464422929154e-05, + "log_odds_chosen": 9.919041633605957, + "log_odds_ratio": -0.00023411812435369939, + "logits/chosen": -0.12235762178897858, + "logits/rejected": -0.16585399210453033, + "logps/chosen": -0.0003532566479407251, + "logps/rejected": -1.8604954481124878, + "loss": 0.6567, + "nll_loss": 0.16414141654968262, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.532566552166827e-05, + "rewards/margins": 0.18601423501968384, + "rewards/rejected": -0.18604956567287445, + "step": 9032 + }, + { + "epoch": 6.246887966804979, + "grad_norm": 5.677314281463623, + "learning_rate": 2.0850622406639007e-05, + "log_odds_chosen": 10.706421852111816, + "log_odds_ratio": -0.00010353871039114892, + "logits/chosen": -0.37744152545928955, + "logits/rejected": -0.43936687707901, + "logps/chosen": -0.0006696865311823785, + "logps/rejected": -2.4172487258911133, + "loss": 0.5202, + "nll_loss": 0.1300279051065445, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.69686560286209e-05, + "rewards/margins": 0.24165791273117065, + "rewards/rejected": -0.241724893450737, + "step": 9033 + }, + { + "epoch": 6.247579529737206, + "grad_norm": 7.973165512084961, + "learning_rate": 2.0846780390348856e-05, + "log_odds_chosen": 10.522238731384277, + "log_odds_ratio": -7.479259511455894e-05, + "logits/chosen": -0.38221296668052673, + "logits/rejected": -0.47134849429130554, + "logps/chosen": -0.0002770965511444956, + "logps/rejected": -1.7832226753234863, + "loss": 0.5368, + "nll_loss": 0.1341937780380249, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.770965511444956e-05, + "rewards/margins": 0.1782945692539215, + "rewards/rejected": -0.17832225561141968, + "step": 9034 + }, + { + "epoch": 6.248271092669433, + "grad_norm": 8.03611946105957, + "learning_rate": 2.0842938374058708e-05, + "log_odds_chosen": 11.027082443237305, + "log_odds_ratio": -3.489879600238055e-05, + "logits/chosen": -0.5230115056037903, + "logits/rejected": -0.560234785079956, + "logps/chosen": -0.0002563800080679357, + "logps/rejected": -2.4159536361694336, + "loss": 0.5464, + "nll_loss": 0.13658906519412994, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5638002625782974e-05, + "rewards/margins": 0.24156975746154785, + "rewards/rejected": -0.24159538745880127, + "step": 9035 + }, + { + "epoch": 6.24896265560166, + "grad_norm": 6.003415584564209, + "learning_rate": 2.0839096357768557e-05, + "log_odds_chosen": 9.94825553894043, + "log_odds_ratio": -0.00018064079631585628, + "logits/chosen": -0.5426046252250671, + "logits/rejected": -0.5991511940956116, + "logps/chosen": -0.00014731872943229973, + "logps/rejected": -1.313718557357788, + "loss": 0.6065, + "nll_loss": 0.15160351991653442, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4731874216522556e-05, + "rewards/margins": 0.1313571333885193, + "rewards/rejected": -0.1313718557357788, + "step": 9036 + }, + { + "epoch": 6.249654218533887, + "grad_norm": 6.042317867279053, + "learning_rate": 2.0835254341478406e-05, + "log_odds_chosen": 10.225564956665039, + "log_odds_ratio": -0.0007171995821408927, + "logits/chosen": -0.3908754885196686, + "logits/rejected": -0.5180646181106567, + "logps/chosen": -0.0009109845268540084, + "logps/rejected": -2.3440051078796387, + "loss": 0.5335, + "nll_loss": 0.13331358134746552, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.109845996135846e-05, + "rewards/margins": 0.23430943489074707, + "rewards/rejected": -0.23440054059028625, + "step": 9037 + }, + { + "epoch": 6.250345781466113, + "grad_norm": 3.844398021697998, + "learning_rate": 2.083141232518826e-05, + "log_odds_chosen": 11.103232383728027, + "log_odds_ratio": -2.063993269985076e-05, + "logits/chosen": -0.2962512671947479, + "logits/rejected": -0.3584537208080292, + "logps/chosen": -0.0001835509028751403, + "logps/rejected": -2.4707653522491455, + "loss": 0.5642, + "nll_loss": 0.14104364812374115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.835508919612039e-05, + "rewards/margins": 0.2470581829547882, + "rewards/rejected": -0.24707652628421783, + "step": 9038 + }, + { + "epoch": 6.25103734439834, + "grad_norm": 4.758118152618408, + "learning_rate": 2.082757030889811e-05, + "log_odds_chosen": 10.80452823638916, + "log_odds_ratio": -6.069736264180392e-05, + "logits/chosen": -0.3704849183559418, + "logits/rejected": -0.4546177387237549, + "logps/chosen": -0.00019304102170281112, + "logps/rejected": -2.074105739593506, + "loss": 0.7361, + "nll_loss": 0.1840093731880188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.930410144268535e-05, + "rewards/margins": 0.20739126205444336, + "rewards/rejected": -0.20741057395935059, + "step": 9039 + }, + { + "epoch": 6.251728907330567, + "grad_norm": 8.88686752319336, + "learning_rate": 2.082372829260796e-05, + "log_odds_chosen": 10.896563529968262, + "log_odds_ratio": -0.00014622285380028188, + "logits/chosen": -0.20167036354541779, + "logits/rejected": -0.2820538878440857, + "logps/chosen": -0.001121298992075026, + "logps/rejected": -2.7503931522369385, + "loss": 0.6227, + "nll_loss": 0.15567225217819214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001121298992075026, + "rewards/margins": 0.27492719888687134, + "rewards/rejected": -0.27503931522369385, + "step": 9040 + }, + { + "epoch": 6.252420470262794, + "grad_norm": 8.300743103027344, + "learning_rate": 2.0819886276317813e-05, + "log_odds_chosen": 10.957125663757324, + "log_odds_ratio": -4.0475730202160776e-05, + "logits/chosen": -0.3142332434654236, + "logits/rejected": -0.33307600021362305, + "logps/chosen": -0.0002978050906676799, + "logps/rejected": -2.8132143020629883, + "loss": 0.727, + "nll_loss": 0.1817580759525299, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.978050906676799e-05, + "rewards/margins": 0.281291663646698, + "rewards/rejected": -0.2813214361667633, + "step": 9041 + }, + { + "epoch": 6.253112033195021, + "grad_norm": 4.971611976623535, + "learning_rate": 2.0816044260027665e-05, + "log_odds_chosen": 9.46718692779541, + "log_odds_ratio": -0.000553897931240499, + "logits/chosen": -0.3183286190032959, + "logits/rejected": -0.47218573093414307, + "logps/chosen": -0.0004418599419295788, + "logps/rejected": -1.6773815155029297, + "loss": 0.737, + "nll_loss": 0.18420332670211792, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.41859956481494e-05, + "rewards/margins": 0.16769397258758545, + "rewards/rejected": -0.1677381694316864, + "step": 9042 + }, + { + "epoch": 6.253803596127248, + "grad_norm": 6.985831260681152, + "learning_rate": 2.0812202243737514e-05, + "log_odds_chosen": 10.669255256652832, + "log_odds_ratio": -0.00018928670033346862, + "logits/chosen": -0.3893347382545471, + "logits/rejected": -0.44403713941574097, + "logps/chosen": -0.0007266980828717351, + "logps/rejected": -1.9807296991348267, + "loss": 0.7293, + "nll_loss": 0.18229913711547852, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.266981265274808e-05, + "rewards/margins": 0.19800031185150146, + "rewards/rejected": -0.19807296991348267, + "step": 9043 + }, + { + "epoch": 6.254495159059474, + "grad_norm": 15.985045433044434, + "learning_rate": 2.0808360227447367e-05, + "log_odds_chosen": 9.64474105834961, + "log_odds_ratio": -0.0008536613313481212, + "logits/chosen": -0.581167459487915, + "logits/rejected": -0.7496652007102966, + "logps/chosen": -0.00538310781121254, + "logps/rejected": -2.3295609951019287, + "loss": 1.2832, + "nll_loss": 0.3207254111766815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005383107927627861, + "rewards/margins": 0.23241779208183289, + "rewards/rejected": -0.23295611143112183, + "step": 9044 + }, + { + "epoch": 6.255186721991701, + "grad_norm": 5.278681755065918, + "learning_rate": 2.0804518211157216e-05, + "log_odds_chosen": 10.54180908203125, + "log_odds_ratio": -3.6456309317145497e-05, + "logits/chosen": -0.7690801024436951, + "logits/rejected": -0.8494816422462463, + "logps/chosen": -0.0040186732076108456, + "logps/rejected": -2.7842183113098145, + "loss": 0.4661, + "nll_loss": 0.11652586609125137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004018672916572541, + "rewards/margins": 0.2780199646949768, + "rewards/rejected": -0.2784218192100525, + "step": 9045 + }, + { + "epoch": 6.255878284923928, + "grad_norm": 5.81907320022583, + "learning_rate": 2.0800676194867065e-05, + "log_odds_chosen": 9.398512840270996, + "log_odds_ratio": -0.0007897837203927338, + "logits/chosen": -0.864077091217041, + "logits/rejected": -0.821776807308197, + "logps/chosen": -0.0018377433298155665, + "logps/rejected": -1.8497081995010376, + "loss": 0.538, + "nll_loss": 0.13441544771194458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018377433298155665, + "rewards/margins": 0.18478704988956451, + "rewards/rejected": -0.18497082591056824, + "step": 9046 + }, + { + "epoch": 6.256569847856155, + "grad_norm": 5.225982666015625, + "learning_rate": 2.0796834178576917e-05, + "log_odds_chosen": 10.886088371276855, + "log_odds_ratio": -3.8307931390590966e-05, + "logits/chosen": -0.4181956648826599, + "logits/rejected": -0.533706545829773, + "logps/chosen": -0.0003992409911006689, + "logps/rejected": -2.2801594734191895, + "loss": 0.4228, + "nll_loss": 0.1056981086730957, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.992409983766265e-05, + "rewards/margins": 0.2279760241508484, + "rewards/rejected": -0.2280159592628479, + "step": 9047 + }, + { + "epoch": 6.257261410788382, + "grad_norm": 8.202089309692383, + "learning_rate": 2.079299216228677e-05, + "log_odds_chosen": 10.310097694396973, + "log_odds_ratio": -0.00020973727805539966, + "logits/chosen": -0.6218382716178894, + "logits/rejected": -0.619733452796936, + "logps/chosen": -0.001025262288749218, + "logps/rejected": -2.1449999809265137, + "loss": 0.6375, + "nll_loss": 0.15935611724853516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010252623178530484, + "rewards/margins": 0.21439746022224426, + "rewards/rejected": -0.21449998021125793, + "step": 9048 + }, + { + "epoch": 6.2579529737206085, + "grad_norm": 6.26420259475708, + "learning_rate": 2.078915014599662e-05, + "log_odds_chosen": 11.018274307250977, + "log_odds_ratio": -4.080742655787617e-05, + "logits/chosen": -0.4249802231788635, + "logits/rejected": -0.5103530883789062, + "logps/chosen": -0.00018488478963263333, + "logps/rejected": -2.230947971343994, + "loss": 0.492, + "nll_loss": 0.12300199270248413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8488481146050617e-05, + "rewards/margins": 0.22307631373405457, + "rewards/rejected": -0.22309479117393494, + "step": 9049 + }, + { + "epoch": 6.258644536652835, + "grad_norm": 10.108119010925293, + "learning_rate": 2.078530812970647e-05, + "log_odds_chosen": 11.731668472290039, + "log_odds_ratio": -0.00010565257252892479, + "logits/chosen": -0.432361364364624, + "logits/rejected": -0.5796195268630981, + "logps/chosen": -0.0012302513932809234, + "logps/rejected": -2.892240285873413, + "loss": 0.7714, + "nll_loss": 0.1928274929523468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012302515096962452, + "rewards/margins": 0.2891010046005249, + "rewards/rejected": -0.2892240285873413, + "step": 9050 + }, + { + "epoch": 6.259336099585062, + "grad_norm": 5.540128707885742, + "learning_rate": 2.0781466113416324e-05, + "log_odds_chosen": 10.653034210205078, + "log_odds_ratio": -5.03646006109193e-05, + "logits/chosen": -0.30922791361808777, + "logits/rejected": -0.4006563425064087, + "logps/chosen": -0.0004195195506326854, + "logps/rejected": -1.9212048053741455, + "loss": 0.5047, + "nll_loss": 0.12615928053855896, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1951956518460065e-05, + "rewards/margins": 0.19207853078842163, + "rewards/rejected": -0.1921204924583435, + "step": 9051 + }, + { + "epoch": 6.260027662517289, + "grad_norm": 7.490386486053467, + "learning_rate": 2.0777624097126173e-05, + "log_odds_chosen": 11.348106384277344, + "log_odds_ratio": -4.494922541198321e-05, + "logits/chosen": -0.22071166336536407, + "logits/rejected": -0.259778767824173, + "logps/chosen": -0.00020064400450792164, + "logps/rejected": -2.4897258281707764, + "loss": 0.4719, + "nll_loss": 0.11798227578401566, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.006439899560064e-05, + "rewards/margins": 0.24895252287387848, + "rewards/rejected": -0.2489725798368454, + "step": 9052 + }, + { + "epoch": 6.260719225449516, + "grad_norm": 10.887153625488281, + "learning_rate": 2.0773782080836025e-05, + "log_odds_chosen": 10.057692527770996, + "log_odds_ratio": -0.017915375530719757, + "logits/chosen": 0.13184309005737305, + "logits/rejected": 0.09195524454116821, + "logps/chosen": -0.005825578700751066, + "logps/rejected": -2.157179832458496, + "loss": 0.9576, + "nll_loss": 0.2376118302345276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005825579282827675, + "rewards/margins": 0.21513542532920837, + "rewards/rejected": -0.21571798622608185, + "step": 9053 + }, + { + "epoch": 6.261410788381743, + "grad_norm": 10.072612762451172, + "learning_rate": 2.0769940064545874e-05, + "log_odds_chosen": 11.511762619018555, + "log_odds_ratio": -2.426476748951245e-05, + "logits/chosen": -0.4715292453765869, + "logits/rejected": -0.5088003277778625, + "logps/chosen": -0.00019626517314463854, + "logps/rejected": -2.4985780715942383, + "loss": 0.4892, + "nll_loss": 0.12229645997285843, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9626517314463854e-05, + "rewards/margins": 0.24983817338943481, + "rewards/rejected": -0.2498578131198883, + "step": 9054 + }, + { + "epoch": 6.2621023513139695, + "grad_norm": 6.188398838043213, + "learning_rate": 2.0766098048255723e-05, + "log_odds_chosen": 10.952611923217773, + "log_odds_ratio": -7.49345199437812e-05, + "logits/chosen": -0.25741341710090637, + "logits/rejected": -0.28530973196029663, + "logps/chosen": -0.00017540823318995535, + "logps/rejected": -2.232959270477295, + "loss": 0.8443, + "nll_loss": 0.2110714316368103, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7540824046591297e-05, + "rewards/margins": 0.22327838838100433, + "rewards/rejected": -0.2232959270477295, + "step": 9055 + }, + { + "epoch": 6.262793914246196, + "grad_norm": 10.523344039916992, + "learning_rate": 2.0762256031965576e-05, + "log_odds_chosen": 12.459383964538574, + "log_odds_ratio": -1.0062859473691788e-05, + "logits/chosen": -0.5501984357833862, + "logits/rejected": -0.5856611132621765, + "logps/chosen": -0.00019254116341471672, + "logps/rejected": -3.8092966079711914, + "loss": 0.765, + "nll_loss": 0.19125302135944366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9254117432865314e-05, + "rewards/margins": 0.3809104561805725, + "rewards/rejected": -0.38092970848083496, + "step": 9056 + }, + { + "epoch": 6.263485477178423, + "grad_norm": 6.1262407302856445, + "learning_rate": 2.0758414015675428e-05, + "log_odds_chosen": 9.742956161499023, + "log_odds_ratio": -0.000668332795612514, + "logits/chosen": -0.6014862060546875, + "logits/rejected": -0.7945506572723389, + "logps/chosen": -0.0018230837304145098, + "logps/rejected": -1.510488748550415, + "loss": 0.4158, + "nll_loss": 0.10387758910655975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018230837304145098, + "rewards/margins": 0.1508665531873703, + "rewards/rejected": -0.15104886889457703, + "step": 9057 + }, + { + "epoch": 6.26417704011065, + "grad_norm": 5.251424312591553, + "learning_rate": 2.0754571999385277e-05, + "log_odds_chosen": 10.103209495544434, + "log_odds_ratio": -0.00021142560581211, + "logits/chosen": -0.3135252594947815, + "logits/rejected": -0.4640074074268341, + "logps/chosen": -0.00029878091299906373, + "logps/rejected": -1.7954256534576416, + "loss": 0.5794, + "nll_loss": 0.14481639862060547, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9878092391300015e-05, + "rewards/margins": 0.17951269447803497, + "rewards/rejected": -0.17954257130622864, + "step": 9058 + }, + { + "epoch": 6.264868603042877, + "grad_norm": 6.161133766174316, + "learning_rate": 2.075072998309513e-05, + "log_odds_chosen": 11.259553909301758, + "log_odds_ratio": -2.63049550994765e-05, + "logits/chosen": -0.7111493349075317, + "logits/rejected": -0.7237696051597595, + "logps/chosen": -7.550412556156516e-05, + "logps/rejected": -1.9565980434417725, + "loss": 0.594, + "nll_loss": 0.14849528670310974, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.550413556600688e-06, + "rewards/margins": 0.19565224647521973, + "rewards/rejected": -0.1956597864627838, + "step": 9059 + }, + { + "epoch": 6.265560165975104, + "grad_norm": 6.60701847076416, + "learning_rate": 2.0746887966804982e-05, + "log_odds_chosen": 8.687655448913574, + "log_odds_ratio": -0.0004205875447951257, + "logits/chosen": 0.09222330898046494, + "logits/rejected": 0.01570185273885727, + "logps/chosen": -0.002098360098898411, + "logps/rejected": -1.8479384183883667, + "loss": 0.5536, + "nll_loss": 0.1383584439754486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002098359982483089, + "rewards/margins": 0.18458399176597595, + "rewards/rejected": -0.18479382991790771, + "step": 9060 + }, + { + "epoch": 6.2662517289073305, + "grad_norm": 5.517164707183838, + "learning_rate": 2.074304595051483e-05, + "log_odds_chosen": 10.857470512390137, + "log_odds_ratio": -7.395334978355095e-05, + "logits/chosen": -0.5717877149581909, + "logits/rejected": -0.6270738840103149, + "logps/chosen": -0.00035990981268696487, + "logps/rejected": -1.8339455127716064, + "loss": 0.3562, + "nll_loss": 0.08904750645160675, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.599098272388801e-05, + "rewards/margins": 0.1833585500717163, + "rewards/rejected": -0.18339455127716064, + "step": 9061 + }, + { + "epoch": 6.266943291839557, + "grad_norm": 5.447756290435791, + "learning_rate": 2.0739203934224684e-05, + "log_odds_chosen": 11.803698539733887, + "log_odds_ratio": -1.1309020919725299e-05, + "logits/chosen": -0.6223872900009155, + "logits/rejected": -0.656653106212616, + "logps/chosen": -6.747247971361503e-05, + "logps/rejected": -1.9564663171768188, + "loss": 0.3991, + "nll_loss": 0.0997781977057457, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.747248335159384e-06, + "rewards/margins": 0.19563987851142883, + "rewards/rejected": -0.19564664363861084, + "step": 9062 + }, + { + "epoch": 6.267634854771784, + "grad_norm": 9.957139015197754, + "learning_rate": 2.0735361917934533e-05, + "log_odds_chosen": 11.273309707641602, + "log_odds_ratio": -4.331594755058177e-05, + "logits/chosen": -0.41265708208084106, + "logits/rejected": -0.5016641616821289, + "logps/chosen": -0.00020188375492580235, + "logps/rejected": -2.586665630340576, + "loss": 0.709, + "nll_loss": 0.17723584175109863, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0188375856378116e-05, + "rewards/margins": 0.2586463689804077, + "rewards/rejected": -0.2586665749549866, + "step": 9063 + }, + { + "epoch": 6.268326417704011, + "grad_norm": 4.971700668334961, + "learning_rate": 2.0731519901644382e-05, + "log_odds_chosen": 10.290696144104004, + "log_odds_ratio": -9.44764688028954e-05, + "logits/chosen": -0.5032870769500732, + "logits/rejected": -0.5671851634979248, + "logps/chosen": -0.0004169541352894157, + "logps/rejected": -2.007357358932495, + "loss": 0.5111, + "nll_loss": 0.12775713205337524, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.169541352894157e-05, + "rewards/margins": 0.20069405436515808, + "rewards/rejected": -0.20073574781417847, + "step": 9064 + }, + { + "epoch": 6.269017980636238, + "grad_norm": 6.018120288848877, + "learning_rate": 2.0727677885354234e-05, + "log_odds_chosen": 9.527848243713379, + "log_odds_ratio": -0.0004043597145937383, + "logits/chosen": 0.20102456212043762, + "logits/rejected": 0.05797000974416733, + "logps/chosen": -0.00043211251613684, + "logps/rejected": -1.5511095523834229, + "loss": 0.9438, + "nll_loss": 0.23591835796833038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3211250158492476e-05, + "rewards/margins": 0.15506777167320251, + "rewards/rejected": -0.15511095523834229, + "step": 9065 + }, + { + "epoch": 6.269709543568465, + "grad_norm": 7.352861404418945, + "learning_rate": 2.0723835869064087e-05, + "log_odds_chosen": 10.076114654541016, + "log_odds_ratio": -8.22042056825012e-05, + "logits/chosen": -0.38600829243659973, + "logits/rejected": -0.42129021883010864, + "logps/chosen": -0.0007903474033810198, + "logps/rejected": -2.3401424884796143, + "loss": 0.625, + "nll_loss": 0.15625423192977905, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.903473306214437e-05, + "rewards/margins": 0.23393520712852478, + "rewards/rejected": -0.23401425778865814, + "step": 9066 + }, + { + "epoch": 6.2704011065006915, + "grad_norm": 5.614367485046387, + "learning_rate": 2.0719993852773936e-05, + "log_odds_chosen": 11.346555709838867, + "log_odds_ratio": -7.106648263288662e-05, + "logits/chosen": -0.4547783136367798, + "logits/rejected": -0.5846199989318848, + "logps/chosen": -0.00023274804698303342, + "logps/rejected": -2.928040027618408, + "loss": 0.4747, + "nll_loss": 0.11867094784975052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.327480433450546e-05, + "rewards/margins": 0.29278072714805603, + "rewards/rejected": -0.2928040027618408, + "step": 9067 + }, + { + "epoch": 6.271092669432918, + "grad_norm": 5.819713115692139, + "learning_rate": 2.0716151836483788e-05, + "log_odds_chosen": 9.828176498413086, + "log_odds_ratio": -0.00026798504404723644, + "logits/chosen": -0.31800785660743713, + "logits/rejected": -0.4112524092197418, + "logps/chosen": -0.0003081922768615186, + "logps/rejected": -1.351736307144165, + "loss": 0.5388, + "nll_loss": 0.1346682757139206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0819228413747624e-05, + "rewards/margins": 0.13514280319213867, + "rewards/rejected": -0.13517361879348755, + "step": 9068 + }, + { + "epoch": 6.271784232365145, + "grad_norm": 4.693689346313477, + "learning_rate": 2.071230982019364e-05, + "log_odds_chosen": 10.99893856048584, + "log_odds_ratio": -2.8096686946810223e-05, + "logits/chosen": -0.7330502867698669, + "logits/rejected": -0.7905983924865723, + "logps/chosen": -0.0003268049331381917, + "logps/rejected": -2.4436917304992676, + "loss": 0.5298, + "nll_loss": 0.13243672251701355, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2680491131031886e-05, + "rewards/margins": 0.24433650076389313, + "rewards/rejected": -0.24436917901039124, + "step": 9069 + }, + { + "epoch": 6.272475795297372, + "grad_norm": 7.647608757019043, + "learning_rate": 2.070846780390349e-05, + "log_odds_chosen": 10.127792358398438, + "log_odds_ratio": -0.0002474577631801367, + "logits/chosen": -0.1824146807193756, + "logits/rejected": -0.25091737508773804, + "logps/chosen": -0.0006325167487375438, + "logps/rejected": -1.671685814857483, + "loss": 0.5367, + "nll_loss": 0.13413894176483154, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3251682149712e-05, + "rewards/margins": 0.1671053171157837, + "rewards/rejected": -0.16716857254505157, + "step": 9070 + }, + { + "epoch": 6.273167358229599, + "grad_norm": 6.014593124389648, + "learning_rate": 2.0704625787613342e-05, + "log_odds_chosen": 11.303054809570312, + "log_odds_ratio": -2.4923643650254235e-05, + "logits/chosen": -0.8515036106109619, + "logits/rejected": -0.9350264668464661, + "logps/chosen": -0.00017572117212694138, + "logps/rejected": -2.4927737712860107, + "loss": 1.1008, + "nll_loss": 0.27519088983535767, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.75721179402899e-05, + "rewards/margins": 0.249259814620018, + "rewards/rejected": -0.24927736818790436, + "step": 9071 + }, + { + "epoch": 6.273858921161826, + "grad_norm": 12.842439651489258, + "learning_rate": 2.070078377132319e-05, + "log_odds_chosen": 11.64586067199707, + "log_odds_ratio": -3.5853641747962683e-05, + "logits/chosen": -0.5936583876609802, + "logits/rejected": -0.699408769607544, + "logps/chosen": -0.00029556750087067485, + "logps/rejected": -2.9994378089904785, + "loss": 0.6965, + "nll_loss": 0.17411650717258453, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9556751542259008e-05, + "rewards/margins": 0.29991424083709717, + "rewards/rejected": -0.29994380474090576, + "step": 9072 + }, + { + "epoch": 6.2745504840940525, + "grad_norm": 9.507415771484375, + "learning_rate": 2.0696941755033044e-05, + "log_odds_chosen": 10.774910926818848, + "log_odds_ratio": -3.4509495890233666e-05, + "logits/chosen": -0.6610577702522278, + "logits/rejected": -0.6650505065917969, + "logps/chosen": -0.00021303967514541, + "logps/rejected": -1.9831280708312988, + "loss": 0.5951, + "nll_loss": 0.14877526462078094, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1303967514541e-05, + "rewards/margins": 0.19829149544239044, + "rewards/rejected": -0.19831281900405884, + "step": 9073 + }, + { + "epoch": 6.275242047026279, + "grad_norm": 9.559590339660645, + "learning_rate": 2.0693099738742893e-05, + "log_odds_chosen": 9.74659252166748, + "log_odds_ratio": -0.0001875993621069938, + "logits/chosen": -0.45239779353141785, + "logits/rejected": -0.4574759602546692, + "logps/chosen": -0.0002895795914810151, + "logps/rejected": -1.7154991626739502, + "loss": 0.4271, + "nll_loss": 0.10676049441099167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8957958420505747e-05, + "rewards/margins": 0.17152096331119537, + "rewards/rejected": -0.17154991626739502, + "step": 9074 + }, + { + "epoch": 6.275933609958506, + "grad_norm": 5.952230453491211, + "learning_rate": 2.068925772245274e-05, + "log_odds_chosen": 10.594772338867188, + "log_odds_ratio": -0.00013316248077899218, + "logits/chosen": -0.35547271370887756, + "logits/rejected": -0.37703627347946167, + "logps/chosen": -0.00033602563780732453, + "logps/rejected": -2.5674431324005127, + "loss": 0.7013, + "nll_loss": 0.17531204223632812, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3602569601498544e-05, + "rewards/margins": 0.2567107081413269, + "rewards/rejected": -0.2567443251609802, + "step": 9075 + }, + { + "epoch": 6.276625172890733, + "grad_norm": 4.105951309204102, + "learning_rate": 2.0685415706162594e-05, + "log_odds_chosen": 10.632652282714844, + "log_odds_ratio": -0.00012810016050934792, + "logits/chosen": -0.11020754277706146, + "logits/rejected": -0.11705412715673447, + "logps/chosen": -0.00033791197347454727, + "logps/rejected": -2.2698371410369873, + "loss": 0.7135, + "nll_loss": 0.1783510148525238, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.379119880264625e-05, + "rewards/margins": 0.22694994509220123, + "rewards/rejected": -0.22698372602462769, + "step": 9076 + }, + { + "epoch": 6.27731673582296, + "grad_norm": 14.176226615905762, + "learning_rate": 2.0681573689872447e-05, + "log_odds_chosen": 10.789567947387695, + "log_odds_ratio": -0.0003163648652844131, + "logits/chosen": -0.5388079881668091, + "logits/rejected": -0.2634154260158539, + "logps/chosen": -0.0005803824751637876, + "logps/rejected": -2.910543203353882, + "loss": 0.4857, + "nll_loss": 0.12138887494802475, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.803824751637876e-05, + "rewards/margins": 0.2909962832927704, + "rewards/rejected": -0.2910543382167816, + "step": 9077 + }, + { + "epoch": 6.278008298755187, + "grad_norm": 10.08351993560791, + "learning_rate": 2.0677731673582296e-05, + "log_odds_chosen": 11.046411514282227, + "log_odds_ratio": -3.6873323551844805e-05, + "logits/chosen": -0.4525337815284729, + "logits/rejected": -0.5173885822296143, + "logps/chosen": -0.00015250897558871657, + "logps/rejected": -1.7773330211639404, + "loss": 0.9478, + "nll_loss": 0.2369578629732132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5250896467478015e-05, + "rewards/margins": 0.17771805822849274, + "rewards/rejected": -0.17773330211639404, + "step": 9078 + }, + { + "epoch": 6.2786998616874135, + "grad_norm": 11.85649299621582, + "learning_rate": 2.0673889657292148e-05, + "log_odds_chosen": 11.521341323852539, + "log_odds_ratio": -1.7196343833347782e-05, + "logits/chosen": -0.2669324278831482, + "logits/rejected": -0.2659561336040497, + "logps/chosen": -0.00044245910248719156, + "logps/rejected": -3.3967370986938477, + "loss": 0.6256, + "nll_loss": 0.15640604496002197, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4245909521123394e-05, + "rewards/margins": 0.33962947130203247, + "rewards/rejected": -0.3396737277507782, + "step": 9079 + }, + { + "epoch": 6.27939142461964, + "grad_norm": 7.615908145904541, + "learning_rate": 2.0670047641002e-05, + "log_odds_chosen": 10.834648132324219, + "log_odds_ratio": -4.6995177399367094e-05, + "logits/chosen": -0.5375221371650696, + "logits/rejected": -0.5115315914154053, + "logps/chosen": -0.00023706954380031675, + "logps/rejected": -2.054745674133301, + "loss": 0.9527, + "nll_loss": 0.23815976083278656, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3706954380031675e-05, + "rewards/margins": 0.2054508626461029, + "rewards/rejected": -0.20547455549240112, + "step": 9080 + }, + { + "epoch": 6.280082987551867, + "grad_norm": 6.639434814453125, + "learning_rate": 2.066620562471185e-05, + "log_odds_chosen": 10.678813934326172, + "log_odds_ratio": -7.374895358225331e-05, + "logits/chosen": -0.46182703971862793, + "logits/rejected": -0.4474222660064697, + "logps/chosen": -0.0002161176089430228, + "logps/rejected": -1.9364413022994995, + "loss": 0.6428, + "nll_loss": 0.16069626808166504, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1611760530504398e-05, + "rewards/margins": 0.19362254440784454, + "rewards/rejected": -0.19364413619041443, + "step": 9081 + }, + { + "epoch": 6.280774550484094, + "grad_norm": 8.542716026306152, + "learning_rate": 2.0662363608421702e-05, + "log_odds_chosen": 10.277900695800781, + "log_odds_ratio": -0.0008143960149027407, + "logits/chosen": -0.46082472801208496, + "logits/rejected": -0.46745404601097107, + "logps/chosen": -0.0009153565624728799, + "logps/rejected": -2.7791833877563477, + "loss": 0.6049, + "nll_loss": 0.15115474164485931, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.153566497843713e-05, + "rewards/margins": 0.27782678604125977, + "rewards/rejected": -0.27791833877563477, + "step": 9082 + }, + { + "epoch": 6.281466113416321, + "grad_norm": 6.666465759277344, + "learning_rate": 2.065852159213155e-05, + "log_odds_chosen": 10.795612335205078, + "log_odds_ratio": -8.343016088474542e-05, + "logits/chosen": -0.242087721824646, + "logits/rejected": -0.20174013078212738, + "logps/chosen": -0.00022156513296067715, + "logps/rejected": -2.262528896331787, + "loss": 0.7005, + "nll_loss": 0.1751115769147873, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2156513296067715e-05, + "rewards/margins": 0.22623072564601898, + "rewards/rejected": -0.22625288367271423, + "step": 9083 + }, + { + "epoch": 6.282157676348548, + "grad_norm": 9.905106544494629, + "learning_rate": 2.06546795758414e-05, + "log_odds_chosen": 10.582621574401855, + "log_odds_ratio": -0.000918439356610179, + "logits/chosen": -0.24201442301273346, + "logits/rejected": -0.2323443591594696, + "logps/chosen": -0.004194437526166439, + "logps/rejected": -2.3850326538085938, + "loss": 0.6521, + "nll_loss": 0.16293781995773315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004194437642581761, + "rewards/margins": 0.2380838245153427, + "rewards/rejected": -0.23850327730178833, + "step": 9084 + }, + { + "epoch": 6.282849239280774, + "grad_norm": 5.138657093048096, + "learning_rate": 2.0650837559551253e-05, + "log_odds_chosen": 11.133842468261719, + "log_odds_ratio": -3.982733323937282e-05, + "logits/chosen": -0.60860276222229, + "logits/rejected": -0.7133729457855225, + "logps/chosen": -0.00038923200918361545, + "logps/rejected": -2.505032539367676, + "loss": 0.6201, + "nll_loss": 0.1550285816192627, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8923200918361545e-05, + "rewards/margins": 0.2504643201828003, + "rewards/rejected": -0.2505032420158386, + "step": 9085 + }, + { + "epoch": 6.283540802213001, + "grad_norm": 4.807248592376709, + "learning_rate": 2.0646995543261105e-05, + "log_odds_chosen": 10.152233123779297, + "log_odds_ratio": -8.358690683962777e-05, + "logits/chosen": -0.18029722571372986, + "logits/rejected": -0.16884273290634155, + "logps/chosen": -0.0004682210856117308, + "logps/rejected": -1.9533414840698242, + "loss": 0.5167, + "nll_loss": 0.12917307019233704, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.682211147155613e-05, + "rewards/margins": 0.1952873170375824, + "rewards/rejected": -0.19533413648605347, + "step": 9086 + }, + { + "epoch": 6.284232365145228, + "grad_norm": 4.057741641998291, + "learning_rate": 2.0643153526970954e-05, + "log_odds_chosen": 11.659266471862793, + "log_odds_ratio": -1.8078741049976088e-05, + "logits/chosen": -0.5082545280456543, + "logits/rejected": -0.5605330467224121, + "logps/chosen": -0.00011240919411648065, + "logps/rejected": -2.500460386276245, + "loss": 0.4684, + "nll_loss": 0.11708788573741913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1240919775445946e-05, + "rewards/margins": 0.25003480911254883, + "rewards/rejected": -0.250046044588089, + "step": 9087 + }, + { + "epoch": 6.284923928077455, + "grad_norm": 4.503249168395996, + "learning_rate": 2.0639311510680806e-05, + "log_odds_chosen": 10.83475399017334, + "log_odds_ratio": -2.814342951751314e-05, + "logits/chosen": -0.3474327325820923, + "logits/rejected": -0.4542396068572998, + "logps/chosen": -0.0001295089750783518, + "logps/rejected": -1.9213929176330566, + "loss": 0.4156, + "nll_loss": 0.10390356183052063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2950898053532e-05, + "rewards/margins": 0.1921263337135315, + "rewards/rejected": -0.19213929772377014, + "step": 9088 + }, + { + "epoch": 6.285615491009682, + "grad_norm": 5.762747764587402, + "learning_rate": 2.063546949439066e-05, + "log_odds_chosen": 10.371255874633789, + "log_odds_ratio": -0.0003046975180041045, + "logits/chosen": -0.40071895718574524, + "logits/rejected": -0.4745933711528778, + "logps/chosen": -0.00035770676913671196, + "logps/rejected": -2.2709102630615234, + "loss": 0.7057, + "nll_loss": 0.17639602720737457, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.577067400328815e-05, + "rewards/margins": 0.22705526649951935, + "rewards/rejected": -0.22709104418754578, + "step": 9089 + }, + { + "epoch": 6.286307053941909, + "grad_norm": 20.534412384033203, + "learning_rate": 2.0631627478100508e-05, + "log_odds_chosen": 9.866781234741211, + "log_odds_ratio": -0.0009979484602808952, + "logits/chosen": -0.6083856225013733, + "logits/rejected": -0.5973707437515259, + "logps/chosen": -0.00729210302233696, + "logps/rejected": -2.3333680629730225, + "loss": 0.831, + "nll_loss": 0.20765653252601624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007292103255167603, + "rewards/margins": 0.2326076328754425, + "rewards/rejected": -0.23333682119846344, + "step": 9090 + }, + { + "epoch": 6.286998616874135, + "grad_norm": 7.940767288208008, + "learning_rate": 2.062778546181036e-05, + "log_odds_chosen": 10.434680938720703, + "log_odds_ratio": -0.00012016297841910273, + "logits/chosen": -0.5466931462287903, + "logits/rejected": -0.6142657399177551, + "logps/chosen": -0.0004252656945027411, + "logps/rejected": -2.3878979682922363, + "loss": 0.7342, + "nll_loss": 0.18353325128555298, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.252656799508259e-05, + "rewards/margins": 0.23874729871749878, + "rewards/rejected": -0.23878982663154602, + "step": 9091 + }, + { + "epoch": 6.287690179806362, + "grad_norm": 9.114567756652832, + "learning_rate": 2.062394344552021e-05, + "log_odds_chosen": 11.336699485778809, + "log_odds_ratio": -2.9732616894762032e-05, + "logits/chosen": -0.5372797846794128, + "logits/rejected": -0.5294592380523682, + "logps/chosen": -9.94501169770956e-05, + "logps/rejected": -2.0726401805877686, + "loss": 0.6144, + "nll_loss": 0.1535939872264862, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.94501169770956e-06, + "rewards/margins": 0.2072540819644928, + "rewards/rejected": -0.2072640359401703, + "step": 9092 + }, + { + "epoch": 6.288381742738589, + "grad_norm": 5.164366722106934, + "learning_rate": 2.062010142923006e-05, + "log_odds_chosen": 11.472952842712402, + "log_odds_ratio": -6.04154120082967e-05, + "logits/chosen": -0.5103493332862854, + "logits/rejected": -0.6042656898498535, + "logps/chosen": -0.00023936809157021344, + "logps/rejected": -3.1101317405700684, + "loss": 0.5141, + "nll_loss": 0.12852534651756287, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3936810976010747e-05, + "rewards/margins": 0.31098923087120056, + "rewards/rejected": -0.3110131621360779, + "step": 9093 + }, + { + "epoch": 6.289073305670816, + "grad_norm": 5.8642120361328125, + "learning_rate": 2.061625941293991e-05, + "log_odds_chosen": 10.50048542022705, + "log_odds_ratio": -5.2675630286103114e-05, + "logits/chosen": -0.012782931327819824, + "logits/rejected": -0.0010571479797363281, + "logps/chosen": -0.0002058782265521586, + "logps/rejected": -1.895133137702942, + "loss": 0.4468, + "nll_loss": 0.1116909384727478, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058782229141798e-05, + "rewards/margins": 0.18949273228645325, + "rewards/rejected": -0.18951331079006195, + "step": 9094 + }, + { + "epoch": 6.289764868603043, + "grad_norm": 4.499237060546875, + "learning_rate": 2.0612417396649763e-05, + "log_odds_chosen": 11.613418579101562, + "log_odds_ratio": -4.268988413969055e-05, + "logits/chosen": -0.33803707361221313, + "logits/rejected": -0.44322025775909424, + "logps/chosen": -0.00025714325602166355, + "logps/rejected": -3.2149362564086914, + "loss": 0.4654, + "nll_loss": 0.11635729670524597, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.571432378317695e-05, + "rewards/margins": 0.32146793603897095, + "rewards/rejected": -0.32149362564086914, + "step": 9095 + }, + { + "epoch": 6.29045643153527, + "grad_norm": 5.892808437347412, + "learning_rate": 2.0608575380359612e-05, + "log_odds_chosen": 10.895482063293457, + "log_odds_ratio": -8.759888442000374e-05, + "logits/chosen": -0.38643354177474976, + "logits/rejected": -0.507982075214386, + "logps/chosen": -0.0005721809575334191, + "logps/rejected": -2.772576332092285, + "loss": 0.4919, + "nll_loss": 0.12295990437269211, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.721809066017158e-05, + "rewards/margins": 0.2772004306316376, + "rewards/rejected": -0.27725762128829956, + "step": 9096 + }, + { + "epoch": 6.291147994467496, + "grad_norm": 4.480569362640381, + "learning_rate": 2.0604733364069465e-05, + "log_odds_chosen": 11.518074989318848, + "log_odds_ratio": -2.6272582545061596e-05, + "logits/chosen": -0.15712255239486694, + "logits/rejected": -0.21012672781944275, + "logps/chosen": -0.00012038549175485969, + "logps/rejected": -2.594068765640259, + "loss": 0.5891, + "nll_loss": 0.14728417992591858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2038549357384909e-05, + "rewards/margins": 0.2593948543071747, + "rewards/rejected": -0.2594068646430969, + "step": 9097 + }, + { + "epoch": 6.291839557399723, + "grad_norm": 14.674330711364746, + "learning_rate": 2.0600891347779317e-05, + "log_odds_chosen": 10.980438232421875, + "log_odds_ratio": -5.24193346791435e-05, + "logits/chosen": -0.318036288022995, + "logits/rejected": -0.34022057056427, + "logps/chosen": -0.00012784292630385607, + "logps/rejected": -2.103323221206665, + "loss": 0.4618, + "nll_loss": 0.11543399095535278, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2784292266587727e-05, + "rewards/margins": 0.21031954884529114, + "rewards/rejected": -0.21033233404159546, + "step": 9098 + }, + { + "epoch": 6.29253112033195, + "grad_norm": 7.994041442871094, + "learning_rate": 2.0597049331489166e-05, + "log_odds_chosen": 10.096479415893555, + "log_odds_ratio": -0.000200098060304299, + "logits/chosen": -0.2078363597393036, + "logits/rejected": -0.2426210343837738, + "logps/chosen": -0.00042861944530159235, + "logps/rejected": -1.877027988433838, + "loss": 0.5436, + "nll_loss": 0.1358749121427536, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.286194598535076e-05, + "rewards/margins": 0.18765994906425476, + "rewards/rejected": -0.18770280480384827, + "step": 9099 + }, + { + "epoch": 6.293222683264177, + "grad_norm": 5.093844413757324, + "learning_rate": 2.059320731519902e-05, + "log_odds_chosen": 11.225149154663086, + "log_odds_ratio": -2.9234739486128092e-05, + "logits/chosen": -0.14554114639759064, + "logits/rejected": -0.16985180974006653, + "logps/chosen": -0.0001269277709070593, + "logps/rejected": -2.1995105743408203, + "loss": 0.5332, + "nll_loss": 0.1332854926586151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.269277618121123e-05, + "rewards/margins": 0.21993838250637054, + "rewards/rejected": -0.2199510633945465, + "step": 9100 + }, + { + "epoch": 6.293914246196404, + "grad_norm": 7.994125843048096, + "learning_rate": 2.0589365298908868e-05, + "log_odds_chosen": 11.543903350830078, + "log_odds_ratio": -1.968575088540092e-05, + "logits/chosen": -0.5534186363220215, + "logits/rejected": -0.7465137243270874, + "logps/chosen": -6.856806430732831e-05, + "logps/rejected": -1.8713579177856445, + "loss": 0.8615, + "nll_loss": 0.21538425981998444, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.856806976429652e-06, + "rewards/margins": 0.187128946185112, + "rewards/rejected": -0.18713578581809998, + "step": 9101 + }, + { + "epoch": 6.2946058091286305, + "grad_norm": 9.89396858215332, + "learning_rate": 2.0585523282618717e-05, + "log_odds_chosen": 9.128700256347656, + "log_odds_ratio": -0.008632665500044823, + "logits/chosen": -0.5671526789665222, + "logits/rejected": -0.5252231359481812, + "logps/chosen": -0.0046012732200324535, + "logps/rejected": -1.1975053548812866, + "loss": 0.6559, + "nll_loss": 0.16312405467033386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046012733946554363, + "rewards/margins": 0.11929042637348175, + "rewards/rejected": -0.11975055187940598, + "step": 9102 + }, + { + "epoch": 6.295297372060857, + "grad_norm": 5.3626933097839355, + "learning_rate": 2.058168126632857e-05, + "log_odds_chosen": 10.341206550598145, + "log_odds_ratio": -6.292940815910697e-05, + "logits/chosen": -0.7664812803268433, + "logits/rejected": -0.827400267124176, + "logps/chosen": -0.0005252526607364416, + "logps/rejected": -1.9347258806228638, + "loss": 0.4056, + "nll_loss": 0.1013893336057663, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.25252653460484e-05, + "rewards/margins": 0.19342006742954254, + "rewards/rejected": -0.19347259402275085, + "step": 9103 + }, + { + "epoch": 6.295988934993084, + "grad_norm": 7.762931823730469, + "learning_rate": 2.0577839250038422e-05, + "log_odds_chosen": 11.329391479492188, + "log_odds_ratio": -0.0001028580591082573, + "logits/chosen": -0.2265159785747528, + "logits/rejected": -0.3073858320713043, + "logps/chosen": -0.0003771249030251056, + "logps/rejected": -3.12196683883667, + "loss": 0.6758, + "nll_loss": 0.16894958913326263, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7712492485297844e-05, + "rewards/margins": 0.3121589720249176, + "rewards/rejected": -0.31219667196273804, + "step": 9104 + }, + { + "epoch": 6.296680497925311, + "grad_norm": 6.711189270019531, + "learning_rate": 2.057399723374827e-05, + "log_odds_chosen": 10.471979141235352, + "log_odds_ratio": -4.5370179577730596e-05, + "logits/chosen": -0.34325867891311646, + "logits/rejected": -0.3504777252674103, + "logps/chosen": -0.0005373624735511839, + "logps/rejected": -2.595447540283203, + "loss": 0.5405, + "nll_loss": 0.13513071835041046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.373624662752263e-05, + "rewards/margins": 0.2594910264015198, + "rewards/rejected": -0.2595447599887848, + "step": 9105 + }, + { + "epoch": 6.297372060857538, + "grad_norm": 5.516439914703369, + "learning_rate": 2.0570155217458123e-05, + "log_odds_chosen": 10.464244842529297, + "log_odds_ratio": -0.00011570812057470903, + "logits/chosen": -0.48608943819999695, + "logits/rejected": -0.6556553244590759, + "logps/chosen": -0.0003440856817178428, + "logps/rejected": -2.043456554412842, + "loss": 0.9232, + "nll_loss": 0.23078015446662903, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.440857108216733e-05, + "rewards/margins": 0.20431123673915863, + "rewards/rejected": -0.20434564352035522, + "step": 9106 + }, + { + "epoch": 6.298063623789765, + "grad_norm": 8.259562492370605, + "learning_rate": 2.0566313201167976e-05, + "log_odds_chosen": 10.183019638061523, + "log_odds_ratio": -0.00031526273232884705, + "logits/chosen": -0.6100044250488281, + "logits/rejected": -0.5913538932800293, + "logps/chosen": -0.0004313396639190614, + "logps/rejected": -1.5800104141235352, + "loss": 0.5025, + "nll_loss": 0.12558498978614807, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3133968574693426e-05, + "rewards/margins": 0.15795791149139404, + "rewards/rejected": -0.15800105035305023, + "step": 9107 + }, + { + "epoch": 6.2987551867219915, + "grad_norm": 5.118277549743652, + "learning_rate": 2.0562471184877825e-05, + "log_odds_chosen": 9.261262893676758, + "log_odds_ratio": -0.001336643472313881, + "logits/chosen": -0.7850015163421631, + "logits/rejected": -0.7912291288375854, + "logps/chosen": -0.002515049185603857, + "logps/rejected": -1.6835026741027832, + "loss": 0.4578, + "nll_loss": 0.11431378126144409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025150494184345007, + "rewards/margins": 0.16809876263141632, + "rewards/rejected": -0.16835026443004608, + "step": 9108 + }, + { + "epoch": 6.299446749654218, + "grad_norm": 10.098360061645508, + "learning_rate": 2.0558629168587677e-05, + "log_odds_chosen": 10.201295852661133, + "log_odds_ratio": -7.733918027952313e-05, + "logits/chosen": -0.8221825361251831, + "logits/rejected": -0.7927052974700928, + "logps/chosen": -0.00042313465382903814, + "logps/rejected": -2.2757129669189453, + "loss": 1.1279, + "nll_loss": 0.2819552421569824, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.231346247252077e-05, + "rewards/margins": 0.22752898931503296, + "rewards/rejected": -0.2275713086128235, + "step": 9109 + }, + { + "epoch": 6.300138312586445, + "grad_norm": 9.635416984558105, + "learning_rate": 2.0554787152297526e-05, + "log_odds_chosen": 10.860652923583984, + "log_odds_ratio": -8.556530519854277e-05, + "logits/chosen": -0.5292823910713196, + "logits/rejected": -0.528170108795166, + "logps/chosen": -0.0007029472617432475, + "logps/rejected": -2.466893434524536, + "loss": 0.9067, + "nll_loss": 0.2266591489315033, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.02947290847078e-05, + "rewards/margins": 0.24661904573440552, + "rewards/rejected": -0.2466893196105957, + "step": 9110 + }, + { + "epoch": 6.300829875518672, + "grad_norm": 11.15628719329834, + "learning_rate": 2.0550945136007375e-05, + "log_odds_chosen": 9.115913391113281, + "log_odds_ratio": -0.2130754590034485, + "logits/chosen": -0.5340465307235718, + "logits/rejected": -0.5288445353507996, + "logps/chosen": -0.02381826378405094, + "logps/rejected": -1.7315921783447266, + "loss": 0.6328, + "nll_loss": 0.1369016170501709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0023818262852728367, + "rewards/margins": 0.17077738046646118, + "rewards/rejected": -0.17315921187400818, + "step": 9111 + }, + { + "epoch": 6.301521438450899, + "grad_norm": 8.721664428710938, + "learning_rate": 2.0547103119717228e-05, + "log_odds_chosen": 10.03692626953125, + "log_odds_ratio": -0.0002028129529207945, + "logits/chosen": -0.26854532957077026, + "logits/rejected": -0.32714080810546875, + "logps/chosen": -0.0004822782357223332, + "logps/rejected": -1.737931489944458, + "loss": 0.521, + "nll_loss": 0.130230113863945, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.822781920665875e-05, + "rewards/margins": 0.17374494671821594, + "rewards/rejected": -0.17379315197467804, + "step": 9112 + }, + { + "epoch": 6.302213001383126, + "grad_norm": 7.221090793609619, + "learning_rate": 2.054326110342708e-05, + "log_odds_chosen": 10.58332633972168, + "log_odds_ratio": -3.227575507480651e-05, + "logits/chosen": -0.5629743933677673, + "logits/rejected": -0.6404491662979126, + "logps/chosen": -0.0001708652707748115, + "logps/rejected": -1.8281381130218506, + "loss": 0.5051, + "nll_loss": 0.12627874314785004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7086527805076912e-05, + "rewards/margins": 0.18279673159122467, + "rewards/rejected": -0.182813823223114, + "step": 9113 + }, + { + "epoch": 6.3029045643153525, + "grad_norm": 14.374824523925781, + "learning_rate": 2.053941908713693e-05, + "log_odds_chosen": 11.562841415405273, + "log_odds_ratio": -2.0522167687886395e-05, + "logits/chosen": -0.5264410972595215, + "logits/rejected": -0.5556541681289673, + "logps/chosen": -0.00010420165926916525, + "logps/rejected": -2.358149290084839, + "loss": 0.6013, + "nll_loss": 0.15032121539115906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0420166290714405e-05, + "rewards/margins": 0.23580452799797058, + "rewards/rejected": -0.2358149290084839, + "step": 9114 + }, + { + "epoch": 6.303596127247579, + "grad_norm": 4.442668437957764, + "learning_rate": 2.0535577070846782e-05, + "log_odds_chosen": 11.009807586669922, + "log_odds_ratio": -0.00015554331184830517, + "logits/chosen": -0.40942203998565674, + "logits/rejected": -0.4664459228515625, + "logps/chosen": -0.00044318806612864137, + "logps/rejected": -2.650289535522461, + "loss": 0.5712, + "nll_loss": 0.14278602600097656, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.43188073404599e-05, + "rewards/margins": 0.264984667301178, + "rewards/rejected": -0.2650289535522461, + "step": 9115 + }, + { + "epoch": 6.304287690179806, + "grad_norm": 7.319894790649414, + "learning_rate": 2.0531735054556634e-05, + "log_odds_chosen": 11.91440200805664, + "log_odds_ratio": -3.091490361839533e-05, + "logits/chosen": 0.10696769505739212, + "logits/rejected": 0.04083235189318657, + "logps/chosen": -0.00016952966689132154, + "logps/rejected": -2.9402096271514893, + "loss": 0.6421, + "nll_loss": 0.16051530838012695, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6952966689132154e-05, + "rewards/margins": 0.29400402307510376, + "rewards/rejected": -0.29402095079421997, + "step": 9116 + }, + { + "epoch": 6.304979253112033, + "grad_norm": 8.480218887329102, + "learning_rate": 2.0527893038266483e-05, + "log_odds_chosen": 9.917200088500977, + "log_odds_ratio": -0.0001736325357342139, + "logits/chosen": -0.692557692527771, + "logits/rejected": -0.7411516904830933, + "logps/chosen": -0.0005900151445530355, + "logps/rejected": -1.5346190929412842, + "loss": 0.6832, + "nll_loss": 0.1707778424024582, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9001511544920504e-05, + "rewards/margins": 0.1534029245376587, + "rewards/rejected": -0.15346190333366394, + "step": 9117 + }, + { + "epoch": 6.30567081604426, + "grad_norm": 9.101211547851562, + "learning_rate": 2.0524051021976336e-05, + "log_odds_chosen": 11.44467544555664, + "log_odds_ratio": -1.538614924356807e-05, + "logits/chosen": -0.3148750066757202, + "logits/rejected": -0.36211660504341125, + "logps/chosen": -0.000133889916469343, + "logps/rejected": -2.597325325012207, + "loss": 0.728, + "nll_loss": 0.1820085346698761, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.33889916469343e-05, + "rewards/margins": 0.2597191631793976, + "rewards/rejected": -0.25973254442214966, + "step": 9118 + }, + { + "epoch": 6.306362378976487, + "grad_norm": 8.270613670349121, + "learning_rate": 2.0520209005686185e-05, + "log_odds_chosen": 11.105756759643555, + "log_odds_ratio": -3.122861380688846e-05, + "logits/chosen": -0.34265416860580444, + "logits/rejected": -0.4220009744167328, + "logps/chosen": -0.0003007478080689907, + "logps/rejected": -2.6179518699645996, + "loss": 0.6988, + "nll_loss": 0.17468640208244324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0074781534494832e-05, + "rewards/margins": 0.26176509261131287, + "rewards/rejected": -0.26179519295692444, + "step": 9119 + }, + { + "epoch": 6.3070539419087135, + "grad_norm": 5.276710033416748, + "learning_rate": 2.0516366989396034e-05, + "log_odds_chosen": 9.563478469848633, + "log_odds_ratio": -0.0029228893108665943, + "logits/chosen": -0.5020996928215027, + "logits/rejected": -0.5588923096656799, + "logps/chosen": -0.0016042347997426987, + "logps/rejected": -1.2131743431091309, + "loss": 0.5278, + "nll_loss": 0.1316453069448471, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016042348579503596, + "rewards/margins": 0.12115702033042908, + "rewards/rejected": -0.12131744623184204, + "step": 9120 + }, + { + "epoch": 6.30774550484094, + "grad_norm": 4.691458225250244, + "learning_rate": 2.0512524973105886e-05, + "log_odds_chosen": 10.797635078430176, + "log_odds_ratio": -0.00011942382843699306, + "logits/chosen": -0.8213660717010498, + "logits/rejected": -0.8382663726806641, + "logps/chosen": -0.00017635921540204436, + "logps/rejected": -2.3420279026031494, + "loss": 0.3518, + "nll_loss": 0.08794598281383514, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7635920812608674e-05, + "rewards/margins": 0.234185129404068, + "rewards/rejected": -0.2342027872800827, + "step": 9121 + }, + { + "epoch": 6.308437067773167, + "grad_norm": 6.859457969665527, + "learning_rate": 2.050868295681574e-05, + "log_odds_chosen": 10.081676483154297, + "log_odds_ratio": -8.843156683724374e-05, + "logits/chosen": -0.27068910002708435, + "logits/rejected": -0.39853787422180176, + "logps/chosen": -0.0002941065758932382, + "logps/rejected": -1.7044429779052734, + "loss": 0.6262, + "nll_loss": 0.1565401256084442, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.941065758932382e-05, + "rewards/margins": 0.17041489481925964, + "rewards/rejected": -0.1704443097114563, + "step": 9122 + }, + { + "epoch": 6.309128630705394, + "grad_norm": 8.61195182800293, + "learning_rate": 2.0504840940525588e-05, + "log_odds_chosen": 10.604068756103516, + "log_odds_ratio": -4.5928445615572855e-05, + "logits/chosen": -0.4395691454410553, + "logits/rejected": -0.49882519245147705, + "logps/chosen": -0.0006089697126299143, + "logps/rejected": -2.2564098834991455, + "loss": 0.5378, + "nll_loss": 0.13443604111671448, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.089697126299143e-05, + "rewards/margins": 0.225580096244812, + "rewards/rejected": -0.22564098238945007, + "step": 9123 + }, + { + "epoch": 6.309820193637621, + "grad_norm": 8.427109718322754, + "learning_rate": 2.050099892423544e-05, + "log_odds_chosen": 10.184764862060547, + "log_odds_ratio": -0.0012439328711479902, + "logits/chosen": -0.15111692249774933, + "logits/rejected": -0.2543826997280121, + "logps/chosen": -0.0011011157184839249, + "logps/rejected": -2.358288288116455, + "loss": 0.6526, + "nll_loss": 0.16302835941314697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011011157766915858, + "rewards/margins": 0.2357187122106552, + "rewards/rejected": -0.23582881689071655, + "step": 9124 + }, + { + "epoch": 6.310511756569848, + "grad_norm": 8.313289642333984, + "learning_rate": 2.0497156907945293e-05, + "log_odds_chosen": 11.648088455200195, + "log_odds_ratio": -1.4956855011405423e-05, + "logits/chosen": -0.41185325384140015, + "logits/rejected": -0.4594433903694153, + "logps/chosen": -6.538411980727687e-05, + "logps/rejected": -1.9124966859817505, + "loss": 0.5214, + "nll_loss": 0.1303468942642212, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5384119807276875e-06, + "rewards/margins": 0.19124312698841095, + "rewards/rejected": -0.19124966859817505, + "step": 9125 + }, + { + "epoch": 6.3112033195020745, + "grad_norm": 6.2132039070129395, + "learning_rate": 2.0493314891655142e-05, + "log_odds_chosen": 10.96221923828125, + "log_odds_ratio": -0.00021452337387017906, + "logits/chosen": -0.42153963446617126, + "logits/rejected": -0.4461123049259186, + "logps/chosen": -0.00041328402585349977, + "logps/rejected": -2.3031129837036133, + "loss": 0.4764, + "nll_loss": 0.11907409131526947, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1328406950924546e-05, + "rewards/margins": 0.23026996850967407, + "rewards/rejected": -0.2303113043308258, + "step": 9126 + }, + { + "epoch": 6.311894882434301, + "grad_norm": 5.806750774383545, + "learning_rate": 2.0489472875364994e-05, + "log_odds_chosen": 10.953788757324219, + "log_odds_ratio": -2.7453637812868692e-05, + "logits/chosen": -0.5027472376823425, + "logits/rejected": -0.5538997054100037, + "logps/chosen": -0.0002139663847628981, + "logps/rejected": -2.0388693809509277, + "loss": 1.021, + "nll_loss": 0.2552356719970703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1396637748694047e-05, + "rewards/margins": 0.20386554300785065, + "rewards/rejected": -0.203886941075325, + "step": 9127 + }, + { + "epoch": 6.312586445366528, + "grad_norm": 6.384781837463379, + "learning_rate": 2.0485630859074843e-05, + "log_odds_chosen": 11.070930480957031, + "log_odds_ratio": -2.5265617296099663e-05, + "logits/chosen": -0.4062907099723816, + "logits/rejected": -0.49823057651519775, + "logps/chosen": -0.0001699151616776362, + "logps/rejected": -2.390195846557617, + "loss": 0.8184, + "nll_loss": 0.20460784435272217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.69915165315615e-05, + "rewards/margins": 0.23900258541107178, + "rewards/rejected": -0.23901957273483276, + "step": 9128 + }, + { + "epoch": 6.313278008298755, + "grad_norm": 9.393410682678223, + "learning_rate": 2.0481788842784692e-05, + "log_odds_chosen": 10.473282814025879, + "log_odds_ratio": -0.005252666771411896, + "logits/chosen": -0.12318453937768936, + "logits/rejected": -0.1653863489627838, + "logps/chosen": -0.03536149486899376, + "logps/rejected": -2.7704577445983887, + "loss": 0.5438, + "nll_loss": 0.13542786240577698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003536149626597762, + "rewards/margins": 0.2735096216201782, + "rewards/rejected": -0.2770457863807678, + "step": 9129 + }, + { + "epoch": 6.313969571230982, + "grad_norm": 4.986472129821777, + "learning_rate": 2.0477946826494545e-05, + "log_odds_chosen": 10.974739074707031, + "log_odds_ratio": -6.281452806433663e-05, + "logits/chosen": -0.6672524213790894, + "logits/rejected": -0.7109086513519287, + "logps/chosen": -0.000695232767611742, + "logps/rejected": -3.159275531768799, + "loss": 0.6801, + "nll_loss": 0.17002885043621063, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.952328112674877e-05, + "rewards/margins": 0.31585806608200073, + "rewards/rejected": -0.31592756509780884, + "step": 9130 + }, + { + "epoch": 6.314661134163209, + "grad_norm": 6.2385430335998535, + "learning_rate": 2.0474104810204397e-05, + "log_odds_chosen": 10.428638458251953, + "log_odds_ratio": -9.423011215403676e-05, + "logits/chosen": 0.144125834107399, + "logits/rejected": 0.09078823775053024, + "logps/chosen": -0.0005835729534737766, + "logps/rejected": -1.8538960218429565, + "loss": 0.5353, + "nll_loss": 0.13381007313728333, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.835729098180309e-05, + "rewards/margins": 0.18533125519752502, + "rewards/rejected": -0.18538960814476013, + "step": 9131 + }, + { + "epoch": 6.3153526970954355, + "grad_norm": 5.815805435180664, + "learning_rate": 2.0470262793914246e-05, + "log_odds_chosen": 10.13248062133789, + "log_odds_ratio": -8.74106481205672e-05, + "logits/chosen": -0.2938426733016968, + "logits/rejected": -0.3387156128883362, + "logps/chosen": -0.000201555565581657, + "logps/rejected": -1.6588993072509766, + "loss": 0.4176, + "nll_loss": 0.10439470410346985, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0155555830569938e-05, + "rewards/margins": 0.1658697873353958, + "rewards/rejected": -0.1658899337053299, + "step": 9132 + }, + { + "epoch": 6.316044260027662, + "grad_norm": 6.086231708526611, + "learning_rate": 2.04664207776241e-05, + "log_odds_chosen": 10.607781410217285, + "log_odds_ratio": -0.0001166929941973649, + "logits/chosen": -0.4167684316635132, + "logits/rejected": -0.4740995764732361, + "logps/chosen": -0.000221387977944687, + "logps/rejected": -2.036670207977295, + "loss": 0.6779, + "nll_loss": 0.16946941614151, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2138799977255985e-05, + "rewards/margins": 0.20364490151405334, + "rewards/rejected": -0.2036670446395874, + "step": 9133 + }, + { + "epoch": 6.316735822959889, + "grad_norm": 4.986376762390137, + "learning_rate": 2.046257876133395e-05, + "log_odds_chosen": 11.060983657836914, + "log_odds_ratio": -8.120344864437357e-05, + "logits/chosen": -0.2317226231098175, + "logits/rejected": -0.25166720151901245, + "logps/chosen": -0.000272795237833634, + "logps/rejected": -2.5201592445373535, + "loss": 0.6768, + "nll_loss": 0.16920101642608643, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7279524147161283e-05, + "rewards/margins": 0.25198864936828613, + "rewards/rejected": -0.25201594829559326, + "step": 9134 + }, + { + "epoch": 6.317427385892116, + "grad_norm": 5.949633598327637, + "learning_rate": 2.04587367450438e-05, + "log_odds_chosen": 10.249530792236328, + "log_odds_ratio": -0.00014744520012754947, + "logits/chosen": -0.22081822156906128, + "logits/rejected": -0.32150161266326904, + "logps/chosen": -0.0003919299051631242, + "logps/rejected": -1.9968299865722656, + "loss": 0.4439, + "nll_loss": 0.11095862090587616, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.919298615073785e-05, + "rewards/margins": 0.1996438056230545, + "rewards/rejected": -0.19968298077583313, + "step": 9135 + }, + { + "epoch": 6.318118948824343, + "grad_norm": 7.3173065185546875, + "learning_rate": 2.0454894728753653e-05, + "log_odds_chosen": 10.903233528137207, + "log_odds_ratio": -0.00016012144624255598, + "logits/chosen": -0.3240607678890228, + "logits/rejected": -0.43824082612991333, + "logps/chosen": -0.0012418123660609126, + "logps/rejected": -2.7618508338928223, + "loss": 0.7438, + "nll_loss": 0.18593566119670868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012418124242685735, + "rewards/margins": 0.27606093883514404, + "rewards/rejected": -0.2761850953102112, + "step": 9136 + }, + { + "epoch": 6.31881051175657, + "grad_norm": 5.717310905456543, + "learning_rate": 2.04510527124635e-05, + "log_odds_chosen": 10.645530700683594, + "log_odds_ratio": -4.525161057244986e-05, + "logits/chosen": -0.49670839309692383, + "logits/rejected": -0.5754987597465515, + "logps/chosen": -0.00016813335241749883, + "logps/rejected": -1.8609609603881836, + "loss": 0.5286, + "nll_loss": 0.13215577602386475, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.681333378655836e-05, + "rewards/margins": 0.18607929348945618, + "rewards/rejected": -0.18609611690044403, + "step": 9137 + }, + { + "epoch": 6.319502074688796, + "grad_norm": 3.6851601600646973, + "learning_rate": 2.044721069617335e-05, + "log_odds_chosen": 11.53646469116211, + "log_odds_ratio": -2.4523029424017295e-05, + "logits/chosen": -0.35948729515075684, + "logits/rejected": -0.32746779918670654, + "logps/chosen": -0.00018249072309117764, + "logps/rejected": -2.788064479827881, + "loss": 0.4728, + "nll_loss": 0.11818571388721466, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8249073036713526e-05, + "rewards/margins": 0.2787882387638092, + "rewards/rejected": -0.2788064777851105, + "step": 9138 + }, + { + "epoch": 6.320193637621023, + "grad_norm": 6.905288219451904, + "learning_rate": 2.0443368679883203e-05, + "log_odds_chosen": 10.342803001403809, + "log_odds_ratio": -5.947341560386121e-05, + "logits/chosen": -0.4630863070487976, + "logits/rejected": -0.570403516292572, + "logps/chosen": -0.0002199176378780976, + "logps/rejected": -1.5150375366210938, + "loss": 0.5436, + "nll_loss": 0.13588204979896545, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1991765606799163e-05, + "rewards/margins": 0.1514817476272583, + "rewards/rejected": -0.15150374174118042, + "step": 9139 + }, + { + "epoch": 6.32088520055325, + "grad_norm": 7.062165260314941, + "learning_rate": 2.0439526663593052e-05, + "log_odds_chosen": 10.084284782409668, + "log_odds_ratio": -0.000260809320025146, + "logits/chosen": -0.19906927645206451, + "logits/rejected": -0.24244311451911926, + "logps/chosen": -0.00016882145428098738, + "logps/rejected": -1.760647177696228, + "loss": 0.7813, + "nll_loss": 0.19529539346694946, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6882144336705096e-05, + "rewards/margins": 0.17604786157608032, + "rewards/rejected": -0.17606472969055176, + "step": 9140 + }, + { + "epoch": 6.321576763485477, + "grad_norm": 10.630314826965332, + "learning_rate": 2.0435684647302905e-05, + "log_odds_chosen": 9.74234676361084, + "log_odds_ratio": -0.00018264676327817142, + "logits/chosen": -0.09850164502859116, + "logits/rejected": -0.19318366050720215, + "logps/chosen": -0.0005119048291817307, + "logps/rejected": -1.7797728776931763, + "loss": 0.5908, + "nll_loss": 0.14769017696380615, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1190483645768836e-05, + "rewards/margins": 0.17792612314224243, + "rewards/rejected": -0.1779772937297821, + "step": 9141 + }, + { + "epoch": 6.322268326417704, + "grad_norm": 5.875476360321045, + "learning_rate": 2.0431842631012757e-05, + "log_odds_chosen": 10.273632049560547, + "log_odds_ratio": -0.0002673549752216786, + "logits/chosen": -0.5241735577583313, + "logits/rejected": -0.5758167505264282, + "logps/chosen": -0.00015015172539278865, + "logps/rejected": -1.5837026834487915, + "loss": 0.6094, + "nll_loss": 0.15232551097869873, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5015171811683103e-05, + "rewards/margins": 0.15835526585578918, + "rewards/rejected": -0.1583702713251114, + "step": 9142 + }, + { + "epoch": 6.322959889349931, + "grad_norm": 15.639997482299805, + "learning_rate": 2.0428000614722606e-05, + "log_odds_chosen": 11.281831741333008, + "log_odds_ratio": -5.805850014439784e-05, + "logits/chosen": -0.7342573404312134, + "logits/rejected": -0.7432233095169067, + "logps/chosen": -0.00012704191613011062, + "logps/rejected": -2.3933351039886475, + "loss": 0.6757, + "nll_loss": 0.16891874372959137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2704192158707883e-05, + "rewards/margins": 0.2393207997083664, + "rewards/rejected": -0.23933351039886475, + "step": 9143 + }, + { + "epoch": 6.323651452282157, + "grad_norm": 6.517669677734375, + "learning_rate": 2.042415859843246e-05, + "log_odds_chosen": 9.65914249420166, + "log_odds_ratio": -0.00022453966084867716, + "logits/chosen": -0.19278670847415924, + "logits/rejected": -0.03370809555053711, + "logps/chosen": -0.00017699523596093059, + "logps/rejected": -1.4082324504852295, + "loss": 0.9838, + "nll_loss": 0.24593743681907654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7699525415082462e-05, + "rewards/margins": 0.14080555737018585, + "rewards/rejected": -0.14082324504852295, + "step": 9144 + }, + { + "epoch": 6.324343015214384, + "grad_norm": 9.895349502563477, + "learning_rate": 2.042031658214231e-05, + "log_odds_chosen": 10.371511459350586, + "log_odds_ratio": -0.0004187318554613739, + "logits/chosen": -0.939992368221283, + "logits/rejected": -0.9398999214172363, + "logps/chosen": -0.0003308483865112066, + "logps/rejected": -2.229236602783203, + "loss": 0.612, + "nll_loss": 0.15294599533081055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.308484156150371e-05, + "rewards/margins": 0.22289058566093445, + "rewards/rejected": -0.2229236662387848, + "step": 9145 + }, + { + "epoch": 6.325034578146611, + "grad_norm": 14.121508598327637, + "learning_rate": 2.041647456585216e-05, + "log_odds_chosen": 10.612939834594727, + "log_odds_ratio": -5.0611692131496966e-05, + "logits/chosen": -0.3637595474720001, + "logits/rejected": -0.39835870265960693, + "logps/chosen": -0.0002669534587766975, + "logps/rejected": -2.183626174926758, + "loss": 0.6725, + "nll_loss": 0.1681094914674759, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6695346605265513e-05, + "rewards/margins": 0.21833594143390656, + "rewards/rejected": -0.21836264431476593, + "step": 9146 + }, + { + "epoch": 6.325726141078838, + "grad_norm": 6.091559886932373, + "learning_rate": 2.041263254956201e-05, + "log_odds_chosen": 10.744900703430176, + "log_odds_ratio": -4.4846929085906595e-05, + "logits/chosen": -0.2999820113182068, + "logits/rejected": -0.3527391254901886, + "logps/chosen": -0.00013493587903212756, + "logps/rejected": -1.8932716846466064, + "loss": 0.4416, + "nll_loss": 0.11038561910390854, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3493588085111696e-05, + "rewards/margins": 0.18931367993354797, + "rewards/rejected": -0.1893271803855896, + "step": 9147 + }, + { + "epoch": 6.326417704011065, + "grad_norm": 6.129518985748291, + "learning_rate": 2.040879053327186e-05, + "log_odds_chosen": 10.612061500549316, + "log_odds_ratio": -0.00012798480747733265, + "logits/chosen": -0.22341987490653992, + "logits/rejected": -0.2803928554058075, + "logps/chosen": -0.0003002454759553075, + "logps/rejected": -1.9529635906219482, + "loss": 0.5286, + "nll_loss": 0.13213306665420532, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0024551961105317e-05, + "rewards/margins": 0.19526633620262146, + "rewards/rejected": -0.19529634714126587, + "step": 9148 + }, + { + "epoch": 6.327109266943292, + "grad_norm": 4.429156303405762, + "learning_rate": 2.040494851698171e-05, + "log_odds_chosen": 10.688800811767578, + "log_odds_ratio": -7.709318015258759e-05, + "logits/chosen": -0.19439886510372162, + "logits/rejected": -0.2739133834838867, + "logps/chosen": -0.00029584753792732954, + "logps/rejected": -2.3238232135772705, + "loss": 0.6883, + "nll_loss": 0.17206960916519165, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9584754884126596e-05, + "rewards/margins": 0.23235273361206055, + "rewards/rejected": -0.23238232731819153, + "step": 9149 + }, + { + "epoch": 6.327800829875518, + "grad_norm": 5.260585784912109, + "learning_rate": 2.0401106500691563e-05, + "log_odds_chosen": 10.529271125793457, + "log_odds_ratio": -6.538509478559718e-05, + "logits/chosen": -0.301781564950943, + "logits/rejected": -0.31411588191986084, + "logps/chosen": -0.00021967320935800672, + "logps/rejected": -2.090834856033325, + "loss": 0.5069, + "nll_loss": 0.12671947479248047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.196732020820491e-05, + "rewards/margins": 0.20906151831150055, + "rewards/rejected": -0.20908348262310028, + "step": 9150 + }, + { + "epoch": 6.328492392807745, + "grad_norm": 5.937831401824951, + "learning_rate": 2.0397264484401416e-05, + "log_odds_chosen": 10.982461929321289, + "log_odds_ratio": -4.1540384700056165e-05, + "logits/chosen": -0.37165072560310364, + "logits/rejected": -0.42236626148223877, + "logps/chosen": -0.000152397362398915, + "logps/rejected": -1.7052466869354248, + "loss": 0.3998, + "nll_loss": 0.09994690120220184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5239737876981962e-05, + "rewards/margins": 0.1705094277858734, + "rewards/rejected": -0.17052467167377472, + "step": 9151 + }, + { + "epoch": 6.329183955739972, + "grad_norm": 5.322743892669678, + "learning_rate": 2.0393422468111265e-05, + "log_odds_chosen": 11.560039520263672, + "log_odds_ratio": -7.76972301537171e-05, + "logits/chosen": -0.15101279318332672, + "logits/rejected": -0.20717932283878326, + "logps/chosen": -9.647566912462935e-05, + "logps/rejected": -2.2673559188842773, + "loss": 0.6161, + "nll_loss": 0.1540297269821167, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.647566912462935e-06, + "rewards/margins": 0.2267259657382965, + "rewards/rejected": -0.22673562169075012, + "step": 9152 + }, + { + "epoch": 6.329875518672199, + "grad_norm": 9.544047355651855, + "learning_rate": 2.0389580451821117e-05, + "log_odds_chosen": 10.044012069702148, + "log_odds_ratio": -0.00019923794025089592, + "logits/chosen": -0.8574331402778625, + "logits/rejected": -0.8163395524024963, + "logps/chosen": -0.00047073009773157537, + "logps/rejected": -2.0109262466430664, + "loss": 1.0646, + "nll_loss": 0.26612648367881775, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7073008317966014e-05, + "rewards/margins": 0.20104554295539856, + "rewards/rejected": -0.20109263062477112, + "step": 9153 + }, + { + "epoch": 6.330567081604426, + "grad_norm": 6.393756866455078, + "learning_rate": 2.038573843553097e-05, + "log_odds_chosen": 9.159208297729492, + "log_odds_ratio": -0.00200686976313591, + "logits/chosen": -0.35502588748931885, + "logits/rejected": -0.3517462909221649, + "logps/chosen": -0.0012916001724079251, + "logps/rejected": -1.1428102254867554, + "loss": 0.6195, + "nll_loss": 0.15467888116836548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001291600347030908, + "rewards/margins": 0.11415186524391174, + "rewards/rejected": -0.11428102105855942, + "step": 9154 + }, + { + "epoch": 6.3312586445366525, + "grad_norm": 5.2988972663879395, + "learning_rate": 2.038189641924082e-05, + "log_odds_chosen": 11.045205116271973, + "log_odds_ratio": -3.658358036773279e-05, + "logits/chosen": -0.5847592949867249, + "logits/rejected": -0.4950445592403412, + "logps/chosen": -0.00017093573114834726, + "logps/rejected": -2.2229764461517334, + "loss": 0.3643, + "nll_loss": 0.09106318652629852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7093572751036845e-05, + "rewards/margins": 0.2222805619239807, + "rewards/rejected": -0.22229765355587006, + "step": 9155 + }, + { + "epoch": 6.331950207468879, + "grad_norm": 10.425149917602539, + "learning_rate": 2.0378054402950668e-05, + "log_odds_chosen": 9.705398559570312, + "log_odds_ratio": -0.00030759384389966726, + "logits/chosen": -0.5570382475852966, + "logits/rejected": -0.5905536413192749, + "logps/chosen": -0.0006824440788477659, + "logps/rejected": -2.1629672050476074, + "loss": 0.6773, + "nll_loss": 0.16928672790527344, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.824440788477659e-05, + "rewards/margins": 0.21622847020626068, + "rewards/rejected": -0.2162967175245285, + "step": 9156 + }, + { + "epoch": 6.332641770401106, + "grad_norm": 5.680348873138428, + "learning_rate": 2.037421238666052e-05, + "log_odds_chosen": 10.483183860778809, + "log_odds_ratio": -8.02238064352423e-05, + "logits/chosen": -0.2764190137386322, + "logits/rejected": -0.2961311340332031, + "logps/chosen": -0.0003129146352875978, + "logps/rejected": -2.1694741249084473, + "loss": 0.5383, + "nll_loss": 0.1345573365688324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.129146352875978e-05, + "rewards/margins": 0.21691614389419556, + "rewards/rejected": -0.21694743633270264, + "step": 9157 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 5.355869770050049, + "learning_rate": 2.037037037037037e-05, + "log_odds_chosen": 11.758176803588867, + "log_odds_ratio": -1.2654306374315638e-05, + "logits/chosen": -0.28790849447250366, + "logits/rejected": -0.3816429376602173, + "logps/chosen": -0.0001446415262762457, + "logps/rejected": -2.7636444568634033, + "loss": 0.7746, + "nll_loss": 0.19364896416664124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4464152627624571e-05, + "rewards/margins": 0.2763499617576599, + "rewards/rejected": -0.27636444568634033, + "step": 9158 + }, + { + "epoch": 6.33402489626556, + "grad_norm": 7.186736583709717, + "learning_rate": 2.036652835408022e-05, + "log_odds_chosen": 10.33999252319336, + "log_odds_ratio": -0.00015110634558368474, + "logits/chosen": -0.6750638484954834, + "logits/rejected": -0.5907482504844666, + "logps/chosen": -0.0005631681997328997, + "logps/rejected": -2.267817497253418, + "loss": 0.6841, + "nll_loss": 0.17102006077766418, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631681779050268e-05, + "rewards/margins": 0.22672541439533234, + "rewards/rejected": -0.22678174078464508, + "step": 9159 + }, + { + "epoch": 6.334716459197787, + "grad_norm": 4.402047634124756, + "learning_rate": 2.0362686337790074e-05, + "log_odds_chosen": 10.460683822631836, + "log_odds_ratio": -5.5046300985850394e-05, + "logits/chosen": -0.535968542098999, + "logits/rejected": -0.5456217527389526, + "logps/chosen": -0.0004225453594699502, + "logps/rejected": -1.4134366512298584, + "loss": 0.5102, + "nll_loss": 0.1275371015071869, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2254538129782304e-05, + "rewards/margins": 0.14130142331123352, + "rewards/rejected": -0.14134368300437927, + "step": 9160 + }, + { + "epoch": 6.3354080221300135, + "grad_norm": 6.969802379608154, + "learning_rate": 2.0358844321499923e-05, + "log_odds_chosen": 10.595348358154297, + "log_odds_ratio": -9.216701437253505e-05, + "logits/chosen": -0.33821868896484375, + "logits/rejected": -0.4096333980560303, + "logps/chosen": -0.0007422784110531211, + "logps/rejected": -2.276984691619873, + "loss": 0.6899, + "nll_loss": 0.17246267199516296, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.422784256050363e-05, + "rewards/margins": 0.22762425243854523, + "rewards/rejected": -0.2276984602212906, + "step": 9161 + }, + { + "epoch": 6.33609958506224, + "grad_norm": 8.498947143554688, + "learning_rate": 2.0355002305209775e-05, + "log_odds_chosen": 10.396454811096191, + "log_odds_ratio": -0.0001644312433199957, + "logits/chosen": -0.4233134686946869, + "logits/rejected": -0.4007781445980072, + "logps/chosen": -0.0004954281030222774, + "logps/rejected": -1.3405135869979858, + "loss": 1.0076, + "nll_loss": 0.25187867879867554, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9542810302227736e-05, + "rewards/margins": 0.13400182127952576, + "rewards/rejected": -0.1340513676404953, + "step": 9162 + }, + { + "epoch": 6.336791147994467, + "grad_norm": 4.9324631690979, + "learning_rate": 2.0351160288919628e-05, + "log_odds_chosen": 10.10614013671875, + "log_odds_ratio": -0.0005629255319945514, + "logits/chosen": -0.5218598246574402, + "logits/rejected": -0.5570496916770935, + "logps/chosen": -0.000602225074544549, + "logps/rejected": -1.9400722980499268, + "loss": 0.4529, + "nll_loss": 0.11316834390163422, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022251182002947e-05, + "rewards/margins": 0.19394701719284058, + "rewards/rejected": -0.19400723278522491, + "step": 9163 + }, + { + "epoch": 6.337482710926694, + "grad_norm": 11.972575187683105, + "learning_rate": 2.0347318272629477e-05, + "log_odds_chosen": 12.207606315612793, + "log_odds_ratio": -7.4364847932884e-06, + "logits/chosen": -0.28967320919036865, + "logits/rejected": -0.2895028591156006, + "logps/chosen": -7.918903429526836e-05, + "logps/rejected": -2.5067331790924072, + "loss": 0.7326, + "nll_loss": 0.18314482271671295, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.918903065728955e-06, + "rewards/margins": 0.2506653964519501, + "rewards/rejected": -0.2506733238697052, + "step": 9164 + }, + { + "epoch": 6.338174273858921, + "grad_norm": 6.695245265960693, + "learning_rate": 2.034347625633933e-05, + "log_odds_chosen": 10.07140827178955, + "log_odds_ratio": -0.0003995221049990505, + "logits/chosen": -0.7729678153991699, + "logits/rejected": -0.728569507598877, + "logps/chosen": -0.00022771614021621644, + "logps/rejected": -1.8983569145202637, + "loss": 0.7834, + "nll_loss": 0.19580984115600586, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2771615476813167e-05, + "rewards/margins": 0.18981292843818665, + "rewards/rejected": -0.18983569741249084, + "step": 9165 + }, + { + "epoch": 6.338865836791148, + "grad_norm": 8.26303482055664, + "learning_rate": 2.033963424004918e-05, + "log_odds_chosen": 11.618376731872559, + "log_odds_ratio": -1.5082228856044821e-05, + "logits/chosen": -0.44059500098228455, + "logits/rejected": -0.43658512830734253, + "logps/chosen": -0.0001126268834923394, + "logps/rejected": -2.456397294998169, + "loss": 0.6351, + "nll_loss": 0.15877383947372437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1262687621638179e-05, + "rewards/margins": 0.2456284761428833, + "rewards/rejected": -0.24563972651958466, + "step": 9166 + }, + { + "epoch": 6.3395573997233745, + "grad_norm": 4.643455982208252, + "learning_rate": 2.0335792223759028e-05, + "log_odds_chosen": 10.716975212097168, + "log_odds_ratio": -7.84438379923813e-05, + "logits/chosen": -0.6044843792915344, + "logits/rejected": -0.6347651481628418, + "logps/chosen": -0.000250465702265501, + "logps/rejected": -2.1208696365356445, + "loss": 0.5771, + "nll_loss": 0.14426752924919128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.504656913515646e-05, + "rewards/margins": 0.21206192672252655, + "rewards/rejected": -0.21208696067333221, + "step": 9167 + }, + { + "epoch": 6.340248962655601, + "grad_norm": 6.384197235107422, + "learning_rate": 2.033195020746888e-05, + "log_odds_chosen": 10.821781158447266, + "log_odds_ratio": -3.1766670872457325e-05, + "logits/chosen": -0.47071394324302673, + "logits/rejected": -0.5132383704185486, + "logps/chosen": -0.00017042181571014225, + "logps/rejected": -2.046055555343628, + "loss": 0.5178, + "nll_loss": 0.12943808734416962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.704218448139727e-05, + "rewards/margins": 0.20458853244781494, + "rewards/rejected": -0.20460554957389832, + "step": 9168 + }, + { + "epoch": 6.340940525587828, + "grad_norm": 5.672266483306885, + "learning_rate": 2.0328108191178732e-05, + "log_odds_chosen": 11.661087036132812, + "log_odds_ratio": -1.4097817256697454e-05, + "logits/chosen": -0.49641531705856323, + "logits/rejected": -0.4841935634613037, + "logps/chosen": -0.0004052775038871914, + "logps/rejected": -2.6994376182556152, + "loss": 1.0757, + "nll_loss": 0.26892024278640747, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0527756937080994e-05, + "rewards/margins": 0.2699032425880432, + "rewards/rejected": -0.2699437737464905, + "step": 9169 + }, + { + "epoch": 6.341632088520055, + "grad_norm": 6.265336036682129, + "learning_rate": 2.032426617488858e-05, + "log_odds_chosen": 11.042598724365234, + "log_odds_ratio": -0.0007391467806883156, + "logits/chosen": -0.11241171509027481, + "logits/rejected": -0.1534719169139862, + "logps/chosen": -0.0021816291846334934, + "logps/rejected": -2.3804805278778076, + "loss": 0.4893, + "nll_loss": 0.12224604934453964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021816292428411543, + "rewards/margins": 0.23782990872859955, + "rewards/rejected": -0.23804807662963867, + "step": 9170 + }, + { + "epoch": 6.342323651452282, + "grad_norm": 4.920318603515625, + "learning_rate": 2.0320424158598434e-05, + "log_odds_chosen": 10.784578323364258, + "log_odds_ratio": -3.836057658190839e-05, + "logits/chosen": -0.7648136615753174, + "logits/rejected": -0.8611428737640381, + "logps/chosen": -0.00010240989649901167, + "logps/rejected": -1.7506695985794067, + "loss": 0.429, + "nll_loss": 0.10723373293876648, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0240990377496928e-05, + "rewards/margins": 0.17505672574043274, + "rewards/rejected": -0.1750669628381729, + "step": 9171 + }, + { + "epoch": 6.343015214384509, + "grad_norm": 8.815408706665039, + "learning_rate": 2.0316582142308286e-05, + "log_odds_chosen": 10.180644989013672, + "log_odds_ratio": -9.145465446636081e-05, + "logits/chosen": -0.47253626585006714, + "logits/rejected": -0.6050565242767334, + "logps/chosen": -0.0005305693484842777, + "logps/rejected": -2.2179205417633057, + "loss": 0.6499, + "nll_loss": 0.16246379911899567, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305693775881082e-05, + "rewards/margins": 0.2217389941215515, + "rewards/rejected": -0.2217920422554016, + "step": 9172 + }, + { + "epoch": 6.3437067773167355, + "grad_norm": 5.342888832092285, + "learning_rate": 2.0312740126018135e-05, + "log_odds_chosen": 9.948837280273438, + "log_odds_ratio": -0.0001233671500813216, + "logits/chosen": -0.3362911343574524, + "logits/rejected": -0.4858512878417969, + "logps/chosen": -0.0003074869164265692, + "logps/rejected": -1.777867317199707, + "loss": 0.4667, + "nll_loss": 0.1166590079665184, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.074869164265692e-05, + "rewards/margins": 0.17775598168373108, + "rewards/rejected": -0.177786722779274, + "step": 9173 + }, + { + "epoch": 6.344398340248962, + "grad_norm": 6.704127788543701, + "learning_rate": 2.0308898109727988e-05, + "log_odds_chosen": 10.417448043823242, + "log_odds_ratio": -0.00019412532856222242, + "logits/chosen": -0.227530375123024, + "logits/rejected": -0.21911419928073883, + "logps/chosen": -0.0005401723901741207, + "logps/rejected": -2.1637792587280273, + "loss": 0.7081, + "nll_loss": 0.1770128607749939, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.401724047260359e-05, + "rewards/margins": 0.21632394194602966, + "rewards/rejected": -0.21637794375419617, + "step": 9174 + }, + { + "epoch": 6.345089903181189, + "grad_norm": 6.890054225921631, + "learning_rate": 2.0305056093437837e-05, + "log_odds_chosen": 10.487573623657227, + "log_odds_ratio": -7.205517613328993e-05, + "logits/chosen": -0.1999693661928177, + "logits/rejected": -0.12560325860977173, + "logps/chosen": -0.00024904205929487944, + "logps/rejected": -1.6322691440582275, + "loss": 0.4943, + "nll_loss": 0.12357941269874573, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4904205929487944e-05, + "rewards/margins": 0.16320201754570007, + "rewards/rejected": -0.1632269322872162, + "step": 9175 + }, + { + "epoch": 6.345781466113416, + "grad_norm": 11.061262130737305, + "learning_rate": 2.0301214077147686e-05, + "log_odds_chosen": 10.508772850036621, + "log_odds_ratio": -0.003219763981178403, + "logits/chosen": -0.7970146536827087, + "logits/rejected": -0.8142995238304138, + "logps/chosen": -0.001975291408598423, + "logps/rejected": -1.51791250705719, + "loss": 0.6285, + "nll_loss": 0.15680286288261414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001975291670532897, + "rewards/margins": 0.15159371495246887, + "rewards/rejected": -0.15179124474525452, + "step": 9176 + }, + { + "epoch": 6.346473029045643, + "grad_norm": 5.008984565734863, + "learning_rate": 2.029737206085754e-05, + "log_odds_chosen": 10.216703414916992, + "log_odds_ratio": -0.00014600915892515332, + "logits/chosen": -0.5505592823028564, + "logits/rejected": -0.5319199562072754, + "logps/chosen": -0.0003324694116599858, + "logps/rejected": -1.6418519020080566, + "loss": 0.636, + "nll_loss": 0.15897497534751892, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.32469426211901e-05, + "rewards/margins": 0.16415196657180786, + "rewards/rejected": -0.16418521106243134, + "step": 9177 + }, + { + "epoch": 6.34716459197787, + "grad_norm": 10.514545440673828, + "learning_rate": 2.029353004456739e-05, + "log_odds_chosen": 10.497976303100586, + "log_odds_ratio": -0.00018029067723546177, + "logits/chosen": -0.2952839732170105, + "logits/rejected": -0.33477315306663513, + "logps/chosen": -0.0004315444966778159, + "logps/rejected": -2.282492160797119, + "loss": 0.8506, + "nll_loss": 0.21263387799263, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.315445039537735e-05, + "rewards/margins": 0.22820605337619781, + "rewards/rejected": -0.2282492071390152, + "step": 9178 + }, + { + "epoch": 6.3478561549100965, + "grad_norm": 6.681618690490723, + "learning_rate": 2.028968802827724e-05, + "log_odds_chosen": 10.633491516113281, + "log_odds_ratio": -7.278387784026563e-05, + "logits/chosen": -0.2575504779815674, + "logits/rejected": -0.360831081867218, + "logps/chosen": -0.0006361017003655434, + "logps/rejected": -2.2641687393188477, + "loss": 0.5476, + "nll_loss": 0.1369018256664276, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.361017585732043e-05, + "rewards/margins": 0.226353257894516, + "rewards/rejected": -0.22641685605049133, + "step": 9179 + }, + { + "epoch": 6.348547717842323, + "grad_norm": 6.981048583984375, + "learning_rate": 2.0285846011987092e-05, + "log_odds_chosen": 11.55207633972168, + "log_odds_ratio": -2.6088957383763045e-05, + "logits/chosen": -0.41310855746269226, + "logits/rejected": -0.5048751831054688, + "logps/chosen": -0.00016262067947536707, + "logps/rejected": -2.607013702392578, + "loss": 0.5756, + "nll_loss": 0.14389488101005554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6262067219940946e-05, + "rewards/margins": 0.2606850862503052, + "rewards/rejected": -0.26070135831832886, + "step": 9180 + }, + { + "epoch": 6.34923928077455, + "grad_norm": 11.618637084960938, + "learning_rate": 2.0282003995696945e-05, + "log_odds_chosen": 10.954242706298828, + "log_odds_ratio": -2.8021946491207927e-05, + "logits/chosen": -0.4849565923213959, + "logits/rejected": -0.5182377696037292, + "logps/chosen": -0.00011028562585124746, + "logps/rejected": -1.8828730583190918, + "loss": 0.5321, + "nll_loss": 0.13302020728588104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1028562767023686e-05, + "rewards/margins": 0.1882762908935547, + "rewards/rejected": -0.18828731775283813, + "step": 9181 + }, + { + "epoch": 6.349930843706777, + "grad_norm": 11.160720825195312, + "learning_rate": 2.0278161979406794e-05, + "log_odds_chosen": 8.96442985534668, + "log_odds_ratio": -0.00035631554783321917, + "logits/chosen": -0.4377845525741577, + "logits/rejected": -0.42905181646347046, + "logps/chosen": -0.0005569449858739972, + "logps/rejected": -1.361982822418213, + "loss": 0.5463, + "nll_loss": 0.13653430342674255, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569449785980396e-05, + "rewards/margins": 0.13614259660243988, + "rewards/rejected": -0.1361982822418213, + "step": 9182 + }, + { + "epoch": 6.350622406639004, + "grad_norm": 10.625052452087402, + "learning_rate": 2.0274319963116646e-05, + "log_odds_chosen": 10.720477104187012, + "log_odds_ratio": -0.00016753214003983885, + "logits/chosen": -0.13363853096961975, + "logits/rejected": -0.259945273399353, + "logps/chosen": -0.0003033954999409616, + "logps/rejected": -2.012464761734009, + "loss": 0.8445, + "nll_loss": 0.21111492812633514, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.033954999409616e-05, + "rewards/margins": 0.20121616125106812, + "rewards/rejected": -0.2012465000152588, + "step": 9183 + }, + { + "epoch": 6.351313969571231, + "grad_norm": 9.830244064331055, + "learning_rate": 2.0270477946826495e-05, + "log_odds_chosen": 10.320734024047852, + "log_odds_ratio": -5.5076357966754586e-05, + "logits/chosen": -0.6449425220489502, + "logits/rejected": -0.5527397990226746, + "logps/chosen": -0.00033088948111981153, + "logps/rejected": -1.7124865055084229, + "loss": 0.6336, + "nll_loss": 0.15838631987571716, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.308894520159811e-05, + "rewards/margins": 0.17121556401252747, + "rewards/rejected": -0.171248659491539, + "step": 9184 + }, + { + "epoch": 6.3520055325034575, + "grad_norm": 7.309013366699219, + "learning_rate": 2.0266635930536344e-05, + "log_odds_chosen": 11.929250717163086, + "log_odds_ratio": -7.019154963927576e-06, + "logits/chosen": -0.6183920502662659, + "logits/rejected": -0.6302121877670288, + "logps/chosen": -0.0001854830770753324, + "logps/rejected": -2.6690664291381836, + "loss": 1.0344, + "nll_loss": 0.2585914433002472, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.854830770753324e-05, + "rewards/margins": 0.2668881118297577, + "rewards/rejected": -0.2669066786766052, + "step": 9185 + }, + { + "epoch": 6.352697095435684, + "grad_norm": 6.724107265472412, + "learning_rate": 2.0262793914246197e-05, + "log_odds_chosen": 10.042360305786133, + "log_odds_ratio": -9.932967077475041e-05, + "logits/chosen": -0.7215209007263184, + "logits/rejected": -0.7661874294281006, + "logps/chosen": -0.00031137201585806906, + "logps/rejected": -1.836350917816162, + "loss": 0.5111, + "nll_loss": 0.12775912880897522, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.113720231340267e-05, + "rewards/margins": 0.1836039423942566, + "rewards/rejected": -0.18363508582115173, + "step": 9186 + }, + { + "epoch": 6.353388658367911, + "grad_norm": 7.169684886932373, + "learning_rate": 2.025895189795605e-05, + "log_odds_chosen": 11.2716064453125, + "log_odds_ratio": -5.317230898072012e-05, + "logits/chosen": -0.3582202196121216, + "logits/rejected": -0.45097386837005615, + "logps/chosen": -0.00018732898752205074, + "logps/rejected": -2.5006752014160156, + "loss": 0.6799, + "nll_loss": 0.16997021436691284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.873289693321567e-05, + "rewards/margins": 0.25004881620407104, + "rewards/rejected": -0.2500675320625305, + "step": 9187 + }, + { + "epoch": 6.354080221300138, + "grad_norm": 7.111165523529053, + "learning_rate": 2.02551098816659e-05, + "log_odds_chosen": 11.701675415039062, + "log_odds_ratio": -4.208488098811358e-05, + "logits/chosen": -0.37285587191581726, + "logits/rejected": -0.38051775097846985, + "logps/chosen": -0.00016419717576354742, + "logps/rejected": -2.967442512512207, + "loss": 0.5549, + "nll_loss": 0.1387246698141098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.641971721255686e-05, + "rewards/margins": 0.29672783613204956, + "rewards/rejected": -0.2967442572116852, + "step": 9188 + }, + { + "epoch": 6.354771784232365, + "grad_norm": 8.374202728271484, + "learning_rate": 2.025126786537575e-05, + "log_odds_chosen": 10.756795883178711, + "log_odds_ratio": -0.00011663652549032122, + "logits/chosen": -0.39200976490974426, + "logits/rejected": -0.3689347803592682, + "logps/chosen": -0.00034770870115607977, + "logps/rejected": -2.374525308609009, + "loss": 0.725, + "nll_loss": 0.18124133348464966, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477087011560798e-05, + "rewards/margins": 0.23741775751113892, + "rewards/rejected": -0.23745253682136536, + "step": 9189 + }, + { + "epoch": 6.355463347164592, + "grad_norm": 5.542569637298584, + "learning_rate": 2.0247425849085603e-05, + "log_odds_chosen": 10.205241203308105, + "log_odds_ratio": -0.000721512536983937, + "logits/chosen": -0.2690023183822632, + "logits/rejected": -0.26603633165359497, + "logps/chosen": -0.0011981608113273978, + "logps/rejected": -1.9227674007415771, + "loss": 0.4221, + "nll_loss": 0.10545966029167175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001198160825879313, + "rewards/margins": 0.19215692579746246, + "rewards/rejected": -0.192276731133461, + "step": 9190 + }, + { + "epoch": 6.356154910096818, + "grad_norm": 7.170056343078613, + "learning_rate": 2.0243583832795452e-05, + "log_odds_chosen": 10.770509719848633, + "log_odds_ratio": -0.00014403241220861673, + "logits/chosen": -0.5067330598831177, + "logits/rejected": -0.5712544918060303, + "logps/chosen": -0.0002003665576921776, + "logps/rejected": -1.4868699312210083, + "loss": 0.5378, + "nll_loss": 0.13442908227443695, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0036653950228356e-05, + "rewards/margins": 0.14866694808006287, + "rewards/rejected": -0.1486869752407074, + "step": 9191 + }, + { + "epoch": 6.356846473029045, + "grad_norm": 5.789017677307129, + "learning_rate": 2.0239741816505305e-05, + "log_odds_chosen": 11.209232330322266, + "log_odds_ratio": -2.6927336875814945e-05, + "logits/chosen": -0.4791033864021301, + "logits/rejected": -0.5403072834014893, + "logps/chosen": -0.00015609999536536634, + "logps/rejected": -2.214787244796753, + "loss": 0.5028, + "nll_loss": 0.12570922076702118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5609999536536634e-05, + "rewards/margins": 0.22146311402320862, + "rewards/rejected": -0.22147874534130096, + "step": 9192 + }, + { + "epoch": 6.357538035961272, + "grad_norm": 10.424960136413574, + "learning_rate": 2.0235899800215154e-05, + "log_odds_chosen": 10.695180892944336, + "log_odds_ratio": -9.039837459567934e-05, + "logits/chosen": -0.3451023995876312, + "logits/rejected": -0.28836554288864136, + "logps/chosen": -0.0003894604742527008, + "logps/rejected": -2.282132625579834, + "loss": 0.6913, + "nll_loss": 0.1728084236383438, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.894605106324889e-05, + "rewards/margins": 0.2281743288040161, + "rewards/rejected": -0.22821328043937683, + "step": 9193 + }, + { + "epoch": 6.358229598893499, + "grad_norm": 5.216132640838623, + "learning_rate": 2.0232057783925003e-05, + "log_odds_chosen": 11.270370483398438, + "log_odds_ratio": -2.522995782783255e-05, + "logits/chosen": -0.4266151487827301, + "logits/rejected": -0.5151241421699524, + "logps/chosen": -0.0001478709455113858, + "logps/rejected": -2.335394859313965, + "loss": 0.3715, + "nll_loss": 0.09286393970251083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4787094187340699e-05, + "rewards/margins": 0.23352470993995667, + "rewards/rejected": -0.23353949189186096, + "step": 9194 + }, + { + "epoch": 6.358921161825726, + "grad_norm": 6.978919982910156, + "learning_rate": 2.0228215767634855e-05, + "log_odds_chosen": 9.261372566223145, + "log_odds_ratio": -0.0012194992741569877, + "logits/chosen": -0.22732126712799072, + "logits/rejected": -0.3630354702472687, + "logps/chosen": -0.002600749721750617, + "logps/rejected": -1.7258175611495972, + "loss": 0.5196, + "nll_loss": 0.12978777289390564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026007500127889216, + "rewards/margins": 0.17232167720794678, + "rewards/rejected": -0.1725817620754242, + "step": 9195 + }, + { + "epoch": 6.359612724757953, + "grad_norm": 7.02858304977417, + "learning_rate": 2.0224373751344708e-05, + "log_odds_chosen": 10.51254653930664, + "log_odds_ratio": -0.0002540510904509574, + "logits/chosen": -0.5970651507377625, + "logits/rejected": -0.6617342829704285, + "logps/chosen": -0.00046592182479798794, + "logps/rejected": -2.2553892135620117, + "loss": 0.4232, + "nll_loss": 0.10578116029500961, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.659218393499032e-05, + "rewards/margins": 0.22549235820770264, + "rewards/rejected": -0.2255389541387558, + "step": 9196 + }, + { + "epoch": 6.360304287690179, + "grad_norm": 13.577595710754395, + "learning_rate": 2.0220531735054557e-05, + "log_odds_chosen": 11.231561660766602, + "log_odds_ratio": -0.00012580891780089587, + "logits/chosen": -0.44007226824760437, + "logits/rejected": -0.46577906608581543, + "logps/chosen": -0.00034361236612312496, + "logps/rejected": -2.6888885498046875, + "loss": 0.5729, + "nll_loss": 0.1432117372751236, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.436123370192945e-05, + "rewards/margins": 0.2688544988632202, + "rewards/rejected": -0.2688888609409332, + "step": 9197 + }, + { + "epoch": 6.360995850622406, + "grad_norm": 7.206313133239746, + "learning_rate": 2.021668971876441e-05, + "log_odds_chosen": 10.88147258758545, + "log_odds_ratio": -2.9554386856034398e-05, + "logits/chosen": -0.534494161605835, + "logits/rejected": -0.661098837852478, + "logps/chosen": -0.000369693007087335, + "logps/rejected": -2.0248522758483887, + "loss": 0.5609, + "nll_loss": 0.1402200311422348, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6969304346712306e-05, + "rewards/margins": 0.2024482786655426, + "rewards/rejected": -0.20248523354530334, + "step": 9198 + }, + { + "epoch": 6.361687413554633, + "grad_norm": 5.518919467926025, + "learning_rate": 2.021284770247426e-05, + "log_odds_chosen": 10.717257499694824, + "log_odds_ratio": -0.0003174339362885803, + "logits/chosen": -0.2607053220272064, + "logits/rejected": -0.3942747414112091, + "logps/chosen": -0.00042165315244346857, + "logps/rejected": -2.048736810684204, + "loss": 0.583, + "nll_loss": 0.14572477340698242, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.216531669953838e-05, + "rewards/margins": 0.20483151078224182, + "rewards/rejected": -0.2048736959695816, + "step": 9199 + }, + { + "epoch": 6.36237897648686, + "grad_norm": 6.506553649902344, + "learning_rate": 2.020900568618411e-05, + "log_odds_chosen": 10.156195640563965, + "log_odds_ratio": -0.0008201644523069263, + "logits/chosen": -0.4659947156906128, + "logits/rejected": -0.5176660418510437, + "logps/chosen": -0.00027816108195111156, + "logps/rejected": -1.8388397693634033, + "loss": 0.6022, + "nll_loss": 0.15047568082809448, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7816107831313275e-05, + "rewards/margins": 0.18385615944862366, + "rewards/rejected": -0.18388396501541138, + "step": 9200 + }, + { + "epoch": 6.363070539419087, + "grad_norm": 5.314968585968018, + "learning_rate": 2.0205163669893963e-05, + "log_odds_chosen": 10.760082244873047, + "log_odds_ratio": -4.831475598621182e-05, + "logits/chosen": -0.20053163170814514, + "logits/rejected": -0.2904966473579407, + "logps/chosen": -0.00032365991501137614, + "logps/rejected": -2.176091194152832, + "loss": 0.5382, + "nll_loss": 0.13455608487129211, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2365991501137614e-05, + "rewards/margins": 0.2175767719745636, + "rewards/rejected": -0.21760913729667664, + "step": 9201 + }, + { + "epoch": 6.363762102351314, + "grad_norm": 8.624659538269043, + "learning_rate": 2.0201321653603812e-05, + "log_odds_chosen": 10.964468002319336, + "log_odds_ratio": -3.857814954244532e-05, + "logits/chosen": -0.562919020652771, + "logits/rejected": -0.7147216796875, + "logps/chosen": -0.00018094430561177433, + "logps/rejected": -2.261103630065918, + "loss": 0.6497, + "nll_loss": 0.16241827607154846, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.809442983358167e-05, + "rewards/margins": 0.22609226405620575, + "rewards/rejected": -0.22611036896705627, + "step": 9202 + }, + { + "epoch": 6.36445366528354, + "grad_norm": 5.81415319442749, + "learning_rate": 2.019747963731366e-05, + "log_odds_chosen": 10.822190284729004, + "log_odds_ratio": -4.008920950582251e-05, + "logits/chosen": -0.25131142139434814, + "logits/rejected": -0.4631497859954834, + "logps/chosen": -0.0002011386095546186, + "logps/rejected": -2.2005937099456787, + "loss": 0.597, + "nll_loss": 0.14925579726696014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.011386095546186e-05, + "rewards/margins": 0.2200392633676529, + "rewards/rejected": -0.2200593799352646, + "step": 9203 + }, + { + "epoch": 6.365145228215767, + "grad_norm": 6.23052453994751, + "learning_rate": 2.0193637621023514e-05, + "log_odds_chosen": 10.77461051940918, + "log_odds_ratio": -0.00010428290261188522, + "logits/chosen": -0.029508620500564575, + "logits/rejected": -0.035192057490348816, + "logps/chosen": -0.00045014353236183524, + "logps/rejected": -2.0687994956970215, + "loss": 0.4886, + "nll_loss": 0.1221313402056694, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5014348870608956e-05, + "rewards/margins": 0.20683494210243225, + "rewards/rejected": -0.20687994360923767, + "step": 9204 + }, + { + "epoch": 6.365836791147994, + "grad_norm": 10.496476173400879, + "learning_rate": 2.0189795604733366e-05, + "log_odds_chosen": 12.02440071105957, + "log_odds_ratio": -9.049935215443838e-06, + "logits/chosen": -0.27575576305389404, + "logits/rejected": -0.30641281604766846, + "logps/chosen": -9.213421435561031e-05, + "logps/rejected": -2.588135242462158, + "loss": 0.8979, + "nll_loss": 0.22447043657302856, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.213421435561031e-06, + "rewards/margins": 0.2588043212890625, + "rewards/rejected": -0.2588135600090027, + "step": 9205 + }, + { + "epoch": 6.366528354080221, + "grad_norm": 8.798904418945312, + "learning_rate": 2.0185953588443215e-05, + "log_odds_chosen": 10.987395286560059, + "log_odds_ratio": -0.00019550917204469442, + "logits/chosen": -0.18301673233509064, + "logits/rejected": -0.3637821674346924, + "logps/chosen": -0.00034496065927669406, + "logps/rejected": -2.5501561164855957, + "loss": 0.5876, + "nll_loss": 0.1468726396560669, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.449606447247788e-05, + "rewards/margins": 0.2549811005592346, + "rewards/rejected": -0.2550155818462372, + "step": 9206 + }, + { + "epoch": 6.367219917012449, + "grad_norm": 5.261299133300781, + "learning_rate": 2.0182111572153068e-05, + "log_odds_chosen": 10.68661880493164, + "log_odds_ratio": -7.621490658493713e-05, + "logits/chosen": -0.5029549598693848, + "logits/rejected": -0.5550189018249512, + "logps/chosen": -6.601712084375322e-05, + "logps/rejected": -1.5257365703582764, + "loss": 0.5211, + "nll_loss": 0.13025513291358948, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.601712811971083e-06, + "rewards/margins": 0.152567058801651, + "rewards/rejected": -0.15257366001605988, + "step": 9207 + }, + { + "epoch": 6.367911479944675, + "grad_norm": 7.6004228591918945, + "learning_rate": 2.0178269555862917e-05, + "log_odds_chosen": 9.818439483642578, + "log_odds_ratio": -0.00021286829723976552, + "logits/chosen": -0.30478382110595703, + "logits/rejected": -0.21007993817329407, + "logps/chosen": -0.0003476694109849632, + "logps/rejected": -1.9411594867706299, + "loss": 0.6535, + "nll_loss": 0.16334381699562073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.476694109849632e-05, + "rewards/margins": 0.19408118724822998, + "rewards/rejected": -0.19411595165729523, + "step": 9208 + }, + { + "epoch": 6.368603042876902, + "grad_norm": 8.168708801269531, + "learning_rate": 2.017442753957277e-05, + "log_odds_chosen": 11.416715621948242, + "log_odds_ratio": -1.4642300811829045e-05, + "logits/chosen": -0.12115032970905304, + "logits/rejected": -0.20901496708393097, + "logps/chosen": -0.00016176048666238785, + "logps/rejected": -2.281259059906006, + "loss": 0.7037, + "nll_loss": 0.17593160271644592, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6176049030036665e-05, + "rewards/margins": 0.2281097173690796, + "rewards/rejected": -0.2281259000301361, + "step": 9209 + }, + { + "epoch": 6.369294605809129, + "grad_norm": 5.403926849365234, + "learning_rate": 2.017058552328262e-05, + "log_odds_chosen": 10.59703540802002, + "log_odds_ratio": -6.367493188008666e-05, + "logits/chosen": 0.09212058782577515, + "logits/rejected": 0.14779186248779297, + "logps/chosen": -0.0002771377330645919, + "logps/rejected": -1.9987256526947021, + "loss": 0.5871, + "nll_loss": 0.14678050577640533, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7713776944437996e-05, + "rewards/margins": 0.1998448669910431, + "rewards/rejected": -0.19987258315086365, + "step": 9210 + }, + { + "epoch": 6.369986168741356, + "grad_norm": 5.3090620040893555, + "learning_rate": 2.016674350699247e-05, + "log_odds_chosen": 11.665288925170898, + "log_odds_ratio": -1.9512324797688052e-05, + "logits/chosen": -0.6499193906784058, + "logits/rejected": -0.6336812376976013, + "logps/chosen": -0.00016225603758357465, + "logps/rejected": -2.2109932899475098, + "loss": 0.6673, + "nll_loss": 0.16682903468608856, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6225603758357465e-05, + "rewards/margins": 0.22108310461044312, + "rewards/rejected": -0.22109931707382202, + "step": 9211 + }, + { + "epoch": 6.370677731673583, + "grad_norm": 10.165082931518555, + "learning_rate": 2.016290149070232e-05, + "log_odds_chosen": 11.195685386657715, + "log_odds_ratio": -0.00017299283354077488, + "logits/chosen": -0.5278292298316956, + "logits/rejected": -0.6007906198501587, + "logps/chosen": -0.00046260812086984515, + "logps/rejected": -2.265780448913574, + "loss": 0.397, + "nll_loss": 0.09923581033945084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.626081499736756e-05, + "rewards/margins": 0.2265317738056183, + "rewards/rejected": -0.22657804191112518, + "step": 9212 + }, + { + "epoch": 6.37136929460581, + "grad_norm": 8.883148193359375, + "learning_rate": 2.0159059474412172e-05, + "log_odds_chosen": 10.232280731201172, + "log_odds_ratio": -0.0003335610090289265, + "logits/chosen": -0.48593389987945557, + "logits/rejected": -0.5400428175926208, + "logps/chosen": -0.0005161626031622291, + "logps/rejected": -1.7813667058944702, + "loss": 0.969, + "nll_loss": 0.2422185242176056, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1616254495456815e-05, + "rewards/margins": 0.17808504402637482, + "rewards/rejected": -0.1781366765499115, + "step": 9213 + }, + { + "epoch": 6.372060857538036, + "grad_norm": 6.551841735839844, + "learning_rate": 2.015521745812202e-05, + "log_odds_chosen": 10.956387519836426, + "log_odds_ratio": -9.756218059919775e-05, + "logits/chosen": -0.11400066316127777, + "logits/rejected": -0.21385729312896729, + "logps/chosen": -0.00022725429153069854, + "logps/rejected": -2.4168527126312256, + "loss": 0.4417, + "nll_loss": 0.11042511463165283, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2725429516867734e-05, + "rewards/margins": 0.24166254699230194, + "rewards/rejected": -0.24168527126312256, + "step": 9214 + }, + { + "epoch": 6.372752420470263, + "grad_norm": 7.063676357269287, + "learning_rate": 2.0151375441831874e-05, + "log_odds_chosen": 10.549491882324219, + "log_odds_ratio": -0.00019233435159549117, + "logits/chosen": -0.434749960899353, + "logits/rejected": -0.47967755794525146, + "logps/chosen": -0.00045270402915775776, + "logps/rejected": -2.4761176109313965, + "loss": 0.8509, + "nll_loss": 0.2126937210559845, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.52704043709673e-05, + "rewards/margins": 0.24756652116775513, + "rewards/rejected": -0.24761177599430084, + "step": 9215 + }, + { + "epoch": 6.37344398340249, + "grad_norm": 4.453362464904785, + "learning_rate": 2.0147533425541726e-05, + "log_odds_chosen": 10.472236633300781, + "log_odds_ratio": -6.213154119905084e-05, + "logits/chosen": -0.2916863262653351, + "logits/rejected": -0.2914811372756958, + "logps/chosen": -0.00018990921671502292, + "logps/rejected": -1.8327441215515137, + "loss": 0.3673, + "nll_loss": 0.09180820733308792, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.899092058010865e-05, + "rewards/margins": 0.1832554191350937, + "rewards/rejected": -0.18327441811561584, + "step": 9216 + }, + { + "epoch": 6.374135546334717, + "grad_norm": 5.508984565734863, + "learning_rate": 2.0143691409251575e-05, + "log_odds_chosen": 11.875307083129883, + "log_odds_ratio": -0.00015400855045299977, + "logits/chosen": -0.2681117057800293, + "logits/rejected": -0.2774713337421417, + "logps/chosen": -0.0009705049451440573, + "logps/rejected": -3.091235637664795, + "loss": 0.5325, + "nll_loss": 0.13310199975967407, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.705049160402268e-05, + "rewards/margins": 0.3090265393257141, + "rewards/rejected": -0.30912357568740845, + "step": 9217 + }, + { + "epoch": 6.374827109266944, + "grad_norm": 4.798890113830566, + "learning_rate": 2.0139849392961428e-05, + "log_odds_chosen": 11.035809516906738, + "log_odds_ratio": -0.00011051326873712242, + "logits/chosen": -0.5304756760597229, + "logits/rejected": -0.607671856880188, + "logps/chosen": -0.0002653436386026442, + "logps/rejected": -2.285710573196411, + "loss": 0.5152, + "nll_loss": 0.12877829372882843, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.65343642240623e-05, + "rewards/margins": 0.22854453325271606, + "rewards/rejected": -0.2285710722208023, + "step": 9218 + }, + { + "epoch": 6.375518672199171, + "grad_norm": 7.894879341125488, + "learning_rate": 2.013600737667128e-05, + "log_odds_chosen": 10.94662857055664, + "log_odds_ratio": -0.0001528830180177465, + "logits/chosen": -0.5855768918991089, + "logits/rejected": -0.5030243396759033, + "logps/chosen": -0.000555214995983988, + "logps/rejected": -2.32736873626709, + "loss": 0.6675, + "nll_loss": 0.1668596863746643, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.552149741561152e-05, + "rewards/margins": 0.23268136382102966, + "rewards/rejected": -0.23273690044879913, + "step": 9219 + }, + { + "epoch": 6.376210235131397, + "grad_norm": 6.6997904777526855, + "learning_rate": 2.013216536038113e-05, + "log_odds_chosen": 10.035709381103516, + "log_odds_ratio": -0.0005353145534172654, + "logits/chosen": -0.2331252545118332, + "logits/rejected": -0.38201838731765747, + "logps/chosen": -0.0001682726142462343, + "logps/rejected": -1.5411182641983032, + "loss": 0.9186, + "nll_loss": 0.22959373891353607, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.682726178842131e-05, + "rewards/margins": 0.15409502387046814, + "rewards/rejected": -0.154111847281456, + "step": 9220 + }, + { + "epoch": 6.376901798063624, + "grad_norm": 7.748939514160156, + "learning_rate": 2.0128323344090978e-05, + "log_odds_chosen": 10.36617660522461, + "log_odds_ratio": -0.0001164079294539988, + "logits/chosen": -0.572219967842102, + "logits/rejected": -0.5345730781555176, + "logps/chosen": -0.0003311182663310319, + "logps/rejected": -1.959726095199585, + "loss": 0.6189, + "nll_loss": 0.15470938384532928, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3111828088294715e-05, + "rewards/margins": 0.19593951106071472, + "rewards/rejected": -0.19597262144088745, + "step": 9221 + }, + { + "epoch": 6.377593360995851, + "grad_norm": 6.893679141998291, + "learning_rate": 2.012448132780083e-05, + "log_odds_chosen": 9.918173789978027, + "log_odds_ratio": -0.0005074600921943784, + "logits/chosen": -0.3186028003692627, + "logits/rejected": -0.3795122504234314, + "logps/chosen": -0.00208657281473279, + "logps/rejected": -1.9256505966186523, + "loss": 0.726, + "nll_loss": 0.1814383566379547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020865729311481118, + "rewards/margins": 0.1923563927412033, + "rewards/rejected": -0.19256505370140076, + "step": 9222 + }, + { + "epoch": 6.378284923928078, + "grad_norm": 9.132357597351074, + "learning_rate": 2.012063931151068e-05, + "log_odds_chosen": 10.881586074829102, + "log_odds_ratio": -5.283685095491819e-05, + "logits/chosen": -0.46947699785232544, + "logits/rejected": -0.5702405571937561, + "logps/chosen": -0.0001630905899219215, + "logps/rejected": -2.0741071701049805, + "loss": 0.7366, + "nll_loss": 0.18414491415023804, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.630905899219215e-05, + "rewards/margins": 0.20739439129829407, + "rewards/rejected": -0.20741069316864014, + "step": 9223 + }, + { + "epoch": 6.378976486860305, + "grad_norm": 7.436960220336914, + "learning_rate": 2.0116797295220532e-05, + "log_odds_chosen": 12.106593132019043, + "log_odds_ratio": -0.0003620930656325072, + "logits/chosen": -0.689150333404541, + "logits/rejected": -0.7047725915908813, + "logps/chosen": -0.0007022612262517214, + "logps/rejected": -3.17356538772583, + "loss": 0.6803, + "nll_loss": 0.1700369417667389, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.022612408036366e-05, + "rewards/margins": 0.31728631258010864, + "rewards/rejected": -0.31735655665397644, + "step": 9224 + }, + { + "epoch": 6.3796680497925315, + "grad_norm": 6.576411724090576, + "learning_rate": 2.0112955278930385e-05, + "log_odds_chosen": 10.059762954711914, + "log_odds_ratio": -0.00010953310993500054, + "logits/chosen": -0.7460529208183289, + "logits/rejected": -0.7538313269615173, + "logps/chosen": -0.0002810961741488427, + "logps/rejected": -1.5116372108459473, + "loss": 0.9037, + "nll_loss": 0.22591376304626465, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8109616323490627e-05, + "rewards/margins": 0.1511356234550476, + "rewards/rejected": -0.1511637270450592, + "step": 9225 + }, + { + "epoch": 6.380359612724758, + "grad_norm": 6.347103118896484, + "learning_rate": 2.0109113262640234e-05, + "log_odds_chosen": 9.54544448852539, + "log_odds_ratio": -0.0006142269703559577, + "logits/chosen": -0.5503570437431335, + "logits/rejected": -0.6310123205184937, + "logps/chosen": -0.0007548942230641842, + "logps/rejected": -1.9970778226852417, + "loss": 0.5175, + "nll_loss": 0.12930533289909363, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.548942812718451e-05, + "rewards/margins": 0.19963230192661285, + "rewards/rejected": -0.1997077912092209, + "step": 9226 + }, + { + "epoch": 6.381051175656985, + "grad_norm": 5.354732036590576, + "learning_rate": 2.0105271246350086e-05, + "log_odds_chosen": 10.121562957763672, + "log_odds_ratio": -5.865055209142156e-05, + "logits/chosen": -0.18324589729309082, + "logits/rejected": -0.29151177406311035, + "logps/chosen": -0.0003376026579644531, + "logps/rejected": -1.604554295539856, + "loss": 0.6855, + "nll_loss": 0.17136290669441223, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3760268706828356e-05, + "rewards/margins": 0.160421684384346, + "rewards/rejected": -0.16045543551445007, + "step": 9227 + }, + { + "epoch": 6.381742738589212, + "grad_norm": 7.14629602432251, + "learning_rate": 2.010142923005994e-05, + "log_odds_chosen": 11.467676162719727, + "log_odds_ratio": -8.916402293834835e-05, + "logits/chosen": -0.006139256525784731, + "logits/rejected": -0.11357221752405167, + "logps/chosen": -0.0001775856944732368, + "logps/rejected": -2.6482834815979004, + "loss": 0.5431, + "nll_loss": 0.13577528297901154, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7758567992132157e-05, + "rewards/margins": 0.26481059193611145, + "rewards/rejected": -0.26482832431793213, + "step": 9228 + }, + { + "epoch": 6.382434301521439, + "grad_norm": 3.6606783866882324, + "learning_rate": 2.0097587213769788e-05, + "log_odds_chosen": 10.625322341918945, + "log_odds_ratio": -0.00024825221044011414, + "logits/chosen": -0.7195248603820801, + "logits/rejected": -0.762287974357605, + "logps/chosen": -0.0004275470564607531, + "logps/rejected": -2.5666942596435547, + "loss": 0.5686, + "nll_loss": 0.1421373337507248, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.275470564607531e-05, + "rewards/margins": 0.256626695394516, + "rewards/rejected": -0.25666943192481995, + "step": 9229 + }, + { + "epoch": 6.383125864453666, + "grad_norm": 6.820446491241455, + "learning_rate": 2.0093745197479637e-05, + "log_odds_chosen": 10.165421485900879, + "log_odds_ratio": -0.0002845745184458792, + "logits/chosen": -0.7244423031806946, + "logits/rejected": -0.5874755382537842, + "logps/chosen": -0.0002369354770053178, + "logps/rejected": -1.8407738208770752, + "loss": 0.7861, + "nll_loss": 0.19650883972644806, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3693544790148735e-05, + "rewards/margins": 0.1840536892414093, + "rewards/rejected": -0.18407738208770752, + "step": 9230 + }, + { + "epoch": 6.3838174273858925, + "grad_norm": 10.45644760131836, + "learning_rate": 2.008990318118949e-05, + "log_odds_chosen": 10.038421630859375, + "log_odds_ratio": -0.000769000849686563, + "logits/chosen": -0.26605021953582764, + "logits/rejected": -0.3089340329170227, + "logps/chosen": -0.0019191744504496455, + "logps/rejected": -2.226240396499634, + "loss": 0.7829, + "nll_loss": 0.19565162062644958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001919174537761137, + "rewards/margins": 0.22243213653564453, + "rewards/rejected": -0.2226240336894989, + "step": 9231 + }, + { + "epoch": 6.384508990318119, + "grad_norm": 6.580210208892822, + "learning_rate": 2.0086061164899338e-05, + "log_odds_chosen": 10.458142280578613, + "log_odds_ratio": -4.524439282249659e-05, + "logits/chosen": -0.7212006449699402, + "logits/rejected": -0.7315418720245361, + "logps/chosen": -0.00020987285824958235, + "logps/rejected": -2.0603229999542236, + "loss": 0.4201, + "nll_loss": 0.10501430928707123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0987286916351877e-05, + "rewards/margins": 0.2060113102197647, + "rewards/rejected": -0.20603230595588684, + "step": 9232 + }, + { + "epoch": 6.385200553250346, + "grad_norm": 9.622176170349121, + "learning_rate": 2.008221914860919e-05, + "log_odds_chosen": 10.036763191223145, + "log_odds_ratio": -0.00010809496598085389, + "logits/chosen": -0.5044224858283997, + "logits/rejected": -0.501787006855011, + "logps/chosen": -0.0005055024521425366, + "logps/rejected": -1.873197317123413, + "loss": 0.5587, + "nll_loss": 0.13966771960258484, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.055024303146638e-05, + "rewards/margins": 0.1872691959142685, + "rewards/rejected": -0.18731975555419922, + "step": 9233 + }, + { + "epoch": 6.385892116182573, + "grad_norm": 7.534361362457275, + "learning_rate": 2.0078377132319043e-05, + "log_odds_chosen": 10.918198585510254, + "log_odds_ratio": -0.0002446550060994923, + "logits/chosen": -0.5140203833580017, + "logits/rejected": -0.5069654583930969, + "logps/chosen": -0.0003034729161299765, + "logps/rejected": -2.358905076980591, + "loss": 0.5943, + "nll_loss": 0.14855839312076569, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0347293431987055e-05, + "rewards/margins": 0.2358601689338684, + "rewards/rejected": -0.23589050769805908, + "step": 9234 + }, + { + "epoch": 6.3865836791148, + "grad_norm": 7.286149501800537, + "learning_rate": 2.0074535116028892e-05, + "log_odds_chosen": 10.955660820007324, + "log_odds_ratio": -0.001999650150537491, + "logits/chosen": -0.2171625941991806, + "logits/rejected": -0.2073816955089569, + "logps/chosen": -0.0009690782171674073, + "logps/rejected": -3.253391742706299, + "loss": 0.7628, + "nll_loss": 0.19049334526062012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.6907839179039e-05, + "rewards/margins": 0.325242280960083, + "rewards/rejected": -0.3253391981124878, + "step": 9235 + }, + { + "epoch": 6.387275242047027, + "grad_norm": 4.016501426696777, + "learning_rate": 2.0070693099738744e-05, + "log_odds_chosen": 10.702842712402344, + "log_odds_ratio": -0.00010885380470426753, + "logits/chosen": 0.09817785024642944, + "logits/rejected": 0.017373614013195038, + "logps/chosen": -0.00010006909724324942, + "logps/rejected": -1.8319261074066162, + "loss": 0.6437, + "nll_loss": 0.16091807186603546, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0006910088122822e-05, + "rewards/margins": 0.18318259716033936, + "rewards/rejected": -0.18319261074066162, + "step": 9236 + }, + { + "epoch": 6.3879668049792535, + "grad_norm": 7.597879886627197, + "learning_rate": 2.0066851083448597e-05, + "log_odds_chosen": 9.945055961608887, + "log_odds_ratio": -0.0001487003028159961, + "logits/chosen": -0.3875085115432739, + "logits/rejected": -0.47611644864082336, + "logps/chosen": -0.00034226285060867667, + "logps/rejected": -1.6458823680877686, + "loss": 0.5198, + "nll_loss": 0.12993381917476654, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.422628651605919e-05, + "rewards/margins": 0.16455401480197906, + "rewards/rejected": -0.16458824276924133, + "step": 9237 + }, + { + "epoch": 6.38865836791148, + "grad_norm": 5.411985397338867, + "learning_rate": 2.0063009067158446e-05, + "log_odds_chosen": 10.670639038085938, + "log_odds_ratio": -8.757206524023786e-05, + "logits/chosen": -0.24942684173583984, + "logits/rejected": -0.2994964122772217, + "logps/chosen": -0.000738327216822654, + "logps/rejected": -2.4431498050689697, + "loss": 0.6914, + "nll_loss": 0.17284391820430756, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.383272895822302e-05, + "rewards/margins": 0.2442411482334137, + "rewards/rejected": -0.2443149983882904, + "step": 9238 + }, + { + "epoch": 6.389349930843707, + "grad_norm": 4.901096820831299, + "learning_rate": 2.0059167050868295e-05, + "log_odds_chosen": 11.94849967956543, + "log_odds_ratio": -9.714612679090351e-05, + "logits/chosen": 0.11094458401203156, + "logits/rejected": -0.12741082906723022, + "logps/chosen": -0.0005755483289249241, + "logps/rejected": -3.0402350425720215, + "loss": 0.448, + "nll_loss": 0.11199948936700821, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7554832892492414e-05, + "rewards/margins": 0.3039659261703491, + "rewards/rejected": -0.30402350425720215, + "step": 9239 + }, + { + "epoch": 6.390041493775934, + "grad_norm": 9.04263687133789, + "learning_rate": 2.0055325034578147e-05, + "log_odds_chosen": 10.631397247314453, + "log_odds_ratio": -6.708302680635825e-05, + "logits/chosen": -0.48490309715270996, + "logits/rejected": -0.5478677153587341, + "logps/chosen": -0.00012463021266739815, + "logps/rejected": -1.730746865272522, + "loss": 0.6251, + "nll_loss": 0.15627357363700867, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2463022358133458e-05, + "rewards/margins": 0.17306222021579742, + "rewards/rejected": -0.17307469248771667, + "step": 9240 + }, + { + "epoch": 6.390733056708161, + "grad_norm": 6.076363563537598, + "learning_rate": 2.0051483018287997e-05, + "log_odds_chosen": 10.133882522583008, + "log_odds_ratio": -0.0001550958986626938, + "logits/chosen": -0.24662718176841736, + "logits/rejected": -0.3362925052642822, + "logps/chosen": -0.0001913850283017382, + "logps/rejected": -1.8486599922180176, + "loss": 0.8633, + "nll_loss": 0.21581153571605682, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.913850246637594e-05, + "rewards/margins": 0.1848468780517578, + "rewards/rejected": -0.1848660111427307, + "step": 9241 + }, + { + "epoch": 6.391424619640388, + "grad_norm": 4.970860481262207, + "learning_rate": 2.004764100199785e-05, + "log_odds_chosen": 10.41044807434082, + "log_odds_ratio": -0.0003214046882931143, + "logits/chosen": -0.41268280148506165, + "logits/rejected": -0.39346709847450256, + "logps/chosen": -0.00024544625193811953, + "logps/rejected": -1.7044298648834229, + "loss": 0.5116, + "nll_loss": 0.12785673141479492, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.454462446621619e-05, + "rewards/margins": 0.17041844129562378, + "rewards/rejected": -0.17044298350811005, + "step": 9242 + }, + { + "epoch": 6.3921161825726145, + "grad_norm": 9.744050025939941, + "learning_rate": 2.00437989857077e-05, + "log_odds_chosen": 9.98155403137207, + "log_odds_ratio": -7.244835433084518e-05, + "logits/chosen": -0.5653132200241089, + "logits/rejected": -0.6456999182701111, + "logps/chosen": -0.0005318495677784085, + "logps/rejected": -1.479022741317749, + "loss": 0.5747, + "nll_loss": 0.14367491006851196, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3184958233032376e-05, + "rewards/margins": 0.14784908294677734, + "rewards/rejected": -0.14790228009223938, + "step": 9243 + }, + { + "epoch": 6.392807745504841, + "grad_norm": 11.613505363464355, + "learning_rate": 2.003995696941755e-05, + "log_odds_chosen": 10.732831954956055, + "log_odds_ratio": -0.0001225114392582327, + "logits/chosen": -0.5981311202049255, + "logits/rejected": -0.6376551985740662, + "logps/chosen": -0.0002539186389185488, + "logps/rejected": -2.253077507019043, + "loss": 0.4829, + "nll_loss": 0.12071920931339264, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.539186243666336e-05, + "rewards/margins": 0.22528235614299774, + "rewards/rejected": -0.22530776262283325, + "step": 9244 + }, + { + "epoch": 6.393499308437068, + "grad_norm": 6.729595184326172, + "learning_rate": 2.0036114953127403e-05, + "log_odds_chosen": 12.00716495513916, + "log_odds_ratio": -3.18633065035101e-05, + "logits/chosen": -0.4734930694103241, + "logits/rejected": -0.3904078006744385, + "logps/chosen": -0.000660967780277133, + "logps/rejected": -3.9002439975738525, + "loss": 0.7588, + "nll_loss": 0.18970799446105957, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.609678530367091e-05, + "rewards/margins": 0.38995829224586487, + "rewards/rejected": -0.39002442359924316, + "step": 9245 + }, + { + "epoch": 6.394190871369295, + "grad_norm": 8.777599334716797, + "learning_rate": 2.0032272936837255e-05, + "log_odds_chosen": 10.585710525512695, + "log_odds_ratio": -6.461291923187673e-05, + "logits/chosen": 0.009615451097488403, + "logits/rejected": -0.06416276097297668, + "logps/chosen": -0.0005693895509466529, + "logps/rejected": -2.4563021659851074, + "loss": 0.5749, + "nll_loss": 0.14370794594287872, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.693895218428224e-05, + "rewards/margins": 0.2455732822418213, + "rewards/rejected": -0.24563023447990417, + "step": 9246 + }, + { + "epoch": 6.394882434301522, + "grad_norm": 5.206984996795654, + "learning_rate": 2.0028430920547104e-05, + "log_odds_chosen": 10.491327285766602, + "log_odds_ratio": -9.465732728131115e-05, + "logits/chosen": -0.8148977756500244, + "logits/rejected": -0.8432801961898804, + "logps/chosen": -0.00031137533369474113, + "logps/rejected": -1.946422815322876, + "loss": 0.7206, + "nll_loss": 0.18014222383499146, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.113753336947411e-05, + "rewards/margins": 0.19461116194725037, + "rewards/rejected": -0.19464229047298431, + "step": 9247 + }, + { + "epoch": 6.395573997233749, + "grad_norm": 6.629668235778809, + "learning_rate": 2.0024588904256953e-05, + "log_odds_chosen": 10.25268840789795, + "log_odds_ratio": -0.00024983941693790257, + "logits/chosen": -0.48796379566192627, + "logits/rejected": -0.6104187965393066, + "logps/chosen": -0.000225244730245322, + "logps/rejected": -1.7549493312835693, + "loss": 0.5557, + "nll_loss": 0.13889212906360626, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.25244730245322e-05, + "rewards/margins": 0.1754724234342575, + "rewards/rejected": -0.1754949390888214, + "step": 9248 + }, + { + "epoch": 6.3962655601659755, + "grad_norm": 9.717963218688965, + "learning_rate": 2.0020746887966806e-05, + "log_odds_chosen": 10.322273254394531, + "log_odds_ratio": -0.0001579874224262312, + "logits/chosen": -0.3825463652610779, + "logits/rejected": -0.25651872158050537, + "logps/chosen": -0.00029214590904302895, + "logps/rejected": -1.7719712257385254, + "loss": 0.5939, + "nll_loss": 0.1484469473361969, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9214590540505014e-05, + "rewards/margins": 0.17716792225837708, + "rewards/rejected": -0.1771971434354782, + "step": 9249 + }, + { + "epoch": 6.396957123098202, + "grad_norm": 9.360939025878906, + "learning_rate": 2.0016904871676655e-05, + "log_odds_chosen": 11.585420608520508, + "log_odds_ratio": -1.1413669199100696e-05, + "logits/chosen": -0.4388987421989441, + "logits/rejected": -0.5015350580215454, + "logps/chosen": -0.00016019078611861914, + "logps/rejected": -2.4639031887054443, + "loss": 0.4223, + "nll_loss": 0.10557594150304794, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6019079339457676e-05, + "rewards/margins": 0.24637427926063538, + "rewards/rejected": -0.24639031291007996, + "step": 9250 + }, + { + "epoch": 6.397648686030429, + "grad_norm": 9.76177978515625, + "learning_rate": 2.0013062855386507e-05, + "log_odds_chosen": 10.462821960449219, + "log_odds_ratio": -0.0002514673105906695, + "logits/chosen": -0.6369720697402954, + "logits/rejected": -0.6475129127502441, + "logps/chosen": -0.0008802501251921058, + "logps/rejected": -2.4775772094726562, + "loss": 0.6221, + "nll_loss": 0.155501589179039, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.802501542959362e-05, + "rewards/margins": 0.24766971170902252, + "rewards/rejected": -0.24775774776935577, + "step": 9251 + }, + { + "epoch": 6.398340248962656, + "grad_norm": 7.950948238372803, + "learning_rate": 2.000922083909636e-05, + "log_odds_chosen": 11.220458984375, + "log_odds_ratio": -0.0001685236784396693, + "logits/chosen": -0.33944761753082275, + "logits/rejected": -0.4309898614883423, + "logps/chosen": -0.000930731650441885, + "logps/rejected": -3.331479787826538, + "loss": 0.9565, + "nll_loss": 0.23909664154052734, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.307316213380545e-05, + "rewards/margins": 0.33305490016937256, + "rewards/rejected": -0.33314797282218933, + "step": 9252 + }, + { + "epoch": 6.399031811894883, + "grad_norm": 9.884614944458008, + "learning_rate": 2.000537882280621e-05, + "log_odds_chosen": 10.504169464111328, + "log_odds_ratio": -0.00013753658276982605, + "logits/chosen": -0.39223068952560425, + "logits/rejected": -0.3681015372276306, + "logps/chosen": -0.00023119246179703623, + "logps/rejected": -2.302983045578003, + "loss": 0.5224, + "nll_loss": 0.13057824969291687, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3119247998693027e-05, + "rewards/margins": 0.23027518391609192, + "rewards/rejected": -0.23029831051826477, + "step": 9253 + }, + { + "epoch": 6.39972337482711, + "grad_norm": 5.766773700714111, + "learning_rate": 2.000153680651606e-05, + "log_odds_chosen": 10.745979309082031, + "log_odds_ratio": -0.00012643210357055068, + "logits/chosen": -0.6153202056884766, + "logits/rejected": -0.634486198425293, + "logps/chosen": -0.0003257495700381696, + "logps/rejected": -2.0734894275665283, + "loss": 0.4582, + "nll_loss": 0.11454488337039948, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.257495700381696e-05, + "rewards/margins": 0.20731636881828308, + "rewards/rejected": -0.20734894275665283, + "step": 9254 + }, + { + "epoch": 6.4004149377593365, + "grad_norm": 36.044620513916016, + "learning_rate": 1.9997694790225914e-05, + "log_odds_chosen": 10.067339897155762, + "log_odds_ratio": -0.00020148635667283088, + "logits/chosen": -0.5717883706092834, + "logits/rejected": -0.5887860059738159, + "logps/chosen": -0.00023326711379922926, + "logps/rejected": -1.9287145137786865, + "loss": 0.5745, + "nll_loss": 0.14359921216964722, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.332671283511445e-05, + "rewards/margins": 0.1928481161594391, + "rewards/rejected": -0.19287145137786865, + "step": 9255 + }, + { + "epoch": 6.401106500691563, + "grad_norm": 5.805153846740723, + "learning_rate": 1.9993852773935763e-05, + "log_odds_chosen": 11.110633850097656, + "log_odds_ratio": -2.4092261810437776e-05, + "logits/chosen": -0.10224826633930206, + "logits/rejected": -0.2345043420791626, + "logps/chosen": -0.0004690833739005029, + "logps/rejected": -2.6470563411712646, + "loss": 0.5916, + "nll_loss": 0.14789032936096191, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6908338845241815e-05, + "rewards/margins": 0.26465874910354614, + "rewards/rejected": -0.2647056579589844, + "step": 9256 + }, + { + "epoch": 6.40179806362379, + "grad_norm": 8.75613021850586, + "learning_rate": 1.9990010757645615e-05, + "log_odds_chosen": 10.300285339355469, + "log_odds_ratio": -8.09316334198229e-05, + "logits/chosen": -0.6939583420753479, + "logits/rejected": -0.8330811858177185, + "logps/chosen": -0.0003144872607663274, + "logps/rejected": -1.9681203365325928, + "loss": 0.5557, + "nll_loss": 0.1389131098985672, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.144872607663274e-05, + "rewards/margins": 0.19678059220314026, + "rewards/rejected": -0.19681203365325928, + "step": 9257 + }, + { + "epoch": 6.402489626556017, + "grad_norm": 6.8936004638671875, + "learning_rate": 1.9986168741355464e-05, + "log_odds_chosen": 11.252097129821777, + "log_odds_ratio": -3.741459295270033e-05, + "logits/chosen": -0.7355087399482727, + "logits/rejected": -0.8028329610824585, + "logps/chosen": -0.00016158061043825, + "logps/rejected": -2.471592426300049, + "loss": 0.4441, + "nll_loss": 0.11102715134620667, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6158061043825e-05, + "rewards/margins": 0.24714305996894836, + "rewards/rejected": -0.2471592277288437, + "step": 9258 + }, + { + "epoch": 6.403181189488244, + "grad_norm": 7.777023792266846, + "learning_rate": 1.9982326725065313e-05, + "log_odds_chosen": 10.736876487731934, + "log_odds_ratio": -0.000112594869278837, + "logits/chosen": -0.5803585648536682, + "logits/rejected": -0.5270214080810547, + "logps/chosen": -0.00024174893042072654, + "logps/rejected": -2.451173782348633, + "loss": 0.6914, + "nll_loss": 0.17283624410629272, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4174894861062057e-05, + "rewards/margins": 0.2450932115316391, + "rewards/rejected": -0.24511736631393433, + "step": 9259 + }, + { + "epoch": 6.403872752420471, + "grad_norm": 7.646719455718994, + "learning_rate": 1.9978484708775166e-05, + "log_odds_chosen": 11.0280179977417, + "log_odds_ratio": -4.2771946027642116e-05, + "logits/chosen": -0.3071643114089966, + "logits/rejected": -0.3612262010574341, + "logps/chosen": -0.00020295185095164925, + "logps/rejected": -2.3706865310668945, + "loss": 0.5651, + "nll_loss": 0.14127621054649353, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.029518691415433e-05, + "rewards/margins": 0.23704838752746582, + "rewards/rejected": -0.23706866800785065, + "step": 9260 + }, + { + "epoch": 6.404564315352697, + "grad_norm": 6.2436723709106445, + "learning_rate": 1.9974642692485018e-05, + "log_odds_chosen": 10.827913284301758, + "log_odds_ratio": -2.2066273231757805e-05, + "logits/chosen": -0.5114313960075378, + "logits/rejected": -0.5310375690460205, + "logps/chosen": -0.0001401654299115762, + "logps/rejected": -1.8054132461547852, + "loss": 0.4848, + "nll_loss": 0.12118765711784363, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4016542991157621e-05, + "rewards/margins": 0.18052731454372406, + "rewards/rejected": -0.18054133653640747, + "step": 9261 + }, + { + "epoch": 6.405255878284924, + "grad_norm": 5.233295917510986, + "learning_rate": 1.9970800676194867e-05, + "log_odds_chosen": 11.959897994995117, + "log_odds_ratio": -5.2490322559606284e-05, + "logits/chosen": -0.33044326305389404, + "logits/rejected": -0.49339759349823, + "logps/chosen": -0.00012844899902120233, + "logps/rejected": -3.1751294136047363, + "loss": 0.5452, + "nll_loss": 0.1362866759300232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2844900084019173e-05, + "rewards/margins": 0.3175000846385956, + "rewards/rejected": -0.3175129294395447, + "step": 9262 + }, + { + "epoch": 6.405947441217151, + "grad_norm": 4.386822700500488, + "learning_rate": 1.996695865990472e-05, + "log_odds_chosen": 10.40085506439209, + "log_odds_ratio": -0.00021573121193796396, + "logits/chosen": -0.4487634301185608, + "logits/rejected": -0.4953088164329529, + "logps/chosen": -0.000500406080391258, + "logps/rejected": -2.1370983123779297, + "loss": 0.3764, + "nll_loss": 0.09408355504274368, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.004060949431732e-05, + "rewards/margins": 0.21365980803966522, + "rewards/rejected": -0.21370983123779297, + "step": 9263 + }, + { + "epoch": 6.406639004149378, + "grad_norm": 12.913554191589355, + "learning_rate": 1.9963116643614572e-05, + "log_odds_chosen": 11.511303901672363, + "log_odds_ratio": -5.697936285287142e-05, + "logits/chosen": -0.5148541927337646, + "logits/rejected": -0.5149716138839722, + "logps/chosen": -0.000354190357029438, + "logps/rejected": -3.1875712871551514, + "loss": 0.6385, + "nll_loss": 0.159611314535141, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5419037885731086e-05, + "rewards/margins": 0.3187217116355896, + "rewards/rejected": -0.31875714659690857, + "step": 9264 + }, + { + "epoch": 6.407330567081605, + "grad_norm": 5.047789096832275, + "learning_rate": 1.995927462732442e-05, + "log_odds_chosen": 11.870508193969727, + "log_odds_ratio": -1.1209338481421582e-05, + "logits/chosen": -0.1910778284072876, + "logits/rejected": -0.3116176724433899, + "logps/chosen": -0.0001415059232385829, + "logps/rejected": -2.6769137382507324, + "loss": 0.5289, + "nll_loss": 0.13223110139369965, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4150593415251933e-05, + "rewards/margins": 0.2676772475242615, + "rewards/rejected": -0.26769137382507324, + "step": 9265 + }, + { + "epoch": 6.408022130013832, + "grad_norm": 4.988502025604248, + "learning_rate": 1.9955432611034274e-05, + "log_odds_chosen": 10.377761840820312, + "log_odds_ratio": -4.9067879444919527e-05, + "logits/chosen": -0.2553957402706146, + "logits/rejected": -0.24770502746105194, + "logps/chosen": -0.00022306838945951313, + "logps/rejected": -1.7231662273406982, + "loss": 0.5945, + "nll_loss": 0.14862555265426636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.230684185633436e-05, + "rewards/margins": 0.17229431867599487, + "rewards/rejected": -0.17231664061546326, + "step": 9266 + }, + { + "epoch": 6.408713692946058, + "grad_norm": 6.566638469696045, + "learning_rate": 1.9951590594744123e-05, + "log_odds_chosen": 11.166705131530762, + "log_odds_ratio": -4.125917257624678e-05, + "logits/chosen": -0.2326575368642807, + "logits/rejected": -0.1612112820148468, + "logps/chosen": -0.00021289799769874662, + "logps/rejected": -2.609964370727539, + "loss": 0.6125, + "nll_loss": 0.15311162173748016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.128980304405559e-05, + "rewards/margins": 0.2609751522541046, + "rewards/rejected": -0.2609964609146118, + "step": 9267 + }, + { + "epoch": 6.409405255878285, + "grad_norm": 7.873584747314453, + "learning_rate": 1.9947748578453972e-05, + "log_odds_chosen": 10.430408477783203, + "log_odds_ratio": -8.551737118978053e-05, + "logits/chosen": -0.7652373313903809, + "logits/rejected": -0.8368032574653625, + "logps/chosen": -0.00012079392035957426, + "logps/rejected": -1.5141304731369019, + "loss": 0.6633, + "nll_loss": 0.1658088117837906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2079392945452128e-05, + "rewards/margins": 0.15140098333358765, + "rewards/rejected": -0.15141305327415466, + "step": 9268 + }, + { + "epoch": 6.410096818810512, + "grad_norm": 8.773344039916992, + "learning_rate": 1.9943906562163824e-05, + "log_odds_chosen": 10.397517204284668, + "log_odds_ratio": -0.00030735606560483575, + "logits/chosen": -0.22622643411159515, + "logits/rejected": -0.39059799909591675, + "logps/chosen": -0.00034772863727994263, + "logps/rejected": -2.125950574874878, + "loss": 0.5186, + "nll_loss": 0.1296141892671585, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477286372799426e-05, + "rewards/margins": 0.21256029605865479, + "rewards/rejected": -0.21259507536888123, + "step": 9269 + }, + { + "epoch": 6.410788381742739, + "grad_norm": 12.695683479309082, + "learning_rate": 1.9940064545873677e-05, + "log_odds_chosen": 11.041482925415039, + "log_odds_ratio": -0.0006497156573459506, + "logits/chosen": 0.3386712968349457, + "logits/rejected": 0.1383814960718155, + "logps/chosen": -0.0004824312636628747, + "logps/rejected": -2.4895670413970947, + "loss": 0.8336, + "nll_loss": 0.2083456963300705, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.82431314594578e-05, + "rewards/margins": 0.24890847504138947, + "rewards/rejected": -0.24895671010017395, + "step": 9270 + }, + { + "epoch": 6.411479944674966, + "grad_norm": 7.70023250579834, + "learning_rate": 1.9936222529583526e-05, + "log_odds_chosen": 11.177961349487305, + "log_odds_ratio": -9.007647167891264e-05, + "logits/chosen": -0.5466927289962769, + "logits/rejected": -0.5063419938087463, + "logps/chosen": -0.00019104511011391878, + "logps/rejected": -2.687896728515625, + "loss": 0.75, + "nll_loss": 0.18748390674591064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9104511011391878e-05, + "rewards/margins": 0.26877057552337646, + "rewards/rejected": -0.268789678812027, + "step": 9271 + }, + { + "epoch": 6.412171507607193, + "grad_norm": 6.924728870391846, + "learning_rate": 1.9932380513293378e-05, + "log_odds_chosen": 10.738670349121094, + "log_odds_ratio": -0.00016212818445637822, + "logits/chosen": -0.2753411829471588, + "logits/rejected": -0.17702896893024445, + "logps/chosen": -0.0005732090794481337, + "logps/rejected": -2.4172887802124023, + "loss": 0.8917, + "nll_loss": 0.22289781272411346, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.73209035792388e-05, + "rewards/margins": 0.24167153239250183, + "rewards/rejected": -0.24172884225845337, + "step": 9272 + }, + { + "epoch": 6.412863070539419, + "grad_norm": 14.814724922180176, + "learning_rate": 1.992853849700323e-05, + "log_odds_chosen": 11.559272766113281, + "log_odds_ratio": -1.548027285025455e-05, + "logits/chosen": -0.89446622133255, + "logits/rejected": -1.0412811040878296, + "logps/chosen": -0.0004175920912530273, + "logps/rejected": -2.7191271781921387, + "loss": 0.7141, + "nll_loss": 0.1785222291946411, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1759216401260346e-05, + "rewards/margins": 0.2718709707260132, + "rewards/rejected": -0.27191272377967834, + "step": 9273 + }, + { + "epoch": 6.413554633471646, + "grad_norm": 8.348380088806152, + "learning_rate": 1.992469648071308e-05, + "log_odds_chosen": 10.117218017578125, + "log_odds_ratio": -0.00013753073289990425, + "logits/chosen": -0.4123270809650421, + "logits/rejected": -0.4688197374343872, + "logps/chosen": -0.0002495943335816264, + "logps/rejected": -1.7303459644317627, + "loss": 0.7288, + "nll_loss": 0.18217617273330688, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4959434085758403e-05, + "rewards/margins": 0.17300963401794434, + "rewards/rejected": -0.17303459346294403, + "step": 9274 + }, + { + "epoch": 6.414246196403873, + "grad_norm": 8.611536979675293, + "learning_rate": 1.9920854464422932e-05, + "log_odds_chosen": 10.415322303771973, + "log_odds_ratio": -8.811524457996711e-05, + "logits/chosen": -0.3573923707008362, + "logits/rejected": -0.4499669671058655, + "logps/chosen": -0.000698216026648879, + "logps/rejected": -2.2807555198669434, + "loss": 0.5595, + "nll_loss": 0.1398547887802124, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.98216026648879e-05, + "rewards/margins": 0.22800573706626892, + "rewards/rejected": -0.22807557880878448, + "step": 9275 + }, + { + "epoch": 6.4149377593361, + "grad_norm": 5.704978942871094, + "learning_rate": 1.991701244813278e-05, + "log_odds_chosen": 11.306905746459961, + "log_odds_ratio": -1.6761072402005084e-05, + "logits/chosen": 0.09830937534570694, + "logits/rejected": 0.004683436825871468, + "logps/chosen": -0.00013964908430352807, + "logps/rejected": -2.4486541748046875, + "loss": 0.5404, + "nll_loss": 0.13511033356189728, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3964909157948568e-05, + "rewards/margins": 0.2448514699935913, + "rewards/rejected": -0.24486540257930756, + "step": 9276 + }, + { + "epoch": 6.415629322268327, + "grad_norm": 6.354818820953369, + "learning_rate": 1.991317043184263e-05, + "log_odds_chosen": 9.531805992126465, + "log_odds_ratio": -0.012731587514281273, + "logits/chosen": -0.15848901867866516, + "logits/rejected": -0.299731969833374, + "logps/chosen": -0.0038687651976943016, + "logps/rejected": -1.9498724937438965, + "loss": 1.323, + "nll_loss": 0.3294808566570282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038687651976943016, + "rewards/margins": 0.1946004033088684, + "rewards/rejected": -0.19498726725578308, + "step": 9277 + }, + { + "epoch": 6.4163208852005535, + "grad_norm": 7.357949256896973, + "learning_rate": 1.9909328415552483e-05, + "log_odds_chosen": 8.946401596069336, + "log_odds_ratio": -0.0028267528396099806, + "logits/chosen": -0.25857284665107727, + "logits/rejected": -0.4397861361503601, + "logps/chosen": -0.0020083796698600054, + "logps/rejected": -1.6771559715270996, + "loss": 1.0644, + "nll_loss": 0.2658079266548157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002008379524340853, + "rewards/margins": 0.16751474142074585, + "rewards/rejected": -0.16771559417247772, + "step": 9278 + }, + { + "epoch": 6.41701244813278, + "grad_norm": 6.2034687995910645, + "learning_rate": 1.9905486399262332e-05, + "log_odds_chosen": 11.08052921295166, + "log_odds_ratio": -3.313586785225198e-05, + "logits/chosen": -0.37916818261146545, + "logits/rejected": -0.47911304235458374, + "logps/chosen": -0.00012295949272811413, + "logps/rejected": -2.0425448417663574, + "loss": 0.393, + "nll_loss": 0.0982578918337822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2295950909901876e-05, + "rewards/margins": 0.20424219965934753, + "rewards/rejected": -0.20425450801849365, + "step": 9279 + }, + { + "epoch": 6.417704011065007, + "grad_norm": 7.049955368041992, + "learning_rate": 1.9901644382972184e-05, + "log_odds_chosen": 11.446515083312988, + "log_odds_ratio": -3.128893149551004e-05, + "logits/chosen": -0.42859339714050293, + "logits/rejected": -0.3936436176300049, + "logps/chosen": -0.0002233328705187887, + "logps/rejected": -2.4475386142730713, + "loss": 0.7558, + "nll_loss": 0.18894362449645996, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2333286324283108e-05, + "rewards/margins": 0.24473154544830322, + "rewards/rejected": -0.2447538673877716, + "step": 9280 + }, + { + "epoch": 6.418395573997234, + "grad_norm": 6.746326923370361, + "learning_rate": 1.9897802366682037e-05, + "log_odds_chosen": 10.88402271270752, + "log_odds_ratio": -3.941710019716993e-05, + "logits/chosen": -0.1344437301158905, + "logits/rejected": -0.2331140637397766, + "logps/chosen": -0.0003557455202098936, + "logps/rejected": -2.0126137733459473, + "loss": 0.7242, + "nll_loss": 0.18103933334350586, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5574550565797836e-05, + "rewards/margins": 0.20122580230236053, + "rewards/rejected": -0.20126137137413025, + "step": 9281 + }, + { + "epoch": 6.419087136929461, + "grad_norm": 11.59501838684082, + "learning_rate": 1.9893960350391886e-05, + "log_odds_chosen": 10.503249168395996, + "log_odds_ratio": -8.245335629908368e-05, + "logits/chosen": -0.44896990060806274, + "logits/rejected": -0.6239047646522522, + "logps/chosen": -0.0002727765531744808, + "logps/rejected": -1.7392630577087402, + "loss": 0.4511, + "nll_loss": 0.11275888979434967, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7277654226054437e-05, + "rewards/margins": 0.17389902472496033, + "rewards/rejected": -0.17392629384994507, + "step": 9282 + }, + { + "epoch": 6.419778699861688, + "grad_norm": 7.3511881828308105, + "learning_rate": 1.9890118334101738e-05, + "log_odds_chosen": 11.662189483642578, + "log_odds_ratio": -1.938941204571165e-05, + "logits/chosen": -0.4689757823944092, + "logits/rejected": -0.5246522426605225, + "logps/chosen": -0.000340028025675565, + "logps/rejected": -3.2328014373779297, + "loss": 1.2258, + "nll_loss": 0.30644235014915466, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4002805477939546e-05, + "rewards/margins": 0.32324618101119995, + "rewards/rejected": -0.3232801556587219, + "step": 9283 + }, + { + "epoch": 6.4204702627939145, + "grad_norm": 11.929098129272461, + "learning_rate": 1.988627631781159e-05, + "log_odds_chosen": 10.89908504486084, + "log_odds_ratio": -9.280510130338371e-05, + "logits/chosen": -0.3836508095264435, + "logits/rejected": -0.4062102138996124, + "logps/chosen": -0.0006549840909428895, + "logps/rejected": -2.7214956283569336, + "loss": 0.6892, + "nll_loss": 0.1723027229309082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.549841054948047e-05, + "rewards/margins": 0.2720840573310852, + "rewards/rejected": -0.27214956283569336, + "step": 9284 + }, + { + "epoch": 6.421161825726141, + "grad_norm": 6.852628707885742, + "learning_rate": 1.988243430152144e-05, + "log_odds_chosen": 12.121063232421875, + "log_odds_ratio": -8.600990440754686e-06, + "logits/chosen": -0.20669779181480408, + "logits/rejected": -0.30786818265914917, + "logps/chosen": -6.404731539078057e-05, + "logps/rejected": -2.255523681640625, + "loss": 0.4526, + "nll_loss": 0.11315148323774338, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.404731720976997e-06, + "rewards/margins": 0.2255459725856781, + "rewards/rejected": -0.22555235028266907, + "step": 9285 + }, + { + "epoch": 6.421853388658368, + "grad_norm": 5.463398456573486, + "learning_rate": 1.987859228523129e-05, + "log_odds_chosen": 10.10585880279541, + "log_odds_ratio": -0.00014293566346168518, + "logits/chosen": -0.45323115587234497, + "logits/rejected": -0.5202012658119202, + "logps/chosen": -0.0009458367712795734, + "logps/rejected": -2.0295233726501465, + "loss": 0.4648, + "nll_loss": 0.11618015170097351, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.458368003834039e-05, + "rewards/margins": 0.20285777747631073, + "rewards/rejected": -0.20295235514640808, + "step": 9286 + }, + { + "epoch": 6.422544951590595, + "grad_norm": 8.183695793151855, + "learning_rate": 1.987475026894114e-05, + "log_odds_chosen": 10.048669815063477, + "log_odds_ratio": -0.00015383576101157814, + "logits/chosen": -0.2582641839981079, + "logits/rejected": -0.39592811465263367, + "logps/chosen": -0.00028131093131378293, + "logps/rejected": -1.7277061939239502, + "loss": 0.5308, + "nll_loss": 0.132684126496315, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8131093131378293e-05, + "rewards/margins": 0.17274248600006104, + "rewards/rejected": -0.1727706342935562, + "step": 9287 + }, + { + "epoch": 6.423236514522822, + "grad_norm": 5.832282543182373, + "learning_rate": 1.987090825265099e-05, + "log_odds_chosen": 11.093450546264648, + "log_odds_ratio": -2.3829081328585744e-05, + "logits/chosen": -0.7500884532928467, + "logits/rejected": -0.7326939702033997, + "logps/chosen": -0.0003749387396965176, + "logps/rejected": -2.4053335189819336, + "loss": 0.5222, + "nll_loss": 0.1305372714996338, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.749387542484328e-05, + "rewards/margins": 0.24049586057662964, + "rewards/rejected": -0.24053336679935455, + "step": 9288 + }, + { + "epoch": 6.423928077455049, + "grad_norm": 23.444679260253906, + "learning_rate": 1.9867066236360843e-05, + "log_odds_chosen": 11.406169891357422, + "log_odds_ratio": -2.352497904212214e-05, + "logits/chosen": -0.45594874024391174, + "logits/rejected": -0.5880993008613586, + "logps/chosen": -0.00014266757352743298, + "logps/rejected": -2.4971399307250977, + "loss": 0.8397, + "nll_loss": 0.20991206169128418, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4266757716541179e-05, + "rewards/margins": 0.24969972670078278, + "rewards/rejected": -0.24971400201320648, + "step": 9289 + }, + { + "epoch": 6.4246196403872755, + "grad_norm": 6.20634651184082, + "learning_rate": 1.9863224220070695e-05, + "log_odds_chosen": 11.279754638671875, + "log_odds_ratio": -0.0001059678616002202, + "logits/chosen": -0.9170388579368591, + "logits/rejected": -0.9366865158081055, + "logps/chosen": -0.00013711173960473388, + "logps/rejected": -1.7270501852035522, + "loss": 0.5417, + "nll_loss": 0.13541099429130554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3711173778574448e-05, + "rewards/margins": 0.17269130051136017, + "rewards/rejected": -0.1727050244808197, + "step": 9290 + }, + { + "epoch": 6.425311203319502, + "grad_norm": 6.133997917175293, + "learning_rate": 1.9859382203780544e-05, + "log_odds_chosen": 9.219642639160156, + "log_odds_ratio": -0.000537938205525279, + "logits/chosen": -0.45818766951560974, + "logits/rejected": -0.48580294847488403, + "logps/chosen": -0.010992909781634808, + "logps/rejected": -2.0333194732666016, + "loss": 0.5356, + "nll_loss": 0.13384385406970978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010992909083142877, + "rewards/margins": 0.20223265886306763, + "rewards/rejected": -0.20333196222782135, + "step": 9291 + }, + { + "epoch": 6.426002766251729, + "grad_norm": 7.714879989624023, + "learning_rate": 1.9855540187490397e-05, + "log_odds_chosen": 11.284984588623047, + "log_odds_ratio": -4.448912659427151e-05, + "logits/chosen": -0.5899901390075684, + "logits/rejected": -0.7108150720596313, + "logps/chosen": -0.00018639791232999414, + "logps/rejected": -2.3057572841644287, + "loss": 0.9004, + "nll_loss": 0.22508883476257324, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8639791960595176e-05, + "rewards/margins": 0.23055709898471832, + "rewards/rejected": -0.23057572543621063, + "step": 9292 + }, + { + "epoch": 6.426694329183956, + "grad_norm": 8.799266815185547, + "learning_rate": 1.985169817120025e-05, + "log_odds_chosen": 10.905144691467285, + "log_odds_ratio": -2.7711570510291494e-05, + "logits/chosen": -0.4120444655418396, + "logits/rejected": -0.4582875669002533, + "logps/chosen": -0.00017896827193908393, + "logps/rejected": -1.9567880630493164, + "loss": 0.6326, + "nll_loss": 0.15815678238868713, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7896827557706274e-05, + "rewards/margins": 0.19566091895103455, + "rewards/rejected": -0.19567880034446716, + "step": 9293 + }, + { + "epoch": 6.427385892116183, + "grad_norm": 6.370955944061279, + "learning_rate": 1.9847856154910098e-05, + "log_odds_chosen": 11.097612380981445, + "log_odds_ratio": -2.4163698981283233e-05, + "logits/chosen": -0.4839726984500885, + "logits/rejected": -0.49444663524627686, + "logps/chosen": -0.00022879890457261354, + "logps/rejected": -2.4734559059143066, + "loss": 0.6235, + "nll_loss": 0.15587235987186432, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2879890821059234e-05, + "rewards/margins": 0.24732272326946259, + "rewards/rejected": -0.24734559655189514, + "step": 9294 + }, + { + "epoch": 6.42807745504841, + "grad_norm": 7.625823020935059, + "learning_rate": 1.9844014138619947e-05, + "log_odds_chosen": 10.773378372192383, + "log_odds_ratio": -5.160564978723414e-05, + "logits/chosen": -0.4517514407634735, + "logits/rejected": -0.41671445965766907, + "logps/chosen": -0.00014808705600444227, + "logps/rejected": -2.0106725692749023, + "loss": 0.8005, + "nll_loss": 0.2001313418149948, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4808705600444227e-05, + "rewards/margins": 0.2010524570941925, + "rewards/rejected": -0.2010672688484192, + "step": 9295 + }, + { + "epoch": 6.4287690179806365, + "grad_norm": 5.913774013519287, + "learning_rate": 1.98401721223298e-05, + "log_odds_chosen": 11.38464069366455, + "log_odds_ratio": -2.319132545380853e-05, + "logits/chosen": -0.3373313844203949, + "logits/rejected": -0.39826345443725586, + "logps/chosen": -0.00011273365089436993, + "logps/rejected": -2.226011037826538, + "loss": 0.4352, + "nll_loss": 0.10878792405128479, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1273365089436993e-05, + "rewards/margins": 0.22258983552455902, + "rewards/rejected": -0.22260110080242157, + "step": 9296 + }, + { + "epoch": 6.429460580912863, + "grad_norm": 5.648289680480957, + "learning_rate": 1.983633010603965e-05, + "log_odds_chosen": 10.220199584960938, + "log_odds_ratio": -5.696108564734459e-05, + "logits/chosen": -0.4742014408111572, + "logits/rejected": -0.4896661043167114, + "logps/chosen": -0.0002253315324196592, + "logps/rejected": -1.7355661392211914, + "loss": 0.3925, + "nll_loss": 0.09811148792505264, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2533155060955323e-05, + "rewards/margins": 0.1735340803861618, + "rewards/rejected": -0.1735566258430481, + "step": 9297 + }, + { + "epoch": 6.43015214384509, + "grad_norm": 7.391413688659668, + "learning_rate": 1.98324880897495e-05, + "log_odds_chosen": 10.479411125183105, + "log_odds_ratio": -5.7051620387937874e-05, + "logits/chosen": -0.1723538339138031, + "logits/rejected": -0.15370287001132965, + "logps/chosen": -0.0002791702572721988, + "logps/rejected": -1.9361761808395386, + "loss": 0.5332, + "nll_loss": 0.13329452276229858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7917027182411402e-05, + "rewards/margins": 0.1935897171497345, + "rewards/rejected": -0.19361764192581177, + "step": 9298 + }, + { + "epoch": 6.430843706777317, + "grad_norm": 6.326645374298096, + "learning_rate": 1.9828646073459354e-05, + "log_odds_chosen": 10.112716674804688, + "log_odds_ratio": -0.0008755337912589312, + "logits/chosen": -0.3616921305656433, + "logits/rejected": -0.3587180972099304, + "logps/chosen": -0.002188085112720728, + "logps/rejected": -1.7334392070770264, + "loss": 0.9168, + "nll_loss": 0.2291109263896942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002188085054513067, + "rewards/margins": 0.17312513291835785, + "rewards/rejected": -0.17334392666816711, + "step": 9299 + }, + { + "epoch": 6.431535269709544, + "grad_norm": 11.17259693145752, + "learning_rate": 1.9824804057169203e-05, + "log_odds_chosen": 10.165445327758789, + "log_odds_ratio": -0.002701385412365198, + "logits/chosen": -0.1986115574836731, + "logits/rejected": -0.2049403190612793, + "logps/chosen": -0.0016654229257255793, + "logps/rejected": -1.7589303255081177, + "loss": 0.7622, + "nll_loss": 0.19027338922023773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016654228966217488, + "rewards/margins": 0.1757265031337738, + "rewards/rejected": -0.17589303851127625, + "step": 9300 + }, + { + "epoch": 6.432226832641771, + "grad_norm": 5.610093116760254, + "learning_rate": 1.9820962040879055e-05, + "log_odds_chosen": 11.30485725402832, + "log_odds_ratio": -1.869337029347662e-05, + "logits/chosen": -0.3075014352798462, + "logits/rejected": -0.27221566438674927, + "logps/chosen": -5.7261881011072546e-05, + "logps/rejected": -1.7437981367111206, + "loss": 0.4263, + "nll_loss": 0.10656291246414185, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.726188646804076e-06, + "rewards/margins": 0.1743740737438202, + "rewards/rejected": -0.17437982559204102, + "step": 9301 + }, + { + "epoch": 6.4329183955739975, + "grad_norm": 5.6840643882751465, + "learning_rate": 1.9817120024588907e-05, + "log_odds_chosen": 8.59211540222168, + "log_odds_ratio": -0.001855487353168428, + "logits/chosen": -0.6209691166877747, + "logits/rejected": -0.5917834639549255, + "logps/chosen": -0.003198577556759119, + "logps/rejected": -1.748227834701538, + "loss": 0.6318, + "nll_loss": 0.15775543451309204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000319857761496678, + "rewards/margins": 0.17450293898582458, + "rewards/rejected": -0.17482279241085052, + "step": 9302 + }, + { + "epoch": 6.433609958506224, + "grad_norm": 7.813873767852783, + "learning_rate": 1.9813278008298757e-05, + "log_odds_chosen": 10.65418815612793, + "log_odds_ratio": -6.0141857829876244e-05, + "logits/chosen": -0.4232789874076843, + "logits/rejected": -0.4591125249862671, + "logps/chosen": -0.00038247957127168775, + "logps/rejected": -2.263458251953125, + "loss": 0.4639, + "nll_loss": 0.11595964431762695, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.824795567197725e-05, + "rewards/margins": 0.22630760073661804, + "rewards/rejected": -0.22634583711624146, + "step": 9303 + }, + { + "epoch": 6.434301521438451, + "grad_norm": 4.04008150100708, + "learning_rate": 1.9809435992008606e-05, + "log_odds_chosen": 10.580642700195312, + "log_odds_ratio": -0.0004853243299294263, + "logits/chosen": -0.3635895550251007, + "logits/rejected": -0.44146040081977844, + "logps/chosen": -0.0014031457249075174, + "logps/rejected": -2.3424365520477295, + "loss": 0.5206, + "nll_loss": 0.13010403513908386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014031457249075174, + "rewards/margins": 0.23410335183143616, + "rewards/rejected": -0.23424366116523743, + "step": 9304 + }, + { + "epoch": 6.434993084370678, + "grad_norm": 6.571130752563477, + "learning_rate": 1.9805593975718458e-05, + "log_odds_chosen": 10.493505477905273, + "log_odds_ratio": -6.152471905807033e-05, + "logits/chosen": -1.1643874645233154, + "logits/rejected": -1.2527637481689453, + "logps/chosen": -0.0003062770119868219, + "logps/rejected": -2.2799320220947266, + "loss": 0.6452, + "nll_loss": 0.16130110621452332, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0627699743490666e-05, + "rewards/margins": 0.22796256840229034, + "rewards/rejected": -0.2279932051897049, + "step": 9305 + }, + { + "epoch": 6.435684647302905, + "grad_norm": 4.19816780090332, + "learning_rate": 1.9801751959428307e-05, + "log_odds_chosen": 9.588232040405273, + "log_odds_ratio": -0.00028488037060014904, + "logits/chosen": -0.4710809588432312, + "logits/rejected": -0.5234569311141968, + "logps/chosen": -0.0009138531750068069, + "logps/rejected": -2.053696632385254, + "loss": 0.5372, + "nll_loss": 0.1342787742614746, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.138531459029764e-05, + "rewards/margins": 0.20527824759483337, + "rewards/rejected": -0.20536965131759644, + "step": 9306 + }, + { + "epoch": 6.436376210235132, + "grad_norm": 6.288425445556641, + "learning_rate": 1.979790994313816e-05, + "log_odds_chosen": 10.086763381958008, + "log_odds_ratio": -0.00023319364117924124, + "logits/chosen": -0.4830077886581421, + "logits/rejected": -0.5060534477233887, + "logps/chosen": -0.0004371547547634691, + "logps/rejected": -1.8741427659988403, + "loss": 0.5413, + "nll_loss": 0.13530413806438446, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.371547402115539e-05, + "rewards/margins": 0.18737053871154785, + "rewards/rejected": -0.1874142587184906, + "step": 9307 + }, + { + "epoch": 6.4370677731673585, + "grad_norm": 6.124855995178223, + "learning_rate": 1.9794067926848012e-05, + "log_odds_chosen": 10.023574829101562, + "log_odds_ratio": -0.00033251961576752365, + "logits/chosen": -0.39527204632759094, + "logits/rejected": -0.48593708872795105, + "logps/chosen": -0.0005362079245969653, + "logps/rejected": -1.6085436344146729, + "loss": 0.3435, + "nll_loss": 0.08583007752895355, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362079537007958e-05, + "rewards/margins": 0.1608007550239563, + "rewards/rejected": -0.16085438430309296, + "step": 9308 + }, + { + "epoch": 6.437759336099585, + "grad_norm": 5.4500837326049805, + "learning_rate": 1.979022591055786e-05, + "log_odds_chosen": 10.023755073547363, + "log_odds_ratio": -8.400460501434281e-05, + "logits/chosen": -0.1788664311170578, + "logits/rejected": -0.30472058057785034, + "logps/chosen": -0.00044232915388420224, + "logps/rejected": -1.5639151334762573, + "loss": 0.6181, + "nll_loss": 0.1545056849718094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.423291466082446e-05, + "rewards/margins": 0.15634728968143463, + "rewards/rejected": -0.15639153122901917, + "step": 9309 + }, + { + "epoch": 6.438450899031812, + "grad_norm": 8.140402793884277, + "learning_rate": 1.9786383894267713e-05, + "log_odds_chosen": 11.199501991271973, + "log_odds_ratio": -2.9649016141775064e-05, + "logits/chosen": -0.3762897849082947, + "logits/rejected": -0.2551382780075073, + "logps/chosen": -0.0001679428678471595, + "logps/rejected": -2.0896825790405273, + "loss": 0.7305, + "nll_loss": 0.18261376023292542, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6794285329524428e-05, + "rewards/margins": 0.2089514434337616, + "rewards/rejected": -0.20896823704242706, + "step": 9310 + }, + { + "epoch": 6.439142461964039, + "grad_norm": 6.742094993591309, + "learning_rate": 1.9782541877977566e-05, + "log_odds_chosen": 10.582254409790039, + "log_odds_ratio": -5.873282498214394e-05, + "logits/chosen": 0.10128258913755417, + "logits/rejected": 0.13267558813095093, + "logps/chosen": -0.0006081910105422139, + "logps/rejected": -2.6717841625213623, + "loss": 0.6592, + "nll_loss": 0.16479700803756714, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.081910396460444e-05, + "rewards/margins": 0.26711761951446533, + "rewards/rejected": -0.26717841625213623, + "step": 9311 + }, + { + "epoch": 6.439834024896266, + "grad_norm": 9.156832695007324, + "learning_rate": 1.9778699861687415e-05, + "log_odds_chosen": 9.505995750427246, + "log_odds_ratio": -0.000228977354709059, + "logits/chosen": -0.6701303720474243, + "logits/rejected": -0.6045225858688354, + "logps/chosen": -0.0007705151801928878, + "logps/rejected": -1.7131035327911377, + "loss": 0.5438, + "nll_loss": 0.13592730462551117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.705151801928878e-05, + "rewards/margins": 0.17123331129550934, + "rewards/rejected": -0.17131036520004272, + "step": 9312 + }, + { + "epoch": 6.440525587828493, + "grad_norm": 9.81544303894043, + "learning_rate": 1.9774857845397264e-05, + "log_odds_chosen": 10.630544662475586, + "log_odds_ratio": -0.00013499357737600803, + "logits/chosen": -0.050855569541454315, + "logits/rejected": 0.013263605535030365, + "logps/chosen": -0.0007859627366997302, + "logps/rejected": -2.815199375152588, + "loss": 0.6398, + "nll_loss": 0.15994472801685333, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.859627658035606e-05, + "rewards/margins": 0.2814413905143738, + "rewards/rejected": -0.28151994943618774, + "step": 9313 + }, + { + "epoch": 6.441217150760719, + "grad_norm": 9.10059928894043, + "learning_rate": 1.9771015829107116e-05, + "log_odds_chosen": 11.148942947387695, + "log_odds_ratio": -6.803381984354928e-05, + "logits/chosen": -0.27442580461502075, + "logits/rejected": -0.2540287375450134, + "logps/chosen": -0.0002024202112806961, + "logps/rejected": -2.5142269134521484, + "loss": 1.0249, + "nll_loss": 0.2562128007411957, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.024202149186749e-05, + "rewards/margins": 0.25140243768692017, + "rewards/rejected": -0.2514226734638214, + "step": 9314 + }, + { + "epoch": 6.441908713692946, + "grad_norm": 7.348708629608154, + "learning_rate": 1.9767173812816966e-05, + "log_odds_chosen": 11.32642936706543, + "log_odds_ratio": -9.057987335836515e-05, + "logits/chosen": -0.20230460166931152, + "logits/rejected": -0.28677451610565186, + "logps/chosen": -0.00028569228015840054, + "logps/rejected": -3.0858020782470703, + "loss": 0.6937, + "nll_loss": 0.17341278493404388, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8569229471031576e-05, + "rewards/margins": 0.3085516691207886, + "rewards/rejected": -0.3085802495479584, + "step": 9315 + }, + { + "epoch": 6.442600276625173, + "grad_norm": 8.409920692443848, + "learning_rate": 1.9763331796526818e-05, + "log_odds_chosen": 10.357051849365234, + "log_odds_ratio": -0.00010548779391683638, + "logits/chosen": -0.30940353870391846, + "logits/rejected": -0.36812877655029297, + "logps/chosen": -0.0001265437895199284, + "logps/rejected": -1.275539517402649, + "loss": 0.8621, + "nll_loss": 0.21550799906253815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.265437913389178e-05, + "rewards/margins": 0.12754130363464355, + "rewards/rejected": -0.12755395472049713, + "step": 9316 + }, + { + "epoch": 6.4432918395574, + "grad_norm": 12.811484336853027, + "learning_rate": 1.975948978023667e-05, + "log_odds_chosen": 10.652695655822754, + "log_odds_ratio": -8.221437747124583e-05, + "logits/chosen": -0.3613952398300171, + "logits/rejected": -0.400936484336853, + "logps/chosen": -0.00026950432220473886, + "logps/rejected": -1.9418749809265137, + "loss": 0.4245, + "nll_loss": 0.10611484199762344, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6950432584271766e-05, + "rewards/margins": 0.1941605508327484, + "rewards/rejected": -0.19418750703334808, + "step": 9317 + }, + { + "epoch": 6.443983402489627, + "grad_norm": 4.133450031280518, + "learning_rate": 1.975564776394652e-05, + "log_odds_chosen": 10.288055419921875, + "log_odds_ratio": -0.002530732424929738, + "logits/chosen": -0.6164582371711731, + "logits/rejected": -0.6825376749038696, + "logps/chosen": -0.0004605620924849063, + "logps/rejected": -1.9580541849136353, + "loss": 0.6229, + "nll_loss": 0.1554667353630066, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6056211431277916e-05, + "rewards/margins": 0.1957593709230423, + "rewards/rejected": -0.1958054155111313, + "step": 9318 + }, + { + "epoch": 6.444674965421854, + "grad_norm": 7.042186737060547, + "learning_rate": 1.9751805747656372e-05, + "log_odds_chosen": 10.167938232421875, + "log_odds_ratio": -0.0009934669360518456, + "logits/chosen": -0.2549976110458374, + "logits/rejected": -0.33388811349868774, + "logps/chosen": -0.0003739151870831847, + "logps/rejected": -1.7513947486877441, + "loss": 0.7465, + "nll_loss": 0.1865352988243103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7391520891105756e-05, + "rewards/margins": 0.1751020848751068, + "rewards/rejected": -0.17513945698738098, + "step": 9319 + }, + { + "epoch": 6.44536652835408, + "grad_norm": 6.495095252990723, + "learning_rate": 1.9747963731366224e-05, + "log_odds_chosen": 10.530926704406738, + "log_odds_ratio": -5.371103543438949e-05, + "logits/chosen": -0.6024811863899231, + "logits/rejected": -0.6971926689147949, + "logps/chosen": -0.0005244613857939839, + "logps/rejected": -2.2407333850860596, + "loss": 0.5084, + "nll_loss": 0.12709853053092957, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.244614294497296e-05, + "rewards/margins": 0.22402088344097137, + "rewards/rejected": -0.2240733504295349, + "step": 9320 + }, + { + "epoch": 6.446058091286307, + "grad_norm": 6.348737716674805, + "learning_rate": 1.9744121715076073e-05, + "log_odds_chosen": 10.62696361541748, + "log_odds_ratio": -0.000152592605445534, + "logits/chosen": -0.4025437831878662, + "logits/rejected": -0.4162963032722473, + "logps/chosen": -0.00025007472140714526, + "logps/rejected": -2.108785629272461, + "loss": 0.5057, + "nll_loss": 0.12640956044197083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.500747359590605e-05, + "rewards/margins": 0.21085354685783386, + "rewards/rejected": -0.21087853610515594, + "step": 9321 + }, + { + "epoch": 6.446749654218534, + "grad_norm": 7.778499603271484, + "learning_rate": 1.9740279698785922e-05, + "log_odds_chosen": 10.604227066040039, + "log_odds_ratio": -9.288093860959634e-05, + "logits/chosen": -0.2538522183895111, + "logits/rejected": -0.3410423994064331, + "logps/chosen": -0.0012250157305970788, + "logps/rejected": -2.4406208992004395, + "loss": 0.7234, + "nll_loss": 0.1808387190103531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012250157305970788, + "rewards/margins": 0.2439395785331726, + "rewards/rejected": -0.24406209588050842, + "step": 9322 + }, + { + "epoch": 6.447441217150761, + "grad_norm": 4.946698188781738, + "learning_rate": 1.9736437682495775e-05, + "log_odds_chosen": 10.961910247802734, + "log_odds_ratio": -0.0001718278363114223, + "logits/chosen": -0.43663695454597473, + "logits/rejected": -0.5011554956436157, + "logps/chosen": -0.00027560797752812505, + "logps/rejected": -2.781618356704712, + "loss": 0.3741, + "nll_loss": 0.09350738674402237, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7560796297620982e-05, + "rewards/margins": 0.2781342566013336, + "rewards/rejected": -0.27816182374954224, + "step": 9323 + }, + { + "epoch": 6.448132780082988, + "grad_norm": 6.42108678817749, + "learning_rate": 1.9732595666205624e-05, + "log_odds_chosen": 11.063654899597168, + "log_odds_ratio": -0.0002686860098037869, + "logits/chosen": -0.5942996144294739, + "logits/rejected": -0.705431342124939, + "logps/chosen": -0.0003295104543212801, + "logps/rejected": -2.265841245651245, + "loss": 1.2717, + "nll_loss": 0.31789982318878174, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2951047614915296e-05, + "rewards/margins": 0.22655119001865387, + "rewards/rejected": -0.22658413648605347, + "step": 9324 + }, + { + "epoch": 6.448824343015215, + "grad_norm": 4.929689407348633, + "learning_rate": 1.9728753649915476e-05, + "log_odds_chosen": 11.852518081665039, + "log_odds_ratio": -9.962430340237916e-06, + "logits/chosen": -0.7141345143318176, + "logits/rejected": -0.8319796323776245, + "logps/chosen": -0.00038489949656650424, + "logps/rejected": -2.632211685180664, + "loss": 0.4818, + "nll_loss": 0.1204419881105423, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8489950384246185e-05, + "rewards/margins": 0.263182669878006, + "rewards/rejected": -0.2632211744785309, + "step": 9325 + }, + { + "epoch": 6.449515905947441, + "grad_norm": 5.438591003417969, + "learning_rate": 1.972491163362533e-05, + "log_odds_chosen": 10.398658752441406, + "log_odds_ratio": -0.00011632785754045472, + "logits/chosen": -0.5323688983917236, + "logits/rejected": -0.6243947744369507, + "logps/chosen": -0.0003119676257483661, + "logps/rejected": -1.7409591674804688, + "loss": 0.7072, + "nll_loss": 0.1767822802066803, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.119676694041118e-05, + "rewards/margins": 0.17406471073627472, + "rewards/rejected": -0.17409591376781464, + "step": 9326 + }, + { + "epoch": 6.450207468879668, + "grad_norm": 6.563150882720947, + "learning_rate": 1.9721069617335178e-05, + "log_odds_chosen": 11.509295463562012, + "log_odds_ratio": -3.75781164621003e-05, + "logits/chosen": -0.442585289478302, + "logits/rejected": -0.5469682216644287, + "logps/chosen": -0.00018356960208620876, + "logps/rejected": -2.4356777667999268, + "loss": 0.6008, + "nll_loss": 0.15020179748535156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8356960936216637e-05, + "rewards/margins": 0.2435494214296341, + "rewards/rejected": -0.24356777966022491, + "step": 9327 + }, + { + "epoch": 6.450899031811895, + "grad_norm": 17.16939926147461, + "learning_rate": 1.971722760104503e-05, + "log_odds_chosen": 10.529670715332031, + "log_odds_ratio": -6.26058827037923e-05, + "logits/chosen": -0.09079033136367798, + "logits/rejected": -0.12593227624893188, + "logps/chosen": -0.0005906840669922531, + "logps/rejected": -2.2363271713256836, + "loss": 0.7384, + "nll_loss": 0.18458396196365356, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906839942326769e-05, + "rewards/margins": 0.22357365489006042, + "rewards/rejected": -0.22363270819187164, + "step": 9328 + }, + { + "epoch": 6.451590594744122, + "grad_norm": 6.848952770233154, + "learning_rate": 1.9713385584754883e-05, + "log_odds_chosen": 10.813990592956543, + "log_odds_ratio": -2.499014044587966e-05, + "logits/chosen": -0.5543779134750366, + "logits/rejected": -0.5834307670593262, + "logps/chosen": -0.00018239648488815874, + "logps/rejected": -2.1655075550079346, + "loss": 0.7097, + "nll_loss": 0.17741863429546356, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8239650671603158e-05, + "rewards/margins": 0.21653252840042114, + "rewards/rejected": -0.2165507674217224, + "step": 9329 + }, + { + "epoch": 6.452282157676349, + "grad_norm": 7.377292156219482, + "learning_rate": 1.9709543568464732e-05, + "log_odds_chosen": 11.44371509552002, + "log_odds_ratio": -3.2429546990897506e-05, + "logits/chosen": -0.05530393868684769, + "logits/rejected": -0.1549655795097351, + "logps/chosen": -0.00017569860210642219, + "logps/rejected": -2.239352226257324, + "loss": 0.5736, + "nll_loss": 0.14338725805282593, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.75698605744401e-05, + "rewards/margins": 0.2239176630973816, + "rewards/rejected": -0.22393521666526794, + "step": 9330 + }, + { + "epoch": 6.4529737206085755, + "grad_norm": 7.323204517364502, + "learning_rate": 1.970570155217458e-05, + "log_odds_chosen": 10.47035026550293, + "log_odds_ratio": -0.000393639609683305, + "logits/chosen": -0.6959431767463684, + "logits/rejected": -0.6528046131134033, + "logps/chosen": -0.0016256331000477076, + "logps/rejected": -2.6668670177459717, + "loss": 0.8856, + "nll_loss": 0.2213638573884964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016256331582553685, + "rewards/margins": 0.26652413606643677, + "rewards/rejected": -0.26668670773506165, + "step": 9331 + }, + { + "epoch": 6.453665283540802, + "grad_norm": 6.314630031585693, + "learning_rate": 1.9701859535884433e-05, + "log_odds_chosen": 11.20853328704834, + "log_odds_ratio": -9.939574374584481e-05, + "logits/chosen": -0.811857283115387, + "logits/rejected": -0.8544670343399048, + "logps/chosen": -0.00013169238809496164, + "logps/rejected": -2.252102851867676, + "loss": 0.5471, + "nll_loss": 0.13677413761615753, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3169238627597224e-05, + "rewards/margins": 0.22519709169864655, + "rewards/rejected": -0.2252102643251419, + "step": 9332 + }, + { + "epoch": 6.454356846473029, + "grad_norm": 8.173171997070312, + "learning_rate": 1.9698017519594282e-05, + "log_odds_chosen": 11.281229019165039, + "log_odds_ratio": -6.0934617067687213e-05, + "logits/chosen": -0.24341799318790436, + "logits/rejected": -0.3885093927383423, + "logps/chosen": -7.761404413031414e-05, + "logps/rejected": -2.0577759742736816, + "loss": 0.676, + "nll_loss": 0.16900460422039032, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.761404049233533e-06, + "rewards/margins": 0.2057698518037796, + "rewards/rejected": -0.2057776153087616, + "step": 9333 + }, + { + "epoch": 6.455048409405256, + "grad_norm": 11.768661499023438, + "learning_rate": 1.9694175503304135e-05, + "log_odds_chosen": 10.951183319091797, + "log_odds_ratio": -0.0001368989615002647, + "logits/chosen": -0.5413077473640442, + "logits/rejected": -0.564630925655365, + "logps/chosen": -0.0001645167067181319, + "logps/rejected": -2.0779314041137695, + "loss": 0.5338, + "nll_loss": 0.13344186544418335, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.645167139940895e-05, + "rewards/margins": 0.20777669548988342, + "rewards/rejected": -0.20779314637184143, + "step": 9334 + }, + { + "epoch": 6.455739972337483, + "grad_norm": 5.604323863983154, + "learning_rate": 1.9690333487013987e-05, + "log_odds_chosen": 10.265859603881836, + "log_odds_ratio": -0.00023694182164035738, + "logits/chosen": -0.5120769739151001, + "logits/rejected": -0.4896959364414215, + "logps/chosen": -0.0002953608054667711, + "logps/rejected": -1.6938186883926392, + "loss": 0.3457, + "nll_loss": 0.08640851825475693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9536076908698305e-05, + "rewards/margins": 0.16935233771800995, + "rewards/rejected": -0.16938188672065735, + "step": 9335 + }, + { + "epoch": 6.45643153526971, + "grad_norm": 6.015252590179443, + "learning_rate": 1.9686491470723836e-05, + "log_odds_chosen": 10.84512710571289, + "log_odds_ratio": -7.400707545457408e-05, + "logits/chosen": -0.5778646469116211, + "logits/rejected": -0.6446388959884644, + "logps/chosen": -0.0002638440055307001, + "logps/rejected": -2.0140769481658936, + "loss": 0.6219, + "nll_loss": 0.1554756760597229, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6384399461676367e-05, + "rewards/margins": 0.20138132572174072, + "rewards/rejected": -0.20140770077705383, + "step": 9336 + }, + { + "epoch": 6.4571230982019365, + "grad_norm": 8.022332191467285, + "learning_rate": 1.968264945443369e-05, + "log_odds_chosen": 10.207698822021484, + "log_odds_ratio": -0.00021755530906375498, + "logits/chosen": -0.5963926315307617, + "logits/rejected": -0.6795532703399658, + "logps/chosen": -0.0004776669084094465, + "logps/rejected": -1.8929169178009033, + "loss": 0.5339, + "nll_loss": 0.13344630599021912, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.776669084094465e-05, + "rewards/margins": 0.18924394249916077, + "rewards/rejected": -0.18929171562194824, + "step": 9337 + }, + { + "epoch": 6.457814661134163, + "grad_norm": 9.413582801818848, + "learning_rate": 1.967880743814354e-05, + "log_odds_chosen": 10.67624282836914, + "log_odds_ratio": -3.9920356357470155e-05, + "logits/chosen": -0.6611392498016357, + "logits/rejected": -0.6790140867233276, + "logps/chosen": -0.0013447502860799432, + "logps/rejected": -2.598658561706543, + "loss": 0.5386, + "nll_loss": 0.1346549391746521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013447501987684518, + "rewards/margins": 0.2597314119338989, + "rewards/rejected": -0.2598658800125122, + "step": 9338 + }, + { + "epoch": 6.45850622406639, + "grad_norm": 6.010379791259766, + "learning_rate": 1.967496542185339e-05, + "log_odds_chosen": 10.788137435913086, + "log_odds_ratio": -0.00010153088805964217, + "logits/chosen": -0.3897785544395447, + "logits/rejected": -0.4715155363082886, + "logps/chosen": -0.00032580961124040186, + "logps/rejected": -2.5287859439849854, + "loss": 0.7011, + "nll_loss": 0.17525643110275269, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2580959668848664e-05, + "rewards/margins": 0.25284600257873535, + "rewards/rejected": -0.2528786063194275, + "step": 9339 + }, + { + "epoch": 6.459197786998617, + "grad_norm": 12.115055084228516, + "learning_rate": 1.967112340556324e-05, + "log_odds_chosen": 10.19715404510498, + "log_odds_ratio": -0.0005401435191743076, + "logits/chosen": -0.33382919430732727, + "logits/rejected": -0.3910999894142151, + "logps/chosen": -0.00039712939178571105, + "logps/rejected": -2.1408450603485107, + "loss": 0.7251, + "nll_loss": 0.1812172532081604, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.971294063376263e-05, + "rewards/margins": 0.21404479444026947, + "rewards/rejected": -0.21408450603485107, + "step": 9340 + }, + { + "epoch": 6.459889349930844, + "grad_norm": 5.437094211578369, + "learning_rate": 1.9667281389273092e-05, + "log_odds_chosen": 9.647651672363281, + "log_odds_ratio": -0.001371509861201048, + "logits/chosen": -0.4421365559101105, + "logits/rejected": -0.4803558588027954, + "logps/chosen": -0.0007117479108273983, + "logps/rejected": -1.8053985834121704, + "loss": 0.6026, + "nll_loss": 0.15051212906837463, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117479981388897e-05, + "rewards/margins": 0.18046869337558746, + "rewards/rejected": -0.18053987622261047, + "step": 9341 + }, + { + "epoch": 6.460580912863071, + "grad_norm": 6.155204772949219, + "learning_rate": 1.966343937298294e-05, + "log_odds_chosen": 9.544366836547852, + "log_odds_ratio": -0.0002290259872097522, + "logits/chosen": -0.3371240496635437, + "logits/rejected": -0.3929316997528076, + "logps/chosen": -0.000392138579627499, + "logps/rejected": -1.5680603981018066, + "loss": 0.6517, + "nll_loss": 0.16289177536964417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9213860873132944e-05, + "rewards/margins": 0.1567668467760086, + "rewards/rejected": -0.15680605173110962, + "step": 9342 + }, + { + "epoch": 6.4612724757952975, + "grad_norm": 13.597868919372559, + "learning_rate": 1.9659597356692793e-05, + "log_odds_chosen": 10.36497688293457, + "log_odds_ratio": -0.0014595024986192584, + "logits/chosen": -0.12537351250648499, + "logits/rejected": -0.10785672068595886, + "logps/chosen": -0.0014373556477949023, + "logps/rejected": -2.0005123615264893, + "loss": 0.7737, + "nll_loss": 0.19328731298446655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014373556768987328, + "rewards/margins": 0.19990751147270203, + "rewards/rejected": -0.20005124807357788, + "step": 9343 + }, + { + "epoch": 6.461964038727524, + "grad_norm": 13.008829116821289, + "learning_rate": 1.9655755340402642e-05, + "log_odds_chosen": 11.800149917602539, + "log_odds_ratio": -0.0003027294878847897, + "logits/chosen": -0.6529538631439209, + "logits/rejected": -0.5301782488822937, + "logps/chosen": -0.0005336821777746081, + "logps/rejected": -2.8983495235443115, + "loss": 0.7727, + "nll_loss": 0.19314080476760864, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.336822141543962e-05, + "rewards/margins": 0.2897816002368927, + "rewards/rejected": -0.2898349463939667, + "step": 9344 + }, + { + "epoch": 6.462655601659751, + "grad_norm": 5.498860836029053, + "learning_rate": 1.9651913324112495e-05, + "log_odds_chosen": 10.476800918579102, + "log_odds_ratio": -0.0011540587292984128, + "logits/chosen": -0.04873857647180557, + "logits/rejected": -0.03208974748849869, + "logps/chosen": -0.0013205776922404766, + "logps/rejected": -2.6151742935180664, + "loss": 0.85, + "nll_loss": 0.2123776227235794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013205778668634593, + "rewards/margins": 0.26138538122177124, + "rewards/rejected": -0.26151740550994873, + "step": 9345 + }, + { + "epoch": 6.463347164591978, + "grad_norm": 4.314711570739746, + "learning_rate": 1.9648071307822347e-05, + "log_odds_chosen": 11.41019344329834, + "log_odds_ratio": -7.023775106063113e-05, + "logits/chosen": -0.20374369621276855, + "logits/rejected": -0.33246517181396484, + "logps/chosen": -0.00020804136875085533, + "logps/rejected": -2.5009703636169434, + "loss": 0.6544, + "nll_loss": 0.16359777748584747, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.080413469229825e-05, + "rewards/margins": 0.2500762641429901, + "rewards/rejected": -0.2500970661640167, + "step": 9346 + }, + { + "epoch": 6.464038727524205, + "grad_norm": 10.438886642456055, + "learning_rate": 1.9644229291532196e-05, + "log_odds_chosen": 9.027183532714844, + "log_odds_ratio": -0.0007800416206009686, + "logits/chosen": -0.5669814944267273, + "logits/rejected": -0.6429253816604614, + "logps/chosen": -0.0011012261966243386, + "logps/rejected": -1.3472520112991333, + "loss": 0.8509, + "nll_loss": 0.21263641119003296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011012262257281691, + "rewards/margins": 0.13461507856845856, + "rewards/rejected": -0.1347251981496811, + "step": 9347 + }, + { + "epoch": 6.464730290456432, + "grad_norm": 7.92177677154541, + "learning_rate": 1.964038727524205e-05, + "log_odds_chosen": 10.348886489868164, + "log_odds_ratio": -0.00021048627968411893, + "logits/chosen": -0.16438069939613342, + "logits/rejected": -0.22757890820503235, + "logps/chosen": -0.0006242183735594153, + "logps/rejected": -2.191136598587036, + "loss": 0.7032, + "nll_loss": 0.1757863610982895, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.242184463189915e-05, + "rewards/margins": 0.21905125677585602, + "rewards/rejected": -0.21911367774009705, + "step": 9348 + }, + { + "epoch": 6.4654218533886585, + "grad_norm": 6.527889728546143, + "learning_rate": 1.96365452589519e-05, + "log_odds_chosen": 10.015470504760742, + "log_odds_ratio": -0.00015627051470801234, + "logits/chosen": -0.504023015499115, + "logits/rejected": -0.560677707195282, + "logps/chosen": -0.0009324349230155349, + "logps/rejected": -1.8942363262176514, + "loss": 0.5809, + "nll_loss": 0.1452087163925171, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.324349230155349e-05, + "rewards/margins": 0.18933041393756866, + "rewards/rejected": -0.18942365050315857, + "step": 9349 + }, + { + "epoch": 6.466113416320885, + "grad_norm": 5.253231048583984, + "learning_rate": 1.963270324266175e-05, + "log_odds_chosen": 10.344749450683594, + "log_odds_ratio": -0.00010648036550264806, + "logits/chosen": -0.07455027103424072, + "logits/rejected": -0.14819921553134918, + "logps/chosen": -0.00038044259417802095, + "logps/rejected": -2.1492841243743896, + "loss": 0.583, + "nll_loss": 0.14573504030704498, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.804425796261057e-05, + "rewards/margins": 0.21489037573337555, + "rewards/rejected": -0.21492841839790344, + "step": 9350 + }, + { + "epoch": 6.466804979253112, + "grad_norm": 10.674192428588867, + "learning_rate": 1.96288612263716e-05, + "log_odds_chosen": 9.346158981323242, + "log_odds_ratio": -0.0011899136006832123, + "logits/chosen": -0.4632045328617096, + "logits/rejected": -0.5040472149848938, + "logps/chosen": -0.0012183859944343567, + "logps/rejected": -1.971890926361084, + "loss": 0.715, + "nll_loss": 0.17863427102565765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012183861690573394, + "rewards/margins": 0.1970672458410263, + "rewards/rejected": -0.1971890926361084, + "step": 9351 + }, + { + "epoch": 6.467496542185339, + "grad_norm": 8.562958717346191, + "learning_rate": 1.9625019210081452e-05, + "log_odds_chosen": 11.356171607971191, + "log_odds_ratio": -2.4882941943360493e-05, + "logits/chosen": 0.02768966555595398, + "logits/rejected": 0.0028723329305648804, + "logps/chosen": -0.00023115705698728561, + "logps/rejected": -2.334925889968872, + "loss": 0.659, + "nll_loss": 0.1647453010082245, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3115706426324323e-05, + "rewards/margins": 0.23346947133541107, + "rewards/rejected": -0.23349258303642273, + "step": 9352 + }, + { + "epoch": 6.468188105117566, + "grad_norm": 6.569983959197998, + "learning_rate": 1.96211771937913e-05, + "log_odds_chosen": 10.749797821044922, + "log_odds_ratio": -4.219009861117229e-05, + "logits/chosen": -0.03230445086956024, + "logits/rejected": -0.04771629720926285, + "logps/chosen": -0.00044686076580546796, + "logps/rejected": -2.2594237327575684, + "loss": 0.5344, + "nll_loss": 0.13359317183494568, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4686075852951035e-05, + "rewards/margins": 0.22589769959449768, + "rewards/rejected": -0.22594238817691803, + "step": 9353 + }, + { + "epoch": 6.468879668049793, + "grad_norm": 8.693273544311523, + "learning_rate": 1.9617335177501153e-05, + "log_odds_chosen": 10.477245330810547, + "log_odds_ratio": -0.00017616937111597508, + "logits/chosen": -0.1361008733510971, + "logits/rejected": -0.2993399202823639, + "logps/chosen": -0.0009238553466275334, + "logps/rejected": -2.4320244789123535, + "loss": 0.7092, + "nll_loss": 0.17728503048419952, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.238554048351943e-05, + "rewards/margins": 0.2431100457906723, + "rewards/rejected": -0.24320244789123535, + "step": 9354 + }, + { + "epoch": 6.4695712309820195, + "grad_norm": 5.629900932312012, + "learning_rate": 1.9613493161211006e-05, + "log_odds_chosen": 11.165910720825195, + "log_odds_ratio": -3.186017056577839e-05, + "logits/chosen": -0.2521139979362488, + "logits/rejected": -0.2641168236732483, + "logps/chosen": -0.0004452554858289659, + "logps/rejected": -2.6347713470458984, + "loss": 0.6108, + "nll_loss": 0.15268629789352417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.452555003808811e-05, + "rewards/margins": 0.2634325921535492, + "rewards/rejected": -0.2634771168231964, + "step": 9355 + }, + { + "epoch": 6.470262793914246, + "grad_norm": 5.684783458709717, + "learning_rate": 1.9609651144920855e-05, + "log_odds_chosen": 11.058123588562012, + "log_odds_ratio": -0.0007495767204090953, + "logits/chosen": -0.4604434370994568, + "logits/rejected": -0.5082454681396484, + "logps/chosen": -0.0005760700441896915, + "logps/rejected": -2.651322364807129, + "loss": 0.4217, + "nll_loss": 0.10533834248781204, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7607005146564916e-05, + "rewards/margins": 0.2650746703147888, + "rewards/rejected": -0.26513224840164185, + "step": 9356 + }, + { + "epoch": 6.470954356846473, + "grad_norm": 4.939406394958496, + "learning_rate": 1.9605809128630707e-05, + "log_odds_chosen": 10.365484237670898, + "log_odds_ratio": -0.0010485876118764281, + "logits/chosen": 0.26797395944595337, + "logits/rejected": 0.213160440325737, + "logps/chosen": -0.0005518148536793888, + "logps/rejected": -2.0900015830993652, + "loss": 0.7742, + "nll_loss": 0.1934371292591095, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5181484640343115e-05, + "rewards/margins": 0.20894496142864227, + "rewards/rejected": -0.2090001404285431, + "step": 9357 + }, + { + "epoch": 6.4716459197787, + "grad_norm": 5.356972694396973, + "learning_rate": 1.960196711234056e-05, + "log_odds_chosen": 10.236105918884277, + "log_odds_ratio": -8.164734754245728e-05, + "logits/chosen": -0.3928852379322052, + "logits/rejected": -0.440325528383255, + "logps/chosen": -0.00031203130492940545, + "logps/rejected": -2.0702123641967773, + "loss": 0.732, + "nll_loss": 0.18299226462841034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1203126127365977e-05, + "rewards/margins": 0.206990048289299, + "rewards/rejected": -0.20702123641967773, + "step": 9358 + }, + { + "epoch": 6.472337482710927, + "grad_norm": 3.448922634124756, + "learning_rate": 1.959812509605041e-05, + "log_odds_chosen": 11.163787841796875, + "log_odds_ratio": -2.6103556592715904e-05, + "logits/chosen": -0.5693701505661011, + "logits/rejected": -0.47740453481674194, + "logps/chosen": -0.0026880258228629827, + "logps/rejected": -2.838186740875244, + "loss": 1.1219, + "nll_loss": 0.2804635763168335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002688025706447661, + "rewards/margins": 0.2835499048233032, + "rewards/rejected": -0.28381872177124023, + "step": 9359 + }, + { + "epoch": 6.473029045643154, + "grad_norm": 4.64267110824585, + "learning_rate": 1.9594283079760258e-05, + "log_odds_chosen": 10.282106399536133, + "log_odds_ratio": -7.090805593179539e-05, + "logits/chosen": -0.46651166677474976, + "logits/rejected": -0.5370398759841919, + "logps/chosen": -0.0005136644467711449, + "logps/rejected": -2.2826156616210938, + "loss": 0.5595, + "nll_loss": 0.1398591846227646, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.136644540471025e-05, + "rewards/margins": 0.2282102108001709, + "rewards/rejected": -0.2282615602016449, + "step": 9360 + }, + { + "epoch": 6.4737206085753805, + "grad_norm": 10.618483543395996, + "learning_rate": 1.959044106347011e-05, + "log_odds_chosen": 12.327230453491211, + "log_odds_ratio": -1.442717075406108e-05, + "logits/chosen": -0.48336470127105713, + "logits/rejected": -0.5778825879096985, + "logps/chosen": -0.00022113663726486266, + "logps/rejected": -3.5289645195007324, + "loss": 0.7443, + "nll_loss": 0.18606841564178467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2113663362688385e-05, + "rewards/margins": 0.3528743088245392, + "rewards/rejected": -0.35289645195007324, + "step": 9361 + }, + { + "epoch": 6.474412171507607, + "grad_norm": 7.489385604858398, + "learning_rate": 1.958659904717996e-05, + "log_odds_chosen": 10.170774459838867, + "log_odds_ratio": -0.00018795541836880147, + "logits/chosen": -0.43242692947387695, + "logits/rejected": -0.38654130697250366, + "logps/chosen": -0.000399279611883685, + "logps/rejected": -2.234528064727783, + "loss": 0.4405, + "nll_loss": 0.11011669039726257, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9927959733176976e-05, + "rewards/margins": 0.2234128713607788, + "rewards/rejected": -0.22345280647277832, + "step": 9362 + }, + { + "epoch": 6.475103734439834, + "grad_norm": 3.973806142807007, + "learning_rate": 1.958275703088981e-05, + "log_odds_chosen": 10.255285263061523, + "log_odds_ratio": -0.00025675195502117276, + "logits/chosen": -0.3827497661113739, + "logits/rejected": -0.4834297001361847, + "logps/chosen": -0.0008683456690050662, + "logps/rejected": -2.241360902786255, + "loss": 0.4224, + "nll_loss": 0.10557659715414047, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.683456690050662e-05, + "rewards/margins": 0.22404927015304565, + "rewards/rejected": -0.224136084318161, + "step": 9363 + }, + { + "epoch": 6.475795297372061, + "grad_norm": 5.911285877227783, + "learning_rate": 1.9578915014599664e-05, + "log_odds_chosen": 10.767386436462402, + "log_odds_ratio": -0.0002806742559187114, + "logits/chosen": -0.35741111636161804, + "logits/rejected": -0.3878954350948334, + "logps/chosen": -0.00031248465529643, + "logps/rejected": -2.3113627433776855, + "loss": 0.717, + "nll_loss": 0.17922081053256989, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.124846625723876e-05, + "rewards/margins": 0.2311050146818161, + "rewards/rejected": -0.2311362475156784, + "step": 9364 + }, + { + "epoch": 6.476486860304288, + "grad_norm": 5.464684963226318, + "learning_rate": 1.9575072998309513e-05, + "log_odds_chosen": 11.092345237731934, + "log_odds_ratio": -2.9721091777901165e-05, + "logits/chosen": -0.005119264125823975, + "logits/rejected": -0.08913788199424744, + "logps/chosen": -0.0006547888042405248, + "logps/rejected": -2.8356149196624756, + "loss": 0.7138, + "nll_loss": 0.17844641208648682, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5478881879244e-05, + "rewards/margins": 0.28349605202674866, + "rewards/rejected": -0.2835615277290344, + "step": 9365 + }, + { + "epoch": 6.477178423236515, + "grad_norm": 3.9686710834503174, + "learning_rate": 1.9571230982019366e-05, + "log_odds_chosen": 10.885116577148438, + "log_odds_ratio": -5.042840712121688e-05, + "logits/chosen": -0.5279171466827393, + "logits/rejected": -0.522693395614624, + "logps/chosen": -0.00017958540411200374, + "logps/rejected": -2.0214760303497314, + "loss": 0.3655, + "nll_loss": 0.09135989844799042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7958540411200374e-05, + "rewards/margins": 0.20212964713573456, + "rewards/rejected": -0.20214760303497314, + "step": 9366 + }, + { + "epoch": 6.477869986168741, + "grad_norm": 6.191340446472168, + "learning_rate": 1.9567388965729218e-05, + "log_odds_chosen": 10.428787231445312, + "log_odds_ratio": -8.16139072412625e-05, + "logits/chosen": -0.4286971092224121, + "logits/rejected": -0.415450781583786, + "logps/chosen": -0.0001613447384443134, + "logps/rejected": -1.654663324356079, + "loss": 0.6405, + "nll_loss": 0.16010689735412598, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.613447420822922e-05, + "rewards/margins": 0.16545020043849945, + "rewards/rejected": -0.1654663383960724, + "step": 9367 + }, + { + "epoch": 6.478561549100968, + "grad_norm": 9.376286506652832, + "learning_rate": 1.9563546949439067e-05, + "log_odds_chosen": 9.9938325881958, + "log_odds_ratio": -0.00032971659675240517, + "logits/chosen": -0.41345369815826416, + "logits/rejected": -0.5434488654136658, + "logps/chosen": -0.00047324446495622396, + "logps/rejected": -1.9914203882217407, + "loss": 0.7204, + "nll_loss": 0.18007104098796844, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.732444358523935e-05, + "rewards/margins": 0.1990947127342224, + "rewards/rejected": -0.19914203882217407, + "step": 9368 + }, + { + "epoch": 6.479253112033195, + "grad_norm": 4.642989158630371, + "learning_rate": 1.9559704933148916e-05, + "log_odds_chosen": 10.619248390197754, + "log_odds_ratio": -4.108287612325512e-05, + "logits/chosen": -0.41739746928215027, + "logits/rejected": -0.4319196343421936, + "logps/chosen": -0.00022695327061228454, + "logps/rejected": -2.1578879356384277, + "loss": 0.503, + "nll_loss": 0.12573689222335815, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2695327061228454e-05, + "rewards/margins": 0.2157660871744156, + "rewards/rejected": -0.21578878164291382, + "step": 9369 + }, + { + "epoch": 6.479944674965422, + "grad_norm": 5.525413990020752, + "learning_rate": 1.955586291685877e-05, + "log_odds_chosen": 11.790472030639648, + "log_odds_ratio": -0.00010621760884532705, + "logits/chosen": -0.2747637629508972, + "logits/rejected": -0.2597922086715698, + "logps/chosen": -0.0005810950533486903, + "logps/rejected": -3.1645824909210205, + "loss": 0.6016, + "nll_loss": 0.15038540959358215, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.810950824525207e-05, + "rewards/margins": 0.3164001703262329, + "rewards/rejected": -0.31645825505256653, + "step": 9370 + }, + { + "epoch": 6.480636237897649, + "grad_norm": 8.51606559753418, + "learning_rate": 1.9552020900568618e-05, + "log_odds_chosen": 10.952482223510742, + "log_odds_ratio": -2.4556336938985623e-05, + "logits/chosen": -0.4160614311695099, + "logits/rejected": -0.5774533748626709, + "logps/chosen": -0.00023021356901153922, + "logps/rejected": -1.9789564609527588, + "loss": 0.6218, + "nll_loss": 0.15545280277729034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3021357264951803e-05, + "rewards/margins": 0.19787262380123138, + "rewards/rejected": -0.19789564609527588, + "step": 9371 + }, + { + "epoch": 6.481327800829876, + "grad_norm": 11.30921745300293, + "learning_rate": 1.954817888427847e-05, + "log_odds_chosen": 10.13013744354248, + "log_odds_ratio": -0.0004740456060972065, + "logits/chosen": -0.18564777076244354, + "logits/rejected": -0.2205415815114975, + "logps/chosen": -0.0006518846494145691, + "logps/rejected": -1.6600679159164429, + "loss": 0.6882, + "nll_loss": 0.17199313640594482, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518846203107387e-05, + "rewards/margins": 0.16594161093235016, + "rewards/rejected": -0.16600680351257324, + "step": 9372 + }, + { + "epoch": 6.482019363762102, + "grad_norm": 7.174125671386719, + "learning_rate": 1.9544336867988323e-05, + "log_odds_chosen": 10.80029296875, + "log_odds_ratio": -4.8086159949889407e-05, + "logits/chosen": -0.038425326347351074, + "logits/rejected": -0.18574784696102142, + "logps/chosen": -0.0002708366373553872, + "logps/rejected": -2.129744052886963, + "loss": 0.6591, + "nll_loss": 0.1647716909646988, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7083666282123886e-05, + "rewards/margins": 0.2129473239183426, + "rewards/rejected": -0.212974414229393, + "step": 9373 + }, + { + "epoch": 6.482710926694329, + "grad_norm": 9.137430191040039, + "learning_rate": 1.954049485169817e-05, + "log_odds_chosen": 10.96411418914795, + "log_odds_ratio": -6.397358811227605e-05, + "logits/chosen": -0.07936275750398636, + "logits/rejected": -0.16068057715892792, + "logps/chosen": -0.00018608587561175227, + "logps/rejected": -2.4485511779785156, + "loss": 0.5968, + "nll_loss": 0.14920562505722046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.860858901636675e-05, + "rewards/margins": 0.2448364943265915, + "rewards/rejected": -0.2448551058769226, + "step": 9374 + }, + { + "epoch": 6.483402489626556, + "grad_norm": 5.644742488861084, + "learning_rate": 1.9536652835408024e-05, + "log_odds_chosen": 9.900896072387695, + "log_odds_ratio": -9.974578279070556e-05, + "logits/chosen": -0.14903931319713593, + "logits/rejected": -0.18918846547603607, + "logps/chosen": -0.0004961632657796144, + "logps/rejected": -1.9804139137268066, + "loss": 0.5944, + "nll_loss": 0.14859052002429962, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.961631930200383e-05, + "rewards/margins": 0.1979917734861374, + "rewards/rejected": -0.1980414092540741, + "step": 9375 + }, + { + "epoch": 6.484094052558783, + "grad_norm": 7.427826404571533, + "learning_rate": 1.9532810819117876e-05, + "log_odds_chosen": 9.996077537536621, + "log_odds_ratio": -0.00019332885858602822, + "logits/chosen": -0.2691127359867096, + "logits/rejected": -0.25111067295074463, + "logps/chosen": -0.0014359343331307173, + "logps/rejected": -2.8224704265594482, + "loss": 0.6442, + "nll_loss": 0.16103675961494446, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014359343913383782, + "rewards/margins": 0.2821034789085388, + "rewards/rejected": -0.28224706649780273, + "step": 9376 + }, + { + "epoch": 6.48478561549101, + "grad_norm": 9.478433609008789, + "learning_rate": 1.9528968802827726e-05, + "log_odds_chosen": 10.263532638549805, + "log_odds_ratio": -7.455523882526904e-05, + "logits/chosen": -0.27997761964797974, + "logits/rejected": -0.33347785472869873, + "logps/chosen": -0.0001137300132540986, + "logps/rejected": -1.435097336769104, + "loss": 0.623, + "nll_loss": 0.15574686229228973, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1373001143510919e-05, + "rewards/margins": 0.14349837601184845, + "rewards/rejected": -0.14350974559783936, + "step": 9377 + }, + { + "epoch": 6.485477178423237, + "grad_norm": 8.160806655883789, + "learning_rate": 1.9525126786537575e-05, + "log_odds_chosen": 10.65985107421875, + "log_odds_ratio": -5.170962685951963e-05, + "logits/chosen": 0.09655636548995972, + "logits/rejected": -0.008620738983154297, + "logps/chosen": -0.0003284272679593414, + "logps/rejected": -2.1207449436187744, + "loss": 0.5131, + "nll_loss": 0.12827156484127045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.284272679593414e-05, + "rewards/margins": 0.21204164624214172, + "rewards/rejected": -0.21207448840141296, + "step": 9378 + }, + { + "epoch": 6.486168741355463, + "grad_norm": 7.359895706176758, + "learning_rate": 1.9521284770247427e-05, + "log_odds_chosen": 10.544219970703125, + "log_odds_ratio": -5.43832138646394e-05, + "logits/chosen": 0.11446790397167206, + "logits/rejected": 0.1329166442155838, + "logps/chosen": -0.00029686224297620356, + "logps/rejected": -1.8876280784606934, + "loss": 0.4447, + "nll_loss": 0.11115758121013641, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9686227208003402e-05, + "rewards/margins": 0.18873311579227448, + "rewards/rejected": -0.1887628138065338, + "step": 9379 + }, + { + "epoch": 6.48686030428769, + "grad_norm": 4.4276204109191895, + "learning_rate": 1.9517442753957276e-05, + "log_odds_chosen": 10.485124588012695, + "log_odds_ratio": -0.00014691927935928106, + "logits/chosen": -0.5485713481903076, + "logits/rejected": -0.5030081868171692, + "logps/chosen": -0.00014963530702516437, + "logps/rejected": -2.009016990661621, + "loss": 0.4577, + "nll_loss": 0.11441591382026672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4963530702516437e-05, + "rewards/margins": 0.20088671147823334, + "rewards/rejected": -0.20090168714523315, + "step": 9380 + }, + { + "epoch": 6.487551867219917, + "grad_norm": 13.013057708740234, + "learning_rate": 1.951360073766713e-05, + "log_odds_chosen": 10.500027656555176, + "log_odds_ratio": -0.0002235960419056937, + "logits/chosen": -0.01233922690153122, + "logits/rejected": -0.05886126682162285, + "logps/chosen": -0.00036796220229007304, + "logps/rejected": -2.2961668968200684, + "loss": 0.7453, + "nll_loss": 0.18629077076911926, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.679622386698611e-05, + "rewards/margins": 0.22957991063594818, + "rewards/rejected": -0.2296167016029358, + "step": 9381 + }, + { + "epoch": 6.488243430152144, + "grad_norm": 5.009751319885254, + "learning_rate": 1.950975872137698e-05, + "log_odds_chosen": 11.303783416748047, + "log_odds_ratio": -2.4028908228501678e-05, + "logits/chosen": -0.00550035759806633, + "logits/rejected": -0.06772229075431824, + "logps/chosen": -0.00017578649567440152, + "logps/rejected": -2.637251853942871, + "loss": 0.5605, + "nll_loss": 0.14013239741325378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.757864811224863e-05, + "rewards/margins": 0.26370760798454285, + "rewards/rejected": -0.2637251913547516, + "step": 9382 + }, + { + "epoch": 6.488934993084371, + "grad_norm": 4.654784679412842, + "learning_rate": 1.950591670508683e-05, + "log_odds_chosen": 10.375570297241211, + "log_odds_ratio": -8.63418317749165e-05, + "logits/chosen": -0.049294471740722656, + "logits/rejected": -0.10794499516487122, + "logps/chosen": -0.000227405660552904, + "logps/rejected": -1.9614112377166748, + "loss": 0.7354, + "nll_loss": 0.18383842706680298, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2740567146684043e-05, + "rewards/margins": 0.19611838459968567, + "rewards/rejected": -0.19614112377166748, + "step": 9383 + }, + { + "epoch": 6.4896265560165975, + "grad_norm": 6.240286350250244, + "learning_rate": 1.9502074688796682e-05, + "log_odds_chosen": 11.334842681884766, + "log_odds_ratio": -2.722295539570041e-05, + "logits/chosen": 0.04571309685707092, + "logits/rejected": -0.09449347853660583, + "logps/chosen": -0.00015503622125834227, + "logps/rejected": -2.295506477355957, + "loss": 0.8953, + "nll_loss": 0.2238202542066574, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5503621398238465e-05, + "rewards/margins": 0.22953513264656067, + "rewards/rejected": -0.22955065965652466, + "step": 9384 + }, + { + "epoch": 6.490318118948824, + "grad_norm": 6.103881359100342, + "learning_rate": 1.9498232672506535e-05, + "log_odds_chosen": 10.914320945739746, + "log_odds_ratio": -5.245122156338766e-05, + "logits/chosen": -0.478939026594162, + "logits/rejected": -0.47022366523742676, + "logps/chosen": -0.00027636217419058084, + "logps/rejected": -2.0653438568115234, + "loss": 0.5338, + "nll_loss": 0.13343766331672668, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7636218874249607e-05, + "rewards/margins": 0.20650672912597656, + "rewards/rejected": -0.20653435587882996, + "step": 9385 + }, + { + "epoch": 6.491009681881051, + "grad_norm": 11.520191192626953, + "learning_rate": 1.9494390656216384e-05, + "log_odds_chosen": 10.832954406738281, + "log_odds_ratio": -6.274733459576964e-05, + "logits/chosen": -0.5592326521873474, + "logits/rejected": -0.5868933796882629, + "logps/chosen": -0.00022971341968514025, + "logps/rejected": -2.2540066242218018, + "loss": 0.5466, + "nll_loss": 0.13664284348487854, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2971342332311906e-05, + "rewards/margins": 0.22537770867347717, + "rewards/rejected": -0.2254006564617157, + "step": 9386 + }, + { + "epoch": 6.491701244813278, + "grad_norm": 6.2752485275268555, + "learning_rate": 1.9490548639926233e-05, + "log_odds_chosen": 10.993319511413574, + "log_odds_ratio": -0.00043842248851433396, + "logits/chosen": -0.27941277623176575, + "logits/rejected": -0.2629889249801636, + "logps/chosen": -0.00019828768563456833, + "logps/rejected": -2.2103896141052246, + "loss": 0.9143, + "nll_loss": 0.22852730751037598, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9828770746244118e-05, + "rewards/margins": 0.22101914882659912, + "rewards/rejected": -0.22103895246982574, + "step": 9387 + }, + { + "epoch": 6.492392807745505, + "grad_norm": 7.458618640899658, + "learning_rate": 1.9486706623636085e-05, + "log_odds_chosen": 11.85312557220459, + "log_odds_ratio": -3.386929529369809e-05, + "logits/chosen": -0.12793764472007751, + "logits/rejected": -0.13494420051574707, + "logps/chosen": -9.064783080248162e-05, + "logps/rejected": -2.489367961883545, + "loss": 0.5406, + "nll_loss": 0.13514791429042816, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.064782716450281e-06, + "rewards/margins": 0.2489277422428131, + "rewards/rejected": -0.24893681704998016, + "step": 9388 + }, + { + "epoch": 6.493084370677732, + "grad_norm": 5.261797904968262, + "learning_rate": 1.9482864607345934e-05, + "log_odds_chosen": 10.775561332702637, + "log_odds_ratio": -0.0002782086085062474, + "logits/chosen": -0.1607806533575058, + "logits/rejected": -0.17884010076522827, + "logps/chosen": -0.00014673758414573967, + "logps/rejected": -1.7819510698318481, + "loss": 0.5429, + "nll_loss": 0.13568845391273499, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4673758414573967e-05, + "rewards/margins": 0.17818044126033783, + "rewards/rejected": -0.17819511890411377, + "step": 9389 + }, + { + "epoch": 6.4937759336099585, + "grad_norm": 4.2047953605651855, + "learning_rate": 1.9479022591055787e-05, + "log_odds_chosen": 10.238554000854492, + "log_odds_ratio": -0.00011502691631903872, + "logits/chosen": -0.3183709979057312, + "logits/rejected": -0.38084205985069275, + "logps/chosen": -0.0003533228882588446, + "logps/rejected": -2.0055973529815674, + "loss": 1.0841, + "nll_loss": 0.2710167467594147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.533228664309718e-05, + "rewards/margins": 0.20052438974380493, + "rewards/rejected": -0.20055972039699554, + "step": 9390 + }, + { + "epoch": 6.494467496542185, + "grad_norm": 6.484034061431885, + "learning_rate": 1.947518057476564e-05, + "log_odds_chosen": 9.686558723449707, + "log_odds_ratio": -0.00015934224938973784, + "logits/chosen": -0.31236618757247925, + "logits/rejected": -0.3524632155895233, + "logps/chosen": -0.0008042749250307679, + "logps/rejected": -2.3469886779785156, + "loss": 0.4975, + "nll_loss": 0.12434867024421692, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.042750414460897e-05, + "rewards/margins": 0.23461845517158508, + "rewards/rejected": -0.23469887673854828, + "step": 9391 + }, + { + "epoch": 6.495159059474412, + "grad_norm": 4.563853740692139, + "learning_rate": 1.947133855847549e-05, + "log_odds_chosen": 10.755139350891113, + "log_odds_ratio": -7.848611858207732e-05, + "logits/chosen": -0.3584415912628174, + "logits/rejected": -0.4386383295059204, + "logps/chosen": -0.00026066272403113544, + "logps/rejected": -2.410828113555908, + "loss": 0.5091, + "nll_loss": 0.12725502252578735, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6066272766911425e-05, + "rewards/margins": 0.24105677008628845, + "rewards/rejected": -0.2410828173160553, + "step": 9392 + }, + { + "epoch": 6.495850622406639, + "grad_norm": 7.453660011291504, + "learning_rate": 1.946749654218534e-05, + "log_odds_chosen": 10.772347450256348, + "log_odds_ratio": -9.090732055483386e-05, + "logits/chosen": -0.3762636184692383, + "logits/rejected": -0.4067800045013428, + "logps/chosen": -0.0003499372396618128, + "logps/rejected": -2.7631566524505615, + "loss": 0.8204, + "nll_loss": 0.2051025927066803, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.49937254213728e-05, + "rewards/margins": 0.2762807011604309, + "rewards/rejected": -0.27631568908691406, + "step": 9393 + }, + { + "epoch": 6.496542185338866, + "grad_norm": 5.516212463378906, + "learning_rate": 1.9463654525895193e-05, + "log_odds_chosen": 12.666479110717773, + "log_odds_ratio": -7.51461811887566e-06, + "logits/chosen": -0.1613074094057083, + "logits/rejected": -0.21167302131652832, + "logps/chosen": -0.00015206293028313667, + "logps/rejected": -3.4487061500549316, + "loss": 0.5949, + "nll_loss": 0.1487281173467636, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5206292118818965e-05, + "rewards/margins": 0.3448554277420044, + "rewards/rejected": -0.3448706269264221, + "step": 9394 + }, + { + "epoch": 6.497233748271093, + "grad_norm": 5.250306606292725, + "learning_rate": 1.9459812509605042e-05, + "log_odds_chosen": 10.824128150939941, + "log_odds_ratio": -3.159879270242527e-05, + "logits/chosen": -0.18953561782836914, + "logits/rejected": -0.21519282460212708, + "logps/chosen": -0.00020199205027893186, + "logps/rejected": -2.2640249729156494, + "loss": 0.5068, + "nll_loss": 0.12670867145061493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0199206119286828e-05, + "rewards/margins": 0.226382315158844, + "rewards/rejected": -0.22640252113342285, + "step": 9395 + }, + { + "epoch": 6.4979253112033195, + "grad_norm": 5.880115509033203, + "learning_rate": 1.945597049331489e-05, + "log_odds_chosen": 10.155384063720703, + "log_odds_ratio": -0.00017642477178014815, + "logits/chosen": -0.5245303511619568, + "logits/rejected": -0.5084295272827148, + "logps/chosen": -0.00028953165747225285, + "logps/rejected": -1.9637598991394043, + "loss": 0.664, + "nll_loss": 0.16598252952098846, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8953167202416807e-05, + "rewards/margins": 0.19634705781936646, + "rewards/rejected": -0.1963759958744049, + "step": 9396 + }, + { + "epoch": 6.498616874135546, + "grad_norm": 12.754213333129883, + "learning_rate": 1.9452128477024744e-05, + "log_odds_chosen": 10.310648918151855, + "log_odds_ratio": -0.0002212212566519156, + "logits/chosen": -0.7641613483428955, + "logits/rejected": -0.6988776922225952, + "logps/chosen": -0.00037628383142873645, + "logps/rejected": -1.668961763381958, + "loss": 0.5353, + "nll_loss": 0.13379371166229248, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7628382415277883e-05, + "rewards/margins": 0.16685855388641357, + "rewards/rejected": -0.16689617931842804, + "step": 9397 + }, + { + "epoch": 6.499308437067773, + "grad_norm": 7.186704635620117, + "learning_rate": 1.9448286460734593e-05, + "log_odds_chosen": 10.572660446166992, + "log_odds_ratio": -0.00016197163495235145, + "logits/chosen": -0.36398768424987793, + "logits/rejected": -0.2695331871509552, + "logps/chosen": -0.0002569742500782013, + "logps/rejected": -2.236602306365967, + "loss": 0.741, + "nll_loss": 0.18523554503917694, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5697427190607414e-05, + "rewards/margins": 0.2236345410346985, + "rewards/rejected": -0.22366023063659668, + "step": 9398 + }, + { + "epoch": 6.5, + "grad_norm": 7.115647792816162, + "learning_rate": 1.9444444444444445e-05, + "log_odds_chosen": 11.01469898223877, + "log_odds_ratio": -0.0006828614859841764, + "logits/chosen": -0.5176241397857666, + "logits/rejected": -0.5681507587432861, + "logps/chosen": -0.0010533032473176718, + "logps/rejected": -2.80560302734375, + "loss": 0.9728, + "nll_loss": 0.24311989545822144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010533032036619261, + "rewards/margins": 0.28045496344566345, + "rewards/rejected": -0.28056028485298157, + "step": 9399 + }, + { + "epoch": 6.500691562932227, + "grad_norm": 6.133714199066162, + "learning_rate": 1.9440602428154298e-05, + "log_odds_chosen": 10.151878356933594, + "log_odds_ratio": -0.00010759021097328514, + "logits/chosen": -0.2427067905664444, + "logits/rejected": -0.35290953516960144, + "logps/chosen": -0.000351759692421183, + "logps/rejected": -1.7226321697235107, + "loss": 0.6078, + "nll_loss": 0.15194585919380188, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.517596996971406e-05, + "rewards/margins": 0.17222803831100464, + "rewards/rejected": -0.1722632348537445, + "step": 9400 + }, + { + "epoch": 6.501383125864454, + "grad_norm": 6.098109722137451, + "learning_rate": 1.9436760411864147e-05, + "log_odds_chosen": 10.773904800415039, + "log_odds_ratio": -0.00040848561911843717, + "logits/chosen": -0.5251795053482056, + "logits/rejected": -0.5544536113739014, + "logps/chosen": -0.0026470485609024763, + "logps/rejected": -2.45532488822937, + "loss": 0.5156, + "nll_loss": 0.12887021899223328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002647048677317798, + "rewards/margins": 0.24526777863502502, + "rewards/rejected": -0.24553249776363373, + "step": 9401 + }, + { + "epoch": 6.5020746887966805, + "grad_norm": 6.668719291687012, + "learning_rate": 1.9432918395574e-05, + "log_odds_chosen": 10.610265731811523, + "log_odds_ratio": -5.127752956468612e-05, + "logits/chosen": -0.6081045269966125, + "logits/rejected": -0.5891537666320801, + "logps/chosen": -0.00013224473514128476, + "logps/rejected": -1.805846929550171, + "loss": 0.5718, + "nll_loss": 0.1429474651813507, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3224474059825297e-05, + "rewards/margins": 0.18057146668434143, + "rewards/rejected": -0.18058468401432037, + "step": 9402 + }, + { + "epoch": 6.502766251728907, + "grad_norm": 6.99521017074585, + "learning_rate": 1.9429076379283852e-05, + "log_odds_chosen": 10.94999885559082, + "log_odds_ratio": -4.3023428588639945e-05, + "logits/chosen": -0.1102178692817688, + "logits/rejected": -0.10561814159154892, + "logps/chosen": -0.00015922827878966928, + "logps/rejected": -2.272221088409424, + "loss": 0.498, + "nll_loss": 0.1244877278804779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5922827515169047e-05, + "rewards/margins": 0.22720618546009064, + "rewards/rejected": -0.22722211480140686, + "step": 9403 + }, + { + "epoch": 6.503457814661134, + "grad_norm": 4.8532304763793945, + "learning_rate": 1.94252343629937e-05, + "log_odds_chosen": 10.020861625671387, + "log_odds_ratio": -0.0003055678680539131, + "logits/chosen": -0.24106940627098083, + "logits/rejected": -0.23849815130233765, + "logps/chosen": -0.0009358559618704021, + "logps/rejected": -2.470724105834961, + "loss": 0.4524, + "nll_loss": 0.11306346207857132, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.358559327665716e-05, + "rewards/margins": 0.24697881937026978, + "rewards/rejected": -0.24707241356372833, + "step": 9404 + }, + { + "epoch": 6.504149377593361, + "grad_norm": 6.786164283752441, + "learning_rate": 1.942139234670355e-05, + "log_odds_chosen": 10.718588829040527, + "log_odds_ratio": -9.22972394619137e-05, + "logits/chosen": -0.24664977192878723, + "logits/rejected": -0.4017777442932129, + "logps/chosen": -0.00023182231234386563, + "logps/rejected": -2.2096641063690186, + "loss": 0.5772, + "nll_loss": 0.14429354667663574, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3182230506790802e-05, + "rewards/margins": 0.22094322741031647, + "rewards/rejected": -0.2209664285182953, + "step": 9405 + }, + { + "epoch": 6.504840940525588, + "grad_norm": 5.937921524047852, + "learning_rate": 1.9417550330413402e-05, + "log_odds_chosen": 10.956061363220215, + "log_odds_ratio": -5.964807860436849e-05, + "logits/chosen": -0.31870102882385254, + "logits/rejected": -0.4643133878707886, + "logps/chosen": -0.0002928538015112281, + "logps/rejected": -2.033576011657715, + "loss": 0.5851, + "nll_loss": 0.14626480638980865, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.928538015112281e-05, + "rewards/margins": 0.20332831144332886, + "rewards/rejected": -0.20335760712623596, + "step": 9406 + }, + { + "epoch": 6.505532503457815, + "grad_norm": 5.6538825035095215, + "learning_rate": 1.941370831412325e-05, + "log_odds_chosen": 10.884583473205566, + "log_odds_ratio": -4.456051465240307e-05, + "logits/chosen": -0.44369328022003174, + "logits/rejected": -0.49158185720443726, + "logps/chosen": -0.0009643149096518755, + "logps/rejected": -2.6121954917907715, + "loss": 0.8599, + "nll_loss": 0.21495816111564636, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.643149678595364e-05, + "rewards/margins": 0.2611231207847595, + "rewards/rejected": -0.2612195611000061, + "step": 9407 + }, + { + "epoch": 6.5062240663900415, + "grad_norm": 5.787775993347168, + "learning_rate": 1.9409866297833104e-05, + "log_odds_chosen": 10.493619918823242, + "log_odds_ratio": -0.00011466229625511914, + "logits/chosen": -0.7101765871047974, + "logits/rejected": -0.7864276170730591, + "logps/chosen": -0.00018912236555479467, + "logps/rejected": -2.0413930416107178, + "loss": 0.5214, + "nll_loss": 0.13034909963607788, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8912236555479467e-05, + "rewards/margins": 0.20412039756774902, + "rewards/rejected": -0.20413930714130402, + "step": 9408 + }, + { + "epoch": 6.506915629322268, + "grad_norm": 3.5225272178649902, + "learning_rate": 1.9406024281542956e-05, + "log_odds_chosen": 10.417261123657227, + "log_odds_ratio": -0.0001832667039707303, + "logits/chosen": -0.47410666942596436, + "logits/rejected": -0.449399471282959, + "logps/chosen": -0.00038906122790649533, + "logps/rejected": -2.056983232498169, + "loss": 0.3241, + "nll_loss": 0.08101370185613632, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8906124245841056e-05, + "rewards/margins": 0.205659419298172, + "rewards/rejected": -0.20569832623004913, + "step": 9409 + }, + { + "epoch": 6.507607192254495, + "grad_norm": 5.769867897033691, + "learning_rate": 1.9402182265252805e-05, + "log_odds_chosen": 10.512954711914062, + "log_odds_ratio": -0.00010581602691672742, + "logits/chosen": -0.28758078813552856, + "logits/rejected": -0.2849484086036682, + "logps/chosen": -0.00046525232028216124, + "logps/rejected": -2.6369874477386475, + "loss": 0.808, + "nll_loss": 0.20199567079544067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.65252305730246e-05, + "rewards/margins": 0.2636522352695465, + "rewards/rejected": -0.2636987566947937, + "step": 9410 + }, + { + "epoch": 6.508298755186722, + "grad_norm": 7.070125579833984, + "learning_rate": 1.9398340248962658e-05, + "log_odds_chosen": 10.729057312011719, + "log_odds_ratio": -6.643655797233805e-05, + "logits/chosen": -0.6054902076721191, + "logits/rejected": -0.48226070404052734, + "logps/chosen": -0.0008427513530477881, + "logps/rejected": -2.409019947052002, + "loss": 0.4645, + "nll_loss": 0.11612001061439514, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.427513967035338e-05, + "rewards/margins": 0.24081772565841675, + "rewards/rejected": -0.24090200662612915, + "step": 9411 + }, + { + "epoch": 6.508990318118949, + "grad_norm": 6.108166217803955, + "learning_rate": 1.9394498232672507e-05, + "log_odds_chosen": 10.982990264892578, + "log_odds_ratio": -3.755984653253108e-05, + "logits/chosen": -0.3624812364578247, + "logits/rejected": -0.5092224478721619, + "logps/chosen": -0.0005596339469775558, + "logps/rejected": -2.786410093307495, + "loss": 0.5912, + "nll_loss": 0.14779676496982574, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5963395425351337e-05, + "rewards/margins": 0.2785850465297699, + "rewards/rejected": -0.2786409854888916, + "step": 9412 + }, + { + "epoch": 6.509681881051176, + "grad_norm": 10.15151309967041, + "learning_rate": 1.939065621638236e-05, + "log_odds_chosen": 10.644213676452637, + "log_odds_ratio": -6.074633347452618e-05, + "logits/chosen": -0.6252480149269104, + "logits/rejected": -0.7317550182342529, + "logps/chosen": -0.0002546596515458077, + "logps/rejected": -2.0850729942321777, + "loss": 0.5701, + "nll_loss": 0.14251932501792908, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.546596442698501e-05, + "rewards/margins": 0.20848184823989868, + "rewards/rejected": -0.20850731432437897, + "step": 9413 + }, + { + "epoch": 6.5103734439834025, + "grad_norm": 10.627796173095703, + "learning_rate": 1.938681420009221e-05, + "log_odds_chosen": 11.714271545410156, + "log_odds_ratio": -1.4582346921088174e-05, + "logits/chosen": -0.08012107014656067, + "logits/rejected": -0.19204331934452057, + "logps/chosen": -0.00015324132982641459, + "logps/rejected": -2.5697147846221924, + "loss": 0.7031, + "nll_loss": 0.1757819950580597, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.532413443783298e-05, + "rewards/margins": 0.25695616006851196, + "rewards/rejected": -0.25697147846221924, + "step": 9414 + }, + { + "epoch": 6.511065006915629, + "grad_norm": 5.780624866485596, + "learning_rate": 1.9382972183802057e-05, + "log_odds_chosen": 8.441697120666504, + "log_odds_ratio": -0.002654177835211158, + "logits/chosen": -0.4831072688102722, + "logits/rejected": -0.44240304827690125, + "logps/chosen": -0.002396452473476529, + "logps/rejected": -1.4833424091339111, + "loss": 1.112, + "nll_loss": 0.27773788571357727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023964526189956814, + "rewards/margins": 0.14809459447860718, + "rewards/rejected": -0.14833424985408783, + "step": 9415 + }, + { + "epoch": 6.511756569847856, + "grad_norm": 5.6088643074035645, + "learning_rate": 1.937913016751191e-05, + "log_odds_chosen": 11.039244651794434, + "log_odds_ratio": -9.301940008299425e-05, + "logits/chosen": -0.23438656330108643, + "logits/rejected": -0.32080191373825073, + "logps/chosen": -0.00019122361845802516, + "logps/rejected": -2.171100616455078, + "loss": 0.4477, + "nll_loss": 0.11190492659807205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.912236439238768e-05, + "rewards/margins": 0.2170909196138382, + "rewards/rejected": -0.2171100378036499, + "step": 9416 + }, + { + "epoch": 6.512448132780083, + "grad_norm": 6.845349311828613, + "learning_rate": 1.9375288151221762e-05, + "log_odds_chosen": 10.662532806396484, + "log_odds_ratio": -3.290931636001915e-05, + "logits/chosen": -0.48565664887428284, + "logits/rejected": -0.6462719440460205, + "logps/chosen": -0.00023579117259941995, + "logps/rejected": -2.098126173019409, + "loss": 0.6561, + "nll_loss": 0.16403229534626007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.357911944272928e-05, + "rewards/margins": 0.20978905260562897, + "rewards/rejected": -0.20981262624263763, + "step": 9417 + }, + { + "epoch": 6.51313969571231, + "grad_norm": 9.37191390991211, + "learning_rate": 1.937144613493161e-05, + "log_odds_chosen": 9.508430480957031, + "log_odds_ratio": -0.003604255151003599, + "logits/chosen": -0.48413991928100586, + "logits/rejected": -0.5383449792861938, + "logps/chosen": -0.01737593300640583, + "logps/rejected": -2.10723876953125, + "loss": 0.4074, + "nll_loss": 0.1014985591173172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017375932075083256, + "rewards/margins": 0.2089862823486328, + "rewards/rejected": -0.210723876953125, + "step": 9418 + }, + { + "epoch": 6.513831258644537, + "grad_norm": 7.972471237182617, + "learning_rate": 1.9367604118641464e-05, + "log_odds_chosen": 10.649456977844238, + "log_odds_ratio": -0.0001401927729602903, + "logits/chosen": -0.40515631437301636, + "logits/rejected": -0.45684176683425903, + "logps/chosen": -0.00021025318710599095, + "logps/rejected": -2.0950050354003906, + "loss": 0.5264, + "nll_loss": 0.13159304857254028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.102531652781181e-05, + "rewards/margins": 0.2094794660806656, + "rewards/rejected": -0.2095005065202713, + "step": 9419 + }, + { + "epoch": 6.514522821576763, + "grad_norm": 5.934338092803955, + "learning_rate": 1.9363762102351316e-05, + "log_odds_chosen": 10.455333709716797, + "log_odds_ratio": -0.00029340438777580857, + "logits/chosen": -0.5644535422325134, + "logits/rejected": -0.6517401933670044, + "logps/chosen": -0.0014436359051615, + "logps/rejected": -2.3486440181732178, + "loss": 0.432, + "nll_loss": 0.10797516256570816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001443635846953839, + "rewards/margins": 0.23472005128860474, + "rewards/rejected": -0.23486441373825073, + "step": 9420 + }, + { + "epoch": 6.51521438450899, + "grad_norm": 6.147100448608398, + "learning_rate": 1.9359920086061165e-05, + "log_odds_chosen": 8.786388397216797, + "log_odds_ratio": -0.0012116666184738278, + "logits/chosen": -0.4684923589229584, + "logits/rejected": -0.490852415561676, + "logps/chosen": -0.001625780132599175, + "logps/rejected": -1.5735567808151245, + "loss": 0.7118, + "nll_loss": 0.17783603072166443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001625780132599175, + "rewards/margins": 0.15719309449195862, + "rewards/rejected": -0.1573556810617447, + "step": 9421 + }, + { + "epoch": 6.515905947441217, + "grad_norm": 6.892473220825195, + "learning_rate": 1.9356078069771018e-05, + "log_odds_chosen": 10.530393600463867, + "log_odds_ratio": -0.00014122716675046831, + "logits/chosen": -0.6844215393066406, + "logits/rejected": -0.7224639654159546, + "logps/chosen": -0.0002905130968429148, + "logps/rejected": -2.489344358444214, + "loss": 0.5674, + "nll_loss": 0.14183634519577026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.905130895669572e-05, + "rewards/margins": 0.24890540540218353, + "rewards/rejected": -0.24893444776535034, + "step": 9422 + }, + { + "epoch": 6.516597510373444, + "grad_norm": 4.591324329376221, + "learning_rate": 1.9352236053480867e-05, + "log_odds_chosen": 11.390593528747559, + "log_odds_ratio": -1.5925519619486295e-05, + "logits/chosen": -0.5548987984657288, + "logits/rejected": -0.5801922678947449, + "logps/chosen": -0.0001749470247887075, + "logps/rejected": -2.1435866355895996, + "loss": 0.4582, + "nll_loss": 0.11454764008522034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.749470357026439e-05, + "rewards/margins": 0.2143411636352539, + "rewards/rejected": -0.21435865759849548, + "step": 9423 + }, + { + "epoch": 6.517289073305671, + "grad_norm": 6.513183116912842, + "learning_rate": 1.9348394037190716e-05, + "log_odds_chosen": 9.555843353271484, + "log_odds_ratio": -0.0003111936675850302, + "logits/chosen": -0.4625076949596405, + "logits/rejected": -0.4753469228744507, + "logps/chosen": -0.000441780430264771, + "logps/rejected": -1.761674404144287, + "loss": 0.527, + "nll_loss": 0.13172942399978638, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.41780430264771e-05, + "rewards/margins": 0.1761232614517212, + "rewards/rejected": -0.17616745829582214, + "step": 9424 + }, + { + "epoch": 6.517980636237898, + "grad_norm": 4.426665782928467, + "learning_rate": 1.9344552020900568e-05, + "log_odds_chosen": 10.659873962402344, + "log_odds_ratio": -4.747790808323771e-05, + "logits/chosen": -0.4140737056732178, + "logits/rejected": -0.3993862271308899, + "logps/chosen": -0.00014404115790966898, + "logps/rejected": -1.736601710319519, + "loss": 0.6718, + "nll_loss": 0.16794097423553467, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.440411688236054e-05, + "rewards/margins": 0.1736457794904709, + "rewards/rejected": -0.17366017401218414, + "step": 9425 + }, + { + "epoch": 6.518672199170124, + "grad_norm": 9.013787269592285, + "learning_rate": 1.934071000461042e-05, + "log_odds_chosen": 10.727466583251953, + "log_odds_ratio": -8.105146116577089e-05, + "logits/chosen": -0.25618284940719604, + "logits/rejected": -0.2904587984085083, + "logps/chosen": -0.0009987832745537162, + "logps/rejected": -2.389343738555908, + "loss": 0.5929, + "nll_loss": 0.1482187956571579, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.987832891056314e-05, + "rewards/margins": 0.23883448541164398, + "rewards/rejected": -0.23893436789512634, + "step": 9426 + }, + { + "epoch": 6.519363762102351, + "grad_norm": 6.031515598297119, + "learning_rate": 1.933686798832027e-05, + "log_odds_chosen": 11.05801773071289, + "log_odds_ratio": -3.0185055948095396e-05, + "logits/chosen": -0.46399399638175964, + "logits/rejected": -0.48402759432792664, + "logps/chosen": -0.0002074514195555821, + "logps/rejected": -2.437533378601074, + "loss": 0.4859, + "nll_loss": 0.12146744132041931, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.074514122796245e-05, + "rewards/margins": 0.24373260140419006, + "rewards/rejected": -0.2437533587217331, + "step": 9427 + }, + { + "epoch": 6.520055325034578, + "grad_norm": 5.211343288421631, + "learning_rate": 1.9333025972030122e-05, + "log_odds_chosen": 10.822071075439453, + "log_odds_ratio": -4.411491681821644e-05, + "logits/chosen": -0.7721026539802551, + "logits/rejected": -0.7612123489379883, + "logps/chosen": -0.00019037112360820174, + "logps/rejected": -2.127896785736084, + "loss": 0.3862, + "nll_loss": 0.09655695408582687, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9037111997022294e-05, + "rewards/margins": 0.21277067065238953, + "rewards/rejected": -0.21278971433639526, + "step": 9428 + }, + { + "epoch": 6.520746887966805, + "grad_norm": 4.8389105796813965, + "learning_rate": 1.9329183955739975e-05, + "log_odds_chosen": 10.801448822021484, + "log_odds_ratio": -3.792867937590927e-05, + "logits/chosen": -0.21949411928653717, + "logits/rejected": -0.2931068539619446, + "logps/chosen": -0.00027945160400122404, + "logps/rejected": -2.0993459224700928, + "loss": 0.5517, + "nll_loss": 0.13792496919631958, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7945161491516046e-05, + "rewards/margins": 0.2099066525697708, + "rewards/rejected": -0.20993459224700928, + "step": 9429 + }, + { + "epoch": 6.521438450899032, + "grad_norm": 5.096884727478027, + "learning_rate": 1.9325341939449824e-05, + "log_odds_chosen": 10.270853996276855, + "log_odds_ratio": -0.0004896325990557671, + "logits/chosen": -0.4335726201534271, + "logits/rejected": -0.4108191728591919, + "logps/chosen": -0.0069307005032896996, + "logps/rejected": -2.498528480529785, + "loss": 0.6719, + "nll_loss": 0.16792647540569305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006930700037628412, + "rewards/margins": 0.2491597831249237, + "rewards/rejected": -0.24985285103321075, + "step": 9430 + }, + { + "epoch": 6.522130013831259, + "grad_norm": 7.353761196136475, + "learning_rate": 1.9321499923159676e-05, + "log_odds_chosen": 10.691258430480957, + "log_odds_ratio": -7.414798892568797e-05, + "logits/chosen": -0.3759855031967163, + "logits/rejected": -0.4204534590244293, + "logps/chosen": -0.00023737037554383278, + "logps/rejected": -2.425762891769409, + "loss": 0.6447, + "nll_loss": 0.16116894781589508, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3737036826787516e-05, + "rewards/margins": 0.24255254864692688, + "rewards/rejected": -0.24257630109786987, + "step": 9431 + }, + { + "epoch": 6.522821576763485, + "grad_norm": 4.431156635284424, + "learning_rate": 1.9317657906869525e-05, + "log_odds_chosen": 10.560503005981445, + "log_odds_ratio": -5.748879993916489e-05, + "logits/chosen": -0.09382009506225586, + "logits/rejected": -0.1083650067448616, + "logps/chosen": -0.0002093408547807485, + "logps/rejected": -1.8000552654266357, + "loss": 0.4804, + "nll_loss": 0.1201024129986763, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0934085114276968e-05, + "rewards/margins": 0.17998458445072174, + "rewards/rejected": -0.1800055205821991, + "step": 9432 + }, + { + "epoch": 6.523513139695712, + "grad_norm": 8.661432266235352, + "learning_rate": 1.9313815890579374e-05, + "log_odds_chosen": 10.021512985229492, + "log_odds_ratio": -0.00027715275064110756, + "logits/chosen": 0.011828139424324036, + "logits/rejected": -0.0077832043170928955, + "logps/chosen": -0.0018440388375893235, + "logps/rejected": -2.6286373138427734, + "loss": 0.869, + "nll_loss": 0.21721133589744568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001844038924900815, + "rewards/margins": 0.2626793086528778, + "rewards/rejected": -0.2628636956214905, + "step": 9433 + }, + { + "epoch": 6.524204702627939, + "grad_norm": 8.74638557434082, + "learning_rate": 1.9309973874289227e-05, + "log_odds_chosen": 10.317972183227539, + "log_odds_ratio": -0.00020331793348304927, + "logits/chosen": -0.11461025476455688, + "logits/rejected": -0.2297324389219284, + "logps/chosen": -0.0001929646823555231, + "logps/rejected": -2.06437349319458, + "loss": 0.8159, + "nll_loss": 0.20395462214946747, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.929646714415867e-05, + "rewards/margins": 0.20641806721687317, + "rewards/rejected": -0.2064373791217804, + "step": 9434 + }, + { + "epoch": 6.524896265560166, + "grad_norm": 9.663103103637695, + "learning_rate": 1.930613185799908e-05, + "log_odds_chosen": 10.05333423614502, + "log_odds_ratio": -0.00019566371338441968, + "logits/chosen": 0.023167580366134644, + "logits/rejected": -0.003208555281162262, + "logps/chosen": -0.0008226472418755293, + "logps/rejected": -1.9805678129196167, + "loss": 0.6815, + "nll_loss": 0.17034311592578888, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.226472709793597e-05, + "rewards/margins": 0.19797450304031372, + "rewards/rejected": -0.19805677235126495, + "step": 9435 + }, + { + "epoch": 6.525587828492393, + "grad_norm": 5.263288974761963, + "learning_rate": 1.9302289841708928e-05, + "log_odds_chosen": 10.60208797454834, + "log_odds_ratio": -9.669522114563733e-05, + "logits/chosen": -0.3702765107154846, + "logits/rejected": -0.4385896623134613, + "logps/chosen": -0.0004391923430375755, + "logps/rejected": -2.460280418395996, + "loss": 0.6826, + "nll_loss": 0.1706320345401764, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.391923357616179e-05, + "rewards/margins": 0.24598410725593567, + "rewards/rejected": -0.24602803587913513, + "step": 9436 + }, + { + "epoch": 6.5262793914246195, + "grad_norm": 4.888511657714844, + "learning_rate": 1.929844782541878e-05, + "log_odds_chosen": 10.464603424072266, + "log_odds_ratio": -8.840052760206163e-05, + "logits/chosen": -0.7394514083862305, + "logits/rejected": -0.7927672863006592, + "logps/chosen": -0.00026991096092388034, + "logps/rejected": -1.7386510372161865, + "loss": 0.3981, + "nll_loss": 0.09950944036245346, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6991097911377437e-05, + "rewards/margins": 0.17383810877799988, + "rewards/rejected": -0.17386510968208313, + "step": 9437 + }, + { + "epoch": 6.526970954356846, + "grad_norm": 5.329433441162109, + "learning_rate": 1.9294605809128633e-05, + "log_odds_chosen": 10.825544357299805, + "log_odds_ratio": -6.626916729146615e-05, + "logits/chosen": -0.028324007987976074, + "logits/rejected": -0.0745202898979187, + "logps/chosen": -0.00023314285499509424, + "logps/rejected": -2.1176702976226807, + "loss": 0.746, + "nll_loss": 0.18650339543819427, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3314285499509424e-05, + "rewards/margins": 0.21174371242523193, + "rewards/rejected": -0.2117670178413391, + "step": 9438 + }, + { + "epoch": 6.527662517289073, + "grad_norm": 6.633513450622559, + "learning_rate": 1.9290763792838482e-05, + "log_odds_chosen": 10.572284698486328, + "log_odds_ratio": -9.137169399764389e-05, + "logits/chosen": -0.5419729351997375, + "logits/rejected": -0.650861918926239, + "logps/chosen": -0.00021510363148991019, + "logps/rejected": -2.059680223464966, + "loss": 0.7737, + "nll_loss": 0.19342711567878723, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1510362785193138e-05, + "rewards/margins": 0.20594650506973267, + "rewards/rejected": -0.20596802234649658, + "step": 9439 + }, + { + "epoch": 6.5283540802213, + "grad_norm": 8.28564453125, + "learning_rate": 1.9286921776548335e-05, + "log_odds_chosen": 9.535734176635742, + "log_odds_ratio": -0.0002018636732827872, + "logits/chosen": -0.6633929014205933, + "logits/rejected": -0.6869960427284241, + "logps/chosen": -0.0005071749328635633, + "logps/rejected": -1.613107442855835, + "loss": 0.6712, + "nll_loss": 0.16779053211212158, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0717495469143614e-05, + "rewards/margins": 0.16126003861427307, + "rewards/rejected": -0.16131076216697693, + "step": 9440 + }, + { + "epoch": 6.529045643153527, + "grad_norm": 6.801348686218262, + "learning_rate": 1.9283079760258184e-05, + "log_odds_chosen": 10.486151695251465, + "log_odds_ratio": -6.121492333477363e-05, + "logits/chosen": -0.37587589025497437, + "logits/rejected": -0.4333217740058899, + "logps/chosen": -0.0003721543762367219, + "logps/rejected": -2.1706387996673584, + "loss": 0.5519, + "nll_loss": 0.13796362280845642, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.721543907886371e-05, + "rewards/margins": 0.21702665090560913, + "rewards/rejected": -0.21706387400627136, + "step": 9441 + }, + { + "epoch": 6.529737206085754, + "grad_norm": 10.218798637390137, + "learning_rate": 1.9279237743968036e-05, + "log_odds_chosen": 11.670851707458496, + "log_odds_ratio": -3.392618236830458e-05, + "logits/chosen": -0.2155950665473938, + "logits/rejected": -0.32742008566856384, + "logps/chosen": -0.00012127251829952002, + "logps/rejected": -2.728766441345215, + "loss": 0.6559, + "nll_loss": 0.16396935284137726, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2127253285143524e-05, + "rewards/margins": 0.27286452054977417, + "rewards/rejected": -0.27287665009498596, + "step": 9442 + }, + { + "epoch": 6.5304287690179805, + "grad_norm": 5.403188705444336, + "learning_rate": 1.9275395727677885e-05, + "log_odds_chosen": 9.496389389038086, + "log_odds_ratio": -0.03377415984869003, + "logits/chosen": -0.280944287776947, + "logits/rejected": -0.27332478761672974, + "logps/chosen": -0.01256662979722023, + "logps/rejected": -1.719006061553955, + "loss": 0.4708, + "nll_loss": 0.11432860046625137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012566630030050874, + "rewards/margins": 0.17064395546913147, + "rewards/rejected": -0.17190060019493103, + "step": 9443 + }, + { + "epoch": 6.531120331950207, + "grad_norm": 4.520147800445557, + "learning_rate": 1.9271553711387738e-05, + "log_odds_chosen": 10.924385070800781, + "log_odds_ratio": -6.160808698041365e-05, + "logits/chosen": -0.32104748487472534, + "logits/rejected": -0.3863796591758728, + "logps/chosen": -0.00016719617997296154, + "logps/rejected": -1.7605154514312744, + "loss": 0.5161, + "nll_loss": 0.12901218235492706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6719619452487677e-05, + "rewards/margins": 0.1760348081588745, + "rewards/rejected": -0.1760515421628952, + "step": 9444 + }, + { + "epoch": 6.531811894882434, + "grad_norm": 5.334826946258545, + "learning_rate": 1.9267711695097587e-05, + "log_odds_chosen": 10.15666389465332, + "log_odds_ratio": -0.0001524979597888887, + "logits/chosen": -0.2669309973716736, + "logits/rejected": -0.278430700302124, + "logps/chosen": -0.0004270744975656271, + "logps/rejected": -1.9559170007705688, + "loss": 0.4716, + "nll_loss": 0.1178910881280899, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.27074555773288e-05, + "rewards/margins": 0.19554898142814636, + "rewards/rejected": -0.19559170305728912, + "step": 9445 + }, + { + "epoch": 6.532503457814661, + "grad_norm": 7.522082328796387, + "learning_rate": 1.926386967880744e-05, + "log_odds_chosen": 12.136481285095215, + "log_odds_ratio": -1.9502214854583144e-05, + "logits/chosen": -0.6247130036354065, + "logits/rejected": -0.572918176651001, + "logps/chosen": -7.606636063428596e-05, + "logps/rejected": -2.7466282844543457, + "loss": 1.3921, + "nll_loss": 0.34801381826400757, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.606636245327536e-06, + "rewards/margins": 0.27465522289276123, + "rewards/rejected": -0.2746628224849701, + "step": 9446 + }, + { + "epoch": 6.533195020746888, + "grad_norm": 7.554215431213379, + "learning_rate": 1.926002766251729e-05, + "log_odds_chosen": 10.835151672363281, + "log_odds_ratio": -2.6517705919104628e-05, + "logits/chosen": -0.16394104063510895, + "logits/rejected": -0.15501868724822998, + "logps/chosen": -0.00021345789718907326, + "logps/rejected": -1.9866135120391846, + "loss": 0.6984, + "nll_loss": 0.17458918690681458, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1345789718907326e-05, + "rewards/margins": 0.19864001870155334, + "rewards/rejected": -0.19866134226322174, + "step": 9447 + }, + { + "epoch": 6.533886583679115, + "grad_norm": 5.6107177734375, + "learning_rate": 1.925618564622714e-05, + "log_odds_chosen": 10.036823272705078, + "log_odds_ratio": -6.762363773304969e-05, + "logits/chosen": -0.44453203678131104, + "logits/rejected": -0.4903140068054199, + "logps/chosen": -0.0006642360240221024, + "logps/rejected": -1.758960485458374, + "loss": 1.0152, + "nll_loss": 0.2537900507450104, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.64236067677848e-05, + "rewards/margins": 0.17582963407039642, + "rewards/rejected": -0.1758960485458374, + "step": 9448 + }, + { + "epoch": 6.5345781466113415, + "grad_norm": 9.110634803771973, + "learning_rate": 1.9252343629936993e-05, + "log_odds_chosen": 10.609855651855469, + "log_odds_ratio": -5.668444646289572e-05, + "logits/chosen": -0.32916340231895447, + "logits/rejected": -0.376298725605011, + "logps/chosen": -0.0001745389890857041, + "logps/rejected": -1.83896803855896, + "loss": 0.7855, + "nll_loss": 0.19636529684066772, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7453898180974647e-05, + "rewards/margins": 0.18387934565544128, + "rewards/rejected": -0.18389680981636047, + "step": 9449 + }, + { + "epoch": 6.535269709543568, + "grad_norm": 8.429581642150879, + "learning_rate": 1.9248501613646845e-05, + "log_odds_chosen": 10.936147689819336, + "log_odds_ratio": -2.1902931621298194e-05, + "logits/chosen": -0.6698485016822815, + "logits/rejected": -0.7103371620178223, + "logps/chosen": -0.0002495582157280296, + "logps/rejected": -2.1973369121551514, + "loss": 0.6345, + "nll_loss": 0.1586150974035263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.495582157280296e-05, + "rewards/margins": 0.21970872581005096, + "rewards/rejected": -0.21973368525505066, + "step": 9450 + }, + { + "epoch": 6.535961272475795, + "grad_norm": 12.12063980102539, + "learning_rate": 1.9244659597356694e-05, + "log_odds_chosen": 9.785110473632812, + "log_odds_ratio": -0.0005236170836724341, + "logits/chosen": -0.6654300093650818, + "logits/rejected": -0.7001447677612305, + "logps/chosen": -0.0013286888133734465, + "logps/rejected": -2.0298948287963867, + "loss": 0.5691, + "nll_loss": 0.14221365749835968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013286888133734465, + "rewards/margins": 0.20285660028457642, + "rewards/rejected": -0.20298945903778076, + "step": 9451 + }, + { + "epoch": 6.536652835408022, + "grad_norm": 7.771703243255615, + "learning_rate": 1.9240817581066544e-05, + "log_odds_chosen": 10.353858947753906, + "log_odds_ratio": -8.914186037145555e-05, + "logits/chosen": -0.6196666955947876, + "logits/rejected": -0.6253817677497864, + "logps/chosen": -0.0004547428397927433, + "logps/rejected": -2.4246909618377686, + "loss": 0.6552, + "nll_loss": 0.16377976536750793, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5474280341295525e-05, + "rewards/margins": 0.24242360889911652, + "rewards/rejected": -0.24246907234191895, + "step": 9452 + }, + { + "epoch": 6.537344398340249, + "grad_norm": 5.618026256561279, + "learning_rate": 1.9236975564776396e-05, + "log_odds_chosen": 10.78561782836914, + "log_odds_ratio": -6.65760671836324e-05, + "logits/chosen": -0.4101612865924835, + "logits/rejected": -0.3885840177536011, + "logps/chosen": -0.0002911283809226006, + "logps/rejected": -2.4101407527923584, + "loss": 0.6213, + "nll_loss": 0.15532714128494263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9112838092260063e-05, + "rewards/margins": 0.2409849464893341, + "rewards/rejected": -0.24101409316062927, + "step": 9453 + }, + { + "epoch": 6.538035961272476, + "grad_norm": 8.519519805908203, + "learning_rate": 1.9233133548486245e-05, + "log_odds_chosen": 10.831727981567383, + "log_odds_ratio": -9.052889072336257e-05, + "logits/chosen": -0.42768576741218567, + "logits/rejected": -0.5330202579498291, + "logps/chosen": -0.0004460075870156288, + "logps/rejected": -2.201939105987549, + "loss": 0.7587, + "nll_loss": 0.18967759609222412, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.460075797396712e-05, + "rewards/margins": 0.22014930844306946, + "rewards/rejected": -0.22019392251968384, + "step": 9454 + }, + { + "epoch": 6.5387275242047025, + "grad_norm": 6.629385948181152, + "learning_rate": 1.9229291532196097e-05, + "log_odds_chosen": 10.440211296081543, + "log_odds_ratio": -0.00025720984558574855, + "logits/chosen": -0.7466363906860352, + "logits/rejected": -0.7641102075576782, + "logps/chosen": -0.00040673461626283824, + "logps/rejected": -1.9535751342773438, + "loss": 0.6623, + "nll_loss": 0.1655389368534088, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0673461626283824e-05, + "rewards/margins": 0.19531682133674622, + "rewards/rejected": -0.1953575164079666, + "step": 9455 + }, + { + "epoch": 6.539419087136929, + "grad_norm": 5.408649921417236, + "learning_rate": 1.922544951590595e-05, + "log_odds_chosen": 11.0181884765625, + "log_odds_ratio": -0.00020642187155317515, + "logits/chosen": -0.2848138213157654, + "logits/rejected": -0.3254838287830353, + "logps/chosen": -0.00031954696169123054, + "logps/rejected": -2.2532505989074707, + "loss": 0.4727, + "nll_loss": 0.11815480887889862, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1954696169123054e-05, + "rewards/margins": 0.2252930998802185, + "rewards/rejected": -0.22532503306865692, + "step": 9456 + }, + { + "epoch": 6.540110650069156, + "grad_norm": 5.664820671081543, + "learning_rate": 1.92216074996158e-05, + "log_odds_chosen": 10.822545051574707, + "log_odds_ratio": -0.00010074210149468854, + "logits/chosen": -0.5778256058692932, + "logits/rejected": -0.5243774056434631, + "logps/chosen": -0.0004190989420749247, + "logps/rejected": -2.700394868850708, + "loss": 0.5906, + "nll_loss": 0.14764924347400665, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.190989420749247e-05, + "rewards/margins": 0.26999756693840027, + "rewards/rejected": -0.27003949880599976, + "step": 9457 + }, + { + "epoch": 6.540802213001383, + "grad_norm": 9.502604484558105, + "learning_rate": 1.921776548332565e-05, + "log_odds_chosen": 9.924819946289062, + "log_odds_ratio": -0.00013832449621986598, + "logits/chosen": -0.5658762454986572, + "logits/rejected": -0.6234560012817383, + "logps/chosen": -0.0010439768666401505, + "logps/rejected": -1.702471137046814, + "loss": 0.5704, + "nll_loss": 0.1425761580467224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010439768811920658, + "rewards/margins": 0.17014271020889282, + "rewards/rejected": -0.1702471226453781, + "step": 9458 + }, + { + "epoch": 6.54149377593361, + "grad_norm": 4.656129360198975, + "learning_rate": 1.9213923467035504e-05, + "log_odds_chosen": 9.624756813049316, + "log_odds_ratio": -0.00042861714609898627, + "logits/chosen": -0.31074270606040955, + "logits/rejected": -0.3961406946182251, + "logps/chosen": -0.001230962690897286, + "logps/rejected": -2.0448856353759766, + "loss": 0.6971, + "nll_loss": 0.1742362380027771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012309628073126078, + "rewards/margins": 0.20436546206474304, + "rewards/rejected": -0.20448856055736542, + "step": 9459 + }, + { + "epoch": 6.542185338865837, + "grad_norm": 6.805206775665283, + "learning_rate": 1.9210081450745353e-05, + "log_odds_chosen": 9.775091171264648, + "log_odds_ratio": -0.0002657772274687886, + "logits/chosen": -0.499624639749527, + "logits/rejected": -0.4671819806098938, + "logps/chosen": -0.0012333440827205777, + "logps/rejected": -2.1008732318878174, + "loss": 0.6091, + "nll_loss": 0.15224380791187286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012333440827205777, + "rewards/margins": 0.20996397733688354, + "rewards/rejected": -0.21008731424808502, + "step": 9460 + }, + { + "epoch": 6.5428769017980635, + "grad_norm": 6.917591571807861, + "learning_rate": 1.9206239434455202e-05, + "log_odds_chosen": 10.791444778442383, + "log_odds_ratio": -4.337338395998813e-05, + "logits/chosen": -0.39784783124923706, + "logits/rejected": -0.5065803527832031, + "logps/chosen": -0.00024545512860640883, + "logps/rejected": -2.4440646171569824, + "loss": 0.698, + "nll_loss": 0.17449891567230225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.454551577102393e-05, + "rewards/margins": 0.24438193440437317, + "rewards/rejected": -0.24440649151802063, + "step": 9461 + }, + { + "epoch": 6.54356846473029, + "grad_norm": 4.706018447875977, + "learning_rate": 1.9202397418165054e-05, + "log_odds_chosen": 11.40475845336914, + "log_odds_ratio": -9.582944767316803e-05, + "logits/chosen": -0.5012403726577759, + "logits/rejected": -0.5878694653511047, + "logps/chosen": -0.00046555051812902093, + "logps/rejected": -3.4569056034088135, + "loss": 0.4919, + "nll_loss": 0.12297195196151733, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.655505108530633e-05, + "rewards/margins": 0.3456440269947052, + "rewards/rejected": -0.34569060802459717, + "step": 9462 + }, + { + "epoch": 6.544260027662517, + "grad_norm": 8.319991111755371, + "learning_rate": 1.9198555401874903e-05, + "log_odds_chosen": 8.73961067199707, + "log_odds_ratio": -0.0007463196525350213, + "logits/chosen": -0.7648007869720459, + "logits/rejected": -0.7282172441482544, + "logps/chosen": -0.0007134063635021448, + "logps/rejected": -1.2260456085205078, + "loss": 0.8386, + "nll_loss": 0.2095862329006195, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.134064071578905e-05, + "rewards/margins": 0.12253323197364807, + "rewards/rejected": -0.12260457128286362, + "step": 9463 + }, + { + "epoch": 6.544951590594744, + "grad_norm": 7.8542561531066895, + "learning_rate": 1.9194713385584756e-05, + "log_odds_chosen": 11.679158210754395, + "log_odds_ratio": -2.236912041553296e-05, + "logits/chosen": -0.8372731804847717, + "logits/rejected": -0.8716490268707275, + "logps/chosen": -0.00024520649458281696, + "logps/rejected": -2.850311279296875, + "loss": 0.7341, + "nll_loss": 0.1835135519504547, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.452065200486686e-05, + "rewards/margins": 0.2850066125392914, + "rewards/rejected": -0.28503113985061646, + "step": 9464 + }, + { + "epoch": 6.545643153526971, + "grad_norm": 9.019186019897461, + "learning_rate": 1.919087136929461e-05, + "log_odds_chosen": 9.300536155700684, + "log_odds_ratio": -0.21208545565605164, + "logits/chosen": 0.11748534440994263, + "logits/rejected": -0.05792899429798126, + "logps/chosen": -0.035312023013830185, + "logps/rejected": -1.7976481914520264, + "loss": 1.0377, + "nll_loss": 0.23820726573467255, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0035312026739120483, + "rewards/margins": 0.17623361945152283, + "rewards/rejected": -0.17976480722427368, + "step": 9465 + }, + { + "epoch": 6.546334716459198, + "grad_norm": 9.906425476074219, + "learning_rate": 1.9187029353004457e-05, + "log_odds_chosen": 11.869268417358398, + "log_odds_ratio": -1.1419995644246228e-05, + "logits/chosen": -0.6074370741844177, + "logits/rejected": -0.6842989921569824, + "logps/chosen": -0.00011990070197498426, + "logps/rejected": -2.7005834579467773, + "loss": 0.5716, + "nll_loss": 0.14290668070316315, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1990070561296307e-05, + "rewards/margins": 0.2700463831424713, + "rewards/rejected": -0.2700583338737488, + "step": 9466 + }, + { + "epoch": 6.5470262793914245, + "grad_norm": 7.5030317306518555, + "learning_rate": 1.918318733671431e-05, + "log_odds_chosen": 9.613960266113281, + "log_odds_ratio": -0.0002500044647604227, + "logits/chosen": -0.09690068662166595, + "logits/rejected": -0.12829184532165527, + "logps/chosen": -0.00881747156381607, + "logps/rejected": -2.3322060108184814, + "loss": 0.653, + "nll_loss": 0.16322842240333557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008817471680231392, + "rewards/margins": 0.2323388308286667, + "rewards/rejected": -0.23322060704231262, + "step": 9467 + }, + { + "epoch": 6.547717842323651, + "grad_norm": 5.7620744705200195, + "learning_rate": 1.9179345320424162e-05, + "log_odds_chosen": 11.665501594543457, + "log_odds_ratio": -3.687728167278692e-05, + "logits/chosen": -0.25433778762817383, + "logits/rejected": -0.2974012494087219, + "logps/chosen": -0.0004080279322806746, + "logps/rejected": -3.097548007965088, + "loss": 0.8516, + "nll_loss": 0.2128935307264328, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.080279541085474e-05, + "rewards/margins": 0.30971401929855347, + "rewards/rejected": -0.30975478887557983, + "step": 9468 + }, + { + "epoch": 6.548409405255878, + "grad_norm": 8.611531257629395, + "learning_rate": 1.917550330413401e-05, + "log_odds_chosen": 10.0744047164917, + "log_odds_ratio": -0.000632865761872381, + "logits/chosen": -0.8037955164909363, + "logits/rejected": -0.7918911576271057, + "logps/chosen": -0.002187209203839302, + "logps/rejected": -2.4003305435180664, + "loss": 0.4801, + "nll_loss": 0.11996515095233917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002187209320254624, + "rewards/margins": 0.23981432616710663, + "rewards/rejected": -0.24003303050994873, + "step": 9469 + }, + { + "epoch": 6.549100968188105, + "grad_norm": 5.34125280380249, + "learning_rate": 1.917166128784386e-05, + "log_odds_chosen": 11.037103652954102, + "log_odds_ratio": -0.0001046846155077219, + "logits/chosen": -0.24765129387378693, + "logits/rejected": -0.3548104166984558, + "logps/chosen": -0.00016740552382543683, + "logps/rejected": -1.8313382863998413, + "loss": 0.5318, + "nll_loss": 0.13294678926467896, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.674055056355428e-05, + "rewards/margins": 0.1831170916557312, + "rewards/rejected": -0.1831338256597519, + "step": 9470 + }, + { + "epoch": 6.549792531120332, + "grad_norm": 11.186291694641113, + "learning_rate": 1.9167819271553713e-05, + "log_odds_chosen": 11.0018310546875, + "log_odds_ratio": -2.549621058278717e-05, + "logits/chosen": -0.14623451232910156, + "logits/rejected": -0.2587830424308777, + "logps/chosen": -0.00013841589679941535, + "logps/rejected": -1.5451520681381226, + "loss": 0.7605, + "nll_loss": 0.1901187300682068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3841590771335177e-05, + "rewards/margins": 0.15450136363506317, + "rewards/rejected": -0.15451520681381226, + "step": 9471 + }, + { + "epoch": 6.550484094052559, + "grad_norm": 6.574780464172363, + "learning_rate": 1.9163977255263562e-05, + "log_odds_chosen": 11.316503524780273, + "log_odds_ratio": -0.00011724029172910377, + "logits/chosen": -0.6436998844146729, + "logits/rejected": -0.7034087181091309, + "logps/chosen": -0.00010371260577812791, + "logps/rejected": -2.339920997619629, + "loss": 0.9627, + "nll_loss": 0.240675687789917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.037126003211597e-05, + "rewards/margins": 0.23398171365261078, + "rewards/rejected": -0.2339920699596405, + "step": 9472 + }, + { + "epoch": 6.551175656984785, + "grad_norm": 5.006659984588623, + "learning_rate": 1.9160135238973414e-05, + "log_odds_chosen": 10.22900104522705, + "log_odds_ratio": -0.0001384686620440334, + "logits/chosen": -0.35459089279174805, + "logits/rejected": -0.3786230683326721, + "logps/chosen": -0.0006998850731179118, + "logps/rejected": -2.1292760372161865, + "loss": 0.3758, + "nll_loss": 0.09392721951007843, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.998850585659966e-05, + "rewards/margins": 0.21285760402679443, + "rewards/rejected": -0.21292759478092194, + "step": 9473 + }, + { + "epoch": 6.551867219917012, + "grad_norm": 6.996860980987549, + "learning_rate": 1.9156293222683267e-05, + "log_odds_chosen": 9.235257148742676, + "log_odds_ratio": -0.00016873932327143848, + "logits/chosen": -0.7287513017654419, + "logits/rejected": -0.8030807971954346, + "logps/chosen": -0.0012789568863809109, + "logps/rejected": -1.5626949071884155, + "loss": 0.6777, + "nll_loss": 0.16941440105438232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012789569154847413, + "rewards/margins": 0.15614160895347595, + "rewards/rejected": -0.15626949071884155, + "step": 9474 + }, + { + "epoch": 6.552558782849239, + "grad_norm": 7.9105730056762695, + "learning_rate": 1.9152451206393116e-05, + "log_odds_chosen": 9.815263748168945, + "log_odds_ratio": -0.0008661964093334973, + "logits/chosen": -0.900322675704956, + "logits/rejected": -0.8875852823257446, + "logps/chosen": -0.000402643287088722, + "logps/rejected": -1.414959192276001, + "loss": 1.0646, + "nll_loss": 0.26607388257980347, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.026433089165948e-05, + "rewards/margins": 0.14145566523075104, + "rewards/rejected": -0.14149592816829681, + "step": 9475 + }, + { + "epoch": 6.553250345781466, + "grad_norm": 18.255563735961914, + "learning_rate": 1.914860919010297e-05, + "log_odds_chosen": 12.136200904846191, + "log_odds_ratio": -1.486710607423447e-05, + "logits/chosen": -0.3935670852661133, + "logits/rejected": -0.4665505290031433, + "logps/chosen": -9.996606240747496e-05, + "logps/rejected": -2.7526142597198486, + "loss": 0.6183, + "nll_loss": 0.15456387400627136, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.996606422646437e-06, + "rewards/margins": 0.2752514183521271, + "rewards/rejected": -0.27526140213012695, + "step": 9476 + }, + { + "epoch": 6.553941908713693, + "grad_norm": 4.371954441070557, + "learning_rate": 1.914476717381282e-05, + "log_odds_chosen": 10.711895942687988, + "log_odds_ratio": -5.91963944316376e-05, + "logits/chosen": -0.6419881582260132, + "logits/rejected": -0.6792569756507874, + "logps/chosen": -0.00010584262054180726, + "logps/rejected": -1.6583061218261719, + "loss": 0.7602, + "nll_loss": 0.1900506317615509, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0584262781776488e-05, + "rewards/margins": 0.16582003235816956, + "rewards/rejected": -0.1658306121826172, + "step": 9477 + }, + { + "epoch": 6.55463347164592, + "grad_norm": 5.113374710083008, + "learning_rate": 1.914092515752267e-05, + "log_odds_chosen": 11.083767890930176, + "log_odds_ratio": -8.742226782487705e-05, + "logits/chosen": -0.5468670129776001, + "logits/rejected": -0.6056516170501709, + "logps/chosen": -0.000979842385277152, + "logps/rejected": -2.4781551361083984, + "loss": 0.4569, + "nll_loss": 0.11422540247440338, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.79842443484813e-05, + "rewards/margins": 0.24771751463413239, + "rewards/rejected": -0.24781548976898193, + "step": 9478 + }, + { + "epoch": 6.555325034578146, + "grad_norm": 6.143009185791016, + "learning_rate": 1.913708314123252e-05, + "log_odds_chosen": 11.41762638092041, + "log_odds_ratio": -1.6329569916706532e-05, + "logits/chosen": -0.10203643143177032, + "logits/rejected": -0.22533483803272247, + "logps/chosen": -0.00042999981087632477, + "logps/rejected": -2.9852843284606934, + "loss": 0.6501, + "nll_loss": 0.1625293344259262, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2999985453207046e-05, + "rewards/margins": 0.2984854280948639, + "rewards/rejected": -0.29852843284606934, + "step": 9479 + }, + { + "epoch": 6.556016597510373, + "grad_norm": 14.926573753356934, + "learning_rate": 1.9133241124942368e-05, + "log_odds_chosen": 10.724283218383789, + "log_odds_ratio": -4.1447154217166826e-05, + "logits/chosen": -0.6632795333862305, + "logits/rejected": -0.5676432847976685, + "logps/chosen": -0.0001383264025207609, + "logps/rejected": -1.9790575504302979, + "loss": 0.7707, + "nll_loss": 0.19267025589942932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3832639524480328e-05, + "rewards/margins": 0.1978919506072998, + "rewards/rejected": -0.1979057788848877, + "step": 9480 + }, + { + "epoch": 6.5567081604426, + "grad_norm": 6.726595401763916, + "learning_rate": 1.912939910865222e-05, + "log_odds_chosen": 9.883056640625, + "log_odds_ratio": -9.502652392257005e-05, + "logits/chosen": -0.4928602874279022, + "logits/rejected": -0.48830845952033997, + "logps/chosen": -0.0003097387671004981, + "logps/rejected": -1.9427212476730347, + "loss": 0.4563, + "nll_loss": 0.11406896263360977, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.097387889283709e-05, + "rewards/margins": 0.19424115121364594, + "rewards/rejected": -0.19427213072776794, + "step": 9481 + }, + { + "epoch": 6.557399723374827, + "grad_norm": 6.6166181564331055, + "learning_rate": 1.9125557092362073e-05, + "log_odds_chosen": 10.360993385314941, + "log_odds_ratio": -0.0005267745582386851, + "logits/chosen": -0.7267522811889648, + "logits/rejected": -0.9201721549034119, + "logps/chosen": -0.000601665407884866, + "logps/rejected": -1.9809919595718384, + "loss": 0.8314, + "nll_loss": 0.2078043818473816, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0166537878103554e-05, + "rewards/margins": 0.19803902506828308, + "rewards/rejected": -0.19809919595718384, + "step": 9482 + }, + { + "epoch": 6.558091286307054, + "grad_norm": 5.971362590789795, + "learning_rate": 1.9121715076071922e-05, + "log_odds_chosen": 10.730911254882812, + "log_odds_ratio": -9.043634054251015e-05, + "logits/chosen": -0.5595455169677734, + "logits/rejected": -0.5485619306564331, + "logps/chosen": -0.0006760087562724948, + "logps/rejected": -2.1176257133483887, + "loss": 0.4346, + "nll_loss": 0.10863366723060608, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.760087126167491e-05, + "rewards/margins": 0.21169498562812805, + "rewards/rejected": -0.21176259219646454, + "step": 9483 + }, + { + "epoch": 6.558782849239281, + "grad_norm": 4.847436904907227, + "learning_rate": 1.9117873059781774e-05, + "log_odds_chosen": 9.789130210876465, + "log_odds_ratio": -0.00028666871367022395, + "logits/chosen": -0.652285099029541, + "logits/rejected": -0.7685700058937073, + "logps/chosen": -0.0002560943830758333, + "logps/rejected": -1.5029850006103516, + "loss": 0.4915, + "nll_loss": 0.1228560283780098, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5609435397200286e-05, + "rewards/margins": 0.1502728909254074, + "rewards/rejected": -0.15029850602149963, + "step": 9484 + }, + { + "epoch": 6.559474412171507, + "grad_norm": 7.007932662963867, + "learning_rate": 1.9114031043491627e-05, + "log_odds_chosen": 11.106407165527344, + "log_odds_ratio": -3.277970608905889e-05, + "logits/chosen": -0.42606693506240845, + "logits/rejected": -0.513460636138916, + "logps/chosen": -0.00017478324298281223, + "logps/rejected": -2.249858856201172, + "loss": 0.9326, + "nll_loss": 0.23315876722335815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7478325389674865e-05, + "rewards/margins": 0.22496840357780457, + "rewards/rejected": -0.22498586773872375, + "step": 9485 + }, + { + "epoch": 6.560165975103734, + "grad_norm": 7.205862045288086, + "learning_rate": 1.9110189027201476e-05, + "log_odds_chosen": 10.024550437927246, + "log_odds_ratio": -0.0002134918759111315, + "logits/chosen": -0.5787373185157776, + "logits/rejected": -0.6127051115036011, + "logps/chosen": -0.002206590957939625, + "logps/rejected": -2.553743600845337, + "loss": 0.5282, + "nll_loss": 0.1320350468158722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022065910161472857, + "rewards/margins": 0.25515371561050415, + "rewards/rejected": -0.25537434220314026, + "step": 9486 + }, + { + "epoch": 6.560857538035961, + "grad_norm": 4.7558417320251465, + "learning_rate": 1.9106347010911328e-05, + "log_odds_chosen": 10.852564811706543, + "log_odds_ratio": -4.599963722284883e-05, + "logits/chosen": -0.7739884257316589, + "logits/rejected": -0.889204740524292, + "logps/chosen": -0.00010290837963111699, + "logps/rejected": -1.8536291122436523, + "loss": 0.6837, + "nll_loss": 0.17092812061309814, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0290837963111699e-05, + "rewards/margins": 0.1853526085615158, + "rewards/rejected": -0.18536292016506195, + "step": 9487 + }, + { + "epoch": 6.561549100968188, + "grad_norm": 9.699813842773438, + "learning_rate": 1.9102504994621177e-05, + "log_odds_chosen": 10.22033405303955, + "log_odds_ratio": -0.00013328839850146323, + "logits/chosen": -0.643222987651825, + "logits/rejected": -0.6561555862426758, + "logps/chosen": -0.00040478675509802997, + "logps/rejected": -1.997079610824585, + "loss": 0.5048, + "nll_loss": 0.12619677186012268, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.047867696499452e-05, + "rewards/margins": 0.19966749846935272, + "rewards/rejected": -0.1997079700231552, + "step": 9488 + }, + { + "epoch": 6.562240663900415, + "grad_norm": 7.772004127502441, + "learning_rate": 1.9098662978331026e-05, + "log_odds_chosen": 10.799205780029297, + "log_odds_ratio": -0.00011516768427100033, + "logits/chosen": -0.6208674907684326, + "logits/rejected": -0.6132377982139587, + "logps/chosen": -0.0009005725150927901, + "logps/rejected": -2.7482409477233887, + "loss": 1.4155, + "nll_loss": 0.3538641035556793, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.005725587485358e-05, + "rewards/margins": 0.2747340202331543, + "rewards/rejected": -0.2748240828514099, + "step": 9489 + }, + { + "epoch": 6.5629322268326415, + "grad_norm": 5.275304317474365, + "learning_rate": 1.909482096204088e-05, + "log_odds_chosen": 11.012089729309082, + "log_odds_ratio": -4.3458290747366846e-05, + "logits/chosen": 0.15669658780097961, + "logits/rejected": 0.09660986065864563, + "logps/chosen": -0.00021308660507202148, + "logps/rejected": -2.4177041053771973, + "loss": 0.4676, + "nll_loss": 0.11689440906047821, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.130866050720215e-05, + "rewards/margins": 0.241749107837677, + "rewards/rejected": -0.2417704164981842, + "step": 9490 + }, + { + "epoch": 6.563623789764868, + "grad_norm": 5.077573776245117, + "learning_rate": 1.909097894575073e-05, + "log_odds_chosen": 10.685736656188965, + "log_odds_ratio": -7.411053229589015e-05, + "logits/chosen": 0.0397484228014946, + "logits/rejected": -0.03544532507658005, + "logps/chosen": -0.00021711325098294765, + "logps/rejected": -2.0057690143585205, + "loss": 0.7528, + "nll_loss": 0.1881898045539856, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1711324734496884e-05, + "rewards/margins": 0.20055519044399261, + "rewards/rejected": -0.20057690143585205, + "step": 9491 + }, + { + "epoch": 6.564315352697095, + "grad_norm": 9.68488597869873, + "learning_rate": 1.908713692946058e-05, + "log_odds_chosen": 11.285282135009766, + "log_odds_ratio": -4.871240525972098e-05, + "logits/chosen": -0.5057222843170166, + "logits/rejected": -0.5519887208938599, + "logps/chosen": -0.0002046864974545315, + "logps/rejected": -2.48622989654541, + "loss": 0.6181, + "nll_loss": 0.15451568365097046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0468651200644672e-05, + "rewards/margins": 0.2486025094985962, + "rewards/rejected": -0.24862298369407654, + "step": 9492 + }, + { + "epoch": 6.565006915629322, + "grad_norm": 9.92271900177002, + "learning_rate": 1.9083294913170433e-05, + "log_odds_chosen": 11.072915077209473, + "log_odds_ratio": -7.389196252916008e-05, + "logits/chosen": -0.047651246190071106, + "logits/rejected": -0.0919855609536171, + "logps/chosen": -0.0001974912011064589, + "logps/rejected": -2.3384013175964355, + "loss": 0.6102, + "nll_loss": 0.15253698825836182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9749117200262845e-05, + "rewards/margins": 0.23382039368152618, + "rewards/rejected": -0.23384013772010803, + "step": 9493 + }, + { + "epoch": 6.565698478561549, + "grad_norm": 6.103567600250244, + "learning_rate": 1.9079452896880285e-05, + "log_odds_chosen": 10.56631851196289, + "log_odds_ratio": -0.0001871968706836924, + "logits/chosen": -0.5657141208648682, + "logits/rejected": -0.5551555156707764, + "logps/chosen": -0.000143907280289568, + "logps/rejected": -1.7810739278793335, + "loss": 0.7097, + "nll_loss": 0.1774117797613144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.439072730136104e-05, + "rewards/margins": 0.17809300124645233, + "rewards/rejected": -0.1781073957681656, + "step": 9494 + }, + { + "epoch": 6.566390041493776, + "grad_norm": 4.250610828399658, + "learning_rate": 1.9075610880590134e-05, + "log_odds_chosen": 10.280776977539062, + "log_odds_ratio": -5.944541044300422e-05, + "logits/chosen": -0.13907837867736816, + "logits/rejected": -0.1998562067747116, + "logps/chosen": -0.00017767293320503086, + "logps/rejected": -1.4102723598480225, + "loss": 0.3628, + "nll_loss": 0.09070321172475815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7767293684300967e-05, + "rewards/margins": 0.14100947976112366, + "rewards/rejected": -0.14102724194526672, + "step": 9495 + }, + { + "epoch": 6.5670816044260025, + "grad_norm": 8.311246871948242, + "learning_rate": 1.9071768864299987e-05, + "log_odds_chosen": 10.381108283996582, + "log_odds_ratio": -0.00028720340924337506, + "logits/chosen": -0.03732209652662277, + "logits/rejected": -0.08972935378551483, + "logps/chosen": -0.0005405236152000725, + "logps/rejected": -2.375124454498291, + "loss": 0.5801, + "nll_loss": 0.14500656723976135, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.405236152000725e-05, + "rewards/margins": 0.23745840787887573, + "rewards/rejected": -0.237512469291687, + "step": 9496 + }, + { + "epoch": 6.567773167358229, + "grad_norm": 4.872702598571777, + "learning_rate": 1.9067926848009836e-05, + "log_odds_chosen": 10.836227416992188, + "log_odds_ratio": -3.483764885459095e-05, + "logits/chosen": -0.0781363844871521, + "logits/rejected": -0.32934820652008057, + "logps/chosen": -0.0002562236040830612, + "logps/rejected": -2.0514607429504395, + "loss": 0.5159, + "nll_loss": 0.12898054718971252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.562236477388069e-05, + "rewards/margins": 0.20512044429779053, + "rewards/rejected": -0.20514605939388275, + "step": 9497 + }, + { + "epoch": 6.568464730290456, + "grad_norm": 4.309937000274658, + "learning_rate": 1.9064084831719685e-05, + "log_odds_chosen": 10.821893692016602, + "log_odds_ratio": -6.669486901955679e-05, + "logits/chosen": -0.4724667966365814, + "logits/rejected": -0.48318246006965637, + "logps/chosen": -0.0002066226297756657, + "logps/rejected": -2.2214860916137695, + "loss": 0.3534, + "nll_loss": 0.0883367583155632, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.066226261376869e-05, + "rewards/margins": 0.22212794423103333, + "rewards/rejected": -0.222148597240448, + "step": 9498 + }, + { + "epoch": 6.569156293222683, + "grad_norm": 7.079944610595703, + "learning_rate": 1.9060242815429537e-05, + "log_odds_chosen": 11.52110481262207, + "log_odds_ratio": -7.554404146503657e-05, + "logits/chosen": -0.8700163960456848, + "logits/rejected": -0.9756256341934204, + "logps/chosen": -0.0003580303455237299, + "logps/rejected": -2.680870294570923, + "loss": 0.4434, + "nll_loss": 0.11085189133882523, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.580303382477723e-05, + "rewards/margins": 0.26805123686790466, + "rewards/rejected": -0.2680870294570923, + "step": 9499 + }, + { + "epoch": 6.56984785615491, + "grad_norm": 6.172976493835449, + "learning_rate": 1.905640079913939e-05, + "log_odds_chosen": 10.494388580322266, + "log_odds_ratio": -4.967241329723038e-05, + "logits/chosen": -0.44258636236190796, + "logits/rejected": -0.4766414165496826, + "logps/chosen": -0.00012744334526360035, + "logps/rejected": -1.7994089126586914, + "loss": 0.6341, + "nll_loss": 0.15851013362407684, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2744334526360035e-05, + "rewards/margins": 0.17992815375328064, + "rewards/rejected": -0.17994090914726257, + "step": 9500 + }, + { + "epoch": 6.570539419087137, + "grad_norm": 9.317890167236328, + "learning_rate": 1.905255878284924e-05, + "log_odds_chosen": 10.344244956970215, + "log_odds_ratio": -0.00025330157950520515, + "logits/chosen": -0.5354888439178467, + "logits/rejected": -0.4884495437145233, + "logps/chosen": -0.0006745475111529231, + "logps/rejected": -1.889319658279419, + "loss": 0.6263, + "nll_loss": 0.1565524935722351, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.745474820490927e-05, + "rewards/margins": 0.18886449933052063, + "rewards/rejected": -0.18893194198608398, + "step": 9501 + }, + { + "epoch": 6.5712309820193635, + "grad_norm": 7.175146579742432, + "learning_rate": 1.904871676655909e-05, + "log_odds_chosen": 11.946460723876953, + "log_odds_ratio": -1.0231826308881864e-05, + "logits/chosen": -0.3202419579029083, + "logits/rejected": -0.3598886728286743, + "logps/chosen": -7.545893458882347e-05, + "logps/rejected": -2.313094139099121, + "loss": 0.682, + "nll_loss": 0.17050229012966156, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5458938226802275e-06, + "rewards/margins": 0.23130187392234802, + "rewards/rejected": -0.2313094139099121, + "step": 9502 + }, + { + "epoch": 6.57192254495159, + "grad_norm": 8.389784812927246, + "learning_rate": 1.9044874750268944e-05, + "log_odds_chosen": 11.532312393188477, + "log_odds_ratio": -1.5520981833105907e-05, + "logits/chosen": -0.3472404479980469, + "logits/rejected": -0.4375644326210022, + "logps/chosen": -0.00013552154996432364, + "logps/rejected": -2.4274983406066895, + "loss": 0.7095, + "nll_loss": 0.17736461758613586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3552154996432364e-05, + "rewards/margins": 0.2427363097667694, + "rewards/rejected": -0.24274984002113342, + "step": 9503 + }, + { + "epoch": 6.572614107883817, + "grad_norm": 8.687039375305176, + "learning_rate": 1.9041032733978793e-05, + "log_odds_chosen": 7.895865440368652, + "log_odds_ratio": -0.3241705596446991, + "logits/chosen": -0.9922645092010498, + "logits/rejected": -0.9834545850753784, + "logps/chosen": -0.05756475776433945, + "logps/rejected": -1.5002421140670776, + "loss": 0.9695, + "nll_loss": 0.20995807647705078, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005756476428359747, + "rewards/margins": 0.144267737865448, + "rewards/rejected": -0.15002422034740448, + "step": 9504 + }, + { + "epoch": 6.573305670816044, + "grad_norm": 5.384150981903076, + "learning_rate": 1.9037190717688645e-05, + "log_odds_chosen": 10.194380760192871, + "log_odds_ratio": -8.458264346700162e-05, + "logits/chosen": -0.5537482500076294, + "logits/rejected": -0.5107707977294922, + "logps/chosen": -0.0008147121407091618, + "logps/rejected": -1.905914068222046, + "loss": 0.483, + "nll_loss": 0.12074509263038635, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.147121116053313e-05, + "rewards/margins": 0.19050993025302887, + "rewards/rejected": -0.19059139490127563, + "step": 9505 + }, + { + "epoch": 6.573997233748271, + "grad_norm": 5.172842979431152, + "learning_rate": 1.9033348701398494e-05, + "log_odds_chosen": 11.013910293579102, + "log_odds_ratio": -9.697994391899556e-05, + "logits/chosen": -0.18725240230560303, + "logits/rejected": -0.2527714967727661, + "logps/chosen": -0.00037460378371179104, + "logps/rejected": -2.615182876586914, + "loss": 0.3211, + "nll_loss": 0.08026767522096634, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.746037691598758e-05, + "rewards/margins": 0.26148083806037903, + "rewards/rejected": -0.26151829957962036, + "step": 9506 + }, + { + "epoch": 6.574688796680498, + "grad_norm": 4.038562774658203, + "learning_rate": 1.9029506685108343e-05, + "log_odds_chosen": 9.88060188293457, + "log_odds_ratio": -0.0001938038767548278, + "logits/chosen": -0.2381129413843155, + "logits/rejected": -0.20977772772312164, + "logps/chosen": -0.0005058772512711585, + "logps/rejected": -1.8327600955963135, + "loss": 0.561, + "nll_loss": 0.14024168252944946, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0587725127115846e-05, + "rewards/margins": 0.18322542309761047, + "rewards/rejected": -0.1832760125398636, + "step": 9507 + }, + { + "epoch": 6.5753803596127245, + "grad_norm": 5.107179164886475, + "learning_rate": 1.9025664668818196e-05, + "log_odds_chosen": 11.771180152893066, + "log_odds_ratio": -1.0985384506057017e-05, + "logits/chosen": -0.7047210335731506, + "logits/rejected": -0.7815711498260498, + "logps/chosen": -9.567014058120549e-05, + "logps/rejected": -2.4914684295654297, + "loss": 0.4273, + "nll_loss": 0.1068139374256134, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.56701478571631e-06, + "rewards/margins": 0.249137282371521, + "rewards/rejected": -0.24914684891700745, + "step": 9508 + }, + { + "epoch": 6.576071922544951, + "grad_norm": 5.086597919464111, + "learning_rate": 1.9021822652528048e-05, + "log_odds_chosen": 11.12252426147461, + "log_odds_ratio": -2.3953118216013536e-05, + "logits/chosen": -0.807060718536377, + "logits/rejected": -0.839371383190155, + "logps/chosen": -0.00011378790077287704, + "logps/rejected": -1.8926666975021362, + "loss": 0.5818, + "nll_loss": 0.14543595910072327, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1378790077287704e-05, + "rewards/margins": 0.18925531208515167, + "rewards/rejected": -0.18926669657230377, + "step": 9509 + }, + { + "epoch": 6.576763485477178, + "grad_norm": 8.26449966430664, + "learning_rate": 1.9017980636237897e-05, + "log_odds_chosen": 10.97335433959961, + "log_odds_ratio": -7.209448813227937e-05, + "logits/chosen": -0.16519924998283386, + "logits/rejected": -0.20669898390769958, + "logps/chosen": -0.00023038122162688524, + "logps/rejected": -2.1607444286346436, + "loss": 0.6227, + "nll_loss": 0.15566156804561615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3038122890284285e-05, + "rewards/margins": 0.2160513997077942, + "rewards/rejected": -0.21607445180416107, + "step": 9510 + }, + { + "epoch": 6.577455048409405, + "grad_norm": 9.655121803283691, + "learning_rate": 1.901413861994775e-05, + "log_odds_chosen": 9.956766128540039, + "log_odds_ratio": -9.793389472179115e-05, + "logits/chosen": -0.218642920255661, + "logits/rejected": -0.3094644844532013, + "logps/chosen": -0.000380536075681448, + "logps/rejected": -1.8787182569503784, + "loss": 0.9288, + "nll_loss": 0.2321830689907074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.80536075681448e-05, + "rewards/margins": 0.1878337562084198, + "rewards/rejected": -0.1878717988729477, + "step": 9511 + }, + { + "epoch": 6.578146611341632, + "grad_norm": 10.619027137756348, + "learning_rate": 1.9010296603657602e-05, + "log_odds_chosen": 10.957955360412598, + "log_odds_ratio": -7.806985377101228e-05, + "logits/chosen": -0.07476891577243805, + "logits/rejected": -0.12704113125801086, + "logps/chosen": -0.00032312539406120777, + "logps/rejected": -2.5475821495056152, + "loss": 0.9755, + "nll_loss": 0.24385805428028107, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2312538678525016e-05, + "rewards/margins": 0.25472593307495117, + "rewards/rejected": -0.25475820899009705, + "step": 9512 + }, + { + "epoch": 6.578838174273859, + "grad_norm": 6.9182820320129395, + "learning_rate": 1.900645458736745e-05, + "log_odds_chosen": 10.320255279541016, + "log_odds_ratio": -0.00038485744153149426, + "logits/chosen": -0.44043487310409546, + "logits/rejected": -0.43507570028305054, + "logps/chosen": -0.000618858146481216, + "logps/rejected": -2.230238437652588, + "loss": 0.7855, + "nll_loss": 0.19633594155311584, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.188581028254703e-05, + "rewards/margins": 0.22296196222305298, + "rewards/rejected": -0.22302386164665222, + "step": 9513 + }, + { + "epoch": 6.5795297372060855, + "grad_norm": 17.059465408325195, + "learning_rate": 1.9002612571077304e-05, + "log_odds_chosen": 11.300527572631836, + "log_odds_ratio": -2.5722471036715433e-05, + "logits/chosen": -0.45718055963516235, + "logits/rejected": -0.5097206234931946, + "logps/chosen": -0.00021330831805244088, + "logps/rejected": -2.203810691833496, + "loss": 0.6545, + "nll_loss": 0.1636224389076233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.133083216904197e-05, + "rewards/margins": 0.22035974264144897, + "rewards/rejected": -0.22038106620311737, + "step": 9514 + }, + { + "epoch": 6.580221300138312, + "grad_norm": 8.320899963378906, + "learning_rate": 1.8998770554787153e-05, + "log_odds_chosen": 10.619794845581055, + "log_odds_ratio": -7.08003863110207e-05, + "logits/chosen": -0.4190692603588104, + "logits/rejected": -0.4577012062072754, + "logps/chosen": -0.0003057790454477072, + "logps/rejected": -2.1481821537017822, + "loss": 0.6001, + "nll_loss": 0.15001440048217773, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0577903089579195e-05, + "rewards/margins": 0.21478766202926636, + "rewards/rejected": -0.21481823921203613, + "step": 9515 + }, + { + "epoch": 6.580912863070539, + "grad_norm": 6.303850173950195, + "learning_rate": 1.8994928538497e-05, + "log_odds_chosen": 9.006599426269531, + "log_odds_ratio": -0.0007060145726427436, + "logits/chosen": -0.500159740447998, + "logits/rejected": -0.7312350273132324, + "logps/chosen": -0.0014982303837314248, + "logps/rejected": -1.930267572402954, + "loss": 0.6199, + "nll_loss": 0.1548926830291748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001498230267316103, + "rewards/margins": 0.1928769201040268, + "rewards/rejected": -0.19302673637866974, + "step": 9516 + }, + { + "epoch": 6.581604426002766, + "grad_norm": 4.413415431976318, + "learning_rate": 1.8991086522206854e-05, + "log_odds_chosen": 10.201871871948242, + "log_odds_ratio": -5.679738023900427e-05, + "logits/chosen": -0.8834502696990967, + "logits/rejected": -0.8898751139640808, + "logps/chosen": -0.000270822987658903, + "logps/rejected": -1.7805309295654297, + "loss": 0.3352, + "nll_loss": 0.08379855006933212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7082300221081823e-05, + "rewards/margins": 0.1780260056257248, + "rewards/rejected": -0.178053081035614, + "step": 9517 + }, + { + "epoch": 6.582295988934993, + "grad_norm": 10.41358757019043, + "learning_rate": 1.8987244505916707e-05, + "log_odds_chosen": 11.547701835632324, + "log_odds_ratio": -2.4673521693330258e-05, + "logits/chosen": -0.5142653584480286, + "logits/rejected": -0.41769781708717346, + "logps/chosen": -0.00011675099813146517, + "logps/rejected": -2.200291156768799, + "loss": 0.711, + "nll_loss": 0.17774221301078796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1675099813146517e-05, + "rewards/margins": 0.2200174480676651, + "rewards/rejected": -0.22002913057804108, + "step": 9518 + }, + { + "epoch": 6.58298755186722, + "grad_norm": 7.81559944152832, + "learning_rate": 1.8983402489626556e-05, + "log_odds_chosen": 10.948009490966797, + "log_odds_ratio": -9.050434164237231e-05, + "logits/chosen": -0.33920398354530334, + "logits/rejected": -0.3603125512599945, + "logps/chosen": -0.0003907711070496589, + "logps/rejected": -2.4962830543518066, + "loss": 0.6102, + "nll_loss": 0.15255282819271088, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.907710924977437e-05, + "rewards/margins": 0.2495892345905304, + "rewards/rejected": -0.24962830543518066, + "step": 9519 + }, + { + "epoch": 6.5836791147994465, + "grad_norm": 6.346920490264893, + "learning_rate": 1.8979560473336408e-05, + "log_odds_chosen": 8.478560447692871, + "log_odds_ratio": -0.006527758669108152, + "logits/chosen": -0.3838501572608948, + "logits/rejected": -0.5643572807312012, + "logps/chosen": -0.004246383905410767, + "logps/rejected": -1.2931175231933594, + "loss": 0.6578, + "nll_loss": 0.16379287838935852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042463839054107666, + "rewards/margins": 0.1288871020078659, + "rewards/rejected": -0.12931174039840698, + "step": 9520 + }, + { + "epoch": 6.584370677731673, + "grad_norm": 5.637845516204834, + "learning_rate": 1.897571845704626e-05, + "log_odds_chosen": 10.634634017944336, + "log_odds_ratio": -0.00018999997701030225, + "logits/chosen": -0.8730132579803467, + "logits/rejected": -0.9408348798751831, + "logps/chosen": -0.00012764170242007822, + "logps/rejected": -1.8069618940353394, + "loss": 0.7198, + "nll_loss": 0.1799338161945343, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2764169696311e-05, + "rewards/margins": 0.1806834191083908, + "rewards/rejected": -0.18069618940353394, + "step": 9521 + }, + { + "epoch": 6.5850622406639, + "grad_norm": 12.139360427856445, + "learning_rate": 1.897187644075611e-05, + "log_odds_chosen": 11.475465774536133, + "log_odds_ratio": -0.00010113770986208692, + "logits/chosen": -0.2640308439731598, + "logits/rejected": -0.3200063705444336, + "logps/chosen": -0.00028492099954746664, + "logps/rejected": -3.306983470916748, + "loss": 0.6333, + "nll_loss": 0.15832307934761047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8492100682342425e-05, + "rewards/margins": 0.3306698799133301, + "rewards/rejected": -0.3306983411312103, + "step": 9522 + }, + { + "epoch": 6.585753803596127, + "grad_norm": 12.55954360961914, + "learning_rate": 1.8968034424465962e-05, + "log_odds_chosen": 11.508321762084961, + "log_odds_ratio": -3.0403138225665316e-05, + "logits/chosen": -0.4079541563987732, + "logits/rejected": -0.4889557361602783, + "logps/chosen": -0.0002845217240974307, + "logps/rejected": -3.2464981079101562, + "loss": 0.597, + "nll_loss": 0.1492396742105484, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8452173864934593e-05, + "rewards/margins": 0.32462140917778015, + "rewards/rejected": -0.324649840593338, + "step": 9523 + }, + { + "epoch": 6.586445366528354, + "grad_norm": 4.427008152008057, + "learning_rate": 1.896419240817581e-05, + "log_odds_chosen": 10.566621780395508, + "log_odds_ratio": -0.00019363139290362597, + "logits/chosen": -0.2653787434101105, + "logits/rejected": -0.34483182430267334, + "logps/chosen": -0.0005865857820026577, + "logps/rejected": -2.4292454719543457, + "loss": 0.4088, + "nll_loss": 0.1021810993552208, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865856655873358e-05, + "rewards/margins": 0.2428659051656723, + "rewards/rejected": -0.2429245561361313, + "step": 9524 + }, + { + "epoch": 6.587136929460581, + "grad_norm": 5.990933895111084, + "learning_rate": 1.896035039188566e-05, + "log_odds_chosen": 9.884780883789062, + "log_odds_ratio": -0.0008313123253174126, + "logits/chosen": -0.4263268709182739, + "logits/rejected": -0.45220786333084106, + "logps/chosen": -0.00040180000360123813, + "logps/rejected": -1.9751261472702026, + "loss": 1.0007, + "nll_loss": 0.2500944435596466, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.017999890493229e-05, + "rewards/margins": 0.1974724382162094, + "rewards/rejected": -0.19751261174678802, + "step": 9525 + }, + { + "epoch": 6.587828492392807, + "grad_norm": 6.74733304977417, + "learning_rate": 1.8956508375595513e-05, + "log_odds_chosen": 10.80959701538086, + "log_odds_ratio": -5.695023719454184e-05, + "logits/chosen": -0.033346615731716156, + "logits/rejected": -0.08503760397434235, + "logps/chosen": -0.00012283638352528214, + "logps/rejected": -1.6785304546356201, + "loss": 0.8599, + "nll_loss": 0.21497204899787903, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2283639080123976e-05, + "rewards/margins": 0.16784076392650604, + "rewards/rejected": -0.16785304248332977, + "step": 9526 + }, + { + "epoch": 6.588520055325034, + "grad_norm": 6.609440326690674, + "learning_rate": 1.8952666359305365e-05, + "log_odds_chosen": 9.931138038635254, + "log_odds_ratio": -0.00016108129057101905, + "logits/chosen": 0.06345038115978241, + "logits/rejected": -0.01876118779182434, + "logps/chosen": -0.00024808067246340215, + "logps/rejected": -1.2895182371139526, + "loss": 0.591, + "nll_loss": 0.14773456752300262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4808066882542334e-05, + "rewards/margins": 0.12892702221870422, + "rewards/rejected": -0.12895183265209198, + "step": 9527 + }, + { + "epoch": 6.589211618257261, + "grad_norm": 8.213581085205078, + "learning_rate": 1.8948824343015214e-05, + "log_odds_chosen": 9.630084991455078, + "log_odds_ratio": -0.0004836757725570351, + "logits/chosen": -0.3603372275829315, + "logits/rejected": -0.4786415100097656, + "logps/chosen": -0.0006260552327148616, + "logps/rejected": -1.8332273960113525, + "loss": 0.6008, + "nll_loss": 0.15014447271823883, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.260553345782682e-05, + "rewards/margins": 0.18326014280319214, + "rewards/rejected": -0.1833227574825287, + "step": 9528 + }, + { + "epoch": 6.589903181189488, + "grad_norm": 7.819589614868164, + "learning_rate": 1.8944982326725066e-05, + "log_odds_chosen": 11.63312816619873, + "log_odds_ratio": -1.2704012988251634e-05, + "logits/chosen": -0.6833489537239075, + "logits/rejected": -0.7324924468994141, + "logps/chosen": -0.00019270573102403432, + "logps/rejected": -2.559584617614746, + "loss": 0.694, + "nll_loss": 0.17351114749908447, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9270573829999194e-05, + "rewards/margins": 0.25593918561935425, + "rewards/rejected": -0.2559584379196167, + "step": 9529 + }, + { + "epoch": 6.590594744121715, + "grad_norm": 7.137159824371338, + "learning_rate": 1.894114031043492e-05, + "log_odds_chosen": 10.68792724609375, + "log_odds_ratio": -9.086474892683327e-05, + "logits/chosen": -0.3094750940799713, + "logits/rejected": -0.34787717461586, + "logps/chosen": -0.0008726265514269471, + "logps/rejected": -2.354119300842285, + "loss": 0.5016, + "nll_loss": 0.12540172040462494, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.726265514269471e-05, + "rewards/margins": 0.2353246808052063, + "rewards/rejected": -0.23541194200515747, + "step": 9530 + }, + { + "epoch": 6.591286307053942, + "grad_norm": 7.465571403503418, + "learning_rate": 1.8937298294144768e-05, + "log_odds_chosen": 10.607606887817383, + "log_odds_ratio": -8.99828301044181e-05, + "logits/chosen": -0.06982383877038956, + "logits/rejected": -0.20454353094100952, + "logps/chosen": -0.0002789056452456862, + "logps/rejected": -1.781639575958252, + "loss": 0.9311, + "nll_loss": 0.23276673257350922, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7890564524568617e-05, + "rewards/margins": 0.17813608050346375, + "rewards/rejected": -0.17816394567489624, + "step": 9531 + }, + { + "epoch": 6.591977869986168, + "grad_norm": 5.6307692527771, + "learning_rate": 1.893345627785462e-05, + "log_odds_chosen": 11.764213562011719, + "log_odds_ratio": -4.389313835417852e-05, + "logits/chosen": -0.11563535034656525, + "logits/rejected": -0.09763626754283905, + "logps/chosen": -0.00018603801436256617, + "logps/rejected": -2.9742701053619385, + "loss": 0.4816, + "nll_loss": 0.12040294706821442, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8603801436256617e-05, + "rewards/margins": 0.29740840196609497, + "rewards/rejected": -0.2974269986152649, + "step": 9532 + }, + { + "epoch": 6.592669432918395, + "grad_norm": 6.012737274169922, + "learning_rate": 1.892961426156447e-05, + "log_odds_chosen": 10.957094192504883, + "log_odds_ratio": -0.00011141406139358878, + "logits/chosen": -0.31941908597946167, + "logits/rejected": -0.41692906618118286, + "logps/chosen": -0.0002899272076319903, + "logps/rejected": -2.2581934928894043, + "loss": 0.7247, + "nll_loss": 0.1811733841896057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.899271930800751e-05, + "rewards/margins": 0.2257903665304184, + "rewards/rejected": -0.22581936419010162, + "step": 9533 + }, + { + "epoch": 6.593360995850622, + "grad_norm": 10.640678405761719, + "learning_rate": 1.8925772245274322e-05, + "log_odds_chosen": 11.120275497436523, + "log_odds_ratio": -5.54912221559789e-05, + "logits/chosen": -0.05910344794392586, + "logits/rejected": -0.1686576008796692, + "logps/chosen": -0.0006790636107325554, + "logps/rejected": -2.8853797912597656, + "loss": 0.7399, + "nll_loss": 0.184981107711792, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.790635961806402e-05, + "rewards/margins": 0.288470059633255, + "rewards/rejected": -0.28853797912597656, + "step": 9534 + }, + { + "epoch": 6.594052558782849, + "grad_norm": 6.17962646484375, + "learning_rate": 1.892193022898417e-05, + "log_odds_chosen": 10.639046669006348, + "log_odds_ratio": -7.220011320896447e-05, + "logits/chosen": -0.0466696172952652, + "logits/rejected": -0.14394527673721313, + "logps/chosen": -0.0006435574614442885, + "logps/rejected": -2.312441110610962, + "loss": 0.6346, + "nll_loss": 0.1586356908082962, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43557432340458e-05, + "rewards/margins": 0.23117974400520325, + "rewards/rejected": -0.23124408721923828, + "step": 9535 + }, + { + "epoch": 6.594744121715076, + "grad_norm": 8.575480461120605, + "learning_rate": 1.8918088212694023e-05, + "log_odds_chosen": 11.129705429077148, + "log_odds_ratio": -0.00012059323489665985, + "logits/chosen": -0.32221323251724243, + "logits/rejected": -0.4411635994911194, + "logps/chosen": -0.0002790922299027443, + "logps/rejected": -2.5678157806396484, + "loss": 0.7306, + "nll_loss": 0.1826275885105133, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7909221898880787e-05, + "rewards/margins": 0.25675368309020996, + "rewards/rejected": -0.25678160786628723, + "step": 9536 + }, + { + "epoch": 6.595435684647303, + "grad_norm": 3.7951908111572266, + "learning_rate": 1.8914246196403872e-05, + "log_odds_chosen": 10.10299015045166, + "log_odds_ratio": -0.0005347510450519621, + "logits/chosen": -0.35992541909217834, + "logits/rejected": -0.4088529050350189, + "logps/chosen": -0.00016122058150358498, + "logps/rejected": -1.4273490905761719, + "loss": 0.4174, + "nll_loss": 0.10428406298160553, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.612205960555002e-05, + "rewards/margins": 0.14271880686283112, + "rewards/rejected": -0.14273492991924286, + "step": 9537 + }, + { + "epoch": 6.596127247579529, + "grad_norm": 5.140981674194336, + "learning_rate": 1.8910404180113725e-05, + "log_odds_chosen": 10.800771713256836, + "log_odds_ratio": -2.9589751648018137e-05, + "logits/chosen": -0.22679775953292847, + "logits/rejected": -0.4273071587085724, + "logps/chosen": -0.00040692847687751055, + "logps/rejected": -2.4438960552215576, + "loss": 0.5973, + "nll_loss": 0.1493130475282669, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0692848415346816e-05, + "rewards/margins": 0.2443489134311676, + "rewards/rejected": -0.244389608502388, + "step": 9538 + }, + { + "epoch": 6.596818810511756, + "grad_norm": 6.820888996124268, + "learning_rate": 1.8906562163823577e-05, + "log_odds_chosen": 10.571646690368652, + "log_odds_ratio": -5.5003725719871e-05, + "logits/chosen": -0.3664097189903259, + "logits/rejected": -0.406631201505661, + "logps/chosen": -0.00030732934828847647, + "logps/rejected": -1.8684226274490356, + "loss": 0.5273, + "nll_loss": 0.13182811439037323, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.07329319184646e-05, + "rewards/margins": 0.18681153655052185, + "rewards/rejected": -0.18684226274490356, + "step": 9539 + }, + { + "epoch": 6.597510373443983, + "grad_norm": 5.106546878814697, + "learning_rate": 1.8902720147533426e-05, + "log_odds_chosen": 10.965265274047852, + "log_odds_ratio": -7.689668564125896e-05, + "logits/chosen": -0.21386636793613434, + "logits/rejected": -0.3349723517894745, + "logps/chosen": -0.00039963488234207034, + "logps/rejected": -2.9627490043640137, + "loss": 0.6362, + "nll_loss": 0.15903426706790924, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.996348459622823e-05, + "rewards/margins": 0.29623496532440186, + "rewards/rejected": -0.29627490043640137, + "step": 9540 + }, + { + "epoch": 6.59820193637621, + "grad_norm": 10.467570304870605, + "learning_rate": 1.889887813124328e-05, + "log_odds_chosen": 11.801546096801758, + "log_odds_ratio": -1.0990625014528632e-05, + "logits/chosen": -0.19286254048347473, + "logits/rejected": -0.2172335833311081, + "logps/chosen": -0.0003137754974886775, + "logps/rejected": -2.841444492340088, + "loss": 0.8365, + "nll_loss": 0.20912466943264008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.137754902127199e-05, + "rewards/margins": 0.2841130793094635, + "rewards/rejected": -0.28414446115493774, + "step": 9541 + }, + { + "epoch": 6.598893499308437, + "grad_norm": 7.705151557922363, + "learning_rate": 1.889503611495313e-05, + "log_odds_chosen": 10.635993957519531, + "log_odds_ratio": -0.00017419188225176185, + "logits/chosen": -0.49404484033584595, + "logits/rejected": -0.5072313547134399, + "logps/chosen": -0.00041031482396647334, + "logps/rejected": -2.464137315750122, + "loss": 0.6947, + "nll_loss": 0.1736493706703186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1031482396647334e-05, + "rewards/margins": 0.24637269973754883, + "rewards/rejected": -0.24641373753547668, + "step": 9542 + }, + { + "epoch": 6.5995850622406635, + "grad_norm": 8.610349655151367, + "learning_rate": 1.889119409866298e-05, + "log_odds_chosen": 10.723653793334961, + "log_odds_ratio": -5.8262728998670354e-05, + "logits/chosen": -0.5345897078514099, + "logits/rejected": -0.6072478890419006, + "logps/chosen": -0.0008024029666557908, + "logps/rejected": -2.9265053272247314, + "loss": 0.5516, + "nll_loss": 0.137889102101326, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.024030830711126e-05, + "rewards/margins": 0.2925702929496765, + "rewards/rejected": -0.2926505208015442, + "step": 9543 + }, + { + "epoch": 6.60027662517289, + "grad_norm": 21.155384063720703, + "learning_rate": 1.888735208237283e-05, + "log_odds_chosen": 11.516271591186523, + "log_odds_ratio": -1.7020091036101803e-05, + "logits/chosen": -0.27340206503868103, + "logits/rejected": -0.37262627482414246, + "logps/chosen": -0.0002646548382472247, + "logps/rejected": -2.5741095542907715, + "loss": 0.6883, + "nll_loss": 0.17208553850650787, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.646548455231823e-05, + "rewards/margins": 0.2573844790458679, + "rewards/rejected": -0.2574109435081482, + "step": 9544 + }, + { + "epoch": 6.600968188105117, + "grad_norm": 9.197043418884277, + "learning_rate": 1.8883510066082682e-05, + "log_odds_chosen": 11.402222633361816, + "log_odds_ratio": -3.696878411574289e-05, + "logits/chosen": -0.02247518301010132, + "logits/rejected": -0.15677234530448914, + "logps/chosen": -0.00015109230298548937, + "logps/rejected": -2.3994855880737305, + "loss": 0.6276, + "nll_loss": 0.1568952351808548, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.510923175374046e-05, + "rewards/margins": 0.23993346095085144, + "rewards/rejected": -0.23994854092597961, + "step": 9545 + }, + { + "epoch": 6.601659751037344, + "grad_norm": 6.208484649658203, + "learning_rate": 1.887966804979253e-05, + "log_odds_chosen": 9.860437393188477, + "log_odds_ratio": -0.00020793148723896593, + "logits/chosen": -0.564323365688324, + "logits/rejected": -0.5780093669891357, + "logps/chosen": -0.00029348081443458796, + "logps/rejected": -1.679945707321167, + "loss": 0.4888, + "nll_loss": 0.1221674308180809, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9348082534852438e-05, + "rewards/margins": 0.16796523332595825, + "rewards/rejected": -0.16799457371234894, + "step": 9546 + }, + { + "epoch": 6.602351313969571, + "grad_norm": 11.18844223022461, + "learning_rate": 1.8875826033502383e-05, + "log_odds_chosen": 9.754711151123047, + "log_odds_ratio": -0.00019032778800465167, + "logits/chosen": -0.47488075494766235, + "logits/rejected": -0.5205568075180054, + "logps/chosen": -0.0007518458878621459, + "logps/rejected": -2.273089647293091, + "loss": 0.6455, + "nll_loss": 0.16134408116340637, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.518458733102307e-05, + "rewards/margins": 0.22723379731178284, + "rewards/rejected": -0.2273089736700058, + "step": 9547 + }, + { + "epoch": 6.603042876901798, + "grad_norm": 5.4768385887146, + "learning_rate": 1.8871984017212232e-05, + "log_odds_chosen": 10.853092193603516, + "log_odds_ratio": -0.0003788030007854104, + "logits/chosen": -0.48384472727775574, + "logits/rejected": -0.4700443148612976, + "logps/chosen": -0.0005879810778424144, + "logps/rejected": -2.374781370162964, + "loss": 0.5334, + "nll_loss": 0.13330428302288055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.879810487385839e-05, + "rewards/margins": 0.23741932213306427, + "rewards/rejected": -0.237478107213974, + "step": 9548 + }, + { + "epoch": 6.6037344398340245, + "grad_norm": 6.37504768371582, + "learning_rate": 1.8868142000922085e-05, + "log_odds_chosen": 10.665268898010254, + "log_odds_ratio": -0.00012861876166425645, + "logits/chosen": 0.07045517861843109, + "logits/rejected": 0.03083537518978119, + "logps/chosen": -0.0004432780551724136, + "logps/rejected": -2.561220407485962, + "loss": 0.6722, + "nll_loss": 0.16802969574928284, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.432780770002864e-05, + "rewards/margins": 0.25607770681381226, + "rewards/rejected": -0.25612205266952515, + "step": 9549 + }, + { + "epoch": 6.604426002766251, + "grad_norm": 5.965790748596191, + "learning_rate": 1.8864299984631937e-05, + "log_odds_chosen": 10.766727447509766, + "log_odds_ratio": -0.00010273887164657936, + "logits/chosen": -0.38880062103271484, + "logits/rejected": -0.4356210231781006, + "logps/chosen": -0.0008852147730067372, + "logps/rejected": -2.64434814453125, + "loss": 0.5739, + "nll_loss": 0.1434713900089264, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.852146856952459e-05, + "rewards/margins": 0.26434630155563354, + "rewards/rejected": -0.264434814453125, + "step": 9550 + }, + { + "epoch": 6.605117565698478, + "grad_norm": 9.020346641540527, + "learning_rate": 1.8860457968341786e-05, + "log_odds_chosen": 9.151586532592773, + "log_odds_ratio": -0.0010840434115380049, + "logits/chosen": -0.38431838154792786, + "logits/rejected": -0.3746948540210724, + "logps/chosen": -0.003543839557096362, + "logps/rejected": -2.262924909591675, + "loss": 0.4518, + "nll_loss": 0.11283522844314575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035438398481346667, + "rewards/margins": 0.2259381264448166, + "rewards/rejected": -0.22629249095916748, + "step": 9551 + }, + { + "epoch": 6.605809128630705, + "grad_norm": 3.341355562210083, + "learning_rate": 1.885661595205164e-05, + "log_odds_chosen": 11.200335502624512, + "log_odds_ratio": -4.079660357092507e-05, + "logits/chosen": -0.4858432710170746, + "logits/rejected": -0.571876049041748, + "logps/chosen": -7.985975389601663e-05, + "logps/rejected": -1.8703134059906006, + "loss": 0.4316, + "nll_loss": 0.10789884626865387, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985974662005901e-06, + "rewards/margins": 0.1870233565568924, + "rewards/rejected": -0.1870313435792923, + "step": 9552 + }, + { + "epoch": 6.606500691562932, + "grad_norm": 5.128517150878906, + "learning_rate": 1.8852773935761488e-05, + "log_odds_chosen": 10.190290451049805, + "log_odds_ratio": -0.0012002821313217282, + "logits/chosen": -0.6075640916824341, + "logits/rejected": -0.617435097694397, + "logps/chosen": -0.002807852579280734, + "logps/rejected": -2.2782063484191895, + "loss": 0.4924, + "nll_loss": 0.12298320233821869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002807852579280734, + "rewards/margins": 0.22753985226154327, + "rewards/rejected": -0.22782063484191895, + "step": 9553 + }, + { + "epoch": 6.607192254495159, + "grad_norm": 5.492445945739746, + "learning_rate": 1.8848931919471337e-05, + "log_odds_chosen": 10.870869636535645, + "log_odds_ratio": -4.509523932938464e-05, + "logits/chosen": -0.36585742235183716, + "logits/rejected": -0.4368995726108551, + "logps/chosen": -0.00011223943874938414, + "logps/rejected": -2.0290770530700684, + "loss": 0.667, + "nll_loss": 0.16674567759037018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1223944056837354e-05, + "rewards/margins": 0.20289649069309235, + "rewards/rejected": -0.2029077261686325, + "step": 9554 + }, + { + "epoch": 6.6078838174273855, + "grad_norm": 4.166836738586426, + "learning_rate": 1.884508990318119e-05, + "log_odds_chosen": 10.888010025024414, + "log_odds_ratio": -5.706503361579962e-05, + "logits/chosen": -0.873898983001709, + "logits/rejected": -0.8950672149658203, + "logps/chosen": -0.00020168225455563515, + "logps/rejected": -2.0599961280822754, + "loss": 0.5566, + "nll_loss": 0.13913968205451965, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0168225091765635e-05, + "rewards/margins": 0.20597945153713226, + "rewards/rejected": -0.20599961280822754, + "step": 9555 + }, + { + "epoch": 6.608575380359612, + "grad_norm": 8.395662307739258, + "learning_rate": 1.8841247886891042e-05, + "log_odds_chosen": 10.506204605102539, + "log_odds_ratio": -0.0004264920426066965, + "logits/chosen": -0.49800848960876465, + "logits/rejected": -0.5854029059410095, + "logps/chosen": -0.0007227214518934488, + "logps/rejected": -2.0328316688537598, + "loss": 0.6594, + "nll_loss": 0.16481497883796692, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.227214518934488e-05, + "rewards/margins": 0.20321090519428253, + "rewards/rejected": -0.2032831907272339, + "step": 9556 + }, + { + "epoch": 6.609266943291839, + "grad_norm": 5.947407245635986, + "learning_rate": 1.883740587060089e-05, + "log_odds_chosen": 11.301834106445312, + "log_odds_ratio": -1.8402424757368863e-05, + "logits/chosen": -0.4008464217185974, + "logits/rejected": -0.39481836557388306, + "logps/chosen": -0.0004325605113990605, + "logps/rejected": -2.325915575027466, + "loss": 0.3629, + "nll_loss": 0.09073560684919357, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3256048229523e-05, + "rewards/margins": 0.2325483113527298, + "rewards/rejected": -0.23259153962135315, + "step": 9557 + }, + { + "epoch": 6.609958506224066, + "grad_norm": 5.509538173675537, + "learning_rate": 1.8833563854310743e-05, + "log_odds_chosen": 10.087053298950195, + "log_odds_ratio": -0.00010945965186692774, + "logits/chosen": -0.47453972697257996, + "logits/rejected": -0.5440321564674377, + "logps/chosen": -0.00029165492742322385, + "logps/rejected": -1.6123814582824707, + "loss": 0.5475, + "nll_loss": 0.13687016069889069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.916549601650331e-05, + "rewards/margins": 0.16120897233486176, + "rewards/rejected": -0.1612381637096405, + "step": 9558 + }, + { + "epoch": 6.610650069156293, + "grad_norm": 7.089804649353027, + "learning_rate": 1.8829721838020596e-05, + "log_odds_chosen": 10.465744018554688, + "log_odds_ratio": -4.751564847538248e-05, + "logits/chosen": -0.36397644877433777, + "logits/rejected": -0.42793208360671997, + "logps/chosen": -0.0003245154512114823, + "logps/rejected": -2.2591567039489746, + "loss": 0.6086, + "nll_loss": 0.15214122831821442, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.245154221076518e-05, + "rewards/margins": 0.22588323056697845, + "rewards/rejected": -0.22591570019721985, + "step": 9559 + }, + { + "epoch": 6.61134163208852, + "grad_norm": 10.86466121673584, + "learning_rate": 1.8825879821730445e-05, + "log_odds_chosen": 11.104890823364258, + "log_odds_ratio": -9.219466301146895e-05, + "logits/chosen": -0.73785400390625, + "logits/rejected": -0.804728627204895, + "logps/chosen": -0.00013324012979865074, + "logps/rejected": -2.2057862281799316, + "loss": 0.7755, + "nll_loss": 0.19385896623134613, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3324013707460836e-05, + "rewards/margins": 0.2205653041601181, + "rewards/rejected": -0.2205786257982254, + "step": 9560 + }, + { + "epoch": 6.6120331950207465, + "grad_norm": 5.825887203216553, + "learning_rate": 1.8822037805440297e-05, + "log_odds_chosen": 10.568598747253418, + "log_odds_ratio": -0.000481350754853338, + "logits/chosen": -0.2709238827228546, + "logits/rejected": -0.3508308529853821, + "logps/chosen": -0.0004341882886365056, + "logps/rejected": -2.195253610610962, + "loss": 0.6048, + "nll_loss": 0.15116143226623535, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3418825953267515e-05, + "rewards/margins": 0.2194819301366806, + "rewards/rejected": -0.21952535212039948, + "step": 9561 + }, + { + "epoch": 6.612724757952973, + "grad_norm": 6.367141246795654, + "learning_rate": 1.8818195789150146e-05, + "log_odds_chosen": 11.000616073608398, + "log_odds_ratio": -2.92911208816804e-05, + "logits/chosen": -0.3233502507209778, + "logits/rejected": -0.48610731959342957, + "logps/chosen": -0.0006418666453100741, + "logps/rejected": -2.913724422454834, + "loss": 0.8625, + "nll_loss": 0.21563003957271576, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.418666453100741e-05, + "rewards/margins": 0.29130828380584717, + "rewards/rejected": -0.2913724482059479, + "step": 9562 + }, + { + "epoch": 6.6134163208852, + "grad_norm": 5.848628044128418, + "learning_rate": 1.8814353772859995e-05, + "log_odds_chosen": 11.818613052368164, + "log_odds_ratio": -1.573657755216118e-05, + "logits/chosen": -0.49255573749542236, + "logits/rejected": -0.38133054971694946, + "logps/chosen": -0.0008059104438871145, + "logps/rejected": -3.054670810699463, + "loss": 0.4733, + "nll_loss": 0.11832495778799057, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.059104584390298e-05, + "rewards/margins": 0.305386483669281, + "rewards/rejected": -0.30546706914901733, + "step": 9563 + }, + { + "epoch": 6.614107883817427, + "grad_norm": 5.037055015563965, + "learning_rate": 1.8810511756569848e-05, + "log_odds_chosen": 10.745399475097656, + "log_odds_ratio": -5.438456355477683e-05, + "logits/chosen": -0.537282407283783, + "logits/rejected": -0.6539811491966248, + "logps/chosen": -0.00031301064882427454, + "logps/rejected": -1.985874891281128, + "loss": 0.4274, + "nll_loss": 0.10683829337358475, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.13010677928105e-05, + "rewards/margins": 0.19855618476867676, + "rewards/rejected": -0.19858750700950623, + "step": 9564 + }, + { + "epoch": 6.614799446749654, + "grad_norm": 4.993642807006836, + "learning_rate": 1.88066697402797e-05, + "log_odds_chosen": 10.20506477355957, + "log_odds_ratio": -0.0003886056365445256, + "logits/chosen": -0.46335524320602417, + "logits/rejected": -0.4679669141769409, + "logps/chosen": -0.0008175184484571218, + "logps/rejected": -2.422447681427002, + "loss": 0.636, + "nll_loss": 0.15896305441856384, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.175184484571218e-05, + "rewards/margins": 0.2421630173921585, + "rewards/rejected": -0.24224475026130676, + "step": 9565 + }, + { + "epoch": 6.615491009681881, + "grad_norm": 5.190500259399414, + "learning_rate": 1.880282772398955e-05, + "log_odds_chosen": 10.710193634033203, + "log_odds_ratio": -6.619399937335402e-05, + "logits/chosen": -0.5193922519683838, + "logits/rejected": -0.5428926944732666, + "logps/chosen": -0.00012502398749347776, + "logps/rejected": -1.6223423480987549, + "loss": 0.4191, + "nll_loss": 0.10477405786514282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2502399840741418e-05, + "rewards/margins": 0.1622217297554016, + "rewards/rejected": -0.16223423182964325, + "step": 9566 + }, + { + "epoch": 6.6161825726141075, + "grad_norm": 5.0056538581848145, + "learning_rate": 1.8798985707699402e-05, + "log_odds_chosen": 10.813407897949219, + "log_odds_ratio": -6.554868014063686e-05, + "logits/chosen": -0.4899275302886963, + "logits/rejected": -0.6252234578132629, + "logps/chosen": -0.00014681732864119112, + "logps/rejected": -2.1733970642089844, + "loss": 0.6276, + "nll_loss": 0.1568903625011444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.468173195462441e-05, + "rewards/margins": 0.21732503175735474, + "rewards/rejected": -0.21733970940113068, + "step": 9567 + }, + { + "epoch": 6.616874135546334, + "grad_norm": 9.58974552154541, + "learning_rate": 1.8795143691409254e-05, + "log_odds_chosen": 9.627368927001953, + "log_odds_ratio": -0.0003191865107510239, + "logits/chosen": -0.2289269119501114, + "logits/rejected": -0.3812503218650818, + "logps/chosen": -0.0007769543444737792, + "logps/rejected": -1.7609001398086548, + "loss": 0.5846, + "nll_loss": 0.14612506330013275, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.769542571622878e-05, + "rewards/margins": 0.176012322306633, + "rewards/rejected": -0.1760900318622589, + "step": 9568 + }, + { + "epoch": 6.617565698478561, + "grad_norm": 6.556011199951172, + "learning_rate": 1.8791301675119103e-05, + "log_odds_chosen": 9.689128875732422, + "log_odds_ratio": -0.0007466164533980191, + "logits/chosen": -0.726296603679657, + "logits/rejected": -0.7090697884559631, + "logps/chosen": -0.0019731023348867893, + "logps/rejected": -2.217195749282837, + "loss": 0.5579, + "nll_loss": 0.13938948512077332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019731024804059416, + "rewards/margins": 0.2215222716331482, + "rewards/rejected": -0.22171959280967712, + "step": 9569 + }, + { + "epoch": 6.618257261410788, + "grad_norm": 5.365951061248779, + "learning_rate": 1.8787459658828956e-05, + "log_odds_chosen": 10.164534568786621, + "log_odds_ratio": -0.00022725044982507825, + "logits/chosen": -0.7646428346633911, + "logits/rejected": -0.7043318748474121, + "logps/chosen": -0.00044344677007757127, + "logps/rejected": -1.7538894414901733, + "loss": 0.6294, + "nll_loss": 0.15733535587787628, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.434467700775713e-05, + "rewards/margins": 0.17534460127353668, + "rewards/rejected": -0.17538894712924957, + "step": 9570 + }, + { + "epoch": 6.618948824343015, + "grad_norm": 8.986666679382324, + "learning_rate": 1.8783617642538805e-05, + "log_odds_chosen": 10.886392593383789, + "log_odds_ratio": -3.449077485129237e-05, + "logits/chosen": -0.3698527216911316, + "logits/rejected": -0.5766900181770325, + "logps/chosen": -0.00017695670248940587, + "logps/rejected": -1.903517246246338, + "loss": 0.4073, + "nll_loss": 0.1018340140581131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.769567097653635e-05, + "rewards/margins": 0.1903340220451355, + "rewards/rejected": -0.1903517246246338, + "step": 9571 + }, + { + "epoch": 6.619640387275242, + "grad_norm": 5.671159744262695, + "learning_rate": 1.8779775626248654e-05, + "log_odds_chosen": 9.405364990234375, + "log_odds_ratio": -0.0006271099555306137, + "logits/chosen": -0.4230155646800995, + "logits/rejected": -0.5213783383369446, + "logps/chosen": -0.0007497941260226071, + "logps/rejected": -1.665784478187561, + "loss": 0.7889, + "nll_loss": 0.1971660852432251, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.497941987821832e-05, + "rewards/margins": 0.16650345921516418, + "rewards/rejected": -0.16657845675945282, + "step": 9572 + }, + { + "epoch": 6.6203319502074685, + "grad_norm": 7.949342727661133, + "learning_rate": 1.8775933609958506e-05, + "log_odds_chosen": 11.04636287689209, + "log_odds_ratio": -0.002121716970577836, + "logits/chosen": -0.5016674995422363, + "logits/rejected": -0.6972486972808838, + "logps/chosen": -0.001570210326462984, + "logps/rejected": -1.935790777206421, + "loss": 0.5045, + "nll_loss": 0.12590371072292328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015702102973591536, + "rewards/margins": 0.19342204928398132, + "rewards/rejected": -0.1935790777206421, + "step": 9573 + }, + { + "epoch": 6.621023513139695, + "grad_norm": 9.75149154663086, + "learning_rate": 1.877209159366836e-05, + "log_odds_chosen": 10.175332069396973, + "log_odds_ratio": -0.0002040680410573259, + "logits/chosen": -0.6536148190498352, + "logits/rejected": -0.7172862887382507, + "logps/chosen": -0.0005002233083359897, + "logps/rejected": -2.021902561187744, + "loss": 0.7664, + "nll_loss": 0.19158843159675598, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.002232865081169e-05, + "rewards/margins": 0.2021402269601822, + "rewards/rejected": -0.20219025015830994, + "step": 9574 + }, + { + "epoch": 6.621715076071922, + "grad_norm": 7.20743465423584, + "learning_rate": 1.8768249577378208e-05, + "log_odds_chosen": 10.140237808227539, + "log_odds_ratio": -8.70264702825807e-05, + "logits/chosen": -0.018910914659500122, + "logits/rejected": -0.14361506700515747, + "logps/chosen": -0.0007901238277554512, + "logps/rejected": -1.93674898147583, + "loss": 0.595, + "nll_loss": 0.14875128865242004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.90123813203536e-05, + "rewards/margins": 0.19359590113162994, + "rewards/rejected": -0.19367492198944092, + "step": 9575 + }, + { + "epoch": 6.622406639004149, + "grad_norm": 4.346622943878174, + "learning_rate": 1.876440756108806e-05, + "log_odds_chosen": 10.109397888183594, + "log_odds_ratio": -0.00031347855110652745, + "logits/chosen": -0.6578681468963623, + "logits/rejected": -0.699524462223053, + "logps/chosen": -0.00035372230922803283, + "logps/rejected": -1.861891508102417, + "loss": 0.6135, + "nll_loss": 0.153354674577713, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5372231650399044e-05, + "rewards/margins": 0.18615376949310303, + "rewards/rejected": -0.18618914484977722, + "step": 9576 + }, + { + "epoch": 6.623098201936376, + "grad_norm": 10.031994819641113, + "learning_rate": 1.8760565544797913e-05, + "log_odds_chosen": 10.174301147460938, + "log_odds_ratio": -0.0006410350324586034, + "logits/chosen": -0.09565894305706024, + "logits/rejected": -0.13197723031044006, + "logps/chosen": -0.0009404458105564117, + "logps/rejected": -1.8535264730453491, + "loss": 0.4278, + "nll_loss": 0.10688328742980957, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.404457523487508e-05, + "rewards/margins": 0.18525859713554382, + "rewards/rejected": -0.1853526532649994, + "step": 9577 + }, + { + "epoch": 6.623789764868603, + "grad_norm": 4.580968856811523, + "learning_rate": 1.875672352850776e-05, + "log_odds_chosen": 9.780548095703125, + "log_odds_ratio": -0.0003369380137883127, + "logits/chosen": -0.580957293510437, + "logits/rejected": -0.613364577293396, + "logps/chosen": -0.0019260908011347055, + "logps/rejected": -1.8503471612930298, + "loss": 0.6089, + "nll_loss": 0.1521856188774109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019260909175500274, + "rewards/margins": 0.18484210968017578, + "rewards/rejected": -0.18503473699092865, + "step": 9578 + }, + { + "epoch": 6.624481327800829, + "grad_norm": 7.827384948730469, + "learning_rate": 1.8752881512217614e-05, + "log_odds_chosen": 10.557483673095703, + "log_odds_ratio": -4.911048381472938e-05, + "logits/chosen": -0.20439143478870392, + "logits/rejected": -0.24899733066558838, + "logps/chosen": -0.0001140248859883286, + "logps/rejected": -1.6083905696868896, + "loss": 0.6618, + "nll_loss": 0.16545367240905762, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1402488780731801e-05, + "rewards/margins": 0.1608276516199112, + "rewards/rejected": -0.1608390510082245, + "step": 9579 + }, + { + "epoch": 6.625172890733056, + "grad_norm": 10.01291275024414, + "learning_rate": 1.8749039495927463e-05, + "log_odds_chosen": 10.815433502197266, + "log_odds_ratio": -0.00027521009906195104, + "logits/chosen": -0.2627377510070801, + "logits/rejected": -0.3521021902561188, + "logps/chosen": -0.0012004250893369317, + "logps/rejected": -2.58683443069458, + "loss": 0.7189, + "nll_loss": 0.17969083786010742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001200425103888847, + "rewards/margins": 0.25856339931488037, + "rewards/rejected": -0.258683443069458, + "step": 9580 + }, + { + "epoch": 6.625864453665283, + "grad_norm": 8.097025871276855, + "learning_rate": 1.8745197479637312e-05, + "log_odds_chosen": 10.607389450073242, + "log_odds_ratio": -0.00010616899817250669, + "logits/chosen": -0.21804478764533997, + "logits/rejected": -0.2118377983570099, + "logps/chosen": -0.00013465568190440536, + "logps/rejected": -1.7565428018569946, + "loss": 0.8019, + "nll_loss": 0.20045381784439087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3465569281834178e-05, + "rewards/margins": 0.17564082145690918, + "rewards/rejected": -0.17565427720546722, + "step": 9581 + }, + { + "epoch": 6.62655601659751, + "grad_norm": 12.105314254760742, + "learning_rate": 1.8741355463347165e-05, + "log_odds_chosen": 9.97254753112793, + "log_odds_ratio": -0.00015050121874082834, + "logits/chosen": 0.02145688235759735, + "logits/rejected": -0.1352299302816391, + "logps/chosen": -0.00031788513297215104, + "logps/rejected": -1.7679246664047241, + "loss": 0.6162, + "nll_loss": 0.1540263444185257, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1788513297215104e-05, + "rewards/margins": 0.17676067352294922, + "rewards/rejected": -0.1767924726009369, + "step": 9582 + }, + { + "epoch": 6.627247579529737, + "grad_norm": 6.09376859664917, + "learning_rate": 1.8737513447057017e-05, + "log_odds_chosen": 10.102136611938477, + "log_odds_ratio": -0.004398900084197521, + "logits/chosen": -0.19770173728466034, + "logits/rejected": -0.2749331593513489, + "logps/chosen": -0.002805904019623995, + "logps/rejected": -2.46071195602417, + "loss": 0.9602, + "nll_loss": 0.23961874842643738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002805904077831656, + "rewards/margins": 0.24579061567783356, + "rewards/rejected": -0.2460712194442749, + "step": 9583 + }, + { + "epoch": 6.627939142461964, + "grad_norm": 6.021434783935547, + "learning_rate": 1.8733671430766866e-05, + "log_odds_chosen": 10.092761993408203, + "log_odds_ratio": -0.0002814672188833356, + "logits/chosen": -0.1907365918159485, + "logits/rejected": -0.24558605253696442, + "logps/chosen": -0.0009807777823880315, + "logps/rejected": -2.1922271251678467, + "loss": 0.6207, + "nll_loss": 0.15515732765197754, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.807777678361163e-05, + "rewards/margins": 0.21912464499473572, + "rewards/rejected": -0.21922272443771362, + "step": 9584 + }, + { + "epoch": 6.62863070539419, + "grad_norm": 3.9753689765930176, + "learning_rate": 1.872982941447672e-05, + "log_odds_chosen": 10.512274742126465, + "log_odds_ratio": -6.560344627359882e-05, + "logits/chosen": -0.3456512987613678, + "logits/rejected": -0.3297366797924042, + "logps/chosen": -0.0002295379526913166, + "logps/rejected": -1.99502694606781, + "loss": 0.4358, + "nll_loss": 0.1089467853307724, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.29537945415359e-05, + "rewards/margins": 0.19947972893714905, + "rewards/rejected": -0.19950269162654877, + "step": 9585 + }, + { + "epoch": 6.629322268326417, + "grad_norm": 9.102583885192871, + "learning_rate": 1.872598739818657e-05, + "log_odds_chosen": 9.41545581817627, + "log_odds_ratio": -0.00023895951744634658, + "logits/chosen": -0.07881233841180801, + "logits/rejected": -0.14814022183418274, + "logps/chosen": -0.00046351380296982825, + "logps/rejected": -1.5590986013412476, + "loss": 0.4239, + "nll_loss": 0.10595303773880005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.63513788417913e-05, + "rewards/margins": 0.15586349368095398, + "rewards/rejected": -0.15590986609458923, + "step": 9586 + }, + { + "epoch": 6.630013831258644, + "grad_norm": 6.898933410644531, + "learning_rate": 1.872214538189642e-05, + "log_odds_chosen": 9.900001525878906, + "log_odds_ratio": -0.0005605472251772881, + "logits/chosen": -0.5851230025291443, + "logits/rejected": -0.6717733144760132, + "logps/chosen": -0.005218465346843004, + "logps/rejected": -2.2814183235168457, + "loss": 0.3372, + "nll_loss": 0.08423736691474915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005218465812504292, + "rewards/margins": 0.2276199907064438, + "rewards/rejected": -0.22814184427261353, + "step": 9587 + }, + { + "epoch": 6.630705394190871, + "grad_norm": 3.2694380283355713, + "learning_rate": 1.8718303365606273e-05, + "log_odds_chosen": 10.500894546508789, + "log_odds_ratio": -8.928743045544252e-05, + "logits/chosen": -0.04570809006690979, + "logits/rejected": -0.1338164210319519, + "logps/chosen": -0.0009862055303528905, + "logps/rejected": -1.8601700067520142, + "loss": 0.4133, + "nll_loss": 0.10330963134765625, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.862056322162971e-05, + "rewards/margins": 0.18591837584972382, + "rewards/rejected": -0.1860170066356659, + "step": 9588 + }, + { + "epoch": 6.631396957123098, + "grad_norm": 7.532769680023193, + "learning_rate": 1.871446134931612e-05, + "log_odds_chosen": 11.600115776062012, + "log_odds_ratio": -2.4432218197034672e-05, + "logits/chosen": -0.6962848901748657, + "logits/rejected": -0.6211987733840942, + "logps/chosen": -0.00010943639790639281, + "logps/rejected": -2.3903281688690186, + "loss": 0.8404, + "nll_loss": 0.2100864201784134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0943640518235043e-05, + "rewards/margins": 0.23902186751365662, + "rewards/rejected": -0.2390328049659729, + "step": 9589 + }, + { + "epoch": 6.632088520055325, + "grad_norm": 6.394747257232666, + "learning_rate": 1.871061933302597e-05, + "log_odds_chosen": 11.009204864501953, + "log_odds_ratio": -8.54446116136387e-05, + "logits/chosen": -0.5556159615516663, + "logits/rejected": -0.5460792183876038, + "logps/chosen": -0.00013034706353209913, + "logps/rejected": -1.7036371231079102, + "loss": 0.5307, + "nll_loss": 0.1326713263988495, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.303470526181627e-05, + "rewards/margins": 0.17035070061683655, + "rewards/rejected": -0.17036372423171997, + "step": 9590 + }, + { + "epoch": 6.632780082987551, + "grad_norm": 9.66374683380127, + "learning_rate": 1.8706777316735823e-05, + "log_odds_chosen": 11.150654792785645, + "log_odds_ratio": -6.50244255666621e-05, + "logits/chosen": -0.1793665587902069, + "logits/rejected": -0.26565828919410706, + "logps/chosen": -0.0004702771839220077, + "logps/rejected": -2.391282320022583, + "loss": 0.6228, + "nll_loss": 0.15569747984409332, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.702771911979653e-05, + "rewards/margins": 0.23908117413520813, + "rewards/rejected": -0.2391282171010971, + "step": 9591 + }, + { + "epoch": 6.633471645919778, + "grad_norm": 6.515843391418457, + "learning_rate": 1.8702935300445676e-05, + "log_odds_chosen": 10.865440368652344, + "log_odds_ratio": -7.430482219206169e-05, + "logits/chosen": -0.01707390695810318, + "logits/rejected": 0.12872716784477234, + "logps/chosen": -0.00039322333759628236, + "logps/rejected": -2.132324695587158, + "loss": 0.7089, + "nll_loss": 0.17720797657966614, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.932233084924519e-05, + "rewards/margins": 0.2131931483745575, + "rewards/rejected": -0.21323247253894806, + "step": 9592 + }, + { + "epoch": 6.634163208852005, + "grad_norm": 6.349339962005615, + "learning_rate": 1.8699093284155525e-05, + "log_odds_chosen": 10.240455627441406, + "log_odds_ratio": -0.00014411374286282808, + "logits/chosen": -0.6429404020309448, + "logits/rejected": -0.6957386136054993, + "logps/chosen": -0.0006632260046899319, + "logps/rejected": -2.199697732925415, + "loss": 0.5332, + "nll_loss": 0.133294016122818, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.632260192418471e-05, + "rewards/margins": 0.21990343928337097, + "rewards/rejected": -0.2199697643518448, + "step": 9593 + }, + { + "epoch": 6.634854771784232, + "grad_norm": 13.812353134155273, + "learning_rate": 1.8695251267865377e-05, + "log_odds_chosen": 10.978428840637207, + "log_odds_ratio": -3.51098642568104e-05, + "logits/chosen": -0.4268570840358734, + "logits/rejected": -0.5332842469215393, + "logps/chosen": -0.00032298153382726014, + "logps/rejected": -2.003493309020996, + "loss": 0.4843, + "nll_loss": 0.12108378857374191, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.229815774830058e-05, + "rewards/margins": 0.20031705498695374, + "rewards/rejected": -0.200349360704422, + "step": 9594 + }, + { + "epoch": 6.635546334716459, + "grad_norm": 7.508303642272949, + "learning_rate": 1.869140925157523e-05, + "log_odds_chosen": 10.579375267028809, + "log_odds_ratio": -3.901870150002651e-05, + "logits/chosen": -0.18920519948005676, + "logits/rejected": -0.1860412359237671, + "logps/chosen": -0.00022964477830100805, + "logps/rejected": -2.117384910583496, + "loss": 0.6806, + "nll_loss": 0.170146182179451, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2964477466302924e-05, + "rewards/margins": 0.21171551942825317, + "rewards/rejected": -0.2117384970188141, + "step": 9595 + }, + { + "epoch": 6.6362378976486855, + "grad_norm": 7.161440849304199, + "learning_rate": 1.868756723528508e-05, + "log_odds_chosen": 11.471312522888184, + "log_odds_ratio": -2.5577115593478084e-05, + "logits/chosen": -0.3285534381866455, + "logits/rejected": -0.4225424826145172, + "logps/chosen": -0.00020269016386009753, + "logps/rejected": -2.752018928527832, + "loss": 0.4149, + "nll_loss": 0.103721022605896, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0269017113605514e-05, + "rewards/margins": 0.2751816511154175, + "rewards/rejected": -0.2752019166946411, + "step": 9596 + }, + { + "epoch": 6.636929460580912, + "grad_norm": 6.152478218078613, + "learning_rate": 1.868372521899493e-05, + "log_odds_chosen": 11.246030807495117, + "log_odds_ratio": -0.00015961957979016006, + "logits/chosen": -0.34847715497016907, + "logits/rejected": -0.32901671528816223, + "logps/chosen": -0.00011934031499549747, + "logps/rejected": -2.253852367401123, + "loss": 0.4987, + "nll_loss": 0.12466421723365784, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1934031135751866e-05, + "rewards/margins": 0.2253732979297638, + "rewards/rejected": -0.22538524866104126, + "step": 9597 + }, + { + "epoch": 6.637621023513139, + "grad_norm": 5.750330448150635, + "learning_rate": 1.867988320270478e-05, + "log_odds_chosen": 9.047605514526367, + "log_odds_ratio": -0.0013029974652454257, + "logits/chosen": -0.5508097410202026, + "logits/rejected": -0.6082567572593689, + "logps/chosen": -0.0006829822086729109, + "logps/rejected": -1.3287931680679321, + "loss": 0.5018, + "nll_loss": 0.12532344460487366, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.829822814324871e-05, + "rewards/margins": 0.1328110247850418, + "rewards/rejected": -0.1328793317079544, + "step": 9598 + }, + { + "epoch": 6.638312586445366, + "grad_norm": 5.946147918701172, + "learning_rate": 1.867604118641463e-05, + "log_odds_chosen": 10.778899192810059, + "log_odds_ratio": -5.474353383760899e-05, + "logits/chosen": -0.5107077360153198, + "logits/rejected": -0.5558190941810608, + "logps/chosen": -0.0003230084548704326, + "logps/rejected": -2.5283544063568115, + "loss": 0.4038, + "nll_loss": 0.10093516856431961, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.230084621463902e-05, + "rewards/margins": 0.25280314683914185, + "rewards/rejected": -0.2528354525566101, + "step": 9599 + }, + { + "epoch": 6.639004149377593, + "grad_norm": 6.913265705108643, + "learning_rate": 1.867219917012448e-05, + "log_odds_chosen": 11.897588729858398, + "log_odds_ratio": -1.2387119568302296e-05, + "logits/chosen": -0.3910364508628845, + "logits/rejected": -0.3944079875946045, + "logps/chosen": -0.00012312438047956675, + "logps/rejected": -2.6750569343566895, + "loss": 0.5822, + "nll_loss": 0.14555397629737854, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2312437320360914e-05, + "rewards/margins": 0.2674933969974518, + "rewards/rejected": -0.2675057053565979, + "step": 9600 + }, + { + "epoch": 6.63969571230982, + "grad_norm": 5.178598403930664, + "learning_rate": 1.8668357153834334e-05, + "log_odds_chosen": 10.858855247497559, + "log_odds_ratio": -0.00013011848204769194, + "logits/chosen": -0.4819783568382263, + "logits/rejected": -0.49420034885406494, + "logps/chosen": -0.000334838405251503, + "logps/rejected": -3.0064451694488525, + "loss": 0.4746, + "nll_loss": 0.11863689124584198, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.34838405251503e-05, + "rewards/margins": 0.30061104893684387, + "rewards/rejected": -0.30064451694488525, + "step": 9601 + }, + { + "epoch": 6.6403872752420465, + "grad_norm": 3.6704227924346924, + "learning_rate": 1.8664515137544183e-05, + "log_odds_chosen": 10.26997184753418, + "log_odds_ratio": -7.55906366975978e-05, + "logits/chosen": -0.8013444542884827, + "logits/rejected": -0.8593555688858032, + "logps/chosen": -0.00013731844956055284, + "logps/rejected": -1.3853795528411865, + "loss": 0.3618, + "nll_loss": 0.09045219421386719, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3731846593145747e-05, + "rewards/margins": 0.13852423429489136, + "rewards/rejected": -0.13853797316551208, + "step": 9602 + }, + { + "epoch": 6.641078838174274, + "grad_norm": 8.884760856628418, + "learning_rate": 1.8660673121254035e-05, + "log_odds_chosen": 10.43310546875, + "log_odds_ratio": -0.00010702509462134913, + "logits/chosen": -0.3072403073310852, + "logits/rejected": -0.3633434772491455, + "logps/chosen": -0.0005991262733004987, + "logps/rejected": -1.909914255142212, + "loss": 0.4219, + "nll_loss": 0.10546346753835678, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.991263606119901e-05, + "rewards/margins": 0.190931499004364, + "rewards/rejected": -0.19099141657352448, + "step": 9603 + }, + { + "epoch": 6.641770401106501, + "grad_norm": 8.687732696533203, + "learning_rate": 1.8656831104963888e-05, + "log_odds_chosen": 8.809842109680176, + "log_odds_ratio": -0.3070237934589386, + "logits/chosen": -0.3055609166622162, + "logits/rejected": -0.283913791179657, + "logps/chosen": -0.050361473113298416, + "logps/rejected": -1.5872776508331299, + "loss": 0.8183, + "nll_loss": 0.17387992143630981, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.005036147776991129, + "rewards/margins": 0.1536916196346283, + "rewards/rejected": -0.158727765083313, + "step": 9604 + }, + { + "epoch": 6.642461964038728, + "grad_norm": 5.080305099487305, + "learning_rate": 1.8652989088673737e-05, + "log_odds_chosen": 9.537845611572266, + "log_odds_ratio": -0.00015548468218185008, + "logits/chosen": -0.37062469124794006, + "logits/rejected": -0.35777461528778076, + "logps/chosen": -0.00035577925154939294, + "logps/rejected": -1.4090495109558105, + "loss": 0.4689, + "nll_loss": 0.1172141507267952, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5577930248109624e-05, + "rewards/margins": 0.1408693641424179, + "rewards/rejected": -0.14090494811534882, + "step": 9605 + }, + { + "epoch": 6.643153526970955, + "grad_norm": 5.121057033538818, + "learning_rate": 1.864914707238359e-05, + "log_odds_chosen": 11.087963104248047, + "log_odds_ratio": -3.925432247342542e-05, + "logits/chosen": -0.663267970085144, + "logits/rejected": -0.7178729176521301, + "logps/chosen": -0.00014778254262637347, + "logps/rejected": -2.0659022331237793, + "loss": 0.5862, + "nll_loss": 0.146546870470047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4778253898839466e-05, + "rewards/margins": 0.2065754383802414, + "rewards/rejected": -0.2065902054309845, + "step": 9606 + }, + { + "epoch": 6.643845089903182, + "grad_norm": 4.4347429275512695, + "learning_rate": 1.864530505609344e-05, + "log_odds_chosen": 10.788904190063477, + "log_odds_ratio": -0.0005048522725701332, + "logits/chosen": -0.43864724040031433, + "logits/rejected": -0.49911749362945557, + "logps/chosen": -0.000164158787811175, + "logps/rejected": -1.9060001373291016, + "loss": 0.5871, + "nll_loss": 0.14673137664794922, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.641587914491538e-05, + "rewards/margins": 0.19058358669281006, + "rewards/rejected": -0.19060002267360687, + "step": 9607 + }, + { + "epoch": 6.644536652835408, + "grad_norm": 9.326899528503418, + "learning_rate": 1.8641463039803288e-05, + "log_odds_chosen": 10.238898277282715, + "log_odds_ratio": -6.96768329362385e-05, + "logits/chosen": -0.1614079475402832, + "logits/rejected": -0.22287413477897644, + "logps/chosen": -0.0003164965892210603, + "logps/rejected": -1.8528064489364624, + "loss": 0.536, + "nll_loss": 0.1339990645647049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.164965892210603e-05, + "rewards/margins": 0.18524901568889618, + "rewards/rejected": -0.18528065085411072, + "step": 9608 + }, + { + "epoch": 6.645228215767635, + "grad_norm": 13.386348724365234, + "learning_rate": 1.863762102351314e-05, + "log_odds_chosen": 11.673979759216309, + "log_odds_ratio": -9.696490451460704e-06, + "logits/chosen": -0.43724504113197327, + "logits/rejected": -0.4728170931339264, + "logps/chosen": -0.00015469803474843502, + "logps/rejected": -2.4809579849243164, + "loss": 0.6093, + "nll_loss": 0.15233442187309265, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.546980274724774e-05, + "rewards/margins": 0.24808037281036377, + "rewards/rejected": -0.2480958253145218, + "step": 9609 + }, + { + "epoch": 6.645919778699862, + "grad_norm": 8.906185150146484, + "learning_rate": 1.8633779007222992e-05, + "log_odds_chosen": 11.176610946655273, + "log_odds_ratio": -2.3035610865917988e-05, + "logits/chosen": -0.41594600677490234, + "logits/rejected": -0.4206879734992981, + "logps/chosen": -0.00022571772569790483, + "logps/rejected": -2.4756767749786377, + "loss": 0.6416, + "nll_loss": 0.16038845479488373, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2571772205992602e-05, + "rewards/margins": 0.24754513800144196, + "rewards/rejected": -0.24756768345832825, + "step": 9610 + }, + { + "epoch": 6.646611341632089, + "grad_norm": 4.980371952056885, + "learning_rate": 1.862993699093284e-05, + "log_odds_chosen": 10.551161766052246, + "log_odds_ratio": -5.1158975111320615e-05, + "logits/chosen": -0.20379826426506042, + "logits/rejected": -0.18743589520454407, + "logps/chosen": -0.0030980801675468683, + "logps/rejected": -2.539689064025879, + "loss": 0.5241, + "nll_loss": 0.13103193044662476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00030980800511315465, + "rewards/margins": 0.25365912914276123, + "rewards/rejected": -0.2539689242839813, + "step": 9611 + }, + { + "epoch": 6.647302904564316, + "grad_norm": 5.498662948608398, + "learning_rate": 1.8626094974642694e-05, + "log_odds_chosen": 11.447917938232422, + "log_odds_ratio": -2.6658351998776197e-05, + "logits/chosen": -0.14285291731357574, + "logits/rejected": -0.09949278086423874, + "logps/chosen": -0.00011387672566343099, + "logps/rejected": -2.4001576900482178, + "loss": 0.4981, + "nll_loss": 0.12452501058578491, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1387672202545218e-05, + "rewards/margins": 0.24000439047813416, + "rewards/rejected": -0.24001577496528625, + "step": 9612 + }, + { + "epoch": 6.6479944674965425, + "grad_norm": 4.112555503845215, + "learning_rate": 1.8622252958352546e-05, + "log_odds_chosen": 11.097431182861328, + "log_odds_ratio": -6.698002107441425e-05, + "logits/chosen": -0.5834935903549194, + "logits/rejected": -0.6480783224105835, + "logps/chosen": -0.00037601194344460964, + "logps/rejected": -2.660428762435913, + "loss": 0.4349, + "nll_loss": 0.10872837901115417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.760119579965249e-05, + "rewards/margins": 0.266005277633667, + "rewards/rejected": -0.26604288816452026, + "step": 9613 + }, + { + "epoch": 6.648686030428769, + "grad_norm": 6.84444522857666, + "learning_rate": 1.8618410942062395e-05, + "log_odds_chosen": 10.822056770324707, + "log_odds_ratio": -5.755308666266501e-05, + "logits/chosen": -0.028991512954235077, + "logits/rejected": -0.10154448449611664, + "logps/chosen": -0.00035526740248315036, + "logps/rejected": -2.3193163871765137, + "loss": 0.676, + "nll_loss": 0.16899527609348297, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5526743886293843e-05, + "rewards/margins": 0.23189613223075867, + "rewards/rejected": -0.2319316416978836, + "step": 9614 + }, + { + "epoch": 6.649377593360996, + "grad_norm": 3.9444468021392822, + "learning_rate": 1.8614568925772248e-05, + "log_odds_chosen": 10.631586074829102, + "log_odds_ratio": -0.0003470322408247739, + "logits/chosen": -0.3811192810535431, + "logits/rejected": -0.42284131050109863, + "logps/chosen": -0.0011563881998881698, + "logps/rejected": -2.0233068466186523, + "loss": 0.4872, + "nll_loss": 0.12176550179719925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011563881707843393, + "rewards/margins": 0.2022150605916977, + "rewards/rejected": -0.20233069360256195, + "step": 9615 + }, + { + "epoch": 6.650069156293223, + "grad_norm": 6.1867594718933105, + "learning_rate": 1.8610726909482097e-05, + "log_odds_chosen": 9.69314193725586, + "log_odds_ratio": -0.0001658633555052802, + "logits/chosen": -0.4153778553009033, + "logits/rejected": -0.41827812790870667, + "logps/chosen": -0.0004052014264743775, + "logps/rejected": -1.5486747026443481, + "loss": 0.6721, + "nll_loss": 0.16801199316978455, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0520149923395365e-05, + "rewards/margins": 0.15482693910598755, + "rewards/rejected": -0.15486745536327362, + "step": 9616 + }, + { + "epoch": 6.65076071922545, + "grad_norm": 6.0896382331848145, + "learning_rate": 1.8606884893191946e-05, + "log_odds_chosen": 10.958621978759766, + "log_odds_ratio": -5.041091571911238e-05, + "logits/chosen": 0.014917820692062378, + "logits/rejected": -0.06887702643871307, + "logps/chosen": -0.0004453232977539301, + "logps/rejected": -2.456699848175049, + "loss": 0.4531, + "nll_loss": 0.1132640540599823, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.453233123058453e-05, + "rewards/margins": 0.24562548100948334, + "rewards/rejected": -0.24567002058029175, + "step": 9617 + }, + { + "epoch": 6.651452282157677, + "grad_norm": 27.65871810913086, + "learning_rate": 1.86030428769018e-05, + "log_odds_chosen": 11.017706871032715, + "log_odds_ratio": -2.6596382667776197e-05, + "logits/chosen": -0.4998897314071655, + "logits/rejected": -0.5836895108222961, + "logps/chosen": -0.00018730561714619398, + "logps/rejected": -2.200857162475586, + "loss": 0.5834, + "nll_loss": 0.14584854245185852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.873056316981092e-05, + "rewards/margins": 0.22006699442863464, + "rewards/rejected": -0.2200857400894165, + "step": 9618 + }, + { + "epoch": 6.6521438450899035, + "grad_norm": 4.076780796051025, + "learning_rate": 1.8599200860611647e-05, + "log_odds_chosen": 10.832459449768066, + "log_odds_ratio": -3.48457797372248e-05, + "logits/chosen": -0.10937292873859406, + "logits/rejected": -0.10044606029987335, + "logps/chosen": -0.0005893828347325325, + "logps/rejected": -2.6281380653381348, + "loss": 0.4766, + "nll_loss": 0.11914488673210144, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.893828347325325e-05, + "rewards/margins": 0.2627548575401306, + "rewards/rejected": -0.2628138065338135, + "step": 9619 + }, + { + "epoch": 6.65283540802213, + "grad_norm": 7.478692531585693, + "learning_rate": 1.85953588443215e-05, + "log_odds_chosen": 10.989614486694336, + "log_odds_ratio": -2.4027020117500797e-05, + "logits/chosen": 0.045154161751270294, + "logits/rejected": -0.029847905039787292, + "logps/chosen": -0.000188558449735865, + "logps/rejected": -2.229787826538086, + "loss": 0.8103, + "nll_loss": 0.20257043838500977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8855846064980142e-05, + "rewards/margins": 0.22295993566513062, + "rewards/rejected": -0.22297880053520203, + "step": 9620 + }, + { + "epoch": 6.653526970954357, + "grad_norm": 7.046220302581787, + "learning_rate": 1.8591516828031352e-05, + "log_odds_chosen": 10.115169525146484, + "log_odds_ratio": -9.490567026659846e-05, + "logits/chosen": -0.4010132849216461, + "logits/rejected": -0.5099731683731079, + "logps/chosen": -0.00034400090225972235, + "logps/rejected": -2.049590587615967, + "loss": 0.5036, + "nll_loss": 0.1258976310491562, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4400090953568e-05, + "rewards/margins": 0.20492467284202576, + "rewards/rejected": -0.20495907962322235, + "step": 9621 + }, + { + "epoch": 6.654218533886584, + "grad_norm": 5.485591411590576, + "learning_rate": 1.85876748117412e-05, + "log_odds_chosen": 10.569567680358887, + "log_odds_ratio": -8.486285514663905e-05, + "logits/chosen": -0.37124836444854736, + "logits/rejected": -0.304371178150177, + "logps/chosen": -0.00017183186719194055, + "logps/rejected": -1.9051198959350586, + "loss": 0.5917, + "nll_loss": 0.14791113138198853, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7183185264002532e-05, + "rewards/margins": 0.19049480557441711, + "rewards/rejected": -0.19051198661327362, + "step": 9622 + }, + { + "epoch": 6.654910096818811, + "grad_norm": 7.806471347808838, + "learning_rate": 1.8583832795451054e-05, + "log_odds_chosen": 12.124666213989258, + "log_odds_ratio": -1.1755686500691809e-05, + "logits/chosen": -0.05929935351014137, + "logits/rejected": -0.08076391369104385, + "logps/chosen": -0.0001312370295636356, + "logps/rejected": -2.980034351348877, + "loss": 0.6727, + "nll_loss": 0.1681688278913498, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3123702956363559e-05, + "rewards/margins": 0.2979903221130371, + "rewards/rejected": -0.2980034649372101, + "step": 9623 + }, + { + "epoch": 6.655601659751038, + "grad_norm": 5.634998798370361, + "learning_rate": 1.8579990779160906e-05, + "log_odds_chosen": 11.782219886779785, + "log_odds_ratio": -3.760270192287862e-05, + "logits/chosen": -0.06998319923877716, + "logits/rejected": 0.05776715278625488, + "logps/chosen": -0.0003284990380052477, + "logps/rejected": -2.760857105255127, + "loss": 0.7855, + "nll_loss": 0.19638195633888245, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.284990452812053e-05, + "rewards/margins": 0.276052862405777, + "rewards/rejected": -0.2760857045650482, + "step": 9624 + }, + { + "epoch": 6.6562932226832645, + "grad_norm": 4.896988868713379, + "learning_rate": 1.8576148762870755e-05, + "log_odds_chosen": 10.359952926635742, + "log_odds_ratio": -9.971457620849833e-05, + "logits/chosen": -0.534497857093811, + "logits/rejected": -0.6003218293190002, + "logps/chosen": -0.0005265720537863672, + "logps/rejected": -1.8906424045562744, + "loss": 0.643, + "nll_loss": 0.16074874997138977, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2657207561424e-05, + "rewards/margins": 0.1890116035938263, + "rewards/rejected": -0.18906423449516296, + "step": 9625 + }, + { + "epoch": 6.656984785615491, + "grad_norm": 4.442220211029053, + "learning_rate": 1.8572306746580608e-05, + "log_odds_chosen": 10.274168014526367, + "log_odds_ratio": -9.561772458255291e-05, + "logits/chosen": -0.26732587814331055, + "logits/rejected": -0.3629629909992218, + "logps/chosen": -0.0002495437511242926, + "logps/rejected": -1.8324568271636963, + "loss": 0.6342, + "nll_loss": 0.15854156017303467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4954375476227142e-05, + "rewards/margins": 0.18322071433067322, + "rewards/rejected": -0.1832456737756729, + "step": 9626 + }, + { + "epoch": 6.657676348547718, + "grad_norm": 6.081766128540039, + "learning_rate": 1.8568464730290457e-05, + "log_odds_chosen": 10.357091903686523, + "log_odds_ratio": -8.496503869537264e-05, + "logits/chosen": -0.761346161365509, + "logits/rejected": -0.7080386877059937, + "logps/chosen": -0.00028227429720573127, + "logps/rejected": -1.7036181688308716, + "loss": 0.4951, + "nll_loss": 0.12377134710550308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.822743044816889e-05, + "rewards/margins": 0.170333594083786, + "rewards/rejected": -0.17036181688308716, + "step": 9627 + }, + { + "epoch": 6.658367911479945, + "grad_norm": 3.2742528915405273, + "learning_rate": 1.8564622714000306e-05, + "log_odds_chosen": 10.262062072753906, + "log_odds_ratio": -7.255500531755388e-05, + "logits/chosen": 0.11290848255157471, + "logits/rejected": 0.05145927891135216, + "logps/chosen": -0.00043970055412501097, + "logps/rejected": -1.8753759860992432, + "loss": 0.6369, + "nll_loss": 0.15922844409942627, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397005250211805e-05, + "rewards/margins": 0.18749363720417023, + "rewards/rejected": -0.18753761053085327, + "step": 9628 + }, + { + "epoch": 6.659059474412172, + "grad_norm": 6.663225173950195, + "learning_rate": 1.856078069771016e-05, + "log_odds_chosen": 10.049211502075195, + "log_odds_ratio": -0.0004824997449759394, + "logits/chosen": -0.3176838159561157, + "logits/rejected": -0.33979031443595886, + "logps/chosen": -0.00041182507993653417, + "logps/rejected": -1.8318284749984741, + "loss": 0.7267, + "nll_loss": 0.18163245916366577, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118250581086613e-05, + "rewards/margins": 0.18314167857170105, + "rewards/rejected": -0.18318286538124084, + "step": 9629 + }, + { + "epoch": 6.659751037344399, + "grad_norm": 9.147109031677246, + "learning_rate": 1.855693868142001e-05, + "log_odds_chosen": 10.695847511291504, + "log_odds_ratio": -6.39359132037498e-05, + "logits/chosen": -0.25236430764198303, + "logits/rejected": -0.3654441833496094, + "logps/chosen": -0.00018572970293462276, + "logps/rejected": -1.872322916984558, + "loss": 0.7521, + "nll_loss": 0.1880245804786682, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8572969565866515e-05, + "rewards/margins": 0.1872137188911438, + "rewards/rejected": -0.18723228573799133, + "step": 9630 + }, + { + "epoch": 6.6604426002766255, + "grad_norm": 8.203865051269531, + "learning_rate": 1.855309666512986e-05, + "log_odds_chosen": 9.84941291809082, + "log_odds_ratio": -0.00018136092694476247, + "logits/chosen": -0.41311779618263245, + "logits/rejected": -0.5088513493537903, + "logps/chosen": -0.0002423153055133298, + "logps/rejected": -1.6244256496429443, + "loss": 0.4041, + "nll_loss": 0.10100964456796646, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4231529096141458e-05, + "rewards/margins": 0.16241833567619324, + "rewards/rejected": -0.16244256496429443, + "step": 9631 + }, + { + "epoch": 6.661134163208852, + "grad_norm": 4.43738317489624, + "learning_rate": 1.8549254648839712e-05, + "log_odds_chosen": 9.541839599609375, + "log_odds_ratio": -0.0006786661688238382, + "logits/chosen": -0.31768670678138733, + "logits/rejected": -0.28740179538726807, + "logps/chosen": -0.0011072256602346897, + "logps/rejected": -1.896850824356079, + "loss": 0.7986, + "nll_loss": 0.19957174360752106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011072256893385202, + "rewards/margins": 0.18957436084747314, + "rewards/rejected": -0.18968507647514343, + "step": 9632 + }, + { + "epoch": 6.661825726141079, + "grad_norm": 7.632981300354004, + "learning_rate": 1.8545412632549565e-05, + "log_odds_chosen": 11.051722526550293, + "log_odds_ratio": -4.5273809519130737e-05, + "logits/chosen": -0.4231196343898773, + "logits/rejected": -0.45129501819610596, + "logps/chosen": -0.0002486844314262271, + "logps/rejected": -2.4345688819885254, + "loss": 0.4322, + "nll_loss": 0.10804326087236404, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4868444597814232e-05, + "rewards/margins": 0.24343204498291016, + "rewards/rejected": -0.2434569001197815, + "step": 9633 + }, + { + "epoch": 6.662517289073306, + "grad_norm": 6.208993911743164, + "learning_rate": 1.8541570616259414e-05, + "log_odds_chosen": 10.302834510803223, + "log_odds_ratio": -0.00035480436054058373, + "logits/chosen": -0.9572851657867432, + "logits/rejected": -0.8913147449493408, + "logps/chosen": -0.00021303204994183034, + "logps/rejected": -1.6176707744598389, + "loss": 0.7972, + "nll_loss": 0.19925661385059357, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1303205357980914e-05, + "rewards/margins": 0.1617458015680313, + "rewards/rejected": -0.16176709532737732, + "step": 9634 + }, + { + "epoch": 6.663208852005533, + "grad_norm": 8.000402450561523, + "learning_rate": 1.8537728599969266e-05, + "log_odds_chosen": 10.670997619628906, + "log_odds_ratio": -9.012306691147387e-05, + "logits/chosen": -0.35462328791618347, + "logits/rejected": -0.34307336807250977, + "logps/chosen": -0.00013257621321827173, + "logps/rejected": -1.5429167747497559, + "loss": 0.3766, + "nll_loss": 0.09414802491664886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3257621503726114e-05, + "rewards/margins": 0.15427841246128082, + "rewards/rejected": -0.15429167449474335, + "step": 9635 + }, + { + "epoch": 6.66390041493776, + "grad_norm": 10.191165924072266, + "learning_rate": 1.8533886583679115e-05, + "log_odds_chosen": 10.87049674987793, + "log_odds_ratio": -6.27797853667289e-05, + "logits/chosen": -0.2523128092288971, + "logits/rejected": -0.27273494005203247, + "logps/chosen": -0.0003482589963823557, + "logps/rejected": -2.6285324096679688, + "loss": 0.5499, + "nll_loss": 0.1374695897102356, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4825898183044046e-05, + "rewards/margins": 0.2628183960914612, + "rewards/rejected": -0.2628532350063324, + "step": 9636 + }, + { + "epoch": 6.6645919778699865, + "grad_norm": 5.827001094818115, + "learning_rate": 1.8530044567388964e-05, + "log_odds_chosen": 11.037064552307129, + "log_odds_ratio": -3.355016087880358e-05, + "logits/chosen": -0.17959906160831451, + "logits/rejected": -0.25126656889915466, + "logps/chosen": -0.0003407234326004982, + "logps/rejected": -2.286252021789551, + "loss": 0.5514, + "nll_loss": 0.13783562183380127, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.40723418048583e-05, + "rewards/margins": 0.22859112918376923, + "rewards/rejected": -0.22862519323825836, + "step": 9637 + }, + { + "epoch": 6.665283540802213, + "grad_norm": 7.157079696655273, + "learning_rate": 1.8526202551098817e-05, + "log_odds_chosen": 10.269147872924805, + "log_odds_ratio": -0.000136653077788651, + "logits/chosen": -0.7898240089416504, + "logits/rejected": -0.8049187660217285, + "logps/chosen": -0.0003843801387120038, + "logps/rejected": -1.650019884109497, + "loss": 0.6758, + "nll_loss": 0.1689254343509674, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8438014598796144e-05, + "rewards/margins": 0.16496357321739197, + "rewards/rejected": -0.1650019884109497, + "step": 9638 + }, + { + "epoch": 6.66597510373444, + "grad_norm": 6.0989766120910645, + "learning_rate": 1.852236053480867e-05, + "log_odds_chosen": 10.749070167541504, + "log_odds_ratio": -0.00024179847969207913, + "logits/chosen": -0.37179988622665405, + "logits/rejected": -0.6003690958023071, + "logps/chosen": -0.00023933660122565925, + "logps/rejected": -2.0018997192382812, + "loss": 0.7154, + "nll_loss": 0.1788228154182434, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.393366230535321e-05, + "rewards/margins": 0.2001660317182541, + "rewards/rejected": -0.2001899629831314, + "step": 9639 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 4.793224334716797, + "learning_rate": 1.8518518518518518e-05, + "log_odds_chosen": 10.52408504486084, + "log_odds_ratio": -0.0006772517808713019, + "logits/chosen": -0.41700923442840576, + "logits/rejected": -0.42366236448287964, + "logps/chosen": -0.0007978577632457018, + "logps/rejected": -2.5072598457336426, + "loss": 0.474, + "nll_loss": 0.11843764781951904, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.978577195899561e-05, + "rewards/margins": 0.2506462037563324, + "rewards/rejected": -0.25072598457336426, + "step": 9640 + }, + { + "epoch": 6.667358229598894, + "grad_norm": 5.932323455810547, + "learning_rate": 1.851467650222837e-05, + "log_odds_chosen": 11.987409591674805, + "log_odds_ratio": -3.294226189609617e-05, + "logits/chosen": -0.29465189576148987, + "logits/rejected": -0.4393021762371063, + "logps/chosen": -0.00013866080553270876, + "logps/rejected": -3.125673770904541, + "loss": 0.4178, + "nll_loss": 0.1044505387544632, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3866080735169817e-05, + "rewards/margins": 0.3125535249710083, + "rewards/rejected": -0.3125673830509186, + "step": 9641 + }, + { + "epoch": 6.668049792531121, + "grad_norm": 6.992809295654297, + "learning_rate": 1.8510834485938223e-05, + "log_odds_chosen": 9.339235305786133, + "log_odds_ratio": -0.00024313360336236656, + "logits/chosen": -0.6617879867553711, + "logits/rejected": -0.6839908361434937, + "logps/chosen": -0.00032492296304553747, + "logps/rejected": -1.6257578134536743, + "loss": 0.6955, + "nll_loss": 0.17384573817253113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.249229848734103e-05, + "rewards/margins": 0.16254329681396484, + "rewards/rejected": -0.16257578134536743, + "step": 9642 + }, + { + "epoch": 6.6687413554633475, + "grad_norm": 5.294729709625244, + "learning_rate": 1.8506992469648072e-05, + "log_odds_chosen": 10.480348587036133, + "log_odds_ratio": -5.3323532483773306e-05, + "logits/chosen": -0.5375571250915527, + "logits/rejected": -0.5629109144210815, + "logps/chosen": -0.0002348765847273171, + "logps/rejected": -1.8479783535003662, + "loss": 0.525, + "nll_loss": 0.13125476241111755, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.348765883652959e-05, + "rewards/margins": 0.18477436900138855, + "rewards/rejected": -0.18479785323143005, + "step": 9643 + }, + { + "epoch": 6.669432918395574, + "grad_norm": 7.490869998931885, + "learning_rate": 1.8503150453357925e-05, + "log_odds_chosen": 9.760675430297852, + "log_odds_ratio": -0.0007779019069857895, + "logits/chosen": -0.43486517667770386, + "logits/rejected": -0.5689921379089355, + "logps/chosen": -0.000742304022423923, + "logps/rejected": -2.1124515533447266, + "loss": 0.6009, + "nll_loss": 0.1501590758562088, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.423041097354144e-05, + "rewards/margins": 0.21117094159126282, + "rewards/rejected": -0.21124516427516937, + "step": 9644 + }, + { + "epoch": 6.670124481327801, + "grad_norm": 7.676600456237793, + "learning_rate": 1.8499308437067774e-05, + "log_odds_chosen": 11.441679000854492, + "log_odds_ratio": -3.095114880125038e-05, + "logits/chosen": 0.07396015524864197, + "logits/rejected": -0.0116509348154068, + "logps/chosen": -0.0002146841725334525, + "logps/rejected": -2.4313998222351074, + "loss": 0.7564, + "nll_loss": 0.18909502029418945, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.146841688954737e-05, + "rewards/margins": 0.2431185245513916, + "rewards/rejected": -0.24313999712467194, + "step": 9645 + }, + { + "epoch": 6.670816044260028, + "grad_norm": 8.344149589538574, + "learning_rate": 1.8495466420777623e-05, + "log_odds_chosen": 10.729771614074707, + "log_odds_ratio": -2.940691410913132e-05, + "logits/chosen": -0.11430245637893677, + "logits/rejected": -0.15196284651756287, + "logps/chosen": -0.00011363202065695077, + "logps/rejected": -1.7529911994934082, + "loss": 0.588, + "nll_loss": 0.14699505269527435, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1363201338099316e-05, + "rewards/margins": 0.17528776824474335, + "rewards/rejected": -0.17529912292957306, + "step": 9646 + }, + { + "epoch": 6.671507607192255, + "grad_norm": 10.055741310119629, + "learning_rate": 1.8491624404487475e-05, + "log_odds_chosen": 10.192642211914062, + "log_odds_ratio": -0.00016000257164705545, + "logits/chosen": -0.41450396180152893, + "logits/rejected": -0.4917091429233551, + "logps/chosen": -0.0007967498968355358, + "logps/rejected": -2.0323781967163086, + "loss": 0.3877, + "nll_loss": 0.09689746052026749, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.967498822836205e-05, + "rewards/margins": 0.2031581550836563, + "rewards/rejected": -0.20323783159255981, + "step": 9647 + }, + { + "epoch": 6.672199170124482, + "grad_norm": 2.560049533843994, + "learning_rate": 1.8487782388197328e-05, + "log_odds_chosen": 11.013113975524902, + "log_odds_ratio": -0.00025713135255500674, + "logits/chosen": -0.3730487525463104, + "logits/rejected": -0.4757554531097412, + "logps/chosen": -0.0009568912792019546, + "logps/rejected": -2.6731841564178467, + "loss": 0.4333, + "nll_loss": 0.10830046236515045, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.56891308305785e-05, + "rewards/margins": 0.26722273230552673, + "rewards/rejected": -0.2673184275627136, + "step": 9648 + }, + { + "epoch": 6.672890733056708, + "grad_norm": 6.429426670074463, + "learning_rate": 1.8483940371907177e-05, + "log_odds_chosen": 11.566768646240234, + "log_odds_ratio": -1.660044836171437e-05, + "logits/chosen": -0.031103745102882385, + "logits/rejected": -0.10523568093776703, + "logps/chosen": -0.00010068865958601236, + "logps/rejected": -2.0547738075256348, + "loss": 0.6027, + "nll_loss": 0.15067416429519653, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0068865776702296e-05, + "rewards/margins": 0.2054673284292221, + "rewards/rejected": -0.20547738671302795, + "step": 9649 + }, + { + "epoch": 6.673582295988935, + "grad_norm": 5.067642688751221, + "learning_rate": 1.848009835561703e-05, + "log_odds_chosen": 11.074803352355957, + "log_odds_ratio": -4.086527042090893e-05, + "logits/chosen": -0.3686811327934265, + "logits/rejected": -0.437101274728775, + "logps/chosen": -0.00011494646605569869, + "logps/rejected": -2.052640438079834, + "loss": 0.5613, + "nll_loss": 0.14033235609531403, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.149464696936775e-05, + "rewards/margins": 0.20525255799293518, + "rewards/rejected": -0.20526404678821564, + "step": 9650 + }, + { + "epoch": 6.674273858921162, + "grad_norm": 5.264712810516357, + "learning_rate": 1.847625633932688e-05, + "log_odds_chosen": 10.861353874206543, + "log_odds_ratio": -5.436270294012502e-05, + "logits/chosen": -0.3558247983455658, + "logits/rejected": -0.3405495584011078, + "logps/chosen": -0.0003190129646100104, + "logps/rejected": -1.7811611890792847, + "loss": 0.5036, + "nll_loss": 0.1258881390094757, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.190129791619256e-05, + "rewards/margins": 0.17808422446250916, + "rewards/rejected": -0.178116112947464, + "step": 9651 + }, + { + "epoch": 6.674965421853389, + "grad_norm": 4.947920799255371, + "learning_rate": 1.847241432303673e-05, + "log_odds_chosen": 9.052240371704102, + "log_odds_ratio": -0.00023398856865242124, + "logits/chosen": -0.4382186830043793, + "logits/rejected": -0.4656783640384674, + "logps/chosen": -0.0004539778456091881, + "logps/rejected": -1.42952561378479, + "loss": 0.5459, + "nll_loss": 0.13645029067993164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539778456091881e-05, + "rewards/margins": 0.14290717244148254, + "rewards/rejected": -0.142952561378479, + "step": 9652 + }, + { + "epoch": 6.675656984785616, + "grad_norm": 11.757040023803711, + "learning_rate": 1.8468572306746583e-05, + "log_odds_chosen": 11.986181259155273, + "log_odds_ratio": -2.6844723834074102e-05, + "logits/chosen": -0.5267226696014404, + "logits/rejected": -0.6199319958686829, + "logps/chosen": -0.0001811894035199657, + "logps/rejected": -3.14363956451416, + "loss": 0.5525, + "nll_loss": 0.1381237804889679, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.811894071579445e-05, + "rewards/margins": 0.3143458366394043, + "rewards/rejected": -0.314363956451416, + "step": 9653 + }, + { + "epoch": 6.676348547717843, + "grad_norm": 13.25656509399414, + "learning_rate": 1.8464730290456432e-05, + "log_odds_chosen": 10.751134872436523, + "log_odds_ratio": -7.180450484156609e-05, + "logits/chosen": -0.4260287582874298, + "logits/rejected": -0.6113356351852417, + "logps/chosen": -0.00017347175162285566, + "logps/rejected": -1.92714524269104, + "loss": 0.8281, + "nll_loss": 0.20700892806053162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7347174434689805e-05, + "rewards/margins": 0.1926971673965454, + "rewards/rejected": -0.19271452724933624, + "step": 9654 + }, + { + "epoch": 6.677040110650069, + "grad_norm": 5.646144866943359, + "learning_rate": 1.846088827416628e-05, + "log_odds_chosen": 10.25471019744873, + "log_odds_ratio": -0.0004184903227724135, + "logits/chosen": -0.3453481197357178, + "logits/rejected": -0.44830867648124695, + "logps/chosen": -0.0012309765443205833, + "logps/rejected": -1.7108335494995117, + "loss": 0.3654, + "nll_loss": 0.09130599349737167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012309766316320747, + "rewards/margins": 0.17096027731895447, + "rewards/rejected": -0.17108336091041565, + "step": 9655 + }, + { + "epoch": 6.677731673582296, + "grad_norm": 6.999812126159668, + "learning_rate": 1.8457046257876134e-05, + "log_odds_chosen": 10.144024848937988, + "log_odds_ratio": -9.134892752626911e-05, + "logits/chosen": -0.5624509453773499, + "logits/rejected": -0.5673438310623169, + "logps/chosen": -0.0011014851043000817, + "logps/rejected": -2.104128360748291, + "loss": 0.4618, + "nll_loss": 0.11543923616409302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011014851043000817, + "rewards/margins": 0.2103026956319809, + "rewards/rejected": -0.21041283011436462, + "step": 9656 + }, + { + "epoch": 6.678423236514523, + "grad_norm": 6.134801864624023, + "learning_rate": 1.8453204241585986e-05, + "log_odds_chosen": 10.598881721496582, + "log_odds_ratio": -6.408988701878116e-05, + "logits/chosen": -0.27690455317497253, + "logits/rejected": -0.27721768617630005, + "logps/chosen": -0.00022582666133530438, + "logps/rejected": -1.9214727878570557, + "loss": 0.6385, + "nll_loss": 0.15961936116218567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2582664314541034e-05, + "rewards/margins": 0.19212469458580017, + "rewards/rejected": -0.19214728474617004, + "step": 9657 + }, + { + "epoch": 6.67911479944675, + "grad_norm": 12.72281551361084, + "learning_rate": 1.8449362225295835e-05, + "log_odds_chosen": 11.956278800964355, + "log_odds_ratio": -1.5963418263709173e-05, + "logits/chosen": -0.5332290530204773, + "logits/rejected": -0.5421327948570251, + "logps/chosen": -0.0003582322970032692, + "logps/rejected": -3.2543258666992188, + "loss": 0.4236, + "nll_loss": 0.10589415580034256, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5823231883114204e-05, + "rewards/margins": 0.3253967761993408, + "rewards/rejected": -0.32543259859085083, + "step": 9658 + }, + { + "epoch": 6.679806362378977, + "grad_norm": 5.651429653167725, + "learning_rate": 1.8445520209005688e-05, + "log_odds_chosen": 9.974549293518066, + "log_odds_ratio": -0.00014403206296265125, + "logits/chosen": -0.0029746294021606445, + "logits/rejected": 0.003281906247138977, + "logps/chosen": -0.001990691991522908, + "logps/rejected": -1.8177213668823242, + "loss": 0.6811, + "nll_loss": 0.17025278508663177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019906919624190778, + "rewards/margins": 0.1815730780363083, + "rewards/rejected": -0.1817721426486969, + "step": 9659 + }, + { + "epoch": 6.680497925311204, + "grad_norm": 5.676749229431152, + "learning_rate": 1.844167819271554e-05, + "log_odds_chosen": 10.579449653625488, + "log_odds_ratio": -5.1365925173740834e-05, + "logits/chosen": -0.868455708026886, + "logits/rejected": -0.7236647009849548, + "logps/chosen": -0.00017911390750668943, + "logps/rejected": -1.8537935018539429, + "loss": 0.8948, + "nll_loss": 0.223703533411026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7911390386871062e-05, + "rewards/margins": 0.18536144495010376, + "rewards/rejected": -0.18537934124469757, + "step": 9660 + }, + { + "epoch": 6.68118948824343, + "grad_norm": 7.042651176452637, + "learning_rate": 1.843783617642539e-05, + "log_odds_chosen": 10.082916259765625, + "log_odds_ratio": -0.000205668417038396, + "logits/chosen": -0.4950271248817444, + "logits/rejected": -0.5898337364196777, + "logps/chosen": -0.0004197222297079861, + "logps/rejected": -1.7821499109268188, + "loss": 0.5259, + "nll_loss": 0.13145092129707336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.197222733637318e-05, + "rewards/margins": 0.1781730204820633, + "rewards/rejected": -0.17821499705314636, + "step": 9661 + }, + { + "epoch": 6.681881051175657, + "grad_norm": 6.74050235748291, + "learning_rate": 1.843399416013524e-05, + "log_odds_chosen": 10.639854431152344, + "log_odds_ratio": -7.393736450467259e-05, + "logits/chosen": -0.6220520734786987, + "logits/rejected": -0.6198940873146057, + "logps/chosen": -0.00022765059839002788, + "logps/rejected": -1.93635892868042, + "loss": 0.437, + "nll_loss": 0.10924112796783447, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.276506165799219e-05, + "rewards/margins": 0.19361314177513123, + "rewards/rejected": -0.19363591074943542, + "step": 9662 + }, + { + "epoch": 6.682572614107884, + "grad_norm": 4.9354329109191895, + "learning_rate": 1.843015214384509e-05, + "log_odds_chosen": 9.169984817504883, + "log_odds_ratio": -0.0003960762987844646, + "logits/chosen": -0.4277748167514801, + "logits/rejected": -0.4469650089740753, + "logps/chosen": -0.0004151844186708331, + "logps/rejected": -1.3824002742767334, + "loss": 0.4212, + "nll_loss": 0.10525692999362946, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1518444049870595e-05, + "rewards/margins": 0.13819852471351624, + "rewards/rejected": -0.1382400393486023, + "step": 9663 + }, + { + "epoch": 6.683264177040111, + "grad_norm": 4.044813632965088, + "learning_rate": 1.842631012755494e-05, + "log_odds_chosen": 10.278279304504395, + "log_odds_ratio": -0.0003889031650032848, + "logits/chosen": -0.4868510365486145, + "logits/rejected": -0.5194868445396423, + "logps/chosen": -0.0002763352240435779, + "logps/rejected": -1.5866440534591675, + "loss": 0.4056, + "nll_loss": 0.10136575251817703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7633523131953552e-05, + "rewards/margins": 0.15863677859306335, + "rewards/rejected": -0.15866440534591675, + "step": 9664 + }, + { + "epoch": 6.683955739972338, + "grad_norm": 5.9982733726501465, + "learning_rate": 1.8422468111264792e-05, + "log_odds_chosen": 11.98559856414795, + "log_odds_ratio": -1.5875868484727107e-05, + "logits/chosen": -0.4334144592285156, + "logits/rejected": -0.5116381049156189, + "logps/chosen": -0.00015297062054742128, + "logps/rejected": -2.601205348968506, + "loss": 0.3902, + "nll_loss": 0.09754490852355957, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5297060599550605e-05, + "rewards/margins": 0.2601052522659302, + "rewards/rejected": -0.26012054085731506, + "step": 9665 + }, + { + "epoch": 6.6846473029045645, + "grad_norm": 5.988363265991211, + "learning_rate": 1.8418626094974645e-05, + "log_odds_chosen": 10.546000480651855, + "log_odds_ratio": -9.494496771367267e-05, + "logits/chosen": -0.5550673007965088, + "logits/rejected": -0.6216533184051514, + "logps/chosen": -0.0005368262063711882, + "logps/rejected": -1.8109443187713623, + "loss": 0.5025, + "nll_loss": 0.12562590837478638, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.368262645788491e-05, + "rewards/margins": 0.18104076385498047, + "rewards/rejected": -0.1810944527387619, + "step": 9666 + }, + { + "epoch": 6.685338865836791, + "grad_norm": 5.435577869415283, + "learning_rate": 1.8414784078684494e-05, + "log_odds_chosen": 10.510980606079102, + "log_odds_ratio": -0.00018327782163396478, + "logits/chosen": -0.2902176082134247, + "logits/rejected": -0.42554375529289246, + "logps/chosen": -0.0003930656239390373, + "logps/rejected": -2.0370352268218994, + "loss": 0.5285, + "nll_loss": 0.13210366666316986, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9306563849095255e-05, + "rewards/margins": 0.20366422832012177, + "rewards/rejected": -0.20370353758335114, + "step": 9667 + }, + { + "epoch": 6.686030428769018, + "grad_norm": 10.245564460754395, + "learning_rate": 1.8410942062394346e-05, + "log_odds_chosen": 10.817817687988281, + "log_odds_ratio": -4.786135832546279e-05, + "logits/chosen": -0.3151760697364807, + "logits/rejected": -0.2521878480911255, + "logps/chosen": -0.00020681676687672734, + "logps/rejected": -2.006657361984253, + "loss": 0.7619, + "nll_loss": 0.19047296047210693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0681678506662138e-05, + "rewards/margins": 0.2006450593471527, + "rewards/rejected": -0.20066574215888977, + "step": 9668 + }, + { + "epoch": 6.686721991701245, + "grad_norm": 13.401941299438477, + "learning_rate": 1.84071000461042e-05, + "log_odds_chosen": 10.34614372253418, + "log_odds_ratio": -0.00021202873904258013, + "logits/chosen": -0.23712968826293945, + "logits/rejected": -0.26364678144454956, + "logps/chosen": -0.0003467551141511649, + "logps/rejected": -1.8067233562469482, + "loss": 0.6057, + "nll_loss": 0.15139594674110413, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.467550777713768e-05, + "rewards/margins": 0.1806376576423645, + "rewards/rejected": -0.18067234754562378, + "step": 9669 + }, + { + "epoch": 6.687413554633472, + "grad_norm": 5.781926155090332, + "learning_rate": 1.8403258029814048e-05, + "log_odds_chosen": 11.405348777770996, + "log_odds_ratio": -0.0004130418528802693, + "logits/chosen": -0.27188578248023987, + "logits/rejected": -0.3051159977912903, + "logps/chosen": -0.0005597221315838397, + "logps/rejected": -2.6814310550689697, + "loss": 0.5772, + "nll_loss": 0.1442687064409256, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597221388597973e-05, + "rewards/margins": 0.26808711886405945, + "rewards/rejected": -0.2681431174278259, + "step": 9670 + }, + { + "epoch": 6.688105117565699, + "grad_norm": 10.355371475219727, + "learning_rate": 1.83994160135239e-05, + "log_odds_chosen": 9.402498245239258, + "log_odds_ratio": -0.00031604303512722254, + "logits/chosen": -0.7214362621307373, + "logits/rejected": -0.8196225166320801, + "logps/chosen": -0.0010002891067415476, + "logps/rejected": -1.4246968030929565, + "loss": 0.6271, + "nll_loss": 0.15675143897533417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001000289194053039, + "rewards/margins": 0.14236965775489807, + "rewards/rejected": -0.14246967434883118, + "step": 9671 + }, + { + "epoch": 6.6887966804979255, + "grad_norm": 4.973082542419434, + "learning_rate": 1.839557399723375e-05, + "log_odds_chosen": 10.832221984863281, + "log_odds_ratio": -4.954859832650982e-05, + "logits/chosen": -0.6315814256668091, + "logits/rejected": -0.6772823333740234, + "logps/chosen": -0.0002705375081859529, + "logps/rejected": -2.3966708183288574, + "loss": 0.501, + "nll_loss": 0.12524764239788055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.705375118239317e-05, + "rewards/margins": 0.2396400272846222, + "rewards/rejected": -0.23966708779335022, + "step": 9672 + }, + { + "epoch": 6.689488243430152, + "grad_norm": 5.241406440734863, + "learning_rate": 1.8391731980943598e-05, + "log_odds_chosen": 10.412388801574707, + "log_odds_ratio": -0.0001051185536198318, + "logits/chosen": -0.6095589995384216, + "logits/rejected": -0.5799497365951538, + "logps/chosen": -0.0005825125845149159, + "logps/rejected": -1.8753221035003662, + "loss": 0.4108, + "nll_loss": 0.10269524902105331, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.825126572744921e-05, + "rewards/margins": 0.18747395277023315, + "rewards/rejected": -0.1875322163105011, + "step": 9673 + }, + { + "epoch": 6.690179806362379, + "grad_norm": 7.175179958343506, + "learning_rate": 1.838788996465345e-05, + "log_odds_chosen": 9.725004196166992, + "log_odds_ratio": -0.00036285031819716096, + "logits/chosen": -0.5444189310073853, + "logits/rejected": -0.5555685758590698, + "logps/chosen": -0.001297255977988243, + "logps/rejected": -1.8658530712127686, + "loss": 0.6532, + "nll_loss": 0.16325727105140686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012972559488844126, + "rewards/margins": 0.18645557761192322, + "rewards/rejected": -0.18658530712127686, + "step": 9674 + }, + { + "epoch": 6.690871369294606, + "grad_norm": 10.508170127868652, + "learning_rate": 1.8384047948363303e-05, + "log_odds_chosen": 11.734052658081055, + "log_odds_ratio": -1.1746539712476078e-05, + "logits/chosen": -0.3542497456073761, + "logits/rejected": -0.44388705492019653, + "logps/chosen": -7.884105434641242e-05, + "logps/rejected": -2.326277494430542, + "loss": 0.6263, + "nll_loss": 0.1565825343132019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.884104888944421e-06, + "rewards/margins": 0.23261988162994385, + "rewards/rejected": -0.2326277494430542, + "step": 9675 + }, + { + "epoch": 6.691562932226833, + "grad_norm": 11.687735557556152, + "learning_rate": 1.8380205932073152e-05, + "log_odds_chosen": 10.39188289642334, + "log_odds_ratio": -9.057446732185781e-05, + "logits/chosen": -0.1264955699443817, + "logits/rejected": -0.32701337337493896, + "logps/chosen": -0.0002603328903205693, + "logps/rejected": -1.843508005142212, + "loss": 0.6477, + "nll_loss": 0.1619156301021576, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.603329085104633e-05, + "rewards/margins": 0.18432477116584778, + "rewards/rejected": -0.18435078859329224, + "step": 9676 + }, + { + "epoch": 6.69225449515906, + "grad_norm": 6.140292167663574, + "learning_rate": 1.8376363915783004e-05, + "log_odds_chosen": 10.866915702819824, + "log_odds_ratio": -7.089345308486372e-05, + "logits/chosen": -0.295518696308136, + "logits/rejected": -0.42706912755966187, + "logps/chosen": -0.00022815105330664665, + "logps/rejected": -2.186887264251709, + "loss": 0.4853, + "nll_loss": 0.12131841480731964, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.281510387547314e-05, + "rewards/margins": 0.21866591274738312, + "rewards/rejected": -0.2186887264251709, + "step": 9677 + }, + { + "epoch": 6.6929460580912865, + "grad_norm": 4.763021469116211, + "learning_rate": 1.8372521899492857e-05, + "log_odds_chosen": 11.162002563476562, + "log_odds_ratio": -7.944177195895463e-05, + "logits/chosen": -0.5970376133918762, + "logits/rejected": -0.6784569621086121, + "logps/chosen": -0.0002177559508709237, + "logps/rejected": -2.3210833072662354, + "loss": 0.4976, + "nll_loss": 0.12439815700054169, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.177559508709237e-05, + "rewards/margins": 0.23208656907081604, + "rewards/rejected": -0.23210833966732025, + "step": 9678 + }, + { + "epoch": 6.693637621023513, + "grad_norm": 7.053638935089111, + "learning_rate": 1.8368679883202706e-05, + "log_odds_chosen": 10.3631591796875, + "log_odds_ratio": -0.004077597986906767, + "logits/chosen": 0.01545802503824234, + "logits/rejected": -0.013516820967197418, + "logps/chosen": -0.0012991420226171613, + "logps/rejected": -2.3810198307037354, + "loss": 1.1509, + "nll_loss": 0.28730881214141846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012991421681363136, + "rewards/margins": 0.23797208070755005, + "rewards/rejected": -0.238101989030838, + "step": 9679 + }, + { + "epoch": 6.69432918395574, + "grad_norm": 4.983798503875732, + "learning_rate": 1.836483786691256e-05, + "log_odds_chosen": 10.06801986694336, + "log_odds_ratio": -0.0001003733414108865, + "logits/chosen": -0.10567940026521683, + "logits/rejected": -0.22135068476200104, + "logps/chosen": -0.00038139085518196225, + "logps/rejected": -1.6341947317123413, + "loss": 0.5966, + "nll_loss": 0.1491512656211853, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.813908188021742e-05, + "rewards/margins": 0.16338133811950684, + "rewards/rejected": -0.1634194552898407, + "step": 9680 + }, + { + "epoch": 6.695020746887967, + "grad_norm": 5.632517337799072, + "learning_rate": 1.8360995850622407e-05, + "log_odds_chosen": 10.997325897216797, + "log_odds_ratio": -5.951305865892209e-05, + "logits/chosen": -0.24782253801822662, + "logits/rejected": -0.27457350492477417, + "logps/chosen": -0.0007469297270290554, + "logps/rejected": -2.7543869018554688, + "loss": 0.8587, + "nll_loss": 0.21465857326984406, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469296542694792e-05, + "rewards/margins": 0.2753640115261078, + "rewards/rejected": -0.27543869614601135, + "step": 9681 + }, + { + "epoch": 6.695712309820194, + "grad_norm": 6.578545093536377, + "learning_rate": 1.8357153834332257e-05, + "log_odds_chosen": 10.860370635986328, + "log_odds_ratio": -9.661043441155925e-05, + "logits/chosen": -0.47947752475738525, + "logits/rejected": -0.4665643274784088, + "logps/chosen": -0.00026218523271381855, + "logps/rejected": -2.3484091758728027, + "loss": 0.5463, + "nll_loss": 0.13655605912208557, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.621852581796702e-05, + "rewards/margins": 0.23481470346450806, + "rewards/rejected": -0.23484092950820923, + "step": 9682 + }, + { + "epoch": 6.696403872752421, + "grad_norm": 3.9766275882720947, + "learning_rate": 1.835331181804211e-05, + "log_odds_chosen": 11.27324390411377, + "log_odds_ratio": -0.0010711115319281816, + "logits/chosen": -0.12841787934303284, + "logits/rejected": -0.22983276844024658, + "logps/chosen": -0.001020747353322804, + "logps/rejected": -2.9668526649475098, + "loss": 0.4289, + "nll_loss": 0.10712532699108124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001020747295115143, + "rewards/margins": 0.2965832054615021, + "rewards/rejected": -0.29668527841567993, + "step": 9683 + }, + { + "epoch": 6.6970954356846475, + "grad_norm": 5.182412147521973, + "learning_rate": 1.8349469801751958e-05, + "log_odds_chosen": 10.569929122924805, + "log_odds_ratio": -0.00021206194651313126, + "logits/chosen": -0.38122642040252686, + "logits/rejected": -0.48790067434310913, + "logps/chosen": -0.000496427237521857, + "logps/rejected": -2.254833936691284, + "loss": 0.7962, + "nll_loss": 0.19904069602489471, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.96427237521857e-05, + "rewards/margins": 0.22543376684188843, + "rewards/rejected": -0.22548341751098633, + "step": 9684 + }, + { + "epoch": 6.697786998616874, + "grad_norm": 4.416690349578857, + "learning_rate": 1.834562778546181e-05, + "log_odds_chosen": 10.155586242675781, + "log_odds_ratio": -6.861994916107506e-05, + "logits/chosen": -0.21608369052410126, + "logits/rejected": -0.2599760890007019, + "logps/chosen": -0.00011967943282797933, + "logps/rejected": -1.2984521389007568, + "loss": 0.3751, + "nll_loss": 0.09377313405275345, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1967944374191575e-05, + "rewards/margins": 0.12983325123786926, + "rewards/rejected": -0.12984521687030792, + "step": 9685 + }, + { + "epoch": 6.698478561549101, + "grad_norm": 8.809900283813477, + "learning_rate": 1.8341785769171663e-05, + "log_odds_chosen": 11.358308792114258, + "log_odds_ratio": -3.5189397749491036e-05, + "logits/chosen": -0.5408127307891846, + "logits/rejected": -0.5565197467803955, + "logps/chosen": -0.0002709012187551707, + "logps/rejected": -2.950636625289917, + "loss": 0.5811, + "nll_loss": 0.14526110887527466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7090123694506474e-05, + "rewards/margins": 0.29503658413887024, + "rewards/rejected": -0.29506367444992065, + "step": 9686 + }, + { + "epoch": 6.699170124481328, + "grad_norm": 4.948386192321777, + "learning_rate": 1.8337943752881512e-05, + "log_odds_chosen": 10.860937118530273, + "log_odds_ratio": -2.90093357762089e-05, + "logits/chosen": -0.7190711498260498, + "logits/rejected": -0.7402241230010986, + "logps/chosen": -0.00014132638170849532, + "logps/rejected": -1.7408219575881958, + "loss": 0.5168, + "nll_loss": 0.1291975975036621, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.413263726135483e-05, + "rewards/margins": 0.17406806349754333, + "rewards/rejected": -0.1740821897983551, + "step": 9687 + }, + { + "epoch": 6.699861687413555, + "grad_norm": 12.373002052307129, + "learning_rate": 1.8334101736591364e-05, + "log_odds_chosen": 9.979798316955566, + "log_odds_ratio": -0.0001685578899923712, + "logits/chosen": -0.5494964122772217, + "logits/rejected": -0.6202380657196045, + "logps/chosen": -0.0005979561246931553, + "logps/rejected": -1.987154483795166, + "loss": 0.4654, + "nll_loss": 0.11633975803852081, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979560955893248e-05, + "rewards/margins": 0.1986556500196457, + "rewards/rejected": -0.1987154632806778, + "step": 9688 + }, + { + "epoch": 6.700553250345782, + "grad_norm": 9.903979301452637, + "learning_rate": 1.8330259720301217e-05, + "log_odds_chosen": 11.164133071899414, + "log_odds_ratio": -0.0001408745301887393, + "logits/chosen": -0.4057766795158386, + "logits/rejected": -0.4818309545516968, + "logps/chosen": -0.00046957345330156386, + "logps/rejected": -2.297665596008301, + "loss": 0.72, + "nll_loss": 0.1799810528755188, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.695734241977334e-05, + "rewards/margins": 0.22971957921981812, + "rewards/rejected": -0.22976654767990112, + "step": 9689 + }, + { + "epoch": 6.7012448132780085, + "grad_norm": 5.544249534606934, + "learning_rate": 1.8326417704011066e-05, + "log_odds_chosen": 10.337028503417969, + "log_odds_ratio": -7.314958929782733e-05, + "logits/chosen": -0.5487796068191528, + "logits/rejected": -0.6383087635040283, + "logps/chosen": -0.0005572509253397584, + "logps/rejected": -2.5427985191345215, + "loss": 0.5788, + "nll_loss": 0.14469903707504272, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.572508234763518e-05, + "rewards/margins": 0.25422412157058716, + "rewards/rejected": -0.25427988171577454, + "step": 9690 + }, + { + "epoch": 6.701936376210235, + "grad_norm": 3.8700461387634277, + "learning_rate": 1.8322575687720915e-05, + "log_odds_chosen": 10.711881637573242, + "log_odds_ratio": -5.9012105339206755e-05, + "logits/chosen": -0.5717523694038391, + "logits/rejected": -0.5392295122146606, + "logps/chosen": -0.0002523858565837145, + "logps/rejected": -2.2547340393066406, + "loss": 0.5199, + "nll_loss": 0.12995873391628265, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.523858711356297e-05, + "rewards/margins": 0.22544819116592407, + "rewards/rejected": -0.22547343373298645, + "step": 9691 + }, + { + "epoch": 6.702627939142462, + "grad_norm": 6.470229148864746, + "learning_rate": 1.8318733671430767e-05, + "log_odds_chosen": 11.414847373962402, + "log_odds_ratio": -4.2619489249773324e-05, + "logits/chosen": -0.591145396232605, + "logits/rejected": -0.6597984433174133, + "logps/chosen": -0.00014959601685404778, + "logps/rejected": -2.2852067947387695, + "loss": 0.3878, + "nll_loss": 0.09693406522274017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4959600775910076e-05, + "rewards/margins": 0.22850573062896729, + "rewards/rejected": -0.2285206913948059, + "step": 9692 + }, + { + "epoch": 6.703319502074689, + "grad_norm": 5.7831950187683105, + "learning_rate": 1.8314891655140616e-05, + "log_odds_chosen": 9.764205932617188, + "log_odds_ratio": -0.00062859698664397, + "logits/chosen": -0.5247955918312073, + "logits/rejected": -0.4551923871040344, + "logps/chosen": -0.001059068599715829, + "logps/rejected": -1.63079035282135, + "loss": 0.6319, + "nll_loss": 0.15792065858840942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010590685269562528, + "rewards/margins": 0.16297312080860138, + "rewards/rejected": -0.16307902336120605, + "step": 9693 + }, + { + "epoch": 6.704011065006916, + "grad_norm": 5.505740642547607, + "learning_rate": 1.831104963885047e-05, + "log_odds_chosen": 10.843637466430664, + "log_odds_ratio": -8.815214096102864e-05, + "logits/chosen": -0.569132924079895, + "logits/rejected": -0.7134794592857361, + "logps/chosen": -0.00018152498523704708, + "logps/rejected": -2.1765732765197754, + "loss": 0.8629, + "nll_loss": 0.21571555733680725, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.815249925130047e-05, + "rewards/margins": 0.21763917803764343, + "rewards/rejected": -0.21765734255313873, + "step": 9694 + }, + { + "epoch": 6.704702627939143, + "grad_norm": 4.550797939300537, + "learning_rate": 1.830720762256032e-05, + "log_odds_chosen": 10.1773681640625, + "log_odds_ratio": -0.00012865892495028675, + "logits/chosen": -0.46741408109664917, + "logits/rejected": -0.44873249530792236, + "logps/chosen": -0.000390806351788342, + "logps/rejected": -1.9166791439056396, + "loss": 0.6391, + "nll_loss": 0.15975472331047058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.908063445123844e-05, + "rewards/margins": 0.1916288435459137, + "rewards/rejected": -0.19166792929172516, + "step": 9695 + }, + { + "epoch": 6.7053941908713695, + "grad_norm": 6.874147891998291, + "learning_rate": 1.830336560627017e-05, + "log_odds_chosen": 9.434051513671875, + "log_odds_ratio": -0.0004302481247577816, + "logits/chosen": -0.34089720249176025, + "logits/rejected": -0.3749096989631653, + "logps/chosen": -0.0005576977273449302, + "logps/rejected": -1.6736546754837036, + "loss": 0.6974, + "nll_loss": 0.17429561913013458, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5769771279301494e-05, + "rewards/margins": 0.1673096865415573, + "rewards/rejected": -0.1673654466867447, + "step": 9696 + }, + { + "epoch": 6.706085753803596, + "grad_norm": 5.495350360870361, + "learning_rate": 1.8299523589980023e-05, + "log_odds_chosen": 10.23301887512207, + "log_odds_ratio": -0.00018941161397378892, + "logits/chosen": 0.054965417832136154, + "logits/rejected": -0.0001494288444519043, + "logps/chosen": -0.0002769737329799682, + "logps/rejected": -1.891705870628357, + "loss": 0.7117, + "nll_loss": 0.17790867388248444, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.769737329799682e-05, + "rewards/margins": 0.18914291262626648, + "rewards/rejected": -0.18917059898376465, + "step": 9697 + }, + { + "epoch": 6.706777316735823, + "grad_norm": 6.182698726654053, + "learning_rate": 1.8295681573689875e-05, + "log_odds_chosen": 10.451370239257812, + "log_odds_ratio": -0.0012464377796277404, + "logits/chosen": -0.6633809804916382, + "logits/rejected": -0.69439297914505, + "logps/chosen": -0.001036527450196445, + "logps/rejected": -2.798448085784912, + "loss": 0.8142, + "nll_loss": 0.20343345403671265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010365273919887841, + "rewards/margins": 0.27974116802215576, + "rewards/rejected": -0.27984482049942017, + "step": 9698 + }, + { + "epoch": 6.70746887966805, + "grad_norm": 6.854715824127197, + "learning_rate": 1.8291839557399724e-05, + "log_odds_chosen": 10.501351356506348, + "log_odds_ratio": -9.871691872831434e-05, + "logits/chosen": -0.4957367777824402, + "logits/rejected": -0.5315590500831604, + "logps/chosen": -0.0001666514144744724, + "logps/rejected": -1.935477614402771, + "loss": 0.7584, + "nll_loss": 0.1895884871482849, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.666514071985148e-05, + "rewards/margins": 0.1935310959815979, + "rewards/rejected": -0.19354775547981262, + "step": 9699 + }, + { + "epoch": 6.708160442600277, + "grad_norm": 6.463924407958984, + "learning_rate": 1.8287997541109573e-05, + "log_odds_chosen": 10.318857192993164, + "log_odds_ratio": -0.0005355229368433356, + "logits/chosen": -0.5634887218475342, + "logits/rejected": -0.6088401079177856, + "logps/chosen": -0.0005697443266399205, + "logps/rejected": -2.081566572189331, + "loss": 0.5238, + "nll_loss": 0.13088880479335785, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6974429753609e-05, + "rewards/margins": 0.20809967815876007, + "rewards/rejected": -0.20815666019916534, + "step": 9700 + }, + { + "epoch": 6.708852005532504, + "grad_norm": 4.38247537612915, + "learning_rate": 1.8284155524819426e-05, + "log_odds_chosen": 10.187275886535645, + "log_odds_ratio": -0.00047190545592457056, + "logits/chosen": -0.6247849464416504, + "logits/rejected": -0.6580078601837158, + "logps/chosen": -0.0009922175668179989, + "logps/rejected": -1.968461513519287, + "loss": 0.5271, + "nll_loss": 0.13173647224903107, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.922177559928969e-05, + "rewards/margins": 0.19674694538116455, + "rewards/rejected": -0.1968461573123932, + "step": 9701 + }, + { + "epoch": 6.70954356846473, + "grad_norm": 11.628179550170898, + "learning_rate": 1.8280313508529275e-05, + "log_odds_chosen": 10.961841583251953, + "log_odds_ratio": -0.00017139659030362964, + "logits/chosen": -0.5121583938598633, + "logits/rejected": -0.5763285160064697, + "logps/chosen": -0.00012023936869809404, + "logps/rejected": -2.096275568008423, + "loss": 0.6286, + "nll_loss": 0.15714293718338013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2023936506011523e-05, + "rewards/margins": 0.20961549878120422, + "rewards/rejected": -0.20962753891944885, + "step": 9702 + }, + { + "epoch": 6.710235131396957, + "grad_norm": 5.260577201843262, + "learning_rate": 1.8276471492239127e-05, + "log_odds_chosen": 9.537765502929688, + "log_odds_ratio": -0.00026477783103473485, + "logits/chosen": -0.22354461252689362, + "logits/rejected": -0.23986151814460754, + "logps/chosen": -0.0009489476797170937, + "logps/rejected": -1.7701373100280762, + "loss": 0.6928, + "nll_loss": 0.17317166924476624, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.489477088209242e-05, + "rewards/margins": 0.17691883444786072, + "rewards/rejected": -0.17701373994350433, + "step": 9703 + }, + { + "epoch": 6.710926694329184, + "grad_norm": 6.721794605255127, + "learning_rate": 1.827262947594898e-05, + "log_odds_chosen": 10.314403533935547, + "log_odds_ratio": -0.00021236721659079194, + "logits/chosen": -0.23889127373695374, + "logits/rejected": -0.44527435302734375, + "logps/chosen": -0.0008492742199450731, + "logps/rejected": -2.0169315338134766, + "loss": 0.4235, + "nll_loss": 0.1058477908372879, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.492742927046493e-05, + "rewards/margins": 0.20160824060440063, + "rewards/rejected": -0.20169317722320557, + "step": 9704 + }, + { + "epoch": 6.711618257261411, + "grad_norm": 4.631369113922119, + "learning_rate": 1.826878745965883e-05, + "log_odds_chosen": 11.75677490234375, + "log_odds_ratio": -5.7359426136827096e-05, + "logits/chosen": -0.39702045917510986, + "logits/rejected": -0.4525296092033386, + "logps/chosen": -0.0002216920693172142, + "logps/rejected": -2.8050849437713623, + "loss": 0.6856, + "nll_loss": 0.17140543460845947, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.216920802311506e-05, + "rewards/margins": 0.2804863452911377, + "rewards/rejected": -0.28050851821899414, + "step": 9705 + }, + { + "epoch": 6.712309820193638, + "grad_norm": 6.538344383239746, + "learning_rate": 1.826494544336868e-05, + "log_odds_chosen": 10.887186050415039, + "log_odds_ratio": -3.279655720689334e-05, + "logits/chosen": 0.09093475341796875, + "logits/rejected": -0.08036897331476212, + "logps/chosen": -0.0012253046734258533, + "logps/rejected": -2.9093241691589355, + "loss": 0.7538, + "nll_loss": 0.18844729661941528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012253047316335142, + "rewards/margins": 0.29080986976623535, + "rewards/rejected": -0.29093241691589355, + "step": 9706 + }, + { + "epoch": 6.713001383125865, + "grad_norm": 6.628562927246094, + "learning_rate": 1.8261103427078534e-05, + "log_odds_chosen": 10.301244735717773, + "log_odds_ratio": -0.00011587039625737816, + "logits/chosen": -0.19004501402378082, + "logits/rejected": -0.1924629509449005, + "logps/chosen": -0.0005146628245711327, + "logps/rejected": -2.369720458984375, + "loss": 0.7877, + "nll_loss": 0.1969122737646103, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1466282457113266e-05, + "rewards/margins": 0.23692059516906738, + "rewards/rejected": -0.23697204887866974, + "step": 9707 + }, + { + "epoch": 6.713692946058091, + "grad_norm": 5.522481918334961, + "learning_rate": 1.8257261410788383e-05, + "log_odds_chosen": 11.290914535522461, + "log_odds_ratio": -1.9673158021760173e-05, + "logits/chosen": -0.5909338593482971, + "logits/rejected": -0.6225830912590027, + "logps/chosen": -0.00017350060807075351, + "logps/rejected": -2.613370180130005, + "loss": 0.8707, + "nll_loss": 0.21768240630626678, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7350061170873232e-05, + "rewards/margins": 0.26131969690322876, + "rewards/rejected": -0.2613370418548584, + "step": 9708 + }, + { + "epoch": 6.714384508990318, + "grad_norm": 5.048619747161865, + "learning_rate": 1.8253419394498232e-05, + "log_odds_chosen": 11.536866188049316, + "log_odds_ratio": -8.987126784631982e-05, + "logits/chosen": -0.37755972146987915, + "logits/rejected": -0.4132126271724701, + "logps/chosen": -0.00022529861598741263, + "logps/rejected": -3.207831382751465, + "loss": 0.64, + "nll_loss": 0.15998563170433044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2529860871145502e-05, + "rewards/margins": 0.3207606077194214, + "rewards/rejected": -0.3207831382751465, + "step": 9709 + }, + { + "epoch": 6.715076071922545, + "grad_norm": 15.114882469177246, + "learning_rate": 1.8249577378208084e-05, + "log_odds_chosen": 11.053642272949219, + "log_odds_ratio": -3.2246993214357644e-05, + "logits/chosen": -0.8833234310150146, + "logits/rejected": -0.9113695025444031, + "logps/chosen": -7.948539132485166e-05, + "logps/rejected": -1.6513739824295044, + "loss": 0.49, + "nll_loss": 0.12250618636608124, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.948539860080928e-06, + "rewards/margins": 0.16512946784496307, + "rewards/rejected": -0.1651374101638794, + "step": 9710 + }, + { + "epoch": 6.715767634854772, + "grad_norm": 20.863941192626953, + "learning_rate": 1.8245735361917933e-05, + "log_odds_chosen": 10.994382858276367, + "log_odds_ratio": -0.00030419373069889843, + "logits/chosen": -0.337488055229187, + "logits/rejected": -0.3565511703491211, + "logps/chosen": -0.0006342109409160912, + "logps/rejected": -2.678623676300049, + "loss": 0.614, + "nll_loss": 0.15347182750701904, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.342109554680064e-05, + "rewards/margins": 0.2677989602088928, + "rewards/rejected": -0.26786237955093384, + "step": 9711 + }, + { + "epoch": 6.716459197786999, + "grad_norm": 11.249113082885742, + "learning_rate": 1.8241893345627786e-05, + "log_odds_chosen": 11.143560409545898, + "log_odds_ratio": -5.740381675423123e-05, + "logits/chosen": -0.40606385469436646, + "logits/rejected": -0.4958600699901581, + "logps/chosen": -0.00015600050392095, + "logps/rejected": -2.114410400390625, + "loss": 0.4894, + "nll_loss": 0.12235406786203384, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.560005148348864e-05, + "rewards/margins": 0.21142543852329254, + "rewards/rejected": -0.2114410549402237, + "step": 9712 + }, + { + "epoch": 6.717150760719226, + "grad_norm": 7.461971759796143, + "learning_rate": 1.8238051329337638e-05, + "log_odds_chosen": 10.837980270385742, + "log_odds_ratio": -2.7545340344659053e-05, + "logits/chosen": -0.575896143913269, + "logits/rejected": -0.6402074098587036, + "logps/chosen": -0.00017790490528568625, + "logps/rejected": -1.9333128929138184, + "loss": 0.6003, + "nll_loss": 0.1500811129808426, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7790491256164387e-05, + "rewards/margins": 0.19331350922584534, + "rewards/rejected": -0.1933312863111496, + "step": 9713 + }, + { + "epoch": 6.717842323651452, + "grad_norm": 10.962932586669922, + "learning_rate": 1.8234209313047487e-05, + "log_odds_chosen": 11.474613189697266, + "log_odds_ratio": -0.0001845585647970438, + "logits/chosen": -0.4879600405693054, + "logits/rejected": -0.56697678565979, + "logps/chosen": -0.0002608491631690413, + "logps/rejected": -2.9621644020080566, + "loss": 0.8872, + "nll_loss": 0.221793994307518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.608491740829777e-05, + "rewards/margins": 0.2961903512477875, + "rewards/rejected": -0.2962164282798767, + "step": 9714 + }, + { + "epoch": 6.718533886583679, + "grad_norm": 4.909445762634277, + "learning_rate": 1.823036729675734e-05, + "log_odds_chosen": 10.88532829284668, + "log_odds_ratio": -4.1851682908600196e-05, + "logits/chosen": -0.5104326605796814, + "logits/rejected": -0.5674954652786255, + "logps/chosen": -0.0001578840019647032, + "logps/rejected": -1.880581021308899, + "loss": 0.3437, + "nll_loss": 0.08592512458562851, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5788402379257604e-05, + "rewards/margins": 0.1880423128604889, + "rewards/rejected": -0.18805810809135437, + "step": 9715 + }, + { + "epoch": 6.719225449515906, + "grad_norm": 10.080972671508789, + "learning_rate": 1.8226525280467192e-05, + "log_odds_chosen": 9.879682540893555, + "log_odds_ratio": -0.00017235639097634703, + "logits/chosen": -0.4462816119194031, + "logits/rejected": -0.4516682028770447, + "logps/chosen": -0.00025914120487868786, + "logps/rejected": -1.6137008666992188, + "loss": 0.4635, + "nll_loss": 0.1158699095249176, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5914119760273024e-05, + "rewards/margins": 0.16134417057037354, + "rewards/rejected": -0.16137008368968964, + "step": 9716 + }, + { + "epoch": 6.719917012448133, + "grad_norm": 13.479330062866211, + "learning_rate": 1.822268326417704e-05, + "log_odds_chosen": 10.896804809570312, + "log_odds_ratio": -6.16292527411133e-05, + "logits/chosen": 0.06766664981842041, + "logits/rejected": 0.15634863078594208, + "logps/chosen": -0.00036590383388102055, + "logps/rejected": -2.6821343898773193, + "loss": 0.5224, + "nll_loss": 0.13058429956436157, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6590383388102055e-05, + "rewards/margins": 0.2681768536567688, + "rewards/rejected": -0.2682134509086609, + "step": 9717 + }, + { + "epoch": 6.72060857538036, + "grad_norm": 5.043457984924316, + "learning_rate": 1.821884124788689e-05, + "log_odds_chosen": 10.161759376525879, + "log_odds_ratio": -0.00023662808234803379, + "logits/chosen": -0.6527903079986572, + "logits/rejected": -0.6864253282546997, + "logps/chosen": -0.00034481301554478705, + "logps/rejected": -2.12319278717041, + "loss": 0.6527, + "nll_loss": 0.16315683722496033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4481301554478705e-05, + "rewards/margins": 0.21228480339050293, + "rewards/rejected": -0.2123192846775055, + "step": 9718 + }, + { + "epoch": 6.7213001383125865, + "grad_norm": 4.991131782531738, + "learning_rate": 1.8214999231596743e-05, + "log_odds_chosen": 9.911870956420898, + "log_odds_ratio": -8.921151311369613e-05, + "logits/chosen": -0.22037310898303986, + "logits/rejected": -0.2324230670928955, + "logps/chosen": -0.00027700295322574675, + "logps/rejected": -1.6150810718536377, + "loss": 0.5001, + "nll_loss": 0.12501946091651917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7700294594978914e-05, + "rewards/margins": 0.16148041188716888, + "rewards/rejected": -0.16150811314582825, + "step": 9719 + }, + { + "epoch": 6.721991701244813, + "grad_norm": 7.188221454620361, + "learning_rate": 1.8211157215306592e-05, + "log_odds_chosen": 10.476211547851562, + "log_odds_ratio": -0.00016788275388535112, + "logits/chosen": -0.6707624197006226, + "logits/rejected": -0.7707037925720215, + "logps/chosen": -0.0008173736860044301, + "logps/rejected": -1.7757651805877686, + "loss": 0.4814, + "nll_loss": 0.12034104019403458, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.173738024197519e-05, + "rewards/margins": 0.17749479413032532, + "rewards/rejected": -0.17757654190063477, + "step": 9720 + }, + { + "epoch": 6.72268326417704, + "grad_norm": 5.455285549163818, + "learning_rate": 1.8207315199016444e-05, + "log_odds_chosen": 10.982564926147461, + "log_odds_ratio": -5.847503780387342e-05, + "logits/chosen": -0.566404402256012, + "logits/rejected": -0.5938459038734436, + "logps/chosen": -0.0002679351018741727, + "logps/rejected": -2.2465574741363525, + "loss": 0.8184, + "nll_loss": 0.20458795130252838, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6793512006406672e-05, + "rewards/margins": 0.22462892532348633, + "rewards/rejected": -0.22465574741363525, + "step": 9721 + }, + { + "epoch": 6.723374827109267, + "grad_norm": 6.680959701538086, + "learning_rate": 1.8203473182726297e-05, + "log_odds_chosen": 10.745830535888672, + "log_odds_ratio": -0.0002788332349155098, + "logits/chosen": -0.7466229200363159, + "logits/rejected": -0.8177922964096069, + "logps/chosen": -0.00017833249876275659, + "logps/rejected": -1.9720370769500732, + "loss": 0.6547, + "nll_loss": 0.16363570094108582, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7833252059062943e-05, + "rewards/margins": 0.19718587398529053, + "rewards/rejected": -0.19720371067523956, + "step": 9722 + }, + { + "epoch": 6.724066390041494, + "grad_norm": 6.327624797821045, + "learning_rate": 1.8199631166436146e-05, + "log_odds_chosen": 11.032835960388184, + "log_odds_ratio": -6.335016951197758e-05, + "logits/chosen": -0.3551556169986725, + "logits/rejected": -0.3900320529937744, + "logps/chosen": -0.000181854484253563, + "logps/rejected": -2.182952404022217, + "loss": 0.4887, + "nll_loss": 0.12215951085090637, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.81854484253563e-05, + "rewards/margins": 0.21827706694602966, + "rewards/rejected": -0.21829524636268616, + "step": 9723 + }, + { + "epoch": 6.724757952973721, + "grad_norm": 4.812091827392578, + "learning_rate": 1.8195789150145998e-05, + "log_odds_chosen": 11.22108268737793, + "log_odds_ratio": -8.43647649162449e-05, + "logits/chosen": -0.6499758958816528, + "logits/rejected": -0.6795446872711182, + "logps/chosen": -0.00011281066690571606, + "logps/rejected": -2.3831028938293457, + "loss": 0.4783, + "nll_loss": 0.11957649886608124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1281066690571606e-05, + "rewards/margins": 0.23829901218414307, + "rewards/rejected": -0.2383102923631668, + "step": 9724 + }, + { + "epoch": 6.7254495159059475, + "grad_norm": 7.116838455200195, + "learning_rate": 1.819194713385585e-05, + "log_odds_chosen": 10.212159156799316, + "log_odds_ratio": -0.00012075810082023963, + "logits/chosen": -0.8690159320831299, + "logits/rejected": -0.9198919534683228, + "logps/chosen": -0.0005652224645018578, + "logps/rejected": -2.057105302810669, + "loss": 0.5324, + "nll_loss": 0.13308003544807434, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.652225445373915e-05, + "rewards/margins": 0.20565402507781982, + "rewards/rejected": -0.2057105451822281, + "step": 9725 + }, + { + "epoch": 6.726141078838174, + "grad_norm": 7.264196872711182, + "learning_rate": 1.81881051175657e-05, + "log_odds_chosen": 11.783138275146484, + "log_odds_ratio": -1.351820810668869e-05, + "logits/chosen": -0.6626642942428589, + "logits/rejected": -0.6600688099861145, + "logps/chosen": -0.00021941386512480676, + "logps/rejected": -2.530977249145508, + "loss": 0.4854, + "nll_loss": 0.12134195864200592, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.194138869526796e-05, + "rewards/margins": 0.25307580828666687, + "rewards/rejected": -0.2530977427959442, + "step": 9726 + }, + { + "epoch": 6.726832641770401, + "grad_norm": 5.8319411277771, + "learning_rate": 1.8184263101275552e-05, + "log_odds_chosen": 10.988485336303711, + "log_odds_ratio": -6.073584154364653e-05, + "logits/chosen": -0.42071330547332764, + "logits/rejected": -0.4699041247367859, + "logps/chosen": -0.0001319625589530915, + "logps/rejected": -1.9698193073272705, + "loss": 0.5171, + "nll_loss": 0.12927477061748505, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3196255167713389e-05, + "rewards/margins": 0.19696873426437378, + "rewards/rejected": -0.19698193669319153, + "step": 9727 + }, + { + "epoch": 6.727524204702628, + "grad_norm": 10.939681053161621, + "learning_rate": 1.81804210849854e-05, + "log_odds_chosen": 11.293813705444336, + "log_odds_ratio": -5.135497121955268e-05, + "logits/chosen": -0.7324642539024353, + "logits/rejected": -0.7711427807807922, + "logps/chosen": -0.00021397329692263156, + "logps/rejected": -2.492666721343994, + "loss": 0.7633, + "nll_loss": 0.19083064794540405, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1397330783656798e-05, + "rewards/margins": 0.249245285987854, + "rewards/rejected": -0.24926666915416718, + "step": 9728 + }, + { + "epoch": 6.728215767634855, + "grad_norm": 4.356656074523926, + "learning_rate": 1.817657906869525e-05, + "log_odds_chosen": 11.521858215332031, + "log_odds_ratio": -3.647685662144795e-05, + "logits/chosen": -0.21235541999340057, + "logits/rejected": -0.17017611861228943, + "logps/chosen": -0.0003204144013579935, + "logps/rejected": -3.0372743606567383, + "loss": 0.6446, + "nll_loss": 0.16115355491638184, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2041443773778155e-05, + "rewards/margins": 0.3036953806877136, + "rewards/rejected": -0.3037274479866028, + "step": 9729 + }, + { + "epoch": 6.728907330567082, + "grad_norm": 4.123113632202148, + "learning_rate": 1.8172737052405103e-05, + "log_odds_chosen": 11.573395729064941, + "log_odds_ratio": -1.4847018974251114e-05, + "logits/chosen": -0.36827945709228516, + "logits/rejected": -0.4537869691848755, + "logps/chosen": -0.00014661697787232697, + "logps/rejected": -2.7357630729675293, + "loss": 0.5253, + "nll_loss": 0.13132527470588684, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.466169942432316e-05, + "rewards/margins": 0.27356165647506714, + "rewards/rejected": -0.2735763192176819, + "step": 9730 + }, + { + "epoch": 6.7295988934993085, + "grad_norm": 5.720463275909424, + "learning_rate": 1.8168895036114955e-05, + "log_odds_chosen": 11.32773208618164, + "log_odds_ratio": -3.699558510561474e-05, + "logits/chosen": -0.2998761832714081, + "logits/rejected": -0.35165315866470337, + "logps/chosen": -0.00017662344907876104, + "logps/rejected": -2.2534048557281494, + "loss": 0.9829, + "nll_loss": 0.2457246333360672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.766234345268458e-05, + "rewards/margins": 0.22532284259796143, + "rewards/rejected": -0.22534050047397614, + "step": 9731 + }, + { + "epoch": 6.730290456431535, + "grad_norm": 18.147850036621094, + "learning_rate": 1.8165053019824804e-05, + "log_odds_chosen": 11.302204132080078, + "log_odds_ratio": -1.858202631410677e-05, + "logits/chosen": -0.5007428526878357, + "logits/rejected": -0.5436948537826538, + "logps/chosen": -0.00026709146914072335, + "logps/rejected": -2.660487174987793, + "loss": 0.6066, + "nll_loss": 0.15165534615516663, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6709147277870215e-05, + "rewards/margins": 0.2660219967365265, + "rewards/rejected": -0.26604872941970825, + "step": 9732 + }, + { + "epoch": 6.730982019363762, + "grad_norm": 5.787598133087158, + "learning_rate": 1.8161211003534657e-05, + "log_odds_chosen": 9.566076278686523, + "log_odds_ratio": -0.00015092222020030022, + "logits/chosen": -0.6181987524032593, + "logits/rejected": -0.660263180732727, + "logps/chosen": -0.0011174660176038742, + "logps/rejected": -1.9777193069458008, + "loss": 0.4916, + "nll_loss": 0.12287604808807373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001117465944844298, + "rewards/margins": 0.1976601779460907, + "rewards/rejected": -0.19777193665504456, + "step": 9733 + }, + { + "epoch": 6.731673582295989, + "grad_norm": 11.69165325164795, + "learning_rate": 1.815736898724451e-05, + "log_odds_chosen": 10.284931182861328, + "log_odds_ratio": -6.89550070092082e-05, + "logits/chosen": -0.6240053772926331, + "logits/rejected": -0.6646280288696289, + "logps/chosen": -0.00039611352258361876, + "logps/rejected": -2.2645621299743652, + "loss": 0.5858, + "nll_loss": 0.1464376002550125, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.96113537135534e-05, + "rewards/margins": 0.22641661763191223, + "rewards/rejected": -0.22645621001720428, + "step": 9734 + }, + { + "epoch": 6.732365145228216, + "grad_norm": 5.843806266784668, + "learning_rate": 1.8153526970954358e-05, + "log_odds_chosen": 10.615920066833496, + "log_odds_ratio": -9.333140042144805e-05, + "logits/chosen": -0.599016010761261, + "logits/rejected": -0.6555784940719604, + "logps/chosen": -0.0005432904581539333, + "logps/rejected": -2.101348876953125, + "loss": 0.4896, + "nll_loss": 0.12239242345094681, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4329044360201806e-05, + "rewards/margins": 0.2100805640220642, + "rewards/rejected": -0.21013489365577698, + "step": 9735 + }, + { + "epoch": 6.733056708160443, + "grad_norm": 6.959444999694824, + "learning_rate": 1.814968495466421e-05, + "log_odds_chosen": 11.002592086791992, + "log_odds_ratio": -2.5873685444821604e-05, + "logits/chosen": -0.22370731830596924, + "logits/rejected": -0.30544498562812805, + "logps/chosen": -0.0002484057913534343, + "logps/rejected": -2.4761853218078613, + "loss": 0.4903, + "nll_loss": 0.12257062643766403, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4840579499141313e-05, + "rewards/margins": 0.24759367108345032, + "rewards/rejected": -0.24761851131916046, + "step": 9736 + }, + { + "epoch": 6.7337482710926695, + "grad_norm": 7.223566055297852, + "learning_rate": 1.814584293837406e-05, + "log_odds_chosen": 10.804994583129883, + "log_odds_ratio": -0.0002193464752053842, + "logits/chosen": -0.6460748910903931, + "logits/rejected": -0.659801185131073, + "logps/chosen": -0.0003987317904829979, + "logps/rejected": -2.281238555908203, + "loss": 0.58, + "nll_loss": 0.14497052133083344, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9873182686278597e-05, + "rewards/margins": 0.22808398306369781, + "rewards/rejected": -0.22812385857105255, + "step": 9737 + }, + { + "epoch": 6.734439834024896, + "grad_norm": 4.332970142364502, + "learning_rate": 1.814200092208391e-05, + "log_odds_chosen": 11.209919929504395, + "log_odds_ratio": -3.504233609419316e-05, + "logits/chosen": -0.3805273473262787, + "logits/rejected": -0.42599645256996155, + "logps/chosen": -0.00011535930389072746, + "logps/rejected": -1.9247658252716064, + "loss": 0.4257, + "nll_loss": 0.1064300462603569, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1535931662365329e-05, + "rewards/margins": 0.19246505200862885, + "rewards/rejected": -0.19247660040855408, + "step": 9738 + }, + { + "epoch": 6.735131396957123, + "grad_norm": 6.551239967346191, + "learning_rate": 1.813815890579376e-05, + "log_odds_chosen": 9.787984848022461, + "log_odds_ratio": -0.0002358718920731917, + "logits/chosen": -0.6269487142562866, + "logits/rejected": -0.6987356543540955, + "logps/chosen": -0.0004186548467259854, + "logps/rejected": -1.6685190200805664, + "loss": 0.5725, + "nll_loss": 0.14309853315353394, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.18654854001943e-05, + "rewards/margins": 0.1668100506067276, + "rewards/rejected": -0.16685190796852112, + "step": 9739 + }, + { + "epoch": 6.73582295988935, + "grad_norm": 5.712563991546631, + "learning_rate": 1.8134316889503614e-05, + "log_odds_chosen": 9.63579273223877, + "log_odds_ratio": -0.0010889896657317877, + "logits/chosen": -0.6010291576385498, + "logits/rejected": -0.6723165512084961, + "logps/chosen": -0.0009074000408872962, + "logps/rejected": -2.5607223510742188, + "loss": 0.7981, + "nll_loss": 0.1994112730026245, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.074000990949571e-05, + "rewards/margins": 0.2559815049171448, + "rewards/rejected": -0.2560722529888153, + "step": 9740 + }, + { + "epoch": 6.736514522821577, + "grad_norm": 15.393342018127441, + "learning_rate": 1.8130474873213463e-05, + "log_odds_chosen": 10.707435607910156, + "log_odds_ratio": -0.00014981115236878395, + "logits/chosen": -0.5892646312713623, + "logits/rejected": -0.6897189617156982, + "logps/chosen": -0.0004563984111882746, + "logps/rejected": -2.320040702819824, + "loss": 0.879, + "nll_loss": 0.2197228968143463, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.563984475680627e-05, + "rewards/margins": 0.23195841908454895, + "rewards/rejected": -0.2320040464401245, + "step": 9741 + }, + { + "epoch": 6.737206085753804, + "grad_norm": 5.6039252281188965, + "learning_rate": 1.8126632856923315e-05, + "log_odds_chosen": 11.154106140136719, + "log_odds_ratio": -3.386279786354862e-05, + "logits/chosen": -0.3528594374656677, + "logits/rejected": -0.38136905431747437, + "logps/chosen": -0.00043841905426234007, + "logps/rejected": -2.813148021697998, + "loss": 0.58, + "nll_loss": 0.1449882984161377, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.384190833661705e-05, + "rewards/margins": 0.28127095103263855, + "rewards/rejected": -0.28131479024887085, + "step": 9742 + }, + { + "epoch": 6.7378976486860305, + "grad_norm": 4.884521484375, + "learning_rate": 1.8122790840633167e-05, + "log_odds_chosen": 11.476736068725586, + "log_odds_ratio": -6.344960274873301e-05, + "logits/chosen": -0.42241236567497253, + "logits/rejected": -0.46878641843795776, + "logps/chosen": -0.00015263850218616426, + "logps/rejected": -2.290283679962158, + "loss": 0.4595, + "nll_loss": 0.11485810577869415, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.526385312899947e-05, + "rewards/margins": 0.2290130853652954, + "rewards/rejected": -0.2290283441543579, + "step": 9743 + }, + { + "epoch": 6.738589211618257, + "grad_norm": 5.994597911834717, + "learning_rate": 1.8118948824343017e-05, + "log_odds_chosen": 11.044092178344727, + "log_odds_ratio": -0.0002066854212898761, + "logits/chosen": -0.5602484941482544, + "logits/rejected": -0.6336889266967773, + "logps/chosen": -0.0002837642969097942, + "logps/rejected": -2.173689126968384, + "loss": 0.543, + "nll_loss": 0.13572733104228973, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.837642932718154e-05, + "rewards/margins": 0.21734054386615753, + "rewards/rejected": -0.21736891567707062, + "step": 9744 + }, + { + "epoch": 6.739280774550484, + "grad_norm": 4.356839179992676, + "learning_rate": 1.811510680805287e-05, + "log_odds_chosen": 9.351144790649414, + "log_odds_ratio": -0.0005068988539278507, + "logits/chosen": -0.31054121255874634, + "logits/rejected": -0.3710728585720062, + "logps/chosen": -0.0007687251782044768, + "logps/rejected": -1.5605571269989014, + "loss": 0.5891, + "nll_loss": 0.14723043143749237, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.687251491006464e-05, + "rewards/margins": 0.15597884356975555, + "rewards/rejected": -0.15605571866035461, + "step": 9745 + }, + { + "epoch": 6.739972337482711, + "grad_norm": 6.220463275909424, + "learning_rate": 1.8111264791762718e-05, + "log_odds_chosen": 11.119985580444336, + "log_odds_ratio": -2.354292882955633e-05, + "logits/chosen": 0.07925686240196228, + "logits/rejected": 0.08532209694385529, + "logps/chosen": -0.00018369583995081484, + "logps/rejected": -2.416872978210449, + "loss": 0.7845, + "nll_loss": 0.1961258053779602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8369584722677246e-05, + "rewards/margins": 0.2416689395904541, + "rewards/rejected": -0.24168729782104492, + "step": 9746 + }, + { + "epoch": 6.740663900414938, + "grad_norm": 5.230076313018799, + "learning_rate": 1.8107422775472567e-05, + "log_odds_chosen": 10.40478229522705, + "log_odds_ratio": -0.000254276383202523, + "logits/chosen": -0.46239978075027466, + "logits/rejected": -0.516649603843689, + "logps/chosen": -0.0008920235559344292, + "logps/rejected": -2.0291428565979004, + "loss": 0.671, + "nll_loss": 0.16771256923675537, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.920235268305987e-05, + "rewards/margins": 0.20282509922981262, + "rewards/rejected": -0.202914297580719, + "step": 9747 + }, + { + "epoch": 6.741355463347165, + "grad_norm": 4.9943528175354, + "learning_rate": 1.810358075918242e-05, + "log_odds_chosen": 11.097500801086426, + "log_odds_ratio": -2.419178417767398e-05, + "logits/chosen": -0.6011475324630737, + "logits/rejected": -0.8357247114181519, + "logps/chosen": -0.00037277143565006554, + "logps/rejected": -2.7854795455932617, + "loss": 0.4733, + "nll_loss": 0.11833024770021439, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.727714283741079e-05, + "rewards/margins": 0.2785106599330902, + "rewards/rejected": -0.2785479724407196, + "step": 9748 + }, + { + "epoch": 6.7420470262793915, + "grad_norm": 5.590667724609375, + "learning_rate": 1.8099738742892272e-05, + "log_odds_chosen": 10.696863174438477, + "log_odds_ratio": -3.9032405766192824e-05, + "logits/chosen": -0.5773473978042603, + "logits/rejected": -0.624853253364563, + "logps/chosen": -0.00022468066890724003, + "logps/rejected": -2.161128282546997, + "loss": 0.7687, + "nll_loss": 0.1921810805797577, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2468066163128242e-05, + "rewards/margins": 0.2160903513431549, + "rewards/rejected": -0.21611282229423523, + "step": 9749 + }, + { + "epoch": 6.742738589211618, + "grad_norm": 7.014741897583008, + "learning_rate": 1.809589672660212e-05, + "log_odds_chosen": 11.61632251739502, + "log_odds_ratio": -6.345485599013045e-05, + "logits/chosen": -0.0924762487411499, + "logits/rejected": -0.1658192276954651, + "logps/chosen": -0.00028687884332612157, + "logps/rejected": -2.877070188522339, + "loss": 0.6043, + "nll_loss": 0.15107500553131104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.868788578780368e-05, + "rewards/margins": 0.2876783311367035, + "rewards/rejected": -0.28770703077316284, + "step": 9750 + }, + { + "epoch": 6.743430152143845, + "grad_norm": 6.5519890785217285, + "learning_rate": 1.8092054710311973e-05, + "log_odds_chosen": 10.81260871887207, + "log_odds_ratio": -4.300369619159028e-05, + "logits/chosen": -0.16680464148521423, + "logits/rejected": -0.21911774575710297, + "logps/chosen": -0.0001687578478595242, + "logps/rejected": -1.9256123304367065, + "loss": 0.3878, + "nll_loss": 0.09693576395511627, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6875783330760896e-05, + "rewards/margins": 0.1925443410873413, + "rewards/rejected": -0.19256123900413513, + "step": 9751 + }, + { + "epoch": 6.744121715076072, + "grad_norm": 4.346395015716553, + "learning_rate": 1.8088212694021823e-05, + "log_odds_chosen": 10.728569030761719, + "log_odds_ratio": -4.3420415750006214e-05, + "logits/chosen": -0.4695202708244324, + "logits/rejected": -0.5058833360671997, + "logps/chosen": -0.00018572999397292733, + "logps/rejected": -1.950321912765503, + "loss": 1.0065, + "nll_loss": 0.25160858035087585, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.857300230767578e-05, + "rewards/margins": 0.1950136125087738, + "rewards/rejected": -0.19503219425678253, + "step": 9752 + }, + { + "epoch": 6.744813278008299, + "grad_norm": 7.713948726654053, + "learning_rate": 1.8084370677731675e-05, + "log_odds_chosen": 10.131799697875977, + "log_odds_ratio": -0.00040340382838621736, + "logits/chosen": -0.7229098677635193, + "logits/rejected": -0.624686062335968, + "logps/chosen": -0.00037127750692889094, + "logps/rejected": -1.967607855796814, + "loss": 0.703, + "nll_loss": 0.1756991744041443, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.712774923769757e-05, + "rewards/margins": 0.19672366976737976, + "rewards/rejected": -0.19676080346107483, + "step": 9753 + }, + { + "epoch": 6.745504840940526, + "grad_norm": 6.567865371704102, + "learning_rate": 1.8080528661441527e-05, + "log_odds_chosen": 9.464198112487793, + "log_odds_ratio": -0.00013312677037902176, + "logits/chosen": -0.4827428162097931, + "logits/rejected": -0.5091724991798401, + "logps/chosen": -0.0005617655115202069, + "logps/rejected": -1.3640573024749756, + "loss": 0.5455, + "nll_loss": 0.13636085391044617, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617655551759526e-05, + "rewards/margins": 0.13634954392910004, + "rewards/rejected": -0.13640573620796204, + "step": 9754 + }, + { + "epoch": 6.746196403872752, + "grad_norm": 6.985016822814941, + "learning_rate": 1.8076686645151376e-05, + "log_odds_chosen": 9.059370040893555, + "log_odds_ratio": -0.007898088544607162, + "logits/chosen": -0.5644688606262207, + "logits/rejected": -0.6181748509407043, + "logps/chosen": -0.003136302111670375, + "logps/rejected": -1.3507148027420044, + "loss": 0.7475, + "nll_loss": 0.18608833849430084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003136302693746984, + "rewards/margins": 0.134757861495018, + "rewards/rejected": -0.13507148623466492, + "step": 9755 + }, + { + "epoch": 6.746887966804979, + "grad_norm": 6.688884258270264, + "learning_rate": 1.8072844628861226e-05, + "log_odds_chosen": 10.308743476867676, + "log_odds_ratio": -7.386624929495156e-05, + "logits/chosen": -0.3142358660697937, + "logits/rejected": -0.3920065462589264, + "logps/chosen": -0.00013700808631256223, + "logps/rejected": -1.6693358421325684, + "loss": 0.7536, + "nll_loss": 0.1883903443813324, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3700810086447746e-05, + "rewards/margins": 0.16691987216472626, + "rewards/rejected": -0.1669335663318634, + "step": 9756 + }, + { + "epoch": 6.747579529737206, + "grad_norm": 7.96720552444458, + "learning_rate": 1.8069002612571078e-05, + "log_odds_chosen": 11.322125434875488, + "log_odds_ratio": -0.00012096359569113702, + "logits/chosen": -0.20771357417106628, + "logits/rejected": -0.20665308833122253, + "logps/chosen": -0.00015821008128114045, + "logps/rejected": -2.490894317626953, + "loss": 0.7956, + "nll_loss": 0.19889280200004578, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5821007764316164e-05, + "rewards/margins": 0.24907363951206207, + "rewards/rejected": -0.24908944964408875, + "step": 9757 + }, + { + "epoch": 6.748271092669433, + "grad_norm": 7.730781078338623, + "learning_rate": 1.8065160596280927e-05, + "log_odds_chosen": 12.252197265625, + "log_odds_ratio": -3.502556137391366e-05, + "logits/chosen": -0.6151853203773499, + "logits/rejected": -0.6173470616340637, + "logps/chosen": -0.00014207100321073085, + "logps/rejected": -3.05303955078125, + "loss": 0.5453, + "nll_loss": 0.13632476329803467, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4207101230567787e-05, + "rewards/margins": 0.30528974533081055, + "rewards/rejected": -0.30530399084091187, + "step": 9758 + }, + { + "epoch": 6.74896265560166, + "grad_norm": 3.3932158946990967, + "learning_rate": 1.806131857999078e-05, + "log_odds_chosen": 10.762428283691406, + "log_odds_ratio": -0.00037677347427234054, + "logits/chosen": -0.2462678849697113, + "logits/rejected": -0.2845362424850464, + "logps/chosen": -0.0009380167466588318, + "logps/rejected": -1.9752521514892578, + "loss": 0.4398, + "nll_loss": 0.10991780459880829, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.38016819418408e-05, + "rewards/margins": 0.19743141531944275, + "rewards/rejected": -0.19752521812915802, + "step": 9759 + }, + { + "epoch": 6.749654218533887, + "grad_norm": 37.30306625366211, + "learning_rate": 1.8057476563700632e-05, + "log_odds_chosen": 9.855682373046875, + "log_odds_ratio": -0.0002588354400359094, + "logits/chosen": -0.47376930713653564, + "logits/rejected": -0.5016792416572571, + "logps/chosen": -0.0004103544051758945, + "logps/rejected": -1.791809320449829, + "loss": 0.5705, + "nll_loss": 0.14260096848011017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.103544051758945e-05, + "rewards/margins": 0.1791399121284485, + "rewards/rejected": -0.17918093502521515, + "step": 9760 + }, + { + "epoch": 6.750345781466113, + "grad_norm": 9.75047779083252, + "learning_rate": 1.805363454741048e-05, + "log_odds_chosen": 10.098825454711914, + "log_odds_ratio": -0.0006348791648633778, + "logits/chosen": -0.4901638627052307, + "logits/rejected": -0.46601709723472595, + "logps/chosen": -0.0002679343451745808, + "logps/rejected": -1.9539092779159546, + "loss": 0.5457, + "nll_loss": 0.13636308908462524, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.679343378986232e-05, + "rewards/margins": 0.19536414742469788, + "rewards/rejected": -0.19539092481136322, + "step": 9761 + }, + { + "epoch": 6.75103734439834, + "grad_norm": 4.300050735473633, + "learning_rate": 1.8049792531120333e-05, + "log_odds_chosen": 10.523996353149414, + "log_odds_ratio": -3.409176861168817e-05, + "logits/chosen": -0.4422757625579834, + "logits/rejected": -0.513489842414856, + "logps/chosen": -0.00012512198009062558, + "logps/rejected": -1.5583089590072632, + "loss": 0.395, + "nll_loss": 0.09874957799911499, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2512197827163618e-05, + "rewards/margins": 0.1558183878660202, + "rewards/rejected": -0.15583088994026184, + "step": 9762 + }, + { + "epoch": 6.751728907330567, + "grad_norm": 4.803183555603027, + "learning_rate": 1.8045950514830186e-05, + "log_odds_chosen": 9.5350341796875, + "log_odds_ratio": -0.0017939151730388403, + "logits/chosen": -0.3713359832763672, + "logits/rejected": -0.5139442086219788, + "logps/chosen": -0.0018062122398987412, + "logps/rejected": -2.1577553749084473, + "loss": 0.5588, + "nll_loss": 0.13950826227664948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018062123854178935, + "rewards/margins": 0.21559491753578186, + "rewards/rejected": -0.21577554941177368, + "step": 9763 + }, + { + "epoch": 6.752420470262794, + "grad_norm": 7.823028564453125, + "learning_rate": 1.8042108498540035e-05, + "log_odds_chosen": 11.513077735900879, + "log_odds_ratio": -2.8428832592908293e-05, + "logits/chosen": -0.1352018564939499, + "logits/rejected": -0.24655655026435852, + "logps/chosen": -0.00032958845258690417, + "logps/rejected": -3.1851940155029297, + "loss": 0.5832, + "nll_loss": 0.14579999446868896, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.29588474414777e-05, + "rewards/margins": 0.31848645210266113, + "rewards/rejected": -0.3185194134712219, + "step": 9764 + }, + { + "epoch": 6.753112033195021, + "grad_norm": 7.303538799285889, + "learning_rate": 1.8038266482249884e-05, + "log_odds_chosen": 10.897727966308594, + "log_odds_ratio": -0.0002780243812594563, + "logits/chosen": -0.24842569231987, + "logits/rejected": -0.3929397463798523, + "logps/chosen": -0.0001870131236501038, + "logps/rejected": -1.847274661064148, + "loss": 0.5453, + "nll_loss": 0.13629432022571564, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8701313820201904e-05, + "rewards/margins": 0.18470877408981323, + "rewards/rejected": -0.1847274750471115, + "step": 9765 + }, + { + "epoch": 6.753803596127248, + "grad_norm": 8.309207916259766, + "learning_rate": 1.8034424465959736e-05, + "log_odds_chosen": 11.285194396972656, + "log_odds_ratio": -7.014050788711756e-05, + "logits/chosen": -0.5131502151489258, + "logits/rejected": -0.5522158145904541, + "logps/chosen": -0.0004909878480248153, + "logps/rejected": -2.363844394683838, + "loss": 0.9031, + "nll_loss": 0.2257768213748932, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9098791350843385e-05, + "rewards/margins": 0.23633533716201782, + "rewards/rejected": -0.23638442158699036, + "step": 9766 + }, + { + "epoch": 6.754495159059474, + "grad_norm": 5.041337013244629, + "learning_rate": 1.8030582449669585e-05, + "log_odds_chosen": 10.607772827148438, + "log_odds_ratio": -7.603943231515586e-05, + "logits/chosen": 0.008264736272394657, + "logits/rejected": -0.07102751731872559, + "logps/chosen": -0.00019035846344195306, + "logps/rejected": -2.222433567047119, + "loss": 0.4561, + "nll_loss": 0.1140083372592926, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.903584779938683e-05, + "rewards/margins": 0.22222432494163513, + "rewards/rejected": -0.22224338352680206, + "step": 9767 + }, + { + "epoch": 6.755186721991701, + "grad_norm": 7.952573776245117, + "learning_rate": 1.8026740433379438e-05, + "log_odds_chosen": 10.102436065673828, + "log_odds_ratio": -7.308281783480197e-05, + "logits/chosen": -0.2653353810310364, + "logits/rejected": -0.30025357007980347, + "logps/chosen": -0.0003470160299912095, + "logps/rejected": -1.922732949256897, + "loss": 0.957, + "nll_loss": 0.2392345666885376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.470160299912095e-05, + "rewards/margins": 0.19223859906196594, + "rewards/rejected": -0.1922733038663864, + "step": 9768 + }, + { + "epoch": 6.755878284923928, + "grad_norm": 5.763984680175781, + "learning_rate": 1.802289841708929e-05, + "log_odds_chosen": 11.691999435424805, + "log_odds_ratio": -5.55334409000352e-05, + "logits/chosen": -0.6231212615966797, + "logits/rejected": -0.7192140221595764, + "logps/chosen": -0.0001963993563549593, + "logps/rejected": -2.478055238723755, + "loss": 0.4841, + "nll_loss": 0.12102922052145004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9639934180304408e-05, + "rewards/margins": 0.24778589606285095, + "rewards/rejected": -0.24780553579330444, + "step": 9769 + }, + { + "epoch": 6.756569847856155, + "grad_norm": 6.567784786224365, + "learning_rate": 1.801905640079914e-05, + "log_odds_chosen": 11.372642517089844, + "log_odds_ratio": -5.993260128889233e-05, + "logits/chosen": -0.2392624318599701, + "logits/rejected": -0.3215843439102173, + "logps/chosen": -0.0003123276983387768, + "logps/rejected": -2.6074600219726562, + "loss": 0.53, + "nll_loss": 0.1324916034936905, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.123277201666497e-05, + "rewards/margins": 0.2607147693634033, + "rewards/rejected": -0.2607460021972656, + "step": 9770 + }, + { + "epoch": 6.757261410788382, + "grad_norm": 4.615629196166992, + "learning_rate": 1.8015214384508992e-05, + "log_odds_chosen": 10.940568923950195, + "log_odds_ratio": -6.288071745075285e-05, + "logits/chosen": -0.20645862817764282, + "logits/rejected": -0.4057818055152893, + "logps/chosen": -0.0003123377973679453, + "logps/rejected": -1.8413560390472412, + "loss": 0.3406, + "nll_loss": 0.0851416140794754, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1233776098815724e-05, + "rewards/margins": 0.18410436809062958, + "rewards/rejected": -0.18413560092449188, + "step": 9771 + }, + { + "epoch": 6.7579529737206085, + "grad_norm": 7.441464424133301, + "learning_rate": 1.8011372368218844e-05, + "log_odds_chosen": 10.164894104003906, + "log_odds_ratio": -0.0005850759916938841, + "logits/chosen": 0.008430100977420807, + "logits/rejected": -0.18595150113105774, + "logps/chosen": -0.0007026636158116162, + "logps/rejected": -2.359257221221924, + "loss": 0.6796, + "nll_loss": 0.169847771525383, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.026636740192771e-05, + "rewards/margins": 0.23585546016693115, + "rewards/rejected": -0.23592573404312134, + "step": 9772 + }, + { + "epoch": 6.758644536652835, + "grad_norm": 6.9098310470581055, + "learning_rate": 1.8007530351928693e-05, + "log_odds_chosen": 10.903366088867188, + "log_odds_ratio": -3.1184124964056537e-05, + "logits/chosen": -0.11911733448505402, + "logits/rejected": -0.18479466438293457, + "logps/chosen": -0.00035307067446410656, + "logps/rejected": -2.7793569564819336, + "loss": 0.4868, + "nll_loss": 0.12170146405696869, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.530706817400642e-05, + "rewards/margins": 0.2779003977775574, + "rewards/rejected": -0.2779357135295868, + "step": 9773 + }, + { + "epoch": 6.759336099585062, + "grad_norm": 7.216391086578369, + "learning_rate": 1.8003688335638542e-05, + "log_odds_chosen": 9.6553373336792, + "log_odds_ratio": -0.0013119232608005404, + "logits/chosen": -0.9779117107391357, + "logits/rejected": -0.9227054119110107, + "logps/chosen": -0.0010267137549817562, + "logps/rejected": -1.4132969379425049, + "loss": 0.608, + "nll_loss": 0.15186795592308044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001026713871397078, + "rewards/margins": 0.14122702181339264, + "rewards/rejected": -0.14132969081401825, + "step": 9774 + }, + { + "epoch": 6.760027662517289, + "grad_norm": 4.36707067489624, + "learning_rate": 1.7999846319348395e-05, + "log_odds_chosen": 10.784286499023438, + "log_odds_ratio": -4.411122063174844e-05, + "logits/chosen": -0.5529571771621704, + "logits/rejected": -0.5241627097129822, + "logps/chosen": -0.0001336929271928966, + "logps/rejected": -1.6788662672042847, + "loss": 0.2466, + "nll_loss": 0.061649952083826065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3369293810683303e-05, + "rewards/margins": 0.16787324845790863, + "rewards/rejected": -0.1678866297006607, + "step": 9775 + }, + { + "epoch": 6.760719225449516, + "grad_norm": 5.725052833557129, + "learning_rate": 1.7996004303058244e-05, + "log_odds_chosen": 10.938990592956543, + "log_odds_ratio": -0.0004287226765882224, + "logits/chosen": -0.285929411649704, + "logits/rejected": -0.36556243896484375, + "logps/chosen": -0.0005001586396247149, + "logps/rejected": -2.315542697906494, + "loss": 0.6023, + "nll_loss": 0.1505252718925476, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.001586032449268e-05, + "rewards/margins": 0.23150423169136047, + "rewards/rejected": -0.23155425488948822, + "step": 9776 + }, + { + "epoch": 6.761410788381743, + "grad_norm": 7.261622428894043, + "learning_rate": 1.7992162286768096e-05, + "log_odds_chosen": 9.896255493164062, + "log_odds_ratio": -0.00038533323095180094, + "logits/chosen": -0.1569298803806305, + "logits/rejected": -0.22647440433502197, + "logps/chosen": -0.0006728830048814416, + "logps/rejected": -2.288208484649658, + "loss": 0.7874, + "nll_loss": 0.1968180537223816, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.72883033985272e-05, + "rewards/margins": 0.22875356674194336, + "rewards/rejected": -0.22882086038589478, + "step": 9777 + }, + { + "epoch": 6.7621023513139695, + "grad_norm": 5.106160640716553, + "learning_rate": 1.798832027047795e-05, + "log_odds_chosen": 10.562328338623047, + "log_odds_ratio": -0.00016904577205423266, + "logits/chosen": -0.25822728872299194, + "logits/rejected": -0.3803952932357788, + "logps/chosen": -0.0005223815096542239, + "logps/rejected": -2.2937982082366943, + "loss": 0.5959, + "nll_loss": 0.1489495187997818, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2238152420613915e-05, + "rewards/margins": 0.22932758927345276, + "rewards/rejected": -0.2293798327445984, + "step": 9778 + }, + { + "epoch": 6.762793914246196, + "grad_norm": 9.766763687133789, + "learning_rate": 1.7984478254187798e-05, + "log_odds_chosen": 9.80665397644043, + "log_odds_ratio": -0.00023903910187073052, + "logits/chosen": -0.6199896335601807, + "logits/rejected": -0.6468402147293091, + "logps/chosen": -0.0003940025926567614, + "logps/rejected": -1.1876604557037354, + "loss": 0.4167, + "nll_loss": 0.1041593849658966, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9400256355293095e-05, + "rewards/margins": 0.11872664093971252, + "rewards/rejected": -0.11876604706048965, + "step": 9779 + }, + { + "epoch": 6.763485477178423, + "grad_norm": 5.44463586807251, + "learning_rate": 1.798063623789765e-05, + "log_odds_chosen": 9.514091491699219, + "log_odds_ratio": -0.00021153160196263343, + "logits/chosen": -0.4803120791912079, + "logits/rejected": -0.5570478439331055, + "logps/chosen": -0.0007655763765797019, + "logps/rejected": -2.086155891418457, + "loss": 0.8381, + "nll_loss": 0.20949587225914001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.655764056835324e-05, + "rewards/margins": 0.20853903889656067, + "rewards/rejected": -0.20861557126045227, + "step": 9780 + }, + { + "epoch": 6.76417704011065, + "grad_norm": 8.110411643981934, + "learning_rate": 1.7976794221607503e-05, + "log_odds_chosen": 10.88216781616211, + "log_odds_ratio": -6.38460842310451e-05, + "logits/chosen": -0.09918743371963501, + "logits/rejected": -0.13283143937587738, + "logps/chosen": -0.0002480775583535433, + "logps/rejected": -2.291240692138672, + "loss": 0.7315, + "nll_loss": 0.1828564554452896, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4807755835354328e-05, + "rewards/margins": 0.22909927368164062, + "rewards/rejected": -0.22912408411502838, + "step": 9781 + }, + { + "epoch": 6.764868603042877, + "grad_norm": 4.628012657165527, + "learning_rate": 1.7972952205317352e-05, + "log_odds_chosen": 10.212989807128906, + "log_odds_ratio": -6.754022615496069e-05, + "logits/chosen": -0.4950890839099884, + "logits/rejected": -0.5523540377616882, + "logps/chosen": -0.00018165886285714805, + "logps/rejected": -1.4461414813995361, + "loss": 0.4622, + "nll_loss": 0.1155511885881424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8165887013310567e-05, + "rewards/margins": 0.14459598064422607, + "rewards/rejected": -0.14461416006088257, + "step": 9782 + }, + { + "epoch": 6.765560165975104, + "grad_norm": 6.216654300689697, + "learning_rate": 1.79691101890272e-05, + "log_odds_chosen": 11.898232460021973, + "log_odds_ratio": -1.2801835509890225e-05, + "logits/chosen": -0.22902534902095795, + "logits/rejected": -0.3668055534362793, + "logps/chosen": -0.00017746177036315203, + "logps/rejected": -3.097951889038086, + "loss": 0.668, + "nll_loss": 0.16699756681919098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.77461752173258e-05, + "rewards/margins": 0.30977746844291687, + "rewards/rejected": -0.30979520082473755, + "step": 9783 + }, + { + "epoch": 6.7662517289073305, + "grad_norm": 4.292337417602539, + "learning_rate": 1.7965268172737053e-05, + "log_odds_chosen": 12.043815612792969, + "log_odds_ratio": -8.21490084490506e-06, + "logits/chosen": -0.5467915534973145, + "logits/rejected": -0.5668094158172607, + "logps/chosen": -0.00013092358130961657, + "logps/rejected": -2.8281164169311523, + "loss": 0.4635, + "nll_loss": 0.11587396264076233, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3092359040456358e-05, + "rewards/margins": 0.28279852867126465, + "rewards/rejected": -0.28281164169311523, + "step": 9784 + }, + { + "epoch": 6.766943291839557, + "grad_norm": 6.525885581970215, + "learning_rate": 1.7961426156446902e-05, + "log_odds_chosen": 11.126798629760742, + "log_odds_ratio": -5.15446845383849e-05, + "logits/chosen": -0.13213799893856049, + "logits/rejected": -0.13622528314590454, + "logps/chosen": -0.00017841748194769025, + "logps/rejected": -2.4514002799987793, + "loss": 0.5827, + "nll_loss": 0.145666241645813, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7841746739577502e-05, + "rewards/margins": 0.24512219429016113, + "rewards/rejected": -0.24514003098011017, + "step": 9785 + }, + { + "epoch": 6.767634854771784, + "grad_norm": 6.094909191131592, + "learning_rate": 1.7957584140156755e-05, + "log_odds_chosen": 11.047687530517578, + "log_odds_ratio": -3.2891894079511985e-05, + "logits/chosen": -0.5635808110237122, + "logits/rejected": -0.533679723739624, + "logps/chosen": -0.0001351062091998756, + "logps/rejected": -2.1552696228027344, + "loss": 0.3764, + "nll_loss": 0.09409818053245544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.35106211018865e-05, + "rewards/margins": 0.2155134528875351, + "rewards/rejected": -0.21552695333957672, + "step": 9786 + }, + { + "epoch": 6.768326417704011, + "grad_norm": 4.369130611419678, + "learning_rate": 1.7953742123866607e-05, + "log_odds_chosen": 10.320816993713379, + "log_odds_ratio": -6.77104399073869e-05, + "logits/chosen": -0.5601058006286621, + "logits/rejected": -0.6135123372077942, + "logps/chosen": -0.00018023433221969754, + "logps/rejected": -1.5359973907470703, + "loss": 0.3982, + "nll_loss": 0.09954757988452911, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8023432858171873e-05, + "rewards/margins": 0.15358170866966248, + "rewards/rejected": -0.15359973907470703, + "step": 9787 + }, + { + "epoch": 6.769017980636238, + "grad_norm": 6.298321723937988, + "learning_rate": 1.7949900107576456e-05, + "log_odds_chosen": 10.307516098022461, + "log_odds_ratio": -0.00014691260003019124, + "logits/chosen": -0.3807259500026703, + "logits/rejected": -0.4523041248321533, + "logps/chosen": -0.000674390175845474, + "logps/rejected": -2.2402751445770264, + "loss": 0.4117, + "nll_loss": 0.1029200330376625, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743902486050501e-05, + "rewards/margins": 0.22396008670330048, + "rewards/rejected": -0.22402751445770264, + "step": 9788 + }, + { + "epoch": 6.769709543568465, + "grad_norm": 15.953858375549316, + "learning_rate": 1.794605809128631e-05, + "log_odds_chosen": 11.054325103759766, + "log_odds_ratio": -4.443394936970435e-05, + "logits/chosen": -0.3734176456928253, + "logits/rejected": -0.36586546897888184, + "logps/chosen": -0.0002746654790826142, + "logps/rejected": -2.239257335662842, + "loss": 0.4929, + "nll_loss": 0.12322264909744263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7466550818644464e-05, + "rewards/margins": 0.2238982617855072, + "rewards/rejected": -0.22392573952674866, + "step": 9789 + }, + { + "epoch": 6.7704011065006915, + "grad_norm": 5.373976707458496, + "learning_rate": 1.794221607499616e-05, + "log_odds_chosen": 10.213509559631348, + "log_odds_ratio": -0.00034472710103727877, + "logits/chosen": -0.10134115070104599, + "logits/rejected": -0.12651267647743225, + "logps/chosen": -0.00033351409365423024, + "logps/rejected": -2.3881418704986572, + "loss": 0.8267, + "nll_loss": 0.2066420614719391, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3351410820614547e-05, + "rewards/margins": 0.23878082633018494, + "rewards/rejected": -0.23881417512893677, + "step": 9790 + }, + { + "epoch": 6.771092669432918, + "grad_norm": 7.270600318908691, + "learning_rate": 1.793837405870601e-05, + "log_odds_chosen": 10.21059799194336, + "log_odds_ratio": -0.0002538753324188292, + "logits/chosen": -0.5336205363273621, + "logits/rejected": -0.5699590444564819, + "logps/chosen": -0.0002814323815982789, + "logps/rejected": -1.7851035594940186, + "loss": 0.6582, + "nll_loss": 0.16453300416469574, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8143236704636365e-05, + "rewards/margins": 0.17848220467567444, + "rewards/rejected": -0.17851035296916962, + "step": 9791 + }, + { + "epoch": 6.771784232365145, + "grad_norm": 6.971714019775391, + "learning_rate": 1.793453204241586e-05, + "log_odds_chosen": 10.254987716674805, + "log_odds_ratio": -0.0001944910327438265, + "logits/chosen": -0.4201323688030243, + "logits/rejected": -0.5163047909736633, + "logps/chosen": -0.0008295466541312635, + "logps/rejected": -1.9266215562820435, + "loss": 0.7876, + "nll_loss": 0.1968696117401123, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.29546625027433e-05, + "rewards/margins": 0.19257919490337372, + "rewards/rejected": -0.19266214966773987, + "step": 9792 + }, + { + "epoch": 6.772475795297372, + "grad_norm": 4.725278377532959, + "learning_rate": 1.7930690026125712e-05, + "log_odds_chosen": 9.815740585327148, + "log_odds_ratio": -0.0001148254523286596, + "logits/chosen": -0.10961273312568665, + "logits/rejected": -0.21037688851356506, + "logps/chosen": -0.0002081097918562591, + "logps/rejected": -1.2197327613830566, + "loss": 0.4787, + "nll_loss": 0.11967021226882935, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.081097954942379e-05, + "rewards/margins": 0.12195246666669846, + "rewards/rejected": -0.12197329103946686, + "step": 9793 + }, + { + "epoch": 6.773167358229599, + "grad_norm": 10.813013076782227, + "learning_rate": 1.792684800983556e-05, + "log_odds_chosen": 10.976402282714844, + "log_odds_ratio": -4.5033513742964715e-05, + "logits/chosen": -0.6192176342010498, + "logits/rejected": -0.8035860657691956, + "logps/chosen": -0.00024157708685379475, + "logps/rejected": -2.56021785736084, + "loss": 0.6271, + "nll_loss": 0.1567803919315338, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4157707230187953e-05, + "rewards/margins": 0.2559976279735565, + "rewards/rejected": -0.25602179765701294, + "step": 9794 + }, + { + "epoch": 6.773858921161826, + "grad_norm": 5.538814544677734, + "learning_rate": 1.7923005993545413e-05, + "log_odds_chosen": 10.855127334594727, + "log_odds_ratio": -3.1160849175648764e-05, + "logits/chosen": -0.7526274919509888, + "logits/rejected": -0.7859461903572083, + "logps/chosen": -0.00010621797264320776, + "logps/rejected": -1.528001070022583, + "loss": 0.4439, + "nll_loss": 0.11098209023475647, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0621797628118657e-05, + "rewards/margins": 0.15278948843479156, + "rewards/rejected": -0.15280009806156158, + "step": 9795 + }, + { + "epoch": 6.7745504840940525, + "grad_norm": 5.904871463775635, + "learning_rate": 1.7919163977255266e-05, + "log_odds_chosen": 9.980600357055664, + "log_odds_ratio": -0.00027487872284837067, + "logits/chosen": -0.48689329624176025, + "logits/rejected": -0.42207589745521545, + "logps/chosen": -0.0005123027949593961, + "logps/rejected": -1.6725163459777832, + "loss": 0.6245, + "nll_loss": 0.15609855949878693, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1230275857960805e-05, + "rewards/margins": 0.16720040142536163, + "rewards/rejected": -0.16725163161754608, + "step": 9796 + }, + { + "epoch": 6.775242047026279, + "grad_norm": 9.97628116607666, + "learning_rate": 1.7915321960965115e-05, + "log_odds_chosen": 11.540143966674805, + "log_odds_ratio": -2.4471966753480956e-05, + "logits/chosen": -0.5073536038398743, + "logits/rejected": -0.6288943290710449, + "logps/chosen": -0.00042590327211655676, + "logps/rejected": -2.9150373935699463, + "loss": 0.4392, + "nll_loss": 0.10978897660970688, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2590330849634483e-05, + "rewards/margins": 0.29146116971969604, + "rewards/rejected": -0.29150375723838806, + "step": 9797 + }, + { + "epoch": 6.775933609958506, + "grad_norm": 5.156939506530762, + "learning_rate": 1.7911479944674967e-05, + "log_odds_chosen": 9.476211547851562, + "log_odds_ratio": -0.00015527091454714537, + "logits/chosen": -0.17368683218955994, + "logits/rejected": -0.3279064893722534, + "logps/chosen": -0.000656230200547725, + "logps/rejected": -2.053809881210327, + "loss": 0.5374, + "nll_loss": 0.13434290885925293, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.562301859958097e-05, + "rewards/margins": 0.20531539618968964, + "rewards/rejected": -0.20538100600242615, + "step": 9798 + }, + { + "epoch": 6.776625172890733, + "grad_norm": 4.439167022705078, + "learning_rate": 1.790763792838482e-05, + "log_odds_chosen": 10.210926055908203, + "log_odds_ratio": -0.0001035605018842034, + "logits/chosen": -0.5380820631980896, + "logits/rejected": -0.5222083330154419, + "logps/chosen": -0.0005264157080091536, + "logps/rejected": -1.592585802078247, + "loss": 0.5461, + "nll_loss": 0.1365230679512024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264157152851112e-05, + "rewards/margins": 0.15920594334602356, + "rewards/rejected": -0.15925857424736023, + "step": 9799 + }, + { + "epoch": 6.77731673582296, + "grad_norm": 6.291228771209717, + "learning_rate": 1.790379591209467e-05, + "log_odds_chosen": 10.743213653564453, + "log_odds_ratio": -0.0011980285635218024, + "logits/chosen": -0.4152224063873291, + "logits/rejected": -0.40008097887039185, + "logps/chosen": -0.000732260406948626, + "logps/rejected": -2.3373422622680664, + "loss": 0.7109, + "nll_loss": 0.1776098906993866, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.322603778447956e-05, + "rewards/margins": 0.2336609959602356, + "rewards/rejected": -0.23373422026634216, + "step": 9800 + }, + { + "epoch": 6.778008298755187, + "grad_norm": 10.946796417236328, + "learning_rate": 1.7899953895804518e-05, + "log_odds_chosen": 9.949090957641602, + "log_odds_ratio": -0.00010966783884214237, + "logits/chosen": -0.6908112168312073, + "logits/rejected": -0.7527514696121216, + "logps/chosen": -0.00016632323968224227, + "logps/rejected": -1.3433506488800049, + "loss": 1.3295, + "nll_loss": 0.3323700428009033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6632324332022108e-05, + "rewards/margins": 0.13431844115257263, + "rewards/rejected": -0.13433507084846497, + "step": 9801 + }, + { + "epoch": 6.7786998616874135, + "grad_norm": 10.57618522644043, + "learning_rate": 1.789611187951437e-05, + "log_odds_chosen": 11.234428405761719, + "log_odds_ratio": -2.9399394406937063e-05, + "logits/chosen": -0.23678916692733765, + "logits/rejected": -0.2845824360847473, + "logps/chosen": -0.00042127168853767216, + "logps/rejected": -2.484922170639038, + "loss": 0.6881, + "nll_loss": 0.17201298475265503, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.212717612972483e-05, + "rewards/margins": 0.24845010042190552, + "rewards/rejected": -0.24849221110343933, + "step": 9802 + }, + { + "epoch": 6.77939142461964, + "grad_norm": 4.66774845123291, + "learning_rate": 1.789226986322422e-05, + "log_odds_chosen": 10.529937744140625, + "log_odds_ratio": -0.00010805519559653476, + "logits/chosen": -0.5381174683570862, + "logits/rejected": -0.5654388666152954, + "logps/chosen": -0.0003065310011152178, + "logps/rejected": -2.255186080932617, + "loss": 0.6306, + "nll_loss": 0.15763989090919495, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.065310011152178e-05, + "rewards/margins": 0.22548796236515045, + "rewards/rejected": -0.2255186140537262, + "step": 9803 + }, + { + "epoch": 6.780082987551867, + "grad_norm": 7.76448917388916, + "learning_rate": 1.788842784693407e-05, + "log_odds_chosen": 10.768404960632324, + "log_odds_ratio": -3.6421581171453e-05, + "logits/chosen": -0.45603862404823303, + "logits/rejected": -0.5468306541442871, + "logps/chosen": -0.00010872337588807568, + "logps/rejected": -1.5940682888031006, + "loss": 0.4035, + "nll_loss": 0.10087916254997253, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0872337952605449e-05, + "rewards/margins": 0.1593959629535675, + "rewards/rejected": -0.159406840801239, + "step": 9804 + }, + { + "epoch": 6.780774550484094, + "grad_norm": 4.876262664794922, + "learning_rate": 1.7884585830643924e-05, + "log_odds_chosen": 11.440156936645508, + "log_odds_ratio": -4.563136462820694e-05, + "logits/chosen": -0.6698880195617676, + "logits/rejected": -0.6494791507720947, + "logps/chosen": -0.00046217741328291595, + "logps/rejected": -2.974748373031616, + "loss": 0.3709, + "nll_loss": 0.09273214638233185, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.62177449662704e-05, + "rewards/margins": 0.29742860794067383, + "rewards/rejected": -0.29747486114501953, + "step": 9805 + }, + { + "epoch": 6.781466113416321, + "grad_norm": 6.153312683105469, + "learning_rate": 1.7880743814353773e-05, + "log_odds_chosen": 10.653793334960938, + "log_odds_ratio": -7.533111056545749e-05, + "logits/chosen": -0.46784037351608276, + "logits/rejected": -0.522236704826355, + "logps/chosen": -0.00014894736523274332, + "logps/rejected": -1.5492587089538574, + "loss": 0.8613, + "nll_loss": 0.21532396972179413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4894736523274332e-05, + "rewards/margins": 0.15491099655628204, + "rewards/rejected": -0.1549258828163147, + "step": 9806 + }, + { + "epoch": 6.782157676348548, + "grad_norm": 7.5202317237854, + "learning_rate": 1.7876901798063626e-05, + "log_odds_chosen": 10.069587707519531, + "log_odds_ratio": -9.036479605128989e-05, + "logits/chosen": -0.14121729135513306, + "logits/rejected": -0.23690003156661987, + "logps/chosen": -0.00048229345702566206, + "logps/rejected": -2.1249752044677734, + "loss": 0.4922, + "nll_loss": 0.12303020805120468, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.822934715775773e-05, + "rewards/margins": 0.21244929730892181, + "rewards/rejected": -0.2124975323677063, + "step": 9807 + }, + { + "epoch": 6.782849239280774, + "grad_norm": 5.495734214782715, + "learning_rate": 1.7873059781773478e-05, + "log_odds_chosen": 11.376768112182617, + "log_odds_ratio": -5.383559982874431e-05, + "logits/chosen": -0.3966137766838074, + "logits/rejected": -0.49125686287879944, + "logps/chosen": -0.00015656169853173196, + "logps/rejected": -1.8736109733581543, + "loss": 0.5997, + "nll_loss": 0.1499200463294983, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5656172763556242e-05, + "rewards/margins": 0.1873454451560974, + "rewards/rejected": -0.18736110627651215, + "step": 9808 + }, + { + "epoch": 6.783540802213001, + "grad_norm": 4.648586273193359, + "learning_rate": 1.7869217765483327e-05, + "log_odds_chosen": 10.439985275268555, + "log_odds_ratio": -0.00021521994494833052, + "logits/chosen": -0.5451334118843079, + "logits/rejected": -0.4855062961578369, + "logps/chosen": -0.0002363657986279577, + "logps/rejected": -1.9440194368362427, + "loss": 0.2804, + "nll_loss": 0.07008212804794312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.363657949899789e-05, + "rewards/margins": 0.19437828660011292, + "rewards/rejected": -0.19440194964408875, + "step": 9809 + }, + { + "epoch": 6.784232365145228, + "grad_norm": 6.852433681488037, + "learning_rate": 1.7865375749193176e-05, + "log_odds_chosen": 10.628756523132324, + "log_odds_ratio": -5.7108372857328504e-05, + "logits/chosen": -0.18069210648536682, + "logits/rejected": -0.1497456133365631, + "logps/chosen": -0.0002983055601362139, + "logps/rejected": -2.0635130405426025, + "loss": 0.6769, + "nll_loss": 0.16920895874500275, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.983055674121715e-05, + "rewards/margins": 0.20632147789001465, + "rewards/rejected": -0.20635131001472473, + "step": 9810 + }, + { + "epoch": 6.784923928077455, + "grad_norm": 7.898940563201904, + "learning_rate": 1.786153373290303e-05, + "log_odds_chosen": 9.900899887084961, + "log_odds_ratio": -8.019372762646526e-05, + "logits/chosen": -0.499287873506546, + "logits/rejected": -0.6086374521255493, + "logps/chosen": -0.0004171186883468181, + "logps/rejected": -1.8263578414916992, + "loss": 0.8046, + "nll_loss": 0.20114830136299133, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.171186810708605e-05, + "rewards/margins": 0.18259406089782715, + "rewards/rejected": -0.18263578414916992, + "step": 9811 + }, + { + "epoch": 6.785615491009682, + "grad_norm": 14.485651016235352, + "learning_rate": 1.7857691716612878e-05, + "log_odds_chosen": 10.63548469543457, + "log_odds_ratio": -4.96716565976385e-05, + "logits/chosen": -0.5968855619430542, + "logits/rejected": -0.548457682132721, + "logps/chosen": -0.0004951007431373, + "logps/rejected": -2.177253246307373, + "loss": 0.5719, + "nll_loss": 0.1429726630449295, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9510075768921524e-05, + "rewards/margins": 0.2176758348941803, + "rewards/rejected": -0.21772533655166626, + "step": 9812 + }, + { + "epoch": 6.786307053941909, + "grad_norm": 7.115420818328857, + "learning_rate": 1.785384970032273e-05, + "log_odds_chosen": 11.811935424804688, + "log_odds_ratio": -0.00014342159556690603, + "logits/chosen": -0.2895018458366394, + "logits/rejected": -0.3131714463233948, + "logps/chosen": -0.00030310984584502876, + "logps/rejected": -3.217440128326416, + "loss": 0.7929, + "nll_loss": 0.19821369647979736, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0310988222481683e-05, + "rewards/margins": 0.3217136859893799, + "rewards/rejected": -0.3217439651489258, + "step": 9813 + }, + { + "epoch": 6.786998616874135, + "grad_norm": 8.736668586730957, + "learning_rate": 1.7850007684032583e-05, + "log_odds_chosen": 11.872312545776367, + "log_odds_ratio": -1.9381395759410225e-05, + "logits/chosen": -0.7531395554542542, + "logits/rejected": -0.8392912745475769, + "logps/chosen": -9.647270053392276e-05, + "logps/rejected": -2.450575351715088, + "loss": 0.5367, + "nll_loss": 0.13417330384254456, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.647269507695455e-06, + "rewards/margins": 0.2450478971004486, + "rewards/rejected": -0.24505755305290222, + "step": 9814 + }, + { + "epoch": 6.787690179806362, + "grad_norm": 5.51792049407959, + "learning_rate": 1.784616566774243e-05, + "log_odds_chosen": 9.964544296264648, + "log_odds_ratio": -8.551901555620134e-05, + "logits/chosen": -0.19018104672431946, + "logits/rejected": -0.35868245363235474, + "logps/chosen": -0.00032383183133788407, + "logps/rejected": -1.7374075651168823, + "loss": 0.607, + "nll_loss": 0.15173201262950897, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.238318458897993e-05, + "rewards/margins": 0.17370837926864624, + "rewards/rejected": -0.17374074459075928, + "step": 9815 + }, + { + "epoch": 6.788381742738589, + "grad_norm": 4.665560722351074, + "learning_rate": 1.7842323651452284e-05, + "log_odds_chosen": 9.851408004760742, + "log_odds_ratio": -0.0008542541763745248, + "logits/chosen": -0.43791818618774414, + "logits/rejected": -0.44907528162002563, + "logps/chosen": -0.0005597654380835593, + "logps/rejected": -1.7041947841644287, + "loss": 0.5862, + "nll_loss": 0.14647680521011353, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597654671873897e-05, + "rewards/margins": 0.17036350071430206, + "rewards/rejected": -0.17041948437690735, + "step": 9816 + }, + { + "epoch": 6.789073305670816, + "grad_norm": 8.863218307495117, + "learning_rate": 1.7838481635162133e-05, + "log_odds_chosen": 10.806852340698242, + "log_odds_ratio": -7.586018909933046e-05, + "logits/chosen": -0.3525431454181671, + "logits/rejected": -0.44882509112358093, + "logps/chosen": -0.0006463914178311825, + "logps/rejected": -2.4669196605682373, + "loss": 0.5583, + "nll_loss": 0.1395639330148697, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.463914178311825e-05, + "rewards/margins": 0.2466273456811905, + "rewards/rejected": -0.2466919869184494, + "step": 9817 + }, + { + "epoch": 6.789764868603043, + "grad_norm": 4.653459072113037, + "learning_rate": 1.7834639618871986e-05, + "log_odds_chosen": 11.221488952636719, + "log_odds_ratio": -2.0016068447148427e-05, + "logits/chosen": -0.535835862159729, + "logits/rejected": -0.5027376413345337, + "logps/chosen": -0.0002073202922474593, + "logps/rejected": -2.5066559314727783, + "loss": 0.4301, + "nll_loss": 0.10751183331012726, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0732028133352287e-05, + "rewards/margins": 0.25064486265182495, + "rewards/rejected": -0.2506656050682068, + "step": 9818 + }, + { + "epoch": 6.79045643153527, + "grad_norm": 4.887798309326172, + "learning_rate": 1.7830797602581838e-05, + "log_odds_chosen": 12.314960479736328, + "log_odds_ratio": -7.485728565370664e-05, + "logits/chosen": -0.49125027656555176, + "logits/rejected": -0.5477063655853271, + "logps/chosen": -0.0001772197283571586, + "logps/rejected": -2.8946125507354736, + "loss": 0.5249, + "nll_loss": 0.1312086582183838, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7721975382301025e-05, + "rewards/margins": 0.2894435226917267, + "rewards/rejected": -0.289461225271225, + "step": 9819 + }, + { + "epoch": 6.791147994467496, + "grad_norm": 4.644054889678955, + "learning_rate": 1.7826955586291687e-05, + "log_odds_chosen": 10.332023620605469, + "log_odds_ratio": -0.00021751302119810134, + "logits/chosen": -0.3514784574508667, + "logits/rejected": -0.3961045742034912, + "logps/chosen": -0.0003058880683965981, + "logps/rejected": -2.000765562057495, + "loss": 0.6306, + "nll_loss": 0.15762130916118622, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0588809750042856e-05, + "rewards/margins": 0.20004597306251526, + "rewards/rejected": -0.20007656514644623, + "step": 9820 + }, + { + "epoch": 6.791839557399723, + "grad_norm": 5.434691429138184, + "learning_rate": 1.7823113570001536e-05, + "log_odds_chosen": 10.024774551391602, + "log_odds_ratio": -0.00034345826134085655, + "logits/chosen": -0.4995710551738739, + "logits/rejected": -0.48516565561294556, + "logps/chosen": -0.000309604627545923, + "logps/rejected": -1.9717507362365723, + "loss": 1.0495, + "nll_loss": 0.26234033703804016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0960465664975345e-05, + "rewards/margins": 0.19714412093162537, + "rewards/rejected": -0.19717508554458618, + "step": 9821 + }, + { + "epoch": 6.79253112033195, + "grad_norm": 6.43671178817749, + "learning_rate": 1.781927155371139e-05, + "log_odds_chosen": 11.061049461364746, + "log_odds_ratio": -5.2428375056479126e-05, + "logits/chosen": -0.0359373539686203, + "logits/rejected": -0.08879576623439789, + "logps/chosen": -0.0005665082135237753, + "logps/rejected": -2.881434917449951, + "loss": 0.6287, + "nll_loss": 0.15717333555221558, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.665082062478177e-05, + "rewards/margins": 0.288086861371994, + "rewards/rejected": -0.288143515586853, + "step": 9822 + }, + { + "epoch": 6.793222683264177, + "grad_norm": 4.820457935333252, + "learning_rate": 1.7815429537421238e-05, + "log_odds_chosen": 12.112467765808105, + "log_odds_ratio": -1.8505757907405496e-05, + "logits/chosen": -0.6378794312477112, + "logits/rejected": -0.6656926870346069, + "logps/chosen": -0.00015574654389638454, + "logps/rejected": -2.9929399490356445, + "loss": 0.5178, + "nll_loss": 0.12945497035980225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5574656572425738e-05, + "rewards/margins": 0.2992784082889557, + "rewards/rejected": -0.29929399490356445, + "step": 9823 + }, + { + "epoch": 6.793914246196404, + "grad_norm": 7.671065330505371, + "learning_rate": 1.781158752113109e-05, + "log_odds_chosen": 11.70947551727295, + "log_odds_ratio": -1.660875932429917e-05, + "logits/chosen": -0.1562579721212387, + "logits/rejected": -0.2405303716659546, + "logps/chosen": -8.214052650146186e-05, + "logps/rejected": -2.337221384048462, + "loss": 1.1624, + "nll_loss": 0.29060617089271545, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.214052286348306e-06, + "rewards/margins": 0.23371393978595734, + "rewards/rejected": -0.23372213542461395, + "step": 9824 + }, + { + "epoch": 6.7946058091286305, + "grad_norm": 5.846847057342529, + "learning_rate": 1.7807745504840942e-05, + "log_odds_chosen": 10.920866966247559, + "log_odds_ratio": -0.00011350302520440891, + "logits/chosen": -0.21679610013961792, + "logits/rejected": -0.20923295617103577, + "logps/chosen": -0.00021579388703685254, + "logps/rejected": -1.7754294872283936, + "loss": 0.8206, + "nll_loss": 0.20514589548110962, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1579389795078896e-05, + "rewards/margins": 0.17752137780189514, + "rewards/rejected": -0.17754295468330383, + "step": 9825 + }, + { + "epoch": 6.795297372060857, + "grad_norm": 9.214680671691895, + "learning_rate": 1.780390348855079e-05, + "log_odds_chosen": 10.451539993286133, + "log_odds_ratio": -0.0005183625034987926, + "logits/chosen": -0.4053252041339874, + "logits/rejected": -0.2930617928504944, + "logps/chosen": -0.0009055271511897445, + "logps/rejected": -2.129746437072754, + "loss": 0.4832, + "nll_loss": 0.12075144052505493, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.055271948454902e-05, + "rewards/margins": 0.2128840684890747, + "rewards/rejected": -0.2129746377468109, + "step": 9826 + }, + { + "epoch": 6.795988934993084, + "grad_norm": 7.412352085113525, + "learning_rate": 1.7800061472260644e-05, + "log_odds_chosen": 11.123895645141602, + "log_odds_ratio": -3.581685814424418e-05, + "logits/chosen": 0.04853195697069168, + "logits/rejected": -0.1060643196105957, + "logps/chosen": -0.0003896902489941567, + "logps/rejected": -1.9042718410491943, + "loss": 0.5712, + "nll_loss": 0.14278869330883026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.896902489941567e-05, + "rewards/margins": 0.19038820266723633, + "rewards/rejected": -0.19042718410491943, + "step": 9827 + }, + { + "epoch": 6.796680497925311, + "grad_norm": 6.623965263366699, + "learning_rate": 1.7796219455970496e-05, + "log_odds_chosen": 10.686233520507812, + "log_odds_ratio": -9.329444583272561e-05, + "logits/chosen": -0.33430057764053345, + "logits/rejected": -0.3654334247112274, + "logps/chosen": -0.0001892134314402938, + "logps/rejected": -1.8002233505249023, + "loss": 0.579, + "nll_loss": 0.14473643898963928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8921346054412425e-05, + "rewards/margins": 0.18000340461730957, + "rewards/rejected": -0.18002232909202576, + "step": 9828 + }, + { + "epoch": 6.797372060857538, + "grad_norm": 5.111135482788086, + "learning_rate": 1.7792377439680345e-05, + "log_odds_chosen": 11.351503372192383, + "log_odds_ratio": -2.5285688025178388e-05, + "logits/chosen": -0.5366664528846741, + "logits/rejected": -0.6100755929946899, + "logps/chosen": -0.0002965263556689024, + "logps/rejected": -2.0993173122406006, + "loss": 0.6221, + "nll_loss": 0.15552878379821777, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9652637749677524e-05, + "rewards/margins": 0.2099020779132843, + "rewards/rejected": -0.20993171632289886, + "step": 9829 + }, + { + "epoch": 6.798063623789765, + "grad_norm": 8.023554801940918, + "learning_rate": 1.7788535423390194e-05, + "log_odds_chosen": 11.423932075500488, + "log_odds_ratio": -7.935289613669738e-05, + "logits/chosen": -0.13450314104557037, + "logits/rejected": -0.3230983018875122, + "logps/chosen": -0.0002636217977851629, + "logps/rejected": -2.6877059936523438, + "loss": 0.4252, + "nll_loss": 0.10628889501094818, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.636217868712265e-05, + "rewards/margins": 0.26874423027038574, + "rewards/rejected": -0.26877060532569885, + "step": 9830 + }, + { + "epoch": 6.7987551867219915, + "grad_norm": 7.384605884552002, + "learning_rate": 1.7784693407100047e-05, + "log_odds_chosen": 10.56057357788086, + "log_odds_ratio": -0.00026500289095565677, + "logits/chosen": -0.6557856202125549, + "logits/rejected": -0.6926838159561157, + "logps/chosen": -0.0006864126771688461, + "logps/rejected": -2.5699915885925293, + "loss": 0.509, + "nll_loss": 0.12722210586071014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.864126771688461e-05, + "rewards/margins": 0.25693053007125854, + "rewards/rejected": -0.2569991648197174, + "step": 9831 + }, + { + "epoch": 6.799446749654218, + "grad_norm": 4.883238792419434, + "learning_rate": 1.7780851390809896e-05, + "log_odds_chosen": 11.451370239257812, + "log_odds_ratio": -2.6813057047547773e-05, + "logits/chosen": -0.5282101035118103, + "logits/rejected": -0.6761583089828491, + "logps/chosen": -0.0001240254787262529, + "logps/rejected": -2.217674493789673, + "loss": 0.5614, + "nll_loss": 0.14034461975097656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2402548236423172e-05, + "rewards/margins": 0.22175507247447968, + "rewards/rejected": -0.22176745533943176, + "step": 9832 + }, + { + "epoch": 6.800138312586445, + "grad_norm": 8.146604537963867, + "learning_rate": 1.777700937451975e-05, + "log_odds_chosen": 9.993793487548828, + "log_odds_ratio": -0.0001493952004238963, + "logits/chosen": -0.7147838473320007, + "logits/rejected": -0.7217827439308167, + "logps/chosen": -0.00039108877535909414, + "logps/rejected": -1.7512335777282715, + "loss": 0.9134, + "nll_loss": 0.22834616899490356, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.910887608071789e-05, + "rewards/margins": 0.17508423328399658, + "rewards/rejected": -0.17512336373329163, + "step": 9833 + }, + { + "epoch": 6.800829875518672, + "grad_norm": 7.877200603485107, + "learning_rate": 1.77731673582296e-05, + "log_odds_chosen": 10.837315559387207, + "log_odds_ratio": -4.8736881581135094e-05, + "logits/chosen": -0.28594496846199036, + "logits/rejected": -0.2990525960922241, + "logps/chosen": -0.0001039270282490179, + "logps/rejected": -1.6275827884674072, + "loss": 0.3505, + "nll_loss": 0.08761301636695862, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.039270318869967e-05, + "rewards/margins": 0.16274788975715637, + "rewards/rejected": -0.16275827586650848, + "step": 9834 + }, + { + "epoch": 6.801521438450899, + "grad_norm": 8.999431610107422, + "learning_rate": 1.776932534193945e-05, + "log_odds_chosen": 11.797481536865234, + "log_odds_ratio": -9.626210157875903e-06, + "logits/chosen": -0.5511617064476013, + "logits/rejected": -0.5186449885368347, + "logps/chosen": -0.0001594604691490531, + "logps/rejected": -2.653761863708496, + "loss": 0.6771, + "nll_loss": 0.16927213966846466, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5946045095915906e-05, + "rewards/margins": 0.2653602361679077, + "rewards/rejected": -0.26537618041038513, + "step": 9835 + }, + { + "epoch": 6.802213001383126, + "grad_norm": 6.913736820220947, + "learning_rate": 1.7765483325649302e-05, + "log_odds_chosen": 10.623117446899414, + "log_odds_ratio": -0.00021301039669197053, + "logits/chosen": -0.3210628926753998, + "logits/rejected": -0.3580225110054016, + "logps/chosen": -0.00019910847186110914, + "logps/rejected": -1.8200393915176392, + "loss": 0.6378, + "nll_loss": 0.15943297743797302, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9910847186110914e-05, + "rewards/margins": 0.1819840371608734, + "rewards/rejected": -0.1820039451122284, + "step": 9836 + }, + { + "epoch": 6.8029045643153525, + "grad_norm": 11.309036254882812, + "learning_rate": 1.7761641309359155e-05, + "log_odds_chosen": 10.597155570983887, + "log_odds_ratio": -4.918476042803377e-05, + "logits/chosen": -0.39929133653640747, + "logits/rejected": -0.5000516176223755, + "logps/chosen": -0.00024138286244124174, + "logps/rejected": -1.8075019121170044, + "loss": 0.5687, + "nll_loss": 0.14216798543930054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4138287699315697e-05, + "rewards/margins": 0.1807260662317276, + "rewards/rejected": -0.18075020611286163, + "step": 9837 + }, + { + "epoch": 6.803596127247579, + "grad_norm": 6.357234954833984, + "learning_rate": 1.7757799293069004e-05, + "log_odds_chosen": 10.066715240478516, + "log_odds_ratio": -0.0007350252708420157, + "logits/chosen": -0.39687326550483704, + "logits/rejected": -0.40346938371658325, + "logps/chosen": -0.0013349910732358694, + "logps/rejected": -1.9573590755462646, + "loss": 0.4703, + "nll_loss": 0.11749234050512314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013349908113013953, + "rewards/margins": 0.1956024020910263, + "rewards/rejected": -0.195735901594162, + "step": 9838 + }, + { + "epoch": 6.804287690179806, + "grad_norm": 9.22315788269043, + "learning_rate": 1.7753957276778853e-05, + "log_odds_chosen": 10.840571403503418, + "log_odds_ratio": -5.220840102992952e-05, + "logits/chosen": -0.40534543991088867, + "logits/rejected": -0.466486394405365, + "logps/chosen": -0.00023522484116256237, + "logps/rejected": -1.9446091651916504, + "loss": 0.3722, + "nll_loss": 0.0930478423833847, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.352248520764988e-05, + "rewards/margins": 0.19443738460540771, + "rewards/rejected": -0.1944609135389328, + "step": 9839 + }, + { + "epoch": 6.804979253112033, + "grad_norm": 5.794712066650391, + "learning_rate": 1.7750115260488705e-05, + "log_odds_chosen": 11.39574146270752, + "log_odds_ratio": -2.222778130089864e-05, + "logits/chosen": -0.6708822250366211, + "logits/rejected": -0.7222901582717896, + "logps/chosen": -0.00011192337842658162, + "logps/rejected": -1.9927271604537964, + "loss": 0.4359, + "nll_loss": 0.10897647589445114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1192338206456043e-05, + "rewards/margins": 0.19926151633262634, + "rewards/rejected": -0.19927272200584412, + "step": 9840 + }, + { + "epoch": 6.80567081604426, + "grad_norm": 8.983512878417969, + "learning_rate": 1.7746273244198554e-05, + "log_odds_chosen": 10.862753868103027, + "log_odds_ratio": -5.702022826881148e-05, + "logits/chosen": -0.12994690239429474, + "logits/rejected": -0.24351057410240173, + "logps/chosen": -0.00021701918740291148, + "logps/rejected": -1.955878496170044, + "loss": 0.4519, + "nll_loss": 0.11296963691711426, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1701916921301745e-05, + "rewards/margins": 0.19556616246700287, + "rewards/rejected": -0.1955878734588623, + "step": 9841 + }, + { + "epoch": 6.806362378976487, + "grad_norm": 7.3522114753723145, + "learning_rate": 1.7742431227908407e-05, + "log_odds_chosen": 12.072206497192383, + "log_odds_ratio": -1.040917504724348e-05, + "logits/chosen": -0.44298988580703735, + "logits/rejected": -0.4893496036529541, + "logps/chosen": -0.00012864437303505838, + "logps/rejected": -2.958308696746826, + "loss": 0.6231, + "nll_loss": 0.15577593445777893, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2864436939707957e-05, + "rewards/margins": 0.295818030834198, + "rewards/rejected": -0.2958308756351471, + "step": 9842 + }, + { + "epoch": 6.8070539419087135, + "grad_norm": 4.939804553985596, + "learning_rate": 1.773858921161826e-05, + "log_odds_chosen": 10.557042121887207, + "log_odds_ratio": -0.00011877430370077491, + "logits/chosen": -0.2231357991695404, + "logits/rejected": -0.3202582895755768, + "logps/chosen": -0.0001812062255339697, + "logps/rejected": -1.729665994644165, + "loss": 0.8258, + "nll_loss": 0.20642876625061035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.812062328099273e-05, + "rewards/margins": 0.17294849455356598, + "rewards/rejected": -0.1729666143655777, + "step": 9843 + }, + { + "epoch": 6.80774550484094, + "grad_norm": 4.72721529006958, + "learning_rate": 1.773474719532811e-05, + "log_odds_chosen": 10.538349151611328, + "log_odds_ratio": -7.440797344315797e-05, + "logits/chosen": -0.3844439387321472, + "logits/rejected": -0.457133024930954, + "logps/chosen": -0.00031418955768458545, + "logps/rejected": -1.9789464473724365, + "loss": 0.3445, + "nll_loss": 0.08611032366752625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.141895649605431e-05, + "rewards/margins": 0.19786323606967926, + "rewards/rejected": -0.19789466261863708, + "step": 9844 + }, + { + "epoch": 6.808437067773167, + "grad_norm": 20.236831665039062, + "learning_rate": 1.773090517903796e-05, + "log_odds_chosen": 11.023157119750977, + "log_odds_ratio": -2.067050627374556e-05, + "logits/chosen": -0.7416025400161743, + "logits/rejected": -0.808810830116272, + "logps/chosen": -0.00022165890550240874, + "logps/rejected": -2.2892677783966064, + "loss": 0.7423, + "nll_loss": 0.18556413054466248, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2165892005432397e-05, + "rewards/margins": 0.2289046347141266, + "rewards/rejected": -0.22892676293849945, + "step": 9845 + }, + { + "epoch": 6.809128630705394, + "grad_norm": 6.052879810333252, + "learning_rate": 1.7727063162747813e-05, + "log_odds_chosen": 11.059163093566895, + "log_odds_ratio": -0.0001862043864093721, + "logits/chosen": -0.7816653251647949, + "logits/rejected": -0.7609491348266602, + "logps/chosen": -0.00040568297845311463, + "logps/rejected": -2.6221461296081543, + "loss": 0.4911, + "nll_loss": 0.12275606393814087, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.056830221088603e-05, + "rewards/margins": 0.2621740400791168, + "rewards/rejected": -0.2622146010398865, + "step": 9846 + }, + { + "epoch": 6.809820193637621, + "grad_norm": 11.615273475646973, + "learning_rate": 1.7723221146457662e-05, + "log_odds_chosen": 11.36082649230957, + "log_odds_ratio": -2.826635136443656e-05, + "logits/chosen": -0.3724120855331421, + "logits/rejected": -0.4282306432723999, + "logps/chosen": -0.0002107978070853278, + "logps/rejected": -2.5040338039398193, + "loss": 0.618, + "nll_loss": 0.1544925719499588, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.107978070853278e-05, + "rewards/margins": 0.25038230419158936, + "rewards/rejected": -0.25040340423583984, + "step": 9847 + }, + { + "epoch": 6.810511756569848, + "grad_norm": 5.582864284515381, + "learning_rate": 1.771937913016751e-05, + "log_odds_chosen": 10.69006061553955, + "log_odds_ratio": -0.00013318503624759614, + "logits/chosen": -0.09825585782527924, + "logits/rejected": -0.11967509239912033, + "logps/chosen": -0.0003724147391039878, + "logps/rejected": -2.130486488342285, + "loss": 0.442, + "nll_loss": 0.11048276722431183, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7241476093186066e-05, + "rewards/margins": 0.21301141381263733, + "rewards/rejected": -0.21304863691329956, + "step": 9848 + }, + { + "epoch": 6.8112033195020745, + "grad_norm": 9.837779998779297, + "learning_rate": 1.7715537113877364e-05, + "log_odds_chosen": 10.194995880126953, + "log_odds_ratio": -0.000401560275349766, + "logits/chosen": -0.651020884513855, + "logits/rejected": -0.6786574721336365, + "logps/chosen": -0.0005286269588395953, + "logps/rejected": -1.7069580554962158, + "loss": 0.5149, + "nll_loss": 0.12868283689022064, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.286269879434258e-05, + "rewards/margins": 0.1706429421901703, + "rewards/rejected": -0.17069579660892487, + "step": 9849 + }, + { + "epoch": 6.811894882434301, + "grad_norm": 6.8782639503479, + "learning_rate": 1.7711695097587213e-05, + "log_odds_chosen": 10.906352996826172, + "log_odds_ratio": -3.27380039379932e-05, + "logits/chosen": -0.1589619219303131, + "logits/rejected": -0.1439802497625351, + "logps/chosen": -0.0005701867048628628, + "logps/rejected": -2.554690361022949, + "loss": 0.8303, + "nll_loss": 0.20757944881916046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7018671213882044e-05, + "rewards/margins": 0.2554119825363159, + "rewards/rejected": -0.25546902418136597, + "step": 9850 + }, + { + "epoch": 6.812586445366528, + "grad_norm": 4.783719539642334, + "learning_rate": 1.7707853081297065e-05, + "log_odds_chosen": 10.361427307128906, + "log_odds_ratio": -8.810401050141081e-05, + "logits/chosen": -0.48291251063346863, + "logits/rejected": -0.6110714077949524, + "logps/chosen": -0.00023719553428236395, + "logps/rejected": -1.6287189722061157, + "loss": 0.5322, + "nll_loss": 0.1330510973930359, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3719554519630037e-05, + "rewards/margins": 0.16284817457199097, + "rewards/rejected": -0.16287189722061157, + "step": 9851 + }, + { + "epoch": 6.813278008298755, + "grad_norm": 4.849263668060303, + "learning_rate": 1.7704011065006918e-05, + "log_odds_chosen": 10.773094177246094, + "log_odds_ratio": -4.350581002654508e-05, + "logits/chosen": -0.041710179299116135, + "logits/rejected": -0.09449899941682816, + "logps/chosen": -0.0002241919719381258, + "logps/rejected": -2.37168025970459, + "loss": 0.5471, + "nll_loss": 0.1367701292037964, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.241919719381258e-05, + "rewards/margins": 0.2371455878019333, + "rewards/rejected": -0.23716801404953003, + "step": 9852 + }, + { + "epoch": 6.813969571230982, + "grad_norm": 6.0411505699157715, + "learning_rate": 1.7700169048716767e-05, + "log_odds_chosen": 10.641590118408203, + "log_odds_ratio": -3.7356308894231915e-05, + "logits/chosen": -0.5324625372886658, + "logits/rejected": -0.6158381700515747, + "logps/chosen": -0.00016988397692330182, + "logps/rejected": -1.6251307725906372, + "loss": 0.402, + "nll_loss": 0.10049900412559509, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.698839696473442e-05, + "rewards/margins": 0.16249608993530273, + "rewards/rejected": -0.16251307725906372, + "step": 9853 + }, + { + "epoch": 6.814661134163209, + "grad_norm": 6.3942646980285645, + "learning_rate": 1.769632703242662e-05, + "log_odds_chosen": 10.55441665649414, + "log_odds_ratio": -5.743156361859292e-05, + "logits/chosen": -0.16442380845546722, + "logits/rejected": -0.19287921488285065, + "logps/chosen": -0.00024357457004953176, + "logps/rejected": -2.0037691593170166, + "loss": 0.5275, + "nll_loss": 0.1318666785955429, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.435745955153834e-05, + "rewards/margins": 0.20035257935523987, + "rewards/rejected": -0.20037691295146942, + "step": 9854 + }, + { + "epoch": 6.8153526970954355, + "grad_norm": 8.117294311523438, + "learning_rate": 1.7692485016136472e-05, + "log_odds_chosen": 10.78452205657959, + "log_odds_ratio": -4.109352812520228e-05, + "logits/chosen": -0.45345538854599, + "logits/rejected": -0.5758625268936157, + "logps/chosen": -0.00014015237684361637, + "logps/rejected": -1.9844865798950195, + "loss": 0.5024, + "nll_loss": 0.1255882978439331, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4015236956765875e-05, + "rewards/margins": 0.19843465089797974, + "rewards/rejected": -0.19844867289066315, + "step": 9855 + }, + { + "epoch": 6.816044260027662, + "grad_norm": 8.956670761108398, + "learning_rate": 1.768864299984632e-05, + "log_odds_chosen": 10.297235488891602, + "log_odds_ratio": -0.0001858493487816304, + "logits/chosen": -0.4566280245780945, + "logits/rejected": -0.49013885855674744, + "logps/chosen": -0.0009861596627160907, + "logps/rejected": -1.7377392053604126, + "loss": 0.754, + "nll_loss": 0.18847453594207764, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.861597209237516e-05, + "rewards/margins": 0.1736753135919571, + "rewards/rejected": -0.17377394437789917, + "step": 9856 + }, + { + "epoch": 6.816735822959889, + "grad_norm": 3.7923271656036377, + "learning_rate": 1.768480098355617e-05, + "log_odds_chosen": 11.093573570251465, + "log_odds_ratio": -8.753328438615426e-05, + "logits/chosen": -0.3417430818080902, + "logits/rejected": -0.33952879905700684, + "logps/chosen": -0.00043928029481321573, + "logps/rejected": -2.047579765319824, + "loss": 0.2997, + "nll_loss": 0.0749170333147049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.392802657093853e-05, + "rewards/margins": 0.20471405982971191, + "rewards/rejected": -0.20475798845291138, + "step": 9857 + }, + { + "epoch": 6.817427385892116, + "grad_norm": 4.085455894470215, + "learning_rate": 1.7680958967266022e-05, + "log_odds_chosen": 11.034650802612305, + "log_odds_ratio": -5.613011308014393e-05, + "logits/chosen": -0.36551499366760254, + "logits/rejected": -0.37129878997802734, + "logps/chosen": -0.00019917613826692104, + "logps/rejected": -2.1950621604919434, + "loss": 0.4682, + "nll_loss": 0.11703965067863464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9917613826692104e-05, + "rewards/margins": 0.2194863259792328, + "rewards/rejected": -0.21950623393058777, + "step": 9858 + }, + { + "epoch": 6.818118948824343, + "grad_norm": 4.8493218421936035, + "learning_rate": 1.767711695097587e-05, + "log_odds_chosen": 11.219084739685059, + "log_odds_ratio": -2.1206218661973253e-05, + "logits/chosen": -0.6236893534660339, + "logits/rejected": -0.6588972806930542, + "logps/chosen": -0.0002190757222706452, + "logps/rejected": -2.3729987144470215, + "loss": 0.4707, + "nll_loss": 0.11766964197158813, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.190757186326664e-05, + "rewards/margins": 0.23727793991565704, + "rewards/rejected": -0.2372998595237732, + "step": 9859 + }, + { + "epoch": 6.81881051175657, + "grad_norm": 5.451807975769043, + "learning_rate": 1.7673274934685724e-05, + "log_odds_chosen": 10.989908218383789, + "log_odds_ratio": -2.3420301658916287e-05, + "logits/chosen": 0.03291507437825203, + "logits/rejected": -0.09146300703287125, + "logps/chosen": -0.00030668292311020195, + "logps/rejected": -2.314227342605591, + "loss": 0.7565, + "nll_loss": 0.18912914395332336, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0668292311020195e-05, + "rewards/margins": 0.2313920557498932, + "rewards/rejected": -0.23142272233963013, + "step": 9860 + }, + { + "epoch": 6.819502074688796, + "grad_norm": 9.567743301391602, + "learning_rate": 1.7669432918395576e-05, + "log_odds_chosen": 12.856329917907715, + "log_odds_ratio": -5.586166480497923e-06, + "logits/chosen": -0.06611060351133347, + "logits/rejected": -0.3514470160007477, + "logps/chosen": -8.464483835268766e-05, + "logps/rejected": -3.357191324234009, + "loss": 0.7163, + "nll_loss": 0.1790757179260254, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.464484380965587e-06, + "rewards/margins": 0.33571070432662964, + "rewards/rejected": -0.33571916818618774, + "step": 9861 + }, + { + "epoch": 6.820193637621023, + "grad_norm": 6.616390228271484, + "learning_rate": 1.7665590902105425e-05, + "log_odds_chosen": 11.775555610656738, + "log_odds_ratio": -2.4208513423218392e-05, + "logits/chosen": -0.362802118062973, + "logits/rejected": -0.44630053639411926, + "logps/chosen": -0.00015943124890327454, + "logps/rejected": -2.8059206008911133, + "loss": 0.4418, + "nll_loss": 0.1104571521282196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5943125617923215e-05, + "rewards/margins": 0.2805761396884918, + "rewards/rejected": -0.28059208393096924, + "step": 9862 + }, + { + "epoch": 6.82088520055325, + "grad_norm": 5.2259345054626465, + "learning_rate": 1.7661748885815278e-05, + "log_odds_chosen": 10.420143127441406, + "log_odds_ratio": -0.00035389253753237426, + "logits/chosen": -0.21969397366046906, + "logits/rejected": -0.3286452889442444, + "logps/chosen": -0.00023069702729117125, + "logps/rejected": -2.0163958072662354, + "loss": 0.6497, + "nll_loss": 0.1623815894126892, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3069704184308648e-05, + "rewards/margins": 0.20161652565002441, + "rewards/rejected": -0.2016395926475525, + "step": 9863 + }, + { + "epoch": 6.821576763485477, + "grad_norm": 6.109011173248291, + "learning_rate": 1.765790686952513e-05, + "log_odds_chosen": 10.854312896728516, + "log_odds_ratio": -6.197398761287332e-05, + "logits/chosen": -0.7911338806152344, + "logits/rejected": -0.7502856254577637, + "logps/chosen": -0.00017530944023746997, + "logps/rejected": -1.947382926940918, + "loss": 0.3902, + "nll_loss": 0.09753170609474182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7530943296151236e-05, + "rewards/margins": 0.1947207748889923, + "rewards/rejected": -0.19473831355571747, + "step": 9864 + }, + { + "epoch": 6.822268326417704, + "grad_norm": 7.206421852111816, + "learning_rate": 1.765406485323498e-05, + "log_odds_chosen": 11.161964416503906, + "log_odds_ratio": -4.741992961498909e-05, + "logits/chosen": -0.5804327726364136, + "logits/rejected": -0.677083432674408, + "logps/chosen": -0.0005471400218084455, + "logps/rejected": -2.5652215480804443, + "loss": 0.5785, + "nll_loss": 0.14463266730308533, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.471400072565302e-05, + "rewards/margins": 0.25646743178367615, + "rewards/rejected": -0.25652214884757996, + "step": 9865 + }, + { + "epoch": 6.822959889349931, + "grad_norm": 5.182133197784424, + "learning_rate": 1.7650222836944828e-05, + "log_odds_chosen": 10.873984336853027, + "log_odds_ratio": -4.0695875213714316e-05, + "logits/chosen": -0.9530771374702454, + "logits/rejected": -0.6770225763320923, + "logps/chosen": -0.00030901507125236094, + "logps/rejected": -2.1355810165405273, + "loss": 0.4847, + "nll_loss": 0.1211652159690857, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.090150858042762e-05, + "rewards/margins": 0.21352717280387878, + "rewards/rejected": -0.21355809271335602, + "step": 9866 + }, + { + "epoch": 6.823651452282157, + "grad_norm": 6.041674613952637, + "learning_rate": 1.764638082065468e-05, + "log_odds_chosen": 10.147492408752441, + "log_odds_ratio": -0.00024508449132554233, + "logits/chosen": -0.27030548453330994, + "logits/rejected": -0.3851109743118286, + "logps/chosen": -0.0005953733925707638, + "logps/rejected": -2.3240535259246826, + "loss": 0.5796, + "nll_loss": 0.14487367868423462, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953734216745943e-05, + "rewards/margins": 0.2323458194732666, + "rewards/rejected": -0.2324053794145584, + "step": 9867 + }, + { + "epoch": 6.824343015214384, + "grad_norm": 6.266101837158203, + "learning_rate": 1.764253880436453e-05, + "log_odds_chosen": 11.187174797058105, + "log_odds_ratio": -3.098931483691558e-05, + "logits/chosen": -0.15763667225837708, + "logits/rejected": -0.20958489179611206, + "logps/chosen": -8.974019146990031e-05, + "logps/rejected": -1.9341108798980713, + "loss": 0.4785, + "nll_loss": 0.11961343139410019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.97401878319215e-06, + "rewards/margins": 0.19340214133262634, + "rewards/rejected": -0.19341108202934265, + "step": 9868 + }, + { + "epoch": 6.825034578146611, + "grad_norm": 4.985950946807861, + "learning_rate": 1.7638696788074382e-05, + "log_odds_chosen": 10.878445625305176, + "log_odds_ratio": -4.755319969262928e-05, + "logits/chosen": -0.18200913071632385, + "logits/rejected": -0.36122196912765503, + "logps/chosen": -0.0004627583548426628, + "logps/rejected": -2.711005210876465, + "loss": 1.1427, + "nll_loss": 0.28566545248031616, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627583621186204e-05, + "rewards/margins": 0.2710542678833008, + "rewards/rejected": -0.2711005210876465, + "step": 9869 + }, + { + "epoch": 6.825726141078838, + "grad_norm": 5.5031418800354, + "learning_rate": 1.7634854771784235e-05, + "log_odds_chosen": 11.222797393798828, + "log_odds_ratio": -2.3735010472591966e-05, + "logits/chosen": -0.200686514377594, + "logits/rejected": -0.21383801102638245, + "logps/chosen": -0.00019569243886508048, + "logps/rejected": -2.272225856781006, + "loss": 0.5644, + "nll_loss": 0.1411091387271881, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9569246433093213e-05, + "rewards/margins": 0.22720301151275635, + "rewards/rejected": -0.22722259163856506, + "step": 9870 + }, + { + "epoch": 6.826417704011065, + "grad_norm": 15.147745132446289, + "learning_rate": 1.7631012755494084e-05, + "log_odds_chosen": 11.56512451171875, + "log_odds_ratio": -2.786301774904132e-05, + "logits/chosen": -0.873394250869751, + "logits/rejected": -0.8722903728485107, + "logps/chosen": -0.00020476612553466111, + "logps/rejected": -2.6685361862182617, + "loss": 0.7514, + "nll_loss": 0.18784187734127045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0476612917263992e-05, + "rewards/margins": 0.2668331265449524, + "rewards/rejected": -0.26685360074043274, + "step": 9871 + }, + { + "epoch": 6.827109266943292, + "grad_norm": 15.383719444274902, + "learning_rate": 1.7627170739203936e-05, + "log_odds_chosen": 10.781224250793457, + "log_odds_ratio": -0.00011232474935241044, + "logits/chosen": -0.5298612117767334, + "logits/rejected": -0.44690805673599243, + "logps/chosen": -0.0005620787269435823, + "logps/rejected": -2.7007107734680176, + "loss": 0.8028, + "nll_loss": 0.20069590210914612, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620787123916671e-05, + "rewards/margins": 0.2700148820877075, + "rewards/rejected": -0.2700711190700531, + "step": 9872 + }, + { + "epoch": 6.827800829875518, + "grad_norm": 7.158430576324463, + "learning_rate": 1.762332872291379e-05, + "log_odds_chosen": 9.014758110046387, + "log_odds_ratio": -0.0035419976338744164, + "logits/chosen": -0.386219322681427, + "logits/rejected": -0.1570458859205246, + "logps/chosen": -0.0029637387488037348, + "logps/rejected": -1.7540522813796997, + "loss": 0.7435, + "nll_loss": 0.1855314075946808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029637388070113957, + "rewards/margins": 0.17510885000228882, + "rewards/rejected": -0.17540523409843445, + "step": 9873 + }, + { + "epoch": 6.828492392807745, + "grad_norm": 5.015327453613281, + "learning_rate": 1.7619486706623638e-05, + "log_odds_chosen": 10.229984283447266, + "log_odds_ratio": -0.00017448162543587387, + "logits/chosen": -0.364147424697876, + "logits/rejected": -0.4106100797653198, + "logps/chosen": -0.0010063423542305827, + "logps/rejected": -1.9840210676193237, + "loss": 0.5084, + "nll_loss": 0.12709316611289978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010063423542305827, + "rewards/margins": 0.19830146431922913, + "rewards/rejected": -0.19840210676193237, + "step": 9874 + }, + { + "epoch": 6.829183955739972, + "grad_norm": 8.063447952270508, + "learning_rate": 1.7615644690333487e-05, + "log_odds_chosen": 9.436758995056152, + "log_odds_ratio": -0.002550853881984949, + "logits/chosen": -0.4666481912136078, + "logits/rejected": -0.5314819812774658, + "logps/chosen": -0.004965066909790039, + "logps/rejected": -2.0318586826324463, + "loss": 0.528, + "nll_loss": 0.13174238801002502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004965066909790039, + "rewards/margins": 0.20268937945365906, + "rewards/rejected": -0.20318587124347687, + "step": 9875 + }, + { + "epoch": 6.829875518672199, + "grad_norm": 7.094956398010254, + "learning_rate": 1.761180267404334e-05, + "log_odds_chosen": 10.774164199829102, + "log_odds_ratio": -4.405742947710678e-05, + "logits/chosen": -0.8490189909934998, + "logits/rejected": -0.9273943901062012, + "logps/chosen": -0.00021111921523697674, + "logps/rejected": -1.9229071140289307, + "loss": 0.4135, + "nll_loss": 0.10335968434810638, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1111922251293436e-05, + "rewards/margins": 0.19226960837841034, + "rewards/rejected": -0.19229072332382202, + "step": 9876 + }, + { + "epoch": 6.830567081604426, + "grad_norm": 3.5385513305664062, + "learning_rate": 1.7607960657753188e-05, + "log_odds_chosen": 11.042720794677734, + "log_odds_ratio": -7.377246947726235e-05, + "logits/chosen": -0.4456542730331421, + "logits/rejected": -0.48598921298980713, + "logps/chosen": -0.0011691419640555978, + "logps/rejected": -2.453537702560425, + "loss": 0.3032, + "nll_loss": 0.07579068839550018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011691421241266653, + "rewards/margins": 0.24523687362670898, + "rewards/rejected": -0.2453537881374359, + "step": 9877 + }, + { + "epoch": 6.8312586445366525, + "grad_norm": 6.717398166656494, + "learning_rate": 1.760411864146304e-05, + "log_odds_chosen": 11.788796424865723, + "log_odds_ratio": -3.9101567381294444e-05, + "logits/chosen": -0.32864150404930115, + "logits/rejected": -0.4035564064979553, + "logps/chosen": -0.00012067243369529024, + "logps/rejected": -2.3607280254364014, + "loss": 0.7146, + "nll_loss": 0.17865239083766937, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2067244824720547e-05, + "rewards/margins": 0.2360607385635376, + "rewards/rejected": -0.23607280850410461, + "step": 9878 + }, + { + "epoch": 6.831950207468879, + "grad_norm": 6.486326217651367, + "learning_rate": 1.7600276625172893e-05, + "log_odds_chosen": 10.430749893188477, + "log_odds_ratio": -0.00010449629189679399, + "logits/chosen": -0.5080342292785645, + "logits/rejected": -0.5649960041046143, + "logps/chosen": -0.00015258403436746448, + "logps/rejected": -1.7463585138320923, + "loss": 0.4466, + "nll_loss": 0.11164919286966324, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5258403436746448e-05, + "rewards/margins": 0.1746205985546112, + "rewards/rejected": -0.1746358573436737, + "step": 9879 + }, + { + "epoch": 6.832641770401106, + "grad_norm": 4.887731552124023, + "learning_rate": 1.7596434608882742e-05, + "log_odds_chosen": 10.540019035339355, + "log_odds_ratio": -9.059869626071304e-05, + "logits/chosen": -0.37279269099235535, + "logits/rejected": -0.41054567694664, + "logps/chosen": -0.0002726602542679757, + "logps/rejected": -1.7142304182052612, + "loss": 0.6504, + "nll_loss": 0.16259139776229858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7266023607808165e-05, + "rewards/margins": 0.17139577865600586, + "rewards/rejected": -0.1714230477809906, + "step": 9880 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 7.472131729125977, + "learning_rate": 1.7592592592592595e-05, + "log_odds_chosen": 10.181229591369629, + "log_odds_ratio": -0.0009723737603053451, + "logits/chosen": -0.6690306067466736, + "logits/rejected": -0.6618920564651489, + "logps/chosen": -0.0008291637059301138, + "logps/rejected": -2.0386102199554443, + "loss": 0.6148, + "nll_loss": 0.15359333157539368, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.291636913781986e-05, + "rewards/margins": 0.20377810299396515, + "rewards/rejected": -0.20386101305484772, + "step": 9881 + }, + { + "epoch": 6.83402489626556, + "grad_norm": 5.243133068084717, + "learning_rate": 1.7588750576302447e-05, + "log_odds_chosen": 11.20293140411377, + "log_odds_ratio": -6.37612902210094e-05, + "logits/chosen": -0.4984665513038635, + "logits/rejected": -0.5271502733230591, + "logps/chosen": -0.0004649769398383796, + "logps/rejected": -2.8009819984436035, + "loss": 0.7215, + "nll_loss": 0.18037012219429016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.649769834941253e-05, + "rewards/margins": 0.28005170822143555, + "rewards/rejected": -0.28009819984436035, + "step": 9882 + }, + { + "epoch": 6.834716459197787, + "grad_norm": 8.580633163452148, + "learning_rate": 1.7584908560012296e-05, + "log_odds_chosen": 11.648361206054688, + "log_odds_ratio": -5.3410247346619144e-05, + "logits/chosen": -0.5283809900283813, + "logits/rejected": -0.5525239109992981, + "logps/chosen": -0.0001951494487002492, + "logps/rejected": -2.4789934158325195, + "loss": 0.7252, + "nll_loss": 0.18128308653831482, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9514944142429158e-05, + "rewards/margins": 0.24787981808185577, + "rewards/rejected": -0.24789933860301971, + "step": 9883 + }, + { + "epoch": 6.8354080221300135, + "grad_norm": 6.00348424911499, + "learning_rate": 1.7581066543722145e-05, + "log_odds_chosen": 10.09213638305664, + "log_odds_ratio": -0.001151418313384056, + "logits/chosen": -0.7593430280685425, + "logits/rejected": -0.7525742650032043, + "logps/chosen": -0.0006862524314783514, + "logps/rejected": -1.8011060953140259, + "loss": 0.7752, + "nll_loss": 0.19369091093540192, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.862523878226057e-05, + "rewards/margins": 0.18004199862480164, + "rewards/rejected": -0.1801106035709381, + "step": 9884 + }, + { + "epoch": 6.83609958506224, + "grad_norm": 4.2488179206848145, + "learning_rate": 1.7577224527431994e-05, + "log_odds_chosen": 10.401985168457031, + "log_odds_ratio": -5.8780875406228006e-05, + "logits/chosen": -0.4401627480983734, + "logits/rejected": -0.5692065954208374, + "logps/chosen": -0.00022449388052336872, + "logps/rejected": -1.7061302661895752, + "loss": 0.4483, + "nll_loss": 0.1120588630437851, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2449388779932633e-05, + "rewards/margins": 0.1705905795097351, + "rewards/rejected": -0.17061302065849304, + "step": 9885 + }, + { + "epoch": 6.836791147994467, + "grad_norm": 5.48073148727417, + "learning_rate": 1.7573382511141847e-05, + "log_odds_chosen": 11.850058555603027, + "log_odds_ratio": -6.48322602501139e-05, + "logits/chosen": -0.0961153507232666, + "logits/rejected": -0.2069375216960907, + "logps/chosen": -0.00020627223420888186, + "logps/rejected": -2.962965965270996, + "loss": 0.5813, + "nll_loss": 0.14532530307769775, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0627223420888186e-05, + "rewards/margins": 0.2962760031223297, + "rewards/rejected": -0.296296626329422, + "step": 9886 + }, + { + "epoch": 6.837482710926694, + "grad_norm": 6.750162124633789, + "learning_rate": 1.75695404948517e-05, + "log_odds_chosen": 11.15122127532959, + "log_odds_ratio": -2.2114192688604817e-05, + "logits/chosen": -0.5288415551185608, + "logits/rejected": -0.5522376894950867, + "logps/chosen": -0.00011381346121197566, + "logps/rejected": -1.9328322410583496, + "loss": 0.519, + "nll_loss": 0.12975947558879852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1381346666894387e-05, + "rewards/margins": 0.19327184557914734, + "rewards/rejected": -0.19328321516513824, + "step": 9887 + }, + { + "epoch": 6.838174273858921, + "grad_norm": 5.236433029174805, + "learning_rate": 1.7565698478561548e-05, + "log_odds_chosen": 11.429391860961914, + "log_odds_ratio": -1.779601734597236e-05, + "logits/chosen": -0.3569970726966858, + "logits/rejected": -0.42728957533836365, + "logps/chosen": -0.0001754456607159227, + "logps/rejected": -2.4692459106445312, + "loss": 0.595, + "nll_loss": 0.1487591713666916, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7544567526783794e-05, + "rewards/margins": 0.2469070553779602, + "rewards/rejected": -0.24692460894584656, + "step": 9888 + }, + { + "epoch": 6.838865836791148, + "grad_norm": 7.0818772315979, + "learning_rate": 1.75618564622714e-05, + "log_odds_chosen": 10.782758712768555, + "log_odds_ratio": -3.2990428735502064e-05, + "logits/chosen": -0.7973594069480896, + "logits/rejected": -0.8417778611183167, + "logps/chosen": -0.00016431367839686573, + "logps/rejected": -1.7458895444869995, + "loss": 0.3121, + "nll_loss": 0.07801300287246704, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6431367839686573e-05, + "rewards/margins": 0.17457252740859985, + "rewards/rejected": -0.17458894848823547, + "step": 9889 + }, + { + "epoch": 6.8395573997233745, + "grad_norm": 7.803895473480225, + "learning_rate": 1.7558014445981253e-05, + "log_odds_chosen": 9.956562995910645, + "log_odds_ratio": -0.00033388679730705917, + "logits/chosen": -0.541756272315979, + "logits/rejected": -0.5874733924865723, + "logps/chosen": -0.0005440291715785861, + "logps/rejected": -1.9587575197219849, + "loss": 0.6511, + "nll_loss": 0.1627349555492401, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.440291715785861e-05, + "rewards/margins": 0.1958213448524475, + "rewards/rejected": -0.19587576389312744, + "step": 9890 + }, + { + "epoch": 6.840248962655601, + "grad_norm": 9.624287605285645, + "learning_rate": 1.7554172429691102e-05, + "log_odds_chosen": 11.180758476257324, + "log_odds_ratio": -0.00014281031326390803, + "logits/chosen": -0.6653587222099304, + "logits/rejected": -0.7742767930030823, + "logps/chosen": -0.0012785769067704678, + "logps/rejected": -3.0060606002807617, + "loss": 0.6241, + "nll_loss": 0.1560094654560089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012785769649781287, + "rewards/margins": 0.3004781901836395, + "rewards/rejected": -0.30060604214668274, + "step": 9891 + }, + { + "epoch": 6.840940525587828, + "grad_norm": 8.010558128356934, + "learning_rate": 1.7550330413400954e-05, + "log_odds_chosen": 9.888006210327148, + "log_odds_ratio": -7.842005288694054e-05, + "logits/chosen": -0.14711233973503113, + "logits/rejected": -0.025510773062705994, + "logps/chosen": -0.00022618676302954555, + "logps/rejected": -1.6812245845794678, + "loss": 0.7951, + "nll_loss": 0.19875632226467133, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2618674847763032e-05, + "rewards/margins": 0.16809983551502228, + "rewards/rejected": -0.16812245547771454, + "step": 9892 + }, + { + "epoch": 6.841632088520055, + "grad_norm": 5.166340351104736, + "learning_rate": 1.7546488397110804e-05, + "log_odds_chosen": 11.100112915039062, + "log_odds_ratio": -0.00014072639169171453, + "logits/chosen": -0.5190063714981079, + "logits/rejected": -0.5869351029396057, + "logps/chosen": -0.00019831795361824334, + "logps/rejected": -2.311744213104248, + "loss": 0.8563, + "nll_loss": 0.21406801044940948, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.983179390663281e-05, + "rewards/margins": 0.23115460574626923, + "rewards/rejected": -0.23117442429065704, + "step": 9893 + }, + { + "epoch": 6.842323651452282, + "grad_norm": 7.673447132110596, + "learning_rate": 1.7542646380820653e-05, + "log_odds_chosen": 10.821599960327148, + "log_odds_ratio": -7.789761002641171e-05, + "logits/chosen": -0.5115397572517395, + "logits/rejected": -0.5017709732055664, + "logps/chosen": -0.00034118720213882625, + "logps/rejected": -2.4737889766693115, + "loss": 0.6559, + "nll_loss": 0.1639774739742279, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.411872239666991e-05, + "rewards/margins": 0.24734479188919067, + "rewards/rejected": -0.2473789006471634, + "step": 9894 + }, + { + "epoch": 6.843015214384509, + "grad_norm": 4.814943790435791, + "learning_rate": 1.7538804364530505e-05, + "log_odds_chosen": 10.7841796875, + "log_odds_ratio": -0.00010694364755181596, + "logits/chosen": 0.014818176627159119, + "logits/rejected": -0.06501185148954391, + "logps/chosen": -0.00022933971195016056, + "logps/rejected": -2.209062337875366, + "loss": 0.5032, + "nll_loss": 0.12579664587974548, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2933971195016056e-05, + "rewards/margins": 0.220883309841156, + "rewards/rejected": -0.22090624272823334, + "step": 9895 + }, + { + "epoch": 6.8437067773167355, + "grad_norm": 4.384773254394531, + "learning_rate": 1.7534962348240357e-05, + "log_odds_chosen": 10.018265724182129, + "log_odds_ratio": -7.031872519291937e-05, + "logits/chosen": -0.062746062874794, + "logits/rejected": -0.10227086395025253, + "logps/chosen": -0.00020715226128231734, + "logps/rejected": -1.2999534606933594, + "loss": 0.6993, + "nll_loss": 0.1748102903366089, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0715227947221138e-05, + "rewards/margins": 0.1299746334552765, + "rewards/rejected": -0.12999534606933594, + "step": 9896 + }, + { + "epoch": 6.844398340248962, + "grad_norm": 7.973667621612549, + "learning_rate": 1.7531120331950207e-05, + "log_odds_chosen": 10.680398941040039, + "log_odds_ratio": -7.031670247670263e-05, + "logits/chosen": -0.6380209922790527, + "logits/rejected": -0.640826404094696, + "logps/chosen": -0.000123622827231884, + "logps/rejected": -1.6378414630889893, + "loss": 0.6583, + "nll_loss": 0.16457867622375488, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2362283086986281e-05, + "rewards/margins": 0.16377176344394684, + "rewards/rejected": -0.16378413140773773, + "step": 9897 + }, + { + "epoch": 6.845089903181189, + "grad_norm": 6.426230430603027, + "learning_rate": 1.752727831566006e-05, + "log_odds_chosen": 10.495553970336914, + "log_odds_ratio": -6.285425479291007e-05, + "logits/chosen": -0.44061386585235596, + "logits/rejected": -0.5182846188545227, + "logps/chosen": -0.00020382177899591625, + "logps/rejected": -1.9904173612594604, + "loss": 0.4765, + "nll_loss": 0.11910919845104218, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0382178263389505e-05, + "rewards/margins": 0.1990213543176651, + "rewards/rejected": -0.19904175400733948, + "step": 9898 + }, + { + "epoch": 6.845781466113416, + "grad_norm": 4.594810962677002, + "learning_rate": 1.752343629936991e-05, + "log_odds_chosen": 11.263570785522461, + "log_odds_ratio": -4.390040703583509e-05, + "logits/chosen": -0.26155662536621094, + "logits/rejected": -0.4153081178665161, + "logps/chosen": -0.00017742003547027707, + "logps/rejected": -2.175046920776367, + "loss": 0.4413, + "nll_loss": 0.11031512916088104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.774200427462347e-05, + "rewards/margins": 0.21748696267604828, + "rewards/rejected": -0.21750468015670776, + "step": 9899 + }, + { + "epoch": 6.846473029045643, + "grad_norm": 6.1752028465271, + "learning_rate": 1.751959428307976e-05, + "log_odds_chosen": 9.894766807556152, + "log_odds_ratio": -0.0013582361862063408, + "logits/chosen": -0.18945205211639404, + "logits/rejected": -0.2020440697669983, + "logps/chosen": -0.0010447344975546002, + "logps/rejected": -2.0190021991729736, + "loss": 0.5525, + "nll_loss": 0.13797858357429504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001044734672177583, + "rewards/margins": 0.20179572701454163, + "rewards/rejected": -0.20190021395683289, + "step": 9900 + }, + { + "epoch": 6.84716459197787, + "grad_norm": 5.831141948699951, + "learning_rate": 1.7515752266789613e-05, + "log_odds_chosen": 11.049789428710938, + "log_odds_ratio": -3.43750070896931e-05, + "logits/chosen": -0.5529294610023499, + "logits/rejected": -0.5563083291053772, + "logps/chosen": -0.00016891643463168293, + "logps/rejected": -2.304448127746582, + "loss": 0.3747, + "nll_loss": 0.09367722272872925, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6891644918359816e-05, + "rewards/margins": 0.23042795062065125, + "rewards/rejected": -0.23044481873512268, + "step": 9901 + }, + { + "epoch": 6.8478561549100965, + "grad_norm": 9.253786087036133, + "learning_rate": 1.7511910250499462e-05, + "log_odds_chosen": 12.252043724060059, + "log_odds_ratio": -6.59788429402397e-06, + "logits/chosen": -0.09550817310810089, + "logits/rejected": -0.1413390040397644, + "logps/chosen": -5.3725496400147676e-05, + "logps/rejected": -2.3383841514587402, + "loss": 0.633, + "nll_loss": 0.15825356543064117, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.372550276661059e-06, + "rewards/margins": 0.23383302986621857, + "rewards/rejected": -0.23383840918540955, + "step": 9902 + }, + { + "epoch": 6.848547717842323, + "grad_norm": 6.2306694984436035, + "learning_rate": 1.7508068234209314e-05, + "log_odds_chosen": 11.20789909362793, + "log_odds_ratio": -8.882944530341774e-05, + "logits/chosen": -0.6397537589073181, + "logits/rejected": -0.6996241807937622, + "logps/chosen": -0.0002480761322658509, + "logps/rejected": -2.258899688720703, + "loss": 0.4622, + "nll_loss": 0.11554432660341263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.480761395418085e-05, + "rewards/margins": 0.2258651703596115, + "rewards/rejected": -0.22588998079299927, + "step": 9903 + }, + { + "epoch": 6.84923928077455, + "grad_norm": 7.639112949371338, + "learning_rate": 1.7504226217919163e-05, + "log_odds_chosen": 10.243135452270508, + "log_odds_ratio": -0.00010048186231870204, + "logits/chosen": -0.50126051902771, + "logits/rejected": -0.4946579039096832, + "logps/chosen": -0.0003781873674597591, + "logps/rejected": -1.7362889051437378, + "loss": 0.7682, + "nll_loss": 0.19203275442123413, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.781873601838015e-05, + "rewards/margins": 0.17359109222888947, + "rewards/rejected": -0.17362891137599945, + "step": 9904 + }, + { + "epoch": 6.849930843706777, + "grad_norm": 7.1962785720825195, + "learning_rate": 1.7500384201629016e-05, + "log_odds_chosen": 9.414996147155762, + "log_odds_ratio": -0.000451650150353089, + "logits/chosen": -0.937263548374176, + "logits/rejected": -0.9588169455528259, + "logps/chosen": -0.0006807852769270539, + "logps/rejected": -1.8451372385025024, + "loss": 0.8361, + "nll_loss": 0.2089846134185791, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.807853060308844e-05, + "rewards/margins": 0.18444564938545227, + "rewards/rejected": -0.18451373279094696, + "step": 9905 + }, + { + "epoch": 6.850622406639004, + "grad_norm": 5.980663776397705, + "learning_rate": 1.7496542185338865e-05, + "log_odds_chosen": 10.780477523803711, + "log_odds_ratio": -0.00016776591655798256, + "logits/chosen": -0.6011804938316345, + "logits/rejected": -0.6044209599494934, + "logps/chosen": -0.0005937922396697104, + "logps/rejected": -2.3112282752990723, + "loss": 0.5201, + "nll_loss": 0.12999917566776276, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93792246945668e-05, + "rewards/margins": 0.23106345534324646, + "rewards/rejected": -0.23112282156944275, + "step": 9906 + }, + { + "epoch": 6.851313969571231, + "grad_norm": 4.122068881988525, + "learning_rate": 1.7492700169048717e-05, + "log_odds_chosen": 10.263906478881836, + "log_odds_ratio": -9.153223072644323e-05, + "logits/chosen": -0.19530266523361206, + "logits/rejected": -0.17187093198299408, + "logps/chosen": -0.0003283666155766696, + "logps/rejected": -1.8774287700653076, + "loss": 0.4826, + "nll_loss": 0.12064440548419952, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.283666228526272e-05, + "rewards/margins": 0.18771006166934967, + "rewards/rejected": -0.18774288892745972, + "step": 9907 + }, + { + "epoch": 6.8520055325034575, + "grad_norm": 4.443989276885986, + "learning_rate": 1.748885815275857e-05, + "log_odds_chosen": 11.554838180541992, + "log_odds_ratio": -1.3808636140311137e-05, + "logits/chosen": -0.20856159925460815, + "logits/rejected": -0.22478443384170532, + "logps/chosen": -0.00012917797721456736, + "logps/rejected": -2.4405529499053955, + "loss": 0.5185, + "nll_loss": 0.12961144745349884, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2917797903355677e-05, + "rewards/margins": 0.24404236674308777, + "rewards/rejected": -0.24405530095100403, + "step": 9908 + }, + { + "epoch": 6.852697095435684, + "grad_norm": 10.797835350036621, + "learning_rate": 1.748501613646842e-05, + "log_odds_chosen": 11.389801025390625, + "log_odds_ratio": -3.008267594850622e-05, + "logits/chosen": -0.08736234903335571, + "logits/rejected": -0.3351304233074188, + "logps/chosen": -8.788306877249852e-05, + "logps/rejected": -1.9150742292404175, + "loss": 0.8423, + "nll_loss": 0.21057824790477753, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.788307241047733e-06, + "rewards/margins": 0.19149863719940186, + "rewards/rejected": -0.19150742888450623, + "step": 9909 + }, + { + "epoch": 6.853388658367911, + "grad_norm": 8.192545890808105, + "learning_rate": 1.748117412017827e-05, + "log_odds_chosen": 10.926861763000488, + "log_odds_ratio": -2.7011970814783126e-05, + "logits/chosen": -0.3003247380256653, + "logits/rejected": -0.39332881569862366, + "logps/chosen": -0.0004899668274447322, + "logps/rejected": -1.9362943172454834, + "loss": 0.5292, + "nll_loss": 0.13229210674762726, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.899668419966474e-05, + "rewards/margins": 0.19358044862747192, + "rewards/rejected": -0.1936294436454773, + "step": 9910 + }, + { + "epoch": 6.854080221300138, + "grad_norm": 5.7841997146606445, + "learning_rate": 1.7477332103888124e-05, + "log_odds_chosen": 10.92652702331543, + "log_odds_ratio": -0.00018881492724176496, + "logits/chosen": -0.6957674026489258, + "logits/rejected": -0.7153472900390625, + "logps/chosen": -0.000338401849148795, + "logps/rejected": -2.6009016036987305, + "loss": 0.4916, + "nll_loss": 0.12288433313369751, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.384018418728374e-05, + "rewards/margins": 0.2600563168525696, + "rewards/rejected": -0.260090172290802, + "step": 9911 + }, + { + "epoch": 6.854771784232365, + "grad_norm": 6.821191787719727, + "learning_rate": 1.7473490087597973e-05, + "log_odds_chosen": 11.141485214233398, + "log_odds_ratio": -2.456578476994764e-05, + "logits/chosen": -0.656318187713623, + "logits/rejected": -0.7465082406997681, + "logps/chosen": -0.00015865606837905943, + "logps/rejected": -1.8773607015609741, + "loss": 0.3443, + "nll_loss": 0.0860750824213028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5865607565501705e-05, + "rewards/margins": 0.18772020936012268, + "rewards/rejected": -0.18773606419563293, + "step": 9912 + }, + { + "epoch": 6.855463347164592, + "grad_norm": 5.875421524047852, + "learning_rate": 1.7469648071307822e-05, + "log_odds_chosen": 11.702245712280273, + "log_odds_ratio": -1.5685051039326936e-05, + "logits/chosen": -0.09253035485744476, + "logits/rejected": -0.146541029214859, + "logps/chosen": -0.00031491348636336625, + "logps/rejected": -2.6832847595214844, + "loss": 0.7997, + "nll_loss": 0.19991399347782135, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1491348636336625e-05, + "rewards/margins": 0.2682969868183136, + "rewards/rejected": -0.2683284878730774, + "step": 9913 + }, + { + "epoch": 6.856154910096818, + "grad_norm": 9.347213745117188, + "learning_rate": 1.7465806055017674e-05, + "log_odds_chosen": 11.504549026489258, + "log_odds_ratio": -2.4479886633343995e-05, + "logits/chosen": -0.6678361892700195, + "logits/rejected": -0.6643664836883545, + "logps/chosen": -0.00017276719154324383, + "logps/rejected": -2.7045693397521973, + "loss": 0.7489, + "nll_loss": 0.18721558153629303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7276719518122263e-05, + "rewards/margins": 0.27043965458869934, + "rewards/rejected": -0.2704569101333618, + "step": 9914 + }, + { + "epoch": 6.856846473029045, + "grad_norm": 7.882079124450684, + "learning_rate": 1.7461964038727523e-05, + "log_odds_chosen": 11.102792739868164, + "log_odds_ratio": -5.0582086259964854e-05, + "logits/chosen": -0.3423008322715759, + "logits/rejected": -0.3178647756576538, + "logps/chosen": -0.00030722690280526876, + "logps/rejected": -2.448429822921753, + "loss": 0.8287, + "nll_loss": 0.2071687877178192, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0722694646101445e-05, + "rewards/margins": 0.2448122799396515, + "rewards/rejected": -0.244842991232872, + "step": 9915 + }, + { + "epoch": 6.857538035961272, + "grad_norm": 6.01251220703125, + "learning_rate": 1.7458122022437376e-05, + "log_odds_chosen": 10.909671783447266, + "log_odds_ratio": -0.00014612148515880108, + "logits/chosen": -0.47818073630332947, + "logits/rejected": -0.45567870140075684, + "logps/chosen": -0.00018920523871202022, + "logps/rejected": -2.08219051361084, + "loss": 0.723, + "nll_loss": 0.18072611093521118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8920525690191425e-05, + "rewards/margins": 0.208200141787529, + "rewards/rejected": -0.20821905136108398, + "step": 9916 + }, + { + "epoch": 6.858229598893499, + "grad_norm": 9.785633087158203, + "learning_rate": 1.745428000614723e-05, + "log_odds_chosen": 11.04234504699707, + "log_odds_ratio": -2.0666961063398048e-05, + "logits/chosen": -0.45309072732925415, + "logits/rejected": -0.507732093334198, + "logps/chosen": -0.0002194387634517625, + "logps/rejected": -2.370762825012207, + "loss": 0.6212, + "nll_loss": 0.15530359745025635, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.194387707277201e-05, + "rewards/margins": 0.23705436289310455, + "rewards/rejected": -0.2370762974023819, + "step": 9917 + }, + { + "epoch": 6.858921161825726, + "grad_norm": 6.593029975891113, + "learning_rate": 1.7450437989857077e-05, + "log_odds_chosen": 10.200165748596191, + "log_odds_ratio": -0.000459955568658188, + "logits/chosen": -0.35304728150367737, + "logits/rejected": -0.35141220688819885, + "logps/chosen": -0.00028261399711482227, + "logps/rejected": -2.053372383117676, + "loss": 0.7283, + "nll_loss": 0.18202868103981018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8261400075280108e-05, + "rewards/margins": 0.20530900359153748, + "rewards/rejected": -0.205337256193161, + "step": 9918 + }, + { + "epoch": 6.8596127247579535, + "grad_norm": 4.91062593460083, + "learning_rate": 1.744659597356693e-05, + "log_odds_chosen": 11.299886703491211, + "log_odds_ratio": -0.0001566058926982805, + "logits/chosen": -0.7404186725616455, + "logits/rejected": -0.7922763824462891, + "logps/chosen": -0.00026429325225763023, + "logps/rejected": -2.357874870300293, + "loss": 0.3988, + "nll_loss": 0.09967993944883347, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6429324861965142e-05, + "rewards/margins": 0.23576104640960693, + "rewards/rejected": -0.23578748106956482, + "step": 9919 + }, + { + "epoch": 6.86030428769018, + "grad_norm": 4.724360466003418, + "learning_rate": 1.7442753957276782e-05, + "log_odds_chosen": 9.903718948364258, + "log_odds_ratio": -0.000160827228683047, + "logits/chosen": -0.5334427952766418, + "logits/rejected": -0.5078845620155334, + "logps/chosen": -0.0003555277071427554, + "logps/rejected": -1.58981454372406, + "loss": 0.5266, + "nll_loss": 0.13162297010421753, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.555276998667978e-05, + "rewards/margins": 0.15894590318202972, + "rewards/rejected": -0.15898147225379944, + "step": 9920 + }, + { + "epoch": 6.860995850622407, + "grad_norm": 6.36701774597168, + "learning_rate": 1.743891194098663e-05, + "log_odds_chosen": 11.064401626586914, + "log_odds_ratio": -7.795902638463303e-05, + "logits/chosen": -0.48767662048339844, + "logits/rejected": -0.48847290873527527, + "logps/chosen": -0.0005439437227323651, + "logps/rejected": -2.7804341316223145, + "loss": 1.2125, + "nll_loss": 0.30312567949295044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.439437518361956e-05, + "rewards/margins": 0.2779890298843384, + "rewards/rejected": -0.27804338932037354, + "step": 9921 + }, + { + "epoch": 6.861687413554634, + "grad_norm": 5.585921287536621, + "learning_rate": 1.743506992469648e-05, + "log_odds_chosen": 10.440255165100098, + "log_odds_ratio": -4.734244794235565e-05, + "logits/chosen": -0.3502916693687439, + "logits/rejected": -0.4361574351787567, + "logps/chosen": -0.00027647442766465247, + "logps/rejected": -1.9319242238998413, + "loss": 0.8023, + "nll_loss": 0.20056825876235962, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7647445676848292e-05, + "rewards/margins": 0.19316478073596954, + "rewards/rejected": -0.19319242238998413, + "step": 9922 + }, + { + "epoch": 6.862378976486861, + "grad_norm": 5.664307117462158, + "learning_rate": 1.7431227908406333e-05, + "log_odds_chosen": 10.078949928283691, + "log_odds_ratio": -0.0002160519507015124, + "logits/chosen": -0.47924864292144775, + "logits/rejected": -0.5029646158218384, + "logps/chosen": -0.00029257647111080587, + "logps/rejected": -1.7399296760559082, + "loss": 0.568, + "nll_loss": 0.14198613166809082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.925764783867635e-05, + "rewards/margins": 0.173963725566864, + "rewards/rejected": -0.17399299144744873, + "step": 9923 + }, + { + "epoch": 6.863070539419088, + "grad_norm": 6.095013618469238, + "learning_rate": 1.7427385892116182e-05, + "log_odds_chosen": 11.45406436920166, + "log_odds_ratio": -2.605345616757404e-05, + "logits/chosen": -0.6417930126190186, + "logits/rejected": -0.6419544816017151, + "logps/chosen": -0.0002008125593420118, + "logps/rejected": -2.647881507873535, + "loss": 0.4199, + "nll_loss": 0.1049807220697403, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0081257389392704e-05, + "rewards/margins": 0.2647680938243866, + "rewards/rejected": -0.2647881805896759, + "step": 9924 + }, + { + "epoch": 6.8637621023513145, + "grad_norm": 4.876924514770508, + "learning_rate": 1.7423543875826034e-05, + "log_odds_chosen": 9.685707092285156, + "log_odds_ratio": -0.00021258770721033216, + "logits/chosen": -0.515690267086029, + "logits/rejected": -0.5359160900115967, + "logps/chosen": -0.0005392287275753915, + "logps/rejected": -1.847927212715149, + "loss": 1.1985, + "nll_loss": 0.2995995879173279, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392287130234763e-05, + "rewards/margins": 0.18473881483078003, + "rewards/rejected": -0.18479272723197937, + "step": 9925 + }, + { + "epoch": 6.864453665283541, + "grad_norm": 5.993760585784912, + "learning_rate": 1.7419701859535887e-05, + "log_odds_chosen": 10.626228332519531, + "log_odds_ratio": -0.00015337899094447494, + "logits/chosen": -0.24197693169116974, + "logits/rejected": -0.2716585099697113, + "logps/chosen": -0.0003822005819529295, + "logps/rejected": -2.1401491165161133, + "loss": 0.589, + "nll_loss": 0.14723332226276398, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8220056012505665e-05, + "rewards/margins": 0.2139766812324524, + "rewards/rejected": -0.2140149176120758, + "step": 9926 + }, + { + "epoch": 6.865145228215768, + "grad_norm": 5.754234313964844, + "learning_rate": 1.7415859843245736e-05, + "log_odds_chosen": 11.140161514282227, + "log_odds_ratio": -9.091119864024222e-05, + "logits/chosen": -0.26153460144996643, + "logits/rejected": -0.45184725522994995, + "logps/chosen": -0.00045796221820637584, + "logps/rejected": -2.747653007507324, + "loss": 0.4756, + "nll_loss": 0.11889111995697021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5796223275829107e-05, + "rewards/margins": 0.2747195065021515, + "rewards/rejected": -0.274765282869339, + "step": 9927 + }, + { + "epoch": 6.865836791147995, + "grad_norm": 5.773352146148682, + "learning_rate": 1.7412017826955588e-05, + "log_odds_chosen": 10.411229133605957, + "log_odds_ratio": -0.0001067575067281723, + "logits/chosen": -0.21838048100471497, + "logits/rejected": -0.21249642968177795, + "logps/chosen": -0.00023341068299487233, + "logps/rejected": -1.8828068971633911, + "loss": 0.5262, + "nll_loss": 0.13154050707817078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3341068299487233e-05, + "rewards/margins": 0.1882573664188385, + "rewards/rejected": -0.18828070163726807, + "step": 9928 + }, + { + "epoch": 6.866528354080222, + "grad_norm": 5.741930961608887, + "learning_rate": 1.740817581066544e-05, + "log_odds_chosen": 11.804953575134277, + "log_odds_ratio": -1.0474625014467165e-05, + "logits/chosen": -0.3105663061141968, + "logits/rejected": -0.2862730026245117, + "logps/chosen": -0.00014494526840280741, + "logps/rejected": -2.5858964920043945, + "loss": 0.5473, + "nll_loss": 0.13682931661605835, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4494527931674384e-05, + "rewards/margins": 0.2585751712322235, + "rewards/rejected": -0.25858965516090393, + "step": 9929 + }, + { + "epoch": 6.867219917012449, + "grad_norm": 8.72846794128418, + "learning_rate": 1.740433379437529e-05, + "log_odds_chosen": 11.901812553405762, + "log_odds_ratio": -1.6944477465585805e-05, + "logits/chosen": -0.7704805135726929, + "logits/rejected": -0.7657447457313538, + "logps/chosen": -0.00020099672838114202, + "logps/rejected": -2.4580609798431396, + "loss": 0.6436, + "nll_loss": 0.1609020233154297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0099672838114202e-05, + "rewards/margins": 0.24578601121902466, + "rewards/rejected": -0.24580609798431396, + "step": 9930 + }, + { + "epoch": 6.867911479944675, + "grad_norm": 8.556121826171875, + "learning_rate": 1.740049177808514e-05, + "log_odds_chosen": 9.893619537353516, + "log_odds_ratio": -0.00011758245818782598, + "logits/chosen": -0.35947567224502563, + "logits/rejected": -0.31796538829803467, + "logps/chosen": -0.0008837583591230214, + "logps/rejected": -2.0111024379730225, + "loss": 0.3324, + "nll_loss": 0.0830918699502945, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.83758402778767e-05, + "rewards/margins": 0.20102186501026154, + "rewards/rejected": -0.20111024379730225, + "step": 9931 + }, + { + "epoch": 6.868603042876902, + "grad_norm": 6.5274786949157715, + "learning_rate": 1.739664976179499e-05, + "log_odds_chosen": 11.198671340942383, + "log_odds_ratio": -3.070286402362399e-05, + "logits/chosen": -0.6600465178489685, + "logits/rejected": -0.6094987392425537, + "logps/chosen": -0.0002148185740225017, + "logps/rejected": -2.463357925415039, + "loss": 0.5131, + "nll_loss": 0.12826129794120789, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.148185740225017e-05, + "rewards/margins": 0.24631434679031372, + "rewards/rejected": -0.24633580446243286, + "step": 9932 + }, + { + "epoch": 6.869294605809129, + "grad_norm": 4.538366794586182, + "learning_rate": 1.739280774550484e-05, + "log_odds_chosen": 10.437494277954102, + "log_odds_ratio": -5.404383409768343e-05, + "logits/chosen": -0.17082995176315308, + "logits/rejected": -0.29417771100997925, + "logps/chosen": -0.00018732089665718377, + "logps/rejected": -1.973541259765625, + "loss": 0.5179, + "nll_loss": 0.12947946786880493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.87320911209099e-05, + "rewards/margins": 0.1973353922367096, + "rewards/rejected": -0.19735412299633026, + "step": 9933 + }, + { + "epoch": 6.869986168741356, + "grad_norm": 6.1261091232299805, + "learning_rate": 1.7388965729214693e-05, + "log_odds_chosen": 10.655526161193848, + "log_odds_ratio": -0.000254765065619722, + "logits/chosen": -0.7289286851882935, + "logits/rejected": -0.755789041519165, + "logps/chosen": -0.00013674799993168563, + "logps/rejected": -1.394344449043274, + "loss": 0.5419, + "nll_loss": 0.13544505834579468, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3674801266461145e-05, + "rewards/margins": 0.1394207626581192, + "rewards/rejected": -0.13943444192409515, + "step": 9934 + }, + { + "epoch": 6.870677731673583, + "grad_norm": 6.334026336669922, + "learning_rate": 1.7385123712924545e-05, + "log_odds_chosen": 11.238309860229492, + "log_odds_ratio": -4.077638004673645e-05, + "logits/chosen": -0.6347928047180176, + "logits/rejected": -0.6307282447814941, + "logps/chosen": -0.00015452434308826923, + "logps/rejected": -2.1024088859558105, + "loss": 0.5892, + "nll_loss": 0.1472892463207245, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.545243321743328e-05, + "rewards/margins": 0.2102254331111908, + "rewards/rejected": -0.21024090051651, + "step": 9935 + }, + { + "epoch": 6.87136929460581, + "grad_norm": 12.584394454956055, + "learning_rate": 1.7381281696634394e-05, + "log_odds_chosen": 10.109475135803223, + "log_odds_ratio": -0.00010246929014101624, + "logits/chosen": -0.3030821681022644, + "logits/rejected": -0.35970088839530945, + "logps/chosen": -0.0004246834432706237, + "logps/rejected": -1.9912558794021606, + "loss": 0.6852, + "nll_loss": 0.1712900549173355, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.246834578225389e-05, + "rewards/margins": 0.1990831345319748, + "rewards/rejected": -0.19912561774253845, + "step": 9936 + }, + { + "epoch": 6.872060857538036, + "grad_norm": 8.13845443725586, + "learning_rate": 1.7377439680344247e-05, + "log_odds_chosen": 9.459121704101562, + "log_odds_ratio": -0.00015860966232139617, + "logits/chosen": -0.5512504577636719, + "logits/rejected": -0.6040525436401367, + "logps/chosen": -0.00028108907281421125, + "logps/rejected": -1.1141246557235718, + "loss": 0.4211, + "nll_loss": 0.10525853931903839, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8108906917623244e-05, + "rewards/margins": 0.11138436198234558, + "rewards/rejected": -0.11141246557235718, + "step": 9937 + }, + { + "epoch": 6.872752420470263, + "grad_norm": 7.232450485229492, + "learning_rate": 1.73735976640541e-05, + "log_odds_chosen": 9.538267135620117, + "log_odds_ratio": -0.00014865670527797192, + "logits/chosen": -0.4153312146663666, + "logits/rejected": -0.4232088327407837, + "logps/chosen": -0.000883034139405936, + "logps/rejected": -1.9747530221939087, + "loss": 0.392, + "nll_loss": 0.09798521548509598, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.830341539578512e-05, + "rewards/margins": 0.1973869949579239, + "rewards/rejected": -0.19747528433799744, + "step": 9938 + }, + { + "epoch": 6.87344398340249, + "grad_norm": 7.225497245788574, + "learning_rate": 1.7369755647763948e-05, + "log_odds_chosen": 10.864669799804688, + "log_odds_ratio": -3.936921712011099e-05, + "logits/chosen": -0.6600432395935059, + "logits/rejected": -0.6265806555747986, + "logps/chosen": -0.00037797007826156914, + "logps/rejected": -2.2492332458496094, + "loss": 0.6035, + "nll_loss": 0.15087053179740906, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.77970100089442e-05, + "rewards/margins": 0.22488552331924438, + "rewards/rejected": -0.22492331266403198, + "step": 9939 + }, + { + "epoch": 6.874135546334717, + "grad_norm": 8.900500297546387, + "learning_rate": 1.7365913631473797e-05, + "log_odds_chosen": 10.664848327636719, + "log_odds_ratio": -0.00032811707933433354, + "logits/chosen": -0.3347414433956146, + "logits/rejected": -0.35119128227233887, + "logps/chosen": -0.0006118988967500627, + "logps/rejected": -2.225257158279419, + "loss": 0.4027, + "nll_loss": 0.10063539445400238, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118989404058084e-05, + "rewards/margins": 0.22246450185775757, + "rewards/rejected": -0.2225257158279419, + "step": 9940 + }, + { + "epoch": 6.874827109266944, + "grad_norm": 5.114599704742432, + "learning_rate": 1.736207161518365e-05, + "log_odds_chosen": 10.928939819335938, + "log_odds_ratio": -3.058044967474416e-05, + "logits/chosen": -0.3283681869506836, + "logits/rejected": -0.41022035479545593, + "logps/chosen": -0.0001671072095632553, + "logps/rejected": -2.0779647827148438, + "loss": 0.5119, + "nll_loss": 0.12795957922935486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.671072095632553e-05, + "rewards/margins": 0.20777978003025055, + "rewards/rejected": -0.20779648423194885, + "step": 9941 + }, + { + "epoch": 6.875518672199171, + "grad_norm": 10.98104476928711, + "learning_rate": 1.73582295988935e-05, + "log_odds_chosen": 10.98994255065918, + "log_odds_ratio": -3.9570215449202806e-05, + "logits/chosen": -0.7664468288421631, + "logits/rejected": -0.778846263885498, + "logps/chosen": -0.0005870637251064181, + "logps/rejected": -2.516409158706665, + "loss": 0.9717, + "nll_loss": 0.24291543662548065, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.870637687621638e-05, + "rewards/margins": 0.25158220529556274, + "rewards/rejected": -0.2516409158706665, + "step": 9942 + }, + { + "epoch": 6.876210235131397, + "grad_norm": 4.401638984680176, + "learning_rate": 1.735438758260335e-05, + "log_odds_chosen": 9.971277236938477, + "log_odds_ratio": -0.00013970036525279284, + "logits/chosen": -0.6718819737434387, + "logits/rejected": -0.7286103367805481, + "logps/chosen": -0.0002767588885035366, + "logps/rejected": -1.503734827041626, + "loss": 0.863, + "nll_loss": 0.2157265543937683, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7675887395162135e-05, + "rewards/margins": 0.1503458023071289, + "rewards/rejected": -0.15037348866462708, + "step": 9943 + }, + { + "epoch": 6.876901798063624, + "grad_norm": 5.169467926025391, + "learning_rate": 1.7350545566313204e-05, + "log_odds_chosen": 9.853658676147461, + "log_odds_ratio": -0.0007788334041833878, + "logits/chosen": -0.5393489599227905, + "logits/rejected": -0.4997929334640503, + "logps/chosen": -0.0005267321248538792, + "logps/rejected": -1.7126424312591553, + "loss": 0.6214, + "nll_loss": 0.15526898205280304, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.26732110301964e-05, + "rewards/margins": 0.17121157050132751, + "rewards/rejected": -0.17126423120498657, + "step": 9944 + }, + { + "epoch": 6.877593360995851, + "grad_norm": 12.462244987487793, + "learning_rate": 1.7346703550023053e-05, + "log_odds_chosen": 9.403864860534668, + "log_odds_ratio": -0.0005906783044338226, + "logits/chosen": -0.40958431363105774, + "logits/rejected": -0.45778223872184753, + "logps/chosen": -0.0010209481697529554, + "logps/rejected": -1.3862988948822021, + "loss": 0.7853, + "nll_loss": 0.19627533853054047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010209481843048707, + "rewards/margins": 0.1385277956724167, + "rewards/rejected": -0.13862988352775574, + "step": 9945 + }, + { + "epoch": 6.878284923928078, + "grad_norm": 6.601505756378174, + "learning_rate": 1.7342861533732905e-05, + "log_odds_chosen": 11.283668518066406, + "log_odds_ratio": -5.514323856914416e-05, + "logits/chosen": -0.7354806065559387, + "logits/rejected": -0.7971148490905762, + "logps/chosen": -0.00020214321557432413, + "logps/rejected": -2.620887517929077, + "loss": 0.4874, + "nll_loss": 0.12183713912963867, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.021432010224089e-05, + "rewards/margins": 0.2620685398578644, + "rewards/rejected": -0.2620887756347656, + "step": 9946 + }, + { + "epoch": 6.878976486860305, + "grad_norm": 5.732128620147705, + "learning_rate": 1.7339019517442758e-05, + "log_odds_chosen": 10.169954299926758, + "log_odds_ratio": -7.686160097364336e-05, + "logits/chosen": -0.5683677196502686, + "logits/rejected": -0.5826584696769714, + "logps/chosen": -0.0004312100063543767, + "logps/rejected": -2.1739308834075928, + "loss": 0.4796, + "nll_loss": 0.11989589035511017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.312100645620376e-05, + "rewards/margins": 0.21734994649887085, + "rewards/rejected": -0.21739307045936584, + "step": 9947 + }, + { + "epoch": 6.8796680497925315, + "grad_norm": 7.44845724105835, + "learning_rate": 1.7335177501152607e-05, + "log_odds_chosen": 10.656408309936523, + "log_odds_ratio": -0.00015237029583659023, + "logits/chosen": -0.6948117017745972, + "logits/rejected": -0.6771795749664307, + "logps/chosen": -0.0001807966036722064, + "logps/rejected": -2.0261623859405518, + "loss": 0.5481, + "nll_loss": 0.13700607419013977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.807965963962488e-05, + "rewards/margins": 0.20259815454483032, + "rewards/rejected": -0.20261624455451965, + "step": 9948 + }, + { + "epoch": 6.880359612724758, + "grad_norm": 7.517022132873535, + "learning_rate": 1.7331335484862456e-05, + "log_odds_chosen": 9.013700485229492, + "log_odds_ratio": -0.0011961768614128232, + "logits/chosen": -0.6705461740493774, + "logits/rejected": -0.6669026613235474, + "logps/chosen": -0.002910541370511055, + "logps/rejected": -2.0945003032684326, + "loss": 0.542, + "nll_loss": 0.13536912202835083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00029105416615493596, + "rewards/margins": 0.2091589719057083, + "rewards/rejected": -0.20945002138614655, + "step": 9949 + }, + { + "epoch": 6.881051175656985, + "grad_norm": 4.460395812988281, + "learning_rate": 1.7327493468572308e-05, + "log_odds_chosen": 9.430727005004883, + "log_odds_ratio": -0.0002010946482187137, + "logits/chosen": -0.5580604076385498, + "logits/rejected": -0.5696870684623718, + "logps/chosen": -0.0006297902436926961, + "logps/rejected": -1.6898581981658936, + "loss": 0.4955, + "nll_loss": 0.12386074662208557, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.297902291407809e-05, + "rewards/margins": 0.16892285645008087, + "rewards/rejected": -0.16898582875728607, + "step": 9950 + }, + { + "epoch": 6.881742738589212, + "grad_norm": 6.6989593505859375, + "learning_rate": 1.7323651452282157e-05, + "log_odds_chosen": 11.214866638183594, + "log_odds_ratio": -3.179534905939363e-05, + "logits/chosen": -0.7736462950706482, + "logits/rejected": -0.8380985260009766, + "logps/chosen": -0.00015404712758027017, + "logps/rejected": -2.2978692054748535, + "loss": 0.4214, + "nll_loss": 0.10534738004207611, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5404712030431256e-05, + "rewards/margins": 0.22977152466773987, + "rewards/rejected": -0.2297869324684143, + "step": 9951 + }, + { + "epoch": 6.882434301521439, + "grad_norm": 6.151716232299805, + "learning_rate": 1.731980943599201e-05, + "log_odds_chosen": 10.018352508544922, + "log_odds_ratio": -0.00014328441466204822, + "logits/chosen": -0.40556055307388306, + "logits/rejected": -0.3207288980484009, + "logps/chosen": -0.00018548393563833088, + "logps/rejected": -1.5287439823150635, + "loss": 0.8473, + "nll_loss": 0.211818128824234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8548393200035207e-05, + "rewards/margins": 0.15285584330558777, + "rewards/rejected": -0.15287438035011292, + "step": 9952 + }, + { + "epoch": 6.883125864453666, + "grad_norm": 5.060670852661133, + "learning_rate": 1.731596741970186e-05, + "log_odds_chosen": 10.694990158081055, + "log_odds_ratio": -0.00023410924768541008, + "logits/chosen": -0.3882830739021301, + "logits/rejected": -0.4243199825286865, + "logps/chosen": -0.0010852471459656954, + "logps/rejected": -1.9446629285812378, + "loss": 0.5633, + "nll_loss": 0.1408044546842575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010852471314137802, + "rewards/margins": 0.1943577527999878, + "rewards/rejected": -0.19446627795696259, + "step": 9953 + }, + { + "epoch": 6.8838174273858925, + "grad_norm": 7.240312099456787, + "learning_rate": 1.731212540341171e-05, + "log_odds_chosen": 11.345335960388184, + "log_odds_ratio": -2.520248199289199e-05, + "logits/chosen": -0.3495720326900482, + "logits/rejected": -0.25986605882644653, + "logps/chosen": -0.0001573432091390714, + "logps/rejected": -2.135495185852051, + "loss": 0.6904, + "nll_loss": 0.17259274423122406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.573432018631138e-05, + "rewards/margins": 0.21353378891944885, + "rewards/rejected": -0.21354952454566956, + "step": 9954 + }, + { + "epoch": 6.884508990318119, + "grad_norm": 6.1964335441589355, + "learning_rate": 1.7308283387121564e-05, + "log_odds_chosen": 11.313817024230957, + "log_odds_ratio": -4.6309156459756196e-05, + "logits/chosen": -0.5913910269737244, + "logits/rejected": -0.6920463442802429, + "logps/chosen": -0.00011107635509688407, + "logps/rejected": -2.270355224609375, + "loss": 0.8023, + "nll_loss": 0.20057925581932068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1107635145890526e-05, + "rewards/margins": 0.2270244061946869, + "rewards/rejected": -0.2270355373620987, + "step": 9955 + }, + { + "epoch": 6.885200553250346, + "grad_norm": 6.067237854003906, + "learning_rate": 1.7304441370831413e-05, + "log_odds_chosen": 9.834778785705566, + "log_odds_ratio": -0.0003098523011431098, + "logits/chosen": -0.9029225707054138, + "logits/rejected": -0.8073776960372925, + "logps/chosen": -0.0003418096457608044, + "logps/rejected": -1.4671462774276733, + "loss": 0.4298, + "nll_loss": 0.10742116719484329, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4180960938101634e-05, + "rewards/margins": 0.14668045938014984, + "rewards/rejected": -0.14671462774276733, + "step": 9956 + }, + { + "epoch": 6.885892116182573, + "grad_norm": 4.893023490905762, + "learning_rate": 1.7300599354541265e-05, + "log_odds_chosen": 12.138407707214355, + "log_odds_ratio": -1.6137224520207383e-05, + "logits/chosen": -0.27513575553894043, + "logits/rejected": -0.3699111044406891, + "logps/chosen": -0.00010701712017180398, + "logps/rejected": -2.995022773742676, + "loss": 0.7685, + "nll_loss": 0.1921170949935913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0701711289584637e-05, + "rewards/margins": 0.2994915843009949, + "rewards/rejected": -0.29950231313705444, + "step": 9957 + }, + { + "epoch": 6.8865836791148, + "grad_norm": 4.358221530914307, + "learning_rate": 1.7296757338251114e-05, + "log_odds_chosen": 11.058510780334473, + "log_odds_ratio": -6.035809565219097e-05, + "logits/chosen": -0.3187516927719116, + "logits/rejected": -0.34987926483154297, + "logps/chosen": -0.00014822027878835797, + "logps/rejected": -2.2254271507263184, + "loss": 0.4032, + "nll_loss": 0.1007840484380722, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4822028788330499e-05, + "rewards/margins": 0.2225278615951538, + "rewards/rejected": -0.22254270315170288, + "step": 9958 + }, + { + "epoch": 6.887275242047027, + "grad_norm": 3.851203203201294, + "learning_rate": 1.7292915321960963e-05, + "log_odds_chosen": 10.868274688720703, + "log_odds_ratio": -3.078898225794546e-05, + "logits/chosen": -0.3462313711643219, + "logits/rejected": -0.386076956987381, + "logps/chosen": -0.00013465769006870687, + "logps/rejected": -1.9355796575546265, + "loss": 0.3889, + "nll_loss": 0.09722109138965607, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3465767551679164e-05, + "rewards/margins": 0.19354449212551117, + "rewards/rejected": -0.1935579478740692, + "step": 9959 + }, + { + "epoch": 6.8879668049792535, + "grad_norm": 8.651869773864746, + "learning_rate": 1.7289073305670816e-05, + "log_odds_chosen": 10.275079727172852, + "log_odds_ratio": -5.3268369811121374e-05, + "logits/chosen": -0.2315186709165573, + "logits/rejected": -0.30275315046310425, + "logps/chosen": -0.0004828626988455653, + "logps/rejected": -1.9132792949676514, + "loss": 0.5085, + "nll_loss": 0.12712129950523376, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.82862742501311e-05, + "rewards/margins": 0.19127964973449707, + "rewards/rejected": -0.19132792949676514, + "step": 9960 + }, + { + "epoch": 6.88865836791148, + "grad_norm": 5.152824401855469, + "learning_rate": 1.7285231289380668e-05, + "log_odds_chosen": 11.55789566040039, + "log_odds_ratio": -3.1101779313758016e-05, + "logits/chosen": -0.11422102153301239, + "logits/rejected": -0.1345907300710678, + "logps/chosen": -0.00011851730960188434, + "logps/rejected": -2.35556697845459, + "loss": 0.5654, + "nll_loss": 0.14134716987609863, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1851730960188434e-05, + "rewards/margins": 0.2355448603630066, + "rewards/rejected": -0.2355567067861557, + "step": 9961 + }, + { + "epoch": 6.889349930843707, + "grad_norm": 5.693244934082031, + "learning_rate": 1.7281389273090517e-05, + "log_odds_chosen": 11.959161758422852, + "log_odds_ratio": -1.573568988533225e-05, + "logits/chosen": -0.5982638001441956, + "logits/rejected": -0.4819360375404358, + "logps/chosen": -0.0001248091139132157, + "logps/rejected": -2.7712881565093994, + "loss": 0.538, + "nll_loss": 0.13448816537857056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.248091120942263e-05, + "rewards/margins": 0.2771163582801819, + "rewards/rejected": -0.27712881565093994, + "step": 9962 + }, + { + "epoch": 6.890041493775934, + "grad_norm": 3.995441436767578, + "learning_rate": 1.727754725680037e-05, + "log_odds_chosen": 9.996007919311523, + "log_odds_ratio": -9.138335008174181e-05, + "logits/chosen": -0.12948307394981384, + "logits/rejected": -0.19393359124660492, + "logps/chosen": -0.00023820166825316846, + "logps/rejected": -1.4062923192977905, + "loss": 0.2848, + "nll_loss": 0.07120097428560257, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3820166461518966e-05, + "rewards/margins": 0.14060541987419128, + "rewards/rejected": -0.14062923192977905, + "step": 9963 + }, + { + "epoch": 6.890733056708161, + "grad_norm": 4.375192165374756, + "learning_rate": 1.7273705240510222e-05, + "log_odds_chosen": 10.583505630493164, + "log_odds_ratio": -5.76963102503214e-05, + "logits/chosen": -0.3995498716831207, + "logits/rejected": -0.488243043422699, + "logps/chosen": -0.0001608713937457651, + "logps/rejected": -1.938847303390503, + "loss": 0.4457, + "nll_loss": 0.11142930388450623, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.608714046597015e-05, + "rewards/margins": 0.19386863708496094, + "rewards/rejected": -0.1938847303390503, + "step": 9964 + }, + { + "epoch": 6.891424619640388, + "grad_norm": 7.30011510848999, + "learning_rate": 1.726986322422007e-05, + "log_odds_chosen": 11.248298645019531, + "log_odds_ratio": -7.639620889676735e-05, + "logits/chosen": -0.05031472072005272, + "logits/rejected": -0.1647230088710785, + "logps/chosen": -0.0002004953712457791, + "logps/rejected": -2.555237293243408, + "loss": 0.6183, + "nll_loss": 0.15456579625606537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.004953785217367e-05, + "rewards/margins": 0.25550365447998047, + "rewards/rejected": -0.2555237114429474, + "step": 9965 + }, + { + "epoch": 6.8921161825726145, + "grad_norm": 5.279568195343018, + "learning_rate": 1.7266021207929923e-05, + "log_odds_chosen": 9.024934768676758, + "log_odds_ratio": -0.0004643774009309709, + "logits/chosen": -0.3420089781284332, + "logits/rejected": -0.3719962537288666, + "logps/chosen": -0.0006390767521224916, + "logps/rejected": -1.4473562240600586, + "loss": 0.5353, + "nll_loss": 0.13377240300178528, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.390767521224916e-05, + "rewards/margins": 0.1446717232465744, + "rewards/rejected": -0.14473563432693481, + "step": 9966 + }, + { + "epoch": 6.892807745504841, + "grad_norm": 5.274577617645264, + "learning_rate": 1.7262179191639773e-05, + "log_odds_chosen": 11.9353666305542, + "log_odds_ratio": -1.3555643818108365e-05, + "logits/chosen": -0.20525482296943665, + "logits/rejected": -0.33735471963882446, + "logps/chosen": -0.00010454172297613695, + "logps/rejected": -2.7081387042999268, + "loss": 0.6671, + "nll_loss": 0.1667725145816803, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0454172297613695e-05, + "rewards/margins": 0.27080339193344116, + "rewards/rejected": -0.27081388235092163, + "step": 9967 + }, + { + "epoch": 6.893499308437068, + "grad_norm": 6.057780742645264, + "learning_rate": 1.725833717534962e-05, + "log_odds_chosen": 12.293357849121094, + "log_odds_ratio": -1.7128662875620648e-05, + "logits/chosen": -0.2888219952583313, + "logits/rejected": -0.35230833292007446, + "logps/chosen": -0.00016165403940249234, + "logps/rejected": -3.4829816818237305, + "loss": 0.607, + "nll_loss": 0.15174441039562225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6165404304047115e-05, + "rewards/margins": 0.3482820391654968, + "rewards/rejected": -0.34829822182655334, + "step": 9968 + }, + { + "epoch": 6.894190871369295, + "grad_norm": 5.762960433959961, + "learning_rate": 1.7254495159059474e-05, + "log_odds_chosen": 11.103206634521484, + "log_odds_ratio": -5.622408934868872e-05, + "logits/chosen": -0.33061569929122925, + "logits/rejected": -0.4054297208786011, + "logps/chosen": -0.00026888775755651295, + "logps/rejected": -2.325014591217041, + "loss": 0.5813, + "nll_loss": 0.1453164964914322, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.688877793843858e-05, + "rewards/margins": 0.23247459530830383, + "rewards/rejected": -0.23250147700309753, + "step": 9969 + }, + { + "epoch": 6.894882434301522, + "grad_norm": 4.574897289276123, + "learning_rate": 1.7250653142769326e-05, + "log_odds_chosen": 10.430425643920898, + "log_odds_ratio": -0.000246243056608364, + "logits/chosen": 0.02395036816596985, + "logits/rejected": -0.009510427713394165, + "logps/chosen": -0.00040431745583191514, + "logps/rejected": -2.0692477226257324, + "loss": 0.5412, + "nll_loss": 0.135265052318573, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.043174703838304e-05, + "rewards/margins": 0.20688433945178986, + "rewards/rejected": -0.20692478120326996, + "step": 9970 + }, + { + "epoch": 6.895573997233749, + "grad_norm": 7.976735591888428, + "learning_rate": 1.7246811126479176e-05, + "log_odds_chosen": 11.322787284851074, + "log_odds_ratio": -4.459191404748708e-05, + "logits/chosen": -0.5531570911407471, + "logits/rejected": -0.6223657131195068, + "logps/chosen": -0.00028584557003341615, + "logps/rejected": -2.5278143882751465, + "loss": 0.7538, + "nll_loss": 0.1884533315896988, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8584558094735257e-05, + "rewards/margins": 0.2527528405189514, + "rewards/rejected": -0.2527814209461212, + "step": 9971 + }, + { + "epoch": 6.8962655601659755, + "grad_norm": 6.4360456466674805, + "learning_rate": 1.7242969110189028e-05, + "log_odds_chosen": 11.322450637817383, + "log_odds_ratio": -0.0002915957011282444, + "logits/chosen": -0.18822026252746582, + "logits/rejected": -0.21931561827659607, + "logps/chosen": -0.00036068481858819723, + "logps/rejected": -3.0156097412109375, + "loss": 0.522, + "nll_loss": 0.1304597705602646, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6068482586415485e-05, + "rewards/margins": 0.3015248775482178, + "rewards/rejected": -0.3015609681606293, + "step": 9972 + }, + { + "epoch": 6.896957123098202, + "grad_norm": 6.496629238128662, + "learning_rate": 1.723912709389888e-05, + "log_odds_chosen": 10.186212539672852, + "log_odds_ratio": -0.00041088840225711465, + "logits/chosen": -0.16055399179458618, + "logits/rejected": -0.13317851722240448, + "logps/chosen": -0.0008482407429255545, + "logps/rejected": -2.466451406478882, + "loss": 0.5271, + "nll_loss": 0.13174614310264587, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.482407429255545e-05, + "rewards/margins": 0.24656033515930176, + "rewards/rejected": -0.24664515256881714, + "step": 9973 + }, + { + "epoch": 6.897648686030429, + "grad_norm": 7.392096042633057, + "learning_rate": 1.723528507760873e-05, + "log_odds_chosen": 9.93017864227295, + "log_odds_ratio": -0.0005902046104893088, + "logits/chosen": -0.4856283664703369, + "logits/rejected": -0.4615434408187866, + "logps/chosen": -0.0007256892276927829, + "logps/rejected": -1.9430298805236816, + "loss": 0.5449, + "nll_loss": 0.1361699402332306, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.25689169485122e-05, + "rewards/margins": 0.19423040747642517, + "rewards/rejected": -0.1943029761314392, + "step": 9974 + }, + { + "epoch": 6.898340248962656, + "grad_norm": 4.620207786560059, + "learning_rate": 1.7231443061318582e-05, + "log_odds_chosen": 11.213531494140625, + "log_odds_ratio": -1.967877324204892e-05, + "logits/chosen": -0.3341743052005768, + "logits/rejected": -0.4299090504646301, + "logps/chosen": -0.00012599513866007328, + "logps/rejected": -2.0738210678100586, + "loss": 0.4984, + "nll_loss": 0.12459343671798706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2599512956512626e-05, + "rewards/margins": 0.20736950635910034, + "rewards/rejected": -0.20738211274147034, + "step": 9975 + }, + { + "epoch": 6.899031811894883, + "grad_norm": 6.554383754730225, + "learning_rate": 1.722760104502843e-05, + "log_odds_chosen": 10.371408462524414, + "log_odds_ratio": -0.00014440737140830606, + "logits/chosen": 0.0777406394481659, + "logits/rejected": -0.02058453857898712, + "logps/chosen": -0.00027885413146577775, + "logps/rejected": -1.7560479640960693, + "loss": 0.6283, + "nll_loss": 0.1570620834827423, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7885413146577775e-05, + "rewards/margins": 0.17557692527770996, + "rewards/rejected": -0.17560480535030365, + "step": 9976 + }, + { + "epoch": 6.89972337482711, + "grad_norm": 6.355048656463623, + "learning_rate": 1.722375902873828e-05, + "log_odds_chosen": 10.970148086547852, + "log_odds_ratio": -4.067463305545971e-05, + "logits/chosen": -0.38521891832351685, + "logits/rejected": -0.4572032392024994, + "logps/chosen": -0.0004533581086434424, + "logps/rejected": -2.48785662651062, + "loss": 0.7192, + "nll_loss": 0.17979201674461365, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.533581522991881e-05, + "rewards/margins": 0.24874034523963928, + "rewards/rejected": -0.24878567457199097, + "step": 9977 + }, + { + "epoch": 6.9004149377593365, + "grad_norm": 14.030281066894531, + "learning_rate": 1.7219917012448132e-05, + "log_odds_chosen": 9.614143371582031, + "log_odds_ratio": -0.0008283082861453295, + "logits/chosen": -0.685629665851593, + "logits/rejected": -0.6637234687805176, + "logps/chosen": -0.0012888973578810692, + "logps/rejected": -1.804660439491272, + "loss": 1.246, + "nll_loss": 0.31140968203544617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001288897474296391, + "rewards/margins": 0.18033716082572937, + "rewards/rejected": -0.18046605587005615, + "step": 9978 + }, + { + "epoch": 6.901106500691563, + "grad_norm": 4.408642292022705, + "learning_rate": 1.7216074996157985e-05, + "log_odds_chosen": 11.826573371887207, + "log_odds_ratio": -6.028627103660256e-05, + "logits/chosen": -0.07609276473522186, + "logits/rejected": -0.14199811220169067, + "logps/chosen": -0.0012418505502864718, + "logps/rejected": -3.6499717235565186, + "loss": 0.535, + "nll_loss": 0.1337420642375946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012418505502864718, + "rewards/margins": 0.3648729920387268, + "rewards/rejected": -0.36499714851379395, + "step": 9979 + }, + { + "epoch": 6.90179806362379, + "grad_norm": 12.403252601623535, + "learning_rate": 1.7212232979867834e-05, + "log_odds_chosen": 10.558736801147461, + "log_odds_ratio": -7.954631291795522e-05, + "logits/chosen": -0.8147011399269104, + "logits/rejected": -0.8264572024345398, + "logps/chosen": -0.00023240508744493127, + "logps/rejected": -1.7422142028808594, + "loss": 0.4863, + "nll_loss": 0.12156790494918823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3240507289301604e-05, + "rewards/margins": 0.1741981953382492, + "rewards/rejected": -0.17422142624855042, + "step": 9980 + }, + { + "epoch": 6.902489626556017, + "grad_norm": 5.796401023864746, + "learning_rate": 1.7208390963577686e-05, + "log_odds_chosen": 11.614713668823242, + "log_odds_ratio": -1.2603211871464737e-05, + "logits/chosen": -0.24464285373687744, + "logits/rejected": -0.20899458229541779, + "logps/chosen": -0.00011055903451051563, + "logps/rejected": -2.407068967819214, + "loss": 0.5689, + "nll_loss": 0.1422237902879715, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1055903087253682e-05, + "rewards/margins": 0.24069584906101227, + "rewards/rejected": -0.2407069057226181, + "step": 9981 + }, + { + "epoch": 6.903181189488244, + "grad_norm": 11.680974006652832, + "learning_rate": 1.720454894728754e-05, + "log_odds_chosen": 10.755126953125, + "log_odds_ratio": -4.615750731318258e-05, + "logits/chosen": -1.014672040939331, + "logits/rejected": -0.9060235023498535, + "logps/chosen": -9.19853919185698e-05, + "logps/rejected": -1.668900728225708, + "loss": 0.5539, + "nll_loss": 0.13847382366657257, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.198538464261219e-06, + "rewards/margins": 0.16688087582588196, + "rewards/rejected": -0.16689005494117737, + "step": 9982 + }, + { + "epoch": 6.903872752420471, + "grad_norm": 5.80061674118042, + "learning_rate": 1.7200706930997388e-05, + "log_odds_chosen": 11.222034454345703, + "log_odds_ratio": -2.8924485377501696e-05, + "logits/chosen": -0.3152433931827545, + "logits/rejected": -0.4137364327907562, + "logps/chosen": -0.0001420917978975922, + "logps/rejected": -2.2330398559570312, + "loss": 0.7179, + "nll_loss": 0.17946086823940277, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4209179425961338e-05, + "rewards/margins": 0.22328978776931763, + "rewards/rejected": -0.22330397367477417, + "step": 9983 + }, + { + "epoch": 6.904564315352697, + "grad_norm": 5.845762252807617, + "learning_rate": 1.719686491470724e-05, + "log_odds_chosen": 10.686575889587402, + "log_odds_ratio": -0.00021342271065805107, + "logits/chosen": -0.11986368149518967, + "logits/rejected": -0.23188787698745728, + "logps/chosen": -0.002099713310599327, + "logps/rejected": -2.823957920074463, + "loss": 0.6112, + "nll_loss": 0.1527710258960724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020997134561184794, + "rewards/margins": 0.282185822725296, + "rewards/rejected": -0.28239578008651733, + "step": 9984 + }, + { + "epoch": 6.905255878284924, + "grad_norm": 4.402422904968262, + "learning_rate": 1.719302289841709e-05, + "log_odds_chosen": 11.305420875549316, + "log_odds_ratio": -3.5889526770915836e-05, + "logits/chosen": -0.47906628251075745, + "logits/rejected": -0.5477668046951294, + "logps/chosen": -0.0002497454406693578, + "logps/rejected": -2.85650634765625, + "loss": 0.5407, + "nll_loss": 0.13516737520694733, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4974546249723062e-05, + "rewards/margins": 0.2856256663799286, + "rewards/rejected": -0.2856506407260895, + "step": 9985 + }, + { + "epoch": 6.905947441217151, + "grad_norm": 6.135840892791748, + "learning_rate": 1.718918088212694e-05, + "log_odds_chosen": 11.341968536376953, + "log_odds_ratio": -0.00011196234845556319, + "logits/chosen": -0.059741728007793427, + "logits/rejected": -0.2262853980064392, + "logps/chosen": -0.0001801389007596299, + "logps/rejected": -2.3228774070739746, + "loss": 0.5875, + "nll_loss": 0.1468682587146759, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.801389043976087e-05, + "rewards/margins": 0.23226971924304962, + "rewards/rejected": -0.2322877198457718, + "step": 9986 + }, + { + "epoch": 6.906639004149378, + "grad_norm": 6.067322254180908, + "learning_rate": 1.718533886583679e-05, + "log_odds_chosen": 9.752814292907715, + "log_odds_ratio": -0.00010807962098624557, + "logits/chosen": -0.9131343364715576, + "logits/rejected": -0.964361846446991, + "logps/chosen": -0.0003965977521147579, + "logps/rejected": -1.637006163597107, + "loss": 0.6769, + "nll_loss": 0.1692250370979309, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.965977521147579e-05, + "rewards/margins": 0.1636609435081482, + "rewards/rejected": -0.16370061039924622, + "step": 9987 + }, + { + "epoch": 6.907330567081605, + "grad_norm": 6.261120319366455, + "learning_rate": 1.7181496849546643e-05, + "log_odds_chosen": 11.65019416809082, + "log_odds_ratio": -1.2441886610758957e-05, + "logits/chosen": -0.14381486177444458, + "logits/rejected": -0.283840149641037, + "logps/chosen": -0.0005752052529715002, + "logps/rejected": -2.8144447803497314, + "loss": 0.6714, + "nll_loss": 0.16783645749092102, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.752052675234154e-05, + "rewards/margins": 0.28138697147369385, + "rewards/rejected": -0.2814444899559021, + "step": 9988 + }, + { + "epoch": 6.908022130013832, + "grad_norm": 6.250824451446533, + "learning_rate": 1.7177654833256492e-05, + "log_odds_chosen": 11.515530586242676, + "log_odds_ratio": -7.775715494062752e-05, + "logits/chosen": -0.605188250541687, + "logits/rejected": -0.6803003549575806, + "logps/chosen": -0.00018032471416518092, + "logps/rejected": -2.609130859375, + "loss": 0.7296, + "nll_loss": 0.1823856383562088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8032471416518092e-05, + "rewards/margins": 0.2608950436115265, + "rewards/rejected": -0.26091307401657104, + "step": 9989 + }, + { + "epoch": 6.908713692946058, + "grad_norm": 8.184794425964355, + "learning_rate": 1.7173812816966345e-05, + "log_odds_chosen": 10.37601089477539, + "log_odds_ratio": -0.00011470400204416364, + "logits/chosen": -0.6234085559844971, + "logits/rejected": -0.6225950717926025, + "logps/chosen": -0.0002568865311332047, + "logps/rejected": -1.7968766689300537, + "loss": 0.4098, + "nll_loss": 0.10243692249059677, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5688654204714112e-05, + "rewards/margins": 0.17966195940971375, + "rewards/rejected": -0.17968766391277313, + "step": 9990 + }, + { + "epoch": 6.909405255878285, + "grad_norm": 6.626237869262695, + "learning_rate": 1.7169970800676197e-05, + "log_odds_chosen": 11.185545921325684, + "log_odds_ratio": -2.2582265955861658e-05, + "logits/chosen": -0.44990795850753784, + "logits/rejected": -0.45199233293533325, + "logps/chosen": -0.00030346005223691463, + "logps/rejected": -2.503441333770752, + "loss": 0.4825, + "nll_loss": 0.12061312049627304, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0346007406478748e-05, + "rewards/margins": 0.25031381845474243, + "rewards/rejected": -0.2503441572189331, + "step": 9991 + }, + { + "epoch": 6.910096818810512, + "grad_norm": 4.987192153930664, + "learning_rate": 1.7166128784386046e-05, + "log_odds_chosen": 10.907398223876953, + "log_odds_ratio": -3.8101232348708436e-05, + "logits/chosen": -0.447499543428421, + "logits/rejected": -0.5684572458267212, + "logps/chosen": -0.00017325104272458702, + "logps/rejected": -2.0211973190307617, + "loss": 0.4658, + "nll_loss": 0.11645621061325073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7325104636256583e-05, + "rewards/margins": 0.202102392911911, + "rewards/rejected": -0.20211973786354065, + "step": 9992 + }, + { + "epoch": 6.910788381742739, + "grad_norm": 4.203260898590088, + "learning_rate": 1.71622867680959e-05, + "log_odds_chosen": 11.14862060546875, + "log_odds_ratio": -2.3845985197112896e-05, + "logits/chosen": -0.45166918635368347, + "logits/rejected": -0.48994314670562744, + "logps/chosen": -0.00017078101518563926, + "logps/rejected": -2.381739854812622, + "loss": 0.5594, + "nll_loss": 0.13983705639839172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.707810406514909e-05, + "rewards/margins": 0.23815689980983734, + "rewards/rejected": -0.23817399144172668, + "step": 9993 + }, + { + "epoch": 6.911479944674966, + "grad_norm": 6.142948627471924, + "learning_rate": 1.7158444751805748e-05, + "log_odds_chosen": 12.051002502441406, + "log_odds_ratio": -4.126852945773862e-05, + "logits/chosen": -0.41811603307724, + "logits/rejected": -0.5299392342567444, + "logps/chosen": -0.0003040796145796776, + "logps/rejected": -3.4569079875946045, + "loss": 0.8762, + "nll_loss": 0.21903914213180542, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.040796218556352e-05, + "rewards/margins": 0.34566038846969604, + "rewards/rejected": -0.3456908166408539, + "step": 9994 + }, + { + "epoch": 6.912171507607193, + "grad_norm": 4.349850654602051, + "learning_rate": 1.7154602735515597e-05, + "log_odds_chosen": 10.87544059753418, + "log_odds_ratio": -3.4084103390341625e-05, + "logits/chosen": -0.6788896322250366, + "logits/rejected": -0.6680184006690979, + "logps/chosen": -0.00010783715697471052, + "logps/rejected": -1.7640870809555054, + "loss": 0.5287, + "nll_loss": 0.13216936588287354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0783716788864695e-05, + "rewards/margins": 0.17639793455600739, + "rewards/rejected": -0.17640872299671173, + "step": 9995 + }, + { + "epoch": 6.912863070539419, + "grad_norm": 7.14302921295166, + "learning_rate": 1.715076071922545e-05, + "log_odds_chosen": 11.023346900939941, + "log_odds_ratio": -0.00014540627307724208, + "logits/chosen": -0.6619070172309875, + "logits/rejected": -0.6815442442893982, + "logps/chosen": -0.0004409942775964737, + "logps/rejected": -2.748929500579834, + "loss": 0.6391, + "nll_loss": 0.15976391732692719, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4099429942434654e-05, + "rewards/margins": 0.2748488783836365, + "rewards/rejected": -0.27489298582077026, + "step": 9996 + }, + { + "epoch": 6.913554633471646, + "grad_norm": 5.94216775894165, + "learning_rate": 1.7146918702935302e-05, + "log_odds_chosen": 9.212047576904297, + "log_odds_ratio": -0.0026079691015183926, + "logits/chosen": -0.22626805305480957, + "logits/rejected": -0.21808166801929474, + "logps/chosen": -0.0020967067684978247, + "logps/rejected": -1.5887069702148438, + "loss": 0.4767, + "nll_loss": 0.1189127266407013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002096706593874842, + "rewards/margins": 0.15866103768348694, + "rewards/rejected": -0.15887069702148438, + "step": 9997 + }, + { + "epoch": 6.914246196403873, + "grad_norm": 5.059571266174316, + "learning_rate": 1.714307668664515e-05, + "log_odds_chosen": 9.903790473937988, + "log_odds_ratio": -0.00019789970247074962, + "logits/chosen": -0.3731571435928345, + "logits/rejected": -0.4034712612628937, + "logps/chosen": -0.0004390809335745871, + "logps/rejected": -1.7462046146392822, + "loss": 0.7642, + "nll_loss": 0.19103066623210907, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.390809408505447e-05, + "rewards/margins": 0.1745765507221222, + "rewards/rejected": -0.17462044954299927, + "step": 9998 + }, + { + "epoch": 6.9149377593361, + "grad_norm": 6.209487438201904, + "learning_rate": 1.7139234670355003e-05, + "log_odds_chosen": 10.59821891784668, + "log_odds_ratio": -3.986993760918267e-05, + "logits/chosen": -0.17233705520629883, + "logits/rejected": -0.20161035656929016, + "logps/chosen": -0.0008766738465055823, + "logps/rejected": -2.2711052894592285, + "loss": 0.8454, + "nll_loss": 0.21134933829307556, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.766739483689889e-05, + "rewards/margins": 0.22702288627624512, + "rewards/rejected": -0.22711056470870972, + "step": 9999 + }, + { + "epoch": 6.915629322268327, + "grad_norm": 5.539126396179199, + "learning_rate": 1.7135392654064856e-05, + "log_odds_chosen": 11.38946533203125, + "log_odds_ratio": -4.335786070441827e-05, + "logits/chosen": -0.4548533856868744, + "logits/rejected": -0.46245914697647095, + "logps/chosen": -0.00011622466263361275, + "logps/rejected": -2.4456939697265625, + "loss": 0.5107, + "nll_loss": 0.1276700645685196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1622466445260216e-05, + "rewards/margins": 0.24455779790878296, + "rewards/rejected": -0.24456939101219177, + "step": 10000 + }, + { + "epoch": 6.9163208852005535, + "grad_norm": 5.396164894104004, + "learning_rate": 1.7131550637774705e-05, + "log_odds_chosen": 10.901666641235352, + "log_odds_ratio": -5.471762779052369e-05, + "logits/chosen": -0.3494221568107605, + "logits/rejected": -0.4387245774269104, + "logps/chosen": -0.00018875315436162055, + "logps/rejected": -2.1393706798553467, + "loss": 0.4547, + "nll_loss": 0.11367647349834442, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8875314708566293e-05, + "rewards/margins": 0.21391819417476654, + "rewards/rejected": -0.21393707394599915, + "step": 10001 + }, + { + "epoch": 6.91701244813278, + "grad_norm": 5.637955188751221, + "learning_rate": 1.7127708621484557e-05, + "log_odds_chosen": 11.170769691467285, + "log_odds_ratio": -0.0003723877598531544, + "logits/chosen": -0.5806461572647095, + "logits/rejected": -0.6152359247207642, + "logps/chosen": -0.001203806372359395, + "logps/rejected": -2.139021635055542, + "loss": 0.5255, + "nll_loss": 0.1313486397266388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012038064596708864, + "rewards/margins": 0.21378177404403687, + "rewards/rejected": -0.21390217542648315, + "step": 10002 + }, + { + "epoch": 6.917704011065007, + "grad_norm": 4.686069965362549, + "learning_rate": 1.712386660519441e-05, + "log_odds_chosen": 8.970919609069824, + "log_odds_ratio": -0.0009532291442155838, + "logits/chosen": -0.3284967839717865, + "logits/rejected": -0.36006975173950195, + "logps/chosen": -0.001138596679084003, + "logps/rejected": -1.734472393989563, + "loss": 0.3506, + "nll_loss": 0.08756309747695923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011385966354282573, + "rewards/margins": 0.17333339154720306, + "rewards/rejected": -0.17344725131988525, + "step": 10003 + }, + { + "epoch": 6.918395573997234, + "grad_norm": 5.611600875854492, + "learning_rate": 1.712002458890426e-05, + "log_odds_chosen": 9.66145133972168, + "log_odds_ratio": -0.0003402447036933154, + "logits/chosen": -0.2682963013648987, + "logits/rejected": -0.2505141794681549, + "logps/chosen": -0.0002948015171568841, + "logps/rejected": -1.4168219566345215, + "loss": 0.6874, + "nll_loss": 0.17181649804115295, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.948015389847569e-05, + "rewards/margins": 0.14165271818637848, + "rewards/rejected": -0.1416821926832199, + "step": 10004 + }, + { + "epoch": 6.919087136929461, + "grad_norm": 7.419260025024414, + "learning_rate": 1.7116182572614108e-05, + "log_odds_chosen": 10.889808654785156, + "log_odds_ratio": -5.775997487944551e-05, + "logits/chosen": -0.14147552847862244, + "logits/rejected": -0.18376371264457703, + "logps/chosen": -0.000382953614462167, + "logps/rejected": -2.3106813430786133, + "loss": 0.6622, + "nll_loss": 0.1655469834804535, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.829535853583366e-05, + "rewards/margins": 0.23102985322475433, + "rewards/rejected": -0.23106813430786133, + "step": 10005 + }, + { + "epoch": 6.919778699861688, + "grad_norm": 6.319573879241943, + "learning_rate": 1.711234055632396e-05, + "log_odds_chosen": 10.299653053283691, + "log_odds_ratio": -0.0001510842703282833, + "logits/chosen": -0.26969629526138306, + "logits/rejected": -0.48525863885879517, + "logps/chosen": -0.0004234910593368113, + "logps/rejected": -1.9849330186843872, + "loss": 0.6292, + "nll_loss": 0.15729010105133057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.234910738887265e-05, + "rewards/margins": 0.198450967669487, + "rewards/rejected": -0.1984933316707611, + "step": 10006 + }, + { + "epoch": 6.9204702627939145, + "grad_norm": 8.048715591430664, + "learning_rate": 1.710849854003381e-05, + "log_odds_chosen": 11.729087829589844, + "log_odds_ratio": -2.784031858027447e-05, + "logits/chosen": -0.1425468623638153, + "logits/rejected": -0.20963522791862488, + "logps/chosen": -0.00016317141125909984, + "logps/rejected": -2.7022130489349365, + "loss": 0.6601, + "nll_loss": 0.165022611618042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6317142581101507e-05, + "rewards/margins": 0.27020499110221863, + "rewards/rejected": -0.2702212929725647, + "step": 10007 + }, + { + "epoch": 6.921161825726141, + "grad_norm": 7.835314750671387, + "learning_rate": 1.7104656523743662e-05, + "log_odds_chosen": 9.968704223632812, + "log_odds_ratio": -0.0001407539821229875, + "logits/chosen": -0.6228182315826416, + "logits/rejected": -0.5100666284561157, + "logps/chosen": -0.00018801394617184997, + "logps/rejected": -1.6318118572235107, + "loss": 0.6759, + "nll_loss": 0.16896140575408936, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8801394617184997e-05, + "rewards/margins": 0.16316238045692444, + "rewards/rejected": -0.16318118572235107, + "step": 10008 + }, + { + "epoch": 6.921853388658368, + "grad_norm": 15.812283515930176, + "learning_rate": 1.7100814507453514e-05, + "log_odds_chosen": 10.900727272033691, + "log_odds_ratio": -4.0191374864662066e-05, + "logits/chosen": -0.17795416712760925, + "logits/rejected": -0.2042202651500702, + "logps/chosen": -0.00029529494349844754, + "logps/rejected": -2.5482981204986572, + "loss": 0.8083, + "nll_loss": 0.20208188891410828, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9529493986046873e-05, + "rewards/margins": 0.25480031967163086, + "rewards/rejected": -0.2548298239707947, + "step": 10009 + }, + { + "epoch": 6.922544951590595, + "grad_norm": 7.8032450675964355, + "learning_rate": 1.7096972491163363e-05, + "log_odds_chosen": 11.113170623779297, + "log_odds_ratio": -0.0002255002618767321, + "logits/chosen": -0.14331859350204468, + "logits/rejected": -0.16391247510910034, + "logps/chosen": -0.0001260231510968879, + "logps/rejected": -2.212287425994873, + "loss": 0.884, + "nll_loss": 0.22097471356391907, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2602315109688789e-05, + "rewards/margins": 0.2212161421775818, + "rewards/rejected": -0.22122874855995178, + "step": 10010 + }, + { + "epoch": 6.923236514522822, + "grad_norm": 5.020081043243408, + "learning_rate": 1.7093130474873216e-05, + "log_odds_chosen": 10.566305160522461, + "log_odds_ratio": -9.605865488993004e-05, + "logits/chosen": -0.05223175138235092, + "logits/rejected": -0.11202915012836456, + "logps/chosen": -0.0012542939512059093, + "logps/rejected": -2.5465216636657715, + "loss": 0.4485, + "nll_loss": 0.1121145635843277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012542940385174006, + "rewards/margins": 0.2545267343521118, + "rewards/rejected": -0.2546521723270416, + "step": 10011 + }, + { + "epoch": 6.923928077455049, + "grad_norm": 7.946537017822266, + "learning_rate": 1.7089288458583068e-05, + "log_odds_chosen": 11.411028861999512, + "log_odds_ratio": -3.746839865925722e-05, + "logits/chosen": -0.3238312005996704, + "logits/rejected": -0.3977673649787903, + "logps/chosen": -0.00012408751354087144, + "logps/rejected": -2.3582587242126465, + "loss": 0.4168, + "nll_loss": 0.10418720543384552, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2408750990289263e-05, + "rewards/margins": 0.2358134537935257, + "rewards/rejected": -0.23582588136196136, + "step": 10012 + }, + { + "epoch": 6.9246196403872755, + "grad_norm": 5.735523223876953, + "learning_rate": 1.7085446442292917e-05, + "log_odds_chosen": 10.758692741394043, + "log_odds_ratio": -0.00016033755673561245, + "logits/chosen": -0.42282581329345703, + "logits/rejected": -0.3419947922229767, + "logps/chosen": -0.00011035312491003424, + "logps/rejected": -1.7250794172286987, + "loss": 0.7007, + "nll_loss": 0.17514723539352417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1035313036700245e-05, + "rewards/margins": 0.17249691486358643, + "rewards/rejected": -0.17250794172286987, + "step": 10013 + }, + { + "epoch": 6.925311203319502, + "grad_norm": 5.317147254943848, + "learning_rate": 1.7081604426002766e-05, + "log_odds_chosen": 9.747225761413574, + "log_odds_ratio": -0.00041577807860448956, + "logits/chosen": -0.8432607650756836, + "logits/rejected": -0.8438321948051453, + "logps/chosen": -0.0006377776153385639, + "logps/rejected": -1.7046442031860352, + "loss": 0.474, + "nll_loss": 0.1184675395488739, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.377776298904791e-05, + "rewards/margins": 0.17040064930915833, + "rewards/rejected": -0.170464426279068, + "step": 10014 + }, + { + "epoch": 6.926002766251729, + "grad_norm": 5.1862711906433105, + "learning_rate": 1.707776240971262e-05, + "log_odds_chosen": 10.295071601867676, + "log_odds_ratio": -0.00014044718409422785, + "logits/chosen": -0.23374953866004944, + "logits/rejected": -0.22666972875595093, + "logps/chosen": -0.0014169241767376661, + "logps/rejected": -2.477402687072754, + "loss": 0.5488, + "nll_loss": 0.13718783855438232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014169240603223443, + "rewards/margins": 0.2475985735654831, + "rewards/rejected": -0.24774028360843658, + "step": 10015 + }, + { + "epoch": 6.926694329183956, + "grad_norm": 4.504415988922119, + "learning_rate": 1.7073920393422468e-05, + "log_odds_chosen": 10.581575393676758, + "log_odds_ratio": -4.251056452631019e-05, + "logits/chosen": -0.5650668144226074, + "logits/rejected": -0.6096177101135254, + "logps/chosen": -0.00013295267126522958, + "logps/rejected": -1.5700645446777344, + "loss": 0.2816, + "nll_loss": 0.07039927691221237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3295267308421899e-05, + "rewards/margins": 0.15699316561222076, + "rewards/rejected": -0.15700644254684448, + "step": 10016 + }, + { + "epoch": 6.927385892116183, + "grad_norm": 4.376059055328369, + "learning_rate": 1.707007837713232e-05, + "log_odds_chosen": 10.04145622253418, + "log_odds_ratio": -0.00011801601795013994, + "logits/chosen": -0.15814641118049622, + "logits/rejected": -0.174251988530159, + "logps/chosen": -0.0004231779312249273, + "logps/rejected": -1.86943781375885, + "loss": 0.4622, + "nll_loss": 0.11552843451499939, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.231779166730121e-05, + "rewards/margins": 0.18690146505832672, + "rewards/rejected": -0.18694376945495605, + "step": 10017 + }, + { + "epoch": 6.92807745504841, + "grad_norm": 5.4749836921691895, + "learning_rate": 1.7066236360842173e-05, + "log_odds_chosen": 10.348876953125, + "log_odds_ratio": -5.838269862579182e-05, + "logits/chosen": -0.050103262066841125, + "logits/rejected": -0.13943883776664734, + "logps/chosen": -0.0003169150440953672, + "logps/rejected": -2.1400392055511475, + "loss": 0.5455, + "nll_loss": 0.1363808512687683, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1691506592324004e-05, + "rewards/margins": 0.21397224068641663, + "rewards/rejected": -0.21400392055511475, + "step": 10018 + }, + { + "epoch": 6.9287690179806365, + "grad_norm": 5.5527496337890625, + "learning_rate": 1.706239434455202e-05, + "log_odds_chosen": 11.428709983825684, + "log_odds_ratio": -2.8784088499378413e-05, + "logits/chosen": -0.4300426244735718, + "logits/rejected": -0.44284480810165405, + "logps/chosen": -0.00032426012330688536, + "logps/rejected": -2.8826076984405518, + "loss": 0.6097, + "nll_loss": 0.1524135172367096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.242601451347582e-05, + "rewards/margins": 0.2882283329963684, + "rewards/rejected": -0.2882607579231262, + "step": 10019 + }, + { + "epoch": 6.929460580912863, + "grad_norm": 5.034395694732666, + "learning_rate": 1.7058552328261874e-05, + "log_odds_chosen": 11.60173225402832, + "log_odds_ratio": -5.259798854240216e-05, + "logits/chosen": -0.3198222219944, + "logits/rejected": -0.42940211296081543, + "logps/chosen": -0.00019729719497263432, + "logps/rejected": -3.0343990325927734, + "loss": 0.8083, + "nll_loss": 0.2020743191242218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9729721316252835e-05, + "rewards/margins": 0.30342015624046326, + "rewards/rejected": -0.3034399151802063, + "step": 10020 + }, + { + "epoch": 6.93015214384509, + "grad_norm": 7.2849555015563965, + "learning_rate": 1.7054710311971723e-05, + "log_odds_chosen": 11.120197296142578, + "log_odds_ratio": -3.993926657130942e-05, + "logits/chosen": -0.46323466300964355, + "logits/rejected": -0.38564297556877136, + "logps/chosen": -0.0003112137783318758, + "logps/rejected": -2.552988290786743, + "loss": 0.6821, + "nll_loss": 0.1705092489719391, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1121380743570626e-05, + "rewards/margins": 0.2552677094936371, + "rewards/rejected": -0.25529882311820984, + "step": 10021 + }, + { + "epoch": 6.930843706777317, + "grad_norm": 9.017226219177246, + "learning_rate": 1.7050868295681576e-05, + "log_odds_chosen": 11.707319259643555, + "log_odds_ratio": -7.7094002335798e-05, + "logits/chosen": -0.22343973815441132, + "logits/rejected": -0.31163230538368225, + "logps/chosen": -0.00026503336266614497, + "logps/rejected": -2.8297653198242188, + "loss": 0.4342, + "nll_loss": 0.10853277146816254, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6503335902816616e-05, + "rewards/margins": 0.2829500436782837, + "rewards/rejected": -0.28297656774520874, + "step": 10022 + }, + { + "epoch": 6.931535269709544, + "grad_norm": 4.966097831726074, + "learning_rate": 1.7047026279391425e-05, + "log_odds_chosen": 9.993123054504395, + "log_odds_ratio": -0.00042920681880787015, + "logits/chosen": -0.06338300555944443, + "logits/rejected": -0.0930364578962326, + "logps/chosen": -0.0007117825443856418, + "logps/rejected": -1.797957181930542, + "loss": 0.3926, + "nll_loss": 0.09811747819185257, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117826316971332e-05, + "rewards/margins": 0.1797245442867279, + "rewards/rejected": -0.17979572713375092, + "step": 10023 + }, + { + "epoch": 6.932226832641771, + "grad_norm": 8.300261497497559, + "learning_rate": 1.7043184263101274e-05, + "log_odds_chosen": 11.617762565612793, + "log_odds_ratio": -2.416789902781602e-05, + "logits/chosen": -0.48339706659317017, + "logits/rejected": -0.5057979822158813, + "logps/chosen": -0.0001829541870392859, + "logps/rejected": -2.64070463180542, + "loss": 0.4426, + "nll_loss": 0.11063584685325623, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.829541906772647e-05, + "rewards/margins": 0.264052152633667, + "rewards/rejected": -0.26407045125961304, + "step": 10024 + }, + { + "epoch": 6.9329183955739975, + "grad_norm": 4.101850986480713, + "learning_rate": 1.7039342246811126e-05, + "log_odds_chosen": 11.531643867492676, + "log_odds_ratio": -1.3064412996754982e-05, + "logits/chosen": -0.33082935214042664, + "logits/rejected": -0.37834471464157104, + "logps/chosen": -0.00018180490587837994, + "logps/rejected": -2.5283150672912598, + "loss": 0.3357, + "nll_loss": 0.0839347094297409, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8180489860242233e-05, + "rewards/margins": 0.25281333923339844, + "rewards/rejected": -0.25283151865005493, + "step": 10025 + }, + { + "epoch": 6.933609958506224, + "grad_norm": 8.496269226074219, + "learning_rate": 1.703550023052098e-05, + "log_odds_chosen": 11.259842872619629, + "log_odds_ratio": -2.703062455111649e-05, + "logits/chosen": -0.5049582719802856, + "logits/rejected": -0.4686523377895355, + "logps/chosen": -0.00018693049787543714, + "logps/rejected": -2.6714820861816406, + "loss": 0.9831, + "nll_loss": 0.24576500058174133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8693050151341595e-05, + "rewards/margins": 0.267129510641098, + "rewards/rejected": -0.2671481966972351, + "step": 10026 + }, + { + "epoch": 6.934301521438451, + "grad_norm": 6.2408223152160645, + "learning_rate": 1.7031658214230828e-05, + "log_odds_chosen": 11.580894470214844, + "log_odds_ratio": -1.513993993285112e-05, + "logits/chosen": -0.4473365247249603, + "logits/rejected": -0.402547687292099, + "logps/chosen": -0.0002027477603405714, + "logps/rejected": -2.6497583389282227, + "loss": 1.268, + "nll_loss": 0.31700828671455383, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.027477603405714e-05, + "rewards/margins": 0.2649555802345276, + "rewards/rejected": -0.2649758458137512, + "step": 10027 + }, + { + "epoch": 6.934993084370678, + "grad_norm": 4.515257358551025, + "learning_rate": 1.702781619794068e-05, + "log_odds_chosen": 9.019081115722656, + "log_odds_ratio": -0.0010447325184941292, + "logits/chosen": -0.32567375898361206, + "logits/rejected": -0.3403196930885315, + "logps/chosen": -0.0007029250264167786, + "logps/rejected": -1.4702303409576416, + "loss": 0.6056, + "nll_loss": 0.15129372477531433, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.029250991763547e-05, + "rewards/margins": 0.14695274829864502, + "rewards/rejected": -0.1470230221748352, + "step": 10028 + }, + { + "epoch": 6.935684647302905, + "grad_norm": 5.426224231719971, + "learning_rate": 1.7023974181650533e-05, + "log_odds_chosen": 10.142989158630371, + "log_odds_ratio": -0.0001359380839858204, + "logits/chosen": -0.6201168298721313, + "logits/rejected": -0.624829888343811, + "logps/chosen": -0.00029378788894973695, + "logps/rejected": -2.0749104022979736, + "loss": 0.5595, + "nll_loss": 0.13986918330192566, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9378788894973695e-05, + "rewards/margins": 0.2074616551399231, + "rewards/rejected": -0.20749104022979736, + "step": 10029 + }, + { + "epoch": 6.936376210235132, + "grad_norm": 6.71488618850708, + "learning_rate": 1.702013216536038e-05, + "log_odds_chosen": 9.412338256835938, + "log_odds_ratio": -0.0008309513796120882, + "logits/chosen": -0.765308141708374, + "logits/rejected": -0.7624964714050293, + "logps/chosen": -0.0015413952060043812, + "logps/rejected": -1.7514090538024902, + "loss": 0.3268, + "nll_loss": 0.08161558210849762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015413951769005507, + "rewards/margins": 0.17498676478862762, + "rewards/rejected": -0.17514090240001678, + "step": 10030 + }, + { + "epoch": 6.9370677731673585, + "grad_norm": 4.619319438934326, + "learning_rate": 1.7016290149070234e-05, + "log_odds_chosen": 11.157760620117188, + "log_odds_ratio": -8.181616431102157e-05, + "logits/chosen": -0.22801737487316132, + "logits/rejected": -0.4726010859012604, + "logps/chosen": -0.0005426481948234141, + "logps/rejected": -2.3585970401763916, + "loss": 0.6443, + "nll_loss": 0.16105622053146362, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426481948234141e-05, + "rewards/margins": 0.2358054369688034, + "rewards/rejected": -0.2358597218990326, + "step": 10031 + }, + { + "epoch": 6.937759336099585, + "grad_norm": 8.809869766235352, + "learning_rate": 1.7012448132780083e-05, + "log_odds_chosen": 11.043230056762695, + "log_odds_ratio": -5.463225534185767e-05, + "logits/chosen": -0.5463330745697021, + "logits/rejected": -0.6079367399215698, + "logps/chosen": -0.00029846368124708533, + "logps/rejected": -2.803921937942505, + "loss": 0.649, + "nll_loss": 0.1622409075498581, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9846369216102175e-05, + "rewards/margins": 0.2803623378276825, + "rewards/rejected": -0.28039219975471497, + "step": 10032 + }, + { + "epoch": 6.938450899031812, + "grad_norm": 5.242898464202881, + "learning_rate": 1.7008606116489932e-05, + "log_odds_chosen": 10.950862884521484, + "log_odds_ratio": -6.454918184317648e-05, + "logits/chosen": -0.547526478767395, + "logits/rejected": -0.5974233150482178, + "logps/chosen": -0.00021357613150030375, + "logps/rejected": -2.4515743255615234, + "loss": 0.5296, + "nll_loss": 0.13239407539367676, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1357613150030375e-05, + "rewards/margins": 0.2451360672712326, + "rewards/rejected": -0.2451574206352234, + "step": 10033 + }, + { + "epoch": 6.939142461964039, + "grad_norm": 5.736464023590088, + "learning_rate": 1.7004764100199785e-05, + "log_odds_chosen": 9.807477951049805, + "log_odds_ratio": -0.0001616263180039823, + "logits/chosen": -0.6837047338485718, + "logits/rejected": -0.7070760726928711, + "logps/chosen": -0.0004362165054772049, + "logps/rejected": -1.9144620895385742, + "loss": 0.558, + "nll_loss": 0.13947302103042603, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362165054772049e-05, + "rewards/margins": 0.1914026141166687, + "rewards/rejected": -0.1914462298154831, + "step": 10034 + }, + { + "epoch": 6.939834024896266, + "grad_norm": 7.313083648681641, + "learning_rate": 1.7000922083909637e-05, + "log_odds_chosen": 11.469751358032227, + "log_odds_ratio": -1.8743656255537644e-05, + "logits/chosen": -0.35865500569343567, + "logits/rejected": -0.46712803840637207, + "logps/chosen": -0.000137790892040357, + "logps/rejected": -2.088139533996582, + "loss": 0.6775, + "nll_loss": 0.1693638563156128, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.377908938593464e-05, + "rewards/margins": 0.20880015194416046, + "rewards/rejected": -0.20881393551826477, + "step": 10035 + }, + { + "epoch": 6.940525587828493, + "grad_norm": 11.915936470031738, + "learning_rate": 1.6997080067619486e-05, + "log_odds_chosen": 10.631695747375488, + "log_odds_ratio": -5.5690161389065906e-05, + "logits/chosen": -0.1930725872516632, + "logits/rejected": -0.26618435978889465, + "logps/chosen": -0.0003762371779885143, + "logps/rejected": -2.718092918395996, + "loss": 0.535, + "nll_loss": 0.1337505578994751, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.762371852644719e-05, + "rewards/margins": 0.2717716693878174, + "rewards/rejected": -0.27180930972099304, + "step": 10036 + }, + { + "epoch": 6.941217150760719, + "grad_norm": 7.780851364135742, + "learning_rate": 1.699323805132934e-05, + "log_odds_chosen": 11.143234252929688, + "log_odds_ratio": -0.0001096235791919753, + "logits/chosen": -0.598465085029602, + "logits/rejected": -0.6562224626541138, + "logps/chosen": -0.00016340528964065015, + "logps/rejected": -2.1264867782592773, + "loss": 0.5632, + "nll_loss": 0.14078933000564575, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6340527508873492e-05, + "rewards/margins": 0.21263231337070465, + "rewards/rejected": -0.2126486748456955, + "step": 10037 + }, + { + "epoch": 6.941908713692946, + "grad_norm": 7.58236837387085, + "learning_rate": 1.698939603503919e-05, + "log_odds_chosen": 11.076812744140625, + "log_odds_ratio": -0.00014322507195174694, + "logits/chosen": -0.20436197519302368, + "logits/rejected": -0.21737417578697205, + "logps/chosen": -0.0005082335555925965, + "logps/rejected": -3.1508872509002686, + "loss": 0.8379, + "nll_loss": 0.20945268869400024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0823357014451176e-05, + "rewards/margins": 0.31503787636756897, + "rewards/rejected": -0.31508874893188477, + "step": 10038 + }, + { + "epoch": 6.942600276625173, + "grad_norm": 5.023095607757568, + "learning_rate": 1.698555401874904e-05, + "log_odds_chosen": 10.241018295288086, + "log_odds_ratio": -0.0002668978413566947, + "logits/chosen": -0.1428799033164978, + "logits/rejected": -0.18114995956420898, + "logps/chosen": -0.00031494133872911334, + "logps/rejected": -1.6014385223388672, + "loss": 0.4756, + "nll_loss": 0.11888079345226288, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.149413532810286e-05, + "rewards/margins": 0.16011235117912292, + "rewards/rejected": -0.1601438671350479, + "step": 10039 + }, + { + "epoch": 6.9432918395574, + "grad_norm": 7.06999397277832, + "learning_rate": 1.6981712002458892e-05, + "log_odds_chosen": 9.103922843933105, + "log_odds_ratio": -0.00024017822579480708, + "logits/chosen": -0.24257206916809082, + "logits/rejected": -0.24254654347896576, + "logps/chosen": -0.0010004019131883979, + "logps/rejected": -1.7234010696411133, + "loss": 0.5883, + "nll_loss": 0.14705629646778107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010004018986364827, + "rewards/margins": 0.17224006354808807, + "rewards/rejected": -0.17234010994434357, + "step": 10040 + }, + { + "epoch": 6.943983402489627, + "grad_norm": 5.003592491149902, + "learning_rate": 1.697786998616874e-05, + "log_odds_chosen": 10.527713775634766, + "log_odds_ratio": -8.007455471670255e-05, + "logits/chosen": -0.3677080273628235, + "logits/rejected": -0.2869381904602051, + "logps/chosen": -0.000419251446146518, + "logps/rejected": -2.2190418243408203, + "loss": 0.5501, + "nll_loss": 0.13752399384975433, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.19251446146518e-05, + "rewards/margins": 0.22186227142810822, + "rewards/rejected": -0.2219042032957077, + "step": 10041 + }, + { + "epoch": 6.944674965421854, + "grad_norm": 11.091449737548828, + "learning_rate": 1.697402796987859e-05, + "log_odds_chosen": 12.04973030090332, + "log_odds_ratio": -1.0531531188462395e-05, + "logits/chosen": 0.24184373021125793, + "logits/rejected": 0.1998082995414734, + "logps/chosen": -0.00014928578457329422, + "logps/rejected": -3.006049871444702, + "loss": 1.1853, + "nll_loss": 0.2963164448738098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4928579730622005e-05, + "rewards/margins": 0.30059006810188293, + "rewards/rejected": -0.30060499906539917, + "step": 10042 + }, + { + "epoch": 6.94536652835408, + "grad_norm": 5.952218055725098, + "learning_rate": 1.6970185953588443e-05, + "log_odds_chosen": 9.409114837646484, + "log_odds_ratio": -0.00029461667872965336, + "logits/chosen": -0.48497089743614197, + "logits/rejected": -0.4794895052909851, + "logps/chosen": -0.0006209624698385596, + "logps/rejected": -1.867590308189392, + "loss": 0.5388, + "nll_loss": 0.13466249406337738, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.209625280462205e-05, + "rewards/margins": 0.18669693171977997, + "rewards/rejected": -0.18675902485847473, + "step": 10043 + }, + { + "epoch": 6.946058091286307, + "grad_norm": 6.226381301879883, + "learning_rate": 1.6966343937298295e-05, + "log_odds_chosen": 11.209342956542969, + "log_odds_ratio": -2.317272446816787e-05, + "logits/chosen": -0.49453210830688477, + "logits/rejected": -0.4661957621574402, + "logps/chosen": -0.0004177080118097365, + "logps/rejected": -2.362186908721924, + "loss": 0.4943, + "nll_loss": 0.12356767803430557, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.177080700173974e-05, + "rewards/margins": 0.23617693781852722, + "rewards/rejected": -0.23621870577335358, + "step": 10044 + }, + { + "epoch": 6.946749654218534, + "grad_norm": 6.473517417907715, + "learning_rate": 1.6962501921008145e-05, + "log_odds_chosen": 10.863648414611816, + "log_odds_ratio": -2.934444637503475e-05, + "logits/chosen": -0.4041895866394043, + "logits/rejected": -0.44918423891067505, + "logps/chosen": -0.00010348320938646793, + "logps/rejected": -1.6501774787902832, + "loss": 0.4861, + "nll_loss": 0.12151362746953964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0348320756747853e-05, + "rewards/margins": 0.16500739753246307, + "rewards/rejected": -0.1650177538394928, + "step": 10045 + }, + { + "epoch": 6.947441217150761, + "grad_norm": 8.3028564453125, + "learning_rate": 1.6958659904717997e-05, + "log_odds_chosen": 10.222135543823242, + "log_odds_ratio": -5.702318958356045e-05, + "logits/chosen": 0.084259532392025, + "logits/rejected": 0.013848934322595596, + "logps/chosen": -0.0004253485822118819, + "logps/rejected": -1.8415899276733398, + "loss": 0.5054, + "nll_loss": 0.12634600698947906, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253485894878395e-05, + "rewards/margins": 0.18411648273468018, + "rewards/rejected": -0.18415901064872742, + "step": 10046 + }, + { + "epoch": 6.948132780082988, + "grad_norm": 5.550440311431885, + "learning_rate": 1.695481788842785e-05, + "log_odds_chosen": 10.537508964538574, + "log_odds_ratio": -0.00010150601156055927, + "logits/chosen": -0.6768600344657898, + "logits/rejected": -0.6871519088745117, + "logps/chosen": -0.0002947281172964722, + "logps/rejected": -2.1085243225097656, + "loss": 0.5136, + "nll_loss": 0.12838245928287506, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.947281245724298e-05, + "rewards/margins": 0.2108229696750641, + "rewards/rejected": -0.21085244417190552, + "step": 10047 + }, + { + "epoch": 6.948824343015215, + "grad_norm": 4.045454978942871, + "learning_rate": 1.69509758721377e-05, + "log_odds_chosen": 9.82114028930664, + "log_odds_ratio": -0.00017208814097102731, + "logits/chosen": -0.4040992259979248, + "logits/rejected": -0.5197563171386719, + "logps/chosen": -0.00045935483649373055, + "logps/rejected": -1.638375997543335, + "loss": 0.3924, + "nll_loss": 0.09808853268623352, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.593547782860696e-05, + "rewards/margins": 0.16379165649414062, + "rewards/rejected": -0.16383758187294006, + "step": 10048 + }, + { + "epoch": 6.949515905947441, + "grad_norm": 6.038547515869141, + "learning_rate": 1.694713385584755e-05, + "log_odds_chosen": 11.265363693237305, + "log_odds_ratio": -3.956862565246411e-05, + "logits/chosen": -0.2933599352836609, + "logits/rejected": -0.4178452789783478, + "logps/chosen": -0.00016355025582015514, + "logps/rejected": -2.243529796600342, + "loss": 0.9845, + "nll_loss": 0.24611742794513702, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6355026673409157e-05, + "rewards/margins": 0.224336639046669, + "rewards/rejected": -0.22435298562049866, + "step": 10049 + }, + { + "epoch": 6.950207468879668, + "grad_norm": 6.056111812591553, + "learning_rate": 1.69432918395574e-05, + "log_odds_chosen": 11.285694122314453, + "log_odds_ratio": -3.228627974749543e-05, + "logits/chosen": 0.26198089122772217, + "logits/rejected": 0.2688486576080322, + "logps/chosen": -0.0002590077347122133, + "logps/rejected": -2.4489030838012695, + "loss": 0.8032, + "nll_loss": 0.20079305768013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5900772016029805e-05, + "rewards/margins": 0.24486443400382996, + "rewards/rejected": -0.24489031732082367, + "step": 10050 + }, + { + "epoch": 6.950899031811895, + "grad_norm": 7.738328456878662, + "learning_rate": 1.693944982326725e-05, + "log_odds_chosen": 12.397817611694336, + "log_odds_ratio": -1.8989923773915507e-05, + "logits/chosen": -0.41316238045692444, + "logits/rejected": -0.4161011278629303, + "logps/chosen": -0.0001298328279517591, + "logps/rejected": -3.1106667518615723, + "loss": 0.6079, + "nll_loss": 0.1519773155450821, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.298328243137803e-05, + "rewards/margins": 0.31105366349220276, + "rewards/rejected": -0.3110666573047638, + "step": 10051 + }, + { + "epoch": 6.951590594744122, + "grad_norm": 6.873595714569092, + "learning_rate": 1.69356078069771e-05, + "log_odds_chosen": 9.874351501464844, + "log_odds_ratio": -0.00029799286858178675, + "logits/chosen": -0.18772411346435547, + "logits/rejected": -0.17775577306747437, + "logps/chosen": -0.0006274134502746165, + "logps/rejected": -2.138361930847168, + "loss": 1.1283, + "nll_loss": 0.28204089403152466, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.274134648265317e-05, + "rewards/margins": 0.2137734293937683, + "rewards/rejected": -0.2138361781835556, + "step": 10052 + }, + { + "epoch": 6.952282157676349, + "grad_norm": 3.5054914951324463, + "learning_rate": 1.6931765790686954e-05, + "log_odds_chosen": 11.076932907104492, + "log_odds_ratio": -8.66325935930945e-05, + "logits/chosen": 0.15985512733459473, + "logits/rejected": 0.1510522961616516, + "logps/chosen": -0.0002725853701122105, + "logps/rejected": -2.564537286758423, + "loss": 0.7468, + "nll_loss": 0.18668492138385773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7258538466412574e-05, + "rewards/margins": 0.25642648339271545, + "rewards/rejected": -0.2564537525177002, + "step": 10053 + }, + { + "epoch": 6.9529737206085755, + "grad_norm": 8.001506805419922, + "learning_rate": 1.6927923774396803e-05, + "log_odds_chosen": 11.1409912109375, + "log_odds_ratio": -2.999947901116684e-05, + "logits/chosen": -0.5246400237083435, + "logits/rejected": -0.5274342894554138, + "logps/chosen": -0.00032147939782589674, + "logps/rejected": -2.5922746658325195, + "loss": 0.6247, + "nll_loss": 0.15617889165878296, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.214793832739815e-05, + "rewards/margins": 0.25919532775878906, + "rewards/rejected": -0.259227454662323, + "step": 10054 + }, + { + "epoch": 6.953665283540802, + "grad_norm": 5.166790962219238, + "learning_rate": 1.6924081758106655e-05, + "log_odds_chosen": 9.752410888671875, + "log_odds_ratio": -0.00018502950842957944, + "logits/chosen": -0.11179126054048538, + "logits/rejected": -0.06541785597801208, + "logps/chosen": -0.0007403604686260223, + "logps/rejected": -1.681820273399353, + "loss": 0.3964, + "nll_loss": 0.0990731418132782, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.403604104183614e-05, + "rewards/margins": 0.1681079864501953, + "rewards/rejected": -0.16818203032016754, + "step": 10055 + }, + { + "epoch": 6.954356846473029, + "grad_norm": 6.174735069274902, + "learning_rate": 1.6920239741816508e-05, + "log_odds_chosen": 10.701284408569336, + "log_odds_ratio": -0.0003483596374280751, + "logits/chosen": -0.37049442529678345, + "logits/rejected": -0.44730401039123535, + "logps/chosen": -0.00043324686703272164, + "logps/rejected": -2.10621976852417, + "loss": 0.7395, + "nll_loss": 0.1848517507314682, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.33246859756764e-05, + "rewards/margins": 0.21057865023612976, + "rewards/rejected": -0.21062195301055908, + "step": 10056 + }, + { + "epoch": 6.955048409405256, + "grad_norm": 3.980576753616333, + "learning_rate": 1.6916397725526357e-05, + "log_odds_chosen": 9.119756698608398, + "log_odds_ratio": -0.0003259534714743495, + "logits/chosen": -0.7593079805374146, + "logits/rejected": -0.7276464104652405, + "logps/chosen": -0.0003143846115563065, + "logps/rejected": -1.3028907775878906, + "loss": 0.4762, + "nll_loss": 0.11900606751441956, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.143846333841793e-05, + "rewards/margins": 0.13025765120983124, + "rewards/rejected": -0.13028909265995026, + "step": 10057 + }, + { + "epoch": 6.955739972337483, + "grad_norm": 6.947946071624756, + "learning_rate": 1.691255570923621e-05, + "log_odds_chosen": 10.878345489501953, + "log_odds_ratio": -6.959411257412285e-05, + "logits/chosen": -0.43439775705337524, + "logits/rejected": -0.48729392886161804, + "logps/chosen": -0.0004633645003195852, + "logps/rejected": -2.565565586090088, + "loss": 0.6566, + "nll_loss": 0.1641518920660019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6336448576767e-05, + "rewards/margins": 0.25651025772094727, + "rewards/rejected": -0.25655657052993774, + "step": 10058 + }, + { + "epoch": 6.95643153526971, + "grad_norm": 2.9592649936676025, + "learning_rate": 1.690871369294606e-05, + "log_odds_chosen": 10.088553428649902, + "log_odds_ratio": -0.00013400233001448214, + "logits/chosen": -0.2962646186351776, + "logits/rejected": -0.3382180333137512, + "logps/chosen": -0.0004178662784397602, + "logps/rejected": -2.0320756435394287, + "loss": 0.4795, + "nll_loss": 0.11986919492483139, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.178662857157178e-05, + "rewards/margins": 0.20316576957702637, + "rewards/rejected": -0.20320755243301392, + "step": 10059 + }, + { + "epoch": 6.9571230982019365, + "grad_norm": 3.8344273567199707, + "learning_rate": 1.6904871676655907e-05, + "log_odds_chosen": 10.093937873840332, + "log_odds_ratio": -0.00031608311110176146, + "logits/chosen": -0.22365032136440277, + "logits/rejected": -0.47377943992614746, + "logps/chosen": -0.0006390301277860999, + "logps/rejected": -2.3930559158325195, + "loss": 0.4028, + "nll_loss": 0.10067568719387054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.390301132341847e-05, + "rewards/margins": 0.23924173414707184, + "rewards/rejected": -0.23930561542510986, + "step": 10060 + }, + { + "epoch": 6.957814661134163, + "grad_norm": 8.053756713867188, + "learning_rate": 1.690102966036576e-05, + "log_odds_chosen": 10.189164161682129, + "log_odds_ratio": -0.0001274641981581226, + "logits/chosen": 0.14860232174396515, + "logits/rejected": 0.004969865083694458, + "logps/chosen": -0.0002809629950206727, + "logps/rejected": -1.9008369445800781, + "loss": 0.7519, + "nll_loss": 0.18795260787010193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8096299502067268e-05, + "rewards/margins": 0.19005560874938965, + "rewards/rejected": -0.19008369743824005, + "step": 10061 + }, + { + "epoch": 6.95850622406639, + "grad_norm": 5.632915019989014, + "learning_rate": 1.6897187644075612e-05, + "log_odds_chosen": 11.19343376159668, + "log_odds_ratio": -7.442128844559193e-05, + "logits/chosen": -0.1333615630865097, + "logits/rejected": -0.2802311182022095, + "logps/chosen": -0.00037782572326250374, + "logps/rejected": -2.7057318687438965, + "loss": 0.5091, + "nll_loss": 0.12727561593055725, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.778257087105885e-05, + "rewards/margins": 0.270535409450531, + "rewards/rejected": -0.2705731987953186, + "step": 10062 + }, + { + "epoch": 6.959197786998617, + "grad_norm": 4.994719505310059, + "learning_rate": 1.689334562778546e-05, + "log_odds_chosen": 10.285367965698242, + "log_odds_ratio": -5.1024078857153654e-05, + "logits/chosen": -0.4173157811164856, + "logits/rejected": -0.36329323053359985, + "logps/chosen": -0.0005839330260641873, + "logps/rejected": -2.3260738849639893, + "loss": 0.4606, + "nll_loss": 0.11514244973659515, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.839329969603568e-05, + "rewards/margins": 0.2325490117073059, + "rewards/rejected": -0.2326073944568634, + "step": 10063 + }, + { + "epoch": 6.959889349930844, + "grad_norm": 5.117969989776611, + "learning_rate": 1.6889503611495314e-05, + "log_odds_chosen": 10.772396087646484, + "log_odds_ratio": -6.265474075917155e-05, + "logits/chosen": -0.29701873660087585, + "logits/rejected": -0.32611095905303955, + "logps/chosen": -0.00026552981580607593, + "logps/rejected": -2.4106943607330322, + "loss": 0.5858, + "nll_loss": 0.1464328020811081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6552981580607593e-05, + "rewards/margins": 0.2410428822040558, + "rewards/rejected": -0.24106942117214203, + "step": 10064 + }, + { + "epoch": 6.960580912863071, + "grad_norm": 6.523797512054443, + "learning_rate": 1.6885661595205166e-05, + "log_odds_chosen": 10.308741569519043, + "log_odds_ratio": -7.950417784741148e-05, + "logits/chosen": -0.590244472026825, + "logits/rejected": -0.44279173016548157, + "logps/chosen": -0.00033462955616414547, + "logps/rejected": -1.7902774810791016, + "loss": 0.527, + "nll_loss": 0.13175415992736816, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.346295852679759e-05, + "rewards/margins": 0.178994283080101, + "rewards/rejected": -0.1790277510881424, + "step": 10065 + }, + { + "epoch": 6.9612724757952975, + "grad_norm": 4.181301116943359, + "learning_rate": 1.6881819578915015e-05, + "log_odds_chosen": 10.008706092834473, + "log_odds_ratio": -0.00012131897528888658, + "logits/chosen": -0.2146802544593811, + "logits/rejected": -0.37969833612442017, + "logps/chosen": -0.0010558163048699498, + "logps/rejected": -1.8638464212417603, + "loss": 0.4958, + "nll_loss": 0.12394772469997406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001055816319421865, + "rewards/margins": 0.1862790733575821, + "rewards/rejected": -0.1863846480846405, + "step": 10066 + }, + { + "epoch": 6.961964038727524, + "grad_norm": 5.340941429138184, + "learning_rate": 1.6877977562624868e-05, + "log_odds_chosen": 10.816205978393555, + "log_odds_ratio": -0.00027935803518630564, + "logits/chosen": 0.5234330892562866, + "logits/rejected": 0.419673889875412, + "logps/chosen": -0.0005839740042574704, + "logps/rejected": -2.2605462074279785, + "loss": 0.6266, + "nll_loss": 0.15661442279815674, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8397399698151276e-05, + "rewards/margins": 0.2259962111711502, + "rewards/rejected": -0.2260546237230301, + "step": 10067 + }, + { + "epoch": 6.962655601659751, + "grad_norm": 6.287302494049072, + "learning_rate": 1.6874135546334717e-05, + "log_odds_chosen": 10.874994277954102, + "log_odds_ratio": -0.00023676344426348805, + "logits/chosen": -0.2309369295835495, + "logits/rejected": -0.3357173800468445, + "logps/chosen": -0.00018939608708024025, + "logps/rejected": -2.4723055362701416, + "loss": 1.0141, + "nll_loss": 0.25348982214927673, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8939608708024025e-05, + "rewards/margins": 0.24721162021160126, + "rewards/rejected": -0.24723055958747864, + "step": 10068 + }, + { + "epoch": 6.963347164591978, + "grad_norm": 7.557071685791016, + "learning_rate": 1.6870293530044566e-05, + "log_odds_chosen": 11.015689849853516, + "log_odds_ratio": -2.897938793466892e-05, + "logits/chosen": -0.3422916531562805, + "logits/rejected": -0.27559277415275574, + "logps/chosen": -0.0001115235936595127, + "logps/rejected": -1.888420581817627, + "loss": 0.4493, + "nll_loss": 0.1123114749789238, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.115235954785021e-05, + "rewards/margins": 0.1888309121131897, + "rewards/rejected": -0.1888420581817627, + "step": 10069 + }, + { + "epoch": 6.964038727524205, + "grad_norm": 5.31990385055542, + "learning_rate": 1.686645151375442e-05, + "log_odds_chosen": 10.67734146118164, + "log_odds_ratio": -6.908691284479573e-05, + "logits/chosen": -0.11804617941379547, + "logits/rejected": -0.43072015047073364, + "logps/chosen": -0.00025707035092636943, + "logps/rejected": -2.150763988494873, + "loss": 0.7585, + "nll_loss": 0.18962499499320984, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5707035092636943e-05, + "rewards/margins": 0.21505066752433777, + "rewards/rejected": -0.21507638692855835, + "step": 10070 + }, + { + "epoch": 6.964730290456432, + "grad_norm": 4.534579753875732, + "learning_rate": 1.686260949746427e-05, + "log_odds_chosen": 11.683751106262207, + "log_odds_ratio": -2.2830068701296113e-05, + "logits/chosen": -0.5978186130523682, + "logits/rejected": -0.578332781791687, + "logps/chosen": -9.108192170970142e-05, + "logps/rejected": -2.3900818824768066, + "loss": 0.4515, + "nll_loss": 0.11287988722324371, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.108191989071202e-06, + "rewards/margins": 0.23899908363819122, + "rewards/rejected": -0.23900818824768066, + "step": 10071 + }, + { + "epoch": 6.9654218533886585, + "grad_norm": 6.28562068939209, + "learning_rate": 1.685876748117412e-05, + "log_odds_chosen": 11.651711463928223, + "log_odds_ratio": -1.8427983377478085e-05, + "logits/chosen": -0.5274204015731812, + "logits/rejected": -0.525452196598053, + "logps/chosen": -0.0001939109934028238, + "logps/rejected": -2.805175542831421, + "loss": 0.5491, + "nll_loss": 0.13727723062038422, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.939109824888874e-05, + "rewards/margins": 0.2804981470108032, + "rewards/rejected": -0.2805175483226776, + "step": 10072 + }, + { + "epoch": 6.966113416320885, + "grad_norm": 12.471409797668457, + "learning_rate": 1.6854925464883972e-05, + "log_odds_chosen": 10.029308319091797, + "log_odds_ratio": -8.028361480683088e-05, + "logits/chosen": -0.35297563672065735, + "logits/rejected": -0.4903620481491089, + "logps/chosen": -0.0004890944110229611, + "logps/rejected": -2.2122983932495117, + "loss": 0.5236, + "nll_loss": 0.13088931143283844, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.890943819191307e-05, + "rewards/margins": 0.22118094563484192, + "rewards/rejected": -0.22122985124588013, + "step": 10073 + }, + { + "epoch": 6.966804979253112, + "grad_norm": 13.626130104064941, + "learning_rate": 1.6851083448593825e-05, + "log_odds_chosen": 10.919111251831055, + "log_odds_ratio": -5.1238042942713946e-05, + "logits/chosen": -0.6567732691764832, + "logits/rejected": -0.6847638487815857, + "logps/chosen": -0.00015020312275737524, + "logps/rejected": -2.0090315341949463, + "loss": 0.4309, + "nll_loss": 0.10771311819553375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5020313185232226e-05, + "rewards/margins": 0.20088812708854675, + "rewards/rejected": -0.20090316236019135, + "step": 10074 + }, + { + "epoch": 6.967496542185339, + "grad_norm": 6.219060897827148, + "learning_rate": 1.6847241432303674e-05, + "log_odds_chosen": 10.488255500793457, + "log_odds_ratio": -4.7013538278406486e-05, + "logits/chosen": 0.17941254377365112, + "logits/rejected": -0.006448574364185333, + "logps/chosen": -0.00015892702504061162, + "logps/rejected": -1.7068005800247192, + "loss": 0.4064, + "nll_loss": 0.10159176588058472, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.589270141266752e-05, + "rewards/margins": 0.17066416144371033, + "rewards/rejected": -0.17068007588386536, + "step": 10075 + }, + { + "epoch": 6.968188105117566, + "grad_norm": 6.882790565490723, + "learning_rate": 1.6843399416013526e-05, + "log_odds_chosen": 10.748775482177734, + "log_odds_ratio": -3.974856008426286e-05, + "logits/chosen": -0.5364277362823486, + "logits/rejected": -0.6157901287078857, + "logps/chosen": -0.0003205241519026458, + "logps/rejected": -2.2799644470214844, + "loss": 0.5032, + "nll_loss": 0.12580451369285583, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2052415917860344e-05, + "rewards/margins": 0.2279644012451172, + "rewards/rejected": -0.22799643874168396, + "step": 10076 + }, + { + "epoch": 6.968879668049793, + "grad_norm": 8.474969863891602, + "learning_rate": 1.6839557399723375e-05, + "log_odds_chosen": 11.371885299682617, + "log_odds_ratio": -3.51688067894429e-05, + "logits/chosen": -0.451709508895874, + "logits/rejected": -0.5114191770553589, + "logps/chosen": -0.00017953138740267605, + "logps/rejected": -2.330127239227295, + "loss": 0.4933, + "nll_loss": 0.12332789599895477, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7953139831661247e-05, + "rewards/margins": 0.23299476504325867, + "rewards/rejected": -0.23301272094249725, + "step": 10077 + }, + { + "epoch": 6.9695712309820195, + "grad_norm": 4.735933780670166, + "learning_rate": 1.6835715383433224e-05, + "log_odds_chosen": 11.441869735717773, + "log_odds_ratio": -7.0574002165813e-05, + "logits/chosen": -0.39923131465911865, + "logits/rejected": -0.4018963575363159, + "logps/chosen": -0.0002980951394420117, + "logps/rejected": -2.8218677043914795, + "loss": 1.1981, + "nll_loss": 0.2995148003101349, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9809514671796933e-05, + "rewards/margins": 0.28215694427490234, + "rewards/rejected": -0.28218674659729004, + "step": 10078 + }, + { + "epoch": 6.970262793914246, + "grad_norm": 7.076903820037842, + "learning_rate": 1.6831873367143077e-05, + "log_odds_chosen": 10.777301788330078, + "log_odds_ratio": -3.476179335848428e-05, + "logits/chosen": -0.6153938174247742, + "logits/rejected": -0.6085329651832581, + "logps/chosen": -0.0001969627192011103, + "logps/rejected": -2.048153877258301, + "loss": 0.6916, + "nll_loss": 0.1728864461183548, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.969627192011103e-05, + "rewards/margins": 0.20479567348957062, + "rewards/rejected": -0.20481537282466888, + "step": 10079 + }, + { + "epoch": 6.970954356846473, + "grad_norm": 4.815555572509766, + "learning_rate": 1.682803135085293e-05, + "log_odds_chosen": 9.752076148986816, + "log_odds_ratio": -0.0008670328534208238, + "logits/chosen": -0.5131319761276245, + "logits/rejected": -0.5436765551567078, + "logps/chosen": -0.001340696937404573, + "logps/rejected": -1.7009871006011963, + "loss": 0.4662, + "nll_loss": 0.11645921319723129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013406967627815902, + "rewards/margins": 0.16996464133262634, + "rewards/rejected": -0.1700986921787262, + "step": 10080 + }, + { + "epoch": 6.9716459197787, + "grad_norm": 5.930548667907715, + "learning_rate": 1.6824189334562778e-05, + "log_odds_chosen": 10.698500633239746, + "log_odds_ratio": -3.7088189856149256e-05, + "logits/chosen": -0.7052226662635803, + "logits/rejected": -0.7362067699432373, + "logps/chosen": -0.00012290122685953975, + "logps/rejected": -1.7287907600402832, + "loss": 0.6351, + "nll_loss": 0.15876275300979614, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2290122867852915e-05, + "rewards/margins": 0.1728667914867401, + "rewards/rejected": -0.17287908494472504, + "step": 10081 + }, + { + "epoch": 6.972337482710927, + "grad_norm": 5.158050060272217, + "learning_rate": 1.682034731827263e-05, + "log_odds_chosen": 10.191957473754883, + "log_odds_ratio": -0.0007288920460268855, + "logits/chosen": -0.7901021838188171, + "logits/rejected": -0.7060337066650391, + "logps/chosen": -0.0007514750468544662, + "logps/rejected": -2.1284823417663574, + "loss": 0.5622, + "nll_loss": 0.1404794603586197, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.514750177506357e-05, + "rewards/margins": 0.21277309954166412, + "rewards/rejected": -0.2128482460975647, + "step": 10082 + }, + { + "epoch": 6.973029045643154, + "grad_norm": 4.94846248626709, + "learning_rate": 1.6816505301982483e-05, + "log_odds_chosen": 11.54309368133545, + "log_odds_ratio": -1.1242198524996638e-05, + "logits/chosen": -0.23920166492462158, + "logits/rejected": -0.3178488314151764, + "logps/chosen": -0.00024875771487131715, + "logps/rejected": -2.5369608402252197, + "loss": 0.5647, + "nll_loss": 0.14116451144218445, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4875771487131715e-05, + "rewards/margins": 0.25367119908332825, + "rewards/rejected": -0.253696084022522, + "step": 10083 + }, + { + "epoch": 6.9737206085753805, + "grad_norm": 7.300533771514893, + "learning_rate": 1.6812663285692332e-05, + "log_odds_chosen": 10.943312644958496, + "log_odds_ratio": -0.0008375818142667413, + "logits/chosen": -0.8108684420585632, + "logits/rejected": -0.7640953063964844, + "logps/chosen": -0.0003348039463162422, + "logps/rejected": -2.059131145477295, + "loss": 0.6199, + "nll_loss": 0.15488187968730927, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.348039535921998e-05, + "rewards/margins": 0.20587962865829468, + "rewards/rejected": -0.20591309666633606, + "step": 10084 + }, + { + "epoch": 6.974412171507607, + "grad_norm": 5.671638011932373, + "learning_rate": 1.6808821269402185e-05, + "log_odds_chosen": 10.110841751098633, + "log_odds_ratio": -8.413447358179837e-05, + "logits/chosen": -0.6961773633956909, + "logits/rejected": -0.6649831533432007, + "logps/chosen": -0.0002696591254789382, + "logps/rejected": -2.1173558235168457, + "loss": 0.8262, + "nll_loss": 0.20654813945293427, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.696591218409594e-05, + "rewards/margins": 0.21170863509178162, + "rewards/rejected": -0.2117355912923813, + "step": 10085 + }, + { + "epoch": 6.975103734439834, + "grad_norm": 13.913922309875488, + "learning_rate": 1.6804979253112034e-05, + "log_odds_chosen": 11.766149520874023, + "log_odds_ratio": -2.2054537112126127e-05, + "logits/chosen": -0.4592455327510834, + "logits/rejected": -0.5087196230888367, + "logps/chosen": -0.00016856536967679858, + "logps/rejected": -2.561805486679077, + "loss": 0.6583, + "nll_loss": 0.1645684689283371, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.685653842287138e-05, + "rewards/margins": 0.25616368651390076, + "rewards/rejected": -0.2561805546283722, + "step": 10086 + }, + { + "epoch": 6.975795297372061, + "grad_norm": 9.301910400390625, + "learning_rate": 1.6801137236821883e-05, + "log_odds_chosen": 11.14494514465332, + "log_odds_ratio": -2.6727680960902944e-05, + "logits/chosen": -0.43499940633773804, + "logits/rejected": -0.4686170816421509, + "logps/chosen": -0.00015866890316829085, + "logps/rejected": -2.0248191356658936, + "loss": 0.504, + "nll_loss": 0.12599007785320282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5866889953031205e-05, + "rewards/margins": 0.20246604084968567, + "rewards/rejected": -0.20248191058635712, + "step": 10087 + }, + { + "epoch": 6.976486860304288, + "grad_norm": 6.525985240936279, + "learning_rate": 1.6797295220531735e-05, + "log_odds_chosen": 10.123964309692383, + "log_odds_ratio": -0.00037600661744363606, + "logits/chosen": -0.38632524013519287, + "logits/rejected": -0.4179686903953552, + "logps/chosen": -0.001652559032663703, + "logps/rejected": -2.3613505363464355, + "loss": 0.4669, + "nll_loss": 0.11669015139341354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016525591490790248, + "rewards/margins": 0.23596976697444916, + "rewards/rejected": -0.23613503575325012, + "step": 10088 + }, + { + "epoch": 6.977178423236515, + "grad_norm": 4.867033958435059, + "learning_rate": 1.6793453204241584e-05, + "log_odds_chosen": 12.31155776977539, + "log_odds_ratio": -9.306555512011983e-06, + "logits/chosen": -0.505553662776947, + "logits/rejected": -0.6083633899688721, + "logps/chosen": -0.00012676091864705086, + "logps/rejected": -3.2197482585906982, + "loss": 0.441, + "nll_loss": 0.11025519669055939, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2676091500907205e-05, + "rewards/margins": 0.3219621777534485, + "rewards/rejected": -0.32197481393814087, + "step": 10089 + }, + { + "epoch": 6.977869986168741, + "grad_norm": 5.310248851776123, + "learning_rate": 1.6789611187951437e-05, + "log_odds_chosen": 11.04545783996582, + "log_odds_ratio": -5.797618723590858e-05, + "logits/chosen": -0.42729341983795166, + "logits/rejected": -0.5321473479270935, + "logps/chosen": -0.00029431338771246374, + "logps/rejected": -2.51184344291687, + "loss": 0.5545, + "nll_loss": 0.1386297345161438, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9431339498842135e-05, + "rewards/margins": 0.25115492939949036, + "rewards/rejected": -0.2511843740940094, + "step": 10090 + }, + { + "epoch": 6.978561549100968, + "grad_norm": 5.998685836791992, + "learning_rate": 1.678576917166129e-05, + "log_odds_chosen": 10.200556755065918, + "log_odds_ratio": -0.0004204717988613993, + "logits/chosen": -0.19246073067188263, + "logits/rejected": -0.3010517358779907, + "logps/chosen": -0.0006728671723976731, + "logps/rejected": -2.1594655513763428, + "loss": 0.5987, + "nll_loss": 0.14964529871940613, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.728671723976731e-05, + "rewards/margins": 0.21587930619716644, + "rewards/rejected": -0.21594657003879547, + "step": 10091 + }, + { + "epoch": 6.979253112033195, + "grad_norm": 6.10350227355957, + "learning_rate": 1.6781927155371138e-05, + "log_odds_chosen": 10.90713882446289, + "log_odds_ratio": -0.0001578339870320633, + "logits/chosen": -0.31386104226112366, + "logits/rejected": -0.2914910316467285, + "logps/chosen": -0.00024432651116512716, + "logps/rejected": -2.0333194732666016, + "loss": 0.5143, + "nll_loss": 0.1285688579082489, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4432651116512716e-05, + "rewards/margins": 0.20330752432346344, + "rewards/rejected": -0.20333194732666016, + "step": 10092 + }, + { + "epoch": 6.979944674965422, + "grad_norm": 3.2862350940704346, + "learning_rate": 1.677808513908099e-05, + "log_odds_chosen": 11.088020324707031, + "log_odds_ratio": -2.1633699361700565e-05, + "logits/chosen": -0.22667689621448517, + "logits/rejected": -0.2776218354701996, + "logps/chosen": -0.0002498264075256884, + "logps/rejected": -2.5119142532348633, + "loss": 0.5233, + "nll_loss": 0.1308109015226364, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4982638933579437e-05, + "rewards/margins": 0.2511664628982544, + "rewards/rejected": -0.2511914372444153, + "step": 10093 + }, + { + "epoch": 6.980636237897649, + "grad_norm": 6.681600093841553, + "learning_rate": 1.6774243122790843e-05, + "log_odds_chosen": 11.789108276367188, + "log_odds_ratio": -0.00010184692655457184, + "logits/chosen": -0.5825809836387634, + "logits/rejected": -0.6333975195884705, + "logps/chosen": -0.0009490898228250444, + "logps/rejected": -3.6755330562591553, + "loss": 0.6252, + "nll_loss": 0.15628907084465027, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.490898810327053e-05, + "rewards/margins": 0.36745840311050415, + "rewards/rejected": -0.36755332350730896, + "step": 10094 + }, + { + "epoch": 6.981327800829876, + "grad_norm": 4.172235488891602, + "learning_rate": 1.6770401106500692e-05, + "log_odds_chosen": 10.5609130859375, + "log_odds_ratio": -9.2441332526505e-05, + "logits/chosen": -0.17121471464633942, + "logits/rejected": -0.2200719118118286, + "logps/chosen": -0.0002370928123127669, + "logps/rejected": -2.3008334636688232, + "loss": 0.417, + "nll_loss": 0.10424383729696274, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3709282686468214e-05, + "rewards/margins": 0.23005962371826172, + "rewards/rejected": -0.23008334636688232, + "step": 10095 + }, + { + "epoch": 6.982019363762102, + "grad_norm": 8.53943157196045, + "learning_rate": 1.6766559090210545e-05, + "log_odds_chosen": 10.216669082641602, + "log_odds_ratio": -0.00012501122546382248, + "logits/chosen": -0.24906474351882935, + "logits/rejected": -0.3450174033641815, + "logps/chosen": -0.000745933095458895, + "logps/rejected": -2.12357234954834, + "loss": 0.5926, + "nll_loss": 0.14813847839832306, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.459330663550645e-05, + "rewards/margins": 0.21228265762329102, + "rewards/rejected": -0.21235725283622742, + "step": 10096 + }, + { + "epoch": 6.982710926694329, + "grad_norm": 7.537756443023682, + "learning_rate": 1.6762717073920394e-05, + "log_odds_chosen": 9.626380920410156, + "log_odds_ratio": -0.0007371928659267724, + "logits/chosen": -0.4513899087905884, + "logits/rejected": -0.5055891275405884, + "logps/chosen": -0.0005784498644061387, + "logps/rejected": -1.1691521406173706, + "loss": 0.5523, + "nll_loss": 0.1379946917295456, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7844990806188434e-05, + "rewards/margins": 0.1168573722243309, + "rewards/rejected": -0.11691521108150482, + "step": 10097 + }, + { + "epoch": 6.983402489626556, + "grad_norm": 3.16633677482605, + "learning_rate": 1.6758875057630243e-05, + "log_odds_chosen": 11.536641120910645, + "log_odds_ratio": -2.3040020096232183e-05, + "logits/chosen": -0.5513945817947388, + "logits/rejected": -0.6492319107055664, + "logps/chosen": -0.0001172884221887216, + "logps/rejected": -2.33725643157959, + "loss": 0.588, + "nll_loss": 0.14700275659561157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1728841855074279e-05, + "rewards/margins": 0.23371392488479614, + "rewards/rejected": -0.2337256669998169, + "step": 10098 + }, + { + "epoch": 6.984094052558783, + "grad_norm": 10.358414649963379, + "learning_rate": 1.6755033041340095e-05, + "log_odds_chosen": 10.274566650390625, + "log_odds_ratio": -0.00012472286471165717, + "logits/chosen": -0.2928815484046936, + "logits/rejected": -0.3074096441268921, + "logps/chosen": -0.0005019558011554182, + "logps/rejected": -2.5053319931030273, + "loss": 0.7442, + "nll_loss": 0.1860423982143402, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0195580115541816e-05, + "rewards/margins": 0.2504830062389374, + "rewards/rejected": -0.25053322315216064, + "step": 10099 + }, + { + "epoch": 6.98478561549101, + "grad_norm": 5.400953769683838, + "learning_rate": 1.6751191025049948e-05, + "log_odds_chosen": 9.803630828857422, + "log_odds_ratio": -0.0005115721723996103, + "logits/chosen": -0.3757714629173279, + "logits/rejected": -0.33992066979408264, + "logps/chosen": -0.00046948320232331753, + "logps/rejected": -2.0363478660583496, + "loss": 0.654, + "nll_loss": 0.16345909237861633, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.694832387031056e-05, + "rewards/margins": 0.2035878300666809, + "rewards/rejected": -0.20363479852676392, + "step": 10100 + }, + { + "epoch": 6.985477178423237, + "grad_norm": 6.6279449462890625, + "learning_rate": 1.6747349008759797e-05, + "log_odds_chosen": 11.03076171875, + "log_odds_ratio": -3.322376142023131e-05, + "logits/chosen": -0.3243277072906494, + "logits/rejected": -0.3870410621166229, + "logps/chosen": -0.00025668181478977203, + "logps/rejected": -2.2549896240234375, + "loss": 0.5772, + "nll_loss": 0.1443031281232834, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5668183297966607e-05, + "rewards/margins": 0.2254732996225357, + "rewards/rejected": -0.2254989743232727, + "step": 10101 + }, + { + "epoch": 6.986168741355463, + "grad_norm": 10.471397399902344, + "learning_rate": 1.674350699246965e-05, + "log_odds_chosen": 10.673310279846191, + "log_odds_ratio": -4.33100140071474e-05, + "logits/chosen": -0.1118420735001564, + "logits/rejected": -0.27916595339775085, + "logps/chosen": -0.0003742701665032655, + "logps/rejected": -2.4076638221740723, + "loss": 0.559, + "nll_loss": 0.13975459337234497, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.742701665032655e-05, + "rewards/margins": 0.240728959441185, + "rewards/rejected": -0.24076639115810394, + "step": 10102 + }, + { + "epoch": 6.98686030428769, + "grad_norm": 5.764482021331787, + "learning_rate": 1.67396649761795e-05, + "log_odds_chosen": 10.548346519470215, + "log_odds_ratio": -0.00014787615509703755, + "logits/chosen": -0.6814590096473694, + "logits/rejected": -0.6488746404647827, + "logps/chosen": -0.00026747508672997355, + "logps/rejected": -2.2368221282958984, + "loss": 0.5517, + "nll_loss": 0.1379205286502838, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6747507945401594e-05, + "rewards/margins": 0.22365549206733704, + "rewards/rejected": -0.22368223965168, + "step": 10103 + }, + { + "epoch": 6.987551867219917, + "grad_norm": 5.606224060058594, + "learning_rate": 1.673582295988935e-05, + "log_odds_chosen": 10.144465446472168, + "log_odds_ratio": -0.0001444382796762511, + "logits/chosen": -0.7282345294952393, + "logits/rejected": -0.8093679547309875, + "logps/chosen": -0.00044351324322633445, + "logps/rejected": -1.7967113256454468, + "loss": 0.3948, + "nll_loss": 0.09869752824306488, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.435132723301649e-05, + "rewards/margins": 0.17962679266929626, + "rewards/rejected": -0.17967115342617035, + "step": 10104 + }, + { + "epoch": 6.988243430152144, + "grad_norm": 4.325505256652832, + "learning_rate": 1.6731980943599203e-05, + "log_odds_chosen": 10.875144958496094, + "log_odds_ratio": -0.0001441028289264068, + "logits/chosen": -0.22643186151981354, + "logits/rejected": -0.352799654006958, + "logps/chosen": -0.00034628575667738914, + "logps/rejected": -2.2413947582244873, + "loss": 0.602, + "nll_loss": 0.1504793018102646, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.46285778505262e-05, + "rewards/margins": 0.2241048514842987, + "rewards/rejected": -0.2241394817829132, + "step": 10105 + }, + { + "epoch": 6.988934993084371, + "grad_norm": 8.308793067932129, + "learning_rate": 1.6728138927309052e-05, + "log_odds_chosen": 11.82182502746582, + "log_odds_ratio": -1.4797966287005693e-05, + "logits/chosen": -0.6414090991020203, + "logits/rejected": -0.7486789226531982, + "logps/chosen": -8.142340811900795e-05, + "logps/rejected": -2.3995790481567383, + "loss": 0.6312, + "nll_loss": 0.1577981412410736, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.142340448102914e-06, + "rewards/margins": 0.23994974792003632, + "rewards/rejected": -0.23995789885520935, + "step": 10106 + }, + { + "epoch": 6.9896265560165975, + "grad_norm": 5.863566875457764, + "learning_rate": 1.67242969110189e-05, + "log_odds_chosen": 10.61973762512207, + "log_odds_ratio": -5.890395550522953e-05, + "logits/chosen": -0.8222779035568237, + "logits/rejected": -0.8633177280426025, + "logps/chosen": -0.00021483330056071281, + "logps/rejected": -1.5272918939590454, + "loss": 0.5585, + "nll_loss": 0.13962499797344208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1483330783667043e-05, + "rewards/margins": 0.15270771086215973, + "rewards/rejected": -0.15272918343544006, + "step": 10107 + }, + { + "epoch": 6.990318118948824, + "grad_norm": 3.0240771770477295, + "learning_rate": 1.6720454894728754e-05, + "log_odds_chosen": 9.746898651123047, + "log_odds_ratio": -0.00038893611053936183, + "logits/chosen": 0.04509582743048668, + "logits/rejected": -0.1614600419998169, + "logps/chosen": -0.0014492695918306708, + "logps/rejected": -1.977993130683899, + "loss": 0.4254, + "nll_loss": 0.10631842911243439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014492697664536536, + "rewards/margins": 0.19765439629554749, + "rewards/rejected": -0.19779931008815765, + "step": 10108 + }, + { + "epoch": 6.991009681881051, + "grad_norm": 4.6803998947143555, + "learning_rate": 1.6716612878438606e-05, + "log_odds_chosen": 11.34988021850586, + "log_odds_ratio": -2.1349296730477363e-05, + "logits/chosen": -0.09972164034843445, + "logits/rejected": -0.21515415608882904, + "logps/chosen": -8.137220720527694e-05, + "logps/rejected": -1.995793104171753, + "loss": 0.4244, + "nll_loss": 0.10609244555234909, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.137220902426634e-06, + "rewards/margins": 0.1995711773633957, + "rewards/rejected": -0.19957931339740753, + "step": 10109 + }, + { + "epoch": 6.991701244813278, + "grad_norm": 4.614258289337158, + "learning_rate": 1.6712770862148455e-05, + "log_odds_chosen": 10.589548110961914, + "log_odds_ratio": -0.00012725955457426608, + "logits/chosen": -0.5055170059204102, + "logits/rejected": -0.6075488328933716, + "logps/chosen": -0.0002730110427364707, + "logps/rejected": -1.8980847597122192, + "loss": 0.3892, + "nll_loss": 0.09728061407804489, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7301106456434354e-05, + "rewards/margins": 0.18978118896484375, + "rewards/rejected": -0.18980847299098969, + "step": 10110 + }, + { + "epoch": 6.992392807745505, + "grad_norm": 5.7431182861328125, + "learning_rate": 1.6708928845858308e-05, + "log_odds_chosen": 10.315574645996094, + "log_odds_ratio": -7.668719626963139e-05, + "logits/chosen": -0.27294373512268066, + "logits/rejected": -0.38001275062561035, + "logps/chosen": -0.0002155811234842986, + "logps/rejected": -1.6959208250045776, + "loss": 0.494, + "nll_loss": 0.12349948287010193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1558114895015024e-05, + "rewards/margins": 0.16957053542137146, + "rewards/rejected": -0.16959208250045776, + "step": 10111 + }, + { + "epoch": 6.993084370677732, + "grad_norm": 5.39589262008667, + "learning_rate": 1.670508682956816e-05, + "log_odds_chosen": 11.265823364257812, + "log_odds_ratio": -2.7276233595330268e-05, + "logits/chosen": -0.3681911528110504, + "logits/rejected": -0.30419978499412537, + "logps/chosen": -0.00024840107653290033, + "logps/rejected": -2.618328809738159, + "loss": 0.5388, + "nll_loss": 0.13470235466957092, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4840108380885795e-05, + "rewards/margins": 0.26180803775787354, + "rewards/rejected": -0.2618328630924225, + "step": 10112 + }, + { + "epoch": 6.9937759336099585, + "grad_norm": 6.419252395629883, + "learning_rate": 1.670124481327801e-05, + "log_odds_chosen": 10.902593612670898, + "log_odds_ratio": -8.501038246322423e-05, + "logits/chosen": -0.43653345108032227, + "logits/rejected": -0.4556824564933777, + "logps/chosen": -0.00019020687614101917, + "logps/rejected": -2.1433281898498535, + "loss": 0.4661, + "nll_loss": 0.11650720238685608, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9020688341697678e-05, + "rewards/margins": 0.214313805103302, + "rewards/rejected": -0.21433281898498535, + "step": 10113 + }, + { + "epoch": 6.994467496542185, + "grad_norm": 6.108236312866211, + "learning_rate": 1.669740279698786e-05, + "log_odds_chosen": 10.553049087524414, + "log_odds_ratio": -0.00028243346605449915, + "logits/chosen": -0.3672410249710083, + "logits/rejected": -0.38557636737823486, + "logps/chosen": -0.00033305544639006257, + "logps/rejected": -1.8382352590560913, + "loss": 0.5158, + "nll_loss": 0.12892918288707733, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3305543183814734e-05, + "rewards/margins": 0.18379022181034088, + "rewards/rejected": -0.18382352590560913, + "step": 10114 + }, + { + "epoch": 6.995159059474412, + "grad_norm": 7.925759792327881, + "learning_rate": 1.669356078069771e-05, + "log_odds_chosen": 10.422781944274902, + "log_odds_ratio": -6.591706187464297e-05, + "logits/chosen": -0.33322617411613464, + "logits/rejected": -0.3572065234184265, + "logps/chosen": -0.005260770209133625, + "logps/rejected": -2.2561864852905273, + "loss": 0.5688, + "nll_loss": 0.14219242334365845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000526077055837959, + "rewards/margins": 0.2250925600528717, + "rewards/rejected": -0.2256186306476593, + "step": 10115 + }, + { + "epoch": 6.995850622406639, + "grad_norm": 5.784368515014648, + "learning_rate": 1.668971876440756e-05, + "log_odds_chosen": 11.080724716186523, + "log_odds_ratio": -5.246032742434181e-05, + "logits/chosen": -0.4725242853164673, + "logits/rejected": -0.45921579003334045, + "logps/chosen": -0.00017178738198708743, + "logps/rejected": -2.235011577606201, + "loss": 0.3307, + "nll_loss": 0.08267956972122192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7178737834910862e-05, + "rewards/margins": 0.22348397970199585, + "rewards/rejected": -0.22350117564201355, + "step": 10116 + }, + { + "epoch": 6.996542185338866, + "grad_norm": 6.753635406494141, + "learning_rate": 1.6685876748117412e-05, + "log_odds_chosen": 10.79682731628418, + "log_odds_ratio": -0.0006323234993033111, + "logits/chosen": -0.6484255194664001, + "logits/rejected": -0.6901388764381409, + "logps/chosen": -0.0002220661408500746, + "logps/rejected": -1.7270817756652832, + "loss": 0.5553, + "nll_loss": 0.13875499367713928, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2206615540198982e-05, + "rewards/margins": 0.17268598079681396, + "rewards/rejected": -0.1727081835269928, + "step": 10117 + }, + { + "epoch": 6.997233748271093, + "grad_norm": 12.498651504516602, + "learning_rate": 1.6682034731827264e-05, + "log_odds_chosen": 11.772920608520508, + "log_odds_ratio": -1.1675167115754448e-05, + "logits/chosen": -0.6263840794563293, + "logits/rejected": -0.7192596793174744, + "logps/chosen": -0.00013208006566856056, + "logps/rejected": -2.688951015472412, + "loss": 0.522, + "nll_loss": 0.13049596548080444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3208007658249699e-05, + "rewards/margins": 0.2688818871974945, + "rewards/rejected": -0.26889508962631226, + "step": 10118 + }, + { + "epoch": 6.9979253112033195, + "grad_norm": 4.978712558746338, + "learning_rate": 1.6678192715537114e-05, + "log_odds_chosen": 10.6630220413208, + "log_odds_ratio": -0.00014852010644972324, + "logits/chosen": -0.35795727372169495, + "logits/rejected": -0.4703786373138428, + "logps/chosen": -0.0005530283669941127, + "logps/rejected": -2.2193567752838135, + "loss": 0.6832, + "nll_loss": 0.1707746386528015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.530284033739008e-05, + "rewards/margins": 0.22188037633895874, + "rewards/rejected": -0.2219356894493103, + "step": 10119 + }, + { + "epoch": 6.998616874135546, + "grad_norm": 5.544008255004883, + "learning_rate": 1.6674350699246966e-05, + "log_odds_chosen": 9.86840534210205, + "log_odds_ratio": -0.00033382399124093354, + "logits/chosen": -0.2054920494556427, + "logits/rejected": -0.13383245468139648, + "logps/chosen": -0.0002967912005260587, + "logps/rejected": -1.6687836647033691, + "loss": 0.6584, + "nll_loss": 0.1645655333995819, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.967912223539315e-05, + "rewards/margins": 0.16684868931770325, + "rewards/rejected": -0.1668783724308014, + "step": 10120 + }, + { + "epoch": 6.999308437067773, + "grad_norm": 5.937603950500488, + "learning_rate": 1.667050868295682e-05, + "log_odds_chosen": 9.719598770141602, + "log_odds_ratio": -0.002333612646907568, + "logits/chosen": -0.6707288026809692, + "logits/rejected": -0.6329488754272461, + "logps/chosen": -0.0016681014094501734, + "logps/rejected": -1.8420779705047607, + "loss": 0.6239, + "nll_loss": 0.1557316780090332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016681014676578343, + "rewards/margins": 0.18404099345207214, + "rewards/rejected": -0.18420778214931488, + "step": 10121 + }, + { + "epoch": 7.0, + "grad_norm": 5.16898250579834, + "learning_rate": 1.6666666666666667e-05, + "log_odds_chosen": 10.842208862304688, + "log_odds_ratio": -3.623691372922622e-05, + "logits/chosen": -0.29508236050605774, + "logits/rejected": -0.20642954111099243, + "logps/chosen": -0.00010137717617908493, + "logps/rejected": -1.595341444015503, + "loss": 0.4601, + "nll_loss": 0.11501652002334595, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0137718163605314e-05, + "rewards/margins": 0.15952400863170624, + "rewards/rejected": -0.15953415632247925, + "step": 10122 + }, + { + "epoch": 7.000691562932227, + "grad_norm": 4.177090167999268, + "learning_rate": 1.666282465037652e-05, + "log_odds_chosen": 10.67172622680664, + "log_odds_ratio": -5.4966905736364424e-05, + "logits/chosen": -0.14299771189689636, + "logits/rejected": -0.20266348123550415, + "logps/chosen": -0.0003145110094919801, + "logps/rejected": -2.3612658977508545, + "loss": 0.4718, + "nll_loss": 0.11795040965080261, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.145109803881496e-05, + "rewards/margins": 0.236095130443573, + "rewards/rejected": -0.2361265867948532, + "step": 10123 + }, + { + "epoch": 7.001383125864454, + "grad_norm": 3.383732795715332, + "learning_rate": 1.665898263408637e-05, + "log_odds_chosen": 9.578885078430176, + "log_odds_ratio": -0.00014664589252788574, + "logits/chosen": -0.5051281452178955, + "logits/rejected": -0.5166344046592712, + "logps/chosen": -0.0004757488495670259, + "logps/rejected": -1.6275553703308105, + "loss": 0.4183, + "nll_loss": 0.10457170009613037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7574882046319544e-05, + "rewards/margins": 0.16270795464515686, + "rewards/rejected": -0.16275553405284882, + "step": 10124 + }, + { + "epoch": 7.0020746887966805, + "grad_norm": 5.989767074584961, + "learning_rate": 1.6655140617796218e-05, + "log_odds_chosen": 10.526287078857422, + "log_odds_ratio": -0.00011244660709053278, + "logits/chosen": -0.414249449968338, + "logits/rejected": -0.5609310865402222, + "logps/chosen": -0.0006711309542879462, + "logps/rejected": -2.302802085876465, + "loss": 0.448, + "nll_loss": 0.1119890883564949, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.711309833917767e-05, + "rewards/margins": 0.23021312057971954, + "rewards/rejected": -0.23028022050857544, + "step": 10125 + }, + { + "epoch": 7.002766251728907, + "grad_norm": 4.631224632263184, + "learning_rate": 1.665129860150607e-05, + "log_odds_chosen": 9.21665096282959, + "log_odds_ratio": -0.00032175323576666415, + "logits/chosen": -0.3028196096420288, + "logits/rejected": -0.4589107036590576, + "logps/chosen": -0.0006924690096639097, + "logps/rejected": -1.6965086460113525, + "loss": 0.4529, + "nll_loss": 0.11318409442901611, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.924690387677401e-05, + "rewards/margins": 0.1695816069841385, + "rewards/rejected": -0.1696508675813675, + "step": 10126 + }, + { + "epoch": 7.003457814661134, + "grad_norm": 2.4340360164642334, + "learning_rate": 1.6647456585215923e-05, + "log_odds_chosen": 11.157234191894531, + "log_odds_ratio": -2.9356588129303418e-05, + "logits/chosen": -0.6620774269104004, + "logits/rejected": -0.5918096303939819, + "logps/chosen": -0.00017746233788784593, + "logps/rejected": -2.0909321308135986, + "loss": 0.3734, + "nll_loss": 0.09334038197994232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7746233424986713e-05, + "rewards/margins": 0.209075465798378, + "rewards/rejected": -0.20909321308135986, + "step": 10127 + }, + { + "epoch": 7.004149377593361, + "grad_norm": 7.020127773284912, + "learning_rate": 1.6643614568925772e-05, + "log_odds_chosen": 11.063360214233398, + "log_odds_ratio": -9.526523353997618e-05, + "logits/chosen": -0.21672368049621582, + "logits/rejected": -0.2962769865989685, + "logps/chosen": -0.00026356359012424946, + "logps/rejected": -2.7616658210754395, + "loss": 0.3796, + "nll_loss": 0.0948876291513443, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6356357921031304e-05, + "rewards/margins": 0.2761402428150177, + "rewards/rejected": -0.2761665880680084, + "step": 10128 + }, + { + "epoch": 7.004840940525588, + "grad_norm": 4.720909595489502, + "learning_rate": 1.6639772552635624e-05, + "log_odds_chosen": 10.636579513549805, + "log_odds_ratio": -0.0001630079059395939, + "logits/chosen": -0.37148964405059814, + "logits/rejected": -0.47465845942497253, + "logps/chosen": -0.0003292100736871362, + "logps/rejected": -2.3341360092163086, + "loss": 0.4692, + "nll_loss": 0.1172887533903122, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.292100882390514e-05, + "rewards/margins": 0.23338070511817932, + "rewards/rejected": -0.23341360688209534, + "step": 10129 + }, + { + "epoch": 7.005532503457815, + "grad_norm": 3.466585397720337, + "learning_rate": 1.6635930536345477e-05, + "log_odds_chosen": 12.286417007446289, + "log_odds_ratio": -1.0827205187524669e-05, + "logits/chosen": -0.21593183279037476, + "logits/rejected": -0.2443038523197174, + "logps/chosen": -0.00011290085240034387, + "logps/rejected": -3.0278398990631104, + "loss": 0.4981, + "nll_loss": 0.12452976405620575, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1290085240034387e-05, + "rewards/margins": 0.3027727007865906, + "rewards/rejected": -0.3027840256690979, + "step": 10130 + }, + { + "epoch": 7.0062240663900415, + "grad_norm": 5.393481254577637, + "learning_rate": 1.6632088520055326e-05, + "log_odds_chosen": 11.34892463684082, + "log_odds_ratio": -1.9979619537480175e-05, + "logits/chosen": -0.10481264442205429, + "logits/rejected": -0.23816201090812683, + "logps/chosen": -0.00012460086145438254, + "logps/rejected": -2.261373996734619, + "loss": 0.527, + "nll_loss": 0.13175800442695618, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2460084690246731e-05, + "rewards/margins": 0.22612492740154266, + "rewards/rejected": -0.22613739967346191, + "step": 10131 + }, + { + "epoch": 7.006915629322268, + "grad_norm": 7.147092819213867, + "learning_rate": 1.662824650376518e-05, + "log_odds_chosen": 9.98300552368164, + "log_odds_ratio": -0.000987243838608265, + "logits/chosen": -0.5050815343856812, + "logits/rejected": -0.5195842981338501, + "logps/chosen": -0.0005599947762675583, + "logps/rejected": -1.8902868032455444, + "loss": 0.6455, + "nll_loss": 0.16128447651863098, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.599947689916007e-05, + "rewards/margins": 0.18897268176078796, + "rewards/rejected": -0.18902869522571564, + "step": 10132 + }, + { + "epoch": 7.007607192254495, + "grad_norm": 3.509237766265869, + "learning_rate": 1.6624404487475027e-05, + "log_odds_chosen": 10.463541984558105, + "log_odds_ratio": -5.5751308536855504e-05, + "logits/chosen": -0.6657548546791077, + "logits/rejected": -0.661270022392273, + "logps/chosen": -0.00012757029617205262, + "logps/rejected": -1.5192625522613525, + "loss": 0.3719, + "nll_loss": 0.09297018498182297, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2757031072396785e-05, + "rewards/margins": 0.15191349387168884, + "rewards/rejected": -0.15192626416683197, + "step": 10133 + }, + { + "epoch": 7.008298755186722, + "grad_norm": 4.898674011230469, + "learning_rate": 1.6620562471184876e-05, + "log_odds_chosen": 9.497284889221191, + "log_odds_ratio": -0.00023474835325032473, + "logits/chosen": -0.6238462924957275, + "logits/rejected": -0.657147228717804, + "logps/chosen": -0.0002542249276302755, + "logps/rejected": -1.2084856033325195, + "loss": 0.4356, + "nll_loss": 0.1088782474398613, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.542249421821907e-05, + "rewards/margins": 0.12082314491271973, + "rewards/rejected": -0.12084857374429703, + "step": 10134 + }, + { + "epoch": 7.008990318118949, + "grad_norm": 7.74991512298584, + "learning_rate": 1.661672045489473e-05, + "log_odds_chosen": 12.126110076904297, + "log_odds_ratio": -1.4810936590947676e-05, + "logits/chosen": -0.5204528570175171, + "logits/rejected": -0.5668099522590637, + "logps/chosen": -0.00012032059021294117, + "logps/rejected": -2.9467427730560303, + "loss": 0.4082, + "nll_loss": 0.10204964876174927, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2032059203193057e-05, + "rewards/margins": 0.2946622669696808, + "rewards/rejected": -0.294674277305603, + "step": 10135 + }, + { + "epoch": 7.009681881051176, + "grad_norm": 8.93749713897705, + "learning_rate": 1.661287843860458e-05, + "log_odds_chosen": 10.70609188079834, + "log_odds_ratio": -0.00022324280871544033, + "logits/chosen": -0.43974289298057556, + "logits/rejected": -0.5134708881378174, + "logps/chosen": -0.00028700660914182663, + "logps/rejected": -2.1391687393188477, + "loss": 0.4658, + "nll_loss": 0.11643637716770172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.870065873139538e-05, + "rewards/margins": 0.21388816833496094, + "rewards/rejected": -0.2139168679714203, + "step": 10136 + }, + { + "epoch": 7.0103734439834025, + "grad_norm": 5.301570892333984, + "learning_rate": 1.660903642231443e-05, + "log_odds_chosen": 11.49659252166748, + "log_odds_ratio": -9.302143735112622e-05, + "logits/chosen": -0.3102811872959137, + "logits/rejected": -0.444336861371994, + "logps/chosen": -0.0011759212939068675, + "logps/rejected": -3.2168827056884766, + "loss": 0.3944, + "nll_loss": 0.09858773648738861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011759212065953761, + "rewards/margins": 0.3215706944465637, + "rewards/rejected": -0.32168829441070557, + "step": 10137 + }, + { + "epoch": 7.011065006915629, + "grad_norm": 4.520849227905273, + "learning_rate": 1.6605194406024283e-05, + "log_odds_chosen": 9.959612846374512, + "log_odds_ratio": -0.0002180114242946729, + "logits/chosen": -0.8905350565910339, + "logits/rejected": -1.0491600036621094, + "logps/chosen": -0.00024582387413829565, + "logps/rejected": -1.7666348218917847, + "loss": 0.3302, + "nll_loss": 0.0825221985578537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.45823848672444e-05, + "rewards/margins": 0.1766389012336731, + "rewards/rejected": -0.17666348814964294, + "step": 10138 + }, + { + "epoch": 7.011756569847856, + "grad_norm": 3.800283670425415, + "learning_rate": 1.6601352389734135e-05, + "log_odds_chosen": 10.321309089660645, + "log_odds_ratio": -0.00023361285275314003, + "logits/chosen": -0.3291170001029968, + "logits/rejected": -0.36998191475868225, + "logps/chosen": -0.00021534046391025186, + "logps/rejected": -1.4043457508087158, + "loss": 0.5506, + "nll_loss": 0.1376248598098755, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1534046027227305e-05, + "rewards/margins": 0.1404130458831787, + "rewards/rejected": -0.14043457806110382, + "step": 10139 + }, + { + "epoch": 7.012448132780083, + "grad_norm": 3.9852750301361084, + "learning_rate": 1.6597510373443984e-05, + "log_odds_chosen": 11.834362983703613, + "log_odds_ratio": -4.900186468148604e-05, + "logits/chosen": -0.27264270186424255, + "logits/rejected": -0.405239999294281, + "logps/chosen": -0.0005634097033180296, + "logps/rejected": -2.438737392425537, + "loss": 0.5053, + "nll_loss": 0.12631599605083466, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.634097396978177e-05, + "rewards/margins": 0.24381740391254425, + "rewards/rejected": -0.24387376010417938, + "step": 10140 + }, + { + "epoch": 7.01313969571231, + "grad_norm": 9.13525676727295, + "learning_rate": 1.6593668357153837e-05, + "log_odds_chosen": 10.613750457763672, + "log_odds_ratio": -7.9997735156212e-05, + "logits/chosen": -0.16509748995304108, + "logits/rejected": -0.236328586935997, + "logps/chosen": -0.0001578353112563491, + "logps/rejected": -1.8619117736816406, + "loss": 0.5854, + "nll_loss": 0.1463315635919571, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.578353112563491e-05, + "rewards/margins": 0.1861753910779953, + "rewards/rejected": -0.18619118630886078, + "step": 10141 + }, + { + "epoch": 7.013831258644537, + "grad_norm": 6.098309516906738, + "learning_rate": 1.6589826340863686e-05, + "log_odds_chosen": 10.384798049926758, + "log_odds_ratio": -0.00012338526721578091, + "logits/chosen": -0.11653617024421692, + "logits/rejected": -0.29346024990081787, + "logps/chosen": -0.001884337398223579, + "logps/rejected": -2.397714376449585, + "loss": 0.5334, + "nll_loss": 0.13334126770496368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018843376892618835, + "rewards/margins": 0.23958301544189453, + "rewards/rejected": -0.23977145552635193, + "step": 10142 + }, + { + "epoch": 7.014522821576763, + "grad_norm": 4.79433536529541, + "learning_rate": 1.6585984324573535e-05, + "log_odds_chosen": 11.148327827453613, + "log_odds_ratio": -4.3775267840828747e-05, + "logits/chosen": -0.7096115946769714, + "logits/rejected": -0.7051650285720825, + "logps/chosen": -0.00015493936371058226, + "logps/rejected": -2.150099277496338, + "loss": 0.4802, + "nll_loss": 0.12003987282514572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5493937098653987e-05, + "rewards/margins": 0.2149944305419922, + "rewards/rejected": -0.2150099277496338, + "step": 10143 + }, + { + "epoch": 7.01521438450899, + "grad_norm": 3.9534661769866943, + "learning_rate": 1.6582142308283387e-05, + "log_odds_chosen": 10.364572525024414, + "log_odds_ratio": -0.0018095355480909348, + "logits/chosen": -0.19156116247177124, + "logits/rejected": -0.20004430413246155, + "logps/chosen": -0.001704665133729577, + "logps/rejected": -2.612130641937256, + "loss": 0.4095, + "nll_loss": 0.1021956205368042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001704665191937238, + "rewards/margins": 0.26104259490966797, + "rewards/rejected": -0.2612130641937256, + "step": 10144 + }, + { + "epoch": 7.015905947441217, + "grad_norm": 3.5418949127197266, + "learning_rate": 1.657830029199324e-05, + "log_odds_chosen": 10.750520706176758, + "log_odds_ratio": -0.00015969673404470086, + "logits/chosen": -0.18375131487846375, + "logits/rejected": -0.19293418526649475, + "logps/chosen": -0.0001509210269432515, + "logps/rejected": -1.8935903310775757, + "loss": 0.4629, + "nll_loss": 0.11571593582630157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.509210324002197e-05, + "rewards/margins": 0.18934394419193268, + "rewards/rejected": -0.18935903906822205, + "step": 10145 + }, + { + "epoch": 7.016597510373444, + "grad_norm": 5.542314529418945, + "learning_rate": 1.657445827570309e-05, + "log_odds_chosen": 9.751424789428711, + "log_odds_ratio": -0.00025462431949563324, + "logits/chosen": -0.1339586228132248, + "logits/rejected": -0.1502704620361328, + "logps/chosen": -0.0007169965538196266, + "logps/rejected": -1.936805009841919, + "loss": 0.7262, + "nll_loss": 0.18153619766235352, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.169965101638809e-05, + "rewards/margins": 0.19360879063606262, + "rewards/rejected": -0.19368049502372742, + "step": 10146 + }, + { + "epoch": 7.017289073305671, + "grad_norm": 4.211172103881836, + "learning_rate": 1.657061625941294e-05, + "log_odds_chosen": 11.636990547180176, + "log_odds_ratio": -1.1203040230611805e-05, + "logits/chosen": -0.209406316280365, + "logits/rejected": -0.2680383324623108, + "logps/chosen": -0.00018335843924432993, + "logps/rejected": -2.7831931114196777, + "loss": 0.4178, + "nll_loss": 0.10444985330104828, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8335844288230874e-05, + "rewards/margins": 0.2783010005950928, + "rewards/rejected": -0.2783193290233612, + "step": 10147 + }, + { + "epoch": 7.017980636237898, + "grad_norm": 4.356594562530518, + "learning_rate": 1.6566774243122794e-05, + "log_odds_chosen": 10.074660301208496, + "log_odds_ratio": -0.0001061324801412411, + "logits/chosen": -0.42399752140045166, + "logits/rejected": -0.429721474647522, + "logps/chosen": -0.0004705238970927894, + "logps/rejected": -2.0024328231811523, + "loss": 0.2835, + "nll_loss": 0.07086857408285141, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.705238825408742e-05, + "rewards/margins": 0.20019622147083282, + "rewards/rejected": -0.200243279337883, + "step": 10148 + }, + { + "epoch": 7.018672199170124, + "grad_norm": 5.776265621185303, + "learning_rate": 1.6562932226832643e-05, + "log_odds_chosen": 10.335941314697266, + "log_odds_ratio": -0.00023757074086461216, + "logits/chosen": -0.38004934787750244, + "logits/rejected": -0.43100476264953613, + "logps/chosen": -0.0011662193574011326, + "logps/rejected": -2.4946436882019043, + "loss": 0.4329, + "nll_loss": 0.10819514095783234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011662192991934717, + "rewards/margins": 0.2493477761745453, + "rewards/rejected": -0.24946439266204834, + "step": 10149 + }, + { + "epoch": 7.019363762102351, + "grad_norm": 4.387164115905762, + "learning_rate": 1.6559090210542495e-05, + "log_odds_chosen": 9.481719970703125, + "log_odds_ratio": -0.0002926454762928188, + "logits/chosen": -0.2788187265396118, + "logits/rejected": -0.28796088695526123, + "logps/chosen": -0.0004147875006310642, + "logps/rejected": -1.7780773639678955, + "loss": 0.4905, + "nll_loss": 0.12258856743574142, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1478753701085225e-05, + "rewards/margins": 0.17776626348495483, + "rewards/rejected": -0.1778077483177185, + "step": 10150 + }, + { + "epoch": 7.020055325034578, + "grad_norm": 5.8968658447265625, + "learning_rate": 1.6555248194252344e-05, + "log_odds_chosen": 12.193097114562988, + "log_odds_ratio": -9.320355275121983e-06, + "logits/chosen": -0.3670240640640259, + "logits/rejected": -0.5074326395988464, + "logps/chosen": -0.00020143986330367625, + "logps/rejected": -3.347984790802002, + "loss": 0.6501, + "nll_loss": 0.16253016889095306, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0143987057963386e-05, + "rewards/margins": 0.334778368473053, + "rewards/rejected": -0.33479851484298706, + "step": 10151 + }, + { + "epoch": 7.020746887966805, + "grad_norm": 7.897648334503174, + "learning_rate": 1.6551406177962193e-05, + "log_odds_chosen": 10.744218826293945, + "log_odds_ratio": -8.096903911791742e-05, + "logits/chosen": -0.6842846870422363, + "logits/rejected": -0.6260226368904114, + "logps/chosen": -0.00022721345885656774, + "logps/rejected": -2.1028618812561035, + "loss": 0.425, + "nll_loss": 0.10624252259731293, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2721345885656774e-05, + "rewards/margins": 0.2102634608745575, + "rewards/rejected": -0.2102862000465393, + "step": 10152 + }, + { + "epoch": 7.021438450899032, + "grad_norm": 4.6264495849609375, + "learning_rate": 1.6547564161672046e-05, + "log_odds_chosen": 11.312272071838379, + "log_odds_ratio": -2.633406074892264e-05, + "logits/chosen": -0.2902446389198303, + "logits/rejected": -0.3588501513004303, + "logps/chosen": -0.00012992211850360036, + "logps/rejected": -2.3609724044799805, + "loss": 0.3794, + "nll_loss": 0.09485436975955963, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.299221366934944e-05, + "rewards/margins": 0.2360842525959015, + "rewards/rejected": -0.23609723150730133, + "step": 10153 + }, + { + "epoch": 7.022130013831259, + "grad_norm": 4.990711212158203, + "learning_rate": 1.6543722145381898e-05, + "log_odds_chosen": 10.290438652038574, + "log_odds_ratio": -0.00021796667715534568, + "logits/chosen": -0.057533517479896545, + "logits/rejected": -0.3147449493408203, + "logps/chosen": -0.0010070588905364275, + "logps/rejected": -2.5475285053253174, + "loss": 0.6871, + "nll_loss": 0.17174169421195984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010070588177768514, + "rewards/margins": 0.25465214252471924, + "rewards/rejected": -0.25475287437438965, + "step": 10154 + }, + { + "epoch": 7.022821576763485, + "grad_norm": 5.92970609664917, + "learning_rate": 1.6539880129091747e-05, + "log_odds_chosen": 11.233606338500977, + "log_odds_ratio": -2.7870761186932214e-05, + "logits/chosen": -0.32939082384109497, + "logits/rejected": -0.3940170407295227, + "logps/chosen": -8.571715443395078e-05, + "logps/rejected": -1.9228782653808594, + "loss": 0.4735, + "nll_loss": 0.11837649345397949, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.571715625294019e-06, + "rewards/margins": 0.19227927923202515, + "rewards/rejected": -0.1922878473997116, + "step": 10155 + }, + { + "epoch": 7.023513139695712, + "grad_norm": 3.8882079124450684, + "learning_rate": 1.65360381128016e-05, + "log_odds_chosen": 10.108419418334961, + "log_odds_ratio": -8.935236110119149e-05, + "logits/chosen": -0.8104405403137207, + "logits/rejected": -0.8440303802490234, + "logps/chosen": -0.0004185454163234681, + "logps/rejected": -1.7598785161972046, + "loss": 0.3911, + "nll_loss": 0.09776659309864044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.185454599792138e-05, + "rewards/margins": 0.17594601213932037, + "rewards/rejected": -0.1759878545999527, + "step": 10156 + }, + { + "epoch": 7.024204702627939, + "grad_norm": 5.826569557189941, + "learning_rate": 1.653219609651145e-05, + "log_odds_chosen": 10.434246063232422, + "log_odds_ratio": -0.00024922305601648986, + "logits/chosen": -0.7025120258331299, + "logits/rejected": -0.6248111724853516, + "logps/chosen": -0.0001936000626301393, + "logps/rejected": -1.761898398399353, + "loss": 0.6959, + "nll_loss": 0.17395778000354767, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.936000626301393e-05, + "rewards/margins": 0.1761704683303833, + "rewards/rejected": -0.1761898398399353, + "step": 10157 + }, + { + "epoch": 7.024896265560166, + "grad_norm": 5.452138423919678, + "learning_rate": 1.65283540802213e-05, + "log_odds_chosen": 11.768486022949219, + "log_odds_ratio": -1.5557510778307915e-05, + "logits/chosen": -0.6322643160820007, + "logits/rejected": -0.7022844552993774, + "logps/chosen": -0.0001267803891096264, + "logps/rejected": -2.44952130317688, + "loss": 0.4294, + "nll_loss": 0.10734250396490097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2678039638558403e-05, + "rewards/margins": 0.24493944644927979, + "rewards/rejected": -0.24495212733745575, + "step": 10158 + }, + { + "epoch": 7.025587828492393, + "grad_norm": 5.500707149505615, + "learning_rate": 1.6524512063931154e-05, + "log_odds_chosen": 11.4202241897583, + "log_odds_ratio": -0.0006046928465366364, + "logits/chosen": -0.01992719992995262, + "logits/rejected": -0.10526986420154572, + "logps/chosen": -0.0008786749094724655, + "logps/rejected": -2.987894058227539, + "loss": 0.6546, + "nll_loss": 0.16358627378940582, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.786749822320417e-05, + "rewards/margins": 0.29870152473449707, + "rewards/rejected": -0.298789381980896, + "step": 10159 + }, + { + "epoch": 7.0262793914246195, + "grad_norm": 5.260455131530762, + "learning_rate": 1.6520670047641003e-05, + "log_odds_chosen": 10.34775447845459, + "log_odds_ratio": -0.00031731827766634524, + "logits/chosen": 0.4068028926849365, + "logits/rejected": 0.27036112546920776, + "logps/chosen": -0.0009474047692492604, + "logps/rejected": -2.5685877799987793, + "loss": 0.5361, + "nll_loss": 0.13399837911128998, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.474047692492604e-05, + "rewards/margins": 0.2567640542984009, + "rewards/rejected": -0.25685879588127136, + "step": 10160 + }, + { + "epoch": 7.026970954356846, + "grad_norm": 5.471103191375732, + "learning_rate": 1.6516828031350852e-05, + "log_odds_chosen": 11.179512023925781, + "log_odds_ratio": -1.992461329791695e-05, + "logits/chosen": -0.5953192710876465, + "logits/rejected": -0.6373271942138672, + "logps/chosen": -0.000205492353416048, + "logps/rejected": -2.2310597896575928, + "loss": 0.6108, + "nll_loss": 0.1526985764503479, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.054923606920056e-05, + "rewards/margins": 0.2230854332447052, + "rewards/rejected": -0.22310596704483032, + "step": 10161 + }, + { + "epoch": 7.027662517289073, + "grad_norm": 13.870356559753418, + "learning_rate": 1.6512986015060704e-05, + "log_odds_chosen": 10.364818572998047, + "log_odds_ratio": -0.0002144112513633445, + "logits/chosen": -0.7196736335754395, + "logits/rejected": -0.7011851668357849, + "logps/chosen": -0.00027963684988208115, + "logps/rejected": -1.7333106994628906, + "loss": 0.4818, + "nll_loss": 0.12043341994285583, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7963686079601757e-05, + "rewards/margins": 0.17330311238765717, + "rewards/rejected": -0.17333108186721802, + "step": 10162 + }, + { + "epoch": 7.0283540802213, + "grad_norm": 6.397958278656006, + "learning_rate": 1.6509143998770553e-05, + "log_odds_chosen": 9.366116523742676, + "log_odds_ratio": -0.0018373996717855334, + "logits/chosen": -0.5085399150848389, + "logits/rejected": -0.570478081703186, + "logps/chosen": -0.0013225064612925053, + "logps/rejected": -1.975780963897705, + "loss": 0.5771, + "nll_loss": 0.1440969705581665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013225062866695225, + "rewards/margins": 0.1974458396434784, + "rewards/rejected": -0.1975780874490738, + "step": 10163 + }, + { + "epoch": 7.029045643153527, + "grad_norm": 6.637556076049805, + "learning_rate": 1.6505301982480406e-05, + "log_odds_chosen": 11.188152313232422, + "log_odds_ratio": -5.743116344092414e-05, + "logits/chosen": -0.5558980107307434, + "logits/rejected": -0.5013357400894165, + "logps/chosen": -0.00044322869507595897, + "logps/rejected": -2.2768421173095703, + "loss": 0.4811, + "nll_loss": 0.12027549743652344, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.432286732480861e-05, + "rewards/margins": 0.22763991355895996, + "rewards/rejected": -0.22768422961235046, + "step": 10164 + }, + { + "epoch": 7.029737206085754, + "grad_norm": 10.173316955566406, + "learning_rate": 1.6501459966190258e-05, + "log_odds_chosen": 11.47596549987793, + "log_odds_ratio": -2.377521559537854e-05, + "logits/chosen": -0.5182502269744873, + "logits/rejected": -0.5592266917228699, + "logps/chosen": -0.0001406124501954764, + "logps/rejected": -2.4441890716552734, + "loss": 0.5262, + "nll_loss": 0.13155286014080048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.406124465574976e-05, + "rewards/margins": 0.2444048523902893, + "rewards/rejected": -0.2444189041852951, + "step": 10165 + }, + { + "epoch": 7.0304287690179805, + "grad_norm": 6.493875026702881, + "learning_rate": 1.6497617949900107e-05, + "log_odds_chosen": 11.056800842285156, + "log_odds_ratio": -3.62358296115417e-05, + "logits/chosen": -0.4721360504627228, + "logits/rejected": -0.522377073764801, + "logps/chosen": -0.0004437095485627651, + "logps/rejected": -2.6853585243225098, + "loss": 0.551, + "nll_loss": 0.1377374231815338, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.437095776665956e-05, + "rewards/margins": 0.2684914767742157, + "rewards/rejected": -0.268535852432251, + "step": 10166 + }, + { + "epoch": 7.031120331950207, + "grad_norm": 7.352128982543945, + "learning_rate": 1.649377593360996e-05, + "log_odds_chosen": 9.795044898986816, + "log_odds_ratio": -0.0001043759984895587, + "logits/chosen": -0.6661955118179321, + "logits/rejected": -0.7603518962860107, + "logps/chosen": -0.00048251228872686625, + "logps/rejected": -1.7106971740722656, + "loss": 1.0816, + "nll_loss": 0.2703953981399536, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8251229600282386e-05, + "rewards/margins": 0.1710214763879776, + "rewards/rejected": -0.17106971144676208, + "step": 10167 + }, + { + "epoch": 7.031811894882434, + "grad_norm": 9.762201309204102, + "learning_rate": 1.6489933917319812e-05, + "log_odds_chosen": 10.23452377319336, + "log_odds_ratio": -0.00016816816059872508, + "logits/chosen": -0.28022176027297974, + "logits/rejected": -0.31643277406692505, + "logps/chosen": -0.0002240131434518844, + "logps/rejected": -1.9264167547225952, + "loss": 0.3028, + "nll_loss": 0.07567442953586578, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.240131470898632e-05, + "rewards/margins": 0.19261927902698517, + "rewards/rejected": -0.19264167547225952, + "step": 10168 + }, + { + "epoch": 7.032503457814661, + "grad_norm": 4.275112152099609, + "learning_rate": 1.648609190102966e-05, + "log_odds_chosen": 10.421937942504883, + "log_odds_ratio": -0.00016295308887492865, + "logits/chosen": -0.29650211334228516, + "logits/rejected": -0.4259142577648163, + "logps/chosen": -0.0003613826702348888, + "logps/rejected": -1.915174961090088, + "loss": 0.4729, + "nll_loss": 0.11820431053638458, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.613826629589312e-05, + "rewards/margins": 0.191481351852417, + "rewards/rejected": -0.19151750206947327, + "step": 10169 + }, + { + "epoch": 7.033195020746888, + "grad_norm": 5.346506118774414, + "learning_rate": 1.648224988473951e-05, + "log_odds_chosen": 10.069296836853027, + "log_odds_ratio": -9.543122723698616e-05, + "logits/chosen": -0.3640506863594055, + "logits/rejected": -0.4045189917087555, + "logps/chosen": -0.0005423121619969606, + "logps/rejected": -1.622582197189331, + "loss": 0.7224, + "nll_loss": 0.18058858811855316, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.42312154721003e-05, + "rewards/margins": 0.16220398247241974, + "rewards/rejected": -0.16225820779800415, + "step": 10170 + }, + { + "epoch": 7.033886583679115, + "grad_norm": 6.405359745025635, + "learning_rate": 1.6478407868449363e-05, + "log_odds_chosen": 10.256173133850098, + "log_odds_ratio": -0.0005121605936437845, + "logits/chosen": -0.609381914138794, + "logits/rejected": -0.6050175428390503, + "logps/chosen": -0.0007455676095560193, + "logps/rejected": -2.217386484146118, + "loss": 0.4848, + "nll_loss": 0.12116114050149918, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.455676677636802e-05, + "rewards/margins": 0.22166410088539124, + "rewards/rejected": -0.22173866629600525, + "step": 10171 + }, + { + "epoch": 7.0345781466113415, + "grad_norm": 5.032310962677002, + "learning_rate": 1.6474565852159212e-05, + "log_odds_chosen": 10.876982688903809, + "log_odds_ratio": -5.83749933866784e-05, + "logits/chosen": 0.012026078999042511, + "logits/rejected": -0.10096706449985504, + "logps/chosen": -0.0006185060483403504, + "logps/rejected": -2.8352396488189697, + "loss": 0.5413, + "nll_loss": 0.1353198140859604, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1850601923652e-05, + "rewards/margins": 0.2834621071815491, + "rewards/rejected": -0.28352394700050354, + "step": 10172 + }, + { + "epoch": 7.035269709543568, + "grad_norm": 5.299025058746338, + "learning_rate": 1.6470723835869064e-05, + "log_odds_chosen": 10.806124687194824, + "log_odds_ratio": -7.67287565395236e-05, + "logits/chosen": -0.44665342569351196, + "logits/rejected": -0.4744335114955902, + "logps/chosen": -0.0004359095182735473, + "logps/rejected": -2.548654556274414, + "loss": 0.4792, + "nll_loss": 0.11978526413440704, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3590949644567445e-05, + "rewards/margins": 0.25482189655303955, + "rewards/rejected": -0.25486546754837036, + "step": 10173 + }, + { + "epoch": 7.035961272475795, + "grad_norm": 5.840847015380859, + "learning_rate": 1.6466881819578917e-05, + "log_odds_chosen": 10.712270736694336, + "log_odds_ratio": -4.2316467443015426e-05, + "logits/chosen": -0.6601865291595459, + "logits/rejected": -0.8111155033111572, + "logps/chosen": -0.0002043562853941694, + "logps/rejected": -1.841064214706421, + "loss": 0.524, + "nll_loss": 0.13099926710128784, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.04356292670127e-05, + "rewards/margins": 0.18408598005771637, + "rewards/rejected": -0.18410643935203552, + "step": 10174 + }, + { + "epoch": 7.036652835408022, + "grad_norm": 3.828860282897949, + "learning_rate": 1.6463039803288766e-05, + "log_odds_chosen": 10.708616256713867, + "log_odds_ratio": -0.0003266993153374642, + "logits/chosen": -0.48677772283554077, + "logits/rejected": -0.5339542031288147, + "logps/chosen": -0.00128236785531044, + "logps/rejected": -2.570948600769043, + "loss": 0.5406, + "nll_loss": 0.13511976599693298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012823677388951182, + "rewards/margins": 0.25696662068367004, + "rewards/rejected": -0.2570948600769043, + "step": 10175 + }, + { + "epoch": 7.037344398340249, + "grad_norm": 5.5663065910339355, + "learning_rate": 1.6459197786998618e-05, + "log_odds_chosen": 11.677118301391602, + "log_odds_ratio": -1.1773870028264355e-05, + "logits/chosen": -0.16936692595481873, + "logits/rejected": -0.15656155347824097, + "logps/chosen": -8.260917093139142e-05, + "logps/rejected": -2.1766366958618164, + "loss": 1.1645, + "nll_loss": 0.29112347960472107, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.260917638835963e-06, + "rewards/margins": 0.21765542030334473, + "rewards/rejected": -0.21766367554664612, + "step": 10176 + }, + { + "epoch": 7.038035961272476, + "grad_norm": 5.215190887451172, + "learning_rate": 1.645535577070847e-05, + "log_odds_chosen": 10.833396911621094, + "log_odds_ratio": -4.95106796734035e-05, + "logits/chosen": -0.0681900605559349, + "logits/rejected": -0.17159458994865417, + "logps/chosen": -0.0002625317720230669, + "logps/rejected": -1.9759297370910645, + "loss": 0.6218, + "nll_loss": 0.15545254945755005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6253175747115165e-05, + "rewards/margins": 0.19756671786308289, + "rewards/rejected": -0.19759297370910645, + "step": 10177 + }, + { + "epoch": 7.0387275242047025, + "grad_norm": 3.8841283321380615, + "learning_rate": 1.645151375441832e-05, + "log_odds_chosen": 10.032281875610352, + "log_odds_ratio": -0.00029725898639298975, + "logits/chosen": 0.27079981565475464, + "logits/rejected": 0.08898486196994781, + "logps/chosen": -0.0007310167420655489, + "logps/rejected": -1.6904047727584839, + "loss": 0.3667, + "nll_loss": 0.09165007621049881, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.310167711693794e-05, + "rewards/margins": 0.1689673662185669, + "rewards/rejected": -0.1690404713153839, + "step": 10178 + }, + { + "epoch": 7.039419087136929, + "grad_norm": 6.194324970245361, + "learning_rate": 1.644767173812817e-05, + "log_odds_chosen": 10.229728698730469, + "log_odds_ratio": -0.00041000815690495074, + "logits/chosen": -0.050195012241601944, + "logits/rejected": -0.04101834073662758, + "logps/chosen": -0.0002562832087278366, + "logps/rejected": -1.5868102312088013, + "loss": 0.7313, + "nll_loss": 0.18277321755886078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.56283201451879e-05, + "rewards/margins": 0.15865539014339447, + "rewards/rejected": -0.1586810201406479, + "step": 10179 + }, + { + "epoch": 7.040110650069156, + "grad_norm": 9.337830543518066, + "learning_rate": 1.644382972183802e-05, + "log_odds_chosen": 10.965694427490234, + "log_odds_ratio": -3.5539087548386306e-05, + "logits/chosen": -0.4667804539203644, + "logits/rejected": -0.5348884463310242, + "logps/chosen": -0.0005278678145259619, + "logps/rejected": -2.2736480236053467, + "loss": 0.4805, + "nll_loss": 0.12011906504631042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278678145259619e-05, + "rewards/margins": 0.22731202840805054, + "rewards/rejected": -0.22736480832099915, + "step": 10180 + }, + { + "epoch": 7.040802213001383, + "grad_norm": 6.488518714904785, + "learning_rate": 1.643998770554787e-05, + "log_odds_chosen": 11.179366111755371, + "log_odds_ratio": -0.00010157489305129275, + "logits/chosen": -0.4254930317401886, + "logits/rejected": -0.45213884115219116, + "logps/chosen": -0.0007714617531746626, + "logps/rejected": -3.35191011428833, + "loss": 0.4736, + "nll_loss": 0.11839261651039124, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.71461782278493e-05, + "rewards/margins": 0.33511388301849365, + "rewards/rejected": -0.335191011428833, + "step": 10181 + }, + { + "epoch": 7.04149377593361, + "grad_norm": 5.294634819030762, + "learning_rate": 1.6436145689257723e-05, + "log_odds_chosen": 12.10110092163086, + "log_odds_ratio": -1.710414289846085e-05, + "logits/chosen": -0.1707492619752884, + "logits/rejected": -0.2344096601009369, + "logps/chosen": -7.648633618373424e-05, + "logps/rejected": -2.405935049057007, + "loss": 0.366, + "nll_loss": 0.09150756150484085, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.648633982171305e-06, + "rewards/margins": 0.2405858337879181, + "rewards/rejected": -0.24059350788593292, + "step": 10182 + }, + { + "epoch": 7.042185338865837, + "grad_norm": 3.243549108505249, + "learning_rate": 1.6432303672967575e-05, + "log_odds_chosen": 11.382166862487793, + "log_odds_ratio": -2.876598409784492e-05, + "logits/chosen": -0.34457629919052124, + "logits/rejected": -0.40560418367385864, + "logps/chosen": -0.00015357055235654116, + "logps/rejected": -2.4820568561553955, + "loss": 0.3721, + "nll_loss": 0.09302125871181488, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5357054508058354e-05, + "rewards/margins": 0.24819032847881317, + "rewards/rejected": -0.24820569157600403, + "step": 10183 + }, + { + "epoch": 7.0428769017980635, + "grad_norm": 5.427514553070068, + "learning_rate": 1.6428461656677424e-05, + "log_odds_chosen": 11.672807693481445, + "log_odds_ratio": -3.7301670090528205e-05, + "logits/chosen": -0.5080470442771912, + "logits/rejected": -0.5370722413063049, + "logps/chosen": -0.0006107304943725467, + "logps/rejected": -2.9019529819488525, + "loss": 0.4418, + "nll_loss": 0.11045674979686737, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107304943725467e-05, + "rewards/margins": 0.2901342511177063, + "rewards/rejected": -0.2901953160762787, + "step": 10184 + }, + { + "epoch": 7.04356846473029, + "grad_norm": 16.298444747924805, + "learning_rate": 1.6424619640387277e-05, + "log_odds_chosen": 11.56513786315918, + "log_odds_ratio": -1.4087101590121165e-05, + "logits/chosen": -0.9115073680877686, + "logits/rejected": -0.917153000831604, + "logps/chosen": -0.000252393918344751, + "logps/rejected": -2.4125871658325195, + "loss": 0.3834, + "nll_loss": 0.09584072232246399, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5239394744858146e-05, + "rewards/margins": 0.2412334680557251, + "rewards/rejected": -0.24125871062278748, + "step": 10185 + }, + { + "epoch": 7.044260027662517, + "grad_norm": 3.273277759552002, + "learning_rate": 1.642077762409713e-05, + "log_odds_chosen": 10.584732055664062, + "log_odds_ratio": -0.00015055287803988904, + "logits/chosen": -0.2822999656200409, + "logits/rejected": -0.29767704010009766, + "logps/chosen": -0.00020575344387907535, + "logps/rejected": -2.1561877727508545, + "loss": 0.4178, + "nll_loss": 0.10444684326648712, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0575344024109654e-05, + "rewards/margins": 0.2155982106924057, + "rewards/rejected": -0.2156187891960144, + "step": 10186 + }, + { + "epoch": 7.044951590594744, + "grad_norm": 3.3376383781433105, + "learning_rate": 1.6416935607806978e-05, + "log_odds_chosen": 10.354433059692383, + "log_odds_ratio": -7.408359670080245e-05, + "logits/chosen": -0.4512789845466614, + "logits/rejected": -0.47543883323669434, + "logps/chosen": -0.00016649517056066543, + "logps/rejected": -1.6402223110198975, + "loss": 0.4424, + "nll_loss": 0.11058825254440308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.664951560087502e-05, + "rewards/margins": 0.1640055924654007, + "rewards/rejected": -0.16402223706245422, + "step": 10187 + }, + { + "epoch": 7.045643153526971, + "grad_norm": 6.444431304931641, + "learning_rate": 1.641309359151683e-05, + "log_odds_chosen": 11.011929512023926, + "log_odds_ratio": -6.39606369077228e-05, + "logits/chosen": -0.19220955669879913, + "logits/rejected": -0.30211764574050903, + "logps/chosen": -0.00017778460460249335, + "logps/rejected": -2.086287498474121, + "loss": 0.4065, + "nll_loss": 0.10161326825618744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7778460460249335e-05, + "rewards/margins": 0.20861098170280457, + "rewards/rejected": -0.20862877368927002, + "step": 10188 + }, + { + "epoch": 7.046334716459198, + "grad_norm": 6.527580261230469, + "learning_rate": 1.640925157522668e-05, + "log_odds_chosen": 11.151822090148926, + "log_odds_ratio": -4.187340528005734e-05, + "logits/chosen": -0.0439603328704834, + "logits/rejected": -0.05829164385795593, + "logps/chosen": -0.00031163141829892993, + "logps/rejected": -2.4885408878326416, + "loss": 0.8264, + "nll_loss": 0.20658345520496368, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.116314474027604e-05, + "rewards/margins": 0.24882292747497559, + "rewards/rejected": -0.2488541156053543, + "step": 10189 + }, + { + "epoch": 7.0470262793914245, + "grad_norm": 4.903499603271484, + "learning_rate": 1.640540955893653e-05, + "log_odds_chosen": 12.155038833618164, + "log_odds_ratio": -1.0776170711324085e-05, + "logits/chosen": -0.21331267058849335, + "logits/rejected": -0.24484039843082428, + "logps/chosen": -0.00034775433596223593, + "logps/rejected": -3.4819412231445312, + "loss": 0.6091, + "nll_loss": 0.1522829681634903, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477543577901088e-05, + "rewards/margins": 0.3481593430042267, + "rewards/rejected": -0.3481941223144531, + "step": 10190 + }, + { + "epoch": 7.047717842323651, + "grad_norm": 4.194298267364502, + "learning_rate": 1.640156754264638e-05, + "log_odds_chosen": 10.863759994506836, + "log_odds_ratio": -4.0062346670310944e-05, + "logits/chosen": -0.3379928767681122, + "logits/rejected": -0.4385606646537781, + "logps/chosen": -7.709265628363937e-05, + "logps/rejected": -1.4500436782836914, + "loss": 0.3493, + "nll_loss": 0.0873255506157875, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.709265446464997e-06, + "rewards/margins": 0.14499665796756744, + "rewards/rejected": -0.14500436186790466, + "step": 10191 + }, + { + "epoch": 7.048409405255878, + "grad_norm": 6.124948024749756, + "learning_rate": 1.6397725526356233e-05, + "log_odds_chosen": 10.520427703857422, + "log_odds_ratio": -4.4489057472674176e-05, + "logits/chosen": -0.28054559230804443, + "logits/rejected": -0.371385395526886, + "logps/chosen": -0.00035703781759366393, + "logps/rejected": -2.016751289367676, + "loss": 0.6267, + "nll_loss": 0.1566665768623352, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.570377884898335e-05, + "rewards/margins": 0.20163944363594055, + "rewards/rejected": -0.201675146818161, + "step": 10192 + }, + { + "epoch": 7.049100968188105, + "grad_norm": 11.454859733581543, + "learning_rate": 1.6393883510066083e-05, + "log_odds_chosen": 11.12508773803711, + "log_odds_ratio": -5.2118764870101586e-05, + "logits/chosen": -0.2627827525138855, + "logits/rejected": -0.270622193813324, + "logps/chosen": -0.0005954367807134986, + "logps/rejected": -2.7131738662719727, + "loss": 1.0948, + "nll_loss": 0.2737029492855072, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954367952654138e-05, + "rewards/margins": 0.2712578773498535, + "rewards/rejected": -0.27131742238998413, + "step": 10193 + }, + { + "epoch": 7.049792531120332, + "grad_norm": 3.717924118041992, + "learning_rate": 1.6390041493775935e-05, + "log_odds_chosen": 10.924869537353516, + "log_odds_ratio": -0.00014482364349532872, + "logits/chosen": -0.5940892696380615, + "logits/rejected": -0.574979305267334, + "logps/chosen": -0.0002499162219464779, + "logps/rejected": -2.2350001335144043, + "loss": 0.3341, + "nll_loss": 0.08351359516382217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4991619284264743e-05, + "rewards/margins": 0.22347500920295715, + "rewards/rejected": -0.22350001335144043, + "step": 10194 + }, + { + "epoch": 7.050484094052559, + "grad_norm": 3.923851490020752, + "learning_rate": 1.6386199477485787e-05, + "log_odds_chosen": 11.278277397155762, + "log_odds_ratio": -4.831477417610586e-05, + "logits/chosen": -0.6000710129737854, + "logits/rejected": -0.6552660465240479, + "logps/chosen": -0.00019233408966101706, + "logps/rejected": -2.471733570098877, + "loss": 0.4294, + "nll_loss": 0.10733603686094284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9233411876484752e-05, + "rewards/margins": 0.2471541315317154, + "rewards/rejected": -0.24717335402965546, + "step": 10195 + }, + { + "epoch": 7.051175656984785, + "grad_norm": 6.779111862182617, + "learning_rate": 1.6382357461195636e-05, + "log_odds_chosen": 11.04921817779541, + "log_odds_ratio": -2.1953759642201476e-05, + "logits/chosen": -0.08587629348039627, + "logits/rejected": -0.17471766471862793, + "logps/chosen": -0.00020453102479223162, + "logps/rejected": -2.310608148574829, + "loss": 0.5807, + "nll_loss": 0.14518429338932037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0453102479223162e-05, + "rewards/margins": 0.231040358543396, + "rewards/rejected": -0.23106080293655396, + "step": 10196 + }, + { + "epoch": 7.051867219917012, + "grad_norm": 3.915616035461426, + "learning_rate": 1.637851544490549e-05, + "log_odds_chosen": 10.468697547912598, + "log_odds_ratio": -0.0005837790085934103, + "logits/chosen": -0.5680649280548096, + "logits/rejected": -0.6020484566688538, + "logps/chosen": -0.0018268902786076069, + "logps/rejected": -2.0773544311523438, + "loss": 0.4922, + "nll_loss": 0.12299355119466782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001826890220399946, + "rewards/margins": 0.20755276083946228, + "rewards/rejected": -0.20773544907569885, + "step": 10197 + }, + { + "epoch": 7.052558782849239, + "grad_norm": 7.033474922180176, + "learning_rate": 1.6374673428615338e-05, + "log_odds_chosen": 11.299308776855469, + "log_odds_ratio": -9.374999353894964e-05, + "logits/chosen": -0.5625525712966919, + "logits/rejected": -0.5659523606300354, + "logps/chosen": -0.00023602109286002815, + "logps/rejected": -2.5319390296936035, + "loss": 0.4682, + "nll_loss": 0.11702945828437805, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3602109649800695e-05, + "rewards/margins": 0.25317031145095825, + "rewards/rejected": -0.2531939148902893, + "step": 10198 + }, + { + "epoch": 7.053250345781466, + "grad_norm": 7.322216510772705, + "learning_rate": 1.6370831412325187e-05, + "log_odds_chosen": 10.925352096557617, + "log_odds_ratio": -7.212365017039701e-05, + "logits/chosen": -0.8103114366531372, + "logits/rejected": -0.8264681696891785, + "logps/chosen": -0.00025325504248030484, + "logps/rejected": -1.6794970035552979, + "loss": 0.428, + "nll_loss": 0.10699731111526489, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5325503884232603e-05, + "rewards/margins": 0.16792437434196472, + "rewards/rejected": -0.16794970631599426, + "step": 10199 + }, + { + "epoch": 7.053941908713693, + "grad_norm": 4.7112956047058105, + "learning_rate": 1.636698939603504e-05, + "log_odds_chosen": 11.207176208496094, + "log_odds_ratio": -5.083268115413375e-05, + "logits/chosen": -0.6839872002601624, + "logits/rejected": -0.7153400778770447, + "logps/chosen": -0.00024315901100635529, + "logps/rejected": -2.7372074127197266, + "loss": 0.3759, + "nll_loss": 0.09395814687013626, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4315902919624932e-05, + "rewards/margins": 0.2736964523792267, + "rewards/rejected": -0.27372077107429504, + "step": 10200 + }, + { + "epoch": 7.05463347164592, + "grad_norm": 5.1407904624938965, + "learning_rate": 1.6363147379744892e-05, + "log_odds_chosen": 11.010181427001953, + "log_odds_ratio": -3.795043448917568e-05, + "logits/chosen": -0.2906171381473541, + "logits/rejected": -0.34959375858306885, + "logps/chosen": -0.00033321738010272384, + "logps/rejected": -2.463109016418457, + "loss": 0.3027, + "nll_loss": 0.07567539811134338, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3321739465463907e-05, + "rewards/margins": 0.2462776005268097, + "rewards/rejected": -0.24631091952323914, + "step": 10201 + }, + { + "epoch": 7.055325034578146, + "grad_norm": 4.411509990692139, + "learning_rate": 1.635930536345474e-05, + "log_odds_chosen": 12.155475616455078, + "log_odds_ratio": -5.8786867157323286e-05, + "logits/chosen": -0.5480877161026001, + "logits/rejected": -0.6062612533569336, + "logps/chosen": -0.00023595246602781117, + "logps/rejected": -3.229804515838623, + "loss": 0.5152, + "nll_loss": 0.12880592048168182, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3595246602781117e-05, + "rewards/margins": 0.3229568302631378, + "rewards/rejected": -0.32298046350479126, + "step": 10202 + }, + { + "epoch": 7.056016597510373, + "grad_norm": 4.577481746673584, + "learning_rate": 1.6355463347164593e-05, + "log_odds_chosen": 10.382850646972656, + "log_odds_ratio": -7.819625170668587e-05, + "logits/chosen": 0.20893914997577667, + "logits/rejected": 0.13939368724822998, + "logps/chosen": -0.0005484464927576482, + "logps/rejected": -2.282536506652832, + "loss": 0.4111, + "nll_loss": 0.10276637226343155, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.48446478205733e-05, + "rewards/margins": 0.2281987965106964, + "rewards/rejected": -0.22825364768505096, + "step": 10203 + }, + { + "epoch": 7.0567081604426, + "grad_norm": 5.125157356262207, + "learning_rate": 1.6351621330874446e-05, + "log_odds_chosen": 10.655238151550293, + "log_odds_ratio": -8.667867223266512e-05, + "logits/chosen": -0.31201982498168945, + "logits/rejected": -0.3651972711086273, + "logps/chosen": -0.00029079418163746595, + "logps/rejected": -2.151477813720703, + "loss": 0.3703, + "nll_loss": 0.09255577623844147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9079419618938118e-05, + "rewards/margins": 0.21511872112751007, + "rewards/rejected": -0.21514779329299927, + "step": 10204 + }, + { + "epoch": 7.057399723374827, + "grad_norm": 6.1740217208862305, + "learning_rate": 1.6347779314584295e-05, + "log_odds_chosen": 10.545979499816895, + "log_odds_ratio": -6.465271872002631e-05, + "logits/chosen": -0.45721954107284546, + "logits/rejected": -0.4858323931694031, + "logps/chosen": -0.00011708165402524173, + "logps/rejected": -1.588803768157959, + "loss": 0.3377, + "nll_loss": 0.08442966639995575, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1708165402524173e-05, + "rewards/margins": 0.158868670463562, + "rewards/rejected": -0.15888039767742157, + "step": 10205 + }, + { + "epoch": 7.058091286307054, + "grad_norm": 4.65233039855957, + "learning_rate": 1.6343937298294147e-05, + "log_odds_chosen": 10.411567687988281, + "log_odds_ratio": -8.02720314823091e-05, + "logits/chosen": -0.1786220222711563, + "logits/rejected": -0.19122397899627686, + "logps/chosen": -0.00019446434453129768, + "logps/rejected": -1.4409358501434326, + "loss": 0.3758, + "nll_loss": 0.09393958747386932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9446433725534007e-05, + "rewards/margins": 0.14407414197921753, + "rewards/rejected": -0.1440935879945755, + "step": 10206 + }, + { + "epoch": 7.058782849239281, + "grad_norm": 5.362807273864746, + "learning_rate": 1.6340095282003996e-05, + "log_odds_chosen": 9.287012100219727, + "log_odds_ratio": -0.0006890307413414121, + "logits/chosen": -0.3114354610443115, + "logits/rejected": -0.3563784956932068, + "logps/chosen": -0.0005471562035381794, + "logps/rejected": -1.4092155694961548, + "loss": 0.7202, + "nll_loss": 0.17999312281608582, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.471562690217979e-05, + "rewards/margins": 0.14086684584617615, + "rewards/rejected": -0.14092156291007996, + "step": 10207 + }, + { + "epoch": 7.059474412171507, + "grad_norm": 23.543014526367188, + "learning_rate": 1.6336253265713845e-05, + "log_odds_chosen": 10.889178276062012, + "log_odds_ratio": -5.638160655507818e-05, + "logits/chosen": -0.07049977779388428, + "logits/rejected": 0.04252389073371887, + "logps/chosen": -0.0003565900551620871, + "logps/rejected": -2.31201171875, + "loss": 0.4739, + "nll_loss": 0.11845926195383072, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.565900988178328e-05, + "rewards/margins": 0.23116551339626312, + "rewards/rejected": -0.2312011569738388, + "step": 10208 + }, + { + "epoch": 7.060165975103734, + "grad_norm": 8.616909980773926, + "learning_rate": 1.6332411249423698e-05, + "log_odds_chosen": 10.404594421386719, + "log_odds_ratio": -0.00043424172326922417, + "logits/chosen": -0.3541746139526367, + "logits/rejected": -0.3857997953891754, + "logps/chosen": -0.0004474206070881337, + "logps/rejected": -2.0740795135498047, + "loss": 0.3256, + "nll_loss": 0.08135127276182175, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4742064346792176e-05, + "rewards/margins": 0.20736320316791534, + "rewards/rejected": -0.20740795135498047, + "step": 10209 + }, + { + "epoch": 7.060857538035961, + "grad_norm": 6.20536994934082, + "learning_rate": 1.632856923313355e-05, + "log_odds_chosen": 11.783140182495117, + "log_odds_ratio": -2.0506131477304734e-05, + "logits/chosen": -0.2444104254245758, + "logits/rejected": -0.2824934124946594, + "logps/chosen": -0.0010264317970722914, + "logps/rejected": -3.3801109790802, + "loss": 0.7628, + "nll_loss": 0.19070187211036682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010264317825203761, + "rewards/margins": 0.33790841698646545, + "rewards/rejected": -0.33801108598709106, + "step": 10210 + }, + { + "epoch": 7.061549100968188, + "grad_norm": 5.094874858856201, + "learning_rate": 1.63247272168434e-05, + "log_odds_chosen": 11.312137603759766, + "log_odds_ratio": -7.084964454406872e-05, + "logits/chosen": -0.21776999533176422, + "logits/rejected": -0.26254236698150635, + "logps/chosen": -0.00019212032202631235, + "logps/rejected": -2.4793243408203125, + "loss": 0.6099, + "nll_loss": 0.1524657905101776, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9212033294024877e-05, + "rewards/margins": 0.24791322648525238, + "rewards/rejected": -0.24793241918087006, + "step": 10211 + }, + { + "epoch": 7.062240663900415, + "grad_norm": 5.680364608764648, + "learning_rate": 1.6320885200553252e-05, + "log_odds_chosen": 10.518997192382812, + "log_odds_ratio": -5.35045110154897e-05, + "logits/chosen": -0.6529715061187744, + "logits/rejected": -0.7397947907447815, + "logps/chosen": -0.0001638684479985386, + "logps/rejected": -1.8137354850769043, + "loss": 0.6617, + "nll_loss": 0.16541114449501038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6386846255045384e-05, + "rewards/margins": 0.18135717511177063, + "rewards/rejected": -0.18137355148792267, + "step": 10212 + }, + { + "epoch": 7.0629322268326415, + "grad_norm": 8.546245574951172, + "learning_rate": 1.6317043184263104e-05, + "log_odds_chosen": 10.472393035888672, + "log_odds_ratio": -6.55086332699284e-05, + "logits/chosen": -0.2394629418849945, + "logits/rejected": -0.44569700956344604, + "logps/chosen": -0.0004339215811342001, + "logps/rejected": -2.0617246627807617, + "loss": 0.5088, + "nll_loss": 0.12718401849269867, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.339216320659034e-05, + "rewards/margins": 0.2061290442943573, + "rewards/rejected": -0.20617243647575378, + "step": 10213 + }, + { + "epoch": 7.063623789764868, + "grad_norm": 15.67087459564209, + "learning_rate": 1.6313201167972953e-05, + "log_odds_chosen": 10.652711868286133, + "log_odds_ratio": -0.00014126955647952855, + "logits/chosen": -0.45673927664756775, + "logits/rejected": -0.45390135049819946, + "logps/chosen": -0.0003136818122584373, + "logps/rejected": -2.313603162765503, + "loss": 0.5092, + "nll_loss": 0.12729515135288239, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.136818122584373e-05, + "rewards/margins": 0.23132893443107605, + "rewards/rejected": -0.2313603013753891, + "step": 10214 + }, + { + "epoch": 7.064315352697095, + "grad_norm": 7.342169284820557, + "learning_rate": 1.6309359151682806e-05, + "log_odds_chosen": 10.016858100891113, + "log_odds_ratio": -0.00016397902800235897, + "logits/chosen": -0.3828228712081909, + "logits/rejected": -0.45859494805336, + "logps/chosen": -0.0007054362213239074, + "logps/rejected": -2.5958151817321777, + "loss": 0.5443, + "nll_loss": 0.13604670763015747, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.054361776681617e-05, + "rewards/margins": 0.25951096415519714, + "rewards/rejected": -0.2595815062522888, + "step": 10215 + }, + { + "epoch": 7.065006915629322, + "grad_norm": 5.9624528884887695, + "learning_rate": 1.6305517135392655e-05, + "log_odds_chosen": 11.168617248535156, + "log_odds_ratio": -4.607412120094523e-05, + "logits/chosen": -0.8698675632476807, + "logits/rejected": -0.9820318222045898, + "logps/chosen": -0.0002429347368888557, + "logps/rejected": -2.4946818351745605, + "loss": 0.3995, + "nll_loss": 0.09987377375364304, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4293474780279212e-05, + "rewards/margins": 0.2494438886642456, + "rewards/rejected": -0.24946817755699158, + "step": 10216 + }, + { + "epoch": 7.065698478561549, + "grad_norm": 3.372307062149048, + "learning_rate": 1.6301675119102504e-05, + "log_odds_chosen": 10.415140151977539, + "log_odds_ratio": -6.47974229650572e-05, + "logits/chosen": -0.6113623976707458, + "logits/rejected": -0.6171808838844299, + "logps/chosen": -0.0001965291448868811, + "logps/rejected": -1.7590970993041992, + "loss": 0.2584, + "nll_loss": 0.06460408866405487, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.965291448868811e-05, + "rewards/margins": 0.17589005827903748, + "rewards/rejected": -0.17590972781181335, + "step": 10217 + }, + { + "epoch": 7.066390041493776, + "grad_norm": 2.9455246925354004, + "learning_rate": 1.6297833102812356e-05, + "log_odds_chosen": 10.359132766723633, + "log_odds_ratio": -8.454386261291802e-05, + "logits/chosen": -0.9020794034004211, + "logits/rejected": -0.8879674673080444, + "logps/chosen": -0.000216637272387743, + "logps/rejected": -1.8610130548477173, + "loss": 0.2557, + "nll_loss": 0.06390724331140518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.16637272387743e-05, + "rewards/margins": 0.18607963621616364, + "rewards/rejected": -0.1861013025045395, + "step": 10218 + }, + { + "epoch": 7.0670816044260025, + "grad_norm": 8.130203247070312, + "learning_rate": 1.629399108652221e-05, + "log_odds_chosen": 9.983545303344727, + "log_odds_ratio": -0.00011885909043485299, + "logits/chosen": -0.5236350297927856, + "logits/rejected": -0.5692715644836426, + "logps/chosen": -0.0002489006146788597, + "logps/rejected": -1.7812128067016602, + "loss": 0.5579, + "nll_loss": 0.1394590139389038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4890059648896568e-05, + "rewards/margins": 0.17809638381004333, + "rewards/rejected": -0.17812128365039825, + "step": 10219 + }, + { + "epoch": 7.067773167358229, + "grad_norm": 6.38986873626709, + "learning_rate": 1.6290149070232058e-05, + "log_odds_chosen": 9.96983814239502, + "log_odds_ratio": -0.0007384002674371004, + "logits/chosen": -0.3837193548679352, + "logits/rejected": -0.4665347933769226, + "logps/chosen": -0.0025215353816747665, + "logps/rejected": -2.4805798530578613, + "loss": 0.5105, + "nll_loss": 0.12754540145397186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025215354980900884, + "rewards/margins": 0.24780580401420593, + "rewards/rejected": -0.24805796146392822, + "step": 10220 + }, + { + "epoch": 7.068464730290456, + "grad_norm": 5.260932922363281, + "learning_rate": 1.628630705394191e-05, + "log_odds_chosen": 10.382405281066895, + "log_odds_ratio": -4.916860780213028e-05, + "logits/chosen": -0.5273264050483704, + "logits/rejected": -0.4013931453227997, + "logps/chosen": -0.00019109931599814445, + "logps/rejected": -1.818406581878662, + "loss": 0.562, + "nll_loss": 0.1404985934495926, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9109931599814445e-05, + "rewards/margins": 0.1818215399980545, + "rewards/rejected": -0.1818406581878662, + "step": 10221 + }, + { + "epoch": 7.069156293222683, + "grad_norm": 5.364531993865967, + "learning_rate": 1.6282465037651763e-05, + "log_odds_chosen": 10.817171096801758, + "log_odds_ratio": -5.73900033487007e-05, + "logits/chosen": -0.4159190058708191, + "logits/rejected": -0.45596784353256226, + "logps/chosen": -0.00012160463666077703, + "logps/rejected": -1.9424967765808105, + "loss": 0.4322, + "nll_loss": 0.10803662240505219, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2160464393673465e-05, + "rewards/margins": 0.19423751533031464, + "rewards/rejected": -0.19424967467784882, + "step": 10222 + }, + { + "epoch": 7.06984785615491, + "grad_norm": 5.511767387390137, + "learning_rate": 1.6278623021361612e-05, + "log_odds_chosen": 10.276840209960938, + "log_odds_ratio": -0.00043214912875555456, + "logits/chosen": -0.16192559897899628, + "logits/rejected": -0.21732264757156372, + "logps/chosen": -0.0008462379919365048, + "logps/rejected": -2.0555167198181152, + "loss": 0.6557, + "nll_loss": 0.16387483477592468, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.462379628326744e-05, + "rewards/margins": 0.20546706020832062, + "rewards/rejected": -0.20555168390274048, + "step": 10223 + }, + { + "epoch": 7.070539419087137, + "grad_norm": 11.26865291595459, + "learning_rate": 1.6274781005071464e-05, + "log_odds_chosen": 10.140848159790039, + "log_odds_ratio": -0.0001493426680099219, + "logits/chosen": -0.12188369035720825, + "logits/rejected": -0.08648045361042023, + "logps/chosen": -0.0004011866985820234, + "logps/rejected": -1.6165556907653809, + "loss": 0.5129, + "nll_loss": 0.12821908295154572, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011866985820234e-05, + "rewards/margins": 0.16161544620990753, + "rewards/rejected": -0.16165557503700256, + "step": 10224 + }, + { + "epoch": 7.0712309820193635, + "grad_norm": 4.674397945404053, + "learning_rate": 1.6270938988781313e-05, + "log_odds_chosen": 11.605939865112305, + "log_odds_ratio": -1.6314776075887494e-05, + "logits/chosen": -0.18150296807289124, + "logits/rejected": -0.30134832859039307, + "logps/chosen": -0.00014632599777542055, + "logps/rejected": -2.1314470767974854, + "loss": 0.4547, + "nll_loss": 0.11368532478809357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4632600141339935e-05, + "rewards/margins": 0.21313008666038513, + "rewards/rejected": -0.2131447196006775, + "step": 10225 + }, + { + "epoch": 7.07192254495159, + "grad_norm": 7.563007831573486, + "learning_rate": 1.6267096972491162e-05, + "log_odds_chosen": 11.143383979797363, + "log_odds_ratio": -6.769195169908926e-05, + "logits/chosen": -0.3891788125038147, + "logits/rejected": -0.6019364595413208, + "logps/chosen": -0.0005590927321463823, + "logps/rejected": -2.7319889068603516, + "loss": 0.5802, + "nll_loss": 0.14505285024642944, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5909269576659426e-05, + "rewards/margins": 0.2731429934501648, + "rewards/rejected": -0.2731989026069641, + "step": 10226 + }, + { + "epoch": 7.072614107883817, + "grad_norm": 4.823071002960205, + "learning_rate": 1.6263254956201015e-05, + "log_odds_chosen": 10.227241516113281, + "log_odds_ratio": -9.763806156115606e-05, + "logits/chosen": -0.6507863402366638, + "logits/rejected": -0.7303443551063538, + "logps/chosen": -0.00030584761407226324, + "logps/rejected": -1.9578144550323486, + "loss": 0.2998, + "nll_loss": 0.07494589686393738, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.058476067963056e-05, + "rewards/margins": 0.19575083255767822, + "rewards/rejected": -0.19578143954277039, + "step": 10227 + }, + { + "epoch": 7.073305670816044, + "grad_norm": 3.057354688644409, + "learning_rate": 1.6259412939910864e-05, + "log_odds_chosen": 10.67620849609375, + "log_odds_ratio": -0.00018128998635802418, + "logits/chosen": -0.650242030620575, + "logits/rejected": -0.6747527718544006, + "logps/chosen": -0.00023936937213875353, + "logps/rejected": -1.8818947076797485, + "loss": 0.3277, + "nll_loss": 0.08190518617630005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3936938305268995e-05, + "rewards/margins": 0.188165545463562, + "rewards/rejected": -0.18818946182727814, + "step": 10228 + }, + { + "epoch": 7.073997233748271, + "grad_norm": 6.393499851226807, + "learning_rate": 1.6255570923620716e-05, + "log_odds_chosen": 11.460598945617676, + "log_odds_ratio": -1.495017932029441e-05, + "logits/chosen": -0.4063599407672882, + "logits/rejected": -0.4028100371360779, + "logps/chosen": -0.00011225016351090744, + "logps/rejected": -2.349792957305908, + "loss": 0.4413, + "nll_loss": 0.11032424122095108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1225016351090744e-05, + "rewards/margins": 0.23496806621551514, + "rewards/rejected": -0.2349792867898941, + "step": 10229 + }, + { + "epoch": 7.074688796680498, + "grad_norm": 6.394151210784912, + "learning_rate": 1.625172890733057e-05, + "log_odds_chosen": 11.290897369384766, + "log_odds_ratio": -2.093686634907499e-05, + "logits/chosen": -0.2111247032880783, + "logits/rejected": -0.30278387665748596, + "logps/chosen": -0.00013798041618429124, + "logps/rejected": -2.391096591949463, + "loss": 0.5643, + "nll_loss": 0.1410730928182602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3798041436530184e-05, + "rewards/margins": 0.23909586668014526, + "rewards/rejected": -0.23910966515541077, + "step": 10230 + }, + { + "epoch": 7.0753803596127245, + "grad_norm": 5.258058071136475, + "learning_rate": 1.6247886891040418e-05, + "log_odds_chosen": 10.287449836730957, + "log_odds_ratio": -0.00016187971050385386, + "logits/chosen": -0.5530920028686523, + "logits/rejected": -0.6217649579048157, + "logps/chosen": -0.00041097920620813966, + "logps/rejected": -1.967341423034668, + "loss": 0.5306, + "nll_loss": 0.1326388567686081, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.109792644158006e-05, + "rewards/margins": 0.1966930329799652, + "rewards/rejected": -0.19673413038253784, + "step": 10231 + }, + { + "epoch": 7.076071922544951, + "grad_norm": 7.700748920440674, + "learning_rate": 1.624404487475027e-05, + "log_odds_chosen": 10.730502128601074, + "log_odds_ratio": -5.2122355555184186e-05, + "logits/chosen": -0.06191644072532654, + "logits/rejected": -0.14686648547649384, + "logps/chosen": -0.00042249378748238087, + "logps/rejected": -1.9075627326965332, + "loss": 0.3847, + "nll_loss": 0.09616245329380035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.224937947583385e-05, + "rewards/margins": 0.1907140165567398, + "rewards/rejected": -0.19075626134872437, + "step": 10232 + }, + { + "epoch": 7.076763485477178, + "grad_norm": 6.837832927703857, + "learning_rate": 1.6240202858460123e-05, + "log_odds_chosen": 11.609643936157227, + "log_odds_ratio": -0.00018670190183911473, + "logits/chosen": 0.05244845151901245, + "logits/rejected": -0.07849866151809692, + "logps/chosen": -0.00014949383330531418, + "logps/rejected": -2.7908411026000977, + "loss": 0.8373, + "nll_loss": 0.20931746065616608, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.494938442192506e-05, + "rewards/margins": 0.279069185256958, + "rewards/rejected": -0.27908408641815186, + "step": 10233 + }, + { + "epoch": 7.077455048409405, + "grad_norm": 3.711728096008301, + "learning_rate": 1.6236360842169972e-05, + "log_odds_chosen": 11.082402229309082, + "log_odds_ratio": -3.26655208482407e-05, + "logits/chosen": -0.3518868684768677, + "logits/rejected": -0.37547898292541504, + "logps/chosen": -0.0003602092619985342, + "logps/rejected": -2.4345664978027344, + "loss": 0.6388, + "nll_loss": 0.15970079600811005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.602092692744918e-05, + "rewards/margins": 0.24342063069343567, + "rewards/rejected": -0.24345663189888, + "step": 10234 + }, + { + "epoch": 7.078146611341632, + "grad_norm": 7.973918914794922, + "learning_rate": 1.623251882587982e-05, + "log_odds_chosen": 10.20424747467041, + "log_odds_ratio": -0.00019141007214784622, + "logits/chosen": -0.6154760122299194, + "logits/rejected": -0.7376799583435059, + "logps/chosen": -0.0003875193651765585, + "logps/rejected": -1.942379355430603, + "loss": 0.5139, + "nll_loss": 0.1284589171409607, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8751939428038895e-05, + "rewards/margins": 0.19419917464256287, + "rewards/rejected": -0.19423794746398926, + "step": 10235 + }, + { + "epoch": 7.078838174273859, + "grad_norm": 4.550065040588379, + "learning_rate": 1.6228676809589673e-05, + "log_odds_chosen": 10.399216651916504, + "log_odds_ratio": -9.611922723706812e-05, + "logits/chosen": -0.11642065644264221, + "logits/rejected": -0.13028909265995026, + "logps/chosen": -0.0006603579386137426, + "logps/rejected": -2.431800603866577, + "loss": 0.4395, + "nll_loss": 0.10985739529132843, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.603579822694883e-05, + "rewards/margins": 0.24311403930187225, + "rewards/rejected": -0.2431800663471222, + "step": 10236 + }, + { + "epoch": 7.0795297372060855, + "grad_norm": 5.748769760131836, + "learning_rate": 1.6224834793299522e-05, + "log_odds_chosen": 10.851175308227539, + "log_odds_ratio": -0.00013820805179420859, + "logits/chosen": -0.08196417987346649, + "logits/rejected": -0.1969267725944519, + "logps/chosen": -0.0003377099637873471, + "logps/rejected": -2.160815715789795, + "loss": 0.4784, + "nll_loss": 0.11958113312721252, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3771000744309276e-05, + "rewards/margins": 0.21604779362678528, + "rewards/rejected": -0.21608155965805054, + "step": 10237 + }, + { + "epoch": 7.080221300138312, + "grad_norm": 4.433885097503662, + "learning_rate": 1.6220992777009375e-05, + "log_odds_chosen": 10.613290786743164, + "log_odds_ratio": -7.50662584323436e-05, + "logits/chosen": -0.6019273400306702, + "logits/rejected": -0.6181836128234863, + "logps/chosen": -0.0005927207530476153, + "logps/rejected": -2.4414610862731934, + "loss": 0.5776, + "nll_loss": 0.14439508318901062, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.927207166678272e-05, + "rewards/margins": 0.2440868616104126, + "rewards/rejected": -0.24414612352848053, + "step": 10238 + }, + { + "epoch": 7.080912863070539, + "grad_norm": 5.122983932495117, + "learning_rate": 1.6217150760719227e-05, + "log_odds_chosen": 10.881866455078125, + "log_odds_ratio": -6.110264075687155e-05, + "logits/chosen": -0.3635219931602478, + "logits/rejected": -0.38984811305999756, + "logps/chosen": -0.00017556434613652527, + "logps/rejected": -2.0348191261291504, + "loss": 0.4104, + "nll_loss": 0.10259927809238434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7556434613652527e-05, + "rewards/margins": 0.2034643590450287, + "rewards/rejected": -0.20348191261291504, + "step": 10239 + }, + { + "epoch": 7.081604426002766, + "grad_norm": 3.281977891921997, + "learning_rate": 1.6213308744429076e-05, + "log_odds_chosen": 10.762151718139648, + "log_odds_ratio": -0.00011343157530063763, + "logits/chosen": -0.8903244733810425, + "logits/rejected": -0.8314093947410583, + "logps/chosen": -0.00030741217778995633, + "logps/rejected": -1.9653372764587402, + "loss": 0.5867, + "nll_loss": 0.14665445685386658, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.074121559620835e-05, + "rewards/margins": 0.19650298357009888, + "rewards/rejected": -0.1965337097644806, + "step": 10240 + }, + { + "epoch": 7.082295988934993, + "grad_norm": 5.201610088348389, + "learning_rate": 1.620946672813893e-05, + "log_odds_chosen": 11.167607307434082, + "log_odds_ratio": -4.033373625134118e-05, + "logits/chosen": -0.05866801738739014, + "logits/rejected": -0.07460634410381317, + "logps/chosen": -0.0003725805436260998, + "logps/rejected": -2.66947078704834, + "loss": 0.7783, + "nll_loss": 0.19457535445690155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.725805436260998e-05, + "rewards/margins": 0.26690980792045593, + "rewards/rejected": -0.26694709062576294, + "step": 10241 + }, + { + "epoch": 7.08298755186722, + "grad_norm": 7.919643402099609, + "learning_rate": 1.620562471184878e-05, + "log_odds_chosen": 9.751909255981445, + "log_odds_ratio": -0.0007580799865536392, + "logits/chosen": -0.5962412357330322, + "logits/rejected": -0.565430223941803, + "logps/chosen": -0.0007312754751183093, + "logps/rejected": -1.5626683235168457, + "loss": 0.7561, + "nll_loss": 0.18895158171653748, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.312755042221397e-05, + "rewards/margins": 0.15619370341300964, + "rewards/rejected": -0.15626683831214905, + "step": 10242 + }, + { + "epoch": 7.0836791147994465, + "grad_norm": 5.059696197509766, + "learning_rate": 1.620178269555863e-05, + "log_odds_chosen": 10.337515830993652, + "log_odds_ratio": -0.00017638430290389806, + "logits/chosen": 0.14084503054618835, + "logits/rejected": 0.10576988756656647, + "logps/chosen": -0.0007075028261169791, + "logps/rejected": -2.468397617340088, + "loss": 0.6239, + "nll_loss": 0.155946746468544, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.075029134284705e-05, + "rewards/margins": 0.2467690110206604, + "rewards/rejected": -0.24683977663516998, + "step": 10243 + }, + { + "epoch": 7.084370677731673, + "grad_norm": 5.1300177574157715, + "learning_rate": 1.619794067926848e-05, + "log_odds_chosen": 12.081521034240723, + "log_odds_ratio": -1.1685681784001645e-05, + "logits/chosen": -0.21191151440143585, + "logits/rejected": -0.2935439348220825, + "logps/chosen": -8.348415576620027e-05, + "logps/rejected": -2.5798864364624023, + "loss": 0.5743, + "nll_loss": 0.14358538389205933, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.34841648611473e-06, + "rewards/margins": 0.2579803168773651, + "rewards/rejected": -0.25798869132995605, + "step": 10244 + }, + { + "epoch": 7.0850622406639, + "grad_norm": 5.458332538604736, + "learning_rate": 1.619409866297833e-05, + "log_odds_chosen": 10.951864242553711, + "log_odds_ratio": -2.5700946935103275e-05, + "logits/chosen": -0.5814685821533203, + "logits/rejected": -0.651351273059845, + "logps/chosen": -0.00015072792302817106, + "logps/rejected": -2.1194839477539062, + "loss": 0.4206, + "nll_loss": 0.10515782982110977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5072793758008629e-05, + "rewards/margins": 0.21193332970142365, + "rewards/rejected": -0.21194839477539062, + "step": 10245 + }, + { + "epoch": 7.085753803596127, + "grad_norm": 4.95285177230835, + "learning_rate": 1.619025664668818e-05, + "log_odds_chosen": 10.801020622253418, + "log_odds_ratio": -0.00013282234431244433, + "logits/chosen": -0.41535109281539917, + "logits/rejected": -0.4948478937149048, + "logps/chosen": -0.00011315855226712301, + "logps/rejected": -2.1074440479278564, + "loss": 0.6761, + "nll_loss": 0.16900669038295746, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.131585486291442e-05, + "rewards/margins": 0.2107330858707428, + "rewards/rejected": -0.21074441075325012, + "step": 10246 + }, + { + "epoch": 7.086445366528354, + "grad_norm": 5.93438720703125, + "learning_rate": 1.6186414630398033e-05, + "log_odds_chosen": 10.71384048461914, + "log_odds_ratio": -0.0002663441700860858, + "logits/chosen": -0.3704487085342407, + "logits/rejected": -0.44287770986557007, + "logps/chosen": -0.000291989475954324, + "logps/rejected": -2.676551580429077, + "loss": 0.4612, + "nll_loss": 0.11527866125106812, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9198949050623924e-05, + "rewards/margins": 0.26762595772743225, + "rewards/rejected": -0.2676551640033722, + "step": 10247 + }, + { + "epoch": 7.087136929460581, + "grad_norm": 4.31528902053833, + "learning_rate": 1.6182572614107886e-05, + "log_odds_chosen": 11.076179504394531, + "log_odds_ratio": -7.725445175310597e-05, + "logits/chosen": -0.18510644137859344, + "logits/rejected": -0.2398597002029419, + "logps/chosen": -0.00028446520445868373, + "logps/rejected": -2.576986312866211, + "loss": 0.4202, + "nll_loss": 0.1050301194190979, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.844651862687897e-05, + "rewards/margins": 0.25767016410827637, + "rewards/rejected": -0.2576986253261566, + "step": 10248 + }, + { + "epoch": 7.087828492392807, + "grad_norm": 6.1143951416015625, + "learning_rate": 1.6178730597817735e-05, + "log_odds_chosen": 9.519444465637207, + "log_odds_ratio": -0.0008869217708706856, + "logits/chosen": -0.04826436936855316, + "logits/rejected": -0.23044337332248688, + "logps/chosen": -0.0009210369898937643, + "logps/rejected": -1.7885702848434448, + "loss": 0.6443, + "nll_loss": 0.16099432110786438, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.210369898937643e-05, + "rewards/margins": 0.1787649393081665, + "rewards/rejected": -0.17885704338550568, + "step": 10249 + }, + { + "epoch": 7.088520055325034, + "grad_norm": 5.239924430847168, + "learning_rate": 1.6174888581527587e-05, + "log_odds_chosen": 10.471399307250977, + "log_odds_ratio": -0.00048443872947245836, + "logits/chosen": -0.21016259491443634, + "logits/rejected": -0.25805291533470154, + "logps/chosen": -0.0004966690903529525, + "logps/rejected": -2.707772970199585, + "loss": 0.6466, + "nll_loss": 0.16160735487937927, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.966690903529525e-05, + "rewards/margins": 0.27072763442993164, + "rewards/rejected": -0.27077731490135193, + "step": 10250 + }, + { + "epoch": 7.089211618257261, + "grad_norm": 5.758553981781006, + "learning_rate": 1.617104656523744e-05, + "log_odds_chosen": 10.279219627380371, + "log_odds_ratio": -7.061962969601154e-05, + "logits/chosen": -0.4701883792877197, + "logits/rejected": -0.5953389406204224, + "logps/chosen": -0.0011390014551579952, + "logps/rejected": -2.49090838432312, + "loss": 0.6064, + "nll_loss": 0.15158718824386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011390014697099105, + "rewards/margins": 0.2489769607782364, + "rewards/rejected": -0.24909085035324097, + "step": 10251 + }, + { + "epoch": 7.089903181189488, + "grad_norm": 6.174829483032227, + "learning_rate": 1.616720454894729e-05, + "log_odds_chosen": 11.544928550720215, + "log_odds_ratio": -2.1656065655406564e-05, + "logits/chosen": -0.46109312772750854, + "logits/rejected": -0.3749909996986389, + "logps/chosen": -0.0003066678764298558, + "logps/rejected": -2.6392576694488525, + "loss": 0.4445, + "nll_loss": 0.11111783236265182, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.066678618779406e-05, + "rewards/margins": 0.26389509439468384, + "rewards/rejected": -0.2639257609844208, + "step": 10252 + }, + { + "epoch": 7.090594744121715, + "grad_norm": 5.161226272583008, + "learning_rate": 1.6163362532657138e-05, + "log_odds_chosen": 11.50408935546875, + "log_odds_ratio": -2.141688673873432e-05, + "logits/chosen": -0.36267930269241333, + "logits/rejected": -0.41441887617111206, + "logps/chosen": -9.26033389987424e-05, + "logps/rejected": -2.279778003692627, + "loss": 0.4567, + "nll_loss": 0.11417672038078308, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.260334991267882e-06, + "rewards/margins": 0.22796852886676788, + "rewards/rejected": -0.22797778248786926, + "step": 10253 + }, + { + "epoch": 7.091286307053942, + "grad_norm": 6.588757038116455, + "learning_rate": 1.615952051636699e-05, + "log_odds_chosen": 9.77501106262207, + "log_odds_ratio": -0.012199745513498783, + "logits/chosen": -0.7907540798187256, + "logits/rejected": -0.9560619592666626, + "logps/chosen": -0.006054680794477463, + "logps/rejected": -2.5418083667755127, + "loss": 0.9669, + "nll_loss": 0.24049539864063263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006054680561646819, + "rewards/margins": 0.2535753846168518, + "rewards/rejected": -0.2541808485984802, + "step": 10254 + }, + { + "epoch": 7.091977869986168, + "grad_norm": 8.896109580993652, + "learning_rate": 1.615567850007684e-05, + "log_odds_chosen": 10.15814208984375, + "log_odds_ratio": -0.00021658647165168077, + "logits/chosen": -0.1415342092514038, + "logits/rejected": -0.12889420986175537, + "logps/chosen": -0.0009950262028723955, + "logps/rejected": -2.5493836402893066, + "loss": 0.6642, + "nll_loss": 0.16603264212608337, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.950262028723955e-05, + "rewards/margins": 0.25483888387680054, + "rewards/rejected": -0.25493836402893066, + "step": 10255 + }, + { + "epoch": 7.092669432918395, + "grad_norm": 5.284745693206787, + "learning_rate": 1.615183648378669e-05, + "log_odds_chosen": 10.789627075195312, + "log_odds_ratio": -5.579138814937323e-05, + "logits/chosen": -0.7399893999099731, + "logits/rejected": -0.7633627653121948, + "logps/chosen": -0.00013292254880070686, + "logps/rejected": -1.8104476928710938, + "loss": 0.4407, + "nll_loss": 0.11017127335071564, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3292254152474925e-05, + "rewards/margins": 0.1810314953327179, + "rewards/rejected": -0.1810447871685028, + "step": 10256 + }, + { + "epoch": 7.093360995850622, + "grad_norm": 5.26345682144165, + "learning_rate": 1.6147994467496544e-05, + "log_odds_chosen": 12.348129272460938, + "log_odds_ratio": -5.9028197938459925e-06, + "logits/chosen": -0.5816521644592285, + "logits/rejected": -0.6002523899078369, + "logps/chosen": -8.477355731884018e-05, + "logps/rejected": -2.757575273513794, + "loss": 0.5918, + "nll_loss": 0.1479586660861969, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.477356459479779e-06, + "rewards/margins": 0.2757490575313568, + "rewards/rejected": -0.2757575511932373, + "step": 10257 + }, + { + "epoch": 7.094052558782849, + "grad_norm": 4.62961483001709, + "learning_rate": 1.6144152451206393e-05, + "log_odds_chosen": 10.03465461730957, + "log_odds_ratio": -0.00032837854814715683, + "logits/chosen": -0.6499353647232056, + "logits/rejected": -0.5975565910339355, + "logps/chosen": -0.0003332508495077491, + "logps/rejected": -1.9092382192611694, + "loss": 0.4114, + "nll_loss": 0.10280890017747879, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.332508640596643e-05, + "rewards/margins": 0.19089049100875854, + "rewards/rejected": -0.190923810005188, + "step": 10258 + }, + { + "epoch": 7.094744121715076, + "grad_norm": 8.160360336303711, + "learning_rate": 1.6140310434916246e-05, + "log_odds_chosen": 11.132489204406738, + "log_odds_ratio": -4.48873033747077e-05, + "logits/chosen": -0.7189033627510071, + "logits/rejected": -0.7392956614494324, + "logps/chosen": -0.0008609866490587592, + "logps/rejected": -3.053264617919922, + "loss": 0.6704, + "nll_loss": 0.16760316491127014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.609866927145049e-05, + "rewards/margins": 0.30524036288261414, + "rewards/rejected": -0.3053264617919922, + "step": 10259 + }, + { + "epoch": 7.095435684647303, + "grad_norm": 6.795961380004883, + "learning_rate": 1.6136468418626098e-05, + "log_odds_chosen": 11.607210159301758, + "log_odds_ratio": -1.796493597794324e-05, + "logits/chosen": -0.5604426860809326, + "logits/rejected": -0.5590736865997314, + "logps/chosen": -0.00020991811470594257, + "logps/rejected": -2.8993279933929443, + "loss": 0.5176, + "nll_loss": 0.12940140068531036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0991810742998496e-05, + "rewards/margins": 0.28991180658340454, + "rewards/rejected": -0.28993281722068787, + "step": 10260 + }, + { + "epoch": 7.096127247579529, + "grad_norm": 6.2075910568237305, + "learning_rate": 1.6132626402335947e-05, + "log_odds_chosen": 9.902588844299316, + "log_odds_ratio": -0.0001481300569139421, + "logits/chosen": -0.29406067728996277, + "logits/rejected": -0.4131001830101013, + "logps/chosen": -0.000532010046299547, + "logps/rejected": -1.9056155681610107, + "loss": 0.5059, + "nll_loss": 0.1264542192220688, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.320100535755046e-05, + "rewards/margins": 0.19050836563110352, + "rewards/rejected": -0.19056154787540436, + "step": 10261 + }, + { + "epoch": 7.096818810511756, + "grad_norm": 4.122228145599365, + "learning_rate": 1.6128784386045796e-05, + "log_odds_chosen": 10.819419860839844, + "log_odds_ratio": -8.17267646198161e-05, + "logits/chosen": -0.08426457643508911, + "logits/rejected": -0.1672859489917755, + "logps/chosen": -0.0007713919621892273, + "logps/rejected": -2.8938684463500977, + "loss": 0.6276, + "nll_loss": 0.1568855345249176, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.713919330853969e-05, + "rewards/margins": 0.28930971026420593, + "rewards/rejected": -0.2893868684768677, + "step": 10262 + }, + { + "epoch": 7.097510373443983, + "grad_norm": 3.4906275272369385, + "learning_rate": 1.612494236975565e-05, + "log_odds_chosen": 10.980330467224121, + "log_odds_ratio": -0.0008700335747562349, + "logits/chosen": -0.34543806314468384, + "logits/rejected": -0.43487393856048584, + "logps/chosen": -0.004367163870483637, + "logps/rejected": -2.638749361038208, + "loss": 0.4312, + "nll_loss": 0.10771296173334122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00043671642197296023, + "rewards/margins": 0.26343822479248047, + "rewards/rejected": -0.26387494802474976, + "step": 10263 + }, + { + "epoch": 7.09820193637621, + "grad_norm": 8.13571834564209, + "learning_rate": 1.6121100353465498e-05, + "log_odds_chosen": 10.953519821166992, + "log_odds_ratio": -2.4463508452754468e-05, + "logits/chosen": -0.7077839970588684, + "logits/rejected": -0.7161340713500977, + "logps/chosen": -0.00047470693243667483, + "logps/rejected": -2.5533576011657715, + "loss": 0.9703, + "nll_loss": 0.24257071316242218, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.747069760924205e-05, + "rewards/margins": 0.255288302898407, + "rewards/rejected": -0.2553357481956482, + "step": 10264 + }, + { + "epoch": 7.098893499308437, + "grad_norm": 4.885742664337158, + "learning_rate": 1.611725833717535e-05, + "log_odds_chosen": 11.693151473999023, + "log_odds_ratio": -2.9904567782068625e-05, + "logits/chosen": -0.14424534142017365, + "logits/rejected": -0.17319512367248535, + "logps/chosen": -0.00031677918741479516, + "logps/rejected": -3.179150342941284, + "loss": 0.4999, + "nll_loss": 0.12497483193874359, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1677918741479516e-05, + "rewards/margins": 0.31788334250450134, + "rewards/rejected": -0.31791502237319946, + "step": 10265 + }, + { + "epoch": 7.0995850622406635, + "grad_norm": 5.787229537963867, + "learning_rate": 1.6113416320885202e-05, + "log_odds_chosen": 10.765002250671387, + "log_odds_ratio": -0.00011930213076993823, + "logits/chosen": -0.38844630122184753, + "logits/rejected": -0.4450450837612152, + "logps/chosen": -0.00026077215443365276, + "logps/rejected": -1.7682405710220337, + "loss": 0.5353, + "nll_loss": 0.13381041586399078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.607721762615256e-05, + "rewards/margins": 0.17679797112941742, + "rewards/rejected": -0.17682406306266785, + "step": 10266 + }, + { + "epoch": 7.10027662517289, + "grad_norm": 6.018062591552734, + "learning_rate": 1.610957430459505e-05, + "log_odds_chosen": 10.322380065917969, + "log_odds_ratio": -0.00014119291154202074, + "logits/chosen": -0.3337664008140564, + "logits/rejected": -0.29941171407699585, + "logps/chosen": -0.00019943459483329207, + "logps/rejected": -1.98537015914917, + "loss": 0.6841, + "nll_loss": 0.17101384699344635, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9943458028137684e-05, + "rewards/margins": 0.1985170841217041, + "rewards/rejected": -0.19853702187538147, + "step": 10267 + }, + { + "epoch": 7.100968188105117, + "grad_norm": 3.84755277633667, + "learning_rate": 1.6105732288304904e-05, + "log_odds_chosen": 10.52993392944336, + "log_odds_ratio": -3.607830876717344e-05, + "logits/chosen": -0.5763347744941711, + "logits/rejected": -0.6093184351921082, + "logps/chosen": -0.00024122398463077843, + "logps/rejected": -2.0790586471557617, + "loss": 0.458, + "nll_loss": 0.11449619382619858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.412239700788632e-05, + "rewards/margins": 0.20788174867630005, + "rewards/rejected": -0.2079058736562729, + "step": 10268 + }, + { + "epoch": 7.101659751037344, + "grad_norm": 5.161709308624268, + "learning_rate": 1.6101890272014756e-05, + "log_odds_chosen": 11.676347732543945, + "log_odds_ratio": -1.4311031918623485e-05, + "logits/chosen": -0.5673502683639526, + "logits/rejected": -0.6082361936569214, + "logps/chosen": -8.296072337543592e-05, + "logps/rejected": -2.3757548332214355, + "loss": 0.5151, + "nll_loss": 0.12878523766994476, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.296072337543592e-06, + "rewards/margins": 0.23756720125675201, + "rewards/rejected": -0.23757551610469818, + "step": 10269 + }, + { + "epoch": 7.102351313969571, + "grad_norm": 3.7552056312561035, + "learning_rate": 1.6098048255724605e-05, + "log_odds_chosen": 11.910984992980957, + "log_odds_ratio": -8.620838343631476e-06, + "logits/chosen": -0.28908026218414307, + "logits/rejected": -0.4366893470287323, + "logps/chosen": -0.0001288650673814118, + "logps/rejected": -2.667642116546631, + "loss": 0.4266, + "nll_loss": 0.10665756464004517, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2886507647635881e-05, + "rewards/margins": 0.2667512893676758, + "rewards/rejected": -0.26676419377326965, + "step": 10270 + }, + { + "epoch": 7.103042876901798, + "grad_norm": 6.64044713973999, + "learning_rate": 1.6094206239434455e-05, + "log_odds_chosen": 10.219522476196289, + "log_odds_ratio": -0.00020672775281127542, + "logits/chosen": -0.31991302967071533, + "logits/rejected": -0.3143084943294525, + "logps/chosen": -0.0004434236034285277, + "logps/rejected": -2.1775803565979004, + "loss": 0.5875, + "nll_loss": 0.1468459665775299, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.434235961525701e-05, + "rewards/margins": 0.21771368384361267, + "rewards/rejected": -0.21775802969932556, + "step": 10271 + }, + { + "epoch": 7.1037344398340245, + "grad_norm": 5.8512678146362305, + "learning_rate": 1.6090364223144307e-05, + "log_odds_chosen": 11.630446434020996, + "log_odds_ratio": -1.6054920706665143e-05, + "logits/chosen": -0.19076719880104065, + "logits/rejected": -0.22760090231895447, + "logps/chosen": -0.00014438344805967063, + "logps/rejected": -2.5034735202789307, + "loss": 0.5637, + "nll_loss": 0.1409282684326172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4438344805967063e-05, + "rewards/margins": 0.25033292174339294, + "rewards/rejected": -0.2503473460674286, + "step": 10272 + }, + { + "epoch": 7.104426002766251, + "grad_norm": 4.392897605895996, + "learning_rate": 1.6086522206854156e-05, + "log_odds_chosen": 9.212032318115234, + "log_odds_ratio": -0.0010728597408160567, + "logits/chosen": -0.24878910183906555, + "logits/rejected": -0.2609459459781647, + "logps/chosen": -0.0008410682203248143, + "logps/rejected": -1.4358794689178467, + "loss": 0.4094, + "nll_loss": 0.10223326086997986, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.410682494286448e-05, + "rewards/margins": 0.1435038298368454, + "rewards/rejected": -0.14358794689178467, + "step": 10273 + }, + { + "epoch": 7.105117565698478, + "grad_norm": 3.730031967163086, + "learning_rate": 1.608268019056401e-05, + "log_odds_chosen": 10.332513809204102, + "log_odds_ratio": -5.71914242755156e-05, + "logits/chosen": -0.1447673738002777, + "logits/rejected": -0.2496945858001709, + "logps/chosen": -0.0003094303538091481, + "logps/rejected": -2.1362357139587402, + "loss": 0.3658, + "nll_loss": 0.0914420485496521, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.094303610851057e-05, + "rewards/margins": 0.21359263360500336, + "rewards/rejected": -0.21362358331680298, + "step": 10274 + }, + { + "epoch": 7.105809128630705, + "grad_norm": 6.209634780883789, + "learning_rate": 1.607883817427386e-05, + "log_odds_chosen": 10.855377197265625, + "log_odds_ratio": -5.840754238306545e-05, + "logits/chosen": -0.11033168435096741, + "logits/rejected": -0.09126077592372894, + "logps/chosen": -0.0002168384671676904, + "logps/rejected": -1.9451351165771484, + "loss": 0.5188, + "nll_loss": 0.1296835094690323, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1683845261577517e-05, + "rewards/margins": 0.19449183344841003, + "rewards/rejected": -0.1945134997367859, + "step": 10275 + }, + { + "epoch": 7.106500691562932, + "grad_norm": 4.6433916091918945, + "learning_rate": 1.607499615798371e-05, + "log_odds_chosen": 12.053773880004883, + "log_odds_ratio": -1.450142099201912e-05, + "logits/chosen": -0.31673458218574524, + "logits/rejected": -0.35910022258758545, + "logps/chosen": -0.00022834296396467835, + "logps/rejected": -3.324519395828247, + "loss": 0.4187, + "nll_loss": 0.10467317700386047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2834297851659358e-05, + "rewards/margins": 0.33242911100387573, + "rewards/rejected": -0.3324519097805023, + "step": 10276 + }, + { + "epoch": 7.107192254495159, + "grad_norm": 5.220605373382568, + "learning_rate": 1.6071154141693562e-05, + "log_odds_chosen": 10.488154411315918, + "log_odds_ratio": -0.000241068220930174, + "logits/chosen": -0.4683865010738373, + "logits/rejected": -0.3266890048980713, + "logps/chosen": -0.00121038977522403, + "logps/rejected": -2.3827860355377197, + "loss": 0.339, + "nll_loss": 0.08471442759037018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012103898916393518, + "rewards/margins": 0.23815757036209106, + "rewards/rejected": -0.2382786124944687, + "step": 10277 + }, + { + "epoch": 7.1078838174273855, + "grad_norm": 4.905571460723877, + "learning_rate": 1.6067312125403415e-05, + "log_odds_chosen": 12.048296928405762, + "log_odds_ratio": -9.217043407261372e-05, + "logits/chosen": -0.3623006045818329, + "logits/rejected": -0.492048054933548, + "logps/chosen": -0.0002966909669339657, + "logps/rejected": -3.2547688484191895, + "loss": 0.9058, + "nll_loss": 0.22643068432807922, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9669095965800807e-05, + "rewards/margins": 0.3254472017288208, + "rewards/rejected": -0.32547685503959656, + "step": 10278 + }, + { + "epoch": 7.108575380359612, + "grad_norm": 5.757862091064453, + "learning_rate": 1.6063470109113264e-05, + "log_odds_chosen": 10.922185897827148, + "log_odds_ratio": -3.313596243970096e-05, + "logits/chosen": -0.6016491055488586, + "logits/rejected": -0.6293889284133911, + "logps/chosen": -0.00030946702463552356, + "logps/rejected": -2.4500162601470947, + "loss": 0.5111, + "nll_loss": 0.12777014076709747, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.094670319114812e-05, + "rewards/margins": 0.24497069418430328, + "rewards/rejected": -0.2450016438961029, + "step": 10279 + }, + { + "epoch": 7.109266943291839, + "grad_norm": 4.9893317222595215, + "learning_rate": 1.6059628092823116e-05, + "log_odds_chosen": 10.57332992553711, + "log_odds_ratio": -6.0701986512867734e-05, + "logits/chosen": -0.4107089638710022, + "logits/rejected": -0.475097119808197, + "logps/chosen": -0.00036204716889187694, + "logps/rejected": -2.0633039474487305, + "loss": 0.4285, + "nll_loss": 0.10711251944303513, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.620472125476226e-05, + "rewards/margins": 0.2062942087650299, + "rewards/rejected": -0.20633040368556976, + "step": 10280 + }, + { + "epoch": 7.109958506224066, + "grad_norm": 5.906787395477295, + "learning_rate": 1.6055786076532965e-05, + "log_odds_chosen": 11.538268089294434, + "log_odds_ratio": -1.9969585991930217e-05, + "logits/chosen": -0.601881742477417, + "logits/rejected": -0.6438528895378113, + "logps/chosen": -0.0001609336177352816, + "logps/rejected": -2.604485511779785, + "loss": 0.5312, + "nll_loss": 0.13279756903648376, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.609336322871968e-05, + "rewards/margins": 0.26043248176574707, + "rewards/rejected": -0.2604485750198364, + "step": 10281 + }, + { + "epoch": 7.110650069156293, + "grad_norm": 5.751000881195068, + "learning_rate": 1.6051944060242814e-05, + "log_odds_chosen": 11.557856559753418, + "log_odds_ratio": -2.7186484658159316e-05, + "logits/chosen": -0.4481320381164551, + "logits/rejected": -0.5386602282524109, + "logps/chosen": -0.00025518867187201977, + "logps/rejected": -2.9913394451141357, + "loss": 0.55, + "nll_loss": 0.13750889897346497, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5518867914797738e-05, + "rewards/margins": 0.29910844564437866, + "rewards/rejected": -0.29913395643234253, + "step": 10282 + }, + { + "epoch": 7.11134163208852, + "grad_norm": 5.170167446136475, + "learning_rate": 1.6048102043952667e-05, + "log_odds_chosen": 10.355810165405273, + "log_odds_ratio": -0.0001789717498468235, + "logits/chosen": -0.5239746570587158, + "logits/rejected": -0.5420616269111633, + "logps/chosen": -0.00022885017096996307, + "logps/rejected": -1.945081114768982, + "loss": 0.82, + "nll_loss": 0.20499102771282196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.288501855218783e-05, + "rewards/margins": 0.19448521733283997, + "rewards/rejected": -0.19450810551643372, + "step": 10283 + }, + { + "epoch": 7.1120331950207465, + "grad_norm": 4.432968616485596, + "learning_rate": 1.604426002766252e-05, + "log_odds_chosen": 9.272736549377441, + "log_odds_ratio": -0.00031625264091417193, + "logits/chosen": -0.7568486332893372, + "logits/rejected": -0.714116632938385, + "logps/chosen": -0.0004361242463346571, + "logps/rejected": -1.4550877809524536, + "loss": 0.3678, + "nll_loss": 0.09192757308483124, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3612428271444514e-05, + "rewards/margins": 0.1454651653766632, + "rewards/rejected": -0.1455087810754776, + "step": 10284 + }, + { + "epoch": 7.112724757952973, + "grad_norm": 4.4855780601501465, + "learning_rate": 1.604041801137237e-05, + "log_odds_chosen": 10.794032096862793, + "log_odds_ratio": -5.445224087452516e-05, + "logits/chosen": -0.3360009789466858, + "logits/rejected": -0.3737177848815918, + "logps/chosen": -0.0001795999560272321, + "logps/rejected": -2.0326905250549316, + "loss": 0.4603, + "nll_loss": 0.1150742843747139, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.795999560272321e-05, + "rewards/margins": 0.20325109362602234, + "rewards/rejected": -0.20326903462409973, + "step": 10285 + }, + { + "epoch": 7.1134163208852, + "grad_norm": 3.667229413986206, + "learning_rate": 1.603657599508222e-05, + "log_odds_chosen": 11.386724472045898, + "log_odds_ratio": -3.736492362804711e-05, + "logits/chosen": -0.4324737787246704, + "logits/rejected": -0.5414674282073975, + "logps/chosen": -0.0001199167309096083, + "logps/rejected": -1.8028628826141357, + "loss": 0.3823, + "nll_loss": 0.09556451439857483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1991674909950234e-05, + "rewards/margins": 0.18027429282665253, + "rewards/rejected": -0.18028628826141357, + "step": 10286 + }, + { + "epoch": 7.114107883817427, + "grad_norm": 5.564022064208984, + "learning_rate": 1.6032733978792073e-05, + "log_odds_chosen": 10.44112491607666, + "log_odds_ratio": -0.0002731723652686924, + "logits/chosen": -0.8399361371994019, + "logits/rejected": -0.8326637744903564, + "logps/chosen": -0.0007418487221002579, + "logps/rejected": -2.643864631652832, + "loss": 0.8054, + "nll_loss": 0.20133022964000702, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.418487075483426e-05, + "rewards/margins": 0.2643122673034668, + "rewards/rejected": -0.26438647508621216, + "step": 10287 + }, + { + "epoch": 7.114799446749654, + "grad_norm": 3.6635069847106934, + "learning_rate": 1.6028891962501922e-05, + "log_odds_chosen": 10.54667854309082, + "log_odds_ratio": -0.0004861929046455771, + "logits/chosen": -0.4404884874820709, + "logits/rejected": -0.3213157057762146, + "logps/chosen": -0.0008983593434095383, + "logps/rejected": -2.263770818710327, + "loss": 0.5492, + "nll_loss": 0.13726304471492767, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.983593579614535e-05, + "rewards/margins": 0.22628724575042725, + "rewards/rejected": -0.22637708485126495, + "step": 10288 + }, + { + "epoch": 7.115491009681881, + "grad_norm": 5.660526275634766, + "learning_rate": 1.6025049946211775e-05, + "log_odds_chosen": 10.557701110839844, + "log_odds_ratio": -3.345799632370472e-05, + "logits/chosen": -0.3213052749633789, + "logits/rejected": -0.21575619280338287, + "logps/chosen": -0.0002533650549594313, + "logps/rejected": -2.217205047607422, + "loss": 0.6923, + "nll_loss": 0.17308278381824493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.533650513214525e-05, + "rewards/margins": 0.2216951549053192, + "rewards/rejected": -0.22172048687934875, + "step": 10289 + }, + { + "epoch": 7.1161825726141075, + "grad_norm": 4.457588195800781, + "learning_rate": 1.6021207929921624e-05, + "log_odds_chosen": 11.590757369995117, + "log_odds_ratio": -2.638949081301689e-05, + "logits/chosen": -0.11472570896148682, + "logits/rejected": -0.2207712084054947, + "logps/chosen": -0.0001405734510626644, + "logps/rejected": -2.4990475177764893, + "loss": 0.5276, + "nll_loss": 0.1318967044353485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4057346561457962e-05, + "rewards/margins": 0.24989071488380432, + "rewards/rejected": -0.24990476667881012, + "step": 10290 + }, + { + "epoch": 7.116874135546334, + "grad_norm": 3.319014549255371, + "learning_rate": 1.6017365913631473e-05, + "log_odds_chosen": 11.62279987335205, + "log_odds_ratio": -5.7544584706192836e-05, + "logits/chosen": -0.7007091045379639, + "logits/rejected": -0.7249524593353271, + "logps/chosen": -0.00015849883493501693, + "logps/rejected": -2.691718101501465, + "loss": 0.3129, + "nll_loss": 0.07820774614810944, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.584988240210805e-05, + "rewards/margins": 0.26915597915649414, + "rewards/rejected": -0.269171804189682, + "step": 10291 + }, + { + "epoch": 7.117565698478561, + "grad_norm": 4.47081184387207, + "learning_rate": 1.6013523897341325e-05, + "log_odds_chosen": 11.697754859924316, + "log_odds_ratio": -7.944389653857797e-05, + "logits/chosen": -0.3712863326072693, + "logits/rejected": -0.46870869398117065, + "logps/chosen": -0.0001674926606938243, + "logps/rejected": -2.9090423583984375, + "loss": 0.4161, + "nll_loss": 0.10401224344968796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6749265341786668e-05, + "rewards/margins": 0.2908874750137329, + "rewards/rejected": -0.2909042239189148, + "step": 10292 + }, + { + "epoch": 7.118257261410788, + "grad_norm": 6.830644607543945, + "learning_rate": 1.6009681881051174e-05, + "log_odds_chosen": 11.319398880004883, + "log_odds_ratio": -5.532346403924748e-05, + "logits/chosen": 0.03190721571445465, + "logits/rejected": -0.02680405229330063, + "logps/chosen": -0.0006970899412408471, + "logps/rejected": -3.399998188018799, + "loss": 0.6686, + "nll_loss": 0.16714029014110565, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.970899266889319e-05, + "rewards/margins": 0.33993011713027954, + "rewards/rejected": -0.339999794960022, + "step": 10293 + }, + { + "epoch": 7.118948824343015, + "grad_norm": 6.158021450042725, + "learning_rate": 1.6005839864761027e-05, + "log_odds_chosen": 10.816229820251465, + "log_odds_ratio": -6.230578583199531e-05, + "logits/chosen": 0.07101868093013763, + "logits/rejected": -0.0015200749039649963, + "logps/chosen": -0.0005694905994459987, + "logps/rejected": -2.118673801422119, + "loss": 0.4078, + "nll_loss": 0.10194122791290283, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.694906576536596e-05, + "rewards/margins": 0.2118104249238968, + "rewards/rejected": -0.21186739206314087, + "step": 10294 + }, + { + "epoch": 7.119640387275242, + "grad_norm": 3.2136785984039307, + "learning_rate": 1.600199784847088e-05, + "log_odds_chosen": 10.907499313354492, + "log_odds_ratio": -0.00022041947522666305, + "logits/chosen": -0.7990951538085938, + "logits/rejected": -0.8849613666534424, + "logps/chosen": -9.993871935876086e-05, + "logps/rejected": -2.044731378555298, + "loss": 0.6505, + "nll_loss": 0.16259539127349854, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.993871572078206e-06, + "rewards/margins": 0.2044631540775299, + "rewards/rejected": -0.20447313785552979, + "step": 10295 + }, + { + "epoch": 7.1203319502074685, + "grad_norm": 4.888383388519287, + "learning_rate": 1.599815583218073e-05, + "log_odds_chosen": 10.031580924987793, + "log_odds_ratio": -0.0007512776064686477, + "logits/chosen": -0.46712109446525574, + "logits/rejected": -0.6181036829948425, + "logps/chosen": -0.0013126321136951447, + "logps/rejected": -2.666856288909912, + "loss": 0.9919, + "nll_loss": 0.2479080706834793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013126322301104665, + "rewards/margins": 0.2665543854236603, + "rewards/rejected": -0.2666856348514557, + "step": 10296 + }, + { + "epoch": 7.121023513139695, + "grad_norm": 5.787823677062988, + "learning_rate": 1.599431381589058e-05, + "log_odds_chosen": 11.687092781066895, + "log_odds_ratio": -9.246945410268381e-06, + "logits/chosen": -0.2057281881570816, + "logits/rejected": -0.18899966776371002, + "logps/chosen": -9.951591346180066e-05, + "logps/rejected": -2.4338736534118652, + "loss": 0.5648, + "nll_loss": 0.1412111520767212, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.951590982382186e-06, + "rewards/margins": 0.2433774322271347, + "rewards/rejected": -0.243387371301651, + "step": 10297 + }, + { + "epoch": 7.121715076071922, + "grad_norm": 5.652944564819336, + "learning_rate": 1.5990471799600433e-05, + "log_odds_chosen": 10.583305358886719, + "log_odds_ratio": -0.0001832281268434599, + "logits/chosen": -0.5304673314094543, + "logits/rejected": -0.4824105203151703, + "logps/chosen": -0.00040059618186205626, + "logps/rejected": -2.164419174194336, + "loss": 0.5758, + "nll_loss": 0.143941730260849, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0059618186205626e-05, + "rewards/margins": 0.2164018452167511, + "rewards/rejected": -0.21644189953804016, + "step": 10298 + }, + { + "epoch": 7.122406639004149, + "grad_norm": 14.913167953491211, + "learning_rate": 1.5986629783310282e-05, + "log_odds_chosen": 11.30325984954834, + "log_odds_ratio": -0.00014173545059747994, + "logits/chosen": -0.09313886612653732, + "logits/rejected": -0.17511555552482605, + "logps/chosen": -0.00034270616015419364, + "logps/rejected": -2.881847620010376, + "loss": 0.5357, + "nll_loss": 0.13390406966209412, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.42706152878236e-05, + "rewards/margins": 0.28815048933029175, + "rewards/rejected": -0.2881847620010376, + "step": 10299 + }, + { + "epoch": 7.123098201936376, + "grad_norm": 3.5610148906707764, + "learning_rate": 1.598278776702013e-05, + "log_odds_chosen": 10.192537307739258, + "log_odds_ratio": -0.00013249927724245936, + "logits/chosen": -0.8154253959655762, + "logits/rejected": -0.7605419158935547, + "logps/chosen": -0.0002521907154005021, + "logps/rejected": -1.9220904111862183, + "loss": 0.3911, + "nll_loss": 0.0977524071931839, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.521907299524173e-05, + "rewards/margins": 0.19218380749225616, + "rewards/rejected": -0.19220903515815735, + "step": 10300 + }, + { + "epoch": 7.123789764868603, + "grad_norm": 10.971964836120605, + "learning_rate": 1.5978945750729984e-05, + "log_odds_chosen": 10.807840347290039, + "log_odds_ratio": -0.0001476934558013454, + "logits/chosen": -0.6921945810317993, + "logits/rejected": -0.7799077033996582, + "logps/chosen": -0.0018123986665159464, + "logps/rejected": -2.6817121505737305, + "loss": 0.7212, + "nll_loss": 0.18029022216796875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001812398841138929, + "rewards/margins": 0.26798999309539795, + "rewards/rejected": -0.2681712210178375, + "step": 10301 + }, + { + "epoch": 7.124481327800829, + "grad_norm": 4.785852909088135, + "learning_rate": 1.5975103734439833e-05, + "log_odds_chosen": 12.113910675048828, + "log_odds_ratio": -2.486845369276125e-05, + "logits/chosen": -0.6913008093833923, + "logits/rejected": -0.7212645411491394, + "logps/chosen": -0.0002334258460905403, + "logps/rejected": -2.9632599353790283, + "loss": 0.5422, + "nll_loss": 0.13555222749710083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3342583517660387e-05, + "rewards/margins": 0.2963026762008667, + "rewards/rejected": -0.29632601141929626, + "step": 10302 + }, + { + "epoch": 7.125172890733056, + "grad_norm": 6.334150314331055, + "learning_rate": 1.5971261718149685e-05, + "log_odds_chosen": 10.088814735412598, + "log_odds_ratio": -0.00018660105706658214, + "logits/chosen": -0.2547610104084015, + "logits/rejected": -0.330096960067749, + "logps/chosen": -0.00034176313783973455, + "logps/rejected": -2.0302841663360596, + "loss": 0.5072, + "nll_loss": 0.1267801821231842, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.417631523916498e-05, + "rewards/margins": 0.2029942274093628, + "rewards/rejected": -0.20302842557430267, + "step": 10303 + }, + { + "epoch": 7.125864453665283, + "grad_norm": 13.262726783752441, + "learning_rate": 1.5967419701859538e-05, + "log_odds_chosen": 10.863768577575684, + "log_odds_ratio": -3.665126860141754e-05, + "logits/chosen": -0.33151307702064514, + "logits/rejected": -0.3889313042163849, + "logps/chosen": -0.00023698012228123844, + "logps/rejected": -1.9521350860595703, + "loss": 0.7971, + "nll_loss": 0.19927959144115448, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.369801040913444e-05, + "rewards/margins": 0.19518983364105225, + "rewards/rejected": -0.19521352648735046, + "step": 10304 + }, + { + "epoch": 7.12655601659751, + "grad_norm": 5.214549541473389, + "learning_rate": 1.5963577685569387e-05, + "log_odds_chosen": 11.219606399536133, + "log_odds_ratio": -3.0332190362969413e-05, + "logits/chosen": -0.4732060432434082, + "logits/rejected": -0.5152763724327087, + "logps/chosen": -0.0002018004743149504, + "logps/rejected": -2.71527099609375, + "loss": 0.4732, + "nll_loss": 0.11829044669866562, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0180048522888683e-05, + "rewards/margins": 0.2715069353580475, + "rewards/rejected": -0.27152711153030396, + "step": 10305 + }, + { + "epoch": 7.127247579529737, + "grad_norm": 7.065393924713135, + "learning_rate": 1.595973566927924e-05, + "log_odds_chosen": 9.460151672363281, + "log_odds_ratio": -0.00042650566319935024, + "logits/chosen": -0.312082976102829, + "logits/rejected": -0.3087159991264343, + "logps/chosen": -0.0018372769700363278, + "logps/rejected": -1.5538609027862549, + "loss": 0.4244, + "nll_loss": 0.10606159269809723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018372769409324974, + "rewards/margins": 0.15520235896110535, + "rewards/rejected": -0.1553860902786255, + "step": 10306 + }, + { + "epoch": 7.127939142461964, + "grad_norm": 7.4752936363220215, + "learning_rate": 1.595589365298909e-05, + "log_odds_chosen": 11.011021614074707, + "log_odds_ratio": -5.5785545555409044e-05, + "logits/chosen": -0.34302428364753723, + "logits/rejected": -0.4065769910812378, + "logps/chosen": -0.00016706316091585904, + "logps/rejected": -1.9993393421173096, + "loss": 0.4911, + "nll_loss": 0.12276819348335266, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6706317182979546e-05, + "rewards/margins": 0.19991722702980042, + "rewards/rejected": -0.1999339461326599, + "step": 10307 + }, + { + "epoch": 7.12863070539419, + "grad_norm": 6.967989921569824, + "learning_rate": 1.595205163669894e-05, + "log_odds_chosen": 10.802072525024414, + "log_odds_ratio": -7.645039295312017e-05, + "logits/chosen": -0.4464913308620453, + "logits/rejected": -0.402243435382843, + "logps/chosen": -0.00021770509192720056, + "logps/rejected": -2.035642385482788, + "loss": 0.3735, + "nll_loss": 0.09336201846599579, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1770507373730652e-05, + "rewards/margins": 0.20354247093200684, + "rewards/rejected": -0.20356424152851105, + "step": 10308 + }, + { + "epoch": 7.129322268326418, + "grad_norm": 5.5367751121521, + "learning_rate": 1.594820962040879e-05, + "log_odds_chosen": 10.833320617675781, + "log_odds_ratio": -6.608067633351311e-05, + "logits/chosen": -0.42877280712127686, + "logits/rejected": -0.40322345495224, + "logps/chosen": -0.00040209069265984, + "logps/rejected": -2.2934837341308594, + "loss": 0.4413, + "nll_loss": 0.11032700538635254, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.020907363155857e-05, + "rewards/margins": 0.22930817306041718, + "rewards/rejected": -0.22934837639331818, + "step": 10309 + }, + { + "epoch": 7.130013831258645, + "grad_norm": 4.261756420135498, + "learning_rate": 1.5944367604118642e-05, + "log_odds_chosen": 10.189852714538574, + "log_odds_ratio": -0.00017108373867813498, + "logits/chosen": -0.4927106201648712, + "logits/rejected": -0.5972291231155396, + "logps/chosen": -0.00038456491893157363, + "logps/rejected": -1.7242774963378906, + "loss": 0.4285, + "nll_loss": 0.10710548609495163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.845649189315736e-05, + "rewards/margins": 0.17238929867744446, + "rewards/rejected": -0.17242774367332458, + "step": 10310 + }, + { + "epoch": 7.130705394190872, + "grad_norm": 6.028716564178467, + "learning_rate": 1.594052558782849e-05, + "log_odds_chosen": 10.36422348022461, + "log_odds_ratio": -0.00011556592653505504, + "logits/chosen": 0.011183492839336395, + "logits/rejected": 0.023617416620254517, + "logps/chosen": -0.005435407627373934, + "logps/rejected": -2.5519256591796875, + "loss": 0.6331, + "nll_loss": 0.1582653671503067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005435406928882003, + "rewards/margins": 0.2546490430831909, + "rewards/rejected": -0.2551925480365753, + "step": 10311 + }, + { + "epoch": 7.131396957123099, + "grad_norm": 4.063992500305176, + "learning_rate": 1.5936683571538344e-05, + "log_odds_chosen": 10.359947204589844, + "log_odds_ratio": -4.255682142684236e-05, + "logits/chosen": -0.08217864483594894, + "logits/rejected": -0.14698439836502075, + "logps/chosen": -0.00041026753024198115, + "logps/rejected": -1.7657188177108765, + "loss": 0.3317, + "nll_loss": 0.08291476964950562, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.102674938621931e-05, + "rewards/margins": 0.1765308529138565, + "rewards/rejected": -0.17657186090946198, + "step": 10312 + }, + { + "epoch": 7.1320885200553255, + "grad_norm": 5.988481044769287, + "learning_rate": 1.5932841555248196e-05, + "log_odds_chosen": 10.727018356323242, + "log_odds_ratio": -7.427345553878695e-05, + "logits/chosen": -0.011402279138565063, + "logits/rejected": -0.15633204579353333, + "logps/chosen": -0.0004745680489577353, + "logps/rejected": -2.16426944732666, + "loss": 0.5971, + "nll_loss": 0.14926651120185852, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7456807806156576e-05, + "rewards/margins": 0.21637949347496033, + "rewards/rejected": -0.21642693877220154, + "step": 10313 + }, + { + "epoch": 7.132780082987552, + "grad_norm": 3.8203372955322266, + "learning_rate": 1.5928999538958045e-05, + "log_odds_chosen": 9.118133544921875, + "log_odds_ratio": -0.0004423003119882196, + "logits/chosen": -0.334600031375885, + "logits/rejected": -0.35691797733306885, + "logps/chosen": -0.0002994161914102733, + "logps/rejected": -0.8771684169769287, + "loss": 0.4588, + "nll_loss": 0.11465974897146225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.994161877722945e-05, + "rewards/margins": 0.08768689632415771, + "rewards/rejected": -0.08771683275699615, + "step": 10314 + }, + { + "epoch": 7.133471645919779, + "grad_norm": 4.283903121948242, + "learning_rate": 1.5925157522667898e-05, + "log_odds_chosen": 10.53400993347168, + "log_odds_ratio": -0.0004266462055966258, + "logits/chosen": 0.029743246734142303, + "logits/rejected": 0.004639193415641785, + "logps/chosen": -0.0015510256635025144, + "logps/rejected": -2.293443441390991, + "loss": 0.5751, + "nll_loss": 0.1437395066022873, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001551025634398684, + "rewards/margins": 0.22918926179409027, + "rewards/rejected": -0.22934435307979584, + "step": 10315 + }, + { + "epoch": 7.134163208852006, + "grad_norm": 3.7874436378479004, + "learning_rate": 1.592131550637775e-05, + "log_odds_chosen": 10.79223346710205, + "log_odds_ratio": -0.0005515572265721858, + "logits/chosen": 0.022121310234069824, + "logits/rejected": -0.11769037693738937, + "logps/chosen": -0.0011315718293190002, + "logps/rejected": -2.303229808807373, + "loss": 0.3958, + "nll_loss": 0.09889844059944153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011315719166304916, + "rewards/margins": 0.2302098125219345, + "rewards/rejected": -0.2303229719400406, + "step": 10316 + }, + { + "epoch": 7.134854771784233, + "grad_norm": 5.332455635070801, + "learning_rate": 1.59174734900876e-05, + "log_odds_chosen": 11.952607154846191, + "log_odds_ratio": -1.06203833638574e-05, + "logits/chosen": -0.041954405605793, + "logits/rejected": -0.03453304246068001, + "logps/chosen": -9.113582200370729e-05, + "logps/rejected": -2.6752333641052246, + "loss": 0.5962, + "nll_loss": 0.14904029667377472, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.113581654673908e-06, + "rewards/margins": 0.2675142288208008, + "rewards/rejected": -0.2675233483314514, + "step": 10317 + }, + { + "epoch": 7.13554633471646, + "grad_norm": 4.75240421295166, + "learning_rate": 1.5913631473797448e-05, + "log_odds_chosen": 10.119691848754883, + "log_odds_ratio": -0.00010590528836473823, + "logits/chosen": -0.41260311007499695, + "logits/rejected": -0.41198790073394775, + "logps/chosen": -0.0002040062245214358, + "logps/rejected": -1.4755959510803223, + "loss": 0.4137, + "nll_loss": 0.10340912640094757, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0400624634930864e-05, + "rewards/margins": 0.1475391983985901, + "rewards/rejected": -0.14755958318710327, + "step": 10318 + }, + { + "epoch": 7.136237897648686, + "grad_norm": 14.519569396972656, + "learning_rate": 1.59097894575073e-05, + "log_odds_chosen": 9.824604034423828, + "log_odds_ratio": -0.00032375051523558795, + "logits/chosen": -0.025557734072208405, + "logits/rejected": -0.07144501060247421, + "logps/chosen": -0.0012760567478835583, + "logps/rejected": -2.2134809494018555, + "loss": 0.7126, + "nll_loss": 0.1781070977449417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012760567187797278, + "rewards/margins": 0.2212204784154892, + "rewards/rejected": -0.22134807705879211, + "step": 10319 + }, + { + "epoch": 7.136929460580913, + "grad_norm": 5.569480895996094, + "learning_rate": 1.590594744121715e-05, + "log_odds_chosen": 10.440262794494629, + "log_odds_ratio": -0.00048077639075927436, + "logits/chosen": -0.16568979620933533, + "logits/rejected": -0.1728629171848297, + "logps/chosen": -0.000473564286949113, + "logps/rejected": -1.734022855758667, + "loss": 0.4548, + "nll_loss": 0.11366012692451477, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.735643233289011e-05, + "rewards/margins": 0.17335493862628937, + "rewards/rejected": -0.17340229451656342, + "step": 10320 + }, + { + "epoch": 7.13762102351314, + "grad_norm": 11.692529678344727, + "learning_rate": 1.5902105424927002e-05, + "log_odds_chosen": 10.846525192260742, + "log_odds_ratio": -9.60138495429419e-05, + "logits/chosen": -0.25794142484664917, + "logits/rejected": -0.28674668073654175, + "logps/chosen": -0.0003197805490344763, + "logps/rejected": -1.4928094148635864, + "loss": 0.2902, + "nll_loss": 0.07254131138324738, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.197805563104339e-05, + "rewards/margins": 0.14924898743629456, + "rewards/rejected": -0.14928095042705536, + "step": 10321 + }, + { + "epoch": 7.138312586445367, + "grad_norm": 5.813745021820068, + "learning_rate": 1.5898263408636855e-05, + "log_odds_chosen": 11.401609420776367, + "log_odds_ratio": -1.9921841158065945e-05, + "logits/chosen": -0.3643776774406433, + "logits/rejected": -0.4122941493988037, + "logps/chosen": -0.00016441484331153333, + "logps/rejected": -2.542574167251587, + "loss": 0.4553, + "nll_loss": 0.11381081491708755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.644148323975969e-05, + "rewards/margins": 0.2542409896850586, + "rewards/rejected": -0.2542574107646942, + "step": 10322 + }, + { + "epoch": 7.139004149377594, + "grad_norm": 6.33383846282959, + "learning_rate": 1.5894421392346704e-05, + "log_odds_chosen": 10.774675369262695, + "log_odds_ratio": -5.105520540382713e-05, + "logits/chosen": -0.5525538921356201, + "logits/rejected": -0.6430279612541199, + "logps/chosen": -7.996350177563727e-05, + "logps/rejected": -1.6820995807647705, + "loss": 0.3662, + "nll_loss": 0.091536745429039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.996350177563727e-06, + "rewards/margins": 0.1682019829750061, + "rewards/rejected": -0.168209969997406, + "step": 10323 + }, + { + "epoch": 7.139695712309821, + "grad_norm": 12.23645305633545, + "learning_rate": 1.5890579376056556e-05, + "log_odds_chosen": 11.197294235229492, + "log_odds_ratio": -0.0001597453810973093, + "logits/chosen": -0.5753327012062073, + "logits/rejected": -0.5307286977767944, + "logps/chosen": -0.000334289507009089, + "logps/rejected": -2.763484001159668, + "loss": 0.4819, + "nll_loss": 0.12045233696699142, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.34289507009089e-05, + "rewards/margins": 0.27631497383117676, + "rewards/rejected": -0.27634841203689575, + "step": 10324 + }, + { + "epoch": 7.140387275242047, + "grad_norm": 4.146999359130859, + "learning_rate": 1.588673735976641e-05, + "log_odds_chosen": 11.96800422668457, + "log_odds_ratio": -8.39845415612217e-06, + "logits/chosen": -0.3274807035923004, + "logits/rejected": -0.3796335756778717, + "logps/chosen": -0.00016001489711925387, + "logps/rejected": -2.7158827781677246, + "loss": 0.5385, + "nll_loss": 0.13462276756763458, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.600149153091479e-05, + "rewards/margins": 0.2715722620487213, + "rewards/rejected": -0.2715882658958435, + "step": 10325 + }, + { + "epoch": 7.141078838174274, + "grad_norm": 4.407366752624512, + "learning_rate": 1.5882895343476258e-05, + "log_odds_chosen": 10.556142807006836, + "log_odds_ratio": -0.00011441886454122141, + "logits/chosen": -0.592565655708313, + "logits/rejected": -0.5965545773506165, + "logps/chosen": -0.0001350560487480834, + "logps/rejected": -1.7966774702072144, + "loss": 0.5097, + "nll_loss": 0.12740643322467804, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3505605238606222e-05, + "rewards/margins": 0.17965424060821533, + "rewards/rejected": -0.17966774106025696, + "step": 10326 + }, + { + "epoch": 7.141770401106501, + "grad_norm": 13.011528968811035, + "learning_rate": 1.5879053327186107e-05, + "log_odds_chosen": 10.92127799987793, + "log_odds_ratio": -4.325023473938927e-05, + "logits/chosen": -0.7639882564544678, + "logits/rejected": -0.845071017742157, + "logps/chosen": -0.00012478558346629143, + "logps/rejected": -1.907325029373169, + "loss": 0.4904, + "nll_loss": 0.12258665263652802, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2478559256123845e-05, + "rewards/margins": 0.19072003662586212, + "rewards/rejected": -0.19073250889778137, + "step": 10327 + }, + { + "epoch": 7.142461964038728, + "grad_norm": 3.8526790142059326, + "learning_rate": 1.587521131089596e-05, + "log_odds_chosen": 11.049694061279297, + "log_odds_ratio": -7.972231833264232e-05, + "logits/chosen": -0.5567727088928223, + "logits/rejected": -0.6450908780097961, + "logps/chosen": -0.00020438033971004188, + "logps/rejected": -2.268096446990967, + "loss": 0.369, + "nll_loss": 0.0922321155667305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.043803578999359e-05, + "rewards/margins": 0.226789191365242, + "rewards/rejected": -0.22680962085723877, + "step": 10328 + }, + { + "epoch": 7.143153526970955, + "grad_norm": 3.450875997543335, + "learning_rate": 1.5871369294605808e-05, + "log_odds_chosen": 11.664745330810547, + "log_odds_ratio": -1.371507823932916e-05, + "logits/chosen": -0.36245304346084595, + "logits/rejected": -0.3094356060028076, + "logps/chosen": -0.00013537345512304455, + "logps/rejected": -2.370499849319458, + "loss": 0.4018, + "nll_loss": 0.1004365012049675, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3537345694203395e-05, + "rewards/margins": 0.23703645169734955, + "rewards/rejected": -0.23704996705055237, + "step": 10329 + }, + { + "epoch": 7.143845089903182, + "grad_norm": 8.461017608642578, + "learning_rate": 1.586752727831566e-05, + "log_odds_chosen": 11.80744743347168, + "log_odds_ratio": -4.3963693315163255e-05, + "logits/chosen": -0.02379719167947769, + "logits/rejected": -0.04182916879653931, + "logps/chosen": -0.0002002610854106024, + "logps/rejected": -2.7238898277282715, + "loss": 0.5228, + "nll_loss": 0.1306859254837036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0026109268656e-05, + "rewards/margins": 0.2723689675331116, + "rewards/rejected": -0.2723889648914337, + "step": 10330 + }, + { + "epoch": 7.144536652835408, + "grad_norm": 7.45040225982666, + "learning_rate": 1.5863685262025513e-05, + "log_odds_chosen": 10.639909744262695, + "log_odds_ratio": -0.00013313893578015268, + "logits/chosen": -0.27051666378974915, + "logits/rejected": -0.4297066628932953, + "logps/chosen": -0.00021471580839715898, + "logps/rejected": -2.028541088104248, + "loss": 0.835, + "nll_loss": 0.20874357223510742, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1471580112120137e-05, + "rewards/margins": 0.2028326392173767, + "rewards/rejected": -0.20285411179065704, + "step": 10331 + }, + { + "epoch": 7.145228215767635, + "grad_norm": 4.30554723739624, + "learning_rate": 1.5859843245735362e-05, + "log_odds_chosen": 11.11909008026123, + "log_odds_ratio": -3.873251625918783e-05, + "logits/chosen": -0.6348779201507568, + "logits/rejected": -0.6722233891487122, + "logps/chosen": -0.00025545392418280244, + "logps/rejected": -2.312321186065674, + "loss": 0.3027, + "nll_loss": 0.075670525431633, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5545392418280244e-05, + "rewards/margins": 0.23120658099651337, + "rewards/rejected": -0.23123212158679962, + "step": 10332 + }, + { + "epoch": 7.145919778699862, + "grad_norm": 10.649109840393066, + "learning_rate": 1.5856001229445215e-05, + "log_odds_chosen": 10.8757905960083, + "log_odds_ratio": -3.358142203069292e-05, + "logits/chosen": -0.7790817022323608, + "logits/rejected": -0.7131824493408203, + "logps/chosen": -0.00020699258311651647, + "logps/rejected": -2.3343687057495117, + "loss": 0.4288, + "nll_loss": 0.10719355940818787, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.069925903924741e-05, + "rewards/margins": 0.23341616988182068, + "rewards/rejected": -0.23343686759471893, + "step": 10333 + }, + { + "epoch": 7.146611341632089, + "grad_norm": 4.358206748962402, + "learning_rate": 1.5852159213155067e-05, + "log_odds_chosen": 10.780325889587402, + "log_odds_ratio": -0.00010636411025188863, + "logits/chosen": -0.373515248298645, + "logits/rejected": -0.3184809386730194, + "logps/chosen": -0.00040083954809233546, + "logps/rejected": -2.1831705570220947, + "loss": 0.4458, + "nll_loss": 0.11144562065601349, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.008395626442507e-05, + "rewards/margins": 0.2182769775390625, + "rewards/rejected": -0.21831706166267395, + "step": 10334 + }, + { + "epoch": 7.147302904564316, + "grad_norm": 7.291419982910156, + "learning_rate": 1.5848317196864916e-05, + "log_odds_chosen": 10.505504608154297, + "log_odds_ratio": -0.0003409209894016385, + "logits/chosen": -0.6493133306503296, + "logits/rejected": -0.6722878217697144, + "logps/chosen": -0.0006983771454542875, + "logps/rejected": -2.949965000152588, + "loss": 0.6972, + "nll_loss": 0.1742691695690155, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.983771163504571e-05, + "rewards/margins": 0.2949266731739044, + "rewards/rejected": -0.2949965000152588, + "step": 10335 + }, + { + "epoch": 7.1479944674965425, + "grad_norm": 4.698943614959717, + "learning_rate": 1.5844475180574765e-05, + "log_odds_chosen": 10.346242904663086, + "log_odds_ratio": -0.00013529814896173775, + "logits/chosen": -0.5392444133758545, + "logits/rejected": -0.6504898071289062, + "logps/chosen": -0.00023310561664402485, + "logps/rejected": -1.646261215209961, + "loss": 0.5007, + "nll_loss": 0.12517225742340088, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.331056384718977e-05, + "rewards/margins": 0.16460281610488892, + "rewards/rejected": -0.1646261215209961, + "step": 10336 + }, + { + "epoch": 7.148686030428769, + "grad_norm": 4.3376688957214355, + "learning_rate": 1.5840633164284617e-05, + "log_odds_chosen": 9.543367385864258, + "log_odds_ratio": -0.00017217869753949344, + "logits/chosen": -0.21007820963859558, + "logits/rejected": -0.23069339990615845, + "logps/chosen": -0.00039433487108908594, + "logps/rejected": -1.671665906906128, + "loss": 0.8096, + "nll_loss": 0.20237885415554047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.943348565371707e-05, + "rewards/margins": 0.16712714731693268, + "rewards/rejected": -0.1671665757894516, + "step": 10337 + }, + { + "epoch": 7.149377593360996, + "grad_norm": 5.843807220458984, + "learning_rate": 1.5836791147994467e-05, + "log_odds_chosen": 11.126989364624023, + "log_odds_ratio": -2.9735761927440763e-05, + "logits/chosen": -0.7239865660667419, + "logits/rejected": -0.7809531688690186, + "logps/chosen": -0.00020430656149983406, + "logps/rejected": -2.331878423690796, + "loss": 0.4957, + "nll_loss": 0.12391365319490433, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0430656149983406e-05, + "rewards/margins": 0.23316740989685059, + "rewards/rejected": -0.23318785429000854, + "step": 10338 + }, + { + "epoch": 7.150069156293223, + "grad_norm": 6.848789215087891, + "learning_rate": 1.583294913170432e-05, + "log_odds_chosen": 10.448689460754395, + "log_odds_ratio": -7.409107638522983e-05, + "logits/chosen": -0.3858215808868408, + "logits/rejected": -0.5062464475631714, + "logps/chosen": -0.00018067244673147798, + "logps/rejected": -1.631630539894104, + "loss": 0.4478, + "nll_loss": 0.11194244772195816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8067245036945678e-05, + "rewards/margins": 0.1631450057029724, + "rewards/rejected": -0.16316306591033936, + "step": 10339 + }, + { + "epoch": 7.15076071922545, + "grad_norm": 4.909574031829834, + "learning_rate": 1.582910711541417e-05, + "log_odds_chosen": 9.875921249389648, + "log_odds_ratio": -0.00047237356193363667, + "logits/chosen": -0.16309067606925964, + "logits/rejected": -0.14102210104465485, + "logps/chosen": -0.0003869852516800165, + "logps/rejected": -1.511296272277832, + "loss": 0.4299, + "nll_loss": 0.10743023455142975, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8698526623193175e-05, + "rewards/margins": 0.15109093487262726, + "rewards/rejected": -0.15112963318824768, + "step": 10340 + }, + { + "epoch": 7.151452282157677, + "grad_norm": 4.363992691040039, + "learning_rate": 1.582526509912402e-05, + "log_odds_chosen": 10.739774703979492, + "log_odds_ratio": -0.00010132478200830519, + "logits/chosen": -0.3702893853187561, + "logits/rejected": -0.39425715804100037, + "logps/chosen": -0.0003061680472455919, + "logps/rejected": -2.24922776222229, + "loss": 0.4192, + "nll_loss": 0.10479110479354858, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0616803996963426e-05, + "rewards/margins": 0.22489216923713684, + "rewards/rejected": -0.2249227911233902, + "step": 10341 + }, + { + "epoch": 7.1521438450899035, + "grad_norm": 6.391323566436768, + "learning_rate": 1.5821423082833873e-05, + "log_odds_chosen": 10.767119407653809, + "log_odds_ratio": -5.2425250032683834e-05, + "logits/chosen": -0.312757283449173, + "logits/rejected": -0.3822609782218933, + "logps/chosen": -0.00017840521468315274, + "logps/rejected": -1.8881677389144897, + "loss": 0.516, + "nll_loss": 0.1289961338043213, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7840520740719512e-05, + "rewards/margins": 0.1887989342212677, + "rewards/rejected": -0.18881677091121674, + "step": 10342 + }, + { + "epoch": 7.15283540802213, + "grad_norm": 7.592628002166748, + "learning_rate": 1.5817581066543725e-05, + "log_odds_chosen": 11.997628211975098, + "log_odds_ratio": -1.2196329407743178e-05, + "logits/chosen": -0.21336635947227478, + "logits/rejected": -0.20982897281646729, + "logps/chosen": -0.00013047002721577883, + "logps/rejected": -2.9994723796844482, + "loss": 0.5806, + "nll_loss": 0.14516031742095947, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3047003449173644e-05, + "rewards/margins": 0.29993414878845215, + "rewards/rejected": -0.29994723200798035, + "step": 10343 + }, + { + "epoch": 7.153526970954357, + "grad_norm": 4.593406677246094, + "learning_rate": 1.5813739050253574e-05, + "log_odds_chosen": 10.125865936279297, + "log_odds_ratio": -0.0003525286738295108, + "logits/chosen": -0.13861218094825745, + "logits/rejected": -0.08278912305831909, + "logps/chosen": -0.00023694118135608733, + "logps/rejected": -1.7349421977996826, + "loss": 0.645, + "nll_loss": 0.16122090816497803, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3694119590800256e-05, + "rewards/margins": 0.17347052693367004, + "rewards/rejected": -0.17349421977996826, + "step": 10344 + }, + { + "epoch": 7.154218533886584, + "grad_norm": 5.003382205963135, + "learning_rate": 1.5809897033963423e-05, + "log_odds_chosen": 10.470653533935547, + "log_odds_ratio": -0.00014362734509631991, + "logits/chosen": 0.15004980564117432, + "logits/rejected": 0.009387247264385223, + "logps/chosen": -0.0001740518637234345, + "logps/rejected": -2.018296241760254, + "loss": 0.5705, + "nll_loss": 0.1426095813512802, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7405187463737093e-05, + "rewards/margins": 0.20181220769882202, + "rewards/rejected": -0.20182962715625763, + "step": 10345 + }, + { + "epoch": 7.154910096818811, + "grad_norm": 5.691940784454346, + "learning_rate": 1.5806055017673276e-05, + "log_odds_chosen": 10.687768936157227, + "log_odds_ratio": -0.0005019446252845228, + "logits/chosen": 0.009011678397655487, + "logits/rejected": -0.029282599687576294, + "logps/chosen": -0.00031394592951983213, + "logps/rejected": -2.420288562774658, + "loss": 0.5861, + "nll_loss": 0.14647966623306274, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1394592951983213e-05, + "rewards/margins": 0.24199745059013367, + "rewards/rejected": -0.2420288622379303, + "step": 10346 + }, + { + "epoch": 7.155601659751038, + "grad_norm": 5.003124713897705, + "learning_rate": 1.5802213001383125e-05, + "log_odds_chosen": 10.676345825195312, + "log_odds_ratio": -9.651888103689998e-05, + "logits/chosen": -0.3004814684391022, + "logits/rejected": -0.3764978349208832, + "logps/chosen": -9.466239862376824e-05, + "logps/rejected": -1.6661527156829834, + "loss": 0.7067, + "nll_loss": 0.1766619235277176, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.466239134781063e-06, + "rewards/margins": 0.16660580039024353, + "rewards/rejected": -0.16661526262760162, + "step": 10347 + }, + { + "epoch": 7.1562932226832645, + "grad_norm": 5.460468292236328, + "learning_rate": 1.5798370985092977e-05, + "log_odds_chosen": 10.289358139038086, + "log_odds_ratio": -0.00028913281857967377, + "logits/chosen": -0.4978940486907959, + "logits/rejected": -0.45821619033813477, + "logps/chosen": -0.0014770093839615583, + "logps/rejected": -2.626573324203491, + "loss": 0.6286, + "nll_loss": 0.1571165919303894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014770095003768802, + "rewards/margins": 0.26250964403152466, + "rewards/rejected": -0.2626573443412781, + "step": 10348 + }, + { + "epoch": 7.156984785615491, + "grad_norm": 3.845489501953125, + "learning_rate": 1.579452896880283e-05, + "log_odds_chosen": 10.857307434082031, + "log_odds_ratio": -3.6831996112596244e-05, + "logits/chosen": -0.703261137008667, + "logits/rejected": -0.7102363705635071, + "logps/chosen": -0.00014799721247982234, + "logps/rejected": -1.7273801565170288, + "loss": 0.4403, + "nll_loss": 0.11005987226963043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4799722521274816e-05, + "rewards/margins": 0.17272323369979858, + "rewards/rejected": -0.17273801565170288, + "step": 10349 + }, + { + "epoch": 7.157676348547718, + "grad_norm": 4.47117280960083, + "learning_rate": 1.579068695251268e-05, + "log_odds_chosen": 11.732563018798828, + "log_odds_ratio": -1.1336490388202947e-05, + "logits/chosen": -0.5973580479621887, + "logits/rejected": -0.7038625478744507, + "logps/chosen": -0.00014137805555947125, + "logps/rejected": -2.862057685852051, + "loss": 0.4003, + "nll_loss": 0.1000852882862091, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4137805919745006e-05, + "rewards/margins": 0.2861916422843933, + "rewards/rejected": -0.2862057387828827, + "step": 10350 + }, + { + "epoch": 7.158367911479945, + "grad_norm": 4.15538215637207, + "learning_rate": 1.578684493622253e-05, + "log_odds_chosen": 9.860397338867188, + "log_odds_ratio": -0.0005921100964769721, + "logits/chosen": -0.6259291768074036, + "logits/rejected": -0.635421097278595, + "logps/chosen": -0.00032356291194446385, + "logps/rejected": -1.5026354789733887, + "loss": 0.4756, + "nll_loss": 0.11884228140115738, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.235629264963791e-05, + "rewards/margins": 0.15023118257522583, + "rewards/rejected": -0.15026354789733887, + "step": 10351 + }, + { + "epoch": 7.159059474412172, + "grad_norm": 9.987689018249512, + "learning_rate": 1.5783002919932384e-05, + "log_odds_chosen": 11.829431533813477, + "log_odds_ratio": -1.2193728252896108e-05, + "logits/chosen": -0.5235463380813599, + "logits/rejected": -0.5612502098083496, + "logps/chosen": -0.00024450334603898227, + "logps/rejected": -3.104759454727173, + "loss": 0.5833, + "nll_loss": 0.14583048224449158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.445033533149399e-05, + "rewards/margins": 0.3104515075683594, + "rewards/rejected": -0.3104759454727173, + "step": 10352 + }, + { + "epoch": 7.159751037344399, + "grad_norm": 5.015040874481201, + "learning_rate": 1.5779160903642233e-05, + "log_odds_chosen": 11.361230850219727, + "log_odds_ratio": -6.172016583150253e-05, + "logits/chosen": -0.24815750122070312, + "logits/rejected": -0.398996502161026, + "logps/chosen": -0.00015217142936307937, + "logps/rejected": -2.3462064266204834, + "loss": 0.5022, + "nll_loss": 0.12555190920829773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5217143300105818e-05, + "rewards/margins": 0.23460541665554047, + "rewards/rejected": -0.23462063074111938, + "step": 10353 + }, + { + "epoch": 7.1604426002766255, + "grad_norm": 4.724827766418457, + "learning_rate": 1.5775318887352082e-05, + "log_odds_chosen": 10.288719177246094, + "log_odds_ratio": -0.00014790150453336537, + "logits/chosen": -0.09138578176498413, + "logits/rejected": -0.10876794159412384, + "logps/chosen": -0.000556323619093746, + "logps/rejected": -1.8416610956192017, + "loss": 0.4881, + "nll_loss": 0.12201963365077972, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.563236481975764e-05, + "rewards/margins": 0.18411049246788025, + "rewards/rejected": -0.1841661036014557, + "step": 10354 + }, + { + "epoch": 7.161134163208852, + "grad_norm": 4.384098052978516, + "learning_rate": 1.5771476871061934e-05, + "log_odds_chosen": 10.974483489990234, + "log_odds_ratio": -8.987160981632769e-05, + "logits/chosen": -0.27367737889289856, + "logits/rejected": -0.344980925321579, + "logps/chosen": -0.0001740015286486596, + "logps/rejected": -2.03933048248291, + "loss": 0.4814, + "nll_loss": 0.12034108489751816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.740015432005748e-05, + "rewards/margins": 0.20391567051410675, + "rewards/rejected": -0.20393304526805878, + "step": 10355 + }, + { + "epoch": 7.161825726141079, + "grad_norm": 5.302286148071289, + "learning_rate": 1.5767634854771783e-05, + "log_odds_chosen": 10.35113525390625, + "log_odds_ratio": -4.7902078222250566e-05, + "logits/chosen": -0.45522600412368774, + "logits/rejected": -0.543906569480896, + "logps/chosen": -0.00014997663674876094, + "logps/rejected": -1.7539184093475342, + "loss": 0.3004, + "nll_loss": 0.07508346438407898, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4997664038673975e-05, + "rewards/margins": 0.17537686228752136, + "rewards/rejected": -0.17539185285568237, + "step": 10356 + }, + { + "epoch": 7.162517289073306, + "grad_norm": 5.789268493652344, + "learning_rate": 1.5763792838481636e-05, + "log_odds_chosen": 10.186277389526367, + "log_odds_ratio": -0.00013727105397265404, + "logits/chosen": 0.14934031665325165, + "logits/rejected": 0.11088190972805023, + "logps/chosen": -0.00018403568537905812, + "logps/rejected": -1.712958574295044, + "loss": 0.7635, + "nll_loss": 0.19086456298828125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8403568901703693e-05, + "rewards/margins": 0.1712774634361267, + "rewards/rejected": -0.1712958663702011, + "step": 10357 + }, + { + "epoch": 7.163208852005533, + "grad_norm": 7.448746681213379, + "learning_rate": 1.575995082219149e-05, + "log_odds_chosen": 11.716529846191406, + "log_odds_ratio": -2.580987347755581e-05, + "logits/chosen": -0.25747519731521606, + "logits/rejected": -0.3709869384765625, + "logps/chosen": -0.00016249314649030566, + "logps/rejected": -2.8506031036376953, + "loss": 0.5447, + "nll_loss": 0.1361800581216812, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6249314285232686e-05, + "rewards/margins": 0.2850440442562103, + "rewards/rejected": -0.2850602865219116, + "step": 10358 + }, + { + "epoch": 7.16390041493776, + "grad_norm": 5.051116466522217, + "learning_rate": 1.5756108805901337e-05, + "log_odds_chosen": 10.604525566101074, + "log_odds_ratio": -9.764420974534005e-05, + "logits/chosen": -0.3789137005805969, + "logits/rejected": -0.3770774304866791, + "logps/chosen": -0.0001220260382979177, + "logps/rejected": -1.644858479499817, + "loss": 0.72, + "nll_loss": 0.1799805909395218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2202603102196008e-05, + "rewards/margins": 0.16447363793849945, + "rewards/rejected": -0.1644858419895172, + "step": 10359 + }, + { + "epoch": 7.1645919778699865, + "grad_norm": 3.8639204502105713, + "learning_rate": 1.575226678961119e-05, + "log_odds_chosen": 10.401793479919434, + "log_odds_ratio": -5.208807488088496e-05, + "logits/chosen": -0.5287341475486755, + "logits/rejected": -0.491424024105072, + "logps/chosen": -0.00016572093591094017, + "logps/rejected": -1.6025166511535645, + "loss": 0.3203, + "nll_loss": 0.08007211983203888, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6572093954891898e-05, + "rewards/margins": 0.16023510694503784, + "rewards/rejected": -0.1602516770362854, + "step": 10360 + }, + { + "epoch": 7.165283540802213, + "grad_norm": 5.283153533935547, + "learning_rate": 1.574842477332104e-05, + "log_odds_chosen": 11.058725357055664, + "log_odds_ratio": -2.314174889761489e-05, + "logits/chosen": -0.41812098026275635, + "logits/rejected": -0.4501090347766876, + "logps/chosen": -0.00016006288933567703, + "logps/rejected": -1.8618499040603638, + "loss": 0.4602, + "nll_loss": 0.11504913866519928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.600628820597194e-05, + "rewards/margins": 0.18616899847984314, + "rewards/rejected": -0.18618498742580414, + "step": 10361 + }, + { + "epoch": 7.16597510373444, + "grad_norm": 5.949680328369141, + "learning_rate": 1.574458275703089e-05, + "log_odds_chosen": 10.830810546875, + "log_odds_ratio": -6.881457375129685e-05, + "logits/chosen": 0.00372517853975296, + "logits/rejected": -0.08768212795257568, + "logps/chosen": -0.00016697445244062692, + "logps/rejected": -1.9055685997009277, + "loss": 0.4789, + "nll_loss": 0.11972109973430634, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.669744415266905e-05, + "rewards/margins": 0.19054014980793, + "rewards/rejected": -0.1905568540096283, + "step": 10362 + }, + { + "epoch": 7.166666666666667, + "grad_norm": 5.766731262207031, + "learning_rate": 1.574074074074074e-05, + "log_odds_chosen": 10.797150611877441, + "log_odds_ratio": -3.587778701330535e-05, + "logits/chosen": -0.25513169169425964, + "logits/rejected": -0.2806079089641571, + "logps/chosen": -0.0002537602267693728, + "logps/rejected": -2.0755133628845215, + "loss": 0.6524, + "nll_loss": 0.16309869289398193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5376022676937282e-05, + "rewards/margins": 0.20752598345279694, + "rewards/rejected": -0.20755136013031006, + "step": 10363 + }, + { + "epoch": 7.167358229598894, + "grad_norm": 6.955835342407227, + "learning_rate": 1.573689872445059e-05, + "log_odds_chosen": 10.524138450622559, + "log_odds_ratio": -0.00010134144395124167, + "logits/chosen": -0.4832562804222107, + "logits/rejected": -0.5262367725372314, + "logps/chosen": -0.0003315797948744148, + "logps/rejected": -2.264157772064209, + "loss": 0.6328, + "nll_loss": 0.1581859588623047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.315797948744148e-05, + "rewards/margins": 0.22638264298439026, + "rewards/rejected": -0.22641579806804657, + "step": 10364 + }, + { + "epoch": 7.168049792531121, + "grad_norm": 3.8119096755981445, + "learning_rate": 1.5733056708160442e-05, + "log_odds_chosen": 10.417471885681152, + "log_odds_ratio": -0.00031710093026049435, + "logits/chosen": -1.077115535736084, + "logits/rejected": -1.0785775184631348, + "logps/chosen": -0.00048406756832264364, + "logps/rejected": -2.097248077392578, + "loss": 0.427, + "nll_loss": 0.10671207308769226, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8406756832264364e-05, + "rewards/margins": 0.20967641472816467, + "rewards/rejected": -0.2097248136997223, + "step": 10365 + }, + { + "epoch": 7.1687413554633475, + "grad_norm": 4.384820938110352, + "learning_rate": 1.5729214691870294e-05, + "log_odds_chosen": 10.294703483581543, + "log_odds_ratio": -0.0010348277864977717, + "logits/chosen": -0.2706519663333893, + "logits/rejected": -0.32500049471855164, + "logps/chosen": -0.0025665496941655874, + "logps/rejected": -2.180053472518921, + "loss": 0.3983, + "nll_loss": 0.09947191178798676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00025665495195426047, + "rewards/margins": 0.2177487015724182, + "rewards/rejected": -0.21800535917282104, + "step": 10366 + }, + { + "epoch": 7.169432918395574, + "grad_norm": 5.960926532745361, + "learning_rate": 1.5725372675580143e-05, + "log_odds_chosen": 10.927475929260254, + "log_odds_ratio": -6.083076732465997e-05, + "logits/chosen": -0.5122660994529724, + "logits/rejected": -0.5442514419555664, + "logps/chosen": -0.0004127591964788735, + "logps/rejected": -2.9259157180786133, + "loss": 0.4699, + "nll_loss": 0.11747057735919952, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.127592546865344e-05, + "rewards/margins": 0.29255032539367676, + "rewards/rejected": -0.2925916016101837, + "step": 10367 + }, + { + "epoch": 7.170124481327801, + "grad_norm": 7.345365524291992, + "learning_rate": 1.5721530659289996e-05, + "log_odds_chosen": 10.399859428405762, + "log_odds_ratio": -8.993582014227286e-05, + "logits/chosen": -0.5601933598518372, + "logits/rejected": -0.7083494663238525, + "logps/chosen": -0.00028123060474172235, + "logps/rejected": -1.9739731550216675, + "loss": 0.5057, + "nll_loss": 0.12640774250030518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8123060474172235e-05, + "rewards/margins": 0.19736920297145844, + "rewards/rejected": -0.19739732146263123, + "step": 10368 + }, + { + "epoch": 7.170816044260028, + "grad_norm": 4.803614139556885, + "learning_rate": 1.5717688642999848e-05, + "log_odds_chosen": 11.357755661010742, + "log_odds_ratio": -5.818425051984377e-05, + "logits/chosen": -0.0957411378622055, + "logits/rejected": -0.19544725120067596, + "logps/chosen": -0.0002561407454777509, + "logps/rejected": -2.88908052444458, + "loss": 0.6493, + "nll_loss": 0.16231924295425415, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.561407382017933e-05, + "rewards/margins": 0.28888243436813354, + "rewards/rejected": -0.28890806436538696, + "step": 10369 + }, + { + "epoch": 7.171507607192255, + "grad_norm": 4.037209987640381, + "learning_rate": 1.5713846626709697e-05, + "log_odds_chosen": 11.223855972290039, + "log_odds_ratio": -2.7396872610552236e-05, + "logits/chosen": -0.37989529967308044, + "logits/rejected": -0.42906731367111206, + "logps/chosen": -0.00016098878404591233, + "logps/rejected": -2.571352958679199, + "loss": 0.4347, + "nll_loss": 0.10866893827915192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6098878404591233e-05, + "rewards/margins": 0.25711923837661743, + "rewards/rejected": -0.2571353316307068, + "step": 10370 + }, + { + "epoch": 7.172199170124482, + "grad_norm": 5.679206371307373, + "learning_rate": 1.571000461041955e-05, + "log_odds_chosen": 10.426536560058594, + "log_odds_ratio": -0.00013442272029351443, + "logits/chosen": -0.4217427968978882, + "logits/rejected": -0.4483773112297058, + "logps/chosen": -0.00038492606836371124, + "logps/rejected": -1.9264945983886719, + "loss": 0.5683, + "nll_loss": 0.14207184314727783, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.849260610877536e-05, + "rewards/margins": 0.1926109790802002, + "rewards/rejected": -0.1926494687795639, + "step": 10371 + }, + { + "epoch": 7.172890733056708, + "grad_norm": 5.536522388458252, + "learning_rate": 1.5706162594129402e-05, + "log_odds_chosen": 11.396886825561523, + "log_odds_ratio": -6.0072481574025005e-05, + "logits/chosen": -0.3048505187034607, + "logits/rejected": -0.38790225982666016, + "logps/chosen": -0.0001912859734147787, + "logps/rejected": -2.801403045654297, + "loss": 0.5448, + "nll_loss": 0.13619258999824524, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9128598069073632e-05, + "rewards/margins": 0.28012117743492126, + "rewards/rejected": -0.28014034032821655, + "step": 10372 + }, + { + "epoch": 7.173582295988935, + "grad_norm": 5.496601104736328, + "learning_rate": 1.570232057783925e-05, + "log_odds_chosen": 11.184768676757812, + "log_odds_ratio": -2.8792088414775208e-05, + "logits/chosen": -0.6190522909164429, + "logits/rejected": -0.6823225617408752, + "logps/chosen": -0.00015820973203517497, + "logps/rejected": -2.4082930088043213, + "loss": 0.4024, + "nll_loss": 0.10058543086051941, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5820973203517497e-05, + "rewards/margins": 0.2408134937286377, + "rewards/rejected": -0.24082930386066437, + "step": 10373 + }, + { + "epoch": 7.174273858921162, + "grad_norm": 3.437378406524658, + "learning_rate": 1.56984785615491e-05, + "log_odds_chosen": 11.272420883178711, + "log_odds_ratio": -0.00011576030374271795, + "logits/chosen": -0.6390014886856079, + "logits/rejected": -0.6240395307540894, + "logps/chosen": -0.00013980553194414824, + "logps/rejected": -2.6278982162475586, + "loss": 0.7808, + "nll_loss": 0.1951945722103119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3980553376313765e-05, + "rewards/margins": 0.26277586817741394, + "rewards/rejected": -0.26278984546661377, + "step": 10374 + }, + { + "epoch": 7.174965421853389, + "grad_norm": 11.342272758483887, + "learning_rate": 1.5694636545258953e-05, + "log_odds_chosen": 11.142118453979492, + "log_odds_ratio": -2.4900431526475586e-05, + "logits/chosen": -0.61419677734375, + "logits/rejected": -0.7007442116737366, + "logps/chosen": -0.00015536102000623941, + "logps/rejected": -1.8198778629302979, + "loss": 0.4692, + "nll_loss": 0.11729123443365097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5536103092017584e-05, + "rewards/margins": 0.18197225034236908, + "rewards/rejected": -0.18198780715465546, + "step": 10375 + }, + { + "epoch": 7.175656984785616, + "grad_norm": 4.347101211547852, + "learning_rate": 1.5690794528968802e-05, + "log_odds_chosen": 11.29008960723877, + "log_odds_ratio": -3.775333607336506e-05, + "logits/chosen": -0.6615291833877563, + "logits/rejected": -0.6524258255958557, + "logps/chosen": -0.0004550123994704336, + "logps/rejected": -3.183990001678467, + "loss": 0.3407, + "nll_loss": 0.08517622947692871, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.550124140223488e-05, + "rewards/margins": 0.31835347414016724, + "rewards/rejected": -0.31839898228645325, + "step": 10376 + }, + { + "epoch": 7.176348547717843, + "grad_norm": 5.331522464752197, + "learning_rate": 1.5686952512678654e-05, + "log_odds_chosen": 10.315485000610352, + "log_odds_ratio": -0.000406495324568823, + "logits/chosen": -0.0037413835525512695, + "logits/rejected": -0.07865045964717865, + "logps/chosen": -0.0003331214829813689, + "logps/rejected": -1.8584060668945312, + "loss": 0.8735, + "nll_loss": 0.2183304727077484, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.331214975332841e-05, + "rewards/margins": 0.18580730259418488, + "rewards/rejected": -0.18584060668945312, + "step": 10377 + }, + { + "epoch": 7.177040110650069, + "grad_norm": 4.766586780548096, + "learning_rate": 1.5683110496388507e-05, + "log_odds_chosen": 10.226814270019531, + "log_odds_ratio": -0.00011353972513461486, + "logits/chosen": -0.40596237778663635, + "logits/rejected": -0.3591066598892212, + "logps/chosen": -0.0002922900894191116, + "logps/rejected": -1.915623664855957, + "loss": 0.4101, + "nll_loss": 0.10252165794372559, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9229006031528115e-05, + "rewards/margins": 0.1915331482887268, + "rewards/rejected": -0.19156238436698914, + "step": 10378 + }, + { + "epoch": 7.177731673582296, + "grad_norm": 4.257755756378174, + "learning_rate": 1.5679268480098356e-05, + "log_odds_chosen": 9.591224670410156, + "log_odds_ratio": -0.00026978107052855194, + "logits/chosen": -0.28969606757164, + "logits/rejected": -0.3585425615310669, + "logps/chosen": -0.0007068602135404944, + "logps/rejected": -1.5515594482421875, + "loss": 0.5163, + "nll_loss": 0.1290472447872162, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.068601553328335e-05, + "rewards/margins": 0.1550852507352829, + "rewards/rejected": -0.1551559418439865, + "step": 10379 + }, + { + "epoch": 7.178423236514523, + "grad_norm": 5.376257419586182, + "learning_rate": 1.5675426463808208e-05, + "log_odds_chosen": 11.736917495727539, + "log_odds_ratio": -1.4793120499234647e-05, + "logits/chosen": -0.34879764914512634, + "logits/rejected": -0.40188610553741455, + "logps/chosen": -0.00011175702093169093, + "logps/rejected": -2.322336435317993, + "loss": 0.5005, + "nll_loss": 0.12511463463306427, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1175701729371212e-05, + "rewards/margins": 0.23222248256206512, + "rewards/rejected": -0.23223364353179932, + "step": 10380 + }, + { + "epoch": 7.17911479944675, + "grad_norm": 4.578193187713623, + "learning_rate": 1.567158444751806e-05, + "log_odds_chosen": 10.820470809936523, + "log_odds_ratio": -0.00020237785065546632, + "logits/chosen": -0.958088755607605, + "logits/rejected": -0.8855130672454834, + "logps/chosen": -0.00032699486473575234, + "logps/rejected": -2.252181053161621, + "loss": 0.7879, + "nll_loss": 0.19696089625358582, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.269948501838371e-05, + "rewards/margins": 0.22518542408943176, + "rewards/rejected": -0.22521811723709106, + "step": 10381 + }, + { + "epoch": 7.179806362378977, + "grad_norm": 5.071114540100098, + "learning_rate": 1.566774243122791e-05, + "log_odds_chosen": 10.823832511901855, + "log_odds_ratio": -7.1186208515428e-05, + "logits/chosen": -0.4531767964363098, + "logits/rejected": -0.5184823870658875, + "logps/chosen": -0.00017408700659871101, + "logps/rejected": -2.2006890773773193, + "loss": 0.49, + "nll_loss": 0.12250152230262756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.740869993227534e-05, + "rewards/margins": 0.22005151212215424, + "rewards/rejected": -0.22006891667842865, + "step": 10382 + }, + { + "epoch": 7.180497925311204, + "grad_norm": 8.42878246307373, + "learning_rate": 1.566390041493776e-05, + "log_odds_chosen": 11.13003158569336, + "log_odds_ratio": -2.9363169232965447e-05, + "logits/chosen": -0.412482351064682, + "logits/rejected": -0.33322468400001526, + "logps/chosen": -0.0001294870162382722, + "logps/rejected": -2.097151756286621, + "loss": 0.4352, + "nll_loss": 0.10879058390855789, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2948700714332517e-05, + "rewards/margins": 0.20970222353935242, + "rewards/rejected": -0.20971518754959106, + "step": 10383 + }, + { + "epoch": 7.18118948824343, + "grad_norm": 4.142075061798096, + "learning_rate": 1.566005839864761e-05, + "log_odds_chosen": 11.121615409851074, + "log_odds_ratio": -9.572529961587861e-05, + "logits/chosen": -0.3162578046321869, + "logits/rejected": -0.45748594403266907, + "logps/chosen": -0.00015045034524518996, + "logps/rejected": -2.277163505554199, + "loss": 0.6452, + "nll_loss": 0.16129402816295624, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5045035070215818e-05, + "rewards/margins": 0.2277013063430786, + "rewards/rejected": -0.2277163565158844, + "step": 10384 + }, + { + "epoch": 7.181881051175657, + "grad_norm": 4.8519511222839355, + "learning_rate": 1.565621638235746e-05, + "log_odds_chosen": 9.380922317504883, + "log_odds_ratio": -0.0005602799355983734, + "logits/chosen": -0.10082048177719116, + "logits/rejected": -0.11572250723838806, + "logps/chosen": -0.0007273274823091924, + "logps/rejected": -1.6986327171325684, + "loss": 0.5084, + "nll_loss": 0.12703612446784973, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.273274968611076e-05, + "rewards/margins": 0.16979053616523743, + "rewards/rejected": -0.1698632836341858, + "step": 10385 + }, + { + "epoch": 7.182572614107884, + "grad_norm": 11.137154579162598, + "learning_rate": 1.5652374366067313e-05, + "log_odds_chosen": 10.050169944763184, + "log_odds_ratio": -8.446585707133636e-05, + "logits/chosen": -0.49709948897361755, + "logits/rejected": -0.5779053568840027, + "logps/chosen": -0.00021436612587422132, + "logps/rejected": -1.611759066581726, + "loss": 0.4521, + "nll_loss": 0.11301799863576889, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.143661185982637e-05, + "rewards/margins": 0.16115447878837585, + "rewards/rejected": -0.1611759066581726, + "step": 10386 + }, + { + "epoch": 7.183264177040111, + "grad_norm": 5.8118672370910645, + "learning_rate": 1.5648532349777165e-05, + "log_odds_chosen": 10.831192016601562, + "log_odds_ratio": -0.001313655637204647, + "logits/chosen": -0.3823273777961731, + "logits/rejected": -0.407784640789032, + "logps/chosen": -0.0011425204575061798, + "logps/rejected": -2.535809278488159, + "loss": 0.4633, + "nll_loss": 0.11569167673587799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011425205593695864, + "rewards/margins": 0.2534666657447815, + "rewards/rejected": -0.2535809278488159, + "step": 10387 + }, + { + "epoch": 7.183955739972338, + "grad_norm": 6.439311981201172, + "learning_rate": 1.5644690333487014e-05, + "log_odds_chosen": 10.70012378692627, + "log_odds_ratio": -5.9435871662572026e-05, + "logits/chosen": -0.28330811858177185, + "logits/rejected": -0.2336798906326294, + "logps/chosen": -0.00048739041085354984, + "logps/rejected": -2.6013941764831543, + "loss": 0.4894, + "nll_loss": 0.12234983593225479, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.873904254054651e-05, + "rewards/margins": 0.2600906789302826, + "rewards/rejected": -0.2601394057273865, + "step": 10388 + }, + { + "epoch": 7.1846473029045645, + "grad_norm": 5.669109344482422, + "learning_rate": 1.5640848317196867e-05, + "log_odds_chosen": 12.277332305908203, + "log_odds_ratio": -1.0741958249127492e-05, + "logits/chosen": -0.3358321785926819, + "logits/rejected": -0.4964350461959839, + "logps/chosen": -0.00014862377429381013, + "logps/rejected": -3.1850128173828125, + "loss": 0.535, + "nll_loss": 0.1337580531835556, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4862376701785251e-05, + "rewards/margins": 0.31848645210266113, + "rewards/rejected": -0.3185012936592102, + "step": 10389 + }, + { + "epoch": 7.185338865836791, + "grad_norm": 6.9274821281433105, + "learning_rate": 1.563700630090672e-05, + "log_odds_chosen": 11.21712875366211, + "log_odds_ratio": -3.1848030630499125e-05, + "logits/chosen": -0.15592873096466064, + "logits/rejected": -0.17688466608524323, + "logps/chosen": -0.0002714493020903319, + "logps/rejected": -2.7164435386657715, + "loss": 0.6537, + "nll_loss": 0.16341017186641693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7144931664224714e-05, + "rewards/margins": 0.27161717414855957, + "rewards/rejected": -0.27164432406425476, + "step": 10390 + }, + { + "epoch": 7.186030428769018, + "grad_norm": 3.708771228790283, + "learning_rate": 1.5633164284616568e-05, + "log_odds_chosen": 11.14272403717041, + "log_odds_ratio": -1.6521320503670722e-05, + "logits/chosen": -0.04514652490615845, + "logits/rejected": -0.1443396508693695, + "logps/chosen": -0.00014625716721639037, + "logps/rejected": -2.0405805110931396, + "loss": 0.3545, + "nll_loss": 0.08861503005027771, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.462571799493162e-05, + "rewards/margins": 0.2040434181690216, + "rewards/rejected": -0.20405805110931396, + "step": 10391 + }, + { + "epoch": 7.186721991701245, + "grad_norm": 8.980436325073242, + "learning_rate": 1.5629322268326417e-05, + "log_odds_chosen": 10.469853401184082, + "log_odds_ratio": -0.0005530455382540822, + "logits/chosen": -0.5228238701820374, + "logits/rejected": -0.544980525970459, + "logps/chosen": -0.001911295112222433, + "logps/rejected": -2.6337761878967285, + "loss": 0.381, + "nll_loss": 0.09519396722316742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019112952577415854, + "rewards/margins": 0.263186514377594, + "rewards/rejected": -0.2633776068687439, + "step": 10392 + }, + { + "epoch": 7.187413554633472, + "grad_norm": 5.94824743270874, + "learning_rate": 1.562548025203627e-05, + "log_odds_chosen": 10.073355674743652, + "log_odds_ratio": -0.0001066073018591851, + "logits/chosen": -0.34966346621513367, + "logits/rejected": -0.329208642244339, + "logps/chosen": -0.0004409438115544617, + "logps/rejected": -2.026257276535034, + "loss": 0.5787, + "nll_loss": 0.14465567469596863, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.40943767898716e-05, + "rewards/margins": 0.20258162915706635, + "rewards/rejected": -0.20262573659420013, + "step": 10393 + }, + { + "epoch": 7.188105117565699, + "grad_norm": 4.363674163818359, + "learning_rate": 1.562163823574612e-05, + "log_odds_chosen": 10.5360107421875, + "log_odds_ratio": -0.00013599703379441053, + "logits/chosen": -0.5808853507041931, + "logits/rejected": -0.5344756841659546, + "logps/chosen": -0.000584651657845825, + "logps/rejected": -2.1063883304595947, + "loss": 0.3505, + "nll_loss": 0.08760587871074677, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.846516432939097e-05, + "rewards/margins": 0.21058037877082825, + "rewards/rejected": -0.21063882112503052, + "step": 10394 + }, + { + "epoch": 7.1887966804979255, + "grad_norm": 5.332204341888428, + "learning_rate": 1.561779621945597e-05, + "log_odds_chosen": 10.541728019714355, + "log_odds_ratio": -8.822587551549077e-05, + "logits/chosen": -0.31964346766471863, + "logits/rejected": -0.37257277965545654, + "logps/chosen": -0.0002514015359338373, + "logps/rejected": -2.098186731338501, + "loss": 0.7035, + "nll_loss": 0.17586232721805573, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.514015432097949e-05, + "rewards/margins": 0.20979352295398712, + "rewards/rejected": -0.20981867611408234, + "step": 10395 + }, + { + "epoch": 7.189488243430152, + "grad_norm": 4.773373126983643, + "learning_rate": 1.5613954203165824e-05, + "log_odds_chosen": 10.850213050842285, + "log_odds_ratio": -0.0005813997704535723, + "logits/chosen": -0.8597350716590881, + "logits/rejected": -0.7824587821960449, + "logps/chosen": -0.0006822537397965789, + "logps/rejected": -1.9705058336257935, + "loss": 0.523, + "nll_loss": 0.1307014524936676, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.822537397965789e-05, + "rewards/margins": 0.19698236882686615, + "rewards/rejected": -0.19705060124397278, + "step": 10396 + }, + { + "epoch": 7.190179806362379, + "grad_norm": 7.614468097686768, + "learning_rate": 1.5610112186875673e-05, + "log_odds_chosen": 11.094710350036621, + "log_odds_ratio": -7.629027822986245e-05, + "logits/chosen": -0.6720897555351257, + "logits/rejected": -0.7558448314666748, + "logps/chosen": -0.0002666703367140144, + "logps/rejected": -2.537464141845703, + "loss": 0.472, + "nll_loss": 0.11800040304660797, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6667034035199322e-05, + "rewards/margins": 0.2537197172641754, + "rewards/rejected": -0.2537463903427124, + "step": 10397 + }, + { + "epoch": 7.190871369294606, + "grad_norm": 3.3446168899536133, + "learning_rate": 1.5606270170585525e-05, + "log_odds_chosen": 11.072732925415039, + "log_odds_ratio": -2.1762225514976308e-05, + "logits/chosen": -0.5741645097732544, + "logits/rejected": -0.5488527417182922, + "logps/chosen": -0.00014538533287122846, + "logps/rejected": -2.0284476280212402, + "loss": 0.3267, + "nll_loss": 0.08168265223503113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4538533832819667e-05, + "rewards/margins": 0.2028302252292633, + "rewards/rejected": -0.2028447538614273, + "step": 10398 + }, + { + "epoch": 7.191562932226833, + "grad_norm": 4.859591007232666, + "learning_rate": 1.5602428154295378e-05, + "log_odds_chosen": 11.008413314819336, + "log_odds_ratio": -0.00010048101103166118, + "logits/chosen": 0.43324506282806396, + "logits/rejected": 0.308527410030365, + "logps/chosen": -0.0002522010472603142, + "logps/rejected": -2.1987271308898926, + "loss": 0.4725, + "nll_loss": 0.11810819059610367, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.522010800021235e-05, + "rewards/margins": 0.21984750032424927, + "rewards/rejected": -0.21987271308898926, + "step": 10399 + }, + { + "epoch": 7.19225449515906, + "grad_norm": 3.9196529388427734, + "learning_rate": 1.5598586138005227e-05, + "log_odds_chosen": 11.893542289733887, + "log_odds_ratio": -1.7006164853228256e-05, + "logits/chosen": -0.622429609298706, + "logits/rejected": -0.6667870283126831, + "logps/chosen": -0.00010225811274722219, + "logps/rejected": -2.4955382347106934, + "loss": 0.365, + "nll_loss": 0.09124172478914261, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0225810910924338e-05, + "rewards/margins": 0.24954360723495483, + "rewards/rejected": -0.249553844332695, + "step": 10400 + }, + { + "epoch": 7.1929460580912865, + "grad_norm": 6.067159652709961, + "learning_rate": 1.5594744121715076e-05, + "log_odds_chosen": 9.888992309570312, + "log_odds_ratio": -0.000312354473862797, + "logits/chosen": -0.39751923084259033, + "logits/rejected": -0.48524197936058044, + "logps/chosen": -0.0005245020147413015, + "logps/rejected": -2.020956516265869, + "loss": 0.5966, + "nll_loss": 0.1491076946258545, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245019929134287e-05, + "rewards/margins": 0.20204317569732666, + "rewards/rejected": -0.202095627784729, + "step": 10401 + }, + { + "epoch": 7.193637621023513, + "grad_norm": 3.609178066253662, + "learning_rate": 1.5590902105424928e-05, + "log_odds_chosen": 9.639986038208008, + "log_odds_ratio": -0.00017861949163489044, + "logits/chosen": -0.4248259365558624, + "logits/rejected": -0.5246451497077942, + "logps/chosen": -0.0002483507269062102, + "logps/rejected": -1.2811558246612549, + "loss": 0.398, + "nll_loss": 0.09947745501995087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4835069780237973e-05, + "rewards/margins": 0.1280907392501831, + "rewards/rejected": -0.12811557948589325, + "step": 10402 + }, + { + "epoch": 7.19432918395574, + "grad_norm": 11.75823974609375, + "learning_rate": 1.5587060089134777e-05, + "log_odds_chosen": 8.741238594055176, + "log_odds_ratio": -0.12879516184329987, + "logits/chosen": -0.30707478523254395, + "logits/rejected": -0.43609267473220825, + "logps/chosen": -0.028354782611131668, + "logps/rejected": -1.7946550846099854, + "loss": 0.5618, + "nll_loss": 0.12756292521953583, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002835478400811553, + "rewards/margins": 0.17663003504276276, + "rewards/rejected": -0.17946550250053406, + "step": 10403 + }, + { + "epoch": 7.195020746887967, + "grad_norm": 10.215047836303711, + "learning_rate": 1.558321807284463e-05, + "log_odds_chosen": 11.406917572021484, + "log_odds_ratio": -3.1501491321250796e-05, + "logits/chosen": 0.07894551753997803, + "logits/rejected": 0.08648325502872467, + "logps/chosen": -0.00029575484222732484, + "logps/rejected": -2.8551900386810303, + "loss": 0.4392, + "nll_loss": 0.10979416966438293, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.957548713311553e-05, + "rewards/margins": 0.28548943996429443, + "rewards/rejected": -0.285519003868103, + "step": 10404 + }, + { + "epoch": 7.195712309820194, + "grad_norm": 4.584980487823486, + "learning_rate": 1.5579376056554482e-05, + "log_odds_chosen": 10.541440963745117, + "log_odds_ratio": -9.464387403568253e-05, + "logits/chosen": -0.547250509262085, + "logits/rejected": -0.49824681878089905, + "logps/chosen": -0.00014894589548930526, + "logps/rejected": -1.8729922771453857, + "loss": 0.6072, + "nll_loss": 0.151779904961586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4894590094627347e-05, + "rewards/margins": 0.18728432059288025, + "rewards/rejected": -0.1872992366552353, + "step": 10405 + }, + { + "epoch": 7.196403872752421, + "grad_norm": 10.538939476013184, + "learning_rate": 1.557553404026433e-05, + "log_odds_chosen": 10.94846248626709, + "log_odds_ratio": -2.3089738533599302e-05, + "logits/chosen": -0.2529940903186798, + "logits/rejected": -0.31514447927474976, + "logps/chosen": -0.0001458583283238113, + "logps/rejected": -2.1446704864501953, + "loss": 0.4062, + "nll_loss": 0.10155414044857025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4585832104785368e-05, + "rewards/margins": 0.21445247530937195, + "rewards/rejected": -0.21446704864501953, + "step": 10406 + }, + { + "epoch": 7.1970954356846475, + "grad_norm": 3.172614574432373, + "learning_rate": 1.5571692023974183e-05, + "log_odds_chosen": 10.082971572875977, + "log_odds_ratio": -0.0005615535192191601, + "logits/chosen": -0.08390333503484726, + "logits/rejected": -0.12408564984798431, + "logps/chosen": -0.0015450555365532637, + "logps/rejected": -2.3086283206939697, + "loss": 0.3739, + "nll_loss": 0.09341214597225189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015450554201379418, + "rewards/margins": 0.2307083010673523, + "rewards/rejected": -0.2308628261089325, + "step": 10407 + }, + { + "epoch": 7.197786998616874, + "grad_norm": 5.035568714141846, + "learning_rate": 1.5567850007684036e-05, + "log_odds_chosen": 11.911405563354492, + "log_odds_ratio": -9.349205356556922e-06, + "logits/chosen": -0.5108979344367981, + "logits/rejected": -0.5400675535202026, + "logps/chosen": -8.778244955465198e-05, + "logps/rejected": -2.3340892791748047, + "loss": 0.7396, + "nll_loss": 0.18490070104599, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.77824641065672e-06, + "rewards/margins": 0.2334001660346985, + "rewards/rejected": -0.23340894281864166, + "step": 10408 + }, + { + "epoch": 7.198478561549101, + "grad_norm": 3.9095423221588135, + "learning_rate": 1.5564007991393885e-05, + "log_odds_chosen": 11.930374145507812, + "log_odds_ratio": -1.5022533261799254e-05, + "logits/chosen": -0.36563628911972046, + "logits/rejected": -0.3588956296443939, + "logps/chosen": -0.00010352416575187817, + "logps/rejected": -2.5348033905029297, + "loss": 0.3587, + "nll_loss": 0.08966990560293198, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0352417120884638e-05, + "rewards/margins": 0.2534700036048889, + "rewards/rejected": -0.25348034501075745, + "step": 10409 + }, + { + "epoch": 7.199170124481328, + "grad_norm": 3.7800188064575195, + "learning_rate": 1.5560165975103734e-05, + "log_odds_chosen": 11.446325302124023, + "log_odds_ratio": -2.7550799131859094e-05, + "logits/chosen": -0.5708428621292114, + "logits/rejected": -0.7656696438789368, + "logps/chosen": -9.46579675655812e-05, + "logps/rejected": -1.9892699718475342, + "loss": 0.4408, + "nll_loss": 0.11019250750541687, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.465797120356001e-06, + "rewards/margins": 0.19891753792762756, + "rewards/rejected": -0.19892701506614685, + "step": 10410 + }, + { + "epoch": 7.199861687413555, + "grad_norm": 5.089803695678711, + "learning_rate": 1.5556323958813586e-05, + "log_odds_chosen": 9.740339279174805, + "log_odds_ratio": -0.00019847344083245844, + "logits/chosen": -0.26678839325904846, + "logits/rejected": -0.392799973487854, + "logps/chosen": -0.000566477479878813, + "logps/rejected": -1.7141238451004028, + "loss": 0.6536, + "nll_loss": 0.16337737441062927, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6647753808647394e-05, + "rewards/margins": 0.17135575413703918, + "rewards/rejected": -0.171412393450737, + "step": 10411 + }, + { + "epoch": 7.200553250345782, + "grad_norm": 6.466180801391602, + "learning_rate": 1.5552481942523436e-05, + "log_odds_chosen": 11.755807876586914, + "log_odds_ratio": -2.5303113943664357e-05, + "logits/chosen": -0.3360484838485718, + "logits/rejected": -0.27045273780822754, + "logps/chosen": -0.00012604852963704616, + "logps/rejected": -2.7367024421691895, + "loss": 0.8546, + "nll_loss": 0.21365918219089508, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2604852599906735e-05, + "rewards/margins": 0.2736576497554779, + "rewards/rejected": -0.2736702263355255, + "step": 10412 + }, + { + "epoch": 7.2012448132780085, + "grad_norm": 4.593533992767334, + "learning_rate": 1.5548639926233288e-05, + "log_odds_chosen": 11.521254539489746, + "log_odds_ratio": -3.387085234862752e-05, + "logits/chosen": -0.7450588345527649, + "logits/rejected": -0.8180086016654968, + "logps/chosen": -0.00029448719578795135, + "logps/rejected": -2.2137787342071533, + "loss": 0.434, + "nll_loss": 0.1085037887096405, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9448719942593016e-05, + "rewards/margins": 0.22134841978549957, + "rewards/rejected": -0.2213778793811798, + "step": 10413 + }, + { + "epoch": 7.201936376210235, + "grad_norm": 4.276266574859619, + "learning_rate": 1.554479790994314e-05, + "log_odds_chosen": 10.161270141601562, + "log_odds_ratio": -0.0001938036148203537, + "logits/chosen": -0.7717043161392212, + "logits/rejected": -0.7686448693275452, + "logps/chosen": -0.0001649046753300354, + "logps/rejected": -1.5598740577697754, + "loss": 0.6407, + "nll_loss": 0.16016465425491333, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6490466805407777e-05, + "rewards/margins": 0.15597090125083923, + "rewards/rejected": -0.15598741173744202, + "step": 10414 + }, + { + "epoch": 7.202627939142462, + "grad_norm": 3.8587746620178223, + "learning_rate": 1.554095589365299e-05, + "log_odds_chosen": 9.642004013061523, + "log_odds_ratio": -0.00021795628708787262, + "logits/chosen": 0.03109053522348404, + "logits/rejected": -0.1165243536233902, + "logps/chosen": -0.0006174801965244114, + "logps/rejected": -2.0108673572540283, + "loss": 0.4171, + "nll_loss": 0.10424423217773438, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174801819724962e-05, + "rewards/margins": 0.20102497935295105, + "rewards/rejected": -0.20108672976493835, + "step": 10415 + }, + { + "epoch": 7.203319502074689, + "grad_norm": 3.5054314136505127, + "learning_rate": 1.5537113877362842e-05, + "log_odds_chosen": 10.877426147460938, + "log_odds_ratio": -3.7110381526872516e-05, + "logits/chosen": -0.1429760605096817, + "logits/rejected": -0.24752220511436462, + "logps/chosen": -0.0001301420124946162, + "logps/rejected": -1.6698437929153442, + "loss": 0.4203, + "nll_loss": 0.10507805645465851, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3014201613259502e-05, + "rewards/margins": 0.166971355676651, + "rewards/rejected": -0.16698436439037323, + "step": 10416 + }, + { + "epoch": 7.204011065006916, + "grad_norm": 7.614464282989502, + "learning_rate": 1.5533271861072694e-05, + "log_odds_chosen": 10.811773300170898, + "log_odds_ratio": -5.097378743812442e-05, + "logits/chosen": -0.8475204706192017, + "logits/rejected": -0.8343116641044617, + "logps/chosen": -8.274411811726168e-05, + "logps/rejected": -1.541874885559082, + "loss": 0.5566, + "nll_loss": 0.13913320004940033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.274411811726168e-06, + "rewards/margins": 0.15417921543121338, + "rewards/rejected": -0.15418748557567596, + "step": 10417 + }, + { + "epoch": 7.204702627939143, + "grad_norm": 5.6126389503479, + "learning_rate": 1.5529429844782543e-05, + "log_odds_chosen": 11.205718994140625, + "log_odds_ratio": -2.211224455095362e-05, + "logits/chosen": -0.6076329350471497, + "logits/rejected": -0.6502000689506531, + "logps/chosen": -0.00023064023116603494, + "logps/rejected": -2.5637574195861816, + "loss": 0.6841, + "nll_loss": 0.17101337015628815, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3064025299390778e-05, + "rewards/margins": 0.2563526928424835, + "rewards/rejected": -0.2563757598400116, + "step": 10418 + }, + { + "epoch": 7.2053941908713695, + "grad_norm": 4.8593525886535645, + "learning_rate": 1.5525587828492392e-05, + "log_odds_chosen": 10.118298530578613, + "log_odds_ratio": -8.633873221697286e-05, + "logits/chosen": -0.492728590965271, + "logits/rejected": -0.41921842098236084, + "logps/chosen": -0.0003420605498831719, + "logps/rejected": -1.433894395828247, + "loss": 0.6214, + "nll_loss": 0.15532976388931274, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.420605935389176e-05, + "rewards/margins": 0.14335523545742035, + "rewards/rejected": -0.14338943362236023, + "step": 10419 + }, + { + "epoch": 7.206085753803596, + "grad_norm": 5.551113128662109, + "learning_rate": 1.5521745812202245e-05, + "log_odds_chosen": 10.857733726501465, + "log_odds_ratio": -4.6031469537410885e-05, + "logits/chosen": -0.7595330476760864, + "logits/rejected": -0.7728185653686523, + "logps/chosen": -0.0003562311176210642, + "logps/rejected": -2.313002109527588, + "loss": 0.7617, + "nll_loss": 0.19041162729263306, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5623110306914896e-05, + "rewards/margins": 0.23126459121704102, + "rewards/rejected": -0.2313002198934555, + "step": 10420 + }, + { + "epoch": 7.206777316735823, + "grad_norm": 3.6761534214019775, + "learning_rate": 1.5517903795912094e-05, + "log_odds_chosen": 11.04926872253418, + "log_odds_ratio": -0.0001277085393667221, + "logits/chosen": -0.43931564688682556, + "logits/rejected": -0.463930606842041, + "logps/chosen": -0.00013612773909699172, + "logps/rejected": -2.1665964126586914, + "loss": 0.3314, + "nll_loss": 0.08284401893615723, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3612774637294933e-05, + "rewards/margins": 0.21664604544639587, + "rewards/rejected": -0.21665966510772705, + "step": 10421 + }, + { + "epoch": 7.20746887966805, + "grad_norm": 6.475729465484619, + "learning_rate": 1.5514061779621946e-05, + "log_odds_chosen": 10.414512634277344, + "log_odds_ratio": -0.00010397224832559004, + "logits/chosen": -0.8556047081947327, + "logits/rejected": -0.8216189742088318, + "logps/chosen": -0.000805202464107424, + "logps/rejected": -2.3161447048187256, + "loss": 0.465, + "nll_loss": 0.11623618751764297, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.052025077631697e-05, + "rewards/margins": 0.231533944606781, + "rewards/rejected": -0.23161447048187256, + "step": 10422 + }, + { + "epoch": 7.208160442600277, + "grad_norm": 3.659374713897705, + "learning_rate": 1.55102197633318e-05, + "log_odds_chosen": 10.58485221862793, + "log_odds_ratio": -5.1850009185727686e-05, + "logits/chosen": -0.5555611252784729, + "logits/rejected": -0.6782649159431458, + "logps/chosen": -0.00020528820459730923, + "logps/rejected": -1.8091728687286377, + "loss": 0.5134, + "nll_loss": 0.12833338975906372, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0528819732135162e-05, + "rewards/margins": 0.18089677393436432, + "rewards/rejected": -0.18091730773448944, + "step": 10423 + }, + { + "epoch": 7.208852005532504, + "grad_norm": 5.045117378234863, + "learning_rate": 1.5506377747041648e-05, + "log_odds_chosen": 10.868586540222168, + "log_odds_ratio": -0.00010529413702897727, + "logits/chosen": -0.31450480222702026, + "logits/rejected": -0.3769742548465729, + "logps/chosen": -0.0010240180417895317, + "logps/rejected": -3.2506918907165527, + "loss": 0.5598, + "nll_loss": 0.1399279683828354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001024018056341447, + "rewards/margins": 0.32496681809425354, + "rewards/rejected": -0.3250691592693329, + "step": 10424 + }, + { + "epoch": 7.20954356846473, + "grad_norm": 4.592897415161133, + "learning_rate": 1.55025357307515e-05, + "log_odds_chosen": 10.890989303588867, + "log_odds_ratio": -2.9873521270928904e-05, + "logits/chosen": -0.2958086133003235, + "logits/rejected": -0.3364552855491638, + "logps/chosen": -0.00012136924488004297, + "logps/rejected": -1.817213535308838, + "loss": 0.4928, + "nll_loss": 0.12318695336580276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2136924851802178e-05, + "rewards/margins": 0.18170922994613647, + "rewards/rejected": -0.18172135949134827, + "step": 10425 + }, + { + "epoch": 7.210235131396957, + "grad_norm": 5.55319356918335, + "learning_rate": 1.5498693714461353e-05, + "log_odds_chosen": 10.696649551391602, + "log_odds_ratio": -0.00012903663446195424, + "logits/chosen": -0.5565292239189148, + "logits/rejected": -0.6371341347694397, + "logps/chosen": -0.0002119752753060311, + "logps/rejected": -2.162536382675171, + "loss": 0.3924, + "nll_loss": 0.09807487577199936, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1197525711613707e-05, + "rewards/margins": 0.21623243391513824, + "rewards/rejected": -0.2162536382675171, + "step": 10426 + }, + { + "epoch": 7.210926694329184, + "grad_norm": 5.8774495124816895, + "learning_rate": 1.5494851698171202e-05, + "log_odds_chosen": 11.74765396118164, + "log_odds_ratio": -1.3546211448556278e-05, + "logits/chosen": -0.4228131175041199, + "logits/rejected": -0.4262281656265259, + "logps/chosen": -0.0002533498336561024, + "logps/rejected": -3.003474235534668, + "loss": 0.5258, + "nll_loss": 0.13145418465137482, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.533498263801448e-05, + "rewards/margins": 0.3003220856189728, + "rewards/rejected": -0.3003474175930023, + "step": 10427 + }, + { + "epoch": 7.211618257261411, + "grad_norm": 7.68532133102417, + "learning_rate": 1.549100968188105e-05, + "log_odds_chosen": 10.173051834106445, + "log_odds_ratio": -8.042113040573895e-05, + "logits/chosen": -0.2979806661605835, + "logits/rejected": -0.38347023725509644, + "logps/chosen": -0.0008097730460576713, + "logps/rejected": -1.8612622022628784, + "loss": 0.4417, + "nll_loss": 0.11042722314596176, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.097730460576713e-05, + "rewards/margins": 0.18604524433612823, + "rewards/rejected": -0.1861262172460556, + "step": 10428 + }, + { + "epoch": 7.212309820193638, + "grad_norm": 6.552196502685547, + "learning_rate": 1.54871676655909e-05, + "log_odds_chosen": 11.38538932800293, + "log_odds_ratio": -0.0006448305794037879, + "logits/chosen": -0.4479593336582184, + "logits/rejected": -0.5196192264556885, + "logps/chosen": -0.0005789645947515965, + "logps/rejected": -2.6832289695739746, + "loss": 0.5465, + "nll_loss": 0.13654915988445282, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.789645729237236e-05, + "rewards/margins": 0.2682650089263916, + "rewards/rejected": -0.2683229148387909, + "step": 10429 + }, + { + "epoch": 7.213001383125865, + "grad_norm": 11.669718742370605, + "learning_rate": 1.5483325649300752e-05, + "log_odds_chosen": 10.768035888671875, + "log_odds_ratio": -9.076563583221287e-05, + "logits/chosen": -0.5250604748725891, + "logits/rejected": -0.5071054697036743, + "logps/chosen": -0.00046957319136708975, + "logps/rejected": -2.5636401176452637, + "loss": 0.4089, + "nll_loss": 0.10220938175916672, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.695731695392169e-05, + "rewards/margins": 0.2563170790672302, + "rewards/rejected": -0.25636401772499084, + "step": 10430 + }, + { + "epoch": 7.213692946058091, + "grad_norm": 3.9259867668151855, + "learning_rate": 1.5479483633010605e-05, + "log_odds_chosen": 9.859259605407715, + "log_odds_ratio": -0.00022302698926068842, + "logits/chosen": -0.8533114790916443, + "logits/rejected": -0.878596842288971, + "logps/chosen": -0.0004920702776871622, + "logps/rejected": -1.479432225227356, + "loss": 0.6729, + "nll_loss": 0.16821011900901794, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9207032134290785e-05, + "rewards/margins": 0.1478940099477768, + "rewards/rejected": -0.14794321358203888, + "step": 10431 + }, + { + "epoch": 7.214384508990318, + "grad_norm": 6.803659439086914, + "learning_rate": 1.5475641616720454e-05, + "log_odds_chosen": 10.224798202514648, + "log_odds_ratio": -0.00013334013056010008, + "logits/chosen": -0.16153977811336517, + "logits/rejected": 0.06601040065288544, + "logps/chosen": -0.0002593988610897213, + "logps/rejected": -1.8549165725708008, + "loss": 0.4982, + "nll_loss": 0.12453596293926239, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5939887564163655e-05, + "rewards/margins": 0.1854657083749771, + "rewards/rejected": -0.1854916512966156, + "step": 10432 + }, + { + "epoch": 7.215076071922545, + "grad_norm": 5.034233570098877, + "learning_rate": 1.5471799600430306e-05, + "log_odds_chosen": 9.249288558959961, + "log_odds_ratio": -0.00036163858021609485, + "logits/chosen": -0.1475447118282318, + "logits/rejected": -0.1759490966796875, + "logps/chosen": -0.0008161815349012613, + "logps/rejected": -1.8197362422943115, + "loss": 0.5334, + "nll_loss": 0.1333133578300476, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.161815640050918e-05, + "rewards/margins": 0.1818920075893402, + "rewards/rejected": -0.1819736361503601, + "step": 10433 + }, + { + "epoch": 7.215767634854772, + "grad_norm": 3.4301741123199463, + "learning_rate": 1.546795758414016e-05, + "log_odds_chosen": 11.221672058105469, + "log_odds_ratio": -3.974717401433736e-05, + "logits/chosen": -0.35873931646347046, + "logits/rejected": -0.3598625659942627, + "logps/chosen": -0.0005683759809471667, + "logps/rejected": -2.6728668212890625, + "loss": 0.4431, + "nll_loss": 0.11076383292675018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.683759445673786e-05, + "rewards/margins": 0.2672298848628998, + "rewards/rejected": -0.2672867178916931, + "step": 10434 + }, + { + "epoch": 7.216459197786999, + "grad_norm": 12.01319408416748, + "learning_rate": 1.5464115567850008e-05, + "log_odds_chosen": 11.553258895874023, + "log_odds_ratio": -9.56603471422568e-05, + "logits/chosen": -0.7321569919586182, + "logits/rejected": -0.7380499243736267, + "logps/chosen": -8.728139073355123e-05, + "logps/rejected": -2.136077404022217, + "loss": 0.4899, + "nll_loss": 0.12245947122573853, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.728139619051944e-06, + "rewards/margins": 0.21359901130199432, + "rewards/rejected": -0.21360774338245392, + "step": 10435 + }, + { + "epoch": 7.217150760719226, + "grad_norm": 4.647044658660889, + "learning_rate": 1.546027355155986e-05, + "log_odds_chosen": 11.165498733520508, + "log_odds_ratio": -3.597480463213287e-05, + "logits/chosen": -0.29245832562446594, + "logits/rejected": -0.24228429794311523, + "logps/chosen": -0.00013531606236938387, + "logps/rejected": -2.185344696044922, + "loss": 0.4234, + "nll_loss": 0.10585423558950424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3531605873140506e-05, + "rewards/margins": 0.2185209095478058, + "rewards/rejected": -0.2185344696044922, + "step": 10436 + }, + { + "epoch": 7.217842323651452, + "grad_norm": 8.116806030273438, + "learning_rate": 1.545643153526971e-05, + "log_odds_chosen": 11.70106029510498, + "log_odds_ratio": -2.180870433221571e-05, + "logits/chosen": -0.2894930839538574, + "logits/rejected": -0.29923558235168457, + "logps/chosen": -0.0001487794506829232, + "logps/rejected": -2.7362220287323, + "loss": 0.6568, + "nll_loss": 0.16420918703079224, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4877944522595499e-05, + "rewards/margins": 0.2736073136329651, + "rewards/rejected": -0.27362215518951416, + "step": 10437 + }, + { + "epoch": 7.218533886583679, + "grad_norm": 4.495815753936768, + "learning_rate": 1.545258951897956e-05, + "log_odds_chosen": 9.812202453613281, + "log_odds_ratio": -0.00014063966227695346, + "logits/chosen": -0.3846893310546875, + "logits/rejected": -0.39387011528015137, + "logps/chosen": -0.0003531720722094178, + "logps/rejected": -1.6779232025146484, + "loss": 0.4573, + "nll_loss": 0.11430048942565918, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.531720722094178e-05, + "rewards/margins": 0.16775700449943542, + "rewards/rejected": -0.16779232025146484, + "step": 10438 + }, + { + "epoch": 7.219225449515906, + "grad_norm": 4.9323272705078125, + "learning_rate": 1.544874750268941e-05, + "log_odds_chosen": 10.920612335205078, + "log_odds_ratio": -5.197681821300648e-05, + "logits/chosen": -0.4074556231498718, + "logits/rejected": -0.4035705327987671, + "logps/chosen": -0.00017860863590613008, + "logps/rejected": -1.8755065202713013, + "loss": 0.3981, + "nll_loss": 0.09951049834489822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.786086431820877e-05, + "rewards/margins": 0.18753278255462646, + "rewards/rejected": -0.1875506490468979, + "step": 10439 + }, + { + "epoch": 7.219917012448133, + "grad_norm": 6.232209205627441, + "learning_rate": 1.5444905486399263e-05, + "log_odds_chosen": 11.688590049743652, + "log_odds_ratio": -3.540895340847783e-05, + "logits/chosen": -0.594054102897644, + "logits/rejected": -0.7286227345466614, + "logps/chosen": -7.078055932652205e-05, + "logps/rejected": -2.228940486907959, + "loss": 0.3724, + "nll_loss": 0.09309396147727966, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.078055659803795e-06, + "rewards/margins": 0.22288696467876434, + "rewards/rejected": -0.22289404273033142, + "step": 10440 + }, + { + "epoch": 7.22060857538036, + "grad_norm": 4.872360706329346, + "learning_rate": 1.5441063470109112e-05, + "log_odds_chosen": 10.40176010131836, + "log_odds_ratio": -0.00017088992171920836, + "logits/chosen": 0.11200764775276184, + "logits/rejected": 0.05680667608976364, + "logps/chosen": -0.00016381520254071802, + "logps/rejected": -1.9157912731170654, + "loss": 0.5962, + "nll_loss": 0.14902549982070923, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6381520254071802e-05, + "rewards/margins": 0.1915627419948578, + "rewards/rejected": -0.19157913327217102, + "step": 10441 + }, + { + "epoch": 7.2213001383125865, + "grad_norm": 4.740699291229248, + "learning_rate": 1.5437221453818965e-05, + "log_odds_chosen": 11.286370277404785, + "log_odds_ratio": -0.00013748435594607145, + "logits/chosen": -0.6109752058982849, + "logits/rejected": -0.6121599078178406, + "logps/chosen": -0.00045225946814753115, + "logps/rejected": -2.360914945602417, + "loss": 0.4395, + "nll_loss": 0.10985865443944931, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.522595554590225e-05, + "rewards/margins": 0.23604625463485718, + "rewards/rejected": -0.2360914945602417, + "step": 10442 + }, + { + "epoch": 7.221991701244813, + "grad_norm": 4.2852396965026855, + "learning_rate": 1.5433379437528817e-05, + "log_odds_chosen": 10.645816802978516, + "log_odds_ratio": -0.0001721422595437616, + "logits/chosen": -0.34702369570732117, + "logits/rejected": -0.3822890520095825, + "logps/chosen": -0.0005197233404032886, + "logps/rejected": -2.4108142852783203, + "loss": 0.4523, + "nll_loss": 0.11306465417146683, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1972336223116145e-05, + "rewards/margins": 0.24102944135665894, + "rewards/rejected": -0.24108143150806427, + "step": 10443 + }, + { + "epoch": 7.22268326417704, + "grad_norm": 14.121091842651367, + "learning_rate": 1.5429537421238666e-05, + "log_odds_chosen": 10.955095291137695, + "log_odds_ratio": -9.669965220382437e-05, + "logits/chosen": -0.3955569267272949, + "logits/rejected": -0.48575782775878906, + "logps/chosen": -0.00027399149257689714, + "logps/rejected": -2.102013111114502, + "loss": 0.5726, + "nll_loss": 0.14315034449100494, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7399149985285476e-05, + "rewards/margins": 0.21017390489578247, + "rewards/rejected": -0.21020129323005676, + "step": 10444 + }, + { + "epoch": 7.223374827109267, + "grad_norm": 5.549518585205078, + "learning_rate": 1.542569540494852e-05, + "log_odds_chosen": 10.822440147399902, + "log_odds_ratio": -0.00012936044367961586, + "logits/chosen": -0.2175404578447342, + "logits/rejected": -0.3416159152984619, + "logps/chosen": -0.000349888316122815, + "logps/rejected": -2.3769421577453613, + "loss": 0.6474, + "nll_loss": 0.16182616353034973, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.498883233987726e-05, + "rewards/margins": 0.2376592457294464, + "rewards/rejected": -0.23769423365592957, + "step": 10445 + }, + { + "epoch": 7.224066390041494, + "grad_norm": 5.736955642700195, + "learning_rate": 1.5421853388658368e-05, + "log_odds_chosen": 11.646930694580078, + "log_odds_ratio": -2.430060521874111e-05, + "logits/chosen": 0.10752647370100021, + "logits/rejected": -0.04080040007829666, + "logps/chosen": -0.00039383795228786767, + "logps/rejected": -2.7490639686584473, + "loss": 0.4966, + "nll_loss": 0.12413991987705231, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9383794501191005e-05, + "rewards/margins": 0.2748669981956482, + "rewards/rejected": -0.2749063968658447, + "step": 10446 + }, + { + "epoch": 7.224757952973721, + "grad_norm": 5.097479820251465, + "learning_rate": 1.5418011372368217e-05, + "log_odds_chosen": 10.945337295532227, + "log_odds_ratio": -4.302536399336532e-05, + "logits/chosen": 0.09947680681943893, + "logits/rejected": 0.12206941843032837, + "logps/chosen": -0.00015883771993685514, + "logps/rejected": -2.1310343742370605, + "loss": 0.5825, + "nll_loss": 0.14561805129051208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5883773812674917e-05, + "rewards/margins": 0.2130875587463379, + "rewards/rejected": -0.21310344338417053, + "step": 10447 + }, + { + "epoch": 7.2254495159059475, + "grad_norm": 5.522941589355469, + "learning_rate": 1.541416935607807e-05, + "log_odds_chosen": 10.210217475891113, + "log_odds_ratio": -0.0001231904316227883, + "logits/chosen": -0.11590823531150818, + "logits/rejected": -0.18832921981811523, + "logps/chosen": -0.0001725100155454129, + "logps/rejected": -1.6004002094268799, + "loss": 0.638, + "nll_loss": 0.1594873070716858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7251002645934932e-05, + "rewards/margins": 0.1600227653980255, + "rewards/rejected": -0.160040020942688, + "step": 10448 + }, + { + "epoch": 7.226141078838174, + "grad_norm": 4.131917476654053, + "learning_rate": 1.5410327339787922e-05, + "log_odds_chosen": 10.818761825561523, + "log_odds_ratio": -6.041261804057285e-05, + "logits/chosen": 0.06681928038597107, + "logits/rejected": 0.07110019773244858, + "logps/chosen": -0.0004300489672459662, + "logps/rejected": -2.5556554794311523, + "loss": 1.0391, + "nll_loss": 0.2597717344760895, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.300489672459662e-05, + "rewards/margins": 0.25552254915237427, + "rewards/rejected": -0.2555655539035797, + "step": 10449 + }, + { + "epoch": 7.226832641770401, + "grad_norm": 6.010919094085693, + "learning_rate": 1.540648532349777e-05, + "log_odds_chosen": 10.075103759765625, + "log_odds_ratio": -0.0001629930775379762, + "logits/chosen": -0.41976091265678406, + "logits/rejected": -0.37371984124183655, + "logps/chosen": -0.0004160638782195747, + "logps/rejected": -1.5253503322601318, + "loss": 0.4134, + "nll_loss": 0.1033235490322113, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.160638491157442e-05, + "rewards/margins": 0.152493417263031, + "rewards/rejected": -0.15253503620624542, + "step": 10450 + }, + { + "epoch": 7.227524204702628, + "grad_norm": 5.272099494934082, + "learning_rate": 1.5402643307207623e-05, + "log_odds_chosen": 11.40852165222168, + "log_odds_ratio": -2.994909846165683e-05, + "logits/chosen": -0.2771112322807312, + "logits/rejected": -0.3340737819671631, + "logps/chosen": -0.00013135781046003103, + "logps/rejected": -1.9381992816925049, + "loss": 0.3929, + "nll_loss": 0.09821852296590805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3135780136508401e-05, + "rewards/margins": 0.19380679726600647, + "rewards/rejected": -0.19381992518901825, + "step": 10451 + }, + { + "epoch": 7.228215767634855, + "grad_norm": 4.739782333374023, + "learning_rate": 1.5398801290917476e-05, + "log_odds_chosen": 10.67758560180664, + "log_odds_ratio": -8.847292338032275e-05, + "logits/chosen": -0.15526941418647766, + "logits/rejected": -0.2297494113445282, + "logps/chosen": -0.0002579109859652817, + "logps/rejected": -2.387845039367676, + "loss": 0.5134, + "nll_loss": 0.12834352254867554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5791099687921815e-05, + "rewards/margins": 0.23875871300697327, + "rewards/rejected": -0.23878450691699982, + "step": 10452 + }, + { + "epoch": 7.228907330567082, + "grad_norm": 7.194368839263916, + "learning_rate": 1.5394959274627325e-05, + "log_odds_chosen": 10.149903297424316, + "log_odds_ratio": -0.00012134911230532452, + "logits/chosen": -0.2545188367366791, + "logits/rejected": -0.28976038098335266, + "logps/chosen": -0.0004408220120240003, + "logps/rejected": -2.34104585647583, + "loss": 0.3147, + "nll_loss": 0.0786544531583786, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4082204112783074e-05, + "rewards/margins": 0.23406049609184265, + "rewards/rejected": -0.23410458862781525, + "step": 10453 + }, + { + "epoch": 7.2295988934993085, + "grad_norm": 4.988805294036865, + "learning_rate": 1.5391117258337177e-05, + "log_odds_chosen": 11.244706153869629, + "log_odds_ratio": -6.236710760276765e-05, + "logits/chosen": -0.35564902424812317, + "logits/rejected": -0.4153813123703003, + "logps/chosen": -9.814107761485502e-05, + "logps/rejected": -1.9542515277862549, + "loss": 0.3629, + "nll_loss": 0.09071110188961029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.814107215788681e-06, + "rewards/margins": 0.19541534781455994, + "rewards/rejected": -0.1954251527786255, + "step": 10454 + }, + { + "epoch": 7.230290456431535, + "grad_norm": 4.210373878479004, + "learning_rate": 1.5387275242047026e-05, + "log_odds_chosen": 10.481241226196289, + "log_odds_ratio": -5.844100087415427e-05, + "logits/chosen": -0.6087998151779175, + "logits/rejected": -0.8502819538116455, + "logps/chosen": -0.0001757505815476179, + "logps/rejected": -1.623529076576233, + "loss": 0.5721, + "nll_loss": 0.14301741123199463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7575059246155433e-05, + "rewards/margins": 0.1623353511095047, + "rewards/rejected": -0.16235291957855225, + "step": 10455 + }, + { + "epoch": 7.230982019363762, + "grad_norm": 2.9738030433654785, + "learning_rate": 1.5383433225756875e-05, + "log_odds_chosen": 11.003986358642578, + "log_odds_ratio": -3.15298602799885e-05, + "logits/chosen": -0.4080902338027954, + "logits/rejected": -0.5776853561401367, + "logps/chosen": -0.000146781763760373, + "logps/rejected": -1.7721868753433228, + "loss": 0.3357, + "nll_loss": 0.08393213152885437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.467817673983518e-05, + "rewards/margins": 0.17720401287078857, + "rewards/rejected": -0.17721869051456451, + "step": 10456 + }, + { + "epoch": 7.231673582295989, + "grad_norm": 6.296378135681152, + "learning_rate": 1.5379591209466728e-05, + "log_odds_chosen": 10.507634162902832, + "log_odds_ratio": -9.133493585977703e-05, + "logits/chosen": -0.7208746671676636, + "logits/rejected": -0.7388486862182617, + "logps/chosen": -0.00014481779362540692, + "logps/rejected": -1.6705474853515625, + "loss": 0.4038, + "nll_loss": 0.10094369947910309, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4481780453934334e-05, + "rewards/margins": 0.16704027354717255, + "rewards/rejected": -0.16705477237701416, + "step": 10457 + }, + { + "epoch": 7.232365145228216, + "grad_norm": 5.793529033660889, + "learning_rate": 1.537574919317658e-05, + "log_odds_chosen": 11.204610824584961, + "log_odds_ratio": -0.0009517016005702317, + "logits/chosen": -0.3365629315376282, + "logits/rejected": -0.429565966129303, + "logps/chosen": -0.0028755757957696915, + "logps/rejected": -2.9955363273620605, + "loss": 0.4887, + "nll_loss": 0.1220720037817955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00028755763196386397, + "rewards/margins": 0.29926609992980957, + "rewards/rejected": -0.29955363273620605, + "step": 10458 + }, + { + "epoch": 7.233056708160443, + "grad_norm": 5.168619155883789, + "learning_rate": 1.537190717688643e-05, + "log_odds_chosen": 9.76969051361084, + "log_odds_ratio": -0.00230429507791996, + "logits/chosen": -0.21838542819023132, + "logits/rejected": -0.32139796018600464, + "logps/chosen": -0.001311866450123489, + "logps/rejected": -1.8827883005142212, + "loss": 1.0604, + "nll_loss": 0.2648812532424927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013118665083311498, + "rewards/margins": 0.1881476491689682, + "rewards/rejected": -0.18827882409095764, + "step": 10459 + }, + { + "epoch": 7.2337482710926695, + "grad_norm": 13.341404914855957, + "learning_rate": 1.536806516059628e-05, + "log_odds_chosen": 10.69163703918457, + "log_odds_ratio": -4.546945638139732e-05, + "logits/chosen": -0.3286779820919037, + "logits/rejected": -0.35568225383758545, + "logps/chosen": -0.0001345030905213207, + "logps/rejected": -1.6156198978424072, + "loss": 0.5042, + "nll_loss": 0.1260339319705963, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3450309779727831e-05, + "rewards/margins": 0.16154852509498596, + "rewards/rejected": -0.1615619957447052, + "step": 10460 + }, + { + "epoch": 7.234439834024896, + "grad_norm": 4.455132484436035, + "learning_rate": 1.5364223144306134e-05, + "log_odds_chosen": 11.63259506225586, + "log_odds_ratio": -2.2229838577914052e-05, + "logits/chosen": -0.44185471534729004, + "logits/rejected": -0.5651582479476929, + "logps/chosen": -0.0002310091513209045, + "logps/rejected": -2.6355438232421875, + "loss": 0.4199, + "nll_loss": 0.10497577488422394, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.310091622348409e-05, + "rewards/margins": 0.26353126764297485, + "rewards/rejected": -0.2635543942451477, + "step": 10461 + }, + { + "epoch": 7.235131396957123, + "grad_norm": 16.563955307006836, + "learning_rate": 1.5360381128015983e-05, + "log_odds_chosen": 11.065947532653809, + "log_odds_ratio": -6.139009929029271e-05, + "logits/chosen": -0.4617714583873749, + "logits/rejected": -0.40665292739868164, + "logps/chosen": -0.00022873807756695896, + "logps/rejected": -2.4485700130462646, + "loss": 0.4601, + "nll_loss": 0.11501865088939667, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.287380993948318e-05, + "rewards/margins": 0.24483412504196167, + "rewards/rejected": -0.24485701322555542, + "step": 10462 + }, + { + "epoch": 7.23582295988935, + "grad_norm": 3.3635497093200684, + "learning_rate": 1.5356539111725836e-05, + "log_odds_chosen": 11.090447425842285, + "log_odds_ratio": -0.00017930346075445414, + "logits/chosen": -0.838020920753479, + "logits/rejected": -0.7805964946746826, + "logps/chosen": -0.00020828915876336396, + "logps/rejected": -2.308910846710205, + "loss": 0.4053, + "nll_loss": 0.10130521655082703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0828916603932157e-05, + "rewards/margins": 0.23087026178836823, + "rewards/rejected": -0.23089109361171722, + "step": 10463 + }, + { + "epoch": 7.236514522821577, + "grad_norm": 6.092136383056641, + "learning_rate": 1.5352697095435685e-05, + "log_odds_chosen": 11.38381576538086, + "log_odds_ratio": -4.869890835834667e-05, + "logits/chosen": -0.4995681941509247, + "logits/rejected": -0.4679161310195923, + "logps/chosen": -0.00020764560031238943, + "logps/rejected": -2.872692584991455, + "loss": 0.4099, + "nll_loss": 0.10247980803251266, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0764560758834705e-05, + "rewards/margins": 0.28724849224090576, + "rewards/rejected": -0.28726926445961, + "step": 10464 + }, + { + "epoch": 7.237206085753804, + "grad_norm": 4.699680805206299, + "learning_rate": 1.5348855079145537e-05, + "log_odds_chosen": 11.292537689208984, + "log_odds_ratio": -1.5159775102802087e-05, + "logits/chosen": -0.16589859127998352, + "logits/rejected": -0.3281225562095642, + "logps/chosen": -0.00021162032498978078, + "logps/rejected": -2.7325103282928467, + "loss": 0.6188, + "nll_loss": 0.15469348430633545, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.116203359037172e-05, + "rewards/margins": 0.2732298970222473, + "rewards/rejected": -0.2732510566711426, + "step": 10465 + }, + { + "epoch": 7.2378976486860305, + "grad_norm": 7.134780406951904, + "learning_rate": 1.5345013062855386e-05, + "log_odds_chosen": 11.710221290588379, + "log_odds_ratio": -1.791205068002455e-05, + "logits/chosen": -0.27478334307670593, + "logits/rejected": -0.3175143003463745, + "logps/chosen": -0.00016071861318778247, + "logps/rejected": -2.8377864360809326, + "loss": 0.436, + "nll_loss": 0.10900319367647171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6071860954980366e-05, + "rewards/margins": 0.2837625741958618, + "rewards/rejected": -0.2837786376476288, + "step": 10466 + }, + { + "epoch": 7.238589211618257, + "grad_norm": 3.875941753387451, + "learning_rate": 1.534117104656524e-05, + "log_odds_chosen": 11.938088417053223, + "log_odds_ratio": -1.886241625470575e-05, + "logits/chosen": -0.29315173625946045, + "logits/rejected": -0.31250953674316406, + "logps/chosen": -0.0001133958576247096, + "logps/rejected": -2.6138150691986084, + "loss": 0.4277, + "nll_loss": 0.10692528635263443, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.133958539867308e-05, + "rewards/margins": 0.2613701820373535, + "rewards/rejected": -0.26138150691986084, + "step": 10467 + }, + { + "epoch": 7.239280774550484, + "grad_norm": 4.762474060058594, + "learning_rate": 1.5337329030275088e-05, + "log_odds_chosen": 11.086641311645508, + "log_odds_ratio": -3.06558795273304e-05, + "logits/chosen": -0.21213717758655548, + "logits/rejected": -0.4254568815231323, + "logps/chosen": -0.00031671352917328477, + "logps/rejected": -2.2601804733276367, + "loss": 0.431, + "nll_loss": 0.10773838311433792, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1671352189732715e-05, + "rewards/margins": 0.22598636150360107, + "rewards/rejected": -0.2260180562734604, + "step": 10468 + }, + { + "epoch": 7.239972337482711, + "grad_norm": 7.895349979400635, + "learning_rate": 1.533348701398494e-05, + "log_odds_chosen": 10.315384864807129, + "log_odds_ratio": -0.002297512488439679, + "logits/chosen": -0.5239083766937256, + "logits/rejected": -0.5624470710754395, + "logps/chosen": -0.0013429216342046857, + "logps/rejected": -2.5408456325531006, + "loss": 0.5067, + "nll_loss": 0.12644195556640625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013429216051008552, + "rewards/margins": 0.253950297832489, + "rewards/rejected": -0.25408458709716797, + "step": 10469 + }, + { + "epoch": 7.240663900414938, + "grad_norm": 5.463350296020508, + "learning_rate": 1.5329644997694793e-05, + "log_odds_chosen": 10.608156204223633, + "log_odds_ratio": -0.00014082054258324206, + "logits/chosen": -0.29368531703948975, + "logits/rejected": -0.4282805919647217, + "logps/chosen": -0.0004581251123454422, + "logps/rejected": -2.505800724029541, + "loss": 0.5768, + "nll_loss": 0.14418385922908783, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5812510506948456e-05, + "rewards/margins": 0.2505342662334442, + "rewards/rejected": -0.2505800724029541, + "step": 10470 + }, + { + "epoch": 7.241355463347165, + "grad_norm": 9.05333137512207, + "learning_rate": 1.532580298140464e-05, + "log_odds_chosen": 10.077249526977539, + "log_odds_ratio": -0.00021240493515506387, + "logits/chosen": -0.28883975744247437, + "logits/rejected": -0.37934327125549316, + "logps/chosen": -0.0005463613197207451, + "logps/rejected": -2.040273666381836, + "loss": 0.8706, + "nll_loss": 0.2176218032836914, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463613342726603e-05, + "rewards/margins": 0.203972727060318, + "rewards/rejected": -0.20402735471725464, + "step": 10471 + }, + { + "epoch": 7.2420470262793915, + "grad_norm": 6.9070143699646, + "learning_rate": 1.5321960965114494e-05, + "log_odds_chosen": 11.244699478149414, + "log_odds_ratio": -2.8428865334717557e-05, + "logits/chosen": -0.5642852783203125, + "logits/rejected": -0.6891046166419983, + "logps/chosen": -0.0001826708175940439, + "logps/rejected": -2.136019706726074, + "loss": 0.5984, + "nll_loss": 0.14959505200386047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.826708103180863e-05, + "rewards/margins": 0.21358370780944824, + "rewards/rejected": -0.2136019766330719, + "step": 10472 + }, + { + "epoch": 7.242738589211618, + "grad_norm": 4.121352672576904, + "learning_rate": 1.5318118948824346e-05, + "log_odds_chosen": 11.082643508911133, + "log_odds_ratio": -3.54972762579564e-05, + "logits/chosen": -0.3325987458229065, + "logits/rejected": -0.3006635904312134, + "logps/chosen": -0.00017654309340287, + "logps/rejected": -2.044565200805664, + "loss": 0.5264, + "nll_loss": 0.131588876247406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.765430897648912e-05, + "rewards/margins": 0.20443886518478394, + "rewards/rejected": -0.20445653796195984, + "step": 10473 + }, + { + "epoch": 7.243430152143845, + "grad_norm": 4.520073890686035, + "learning_rate": 1.5314276932534196e-05, + "log_odds_chosen": 10.439888000488281, + "log_odds_ratio": -0.0002236421569250524, + "logits/chosen": 0.012142367660999298, + "logits/rejected": -0.011481313034892082, + "logps/chosen": -0.0012220973148941994, + "logps/rejected": -2.2247068881988525, + "loss": 0.5939, + "nll_loss": 0.1484624147415161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012220973439980298, + "rewards/margins": 0.22234848141670227, + "rewards/rejected": -0.2224707007408142, + "step": 10474 + }, + { + "epoch": 7.244121715076072, + "grad_norm": 6.4784932136535645, + "learning_rate": 1.5310434916244045e-05, + "log_odds_chosen": 9.859674453735352, + "log_odds_ratio": -0.00020394177408888936, + "logits/chosen": -0.17349502444267273, + "logits/rejected": -0.2399659901857376, + "logps/chosen": -0.000377682619728148, + "logps/rejected": -1.8223941326141357, + "loss": 0.4168, + "nll_loss": 0.10418784618377686, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.776826270041056e-05, + "rewards/margins": 0.18220163881778717, + "rewards/rejected": -0.18223941326141357, + "step": 10475 + }, + { + "epoch": 7.244813278008299, + "grad_norm": 19.220417022705078, + "learning_rate": 1.5306592899953897e-05, + "log_odds_chosen": 11.271221160888672, + "log_odds_ratio": -3.5153836506651714e-05, + "logits/chosen": 0.07185645401477814, + "logits/rejected": 0.009163126349449158, + "logps/chosen": -0.0005185899208299816, + "logps/rejected": -2.475466012954712, + "loss": 0.6362, + "nll_loss": 0.15904828906059265, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1858991355402395e-05, + "rewards/margins": 0.24749475717544556, + "rewards/rejected": -0.24754659831523895, + "step": 10476 + }, + { + "epoch": 7.245504840940526, + "grad_norm": 6.962161540985107, + "learning_rate": 1.5302750883663746e-05, + "log_odds_chosen": 10.97325611114502, + "log_odds_ratio": -6.409084744518623e-05, + "logits/chosen": -0.58690345287323, + "logits/rejected": -0.6452951431274414, + "logps/chosen": -0.0001535381597932428, + "logps/rejected": -1.999860167503357, + "loss": 0.7868, + "nll_loss": 0.1966904252767563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5353816706920043e-05, + "rewards/margins": 0.19997066259384155, + "rewards/rejected": -0.1999860256910324, + "step": 10477 + }, + { + "epoch": 7.246196403872752, + "grad_norm": 11.236804962158203, + "learning_rate": 1.52989088673736e-05, + "log_odds_chosen": 10.579336166381836, + "log_odds_ratio": -9.932727698469535e-05, + "logits/chosen": -0.5695323348045349, + "logits/rejected": -0.42794185876846313, + "logps/chosen": -0.0003462682943791151, + "logps/rejected": -2.0027308464050293, + "loss": 0.503, + "nll_loss": 0.12573137879371643, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.462682798271999e-05, + "rewards/margins": 0.20023846626281738, + "rewards/rejected": -0.20027309656143188, + "step": 10478 + }, + { + "epoch": 7.246887966804979, + "grad_norm": 4.607297897338867, + "learning_rate": 1.529506685108345e-05, + "log_odds_chosen": 11.616013526916504, + "log_odds_ratio": -3.5801112971967086e-05, + "logits/chosen": -0.08392917364835739, + "logits/rejected": -0.20223979651927948, + "logps/chosen": -0.0002970542118418962, + "logps/rejected": -2.897155284881592, + "loss": 0.8331, + "nll_loss": 0.2082727998495102, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9705421184189618e-05, + "rewards/margins": 0.28968581557273865, + "rewards/rejected": -0.2897155284881592, + "step": 10479 + }, + { + "epoch": 7.247579529737206, + "grad_norm": 8.609513282775879, + "learning_rate": 1.52912248347933e-05, + "log_odds_chosen": 11.287090301513672, + "log_odds_ratio": -0.0002003059780690819, + "logits/chosen": -0.4144344925880432, + "logits/rejected": -0.5291805267333984, + "logps/chosen": -0.0007323965546675026, + "logps/rejected": -2.5458168983459473, + "loss": 1.0414, + "nll_loss": 0.2603290379047394, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.323966565309092e-05, + "rewards/margins": 0.25450846552848816, + "rewards/rejected": -0.2545816898345947, + "step": 10480 + }, + { + "epoch": 7.248271092669433, + "grad_norm": 4.7390265464782715, + "learning_rate": 1.5287382818503152e-05, + "log_odds_chosen": 12.203584671020508, + "log_odds_ratio": -5.612453242065385e-05, + "logits/chosen": 0.10204656422138214, + "logits/rejected": 0.06456445157527924, + "logps/chosen": -0.00033078165142796934, + "logps/rejected": -3.4384679794311523, + "loss": 0.5263, + "nll_loss": 0.13156820833683014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3078165870392695e-05, + "rewards/margins": 0.3438137173652649, + "rewards/rejected": -0.34384679794311523, + "step": 10481 + }, + { + "epoch": 7.24896265560166, + "grad_norm": 5.3882293701171875, + "learning_rate": 1.5283540802213005e-05, + "log_odds_chosen": 11.932504653930664, + "log_odds_ratio": -9.521067113382742e-06, + "logits/chosen": 0.12056075036525726, + "logits/rejected": 0.016441553831100464, + "logps/chosen": -0.00011636118142632768, + "logps/rejected": -2.58870267868042, + "loss": 0.5869, + "nll_loss": 0.14673256874084473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1636117960733827e-05, + "rewards/margins": 0.2588586211204529, + "rewards/rejected": -0.2588702440261841, + "step": 10482 + }, + { + "epoch": 7.249654218533887, + "grad_norm": 5.149341583251953, + "learning_rate": 1.5279698785922854e-05, + "log_odds_chosen": 11.435346603393555, + "log_odds_ratio": -1.5145962606766261e-05, + "logits/chosen": -0.11249607056379318, + "logits/rejected": -0.2753619849681854, + "logps/chosen": -0.00019512552535161376, + "logps/rejected": -2.530555248260498, + "loss": 0.5121, + "nll_loss": 0.1280229389667511, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.95125539903529e-05, + "rewards/margins": 0.2530360221862793, + "rewards/rejected": -0.25305551290512085, + "step": 10483 + }, + { + "epoch": 7.250345781466113, + "grad_norm": 4.259470462799072, + "learning_rate": 1.5275856769632703e-05, + "log_odds_chosen": 11.782159805297852, + "log_odds_ratio": -1.5393632565974258e-05, + "logits/chosen": -0.25803565979003906, + "logits/rejected": -0.2909121811389923, + "logps/chosen": -0.00011526994785526767, + "logps/rejected": -2.5579113960266113, + "loss": 0.563, + "nll_loss": 0.14076074957847595, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1526994057931006e-05, + "rewards/margins": 0.2557796239852905, + "rewards/rejected": -0.25579115748405457, + "step": 10484 + }, + { + "epoch": 7.25103734439834, + "grad_norm": 4.723344802856445, + "learning_rate": 1.5272014753342555e-05, + "log_odds_chosen": 12.08088493347168, + "log_odds_ratio": -1.5762172552058473e-05, + "logits/chosen": -0.12385115772485733, + "logits/rejected": -0.1922972947359085, + "logps/chosen": -0.00016547783161513507, + "logps/rejected": -3.349936008453369, + "loss": 0.4411, + "nll_loss": 0.11026619374752045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6547783161513507e-05, + "rewards/margins": 0.33497709035873413, + "rewards/rejected": -0.3349936306476593, + "step": 10485 + }, + { + "epoch": 7.251728907330567, + "grad_norm": 5.340590000152588, + "learning_rate": 1.5268172737052405e-05, + "log_odds_chosen": 11.269617080688477, + "log_odds_ratio": -3.2030522561399266e-05, + "logits/chosen": -0.3441758155822754, + "logits/rejected": -0.45444193482398987, + "logps/chosen": -8.907563460525125e-05, + "logps/rejected": -1.851801872253418, + "loss": 0.5742, + "nll_loss": 0.14355482161045074, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.907563824323006e-06, + "rewards/margins": 0.18517126142978668, + "rewards/rejected": -0.1851801872253418, + "step": 10486 + }, + { + "epoch": 7.252420470262794, + "grad_norm": 3.9872963428497314, + "learning_rate": 1.5264330720762257e-05, + "log_odds_chosen": 10.931245803833008, + "log_odds_ratio": -3.074966662097722e-05, + "logits/chosen": -0.290107786655426, + "logits/rejected": -0.27709782123565674, + "logps/chosen": -0.00021908615599386394, + "logps/rejected": -2.241764545440674, + "loss": 0.5079, + "nll_loss": 0.12695977091789246, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1908617782173678e-05, + "rewards/margins": 0.22415456175804138, + "rewards/rejected": -0.22417645156383514, + "step": 10487 + }, + { + "epoch": 7.253112033195021, + "grad_norm": 5.664169788360596, + "learning_rate": 1.526048870447211e-05, + "log_odds_chosen": 10.62841510772705, + "log_odds_ratio": -0.00015170712140388787, + "logits/chosen": -0.543666422367096, + "logits/rejected": -0.5873074531555176, + "logps/chosen": -0.0002989826025441289, + "logps/rejected": -1.8184638023376465, + "loss": 0.5249, + "nll_loss": 0.13120988011360168, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9898259526817128e-05, + "rewards/margins": 0.1818164885044098, + "rewards/rejected": -0.18184638023376465, + "step": 10488 + }, + { + "epoch": 7.253803596127248, + "grad_norm": 9.838672637939453, + "learning_rate": 1.5256646688181958e-05, + "log_odds_chosen": 10.980676651000977, + "log_odds_ratio": -5.6112156016752124e-05, + "logits/chosen": -0.6420059204101562, + "logits/rejected": -0.6900030374526978, + "logps/chosen": -0.00023247089120559394, + "logps/rejected": -2.4337525367736816, + "loss": 1.0151, + "nll_loss": 0.2537762522697449, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.324709203094244e-05, + "rewards/margins": 0.243352010846138, + "rewards/rejected": -0.2433752715587616, + "step": 10489 + }, + { + "epoch": 7.254495159059474, + "grad_norm": 5.095361709594727, + "learning_rate": 1.525280467189181e-05, + "log_odds_chosen": 10.622602462768555, + "log_odds_ratio": -6.363199645420536e-05, + "logits/chosen": -0.27274662256240845, + "logits/rejected": -0.2679747939109802, + "logps/chosen": -0.00013507320545613766, + "logps/rejected": -1.8644970655441284, + "loss": 0.4659, + "nll_loss": 0.1164785847067833, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3507320545613766e-05, + "rewards/margins": 0.18643620610237122, + "rewards/rejected": -0.18644970655441284, + "step": 10490 + }, + { + "epoch": 7.255186721991701, + "grad_norm": 5.322432041168213, + "learning_rate": 1.5248962655601662e-05, + "log_odds_chosen": 11.213184356689453, + "log_odds_ratio": -6.134158320492133e-05, + "logits/chosen": -0.17560338973999023, + "logits/rejected": -0.2747710943222046, + "logps/chosen": -0.00028960593044757843, + "logps/rejected": -2.5317702293395996, + "loss": 0.67, + "nll_loss": 0.167500302195549, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8960590498172678e-05, + "rewards/margins": 0.25314807891845703, + "rewards/rejected": -0.25317704677581787, + "step": 10491 + }, + { + "epoch": 7.255878284923928, + "grad_norm": 3.760510206222534, + "learning_rate": 1.524512063931151e-05, + "log_odds_chosen": 10.296430587768555, + "log_odds_ratio": -0.0005208022193983197, + "logits/chosen": -0.9254645109176636, + "logits/rejected": -0.9133840799331665, + "logps/chosen": -0.00017214936087839305, + "logps/rejected": -1.6661992073059082, + "loss": 0.4374, + "nll_loss": 0.10929840058088303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7214935724041425e-05, + "rewards/margins": 0.1666027009487152, + "rewards/rejected": -0.1666199266910553, + "step": 10492 + }, + { + "epoch": 7.256569847856155, + "grad_norm": 10.162874221801758, + "learning_rate": 1.5241278623021363e-05, + "log_odds_chosen": 10.922866821289062, + "log_odds_ratio": -0.0003495032142382115, + "logits/chosen": -0.776273250579834, + "logits/rejected": -0.8349599838256836, + "logps/chosen": -0.00038618946564383805, + "logps/rejected": -2.118861675262451, + "loss": 0.5614, + "nll_loss": 0.14032500982284546, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.861894947476685e-05, + "rewards/margins": 0.21184757351875305, + "rewards/rejected": -0.21188616752624512, + "step": 10493 + }, + { + "epoch": 7.257261410788382, + "grad_norm": 4.913707256317139, + "learning_rate": 1.5237436606731214e-05, + "log_odds_chosen": 10.938773155212402, + "log_odds_ratio": -5.736919047194533e-05, + "logits/chosen": -0.5350974798202515, + "logits/rejected": -0.47295793890953064, + "logps/chosen": -0.00023437832714989781, + "logps/rejected": -2.0633440017700195, + "loss": 0.4701, + "nll_loss": 0.1175263375043869, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3437833078787662e-05, + "rewards/margins": 0.20631097257137299, + "rewards/rejected": -0.2063344120979309, + "step": 10494 + }, + { + "epoch": 7.2579529737206085, + "grad_norm": 4.5175347328186035, + "learning_rate": 1.5233594590441063e-05, + "log_odds_chosen": 10.719084739685059, + "log_odds_ratio": -8.76936610438861e-05, + "logits/chosen": -0.2563233971595764, + "logits/rejected": -0.3417225778102875, + "logps/chosen": -0.0004405237559694797, + "logps/rejected": -2.187819242477417, + "loss": 0.5478, + "nll_loss": 0.13694101572036743, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.405237268656492e-05, + "rewards/margins": 0.2187378704547882, + "rewards/rejected": -0.21878191828727722, + "step": 10495 + }, + { + "epoch": 7.258644536652835, + "grad_norm": 5.704215049743652, + "learning_rate": 1.5229752574150915e-05, + "log_odds_chosen": 11.154399871826172, + "log_odds_ratio": -2.9882006856496446e-05, + "logits/chosen": -0.5726063251495361, + "logits/rejected": -0.5620173811912537, + "logps/chosen": -0.0001177690428448841, + "logps/rejected": -2.1058263778686523, + "loss": 0.5194, + "nll_loss": 0.12984028458595276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1776905012084171e-05, + "rewards/margins": 0.21057087182998657, + "rewards/rejected": -0.2105826437473297, + "step": 10496 + }, + { + "epoch": 7.259336099585062, + "grad_norm": 3.9949491024017334, + "learning_rate": 1.5225910557860764e-05, + "log_odds_chosen": 11.0726318359375, + "log_odds_ratio": -2.9816317692166194e-05, + "logits/chosen": -0.23565047979354858, + "logits/rejected": -0.2983737885951996, + "logps/chosen": -0.00022566900588572025, + "logps/rejected": -2.5271129608154297, + "loss": 0.509, + "nll_loss": 0.12725451588630676, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2566900952369906e-05, + "rewards/margins": 0.2526887357234955, + "rewards/rejected": -0.25271129608154297, + "step": 10497 + }, + { + "epoch": 7.260027662517289, + "grad_norm": 5.695704460144043, + "learning_rate": 1.5222068541570617e-05, + "log_odds_chosen": 10.686333656311035, + "log_odds_ratio": -0.0001333652762696147, + "logits/chosen": -0.23131543397903442, + "logits/rejected": -0.2706787884235382, + "logps/chosen": -0.00022404955234378576, + "logps/rejected": -1.6549627780914307, + "loss": 0.5849, + "nll_loss": 0.14620620012283325, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2404954506782815e-05, + "rewards/margins": 0.16547387838363647, + "rewards/rejected": -0.16549627482891083, + "step": 10498 + }, + { + "epoch": 7.260719225449516, + "grad_norm": 4.58342981338501, + "learning_rate": 1.5218226525280468e-05, + "log_odds_chosen": 11.475317001342773, + "log_odds_ratio": -1.632480234547984e-05, + "logits/chosen": -0.41076338291168213, + "logits/rejected": -0.3890606760978699, + "logps/chosen": -8.86881971382536e-05, + "logps/rejected": -2.0317766666412354, + "loss": 0.3491, + "nll_loss": 0.08727437257766724, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.868820259522181e-06, + "rewards/margins": 0.2031688094139099, + "rewards/rejected": -0.20317767560482025, + "step": 10499 + }, + { + "epoch": 7.261410788381743, + "grad_norm": 6.624739170074463, + "learning_rate": 1.5214384508990317e-05, + "log_odds_chosen": 11.000941276550293, + "log_odds_ratio": -0.00010065599053632468, + "logits/chosen": -0.5458360314369202, + "logits/rejected": -0.49721595644950867, + "logps/chosen": -0.00041760189924389124, + "logps/rejected": -2.0315442085266113, + "loss": 0.345, + "nll_loss": 0.08623616397380829, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.176019137958065e-05, + "rewards/margins": 0.20311269164085388, + "rewards/rejected": -0.20315444469451904, + "step": 10500 + }, + { + "epoch": 7.2621023513139695, + "grad_norm": 3.732039451599121, + "learning_rate": 1.521054249270017e-05, + "log_odds_chosen": 10.946099281311035, + "log_odds_ratio": -9.804220462683588e-05, + "logits/chosen": -0.41156861186027527, + "logits/rejected": -0.5109293460845947, + "logps/chosen": -0.00017111182387452573, + "logps/rejected": -2.3970518112182617, + "loss": 0.4056, + "nll_loss": 0.10140102356672287, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7111182387452573e-05, + "rewards/margins": 0.23968809843063354, + "rewards/rejected": -0.23970520496368408, + "step": 10501 + }, + { + "epoch": 7.262793914246196, + "grad_norm": 8.999588012695312, + "learning_rate": 1.5206700476410022e-05, + "log_odds_chosen": 9.33828353881836, + "log_odds_ratio": -0.0001583740668138489, + "logits/chosen": -0.19364869594573975, + "logits/rejected": -0.2710861563682556, + "logps/chosen": -0.0010852512205019593, + "logps/rejected": -1.7289215326309204, + "loss": 0.7447, + "nll_loss": 0.18614733219146729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010852512787096202, + "rewards/margins": 0.17278361320495605, + "rewards/rejected": -0.17289213836193085, + "step": 10502 + }, + { + "epoch": 7.263485477178423, + "grad_norm": 6.641200542449951, + "learning_rate": 1.520285846011987e-05, + "log_odds_chosen": 10.233026504516602, + "log_odds_ratio": -6.110913818702102e-05, + "logits/chosen": 0.1292034536600113, + "logits/rejected": 0.021028682589530945, + "logps/chosen": -0.0006737616495229304, + "logps/rejected": -2.5986855030059814, + "loss": 1.0676, + "nll_loss": 0.26689720153808594, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.737616786267608e-05, + "rewards/margins": 0.2598011791706085, + "rewards/rejected": -0.2598685622215271, + "step": 10503 + }, + { + "epoch": 7.26417704011065, + "grad_norm": 10.292189598083496, + "learning_rate": 1.5199016443829721e-05, + "log_odds_chosen": 10.901208877563477, + "log_odds_ratio": -0.00015122054901439697, + "logits/chosen": -0.44021257758140564, + "logits/rejected": -0.42914289236068726, + "logps/chosen": -0.0005100009148009121, + "logps/rejected": -2.387458324432373, + "loss": 0.4385, + "nll_loss": 0.10961472988128662, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.100009002489969e-05, + "rewards/margins": 0.23869486153125763, + "rewards/rejected": -0.23874585330486298, + "step": 10504 + }, + { + "epoch": 7.264868603042877, + "grad_norm": 11.866669654846191, + "learning_rate": 1.5195174427539574e-05, + "log_odds_chosen": 11.281862258911133, + "log_odds_ratio": -1.964960028999485e-05, + "logits/chosen": -0.24127569794654846, + "logits/rejected": -0.247794970870018, + "logps/chosen": -0.0001290692889597267, + "logps/rejected": -2.2144856452941895, + "loss": 0.4925, + "nll_loss": 0.12313254177570343, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2906928532174788e-05, + "rewards/margins": 0.22143566608428955, + "rewards/rejected": -0.22144857048988342, + "step": 10505 + }, + { + "epoch": 7.265560165975104, + "grad_norm": 5.618996620178223, + "learning_rate": 1.5191332411249423e-05, + "log_odds_chosen": 11.821170806884766, + "log_odds_ratio": -3.0051140129216947e-05, + "logits/chosen": 0.040791213512420654, + "logits/rejected": 0.0787191092967987, + "logps/chosen": -0.00021927639318164438, + "logps/rejected": -2.905123233795166, + "loss": 0.469, + "nll_loss": 0.1172391027212143, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.192764077335596e-05, + "rewards/margins": 0.29049041867256165, + "rewards/rejected": -0.2905123233795166, + "step": 10506 + }, + { + "epoch": 7.2662517289073305, + "grad_norm": 5.204725742340088, + "learning_rate": 1.5187490394959275e-05, + "log_odds_chosen": 10.920637130737305, + "log_odds_ratio": -7.086249388521537e-05, + "logits/chosen": 0.2980443835258484, + "logits/rejected": 0.26738932728767395, + "logps/chosen": -0.00040924924542196095, + "logps/rejected": -2.965292453765869, + "loss": 0.597, + "nll_loss": 0.14924757182598114, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0924929635366425e-05, + "rewards/margins": 0.29648828506469727, + "rewards/rejected": -0.29652923345565796, + "step": 10507 + }, + { + "epoch": 7.266943291839557, + "grad_norm": 5.361124515533447, + "learning_rate": 1.5183648378669126e-05, + "log_odds_chosen": 10.659852981567383, + "log_odds_ratio": -6.801797280786559e-05, + "logits/chosen": -0.5049962997436523, + "logits/rejected": -0.5538925528526306, + "logps/chosen": -0.0001713060773909092, + "logps/rejected": -1.7066850662231445, + "loss": 0.3816, + "nll_loss": 0.09539598226547241, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.713060737529304e-05, + "rewards/margins": 0.1706513613462448, + "rewards/rejected": -0.17066851258277893, + "step": 10508 + }, + { + "epoch": 7.267634854771784, + "grad_norm": 6.787872314453125, + "learning_rate": 1.5179806362378977e-05, + "log_odds_chosen": 11.107709884643555, + "log_odds_ratio": -5.516607780009508e-05, + "logits/chosen": -0.046600092202425, + "logits/rejected": -0.07911308854818344, + "logps/chosen": -0.0001958270586328581, + "logps/rejected": -2.434133529663086, + "loss": 0.4259, + "nll_loss": 0.10647544264793396, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9582706954679452e-05, + "rewards/margins": 0.24339377880096436, + "rewards/rejected": -0.24341335892677307, + "step": 10509 + }, + { + "epoch": 7.268326417704011, + "grad_norm": 5.733765125274658, + "learning_rate": 1.5175964346088828e-05, + "log_odds_chosen": 11.248653411865234, + "log_odds_ratio": -6.074179327697493e-05, + "logits/chosen": -0.26920583844184875, + "logits/rejected": -0.3057137429714203, + "logps/chosen": -0.00018125417409464717, + "logps/rejected": -2.5340723991394043, + "loss": 0.5202, + "nll_loss": 0.13004840910434723, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.812541813706048e-05, + "rewards/margins": 0.2533891201019287, + "rewards/rejected": -0.25340723991394043, + "step": 10510 + }, + { + "epoch": 7.269017980636238, + "grad_norm": 3.1367990970611572, + "learning_rate": 1.517212232979868e-05, + "log_odds_chosen": 10.54247760772705, + "log_odds_ratio": -6.820161797804758e-05, + "logits/chosen": -0.06475108861923218, + "logits/rejected": -0.1413227915763855, + "logps/chosen": -0.00020324383513070643, + "logps/rejected": -1.9654295444488525, + "loss": 0.6948, + "nll_loss": 0.1737029105424881, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0324383513070643e-05, + "rewards/margins": 0.19652262330055237, + "rewards/rejected": -0.19654296338558197, + "step": 10511 + }, + { + "epoch": 7.269709543568465, + "grad_norm": 2.6129097938537598, + "learning_rate": 1.5168280313508529e-05, + "log_odds_chosen": 10.622481346130371, + "log_odds_ratio": -0.00013337060227058828, + "logits/chosen": -0.3024970293045044, + "logits/rejected": -0.3406152129173279, + "logps/chosen": -0.00026366510428488255, + "logps/rejected": -1.9767258167266846, + "loss": 0.232, + "nll_loss": 0.05797753483057022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.63665133388713e-05, + "rewards/margins": 0.19764623045921326, + "rewards/rejected": -0.19767259061336517, + "step": 10512 + }, + { + "epoch": 7.2704011065006915, + "grad_norm": 5.719183444976807, + "learning_rate": 1.5164438297218382e-05, + "log_odds_chosen": 11.262063026428223, + "log_odds_ratio": -4.1249000787502155e-05, + "logits/chosen": -0.06983301043510437, + "logits/rejected": 0.006098955869674683, + "logps/chosen": -0.0002281243505422026, + "logps/rejected": -2.2929091453552246, + "loss": 0.5736, + "nll_loss": 0.14339330792427063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2812433599028736e-05, + "rewards/margins": 0.2292681187391281, + "rewards/rejected": -0.2292909324169159, + "step": 10513 + }, + { + "epoch": 7.271092669432918, + "grad_norm": 4.460439205169678, + "learning_rate": 1.5160596280928232e-05, + "log_odds_chosen": 11.027078628540039, + "log_odds_ratio": -6.842397124273703e-05, + "logits/chosen": -0.41829079389572144, + "logits/rejected": -0.5007650852203369, + "logps/chosen": -0.00018773046031128615, + "logps/rejected": -2.3771162033081055, + "loss": 0.5184, + "nll_loss": 0.1295936107635498, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8773047486320138e-05, + "rewards/margins": 0.23769287765026093, + "rewards/rejected": -0.23771163821220398, + "step": 10514 + }, + { + "epoch": 7.271784232365145, + "grad_norm": 12.794469833374023, + "learning_rate": 1.5156754264638081e-05, + "log_odds_chosen": 10.991874694824219, + "log_odds_ratio": -4.0517210436519235e-05, + "logits/chosen": -0.9412696957588196, + "logits/rejected": -1.0342063903808594, + "logps/chosen": -0.00036933840601705015, + "logps/rejected": -2.2551140785217285, + "loss": 0.5949, + "nll_loss": 0.14871898293495178, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6933841329300776e-05, + "rewards/margins": 0.22547447681427002, + "rewards/rejected": -0.22551141679286957, + "step": 10515 + }, + { + "epoch": 7.272475795297372, + "grad_norm": 5.832699298858643, + "learning_rate": 1.5152912248347934e-05, + "log_odds_chosen": 10.880440711975098, + "log_odds_ratio": -4.107558561372571e-05, + "logits/chosen": -0.5598329901695251, + "logits/rejected": -0.466844379901886, + "logps/chosen": -0.00018433656077831984, + "logps/rejected": -2.386929750442505, + "loss": 0.341, + "nll_loss": 0.08524862676858902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8433656805427745e-05, + "rewards/margins": 0.23867452144622803, + "rewards/rejected": -0.238692969083786, + "step": 10516 + }, + { + "epoch": 7.273167358229599, + "grad_norm": 12.49798583984375, + "learning_rate": 1.5149070232057786e-05, + "log_odds_chosen": 10.499706268310547, + "log_odds_ratio": -0.0005382996168918908, + "logits/chosen": -0.7100361585617065, + "logits/rejected": -0.7095794677734375, + "logps/chosen": -0.003856180002912879, + "logps/rejected": -2.9049072265625, + "loss": 0.8193, + "nll_loss": 0.20477049052715302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003856180119328201, + "rewards/margins": 0.29010510444641113, + "rewards/rejected": -0.2904907166957855, + "step": 10517 + }, + { + "epoch": 7.273858921161826, + "grad_norm": 5.052276611328125, + "learning_rate": 1.5145228215767635e-05, + "log_odds_chosen": 11.47298812866211, + "log_odds_ratio": -4.129732042201795e-05, + "logits/chosen": 0.3368604779243469, + "logits/rejected": 0.18388760089874268, + "logps/chosen": -0.00019135003094561398, + "logps/rejected": -2.2947165966033936, + "loss": 0.4334, + "nll_loss": 0.10833790153265, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.913500454975292e-05, + "rewards/margins": 0.22945252060890198, + "rewards/rejected": -0.22947168350219727, + "step": 10518 + }, + { + "epoch": 7.2745504840940525, + "grad_norm": 3.533688545227051, + "learning_rate": 1.5141386199477486e-05, + "log_odds_chosen": 11.837002754211426, + "log_odds_ratio": -1.9109105778625235e-05, + "logits/chosen": -0.4327254295349121, + "logits/rejected": -0.5700982809066772, + "logps/chosen": -8.316531602758914e-05, + "logps/rejected": -2.10473370552063, + "loss": 0.4574, + "nll_loss": 0.11434361338615417, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.316531420859974e-06, + "rewards/margins": 0.21046505868434906, + "rewards/rejected": -0.21047338843345642, + "step": 10519 + }, + { + "epoch": 7.275242047026279, + "grad_norm": 7.7128777503967285, + "learning_rate": 1.5137544183187338e-05, + "log_odds_chosen": 11.128162384033203, + "log_odds_ratio": -4.5072018110658973e-05, + "logits/chosen": -0.04674968123435974, + "logits/rejected": -0.19485291838645935, + "logps/chosen": -0.00018412625649943948, + "logps/rejected": -2.3100948333740234, + "loss": 0.6638, + "nll_loss": 0.16594311594963074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8412625649943948e-05, + "rewards/margins": 0.23099108040332794, + "rewards/rejected": -0.23100949823856354, + "step": 10520 + }, + { + "epoch": 7.275933609958506, + "grad_norm": 5.247791767120361, + "learning_rate": 1.5133702166897188e-05, + "log_odds_chosen": 11.184106826782227, + "log_odds_ratio": -0.0001805050706025213, + "logits/chosen": -0.3185228705406189, + "logits/rejected": -0.2991114556789398, + "logps/chosen": -0.000440732081187889, + "logps/rejected": -3.0187864303588867, + "loss": 0.4653, + "nll_loss": 0.11631738394498825, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4073207391193137e-05, + "rewards/margins": 0.3018345832824707, + "rewards/rejected": -0.3018786907196045, + "step": 10521 + }, + { + "epoch": 7.276625172890733, + "grad_norm": 3.7616748809814453, + "learning_rate": 1.512986015060704e-05, + "log_odds_chosen": 10.252514839172363, + "log_odds_ratio": -0.00017552207282278687, + "logits/chosen": -0.2603698968887329, + "logits/rejected": -0.37053489685058594, + "logps/chosen": -0.0002522366994526237, + "logps/rejected": -1.5280945301055908, + "loss": 0.3446, + "nll_loss": 0.08612124621868134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.52236732194433e-05, + "rewards/margins": 0.15278422832489014, + "rewards/rejected": -0.15280945599079132, + "step": 10522 + }, + { + "epoch": 7.27731673582296, + "grad_norm": 5.105588436126709, + "learning_rate": 1.512601813431689e-05, + "log_odds_chosen": 10.866778373718262, + "log_odds_ratio": -5.1680850447155535e-05, + "logits/chosen": -0.3906993865966797, + "logits/rejected": -0.3765483498573303, + "logps/chosen": -0.00013929187844041735, + "logps/rejected": -2.2346510887145996, + "loss": 0.3493, + "nll_loss": 0.0873219296336174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3929187844041735e-05, + "rewards/margins": 0.22345119714736938, + "rewards/rejected": -0.22346511483192444, + "step": 10523 + }, + { + "epoch": 7.278008298755187, + "grad_norm": 5.647239685058594, + "learning_rate": 1.512217611802674e-05, + "log_odds_chosen": 11.40186882019043, + "log_odds_ratio": -3.86638603231404e-05, + "logits/chosen": -0.25338584184646606, + "logits/rejected": -0.3367050588130951, + "logps/chosen": -0.00011416596680646762, + "logps/rejected": -2.170494318008423, + "loss": 0.4294, + "nll_loss": 0.10734062641859055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1416597772040404e-05, + "rewards/margins": 0.21703803539276123, + "rewards/rejected": -0.21704944968223572, + "step": 10524 + }, + { + "epoch": 7.2786998616874135, + "grad_norm": 5.01591682434082, + "learning_rate": 1.5118334101736592e-05, + "log_odds_chosen": 11.633666038513184, + "log_odds_ratio": -1.4664670743513852e-05, + "logits/chosen": -0.44926968216896057, + "logits/rejected": -0.561745285987854, + "logps/chosen": -0.00010955406469292939, + "logps/rejected": -2.233703136444092, + "loss": 0.4516, + "nll_loss": 0.11290092021226883, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0955405741697177e-05, + "rewards/margins": 0.2233593761920929, + "rewards/rejected": -0.22337032854557037, + "step": 10525 + }, + { + "epoch": 7.27939142461964, + "grad_norm": 3.686668872833252, + "learning_rate": 1.5114492085446445e-05, + "log_odds_chosen": 10.510477066040039, + "log_odds_ratio": -7.26221696822904e-05, + "logits/chosen": -0.1770956963300705, + "logits/rejected": -0.18479953706264496, + "logps/chosen": -0.00028913721325807273, + "logps/rejected": -1.9379236698150635, + "loss": 0.661, + "nll_loss": 0.16525492072105408, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8913722417200916e-05, + "rewards/margins": 0.19376343488693237, + "rewards/rejected": -0.19379234313964844, + "step": 10526 + }, + { + "epoch": 7.280082987551867, + "grad_norm": 7.128070831298828, + "learning_rate": 1.5110650069156294e-05, + "log_odds_chosen": 10.678375244140625, + "log_odds_ratio": -0.00012848350161220878, + "logits/chosen": -0.1631132960319519, + "logits/rejected": -0.17706161737442017, + "logps/chosen": -0.0004405647632665932, + "logps/rejected": -2.2365002632141113, + "loss": 0.4725, + "nll_loss": 0.11811183393001556, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.405647632665932e-05, + "rewards/margins": 0.2236059606075287, + "rewards/rejected": -0.2236500382423401, + "step": 10527 + }, + { + "epoch": 7.280774550484094, + "grad_norm": 5.585725784301758, + "learning_rate": 1.5106808052866144e-05, + "log_odds_chosen": 11.369913101196289, + "log_odds_ratio": -3.4828488423954695e-05, + "logits/chosen": -0.02725176513195038, + "logits/rejected": -0.12657952308654785, + "logps/chosen": -0.00021642667707055807, + "logps/rejected": -2.4218692779541016, + "loss": 0.595, + "nll_loss": 0.14873793721199036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.164266879844945e-05, + "rewards/margins": 0.24216529726982117, + "rewards/rejected": -0.24218693375587463, + "step": 10528 + }, + { + "epoch": 7.281466113416321, + "grad_norm": 5.846073627471924, + "learning_rate": 1.5102966036575997e-05, + "log_odds_chosen": 11.152416229248047, + "log_odds_ratio": -0.00011589920904953033, + "logits/chosen": -0.1479973942041397, + "logits/rejected": -0.24840444326400757, + "logps/chosen": -0.00025501666823402047, + "logps/rejected": -2.243656635284424, + "loss": 0.5941, + "nll_loss": 0.14852215349674225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5501665732008405e-05, + "rewards/margins": 0.22434015572071075, + "rewards/rejected": -0.22436565160751343, + "step": 10529 + }, + { + "epoch": 7.282157676348548, + "grad_norm": 5.144644737243652, + "learning_rate": 1.5099124020285846e-05, + "log_odds_chosen": 9.871131896972656, + "log_odds_ratio": -0.0003432652447372675, + "logits/chosen": -0.4323476254940033, + "logits/rejected": -0.432583749294281, + "logps/chosen": -0.0003030068473890424, + "logps/rejected": -1.5213305950164795, + "loss": 0.4964, + "nll_loss": 0.12406061589717865, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0300687285489403e-05, + "rewards/margins": 0.1521027386188507, + "rewards/rejected": -0.152133047580719, + "step": 10530 + }, + { + "epoch": 7.282849239280774, + "grad_norm": 3.782452344894409, + "learning_rate": 1.5095282003995698e-05, + "log_odds_chosen": 11.423676490783691, + "log_odds_ratio": -5.270875044516288e-05, + "logits/chosen": 0.17047113180160522, + "logits/rejected": 0.1552511751651764, + "logps/chosen": -0.00021822653070557863, + "logps/rejected": -2.7009034156799316, + "loss": 0.4165, + "nll_loss": 0.10411202162504196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1822654161951505e-05, + "rewards/margins": 0.27006852626800537, + "rewards/rejected": -0.27009034156799316, + "step": 10531 + }, + { + "epoch": 7.283540802213001, + "grad_norm": 7.957913875579834, + "learning_rate": 1.509143998770555e-05, + "log_odds_chosen": 10.679388999938965, + "log_odds_ratio": -5.0163958803750575e-05, + "logits/chosen": -0.2738475203514099, + "logits/rejected": -0.2702501714229584, + "logps/chosen": -0.0001700377179076895, + "logps/rejected": -1.7659885883331299, + "loss": 0.582, + "nll_loss": 0.14549194276332855, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7003772882162593e-05, + "rewards/margins": 0.17658185958862305, + "rewards/rejected": -0.17659887671470642, + "step": 10532 + }, + { + "epoch": 7.284232365145228, + "grad_norm": 5.639610290527344, + "learning_rate": 1.5087597971415398e-05, + "log_odds_chosen": 10.85079288482666, + "log_odds_ratio": -5.8406731113791466e-05, + "logits/chosen": -0.4756738245487213, + "logits/rejected": -0.5125161409378052, + "logps/chosen": -0.00016177997167687863, + "logps/rejected": -1.9398754835128784, + "loss": 0.5036, + "nll_loss": 0.12588562071323395, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6177997167687863e-05, + "rewards/margins": 0.19397136569023132, + "rewards/rejected": -0.19398756325244904, + "step": 10533 + }, + { + "epoch": 7.284923928077455, + "grad_norm": 4.74798583984375, + "learning_rate": 1.508375595512525e-05, + "log_odds_chosen": 11.382217407226562, + "log_odds_ratio": -0.00015084307233337313, + "logits/chosen": -0.2914195656776428, + "logits/rejected": -0.2960367798805237, + "logps/chosen": -0.00023178594710770994, + "logps/rejected": -2.4456207752227783, + "loss": 0.4181, + "nll_loss": 0.10451382398605347, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3178594346973114e-05, + "rewards/margins": 0.24453890323638916, + "rewards/rejected": -0.2445620745420456, + "step": 10534 + }, + { + "epoch": 7.285615491009682, + "grad_norm": 6.910724639892578, + "learning_rate": 1.5079913938835103e-05, + "log_odds_chosen": 11.61507797241211, + "log_odds_ratio": -2.5585079129086807e-05, + "logits/chosen": -0.07478839159011841, + "logits/rejected": -0.11948782950639725, + "logps/chosen": -0.000139494746690616, + "logps/rejected": -2.8178374767303467, + "loss": 0.4718, + "nll_loss": 0.1179506704211235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.394947412336478e-05, + "rewards/margins": 0.2817698121070862, + "rewards/rejected": -0.2817837595939636, + "step": 10535 + }, + { + "epoch": 7.286307053941909, + "grad_norm": 7.961095809936523, + "learning_rate": 1.5076071922544952e-05, + "log_odds_chosen": 10.0950345993042, + "log_odds_ratio": -0.00036910828202962875, + "logits/chosen": -0.15306870639324188, + "logits/rejected": -0.12899163365364075, + "logps/chosen": -0.0006089457892812788, + "logps/rejected": -1.624821424484253, + "loss": 0.4165, + "nll_loss": 0.10408499836921692, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.089458111091517e-05, + "rewards/margins": 0.16242125630378723, + "rewards/rejected": -0.1624821424484253, + "step": 10536 + }, + { + "epoch": 7.286998616874135, + "grad_norm": 3.866593837738037, + "learning_rate": 1.5072229906254803e-05, + "log_odds_chosen": 11.116514205932617, + "log_odds_ratio": -0.00020502627012319863, + "logits/chosen": -0.7989804744720459, + "logits/rejected": -0.7939543724060059, + "logps/chosen": -0.0004685473977588117, + "logps/rejected": -2.6888623237609863, + "loss": 0.4112, + "nll_loss": 0.10278967022895813, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.685474050347693e-05, + "rewards/margins": 0.26883938908576965, + "rewards/rejected": -0.2688862383365631, + "step": 10537 + }, + { + "epoch": 7.287690179806362, + "grad_norm": 3.782735586166382, + "learning_rate": 1.5068387889964655e-05, + "log_odds_chosen": 10.658288955688477, + "log_odds_ratio": -0.0001427416573278606, + "logits/chosen": -0.2742254436016083, + "logits/rejected": -0.25285041332244873, + "logps/chosen": -0.0005352857406251132, + "logps/rejected": -2.232027053833008, + "loss": 0.486, + "nll_loss": 0.12149648368358612, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.35285726073198e-05, + "rewards/margins": 0.22314918041229248, + "rewards/rejected": -0.22320270538330078, + "step": 10538 + }, + { + "epoch": 7.288381742738589, + "grad_norm": 10.33079719543457, + "learning_rate": 1.5064545873674504e-05, + "log_odds_chosen": 11.432767868041992, + "log_odds_ratio": -1.866390448412858e-05, + "logits/chosen": -0.4904142916202545, + "logits/rejected": -0.46854597330093384, + "logps/chosen": -9.947362559614703e-05, + "logps/rejected": -2.114985466003418, + "loss": 0.5173, + "nll_loss": 0.1293186992406845, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.947362741513643e-06, + "rewards/margins": 0.21148860454559326, + "rewards/rejected": -0.21149854362010956, + "step": 10539 + }, + { + "epoch": 7.289073305670816, + "grad_norm": 6.884215831756592, + "learning_rate": 1.5060703857384357e-05, + "log_odds_chosen": 11.117326736450195, + "log_odds_ratio": -2.461438998579979e-05, + "logits/chosen": -0.20820686221122742, + "logits/rejected": -0.2118438482284546, + "logps/chosen": -0.0001501316437497735, + "logps/rejected": -2.3482232093811035, + "loss": 0.6159, + "nll_loss": 0.1539812982082367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5013165466370992e-05, + "rewards/margins": 0.2348073124885559, + "rewards/rejected": -0.2348223179578781, + "step": 10540 + }, + { + "epoch": 7.289764868603043, + "grad_norm": 5.243474960327148, + "learning_rate": 1.5056861841094208e-05, + "log_odds_chosen": 10.785823822021484, + "log_odds_ratio": -6.247861165320501e-05, + "logits/chosen": -0.42967790365219116, + "logits/rejected": -0.3784506618976593, + "logps/chosen": -0.0003760996332857758, + "logps/rejected": -2.1447207927703857, + "loss": 0.4956, + "nll_loss": 0.12390033900737762, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.760996332857758e-05, + "rewards/margins": 0.21443447470664978, + "rewards/rejected": -0.21447208523750305, + "step": 10541 + }, + { + "epoch": 7.29045643153527, + "grad_norm": 3.6636292934417725, + "learning_rate": 1.5053019824804057e-05, + "log_odds_chosen": 10.340880393981934, + "log_odds_ratio": -0.00022311796783469617, + "logits/chosen": -0.2471560537815094, + "logits/rejected": -0.27003979682922363, + "logps/chosen": -0.0003549442917574197, + "logps/rejected": -2.0278170108795166, + "loss": 0.5427, + "nll_loss": 0.13564293086528778, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.549442772055045e-05, + "rewards/margins": 0.20274618268013, + "rewards/rejected": -0.20278167724609375, + "step": 10542 + }, + { + "epoch": 7.291147994467496, + "grad_norm": 4.385778427124023, + "learning_rate": 1.5049177808513909e-05, + "log_odds_chosen": 10.510884284973145, + "log_odds_ratio": -0.0002553297090344131, + "logits/chosen": -0.03860250860452652, + "logits/rejected": -0.07355962693691254, + "logps/chosen": -0.0007388624362647533, + "logps/rejected": -2.116535186767578, + "loss": 0.5062, + "nll_loss": 0.12653449177742004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.388624362647533e-05, + "rewards/margins": 0.21157963573932648, + "rewards/rejected": -0.21165351569652557, + "step": 10543 + }, + { + "epoch": 7.291839557399723, + "grad_norm": 3.9589853286743164, + "learning_rate": 1.5045335792223762e-05, + "log_odds_chosen": 11.206121444702148, + "log_odds_ratio": -8.039772365009412e-05, + "logits/chosen": -0.4851464033126831, + "logits/rejected": -0.4680294394493103, + "logps/chosen": -0.0015334711642935872, + "logps/rejected": -2.6300063133239746, + "loss": 0.4228, + "nll_loss": 0.10569702833890915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001533471222501248, + "rewards/margins": 0.26284730434417725, + "rewards/rejected": -0.26300063729286194, + "step": 10544 + }, + { + "epoch": 7.29253112033195, + "grad_norm": 8.450545310974121, + "learning_rate": 1.504149377593361e-05, + "log_odds_chosen": 10.956088066101074, + "log_odds_ratio": -3.171690696035512e-05, + "logits/chosen": -0.48458877205848694, + "logits/rejected": -0.5116901397705078, + "logps/chosen": -0.0001754190307110548, + "logps/rejected": -2.214137077331543, + "loss": 0.5659, + "nll_loss": 0.14148221909999847, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7541900888318196e-05, + "rewards/margins": 0.22139616310596466, + "rewards/rejected": -0.22141368687152863, + "step": 10545 + }, + { + "epoch": 7.293222683264177, + "grad_norm": 3.563842296600342, + "learning_rate": 1.5037651759643461e-05, + "log_odds_chosen": 10.58224868774414, + "log_odds_ratio": -2.9876087864977308e-05, + "logits/chosen": -0.21687503159046173, + "logits/rejected": -0.3506883680820465, + "logps/chosen": -0.00013978789502289146, + "logps/rejected": -1.6868329048156738, + "loss": 0.4411, + "nll_loss": 0.11027465760707855, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3978789866087027e-05, + "rewards/margins": 0.16866931319236755, + "rewards/rejected": -0.16868329048156738, + "step": 10546 + }, + { + "epoch": 7.293914246196404, + "grad_norm": 7.864605903625488, + "learning_rate": 1.5033809743353314e-05, + "log_odds_chosen": 10.986456871032715, + "log_odds_ratio": -3.460807056399062e-05, + "logits/chosen": -0.27556610107421875, + "logits/rejected": -0.277159720659256, + "logps/chosen": -0.0001903708907775581, + "logps/rejected": -2.378696918487549, + "loss": 0.4942, + "nll_loss": 0.1235458254814148, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9037088350160047e-05, + "rewards/margins": 0.23785068094730377, + "rewards/rejected": -0.23786970973014832, + "step": 10547 + }, + { + "epoch": 7.2946058091286305, + "grad_norm": 6.782561779022217, + "learning_rate": 1.5029967727063163e-05, + "log_odds_chosen": 10.94840145111084, + "log_odds_ratio": -2.8345846658339724e-05, + "logits/chosen": -0.2652517557144165, + "logits/rejected": -0.3147188425064087, + "logps/chosen": -0.001249704509973526, + "logps/rejected": -2.646296501159668, + "loss": 0.3847, + "nll_loss": 0.09618037939071655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001249704509973526, + "rewards/margins": 0.26450470089912415, + "rewards/rejected": -0.26462966203689575, + "step": 10548 + }, + { + "epoch": 7.295297372060857, + "grad_norm": 12.487119674682617, + "learning_rate": 1.5026125710773015e-05, + "log_odds_chosen": 11.011866569519043, + "log_odds_ratio": -3.6507648474071175e-05, + "logits/chosen": -0.6930927038192749, + "logits/rejected": -0.7526741623878479, + "logps/chosen": -0.00017669968656264246, + "logps/rejected": -2.299051284790039, + "loss": 0.4072, + "nll_loss": 0.10179228335618973, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7669968656264246e-05, + "rewards/margins": 0.229887455701828, + "rewards/rejected": -0.2299051433801651, + "step": 10549 + }, + { + "epoch": 7.295988934993084, + "grad_norm": 6.350553035736084, + "learning_rate": 1.5022283694482866e-05, + "log_odds_chosen": 11.08919620513916, + "log_odds_ratio": -5.873236295883544e-05, + "logits/chosen": -0.28959664702415466, + "logits/rejected": -0.30707934498786926, + "logps/chosen": -0.0004931816947646439, + "logps/rejected": -2.704843759536743, + "loss": 0.4858, + "nll_loss": 0.12144720554351807, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9318172386847436e-05, + "rewards/margins": 0.27043506503105164, + "rewards/rejected": -0.27048438787460327, + "step": 10550 + }, + { + "epoch": 7.296680497925311, + "grad_norm": 5.712388038635254, + "learning_rate": 1.5018441678192715e-05, + "log_odds_chosen": 11.027812957763672, + "log_odds_ratio": -6.843291339464486e-05, + "logits/chosen": -0.6185637712478638, + "logits/rejected": -0.6799907684326172, + "logps/chosen": -0.0002997862466145307, + "logps/rejected": -2.2454919815063477, + "loss": 0.5096, + "nll_loss": 0.12738478183746338, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.997862611664459e-05, + "rewards/margins": 0.22451920807361603, + "rewards/rejected": -0.22454921901226044, + "step": 10551 + }, + { + "epoch": 7.297372060857538, + "grad_norm": 5.3984222412109375, + "learning_rate": 1.5014599661902568e-05, + "log_odds_chosen": 10.676874160766602, + "log_odds_ratio": -7.660678238607943e-05, + "logits/chosen": -0.5980682373046875, + "logits/rejected": -0.7237865924835205, + "logps/chosen": -0.0003695039777085185, + "logps/rejected": -2.3640284538269043, + "loss": 0.424, + "nll_loss": 0.10600431263446808, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.695040140883066e-05, + "rewards/margins": 0.2363658845424652, + "rewards/rejected": -0.23640283942222595, + "step": 10552 + }, + { + "epoch": 7.298063623789765, + "grad_norm": 5.493939399719238, + "learning_rate": 1.501075764561242e-05, + "log_odds_chosen": 10.982223510742188, + "log_odds_ratio": -5.2999013860244304e-05, + "logits/chosen": -0.49189019203186035, + "logits/rejected": -0.5520755648612976, + "logps/chosen": -0.000196084423805587, + "logps/rejected": -2.34517765045166, + "loss": 1.0095, + "nll_loss": 0.2523600459098816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9608443835750222e-05, + "rewards/margins": 0.23449814319610596, + "rewards/rejected": -0.23451778292655945, + "step": 10553 + }, + { + "epoch": 7.2987551867219915, + "grad_norm": 8.220337867736816, + "learning_rate": 1.5006915629322269e-05, + "log_odds_chosen": 9.907423973083496, + "log_odds_ratio": -0.00023567386961076409, + "logits/chosen": -0.43401268124580383, + "logits/rejected": -0.4864483177661896, + "logps/chosen": -0.0004938674974255264, + "logps/rejected": -2.051173686981201, + "loss": 0.7212, + "nll_loss": 0.1802859604358673, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9386748287361115e-05, + "rewards/margins": 0.2050679624080658, + "rewards/rejected": -0.2051173746585846, + "step": 10554 + }, + { + "epoch": 7.299446749654218, + "grad_norm": 8.015789031982422, + "learning_rate": 1.500307361303212e-05, + "log_odds_chosen": 10.712055206298828, + "log_odds_ratio": -3.786015076912008e-05, + "logits/chosen": -0.363398015499115, + "logits/rejected": -0.5355995893478394, + "logps/chosen": -0.0001271214132430032, + "logps/rejected": -1.8779047727584839, + "loss": 0.9029, + "nll_loss": 0.22571158409118652, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2712141142401379e-05, + "rewards/margins": 0.1877777874469757, + "rewards/rejected": -0.18779048323631287, + "step": 10555 + }, + { + "epoch": 7.300138312586445, + "grad_norm": 5.6773905754089355, + "learning_rate": 1.4999231596741972e-05, + "log_odds_chosen": 12.072305679321289, + "log_odds_ratio": -9.910212611430325e-06, + "logits/chosen": -0.5058807730674744, + "logits/rejected": -0.5540425777435303, + "logps/chosen": -0.0003676057094708085, + "logps/rejected": -3.347212791442871, + "loss": 0.5019, + "nll_loss": 0.12547825276851654, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6760568036697805e-05, + "rewards/margins": 0.33468449115753174, + "rewards/rejected": -0.33472126722335815, + "step": 10556 + }, + { + "epoch": 7.300829875518672, + "grad_norm": 5.691203594207764, + "learning_rate": 1.4995389580451821e-05, + "log_odds_chosen": 9.261990547180176, + "log_odds_ratio": -0.1274312138557434, + "logits/chosen": -0.5108420848846436, + "logits/rejected": -0.5427403450012207, + "logps/chosen": -0.01616716757416725, + "logps/rejected": -1.4746990203857422, + "loss": 0.4642, + "nll_loss": 0.10331307351589203, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.001616716617718339, + "rewards/margins": 0.145853191614151, + "rewards/rejected": -0.1474699079990387, + "step": 10557 + }, + { + "epoch": 7.301521438450899, + "grad_norm": 6.426586627960205, + "learning_rate": 1.4991547564161674e-05, + "log_odds_chosen": 9.163455963134766, + "log_odds_ratio": -0.0005875678616575897, + "logits/chosen": -0.4957526922225952, + "logits/rejected": -0.4884418547153473, + "logps/chosen": -0.000682390877045691, + "logps/rejected": -1.4966734647750854, + "loss": 0.2752, + "nll_loss": 0.0687483549118042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823908188380301e-05, + "rewards/margins": 0.1495991051197052, + "rewards/rejected": -0.14966735243797302, + "step": 10558 + }, + { + "epoch": 7.302213001383126, + "grad_norm": 6.857354640960693, + "learning_rate": 1.4987705547871524e-05, + "log_odds_chosen": 10.375746726989746, + "log_odds_ratio": -9.528575174044818e-05, + "logits/chosen": -0.32422560453414917, + "logits/rejected": -0.4233476221561432, + "logps/chosen": -0.00015979795716702938, + "logps/rejected": -1.7596766948699951, + "loss": 0.4444, + "nll_loss": 0.11109989881515503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5979794625309296e-05, + "rewards/margins": 0.17595168948173523, + "rewards/rejected": -0.17596766352653503, + "step": 10559 + }, + { + "epoch": 7.3029045643153525, + "grad_norm": 5.945572376251221, + "learning_rate": 1.4983863531581374e-05, + "log_odds_chosen": 11.158388137817383, + "log_odds_ratio": -5.777521801064722e-05, + "logits/chosen": 0.023258313536643982, + "logits/rejected": 0.0119294673204422, + "logps/chosen": -0.0002473669301252812, + "logps/rejected": -2.618809700012207, + "loss": 0.5432, + "nll_loss": 0.13579916954040527, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.47366915573366e-05, + "rewards/margins": 0.26185622811317444, + "rewards/rejected": -0.2618809640407562, + "step": 10560 + }, + { + "epoch": 7.303596127247579, + "grad_norm": 13.444534301757812, + "learning_rate": 1.4980021515291226e-05, + "log_odds_chosen": 10.43269157409668, + "log_odds_ratio": -0.00010532377928029746, + "logits/chosen": -0.7283393144607544, + "logits/rejected": -0.8246185779571533, + "logps/chosen": -0.00027138530276715755, + "logps/rejected": -2.0439107418060303, + "loss": 0.3743, + "nll_loss": 0.09355422109365463, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7138530640513636e-05, + "rewards/margins": 0.20436394214630127, + "rewards/rejected": -0.20439106225967407, + "step": 10561 + }, + { + "epoch": 7.304287690179806, + "grad_norm": 3.034716844558716, + "learning_rate": 1.4976179499001078e-05, + "log_odds_chosen": 11.042951583862305, + "log_odds_ratio": -4.987595821148716e-05, + "logits/chosen": -0.3208976089954376, + "logits/rejected": -0.383137583732605, + "logps/chosen": -0.000426338316174224, + "logps/rejected": -2.371851682662964, + "loss": 0.44, + "nll_loss": 0.10998556017875671, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2633833800209686e-05, + "rewards/margins": 0.23714253306388855, + "rewards/rejected": -0.23718518018722534, + "step": 10562 + }, + { + "epoch": 7.304979253112033, + "grad_norm": 5.60660457611084, + "learning_rate": 1.4972337482710927e-05, + "log_odds_chosen": 11.074758529663086, + "log_odds_ratio": -7.649294275324792e-05, + "logits/chosen": -0.26657915115356445, + "logits/rejected": -0.3863256275653839, + "logps/chosen": -0.0003114904393441975, + "logps/rejected": -2.4336323738098145, + "loss": 0.509, + "nll_loss": 0.12723150849342346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.114904757239856e-05, + "rewards/margins": 0.24333205819129944, + "rewards/rejected": -0.24336321651935577, + "step": 10563 + }, + { + "epoch": 7.30567081604426, + "grad_norm": 6.80715274810791, + "learning_rate": 1.4968495466420778e-05, + "log_odds_chosen": 10.559022903442383, + "log_odds_ratio": -0.00011177535634487867, + "logits/chosen": -0.3276813328266144, + "logits/rejected": -0.44247764348983765, + "logps/chosen": -0.00020129492622800171, + "logps/rejected": -1.7909127473831177, + "loss": 0.3557, + "nll_loss": 0.08892100304365158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0129493350395933e-05, + "rewards/margins": 0.17907115817070007, + "rewards/rejected": -0.17909128963947296, + "step": 10564 + }, + { + "epoch": 7.306362378976487, + "grad_norm": 4.869068622589111, + "learning_rate": 1.4964653450130627e-05, + "log_odds_chosen": 11.06036376953125, + "log_odds_ratio": -0.00016165415581781417, + "logits/chosen": -0.48836928606033325, + "logits/rejected": -0.5202836990356445, + "logps/chosen": -0.0002001393004320562, + "logps/rejected": -2.154864549636841, + "loss": 0.5012, + "nll_loss": 0.12527495622634888, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0013932953588665e-05, + "rewards/margins": 0.2154664397239685, + "rewards/rejected": -0.21548645198345184, + "step": 10565 + }, + { + "epoch": 7.3070539419087135, + "grad_norm": 9.951133728027344, + "learning_rate": 1.496081143384048e-05, + "log_odds_chosen": 12.023983001708984, + "log_odds_ratio": -6.627372204093263e-05, + "logits/chosen": -0.19025403261184692, + "logits/rejected": -0.2676059603691101, + "logps/chosen": -0.0003929885569959879, + "logps/rejected": -3.4180500507354736, + "loss": 0.6028, + "nll_loss": 0.1506935954093933, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9298858609981835e-05, + "rewards/margins": 0.34176570177078247, + "rewards/rejected": -0.34180501103401184, + "step": 10566 + }, + { + "epoch": 7.30774550484094, + "grad_norm": 7.036557674407959, + "learning_rate": 1.4956969417550332e-05, + "log_odds_chosen": 10.718587875366211, + "log_odds_ratio": -9.550920367473736e-05, + "logits/chosen": -0.44108590483665466, + "logits/rejected": -0.37370753288269043, + "logps/chosen": -0.00017977158131543547, + "logps/rejected": -1.9299838542938232, + "loss": 0.7186, + "nll_loss": 0.17963513731956482, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7977157767745666e-05, + "rewards/margins": 0.19298040866851807, + "rewards/rejected": -0.19299837946891785, + "step": 10567 + }, + { + "epoch": 7.308437067773167, + "grad_norm": 8.872588157653809, + "learning_rate": 1.4953127401260181e-05, + "log_odds_chosen": 11.814168930053711, + "log_odds_ratio": -2.9991559131303802e-05, + "logits/chosen": 0.08540153503417969, + "logits/rejected": -0.05404900014400482, + "logps/chosen": -0.00012598311877809465, + "logps/rejected": -2.4559972286224365, + "loss": 0.6095, + "nll_loss": 0.15238115191459656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2598312423506286e-05, + "rewards/margins": 0.24558714032173157, + "rewards/rejected": -0.24559973180294037, + "step": 10568 + }, + { + "epoch": 7.309128630705394, + "grad_norm": 4.762966632843018, + "learning_rate": 1.4949285384970032e-05, + "log_odds_chosen": 11.357812881469727, + "log_odds_ratio": -5.139112181495875e-05, + "logits/chosen": 0.04269764572381973, + "logits/rejected": -0.12028162181377411, + "logps/chosen": -0.00012316771608311683, + "logps/rejected": -2.1897315979003906, + "loss": 0.4882, + "nll_loss": 0.12203666567802429, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2316771062614862e-05, + "rewards/margins": 0.21896082162857056, + "rewards/rejected": -0.21897317469120026, + "step": 10569 + }, + { + "epoch": 7.309820193637621, + "grad_norm": 2.450059652328491, + "learning_rate": 1.4945443368679884e-05, + "log_odds_chosen": 10.953805923461914, + "log_odds_ratio": -4.3371266656322405e-05, + "logits/chosen": -0.635241687297821, + "logits/rejected": -0.5566953420639038, + "logps/chosen": -0.00017698638839647174, + "logps/rejected": -2.116021156311035, + "loss": 0.3111, + "nll_loss": 0.07777298986911774, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7698639567242935e-05, + "rewards/margins": 0.2115844190120697, + "rewards/rejected": -0.21160215139389038, + "step": 10570 + }, + { + "epoch": 7.310511756569848, + "grad_norm": 3.795652389526367, + "learning_rate": 1.4941601352389733e-05, + "log_odds_chosen": 10.717495918273926, + "log_odds_ratio": -0.0009428179473616183, + "logits/chosen": -0.165732741355896, + "logits/rejected": -0.22573032975196838, + "logps/chosen": -0.0009718775982037187, + "logps/rejected": -2.1593799591064453, + "loss": 0.3906, + "nll_loss": 0.09756524860858917, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.718775982037187e-05, + "rewards/margins": 0.21584078669548035, + "rewards/rejected": -0.21593798696994781, + "step": 10571 + }, + { + "epoch": 7.3112033195020745, + "grad_norm": 4.54644250869751, + "learning_rate": 1.4937759336099586e-05, + "log_odds_chosen": 11.126632690429688, + "log_odds_ratio": -3.421502697165124e-05, + "logits/chosen": -0.4148055911064148, + "logits/rejected": -0.5459097623825073, + "logps/chosen": -0.00022523957886733115, + "logps/rejected": -2.3449649810791016, + "loss": 0.3978, + "nll_loss": 0.0994362086057663, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.25239600695204e-05, + "rewards/margins": 0.23447395861148834, + "rewards/rejected": -0.23449648916721344, + "step": 10572 + }, + { + "epoch": 7.311894882434301, + "grad_norm": 4.444427967071533, + "learning_rate": 1.4933917319809437e-05, + "log_odds_chosen": 10.908353805541992, + "log_odds_ratio": -2.464960743964184e-05, + "logits/chosen": -0.10387469083070755, + "logits/rejected": -0.16769036650657654, + "logps/chosen": -0.00013196248619351536, + "logps/rejected": -1.9175297021865845, + "loss": 0.4857, + "nll_loss": 0.12142340838909149, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3196249710745178e-05, + "rewards/margins": 0.19173979759216309, + "rewards/rejected": -0.19175297021865845, + "step": 10573 + }, + { + "epoch": 7.312586445366528, + "grad_norm": 4.917417049407959, + "learning_rate": 1.4930075303519286e-05, + "log_odds_chosen": 10.515276908874512, + "log_odds_ratio": -0.00024476443650200963, + "logits/chosen": -0.580511748790741, + "logits/rejected": -0.525976300239563, + "logps/chosen": -0.001163422828540206, + "logps/rejected": -2.875986099243164, + "loss": 0.5104, + "nll_loss": 0.12757620215415955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011634228576440364, + "rewards/margins": 0.28748229146003723, + "rewards/rejected": -0.2875986099243164, + "step": 10574 + }, + { + "epoch": 7.313278008298755, + "grad_norm": 5.778778553009033, + "learning_rate": 1.4926233287229138e-05, + "log_odds_chosen": 11.418848037719727, + "log_odds_ratio": -2.969609704450704e-05, + "logits/chosen": -0.08569745719432831, + "logits/rejected": -0.1920081079006195, + "logps/chosen": -0.00034481758484616876, + "logps/rejected": -3.314943313598633, + "loss": 0.5071, + "nll_loss": 0.12677092850208282, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.44817599398084e-05, + "rewards/margins": 0.3314598798751831, + "rewards/rejected": -0.33149436116218567, + "step": 10575 + }, + { + "epoch": 7.313969571230982, + "grad_norm": 5.823296070098877, + "learning_rate": 1.492239127093899e-05, + "log_odds_chosen": 11.454782485961914, + "log_odds_ratio": -2.6344980142312124e-05, + "logits/chosen": -0.16251061856746674, + "logits/rejected": -0.06658054888248444, + "logps/chosen": -0.00016875607252586633, + "logps/rejected": -2.35142183303833, + "loss": 0.5564, + "nll_loss": 0.13910400867462158, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6875606888788752e-05, + "rewards/margins": 0.23512530326843262, + "rewards/rejected": -0.23514218628406525, + "step": 10576 + }, + { + "epoch": 7.314661134163209, + "grad_norm": 3.9457826614379883, + "learning_rate": 1.491854925464884e-05, + "log_odds_chosen": 9.852853775024414, + "log_odds_ratio": -0.00029724909109063447, + "logits/chosen": -0.303438663482666, + "logits/rejected": -0.3021944761276245, + "logps/chosen": -0.0003416346153244376, + "logps/rejected": -2.00277042388916, + "loss": 0.6604, + "nll_loss": 0.16506357491016388, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.416346226003952e-05, + "rewards/margins": 0.20024290680885315, + "rewards/rejected": -0.20027706027030945, + "step": 10577 + }, + { + "epoch": 7.3153526970954355, + "grad_norm": 10.15293025970459, + "learning_rate": 1.491470723835869e-05, + "log_odds_chosen": 9.349213600158691, + "log_odds_ratio": -0.0002637461293488741, + "logits/chosen": -0.38515281677246094, + "logits/rejected": -0.3794807195663452, + "logps/chosen": -0.0004498792113736272, + "logps/rejected": -1.5653575658798218, + "loss": 0.5365, + "nll_loss": 0.13410302996635437, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.498792259255424e-05, + "rewards/margins": 0.1564907729625702, + "rewards/rejected": -0.1565357744693756, + "step": 10578 + }, + { + "epoch": 7.316044260027662, + "grad_norm": 3.717820167541504, + "learning_rate": 1.4910865222068543e-05, + "log_odds_chosen": 10.480566024780273, + "log_odds_ratio": -9.45752690313384e-05, + "logits/chosen": -0.3336294889450073, + "logits/rejected": -0.38219499588012695, + "logps/chosen": -0.0002480067778378725, + "logps/rejected": -1.8441063165664673, + "loss": 0.4326, + "nll_loss": 0.10814318060874939, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.480068178556394e-05, + "rewards/margins": 0.18438583612442017, + "rewards/rejected": -0.18441063165664673, + "step": 10579 + }, + { + "epoch": 7.316735822959889, + "grad_norm": 4.929460048675537, + "learning_rate": 1.4907023205778392e-05, + "log_odds_chosen": 9.904447555541992, + "log_odds_ratio": -0.0005250414833426476, + "logits/chosen": -0.24390114843845367, + "logits/rejected": -0.2871720492839813, + "logps/chosen": -0.0018281986704096198, + "logps/rejected": -1.694327473640442, + "loss": 0.401, + "nll_loss": 0.10020306706428528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001828198874136433, + "rewards/margins": 0.16924992203712463, + "rewards/rejected": -0.16943272948265076, + "step": 10580 + }, + { + "epoch": 7.317427385892116, + "grad_norm": 4.24871301651001, + "learning_rate": 1.4903181189488244e-05, + "log_odds_chosen": 9.435662269592285, + "log_odds_ratio": -0.00046115496661514044, + "logits/chosen": -0.2529515027999878, + "logits/rejected": -0.2180534303188324, + "logps/chosen": -0.0009722278919070959, + "logps/rejected": -1.756400227546692, + "loss": 0.5971, + "nll_loss": 0.14923420548439026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.722278628032655e-05, + "rewards/margins": 0.17554278671741486, + "rewards/rejected": -0.1756400167942047, + "step": 10581 + }, + { + "epoch": 7.318118948824343, + "grad_norm": 12.383216857910156, + "learning_rate": 1.4899339173198095e-05, + "log_odds_chosen": 11.341604232788086, + "log_odds_ratio": -9.020642755785957e-05, + "logits/chosen": -0.24451106786727905, + "logits/rejected": -0.37076500058174133, + "logps/chosen": -0.000519920198712498, + "logps/rejected": -2.194770097732544, + "loss": 0.576, + "nll_loss": 0.1440034657716751, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.199202132644132e-05, + "rewards/margins": 0.2194250226020813, + "rewards/rejected": -0.21947701275348663, + "step": 10582 + }, + { + "epoch": 7.31881051175657, + "grad_norm": 5.260334014892578, + "learning_rate": 1.4895497156907944e-05, + "log_odds_chosen": 10.190016746520996, + "log_odds_ratio": -0.00013451275299303234, + "logits/chosen": -0.4095885157585144, + "logits/rejected": -0.5194642543792725, + "logps/chosen": -0.0005643427721224725, + "logps/rejected": -1.9316439628601074, + "loss": 0.3941, + "nll_loss": 0.09850853681564331, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.643428085022606e-05, + "rewards/margins": 0.1931079626083374, + "rewards/rejected": -0.1931644082069397, + "step": 10583 + }, + { + "epoch": 7.319502074688796, + "grad_norm": 3.2225735187530518, + "learning_rate": 1.4891655140617797e-05, + "log_odds_chosen": 10.30554485321045, + "log_odds_ratio": -0.0003724767011590302, + "logits/chosen": -0.6669608950614929, + "logits/rejected": -0.5772339105606079, + "logps/chosen": -0.00026067826547659934, + "logps/rejected": -1.8772404193878174, + "loss": 0.6247, + "nll_loss": 0.15613077580928802, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6067828002851456e-05, + "rewards/margins": 0.18769796192646027, + "rewards/rejected": -0.1877240240573883, + "step": 10584 + }, + { + "epoch": 7.320193637621023, + "grad_norm": 5.20518159866333, + "learning_rate": 1.4887813124327649e-05, + "log_odds_chosen": 10.33845329284668, + "log_odds_ratio": -6.268893775995821e-05, + "logits/chosen": -0.2971196174621582, + "logits/rejected": -0.38388875126838684, + "logps/chosen": -0.000514318177010864, + "logps/rejected": -2.25443959236145, + "loss": 0.4866, + "nll_loss": 0.12163745611906052, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.143181624589488e-05, + "rewards/margins": 0.22539252042770386, + "rewards/rejected": -0.2254439741373062, + "step": 10585 + }, + { + "epoch": 7.32088520055325, + "grad_norm": 7.644906044006348, + "learning_rate": 1.4883971108037498e-05, + "log_odds_chosen": 11.65134334564209, + "log_odds_ratio": -0.00015778436500113457, + "logits/chosen": -0.14399686455726624, + "logits/rejected": -0.23336483538150787, + "logps/chosen": -0.00014643429312855005, + "logps/rejected": -2.5161826610565186, + "loss": 0.4515, + "nll_loss": 0.11286191642284393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4643430404248647e-05, + "rewards/margins": 0.2516036331653595, + "rewards/rejected": -0.25161826610565186, + "step": 10586 + }, + { + "epoch": 7.321576763485477, + "grad_norm": 5.68562650680542, + "learning_rate": 1.4880129091747349e-05, + "log_odds_chosen": 10.962752342224121, + "log_odds_ratio": -9.493598918197677e-05, + "logits/chosen": -0.562821626663208, + "logits/rejected": -0.5519170165061951, + "logps/chosen": -0.0001463395165046677, + "logps/rejected": -2.0623018741607666, + "loss": 0.4406, + "nll_loss": 0.11013630032539368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.463395165046677e-05, + "rewards/margins": 0.20621556043624878, + "rewards/rejected": -0.20623019337654114, + "step": 10587 + }, + { + "epoch": 7.322268326417704, + "grad_norm": 8.166015625, + "learning_rate": 1.4876287075457201e-05, + "log_odds_chosen": 11.878768920898438, + "log_odds_ratio": -1.8437514881952666e-05, + "logits/chosen": -0.4738050103187561, + "logits/rejected": -0.4197632670402527, + "logps/chosen": -0.00016072619473561645, + "logps/rejected": -2.418221950531006, + "loss": 0.4483, + "nll_loss": 0.11207035183906555, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6072621292551048e-05, + "rewards/margins": 0.24180611968040466, + "rewards/rejected": -0.24182221293449402, + "step": 10588 + }, + { + "epoch": 7.322959889349931, + "grad_norm": 3.789522171020508, + "learning_rate": 1.487244505916705e-05, + "log_odds_chosen": 11.342962265014648, + "log_odds_ratio": -0.00010516175098018721, + "logits/chosen": -0.014583747833967209, + "logits/rejected": 0.013038881123065948, + "logps/chosen": -0.00019708505715243518, + "logps/rejected": -2.7194290161132812, + "loss": 0.5038, + "nll_loss": 0.1259286105632782, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.970850644283928e-05, + "rewards/margins": 0.2719232141971588, + "rewards/rejected": -0.2719429135322571, + "step": 10589 + }, + { + "epoch": 7.323651452282157, + "grad_norm": 7.632386207580566, + "learning_rate": 1.4868603042876903e-05, + "log_odds_chosen": 11.190752029418945, + "log_odds_ratio": -4.420655750436708e-05, + "logits/chosen": -0.17970700562000275, + "logits/rejected": -0.13008803129196167, + "logps/chosen": -0.0006894480320625007, + "logps/rejected": -2.4720520973205566, + "loss": 0.7858, + "nll_loss": 0.19643478095531464, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.894480611663312e-05, + "rewards/margins": 0.24713626503944397, + "rewards/rejected": -0.2472052276134491, + "step": 10590 + }, + { + "epoch": 7.324343015214384, + "grad_norm": 4.327857494354248, + "learning_rate": 1.4864761026586754e-05, + "log_odds_chosen": 11.098254203796387, + "log_odds_ratio": -4.739519135910086e-05, + "logits/chosen": -0.4026983976364136, + "logits/rejected": -0.36631515622138977, + "logps/chosen": -0.0002123128215316683, + "logps/rejected": -2.294590473175049, + "loss": 0.4672, + "nll_loss": 0.11679333448410034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1231280697975308e-05, + "rewards/margins": 0.22943781316280365, + "rewards/rejected": -0.22945904731750488, + "step": 10591 + }, + { + "epoch": 7.325034578146611, + "grad_norm": 4.4742255210876465, + "learning_rate": 1.4860919010296603e-05, + "log_odds_chosen": 11.094287872314453, + "log_odds_ratio": -3.746439324459061e-05, + "logits/chosen": 0.13466286659240723, + "logits/rejected": 0.18415427207946777, + "logps/chosen": -0.00044445990351960063, + "logps/rejected": -2.663757801055908, + "loss": 0.4439, + "nll_loss": 0.11096180230379105, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.444599471753463e-05, + "rewards/margins": 0.26633134484291077, + "rewards/rejected": -0.2663758099079132, + "step": 10592 + }, + { + "epoch": 7.325726141078838, + "grad_norm": 10.97510814666748, + "learning_rate": 1.4857076994006455e-05, + "log_odds_chosen": 10.561748504638672, + "log_odds_ratio": -6.68507200316526e-05, + "logits/chosen": -0.11066879332065582, + "logits/rejected": -0.17471560835838318, + "logps/chosen": -0.00010785304039018229, + "logps/rejected": -1.5898044109344482, + "loss": 0.5916, + "nll_loss": 0.14789864420890808, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0785303857119288e-05, + "rewards/margins": 0.15896965563297272, + "rewards/rejected": -0.15898045897483826, + "step": 10593 + }, + { + "epoch": 7.326417704011065, + "grad_norm": 15.438594818115234, + "learning_rate": 1.4853234977716307e-05, + "log_odds_chosen": 10.610740661621094, + "log_odds_ratio": -9.984053031075746e-05, + "logits/chosen": -0.08416008949279785, + "logits/rejected": -0.13556626439094543, + "logps/chosen": -0.0005519052501767874, + "logps/rejected": -2.215517044067383, + "loss": 0.5154, + "nll_loss": 0.12885132431983948, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.519052501767874e-05, + "rewards/margins": 0.22149652242660522, + "rewards/rejected": -0.22155171632766724, + "step": 10594 + }, + { + "epoch": 7.327109266943292, + "grad_norm": 5.175950527191162, + "learning_rate": 1.4849392961426157e-05, + "log_odds_chosen": 10.522294044494629, + "log_odds_ratio": -6.946315988898277e-05, + "logits/chosen": -0.042398273944854736, + "logits/rejected": -0.09602394700050354, + "logps/chosen": -0.00017671106616035104, + "logps/rejected": -1.894153356552124, + "loss": 0.7866, + "nll_loss": 0.19665397703647614, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7671107343630865e-05, + "rewards/margins": 0.1893976628780365, + "rewards/rejected": -0.1894153356552124, + "step": 10595 + }, + { + "epoch": 7.327800829875518, + "grad_norm": 7.015600204467773, + "learning_rate": 1.4845550945136007e-05, + "log_odds_chosen": 10.751202583312988, + "log_odds_ratio": -0.0008175495895557106, + "logits/chosen": -0.42600420117378235, + "logits/rejected": -0.48659491539001465, + "logps/chosen": -0.00036460478440858424, + "logps/rejected": -1.832169532775879, + "loss": 0.5799, + "nll_loss": 0.14488884806632996, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6460478440858424e-05, + "rewards/margins": 0.18318049609661102, + "rewards/rejected": -0.18321695923805237, + "step": 10596 + }, + { + "epoch": 7.328492392807745, + "grad_norm": 6.0024566650390625, + "learning_rate": 1.484170892884586e-05, + "log_odds_chosen": 11.78825569152832, + "log_odds_ratio": -1.578919000166934e-05, + "logits/chosen": -0.17290112376213074, + "logits/rejected": -0.2185828983783722, + "logps/chosen": -0.00018440670100972056, + "logps/rejected": -2.6440727710723877, + "loss": 0.4376, + "nll_loss": 0.10940653085708618, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8440670828567818e-05, + "rewards/margins": 0.2643888294696808, + "rewards/rejected": -0.26440727710723877, + "step": 10597 + }, + { + "epoch": 7.329183955739972, + "grad_norm": 4.82000207901001, + "learning_rate": 1.4837866912555709e-05, + "log_odds_chosen": 10.55043888092041, + "log_odds_ratio": -0.000115828966954723, + "logits/chosen": -0.004259809851646423, + "logits/rejected": -0.05958893150091171, + "logps/chosen": -0.0005333904991857708, + "logps/rejected": -2.640122413635254, + "loss": 0.4736, + "nll_loss": 0.11838662624359131, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.333905210136436e-05, + "rewards/margins": 0.2639588713645935, + "rewards/rejected": -0.2640122175216675, + "step": 10598 + }, + { + "epoch": 7.329875518672199, + "grad_norm": 5.208899021148682, + "learning_rate": 1.4834024896265561e-05, + "log_odds_chosen": 11.910272598266602, + "log_odds_ratio": -1.2390030860842671e-05, + "logits/chosen": -0.34170472621917725, + "logits/rejected": -0.3993207812309265, + "logps/chosen": -0.000160014649736695, + "logps/rejected": -2.8986268043518066, + "loss": 0.5649, + "nll_loss": 0.14122623205184937, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6001464246073738e-05, + "rewards/margins": 0.2898466885089874, + "rewards/rejected": -0.2898626923561096, + "step": 10599 + }, + { + "epoch": 7.330567081604426, + "grad_norm": 4.279346466064453, + "learning_rate": 1.4830182879975412e-05, + "log_odds_chosen": 11.154391288757324, + "log_odds_ratio": -3.705886047100648e-05, + "logits/chosen": 0.053722962737083435, + "logits/rejected": -0.03543657436966896, + "logps/chosen": -0.00023111490008886904, + "logps/rejected": -2.4170563220977783, + "loss": 0.3872, + "nll_loss": 0.09679973125457764, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.31114881898975e-05, + "rewards/margins": 0.2416825294494629, + "rewards/rejected": -0.24170565605163574, + "step": 10600 + }, + { + "epoch": 7.3312586445366525, + "grad_norm": 6.050769805908203, + "learning_rate": 1.4826340863685263e-05, + "log_odds_chosen": 10.852587699890137, + "log_odds_ratio": -5.3520489018410444e-05, + "logits/chosen": -0.14165736734867096, + "logits/rejected": -0.25913006067276, + "logps/chosen": -0.00013877694436814636, + "logps/rejected": -1.9309008121490479, + "loss": 0.4902, + "nll_loss": 0.12254425883293152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3877694982511457e-05, + "rewards/margins": 0.1930762082338333, + "rewards/rejected": -0.19309008121490479, + "step": 10601 + }, + { + "epoch": 7.331950207468879, + "grad_norm": 3.462172031402588, + "learning_rate": 1.4822498847395113e-05, + "log_odds_chosen": 11.225442886352539, + "log_odds_ratio": -4.436544986674562e-05, + "logits/chosen": -0.42905306816101074, + "logits/rejected": -0.5086164474487305, + "logps/chosen": -7.468480907846242e-05, + "logps/rejected": -1.767258644104004, + "loss": 0.2998, + "nll_loss": 0.07493775337934494, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.468481726391474e-06, + "rewards/margins": 0.17671838402748108, + "rewards/rejected": -0.1767258644104004, + "step": 10602 + }, + { + "epoch": 7.332641770401106, + "grad_norm": 5.805577754974365, + "learning_rate": 1.4818656831104966e-05, + "log_odds_chosen": 11.116992950439453, + "log_odds_ratio": -3.2158764952328056e-05, + "logits/chosen": -0.07801266014575958, + "logits/rejected": -0.24215415120124817, + "logps/chosen": -0.0002241364272776991, + "logps/rejected": -2.7057204246520996, + "loss": 0.4149, + "nll_loss": 0.10372462868690491, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.241364200017415e-05, + "rewards/margins": 0.27054962515830994, + "rewards/rejected": -0.2705720365047455, + "step": 10603 + }, + { + "epoch": 7.333333333333333, + "grad_norm": 6.583235740661621, + "learning_rate": 1.4814814814814815e-05, + "log_odds_chosen": 10.986959457397461, + "log_odds_ratio": -5.965959280729294e-05, + "logits/chosen": -0.23258370161056519, + "logits/rejected": -0.25661396980285645, + "logps/chosen": -0.00028355675749480724, + "logps/rejected": -2.7544238567352295, + "loss": 0.5009, + "nll_loss": 0.12522797286510468, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8355676477076486e-05, + "rewards/margins": 0.27541404962539673, + "rewards/rejected": -0.2754423916339874, + "step": 10604 + }, + { + "epoch": 7.33402489626556, + "grad_norm": 4.308938026428223, + "learning_rate": 1.4810972798524667e-05, + "log_odds_chosen": 11.198945999145508, + "log_odds_ratio": -1.4784338418394327e-05, + "logits/chosen": -0.344827800989151, + "logits/rejected": -0.36173003911972046, + "logps/chosen": -0.00011824148532468826, + "logps/rejected": -2.052685260772705, + "loss": 0.3203, + "nll_loss": 0.08006532490253448, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1824149623862468e-05, + "rewards/margins": 0.20525671541690826, + "rewards/rejected": -0.20526854693889618, + "step": 10605 + }, + { + "epoch": 7.334716459197787, + "grad_norm": 5.213698863983154, + "learning_rate": 1.4807130782234518e-05, + "log_odds_chosen": 11.838775634765625, + "log_odds_ratio": -1.4372966688824818e-05, + "logits/chosen": -0.27771133184432983, + "logits/rejected": -0.17700433731079102, + "logps/chosen": -5.760313069913536e-05, + "logps/rejected": -2.0721049308776855, + "loss": 0.4284, + "nll_loss": 0.10709850490093231, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760313342761947e-06, + "rewards/margins": 0.20720471441745758, + "rewards/rejected": -0.2072104811668396, + "step": 10606 + }, + { + "epoch": 7.3354080221300135, + "grad_norm": 3.2967588901519775, + "learning_rate": 1.4803288765944367e-05, + "log_odds_chosen": 10.962372779846191, + "log_odds_ratio": -4.8951711505651474e-05, + "logits/chosen": -0.4963054358959198, + "logits/rejected": -0.5957614779472351, + "logps/chosen": -0.00023823344963602722, + "logps/rejected": -2.5089364051818848, + "loss": 0.3532, + "nll_loss": 0.08828841149806976, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3823346054996364e-05, + "rewards/margins": 0.2508698105812073, + "rewards/rejected": -0.25089362263679504, + "step": 10607 + }, + { + "epoch": 7.33609958506224, + "grad_norm": 6.884736061096191, + "learning_rate": 1.479944674965422e-05, + "log_odds_chosen": 11.086265563964844, + "log_odds_ratio": -2.916203811764717e-05, + "logits/chosen": -0.3722105622291565, + "logits/rejected": -0.4484604299068451, + "logps/chosen": -0.00019026699010282755, + "logps/rejected": -2.3791327476501465, + "loss": 0.3414, + "nll_loss": 0.08533491939306259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9026698282686993e-05, + "rewards/margins": 0.23789425194263458, + "rewards/rejected": -0.23791325092315674, + "step": 10608 + }, + { + "epoch": 7.336791147994467, + "grad_norm": 5.710556507110596, + "learning_rate": 1.4795604733364072e-05, + "log_odds_chosen": 11.4507474899292, + "log_odds_ratio": -1.2639248780033085e-05, + "logits/chosen": -0.3838127851486206, + "logits/rejected": -0.45596855878829956, + "logps/chosen": -0.00017679229495115578, + "logps/rejected": -2.476207971572876, + "loss": 0.5747, + "nll_loss": 0.14368417859077454, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7679229131317697e-05, + "rewards/margins": 0.24760311841964722, + "rewards/rejected": -0.24762079119682312, + "step": 10609 + }, + { + "epoch": 7.337482710926694, + "grad_norm": 4.773012161254883, + "learning_rate": 1.4791762717073921e-05, + "log_odds_chosen": 9.811444282531738, + "log_odds_ratio": -0.00034897771547548473, + "logits/chosen": -0.5299882888793945, + "logits/rejected": -0.422236829996109, + "logps/chosen": -0.0005181976011954248, + "logps/rejected": -2.17285418510437, + "loss": 0.8478, + "nll_loss": 0.21192626655101776, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.181975575396791e-05, + "rewards/margins": 0.2172335833311081, + "rewards/rejected": -0.2172854095697403, + "step": 10610 + }, + { + "epoch": 7.338174273858921, + "grad_norm": 5.867862224578857, + "learning_rate": 1.4787920700783772e-05, + "log_odds_chosen": 10.030328750610352, + "log_odds_ratio": -0.0003582726640161127, + "logits/chosen": -0.4114347994327545, + "logits/rejected": -0.4240753650665283, + "logps/chosen": -0.007311244960874319, + "logps/rejected": -2.227712631225586, + "loss": 0.3881, + "nll_loss": 0.09698623418807983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007311245426535606, + "rewards/margins": 0.22204013168811798, + "rewards/rejected": -0.22277125716209412, + "step": 10611 + }, + { + "epoch": 7.338865836791148, + "grad_norm": 3.5677428245544434, + "learning_rate": 1.4784078684493624e-05, + "log_odds_chosen": 10.60622787475586, + "log_odds_ratio": -0.0007465876988135278, + "logits/chosen": -0.2800699472427368, + "logits/rejected": -0.4186505675315857, + "logps/chosen": -0.0007694175001233816, + "logps/rejected": -2.591726303100586, + "loss": 0.4788, + "nll_loss": 0.11962364614009857, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.694175292272121e-05, + "rewards/margins": 0.259095698595047, + "rewards/rejected": -0.25917261838912964, + "step": 10612 + }, + { + "epoch": 7.3395573997233745, + "grad_norm": 6.79379940032959, + "learning_rate": 1.4780236668203473e-05, + "log_odds_chosen": 11.99535846710205, + "log_odds_ratio": -0.0001469114940846339, + "logits/chosen": -0.6883875131607056, + "logits/rejected": -0.5342060923576355, + "logps/chosen": -0.00039757287595421076, + "logps/rejected": -3.7388129234313965, + "loss": 0.7219, + "nll_loss": 0.18045930564403534, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.975728395744227e-05, + "rewards/margins": 0.3738415539264679, + "rewards/rejected": -0.3738812804222107, + "step": 10613 + }, + { + "epoch": 7.340248962655601, + "grad_norm": 3.342613458633423, + "learning_rate": 1.4776394651913326e-05, + "log_odds_chosen": 10.904277801513672, + "log_odds_ratio": -7.465697126463056e-05, + "logits/chosen": -0.211563378572464, + "logits/rejected": -0.4272039532661438, + "logps/chosen": -0.0015133580891415477, + "logps/rejected": -2.0817818641662598, + "loss": 0.403, + "nll_loss": 0.10073212534189224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015133581473492086, + "rewards/margins": 0.20802685618400574, + "rewards/rejected": -0.20817819237709045, + "step": 10614 + }, + { + "epoch": 7.340940525587828, + "grad_norm": 5.032479763031006, + "learning_rate": 1.4772552635623177e-05, + "log_odds_chosen": 11.533479690551758, + "log_odds_ratio": -4.141679892200045e-05, + "logits/chosen": -0.4578152298927307, + "logits/rejected": -0.40243053436279297, + "logps/chosen": -0.0002884402929339558, + "logps/rejected": -2.7242417335510254, + "loss": 0.369, + "nll_loss": 0.09225247800350189, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8844027838204056e-05, + "rewards/margins": 0.2723953425884247, + "rewards/rejected": -0.2724241614341736, + "step": 10615 + }, + { + "epoch": 7.341632088520055, + "grad_norm": 3.96189284324646, + "learning_rate": 1.4768710619333026e-05, + "log_odds_chosen": 10.917028427124023, + "log_odds_ratio": -5.428310396382585e-05, + "logits/chosen": -0.5717241168022156, + "logits/rejected": -0.5869755148887634, + "logps/chosen": -0.00023707385116722435, + "logps/rejected": -2.5564537048339844, + "loss": 0.5278, + "nll_loss": 0.1319570243358612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.370738729950972e-05, + "rewards/margins": 0.25562167167663574, + "rewards/rejected": -0.25564536452293396, + "step": 10616 + }, + { + "epoch": 7.342323651452282, + "grad_norm": 10.240484237670898, + "learning_rate": 1.4764868603042878e-05, + "log_odds_chosen": 11.378911018371582, + "log_odds_ratio": -9.107735240831971e-05, + "logits/chosen": -0.3321758508682251, + "logits/rejected": -0.4863980710506439, + "logps/chosen": -0.0003882453893311322, + "logps/rejected": -3.289459705352783, + "loss": 0.3806, + "nll_loss": 0.09513388574123383, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.882453529513441e-05, + "rewards/margins": 0.3289071321487427, + "rewards/rejected": -0.32894593477249146, + "step": 10617 + }, + { + "epoch": 7.343015214384509, + "grad_norm": 5.46720552444458, + "learning_rate": 1.476102658675273e-05, + "log_odds_chosen": 10.650997161865234, + "log_odds_ratio": -6.125005165813491e-05, + "logits/chosen": -0.47321465611457825, + "logits/rejected": -0.5267034769058228, + "logps/chosen": -0.00102140917442739, + "logps/rejected": -3.0506556034088135, + "loss": 0.5083, + "nll_loss": 0.12706559896469116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010214091162197292, + "rewards/margins": 0.30496343970298767, + "rewards/rejected": -0.3050655722618103, + "step": 10618 + }, + { + "epoch": 7.3437067773167355, + "grad_norm": 16.179311752319336, + "learning_rate": 1.475718457046258e-05, + "log_odds_chosen": 11.190753936767578, + "log_odds_ratio": -4.702592923422344e-05, + "logits/chosen": -0.4904909133911133, + "logits/rejected": -0.6124447584152222, + "logps/chosen": -0.00019545605755411088, + "logps/rejected": -2.2012088298797607, + "loss": 0.6981, + "nll_loss": 0.17451709508895874, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9545605027815327e-05, + "rewards/margins": 0.22010135650634766, + "rewards/rejected": -0.22012090682983398, + "step": 10619 + }, + { + "epoch": 7.344398340248962, + "grad_norm": 4.674870014190674, + "learning_rate": 1.475334255417243e-05, + "log_odds_chosen": 10.288872718811035, + "log_odds_ratio": -0.0033015012741088867, + "logits/chosen": -0.5148146152496338, + "logits/rejected": -0.5312724709510803, + "logps/chosen": -0.0028533488512039185, + "logps/rejected": -1.5259571075439453, + "loss": 0.4448, + "nll_loss": 0.11086127907037735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002853349142242223, + "rewards/margins": 0.15231037139892578, + "rewards/rejected": -0.15259572863578796, + "step": 10620 + }, + { + "epoch": 7.345089903181189, + "grad_norm": 5.621874809265137, + "learning_rate": 1.4749500537882283e-05, + "log_odds_chosen": 10.4603271484375, + "log_odds_ratio": -0.00011067378363804892, + "logits/chosen": -0.10758718848228455, + "logits/rejected": -0.14322113990783691, + "logps/chosen": -0.00019782486197073013, + "logps/rejected": -1.6435022354125977, + "loss": 0.5255, + "nll_loss": 0.13135552406311035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9782486560870893e-05, + "rewards/margins": 0.16433045268058777, + "rewards/rejected": -0.1643502414226532, + "step": 10621 + }, + { + "epoch": 7.345781466113416, + "grad_norm": 4.4091997146606445, + "learning_rate": 1.4745658521592132e-05, + "log_odds_chosen": 10.637389183044434, + "log_odds_ratio": -0.00015043109306134284, + "logits/chosen": -0.32385605573654175, + "logits/rejected": -0.42906877398490906, + "logps/chosen": -0.0002690694236662239, + "logps/rejected": -2.022911787033081, + "loss": 0.3128, + "nll_loss": 0.07819736003875732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.690694236662239e-05, + "rewards/margins": 0.20226429402828217, + "rewards/rejected": -0.20229119062423706, + "step": 10622 + }, + { + "epoch": 7.346473029045643, + "grad_norm": 4.546496391296387, + "learning_rate": 1.4741816505301984e-05, + "log_odds_chosen": 11.610639572143555, + "log_odds_ratio": -1.3535655853047501e-05, + "logits/chosen": -0.6121603846549988, + "logits/rejected": -0.6175299882888794, + "logps/chosen": -8.269608224509284e-05, + "logps/rejected": -1.9675970077514648, + "loss": 0.387, + "nll_loss": 0.09674078971147537, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.269607860711403e-06, + "rewards/margins": 0.1967514455318451, + "rewards/rejected": -0.19675971567630768, + "step": 10623 + }, + { + "epoch": 7.34716459197787, + "grad_norm": 4.866008758544922, + "learning_rate": 1.4737974489011835e-05, + "log_odds_chosen": 10.987707138061523, + "log_odds_ratio": -4.2686409869929776e-05, + "logits/chosen": -0.5667479038238525, + "logits/rejected": -0.5982543230056763, + "logps/chosen": -0.00047450707643292844, + "logps/rejected": -2.640371561050415, + "loss": 0.5077, + "nll_loss": 0.12692023813724518, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.745071055367589e-05, + "rewards/margins": 0.26398971676826477, + "rewards/rejected": -0.264037162065506, + "step": 10624 + }, + { + "epoch": 7.3478561549100965, + "grad_norm": 4.262731075286865, + "learning_rate": 1.4734132472721684e-05, + "log_odds_chosen": 11.199050903320312, + "log_odds_ratio": -3.262510654167272e-05, + "logits/chosen": 0.013080950826406479, + "logits/rejected": -0.06952833384275436, + "logps/chosen": -0.00019538719789125025, + "logps/rejected": -2.5833044052124023, + "loss": 0.466, + "nll_loss": 0.11649253219366074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.953872197191231e-05, + "rewards/margins": 0.25831088423728943, + "rewards/rejected": -0.25833040475845337, + "step": 10625 + }, + { + "epoch": 7.348547717842323, + "grad_norm": 3.8600428104400635, + "learning_rate": 1.4730290456431537e-05, + "log_odds_chosen": 11.01201057434082, + "log_odds_ratio": -3.2962183468043804e-05, + "logits/chosen": -0.48151469230651855, + "logits/rejected": -0.6132542490959167, + "logps/chosen": -0.00011318025644868612, + "logps/rejected": -1.6544891595840454, + "loss": 0.284, + "nll_loss": 0.07098495215177536, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1318026736262254e-05, + "rewards/margins": 0.16543760895729065, + "rewards/rejected": -0.16544891893863678, + "step": 10626 + }, + { + "epoch": 7.34923928077455, + "grad_norm": 7.540706634521484, + "learning_rate": 1.4726448440141389e-05, + "log_odds_chosen": 10.292908668518066, + "log_odds_ratio": -0.004162487108260393, + "logits/chosen": -0.06213077902793884, + "logits/rejected": -0.1171325147151947, + "logps/chosen": -0.02855098620057106, + "logps/rejected": -2.2707366943359375, + "loss": 0.4042, + "nll_loss": 0.10062611103057861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002855098806321621, + "rewards/margins": 0.22421857714653015, + "rewards/rejected": -0.22707366943359375, + "step": 10627 + }, + { + "epoch": 7.349930843706777, + "grad_norm": 5.197941780090332, + "learning_rate": 1.4722606423851238e-05, + "log_odds_chosen": 11.520332336425781, + "log_odds_ratio": -2.6367244572611526e-05, + "logits/chosen": -0.22637757658958435, + "logits/rejected": -0.36071324348449707, + "logps/chosen": -0.00021008508338127285, + "logps/rejected": -2.8108935356140137, + "loss": 0.4636, + "nll_loss": 0.1158994808793068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1008509065723047e-05, + "rewards/margins": 0.2810683250427246, + "rewards/rejected": -0.2810893654823303, + "step": 10628 + }, + { + "epoch": 7.350622406639004, + "grad_norm": 11.5546293258667, + "learning_rate": 1.4718764407561089e-05, + "log_odds_chosen": 11.054574966430664, + "log_odds_ratio": -0.00022649082529824227, + "logits/chosen": -0.5302231907844543, + "logits/rejected": -0.6536000967025757, + "logps/chosen": -0.0014594955136999488, + "logps/rejected": -2.953653335571289, + "loss": 1.0388, + "nll_loss": 0.2596677839756012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014594956883229315, + "rewards/margins": 0.29521939158439636, + "rewards/rejected": -0.2953653335571289, + "step": 10629 + }, + { + "epoch": 7.351313969571231, + "grad_norm": 7.829750061035156, + "learning_rate": 1.4714922391270941e-05, + "log_odds_chosen": 9.29830551147461, + "log_odds_ratio": -0.0005126740434207022, + "logits/chosen": -0.11748141795396805, + "logits/rejected": -0.10664454102516174, + "logps/chosen": -0.0015445123426616192, + "logps/rejected": -2.1555533409118652, + "loss": 1.0364, + "nll_loss": 0.25904539227485657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015445123426616192, + "rewards/margins": 0.21540087461471558, + "rewards/rejected": -0.215555340051651, + "step": 10630 + }, + { + "epoch": 7.3520055325034575, + "grad_norm": 6.214385032653809, + "learning_rate": 1.471108037498079e-05, + "log_odds_chosen": 11.047991752624512, + "log_odds_ratio": -0.00016679904365446419, + "logits/chosen": 0.3329809010028839, + "logits/rejected": 0.18723444640636444, + "logps/chosen": -0.00027217259048484266, + "logps/rejected": -2.4213221073150635, + "loss": 0.8566, + "nll_loss": 0.21412542462348938, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7217258320888504e-05, + "rewards/margins": 0.24210500717163086, + "rewards/rejected": -0.24213220179080963, + "step": 10631 + }, + { + "epoch": 7.352697095435684, + "grad_norm": 5.405941486358643, + "learning_rate": 1.4707238358690643e-05, + "log_odds_chosen": 11.97227668762207, + "log_odds_ratio": -1.698249252513051e-05, + "logits/chosen": -0.6586976051330566, + "logits/rejected": -0.6436535716056824, + "logps/chosen": -8.172958041541278e-05, + "logps/rejected": -2.3857898712158203, + "loss": 0.3021, + "nll_loss": 0.07551433145999908, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.172957677743398e-06, + "rewards/margins": 0.23857080936431885, + "rewards/rejected": -0.23857899010181427, + "step": 10632 + }, + { + "epoch": 7.353388658367911, + "grad_norm": 6.927206993103027, + "learning_rate": 1.4703396342400492e-05, + "log_odds_chosen": 10.949483871459961, + "log_odds_ratio": -4.4107095163781196e-05, + "logits/chosen": -0.00477069616317749, + "logits/rejected": -0.0485563725233078, + "logps/chosen": -0.00016855084686540067, + "logps/rejected": -2.265960454940796, + "loss": 0.5283, + "nll_loss": 0.132061168551445, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6855085050337948e-05, + "rewards/margins": 0.2265791893005371, + "rewards/rejected": -0.22659605741500854, + "step": 10633 + }, + { + "epoch": 7.354080221300138, + "grad_norm": 3.2986459732055664, + "learning_rate": 1.4699554326110343e-05, + "log_odds_chosen": 11.19710922241211, + "log_odds_ratio": -3.278831354691647e-05, + "logits/chosen": -0.7175881266593933, + "logits/rejected": -0.6673378348350525, + "logps/chosen": -0.00038059664075262845, + "logps/rejected": -2.5626344680786133, + "loss": 0.3126, + "nll_loss": 0.07814760506153107, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.80596611648798e-05, + "rewards/margins": 0.2562254071235657, + "rewards/rejected": -0.25626346468925476, + "step": 10634 + }, + { + "epoch": 7.354771784232365, + "grad_norm": 4.856668472290039, + "learning_rate": 1.4695712309820195e-05, + "log_odds_chosen": 10.294366836547852, + "log_odds_ratio": -0.00012593253632076085, + "logits/chosen": -0.3165508806705475, + "logits/rejected": -0.36833885312080383, + "logps/chosen": -0.0007413438288494945, + "logps/rejected": -1.5169241428375244, + "loss": 0.5096, + "nll_loss": 0.127391055226326, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.413439016090706e-05, + "rewards/margins": 0.15161828696727753, + "rewards/rejected": -0.15169242024421692, + "step": 10635 + }, + { + "epoch": 7.355463347164592, + "grad_norm": 6.945402145385742, + "learning_rate": 1.4691870293530044e-05, + "log_odds_chosen": 11.395181655883789, + "log_odds_ratio": -4.5541368308477104e-05, + "logits/chosen": -0.2549218535423279, + "logits/rejected": -0.2680109739303589, + "logps/chosen": -0.00023953057825565338, + "logps/rejected": -2.7777349948883057, + "loss": 0.4051, + "nll_loss": 0.10126994550228119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.395305818936322e-05, + "rewards/margins": 0.27774956822395325, + "rewards/rejected": -0.27777349948883057, + "step": 10636 + }, + { + "epoch": 7.356154910096818, + "grad_norm": 6.0253777503967285, + "learning_rate": 1.4688028277239896e-05, + "log_odds_chosen": 10.734546661376953, + "log_odds_ratio": -3.223002204322256e-05, + "logits/chosen": -0.4334542155265808, + "logits/rejected": -0.3397785425186157, + "logps/chosen": -9.362496348330751e-05, + "logps/rejected": -1.2691165208816528, + "loss": 0.5793, + "nll_loss": 0.14482782781124115, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.36249580263393e-06, + "rewards/margins": 0.1269022822380066, + "rewards/rejected": -0.12691165506839752, + "step": 10637 + }, + { + "epoch": 7.356846473029045, + "grad_norm": 3.829284906387329, + "learning_rate": 1.4684186260949747e-05, + "log_odds_chosen": 10.96677017211914, + "log_odds_ratio": -4.322976747062057e-05, + "logits/chosen": -0.5060489177703857, + "logits/rejected": -0.5655438899993896, + "logps/chosen": -0.00012070621596649289, + "logps/rejected": -1.8876540660858154, + "loss": 0.3506, + "nll_loss": 0.08765744417905807, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2070622688042931e-05, + "rewards/margins": 0.18875333666801453, + "rewards/rejected": -0.18876540660858154, + "step": 10638 + }, + { + "epoch": 7.357538035961272, + "grad_norm": 7.1783246994018555, + "learning_rate": 1.4680344244659596e-05, + "log_odds_chosen": 10.930610656738281, + "log_odds_ratio": -3.301469041616656e-05, + "logits/chosen": -0.04722614586353302, + "logits/rejected": -0.11504199355840683, + "logps/chosen": -0.000139197101816535, + "logps/rejected": -2.0308399200439453, + "loss": 0.6415, + "nll_loss": 0.16035979986190796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3919711818743963e-05, + "rewards/margins": 0.20307007431983948, + "rewards/rejected": -0.20308397710323334, + "step": 10639 + }, + { + "epoch": 7.358229598893499, + "grad_norm": 4.304840564727783, + "learning_rate": 1.4676502228369449e-05, + "log_odds_chosen": 11.903948783874512, + "log_odds_ratio": -2.864694397430867e-05, + "logits/chosen": -0.3215031325817108, + "logits/rejected": -0.42134568095207214, + "logps/chosen": -0.00019372413225937635, + "logps/rejected": -2.997770309448242, + "loss": 0.4164, + "nll_loss": 0.10409659147262573, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.937241540872492e-05, + "rewards/margins": 0.2997576892375946, + "rewards/rejected": -0.2997770309448242, + "step": 10640 + }, + { + "epoch": 7.358921161825726, + "grad_norm": 5.644372463226318, + "learning_rate": 1.4672660212079301e-05, + "log_odds_chosen": 10.269853591918945, + "log_odds_ratio": -0.00014747031673323363, + "logits/chosen": -0.737433910369873, + "logits/rejected": -0.835425853729248, + "logps/chosen": -0.0012461732840165496, + "logps/rejected": -1.93202543258667, + "loss": 0.3985, + "nll_loss": 0.099620521068573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012461733422242105, + "rewards/margins": 0.1930779218673706, + "rewards/rejected": -0.19320255517959595, + "step": 10641 + }, + { + "epoch": 7.359612724757953, + "grad_norm": 5.395466327667236, + "learning_rate": 1.466881819578915e-05, + "log_odds_chosen": 11.738531112670898, + "log_odds_ratio": -8.553595762350596e-06, + "logits/chosen": -0.5634970664978027, + "logits/rejected": -0.6411577463150024, + "logps/chosen": -0.00012023108138237149, + "logps/rejected": -2.624508857727051, + "loss": 0.4429, + "nll_loss": 0.11071419715881348, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.202310886583291e-05, + "rewards/margins": 0.26243889331817627, + "rewards/rejected": -0.2624509036540985, + "step": 10642 + }, + { + "epoch": 7.360304287690179, + "grad_norm": 4.805103778839111, + "learning_rate": 1.4664976179499001e-05, + "log_odds_chosen": 10.992596626281738, + "log_odds_ratio": -0.00036997467395849526, + "logits/chosen": -0.48275116086006165, + "logits/rejected": -0.691816508769989, + "logps/chosen": -0.0003715948842000216, + "logps/rejected": -2.5263161659240723, + "loss": 0.4734, + "nll_loss": 0.11830729246139526, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7159494240768254e-05, + "rewards/margins": 0.2525944411754608, + "rewards/rejected": -0.25263160467147827, + "step": 10643 + }, + { + "epoch": 7.360995850622406, + "grad_norm": 8.46743106842041, + "learning_rate": 1.4661134163208853e-05, + "log_odds_chosen": 10.13895320892334, + "log_odds_ratio": -0.0003543172206263989, + "logits/chosen": -0.6756718158721924, + "logits/rejected": -0.7190816402435303, + "logps/chosen": -0.00043742440175265074, + "logps/rejected": -1.8771418333053589, + "loss": 0.3552, + "nll_loss": 0.08875347673892975, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.374244235805236e-05, + "rewards/margins": 0.18767043948173523, + "rewards/rejected": -0.18771417438983917, + "step": 10644 + }, + { + "epoch": 7.361687413554633, + "grad_norm": 3.5724453926086426, + "learning_rate": 1.4657292146918702e-05, + "log_odds_chosen": 10.295007705688477, + "log_odds_ratio": -0.00011511320917634293, + "logits/chosen": 0.1354466676712036, + "logits/rejected": 0.08701753616333008, + "logps/chosen": -0.0006204830133356154, + "logps/rejected": -1.82797372341156, + "loss": 0.4692, + "nll_loss": 0.11728240549564362, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204830424394459e-05, + "rewards/margins": 0.18273532390594482, + "rewards/rejected": -0.182797372341156, + "step": 10645 + }, + { + "epoch": 7.36237897648686, + "grad_norm": 8.771824836730957, + "learning_rate": 1.4653450130628555e-05, + "log_odds_chosen": 11.154577255249023, + "log_odds_ratio": -7.893896690802649e-05, + "logits/chosen": -0.17044945061206818, + "logits/rejected": -0.1967611312866211, + "logps/chosen": -0.0003008460334967822, + "logps/rejected": -2.7240962982177734, + "loss": 0.3921, + "nll_loss": 0.09801331907510757, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.008460407727398e-05, + "rewards/margins": 0.2723795771598816, + "rewards/rejected": -0.2724096477031708, + "step": 10646 + }, + { + "epoch": 7.363070539419087, + "grad_norm": 3.3796043395996094, + "learning_rate": 1.4649608114338406e-05, + "log_odds_chosen": 11.514384269714355, + "log_odds_ratio": -1.3234783182269894e-05, + "logits/chosen": -0.28235557675361633, + "logits/rejected": -0.3105676770210266, + "logps/chosen": -7.076616020640358e-05, + "logps/rejected": -1.9891992807388306, + "loss": 0.3257, + "nll_loss": 0.08141130954027176, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.076616384438239e-06, + "rewards/margins": 0.1989128440618515, + "rewards/rejected": -0.19891992211341858, + "step": 10647 + }, + { + "epoch": 7.363762102351314, + "grad_norm": 5.169199466705322, + "learning_rate": 1.4645766098048255e-05, + "log_odds_chosen": 9.638940811157227, + "log_odds_ratio": -0.005037724506109953, + "logits/chosen": -0.41315174102783203, + "logits/rejected": -0.427206814289093, + "logps/chosen": -0.0020048036240041256, + "logps/rejected": -1.8959660530090332, + "loss": 0.4813, + "nll_loss": 0.11983361840248108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020048035366926342, + "rewards/margins": 0.18939611315727234, + "rewards/rejected": -0.18959660828113556, + "step": 10648 + }, + { + "epoch": 7.36445366528354, + "grad_norm": 4.702418804168701, + "learning_rate": 1.4641924081758107e-05, + "log_odds_chosen": 10.577019691467285, + "log_odds_ratio": -8.794783207122236e-05, + "logits/chosen": -0.32490867376327515, + "logits/rejected": -0.3228122591972351, + "logps/chosen": -0.000967757951002568, + "logps/rejected": -2.048372745513916, + "loss": 0.8805, + "nll_loss": 0.22010564804077148, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.677580237621441e-05, + "rewards/margins": 0.2047404944896698, + "rewards/rejected": -0.20483727753162384, + "step": 10649 + }, + { + "epoch": 7.365145228215767, + "grad_norm": 7.433712959289551, + "learning_rate": 1.463808206546796e-05, + "log_odds_chosen": 10.622085571289062, + "log_odds_ratio": -0.00020646367920562625, + "logits/chosen": -0.3272697627544403, + "logits/rejected": -0.395301878452301, + "logps/chosen": -0.0006920951418578625, + "logps/rejected": -2.6321024894714355, + "loss": 0.5902, + "nll_loss": 0.14752425253391266, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920951273059472e-05, + "rewards/margins": 0.26314103603363037, + "rewards/rejected": -0.263210266828537, + "step": 10650 + }, + { + "epoch": 7.365836791147994, + "grad_norm": 3.3372693061828613, + "learning_rate": 1.4634240049177809e-05, + "log_odds_chosen": 11.259846687316895, + "log_odds_ratio": -7.804886990925297e-05, + "logits/chosen": -0.1679503619670868, + "logits/rejected": -0.21933269500732422, + "logps/chosen": -0.0026375320740044117, + "logps/rejected": -3.6971280574798584, + "loss": 0.4456, + "nll_loss": 0.11138937622308731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00026375323068350554, + "rewards/margins": 0.36944907903671265, + "rewards/rejected": -0.36971279978752136, + "step": 10651 + }, + { + "epoch": 7.366528354080221, + "grad_norm": 5.466868877410889, + "learning_rate": 1.463039803288766e-05, + "log_odds_chosen": 10.425373077392578, + "log_odds_ratio": -7.754612306598574e-05, + "logits/chosen": -0.004369847476482391, + "logits/rejected": -0.012086287140846252, + "logps/chosen": -0.00018663016089703888, + "logps/rejected": -1.868985652923584, + "loss": 0.5786, + "nll_loss": 0.14463818073272705, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.866301681729965e-05, + "rewards/margins": 0.18687991797924042, + "rewards/rejected": -0.18689857423305511, + "step": 10652 + }, + { + "epoch": 7.367219917012449, + "grad_norm": 9.621696472167969, + "learning_rate": 1.4626556016597512e-05, + "log_odds_chosen": 10.301619529724121, + "log_odds_ratio": -0.0031991673167794943, + "logits/chosen": -0.39241892099380493, + "logits/rejected": -0.4045332670211792, + "logps/chosen": -0.001698142383247614, + "logps/rejected": -2.017453670501709, + "loss": 0.9325, + "nll_loss": 0.23280005156993866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016981424414552748, + "rewards/margins": 0.20157556235790253, + "rewards/rejected": -0.20174537599086761, + "step": 10653 + }, + { + "epoch": 7.367911479944675, + "grad_norm": 4.06018590927124, + "learning_rate": 1.4622714000307361e-05, + "log_odds_chosen": 11.616990089416504, + "log_odds_ratio": -3.6053897929377854e-05, + "logits/chosen": -0.2114473134279251, + "logits/rejected": -0.26343247294425964, + "logps/chosen": -0.0017658036667853594, + "logps/rejected": -2.960357666015625, + "loss": 0.4912, + "nll_loss": 0.12278671562671661, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017658036085776985, + "rewards/margins": 0.2958591878414154, + "rewards/rejected": -0.2960357666015625, + "step": 10654 + }, + { + "epoch": 7.368603042876902, + "grad_norm": 5.952072620391846, + "learning_rate": 1.4618871984017213e-05, + "log_odds_chosen": 10.786083221435547, + "log_odds_ratio": -7.878676115069538e-05, + "logits/chosen": -0.572536826133728, + "logits/rejected": -0.5953267216682434, + "logps/chosen": -0.0003121356130577624, + "logps/rejected": -2.293868064880371, + "loss": 0.482, + "nll_loss": 0.1204964742064476, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1213559850584716e-05, + "rewards/margins": 0.229355588555336, + "rewards/rejected": -0.2293868064880371, + "step": 10655 + }, + { + "epoch": 7.369294605809129, + "grad_norm": 4.473649978637695, + "learning_rate": 1.4615029967727064e-05, + "log_odds_chosen": 11.468165397644043, + "log_odds_ratio": -2.576730548753403e-05, + "logits/chosen": -0.5516068935394287, + "logits/rejected": -0.5789634585380554, + "logps/chosen": -0.00020674789266195148, + "logps/rejected": -2.422849655151367, + "loss": 0.3864, + "nll_loss": 0.09660729020833969, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.067478999379091e-05, + "rewards/margins": 0.2422642707824707, + "rewards/rejected": -0.24228495359420776, + "step": 10656 + }, + { + "epoch": 7.369986168741356, + "grad_norm": 6.674118518829346, + "learning_rate": 1.4611187951436913e-05, + "log_odds_chosen": 10.78459644317627, + "log_odds_ratio": -6.154303264338523e-05, + "logits/chosen": -0.10129731893539429, + "logits/rejected": -0.38078394532203674, + "logps/chosen": -0.0018489633221179247, + "logps/rejected": -3.1722657680511475, + "loss": 1.0619, + "nll_loss": 0.2654639184474945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018489634385332465, + "rewards/margins": 0.31704169511795044, + "rewards/rejected": -0.3172265887260437, + "step": 10657 + }, + { + "epoch": 7.370677731673583, + "grad_norm": 8.016694068908691, + "learning_rate": 1.4607345935146766e-05, + "log_odds_chosen": 11.63819408416748, + "log_odds_ratio": -3.645301330834627e-05, + "logits/chosen": -0.41492342948913574, + "logits/rejected": -0.4904578924179077, + "logps/chosen": -0.0002599305589683354, + "logps/rejected": -2.92301344871521, + "loss": 0.5225, + "nll_loss": 0.13063208758831024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.59930566244293e-05, + "rewards/margins": 0.2922753691673279, + "rewards/rejected": -0.29230135679244995, + "step": 10658 + }, + { + "epoch": 7.37136929460581, + "grad_norm": 4.327447891235352, + "learning_rate": 1.4603503918856618e-05, + "log_odds_chosen": 10.701872825622559, + "log_odds_ratio": -2.96252310363343e-05, + "logits/chosen": -0.5056709051132202, + "logits/rejected": -0.5226324796676636, + "logps/chosen": -8.454695489490405e-05, + "logps/rejected": -1.362694501876831, + "loss": 0.2557, + "nll_loss": 0.06392902135848999, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.454695489490405e-06, + "rewards/margins": 0.1362610161304474, + "rewards/rejected": -0.1362694650888443, + "step": 10659 + }, + { + "epoch": 7.372060857538036, + "grad_norm": 8.658827781677246, + "learning_rate": 1.4599661902566467e-05, + "log_odds_chosen": 11.036666870117188, + "log_odds_ratio": -0.00022471771808341146, + "logits/chosen": -0.4739881455898285, + "logits/rejected": -0.47643283009529114, + "logps/chosen": -0.0003724672715179622, + "logps/rejected": -3.089184284210205, + "loss": 0.6262, + "nll_loss": 0.15653277933597565, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.72467256966047e-05, + "rewards/margins": 0.30888116359710693, + "rewards/rejected": -0.30891841650009155, + "step": 10660 + }, + { + "epoch": 7.372752420470263, + "grad_norm": 4.330894947052002, + "learning_rate": 1.4595819886276318e-05, + "log_odds_chosen": 10.98950481414795, + "log_odds_ratio": -0.0005101492861285806, + "logits/chosen": 0.15454961359500885, + "logits/rejected": 0.1601768136024475, + "logps/chosen": -0.00024295755429193377, + "logps/rejected": -2.2143144607543945, + "loss": 0.336, + "nll_loss": 0.0839565247297287, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4295755792991258e-05, + "rewards/margins": 0.22140714526176453, + "rewards/rejected": -0.2214314490556717, + "step": 10661 + }, + { + "epoch": 7.37344398340249, + "grad_norm": 3.248152256011963, + "learning_rate": 1.459197786998617e-05, + "log_odds_chosen": 10.0687894821167, + "log_odds_ratio": -0.0001112874160753563, + "logits/chosen": -0.29376327991485596, + "logits/rejected": -0.3159409463405609, + "logps/chosen": -0.0002920984697993845, + "logps/rejected": -1.9017329216003418, + "loss": 0.4019, + "nll_loss": 0.10047246515750885, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.920984843512997e-05, + "rewards/margins": 0.19014407694339752, + "rewards/rejected": -0.19017328321933746, + "step": 10662 + }, + { + "epoch": 7.374135546334717, + "grad_norm": 4.734434127807617, + "learning_rate": 1.458813585369602e-05, + "log_odds_chosen": 10.095927238464355, + "log_odds_ratio": -0.00015906621410977095, + "logits/chosen": -0.4993319511413574, + "logits/rejected": -0.6769671440124512, + "logps/chosen": -0.0004607696318998933, + "logps/rejected": -1.7483266592025757, + "loss": 0.5519, + "nll_loss": 0.137965127825737, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.607695882441476e-05, + "rewards/margins": 0.17478659749031067, + "rewards/rejected": -0.17483268678188324, + "step": 10663 + }, + { + "epoch": 7.374827109266944, + "grad_norm": 10.651551246643066, + "learning_rate": 1.4584293837405872e-05, + "log_odds_chosen": 10.96095085144043, + "log_odds_ratio": -3.493850090308115e-05, + "logits/chosen": -0.25099456310272217, + "logits/rejected": -0.2105286717414856, + "logps/chosen": -0.00018202661885879934, + "logps/rejected": -2.0909557342529297, + "loss": 0.5512, + "nll_loss": 0.1378040462732315, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8202661522082053e-05, + "rewards/margins": 0.209077388048172, + "rewards/rejected": -0.20909559726715088, + "step": 10664 + }, + { + "epoch": 7.375518672199171, + "grad_norm": 6.378670692443848, + "learning_rate": 1.4580451821115723e-05, + "log_odds_chosen": 12.079700469970703, + "log_odds_ratio": -1.1181369700352661e-05, + "logits/chosen": -0.5731353163719177, + "logits/rejected": -0.5929599404335022, + "logps/chosen": -0.00041094704647548497, + "logps/rejected": -3.2692391872406006, + "loss": 0.5437, + "nll_loss": 0.13593482971191406, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1094703192356974e-05, + "rewards/margins": 0.32688283920288086, + "rewards/rejected": -0.3269239068031311, + "step": 10665 + }, + { + "epoch": 7.376210235131397, + "grad_norm": 5.395590782165527, + "learning_rate": 1.4576609804825572e-05, + "log_odds_chosen": 10.98073959350586, + "log_odds_ratio": -6.951152317924425e-05, + "logits/chosen": -0.6794606447219849, + "logits/rejected": -0.727249264717102, + "logps/chosen": -0.00024431568454019725, + "logps/rejected": -2.1115775108337402, + "loss": 0.6299, + "nll_loss": 0.1574762612581253, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.443157063680701e-05, + "rewards/margins": 0.21113333106040955, + "rewards/rejected": -0.21115775406360626, + "step": 10666 + }, + { + "epoch": 7.376901798063624, + "grad_norm": 4.982635021209717, + "learning_rate": 1.4572767788535424e-05, + "log_odds_chosen": 10.567577362060547, + "log_odds_ratio": -0.0005886530270799994, + "logits/chosen": -0.3521302342414856, + "logits/rejected": -0.36093980073928833, + "logps/chosen": -0.0007477524923160672, + "logps/rejected": -2.0216317176818848, + "loss": 1.0992, + "nll_loss": 0.2747451066970825, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.477525650756434e-05, + "rewards/margins": 0.2020883858203888, + "rewards/rejected": -0.20216315984725952, + "step": 10667 + }, + { + "epoch": 7.377593360995851, + "grad_norm": 17.154781341552734, + "learning_rate": 1.4568925772245276e-05, + "log_odds_chosen": 9.469273567199707, + "log_odds_ratio": -0.00030858165700919926, + "logits/chosen": -0.5006073117256165, + "logits/rejected": -0.5103213787078857, + "logps/chosen": -0.0004589389427565038, + "logps/rejected": -1.5602257251739502, + "loss": 0.4696, + "nll_loss": 0.11737901717424393, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.589389936882071e-05, + "rewards/margins": 0.15597668290138245, + "rewards/rejected": -0.1560225784778595, + "step": 10668 + }, + { + "epoch": 7.378284923928078, + "grad_norm": 4.346867561340332, + "learning_rate": 1.4565083755955126e-05, + "log_odds_chosen": 11.488432884216309, + "log_odds_ratio": -2.4716002371860668e-05, + "logits/chosen": -0.002739326097071171, + "logits/rejected": -0.030549317598342896, + "logps/chosen": -0.00011103839642601088, + "logps/rejected": -2.4439241886138916, + "loss": 0.4689, + "nll_loss": 0.11722592264413834, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1103839824500028e-05, + "rewards/margins": 0.24438130855560303, + "rewards/rejected": -0.24439239501953125, + "step": 10669 + }, + { + "epoch": 7.378976486860305, + "grad_norm": 4.403764724731445, + "learning_rate": 1.4561241739664976e-05, + "log_odds_chosen": 11.146065711975098, + "log_odds_ratio": -5.7201199524570256e-05, + "logits/chosen": -0.27991095185279846, + "logits/rejected": -0.315477192401886, + "logps/chosen": -0.0001651171623962, + "logps/rejected": -2.3870275020599365, + "loss": 0.437, + "nll_loss": 0.10924191772937775, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.651171623962e-05, + "rewards/margins": 0.2386862337589264, + "rewards/rejected": -0.23870275914669037, + "step": 10670 + }, + { + "epoch": 7.3796680497925315, + "grad_norm": 9.351802825927734, + "learning_rate": 1.4557399723374829e-05, + "log_odds_chosen": 11.575432777404785, + "log_odds_ratio": -6.787521851947531e-05, + "logits/chosen": -0.34020090103149414, + "logits/rejected": -0.3319574296474457, + "logps/chosen": -0.00018382327107246965, + "logps/rejected": -2.6232800483703613, + "loss": 0.469, + "nll_loss": 0.11724460124969482, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8382328562438488e-05, + "rewards/margins": 0.26230961084365845, + "rewards/rejected": -0.26232796907424927, + "step": 10671 + }, + { + "epoch": 7.380359612724758, + "grad_norm": 6.590975761413574, + "learning_rate": 1.4553557707084678e-05, + "log_odds_chosen": 11.52649974822998, + "log_odds_ratio": -2.1129832020960748e-05, + "logits/chosen": -0.33116066455841064, + "logits/rejected": -0.43521445989608765, + "logps/chosen": -0.00017208060307893902, + "logps/rejected": -2.7834463119506836, + "loss": 0.6478, + "nll_loss": 0.16194315254688263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.720805994409602e-05, + "rewards/margins": 0.27832740545272827, + "rewards/rejected": -0.27834460139274597, + "step": 10672 + }, + { + "epoch": 7.381051175656985, + "grad_norm": 4.628101348876953, + "learning_rate": 1.454971569079453e-05, + "log_odds_chosen": 9.457018852233887, + "log_odds_ratio": -0.001599702751263976, + "logits/chosen": -0.5300707817077637, + "logits/rejected": -0.6058658957481384, + "logps/chosen": -0.00043648615246638656, + "logps/rejected": -1.410576343536377, + "loss": 0.4858, + "nll_loss": 0.12128002196550369, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3648615246638656e-05, + "rewards/margins": 0.1410140097141266, + "rewards/rejected": -0.14105764031410217, + "step": 10673 + }, + { + "epoch": 7.381742738589212, + "grad_norm": 3.3888509273529053, + "learning_rate": 1.4545873674504381e-05, + "log_odds_chosen": 11.028520584106445, + "log_odds_ratio": -4.537054701359011e-05, + "logits/chosen": -0.11969764530658722, + "logits/rejected": -0.1497419774532318, + "logps/chosen": -0.000272990990197286, + "logps/rejected": -2.4929189682006836, + "loss": 0.4646, + "nll_loss": 0.11614756286144257, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.729909829213284e-05, + "rewards/margins": 0.24926459789276123, + "rewards/rejected": -0.24929189682006836, + "step": 10674 + }, + { + "epoch": 7.382434301521439, + "grad_norm": 5.96130895614624, + "learning_rate": 1.454203165821423e-05, + "log_odds_chosen": 11.075370788574219, + "log_odds_ratio": -0.0001282305602217093, + "logits/chosen": -0.048389844596385956, + "logits/rejected": -0.16936539113521576, + "logps/chosen": -0.00021596168517135084, + "logps/rejected": -2.4580938816070557, + "loss": 0.6159, + "nll_loss": 0.1539560854434967, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1596169972326607e-05, + "rewards/margins": 0.24578779935836792, + "rewards/rejected": -0.2458093911409378, + "step": 10675 + }, + { + "epoch": 7.383125864453666, + "grad_norm": 5.834570407867432, + "learning_rate": 1.4538189641924082e-05, + "log_odds_chosen": 12.218822479248047, + "log_odds_ratio": -8.590914148953743e-06, + "logits/chosen": -0.06374844163656235, + "logits/rejected": -0.18003803491592407, + "logps/chosen": -0.00014865616685710847, + "logps/rejected": -3.2421627044677734, + "loss": 0.613, + "nll_loss": 0.15326063334941864, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4865616321912967e-05, + "rewards/margins": 0.32420143485069275, + "rewards/rejected": -0.3242163062095642, + "step": 10676 + }, + { + "epoch": 7.3838174273858925, + "grad_norm": 4.939897537231445, + "learning_rate": 1.4534347625633935e-05, + "log_odds_chosen": 11.44198989868164, + "log_odds_ratio": -1.698485721135512e-05, + "logits/chosen": -0.11121652275323868, + "logits/rejected": -0.08655675500631332, + "logps/chosen": -0.00016818266885820776, + "logps/rejected": -2.6434073448181152, + "loss": 0.5616, + "nll_loss": 0.14038707315921783, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6818266885820776e-05, + "rewards/margins": 0.2643239498138428, + "rewards/rejected": -0.26434075832366943, + "step": 10677 + }, + { + "epoch": 7.384508990318119, + "grad_norm": 3.912889003753662, + "learning_rate": 1.4530505609343784e-05, + "log_odds_chosen": 10.676713943481445, + "log_odds_ratio": -6.23077794443816e-05, + "logits/chosen": -0.3503372073173523, + "logits/rejected": -0.41021400690078735, + "logps/chosen": -0.00020047812722623348, + "logps/rejected": -1.4013879299163818, + "loss": 0.4033, + "nll_loss": 0.10082890838384628, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0047811631229706e-05, + "rewards/margins": 0.14011874794960022, + "rewards/rejected": -0.14013880491256714, + "step": 10678 + }, + { + "epoch": 7.385200553250346, + "grad_norm": 4.871994972229004, + "learning_rate": 1.4526663593053635e-05, + "log_odds_chosen": 11.72120189666748, + "log_odds_ratio": -1.770121707522776e-05, + "logits/chosen": -0.2013327181339264, + "logits/rejected": -0.28244835138320923, + "logps/chosen": -0.0002974059898406267, + "logps/rejected": -3.1454806327819824, + "loss": 0.6354, + "nll_loss": 0.15885329246520996, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.974059862026479e-05, + "rewards/margins": 0.3145183324813843, + "rewards/rejected": -0.3145480751991272, + "step": 10679 + }, + { + "epoch": 7.385892116182573, + "grad_norm": 6.55807638168335, + "learning_rate": 1.4522821576763487e-05, + "log_odds_chosen": 10.452030181884766, + "log_odds_ratio": -7.187146547948942e-05, + "logits/chosen": -0.46629953384399414, + "logits/rejected": -0.43954288959503174, + "logps/chosen": -0.0008648043731227517, + "logps/rejected": -2.166017770767212, + "loss": 0.482, + "nll_loss": 0.12049096822738647, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.648043149150908e-05, + "rewards/margins": 0.21651530265808105, + "rewards/rejected": -0.21660177409648895, + "step": 10680 + }, + { + "epoch": 7.3865836791148, + "grad_norm": 4.796786308288574, + "learning_rate": 1.4518979560473336e-05, + "log_odds_chosen": 11.206933975219727, + "log_odds_ratio": -3.39964208251331e-05, + "logits/chosen": -0.19763581454753876, + "logits/rejected": -0.24650007486343384, + "logps/chosen": -0.00019862827321048826, + "logps/rejected": -2.145627498626709, + "loss": 0.4517, + "nll_loss": 0.11292488127946854, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.986282950383611e-05, + "rewards/margins": 0.21454286575317383, + "rewards/rejected": -0.21456272900104523, + "step": 10681 + }, + { + "epoch": 7.387275242047027, + "grad_norm": 4.821454048156738, + "learning_rate": 1.4515137544183189e-05, + "log_odds_chosen": 11.62645435333252, + "log_odds_ratio": -3.971250043832697e-05, + "logits/chosen": -0.6977535486221313, + "logits/rejected": -0.7093822956085205, + "logps/chosen": -0.0003523613850120455, + "logps/rejected": -2.8389506340026855, + "loss": 0.6053, + "nll_loss": 0.15132945775985718, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.523613850120455e-05, + "rewards/margins": 0.2838597893714905, + "rewards/rejected": -0.2838950455188751, + "step": 10682 + }, + { + "epoch": 7.3879668049792535, + "grad_norm": 3.829512596130371, + "learning_rate": 1.451129552789304e-05, + "log_odds_chosen": 11.04928970336914, + "log_odds_ratio": -4.361994069768116e-05, + "logits/chosen": -0.3144991397857666, + "logits/rejected": -0.2946760654449463, + "logps/chosen": -0.00020116717496421188, + "logps/rejected": -2.3325746059417725, + "loss": 0.3145, + "nll_loss": 0.07862447202205658, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0116716768825427e-05, + "rewards/margins": 0.2332373410463333, + "rewards/rejected": -0.2332574725151062, + "step": 10683 + }, + { + "epoch": 7.38865836791148, + "grad_norm": 8.505328178405762, + "learning_rate": 1.4507453511602888e-05, + "log_odds_chosen": 11.85031509399414, + "log_odds_ratio": -1.9721686840057373e-05, + "logits/chosen": -0.48058003187179565, + "logits/rejected": -0.4414098560810089, + "logps/chosen": -0.00022078091569710523, + "logps/rejected": -2.5994250774383545, + "loss": 0.4388, + "nll_loss": 0.10970518738031387, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2078091205912642e-05, + "rewards/margins": 0.2599204480648041, + "rewards/rejected": -0.25994253158569336, + "step": 10684 + }, + { + "epoch": 7.389349930843707, + "grad_norm": 3.579763174057007, + "learning_rate": 1.4503611495312741e-05, + "log_odds_chosen": 11.160737991333008, + "log_odds_ratio": -4.9370610213372856e-05, + "logits/chosen": -0.12265770137310028, + "logits/rejected": -0.16684816777706146, + "logps/chosen": -0.0004806347715202719, + "logps/rejected": -3.001997709274292, + "loss": 0.5963, + "nll_loss": 0.14908070862293243, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.806347351404838e-05, + "rewards/margins": 0.3001517057418823, + "rewards/rejected": -0.30019980669021606, + "step": 10685 + }, + { + "epoch": 7.390041493775934, + "grad_norm": 9.121960639953613, + "learning_rate": 1.4499769479022593e-05, + "log_odds_chosen": 10.080194473266602, + "log_odds_ratio": -0.00014840658695902675, + "logits/chosen": -0.17311474680900574, + "logits/rejected": -0.22056277096271515, + "logps/chosen": -0.0003925380588043481, + "logps/rejected": -2.1007535457611084, + "loss": 0.3642, + "nll_loss": 0.09103476256132126, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.925380951841362e-05, + "rewards/margins": 0.21003609895706177, + "rewards/rejected": -0.21007534861564636, + "step": 10686 + }, + { + "epoch": 7.390733056708161, + "grad_norm": 7.154950141906738, + "learning_rate": 1.4495927462732442e-05, + "log_odds_chosen": 10.706562042236328, + "log_odds_ratio": -0.0001363321061944589, + "logits/chosen": -0.3177839517593384, + "logits/rejected": -0.38389331102371216, + "logps/chosen": -0.0002176029229303822, + "logps/rejected": -2.251680850982666, + "loss": 0.683, + "nll_loss": 0.1707375943660736, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.176029192924034e-05, + "rewards/margins": 0.2251463383436203, + "rewards/rejected": -0.2251681089401245, + "step": 10687 + }, + { + "epoch": 7.391424619640388, + "grad_norm": 7.543350696563721, + "learning_rate": 1.4492085446442293e-05, + "log_odds_chosen": 11.313833236694336, + "log_odds_ratio": -2.9455553885782138e-05, + "logits/chosen": -0.09866035729646683, + "logits/rejected": -0.17171455919742584, + "logps/chosen": -0.00020820967620238662, + "logps/rejected": -2.6313819885253906, + "loss": 0.3626, + "nll_loss": 0.09065688401460648, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0820967620238662e-05, + "rewards/margins": 0.26311737298965454, + "rewards/rejected": -0.26313820481300354, + "step": 10688 + }, + { + "epoch": 7.3921161825726145, + "grad_norm": 4.683642387390137, + "learning_rate": 1.4488243430152146e-05, + "log_odds_chosen": 10.479778289794922, + "log_odds_ratio": -5.9483056247700006e-05, + "logits/chosen": -0.30255410075187683, + "logits/rejected": -0.3816949725151062, + "logps/chosen": -0.00015024725871626288, + "logps/rejected": -1.8282932043075562, + "loss": 0.4385, + "nll_loss": 0.10963056236505508, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5024726053525228e-05, + "rewards/margins": 0.18281430006027222, + "rewards/rejected": -0.18282932043075562, + "step": 10689 + }, + { + "epoch": 7.392807745504841, + "grad_norm": 7.751023769378662, + "learning_rate": 1.4484401413861995e-05, + "log_odds_chosen": 9.871282577514648, + "log_odds_ratio": -0.00014603903400711715, + "logits/chosen": -0.2963367700576782, + "logits/rejected": -0.2837250828742981, + "logps/chosen": -0.00045877366210334003, + "logps/rejected": -1.9336518049240112, + "loss": 0.4378, + "nll_loss": 0.10944164544343948, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.587736475514248e-05, + "rewards/margins": 0.19331932067871094, + "rewards/rejected": -0.1933651864528656, + "step": 10690 + }, + { + "epoch": 7.393499308437068, + "grad_norm": 8.917428970336914, + "learning_rate": 1.4480559397571847e-05, + "log_odds_chosen": 11.292522430419922, + "log_odds_ratio": -5.915127621847205e-05, + "logits/chosen": -0.12618744373321533, + "logits/rejected": -0.11810366809368134, + "logps/chosen": -0.0004943721578456461, + "logps/rejected": -3.004287004470825, + "loss": 0.7274, + "nll_loss": 0.18185493350028992, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.94372179673519e-05, + "rewards/margins": 0.30037927627563477, + "rewards/rejected": -0.30042868852615356, + "step": 10691 + }, + { + "epoch": 7.394190871369295, + "grad_norm": 3.69183349609375, + "learning_rate": 1.4476717381281698e-05, + "log_odds_chosen": 10.84074592590332, + "log_odds_ratio": -0.0003451247466728091, + "logits/chosen": -0.23937007784843445, + "logits/rejected": -0.26529383659362793, + "logps/chosen": -0.0006957940058782697, + "logps/rejected": -2.5660195350646973, + "loss": 0.4154, + "nll_loss": 0.10382203757762909, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.957939331186935e-05, + "rewards/margins": 0.25653237104415894, + "rewards/rejected": -0.2566019594669342, + "step": 10692 + }, + { + "epoch": 7.394882434301522, + "grad_norm": 4.8974432945251465, + "learning_rate": 1.4472875364991547e-05, + "log_odds_chosen": 9.895940780639648, + "log_odds_ratio": -0.0010130507871508598, + "logits/chosen": -0.4183223247528076, + "logits/rejected": -0.24061670899391174, + "logps/chosen": -0.0012401751009747386, + "logps/rejected": -2.0234375, + "loss": 0.4782, + "nll_loss": 0.11944713443517685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000124017518828623, + "rewards/margins": 0.20221972465515137, + "rewards/rejected": -0.20234374701976776, + "step": 10693 + }, + { + "epoch": 7.395573997233749, + "grad_norm": 4.895662784576416, + "learning_rate": 1.44690333487014e-05, + "log_odds_chosen": 11.957036018371582, + "log_odds_ratio": -0.00012197183968964964, + "logits/chosen": -0.42090246081352234, + "logits/rejected": -0.45159658789634705, + "logps/chosen": -0.00027564779156818986, + "logps/rejected": -3.303597927093506, + "loss": 0.4743, + "nll_loss": 0.1185615286231041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7564779884414747e-05, + "rewards/margins": 0.3303322196006775, + "rewards/rejected": -0.3303597867488861, + "step": 10694 + }, + { + "epoch": 7.3962655601659755, + "grad_norm": 3.9130725860595703, + "learning_rate": 1.4465191332411252e-05, + "log_odds_chosen": 10.67100715637207, + "log_odds_ratio": -8.581003203289583e-05, + "logits/chosen": -0.26079773902893066, + "logits/rejected": -0.24093040823936462, + "logps/chosen": -0.0005644162301905453, + "logps/rejected": -2.086552619934082, + "loss": 0.5113, + "nll_loss": 0.12781992554664612, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.644162592943758e-05, + "rewards/margins": 0.20859882235527039, + "rewards/rejected": -0.20865526795387268, + "step": 10695 + }, + { + "epoch": 7.396957123098202, + "grad_norm": 4.183816909790039, + "learning_rate": 1.44613493161211e-05, + "log_odds_chosen": 9.546540260314941, + "log_odds_ratio": -0.00034563770168460906, + "logits/chosen": -0.0491783544421196, + "logits/rejected": -0.02445707842707634, + "logps/chosen": -0.0010862099006772041, + "logps/rejected": -1.8530609607696533, + "loss": 0.3779, + "nll_loss": 0.09445017576217651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010862098861252889, + "rewards/margins": 0.18519748747348785, + "rewards/rejected": -0.1853061020374298, + "step": 10696 + }, + { + "epoch": 7.397648686030429, + "grad_norm": 4.751798152923584, + "learning_rate": 1.4457507299830953e-05, + "log_odds_chosen": 10.333335876464844, + "log_odds_ratio": -0.00028667665901593864, + "logits/chosen": -0.012272864580154419, + "logits/rejected": -0.2014240324497223, + "logps/chosen": -0.0004716809489764273, + "logps/rejected": -1.5080033540725708, + "loss": 1.0929, + "nll_loss": 0.2731882929801941, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.716809780802578e-05, + "rewards/margins": 0.1507531702518463, + "rewards/rejected": -0.15080034732818604, + "step": 10697 + }, + { + "epoch": 7.398340248962656, + "grad_norm": 4.033143997192383, + "learning_rate": 1.4453665283540804e-05, + "log_odds_chosen": 10.501607894897461, + "log_odds_ratio": -0.0003531145630404353, + "logits/chosen": -0.45851123332977295, + "logits/rejected": -0.6169005632400513, + "logps/chosen": -0.00027281188522465527, + "logps/rejected": -1.9819741249084473, + "loss": 0.6676, + "nll_loss": 0.16687491536140442, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.728119034145493e-05, + "rewards/margins": 0.19817012548446655, + "rewards/rejected": -0.1981974095106125, + "step": 10698 + }, + { + "epoch": 7.399031811894883, + "grad_norm": 3.8497109413146973, + "learning_rate": 1.4449823267250653e-05, + "log_odds_chosen": 10.944068908691406, + "log_odds_ratio": -0.00011575493408599868, + "logits/chosen": -0.21114255487918854, + "logits/rejected": -0.27162325382232666, + "logps/chosen": -0.000330332200974226, + "logps/rejected": -1.948697805404663, + "loss": 0.4342, + "nll_loss": 0.10853491723537445, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3033222280209884e-05, + "rewards/margins": 0.19483675062656403, + "rewards/rejected": -0.1948697865009308, + "step": 10699 + }, + { + "epoch": 7.39972337482711, + "grad_norm": 11.0430269241333, + "learning_rate": 1.4445981250960506e-05, + "log_odds_chosen": 12.51168155670166, + "log_odds_ratio": -7.633711902599316e-06, + "logits/chosen": 0.12339423596858978, + "logits/rejected": -0.01906883716583252, + "logps/chosen": -0.00014900990936439484, + "logps/rejected": -3.635141372680664, + "loss": 0.6599, + "nll_loss": 0.1649717390537262, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4900991118338425e-05, + "rewards/margins": 0.3634992241859436, + "rewards/rejected": -0.36351412534713745, + "step": 10700 + }, + { + "epoch": 7.4004149377593365, + "grad_norm": 4.974323272705078, + "learning_rate": 1.4442139234670355e-05, + "log_odds_chosen": 11.252674102783203, + "log_odds_ratio": -4.285808972781524e-05, + "logits/chosen": -0.09175758063793182, + "logits/rejected": -0.17847055196762085, + "logps/chosen": -0.00027339643565937877, + "logps/rejected": -2.2855420112609863, + "loss": 0.3829, + "nll_loss": 0.0957113727927208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.733964538492728e-05, + "rewards/margins": 0.22852687537670135, + "rewards/rejected": -0.22855421900749207, + "step": 10701 + }, + { + "epoch": 7.401106500691563, + "grad_norm": 6.626379489898682, + "learning_rate": 1.4438297218380207e-05, + "log_odds_chosen": 10.925520896911621, + "log_odds_ratio": -0.00019308443006593734, + "logits/chosen": -0.40792328119277954, + "logits/rejected": -0.46591705083847046, + "logps/chosen": -0.0005892232875339687, + "logps/rejected": -2.2404043674468994, + "loss": 0.7062, + "nll_loss": 0.1765323281288147, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892233457416296e-05, + "rewards/margins": 0.22398152947425842, + "rewards/rejected": -0.2240404486656189, + "step": 10702 + }, + { + "epoch": 7.40179806362379, + "grad_norm": 14.179370880126953, + "learning_rate": 1.4434455202090058e-05, + "log_odds_chosen": 11.303972244262695, + "log_odds_ratio": -0.00011472676851553842, + "logits/chosen": -0.22151263058185577, + "logits/rejected": -0.3165542185306549, + "logps/chosen": -0.00018160900799557567, + "logps/rejected": -2.4688503742218018, + "loss": 0.6201, + "nll_loss": 0.15501704812049866, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.816090298234485e-05, + "rewards/margins": 0.2468668520450592, + "rewards/rejected": -0.2468850165605545, + "step": 10703 + }, + { + "epoch": 7.402489626556017, + "grad_norm": 6.767401218414307, + "learning_rate": 1.4430613185799907e-05, + "log_odds_chosen": 12.096673965454102, + "log_odds_ratio": -1.661527858232148e-05, + "logits/chosen": -0.36304378509521484, + "logits/rejected": -0.34841278195381165, + "logps/chosen": -0.00022185189300216734, + "logps/rejected": -2.969815969467163, + "loss": 0.4502, + "nll_loss": 0.1125505343079567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2185189664014615e-05, + "rewards/margins": 0.29695940017700195, + "rewards/rejected": -0.2969816029071808, + "step": 10704 + }, + { + "epoch": 7.403181189488244, + "grad_norm": 4.77768611907959, + "learning_rate": 1.442677116950976e-05, + "log_odds_chosen": 10.152460098266602, + "log_odds_ratio": -0.0015317605575546622, + "logits/chosen": -0.10069093108177185, + "logits/rejected": -0.29805952310562134, + "logps/chosen": -0.000922149105463177, + "logps/rejected": -1.9319921731948853, + "loss": 0.4613, + "nll_loss": 0.11516660451889038, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.221491200150922e-05, + "rewards/margins": 0.193107008934021, + "rewards/rejected": -0.19319921731948853, + "step": 10705 + }, + { + "epoch": 7.403872752420471, + "grad_norm": 10.977682113647461, + "learning_rate": 1.4422929153219612e-05, + "log_odds_chosen": 12.02888011932373, + "log_odds_ratio": -1.1932907909795176e-05, + "logits/chosen": -0.28198131918907166, + "logits/rejected": -0.30092811584472656, + "logps/chosen": -0.00018142201588489115, + "logps/rejected": -3.0927035808563232, + "loss": 1.0384, + "nll_loss": 0.25958961248397827, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8142201952286996e-05, + "rewards/margins": 0.30925223231315613, + "rewards/rejected": -0.30927035212516785, + "step": 10706 + }, + { + "epoch": 7.404564315352697, + "grad_norm": 5.967758655548096, + "learning_rate": 1.441908713692946e-05, + "log_odds_chosen": 10.04418659210205, + "log_odds_ratio": -0.00025337719125673175, + "logits/chosen": -0.2274404764175415, + "logits/rejected": -0.20164667069911957, + "logps/chosen": -0.007281972095370293, + "logps/rejected": -2.104808807373047, + "loss": 0.4879, + "nll_loss": 0.12194767594337463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007281972211785614, + "rewards/margins": 0.20975270867347717, + "rewards/rejected": -0.21048089861869812, + "step": 10707 + }, + { + "epoch": 7.405255878284924, + "grad_norm": 4.674463272094727, + "learning_rate": 1.4415245120639312e-05, + "log_odds_chosen": 10.830794334411621, + "log_odds_ratio": -0.00020141026470810175, + "logits/chosen": -0.346174031496048, + "logits/rejected": -0.36087295413017273, + "logps/chosen": -0.0022484012879431248, + "logps/rejected": -3.0499613285064697, + "loss": 0.8594, + "nll_loss": 0.21483436226844788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022484012879431248, + "rewards/margins": 0.3047713041305542, + "rewards/rejected": -0.304996132850647, + "step": 10708 + }, + { + "epoch": 7.405947441217151, + "grad_norm": 8.939460754394531, + "learning_rate": 1.4411403104349164e-05, + "log_odds_chosen": 10.59695816040039, + "log_odds_ratio": -5.151807999936864e-05, + "logits/chosen": 0.13660788536071777, + "logits/rejected": 0.08717255294322968, + "logps/chosen": -0.0004426073282957077, + "logps/rejected": -2.0530447959899902, + "loss": 0.4217, + "nll_loss": 0.10542188584804535, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.42607379227411e-05, + "rewards/margins": 0.20526021718978882, + "rewards/rejected": -0.20530450344085693, + "step": 10709 + }, + { + "epoch": 7.406639004149378, + "grad_norm": 5.197713851928711, + "learning_rate": 1.4407561088059013e-05, + "log_odds_chosen": 11.313644409179688, + "log_odds_ratio": -2.7155860152561218e-05, + "logits/chosen": -0.37392541766166687, + "logits/rejected": -0.37939178943634033, + "logps/chosen": -0.0004172790504526347, + "logps/rejected": -3.008484363555908, + "loss": 0.5289, + "nll_loss": 0.132216677069664, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1727907955646515e-05, + "rewards/margins": 0.3008067309856415, + "rewards/rejected": -0.30084845423698425, + "step": 10710 + }, + { + "epoch": 7.407330567081605, + "grad_norm": 5.047555446624756, + "learning_rate": 1.4403719071768865e-05, + "log_odds_chosen": 10.623932838439941, + "log_odds_ratio": -0.0003137765161227435, + "logits/chosen": -0.21119244396686554, + "logits/rejected": -0.2765723466873169, + "logps/chosen": -0.00016566917474847287, + "logps/rejected": -2.1351804733276367, + "loss": 0.6784, + "nll_loss": 0.16956068575382233, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6566917111049406e-05, + "rewards/margins": 0.2135014683008194, + "rewards/rejected": -0.21351803839206696, + "step": 10711 + }, + { + "epoch": 7.408022130013832, + "grad_norm": 14.978245735168457, + "learning_rate": 1.4399877055478716e-05, + "log_odds_chosen": 11.25374698638916, + "log_odds_ratio": -5.259553654468618e-05, + "logits/chosen": -0.16523447632789612, + "logits/rejected": -0.2930530905723572, + "logps/chosen": -0.00020616357505787164, + "logps/rejected": -2.6449317932128906, + "loss": 0.447, + "nll_loss": 0.11174537241458893, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0616356778191403e-05, + "rewards/margins": 0.2644725441932678, + "rewards/rejected": -0.2644931674003601, + "step": 10712 + }, + { + "epoch": 7.408713692946058, + "grad_norm": 8.17994213104248, + "learning_rate": 1.4396035039188565e-05, + "log_odds_chosen": 10.682540893554688, + "log_odds_ratio": -0.0001834592258092016, + "logits/chosen": -0.19058746099472046, + "logits/rejected": -0.3210287392139435, + "logps/chosen": -0.0003053398395422846, + "logps/rejected": -2.055088996887207, + "loss": 0.5244, + "nll_loss": 0.13107192516326904, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0533985409419984e-05, + "rewards/margins": 0.20547837018966675, + "rewards/rejected": -0.20550891757011414, + "step": 10713 + }, + { + "epoch": 7.409405255878285, + "grad_norm": 5.7820353507995605, + "learning_rate": 1.4392193022898418e-05, + "log_odds_chosen": 11.297080993652344, + "log_odds_ratio": -0.00011493961210362613, + "logits/chosen": -0.6253798007965088, + "logits/rejected": -0.6458042860031128, + "logps/chosen": -0.00022130817524157465, + "logps/rejected": -2.653463840484619, + "loss": 0.3948, + "nll_loss": 0.0986841544508934, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2130816432763822e-05, + "rewards/margins": 0.26532429456710815, + "rewards/rejected": -0.2653464078903198, + "step": 10714 + }, + { + "epoch": 7.410096818810512, + "grad_norm": 4.849417209625244, + "learning_rate": 1.438835100660827e-05, + "log_odds_chosen": 11.000490188598633, + "log_odds_ratio": -2.988562118844129e-05, + "logits/chosen": -0.1645212173461914, + "logits/rejected": -0.20662593841552734, + "logps/chosen": -0.00014986046880949289, + "logps/rejected": -2.0731770992279053, + "loss": 0.3641, + "nll_loss": 0.09103177487850189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4986046153353527e-05, + "rewards/margins": 0.20730271935462952, + "rewards/rejected": -0.20731770992279053, + "step": 10715 + }, + { + "epoch": 7.410788381742739, + "grad_norm": 8.789484024047852, + "learning_rate": 1.438450899031812e-05, + "log_odds_chosen": 9.726442337036133, + "log_odds_ratio": -0.21301937103271484, + "logits/chosen": -0.6469697952270508, + "logits/rejected": -0.6698107123374939, + "logps/chosen": -0.02360660955309868, + "logps/rejected": -1.9374034404754639, + "loss": 0.451, + "nll_loss": 0.09143715351819992, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0023606610484421253, + "rewards/margins": 0.19137969613075256, + "rewards/rejected": -0.1937403529882431, + "step": 10716 + }, + { + "epoch": 7.411479944674966, + "grad_norm": 3.8728785514831543, + "learning_rate": 1.438066697402797e-05, + "log_odds_chosen": 9.455845832824707, + "log_odds_ratio": -0.001326136291027069, + "logits/chosen": -0.06192685291171074, + "logits/rejected": -0.1491842418909073, + "logps/chosen": -0.0022150897420942783, + "logps/rejected": -1.7510604858398438, + "loss": 0.4582, + "nll_loss": 0.11441729962825775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002215089916717261, + "rewards/margins": 0.17488452792167664, + "rewards/rejected": -0.17510606348514557, + "step": 10717 + }, + { + "epoch": 7.412171507607193, + "grad_norm": 19.989530563354492, + "learning_rate": 1.4376824957737822e-05, + "log_odds_chosen": 10.227559089660645, + "log_odds_ratio": -0.0007945247925817966, + "logits/chosen": -0.47161751985549927, + "logits/rejected": -0.5490267276763916, + "logps/chosen": -0.0002686060615815222, + "logps/rejected": -2.269040584564209, + "loss": 0.7494, + "nll_loss": 0.18727454543113708, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6860607249545865e-05, + "rewards/margins": 0.22687721252441406, + "rewards/rejected": -0.22690407931804657, + "step": 10718 + }, + { + "epoch": 7.412863070539419, + "grad_norm": 5.017844200134277, + "learning_rate": 1.4372982941447671e-05, + "log_odds_chosen": 10.590387344360352, + "log_odds_ratio": -6.486523489002138e-05, + "logits/chosen": -0.4550924301147461, + "logits/rejected": -0.4405045509338379, + "logps/chosen": -0.0001809865643735975, + "logps/rejected": -1.7180522680282593, + "loss": 0.5406, + "nll_loss": 0.135142520070076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.809865716495551e-05, + "rewards/margins": 0.1717871129512787, + "rewards/rejected": -0.1718052327632904, + "step": 10719 + }, + { + "epoch": 7.413554633471646, + "grad_norm": 5.447579860687256, + "learning_rate": 1.4369140925157524e-05, + "log_odds_chosen": 10.651297569274902, + "log_odds_ratio": -8.183491445379332e-05, + "logits/chosen": -0.21291130781173706, + "logits/rejected": -0.34738385677337646, + "logps/chosen": -0.00040813308442011476, + "logps/rejected": -2.471925735473633, + "loss": 0.5637, + "nll_loss": 0.14090707898139954, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0813312807586044e-05, + "rewards/margins": 0.24715176224708557, + "rewards/rejected": -0.2471925914287567, + "step": 10720 + }, + { + "epoch": 7.414246196403873, + "grad_norm": 7.02000617980957, + "learning_rate": 1.4365298908867375e-05, + "log_odds_chosen": 10.658409118652344, + "log_odds_ratio": -6.082295294618234e-05, + "logits/chosen": 0.12181103974580765, + "logits/rejected": 0.011016082018613815, + "logps/chosen": -0.0004951843875460327, + "logps/rejected": -1.7555874586105347, + "loss": 0.5197, + "nll_loss": 0.1299232691526413, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951843948219903e-05, + "rewards/margins": 0.1755092293024063, + "rewards/rejected": -0.17555874586105347, + "step": 10721 + }, + { + "epoch": 7.4149377593361, + "grad_norm": 3.4330828189849854, + "learning_rate": 1.4361456892577224e-05, + "log_odds_chosen": 11.125907897949219, + "log_odds_ratio": -4.971097951056436e-05, + "logits/chosen": -0.6454315185546875, + "logits/rejected": -0.6421571969985962, + "logps/chosen": -0.0001896456378744915, + "logps/rejected": -2.39558744430542, + "loss": 0.677, + "nll_loss": 0.1692388504743576, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8964565242640674e-05, + "rewards/margins": 0.23953978717327118, + "rewards/rejected": -0.23955872654914856, + "step": 10722 + }, + { + "epoch": 7.415629322268327, + "grad_norm": 4.539621353149414, + "learning_rate": 1.4357614876287076e-05, + "log_odds_chosen": 11.152408599853516, + "log_odds_ratio": -4.087133129360154e-05, + "logits/chosen": -0.2305634319782257, + "logits/rejected": -0.3199521005153656, + "logps/chosen": -0.00014302245108410716, + "logps/rejected": -2.1154966354370117, + "loss": 0.5057, + "nll_loss": 0.12642784416675568, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4302244380814955e-05, + "rewards/margins": 0.21153536438941956, + "rewards/rejected": -0.21154966950416565, + "step": 10723 + }, + { + "epoch": 7.4163208852005535, + "grad_norm": 3.9620726108551025, + "learning_rate": 1.4353772859996929e-05, + "log_odds_chosen": 11.445850372314453, + "log_odds_ratio": -0.000533333863131702, + "logits/chosen": -0.19320987164974213, + "logits/rejected": -0.2019619345664978, + "logps/chosen": -0.0006852270453236997, + "logps/rejected": -2.631791591644287, + "loss": 0.4167, + "nll_loss": 0.10412156581878662, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.852269871160388e-05, + "rewards/margins": 0.2631106674671173, + "rewards/rejected": -0.2631791830062866, + "step": 10724 + }, + { + "epoch": 7.41701244813278, + "grad_norm": 4.122745990753174, + "learning_rate": 1.4349930843706778e-05, + "log_odds_chosen": 9.989114761352539, + "log_odds_ratio": -0.00020754087017849088, + "logits/chosen": -0.23228757083415985, + "logits/rejected": -0.26432672142982483, + "logps/chosen": -0.004630462732166052, + "logps/rejected": -2.2054288387298584, + "loss": 0.5699, + "nll_loss": 0.14244690537452698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004630462499335408, + "rewards/margins": 0.22007985413074493, + "rewards/rejected": -0.22054289281368256, + "step": 10725 + }, + { + "epoch": 7.417704011065007, + "grad_norm": 4.885697364807129, + "learning_rate": 1.4346088827416628e-05, + "log_odds_chosen": 10.486754417419434, + "log_odds_ratio": -7.210951298475266e-05, + "logits/chosen": -0.21432363986968994, + "logits/rejected": -0.28481611609458923, + "logps/chosen": -0.0004409044631756842, + "logps/rejected": -2.3748602867126465, + "loss": 0.4142, + "nll_loss": 0.10355227440595627, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.409044413478114e-05, + "rewards/margins": 0.23744192719459534, + "rewards/rejected": -0.23748603463172913, + "step": 10726 + }, + { + "epoch": 7.418395573997234, + "grad_norm": 5.20670223236084, + "learning_rate": 1.434224681112648e-05, + "log_odds_chosen": 10.297154426574707, + "log_odds_ratio": -0.000343418592819944, + "logits/chosen": -0.43424534797668457, + "logits/rejected": -0.27497410774230957, + "logps/chosen": -0.00046478534932248294, + "logps/rejected": -1.8652817010879517, + "loss": 0.3707, + "nll_loss": 0.09263065457344055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.647853711503558e-05, + "rewards/margins": 0.18648171424865723, + "rewards/rejected": -0.18652817606925964, + "step": 10727 + }, + { + "epoch": 7.419087136929461, + "grad_norm": 7.701286792755127, + "learning_rate": 1.433840479483633e-05, + "log_odds_chosen": 10.816944122314453, + "log_odds_ratio": -0.00010325042239855975, + "logits/chosen": -0.21323725581169128, + "logits/rejected": -0.2841075360774994, + "logps/chosen": -0.00025166134582832456, + "logps/rejected": -2.0154638290405273, + "loss": 0.543, + "nll_loss": 0.13573779165744781, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5166134946630336e-05, + "rewards/margins": 0.20152121782302856, + "rewards/rejected": -0.20154638588428497, + "step": 10728 + }, + { + "epoch": 7.419778699861688, + "grad_norm": 3.741018056869507, + "learning_rate": 1.4334562778546182e-05, + "log_odds_chosen": 9.553108215332031, + "log_odds_ratio": -0.00026011423324234784, + "logits/chosen": -0.8217746019363403, + "logits/rejected": -0.849798321723938, + "logps/chosen": -0.0003743913257494569, + "logps/rejected": -1.1073468923568726, + "loss": 0.358, + "nll_loss": 0.08946304023265839, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7439131119754165e-05, + "rewards/margins": 0.11069725453853607, + "rewards/rejected": -0.11073470115661621, + "step": 10729 + }, + { + "epoch": 7.4204702627939145, + "grad_norm": 9.998017311096191, + "learning_rate": 1.4330720762256033e-05, + "log_odds_chosen": 9.961030006408691, + "log_odds_ratio": -0.0007552761235274374, + "logits/chosen": -0.31879276037216187, + "logits/rejected": -0.3525419235229492, + "logps/chosen": -0.0007340236334130168, + "logps/rejected": -1.8016279935836792, + "loss": 0.5934, + "nll_loss": 0.1482788324356079, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.340236334130168e-05, + "rewards/margins": 0.18008939921855927, + "rewards/rejected": -0.18016280233860016, + "step": 10730 + }, + { + "epoch": 7.421161825726141, + "grad_norm": 4.061387062072754, + "learning_rate": 1.4326878745965882e-05, + "log_odds_chosen": 11.667825698852539, + "log_odds_ratio": -4.544300463749096e-05, + "logits/chosen": -0.3991694152355194, + "logits/rejected": -0.4634754955768585, + "logps/chosen": -0.00041911107837222517, + "logps/rejected": -3.1630825996398926, + "loss": 0.5193, + "nll_loss": 0.12982602417469025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.191110929241404e-05, + "rewards/margins": 0.31626635789871216, + "rewards/rejected": -0.31630825996398926, + "step": 10731 + }, + { + "epoch": 7.421853388658368, + "grad_norm": 3.5463762283325195, + "learning_rate": 1.4323036729675735e-05, + "log_odds_chosen": 10.553950309753418, + "log_odds_ratio": -0.0005513601936399937, + "logits/chosen": -0.9708388447761536, + "logits/rejected": -0.9920282363891602, + "logps/chosen": -0.00023983963183127344, + "logps/rejected": -2.1186563968658447, + "loss": 0.5151, + "nll_loss": 0.12870892882347107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3983962819329463e-05, + "rewards/margins": 0.2118416726589203, + "rewards/rejected": -0.21186566352844238, + "step": 10732 + }, + { + "epoch": 7.422544951590595, + "grad_norm": 4.914392948150635, + "learning_rate": 1.4319194713385587e-05, + "log_odds_chosen": 10.95601749420166, + "log_odds_ratio": -6.64880353724584e-05, + "logits/chosen": -0.5185230374336243, + "logits/rejected": -0.5552864670753479, + "logps/chosen": -0.0002651938411872834, + "logps/rejected": -2.116021156311035, + "loss": 0.487, + "nll_loss": 0.12174984812736511, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.65193848463241e-05, + "rewards/margins": 0.21157559752464294, + "rewards/rejected": -0.2116020917892456, + "step": 10733 + }, + { + "epoch": 7.423236514522822, + "grad_norm": 8.798486709594727, + "learning_rate": 1.4315352697095436e-05, + "log_odds_chosen": 11.923349380493164, + "log_odds_ratio": -4.4096173951402307e-05, + "logits/chosen": -0.5265907049179077, + "logits/rejected": -0.5732027888298035, + "logps/chosen": -0.0003586837265174836, + "logps/rejected": -3.3685059547424316, + "loss": 0.4845, + "nll_loss": 0.12111400067806244, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.58683719241526e-05, + "rewards/margins": 0.3368147313594818, + "rewards/rejected": -0.3368505835533142, + "step": 10734 + }, + { + "epoch": 7.423928077455049, + "grad_norm": 4.281439781188965, + "learning_rate": 1.4311510680805287e-05, + "log_odds_chosen": 10.40571117401123, + "log_odds_ratio": -0.00017208530334755778, + "logits/chosen": -0.22984299063682556, + "logits/rejected": -0.34365904331207275, + "logps/chosen": -0.0007090799626894295, + "logps/rejected": -1.9753313064575195, + "loss": 0.4389, + "nll_loss": 0.10970845073461533, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090799772413447e-05, + "rewards/margins": 0.19746221601963043, + "rewards/rejected": -0.19753314554691315, + "step": 10735 + }, + { + "epoch": 7.4246196403872755, + "grad_norm": 4.984375953674316, + "learning_rate": 1.430766866451514e-05, + "log_odds_chosen": 10.356589317321777, + "log_odds_ratio": -0.00015899700520094484, + "logits/chosen": -0.630196213722229, + "logits/rejected": -0.5708923935890198, + "logps/chosen": -0.0002872415934689343, + "logps/rejected": -2.1628894805908203, + "loss": 0.4995, + "nll_loss": 0.1248672604560852, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8724161893478595e-05, + "rewards/margins": 0.21626022458076477, + "rewards/rejected": -0.2162889540195465, + "step": 10736 + }, + { + "epoch": 7.425311203319502, + "grad_norm": 4.451498508453369, + "learning_rate": 1.4303826648224988e-05, + "log_odds_chosen": 10.957164764404297, + "log_odds_ratio": -2.3646662157261744e-05, + "logits/chosen": -0.6510589122772217, + "logits/rejected": -0.6703109741210938, + "logps/chosen": -6.0195921832928434e-05, + "logps/rejected": -1.4261058568954468, + "loss": 0.3418, + "nll_loss": 0.08545950800180435, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019592547090724e-06, + "rewards/margins": 0.14260455965995789, + "rewards/rejected": -0.1426105797290802, + "step": 10737 + }, + { + "epoch": 7.426002766251729, + "grad_norm": 5.354230880737305, + "learning_rate": 1.429998463193484e-05, + "log_odds_chosen": 10.506896018981934, + "log_odds_ratio": -0.0001476502511650324, + "logits/chosen": -0.5877447128295898, + "logits/rejected": -0.6587046384811401, + "logps/chosen": -0.00017094443319365382, + "logps/rejected": -1.8655098676681519, + "loss": 0.6879, + "nll_loss": 0.17195531725883484, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7094444046961144e-05, + "rewards/margins": 0.18653389811515808, + "rewards/rejected": -0.18655097484588623, + "step": 10738 + }, + { + "epoch": 7.426694329183956, + "grad_norm": 4.813597679138184, + "learning_rate": 1.4296142615644692e-05, + "log_odds_chosen": 11.009190559387207, + "log_odds_ratio": -4.4412015995476395e-05, + "logits/chosen": -0.5427803993225098, + "logits/rejected": -0.5382024049758911, + "logps/chosen": -0.00028435117565095425, + "logps/rejected": -2.667729377746582, + "loss": 0.4004, + "nll_loss": 0.10008691251277924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.843511538230814e-05, + "rewards/margins": 0.2667444944381714, + "rewards/rejected": -0.26677295565605164, + "step": 10739 + }, + { + "epoch": 7.427385892116183, + "grad_norm": 4.3468403816223145, + "learning_rate": 1.429230059935454e-05, + "log_odds_chosen": 12.126304626464844, + "log_odds_ratio": -1.4597450899600517e-05, + "logits/chosen": -0.11912352591753006, + "logits/rejected": -0.07812384516000748, + "logps/chosen": -8.256173168774694e-05, + "logps/rejected": -2.619260311126709, + "loss": 0.3048, + "nll_loss": 0.07620352506637573, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.256173714471515e-06, + "rewards/margins": 0.26191776990890503, + "rewards/rejected": -0.2619260251522064, + "step": 10740 + }, + { + "epoch": 7.42807745504841, + "grad_norm": 5.906902313232422, + "learning_rate": 1.4288458583064393e-05, + "log_odds_chosen": 10.210342407226562, + "log_odds_ratio": -0.00011116905079688877, + "logits/chosen": -0.35095638036727905, + "logits/rejected": -0.18661539256572723, + "logps/chosen": -0.0003797630197368562, + "logps/rejected": -1.7496932744979858, + "loss": 0.5041, + "nll_loss": 0.12601202726364136, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7976304156472906e-05, + "rewards/margins": 0.1749313473701477, + "rewards/rejected": -0.17496933043003082, + "step": 10741 + }, + { + "epoch": 7.4287690179806365, + "grad_norm": 5.483175754547119, + "learning_rate": 1.4284616566774245e-05, + "log_odds_chosen": 10.167135238647461, + "log_odds_ratio": -0.00012993926065973938, + "logits/chosen": -0.37157195806503296, + "logits/rejected": -0.44876325130462646, + "logps/chosen": -0.0008369782008230686, + "logps/rejected": -2.453321933746338, + "loss": 0.3618, + "nll_loss": 0.09044036269187927, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.369782153749838e-05, + "rewards/margins": 0.24524849653244019, + "rewards/rejected": -0.24533218145370483, + "step": 10742 + }, + { + "epoch": 7.429460580912863, + "grad_norm": 5.415278434753418, + "learning_rate": 1.4280774550484095e-05, + "log_odds_chosen": 9.506936073303223, + "log_odds_ratio": -0.0006739329546689987, + "logits/chosen": -0.33719977736473083, + "logits/rejected": -0.34382933378219604, + "logps/chosen": -0.0023659905418753624, + "logps/rejected": -2.140268325805664, + "loss": 0.5052, + "nll_loss": 0.12623867392539978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002365990512771532, + "rewards/margins": 0.21379022300243378, + "rewards/rejected": -0.21402683854103088, + "step": 10743 + }, + { + "epoch": 7.43015214384509, + "grad_norm": 4.979397773742676, + "learning_rate": 1.4276932534193945e-05, + "log_odds_chosen": 11.472922325134277, + "log_odds_ratio": -3.206637848052196e-05, + "logits/chosen": 0.04949849843978882, + "logits/rejected": -0.01601281762123108, + "logps/chosen": -0.0002056766825262457, + "logps/rejected": -2.569906234741211, + "loss": 0.8592, + "nll_loss": 0.2147943079471588, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0567669707816094e-05, + "rewards/margins": 0.25697004795074463, + "rewards/rejected": -0.25699061155319214, + "step": 10744 + }, + { + "epoch": 7.430843706777317, + "grad_norm": 5.264523029327393, + "learning_rate": 1.4273090517903798e-05, + "log_odds_chosen": 11.385479927062988, + "log_odds_ratio": -7.055592141114175e-05, + "logits/chosen": -0.5030226111412048, + "logits/rejected": -0.5155960321426392, + "logps/chosen": -0.00022816695854999125, + "logps/rejected": -2.8762707710266113, + "loss": 0.5931, + "nll_loss": 0.14825564622879028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2816697310190648e-05, + "rewards/margins": 0.28760427236557007, + "rewards/rejected": -0.28762707114219666, + "step": 10745 + }, + { + "epoch": 7.431535269709544, + "grad_norm": 8.076372146606445, + "learning_rate": 1.4269248501613647e-05, + "log_odds_chosen": 10.986825942993164, + "log_odds_ratio": -5.322953438735567e-05, + "logits/chosen": -0.3730906844139099, + "logits/rejected": -0.4914498031139374, + "logps/chosen": -0.00014477164950221777, + "logps/rejected": -1.9321510791778564, + "loss": 0.4905, + "nll_loss": 0.12262947112321854, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4477164768322837e-05, + "rewards/margins": 0.19320064783096313, + "rewards/rejected": -0.19321510195732117, + "step": 10746 + }, + { + "epoch": 7.432226832641771, + "grad_norm": 3.97635817527771, + "learning_rate": 1.42654064853235e-05, + "log_odds_chosen": 10.292720794677734, + "log_odds_ratio": -0.00020925466378685087, + "logits/chosen": -0.19483008980751038, + "logits/rejected": -0.10350719839334488, + "logps/chosen": -0.00033111387165263295, + "logps/rejected": -2.2941386699676514, + "loss": 0.423, + "nll_loss": 0.10573115944862366, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3111387892859057e-05, + "rewards/margins": 0.2293807864189148, + "rewards/rejected": -0.22941389679908752, + "step": 10747 + }, + { + "epoch": 7.4329183955739975, + "grad_norm": 14.447975158691406, + "learning_rate": 1.426156446903335e-05, + "log_odds_chosen": 12.107383728027344, + "log_odds_ratio": -4.643085048883222e-05, + "logits/chosen": -0.14821778237819672, + "logits/rejected": -0.15492330491542816, + "logps/chosen": -0.00020316088921390474, + "logps/rejected": -3.0534801483154297, + "loss": 0.6349, + "nll_loss": 0.15871471166610718, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0316088921390474e-05, + "rewards/margins": 0.30532771348953247, + "rewards/rejected": -0.3053480088710785, + "step": 10748 + }, + { + "epoch": 7.433609958506224, + "grad_norm": 4.383937358856201, + "learning_rate": 1.4257722452743199e-05, + "log_odds_chosen": 11.975631713867188, + "log_odds_ratio": -1.2384831279632635e-05, + "logits/chosen": -0.04449837654829025, + "logits/rejected": -0.04871883988380432, + "logps/chosen": -0.00011500051186885685, + "logps/rejected": -2.476550579071045, + "loss": 0.5421, + "nll_loss": 0.13553060591220856, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1500053005875088e-05, + "rewards/margins": 0.2476436048746109, + "rewards/rejected": -0.24765509366989136, + "step": 10749 + }, + { + "epoch": 7.434301521438451, + "grad_norm": 8.04423713684082, + "learning_rate": 1.4253880436453051e-05, + "log_odds_chosen": 12.123249053955078, + "log_odds_ratio": -0.0006074186530895531, + "logits/chosen": -0.17462217807769775, + "logits/rejected": -0.218583881855011, + "logps/chosen": -0.0002956288226414472, + "logps/rejected": -3.7079389095306396, + "loss": 0.8922, + "nll_loss": 0.2229778915643692, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.956288335553836e-05, + "rewards/margins": 0.3707643151283264, + "rewards/rejected": -0.3707938492298126, + "step": 10750 + }, + { + "epoch": 7.434993084370678, + "grad_norm": 4.193935394287109, + "learning_rate": 1.4250038420162904e-05, + "log_odds_chosen": 11.465576171875, + "log_odds_ratio": -1.6591426174272783e-05, + "logits/chosen": -0.18547575175762177, + "logits/rejected": -0.2769421339035034, + "logps/chosen": -8.868890290614218e-05, + "logps/rejected": -2.083763599395752, + "loss": 0.5466, + "nll_loss": 0.13663995265960693, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.868890290614218e-06, + "rewards/margins": 0.2083674967288971, + "rewards/rejected": -0.20837636291980743, + "step": 10751 + }, + { + "epoch": 7.435684647302905, + "grad_norm": 5.5280351638793945, + "learning_rate": 1.4246196403872753e-05, + "log_odds_chosen": 10.965143203735352, + "log_odds_ratio": -0.00019943565712310374, + "logits/chosen": -0.5253455638885498, + "logits/rejected": -0.5380521416664124, + "logps/chosen": -0.00028030495741404593, + "logps/rejected": -2.5139219760894775, + "loss": 0.2926, + "nll_loss": 0.07312865555286407, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8030497560393997e-05, + "rewards/margins": 0.25136417150497437, + "rewards/rejected": -0.2513922154903412, + "step": 10752 + }, + { + "epoch": 7.436376210235132, + "grad_norm": 10.10204792022705, + "learning_rate": 1.4242354387582604e-05, + "log_odds_chosen": 10.661382675170898, + "log_odds_ratio": -5.7524346630088985e-05, + "logits/chosen": -0.3062681555747986, + "logits/rejected": -0.31217724084854126, + "logps/chosen": -0.0003130334662273526, + "logps/rejected": -2.2218077182769775, + "loss": 0.5308, + "nll_loss": 0.13269490003585815, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1303348805522546e-05, + "rewards/margins": 0.22214949131011963, + "rewards/rejected": -0.2221807837486267, + "step": 10753 + }, + { + "epoch": 7.4370677731673585, + "grad_norm": 6.339082717895508, + "learning_rate": 1.4238512371292456e-05, + "log_odds_chosen": 10.925583839416504, + "log_odds_ratio": -0.00032584331347607076, + "logits/chosen": -0.11192497611045837, + "logits/rejected": -0.17509740591049194, + "logps/chosen": -0.00034168068668805063, + "logps/rejected": -2.2192599773406982, + "loss": 0.6147, + "nll_loss": 0.1536414623260498, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.41680679412093e-05, + "rewards/margins": 0.22189182043075562, + "rewards/rejected": -0.2219260036945343, + "step": 10754 + }, + { + "epoch": 7.437759336099585, + "grad_norm": 6.157551288604736, + "learning_rate": 1.4234670355002305e-05, + "log_odds_chosen": 10.910655975341797, + "log_odds_ratio": -5.358006092137657e-05, + "logits/chosen": -0.39808034896850586, + "logits/rejected": -0.46443721652030945, + "logps/chosen": -0.000200995389604941, + "logps/rejected": -2.153860569000244, + "loss": 0.4001, + "nll_loss": 0.10002223402261734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0099540051887743e-05, + "rewards/margins": 0.21536597609519958, + "rewards/rejected": -0.21538607776165009, + "step": 10755 + }, + { + "epoch": 7.438450899031812, + "grad_norm": 4.591843128204346, + "learning_rate": 1.4230828338712158e-05, + "log_odds_chosen": 10.28937816619873, + "log_odds_ratio": -5.2966952353017405e-05, + "logits/chosen": -0.49192312359809875, + "logits/rejected": -0.5401813983917236, + "logps/chosen": -0.00017644742911215872, + "logps/rejected": -1.429124116897583, + "loss": 0.4016, + "nll_loss": 0.10039389133453369, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.764474291121587e-05, + "rewards/margins": 0.14289477467536926, + "rewards/rejected": -0.14291241765022278, + "step": 10756 + }, + { + "epoch": 7.439142461964039, + "grad_norm": 6.4378180503845215, + "learning_rate": 1.4226986322422008e-05, + "log_odds_chosen": 9.888177871704102, + "log_odds_ratio": -0.0002807261480484158, + "logits/chosen": -0.4602343440055847, + "logits/rejected": -0.5490994453430176, + "logps/chosen": -0.0010474890004843473, + "logps/rejected": -1.6598628759384155, + "loss": 0.4625, + "nll_loss": 0.11559551954269409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010474889859324321, + "rewards/margins": 0.16588152945041656, + "rewards/rejected": -0.16598628461360931, + "step": 10757 + }, + { + "epoch": 7.439834024896266, + "grad_norm": 7.4068284034729, + "learning_rate": 1.4223144306131857e-05, + "log_odds_chosen": 10.624796867370605, + "log_odds_ratio": -5.686835356755182e-05, + "logits/chosen": -0.44092679023742676, + "logits/rejected": -0.5704661011695862, + "logps/chosen": -0.00023139704717323184, + "logps/rejected": -2.2564005851745605, + "loss": 0.7921, + "nll_loss": 0.19801200926303864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3139706172514707e-05, + "rewards/margins": 0.225616917014122, + "rewards/rejected": -0.22564005851745605, + "step": 10758 + }, + { + "epoch": 7.440525587828493, + "grad_norm": 5.997343063354492, + "learning_rate": 1.421930228984171e-05, + "log_odds_chosen": 11.066978454589844, + "log_odds_ratio": -4.089455251232721e-05, + "logits/chosen": -0.14664515852928162, + "logits/rejected": -0.24715624749660492, + "logps/chosen": -0.0006729392916895449, + "logps/rejected": -2.328310251235962, + "loss": 0.5871, + "nll_loss": 0.1467694789171219, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729392771376297e-05, + "rewards/margins": 0.23276372253894806, + "rewards/rejected": -0.23283101618289948, + "step": 10759 + }, + { + "epoch": 7.441217150760719, + "grad_norm": 5.868139743804932, + "learning_rate": 1.4215460273551562e-05, + "log_odds_chosen": 11.640571594238281, + "log_odds_ratio": -2.4290455257869326e-05, + "logits/chosen": -0.3725976049900055, + "logits/rejected": -0.4877762794494629, + "logps/chosen": -0.00015499522851314396, + "logps/rejected": -2.7272706031799316, + "loss": 0.715, + "nll_loss": 0.17873850464820862, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5499521396122873e-05, + "rewards/margins": 0.2727115750312805, + "rewards/rejected": -0.2727270722389221, + "step": 10760 + }, + { + "epoch": 7.441908713692946, + "grad_norm": 5.459692478179932, + "learning_rate": 1.4211618257261411e-05, + "log_odds_chosen": 10.97966194152832, + "log_odds_ratio": -0.00010686190944397822, + "logits/chosen": -0.6309956312179565, + "logits/rejected": -0.679790735244751, + "logps/chosen": -0.00012509649968706071, + "logps/rejected": -1.7729841470718384, + "loss": 0.3926, + "nll_loss": 0.09814368188381195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.250964942300925e-05, + "rewards/margins": 0.17728590965270996, + "rewards/rejected": -0.1772984266281128, + "step": 10761 + }, + { + "epoch": 7.442600276625173, + "grad_norm": 10.462207794189453, + "learning_rate": 1.4207776240971262e-05, + "log_odds_chosen": 10.301549911499023, + "log_odds_ratio": -0.00023399626661557704, + "logits/chosen": -0.9923213720321655, + "logits/rejected": -1.0673259496688843, + "logps/chosen": -0.0002473454223945737, + "logps/rejected": -2.0343191623687744, + "loss": 0.3953, + "nll_loss": 0.09880739450454712, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4734541511861607e-05, + "rewards/margins": 0.2034071832895279, + "rewards/rejected": -0.20343193411827087, + "step": 10762 + }, + { + "epoch": 7.4432918395574, + "grad_norm": 3.7132325172424316, + "learning_rate": 1.4203934224681115e-05, + "log_odds_chosen": 11.021075248718262, + "log_odds_ratio": -2.973015944007784e-05, + "logits/chosen": -0.6955537796020508, + "logits/rejected": -0.7272615432739258, + "logps/chosen": -0.00016529949789401144, + "logps/rejected": -2.10345458984375, + "loss": 0.4654, + "nll_loss": 0.1163576990365982, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.652994797041174e-05, + "rewards/margins": 0.21032892167568207, + "rewards/rejected": -0.21034544706344604, + "step": 10763 + }, + { + "epoch": 7.443983402489627, + "grad_norm": 12.608946800231934, + "learning_rate": 1.4200092208390964e-05, + "log_odds_chosen": 11.590502738952637, + "log_odds_ratio": -1.641233393456787e-05, + "logits/chosen": -0.09940451383590698, + "logits/rejected": -0.19625744223594666, + "logps/chosen": -0.00038619639235548675, + "logps/rejected": -2.473310708999634, + "loss": 0.6055, + "nll_loss": 0.15136292576789856, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.861963705276139e-05, + "rewards/margins": 0.24729245901107788, + "rewards/rejected": -0.24733106791973114, + "step": 10764 + }, + { + "epoch": 7.444674965421854, + "grad_norm": 11.930988311767578, + "learning_rate": 1.4196250192100816e-05, + "log_odds_chosen": 11.384069442749023, + "log_odds_ratio": -1.1792275472544134e-05, + "logits/chosen": -0.3510861098766327, + "logits/rejected": -0.3697136342525482, + "logps/chosen": -0.00010838945308933035, + "logps/rejected": -2.1518192291259766, + "loss": 0.4241, + "nll_loss": 0.10602325946092606, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0838944945135154e-05, + "rewards/margins": 0.21517106890678406, + "rewards/rejected": -0.21518190205097198, + "step": 10765 + }, + { + "epoch": 7.44536652835408, + "grad_norm": 6.5892815589904785, + "learning_rate": 1.4192408175810665e-05, + "log_odds_chosen": 11.18331527709961, + "log_odds_ratio": -3.321813346701674e-05, + "logits/chosen": -0.7267346978187561, + "logits/rejected": -0.6547719836235046, + "logps/chosen": -0.00011852014722535387, + "logps/rejected": -1.9096920490264893, + "loss": 0.3928, + "nll_loss": 0.09818601608276367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1852014722535387e-05, + "rewards/margins": 0.19095736742019653, + "rewards/rejected": -0.19096921384334564, + "step": 10766 + }, + { + "epoch": 7.446058091286307, + "grad_norm": 4.197224140167236, + "learning_rate": 1.4188566159520516e-05, + "log_odds_chosen": 11.295459747314453, + "log_odds_ratio": -2.2514745069202036e-05, + "logits/chosen": -0.046457454562187195, + "logits/rejected": -0.06667326390743256, + "logps/chosen": -0.00047718797577545047, + "logps/rejected": -2.5375447273254395, + "loss": 0.5637, + "nll_loss": 0.14091680943965912, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7718796849949285e-05, + "rewards/margins": 0.25370678305625916, + "rewards/rejected": -0.25375446677207947, + "step": 10767 + }, + { + "epoch": 7.446749654218534, + "grad_norm": 3.695483684539795, + "learning_rate": 1.4184724143230368e-05, + "log_odds_chosen": 10.953871726989746, + "log_odds_ratio": -0.00022770927171222866, + "logits/chosen": -0.1730370819568634, + "logits/rejected": -0.16847573220729828, + "logps/chosen": -0.00038342594052664936, + "logps/rejected": -1.9761154651641846, + "loss": 0.6385, + "nll_loss": 0.1596061736345291, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8342594052664936e-05, + "rewards/margins": 0.1975732147693634, + "rewards/rejected": -0.19761154055595398, + "step": 10768 + }, + { + "epoch": 7.447441217150761, + "grad_norm": 4.151753902435303, + "learning_rate": 1.4180882126940217e-05, + "log_odds_chosen": 10.379537582397461, + "log_odds_ratio": -0.0001264579186681658, + "logits/chosen": -0.23954395949840546, + "logits/rejected": -0.39786607027053833, + "logps/chosen": -0.0002555370156187564, + "logps/rejected": -1.9802124500274658, + "loss": 0.4602, + "nll_loss": 0.11504577100276947, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5553703380865045e-05, + "rewards/margins": 0.19799569249153137, + "rewards/rejected": -0.19802124798297882, + "step": 10769 + }, + { + "epoch": 7.448132780082988, + "grad_norm": 4.965488433837891, + "learning_rate": 1.417704011065007e-05, + "log_odds_chosen": 10.943584442138672, + "log_odds_ratio": -4.247497781761922e-05, + "logits/chosen": -0.40447431802749634, + "logits/rejected": -0.41507893800735474, + "logps/chosen": -0.00021655272576026618, + "logps/rejected": -1.846780776977539, + "loss": 0.5371, + "nll_loss": 0.13427071273326874, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.165527257602662e-05, + "rewards/margins": 0.18465642631053925, + "rewards/rejected": -0.1846780925989151, + "step": 10770 + }, + { + "epoch": 7.448824343015215, + "grad_norm": 5.554091930389404, + "learning_rate": 1.417319809435992e-05, + "log_odds_chosen": 10.870664596557617, + "log_odds_ratio": -3.63989602192305e-05, + "logits/chosen": -0.4929801821708679, + "logits/rejected": -0.4868937134742737, + "logps/chosen": -0.00014760282647330314, + "logps/rejected": -1.8903640508651733, + "loss": 0.4212, + "nll_loss": 0.10529060661792755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4760284102521837e-05, + "rewards/margins": 0.18902164697647095, + "rewards/rejected": -0.18903639912605286, + "step": 10771 + }, + { + "epoch": 7.449515905947441, + "grad_norm": 3.917309284210205, + "learning_rate": 1.416935607806977e-05, + "log_odds_chosen": 10.563467025756836, + "log_odds_ratio": -4.412873022374697e-05, + "logits/chosen": -0.17110557854175568, + "logits/rejected": -0.023961514234542847, + "logps/chosen": -0.0002084320003632456, + "logps/rejected": -1.9825282096862793, + "loss": 0.381, + "nll_loss": 0.09524839371442795, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.08431993087288e-05, + "rewards/margins": 0.19823198020458221, + "rewards/rejected": -0.1982528269290924, + "step": 10772 + }, + { + "epoch": 7.450207468879668, + "grad_norm": 20.687332153320312, + "learning_rate": 1.4165514061779622e-05, + "log_odds_chosen": 10.863073348999023, + "log_odds_ratio": -5.997123662382364e-05, + "logits/chosen": -0.7228338718414307, + "logits/rejected": -0.7402847409248352, + "logps/chosen": -0.00035104190465062857, + "logps/rejected": -2.2573041915893555, + "loss": 0.4832, + "nll_loss": 0.12080300599336624, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5104189009871334e-05, + "rewards/margins": 0.22569534182548523, + "rewards/rejected": -0.22573043406009674, + "step": 10773 + }, + { + "epoch": 7.450899031811895, + "grad_norm": 13.213074684143066, + "learning_rate": 1.4161672045489475e-05, + "log_odds_chosen": 11.305316925048828, + "log_odds_ratio": -2.8845432098023593e-05, + "logits/chosen": -0.43465813994407654, + "logits/rejected": -0.469184011220932, + "logps/chosen": -0.00020464364206418395, + "logps/rejected": -2.64137864112854, + "loss": 0.4263, + "nll_loss": 0.10656726360321045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0464365661609918e-05, + "rewards/margins": 0.26411741971969604, + "rewards/rejected": -0.264137864112854, + "step": 10774 + }, + { + "epoch": 7.451590594744122, + "grad_norm": 5.760486602783203, + "learning_rate": 1.4157830029199324e-05, + "log_odds_chosen": 11.941006660461426, + "log_odds_ratio": -1.6288035112665966e-05, + "logits/chosen": -0.22816580533981323, + "logits/rejected": -0.323006808757782, + "logps/chosen": -0.0001421998895239085, + "logps/rejected": -3.1332175731658936, + "loss": 0.5396, + "nll_loss": 0.13490593433380127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4219987860997207e-05, + "rewards/margins": 0.313307523727417, + "rewards/rejected": -0.3133217394351959, + "step": 10775 + }, + { + "epoch": 7.452282157676349, + "grad_norm": 4.831750869750977, + "learning_rate": 1.4153988012909174e-05, + "log_odds_chosen": 10.773515701293945, + "log_odds_ratio": -8.02901922725141e-05, + "logits/chosen": -0.43861153721809387, + "logits/rejected": -0.4492945075035095, + "logps/chosen": -0.00017901469254866242, + "logps/rejected": -2.006941318511963, + "loss": 0.4554, + "nll_loss": 0.11384207755327225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7901469618664123e-05, + "rewards/margins": 0.20067623257637024, + "rewards/rejected": -0.20069414377212524, + "step": 10776 + }, + { + "epoch": 7.4529737206085755, + "grad_norm": 6.30268669128418, + "learning_rate": 1.4150145996619027e-05, + "log_odds_chosen": 11.054546356201172, + "log_odds_ratio": -0.00010250294872093946, + "logits/chosen": -0.25811412930488586, + "logits/rejected": -0.3270358145236969, + "logps/chosen": -0.0002650795504450798, + "logps/rejected": -2.357173442840576, + "loss": 0.4343, + "nll_loss": 0.10856799781322479, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6507957954891026e-05, + "rewards/margins": 0.2356908619403839, + "rewards/rejected": -0.23571735620498657, + "step": 10777 + }, + { + "epoch": 7.453665283540802, + "grad_norm": 4.888160705566406, + "learning_rate": 1.4146303980328876e-05, + "log_odds_chosen": 10.101419448852539, + "log_odds_ratio": -0.0003310735628474504, + "logits/chosen": -0.5871963500976562, + "logits/rejected": -0.5617615580558777, + "logps/chosen": -0.0007257298566401005, + "logps/rejected": -1.4469420909881592, + "loss": 0.3538, + "nll_loss": 0.08840520679950714, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.257298420881853e-05, + "rewards/margins": 0.14462164044380188, + "rewards/rejected": -0.14469420909881592, + "step": 10778 + }, + { + "epoch": 7.454356846473029, + "grad_norm": 5.455580234527588, + "learning_rate": 1.4142461964038728e-05, + "log_odds_chosen": 10.255231857299805, + "log_odds_ratio": -9.311466419603676e-05, + "logits/chosen": -0.4162459373474121, + "logits/rejected": -0.4459051191806793, + "logps/chosen": -0.0005949109909124672, + "logps/rejected": -2.145630359649658, + "loss": 0.8744, + "nll_loss": 0.21859851479530334, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9491099818842486e-05, + "rewards/margins": 0.21450357139110565, + "rewards/rejected": -0.2145630568265915, + "step": 10779 + }, + { + "epoch": 7.455048409405256, + "grad_norm": 4.634705543518066, + "learning_rate": 1.4138619947748579e-05, + "log_odds_chosen": 12.55921745300293, + "log_odds_ratio": -7.130900485208258e-06, + "logits/chosen": -0.43504297733306885, + "logits/rejected": -0.39104989171028137, + "logps/chosen": -0.00015669013373553753, + "logps/rejected": -3.515052080154419, + "loss": 0.3879, + "nll_loss": 0.09698067605495453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5669013009755872e-05, + "rewards/margins": 0.3514895439147949, + "rewards/rejected": -0.35150521993637085, + "step": 10780 + }, + { + "epoch": 7.455739972337483, + "grad_norm": 8.080259323120117, + "learning_rate": 1.4134777931458428e-05, + "log_odds_chosen": 11.038820266723633, + "log_odds_ratio": -3.259784716647118e-05, + "logits/chosen": -0.9450827836990356, + "logits/rejected": -0.9674822092056274, + "logps/chosen": -8.301252091769129e-05, + "logps/rejected": -1.79734206199646, + "loss": 0.4615, + "nll_loss": 0.11537669599056244, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.30125281936489e-06, + "rewards/margins": 0.17972591519355774, + "rewards/rejected": -0.17973420023918152, + "step": 10781 + }, + { + "epoch": 7.45643153526971, + "grad_norm": 6.411631107330322, + "learning_rate": 1.413093591516828e-05, + "log_odds_chosen": 11.170829772949219, + "log_odds_ratio": -2.751310239546001e-05, + "logits/chosen": -0.18959954380989075, + "logits/rejected": -0.21842291951179504, + "logps/chosen": -0.00018193494179286063, + "logps/rejected": -2.25022554397583, + "loss": 0.3511, + "nll_loss": 0.08777038007974625, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.819349199649878e-05, + "rewards/margins": 0.2250043749809265, + "rewards/rejected": -0.2250225692987442, + "step": 10782 + }, + { + "epoch": 7.4571230982019365, + "grad_norm": 4.9749369621276855, + "learning_rate": 1.4127093898878133e-05, + "log_odds_chosen": 11.735504150390625, + "log_odds_ratio": -1.129702286561951e-05, + "logits/chosen": -0.031236648559570312, + "logits/rejected": -0.15600450336933136, + "logps/chosen": -0.00017883992404676974, + "logps/rejected": -2.7734787464141846, + "loss": 0.7117, + "nll_loss": 0.1779349446296692, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7883992768474855e-05, + "rewards/margins": 0.2773299813270569, + "rewards/rejected": -0.2773478627204895, + "step": 10783 + }, + { + "epoch": 7.457814661134163, + "grad_norm": 7.109059810638428, + "learning_rate": 1.4123251882587982e-05, + "log_odds_chosen": 10.662548065185547, + "log_odds_ratio": -0.00014169953647069633, + "logits/chosen": -0.4513861835002899, + "logits/rejected": -0.4910764992237091, + "logps/chosen": -0.0001652487990213558, + "logps/rejected": -2.0241336822509766, + "loss": 0.3626, + "nll_loss": 0.09062620252370834, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.652488026593346e-05, + "rewards/margins": 0.20239682495594025, + "rewards/rejected": -0.20241336524486542, + "step": 10784 + }, + { + "epoch": 7.45850622406639, + "grad_norm": 4.813906192779541, + "learning_rate": 1.4119409866297833e-05, + "log_odds_chosen": 11.187372207641602, + "log_odds_ratio": -1.9936051103286445e-05, + "logits/chosen": -0.2327749878168106, + "logits/rejected": -0.30488571524620056, + "logps/chosen": -0.0004442424396984279, + "logps/rejected": -2.9384379386901855, + "loss": 0.5334, + "nll_loss": 0.1333410143852234, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.442424324224703e-05, + "rewards/margins": 0.29379940032958984, + "rewards/rejected": -0.2938438057899475, + "step": 10785 + }, + { + "epoch": 7.459197786998617, + "grad_norm": 10.139615058898926, + "learning_rate": 1.4115567850007685e-05, + "log_odds_chosen": 10.874614715576172, + "log_odds_ratio": -2.8667487640632316e-05, + "logits/chosen": -0.11820893734693527, + "logits/rejected": -0.16890212893486023, + "logps/chosen": -0.00012665041140280664, + "logps/rejected": -1.8323454856872559, + "loss": 0.6004, + "nll_loss": 0.15009675920009613, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2665040230785962e-05, + "rewards/margins": 0.18322189152240753, + "rewards/rejected": -0.1832345724105835, + "step": 10786 + }, + { + "epoch": 7.459889349930844, + "grad_norm": 8.36119556427002, + "learning_rate": 1.4111725833717534e-05, + "log_odds_chosen": 10.627643585205078, + "log_odds_ratio": -5.008725565858185e-05, + "logits/chosen": -0.37607264518737793, + "logits/rejected": -0.35164517164230347, + "logps/chosen": -0.000426368264015764, + "logps/rejected": -2.356189727783203, + "loss": 0.3696, + "nll_loss": 0.09239155799150467, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2636824218789116e-05, + "rewards/margins": 0.235576331615448, + "rewards/rejected": -0.2356189787387848, + "step": 10787 + }, + { + "epoch": 7.460580912863071, + "grad_norm": 5.333108901977539, + "learning_rate": 1.4107883817427387e-05, + "log_odds_chosen": 11.991171836853027, + "log_odds_ratio": -9.418118679604959e-06, + "logits/chosen": -0.29220810532569885, + "logits/rejected": -0.32383784651756287, + "logps/chosen": -8.977008110377938e-05, + "logps/rejected": -2.6561379432678223, + "loss": 0.5265, + "nll_loss": 0.13163018226623535, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.977008292276878e-06, + "rewards/margins": 0.26560482382774353, + "rewards/rejected": -0.2656137943267822, + "step": 10788 + }, + { + "epoch": 7.4612724757952975, + "grad_norm": 4.949190139770508, + "learning_rate": 1.4104041801137237e-05, + "log_odds_chosen": 11.44765853881836, + "log_odds_ratio": -4.3102198105771095e-05, + "logits/chosen": -0.0698050707578659, + "logits/rejected": -0.061611108481884, + "logps/chosen": -0.00015239565982483327, + "logps/rejected": -2.726309299468994, + "loss": 0.5276, + "nll_loss": 0.13188467919826508, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5239564163493924e-05, + "rewards/margins": 0.2726157307624817, + "rewards/rejected": -0.2726309299468994, + "step": 10789 + }, + { + "epoch": 7.461964038727524, + "grad_norm": 3.6364712715148926, + "learning_rate": 1.4100199784847088e-05, + "log_odds_chosen": 9.9117431640625, + "log_odds_ratio": -0.000305481516988948, + "logits/chosen": -0.4107625186443329, + "logits/rejected": -0.4288431704044342, + "logps/chosen": -0.0005071141058579087, + "logps/rejected": -2.180227756500244, + "loss": 0.429, + "nll_loss": 0.10721185803413391, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.071140549262054e-05, + "rewards/margins": 0.2179720252752304, + "rewards/rejected": -0.21802276372909546, + "step": 10790 + }, + { + "epoch": 7.462655601659751, + "grad_norm": 3.9746110439300537, + "learning_rate": 1.4096357768556939e-05, + "log_odds_chosen": 10.442115783691406, + "log_odds_ratio": -0.0003979535831604153, + "logits/chosen": -0.2429000735282898, + "logits/rejected": -0.4284162223339081, + "logps/chosen": -0.0005142191657796502, + "logps/rejected": -1.6921067237854004, + "loss": 0.3517, + "nll_loss": 0.08788755536079407, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.142191730556078e-05, + "rewards/margins": 0.16915924847126007, + "rewards/rejected": -0.16921067237854004, + "step": 10791 + }, + { + "epoch": 7.463347164591978, + "grad_norm": 3.416618824005127, + "learning_rate": 1.4092515752266791e-05, + "log_odds_chosen": 10.313505172729492, + "log_odds_ratio": -6.649178976658732e-05, + "logits/chosen": -0.601507842540741, + "logits/rejected": -0.7110568284988403, + "logps/chosen": -0.00023971637710928917, + "logps/rejected": -1.7452600002288818, + "loss": 0.382, + "nll_loss": 0.09550052881240845, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.397163916612044e-05, + "rewards/margins": 0.17450203001499176, + "rewards/rejected": -0.17452600598335266, + "step": 10792 + }, + { + "epoch": 7.464038727524205, + "grad_norm": 4.253975868225098, + "learning_rate": 1.408867373597664e-05, + "log_odds_chosen": 11.393698692321777, + "log_odds_ratio": -2.1312720491550863e-05, + "logits/chosen": -0.03397101163864136, + "logits/rejected": -0.07794360816478729, + "logps/chosen": -0.0002627108769956976, + "logps/rejected": -2.492248058319092, + "loss": 0.5482, + "nll_loss": 0.13705724477767944, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6271089154761285e-05, + "rewards/margins": 0.2491985410451889, + "rewards/rejected": -0.24922481179237366, + "step": 10793 + }, + { + "epoch": 7.464730290456432, + "grad_norm": 21.331689834594727, + "learning_rate": 1.4084831719686493e-05, + "log_odds_chosen": 10.241486549377441, + "log_odds_ratio": -0.0001373636769130826, + "logits/chosen": -0.186618834733963, + "logits/rejected": -0.10759405046701431, + "logps/chosen": -0.00029816178721375763, + "logps/rejected": -1.744796633720398, + "loss": 0.4677, + "nll_loss": 0.11691103875637054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.981617762998212e-05, + "rewards/margins": 0.1744498461484909, + "rewards/rejected": -0.1744796633720398, + "step": 10794 + }, + { + "epoch": 7.4654218533886585, + "grad_norm": 2.5432212352752686, + "learning_rate": 1.4080989703396344e-05, + "log_odds_chosen": 10.715089797973633, + "log_odds_ratio": -0.00037892625550739467, + "logits/chosen": -0.19213101267814636, + "logits/rejected": -0.18021385371685028, + "logps/chosen": -0.0008219567826017737, + "logps/rejected": -2.4543795585632324, + "loss": 0.3218, + "nll_loss": 0.08040856570005417, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.219568553613499e-05, + "rewards/margins": 0.2453557401895523, + "rewards/rejected": -0.24543793499469757, + "step": 10795 + }, + { + "epoch": 7.466113416320885, + "grad_norm": 8.289212226867676, + "learning_rate": 1.4077147687106193e-05, + "log_odds_chosen": 10.710524559020996, + "log_odds_ratio": -0.00010787302016979083, + "logits/chosen": -0.8265199661254883, + "logits/rejected": -0.8303812742233276, + "logps/chosen": -0.00028980334172956645, + "logps/rejected": -2.316956043243408, + "loss": 0.4287, + "nll_loss": 0.1071544662117958, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.898033562814817e-05, + "rewards/margins": 0.23166662454605103, + "rewards/rejected": -0.23169559240341187, + "step": 10796 + }, + { + "epoch": 7.466804979253112, + "grad_norm": 4.027835369110107, + "learning_rate": 1.4073305670816045e-05, + "log_odds_chosen": 10.93043327331543, + "log_odds_ratio": -3.0192390113370493e-05, + "logits/chosen": -0.41126716136932373, + "logits/rejected": -0.4459652304649353, + "logps/chosen": -0.00023423923994414508, + "logps/rejected": -2.1818385124206543, + "loss": 0.4229, + "nll_loss": 0.10572469234466553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.342392508580815e-05, + "rewards/margins": 0.21816042065620422, + "rewards/rejected": -0.21818384528160095, + "step": 10797 + }, + { + "epoch": 7.467496542185339, + "grad_norm": 6.063636302947998, + "learning_rate": 1.4069463654525898e-05, + "log_odds_chosen": 11.581496238708496, + "log_odds_ratio": -2.447320002829656e-05, + "logits/chosen": 0.05990840494632721, + "logits/rejected": -0.13078993558883667, + "logps/chosen": -0.00015943936887197196, + "logps/rejected": -2.7461681365966797, + "loss": 0.5508, + "nll_loss": 0.13769984245300293, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5943936887197196e-05, + "rewards/margins": 0.2746008634567261, + "rewards/rejected": -0.2746168076992035, + "step": 10798 + }, + { + "epoch": 7.468188105117566, + "grad_norm": 4.830626010894775, + "learning_rate": 1.4065621638235747e-05, + "log_odds_chosen": 9.754347801208496, + "log_odds_ratio": -0.0012114491546526551, + "logits/chosen": -0.5032768249511719, + "logits/rejected": -0.48865121603012085, + "logps/chosen": -0.0003861731383949518, + "logps/rejected": -1.4373276233673096, + "loss": 0.6617, + "nll_loss": 0.1653095781803131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8617316022282466e-05, + "rewards/margins": 0.14369414746761322, + "rewards/rejected": -0.14373275637626648, + "step": 10799 + }, + { + "epoch": 7.468879668049793, + "grad_norm": 7.109471797943115, + "learning_rate": 1.4061779621945597e-05, + "log_odds_chosen": 10.44162368774414, + "log_odds_ratio": -6.263345130719244e-05, + "logits/chosen": -0.3996507525444031, + "logits/rejected": -0.43611565232276917, + "logps/chosen": -0.00012694389442913234, + "logps/rejected": -1.5626509189605713, + "loss": 0.4976, + "nll_loss": 0.12438362836837769, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2694389624812175e-05, + "rewards/margins": 0.1562523990869522, + "rewards/rejected": -0.15626509487628937, + "step": 10800 + }, + { + "epoch": 7.4695712309820195, + "grad_norm": 10.683833122253418, + "learning_rate": 1.405793760565545e-05, + "log_odds_chosen": 9.845418930053711, + "log_odds_ratio": -0.0006182001088745892, + "logits/chosen": 0.009782552719116211, + "logits/rejected": -0.12515202164649963, + "logps/chosen": -0.0007117694476619363, + "logps/rejected": -1.846452236175537, + "loss": 0.4558, + "nll_loss": 0.11387798190116882, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117693894542754e-05, + "rewards/margins": 0.18457405269145966, + "rewards/rejected": -0.18464523553848267, + "step": 10801 + }, + { + "epoch": 7.470262793914246, + "grad_norm": 8.428908348083496, + "learning_rate": 1.4054095589365299e-05, + "log_odds_chosen": 11.426127433776855, + "log_odds_ratio": -2.4880095224943943e-05, + "logits/chosen": -0.08515352755784988, + "logits/rejected": -0.13209059834480286, + "logps/chosen": -0.00029765418730676174, + "logps/rejected": -2.629096746444702, + "loss": 0.5988, + "nll_loss": 0.14969633519649506, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9765420549665578e-05, + "rewards/margins": 0.2628799080848694, + "rewards/rejected": -0.2629096508026123, + "step": 10802 + }, + { + "epoch": 7.470954356846473, + "grad_norm": 6.119007587432861, + "learning_rate": 1.4050253573075151e-05, + "log_odds_chosen": 11.615740776062012, + "log_odds_ratio": -8.431605965597555e-05, + "logits/chosen": -0.24642956256866455, + "logits/rejected": -0.33345574140548706, + "logps/chosen": -0.0002293281868332997, + "logps/rejected": -2.501028060913086, + "loss": 0.5353, + "nll_loss": 0.13381671905517578, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2932819774723612e-05, + "rewards/margins": 0.250079870223999, + "rewards/rejected": -0.25010281801223755, + "step": 10803 + }, + { + "epoch": 7.4716459197787, + "grad_norm": 3.944324016571045, + "learning_rate": 1.4046411556785002e-05, + "log_odds_chosen": 9.476999282836914, + "log_odds_ratio": -0.0001978773798327893, + "logits/chosen": -0.2502937316894531, + "logits/rejected": -0.33076730370521545, + "logps/chosen": -0.0002558291016612202, + "logps/rejected": -1.2312133312225342, + "loss": 0.4154, + "nll_loss": 0.10383543372154236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.558291089371778e-05, + "rewards/margins": 0.12309575825929642, + "rewards/rejected": -0.12312135100364685, + "step": 10804 + }, + { + "epoch": 7.472337482710927, + "grad_norm": 5.033731937408447, + "learning_rate": 1.4042569540494851e-05, + "log_odds_chosen": 10.716177940368652, + "log_odds_ratio": -7.970893057063222e-05, + "logits/chosen": -0.2932302951812744, + "logits/rejected": -0.336398184299469, + "logps/chosen": -0.0003216788754798472, + "logps/rejected": -2.5918960571289062, + "loss": 0.4384, + "nll_loss": 0.1095963567495346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.216788900317624e-05, + "rewards/margins": 0.2591574490070343, + "rewards/rejected": -0.259189635515213, + "step": 10805 + }, + { + "epoch": 7.473029045643154, + "grad_norm": 10.533803939819336, + "learning_rate": 1.4038727524204704e-05, + "log_odds_chosen": 12.302633285522461, + "log_odds_ratio": -1.1996409739367664e-05, + "logits/chosen": -0.2196783572435379, + "logits/rejected": -0.2457887828350067, + "logps/chosen": -0.00010751017543952912, + "logps/rejected": -3.1479859352111816, + "loss": 0.5097, + "nll_loss": 0.12743321061134338, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0751018635346554e-05, + "rewards/margins": 0.3147878348827362, + "rewards/rejected": -0.3147985637187958, + "step": 10806 + }, + { + "epoch": 7.4737206085753805, + "grad_norm": 4.923978805541992, + "learning_rate": 1.4034885507914556e-05, + "log_odds_chosen": 10.04902172088623, + "log_odds_ratio": -6.729160668328404e-05, + "logits/chosen": -0.08962735533714294, + "logits/rejected": -0.1747085452079773, + "logps/chosen": -0.00033856008667498827, + "logps/rejected": -1.9707258939743042, + "loss": 0.4495, + "nll_loss": 0.11237649619579315, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.385600575711578e-05, + "rewards/margins": 0.19703873991966248, + "rewards/rejected": -0.1970725953578949, + "step": 10807 + }, + { + "epoch": 7.474412171507607, + "grad_norm": 4.651909828186035, + "learning_rate": 1.4031043491624405e-05, + "log_odds_chosen": 10.471010208129883, + "log_odds_ratio": -0.00011994114174740389, + "logits/chosen": -0.32653313875198364, + "logits/rejected": -0.3821827471256256, + "logps/chosen": -0.0005166777409613132, + "logps/rejected": -2.1474270820617676, + "loss": 0.6188, + "nll_loss": 0.15467827022075653, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166777191334404e-05, + "rewards/margins": 0.21469107270240784, + "rewards/rejected": -0.2147427350282669, + "step": 10808 + }, + { + "epoch": 7.475103734439834, + "grad_norm": 5.474374771118164, + "learning_rate": 1.4027201475334256e-05, + "log_odds_chosen": 10.904947280883789, + "log_odds_ratio": -3.5007004044018686e-05, + "logits/chosen": -0.4522826075553894, + "logits/rejected": -0.5444246530532837, + "logps/chosen": -0.00017778566689230502, + "logps/rejected": -2.036471128463745, + "loss": 0.6515, + "nll_loss": 0.16286161541938782, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7778567780624144e-05, + "rewards/margins": 0.20362932980060577, + "rewards/rejected": -0.20364712178707123, + "step": 10809 + }, + { + "epoch": 7.475795297372061, + "grad_norm": 4.451005935668945, + "learning_rate": 1.4023359459044108e-05, + "log_odds_chosen": 11.30253791809082, + "log_odds_ratio": -2.957035758299753e-05, + "logits/chosen": 0.049805790185928345, + "logits/rejected": 0.07589408755302429, + "logps/chosen": -0.00016353554383385926, + "logps/rejected": -2.659938097000122, + "loss": 0.4698, + "nll_loss": 0.11744727194309235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6353555110981688e-05, + "rewards/margins": 0.2659774422645569, + "rewards/rejected": -0.26599380373954773, + "step": 10810 + }, + { + "epoch": 7.476486860304288, + "grad_norm": 4.779135704040527, + "learning_rate": 1.4019517442753957e-05, + "log_odds_chosen": 11.290592193603516, + "log_odds_ratio": -6.531582039315253e-05, + "logits/chosen": -0.4310583472251892, + "logits/rejected": -0.4172248840332031, + "logps/chosen": -0.00018636384629644454, + "logps/rejected": -2.7107105255126953, + "loss": 0.5556, + "nll_loss": 0.13890208303928375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8636386812431738e-05, + "rewards/margins": 0.27105242013931274, + "rewards/rejected": -0.27107107639312744, + "step": 10811 + }, + { + "epoch": 7.477178423236515, + "grad_norm": 4.228855133056641, + "learning_rate": 1.401567542646381e-05, + "log_odds_chosen": 11.1829833984375, + "log_odds_ratio": -0.00010237680544378236, + "logits/chosen": -0.6038228273391724, + "logits/rejected": -0.6043468713760376, + "logps/chosen": -0.00033634959254413843, + "logps/rejected": -2.578450918197632, + "loss": 0.4377, + "nll_loss": 0.1094059944152832, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.363495852681808e-05, + "rewards/margins": 0.25781145691871643, + "rewards/rejected": -0.25784510374069214, + "step": 10812 + }, + { + "epoch": 7.477869986168741, + "grad_norm": 5.022854804992676, + "learning_rate": 1.401183341017366e-05, + "log_odds_chosen": 10.278348922729492, + "log_odds_ratio": -7.624067075084895e-05, + "logits/chosen": 0.1413256675004959, + "logits/rejected": 0.09014451503753662, + "logps/chosen": -0.00034738524118438363, + "logps/rejected": -2.108718156814575, + "loss": 0.455, + "nll_loss": 0.11374906450510025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4738524846034124e-05, + "rewards/margins": 0.21083708107471466, + "rewards/rejected": -0.21087180078029633, + "step": 10813 + }, + { + "epoch": 7.478561549100968, + "grad_norm": 7.37864875793457, + "learning_rate": 1.400799139388351e-05, + "log_odds_chosen": 11.088615417480469, + "log_odds_ratio": -5.135659739607945e-05, + "logits/chosen": -0.5195218324661255, + "logits/rejected": -0.534462571144104, + "logps/chosen": -0.00026968546444550157, + "logps/rejected": -2.4104957580566406, + "loss": 0.5126, + "nll_loss": 0.12813904881477356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.696854789974168e-05, + "rewards/margins": 0.24102260172367096, + "rewards/rejected": -0.24104955792427063, + "step": 10814 + }, + { + "epoch": 7.479253112033195, + "grad_norm": 3.966606378555298, + "learning_rate": 1.4004149377593362e-05, + "log_odds_chosen": 10.707971572875977, + "log_odds_ratio": -9.568365203449503e-05, + "logits/chosen": -0.5049077272415161, + "logits/rejected": -0.4820142984390259, + "logps/chosen": -0.00014141273277346045, + "logps/rejected": -1.723301887512207, + "loss": 0.3993, + "nll_loss": 0.09980343282222748, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4141274732537568e-05, + "rewards/margins": 0.1723160743713379, + "rewards/rejected": -0.17233020067214966, + "step": 10815 + }, + { + "epoch": 7.479944674965422, + "grad_norm": 5.684383392333984, + "learning_rate": 1.4000307361303214e-05, + "log_odds_chosen": 11.650728225708008, + "log_odds_ratio": -1.511611117166467e-05, + "logits/chosen": -0.2656325101852417, + "logits/rejected": -0.32714197039604187, + "logps/chosen": -0.0003647196863312274, + "logps/rejected": -3.0555384159088135, + "loss": 0.5385, + "nll_loss": 0.13461153209209442, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.647196717793122e-05, + "rewards/margins": 0.30551737546920776, + "rewards/rejected": -0.3055538535118103, + "step": 10816 + }, + { + "epoch": 7.480636237897649, + "grad_norm": 24.105382919311523, + "learning_rate": 1.3996465345013063e-05, + "log_odds_chosen": 10.815409660339355, + "log_odds_ratio": -4.043741864734329e-05, + "logits/chosen": -0.6106455326080322, + "logits/rejected": -0.7042526602745056, + "logps/chosen": -0.000373400078387931, + "logps/rejected": -1.910614252090454, + "loss": 0.8274, + "nll_loss": 0.20684409141540527, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7340010749176145e-05, + "rewards/margins": 0.19102409482002258, + "rewards/rejected": -0.19106143712997437, + "step": 10817 + }, + { + "epoch": 7.481327800829876, + "grad_norm": 4.480190753936768, + "learning_rate": 1.3992623328722914e-05, + "log_odds_chosen": 10.819806098937988, + "log_odds_ratio": -9.418633999302983e-05, + "logits/chosen": -0.22619548439979553, + "logits/rejected": -0.3512183427810669, + "logps/chosen": -0.00013654318172484636, + "logps/rejected": -1.9781020879745483, + "loss": 0.4419, + "nll_loss": 0.11045687645673752, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3654316717293113e-05, + "rewards/margins": 0.1977965533733368, + "rewards/rejected": -0.19781020283699036, + "step": 10818 + }, + { + "epoch": 7.482019363762102, + "grad_norm": 6.823324203491211, + "learning_rate": 1.3988781312432767e-05, + "log_odds_chosen": 11.323492050170898, + "log_odds_ratio": -2.871198557841126e-05, + "logits/chosen": -0.5868136286735535, + "logits/rejected": -0.6236948370933533, + "logps/chosen": -0.00010528555139899254, + "logps/rejected": -2.055727481842041, + "loss": 0.3748, + "nll_loss": 0.09368917346000671, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0528554412303492e-05, + "rewards/margins": 0.20556223392486572, + "rewards/rejected": -0.20557278394699097, + "step": 10819 + }, + { + "epoch": 7.482710926694329, + "grad_norm": 10.466057777404785, + "learning_rate": 1.3984939296142616e-05, + "log_odds_chosen": 11.126243591308594, + "log_odds_ratio": -0.0005590688670054078, + "logits/chosen": -0.436701238155365, + "logits/rejected": -0.4432668387889862, + "logps/chosen": -0.00015593662101309747, + "logps/rejected": -1.8980236053466797, + "loss": 0.591, + "nll_loss": 0.14768701791763306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5593661373713985e-05, + "rewards/margins": 0.18978677690029144, + "rewards/rejected": -0.1898023635149002, + "step": 10820 + }, + { + "epoch": 7.483402489626556, + "grad_norm": 5.3855671882629395, + "learning_rate": 1.3981097279852468e-05, + "log_odds_chosen": 11.250757217407227, + "log_odds_ratio": -2.490894439688418e-05, + "logits/chosen": -0.3038305640220642, + "logits/rejected": -0.2972073256969452, + "logps/chosen": -0.0006267963908612728, + "logps/rejected": -2.738424301147461, + "loss": 0.5656, + "nll_loss": 0.1413910835981369, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.267963908612728e-05, + "rewards/margins": 0.2737797498703003, + "rewards/rejected": -0.273842453956604, + "step": 10821 + }, + { + "epoch": 7.484094052558783, + "grad_norm": 4.8172526359558105, + "learning_rate": 1.3977255263562319e-05, + "log_odds_chosen": 10.236217498779297, + "log_odds_ratio": -0.00011173608800163493, + "logits/chosen": -0.2512364685535431, + "logits/rejected": -0.2988510727882385, + "logps/chosen": -0.00020601501455530524, + "logps/rejected": -1.6551272869110107, + "loss": 0.4242, + "nll_loss": 0.10604405403137207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0601502910722047e-05, + "rewards/margins": 0.16549211740493774, + "rewards/rejected": -0.16551271080970764, + "step": 10822 + }, + { + "epoch": 7.48478561549101, + "grad_norm": 11.536404609680176, + "learning_rate": 1.3973413247272168e-05, + "log_odds_chosen": 11.51347827911377, + "log_odds_ratio": -4.374596392153762e-05, + "logits/chosen": 0.03762510418891907, + "logits/rejected": -0.09024792909622192, + "logps/chosen": -0.00028255494544282556, + "logps/rejected": -2.5662331581115723, + "loss": 0.512, + "nll_loss": 0.12798868119716644, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.82554974546656e-05, + "rewards/margins": 0.25659507513046265, + "rewards/rejected": -0.2566233277320862, + "step": 10823 + }, + { + "epoch": 7.485477178423237, + "grad_norm": 3.3092715740203857, + "learning_rate": 1.396957123098202e-05, + "log_odds_chosen": 11.188426971435547, + "log_odds_ratio": -4.85469754494261e-05, + "logits/chosen": -0.43238580226898193, + "logits/rejected": -0.4718017578125, + "logps/chosen": -0.0002774116874206811, + "logps/rejected": -1.945969820022583, + "loss": 0.3066, + "nll_loss": 0.07664395868778229, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.774116728687659e-05, + "rewards/margins": 0.19456923007965088, + "rewards/rejected": -0.1945970058441162, + "step": 10824 + }, + { + "epoch": 7.486168741355463, + "grad_norm": 3.8885912895202637, + "learning_rate": 1.3965729214691873e-05, + "log_odds_chosen": 10.345460891723633, + "log_odds_ratio": -0.00019396745483390987, + "logits/chosen": -0.20144695043563843, + "logits/rejected": -0.27505654096603394, + "logps/chosen": -0.0007674898370169103, + "logps/rejected": -2.2125778198242188, + "loss": 0.3127, + "nll_loss": 0.07814507186412811, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.674898370169103e-05, + "rewards/margins": 0.22118106484413147, + "rewards/rejected": -0.2212577909231186, + "step": 10825 + }, + { + "epoch": 7.48686030428769, + "grad_norm": 5.219362258911133, + "learning_rate": 1.3961887198401722e-05, + "log_odds_chosen": 11.4567289352417, + "log_odds_ratio": -2.5133645976893604e-05, + "logits/chosen": -0.30604687333106995, + "logits/rejected": -0.3041151762008667, + "logps/chosen": -7.92650316725485e-05, + "logps/rejected": -2.015554904937744, + "loss": 0.5405, + "nll_loss": 0.13513174653053284, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.926503712951671e-06, + "rewards/margins": 0.2015475630760193, + "rewards/rejected": -0.20155547559261322, + "step": 10826 + }, + { + "epoch": 7.487551867219917, + "grad_norm": 4.860392093658447, + "learning_rate": 1.3958045182111573e-05, + "log_odds_chosen": 10.375247955322266, + "log_odds_ratio": -8.097510726656765e-05, + "logits/chosen": -0.26754456758499146, + "logits/rejected": -0.21948489546775818, + "logps/chosen": -0.0003514794516377151, + "logps/rejected": -2.076695203781128, + "loss": 0.5953, + "nll_loss": 0.14881163835525513, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5147942980984226e-05, + "rewards/margins": 0.2076343595981598, + "rewards/rejected": -0.20766952633857727, + "step": 10827 + }, + { + "epoch": 7.488243430152144, + "grad_norm": 4.747002124786377, + "learning_rate": 1.3954203165821425e-05, + "log_odds_chosen": 10.324663162231445, + "log_odds_ratio": -0.0005005595157854259, + "logits/chosen": -0.6674417853355408, + "logits/rejected": -0.6481961011886597, + "logps/chosen": -0.0006220329669304192, + "logps/rejected": -1.9338624477386475, + "loss": 0.3806, + "nll_loss": 0.09510613977909088, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220330396899953e-05, + "rewards/margins": 0.1933240294456482, + "rewards/rejected": -0.1933862417936325, + "step": 10828 + }, + { + "epoch": 7.488934993084371, + "grad_norm": 5.115079879760742, + "learning_rate": 1.3950361149531274e-05, + "log_odds_chosen": 10.410758018493652, + "log_odds_ratio": -0.00015181548951659352, + "logits/chosen": -0.34289151430130005, + "logits/rejected": -0.4306577742099762, + "logps/chosen": -0.00022027691011317074, + "logps/rejected": -1.804958701133728, + "loss": 0.6822, + "nll_loss": 0.17052656412124634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2027692466508597e-05, + "rewards/margins": 0.18047384917736053, + "rewards/rejected": -0.18049587309360504, + "step": 10829 + }, + { + "epoch": 7.4896265560165975, + "grad_norm": 5.388112545013428, + "learning_rate": 1.3946519133241127e-05, + "log_odds_chosen": 10.932295799255371, + "log_odds_ratio": -0.00010340958397137001, + "logits/chosen": -0.5505223274230957, + "logits/rejected": -0.4584423005580902, + "logps/chosen": -0.0001718951971270144, + "logps/rejected": -1.8500322103500366, + "loss": 0.4229, + "nll_loss": 0.10570985078811646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.718951898510568e-05, + "rewards/margins": 0.18498602509498596, + "rewards/rejected": -0.18500322103500366, + "step": 10830 + }, + { + "epoch": 7.490318118948824, + "grad_norm": 3.090034246444702, + "learning_rate": 1.3942677116950977e-05, + "log_odds_chosen": 12.040966033935547, + "log_odds_ratio": -1.1882757462444715e-05, + "logits/chosen": -0.24035069346427917, + "logits/rejected": -0.2807389497756958, + "logps/chosen": -0.00018961979367304593, + "logps/rejected": -3.0832576751708984, + "loss": 0.3444, + "nll_loss": 0.08610430359840393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8961978639708832e-05, + "rewards/margins": 0.30830681324005127, + "rewards/rejected": -0.30832576751708984, + "step": 10831 + }, + { + "epoch": 7.491009681881051, + "grad_norm": 5.554166793823242, + "learning_rate": 1.3938835100660826e-05, + "log_odds_chosen": 11.567726135253906, + "log_odds_ratio": -7.94117950135842e-05, + "logits/chosen": 0.07390864193439484, + "logits/rejected": 0.0015699323266744614, + "logps/chosen": -0.0004241417336743325, + "logps/rejected": -3.301921844482422, + "loss": 0.5749, + "nll_loss": 0.1437089443206787, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2414176277816296e-05, + "rewards/margins": 0.33014976978302, + "rewards/rejected": -0.3301921784877777, + "step": 10832 + }, + { + "epoch": 7.491701244813278, + "grad_norm": 13.624567031860352, + "learning_rate": 1.3934993084370679e-05, + "log_odds_chosen": 11.913568496704102, + "log_odds_ratio": -1.140839322033571e-05, + "logits/chosen": -0.3410708010196686, + "logits/rejected": -0.37177151441574097, + "logps/chosen": -0.00010055984603241086, + "logps/rejected": -2.67598557472229, + "loss": 0.5215, + "nll_loss": 0.13038001954555511, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0055984603241086e-05, + "rewards/margins": 0.2675884962081909, + "rewards/rejected": -0.26759853959083557, + "step": 10833 + }, + { + "epoch": 7.492392807745505, + "grad_norm": 8.092597007751465, + "learning_rate": 1.3931151068080528e-05, + "log_odds_chosen": 10.475318908691406, + "log_odds_ratio": -8.045632421271876e-05, + "logits/chosen": -0.1439984142780304, + "logits/rejected": -0.18829496204853058, + "logps/chosen": -0.00023990156478248537, + "logps/rejected": -1.957043170928955, + "loss": 0.5433, + "nll_loss": 0.13581514358520508, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.399015829723794e-05, + "rewards/margins": 0.19568033516407013, + "rewards/rejected": -0.19570431113243103, + "step": 10834 + }, + { + "epoch": 7.493084370677732, + "grad_norm": 5.48667049407959, + "learning_rate": 1.392730905179038e-05, + "log_odds_chosen": 9.939897537231445, + "log_odds_ratio": -0.0004207099264021963, + "logits/chosen": -0.3046836853027344, + "logits/rejected": -0.29632100462913513, + "logps/chosen": -0.0005699221510440111, + "logps/rejected": -1.9147216081619263, + "loss": 0.4148, + "nll_loss": 0.10365672409534454, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.699221583199687e-05, + "rewards/margins": 0.19141516089439392, + "rewards/rejected": -0.19147217273712158, + "step": 10835 + }, + { + "epoch": 7.4937759336099585, + "grad_norm": 5.311141490936279, + "learning_rate": 1.3923467035500231e-05, + "log_odds_chosen": 11.650004386901855, + "log_odds_ratio": -7.711305806878954e-05, + "logits/chosen": -0.2858215570449829, + "logits/rejected": -0.29272037744522095, + "logps/chosen": -0.00040270035970024765, + "logps/rejected": -3.0382204055786133, + "loss": 0.5492, + "nll_loss": 0.13729213178157806, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0270035242429e-05, + "rewards/margins": 0.30378180742263794, + "rewards/rejected": -0.30382204055786133, + "step": 10836 + }, + { + "epoch": 7.494467496542185, + "grad_norm": 6.042548179626465, + "learning_rate": 1.391962501921008e-05, + "log_odds_chosen": 11.16757583618164, + "log_odds_ratio": -6.1027007177472115e-05, + "logits/chosen": -0.06862695515155792, + "logits/rejected": -0.1478588879108429, + "logps/chosen": -0.00038854603189975023, + "logps/rejected": -2.471419334411621, + "loss": 0.4313, + "nll_loss": 0.10782044380903244, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.885460318997502e-05, + "rewards/margins": 0.24710306525230408, + "rewards/rejected": -0.24714189767837524, + "step": 10837 + }, + { + "epoch": 7.495159059474412, + "grad_norm": 4.193350315093994, + "learning_rate": 1.3915783002919933e-05, + "log_odds_chosen": 11.357162475585938, + "log_odds_ratio": -6.056849815649912e-05, + "logits/chosen": 0.1128070279955864, + "logits/rejected": 0.034058213233947754, + "logps/chosen": -0.00016933982260525227, + "logps/rejected": -2.145360231399536, + "loss": 0.4616, + "nll_loss": 0.11538281291723251, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.693398371571675e-05, + "rewards/margins": 0.21451905369758606, + "rewards/rejected": -0.21453601121902466, + "step": 10838 + }, + { + "epoch": 7.495850622406639, + "grad_norm": 5.410037517547607, + "learning_rate": 1.3911940986629785e-05, + "log_odds_chosen": 11.573707580566406, + "log_odds_ratio": -0.00010516634938539937, + "logits/chosen": -0.06613228470087051, + "logits/rejected": -0.07556043565273285, + "logps/chosen": -0.0012632563011720777, + "logps/rejected": -3.0463085174560547, + "loss": 0.4794, + "nll_loss": 0.1198422759771347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012632562720682472, + "rewards/margins": 0.30450454354286194, + "rewards/rejected": -0.3046308755874634, + "step": 10839 + }, + { + "epoch": 7.496542185338866, + "grad_norm": 6.1775312423706055, + "learning_rate": 1.3908098970339634e-05, + "log_odds_chosen": 10.662359237670898, + "log_odds_ratio": -0.00016371881065424532, + "logits/chosen": -0.5707687139511108, + "logits/rejected": -0.6249455213546753, + "logps/chosen": -0.0003949836827814579, + "logps/rejected": -2.2343854904174805, + "loss": 0.5969, + "nll_loss": 0.14921541512012482, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.949836900574155e-05, + "rewards/margins": 0.22339904308319092, + "rewards/rejected": -0.2234385758638382, + "step": 10840 + }, + { + "epoch": 7.497233748271093, + "grad_norm": 4.109016418457031, + "learning_rate": 1.3904256954049485e-05, + "log_odds_chosen": 11.424544334411621, + "log_odds_ratio": -9.455587132833898e-05, + "logits/chosen": -0.13782545924186707, + "logits/rejected": -0.13644850254058838, + "logps/chosen": -0.00015705320402048528, + "logps/rejected": -2.8745219707489014, + "loss": 0.4233, + "nll_loss": 0.1058032214641571, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5705320038250647e-05, + "rewards/margins": 0.28743648529052734, + "rewards/rejected": -0.28745219111442566, + "step": 10841 + }, + { + "epoch": 7.4979253112033195, + "grad_norm": 6.663751125335693, + "learning_rate": 1.3900414937759337e-05, + "log_odds_chosen": 11.010114669799805, + "log_odds_ratio": -5.1399500080151483e-05, + "logits/chosen": -0.6667817831039429, + "logits/rejected": -0.705284059047699, + "logps/chosen": -0.00030787562718614936, + "logps/rejected": -2.436732053756714, + "loss": 0.5613, + "nll_loss": 0.1403152048587799, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.078755980823189e-05, + "rewards/margins": 0.2436424046754837, + "rewards/rejected": -0.2436732053756714, + "step": 10842 + }, + { + "epoch": 7.498616874135546, + "grad_norm": 6.209922790527344, + "learning_rate": 1.3896572921469186e-05, + "log_odds_chosen": 10.507733345031738, + "log_odds_ratio": -0.00028826348716393113, + "logits/chosen": 0.04832683503627777, + "logits/rejected": 0.12393030524253845, + "logps/chosen": -0.000690105021931231, + "logps/rejected": -2.3561508655548096, + "loss": 0.5515, + "nll_loss": 0.13785235583782196, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.901050073793158e-05, + "rewards/margins": 0.2355460822582245, + "rewards/rejected": -0.235615074634552, + "step": 10843 + }, + { + "epoch": 7.499308437067773, + "grad_norm": 3.0588796138763428, + "learning_rate": 1.3892730905179039e-05, + "log_odds_chosen": 11.098876953125, + "log_odds_ratio": -4.758929935633205e-05, + "logits/chosen": -0.18588104844093323, + "logits/rejected": -0.24176223576068878, + "logps/chosen": -0.00041968681034632027, + "logps/rejected": -2.5732569694519043, + "loss": 0.346, + "nll_loss": 0.08649633824825287, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.196868394501507e-05, + "rewards/margins": 0.2572837471961975, + "rewards/rejected": -0.2573257088661194, + "step": 10844 + }, + { + "epoch": 7.5, + "grad_norm": 5.975062847137451, + "learning_rate": 1.388888888888889e-05, + "log_odds_chosen": 10.546667098999023, + "log_odds_ratio": -0.00010005592775996774, + "logits/chosen": -0.2629231810569763, + "logits/rejected": -0.28820547461509705, + "logps/chosen": -0.00022396890562959015, + "logps/rejected": -2.047438859939575, + "loss": 0.5123, + "nll_loss": 0.1280604898929596, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2396889107767493e-05, + "rewards/margins": 0.20472149550914764, + "rewards/rejected": -0.204743891954422, + "step": 10845 + }, + { + "epoch": 7.500691562932227, + "grad_norm": 5.633182048797607, + "learning_rate": 1.3885046872598739e-05, + "log_odds_chosen": 11.560811996459961, + "log_odds_ratio": -2.4324574042111635e-05, + "logits/chosen": -0.2091791182756424, + "logits/rejected": -0.3963402211666107, + "logps/chosen": -0.00012010188947897404, + "logps/rejected": -2.2216544151306152, + "loss": 0.4798, + "nll_loss": 0.11993524432182312, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2010188584099524e-05, + "rewards/margins": 0.2221534252166748, + "rewards/rejected": -0.22216545045375824, + "step": 10846 + }, + { + "epoch": 7.501383125864454, + "grad_norm": 4.486021995544434, + "learning_rate": 1.3881204856308591e-05, + "log_odds_chosen": 10.803471565246582, + "log_odds_ratio": -0.0002556285762693733, + "logits/chosen": -0.42428725957870483, + "logits/rejected": -0.4375617802143097, + "logps/chosen": -0.0005502038984559476, + "logps/rejected": -2.2866053581237793, + "loss": 0.6085, + "nll_loss": 0.1520923227071762, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5020391300786287e-05, + "rewards/margins": 0.22860552370548248, + "rewards/rejected": -0.22866055369377136, + "step": 10847 + }, + { + "epoch": 7.5020746887966805, + "grad_norm": 6.069352626800537, + "learning_rate": 1.3877362840018443e-05, + "log_odds_chosen": 11.59472370147705, + "log_odds_ratio": -1.1047510270145722e-05, + "logits/chosen": -0.1648513525724411, + "logits/rejected": -0.3039112389087677, + "logps/chosen": -7.49052778701298e-05, + "logps/rejected": -1.9297611713409424, + "loss": 0.4073, + "nll_loss": 0.10182895511388779, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.490527423215099e-06, + "rewards/margins": 0.19296863675117493, + "rewards/rejected": -0.19297611713409424, + "step": 10848 + }, + { + "epoch": 7.502766251728907, + "grad_norm": 6.025568962097168, + "learning_rate": 1.3873520823728293e-05, + "log_odds_chosen": 10.147893905639648, + "log_odds_ratio": -0.00010455265874043107, + "logits/chosen": -0.4919975996017456, + "logits/rejected": -0.5295048952102661, + "logps/chosen": -0.00031061304616741836, + "logps/rejected": -1.9053452014923096, + "loss": 0.3951, + "nll_loss": 0.09877252578735352, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.106130679952912e-05, + "rewards/margins": 0.19050344824790955, + "rewards/rejected": -0.19053450226783752, + "step": 10849 + }, + { + "epoch": 7.503457814661134, + "grad_norm": 4.921655654907227, + "learning_rate": 1.3869678807438143e-05, + "log_odds_chosen": 10.672442436218262, + "log_odds_ratio": -0.00011154203821206465, + "logits/chosen": -0.2638598382472992, + "logits/rejected": -0.33946990966796875, + "logps/chosen": -0.00035740650491788983, + "logps/rejected": -2.011767864227295, + "loss": 0.5176, + "nll_loss": 0.12939995527267456, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.574065340217203e-05, + "rewards/margins": 0.20114102959632874, + "rewards/rejected": -0.20117677748203278, + "step": 10850 + }, + { + "epoch": 7.504149377593361, + "grad_norm": 6.706781387329102, + "learning_rate": 1.3865836791147996e-05, + "log_odds_chosen": 10.745321273803711, + "log_odds_ratio": -4.268326301826164e-05, + "logits/chosen": -0.3959668278694153, + "logits/rejected": -0.4123864769935608, + "logps/chosen": -0.00017690425738692284, + "logps/rejected": -2.003281354904175, + "loss": 0.5213, + "nll_loss": 0.13032077252864838, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7690425011096522e-05, + "rewards/margins": 0.20031043887138367, + "rewards/rejected": -0.20032814145088196, + "step": 10851 + }, + { + "epoch": 7.504840940525588, + "grad_norm": 4.583414077758789, + "learning_rate": 1.3861994774857845e-05, + "log_odds_chosen": 9.970561027526855, + "log_odds_ratio": -0.0002485551231075078, + "logits/chosen": -0.5081609487533569, + "logits/rejected": -0.5775808095932007, + "logps/chosen": -0.000991704175248742, + "logps/rejected": -1.5721248388290405, + "loss": 0.4789, + "nll_loss": 0.11970222741365433, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.917042189044878e-05, + "rewards/margins": 0.15711332857608795, + "rewards/rejected": -0.1572125107049942, + "step": 10852 + }, + { + "epoch": 7.505532503457815, + "grad_norm": 5.421633720397949, + "learning_rate": 1.3858152758567697e-05, + "log_odds_chosen": 10.60433578491211, + "log_odds_ratio": -4.82356917927973e-05, + "logits/chosen": 0.1491568386554718, + "logits/rejected": 0.11569232493638992, + "logps/chosen": -0.00021837849635630846, + "logps/rejected": -2.0376646518707275, + "loss": 0.4481, + "nll_loss": 0.112018883228302, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1837848180439323e-05, + "rewards/margins": 0.20374462008476257, + "rewards/rejected": -0.20376646518707275, + "step": 10853 + }, + { + "epoch": 7.5062240663900415, + "grad_norm": 4.001258373260498, + "learning_rate": 1.3854310742277548e-05, + "log_odds_chosen": 10.59055233001709, + "log_odds_ratio": -6.709634908474982e-05, + "logits/chosen": -0.5928189158439636, + "logits/rejected": -0.6791818737983704, + "logps/chosen": -0.0002604667388368398, + "logps/rejected": -1.9976965188980103, + "loss": 0.457, + "nll_loss": 0.11424599587917328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6046673156088218e-05, + "rewards/margins": 0.1997436136007309, + "rewards/rejected": -0.19976966083049774, + "step": 10854 + }, + { + "epoch": 7.506915629322268, + "grad_norm": 4.610876560211182, + "learning_rate": 1.3850468725987397e-05, + "log_odds_chosen": 11.297388076782227, + "log_odds_ratio": -2.201269489887636e-05, + "logits/chosen": -0.5671547055244446, + "logits/rejected": -0.6654840111732483, + "logps/chosen": -0.0001199392499984242, + "logps/rejected": -2.3370468616485596, + "loss": 0.3404, + "nll_loss": 0.08509953320026398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.199392499984242e-05, + "rewards/margins": 0.2336927056312561, + "rewards/rejected": -0.23370468616485596, + "step": 10855 + }, + { + "epoch": 7.507607192254495, + "grad_norm": 3.907001256942749, + "learning_rate": 1.384662670969725e-05, + "log_odds_chosen": 9.279020309448242, + "log_odds_ratio": -0.00034641113597899675, + "logits/chosen": -0.48437485098838806, + "logits/rejected": -0.5484408140182495, + "logps/chosen": -0.0015225738752633333, + "logps/rejected": -1.8507013320922852, + "loss": 0.4286, + "nll_loss": 0.10712503641843796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015225740207824856, + "rewards/margins": 0.1849178671836853, + "rewards/rejected": -0.18507012724876404, + "step": 10856 + }, + { + "epoch": 7.508298755186722, + "grad_norm": 9.251437187194824, + "learning_rate": 1.3842784693407102e-05, + "log_odds_chosen": 11.04911994934082, + "log_odds_ratio": -2.3998569304239936e-05, + "logits/chosen": -0.401960551738739, + "logits/rejected": -0.45356813073158264, + "logps/chosen": -0.0001485783141106367, + "logps/rejected": -2.1403989791870117, + "loss": 0.4878, + "nll_loss": 0.12194465100765228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.485783104726579e-05, + "rewards/margins": 0.21402505040168762, + "rewards/rejected": -0.2140398919582367, + "step": 10857 + }, + { + "epoch": 7.508990318118949, + "grad_norm": 4.904819488525391, + "learning_rate": 1.3838942677116951e-05, + "log_odds_chosen": 10.50926399230957, + "log_odds_ratio": -0.0006038338178768754, + "logits/chosen": -0.7038325071334839, + "logits/rejected": -0.7800592184066772, + "logps/chosen": -0.0006285731215029955, + "logps/rejected": -1.9317022562026978, + "loss": 0.556, + "nll_loss": 0.13892945647239685, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.285731069510803e-05, + "rewards/margins": 0.19310736656188965, + "rewards/rejected": -0.1931702196598053, + "step": 10858 + }, + { + "epoch": 7.509681881051176, + "grad_norm": 5.4442219734191895, + "learning_rate": 1.3835100660826802e-05, + "log_odds_chosen": 11.821813583374023, + "log_odds_ratio": -0.00018751317111309618, + "logits/chosen": -0.22810126841068268, + "logits/rejected": -0.2429996132850647, + "logps/chosen": -0.00044313608668744564, + "logps/rejected": -2.8103132247924805, + "loss": 0.5328, + "nll_loss": 0.13319182395935059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.431361230672337e-05, + "rewards/margins": 0.280987024307251, + "rewards/rejected": -0.2810313105583191, + "step": 10859 + }, + { + "epoch": 7.5103734439834025, + "grad_norm": 7.400333881378174, + "learning_rate": 1.3831258644536654e-05, + "log_odds_chosen": 11.880918502807617, + "log_odds_ratio": -1.1990883649559692e-05, + "logits/chosen": -0.29229143261909485, + "logits/rejected": -0.37813225388526917, + "logps/chosen": -0.00014303525676950812, + "logps/rejected": -2.716463088989258, + "loss": 0.4499, + "nll_loss": 0.11248502880334854, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4303526768344454e-05, + "rewards/margins": 0.27163201570510864, + "rewards/rejected": -0.27164632081985474, + "step": 10860 + }, + { + "epoch": 7.511065006915629, + "grad_norm": 4.424728870391846, + "learning_rate": 1.3827416628246503e-05, + "log_odds_chosen": 11.932880401611328, + "log_odds_ratio": -2.0810561181860976e-05, + "logits/chosen": -0.48462021350860596, + "logits/rejected": -0.565332293510437, + "logps/chosen": -0.00025298818945884705, + "logps/rejected": -2.961071729660034, + "loss": 0.3484, + "nll_loss": 0.08709091693162918, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5298817490693182e-05, + "rewards/margins": 0.29608190059661865, + "rewards/rejected": -0.2961071729660034, + "step": 10861 + }, + { + "epoch": 7.511756569847856, + "grad_norm": 13.965356826782227, + "learning_rate": 1.3823574611956356e-05, + "log_odds_chosen": 11.458879470825195, + "log_odds_ratio": -5.086950841359794e-05, + "logits/chosen": -0.34427160024642944, + "logits/rejected": -0.39023107290267944, + "logps/chosen": -0.00023432400485035032, + "logps/rejected": -2.773988962173462, + "loss": 0.4941, + "nll_loss": 0.12353166192770004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3432401576428674e-05, + "rewards/margins": 0.2773754596710205, + "rewards/rejected": -0.2773989140987396, + "step": 10862 + }, + { + "epoch": 7.512448132780083, + "grad_norm": 4.900034427642822, + "learning_rate": 1.3819732595666206e-05, + "log_odds_chosen": 10.443470001220703, + "log_odds_ratio": -7.572891627205536e-05, + "logits/chosen": -0.350027859210968, + "logits/rejected": -0.39866653084754944, + "logps/chosen": -0.00017401771037839353, + "logps/rejected": -1.6754381656646729, + "loss": 0.4745, + "nll_loss": 0.11862252652645111, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.740176958264783e-05, + "rewards/margins": 0.16752642393112183, + "rewards/rejected": -0.16754382848739624, + "step": 10863 + }, + { + "epoch": 7.51313969571231, + "grad_norm": 5.489154815673828, + "learning_rate": 1.3815890579376055e-05, + "log_odds_chosen": 9.630025863647461, + "log_odds_ratio": -0.0019220231333747506, + "logits/chosen": -0.5830115079879761, + "logits/rejected": -0.6449246406555176, + "logps/chosen": -0.013586835004389286, + "logps/rejected": -1.557092308998108, + "loss": 0.4387, + "nll_loss": 0.1094917505979538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013586835702881217, + "rewards/margins": 0.15435056388378143, + "rewards/rejected": -0.15570923686027527, + "step": 10864 + }, + { + "epoch": 7.513831258644537, + "grad_norm": 5.110268592834473, + "learning_rate": 1.3812048563085908e-05, + "log_odds_chosen": 11.101775169372559, + "log_odds_ratio": -2.5323784939246252e-05, + "logits/chosen": -0.5525895357131958, + "logits/rejected": -0.4849681258201599, + "logps/chosen": -0.0002422073157504201, + "logps/rejected": -2.35410213470459, + "loss": 0.4673, + "nll_loss": 0.11682500690221786, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.422073157504201e-05, + "rewards/margins": 0.2353859841823578, + "rewards/rejected": -0.23541021347045898, + "step": 10865 + }, + { + "epoch": 7.514522821576763, + "grad_norm": 4.0105109214782715, + "learning_rate": 1.380820654679576e-05, + "log_odds_chosen": 10.789510726928711, + "log_odds_ratio": -4.4687138142762706e-05, + "logits/chosen": -0.22874239087104797, + "logits/rejected": -0.27216294407844543, + "logps/chosen": -0.00013487483374774456, + "logps/rejected": -1.885083556175232, + "loss": 0.5365, + "nll_loss": 0.13411131501197815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3487482647178695e-05, + "rewards/margins": 0.18849486112594604, + "rewards/rejected": -0.18850834667682648, + "step": 10866 + }, + { + "epoch": 7.51521438450899, + "grad_norm": 8.645621299743652, + "learning_rate": 1.380436453050561e-05, + "log_odds_chosen": 10.929075241088867, + "log_odds_ratio": -2.9114125936757773e-05, + "logits/chosen": -0.23884816467761993, + "logits/rejected": -0.2688317894935608, + "logps/chosen": -0.0002449355670250952, + "logps/rejected": -2.4290623664855957, + "loss": 0.5502, + "nll_loss": 0.13755536079406738, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4493558157701045e-05, + "rewards/margins": 0.24288174510002136, + "rewards/rejected": -0.24290621280670166, + "step": 10867 + }, + { + "epoch": 7.515905947441217, + "grad_norm": 3.153947591781616, + "learning_rate": 1.380052251421546e-05, + "log_odds_chosen": 9.717711448669434, + "log_odds_ratio": -0.0009161827620118856, + "logits/chosen": -0.1779230833053589, + "logits/rejected": -0.19890283048152924, + "logps/chosen": -0.0008492513443343341, + "logps/rejected": -2.064303159713745, + "loss": 0.3649, + "nll_loss": 0.09113053977489471, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.492513734381646e-05, + "rewards/margins": 0.20634539425373077, + "rewards/rejected": -0.2064303308725357, + "step": 10868 + }, + { + "epoch": 7.516597510373444, + "grad_norm": 4.896413803100586, + "learning_rate": 1.3796680497925313e-05, + "log_odds_chosen": 10.881827354431152, + "log_odds_ratio": -8.696097938809544e-05, + "logits/chosen": -0.43188685178756714, + "logits/rejected": -0.6426720023155212, + "logps/chosen": -0.00026656663976609707, + "logps/rejected": -2.365948438644409, + "loss": 0.5023, + "nll_loss": 0.12557311356067657, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.665666579559911e-05, + "rewards/margins": 0.23656819760799408, + "rewards/rejected": -0.23659485578536987, + "step": 10869 + }, + { + "epoch": 7.517289073305671, + "grad_norm": 4.01028299331665, + "learning_rate": 1.3792838481635162e-05, + "log_odds_chosen": 10.271018981933594, + "log_odds_ratio": -0.0006215503090061247, + "logits/chosen": -0.5212066173553467, + "logits/rejected": -0.5539983510971069, + "logps/chosen": -0.0008918846724554896, + "logps/rejected": -2.137946605682373, + "loss": 0.3695, + "nll_loss": 0.09231384098529816, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.918847015593201e-05, + "rewards/margins": 0.21370546519756317, + "rewards/rejected": -0.21379466354846954, + "step": 10870 + }, + { + "epoch": 7.517980636237898, + "grad_norm": 4.348485469818115, + "learning_rate": 1.3788996465345014e-05, + "log_odds_chosen": 10.320483207702637, + "log_odds_ratio": -0.0003907751524820924, + "logits/chosen": -0.7220146656036377, + "logits/rejected": -0.757490336894989, + "logps/chosen": -0.0001988973090192303, + "logps/rejected": -1.8074790239334106, + "loss": 0.454, + "nll_loss": 0.11345857381820679, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9889732357114553e-05, + "rewards/margins": 0.180728018283844, + "rewards/rejected": -0.18074792623519897, + "step": 10871 + }, + { + "epoch": 7.518672199170124, + "grad_norm": 5.348752975463867, + "learning_rate": 1.3785154449054865e-05, + "log_odds_chosen": 10.597746849060059, + "log_odds_ratio": -0.00013086672697681934, + "logits/chosen": -0.4913485646247864, + "logits/rejected": -0.5052577257156372, + "logps/chosen": -0.000320828752592206, + "logps/rejected": -1.856242060661316, + "loss": 0.4562, + "nll_loss": 0.11404484510421753, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.208287671441212e-05, + "rewards/margins": 0.18559211492538452, + "rewards/rejected": -0.18562419712543488, + "step": 10872 + }, + { + "epoch": 7.519363762102351, + "grad_norm": 6.101108551025391, + "learning_rate": 1.3781312432764714e-05, + "log_odds_chosen": 11.56706428527832, + "log_odds_ratio": -1.9296106984256767e-05, + "logits/chosen": -0.42387184500694275, + "logits/rejected": -0.4706951975822449, + "logps/chosen": -0.00025341319269500673, + "logps/rejected": -2.820647716522217, + "loss": 0.4645, + "nll_loss": 0.11612551659345627, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.534131817810703e-05, + "rewards/margins": 0.28203946352005005, + "rewards/rejected": -0.2820647954940796, + "step": 10873 + }, + { + "epoch": 7.520055325034578, + "grad_norm": 4.491504192352295, + "learning_rate": 1.3777470416474566e-05, + "log_odds_chosen": 11.704853057861328, + "log_odds_ratio": -6.659721111645922e-05, + "logits/chosen": -0.7244716286659241, + "logits/rejected": -0.687706470489502, + "logps/chosen": -0.00029626733157783747, + "logps/rejected": -2.5468931198120117, + "loss": 0.4924, + "nll_loss": 0.1230815127491951, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9626738978549838e-05, + "rewards/margins": 0.2546597123146057, + "rewards/rejected": -0.2546893060207367, + "step": 10874 + }, + { + "epoch": 7.520746887966805, + "grad_norm": 6.189776420593262, + "learning_rate": 1.3773628400184419e-05, + "log_odds_chosen": 9.723426818847656, + "log_odds_ratio": -0.00011284545325906947, + "logits/chosen": 0.09950869530439377, + "logits/rejected": 0.02981482446193695, + "logps/chosen": -0.0002519416739232838, + "logps/rejected": -1.3983118534088135, + "loss": 0.4422, + "nll_loss": 0.1105465441942215, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5194169211317785e-05, + "rewards/margins": 0.13980598747730255, + "rewards/rejected": -0.13983118534088135, + "step": 10875 + }, + { + "epoch": 7.521438450899032, + "grad_norm": 4.315496921539307, + "learning_rate": 1.3769786383894268e-05, + "log_odds_chosen": 9.566936492919922, + "log_odds_ratio": -0.00027100040460936725, + "logits/chosen": -0.5626333951950073, + "logits/rejected": -0.5699162483215332, + "logps/chosen": -0.00043434806866571307, + "logps/rejected": -1.7643786668777466, + "loss": 0.4788, + "nll_loss": 0.11968132853507996, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3434811232145876e-05, + "rewards/margins": 0.17639443278312683, + "rewards/rejected": -0.1764378547668457, + "step": 10876 + }, + { + "epoch": 7.522130013831259, + "grad_norm": 5.777177333831787, + "learning_rate": 1.3765944367604119e-05, + "log_odds_chosen": 11.527708053588867, + "log_odds_ratio": -4.7561734390910715e-05, + "logits/chosen": -0.4485383629798889, + "logits/rejected": -0.49741944670677185, + "logps/chosen": -0.0002845055714715272, + "logps/rejected": -3.065023899078369, + "loss": 0.486, + "nll_loss": 0.12149247527122498, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.845055678335484e-05, + "rewards/margins": 0.3064739406108856, + "rewards/rejected": -0.30650240182876587, + "step": 10877 + }, + { + "epoch": 7.522821576763485, + "grad_norm": 7.339626789093018, + "learning_rate": 1.3762102351313971e-05, + "log_odds_chosen": 12.749917984008789, + "log_odds_ratio": -5.649513695971109e-06, + "logits/chosen": -0.6558374166488647, + "logits/rejected": -0.6490030288696289, + "logps/chosen": -9.840505663305521e-05, + "logps/rejected": -3.400402069091797, + "loss": 0.3943, + "nll_loss": 0.09857062995433807, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.840506209002342e-06, + "rewards/margins": 0.34003040194511414, + "rewards/rejected": -0.3400402367115021, + "step": 10878 + }, + { + "epoch": 7.523513139695712, + "grad_norm": 3.726856231689453, + "learning_rate": 1.375826033502382e-05, + "log_odds_chosen": 11.200907707214355, + "log_odds_ratio": -6.0507183661684394e-05, + "logits/chosen": 0.2244858741760254, + "logits/rejected": 0.21234449744224548, + "logps/chosen": -0.00015088150394149125, + "logps/rejected": -2.1337900161743164, + "loss": 0.4039, + "nll_loss": 0.10097833722829819, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5088150576048065e-05, + "rewards/margins": 0.213363915681839, + "rewards/rejected": -0.21337899565696716, + "step": 10879 + }, + { + "epoch": 7.524204702627939, + "grad_norm": 4.425390720367432, + "learning_rate": 1.3754418318733673e-05, + "log_odds_chosen": 10.128186225891113, + "log_odds_ratio": -0.00025991900474764407, + "logits/chosen": -0.5495592355728149, + "logits/rejected": -0.6602213978767395, + "logps/chosen": -0.0003403293085284531, + "logps/rejected": -1.90929114818573, + "loss": 0.4687, + "nll_loss": 0.11716103553771973, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.403293521841988e-05, + "rewards/margins": 0.19089508056640625, + "rewards/rejected": -0.190929114818573, + "step": 10880 + }, + { + "epoch": 7.524896265560166, + "grad_norm": 9.592628479003906, + "learning_rate": 1.3750576302443523e-05, + "log_odds_chosen": 10.581683158874512, + "log_odds_ratio": -9.209234121954069e-05, + "logits/chosen": -0.42138999700546265, + "logits/rejected": -0.36162620782852173, + "logps/chosen": -0.00041084305848926306, + "logps/rejected": -2.096816301345825, + "loss": 0.8981, + "nll_loss": 0.22452345490455627, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.108430948690511e-05, + "rewards/margins": 0.2096405327320099, + "rewards/rejected": -0.20968163013458252, + "step": 10881 + }, + { + "epoch": 7.525587828492393, + "grad_norm": 5.487520217895508, + "learning_rate": 1.3746734286153374e-05, + "log_odds_chosen": 9.746979713439941, + "log_odds_ratio": -0.0008402118692174554, + "logits/chosen": 0.11258521676063538, + "logits/rejected": -0.005723059177398682, + "logps/chosen": -0.0005987212061882019, + "logps/rejected": -1.7435500621795654, + "loss": 0.432, + "nll_loss": 0.10791030526161194, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987211989122443e-05, + "rewards/margins": 0.17429512739181519, + "rewards/rejected": -0.17435501515865326, + "step": 10882 + }, + { + "epoch": 7.5262793914246195, + "grad_norm": 3.8324203491210938, + "learning_rate": 1.3742892269863225e-05, + "log_odds_chosen": 11.629316329956055, + "log_odds_ratio": -2.1213923901086673e-05, + "logits/chosen": 0.08265161514282227, + "logits/rejected": -0.0059318579733371735, + "logps/chosen": -0.00020495994249358773, + "logps/rejected": -2.784921169281006, + "loss": 0.4584, + "nll_loss": 0.11459926515817642, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0495994249358773e-05, + "rewards/margins": 0.2784716486930847, + "rewards/rejected": -0.27849212288856506, + "step": 10883 + }, + { + "epoch": 7.526970954356846, + "grad_norm": 4.976540565490723, + "learning_rate": 1.3739050253573077e-05, + "log_odds_chosen": 10.63410758972168, + "log_odds_ratio": -0.00015587112284265459, + "logits/chosen": -0.23880663514137268, + "logits/rejected": -0.28580334782600403, + "logps/chosen": -0.0003356942906975746, + "logps/rejected": -2.1784019470214844, + "loss": 0.5595, + "nll_loss": 0.1398695409297943, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.356942761456594e-05, + "rewards/margins": 0.2178066074848175, + "rewards/rejected": -0.21784019470214844, + "step": 10884 + }, + { + "epoch": 7.527662517289073, + "grad_norm": 5.252691745758057, + "learning_rate": 1.3735208237282926e-05, + "log_odds_chosen": 10.884276390075684, + "log_odds_ratio": -0.00032903405372053385, + "logits/chosen": -0.4288550019264221, + "logits/rejected": -0.4972131550312042, + "logps/chosen": -0.00042299655615352094, + "logps/rejected": -2.275272846221924, + "loss": 0.3326, + "nll_loss": 0.0831230953335762, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.229965998092666e-05, + "rewards/margins": 0.22748498618602753, + "rewards/rejected": -0.22752729058265686, + "step": 10885 + }, + { + "epoch": 7.5283540802213, + "grad_norm": 4.7539896965026855, + "learning_rate": 1.3731366220992779e-05, + "log_odds_chosen": 12.380012512207031, + "log_odds_ratio": -1.0317680789739825e-05, + "logits/chosen": -0.6371591091156006, + "logits/rejected": -0.5750231742858887, + "logps/chosen": -0.00010312439553672448, + "logps/rejected": -2.810936450958252, + "loss": 0.5902, + "nll_loss": 0.14755801856517792, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0312439371773507e-05, + "rewards/margins": 0.281083345413208, + "rewards/rejected": -0.28109365701675415, + "step": 10886 + }, + { + "epoch": 7.529045643153527, + "grad_norm": 4.949764251708984, + "learning_rate": 1.372752420470263e-05, + "log_odds_chosen": 9.83175277709961, + "log_odds_ratio": -8.935166988521814e-05, + "logits/chosen": -0.7508769631385803, + "logits/rejected": -0.7804704904556274, + "logps/chosen": -0.0005445395363494754, + "logps/rejected": -2.1102652549743652, + "loss": 0.4111, + "nll_loss": 0.10276912897825241, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.445395072456449e-05, + "rewards/margins": 0.21097204089164734, + "rewards/rejected": -0.21102651953697205, + "step": 10887 + }, + { + "epoch": 7.529737206085754, + "grad_norm": 8.927925109863281, + "learning_rate": 1.3723682188412479e-05, + "log_odds_chosen": 11.731058120727539, + "log_odds_ratio": -2.9501774406526238e-05, + "logits/chosen": -0.3342224657535553, + "logits/rejected": -0.4032493829727173, + "logps/chosen": -0.0001932134327944368, + "logps/rejected": -2.837803363800049, + "loss": 0.5749, + "nll_loss": 0.14372438192367554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.932134182425216e-05, + "rewards/margins": 0.28376102447509766, + "rewards/rejected": -0.2837803363800049, + "step": 10888 + }, + { + "epoch": 7.5304287690179805, + "grad_norm": 4.814039707183838, + "learning_rate": 1.3719840172122331e-05, + "log_odds_chosen": 11.000864028930664, + "log_odds_ratio": -3.4564662200864404e-05, + "logits/chosen": -0.37925636768341064, + "logits/rejected": -0.5058153867721558, + "logps/chosen": -0.00013589671289082617, + "logps/rejected": -1.8084535598754883, + "loss": 0.3503, + "nll_loss": 0.08757337927818298, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3589671652880497e-05, + "rewards/margins": 0.18083176016807556, + "rewards/rejected": -0.18084535002708435, + "step": 10889 + }, + { + "epoch": 7.531120331950207, + "grad_norm": 6.472630500793457, + "learning_rate": 1.3715998155832183e-05, + "log_odds_chosen": 11.88841438293457, + "log_odds_ratio": -1.0142521205125377e-05, + "logits/chosen": -0.33784234523773193, + "logits/rejected": -0.48194387555122375, + "logps/chosen": -0.00014380461652763188, + "logps/rejected": -2.7480721473693848, + "loss": 0.4279, + "nll_loss": 0.10696800798177719, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4380460925167426e-05, + "rewards/margins": 0.2747928500175476, + "rewards/rejected": -0.2748072147369385, + "step": 10890 + }, + { + "epoch": 7.531811894882434, + "grad_norm": 3.6994967460632324, + "learning_rate": 1.3712156139542032e-05, + "log_odds_chosen": 11.189812660217285, + "log_odds_ratio": -5.345175668480806e-05, + "logits/chosen": -0.1665852963924408, + "logits/rejected": -0.2894322872161865, + "logps/chosen": -0.0002716692688409239, + "logps/rejected": -2.516805648803711, + "loss": 0.9048, + "nll_loss": 0.22619935870170593, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.716692688409239e-05, + "rewards/margins": 0.25165340304374695, + "rewards/rejected": -0.25168055295944214, + "step": 10891 + }, + { + "epoch": 7.532503457814661, + "grad_norm": 5.318807125091553, + "learning_rate": 1.3708314123251883e-05, + "log_odds_chosen": 10.00977897644043, + "log_odds_ratio": -0.00037104147486388683, + "logits/chosen": -0.8708664178848267, + "logits/rejected": -0.8931385278701782, + "logps/chosen": -0.00028637779178097844, + "logps/rejected": -1.5222827196121216, + "loss": 0.6229, + "nll_loss": 0.155700221657753, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.86377762677148e-05, + "rewards/margins": 0.1521996408700943, + "rewards/rejected": -0.15222826600074768, + "step": 10892 + }, + { + "epoch": 7.533195020746888, + "grad_norm": 5.508212089538574, + "learning_rate": 1.3704472106961736e-05, + "log_odds_chosen": 10.706792831420898, + "log_odds_ratio": -4.5154974941397086e-05, + "logits/chosen": -0.543492317199707, + "logits/rejected": -0.5513139367103577, + "logps/chosen": -0.00043358042603358626, + "logps/rejected": -1.8944940567016602, + "loss": 0.5116, + "nll_loss": 0.1279076486825943, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3358046241337433e-05, + "rewards/margins": 0.18940606713294983, + "rewards/rejected": -0.18944941461086273, + "step": 10893 + }, + { + "epoch": 7.533886583679115, + "grad_norm": 4.529955863952637, + "learning_rate": 1.3700630090671585e-05, + "log_odds_chosen": 11.382387161254883, + "log_odds_ratio": -3.067726720473729e-05, + "logits/chosen": -0.7753385305404663, + "logits/rejected": -0.8173359632492065, + "logps/chosen": -8.34188685985282e-05, + "logps/rejected": -2.0420587062835693, + "loss": 0.4156, + "nll_loss": 0.10389338433742523, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.341887223650701e-06, + "rewards/margins": 0.20419752597808838, + "rewards/rejected": -0.20420587062835693, + "step": 10894 + }, + { + "epoch": 7.5345781466113415, + "grad_norm": 4.1907854080200195, + "learning_rate": 1.3696788074381437e-05, + "log_odds_chosen": 9.962699890136719, + "log_odds_ratio": -8.843952673487365e-05, + "logits/chosen": -0.5206528902053833, + "logits/rejected": -0.6122863292694092, + "logps/chosen": -0.0005318694747984409, + "logps/rejected": -1.814341425895691, + "loss": 0.4367, + "nll_loss": 0.10916710644960403, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318695184541866e-05, + "rewards/margins": 0.181380957365036, + "rewards/rejected": -0.18143412470817566, + "step": 10895 + }, + { + "epoch": 7.535269709543568, + "grad_norm": 6.572483062744141, + "learning_rate": 1.3692946058091288e-05, + "log_odds_chosen": 10.755268096923828, + "log_odds_ratio": -6.7830944317393e-05, + "logits/chosen": -0.4748067855834961, + "logits/rejected": -0.5554353594779968, + "logps/chosen": -0.0005805790424346924, + "logps/rejected": -2.811382293701172, + "loss": 0.4749, + "nll_loss": 0.11872723698616028, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.805790351587348e-05, + "rewards/margins": 0.2810802161693573, + "rewards/rejected": -0.28113824129104614, + "step": 10896 + }, + { + "epoch": 7.535961272475795, + "grad_norm": 4.007783889770508, + "learning_rate": 1.3689104041801137e-05, + "log_odds_chosen": 11.30660343170166, + "log_odds_ratio": -0.0003542294434737414, + "logits/chosen": -0.5386285185813904, + "logits/rejected": -0.5860005021095276, + "logps/chosen": -9.375077934237197e-05, + "logps/rejected": -2.324409008026123, + "loss": 0.575, + "nll_loss": 0.14370985329151154, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.375078661832958e-06, + "rewards/margins": 0.2324315309524536, + "rewards/rejected": -0.23244090378284454, + "step": 10897 + }, + { + "epoch": 7.536652835408022, + "grad_norm": 5.47639274597168, + "learning_rate": 1.368526202551099e-05, + "log_odds_chosen": 11.627883911132812, + "log_odds_ratio": -2.6631163564161398e-05, + "logits/chosen": -0.3915488123893738, + "logits/rejected": -0.4371181130409241, + "logps/chosen": -0.00011329659901093692, + "logps/rejected": -2.546294927597046, + "loss": 0.298, + "nll_loss": 0.07449459284543991, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1329660082992632e-05, + "rewards/margins": 0.25461816787719727, + "rewards/rejected": -0.2546294927597046, + "step": 10898 + }, + { + "epoch": 7.537344398340249, + "grad_norm": 13.84184741973877, + "learning_rate": 1.3681420009220842e-05, + "log_odds_chosen": 10.913511276245117, + "log_odds_ratio": -3.6600089515559375e-05, + "logits/chosen": -0.50941401720047, + "logits/rejected": -0.592617392539978, + "logps/chosen": -0.00012552604312077165, + "logps/rejected": -2.05088210105896, + "loss": 0.4461, + "nll_loss": 0.11151237785816193, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2552604857773986e-05, + "rewards/margins": 0.2050756812095642, + "rewards/rejected": -0.20508822798728943, + "step": 10899 + }, + { + "epoch": 7.538035961272476, + "grad_norm": 8.043490409851074, + "learning_rate": 1.3677577992930691e-05, + "log_odds_chosen": 10.945327758789062, + "log_odds_ratio": -4.287495539756492e-05, + "logits/chosen": -0.2591250538825989, + "logits/rejected": -0.3053608536720276, + "logps/chosen": -0.0002915628720074892, + "logps/rejected": -2.4187121391296387, + "loss": 0.3408, + "nll_loss": 0.08519420772790909, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9156288292142563e-05, + "rewards/margins": 0.24184203147888184, + "rewards/rejected": -0.241871178150177, + "step": 10900 + }, + { + "epoch": 7.5387275242047025, + "grad_norm": 6.757442951202393, + "learning_rate": 1.3673735976640542e-05, + "log_odds_chosen": 10.361787796020508, + "log_odds_ratio": -9.385471639689058e-05, + "logits/chosen": -0.5704642534255981, + "logits/rejected": -0.6144750118255615, + "logps/chosen": -0.00023506373690906912, + "logps/rejected": -1.5697286128997803, + "loss": 0.4696, + "nll_loss": 0.11740292608737946, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3506374418502674e-05, + "rewards/margins": 0.15694934129714966, + "rewards/rejected": -0.15697285532951355, + "step": 10901 + }, + { + "epoch": 7.539419087136929, + "grad_norm": 4.761688232421875, + "learning_rate": 1.366989396035039e-05, + "log_odds_chosen": 11.750556945800781, + "log_odds_ratio": -1.5190888007055037e-05, + "logits/chosen": -0.6291791796684265, + "logits/rejected": -0.6748467683792114, + "logps/chosen": -8.288262324640527e-05, + "logps/rejected": -2.036034107208252, + "loss": 0.399, + "nll_loss": 0.09974059462547302, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.288262506539468e-06, + "rewards/margins": 0.2035951316356659, + "rewards/rejected": -0.20360340178012848, + "step": 10902 + }, + { + "epoch": 7.540110650069156, + "grad_norm": 4.356700897216797, + "learning_rate": 1.3666051944060243e-05, + "log_odds_chosen": 9.920604705810547, + "log_odds_ratio": -0.00011185869516339153, + "logits/chosen": -0.24763020873069763, + "logits/rejected": -0.22162744402885437, + "logps/chosen": -0.00019569398136809468, + "logps/rejected": -1.5281915664672852, + "loss": 0.4347, + "nll_loss": 0.10866691172122955, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9569397409213707e-05, + "rewards/margins": 0.152799591422081, + "rewards/rejected": -0.1528191715478897, + "step": 10903 + }, + { + "epoch": 7.540802213001383, + "grad_norm": 5.018890380859375, + "learning_rate": 1.3662209927770096e-05, + "log_odds_chosen": 11.4712553024292, + "log_odds_ratio": -1.9771054212469608e-05, + "logits/chosen": -0.3403857350349426, + "logits/rejected": -0.3579222559928894, + "logps/chosen": -0.00012209927081130445, + "logps/rejected": -2.3484385013580322, + "loss": 0.869, + "nll_loss": 0.2172483205795288, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2209927263029385e-05, + "rewards/margins": 0.23483163118362427, + "rewards/rejected": -0.23484385013580322, + "step": 10904 + }, + { + "epoch": 7.54149377593361, + "grad_norm": 4.402528762817383, + "learning_rate": 1.3658367911479945e-05, + "log_odds_chosen": 11.565021514892578, + "log_odds_ratio": -3.308330997242592e-05, + "logits/chosen": -0.5835554003715515, + "logits/rejected": -0.7503261566162109, + "logps/chosen": -0.0005653960397467017, + "logps/rejected": -3.1881885528564453, + "loss": 0.6669, + "nll_loss": 0.1667259931564331, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.653960397467017e-05, + "rewards/margins": 0.31876230239868164, + "rewards/rejected": -0.3188188672065735, + "step": 10905 + }, + { + "epoch": 7.542185338865837, + "grad_norm": 5.141797065734863, + "learning_rate": 1.3654525895189795e-05, + "log_odds_chosen": 10.716361999511719, + "log_odds_ratio": -0.0001025652454700321, + "logits/chosen": -0.10361681878566742, + "logits/rejected": -0.13290606439113617, + "logps/chosen": -0.000345752079738304, + "logps/rejected": -2.291757822036743, + "loss": 0.6552, + "nll_loss": 0.16379579901695251, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.457520870142616e-05, + "rewards/margins": 0.2291412204504013, + "rewards/rejected": -0.22917580604553223, + "step": 10906 + }, + { + "epoch": 7.5428769017980635, + "grad_norm": 5.401771068572998, + "learning_rate": 1.3650683878899648e-05, + "log_odds_chosen": 11.184661865234375, + "log_odds_ratio": -0.00032188615296036005, + "logits/chosen": -0.28486594557762146, + "logits/rejected": -0.28327369689941406, + "logps/chosen": -0.0005774404271505773, + "logps/rejected": -2.8740031719207764, + "loss": 0.5407, + "nll_loss": 0.1351354569196701, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.774404780822806e-05, + "rewards/margins": 0.2873425781726837, + "rewards/rejected": -0.2874003052711487, + "step": 10907 + }, + { + "epoch": 7.54356846473029, + "grad_norm": 5.230515956878662, + "learning_rate": 1.3646841862609497e-05, + "log_odds_chosen": 10.53951358795166, + "log_odds_ratio": -0.00010120034858118743, + "logits/chosen": 0.010187406092882156, + "logits/rejected": -0.06117922440171242, + "logps/chosen": -0.00033147024805657566, + "logps/rejected": -2.029526472091675, + "loss": 0.519, + "nll_loss": 0.12973934412002563, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.314702189527452e-05, + "rewards/margins": 0.20291949808597565, + "rewards/rejected": -0.20295265316963196, + "step": 10908 + }, + { + "epoch": 7.544260027662517, + "grad_norm": 7.434238433837891, + "learning_rate": 1.364299984631935e-05, + "log_odds_chosen": 11.641447067260742, + "log_odds_ratio": -3.3890286431415007e-05, + "logits/chosen": -0.2585147023200989, + "logits/rejected": -0.19221074879169464, + "logps/chosen": -0.000364738138159737, + "logps/rejected": -3.0308213233947754, + "loss": 0.9996, + "nll_loss": 0.24988438189029694, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.647381527116522e-05, + "rewards/margins": 0.3030456304550171, + "rewards/rejected": -0.30308210849761963, + "step": 10909 + }, + { + "epoch": 7.544951590594744, + "grad_norm": 11.842061996459961, + "learning_rate": 1.36391578300292e-05, + "log_odds_chosen": 10.118453025817871, + "log_odds_ratio": -0.0001638751127757132, + "logits/chosen": -0.2586163580417633, + "logits/rejected": -0.3308694362640381, + "logps/chosen": -0.0005985454190522432, + "logps/rejected": -1.85396146774292, + "loss": 0.4477, + "nll_loss": 0.11192052066326141, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985454481560737e-05, + "rewards/margins": 0.18533629179000854, + "rewards/rejected": -0.18539613485336304, + "step": 10910 + }, + { + "epoch": 7.545643153526971, + "grad_norm": 4.961551189422607, + "learning_rate": 1.363531581373905e-05, + "log_odds_chosen": 10.454790115356445, + "log_odds_ratio": -0.00013295510143507272, + "logits/chosen": -0.42641448974609375, + "logits/rejected": -0.42252567410469055, + "logps/chosen": -0.00023922814580146223, + "logps/rejected": -1.8349452018737793, + "loss": 0.3928, + "nll_loss": 0.0981857031583786, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3922815671539865e-05, + "rewards/margins": 0.18347060680389404, + "rewards/rejected": -0.18349452316761017, + "step": 10911 + }, + { + "epoch": 7.546334716459198, + "grad_norm": 3.9336705207824707, + "learning_rate": 1.3631473797448902e-05, + "log_odds_chosen": 10.669046401977539, + "log_odds_ratio": -7.399608148261905e-05, + "logits/chosen": -0.16355778276920319, + "logits/rejected": -0.23712387681007385, + "logps/chosen": -0.0004548661527223885, + "logps/rejected": -2.373354911804199, + "loss": 0.4048, + "nll_loss": 0.10118485987186432, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5486616727430373e-05, + "rewards/margins": 0.23728998005390167, + "rewards/rejected": -0.2373354732990265, + "step": 10912 + }, + { + "epoch": 7.5470262793914245, + "grad_norm": 3.3881542682647705, + "learning_rate": 1.3627631781158754e-05, + "log_odds_chosen": 9.861526489257812, + "log_odds_ratio": -0.0007383470074273646, + "logits/chosen": -0.34626615047454834, + "logits/rejected": -0.34497809410095215, + "logps/chosen": -0.0006527705118060112, + "logps/rejected": -1.7543696165084839, + "loss": 0.3492, + "nll_loss": 0.08722208440303802, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.527705409098417e-05, + "rewards/margins": 0.1753716766834259, + "rewards/rejected": -0.17543694376945496, + "step": 10913 + }, + { + "epoch": 7.547717842323651, + "grad_norm": 3.727787971496582, + "learning_rate": 1.3623789764868603e-05, + "log_odds_chosen": 9.567840576171875, + "log_odds_ratio": -0.0009155230945907533, + "logits/chosen": -0.030373331159353256, + "logits/rejected": -0.031623005867004395, + "logps/chosen": -0.0018961422611027956, + "logps/rejected": -1.626967430114746, + "loss": 0.7449, + "nll_loss": 0.18613487482070923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018961422028951347, + "rewards/margins": 0.16250713169574738, + "rewards/rejected": -0.1626967489719391, + "step": 10914 + }, + { + "epoch": 7.548409405255878, + "grad_norm": 5.287073612213135, + "learning_rate": 1.3619947748578454e-05, + "log_odds_chosen": 10.789902687072754, + "log_odds_ratio": -5.0830250984290615e-05, + "logits/chosen": -0.9028030037879944, + "logits/rejected": -0.9219222664833069, + "logps/chosen": -0.00033289406565018, + "logps/rejected": -2.100735664367676, + "loss": 0.3585, + "nll_loss": 0.08961069583892822, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.328940874780528e-05, + "rewards/margins": 0.21004027128219604, + "rewards/rejected": -0.2100735753774643, + "step": 10915 + }, + { + "epoch": 7.549100968188105, + "grad_norm": 4.347574234008789, + "learning_rate": 1.3616105732288306e-05, + "log_odds_chosen": 10.415060043334961, + "log_odds_ratio": -9.381695417687297e-05, + "logits/chosen": -0.31361645460128784, + "logits/rejected": -0.4492868483066559, + "logps/chosen": -0.00033570057712495327, + "logps/rejected": -2.3112096786499023, + "loss": 0.2271, + "nll_loss": 0.05677155405282974, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.357006062287837e-05, + "rewards/margins": 0.23108740150928497, + "rewards/rejected": -0.2311209738254547, + "step": 10916 + }, + { + "epoch": 7.549792531120332, + "grad_norm": 4.294283390045166, + "learning_rate": 1.3612263715998155e-05, + "log_odds_chosen": 10.835281372070312, + "log_odds_ratio": -3.1858988222666085e-05, + "logits/chosen": -0.05257886275649071, + "logits/rejected": -0.055259451270103455, + "logps/chosen": -0.00035562628181651235, + "logps/rejected": -2.4179494380950928, + "loss": 0.4152, + "nll_loss": 0.10379417985677719, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5562628909246996e-05, + "rewards/margins": 0.24175938963890076, + "rewards/rejected": -0.24179495871067047, + "step": 10917 + }, + { + "epoch": 7.550484094052559, + "grad_norm": 8.6405668258667, + "learning_rate": 1.3608421699708008e-05, + "log_odds_chosen": 10.642109870910645, + "log_odds_ratio": -0.00011460207315394655, + "logits/chosen": -0.4259280562400818, + "logits/rejected": -0.4998432993888855, + "logps/chosen": -0.00035607305471785367, + "logps/rejected": -2.274139165878296, + "loss": 0.4877, + "nll_loss": 0.12190458178520203, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.560730692697689e-05, + "rewards/margins": 0.22737830877304077, + "rewards/rejected": -0.22741392254829407, + "step": 10918 + }, + { + "epoch": 7.551175656984785, + "grad_norm": 4.222203731536865, + "learning_rate": 1.3604579683417859e-05, + "log_odds_chosen": 10.999160766601562, + "log_odds_ratio": -3.665951226139441e-05, + "logits/chosen": -0.5962536334991455, + "logits/rejected": -0.6591715812683105, + "logps/chosen": -0.00010887366079259664, + "logps/rejected": -1.5783509016036987, + "loss": 0.3694, + "nll_loss": 0.09234155714511871, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0887366443057545e-05, + "rewards/margins": 0.15782421827316284, + "rewards/rejected": -0.15783509612083435, + "step": 10919 + }, + { + "epoch": 7.551867219917012, + "grad_norm": 4.335033416748047, + "learning_rate": 1.3600737667127708e-05, + "log_odds_chosen": 10.389474868774414, + "log_odds_ratio": -6.485909398179501e-05, + "logits/chosen": -0.1939990371465683, + "logits/rejected": -0.242635577917099, + "logps/chosen": -0.00023443308600690216, + "logps/rejected": -2.0197837352752686, + "loss": 0.3774, + "nll_loss": 0.09434625506401062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3443308236892335e-05, + "rewards/margins": 0.2019549310207367, + "rewards/rejected": -0.20197835564613342, + "step": 10920 + }, + { + "epoch": 7.552558782849239, + "grad_norm": 4.163060665130615, + "learning_rate": 1.359689565083756e-05, + "log_odds_chosen": 10.459320068359375, + "log_odds_ratio": -0.0014625128824263811, + "logits/chosen": -0.4592801034450531, + "logits/rejected": -0.5197211503982544, + "logps/chosen": -0.0027722991071641445, + "logps/rejected": -2.700474500656128, + "loss": 0.4314, + "nll_loss": 0.10769951343536377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027722990489564836, + "rewards/margins": 0.26977020502090454, + "rewards/rejected": -0.27004745602607727, + "step": 10921 + }, + { + "epoch": 7.553250345781466, + "grad_norm": 7.067339897155762, + "learning_rate": 1.3593053634547412e-05, + "log_odds_chosen": 11.163808822631836, + "log_odds_ratio": -9.274063631892204e-05, + "logits/chosen": -0.5344827771186829, + "logits/rejected": -0.5484803915023804, + "logps/chosen": -7.391178223770112e-05, + "logps/rejected": -2.0709049701690674, + "loss": 0.8812, + "nll_loss": 0.22027882933616638, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.391178769466933e-06, + "rewards/margins": 0.20708312094211578, + "rewards/rejected": -0.20709051191806793, + "step": 10922 + }, + { + "epoch": 7.553941908713693, + "grad_norm": 4.202138423919678, + "learning_rate": 1.3589211618257262e-05, + "log_odds_chosen": 10.711816787719727, + "log_odds_ratio": -0.0009630117565393448, + "logits/chosen": -0.5583059191703796, + "logits/rejected": -0.6031702756881714, + "logps/chosen": -0.0003722950932569802, + "logps/rejected": -1.7346935272216797, + "loss": 0.5121, + "nll_loss": 0.12792344391345978, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7229507142910734e-05, + "rewards/margins": 0.1734321117401123, + "rewards/rejected": -0.17346934974193573, + "step": 10923 + }, + { + "epoch": 7.55463347164592, + "grad_norm": 4.490311622619629, + "learning_rate": 1.3585369601967112e-05, + "log_odds_chosen": 10.83390998840332, + "log_odds_ratio": -5.0170037866337225e-05, + "logits/chosen": -0.5973794460296631, + "logits/rejected": -0.6883317232131958, + "logps/chosen": -0.0004492515290621668, + "logps/rejected": -2.4969382286071777, + "loss": 0.457, + "nll_loss": 0.11424939334392548, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.492515290621668e-05, + "rewards/margins": 0.24964889883995056, + "rewards/rejected": -0.24969382584095, + "step": 10924 + }, + { + "epoch": 7.555325034578146, + "grad_norm": 3.2777633666992188, + "learning_rate": 1.3581527585676965e-05, + "log_odds_chosen": 11.443975448608398, + "log_odds_ratio": -3.585993545129895e-05, + "logits/chosen": -0.6728773713111877, + "logits/rejected": -0.8710072040557861, + "logps/chosen": -0.00010490731074241921, + "logps/rejected": -2.018962860107422, + "loss": 0.251, + "nll_loss": 0.062735915184021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0490731256140862e-05, + "rewards/margins": 0.20188578963279724, + "rewards/rejected": -0.2018962949514389, + "step": 10925 + }, + { + "epoch": 7.556016597510373, + "grad_norm": 3.8097290992736816, + "learning_rate": 1.3577685569386814e-05, + "log_odds_chosen": 11.300907135009766, + "log_odds_ratio": -9.683187818154693e-05, + "logits/chosen": -0.43710342049598694, + "logits/rejected": -0.3327723741531372, + "logps/chosen": -0.00046393644879572093, + "logps/rejected": -2.8096423149108887, + "loss": 0.3756, + "nll_loss": 0.09388810396194458, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6393644879572093e-05, + "rewards/margins": 0.28091785311698914, + "rewards/rejected": -0.2809642255306244, + "step": 10926 + }, + { + "epoch": 7.5567081604426, + "grad_norm": 6.978212833404541, + "learning_rate": 1.3573843553096666e-05, + "log_odds_chosen": 11.489490509033203, + "log_odds_ratio": -1.5987618098733947e-05, + "logits/chosen": -0.2056851089000702, + "logits/rejected": -0.2149638533592224, + "logps/chosen": -0.0001905599783640355, + "logps/rejected": -2.3923375606536865, + "loss": 0.5754, + "nll_loss": 0.14384540915489197, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.905599856399931e-05, + "rewards/margins": 0.239214688539505, + "rewards/rejected": -0.23923374712467194, + "step": 10927 + }, + { + "epoch": 7.557399723374827, + "grad_norm": 10.007233619689941, + "learning_rate": 1.3570001536806517e-05, + "log_odds_chosen": 11.136518478393555, + "log_odds_ratio": -2.728265644691419e-05, + "logits/chosen": -0.21735602617263794, + "logits/rejected": -0.17750519514083862, + "logps/chosen": -0.0002487509627826512, + "logps/rejected": -2.4564075469970703, + "loss": 0.474, + "nll_loss": 0.1185031458735466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4875096642063e-05, + "rewards/margins": 0.2456158846616745, + "rewards/rejected": -0.24564076960086823, + "step": 10928 + }, + { + "epoch": 7.558091286307054, + "grad_norm": 3.546488046646118, + "learning_rate": 1.3566159520516366e-05, + "log_odds_chosen": 10.552568435668945, + "log_odds_ratio": -5.7265235227532685e-05, + "logits/chosen": -0.5594015121459961, + "logits/rejected": -0.616118311882019, + "logps/chosen": -0.00035102260881103575, + "logps/rejected": -1.9318368434906006, + "loss": 0.4211, + "nll_loss": 0.1052689179778099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5102260881103575e-05, + "rewards/margins": 0.19314856827259064, + "rewards/rejected": -0.19318366050720215, + "step": 10929 + }, + { + "epoch": 7.558782849239281, + "grad_norm": 3.83551287651062, + "learning_rate": 1.3562317504226218e-05, + "log_odds_chosen": 11.369571685791016, + "log_odds_ratio": -0.00012164629151811823, + "logits/chosen": -0.3469897210597992, + "logits/rejected": -0.39686405658721924, + "logps/chosen": -0.0004319645231589675, + "logps/rejected": -2.764150619506836, + "loss": 0.4561, + "nll_loss": 0.1140192300081253, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.319645086070523e-05, + "rewards/margins": 0.27637186646461487, + "rewards/rejected": -0.276415079832077, + "step": 10930 + }, + { + "epoch": 7.559474412171507, + "grad_norm": 4.2708845138549805, + "learning_rate": 1.3558475487936071e-05, + "log_odds_chosen": 10.607118606567383, + "log_odds_ratio": -6.914585537742823e-05, + "logits/chosen": -0.4425663948059082, + "logits/rejected": -0.491806298494339, + "logps/chosen": -0.00018268710118718445, + "logps/rejected": -1.8966383934020996, + "loss": 0.6051, + "nll_loss": 0.1512637436389923, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8268709027324803e-05, + "rewards/margins": 0.18964557349681854, + "rewards/rejected": -0.1896638423204422, + "step": 10931 + }, + { + "epoch": 7.560165975103734, + "grad_norm": 5.706757068634033, + "learning_rate": 1.355463347164592e-05, + "log_odds_chosen": 10.261611938476562, + "log_odds_ratio": -0.0001751371455611661, + "logits/chosen": -0.23027479648590088, + "logits/rejected": -0.2460954487323761, + "logps/chosen": -0.0038980338722467422, + "logps/rejected": -2.111262798309326, + "loss": 0.3182, + "nll_loss": 0.07953331619501114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00038980337558314204, + "rewards/margins": 0.210736483335495, + "rewards/rejected": -0.21112629771232605, + "step": 10932 + }, + { + "epoch": 7.560857538035961, + "grad_norm": 4.881253242492676, + "learning_rate": 1.355079145535577e-05, + "log_odds_chosen": 11.820080757141113, + "log_odds_ratio": -0.0004292959056328982, + "logits/chosen": -0.5062116980552673, + "logits/rejected": -0.4614921510219574, + "logps/chosen": -0.00029794700094498694, + "logps/rejected": -3.201214551925659, + "loss": 0.3764, + "nll_loss": 0.09405495971441269, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9794700822094455e-05, + "rewards/margins": 0.3200916647911072, + "rewards/rejected": -0.3201214671134949, + "step": 10933 + }, + { + "epoch": 7.561549100968188, + "grad_norm": 6.380796432495117, + "learning_rate": 1.3546949439065623e-05, + "log_odds_chosen": 10.26038646697998, + "log_odds_ratio": -6.220456270966679e-05, + "logits/chosen": -0.5859804153442383, + "logits/rejected": -0.6496269702911377, + "logps/chosen": -0.0006967331864871085, + "logps/rejected": -1.7505897283554077, + "loss": 0.5233, + "nll_loss": 0.13082881271839142, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.967331864871085e-05, + "rewards/margins": 0.17498929798603058, + "rewards/rejected": -0.175058975815773, + "step": 10934 + }, + { + "epoch": 7.562240663900415, + "grad_norm": 4.680851459503174, + "learning_rate": 1.3543107422775472e-05, + "log_odds_chosen": 10.964227676391602, + "log_odds_ratio": -2.9342954803723842e-05, + "logits/chosen": -0.40084123611450195, + "logits/rejected": -0.4273257255554199, + "logps/chosen": -0.00015311934112105519, + "logps/rejected": -1.6325464248657227, + "loss": 0.3898, + "nll_loss": 0.0974491536617279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.53119344759034e-05, + "rewards/margins": 0.16323933005332947, + "rewards/rejected": -0.16325464844703674, + "step": 10935 + }, + { + "epoch": 7.5629322268326415, + "grad_norm": 4.604579925537109, + "learning_rate": 1.3539265406485325e-05, + "log_odds_chosen": 11.361536026000977, + "log_odds_ratio": -2.4415654479525983e-05, + "logits/chosen": -0.4994909167289734, + "logits/rejected": -0.45419901609420776, + "logps/chosen": -7.685035961912945e-05, + "logps/rejected": -1.7258703708648682, + "loss": 0.4156, + "nll_loss": 0.10389953851699829, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.685036507609766e-06, + "rewards/margins": 0.1725793480873108, + "rewards/rejected": -0.172587051987648, + "step": 10936 + }, + { + "epoch": 7.563623789764868, + "grad_norm": 6.140023231506348, + "learning_rate": 1.3535423390195175e-05, + "log_odds_chosen": 10.658794403076172, + "log_odds_ratio": -0.0001274331589229405, + "logits/chosen": -0.35980963706970215, + "logits/rejected": -0.44661083817481995, + "logps/chosen": -0.0009565124055370688, + "logps/rejected": -2.4567627906799316, + "loss": 0.531, + "nll_loss": 0.13273148238658905, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.565124491928145e-05, + "rewards/margins": 0.24558061361312866, + "rewards/rejected": -0.24567627906799316, + "step": 10937 + }, + { + "epoch": 7.564315352697095, + "grad_norm": 6.799753189086914, + "learning_rate": 1.3531581373905024e-05, + "log_odds_chosen": 11.992451667785645, + "log_odds_ratio": -9.671902262198273e-06, + "logits/chosen": -0.6842149496078491, + "logits/rejected": -0.5291277170181274, + "logps/chosen": -7.34966088202782e-05, + "logps/rejected": -1.9616817235946655, + "loss": 0.4859, + "nll_loss": 0.12148353457450867, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3496612458257005e-06, + "rewards/margins": 0.19616082310676575, + "rewards/rejected": -0.1961681842803955, + "step": 10938 + }, + { + "epoch": 7.565006915629322, + "grad_norm": 4.647584915161133, + "learning_rate": 1.3527739357614877e-05, + "log_odds_chosen": 10.580001831054688, + "log_odds_ratio": -0.00015425118908751756, + "logits/chosen": -0.34404534101486206, + "logits/rejected": -0.4109252095222473, + "logps/chosen": -0.00034372409572824836, + "logps/rejected": -2.354987144470215, + "loss": 0.3574, + "nll_loss": 0.08934677392244339, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4372409572824836e-05, + "rewards/margins": 0.23546436429023743, + "rewards/rejected": -0.23549872636795044, + "step": 10939 + }, + { + "epoch": 7.565698478561549, + "grad_norm": 5.138919353485107, + "learning_rate": 1.352389734132473e-05, + "log_odds_chosen": 10.619292259216309, + "log_odds_ratio": -6.635507452301681e-05, + "logits/chosen": -0.6313656568527222, + "logits/rejected": -0.664108395576477, + "logps/chosen": -0.0003709258744493127, + "logps/rejected": -2.564089298248291, + "loss": 0.5316, + "nll_loss": 0.13289271295070648, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.709258817252703e-05, + "rewards/margins": 0.2563718259334564, + "rewards/rejected": -0.2564089298248291, + "step": 10940 + }, + { + "epoch": 7.566390041493776, + "grad_norm": 5.1360955238342285, + "learning_rate": 1.3520055325034578e-05, + "log_odds_chosen": 10.733930587768555, + "log_odds_ratio": -6.731198664056137e-05, + "logits/chosen": -0.3754677176475525, + "logits/rejected": -0.3669604957103729, + "logps/chosen": -0.0004016646998934448, + "logps/rejected": -2.154148578643799, + "loss": 0.4801, + "nll_loss": 0.12000709027051926, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0166469261748716e-05, + "rewards/margins": 0.2153746783733368, + "rewards/rejected": -0.2154148519039154, + "step": 10941 + }, + { + "epoch": 7.5670816044260025, + "grad_norm": 5.6666059494018555, + "learning_rate": 1.351621330874443e-05, + "log_odds_chosen": 10.722338676452637, + "log_odds_ratio": -9.273626346839592e-05, + "logits/chosen": -0.11671411991119385, + "logits/rejected": -0.16290025413036346, + "logps/chosen": -0.0001824825449148193, + "logps/rejected": -1.8317897319793701, + "loss": 0.5547, + "nll_loss": 0.1386728286743164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8248256310471334e-05, + "rewards/margins": 0.1831607222557068, + "rewards/rejected": -0.18317899107933044, + "step": 10942 + }, + { + "epoch": 7.567773167358229, + "grad_norm": 6.869671821594238, + "learning_rate": 1.3512371292454282e-05, + "log_odds_chosen": 11.702990531921387, + "log_odds_ratio": -1.1985999663011171e-05, + "logits/chosen": -0.3330806791782379, + "logits/rejected": -0.39222127199172974, + "logps/chosen": -5.937376045039855e-05, + "logps/rejected": -2.041858673095703, + "loss": 0.3399, + "nll_loss": 0.08497916907072067, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.937376045039855e-06, + "rewards/margins": 0.20417992770671844, + "rewards/rejected": -0.2041858732700348, + "step": 10943 + }, + { + "epoch": 7.568464730290456, + "grad_norm": 4.465538024902344, + "learning_rate": 1.350852927616413e-05, + "log_odds_chosen": 9.980588912963867, + "log_odds_ratio": -0.0002866295399144292, + "logits/chosen": -0.20742267370224, + "logits/rejected": -0.17144420742988586, + "logps/chosen": -0.0009605524828657508, + "logps/rejected": -2.3056931495666504, + "loss": 0.3348, + "nll_loss": 0.08365992456674576, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.605525701772422e-05, + "rewards/margins": 0.23047326505184174, + "rewards/rejected": -0.23056930303573608, + "step": 10944 + }, + { + "epoch": 7.569156293222683, + "grad_norm": 6.102631092071533, + "learning_rate": 1.3504687259873983e-05, + "log_odds_chosen": 10.743348121643066, + "log_odds_ratio": -4.211036139167845e-05, + "logits/chosen": -0.45897427201271057, + "logits/rejected": -0.37194669246673584, + "logps/chosen": -0.00015622461796738207, + "logps/rejected": -1.906165361404419, + "loss": 0.758, + "nll_loss": 0.18948470056056976, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5622459613950923e-05, + "rewards/margins": 0.1906009316444397, + "rewards/rejected": -0.19061654806137085, + "step": 10945 + }, + { + "epoch": 7.56984785615491, + "grad_norm": 4.20671272277832, + "learning_rate": 1.3500845243583834e-05, + "log_odds_chosen": 10.227163314819336, + "log_odds_ratio": -0.0010573863983154297, + "logits/chosen": -0.5030470490455627, + "logits/rejected": -0.5151537656784058, + "logps/chosen": -0.001639746013097465, + "logps/rejected": -2.151639223098755, + "loss": 0.3991, + "nll_loss": 0.09966427087783813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016397460422012955, + "rewards/margins": 0.2149999439716339, + "rewards/rejected": -0.21516390144824982, + "step": 10946 + }, + { + "epoch": 7.570539419087137, + "grad_norm": 5.223161697387695, + "learning_rate": 1.3497003227293683e-05, + "log_odds_chosen": 10.072952270507812, + "log_odds_ratio": -0.00028690227190963924, + "logits/chosen": -0.4013603627681732, + "logits/rejected": -0.553269624710083, + "logps/chosen": -0.0002936360251624137, + "logps/rejected": -1.9826910495758057, + "loss": 0.6725, + "nll_loss": 0.1680985689163208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.936360215244349e-05, + "rewards/margins": 0.1982397586107254, + "rewards/rejected": -0.19826912879943848, + "step": 10947 + }, + { + "epoch": 7.5712309820193635, + "grad_norm": 5.351415157318115, + "learning_rate": 1.3493161211003535e-05, + "log_odds_chosen": 10.563679695129395, + "log_odds_ratio": -5.192552271182649e-05, + "logits/chosen": -0.21586225926876068, + "logits/rejected": -0.19644798338413239, + "logps/chosen": -0.0003393357910681516, + "logps/rejected": -1.8586252927780151, + "loss": 0.5359, + "nll_loss": 0.1339617669582367, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.39335783792194e-05, + "rewards/margins": 0.18582861125469208, + "rewards/rejected": -0.18586254119873047, + "step": 10948 + }, + { + "epoch": 7.57192254495159, + "grad_norm": 4.629922389984131, + "learning_rate": 1.3489319194713388e-05, + "log_odds_chosen": 11.77937126159668, + "log_odds_ratio": -3.537982047419064e-05, + "logits/chosen": 0.03612430393695831, + "logits/rejected": 0.025129958987236023, + "logps/chosen": -0.0004390345420688391, + "logps/rejected": -3.6106865406036377, + "loss": 0.5558, + "nll_loss": 0.1389411985874176, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.390345566207543e-05, + "rewards/margins": 0.36102473735809326, + "rewards/rejected": -0.3610686659812927, + "step": 10949 + }, + { + "epoch": 7.572614107883817, + "grad_norm": 6.359925270080566, + "learning_rate": 1.3485477178423237e-05, + "log_odds_chosen": 11.694245338439941, + "log_odds_ratio": -0.0001377922744723037, + "logits/chosen": 0.01395311951637268, + "logits/rejected": -0.0705404207110405, + "logps/chosen": -0.00012315658386796713, + "logps/rejected": -2.751325845718384, + "loss": 0.6301, + "nll_loss": 0.15750938653945923, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2315658750594594e-05, + "rewards/margins": 0.27512025833129883, + "rewards/rejected": -0.27513253688812256, + "step": 10950 + }, + { + "epoch": 7.573305670816044, + "grad_norm": 5.494706153869629, + "learning_rate": 1.3481635162133088e-05, + "log_odds_chosen": 11.016411781311035, + "log_odds_ratio": -0.0002136414113920182, + "logits/chosen": -0.6450036764144897, + "logits/rejected": -0.6881355047225952, + "logps/chosen": -0.000511638296302408, + "logps/rejected": -1.8842029571533203, + "loss": 0.4141, + "nll_loss": 0.10351468622684479, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1163828175049275e-05, + "rewards/margins": 0.18836914002895355, + "rewards/rejected": -0.18842029571533203, + "step": 10951 + }, + { + "epoch": 7.573997233748271, + "grad_norm": 4.768087387084961, + "learning_rate": 1.347779314584294e-05, + "log_odds_chosen": 10.597572326660156, + "log_odds_ratio": -7.884033402660862e-05, + "logits/chosen": -0.0407976359128952, + "logits/rejected": -0.14931830763816833, + "logps/chosen": -0.0009158989414572716, + "logps/rejected": -2.176492214202881, + "loss": 0.539, + "nll_loss": 0.13473784923553467, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.158989996649325e-05, + "rewards/margins": 0.2175576388835907, + "rewards/rejected": -0.21764922142028809, + "step": 10952 + }, + { + "epoch": 7.574688796680498, + "grad_norm": 4.516747951507568, + "learning_rate": 1.3473951129552789e-05, + "log_odds_chosen": 10.323025703430176, + "log_odds_ratio": -0.0001632234634598717, + "logits/chosen": 0.14617076516151428, + "logits/rejected": 0.029506176710128784, + "logps/chosen": -0.0003115920699201524, + "logps/rejected": -1.745704174041748, + "loss": 0.4054, + "nll_loss": 0.10133402049541473, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.115920480922796e-05, + "rewards/margins": 0.17453926801681519, + "rewards/rejected": -0.17457042634487152, + "step": 10953 + }, + { + "epoch": 7.5753803596127245, + "grad_norm": 18.594852447509766, + "learning_rate": 1.3470109113262642e-05, + "log_odds_chosen": 11.122175216674805, + "log_odds_ratio": -1.9262806745246053e-05, + "logits/chosen": -0.2828685939311981, + "logits/rejected": -0.3240869641304016, + "logps/chosen": -0.000400659249862656, + "logps/rejected": -2.6810555458068848, + "loss": 0.3611, + "nll_loss": 0.09027761220932007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.006593007943593e-05, + "rewards/margins": 0.26806551218032837, + "rewards/rejected": -0.2681055963039398, + "step": 10954 + }, + { + "epoch": 7.576071922544951, + "grad_norm": 6.125179290771484, + "learning_rate": 1.3466267096972492e-05, + "log_odds_chosen": 9.519245147705078, + "log_odds_ratio": -0.0002525094896554947, + "logits/chosen": -0.2528844475746155, + "logits/rejected": -0.3225175738334656, + "logps/chosen": -0.0008995598182082176, + "logps/rejected": -1.8533120155334473, + "loss": 0.4051, + "nll_loss": 0.10125729441642761, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995598909677938e-05, + "rewards/margins": 0.185241237282753, + "rewards/rejected": -0.18533121049404144, + "step": 10955 + }, + { + "epoch": 7.576763485477178, + "grad_norm": 6.245513916015625, + "learning_rate": 1.3462425080682341e-05, + "log_odds_chosen": 9.875443458557129, + "log_odds_ratio": -0.001092126127332449, + "logits/chosen": -0.346031129360199, + "logits/rejected": -0.39958661794662476, + "logps/chosen": -0.0008469183230772614, + "logps/rejected": -1.874719500541687, + "loss": 0.612, + "nll_loss": 0.1528850495815277, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.469182648696005e-05, + "rewards/margins": 0.18738725781440735, + "rewards/rejected": -0.18747195601463318, + "step": 10956 + }, + { + "epoch": 7.577455048409405, + "grad_norm": 5.480423450469971, + "learning_rate": 1.3458583064392194e-05, + "log_odds_chosen": 10.91093635559082, + "log_odds_ratio": -0.0004052415315527469, + "logits/chosen": -0.14419147372245789, + "logits/rejected": -0.27634382247924805, + "logps/chosen": -0.0007029871921986341, + "logps/rejected": -2.4353132247924805, + "loss": 0.6362, + "nll_loss": 0.15900415182113647, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.029872358543798e-05, + "rewards/margins": 0.24346104264259338, + "rewards/rejected": -0.24353134632110596, + "step": 10957 + }, + { + "epoch": 7.578146611341632, + "grad_norm": 9.778234481811523, + "learning_rate": 1.3454741048102046e-05, + "log_odds_chosen": 11.435365676879883, + "log_odds_ratio": -4.242352588335052e-05, + "logits/chosen": -0.5769628286361694, + "logits/rejected": -0.63201904296875, + "logps/chosen": -0.00024153337290044874, + "logps/rejected": -2.636207103729248, + "loss": 0.3735, + "nll_loss": 0.09337200224399567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4153336198651232e-05, + "rewards/margins": 0.2635965943336487, + "rewards/rejected": -0.2636207044124603, + "step": 10958 + }, + { + "epoch": 7.578838174273859, + "grad_norm": 6.235721111297607, + "learning_rate": 1.3450899031811895e-05, + "log_odds_chosen": 12.401895523071289, + "log_odds_ratio": -2.3549444449599832e-05, + "logits/chosen": -0.3957058787345886, + "logits/rejected": -0.5027981996536255, + "logps/chosen": -0.00010508876584935933, + "logps/rejected": -3.0551514625549316, + "loss": 0.3612, + "nll_loss": 0.09028894454240799, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0508876584935933e-05, + "rewards/margins": 0.3055046498775482, + "rewards/rejected": -0.3055151700973511, + "step": 10959 + }, + { + "epoch": 7.5795297372060855, + "grad_norm": 6.023120403289795, + "learning_rate": 1.3447057015521746e-05, + "log_odds_chosen": 9.714308738708496, + "log_odds_ratio": -0.00028595273033715785, + "logits/chosen": -0.4025363326072693, + "logits/rejected": -0.6282363533973694, + "logps/chosen": -0.00048416247591376305, + "logps/rejected": -1.7566821575164795, + "loss": 0.4718, + "nll_loss": 0.11792824417352676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8416248318972066e-05, + "rewards/margins": 0.17561981081962585, + "rewards/rejected": -0.17566822469234467, + "step": 10960 + }, + { + "epoch": 7.580221300138312, + "grad_norm": 6.062592506408691, + "learning_rate": 1.3443214999231598e-05, + "log_odds_chosen": 11.775699615478516, + "log_odds_ratio": -1.3323345228855032e-05, + "logits/chosen": -0.692188560962677, + "logits/rejected": -0.8705258965492249, + "logps/chosen": -0.00046975412988103926, + "logps/rejected": -2.715114116668701, + "loss": 0.5378, + "nll_loss": 0.13445189595222473, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6975412260508165e-05, + "rewards/margins": 0.271464467048645, + "rewards/rejected": -0.271511435508728, + "step": 10961 + }, + { + "epoch": 7.580912863070539, + "grad_norm": 3.96543550491333, + "learning_rate": 1.3439372982941448e-05, + "log_odds_chosen": 10.873712539672852, + "log_odds_ratio": -0.00017372961156070232, + "logits/chosen": -0.15156838297843933, + "logits/rejected": -0.14810311794281006, + "logps/chosen": -0.00023373104340862483, + "logps/rejected": -2.0172066688537598, + "loss": 0.476, + "nll_loss": 0.11897880584001541, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3373106159851886e-05, + "rewards/margins": 0.20169728994369507, + "rewards/rejected": -0.20172066986560822, + "step": 10962 + }, + { + "epoch": 7.581604426002766, + "grad_norm": 10.002387046813965, + "learning_rate": 1.34355309666513e-05, + "log_odds_chosen": 11.76556396484375, + "log_odds_ratio": -3.788443427765742e-05, + "logits/chosen": -0.5352721214294434, + "logits/rejected": -0.49158841371536255, + "logps/chosen": -0.000397785275708884, + "logps/rejected": -3.296494483947754, + "loss": 0.5677, + "nll_loss": 0.1419147551059723, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.977853339165449e-05, + "rewards/margins": 0.3296097218990326, + "rewards/rejected": -0.3296494781970978, + "step": 10963 + }, + { + "epoch": 7.582295988934993, + "grad_norm": 6.157203674316406, + "learning_rate": 1.343168895036115e-05, + "log_odds_chosen": 10.94287109375, + "log_odds_ratio": -2.8982145522604696e-05, + "logits/chosen": -0.9206770062446594, + "logits/rejected": -0.8118359446525574, + "logps/chosen": -0.00016417729784734547, + "logps/rejected": -1.7131142616271973, + "loss": 0.3055, + "nll_loss": 0.07637281715869904, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6417729057138786e-05, + "rewards/margins": 0.17129501700401306, + "rewards/rejected": -0.17131143808364868, + "step": 10964 + }, + { + "epoch": 7.58298755186722, + "grad_norm": 4.484544277191162, + "learning_rate": 1.3427846934071e-05, + "log_odds_chosen": 9.924369812011719, + "log_odds_ratio": -0.00012947487994097173, + "logits/chosen": 0.10280543565750122, + "logits/rejected": 0.010313667356967926, + "logps/chosen": -0.0008380117360502481, + "logps/rejected": -2.1853108406066895, + "loss": 0.5956, + "nll_loss": 0.14889571070671082, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.380118379136547e-05, + "rewards/margins": 0.21844729781150818, + "rewards/rejected": -0.21853110194206238, + "step": 10965 + }, + { + "epoch": 7.5836791147994465, + "grad_norm": 5.453164100646973, + "learning_rate": 1.3424004917780852e-05, + "log_odds_chosen": 11.101842880249023, + "log_odds_ratio": -5.8247616834705696e-05, + "logits/chosen": -0.3038039803504944, + "logits/rejected": -0.29949885606765747, + "logps/chosen": -0.0001985014823731035, + "logps/rejected": -2.3401663303375244, + "loss": 0.425, + "nll_loss": 0.10624787211418152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.985014750971459e-05, + "rewards/margins": 0.23399679362773895, + "rewards/rejected": -0.23401664197444916, + "step": 10966 + }, + { + "epoch": 7.584370677731673, + "grad_norm": 4.547872543334961, + "learning_rate": 1.3420162901490705e-05, + "log_odds_chosen": 9.808728218078613, + "log_odds_ratio": -0.00034682743716984987, + "logits/chosen": -0.5433746576309204, + "logits/rejected": -0.5797238349914551, + "logps/chosen": -0.0007145186536945403, + "logps/rejected": -1.7367208003997803, + "loss": 0.6369, + "nll_loss": 0.1591930389404297, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.145186100387946e-05, + "rewards/margins": 0.17360062897205353, + "rewards/rejected": -0.17367208003997803, + "step": 10967 + }, + { + "epoch": 7.5850622406639, + "grad_norm": 5.118727207183838, + "learning_rate": 1.3416320885200554e-05, + "log_odds_chosen": 9.705362319946289, + "log_odds_ratio": -0.004164530895650387, + "logits/chosen": -0.5493965148925781, + "logits/rejected": -0.5997754335403442, + "logps/chosen": -0.02874472737312317, + "logps/rejected": -1.9699627161026, + "loss": 0.4065, + "nll_loss": 0.10120692849159241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028744726441800594, + "rewards/margins": 0.1941218078136444, + "rewards/rejected": -0.1969962865114212, + "step": 10968 + }, + { + "epoch": 7.585753803596127, + "grad_norm": 5.127748966217041, + "learning_rate": 1.3412478868910404e-05, + "log_odds_chosen": 11.015447616577148, + "log_odds_ratio": -0.00034674344351515174, + "logits/chosen": -0.395458459854126, + "logits/rejected": -0.5069045424461365, + "logps/chosen": -0.0021178291644901037, + "logps/rejected": -2.4018898010253906, + "loss": 0.4879, + "nll_loss": 0.12193099409341812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002117828989867121, + "rewards/margins": 0.2399771809577942, + "rewards/rejected": -0.24018898606300354, + "step": 10969 + }, + { + "epoch": 7.586445366528354, + "grad_norm": 3.3442649841308594, + "learning_rate": 1.3408636852620254e-05, + "log_odds_chosen": 11.893460273742676, + "log_odds_ratio": -1.2278281246835832e-05, + "logits/chosen": -0.3384820818901062, + "logits/rejected": -0.3775098919868469, + "logps/chosen": -8.853011240717024e-05, + "logps/rejected": -2.3381340503692627, + "loss": 0.4465, + "nll_loss": 0.11162815988063812, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.853011422615964e-06, + "rewards/margins": 0.23380455374717712, + "rewards/rejected": -0.23381341993808746, + "step": 10970 + }, + { + "epoch": 7.587136929460581, + "grad_norm": 3.977128505706787, + "learning_rate": 1.3404794836330106e-05, + "log_odds_chosen": 11.14720344543457, + "log_odds_ratio": -3.100410322076641e-05, + "logits/chosen": -0.4686664044857025, + "logits/rejected": -0.5176323652267456, + "logps/chosen": -0.00023064535344019532, + "logps/rejected": -2.558053493499756, + "loss": 0.3092, + "nll_loss": 0.07729601860046387, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.306453461642377e-05, + "rewards/margins": 0.2557823061943054, + "rewards/rejected": -0.2558053731918335, + "step": 10971 + }, + { + "epoch": 7.587828492392807, + "grad_norm": 5.078945636749268, + "learning_rate": 1.3400952820039958e-05, + "log_odds_chosen": 10.111377716064453, + "log_odds_ratio": -0.00019062630599364638, + "logits/chosen": -0.5156462788581848, + "logits/rejected": -0.569412887096405, + "logps/chosen": -0.0004553595499601215, + "logps/rejected": -2.0580437183380127, + "loss": 0.4682, + "nll_loss": 0.11703924834728241, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.553595499601215e-05, + "rewards/margins": 0.20575882494449615, + "rewards/rejected": -0.20580437779426575, + "step": 10972 + }, + { + "epoch": 7.588520055325034, + "grad_norm": 3.717108964920044, + "learning_rate": 1.3397110803749807e-05, + "log_odds_chosen": 12.030096054077148, + "log_odds_ratio": -5.3260264394339174e-05, + "logits/chosen": -0.05415065586566925, + "logits/rejected": -0.14219613373279572, + "logps/chosen": -0.00016555978800170124, + "logps/rejected": -2.8418140411376953, + "loss": 0.4215, + "nll_loss": 0.10536690801382065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6555979527765885e-05, + "rewards/margins": 0.2841648459434509, + "rewards/rejected": -0.2841814160346985, + "step": 10973 + }, + { + "epoch": 7.589211618257261, + "grad_norm": 2.715785503387451, + "learning_rate": 1.339326878745966e-05, + "log_odds_chosen": 10.593321800231934, + "log_odds_ratio": -0.0017579298000782728, + "logits/chosen": -0.6318291425704956, + "logits/rejected": -0.6820675134658813, + "logps/chosen": -0.008687568828463554, + "logps/rejected": -2.200286626815796, + "loss": 0.3041, + "nll_loss": 0.07583903521299362, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008687569061294198, + "rewards/margins": 0.21915991604328156, + "rewards/rejected": -0.22002866864204407, + "step": 10974 + }, + { + "epoch": 7.589903181189488, + "grad_norm": 15.399463653564453, + "learning_rate": 1.338942677116951e-05, + "log_odds_chosen": 9.43349838256836, + "log_odds_ratio": -0.049315690994262695, + "logits/chosen": 0.13571880757808685, + "logits/rejected": 0.015549729578197002, + "logps/chosen": -0.013712975196540356, + "logps/rejected": -1.929039716720581, + "loss": 0.4323, + "nll_loss": 0.10313201695680618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013712975196540356, + "rewards/margins": 0.1915326714515686, + "rewards/rejected": -0.19290399551391602, + "step": 10975 + }, + { + "epoch": 7.590594744121715, + "grad_norm": 2.948235511779785, + "learning_rate": 1.338558475487936e-05, + "log_odds_chosen": 11.358328819274902, + "log_odds_ratio": -2.3135744413593784e-05, + "logits/chosen": -0.5567000508308411, + "logits/rejected": -0.6210377216339111, + "logps/chosen": -0.0002084262960124761, + "logps/rejected": -2.1898272037506104, + "loss": 0.283, + "nll_loss": 0.07073809206485748, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.084262996504549e-05, + "rewards/margins": 0.21896187961101532, + "rewards/rejected": -0.2189827263355255, + "step": 10976 + }, + { + "epoch": 7.591286307053942, + "grad_norm": 4.210711479187012, + "learning_rate": 1.3381742738589212e-05, + "log_odds_chosen": 10.899679183959961, + "log_odds_ratio": -3.151923374389298e-05, + "logits/chosen": -0.7823795080184937, + "logits/rejected": -0.7703849077224731, + "logps/chosen": -6.220992509042844e-05, + "logps/rejected": -1.3939049243927002, + "loss": 0.3605, + "nll_loss": 0.09011554718017578, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220993327588076e-06, + "rewards/margins": 0.13938426971435547, + "rewards/rejected": -0.1393904983997345, + "step": 10977 + }, + { + "epoch": 7.591977869986168, + "grad_norm": 3.5889976024627686, + "learning_rate": 1.3377900722299065e-05, + "log_odds_chosen": 11.090879440307617, + "log_odds_ratio": -2.8063863283023238e-05, + "logits/chosen": -0.5752851963043213, + "logits/rejected": -0.5773409605026245, + "logps/chosen": -0.00026858376804739237, + "logps/rejected": -2.355632781982422, + "loss": 0.3274, + "nll_loss": 0.08185110986232758, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.685837898752652e-05, + "rewards/margins": 0.23553641140460968, + "rewards/rejected": -0.2355632781982422, + "step": 10978 + }, + { + "epoch": 7.592669432918395, + "grad_norm": 4.86741828918457, + "learning_rate": 1.3374058706008914e-05, + "log_odds_chosen": 11.348465919494629, + "log_odds_ratio": -2.008001865760889e-05, + "logits/chosen": -0.5246957540512085, + "logits/rejected": -0.5292121767997742, + "logps/chosen": -0.00023641643929295242, + "logps/rejected": -2.6028027534484863, + "loss": 0.6461, + "nll_loss": 0.16152501106262207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.364164356549736e-05, + "rewards/margins": 0.26025664806365967, + "rewards/rejected": -0.2602802515029907, + "step": 10979 + }, + { + "epoch": 7.593360995850622, + "grad_norm": 5.084012985229492, + "learning_rate": 1.3370216689718764e-05, + "log_odds_chosen": 9.90985107421875, + "log_odds_ratio": -0.0005122892325744033, + "logits/chosen": -0.5847611427307129, + "logits/rejected": -0.6524494886398315, + "logps/chosen": -0.0005478012026287615, + "logps/rejected": -1.9832638502120972, + "loss": 0.6648, + "nll_loss": 0.16615362465381622, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4780120990471914e-05, + "rewards/margins": 0.19827160239219666, + "rewards/rejected": -0.19832637906074524, + "step": 10980 + }, + { + "epoch": 7.594052558782849, + "grad_norm": 7.956467628479004, + "learning_rate": 1.3366374673428617e-05, + "log_odds_chosen": 10.32036018371582, + "log_odds_ratio": -0.000159708913997747, + "logits/chosen": -0.22464311122894287, + "logits/rejected": -0.23039782047271729, + "logps/chosen": -0.0003303846169728786, + "logps/rejected": -2.0702874660491943, + "loss": 0.5042, + "nll_loss": 0.12603464722633362, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.303846824564971e-05, + "rewards/margins": 0.20699569582939148, + "rewards/rejected": -0.20702874660491943, + "step": 10981 + }, + { + "epoch": 7.594744121715076, + "grad_norm": 9.162469863891602, + "learning_rate": 1.3362532657138466e-05, + "log_odds_chosen": 10.577921867370605, + "log_odds_ratio": -9.873359522316605e-05, + "logits/chosen": -0.6851182579994202, + "logits/rejected": -0.5594465732574463, + "logps/chosen": -0.0005685054929926991, + "logps/rejected": -2.600642204284668, + "loss": 0.5216, + "nll_loss": 0.13038098812103271, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6850549299269915e-05, + "rewards/margins": 0.260007381439209, + "rewards/rejected": -0.2600642442703247, + "step": 10982 + }, + { + "epoch": 7.595435684647303, + "grad_norm": 6.030345916748047, + "learning_rate": 1.3358690640848318e-05, + "log_odds_chosen": 11.665300369262695, + "log_odds_ratio": -2.2962234652368352e-05, + "logits/chosen": -0.31740760803222656, + "logits/rejected": -0.39805299043655396, + "logps/chosen": -0.0002136161783710122, + "logps/rejected": -2.858198642730713, + "loss": 0.4233, + "nll_loss": 0.10583190619945526, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1361618564696983e-05, + "rewards/margins": 0.28579849004745483, + "rewards/rejected": -0.2858198583126068, + "step": 10983 + }, + { + "epoch": 7.596127247579529, + "grad_norm": 9.487578392028809, + "learning_rate": 1.3354848624558169e-05, + "log_odds_chosen": 11.256978034973145, + "log_odds_ratio": -2.319498162250966e-05, + "logits/chosen": -0.5560814738273621, + "logits/rejected": -0.5887335538864136, + "logps/chosen": -0.00013760194997303188, + "logps/rejected": -2.3657851219177246, + "loss": 0.6228, + "nll_loss": 0.1557023823261261, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.376019645249471e-05, + "rewards/margins": 0.2365647554397583, + "rewards/rejected": -0.23657852411270142, + "step": 10984 + }, + { + "epoch": 7.596818810511756, + "grad_norm": 4.475757122039795, + "learning_rate": 1.3351006608268018e-05, + "log_odds_chosen": 11.180810928344727, + "log_odds_ratio": -7.700147398281842e-05, + "logits/chosen": -0.20350779592990875, + "logits/rejected": -0.23505568504333496, + "logps/chosen": -0.00022611429449170828, + "logps/rejected": -2.398804187774658, + "loss": 0.4366, + "nll_loss": 0.10914000123739243, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2611427993979305e-05, + "rewards/margins": 0.23985780775547028, + "rewards/rejected": -0.23988041281700134, + "step": 10985 + }, + { + "epoch": 7.597510373443983, + "grad_norm": 3.7865943908691406, + "learning_rate": 1.334716459197787e-05, + "log_odds_chosen": 11.013235092163086, + "log_odds_ratio": -3.90688655897975e-05, + "logits/chosen": -0.33913129568099976, + "logits/rejected": -0.37505990266799927, + "logps/chosen": -0.0002632609975989908, + "logps/rejected": -2.480992317199707, + "loss": 0.2838, + "nll_loss": 0.07094120979309082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6326102670282125e-05, + "rewards/margins": 0.24807292222976685, + "rewards/rejected": -0.24809923768043518, + "step": 10986 + }, + { + "epoch": 7.59820193637621, + "grad_norm": 4.870147228240967, + "learning_rate": 1.3343322575687723e-05, + "log_odds_chosen": 11.334236145019531, + "log_odds_ratio": -2.096058233291842e-05, + "logits/chosen": -0.3121829032897949, + "logits/rejected": -0.403335303068161, + "logps/chosen": -0.0002347916306462139, + "logps/rejected": -2.9569520950317383, + "loss": 0.5128, + "nll_loss": 0.128209188580513, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.347916415601503e-05, + "rewards/margins": 0.2956717610359192, + "rewards/rejected": -0.2956952154636383, + "step": 10987 + }, + { + "epoch": 7.598893499308437, + "grad_norm": 7.494716167449951, + "learning_rate": 1.3339480559397572e-05, + "log_odds_chosen": 11.476689338684082, + "log_odds_ratio": -2.559039057814516e-05, + "logits/chosen": -0.4330449104309082, + "logits/rejected": -0.4117595851421356, + "logps/chosen": -0.00018414655642118305, + "logps/rejected": -2.5021536350250244, + "loss": 0.4617, + "nll_loss": 0.11543288826942444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8414655642118305e-05, + "rewards/margins": 0.2501969337463379, + "rewards/rejected": -0.2502153515815735, + "step": 10988 + }, + { + "epoch": 7.5995850622406635, + "grad_norm": 4.136565208435059, + "learning_rate": 1.3335638543107423e-05, + "log_odds_chosen": 10.985734939575195, + "log_odds_ratio": -0.00016389289521612227, + "logits/chosen": -0.43322715163230896, + "logits/rejected": -0.4590873718261719, + "logps/chosen": -0.0002824410330504179, + "logps/rejected": -2.6134121417999268, + "loss": 0.3672, + "nll_loss": 0.09179577976465225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.824410330504179e-05, + "rewards/margins": 0.26131296157836914, + "rewards/rejected": -0.2613412141799927, + "step": 10989 + }, + { + "epoch": 7.60027662517289, + "grad_norm": 4.833652019500732, + "learning_rate": 1.3331796526817275e-05, + "log_odds_chosen": 11.522449493408203, + "log_odds_ratio": -2.0415656763361767e-05, + "logits/chosen": 0.11912795156240463, + "logits/rejected": 0.1455882340669632, + "logps/chosen": -0.00014867138816043735, + "logps/rejected": -2.4429879188537598, + "loss": 0.6078, + "nll_loss": 0.15194672346115112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4867138816043735e-05, + "rewards/margins": 0.24428394436836243, + "rewards/rejected": -0.2442988008260727, + "step": 10990 + }, + { + "epoch": 7.600968188105117, + "grad_norm": 7.191924571990967, + "learning_rate": 1.3327954510527124e-05, + "log_odds_chosen": 11.404149055480957, + "log_odds_ratio": -5.720463377656415e-05, + "logits/chosen": -0.12018120288848877, + "logits/rejected": -0.19981467723846436, + "logps/chosen": -0.00022083328804001212, + "logps/rejected": -2.782083034515381, + "loss": 0.4762, + "nll_loss": 0.11905424296855927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2083329895394854e-05, + "rewards/margins": 0.27818623185157776, + "rewards/rejected": -0.27820831537246704, + "step": 10991 + }, + { + "epoch": 7.601659751037344, + "grad_norm": 5.22706413269043, + "learning_rate": 1.3324112494236977e-05, + "log_odds_chosen": 10.684869766235352, + "log_odds_ratio": -0.0003298583615105599, + "logits/chosen": -0.1452624648809433, + "logits/rejected": -0.24476586282253265, + "logps/chosen": -0.00021354752243496478, + "logps/rejected": -1.7574265003204346, + "loss": 0.5013, + "nll_loss": 0.12530261278152466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1354751879698597e-05, + "rewards/margins": 0.17572128772735596, + "rewards/rejected": -0.17574265599250793, + "step": 10992 + }, + { + "epoch": 7.602351313969571, + "grad_norm": 5.045536518096924, + "learning_rate": 1.3320270477946828e-05, + "log_odds_chosen": 10.604886054992676, + "log_odds_ratio": -4.5088541810400784e-05, + "logits/chosen": 0.04422904551029205, + "logits/rejected": -0.07325298339128494, + "logps/chosen": -0.000652353628538549, + "logps/rejected": -2.1631197929382324, + "loss": 0.5821, + "nll_loss": 0.14552420377731323, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.52353628538549e-05, + "rewards/margins": 0.21624672412872314, + "rewards/rejected": -0.2163119614124298, + "step": 10993 + }, + { + "epoch": 7.603042876901798, + "grad_norm": 3.8175852298736572, + "learning_rate": 1.3316428461656677e-05, + "log_odds_chosen": 11.407567977905273, + "log_odds_ratio": -2.908537135226652e-05, + "logits/chosen": -0.21130746603012085, + "logits/rejected": -0.28915801644325256, + "logps/chosen": -0.00012252983287908137, + "logps/rejected": -1.775246500968933, + "loss": 0.3452, + "nll_loss": 0.08629661798477173, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2252983651706018e-05, + "rewards/margins": 0.17751239240169525, + "rewards/rejected": -0.1775246560573578, + "step": 10994 + }, + { + "epoch": 7.6037344398340245, + "grad_norm": 5.9048261642456055, + "learning_rate": 1.3312586445366529e-05, + "log_odds_chosen": 11.353921890258789, + "log_odds_ratio": -9.299576777266338e-05, + "logits/chosen": -0.4369305372238159, + "logits/rejected": -0.5150572657585144, + "logps/chosen": -0.00022164465917740017, + "logps/rejected": -2.1506056785583496, + "loss": 0.4776, + "nll_loss": 0.11938893049955368, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2164465917740017e-05, + "rewards/margins": 0.21503840386867523, + "rewards/rejected": -0.21506056189537048, + "step": 10995 + }, + { + "epoch": 7.604426002766251, + "grad_norm": 6.622079849243164, + "learning_rate": 1.3308744429076381e-05, + "log_odds_chosen": 10.349295616149902, + "log_odds_ratio": -7.134541374398395e-05, + "logits/chosen": -0.5894879698753357, + "logits/rejected": -0.5946841835975647, + "logps/chosen": -0.00031164148822426796, + "logps/rejected": -1.9511890411376953, + "loss": 0.3949, + "nll_loss": 0.09870664030313492, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.116414518444799e-05, + "rewards/margins": 0.195087730884552, + "rewards/rejected": -0.19511890411376953, + "step": 10996 + }, + { + "epoch": 7.605117565698478, + "grad_norm": 3.938234329223633, + "learning_rate": 1.330490241278623e-05, + "log_odds_chosen": 10.668981552124023, + "log_odds_ratio": -7.578918302897364e-05, + "logits/chosen": -0.1792973130941391, + "logits/rejected": -0.2401539832353592, + "logps/chosen": -0.0008044381975196302, + "logps/rejected": -2.882570505142212, + "loss": 0.3754, + "nll_loss": 0.09384553134441376, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.044381684157997e-05, + "rewards/margins": 0.28817659616470337, + "rewards/rejected": -0.28825703263282776, + "step": 10997 + }, + { + "epoch": 7.605809128630705, + "grad_norm": 6.447854995727539, + "learning_rate": 1.3301060396496081e-05, + "log_odds_chosen": 9.736894607543945, + "log_odds_ratio": -0.0004386040091048926, + "logits/chosen": -0.4050842225551605, + "logits/rejected": -0.40405207872390747, + "logps/chosen": -0.00036903645377606153, + "logps/rejected": -1.3877718448638916, + "loss": 0.3477, + "nll_loss": 0.08688089996576309, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.690364246722311e-05, + "rewards/margins": 0.13874028623104095, + "rewards/rejected": -0.13877719640731812, + "step": 10998 + }, + { + "epoch": 7.606500691562932, + "grad_norm": 6.765289306640625, + "learning_rate": 1.3297218380205934e-05, + "log_odds_chosen": 10.077953338623047, + "log_odds_ratio": -0.000245953124249354, + "logits/chosen": 0.016869522631168365, + "logits/rejected": -0.13176044821739197, + "logps/chosen": -0.000427676597610116, + "logps/rejected": -1.8634501695632935, + "loss": 0.5673, + "nll_loss": 0.1417999118566513, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2767656850628555e-05, + "rewards/margins": 0.18630225956439972, + "rewards/rejected": -0.18634501099586487, + "step": 10999 + }, + { + "epoch": 7.607192254495159, + "grad_norm": 5.110645771026611, + "learning_rate": 1.3293376363915783e-05, + "log_odds_chosen": 11.453241348266602, + "log_odds_ratio": -1.5676314433221705e-05, + "logits/chosen": -0.6428710222244263, + "logits/rejected": -0.628454327583313, + "logps/chosen": -0.00034174363827332854, + "logps/rejected": -2.8143365383148193, + "loss": 0.4801, + "nll_loss": 0.12001337110996246, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.417436528252438e-05, + "rewards/margins": 0.2813994884490967, + "rewards/rejected": -0.281433641910553, + "step": 11000 + }, + { + "epoch": 7.6078838174273855, + "grad_norm": 4.963628768920898, + "learning_rate": 1.3289534347625635e-05, + "log_odds_chosen": 10.312638282775879, + "log_odds_ratio": -0.00037841207813471556, + "logits/chosen": -0.15952733159065247, + "logits/rejected": -0.047689080238342285, + "logps/chosen": -0.000430744286859408, + "logps/rejected": -1.889266848564148, + "loss": 0.5159, + "nll_loss": 0.12893825769424438, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3074429413536564e-05, + "rewards/margins": 0.18888360261917114, + "rewards/rejected": -0.18892668187618256, + "step": 11001 + }, + { + "epoch": 7.608575380359612, + "grad_norm": 4.401406288146973, + "learning_rate": 1.3285692331335486e-05, + "log_odds_chosen": 11.755447387695312, + "log_odds_ratio": -3.4304284781683236e-05, + "logits/chosen": -0.5623564720153809, + "logits/rejected": -0.5761032104492188, + "logps/chosen": -0.00023698658333159983, + "logps/rejected": -2.986236095428467, + "loss": 0.4556, + "nll_loss": 0.11389949917793274, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3698659788351506e-05, + "rewards/margins": 0.29859989881515503, + "rewards/rejected": -0.29862362146377563, + "step": 11002 + }, + { + "epoch": 7.609266943291839, + "grad_norm": 4.843077182769775, + "learning_rate": 1.3281850315045335e-05, + "log_odds_chosen": 10.616469383239746, + "log_odds_ratio": -6.649021815974265e-05, + "logits/chosen": -0.30027931928634644, + "logits/rejected": -0.256147176027298, + "logps/chosen": -0.00033794311457313597, + "logps/rejected": -2.2427988052368164, + "loss": 0.4738, + "nll_loss": 0.11844848841428757, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.379431291250512e-05, + "rewards/margins": 0.224246084690094, + "rewards/rejected": -0.22427986562252045, + "step": 11003 + }, + { + "epoch": 7.609958506224066, + "grad_norm": 16.88494873046875, + "learning_rate": 1.3278008298755187e-05, + "log_odds_chosen": 10.293458938598633, + "log_odds_ratio": -0.001284759258851409, + "logits/chosen": -0.34886935353279114, + "logits/rejected": -0.31973719596862793, + "logps/chosen": -0.0020102402195334435, + "logps/rejected": -2.528747797012329, + "loss": 0.3658, + "nll_loss": 0.09131994843482971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020102402777411044, + "rewards/margins": 0.25267377495765686, + "rewards/rejected": -0.25287479162216187, + "step": 11004 + }, + { + "epoch": 7.610650069156293, + "grad_norm": 4.845985412597656, + "learning_rate": 1.327416628246504e-05, + "log_odds_chosen": 10.976385116577148, + "log_odds_ratio": -8.727479143999517e-05, + "logits/chosen": -0.379669189453125, + "logits/rejected": -0.5029025077819824, + "logps/chosen": -0.0001920202048495412, + "logps/rejected": -1.9614152908325195, + "loss": 0.3782, + "nll_loss": 0.09454361349344254, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9202019757358357e-05, + "rewards/margins": 0.19612233340740204, + "rewards/rejected": -0.1961415410041809, + "step": 11005 + }, + { + "epoch": 7.61134163208852, + "grad_norm": 3.453927993774414, + "learning_rate": 1.3270324266174889e-05, + "log_odds_chosen": 11.28010368347168, + "log_odds_ratio": -4.166722283116542e-05, + "logits/chosen": -0.3787435293197632, + "logits/rejected": -0.456486314535141, + "logps/chosen": -0.0001853770372690633, + "logps/rejected": -2.5516510009765625, + "loss": 0.2239, + "nll_loss": 0.05597200244665146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.853770481829997e-05, + "rewards/margins": 0.2551465630531311, + "rewards/rejected": -0.25516510009765625, + "step": 11006 + }, + { + "epoch": 7.6120331950207465, + "grad_norm": 4.172607421875, + "learning_rate": 1.326648224988474e-05, + "log_odds_chosen": 10.891128540039062, + "log_odds_ratio": -2.8951366402907297e-05, + "logits/chosen": -0.4312208294868469, + "logits/rejected": -0.48598796129226685, + "logps/chosen": -0.00013583191321231425, + "logps/rejected": -1.9276304244995117, + "loss": 0.4674, + "nll_loss": 0.11685236543416977, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3583192412625067e-05, + "rewards/margins": 0.19274947047233582, + "rewards/rejected": -0.1927630603313446, + "step": 11007 + }, + { + "epoch": 7.612724757952973, + "grad_norm": 4.176784515380859, + "learning_rate": 1.3262640233594592e-05, + "log_odds_chosen": 10.976896286010742, + "log_odds_ratio": -0.00014842335076536983, + "logits/chosen": -0.16801583766937256, + "logits/rejected": -0.2282327264547348, + "logps/chosen": -0.004101084545254707, + "logps/rejected": -2.81337833404541, + "loss": 0.461, + "nll_loss": 0.11524462699890137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004101084778085351, + "rewards/margins": 0.28092771768569946, + "rewards/rejected": -0.28133782744407654, + "step": 11008 + }, + { + "epoch": 7.6134163208852, + "grad_norm": 6.124029636383057, + "learning_rate": 1.3258798217304441e-05, + "log_odds_chosen": 10.99689769744873, + "log_odds_ratio": -3.287247818661854e-05, + "logits/chosen": -0.6622257232666016, + "logits/rejected": -0.7392969131469727, + "logps/chosen": -0.00018039315182249993, + "logps/rejected": -1.7692515850067139, + "loss": 0.3868, + "nll_loss": 0.09669550508260727, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8039316273643635e-05, + "rewards/margins": 0.17690712213516235, + "rewards/rejected": -0.1769251525402069, + "step": 11009 + }, + { + "epoch": 7.614107883817427, + "grad_norm": 5.059366703033447, + "learning_rate": 1.3254956201014294e-05, + "log_odds_chosen": 12.686284065246582, + "log_odds_ratio": -7.265820840984816e-06, + "logits/chosen": -0.4957124888896942, + "logits/rejected": -0.509590744972229, + "logps/chosen": -9.553524432703853e-05, + "logps/rejected": -3.3620762825012207, + "loss": 0.4987, + "nll_loss": 0.12466945499181747, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.553524250804912e-06, + "rewards/margins": 0.3361980617046356, + "rewards/rejected": -0.33620762825012207, + "step": 11010 + }, + { + "epoch": 7.614799446749654, + "grad_norm": 4.135800361633301, + "learning_rate": 1.3251114184724144e-05, + "log_odds_chosen": 9.902254104614258, + "log_odds_ratio": -0.0010262223659083247, + "logits/chosen": -0.37849074602127075, + "logits/rejected": -0.48255205154418945, + "logps/chosen": -0.0004565394192468375, + "logps/rejected": -1.282253623008728, + "loss": 0.5531, + "nll_loss": 0.1381729543209076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.565394192468375e-05, + "rewards/margins": 0.12817969918251038, + "rewards/rejected": -0.12822535634040833, + "step": 11011 + }, + { + "epoch": 7.615491009681881, + "grad_norm": 6.363430976867676, + "learning_rate": 1.3247272168433993e-05, + "log_odds_chosen": 10.299551010131836, + "log_odds_ratio": -5.6483760999981314e-05, + "logits/chosen": -0.4059584438800812, + "logits/rejected": -0.4429929852485657, + "logps/chosen": -0.00020562093413900584, + "logps/rejected": -1.7810602188110352, + "loss": 0.6852, + "nll_loss": 0.1712915003299713, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0562092686304823e-05, + "rewards/margins": 0.17808546125888824, + "rewards/rejected": -0.17810603976249695, + "step": 11012 + }, + { + "epoch": 7.6161825726141075, + "grad_norm": 7.91430139541626, + "learning_rate": 1.3243430152143846e-05, + "log_odds_chosen": 11.475375175476074, + "log_odds_ratio": -3.780444239964709e-05, + "logits/chosen": -0.5700872540473938, + "logits/rejected": -0.6604025363922119, + "logps/chosen": -0.0003129754331894219, + "logps/rejected": -3.1353888511657715, + "loss": 0.3652, + "nll_loss": 0.09129648655653, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.129754259134643e-05, + "rewards/margins": 0.3135075569152832, + "rewards/rejected": -0.31353887915611267, + "step": 11013 + }, + { + "epoch": 7.616874135546334, + "grad_norm": 5.952118396759033, + "learning_rate": 1.3239588135853698e-05, + "log_odds_chosen": 9.803913116455078, + "log_odds_ratio": -0.00028944603400304914, + "logits/chosen": -0.25628095865249634, + "logits/rejected": -0.29409071803092957, + "logps/chosen": -0.00030227593379095197, + "logps/rejected": -1.7432351112365723, + "loss": 0.792, + "nll_loss": 0.19796809554100037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.022759483428672e-05, + "rewards/margins": 0.17429327964782715, + "rewards/rejected": -0.17432349920272827, + "step": 11014 + }, + { + "epoch": 7.617565698478561, + "grad_norm": 8.945135116577148, + "learning_rate": 1.3235746119563547e-05, + "log_odds_chosen": 11.400367736816406, + "log_odds_ratio": -1.497354060120415e-05, + "logits/chosen": -0.6030562520027161, + "logits/rejected": -0.6528980731964111, + "logps/chosen": -0.0002069780748570338, + "logps/rejected": -2.4284608364105225, + "loss": 0.5021, + "nll_loss": 0.12551532685756683, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.069780748570338e-05, + "rewards/margins": 0.24282538890838623, + "rewards/rejected": -0.24284610152244568, + "step": 11015 + }, + { + "epoch": 7.618257261410788, + "grad_norm": 5.686136722564697, + "learning_rate": 1.3231904103273398e-05, + "log_odds_chosen": 11.649585723876953, + "log_odds_ratio": -2.029037023021374e-05, + "logits/chosen": -0.46048489212989807, + "logits/rejected": -0.606641411781311, + "logps/chosen": -6.944908091099933e-05, + "logps/rejected": -2.059354543685913, + "loss": 0.3509, + "nll_loss": 0.08772450685501099, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9449079092009924e-06, + "rewards/margins": 0.2059285193681717, + "rewards/rejected": -0.20593544840812683, + "step": 11016 + }, + { + "epoch": 7.618948824343015, + "grad_norm": 4.732438564300537, + "learning_rate": 1.322806208698325e-05, + "log_odds_chosen": 9.011289596557617, + "log_odds_ratio": -0.11389197409152985, + "logits/chosen": -0.37712395191192627, + "logits/rejected": -0.46562129259109497, + "logps/chosen": -0.02596820518374443, + "logps/rejected": -1.6240522861480713, + "loss": 0.7094, + "nll_loss": 0.16597187519073486, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0025968204718083143, + "rewards/margins": 0.15980841219425201, + "rewards/rejected": -0.16240522265434265, + "step": 11017 + }, + { + "epoch": 7.619640387275242, + "grad_norm": 5.688709259033203, + "learning_rate": 1.32242200706931e-05, + "log_odds_chosen": 11.936830520629883, + "log_odds_ratio": -1.1859597179864068e-05, + "logits/chosen": -0.36379367113113403, + "logits/rejected": -0.5231572389602661, + "logps/chosen": -8.398642967222258e-05, + "logps/rejected": -2.399197578430176, + "loss": 0.6543, + "nll_loss": 0.16357439756393433, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.39864333102014e-06, + "rewards/margins": 0.23991134762763977, + "rewards/rejected": -0.2399197369813919, + "step": 11018 + }, + { + "epoch": 7.6203319502074685, + "grad_norm": 7.055410385131836, + "learning_rate": 1.3220378054402952e-05, + "log_odds_chosen": 11.101515769958496, + "log_odds_ratio": -0.00024681005743332207, + "logits/chosen": -0.38099271059036255, + "logits/rejected": -0.40887144207954407, + "logps/chosen": -0.00039470737101510167, + "logps/rejected": -3.0094618797302246, + "loss": 0.597, + "nll_loss": 0.1492285132408142, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.947073855670169e-05, + "rewards/margins": 0.3009067475795746, + "rewards/rejected": -0.3009462058544159, + "step": 11019 + }, + { + "epoch": 7.621023513139695, + "grad_norm": 4.223811626434326, + "learning_rate": 1.3216536038112803e-05, + "log_odds_chosen": 11.240891456604004, + "log_odds_ratio": -0.00010240564733976498, + "logits/chosen": -0.3982436954975128, + "logits/rejected": -0.43361055850982666, + "logps/chosen": -0.00044147035805508494, + "logps/rejected": -2.726694107055664, + "loss": 0.5172, + "nll_loss": 0.1292930543422699, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4147036533104256e-05, + "rewards/margins": 0.27262526750564575, + "rewards/rejected": -0.2726694345474243, + "step": 11020 + }, + { + "epoch": 7.621715076071922, + "grad_norm": 4.663049697875977, + "learning_rate": 1.3212694021822652e-05, + "log_odds_chosen": 10.073478698730469, + "log_odds_ratio": -0.0008136788383126259, + "logits/chosen": -0.33111122250556946, + "logits/rejected": -0.4370899498462677, + "logps/chosen": -0.0008942785789258778, + "logps/rejected": -1.933433175086975, + "loss": 0.9654, + "nll_loss": 0.24126268923282623, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.942785643739626e-05, + "rewards/margins": 0.19325390458106995, + "rewards/rejected": -0.19334332644939423, + "step": 11021 + }, + { + "epoch": 7.622406639004149, + "grad_norm": 3.3118700981140137, + "learning_rate": 1.3208852005532504e-05, + "log_odds_chosen": 10.24764633178711, + "log_odds_ratio": -0.0003374480293132365, + "logits/chosen": -0.3461621105670929, + "logits/rejected": -0.29326191544532776, + "logps/chosen": -0.0007046432583592832, + "logps/rejected": -1.8110865354537964, + "loss": 0.3827, + "nll_loss": 0.09563969075679779, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.04643243807368e-05, + "rewards/margins": 0.18103818595409393, + "rewards/rejected": -0.18110865354537964, + "step": 11022 + }, + { + "epoch": 7.623098201936376, + "grad_norm": 32.8052864074707, + "learning_rate": 1.3205009989242357e-05, + "log_odds_chosen": 11.21299934387207, + "log_odds_ratio": -2.9290804377524182e-05, + "logits/chosen": 0.08305463194847107, + "logits/rejected": -0.015703324228525162, + "logps/chosen": -0.00022970230202190578, + "logps/rejected": -2.0762534141540527, + "loss": 0.6082, + "nll_loss": 0.15203939378261566, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.297023092978634e-05, + "rewards/margins": 0.207602396607399, + "rewards/rejected": -0.2076253592967987, + "step": 11023 + }, + { + "epoch": 7.623789764868603, + "grad_norm": 4.772336959838867, + "learning_rate": 1.3201167972952206e-05, + "log_odds_chosen": 11.075516700744629, + "log_odds_ratio": -0.00023821770446375012, + "logits/chosen": -0.2510223090648651, + "logits/rejected": -0.28794288635253906, + "logps/chosen": -0.00033794849878177047, + "logps/rejected": -2.5776126384735107, + "loss": 0.5069, + "nll_loss": 0.1267106682062149, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.379485133336857e-05, + "rewards/margins": 0.2577274441719055, + "rewards/rejected": -0.25776124000549316, + "step": 11024 + }, + { + "epoch": 7.624481327800829, + "grad_norm": 7.352637767791748, + "learning_rate": 1.3197325956662057e-05, + "log_odds_chosen": 10.048306465148926, + "log_odds_ratio": -0.0002940360573120415, + "logits/chosen": -0.5171242952346802, + "logits/rejected": -0.6400086283683777, + "logps/chosen": -0.0003537190204951912, + "logps/rejected": -1.8638750314712524, + "loss": 0.6937, + "nll_loss": 0.1733952760696411, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5371904232306406e-05, + "rewards/margins": 0.18635213375091553, + "rewards/rejected": -0.18638749420642853, + "step": 11025 + }, + { + "epoch": 7.625172890733056, + "grad_norm": 5.062249660491943, + "learning_rate": 1.3193483940371909e-05, + "log_odds_chosen": 11.70677375793457, + "log_odds_ratio": -2.8659145755227655e-05, + "logits/chosen": -0.5007099509239197, + "logits/rejected": -0.5252422094345093, + "logps/chosen": -0.00022100911883171648, + "logps/rejected": -2.3927669525146484, + "loss": 0.458, + "nll_loss": 0.1145017147064209, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2100914065958932e-05, + "rewards/margins": 0.23925460875034332, + "rewards/rejected": -0.2392767071723938, + "step": 11026 + }, + { + "epoch": 7.625864453665283, + "grad_norm": 3.744673252105713, + "learning_rate": 1.3189641924081758e-05, + "log_odds_chosen": 10.837970733642578, + "log_odds_ratio": -0.00011957847164012492, + "logits/chosen": -0.12889793515205383, + "logits/rejected": -0.14431166648864746, + "logps/chosen": -0.00019565450202208012, + "logps/rejected": -1.9157960414886475, + "loss": 0.3511, + "nll_loss": 0.08775661885738373, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956544838321861e-05, + "rewards/margins": 0.1915600299835205, + "rewards/rejected": -0.19157959520816803, + "step": 11027 + }, + { + "epoch": 7.62655601659751, + "grad_norm": 5.839942455291748, + "learning_rate": 1.318579990779161e-05, + "log_odds_chosen": 11.359109878540039, + "log_odds_ratio": -3.5803877835860476e-05, + "logits/chosen": -0.36645859479904175, + "logits/rejected": -0.45863598585128784, + "logps/chosen": -0.00030411925399675965, + "logps/rejected": -2.276093006134033, + "loss": 0.3209, + "nll_loss": 0.0802203118801117, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0411923944484442e-05, + "rewards/margins": 0.22757890820503235, + "rewards/rejected": -0.2276093065738678, + "step": 11028 + }, + { + "epoch": 7.627247579529737, + "grad_norm": 6.054105281829834, + "learning_rate": 1.3181957891501461e-05, + "log_odds_chosen": 9.636726379394531, + "log_odds_ratio": -0.0002913882490247488, + "logits/chosen": -0.4848182201385498, + "logits/rejected": -0.537946343421936, + "logps/chosen": -0.0012282358948141336, + "logps/rejected": -1.7565040588378906, + "loss": 0.5026, + "nll_loss": 0.1256217062473297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001228235923917964, + "rewards/margins": 0.17552758753299713, + "rewards/rejected": -0.17565041780471802, + "step": 11029 + }, + { + "epoch": 7.627939142461964, + "grad_norm": 3.5452024936676025, + "learning_rate": 1.317811587521131e-05, + "log_odds_chosen": 10.0420560836792, + "log_odds_ratio": -0.000112081368570216, + "logits/chosen": -0.17948752641677856, + "logits/rejected": -0.3174286484718323, + "logps/chosen": -0.00016764186148066074, + "logps/rejected": -1.5102574825286865, + "loss": 0.3552, + "nll_loss": 0.08878683298826218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6764186511863954e-05, + "rewards/margins": 0.1510089933872223, + "rewards/rejected": -0.15102574229240417, + "step": 11030 + }, + { + "epoch": 7.62863070539419, + "grad_norm": 6.366281986236572, + "learning_rate": 1.3174273858921163e-05, + "log_odds_chosen": 10.957046508789062, + "log_odds_ratio": -9.888163913274184e-05, + "logits/chosen": -0.3846125602722168, + "logits/rejected": -0.5001208186149597, + "logps/chosen": -0.0010962296510115266, + "logps/rejected": -2.7141101360321045, + "loss": 0.463, + "nll_loss": 0.11573092639446259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001096229680115357, + "rewards/margins": 0.27130138874053955, + "rewards/rejected": -0.2714110314846039, + "step": 11031 + }, + { + "epoch": 7.629322268326417, + "grad_norm": 3.164262533187866, + "learning_rate": 1.3170431842631015e-05, + "log_odds_chosen": 10.120061874389648, + "log_odds_ratio": -5.873154805158265e-05, + "logits/chosen": -0.38629233837127686, + "logits/rejected": -0.45555785298347473, + "logps/chosen": -0.0002465298166498542, + "logps/rejected": -1.6912938356399536, + "loss": 0.3558, + "nll_loss": 0.0889514684677124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.465298166498542e-05, + "rewards/margins": 0.16910472512245178, + "rewards/rejected": -0.1691294014453888, + "step": 11032 + }, + { + "epoch": 7.630013831258644, + "grad_norm": 5.1167497634887695, + "learning_rate": 1.3166589826340864e-05, + "log_odds_chosen": 10.183965682983398, + "log_odds_ratio": -0.0002524867304600775, + "logits/chosen": -0.28899770975112915, + "logits/rejected": -0.23264774680137634, + "logps/chosen": -0.0006701169768348336, + "logps/rejected": -2.2900922298431396, + "loss": 0.3546, + "nll_loss": 0.08861719071865082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701170059386641e-05, + "rewards/margins": 0.22894223034381866, + "rewards/rejected": -0.2290092408657074, + "step": 11033 + }, + { + "epoch": 7.630705394190871, + "grad_norm": 6.881916046142578, + "learning_rate": 1.3162747810050715e-05, + "log_odds_chosen": 11.365351676940918, + "log_odds_ratio": -0.0002513904182706028, + "logits/chosen": -0.2015291452407837, + "logits/rejected": -0.4687212109565735, + "logps/chosen": -0.000261218985542655, + "logps/rejected": -2.440117835998535, + "loss": 0.4145, + "nll_loss": 0.10359037667512894, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.612189928186126e-05, + "rewards/margins": 0.24398568272590637, + "rewards/rejected": -0.2440118044614792, + "step": 11034 + }, + { + "epoch": 7.631396957123098, + "grad_norm": 3.9592254161834717, + "learning_rate": 1.3158905793760567e-05, + "log_odds_chosen": 11.326955795288086, + "log_odds_ratio": -0.00016150146257132292, + "logits/chosen": -0.5032748579978943, + "logits/rejected": -0.6475515961647034, + "logps/chosen": -0.00020860202494077384, + "logps/rejected": -2.3688547611236572, + "loss": 0.3881, + "nll_loss": 0.09700487554073334, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0860203221673146e-05, + "rewards/margins": 0.2368645817041397, + "rewards/rejected": -0.2368854582309723, + "step": 11035 + }, + { + "epoch": 7.632088520055325, + "grad_norm": 33.969703674316406, + "learning_rate": 1.3155063777470417e-05, + "log_odds_chosen": 11.514963150024414, + "log_odds_ratio": -0.00013620522804558277, + "logits/chosen": -0.34811991453170776, + "logits/rejected": -0.2632172405719757, + "logps/chosen": -0.0005448471638374031, + "logps/rejected": -2.7535440921783447, + "loss": 0.4057, + "nll_loss": 0.10140715539455414, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4484720749314874e-05, + "rewards/margins": 0.27529993653297424, + "rewards/rejected": -0.27535441517829895, + "step": 11036 + }, + { + "epoch": 7.632780082987551, + "grad_norm": 4.854983329772949, + "learning_rate": 1.3151221761180269e-05, + "log_odds_chosen": 9.725920677185059, + "log_odds_ratio": -0.0005855665076524019, + "logits/chosen": -0.6156142950057983, + "logits/rejected": -0.7673656940460205, + "logps/chosen": -0.0009282439714297652, + "logps/rejected": -1.8330867290496826, + "loss": 0.5263, + "nll_loss": 0.13151204586029053, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.282439714297652e-05, + "rewards/margins": 0.18321585655212402, + "rewards/rejected": -0.1833086907863617, + "step": 11037 + }, + { + "epoch": 7.633471645919778, + "grad_norm": 4.63347864151001, + "learning_rate": 1.3147379744890118e-05, + "log_odds_chosen": 10.164543151855469, + "log_odds_ratio": -0.00015178456669673324, + "logits/chosen": -0.4672108292579651, + "logits/rejected": -0.455607533454895, + "logps/chosen": -0.0006099496386013925, + "logps/rejected": -1.7959169149398804, + "loss": 0.6339, + "nll_loss": 0.1584549993276596, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099496386013925e-05, + "rewards/margins": 0.17953070998191833, + "rewards/rejected": -0.17959168553352356, + "step": 11038 + }, + { + "epoch": 7.634163208852005, + "grad_norm": 3.911747455596924, + "learning_rate": 1.3143537728599969e-05, + "log_odds_chosen": 11.236038208007812, + "log_odds_ratio": -2.346294786548242e-05, + "logits/chosen": -0.409637987613678, + "logits/rejected": -0.37441956996917725, + "logps/chosen": -0.000681709498167038, + "logps/rejected": -2.959486961364746, + "loss": 0.5466, + "nll_loss": 0.13664905726909637, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.81709498167038e-05, + "rewards/margins": 0.2958804965019226, + "rewards/rejected": -0.29594868421554565, + "step": 11039 + }, + { + "epoch": 7.634854771784232, + "grad_norm": 3.748584747314453, + "learning_rate": 1.3139695712309821e-05, + "log_odds_chosen": 11.168519020080566, + "log_odds_ratio": -8.183128375094384e-05, + "logits/chosen": -0.16099804639816284, + "logits/rejected": -0.1140536293387413, + "logps/chosen": -0.000173651977092959, + "logps/rejected": -2.3219733238220215, + "loss": 0.3957, + "nll_loss": 0.09892675280570984, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.736519880068954e-05, + "rewards/margins": 0.23217995464801788, + "rewards/rejected": -0.2321973294019699, + "step": 11040 + }, + { + "epoch": 7.635546334716459, + "grad_norm": 8.52940559387207, + "learning_rate": 1.313585369601967e-05, + "log_odds_chosen": 9.946934700012207, + "log_odds_ratio": -0.00021104965708218515, + "logits/chosen": -0.5072532892227173, + "logits/rejected": -0.5420174598693848, + "logps/chosen": -0.0002429535670671612, + "logps/rejected": -1.3531861305236816, + "loss": 0.4053, + "nll_loss": 0.10131174325942993, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4295355615322478e-05, + "rewards/margins": 0.13529431819915771, + "rewards/rejected": -0.1353186070919037, + "step": 11041 + }, + { + "epoch": 7.6362378976486855, + "grad_norm": 4.192775249481201, + "learning_rate": 1.3132011679729523e-05, + "log_odds_chosen": 9.611391067504883, + "log_odds_ratio": -0.0006527138175442815, + "logits/chosen": -0.23275458812713623, + "logits/rejected": -0.29491573572158813, + "logps/chosen": -0.0010348953073844314, + "logps/rejected": -1.9139440059661865, + "loss": 0.4315, + "nll_loss": 0.10780539363622665, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010348952491767704, + "rewards/margins": 0.19129091501235962, + "rewards/rejected": -0.1913943886756897, + "step": 11042 + }, + { + "epoch": 7.636929460580912, + "grad_norm": 4.465821266174316, + "learning_rate": 1.3128169663439373e-05, + "log_odds_chosen": 9.13964557647705, + "log_odds_ratio": -0.00025269301841035485, + "logits/chosen": -0.5438176989555359, + "logits/rejected": -0.5296689867973328, + "logps/chosen": -0.0004898920306004584, + "logps/rejected": -1.6209301948547363, + "loss": 0.484, + "nll_loss": 0.12096859514713287, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.898920451523736e-05, + "rewards/margins": 0.16204401850700378, + "rewards/rejected": -0.16209301352500916, + "step": 11043 + }, + { + "epoch": 7.637621023513139, + "grad_norm": 3.129265546798706, + "learning_rate": 1.3124327647149223e-05, + "log_odds_chosen": 11.348709106445312, + "log_odds_ratio": -1.561789758852683e-05, + "logits/chosen": -0.549349308013916, + "logits/rejected": -0.5514668226242065, + "logps/chosen": -8.251456165453419e-05, + "logps/rejected": -1.7832891941070557, + "loss": 0.3419, + "nll_loss": 0.08547525107860565, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.251457074948121e-06, + "rewards/margins": 0.17832067608833313, + "rewards/rejected": -0.17832891643047333, + "step": 11044 + }, + { + "epoch": 7.638312586445366, + "grad_norm": 6.4049224853515625, + "learning_rate": 1.3120485630859075e-05, + "log_odds_chosen": 10.522080421447754, + "log_odds_ratio": -9.593094728188589e-05, + "logits/chosen": -0.32245880365371704, + "logits/rejected": -0.2920140326023102, + "logps/chosen": -0.0004965736879967153, + "logps/rejected": -1.7941436767578125, + "loss": 0.527, + "nll_loss": 0.13173790276050568, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9657366616884246e-05, + "rewards/margins": 0.17936471104621887, + "rewards/rejected": -0.17941437661647797, + "step": 11045 + }, + { + "epoch": 7.639004149377593, + "grad_norm": 4.091948509216309, + "learning_rate": 1.3116643614568927e-05, + "log_odds_chosen": 9.834325790405273, + "log_odds_ratio": -0.0001687437470536679, + "logits/chosen": -0.7652751207351685, + "logits/rejected": -0.6992076635360718, + "logps/chosen": -0.0004149047308601439, + "logps/rejected": -1.7303555011749268, + "loss": 0.6693, + "nll_loss": 0.16730302572250366, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.149047163082287e-05, + "rewards/margins": 0.17299406230449677, + "rewards/rejected": -0.17303556203842163, + "step": 11046 + }, + { + "epoch": 7.63969571230982, + "grad_norm": 5.325662612915039, + "learning_rate": 1.3112801598278776e-05, + "log_odds_chosen": 11.176193237304688, + "log_odds_ratio": -0.00010870936966966838, + "logits/chosen": -0.8514454960823059, + "logits/rejected": -0.8901273012161255, + "logps/chosen": -0.0001286338228965178, + "logps/rejected": -2.1061313152313232, + "loss": 0.6168, + "nll_loss": 0.15418866276741028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.28633819258539e-05, + "rewards/margins": 0.21060027182102203, + "rewards/rejected": -0.21061314642429352, + "step": 11047 + }, + { + "epoch": 7.6403872752420465, + "grad_norm": 5.81463098526001, + "learning_rate": 1.3108959581988627e-05, + "log_odds_chosen": 11.00114917755127, + "log_odds_ratio": -3.155127342324704e-05, + "logits/chosen": -0.3544766306877136, + "logits/rejected": -0.32276397943496704, + "logps/chosen": -0.0002679823955986649, + "logps/rejected": -2.583901882171631, + "loss": 0.4699, + "nll_loss": 0.11747168749570847, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6798239559866488e-05, + "rewards/margins": 0.25836339592933655, + "rewards/rejected": -0.2583901882171631, + "step": 11048 + }, + { + "epoch": 7.641078838174274, + "grad_norm": 5.353766441345215, + "learning_rate": 1.310511756569848e-05, + "log_odds_chosen": 11.442070007324219, + "log_odds_ratio": -1.775973942130804e-05, + "logits/chosen": -0.6793317794799805, + "logits/rejected": -0.642096996307373, + "logps/chosen": -0.0002000771783059463, + "logps/rejected": -2.514169692993164, + "loss": 0.5048, + "nll_loss": 0.12619104981422424, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.000771746679675e-05, + "rewards/margins": 0.25139695405960083, + "rewards/rejected": -0.251416951417923, + "step": 11049 + }, + { + "epoch": 7.641770401106501, + "grad_norm": 4.452378273010254, + "learning_rate": 1.3101275549408329e-05, + "log_odds_chosen": 10.53568172454834, + "log_odds_ratio": -6.396832759492099e-05, + "logits/chosen": 0.1380060613155365, + "logits/rejected": 0.048368752002716064, + "logps/chosen": -0.00042815087363123894, + "logps/rejected": -2.1110527515411377, + "loss": 0.5971, + "nll_loss": 0.14927981793880463, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.281508518033661e-05, + "rewards/margins": 0.21106243133544922, + "rewards/rejected": -0.21110525727272034, + "step": 11050 + }, + { + "epoch": 7.642461964038728, + "grad_norm": 5.398094654083252, + "learning_rate": 1.3097433533118181e-05, + "log_odds_chosen": 10.258386611938477, + "log_odds_ratio": -0.00016224366845563054, + "logits/chosen": -0.8140449523925781, + "logits/rejected": -0.8388763070106506, + "logps/chosen": -0.00026844540843740106, + "logps/rejected": -2.0129921436309814, + "loss": 0.5928, + "nll_loss": 0.1481805443763733, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.684454375412315e-05, + "rewards/margins": 0.2012723684310913, + "rewards/rejected": -0.20129922032356262, + "step": 11051 + }, + { + "epoch": 7.643153526970955, + "grad_norm": 4.131388187408447, + "learning_rate": 1.3093591516828032e-05, + "log_odds_chosen": 11.63953971862793, + "log_odds_ratio": -2.8737176762660965e-05, + "logits/chosen": -0.29496562480926514, + "logits/rejected": -0.39437806606292725, + "logps/chosen": -0.00011601299775065854, + "logps/rejected": -2.398297071456909, + "loss": 0.7148, + "nll_loss": 0.17868834733963013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1601299775065854e-05, + "rewards/margins": 0.23981809616088867, + "rewards/rejected": -0.23982968926429749, + "step": 11052 + }, + { + "epoch": 7.643845089903182, + "grad_norm": 3.8487603664398193, + "learning_rate": 1.3089749500537881e-05, + "log_odds_chosen": 10.986795425415039, + "log_odds_ratio": -0.00010452770220581442, + "logits/chosen": -0.11752845346927643, + "logits/rejected": -0.21908220648765564, + "logps/chosen": -0.0001764098706189543, + "logps/rejected": -2.1109352111816406, + "loss": 0.4098, + "nll_loss": 0.10244373977184296, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.764098669809755e-05, + "rewards/margins": 0.21107587218284607, + "rewards/rejected": -0.21109351515769958, + "step": 11053 + }, + { + "epoch": 7.644536652835408, + "grad_norm": 5.135563850402832, + "learning_rate": 1.3085907484247733e-05, + "log_odds_chosen": 10.207877159118652, + "log_odds_ratio": -0.0003905233461409807, + "logits/chosen": -0.8292889595031738, + "logits/rejected": -0.7769272327423096, + "logps/chosen": -0.0012153888819739223, + "logps/rejected": -2.044013261795044, + "loss": 0.4407, + "nll_loss": 0.1101444885134697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012153889110777527, + "rewards/margins": 0.20427978038787842, + "rewards/rejected": -0.20440131425857544, + "step": 11054 + }, + { + "epoch": 7.645228215767635, + "grad_norm": 3.8269355297088623, + "learning_rate": 1.3082065467957586e-05, + "log_odds_chosen": 11.03348159790039, + "log_odds_ratio": -3.061819006688893e-05, + "logits/chosen": -0.3585564196109772, + "logits/rejected": -0.18128502368927002, + "logps/chosen": -0.00017354279407300055, + "logps/rejected": -1.9331927299499512, + "loss": 0.4084, + "nll_loss": 0.10210368037223816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7354279407300055e-05, + "rewards/margins": 0.19330193102359772, + "rewards/rejected": -0.19331926107406616, + "step": 11055 + }, + { + "epoch": 7.645919778699862, + "grad_norm": 3.101496696472168, + "learning_rate": 1.3078223451667435e-05, + "log_odds_chosen": 9.86187744140625, + "log_odds_ratio": -0.0008957652025856078, + "logits/chosen": -0.471457839012146, + "logits/rejected": -0.4430035650730133, + "logps/chosen": -0.0008445671992376447, + "logps/rejected": -1.6268866062164307, + "loss": 0.4819, + "nll_loss": 0.12039318680763245, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.445671846857294e-05, + "rewards/margins": 0.1626042127609253, + "rewards/rejected": -0.16268867254257202, + "step": 11056 + }, + { + "epoch": 7.646611341632089, + "grad_norm": 5.060888290405273, + "learning_rate": 1.3074381435377286e-05, + "log_odds_chosen": 10.043825149536133, + "log_odds_ratio": -0.00010040355118690059, + "logits/chosen": -0.6101137399673462, + "logits/rejected": -0.6584270000457764, + "logps/chosen": -0.0004174050991423428, + "logps/rejected": -1.608933687210083, + "loss": 0.2704, + "nll_loss": 0.06759215891361237, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174050991423428e-05, + "rewards/margins": 0.1608516275882721, + "rewards/rejected": -0.16089335083961487, + "step": 11057 + }, + { + "epoch": 7.647302904564316, + "grad_norm": 4.368715763092041, + "learning_rate": 1.3070539419087138e-05, + "log_odds_chosen": 10.287227630615234, + "log_odds_ratio": -4.808422090718523e-05, + "logits/chosen": -0.42402833700180054, + "logits/rejected": -0.5325243473052979, + "logps/chosen": -0.00013529349234886467, + "logps/rejected": -1.3623861074447632, + "loss": 0.2842, + "nll_loss": 0.0710451528429985, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3529348507290706e-05, + "rewards/margins": 0.13622508943080902, + "rewards/rejected": -0.13623861968517303, + "step": 11058 + }, + { + "epoch": 7.6479944674965425, + "grad_norm": 3.9085097312927246, + "learning_rate": 1.3066697402796987e-05, + "log_odds_chosen": 11.880556106567383, + "log_odds_ratio": -8.965845154307317e-06, + "logits/chosen": -0.4448636770248413, + "logits/rejected": -0.3722899854183197, + "logps/chosen": -8.700077160028741e-05, + "logps/rejected": -2.3147661685943604, + "loss": 0.4813, + "nll_loss": 0.1203346773982048, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.70007716002874e-06, + "rewards/margins": 0.23146793246269226, + "rewards/rejected": -0.23147661983966827, + "step": 11059 + }, + { + "epoch": 7.648686030428769, + "grad_norm": 5.653106212615967, + "learning_rate": 1.306285538650684e-05, + "log_odds_chosen": 11.682040214538574, + "log_odds_ratio": -4.3620006181299686e-05, + "logits/chosen": -0.41115790605545044, + "logits/rejected": -0.5715847015380859, + "logps/chosen": -0.00011102095595560968, + "logps/rejected": -2.4288294315338135, + "loss": 0.5793, + "nll_loss": 0.14481747150421143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.110209632315673e-05, + "rewards/margins": 0.24287183582782745, + "rewards/rejected": -0.24288293719291687, + "step": 11060 + }, + { + "epoch": 7.649377593360996, + "grad_norm": 4.428610324859619, + "learning_rate": 1.305901337021669e-05, + "log_odds_chosen": 12.014104843139648, + "log_odds_ratio": -3.248677967349067e-05, + "logits/chosen": -0.6964359879493713, + "logits/rejected": -0.754778265953064, + "logps/chosen": -0.00016804641927592456, + "logps/rejected": -2.9685726165771484, + "loss": 0.4959, + "nll_loss": 0.12398052215576172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6804642655188218e-05, + "rewards/margins": 0.2968404293060303, + "rewards/rejected": -0.29685723781585693, + "step": 11061 + }, + { + "epoch": 7.650069156293223, + "grad_norm": 6.771653652191162, + "learning_rate": 1.305517135392654e-05, + "log_odds_chosen": 11.188127517700195, + "log_odds_ratio": -2.3848771888879128e-05, + "logits/chosen": -0.6413849592208862, + "logits/rejected": -0.7468559145927429, + "logps/chosen": -0.00015722739044576883, + "logps/rejected": -2.141380548477173, + "loss": 0.5106, + "nll_loss": 0.1276474893093109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5722740499768406e-05, + "rewards/margins": 0.21412234008312225, + "rewards/rejected": -0.21413806080818176, + "step": 11062 + }, + { + "epoch": 7.65076071922545, + "grad_norm": 3.573374032974243, + "learning_rate": 1.3051329337636392e-05, + "log_odds_chosen": 11.263134002685547, + "log_odds_ratio": -4.986676140106283e-05, + "logits/chosen": -0.48395946621894836, + "logits/rejected": -0.5471813082695007, + "logps/chosen": -0.0003003604360856116, + "logps/rejected": -2.517976999282837, + "loss": 0.388, + "nll_loss": 0.09699208289384842, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.003604433615692e-05, + "rewards/margins": 0.25176769495010376, + "rewards/rejected": -0.25179770588874817, + "step": 11063 + }, + { + "epoch": 7.651452282157677, + "grad_norm": 3.1509644985198975, + "learning_rate": 1.3047487321346244e-05, + "log_odds_chosen": 11.123980522155762, + "log_odds_ratio": -0.00010064549860544503, + "logits/chosen": -0.35980379581451416, + "logits/rejected": -0.4639556109905243, + "logps/chosen": -0.00015493386308662593, + "logps/rejected": -2.0041680335998535, + "loss": 0.3588, + "nll_loss": 0.08968466520309448, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5493385944864713e-05, + "rewards/margins": 0.20040130615234375, + "rewards/rejected": -0.20041680335998535, + "step": 11064 + }, + { + "epoch": 7.6521438450899035, + "grad_norm": 5.125320911407471, + "learning_rate": 1.3043645305056093e-05, + "log_odds_chosen": 10.726048469543457, + "log_odds_ratio": -0.00013175973435863853, + "logits/chosen": -0.6750519275665283, + "logits/rejected": -0.722963809967041, + "logps/chosen": -0.000154820314492099, + "logps/rejected": -1.9031356573104858, + "loss": 0.5442, + "nll_loss": 0.13604173064231873, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5482029994018376e-05, + "rewards/margins": 0.19029808044433594, + "rewards/rejected": -0.19031357765197754, + "step": 11065 + }, + { + "epoch": 7.65283540802213, + "grad_norm": 3.7792458534240723, + "learning_rate": 1.3039803288765944e-05, + "log_odds_chosen": 11.097878456115723, + "log_odds_ratio": -6.332351040327922e-05, + "logits/chosen": -0.40088146924972534, + "logits/rejected": -0.414165735244751, + "logps/chosen": -0.0004706221807282418, + "logps/rejected": -2.7524054050445557, + "loss": 0.3347, + "nll_loss": 0.08366944640874863, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.706221807282418e-05, + "rewards/margins": 0.2751935124397278, + "rewards/rejected": -0.27524057030677795, + "step": 11066 + }, + { + "epoch": 7.653526970954357, + "grad_norm": 4.1204376220703125, + "learning_rate": 1.3035961272475797e-05, + "log_odds_chosen": 10.075401306152344, + "log_odds_ratio": -0.00044693853124044836, + "logits/chosen": -0.20077644288539886, + "logits/rejected": -0.24853789806365967, + "logps/chosen": -0.0005817624041810632, + "logps/rejected": -1.96353018283844, + "loss": 0.5577, + "nll_loss": 0.13937382400035858, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817624332848936e-05, + "rewards/margins": 0.1962948441505432, + "rewards/rejected": -0.196353018283844, + "step": 11067 + }, + { + "epoch": 7.654218533886584, + "grad_norm": 3.545658588409424, + "learning_rate": 1.3032119256185646e-05, + "log_odds_chosen": 10.331792831420898, + "log_odds_ratio": -0.000348773377481848, + "logits/chosen": -0.47433212399482727, + "logits/rejected": -0.4328223168849945, + "logps/chosen": -0.00035468151327222586, + "logps/rejected": -1.8747044801712036, + "loss": 0.4037, + "nll_loss": 0.10088086873292923, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.546815423760563e-05, + "rewards/margins": 0.18743497133255005, + "rewards/rejected": -0.1874704360961914, + "step": 11068 + }, + { + "epoch": 7.654910096818811, + "grad_norm": 3.4151241779327393, + "learning_rate": 1.3028277239895498e-05, + "log_odds_chosen": 10.321552276611328, + "log_odds_ratio": -0.00017826601106207818, + "logits/chosen": -0.28782233595848083, + "logits/rejected": -0.38986390829086304, + "logps/chosen": -0.00032893777824938297, + "logps/rejected": -1.685686469078064, + "loss": 0.4947, + "nll_loss": 0.12365522235631943, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.289377855253406e-05, + "rewards/margins": 0.1685357540845871, + "rewards/rejected": -0.16856864094734192, + "step": 11069 + }, + { + "epoch": 7.655601659751038, + "grad_norm": 5.626875400543213, + "learning_rate": 1.302443522360535e-05, + "log_odds_chosen": 11.525062561035156, + "log_odds_ratio": -1.7547064999234863e-05, + "logits/chosen": -0.4266539216041565, + "logits/rejected": -0.5539487600326538, + "logps/chosen": -0.0002981769503094256, + "logps/rejected": -2.565019130706787, + "loss": 0.5214, + "nll_loss": 0.13034754991531372, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.981769466714468e-05, + "rewards/margins": 0.256472110748291, + "rewards/rejected": -0.2565019130706787, + "step": 11070 + }, + { + "epoch": 7.6562932226832645, + "grad_norm": 5.510021209716797, + "learning_rate": 1.30205932073152e-05, + "log_odds_chosen": 10.636519432067871, + "log_odds_ratio": -0.0009921689052134752, + "logits/chosen": -0.39235857129096985, + "logits/rejected": -0.566108226776123, + "logps/chosen": -0.00031541811767965555, + "logps/rejected": -2.1545658111572266, + "loss": 0.559, + "nll_loss": 0.13966050744056702, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.15418146783486e-05, + "rewards/margins": 0.215425044298172, + "rewards/rejected": -0.21545659005641937, + "step": 11071 + }, + { + "epoch": 7.656984785615491, + "grad_norm": 5.544671058654785, + "learning_rate": 1.301675119102505e-05, + "log_odds_chosen": 11.99682903289795, + "log_odds_ratio": -9.31865088205086e-06, + "logits/chosen": -0.5022668838500977, + "logits/rejected": -0.5518388152122498, + "logps/chosen": -0.0001445444650016725, + "logps/rejected": -2.7820653915405273, + "loss": 0.5069, + "nll_loss": 0.1267344355583191, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4454448319156654e-05, + "rewards/margins": 0.27819210290908813, + "rewards/rejected": -0.2782065272331238, + "step": 11072 + }, + { + "epoch": 7.657676348547718, + "grad_norm": 5.128425598144531, + "learning_rate": 1.3012909174734903e-05, + "log_odds_chosen": 10.186700820922852, + "log_odds_ratio": -8.977761899586767e-05, + "logits/chosen": -0.5508114099502563, + "logits/rejected": -0.5424075126647949, + "logps/chosen": -0.00023308811069000512, + "logps/rejected": -1.5030337572097778, + "loss": 0.3516, + "nll_loss": 0.08789639919996262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.330881034140475e-05, + "rewards/margins": 0.15028007328510284, + "rewards/rejected": -0.15030337870121002, + "step": 11073 + }, + { + "epoch": 7.658367911479945, + "grad_norm": 3.445617198944092, + "learning_rate": 1.3009067158444752e-05, + "log_odds_chosen": 10.100408554077148, + "log_odds_ratio": -6.970556569285691e-05, + "logits/chosen": -0.5728808641433716, + "logits/rejected": -0.5881326794624329, + "logps/chosen": -0.00023653008975088596, + "logps/rejected": -1.698958396911621, + "loss": 0.3007, + "nll_loss": 0.07517505437135696, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3653008611290716e-05, + "rewards/margins": 0.16987217962741852, + "rewards/rejected": -0.16989585757255554, + "step": 11074 + }, + { + "epoch": 7.659059474412172, + "grad_norm": 7.81683874130249, + "learning_rate": 1.3005225142154604e-05, + "log_odds_chosen": 10.81536865234375, + "log_odds_ratio": -0.00025127388653345406, + "logits/chosen": -0.18729661405086517, + "logits/rejected": -0.21124188601970673, + "logps/chosen": -0.0003586372477002442, + "logps/rejected": -2.481471538543701, + "loss": 0.4688, + "nll_loss": 0.11716914176940918, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.586372622521594e-05, + "rewards/margins": 0.2481113076210022, + "rewards/rejected": -0.2481471598148346, + "step": 11075 + }, + { + "epoch": 7.659751037344399, + "grad_norm": 6.080760955810547, + "learning_rate": 1.3001383125864455e-05, + "log_odds_chosen": 10.210432052612305, + "log_odds_ratio": -0.00011322993668727577, + "logits/chosen": -0.2846037745475769, + "logits/rejected": -0.33245670795440674, + "logps/chosen": -0.0004584690905176103, + "logps/rejected": -1.9872753620147705, + "loss": 0.4108, + "nll_loss": 0.10269831866025925, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5846911234548315e-05, + "rewards/margins": 0.19868171215057373, + "rewards/rejected": -0.1987275630235672, + "step": 11076 + }, + { + "epoch": 7.6604426002766255, + "grad_norm": 6.209712028503418, + "learning_rate": 1.2997541109574304e-05, + "log_odds_chosen": 10.95954704284668, + "log_odds_ratio": -2.849074371624738e-05, + "logits/chosen": -0.3062824010848999, + "logits/rejected": -0.248677596449852, + "logps/chosen": -0.000221608963329345, + "logps/rejected": -1.973862886428833, + "loss": 0.5438, + "nll_loss": 0.1359541416168213, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2160895241540857e-05, + "rewards/margins": 0.19736410677433014, + "rewards/rejected": -0.19738629460334778, + "step": 11077 + }, + { + "epoch": 7.661134163208852, + "grad_norm": 6.562619209289551, + "learning_rate": 1.2993699093284156e-05, + "log_odds_chosen": 10.852315902709961, + "log_odds_ratio": -0.0002974254311993718, + "logits/chosen": -0.46977511048316956, + "logits/rejected": -0.5326958894729614, + "logps/chosen": -0.0002460549003444612, + "logps/rejected": -2.452684164047241, + "loss": 0.4886, + "nll_loss": 0.12212768197059631, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4605491489637643e-05, + "rewards/margins": 0.24524381756782532, + "rewards/rejected": -0.24526841938495636, + "step": 11078 + }, + { + "epoch": 7.661825726141079, + "grad_norm": 8.187552452087402, + "learning_rate": 1.2989857076994009e-05, + "log_odds_chosen": 11.422736167907715, + "log_odds_ratio": -0.00018802558770403266, + "logits/chosen": -0.5413578748703003, + "logits/rejected": -0.5794011354446411, + "logps/chosen": -0.000185214274097234, + "logps/rejected": -2.6838579177856445, + "loss": 0.4113, + "nll_loss": 0.10279948264360428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.852142668212764e-05, + "rewards/margins": 0.26836729049682617, + "rewards/rejected": -0.26838579773902893, + "step": 11079 + }, + { + "epoch": 7.662517289073306, + "grad_norm": 5.590038776397705, + "learning_rate": 1.2986015060703858e-05, + "log_odds_chosen": 10.133560180664062, + "log_odds_ratio": -0.0005444634589366615, + "logits/chosen": -0.4628927409648895, + "logits/rejected": -0.5485700368881226, + "logps/chosen": -0.001269020838662982, + "logps/rejected": -1.8179410696029663, + "loss": 0.4836, + "nll_loss": 0.12085415422916412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012690208677668124, + "rewards/margins": 0.18166720867156982, + "rewards/rejected": -0.18179410696029663, + "step": 11080 + }, + { + "epoch": 7.663208852005533, + "grad_norm": 3.306156635284424, + "learning_rate": 1.2982173044413709e-05, + "log_odds_chosen": 10.308717727661133, + "log_odds_ratio": -0.00011060374527005479, + "logits/chosen": -0.660594642162323, + "logits/rejected": -0.6330130100250244, + "logps/chosen": -0.00020389862766023725, + "logps/rejected": -1.6218162775039673, + "loss": 0.304, + "nll_loss": 0.07598458230495453, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0389863493619487e-05, + "rewards/margins": 0.1621612161397934, + "rewards/rejected": -0.16218163073062897, + "step": 11081 + }, + { + "epoch": 7.66390041493776, + "grad_norm": 5.0822272300720215, + "learning_rate": 1.2978331028123561e-05, + "log_odds_chosen": 10.214324951171875, + "log_odds_ratio": -0.00010539717914070934, + "logits/chosen": -0.35386863350868225, + "logits/rejected": -0.3555700182914734, + "logps/chosen": -0.0003553010756149888, + "logps/rejected": -1.734631061553955, + "loss": 0.5304, + "nll_loss": 0.13260126113891602, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.55301090166904e-05, + "rewards/margins": 0.17342758178710938, + "rewards/rejected": -0.1734631210565567, + "step": 11082 + }, + { + "epoch": 7.6645919778699865, + "grad_norm": 7.021419048309326, + "learning_rate": 1.297448901183341e-05, + "log_odds_chosen": 10.682586669921875, + "log_odds_ratio": -5.314256122801453e-05, + "logits/chosen": -0.4502115249633789, + "logits/rejected": -0.33163556456565857, + "logps/chosen": -0.0002302555221831426, + "logps/rejected": -2.1205430030822754, + "loss": 0.437, + "nll_loss": 0.10923456400632858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3025553673505783e-05, + "rewards/margins": 0.2120312750339508, + "rewards/rejected": -0.2120543122291565, + "step": 11083 + }, + { + "epoch": 7.665283540802213, + "grad_norm": 3.6946628093719482, + "learning_rate": 1.2970646995543263e-05, + "log_odds_chosen": 11.259334564208984, + "log_odds_ratio": -2.1235533495200798e-05, + "logits/chosen": -0.5278284549713135, + "logits/rejected": -0.6166843175888062, + "logps/chosen": -7.506051770178601e-05, + "logps/rejected": -1.926891803741455, + "loss": 0.3498, + "nll_loss": 0.08743873238563538, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.506051588279661e-06, + "rewards/margins": 0.192681685090065, + "rewards/rejected": -0.1926891952753067, + "step": 11084 + }, + { + "epoch": 7.66597510373444, + "grad_norm": 3.6382389068603516, + "learning_rate": 1.2966804979253113e-05, + "log_odds_chosen": 11.002378463745117, + "log_odds_ratio": -4.40895528299734e-05, + "logits/chosen": -0.4844134449958801, + "logits/rejected": -0.40494978427886963, + "logps/chosen": -0.00016172064351849258, + "logps/rejected": -1.9656169414520264, + "loss": 0.3551, + "nll_loss": 0.08876403421163559, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6172063624253497e-05, + "rewards/margins": 0.1965455263853073, + "rewards/rejected": -0.19656170904636383, + "step": 11085 + }, + { + "epoch": 7.666666666666667, + "grad_norm": 8.224577903747559, + "learning_rate": 1.2962962962962962e-05, + "log_odds_chosen": 11.308886528015137, + "log_odds_ratio": -4.0616108890390024e-05, + "logits/chosen": -0.5384029746055603, + "logits/rejected": -0.6056035161018372, + "logps/chosen": -0.00028480388573370874, + "logps/rejected": -2.533832311630249, + "loss": 0.5159, + "nll_loss": 0.1289687156677246, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8480388209572993e-05, + "rewards/margins": 0.2533547282218933, + "rewards/rejected": -0.25338321924209595, + "step": 11086 + }, + { + "epoch": 7.667358229598894, + "grad_norm": 14.134316444396973, + "learning_rate": 1.2959120946672815e-05, + "log_odds_chosen": 11.041220664978027, + "log_odds_ratio": -2.8689417376881465e-05, + "logits/chosen": -0.821334719657898, + "logits/rejected": -0.8351207375526428, + "logps/chosen": -8.886594150681049e-05, + "logps/rejected": -1.7878015041351318, + "loss": 0.5122, + "nll_loss": 0.12804894149303436, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.88659451447893e-06, + "rewards/margins": 0.1787712723016739, + "rewards/rejected": -0.17878015339374542, + "step": 11087 + }, + { + "epoch": 7.668049792531121, + "grad_norm": 4.176999092102051, + "learning_rate": 1.2955278930382667e-05, + "log_odds_chosen": 10.662544250488281, + "log_odds_ratio": -3.5465975088300183e-05, + "logits/chosen": -0.5635327696800232, + "logits/rejected": -0.7098397016525269, + "logps/chosen": -0.0001393976272083819, + "logps/rejected": -1.786032795906067, + "loss": 0.4045, + "nll_loss": 0.10113196074962616, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.393976344843395e-05, + "rewards/margins": 0.1785893440246582, + "rewards/rejected": -0.17860327661037445, + "step": 11088 + }, + { + "epoch": 7.6687413554633475, + "grad_norm": 3.8286681175231934, + "learning_rate": 1.2951436914092516e-05, + "log_odds_chosen": 10.819299697875977, + "log_odds_ratio": -5.4313921282300726e-05, + "logits/chosen": -0.44108110666275024, + "logits/rejected": -0.5384078621864319, + "logps/chosen": -0.0001469150447519496, + "logps/rejected": -1.7243348360061646, + "loss": 0.3425, + "nll_loss": 0.08562694489955902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.469150447519496e-05, + "rewards/margins": 0.17241880297660828, + "rewards/rejected": -0.1724334955215454, + "step": 11089 + }, + { + "epoch": 7.669432918395574, + "grad_norm": 4.699414253234863, + "learning_rate": 1.2947594897802367e-05, + "log_odds_chosen": 10.169301986694336, + "log_odds_ratio": -0.00018610645201988518, + "logits/chosen": -0.6933047771453857, + "logits/rejected": -0.7240607738494873, + "logps/chosen": -0.0007905750535428524, + "logps/rejected": -2.3316659927368164, + "loss": 0.7884, + "nll_loss": 0.19707630574703217, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.905750680947676e-05, + "rewards/margins": 0.23308753967285156, + "rewards/rejected": -0.23316660523414612, + "step": 11090 + }, + { + "epoch": 7.670124481327801, + "grad_norm": 7.806046962738037, + "learning_rate": 1.294375288151222e-05, + "log_odds_chosen": 10.760869979858398, + "log_odds_ratio": -0.00026300305034965277, + "logits/chosen": -0.2349396049976349, + "logits/rejected": -0.4316999316215515, + "logps/chosen": -0.00018964064656756818, + "logps/rejected": -2.3453969955444336, + "loss": 0.7376, + "nll_loss": 0.1843789964914322, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.89640650205547e-05, + "rewards/margins": 0.23452074825763702, + "rewards/rejected": -0.2345397025346756, + "step": 11091 + }, + { + "epoch": 7.670816044260028, + "grad_norm": 3.226038694381714, + "learning_rate": 1.2939910865222069e-05, + "log_odds_chosen": 10.356670379638672, + "log_odds_ratio": -7.697167893638834e-05, + "logits/chosen": -0.4536958932876587, + "logits/rejected": -0.35600998997688293, + "logps/chosen": -0.00035774571006186306, + "logps/rejected": -1.8557066917419434, + "loss": 0.3579, + "nll_loss": 0.08946461975574493, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5774570278590545e-05, + "rewards/margins": 0.18553489446640015, + "rewards/rejected": -0.18557068705558777, + "step": 11092 + }, + { + "epoch": 7.671507607192255, + "grad_norm": 12.186654090881348, + "learning_rate": 1.2936068848931921e-05, + "log_odds_chosen": 11.077951431274414, + "log_odds_ratio": -0.00010484673111932352, + "logits/chosen": -0.623490571975708, + "logits/rejected": -0.6357483863830566, + "logps/chosen": -0.00024245721579063684, + "logps/rejected": -2.1002511978149414, + "loss": 0.3713, + "nll_loss": 0.09281662106513977, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4245720851467922e-05, + "rewards/margins": 0.2100008726119995, + "rewards/rejected": -0.2100251317024231, + "step": 11093 + }, + { + "epoch": 7.672199170124482, + "grad_norm": 3.616793394088745, + "learning_rate": 1.2932226832641772e-05, + "log_odds_chosen": 10.724013328552246, + "log_odds_ratio": -3.4062111808452755e-05, + "logits/chosen": -0.3245074152946472, + "logits/rejected": -0.2890125811100006, + "logps/chosen": -0.00021971345995552838, + "logps/rejected": -2.0852818489074707, + "loss": 0.3126, + "nll_loss": 0.07814963161945343, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1971345631754957e-05, + "rewards/margins": 0.20850621163845062, + "rewards/rejected": -0.20852817595005035, + "step": 11094 + }, + { + "epoch": 7.672890733056708, + "grad_norm": 4.2207112312316895, + "learning_rate": 1.2928384816351621e-05, + "log_odds_chosen": 10.779802322387695, + "log_odds_ratio": -0.00010899059270741418, + "logits/chosen": -0.4756370484828949, + "logits/rejected": -0.522205114364624, + "logps/chosen": -0.00020636826229747385, + "logps/rejected": -2.0602314472198486, + "loss": 0.4028, + "nll_loss": 0.10067801177501678, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0636827684938908e-05, + "rewards/margins": 0.20600253343582153, + "rewards/rejected": -0.20602315664291382, + "step": 11095 + }, + { + "epoch": 7.673582295988935, + "grad_norm": 6.0017781257629395, + "learning_rate": 1.2924542800061473e-05, + "log_odds_chosen": 11.695032119750977, + "log_odds_ratio": -1.26361783259199e-05, + "logits/chosen": -0.21643948554992676, + "logits/rejected": -0.38103434443473816, + "logps/chosen": -0.0001555204507894814, + "logps/rejected": -2.811371326446533, + "loss": 0.4623, + "nll_loss": 0.11556336283683777, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5552042896160856e-05, + "rewards/margins": 0.28112155199050903, + "rewards/rejected": -0.2811371088027954, + "step": 11096 + }, + { + "epoch": 7.674273858921162, + "grad_norm": 3.479611873626709, + "learning_rate": 1.2920700783771326e-05, + "log_odds_chosen": 11.381592750549316, + "log_odds_ratio": -4.066659676027484e-05, + "logits/chosen": -0.38168367743492126, + "logits/rejected": -0.37484219670295715, + "logps/chosen": -0.00011951103806495667, + "logps/rejected": -1.8855031728744507, + "loss": 0.4033, + "nll_loss": 0.10081565380096436, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1951105079788249e-05, + "rewards/margins": 0.1885383576154709, + "rewards/rejected": -0.18855032324790955, + "step": 11097 + }, + { + "epoch": 7.674965421853389, + "grad_norm": 4.836270332336426, + "learning_rate": 1.2916858767481175e-05, + "log_odds_chosen": 12.316877365112305, + "log_odds_ratio": -4.044532761326991e-05, + "logits/chosen": -0.6205891966819763, + "logits/rejected": -0.6618015766143799, + "logps/chosen": -0.00032085011480376124, + "logps/rejected": -3.1996188163757324, + "loss": 0.3858, + "nll_loss": 0.09643936157226562, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.208500856999308e-05, + "rewards/margins": 0.3199298083782196, + "rewards/rejected": -0.31996190547943115, + "step": 11098 + }, + { + "epoch": 7.675656984785616, + "grad_norm": 4.804100513458252, + "learning_rate": 1.2913016751191026e-05, + "log_odds_chosen": 11.746265411376953, + "log_odds_ratio": -2.240795765828807e-05, + "logits/chosen": -0.36457559466362, + "logits/rejected": -0.4602331221103668, + "logps/chosen": -0.00010922572982963175, + "logps/rejected": -2.3472108840942383, + "loss": 0.4049, + "nll_loss": 0.101227305829525, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0922573892457876e-05, + "rewards/margins": 0.23471017181873322, + "rewards/rejected": -0.2347211092710495, + "step": 11099 + }, + { + "epoch": 7.676348547717843, + "grad_norm": 5.350192546844482, + "learning_rate": 1.2909174734900878e-05, + "log_odds_chosen": 10.736270904541016, + "log_odds_ratio": -3.7787984183523804e-05, + "logits/chosen": -0.39044010639190674, + "logits/rejected": -0.5081877708435059, + "logps/chosen": -0.00017108801694121212, + "logps/rejected": -1.6307861804962158, + "loss": 0.5719, + "nll_loss": 0.14297832548618317, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7108803149312735e-05, + "rewards/margins": 0.16306151449680328, + "rewards/rejected": -0.16307863593101501, + "step": 11100 + }, + { + "epoch": 7.677040110650069, + "grad_norm": 6.268930435180664, + "learning_rate": 1.2905332718610727e-05, + "log_odds_chosen": 11.114761352539062, + "log_odds_ratio": -0.00010535813635215163, + "logits/chosen": -0.4555742144584656, + "logits/rejected": -0.5609944462776184, + "logps/chosen": -0.0002988710184581578, + "logps/rejected": -2.241150140762329, + "loss": 0.3602, + "nll_loss": 0.09004272520542145, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9887100026826374e-05, + "rewards/margins": 0.22408512234687805, + "rewards/rejected": -0.22411498427391052, + "step": 11101 + }, + { + "epoch": 7.677731673582296, + "grad_norm": 4.027389049530029, + "learning_rate": 1.290149070232058e-05, + "log_odds_chosen": 10.439371109008789, + "log_odds_ratio": -0.00016615752247162163, + "logits/chosen": -0.40644049644470215, + "logits/rejected": -0.4407414197921753, + "logps/chosen": -0.00029875640757381916, + "logps/rejected": -1.6533267498016357, + "loss": 0.2944, + "nll_loss": 0.07359431684017181, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9875640393584035e-05, + "rewards/margins": 0.1653028130531311, + "rewards/rejected": -0.16533267498016357, + "step": 11102 + }, + { + "epoch": 7.678423236514523, + "grad_norm": 3.1208536624908447, + "learning_rate": 1.289764868603043e-05, + "log_odds_chosen": 9.625261306762695, + "log_odds_ratio": -0.000835335929878056, + "logits/chosen": -0.658169686794281, + "logits/rejected": -0.645994246006012, + "logps/chosen": -0.00024093702086247504, + "logps/rejected": -1.2727044820785522, + "loss": 0.4887, + "nll_loss": 0.12207914888858795, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4093702450045384e-05, + "rewards/margins": 0.12724635004997253, + "rewards/rejected": -0.12727044522762299, + "step": 11103 + }, + { + "epoch": 7.67911479944675, + "grad_norm": 4.69044303894043, + "learning_rate": 1.289380666974028e-05, + "log_odds_chosen": 11.716411590576172, + "log_odds_ratio": -1.2548175618576352e-05, + "logits/chosen": -0.4701429605484009, + "logits/rejected": -0.46198830008506775, + "logps/chosen": -0.00010991649469360709, + "logps/rejected": -2.313502311706543, + "loss": 0.4781, + "nll_loss": 0.11953376978635788, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0991649105562828e-05, + "rewards/margins": 0.2313392460346222, + "rewards/rejected": -0.23135024309158325, + "step": 11104 + }, + { + "epoch": 7.679806362378977, + "grad_norm": 5.351927280426025, + "learning_rate": 1.2889964653450132e-05, + "log_odds_chosen": 10.438713073730469, + "log_odds_ratio": -0.00010361884051235393, + "logits/chosen": -0.4354976415634155, + "logits/rejected": -0.6020984649658203, + "logps/chosen": -0.0008680078317411244, + "logps/rejected": -2.54000186920166, + "loss": 0.3392, + "nll_loss": 0.08479855209589005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.680079190526158e-05, + "rewards/margins": 0.25391340255737305, + "rewards/rejected": -0.254000186920166, + "step": 11105 + }, + { + "epoch": 7.680497925311204, + "grad_norm": 5.8652143478393555, + "learning_rate": 1.288612263715998e-05, + "log_odds_chosen": 10.755338668823242, + "log_odds_ratio": -0.00010338029096601531, + "logits/chosen": -0.24784396588802338, + "logits/rejected": -0.2557956874370575, + "logps/chosen": -0.0004714262904599309, + "logps/rejected": -2.3482601642608643, + "loss": 0.8675, + "nll_loss": 0.2168610394001007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7142631956376135e-05, + "rewards/margins": 0.23477886617183685, + "rewards/rejected": -0.234825998544693, + "step": 11106 + }, + { + "epoch": 7.68118948824343, + "grad_norm": 4.124483108520508, + "learning_rate": 1.2882280620869833e-05, + "log_odds_chosen": 9.291773796081543, + "log_odds_ratio": -0.0010523165110498667, + "logits/chosen": -0.38579070568084717, + "logits/rejected": -0.15841074287891388, + "logps/chosen": -0.0006801905110478401, + "logps/rejected": -1.8890366554260254, + "loss": 0.404, + "nll_loss": 0.10088435560464859, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.80190569255501e-05, + "rewards/margins": 0.18883565068244934, + "rewards/rejected": -0.18890367448329926, + "step": 11107 + }, + { + "epoch": 7.681881051175657, + "grad_norm": 5.067246913909912, + "learning_rate": 1.2878438604579684e-05, + "log_odds_chosen": 10.63860034942627, + "log_odds_ratio": -3.794751319219358e-05, + "logits/chosen": -0.3715812861919403, + "logits/rejected": -0.4050114154815674, + "logps/chosen": -0.0002055682853097096, + "logps/rejected": -1.8258544206619263, + "loss": 0.3734, + "nll_loss": 0.09333580732345581, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.055682853097096e-05, + "rewards/margins": 0.1825648993253708, + "rewards/rejected": -0.1825854480266571, + "step": 11108 + }, + { + "epoch": 7.682572614107884, + "grad_norm": 4.823143005371094, + "learning_rate": 1.2874596588289533e-05, + "log_odds_chosen": 11.639387130737305, + "log_odds_ratio": -1.680310560914222e-05, + "logits/chosen": -0.630001425743103, + "logits/rejected": -0.719383180141449, + "logps/chosen": -0.0002806360134854913, + "logps/rejected": -2.560408592224121, + "loss": 0.4805, + "nll_loss": 0.12012257426977158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8063604986527935e-05, + "rewards/margins": 0.25601279735565186, + "rewards/rejected": -0.25604087114334106, + "step": 11109 + }, + { + "epoch": 7.683264177040111, + "grad_norm": 3.9779834747314453, + "learning_rate": 1.2870754571999386e-05, + "log_odds_chosen": 11.51318359375, + "log_odds_ratio": -2.5370054572704248e-05, + "logits/chosen": -0.1353602111339569, + "logits/rejected": -0.055926613509655, + "logps/chosen": -9.17123252293095e-05, + "logps/rejected": -2.1241979598999023, + "loss": 0.3467, + "nll_loss": 0.08666293323040009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.17123270482989e-06, + "rewards/margins": 0.21241062879562378, + "rewards/rejected": -0.2124198079109192, + "step": 11110 + }, + { + "epoch": 7.683955739972338, + "grad_norm": 4.638302326202393, + "learning_rate": 1.2866912555709238e-05, + "log_odds_chosen": 10.735479354858398, + "log_odds_ratio": -3.148660107399337e-05, + "logits/chosen": -0.18519659340381622, + "logits/rejected": -0.28615376353263855, + "logps/chosen": -0.0001478224148740992, + "logps/rejected": -1.9038336277008057, + "loss": 0.3745, + "nll_loss": 0.09363032132387161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.478224203310674e-05, + "rewards/margins": 0.19036859273910522, + "rewards/rejected": -0.19038337469100952, + "step": 11111 + }, + { + "epoch": 7.6846473029045645, + "grad_norm": 4.754729270935059, + "learning_rate": 1.2863070539419087e-05, + "log_odds_chosen": 11.607973098754883, + "log_odds_ratio": -0.000153713597683236, + "logits/chosen": -0.2973116338253021, + "logits/rejected": -0.28141382336616516, + "logps/chosen": -0.00012407683243509382, + "logps/rejected": -2.377037525177002, + "loss": 0.5049, + "nll_loss": 0.12619741261005402, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2407683243509382e-05, + "rewards/margins": 0.23769137263298035, + "rewards/rejected": -0.23770377039909363, + "step": 11112 + }, + { + "epoch": 7.685338865836791, + "grad_norm": 5.249823570251465, + "learning_rate": 1.2859228523128938e-05, + "log_odds_chosen": 11.334756851196289, + "log_odds_ratio": -0.00019641799735836685, + "logits/chosen": -0.28146034479141235, + "logits/rejected": -0.31021493673324585, + "logps/chosen": -0.0004445326921995729, + "logps/rejected": -3.2133309841156006, + "loss": 0.4754, + "nll_loss": 0.1188407689332962, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.445326703717001e-05, + "rewards/margins": 0.3212886452674866, + "rewards/rejected": -0.321333110332489, + "step": 11113 + }, + { + "epoch": 7.686030428769018, + "grad_norm": 4.652561187744141, + "learning_rate": 1.285538650683879e-05, + "log_odds_chosen": 11.96465015411377, + "log_odds_ratio": -1.159081693913322e-05, + "logits/chosen": -0.2252419888973236, + "logits/rejected": -0.304015189409256, + "logps/chosen": -0.00014822710363660008, + "logps/rejected": -3.0069327354431152, + "loss": 0.4671, + "nll_loss": 0.11676928400993347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4822710909356829e-05, + "rewards/margins": 0.30067846179008484, + "rewards/rejected": -0.3006933033466339, + "step": 11114 + }, + { + "epoch": 7.686721991701245, + "grad_norm": 5.863364219665527, + "learning_rate": 1.285154449054864e-05, + "log_odds_chosen": 9.848235130310059, + "log_odds_ratio": -9.699568181531504e-05, + "logits/chosen": -0.31627774238586426, + "logits/rejected": -0.4065108895301819, + "logps/chosen": -0.0008459068485535681, + "logps/rejected": -2.004502296447754, + "loss": 0.4704, + "nll_loss": 0.11758262664079666, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.459068340016529e-05, + "rewards/margins": 0.20036561787128448, + "rewards/rejected": -0.20045022666454315, + "step": 11115 + }, + { + "epoch": 7.687413554633472, + "grad_norm": 8.533553123474121, + "learning_rate": 1.2847702474258492e-05, + "log_odds_chosen": 9.963769912719727, + "log_odds_ratio": -0.0006467354251071811, + "logits/chosen": -0.8026405572891235, + "logits/rejected": -0.7909748554229736, + "logps/chosen": -0.000525271927472204, + "logps/rejected": -1.9408668279647827, + "loss": 0.4588, + "nll_loss": 0.11463183909654617, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.252718983683735e-05, + "rewards/margins": 0.1940341591835022, + "rewards/rejected": -0.1940866857767105, + "step": 11116 + }, + { + "epoch": 7.688105117565699, + "grad_norm": 4.827835559844971, + "learning_rate": 1.2843860457968342e-05, + "log_odds_chosen": 10.789627075195312, + "log_odds_ratio": -0.00013693736400455236, + "logits/chosen": -0.22920483350753784, + "logits/rejected": -0.24551275372505188, + "logps/chosen": -0.0010796966962516308, + "logps/rejected": -2.63169527053833, + "loss": 0.3298, + "nll_loss": 0.08242494612932205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010796968126669526, + "rewards/margins": 0.2630615532398224, + "rewards/rejected": -0.263169527053833, + "step": 11117 + }, + { + "epoch": 7.6887966804979255, + "grad_norm": 5.003745079040527, + "learning_rate": 1.2840018441678192e-05, + "log_odds_chosen": 10.771403312683105, + "log_odds_ratio": -6.530247628688812e-05, + "logits/chosen": -0.29149118065834045, + "logits/rejected": -0.2357296198606491, + "logps/chosen": -0.00015494701801799238, + "logps/rejected": -1.824528455734253, + "loss": 0.3593, + "nll_loss": 0.089822918176651, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549470289319288e-05, + "rewards/margins": 0.18243736028671265, + "rewards/rejected": -0.18245285749435425, + "step": 11118 + }, + { + "epoch": 7.689488243430152, + "grad_norm": 3.796119451522827, + "learning_rate": 1.2836176425388044e-05, + "log_odds_chosen": 10.518040657043457, + "log_odds_ratio": -0.0004800974566023797, + "logits/chosen": 0.30365198850631714, + "logits/rejected": 0.26311445236206055, + "logps/chosen": -0.0006324481219053268, + "logps/rejected": -2.0838565826416016, + "loss": 0.4678, + "nll_loss": 0.11689867079257965, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.324481364572421e-05, + "rewards/margins": 0.2083224356174469, + "rewards/rejected": -0.2083856612443924, + "step": 11119 + }, + { + "epoch": 7.690179806362379, + "grad_norm": 5.890296936035156, + "learning_rate": 1.2832334409097896e-05, + "log_odds_chosen": 11.040729522705078, + "log_odds_ratio": -9.762219269759953e-05, + "logits/chosen": -0.4285397529602051, + "logits/rejected": -0.23988042771816254, + "logps/chosen": -0.00014194345567375422, + "logps/rejected": -2.098386526107788, + "loss": 0.3864, + "nll_loss": 0.09659399092197418, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4194343748386018e-05, + "rewards/margins": 0.20982447266578674, + "rewards/rejected": -0.2098386585712433, + "step": 11120 + }, + { + "epoch": 7.690871369294606, + "grad_norm": 5.301235198974609, + "learning_rate": 1.2828492392807745e-05, + "log_odds_chosen": 11.336024284362793, + "log_odds_ratio": -2.032413613051176e-05, + "logits/chosen": -0.1643558144569397, + "logits/rejected": -0.24725359678268433, + "logps/chosen": -0.00471664872020483, + "logps/rejected": -3.218682050704956, + "loss": 0.5756, + "nll_loss": 0.14389663934707642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004716648254543543, + "rewards/margins": 0.3213965594768524, + "rewards/rejected": -0.32186824083328247, + "step": 11121 + }, + { + "epoch": 7.691562932226833, + "grad_norm": 5.216907024383545, + "learning_rate": 1.2824650376517596e-05, + "log_odds_chosen": 11.451807022094727, + "log_odds_ratio": -2.9015925974817947e-05, + "logits/chosen": -0.4482243061065674, + "logits/rejected": -0.46607935428619385, + "logps/chosen": -0.0003059771261177957, + "logps/rejected": -2.219949722290039, + "loss": 0.3293, + "nll_loss": 0.08232185244560242, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0597715522162616e-05, + "rewards/margins": 0.22196437418460846, + "rewards/rejected": -0.22199496626853943, + "step": 11122 + }, + { + "epoch": 7.69225449515906, + "grad_norm": 4.615692615509033, + "learning_rate": 1.2820808360227449e-05, + "log_odds_chosen": 11.06494426727295, + "log_odds_ratio": -4.2912073695333675e-05, + "logits/chosen": -0.5642020106315613, + "logits/rejected": -0.5679250955581665, + "logps/chosen": -0.00022454469581134617, + "logps/rejected": -2.2882659435272217, + "loss": 0.5167, + "nll_loss": 0.12918058037757874, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2454469217336737e-05, + "rewards/margins": 0.22880414128303528, + "rewards/rejected": -0.2288265973329544, + "step": 11123 + }, + { + "epoch": 7.6929460580912865, + "grad_norm": 7.286032676696777, + "learning_rate": 1.2816966343937298e-05, + "log_odds_chosen": 11.843324661254883, + "log_odds_ratio": -1.1874160918523557e-05, + "logits/chosen": -0.2214704006910324, + "logits/rejected": -0.12504197657108307, + "logps/chosen": -0.00011633805115707219, + "logps/rejected": -2.239959239959717, + "loss": 0.4741, + "nll_loss": 0.11852260679006577, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1633805115707219e-05, + "rewards/margins": 0.2239842712879181, + "rewards/rejected": -0.22399590909481049, + "step": 11124 + }, + { + "epoch": 7.693637621023513, + "grad_norm": 4.300328254699707, + "learning_rate": 1.281312432764715e-05, + "log_odds_chosen": 10.524087905883789, + "log_odds_ratio": -7.253968942677602e-05, + "logits/chosen": 0.20716895163059235, + "logits/rejected": 0.08800049871206284, + "logps/chosen": -0.00034770212369039655, + "logps/rejected": -2.085852861404419, + "loss": 0.5194, + "nll_loss": 0.12983600795269012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477021164144389e-05, + "rewards/margins": 0.20855051279067993, + "rewards/rejected": -0.20858529210090637, + "step": 11125 + }, + { + "epoch": 7.69432918395574, + "grad_norm": 7.918651103973389, + "learning_rate": 1.2809282311357001e-05, + "log_odds_chosen": 9.899200439453125, + "log_odds_ratio": -0.0010072446893900633, + "logits/chosen": -0.49368736147880554, + "logits/rejected": -0.596975564956665, + "logps/chosen": -0.0014353056903928518, + "logps/rejected": -1.908512830734253, + "loss": 1.0489, + "nll_loss": 0.2621241807937622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014353056030813605, + "rewards/margins": 0.1907077431678772, + "rewards/rejected": -0.19085128605365753, + "step": 11126 + }, + { + "epoch": 7.695020746887967, + "grad_norm": 5.294893264770508, + "learning_rate": 1.280544029506685e-05, + "log_odds_chosen": 9.854940414428711, + "log_odds_ratio": -0.0003655731270555407, + "logits/chosen": -0.22233858704566956, + "logits/rejected": -0.26600658893585205, + "logps/chosen": -0.00037876679562032223, + "logps/rejected": -1.5811405181884766, + "loss": 0.389, + "nll_loss": 0.0972183495759964, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.787668174481951e-05, + "rewards/margins": 0.1580761820077896, + "rewards/rejected": -0.15811406075954437, + "step": 11127 + }, + { + "epoch": 7.695712309820194, + "grad_norm": 3.5334877967834473, + "learning_rate": 1.2801598278776702e-05, + "log_odds_chosen": 11.761754989624023, + "log_odds_ratio": -1.1992600775556639e-05, + "logits/chosen": -0.7332363724708557, + "logits/rejected": -0.7346802949905396, + "logps/chosen": -0.00010219802788924426, + "logps/rejected": -2.2925033569335938, + "loss": 0.4746, + "nll_loss": 0.11864231526851654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0219801879429724e-05, + "rewards/margins": 0.22924014925956726, + "rewards/rejected": -0.22925037145614624, + "step": 11128 + }, + { + "epoch": 7.696403872752421, + "grad_norm": 3.8847203254699707, + "learning_rate": 1.2797756262486555e-05, + "log_odds_chosen": 10.920198440551758, + "log_odds_ratio": -3.487545109237544e-05, + "logits/chosen": -0.09859403967857361, + "logits/rejected": -0.21252302825450897, + "logps/chosen": -0.00011548738984856755, + "logps/rejected": -1.8839833736419678, + "loss": 0.5111, + "nll_loss": 0.1277817338705063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1548739166755695e-05, + "rewards/margins": 0.18838679790496826, + "rewards/rejected": -0.1883983463048935, + "step": 11129 + }, + { + "epoch": 7.6970954356846475, + "grad_norm": 7.35653018951416, + "learning_rate": 1.2793914246196404e-05, + "log_odds_chosen": 11.698114395141602, + "log_odds_ratio": -4.405621439218521e-05, + "logits/chosen": -0.4789350628852844, + "logits/rejected": -0.6115681529045105, + "logps/chosen": -0.00026024412363767624, + "logps/rejected": -3.242624521255493, + "loss": 0.4186, + "nll_loss": 0.10464924573898315, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.602441054477822e-05, + "rewards/margins": 0.3242364227771759, + "rewards/rejected": -0.324262410402298, + "step": 11130 + }, + { + "epoch": 7.697786998616874, + "grad_norm": 5.486740589141846, + "learning_rate": 1.2790072229906255e-05, + "log_odds_chosen": 11.674176216125488, + "log_odds_ratio": -2.3742195480735973e-05, + "logits/chosen": -0.4114059507846832, + "logits/rejected": -0.4603744149208069, + "logps/chosen": -0.0005180458538234234, + "logps/rejected": -2.970984697341919, + "loss": 0.3862, + "nll_loss": 0.09653568267822266, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1804589020321146e-05, + "rewards/margins": 0.2970466613769531, + "rewards/rejected": -0.29709845781326294, + "step": 11131 + }, + { + "epoch": 7.698478561549101, + "grad_norm": 5.388491630554199, + "learning_rate": 1.2786230213616107e-05, + "log_odds_chosen": 11.102253913879395, + "log_odds_ratio": -5.318888361216523e-05, + "logits/chosen": -0.4546557068824768, + "logits/rejected": -0.453656405210495, + "logps/chosen": -0.00015464294119738042, + "logps/rejected": -2.2363927364349365, + "loss": 0.4861, + "nll_loss": 0.12153112143278122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5464294847333804e-05, + "rewards/margins": 0.22362381219863892, + "rewards/rejected": -0.22363927960395813, + "step": 11132 + }, + { + "epoch": 7.699170124481328, + "grad_norm": 3.3381705284118652, + "learning_rate": 1.2782388197325956e-05, + "log_odds_chosen": 10.243101119995117, + "log_odds_ratio": -0.00012456311378628016, + "logits/chosen": -0.18851953744888306, + "logits/rejected": -0.20321039855480194, + "logps/chosen": -0.0005285230581648648, + "logps/rejected": -1.5706794261932373, + "loss": 0.3261, + "nll_loss": 0.08151256293058395, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2852308726869524e-05, + "rewards/margins": 0.1570151001214981, + "rewards/rejected": -0.15706795454025269, + "step": 11133 + }, + { + "epoch": 7.699861687413555, + "grad_norm": 4.073915481567383, + "learning_rate": 1.2778546181035809e-05, + "log_odds_chosen": 10.793458938598633, + "log_odds_ratio": -0.00012205556413391605, + "logits/chosen": -0.5016660690307617, + "logits/rejected": -0.5064582824707031, + "logps/chosen": -0.0006956355064176023, + "logps/rejected": -2.2256133556365967, + "loss": 0.4542, + "nll_loss": 0.11353007704019547, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.956355355214328e-05, + "rewards/margins": 0.2224917858839035, + "rewards/rejected": -0.2225613296031952, + "step": 11134 + }, + { + "epoch": 7.700553250345782, + "grad_norm": 5.965721130371094, + "learning_rate": 1.277470416474566e-05, + "log_odds_chosen": 9.991838455200195, + "log_odds_ratio": -0.00011569932394195348, + "logits/chosen": -0.2887195944786072, + "logits/rejected": -0.35821449756622314, + "logps/chosen": -0.0003183086810167879, + "logps/rejected": -1.6685514450073242, + "loss": 0.616, + "nll_loss": 0.15399618446826935, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1830866646487266e-05, + "rewards/margins": 0.16682332754135132, + "rewards/rejected": -0.16685515642166138, + "step": 11135 + }, + { + "epoch": 7.7012448132780085, + "grad_norm": 4.846920967102051, + "learning_rate": 1.2770862148455508e-05, + "log_odds_chosen": 11.548954010009766, + "log_odds_ratio": -1.586158396094106e-05, + "logits/chosen": -0.5817251801490784, + "logits/rejected": -0.6089695692062378, + "logps/chosen": -9.639248310122639e-05, + "logps/rejected": -2.2452001571655273, + "loss": 0.614, + "nll_loss": 0.1535080224275589, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.63924867392052e-06, + "rewards/margins": 0.22451037168502808, + "rewards/rejected": -0.2245200127363205, + "step": 11136 + }, + { + "epoch": 7.701936376210235, + "grad_norm": 5.040010929107666, + "learning_rate": 1.276702013216536e-05, + "log_odds_chosen": 11.656959533691406, + "log_odds_ratio": -3.1027902878122404e-05, + "logits/chosen": 0.11758328974246979, + "logits/rejected": 0.0003274455666542053, + "logps/chosen": -0.0001720060536172241, + "logps/rejected": -2.435650587081909, + "loss": 0.6889, + "nll_loss": 0.17223119735717773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.720060572552029e-05, + "rewards/margins": 0.24354785680770874, + "rewards/rejected": -0.24356506764888763, + "step": 11137 + }, + { + "epoch": 7.702627939142462, + "grad_norm": 7.25405740737915, + "learning_rate": 1.2763178115875213e-05, + "log_odds_chosen": 9.189305305480957, + "log_odds_ratio": -0.0007634533103555441, + "logits/chosen": -0.340925931930542, + "logits/rejected": -0.4222091734409332, + "logps/chosen": -0.0002917966339737177, + "logps/rejected": -1.4182566404342651, + "loss": 0.7082, + "nll_loss": 0.17698562145233154, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9179662305978127e-05, + "rewards/margins": 0.14179648458957672, + "rewards/rejected": -0.14182567596435547, + "step": 11138 + }, + { + "epoch": 7.703319502074689, + "grad_norm": 4.661027908325195, + "learning_rate": 1.2759336099585062e-05, + "log_odds_chosen": 11.074902534484863, + "log_odds_ratio": -0.00014685062342323363, + "logits/chosen": -0.5922839641571045, + "logits/rejected": -0.5710658431053162, + "logps/chosen": -0.000186232035048306, + "logps/rejected": -2.023669481277466, + "loss": 0.4383, + "nll_loss": 0.10955186188220978, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8623202777234837e-05, + "rewards/margins": 0.20234830677509308, + "rewards/rejected": -0.2023669332265854, + "step": 11139 + }, + { + "epoch": 7.704011065006916, + "grad_norm": 4.876995086669922, + "learning_rate": 1.2755494083294913e-05, + "log_odds_chosen": 10.508922576904297, + "log_odds_ratio": -0.00020858444622717798, + "logits/chosen": -0.3271537721157074, + "logits/rejected": -0.34680402278900146, + "logps/chosen": -0.00028275157092139125, + "logps/rejected": -2.260883331298828, + "loss": 0.6873, + "nll_loss": 0.1718018352985382, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8275157092139125e-05, + "rewards/margins": 0.22606006264686584, + "rewards/rejected": -0.22608834505081177, + "step": 11140 + }, + { + "epoch": 7.704702627939143, + "grad_norm": 4.962493896484375, + "learning_rate": 1.2751652067004766e-05, + "log_odds_chosen": 10.637468338012695, + "log_odds_ratio": -0.00045328630949370563, + "logits/chosen": -0.7488378286361694, + "logits/rejected": -0.7832546234130859, + "logps/chosen": -0.0004857642634306103, + "logps/rejected": -2.3516077995300293, + "loss": 0.3865, + "nll_loss": 0.09657375514507294, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8576428525848314e-05, + "rewards/margins": 0.23511220514774323, + "rewards/rejected": -0.23516079783439636, + "step": 11141 + }, + { + "epoch": 7.7053941908713695, + "grad_norm": 4.3264617919921875, + "learning_rate": 1.2747810050714615e-05, + "log_odds_chosen": 11.630284309387207, + "log_odds_ratio": -0.00012610270641744137, + "logits/chosen": -0.39398303627967834, + "logits/rejected": -0.5046098828315735, + "logps/chosen": -0.00018928886856883764, + "logps/rejected": -2.340277671813965, + "loss": 0.4792, + "nll_loss": 0.1197928637266159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8928887584479526e-05, + "rewards/margins": 0.2340088188648224, + "rewards/rejected": -0.23402777314186096, + "step": 11142 + }, + { + "epoch": 7.706085753803596, + "grad_norm": 4.560120582580566, + "learning_rate": 1.2743968034424467e-05, + "log_odds_chosen": 10.950681686401367, + "log_odds_ratio": -0.00018634075240697712, + "logits/chosen": -0.6208133101463318, + "logits/rejected": -0.6838634610176086, + "logps/chosen": -0.0005703361239284277, + "logps/rejected": -1.7782117128372192, + "loss": 0.4036, + "nll_loss": 0.10088105499744415, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.703361239284277e-05, + "rewards/margins": 0.1777641475200653, + "rewards/rejected": -0.17782118916511536, + "step": 11143 + }, + { + "epoch": 7.706777316735823, + "grad_norm": 5.527631759643555, + "learning_rate": 1.2740126018134318e-05, + "log_odds_chosen": 10.481454849243164, + "log_odds_ratio": -0.00026709839585237205, + "logits/chosen": -0.10641849040985107, + "logits/rejected": -0.11019318550825119, + "logps/chosen": -0.0004817845765501261, + "logps/rejected": -2.248533248901367, + "loss": 0.5156, + "nll_loss": 0.12888376414775848, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.817845911020413e-05, + "rewards/margins": 0.22480514645576477, + "rewards/rejected": -0.22485333681106567, + "step": 11144 + }, + { + "epoch": 7.70746887966805, + "grad_norm": 5.158650875091553, + "learning_rate": 1.2736284001844167e-05, + "log_odds_chosen": 10.584527969360352, + "log_odds_ratio": -5.264970241114497e-05, + "logits/chosen": -0.32706624269485474, + "logits/rejected": -0.3761626183986664, + "logps/chosen": -0.00027035269886255264, + "logps/rejected": -1.951050043106079, + "loss": 0.3769, + "nll_loss": 0.09421977400779724, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7035270250053145e-05, + "rewards/margins": 0.19507798552513123, + "rewards/rejected": -0.19510501623153687, + "step": 11145 + }, + { + "epoch": 7.708160442600277, + "grad_norm": 4.094021320343018, + "learning_rate": 1.273244198555402e-05, + "log_odds_chosen": 10.128005981445312, + "log_odds_ratio": -0.0001861144119175151, + "logits/chosen": -0.32210487127304077, + "logits/rejected": -0.3130614161491394, + "logps/chosen": -0.0003607625840231776, + "logps/rejected": -1.6804444789886475, + "loss": 0.5249, + "nll_loss": 0.1311992108821869, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.607625694712624e-05, + "rewards/margins": 0.16800838708877563, + "rewards/rejected": -0.16804444789886475, + "step": 11146 + }, + { + "epoch": 7.708852005532504, + "grad_norm": 3.946537971496582, + "learning_rate": 1.2728599969263872e-05, + "log_odds_chosen": 11.047607421875, + "log_odds_ratio": -2.5542063667671755e-05, + "logits/chosen": -0.4898287057876587, + "logits/rejected": -0.4658747613430023, + "logps/chosen": -0.00014541887503582984, + "logps/rejected": -1.984635353088379, + "loss": 0.378, + "nll_loss": 0.09449951350688934, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4541888958774507e-05, + "rewards/margins": 0.1984490007162094, + "rewards/rejected": -0.1984635442495346, + "step": 11147 + }, + { + "epoch": 7.70954356846473, + "grad_norm": 4.478142738342285, + "learning_rate": 1.272475795297372e-05, + "log_odds_chosen": 11.520525932312012, + "log_odds_ratio": -2.9978171369293705e-05, + "logits/chosen": -0.514273464679718, + "logits/rejected": -0.3573247194290161, + "logps/chosen": -0.00017810847202781588, + "logps/rejected": -2.5793113708496094, + "loss": 0.393, + "nll_loss": 0.09824325144290924, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7810845747590065e-05, + "rewards/margins": 0.25791335105895996, + "rewards/rejected": -0.2579311430454254, + "step": 11148 + }, + { + "epoch": 7.710235131396957, + "grad_norm": 3.2450404167175293, + "learning_rate": 1.2720915936683572e-05, + "log_odds_chosen": 11.055948257446289, + "log_odds_ratio": -2.0638492060243152e-05, + "logits/chosen": -0.30314433574676514, + "logits/rejected": -0.24709904193878174, + "logps/chosen": -0.00018884678138419986, + "logps/rejected": -2.2960658073425293, + "loss": 0.331, + "nll_loss": 0.08274277299642563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8884678866015747e-05, + "rewards/margins": 0.22958768904209137, + "rewards/rejected": -0.22960656881332397, + "step": 11149 + }, + { + "epoch": 7.710926694329184, + "grad_norm": 3.7017691135406494, + "learning_rate": 1.2717073920393424e-05, + "log_odds_chosen": 10.632525444030762, + "log_odds_ratio": -0.0001720143627608195, + "logits/chosen": -0.27402323484420776, + "logits/rejected": -0.34319669008255005, + "logps/chosen": -0.0004001731285825372, + "logps/rejected": -2.192150115966797, + "loss": 0.4616, + "nll_loss": 0.11538498103618622, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.001730849267915e-05, + "rewards/margins": 0.21917499601840973, + "rewards/rejected": -0.2192150205373764, + "step": 11150 + }, + { + "epoch": 7.711618257261411, + "grad_norm": 7.314864158630371, + "learning_rate": 1.2713231904103273e-05, + "log_odds_chosen": 10.405158996582031, + "log_odds_ratio": -0.0001515313924755901, + "logits/chosen": -0.35661429166793823, + "logits/rejected": -0.48725754022598267, + "logps/chosen": -0.0005308876279741526, + "logps/rejected": -2.6299867630004883, + "loss": 0.6595, + "nll_loss": 0.1648668348789215, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3088762797415257e-05, + "rewards/margins": 0.2629455626010895, + "rewards/rejected": -0.26299867033958435, + "step": 11151 + }, + { + "epoch": 7.712309820193638, + "grad_norm": 4.250590801239014, + "learning_rate": 1.2709389887813125e-05, + "log_odds_chosen": 10.082304954528809, + "log_odds_ratio": -0.00037102343048900366, + "logits/chosen": -0.08294828236103058, + "logits/rejected": -0.015783540904521942, + "logps/chosen": -0.0008273039711639285, + "logps/rejected": -1.81986665725708, + "loss": 0.4346, + "nll_loss": 0.10861542075872421, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.273040293715894e-05, + "rewards/margins": 0.18190395832061768, + "rewards/rejected": -0.18198668956756592, + "step": 11152 + }, + { + "epoch": 7.713001383125865, + "grad_norm": 3.800506591796875, + "learning_rate": 1.2705547871522976e-05, + "log_odds_chosen": 11.379698753356934, + "log_odds_ratio": -3.8342433981597424e-05, + "logits/chosen": -0.014530891552567482, + "logits/rejected": -0.01644635759294033, + "logps/chosen": -0.0003199596831109375, + "logps/rejected": -2.4525840282440186, + "loss": 0.3931, + "nll_loss": 0.09826381504535675, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.19959654007107e-05, + "rewards/margins": 0.2452264130115509, + "rewards/rejected": -0.2452584058046341, + "step": 11153 + }, + { + "epoch": 7.713692946058091, + "grad_norm": 3.971003293991089, + "learning_rate": 1.2701705855232825e-05, + "log_odds_chosen": 10.103784561157227, + "log_odds_ratio": -0.00028593253227882087, + "logits/chosen": -0.8284880518913269, + "logits/rejected": -0.8635731935501099, + "logps/chosen": -0.0005365914548747241, + "logps/rejected": -2.1351566314697266, + "loss": 0.4528, + "nll_loss": 0.11316827684640884, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.365914330468513e-05, + "rewards/margins": 0.2134619951248169, + "rewards/rejected": -0.21351563930511475, + "step": 11154 + }, + { + "epoch": 7.714384508990318, + "grad_norm": 5.799152851104736, + "learning_rate": 1.2697863838942678e-05, + "log_odds_chosen": 10.579777717590332, + "log_odds_ratio": -0.0002642400795593858, + "logits/chosen": -0.16273128986358643, + "logits/rejected": -0.1782502830028534, + "logps/chosen": -0.00022879459720570594, + "logps/rejected": -2.074464797973633, + "loss": 0.7465, + "nll_loss": 0.18660762906074524, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2879459720570594e-05, + "rewards/margins": 0.2074236273765564, + "rewards/rejected": -0.20744650065898895, + "step": 11155 + }, + { + "epoch": 7.715076071922545, + "grad_norm": 5.766885280609131, + "learning_rate": 1.269402182265253e-05, + "log_odds_chosen": 10.02370834350586, + "log_odds_ratio": -9.30255264393054e-05, + "logits/chosen": -0.023411564528942108, + "logits/rejected": -0.11966176331043243, + "logps/chosen": -0.0001804605417419225, + "logps/rejected": -1.4501714706420898, + "loss": 0.5821, + "nll_loss": 0.14550897479057312, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8046055629383773e-05, + "rewards/margins": 0.14499910175800323, + "rewards/rejected": -0.14501714706420898, + "step": 11156 + }, + { + "epoch": 7.715767634854772, + "grad_norm": 4.2179341316223145, + "learning_rate": 1.269017980636238e-05, + "log_odds_chosen": 9.422050476074219, + "log_odds_ratio": -0.0002549506607465446, + "logits/chosen": 0.053544774651527405, + "logits/rejected": -0.015079125761985779, + "logps/chosen": -0.0005149506032466888, + "logps/rejected": -1.647516131401062, + "loss": 0.4013, + "nll_loss": 0.10029100626707077, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.149505886947736e-05, + "rewards/margins": 0.16470013558864594, + "rewards/rejected": -0.16475161910057068, + "step": 11157 + }, + { + "epoch": 7.716459197786999, + "grad_norm": 5.201953887939453, + "learning_rate": 1.268633779007223e-05, + "log_odds_chosen": 10.34267520904541, + "log_odds_ratio": -0.001971145858988166, + "logits/chosen": -0.20927292108535767, + "logits/rejected": -0.18938323855400085, + "logps/chosen": -0.014904233627021313, + "logps/rejected": -1.9391664266586304, + "loss": 0.4861, + "nll_loss": 0.12132315337657928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001490423339419067, + "rewards/margins": 0.19242621958255768, + "rewards/rejected": -0.19391664862632751, + "step": 11158 + }, + { + "epoch": 7.717150760719226, + "grad_norm": 4.524053573608398, + "learning_rate": 1.2682495773782082e-05, + "log_odds_chosen": 11.030694961547852, + "log_odds_ratio": -3.949225720134564e-05, + "logits/chosen": -0.36548861861228943, + "logits/rejected": -0.45517057180404663, + "logps/chosen": -0.0001286904443986714, + "logps/rejected": -1.940189003944397, + "loss": 0.4174, + "nll_loss": 0.1043517142534256, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2869046258856542e-05, + "rewards/margins": 0.1940060406923294, + "rewards/rejected": -0.1940189003944397, + "step": 11159 + }, + { + "epoch": 7.717842323651452, + "grad_norm": 6.148046493530273, + "learning_rate": 1.2678653757491931e-05, + "log_odds_chosen": 11.965886116027832, + "log_odds_ratio": -1.712049197521992e-05, + "logits/chosen": -0.7160577774047852, + "logits/rejected": -0.739776611328125, + "logps/chosen": -0.00015731708845123649, + "logps/rejected": -2.9925074577331543, + "loss": 0.653, + "nll_loss": 0.16325743496418, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.573170993651729e-05, + "rewards/margins": 0.2992349863052368, + "rewards/rejected": -0.2992507517337799, + "step": 11160 + }, + { + "epoch": 7.718533886583679, + "grad_norm": 12.926697731018066, + "learning_rate": 1.2674811741201784e-05, + "log_odds_chosen": 11.440237998962402, + "log_odds_ratio": -2.1183654098422267e-05, + "logits/chosen": -0.7331098318099976, + "logits/rejected": -0.609634518623352, + "logps/chosen": -0.00011901859397767112, + "logps/rejected": -2.155032157897949, + "loss": 0.4618, + "nll_loss": 0.11544632911682129, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1901860489160754e-05, + "rewards/margins": 0.21549132466316223, + "rewards/rejected": -0.21550323069095612, + "step": 11161 + }, + { + "epoch": 7.719225449515906, + "grad_norm": 3.8279616832733154, + "learning_rate": 1.2670969724911635e-05, + "log_odds_chosen": 11.816350936889648, + "log_odds_ratio": -3.3580814488232136e-05, + "logits/chosen": -0.4197129011154175, + "logits/rejected": -0.5066426992416382, + "logps/chosen": -0.00012585737567860633, + "logps/rejected": -2.701681613922119, + "loss": 0.7284, + "nll_loss": 0.1820848286151886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2585736840264872e-05, + "rewards/margins": 0.27015554904937744, + "rewards/rejected": -0.27016815543174744, + "step": 11162 + }, + { + "epoch": 7.719917012448133, + "grad_norm": 4.701117038726807, + "learning_rate": 1.2667127708621485e-05, + "log_odds_chosen": 10.379667282104492, + "log_odds_ratio": -0.00025896576698869467, + "logits/chosen": -0.5930665731430054, + "logits/rejected": -0.6023825407028198, + "logps/chosen": -0.00022300105774775147, + "logps/rejected": -1.7857420444488525, + "loss": 0.5595, + "nll_loss": 0.13983792066574097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2300104319583625e-05, + "rewards/margins": 0.17855191230773926, + "rewards/rejected": -0.17857420444488525, + "step": 11163 + }, + { + "epoch": 7.72060857538036, + "grad_norm": 4.207928657531738, + "learning_rate": 1.2663285692331336e-05, + "log_odds_chosen": 11.770381927490234, + "log_odds_ratio": -5.4319327318808064e-05, + "logits/chosen": -0.654198944568634, + "logits/rejected": -0.6638980507850647, + "logps/chosen": -0.0002516515669412911, + "logps/rejected": -2.8353614807128906, + "loss": 0.6246, + "nll_loss": 0.1561528444290161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.516515633033123e-05, + "rewards/margins": 0.2835109829902649, + "rewards/rejected": -0.2835361361503601, + "step": 11164 + }, + { + "epoch": 7.7213001383125865, + "grad_norm": 7.018038272857666, + "learning_rate": 1.2659443676041189e-05, + "log_odds_chosen": 11.889036178588867, + "log_odds_ratio": -4.769517545355484e-05, + "logits/chosen": -0.2306787520647049, + "logits/rejected": -0.31250622868537903, + "logps/chosen": -0.0002063388383248821, + "logps/rejected": -3.314582347869873, + "loss": 0.6666, + "nll_loss": 0.1666460931301117, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.063388456008397e-05, + "rewards/margins": 0.3314375877380371, + "rewards/rejected": -0.3314582109451294, + "step": 11165 + }, + { + "epoch": 7.721991701244813, + "grad_norm": 4.5476765632629395, + "learning_rate": 1.2655601659751038e-05, + "log_odds_chosen": 11.020938873291016, + "log_odds_ratio": -3.345979712321423e-05, + "logits/chosen": -0.14058321714401245, + "logits/rejected": -0.1784745305776596, + "logps/chosen": -0.00021612163982354105, + "logps/rejected": -2.4194188117980957, + "loss": 0.5841, + "nll_loss": 0.14602458477020264, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.161216252716258e-05, + "rewards/margins": 0.24192027747631073, + "rewards/rejected": -0.241941899061203, + "step": 11166 + }, + { + "epoch": 7.72268326417704, + "grad_norm": 4.855893611907959, + "learning_rate": 1.265175964346089e-05, + "log_odds_chosen": 13.27073860168457, + "log_odds_ratio": -4.107092536287382e-06, + "logits/chosen": -0.27976834774017334, + "logits/rejected": -0.25163838267326355, + "logps/chosen": -0.0001417580060660839, + "logps/rejected": -4.15740966796875, + "loss": 0.5532, + "nll_loss": 0.13828860223293304, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4175801879900973e-05, + "rewards/margins": 0.41572678089141846, + "rewards/rejected": -0.415740966796875, + "step": 11167 + }, + { + "epoch": 7.723374827109267, + "grad_norm": 4.087342262268066, + "learning_rate": 1.264791762717074e-05, + "log_odds_chosen": 10.767053604125977, + "log_odds_ratio": -0.00036937909317202866, + "logits/chosen": -0.4580538272857666, + "logits/rejected": -0.4867129623889923, + "logps/chosen": -0.0004119900113437325, + "logps/rejected": -2.1865885257720947, + "loss": 0.3387, + "nll_loss": 0.08462625741958618, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119900404475629e-05, + "rewards/margins": 0.21861764788627625, + "rewards/rejected": -0.21865884959697723, + "step": 11168 + }, + { + "epoch": 7.724066390041494, + "grad_norm": 3.855748176574707, + "learning_rate": 1.264407561088059e-05, + "log_odds_chosen": 11.13068962097168, + "log_odds_ratio": -1.747292117215693e-05, + "logits/chosen": -0.3536403179168701, + "logits/rejected": -0.4293423295021057, + "logps/chosen": -0.00011429526784922928, + "logps/rejected": -2.155888795852661, + "loss": 0.3741, + "nll_loss": 0.09352263063192368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.142952805821551e-05, + "rewards/margins": 0.21557745337486267, + "rewards/rejected": -0.21558886766433716, + "step": 11169 + }, + { + "epoch": 7.724757952973721, + "grad_norm": 3.3689465522766113, + "learning_rate": 1.2640233594590442e-05, + "log_odds_chosen": 11.25421142578125, + "log_odds_ratio": -3.45587613992393e-05, + "logits/chosen": -0.6251857280731201, + "logits/rejected": -0.6823061108589172, + "logps/chosen": -0.0001753601391101256, + "logps/rejected": -2.045698404312134, + "loss": 0.3463, + "nll_loss": 0.08658169209957123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7536012819618918e-05, + "rewards/margins": 0.2045523226261139, + "rewards/rejected": -0.20456984639167786, + "step": 11170 + }, + { + "epoch": 7.7254495159059475, + "grad_norm": 5.983028411865234, + "learning_rate": 1.2636391578300295e-05, + "log_odds_chosen": 10.396875381469727, + "log_odds_ratio": -0.00011761592759285122, + "logits/chosen": -0.6582087278366089, + "logits/rejected": -0.7967391610145569, + "logps/chosen": -0.00023359763144981116, + "logps/rejected": -1.4850988388061523, + "loss": 0.2688, + "nll_loss": 0.06719997525215149, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3359763872576877e-05, + "rewards/margins": 0.14848652482032776, + "rewards/rejected": -0.1485099047422409, + "step": 11171 + }, + { + "epoch": 7.726141078838174, + "grad_norm": 3.940302848815918, + "learning_rate": 1.2632549562010144e-05, + "log_odds_chosen": 11.063858032226562, + "log_odds_ratio": -8.894936763681471e-05, + "logits/chosen": -0.6144142746925354, + "logits/rejected": -0.7118813991546631, + "logps/chosen": -0.000178087706444785, + "logps/rejected": -2.3514490127563477, + "loss": 0.4841, + "nll_loss": 0.12101202458143234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7808772099670023e-05, + "rewards/margins": 0.23512712121009827, + "rewards/rejected": -0.23514492809772491, + "step": 11172 + }, + { + "epoch": 7.726832641770401, + "grad_norm": 3.3931593894958496, + "learning_rate": 1.2628707545719995e-05, + "log_odds_chosen": 10.330915451049805, + "log_odds_ratio": -0.00016846258949954063, + "logits/chosen": -0.22229152917861938, + "logits/rejected": -0.3545496463775635, + "logps/chosen": -0.0005119829438626766, + "logps/rejected": -2.021285057067871, + "loss": 0.3727, + "nll_loss": 0.0931679829955101, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.119829802424647e-05, + "rewards/margins": 0.20207729935646057, + "rewards/rejected": -0.20212849974632263, + "step": 11173 + }, + { + "epoch": 7.727524204702628, + "grad_norm": 8.75682258605957, + "learning_rate": 1.2624865529429844e-05, + "log_odds_chosen": 11.253244400024414, + "log_odds_ratio": -3.685813135234639e-05, + "logits/chosen": -0.41329488158226013, + "logits/rejected": -0.5813735127449036, + "logps/chosen": -0.00014242672477848828, + "logps/rejected": -2.198551654815674, + "loss": 0.4275, + "nll_loss": 0.10687272250652313, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4242672477848828e-05, + "rewards/margins": 0.21984094381332397, + "rewards/rejected": -0.2198551744222641, + "step": 11174 + }, + { + "epoch": 7.728215767634855, + "grad_norm": 4.5937981605529785, + "learning_rate": 1.2621023513139696e-05, + "log_odds_chosen": 10.833547592163086, + "log_odds_ratio": -0.0001014374356600456, + "logits/chosen": -0.4467146098613739, + "logits/rejected": -0.4794977903366089, + "logps/chosen": -0.0005390796577557921, + "logps/rejected": -2.4630837440490723, + "loss": 0.3927, + "nll_loss": 0.09816577285528183, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.390797014115378e-05, + "rewards/margins": 0.24625447392463684, + "rewards/rejected": -0.24630838632583618, + "step": 11175 + }, + { + "epoch": 7.728907330567082, + "grad_norm": 11.93628978729248, + "learning_rate": 1.2617181496849549e-05, + "log_odds_chosen": 9.881686210632324, + "log_odds_ratio": -0.0003544997307471931, + "logits/chosen": -0.7389744520187378, + "logits/rejected": -0.7917199730873108, + "logps/chosen": -0.0008742506033740938, + "logps/rejected": -1.5302459001541138, + "loss": 0.5525, + "nll_loss": 0.13810065388679504, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.742506906855851e-05, + "rewards/margins": 0.1529371738433838, + "rewards/rejected": -0.1530245989561081, + "step": 11176 + }, + { + "epoch": 7.7295988934993085, + "grad_norm": 4.479918479919434, + "learning_rate": 1.2613339480559398e-05, + "log_odds_chosen": 10.797189712524414, + "log_odds_ratio": -2.9541673939093016e-05, + "logits/chosen": -0.1515422761440277, + "logits/rejected": -0.14504599571228027, + "logps/chosen": -0.0001397371233906597, + "logps/rejected": -1.710416316986084, + "loss": 0.3542, + "nll_loss": 0.08855848014354706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.397371306666173e-05, + "rewards/margins": 0.17102766036987305, + "rewards/rejected": -0.17104163765907288, + "step": 11177 + }, + { + "epoch": 7.730290456431535, + "grad_norm": 4.554428577423096, + "learning_rate": 1.2609497464269248e-05, + "log_odds_chosen": 11.5568265914917, + "log_odds_ratio": -1.1084370271419175e-05, + "logits/chosen": -0.6276842355728149, + "logits/rejected": -0.6418471336364746, + "logps/chosen": -0.00015286514826584607, + "logps/rejected": -2.5270235538482666, + "loss": 0.6737, + "nll_loss": 0.16842985153198242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5286514098988846e-05, + "rewards/margins": 0.2526870667934418, + "rewards/rejected": -0.25270235538482666, + "step": 11178 + }, + { + "epoch": 7.730982019363762, + "grad_norm": 7.3763251304626465, + "learning_rate": 1.26056554479791e-05, + "log_odds_chosen": 11.22471809387207, + "log_odds_ratio": -4.1468985727988183e-05, + "logits/chosen": -0.4443528652191162, + "logits/rejected": -0.47654908895492554, + "logps/chosen": -0.00032382557401433587, + "logps/rejected": -2.4713592529296875, + "loss": 0.6009, + "nll_loss": 0.1502087265253067, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.238255885662511e-05, + "rewards/margins": 0.24710355699062347, + "rewards/rejected": -0.2471359521150589, + "step": 11179 + }, + { + "epoch": 7.731673582295989, + "grad_norm": 7.088914394378662, + "learning_rate": 1.260181343168895e-05, + "log_odds_chosen": 9.736190795898438, + "log_odds_ratio": -0.000677534204442054, + "logits/chosen": -0.3276589512825012, + "logits/rejected": -0.4400746822357178, + "logps/chosen": -0.0006125522195361555, + "logps/rejected": -1.2078614234924316, + "loss": 0.5129, + "nll_loss": 0.12815217673778534, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125521758804098e-05, + "rewards/margins": 0.1207248866558075, + "rewards/rejected": -0.12078613042831421, + "step": 11180 + }, + { + "epoch": 7.732365145228216, + "grad_norm": 4.531628131866455, + "learning_rate": 1.2597971415398802e-05, + "log_odds_chosen": 11.409263610839844, + "log_odds_ratio": -3.7987174437148497e-05, + "logits/chosen": -0.3952420949935913, + "logits/rejected": -0.45311498641967773, + "logps/chosen": -0.00011559738049982116, + "logps/rejected": -2.317403554916382, + "loss": 0.3281, + "nll_loss": 0.08202047646045685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1559739505173638e-05, + "rewards/margins": 0.23172880709171295, + "rewards/rejected": -0.23174037039279938, + "step": 11181 + }, + { + "epoch": 7.733056708160443, + "grad_norm": 3.2501275539398193, + "learning_rate": 1.2594129399108653e-05, + "log_odds_chosen": 11.311736106872559, + "log_odds_ratio": -2.3325386791839264e-05, + "logits/chosen": -0.3043041229248047, + "logits/rejected": -0.3732486665248871, + "logps/chosen": -0.000279276049695909, + "logps/rejected": -2.639509677886963, + "loss": 0.4197, + "nll_loss": 0.10492255538702011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7927606424782425e-05, + "rewards/margins": 0.2639230787754059, + "rewards/rejected": -0.26395100355148315, + "step": 11182 + }, + { + "epoch": 7.7337482710926695, + "grad_norm": 5.095405578613281, + "learning_rate": 1.2590287382818502e-05, + "log_odds_chosen": 11.507871627807617, + "log_odds_ratio": -5.955259985057637e-05, + "logits/chosen": -0.4925724267959595, + "logits/rejected": -0.5230408906936646, + "logps/chosen": -9.127370140049607e-05, + "logps/rejected": -2.333247661590576, + "loss": 0.6057, + "nll_loss": 0.1514206826686859, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.127370503847487e-06, + "rewards/margins": 0.2333156317472458, + "rewards/rejected": -0.23332476615905762, + "step": 11183 + }, + { + "epoch": 7.734439834024896, + "grad_norm": 5.569227695465088, + "learning_rate": 1.2586445366528355e-05, + "log_odds_chosen": 11.26315689086914, + "log_odds_ratio": -3.517435470712371e-05, + "logits/chosen": 0.6779569387435913, + "logits/rejected": 0.5444155931472778, + "logps/chosen": -0.0002390643785474822, + "logps/rejected": -2.7062904834747314, + "loss": 0.5392, + "nll_loss": 0.13479849696159363, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3906435671960935e-05, + "rewards/margins": 0.2706051468849182, + "rewards/rejected": -0.27062904834747314, + "step": 11184 + }, + { + "epoch": 7.735131396957123, + "grad_norm": 4.753345489501953, + "learning_rate": 1.2582603350238207e-05, + "log_odds_chosen": 10.940960884094238, + "log_odds_ratio": -0.0001814025454223156, + "logits/chosen": -0.47536489367485046, + "logits/rejected": -0.42041611671447754, + "logps/chosen": -0.0004682550788857043, + "logps/rejected": -2.2718544006347656, + "loss": 0.5859, + "nll_loss": 0.14645753800868988, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.682551298174076e-05, + "rewards/margins": 0.22713860869407654, + "rewards/rejected": -0.2271854281425476, + "step": 11185 + }, + { + "epoch": 7.73582295988935, + "grad_norm": 5.558916091918945, + "learning_rate": 1.2578761333948056e-05, + "log_odds_chosen": 10.662283897399902, + "log_odds_ratio": -0.0003959111636504531, + "logits/chosen": -0.6355177164077759, + "logits/rejected": -0.7223750352859497, + "logps/chosen": -0.0004100181395187974, + "logps/rejected": -2.0126688480377197, + "loss": 0.5465, + "nll_loss": 0.13657298684120178, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.100181831745431e-05, + "rewards/margins": 0.2012258768081665, + "rewards/rejected": -0.20126686990261078, + "step": 11186 + }, + { + "epoch": 7.736514522821577, + "grad_norm": 6.592702865600586, + "learning_rate": 1.2574919317657907e-05, + "log_odds_chosen": 11.67574405670166, + "log_odds_ratio": -3.718010339071043e-05, + "logits/chosen": -0.4506959617137909, + "logits/rejected": -0.5245539546012878, + "logps/chosen": -5.4668325901729986e-05, + "logps/rejected": -2.1702640056610107, + "loss": 0.4492, + "nll_loss": 0.11230722069740295, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.466832590172999e-06, + "rewards/margins": 0.2170209288597107, + "rewards/rejected": -0.21702641248703003, + "step": 11187 + }, + { + "epoch": 7.737206085753804, + "grad_norm": 5.0268330574035645, + "learning_rate": 1.257107730136776e-05, + "log_odds_chosen": 12.745853424072266, + "log_odds_ratio": -5.975254225631943e-06, + "logits/chosen": -0.32701608538627625, + "logits/rejected": -0.4010179340839386, + "logps/chosen": -8.823679672786966e-05, + "logps/rejected": -3.32920503616333, + "loss": 0.4433, + "nll_loss": 0.11081268638372421, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.823679308989085e-06, + "rewards/margins": 0.3329116702079773, + "rewards/rejected": -0.33292049169540405, + "step": 11188 + }, + { + "epoch": 7.7378976486860305, + "grad_norm": 5.755482196807861, + "learning_rate": 1.2567235285077608e-05, + "log_odds_chosen": 10.414119720458984, + "log_odds_ratio": -0.00011008892033714801, + "logits/chosen": -0.3587581515312195, + "logits/rejected": -0.3946435749530792, + "logps/chosen": -0.00015682679077144712, + "logps/rejected": -1.538665533065796, + "loss": 0.4158, + "nll_loss": 0.10394784063100815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5682679077144712e-05, + "rewards/margins": 0.15385086834430695, + "rewards/rejected": -0.15386654436588287, + "step": 11189 + }, + { + "epoch": 7.738589211618257, + "grad_norm": 4.126373291015625, + "learning_rate": 1.256339326878746e-05, + "log_odds_chosen": 9.167192459106445, + "log_odds_ratio": -0.0003323358250781894, + "logits/chosen": -0.3083547055721283, + "logits/rejected": -0.71805739402771, + "logps/chosen": -0.0006533075356855989, + "logps/rejected": -1.5606307983398438, + "loss": 0.3303, + "nll_loss": 0.08255146443843842, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.533075065817684e-05, + "rewards/margins": 0.15599775314331055, + "rewards/rejected": -0.15606307983398438, + "step": 11190 + }, + { + "epoch": 7.739280774550484, + "grad_norm": 5.740198135375977, + "learning_rate": 1.2559551252497311e-05, + "log_odds_chosen": 12.055227279663086, + "log_odds_ratio": -1.5766985598020256e-05, + "logits/chosen": -0.1922997236251831, + "logits/rejected": -0.23225519061088562, + "logps/chosen": -0.00014473784540314227, + "logps/rejected": -2.7764201164245605, + "loss": 0.5185, + "nll_loss": 0.12963469326496124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4473785086011048e-05, + "rewards/margins": 0.277627557516098, + "rewards/rejected": -0.27764201164245605, + "step": 11191 + }, + { + "epoch": 7.739972337482711, + "grad_norm": 4.969605445861816, + "learning_rate": 1.255570923620716e-05, + "log_odds_chosen": 11.182586669921875, + "log_odds_ratio": -4.7119465307332575e-05, + "logits/chosen": -0.3878333866596222, + "logits/rejected": -0.44411617517471313, + "logps/chosen": -0.00016241194680333138, + "logps/rejected": -2.5389904975891113, + "loss": 0.4358, + "nll_loss": 0.10894952714443207, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6241194316535257e-05, + "rewards/margins": 0.25388282537460327, + "rewards/rejected": -0.2538990378379822, + "step": 11192 + }, + { + "epoch": 7.740663900414938, + "grad_norm": 9.156740188598633, + "learning_rate": 1.2551867219917013e-05, + "log_odds_chosen": 12.061928749084473, + "log_odds_ratio": -4.9793354264693335e-05, + "logits/chosen": -0.2080545723438263, + "logits/rejected": -0.23290136456489563, + "logps/chosen": -0.0007853920687921345, + "logps/rejected": -3.5089850425720215, + "loss": 0.5568, + "nll_loss": 0.13918514549732208, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.85392039688304e-05, + "rewards/margins": 0.3508199453353882, + "rewards/rejected": -0.35089850425720215, + "step": 11193 + }, + { + "epoch": 7.741355463347165, + "grad_norm": 4.986725330352783, + "learning_rate": 1.2548025203626865e-05, + "log_odds_chosen": 11.318840026855469, + "log_odds_ratio": -2.896355726988986e-05, + "logits/chosen": -0.5200918316841125, + "logits/rejected": -0.587061882019043, + "logps/chosen": -0.00019649678142741323, + "logps/rejected": -2.0499584674835205, + "loss": 0.4781, + "nll_loss": 0.11951858550310135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9649680325528607e-05, + "rewards/margins": 0.20497620105743408, + "rewards/rejected": -0.20499587059020996, + "step": 11194 + }, + { + "epoch": 7.7420470262793915, + "grad_norm": 7.151978015899658, + "learning_rate": 1.2544183187336714e-05, + "log_odds_chosen": 10.956339836120605, + "log_odds_ratio": -0.00020657852292060852, + "logits/chosen": -0.5801454782485962, + "logits/rejected": -0.6348856687545776, + "logps/chosen": -0.00025836541317403316, + "logps/rejected": -2.077286720275879, + "loss": 0.4545, + "nll_loss": 0.11360951513051987, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.58365435001906e-05, + "rewards/margins": 0.20770283043384552, + "rewards/rejected": -0.20772868394851685, + "step": 11195 + }, + { + "epoch": 7.742738589211618, + "grad_norm": 6.095242500305176, + "learning_rate": 1.2540341171046565e-05, + "log_odds_chosen": 11.290716171264648, + "log_odds_ratio": -4.969790097675286e-05, + "logits/chosen": -0.2075842320919037, + "logits/rejected": -0.27315694093704224, + "logps/chosen": -0.0009419232374057174, + "logps/rejected": -2.3505849838256836, + "loss": 0.4756, + "nll_loss": 0.1188892275094986, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.419232083018869e-05, + "rewards/margins": 0.2349643111228943, + "rewards/rejected": -0.2350585013628006, + "step": 11196 + }, + { + "epoch": 7.743430152143845, + "grad_norm": 4.93026876449585, + "learning_rate": 1.2536499154756418e-05, + "log_odds_chosen": 12.610750198364258, + "log_odds_ratio": -1.0907778232649434e-05, + "logits/chosen": -0.2769096791744232, + "logits/rejected": -0.36176517605781555, + "logps/chosen": -0.00010222066339338198, + "logps/rejected": -3.3806655406951904, + "loss": 0.3054, + "nll_loss": 0.07633821666240692, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.022206743073184e-05, + "rewards/margins": 0.3380563259124756, + "rewards/rejected": -0.33806657791137695, + "step": 11197 + }, + { + "epoch": 7.744121715076072, + "grad_norm": 5.709609508514404, + "learning_rate": 1.2532657138466267e-05, + "log_odds_chosen": 10.945411682128906, + "log_odds_ratio": -5.631058229482733e-05, + "logits/chosen": -0.5118638873100281, + "logits/rejected": -0.532324492931366, + "logps/chosen": -0.0004599790263455361, + "logps/rejected": -2.5661821365356445, + "loss": 0.5729, + "nll_loss": 0.1432260125875473, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599790554493666e-05, + "rewards/margins": 0.2565721869468689, + "rewards/rejected": -0.2566182017326355, + "step": 11198 + }, + { + "epoch": 7.744813278008299, + "grad_norm": 3.967132329940796, + "learning_rate": 1.2528815122176119e-05, + "log_odds_chosen": 10.008167266845703, + "log_odds_ratio": -0.00038261100417003036, + "logits/chosen": -0.37153691053390503, + "logits/rejected": -0.3605520725250244, + "logps/chosen": -0.0008499649120494723, + "logps/rejected": -1.7307593822479248, + "loss": 0.3355, + "nll_loss": 0.08382457494735718, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.499649266013876e-05, + "rewards/margins": 0.17299094796180725, + "rewards/rejected": -0.17307592928409576, + "step": 11199 + }, + { + "epoch": 7.745504840940526, + "grad_norm": 7.0854926109313965, + "learning_rate": 1.252497310588597e-05, + "log_odds_chosen": 11.453516006469727, + "log_odds_ratio": -2.7779607989941724e-05, + "logits/chosen": -0.5036885738372803, + "logits/rejected": -0.6609827280044556, + "logps/chosen": -0.00011579034617170691, + "logps/rejected": -2.313054084777832, + "loss": 0.4022, + "nll_loss": 0.1005503386259079, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1579034435271751e-05, + "rewards/margins": 0.23129382729530334, + "rewards/rejected": -0.23130539059638977, + "step": 11200 + }, + { + "epoch": 7.746196403872752, + "grad_norm": 5.6303324699401855, + "learning_rate": 1.2521131089595819e-05, + "log_odds_chosen": 11.321676254272461, + "log_odds_ratio": -0.0001658283144934103, + "logits/chosen": -0.40062427520751953, + "logits/rejected": -0.42020952701568604, + "logps/chosen": -0.0008212847169488668, + "logps/rejected": -2.997318983078003, + "loss": 0.4117, + "nll_loss": 0.10292016714811325, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.212847023969516e-05, + "rewards/margins": 0.29964977502822876, + "rewards/rejected": -0.29973188042640686, + "step": 11201 + }, + { + "epoch": 7.746887966804979, + "grad_norm": 6.021998882293701, + "learning_rate": 1.2517289073305671e-05, + "log_odds_chosen": 10.015493392944336, + "log_odds_ratio": -0.000374117458704859, + "logits/chosen": -0.20265470445156097, + "logits/rejected": -0.1699371337890625, + "logps/chosen": -0.000535747327376157, + "logps/rejected": -1.8978259563446045, + "loss": 0.4803, + "nll_loss": 0.12004905939102173, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.357473128242418e-05, + "rewards/margins": 0.1897290199995041, + "rewards/rejected": -0.18978260457515717, + "step": 11202 + }, + { + "epoch": 7.747579529737206, + "grad_norm": 5.411603927612305, + "learning_rate": 1.2513447057015524e-05, + "log_odds_chosen": 9.879266738891602, + "log_odds_ratio": -0.00016255848458968103, + "logits/chosen": -0.39416828751564026, + "logits/rejected": -0.46854713559150696, + "logps/chosen": -0.0005647170473821461, + "logps/rejected": -2.0203137397766113, + "loss": 0.4652, + "nll_loss": 0.11628206074237823, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.647170473821461e-05, + "rewards/margins": 0.20197489857673645, + "rewards/rejected": -0.20203135907649994, + "step": 11203 + }, + { + "epoch": 7.748271092669433, + "grad_norm": 7.1708760261535645, + "learning_rate": 1.2509605040725373e-05, + "log_odds_chosen": 9.07894229888916, + "log_odds_ratio": -0.000547174655366689, + "logits/chosen": -0.12856854498386383, + "logits/rejected": -0.23630261421203613, + "logps/chosen": -0.001496141660027206, + "logps/rejected": -1.2579138278961182, + "loss": 0.2841, + "nll_loss": 0.07098029553890228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014961417764425278, + "rewards/margins": 0.12564176321029663, + "rewards/rejected": -0.12579138576984406, + "step": 11204 + }, + { + "epoch": 7.74896265560166, + "grad_norm": 3.752485990524292, + "learning_rate": 1.2505763024435224e-05, + "log_odds_chosen": 11.626092910766602, + "log_odds_ratio": -3.133751670247875e-05, + "logits/chosen": -0.34028932452201843, + "logits/rejected": -0.46844393014907837, + "logps/chosen": -0.0001301189768128097, + "logps/rejected": -2.595146656036377, + "loss": 0.3965, + "nll_loss": 0.09911525249481201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3011898772674613e-05, + "rewards/margins": 0.2595016658306122, + "rewards/rejected": -0.2595146894454956, + "step": 11205 + }, + { + "epoch": 7.749654218533887, + "grad_norm": 4.600625038146973, + "learning_rate": 1.2501921008145076e-05, + "log_odds_chosen": 12.2091646194458, + "log_odds_ratio": -1.5390061889775097e-05, + "logits/chosen": -0.6013935804367065, + "logits/rejected": -0.6590204238891602, + "logps/chosen": -0.00014516216469928622, + "logps/rejected": -2.883413791656494, + "loss": 0.382, + "nll_loss": 0.0955069437623024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4516216651827563e-05, + "rewards/margins": 0.28832685947418213, + "rewards/rejected": -0.2883414030075073, + "step": 11206 + }, + { + "epoch": 7.750345781466113, + "grad_norm": 6.549100875854492, + "learning_rate": 1.2498078991854927e-05, + "log_odds_chosen": 10.933587074279785, + "log_odds_ratio": -4.656921373680234e-05, + "logits/chosen": -0.5206800699234009, + "logits/rejected": -0.6020963191986084, + "logps/chosen": -0.0008355957688763738, + "logps/rejected": -2.7682807445526123, + "loss": 0.528, + "nll_loss": 0.1319846659898758, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.355958561878651e-05, + "rewards/margins": 0.2767445147037506, + "rewards/rejected": -0.2768280804157257, + "step": 11207 + }, + { + "epoch": 7.75103734439834, + "grad_norm": 3.6843156814575195, + "learning_rate": 1.2494236975564778e-05, + "log_odds_chosen": 11.547121047973633, + "log_odds_ratio": -0.00010025502706412226, + "logits/chosen": -0.6024690270423889, + "logits/rejected": -0.6342105865478516, + "logps/chosen": -0.00016567941929679364, + "logps/rejected": -2.5956060886383057, + "loss": 0.5138, + "nll_loss": 0.1284426748752594, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6567943021073006e-05, + "rewards/margins": 0.2595440447330475, + "rewards/rejected": -0.25956061482429504, + "step": 11208 + }, + { + "epoch": 7.751728907330567, + "grad_norm": 8.264753341674805, + "learning_rate": 1.2490394959274628e-05, + "log_odds_chosen": 10.360306739807129, + "log_odds_ratio": -6.233261956367642e-05, + "logits/chosen": -0.6511682271957397, + "logits/rejected": -0.6333962082862854, + "logps/chosen": -0.00028001246391795576, + "logps/rejected": -1.442769169807434, + "loss": 0.3138, + "nll_loss": 0.07843394577503204, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8001246391795576e-05, + "rewards/margins": 0.14424893260002136, + "rewards/rejected": -0.1442769169807434, + "step": 11209 + }, + { + "epoch": 7.752420470262794, + "grad_norm": 3.439617872238159, + "learning_rate": 1.2486552942984479e-05, + "log_odds_chosen": 10.704526901245117, + "log_odds_ratio": -0.0003997480671387166, + "logits/chosen": -0.41600725054740906, + "logits/rejected": -0.4207799434661865, + "logps/chosen": -0.0009102488402277231, + "logps/rejected": -2.3439385890960693, + "loss": 0.4589, + "nll_loss": 0.11469479650259018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.102488547796384e-05, + "rewards/margins": 0.2343028485774994, + "rewards/rejected": -0.2343938648700714, + "step": 11210 + }, + { + "epoch": 7.753112033195021, + "grad_norm": 6.369625568389893, + "learning_rate": 1.248271092669433e-05, + "log_odds_chosen": 12.472749710083008, + "log_odds_ratio": -1.946235897776205e-05, + "logits/chosen": -0.30656394362449646, + "logits/rejected": -0.3388577699661255, + "logps/chosen": -0.00020343753567431122, + "logps/rejected": -3.5933454036712646, + "loss": 0.6374, + "nll_loss": 0.15935854613780975, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0343755750218406e-05, + "rewards/margins": 0.3593142032623291, + "rewards/rejected": -0.3593345284461975, + "step": 11211 + }, + { + "epoch": 7.753803596127248, + "grad_norm": 6.118873596191406, + "learning_rate": 1.247886891040418e-05, + "log_odds_chosen": 11.584425926208496, + "log_odds_ratio": -7.112725143088028e-05, + "logits/chosen": -0.2803153097629547, + "logits/rejected": -0.3472374677658081, + "logps/chosen": -0.00031616457272320986, + "logps/rejected": -3.152381181716919, + "loss": 0.2362, + "nll_loss": 0.05905373767018318, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.161645872751251e-05, + "rewards/margins": 0.31520652770996094, + "rewards/rejected": -0.3152381181716919, + "step": 11212 + }, + { + "epoch": 7.754495159059474, + "grad_norm": 6.023653984069824, + "learning_rate": 1.2475026894114033e-05, + "log_odds_chosen": 11.131653785705566, + "log_odds_ratio": -4.222708594170399e-05, + "logits/chosen": -0.8086551427841187, + "logits/rejected": -0.814570963382721, + "logps/chosen": -0.00013568079157266766, + "logps/rejected": -1.8936514854431152, + "loss": 0.3326, + "nll_loss": 0.08315111696720123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3568078429671004e-05, + "rewards/margins": 0.18935158848762512, + "rewards/rejected": -0.1893651783466339, + "step": 11213 + }, + { + "epoch": 7.755186721991701, + "grad_norm": 4.0896477699279785, + "learning_rate": 1.2471184877823882e-05, + "log_odds_chosen": 11.216859817504883, + "log_odds_ratio": -3.130736877210438e-05, + "logits/chosen": -0.3861258029937744, + "logits/rejected": -0.4017869532108307, + "logps/chosen": -0.00017719688185025007, + "logps/rejected": -2.3807382583618164, + "loss": 0.8433, + "nll_loss": 0.21082894504070282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7719688912620768e-05, + "rewards/margins": 0.23805612325668335, + "rewards/rejected": -0.23807384073734283, + "step": 11214 + }, + { + "epoch": 7.755878284923928, + "grad_norm": 8.639068603515625, + "learning_rate": 1.2467342861533733e-05, + "log_odds_chosen": 8.53171157836914, + "log_odds_ratio": -0.002693082904443145, + "logits/chosen": -0.4846501052379608, + "logits/rejected": -0.5627926588058472, + "logps/chosen": -0.0017211114754900336, + "logps/rejected": -1.4384164810180664, + "loss": 0.7398, + "nll_loss": 0.18466946482658386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017211114754900336, + "rewards/margins": 0.14366954565048218, + "rewards/rejected": -0.14384163916110992, + "step": 11215 + }, + { + "epoch": 7.756569847856155, + "grad_norm": 4.447142601013184, + "learning_rate": 1.2463500845243585e-05, + "log_odds_chosen": 11.100719451904297, + "log_odds_ratio": -2.4733282771194354e-05, + "logits/chosen": -0.4554653763771057, + "logits/rejected": -0.44291388988494873, + "logps/chosen": -6.685042171739042e-05, + "logps/rejected": -1.6726263761520386, + "loss": 0.455, + "nll_loss": 0.11374975740909576, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6850425355369225e-06, + "rewards/margins": 0.1672559529542923, + "rewards/rejected": -0.16726262867450714, + "step": 11216 + }, + { + "epoch": 7.757261410788382, + "grad_norm": 4.845477104187012, + "learning_rate": 1.2459658828953436e-05, + "log_odds_chosen": 10.642885208129883, + "log_odds_ratio": -0.00018591950356494635, + "logits/chosen": -0.2249833643436432, + "logits/rejected": -0.2929491400718689, + "logps/chosen": -0.0009210944408550858, + "logps/rejected": -2.7952280044555664, + "loss": 0.4597, + "nll_loss": 0.11489509046077728, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.210944699589163e-05, + "rewards/margins": 0.27943071722984314, + "rewards/rejected": -0.2795228362083435, + "step": 11217 + }, + { + "epoch": 7.7579529737206085, + "grad_norm": 4.492068290710449, + "learning_rate": 1.2455816812663287e-05, + "log_odds_chosen": 10.5167236328125, + "log_odds_ratio": -0.00040569447446614504, + "logits/chosen": -0.5178202390670776, + "logits/rejected": -0.5283166170120239, + "logps/chosen": -0.0005154095706529915, + "logps/rejected": -2.447777032852173, + "loss": 0.3911, + "nll_loss": 0.09773518145084381, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1540962886065245e-05, + "rewards/margins": 0.24472618103027344, + "rewards/rejected": -0.24477770924568176, + "step": 11218 + }, + { + "epoch": 7.758644536652835, + "grad_norm": 4.011480331420898, + "learning_rate": 1.2451974796373138e-05, + "log_odds_chosen": 10.492677688598633, + "log_odds_ratio": -4.538420762401074e-05, + "logits/chosen": -0.39754992723464966, + "logits/rejected": -0.4307247996330261, + "logps/chosen": -0.0001875983434729278, + "logps/rejected": -1.51975679397583, + "loss": 0.4099, + "nll_loss": 0.10245904326438904, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.875983434729278e-05, + "rewards/margins": 0.1519569307565689, + "rewards/rejected": -0.15197569131851196, + "step": 11219 + }, + { + "epoch": 7.759336099585062, + "grad_norm": 5.337882995605469, + "learning_rate": 1.2448132780082988e-05, + "log_odds_chosen": 10.46602725982666, + "log_odds_ratio": -0.00019935752789024264, + "logits/chosen": -0.3067324459552765, + "logits/rejected": -0.3594750463962555, + "logps/chosen": -0.0002527673204895109, + "logps/rejected": -1.8997879028320312, + "loss": 0.6223, + "nll_loss": 0.15556585788726807, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.527673314034473e-05, + "rewards/margins": 0.1899535059928894, + "rewards/rejected": -0.18997877836227417, + "step": 11220 + }, + { + "epoch": 7.760027662517289, + "grad_norm": 5.162551403045654, + "learning_rate": 1.2444290763792839e-05, + "log_odds_chosen": 11.330338478088379, + "log_odds_ratio": -9.858874545898288e-05, + "logits/chosen": -0.5896552801132202, + "logits/rejected": -0.5296061635017395, + "logps/chosen": -0.0004703239828813821, + "logps/rejected": -3.0334887504577637, + "loss": 0.7643, + "nll_loss": 0.19106769561767578, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7032401198521256e-05, + "rewards/margins": 0.3033018410205841, + "rewards/rejected": -0.3033488988876343, + "step": 11221 + }, + { + "epoch": 7.760719225449516, + "grad_norm": 3.615222215652466, + "learning_rate": 1.2440448747502691e-05, + "log_odds_chosen": 11.154950141906738, + "log_odds_ratio": -9.620700438972563e-05, + "logits/chosen": -0.07681339979171753, + "logits/rejected": -0.17916563153266907, + "logps/chosen": -0.0001603415294084698, + "logps/rejected": -2.4335439205169678, + "loss": 0.4096, + "nll_loss": 0.1023990586400032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6034151485655457e-05, + "rewards/margins": 0.24333836138248444, + "rewards/rejected": -0.24335438013076782, + "step": 11222 + }, + { + "epoch": 7.761410788381743, + "grad_norm": 5.476311683654785, + "learning_rate": 1.243660673121254e-05, + "log_odds_chosen": 11.82361125946045, + "log_odds_ratio": -1.4367436961038038e-05, + "logits/chosen": -0.5733404159545898, + "logits/rejected": -0.6344277858734131, + "logps/chosen": -0.000166439451277256, + "logps/rejected": -2.5303244590759277, + "loss": 0.4878, + "nll_loss": 0.1219361424446106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.664394403633196e-05, + "rewards/margins": 0.25301581621170044, + "rewards/rejected": -0.25303247570991516, + "step": 11223 + }, + { + "epoch": 7.7621023513139695, + "grad_norm": 4.696140766143799, + "learning_rate": 1.2432764714922391e-05, + "log_odds_chosen": 11.077871322631836, + "log_odds_ratio": -2.367735214647837e-05, + "logits/chosen": -0.2561139762401581, + "logits/rejected": -0.3287086486816406, + "logps/chosen": -0.0003140079788863659, + "logps/rejected": -2.57572603225708, + "loss": 0.5179, + "nll_loss": 0.12948326766490936, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.140079934382811e-05, + "rewards/margins": 0.2575412392616272, + "rewards/rejected": -0.25757265090942383, + "step": 11224 + }, + { + "epoch": 7.762793914246196, + "grad_norm": 12.073077201843262, + "learning_rate": 1.2428922698632242e-05, + "log_odds_chosen": 11.019550323486328, + "log_odds_ratio": -0.00019632314797490835, + "logits/chosen": -0.3313988149166107, + "logits/rejected": -0.36905309557914734, + "logps/chosen": -0.00270017609000206, + "logps/rejected": -3.295527458190918, + "loss": 0.5591, + "nll_loss": 0.1397487372159958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027001762646250427, + "rewards/margins": 0.3292827606201172, + "rewards/rejected": -0.3295527696609497, + "step": 11225 + }, + { + "epoch": 7.763485477178423, + "grad_norm": 6.284043788909912, + "learning_rate": 1.2425080682342094e-05, + "log_odds_chosen": 10.246824264526367, + "log_odds_ratio": -0.0005744930822402239, + "logits/chosen": -0.10029926151037216, + "logits/rejected": -0.10771449655294418, + "logps/chosen": -0.0012491054367274046, + "logps/rejected": -2.3530826568603516, + "loss": 0.3035, + "nll_loss": 0.07581494003534317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012491054076235741, + "rewards/margins": 0.23518335819244385, + "rewards/rejected": -0.23530825972557068, + "step": 11226 + }, + { + "epoch": 7.76417704011065, + "grad_norm": 4.9749369621276855, + "learning_rate": 1.2421238666051945e-05, + "log_odds_chosen": 10.407770156860352, + "log_odds_ratio": -0.00017144394223578274, + "logits/chosen": 0.13365231454372406, + "logits/rejected": 0.12635274231433868, + "logps/chosen": -0.0010794727131724358, + "logps/rejected": -2.232872247695923, + "loss": 0.5565, + "nll_loss": 0.1391078531742096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010794726404128596, + "rewards/margins": 0.22317931056022644, + "rewards/rejected": -0.22328722476959229, + "step": 11227 + }, + { + "epoch": 7.764868603042877, + "grad_norm": 6.39568567276001, + "learning_rate": 1.2417396649761794e-05, + "log_odds_chosen": 9.643097877502441, + "log_odds_ratio": -0.12463778257369995, + "logits/chosen": -0.33734792470932007, + "logits/rejected": -0.3988925516605377, + "logps/chosen": -0.02458512969315052, + "logps/rejected": -2.240410804748535, + "loss": 0.7192, + "nll_loss": 0.16734297573566437, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024585130158811808, + "rewards/margins": 0.2215825617313385, + "rewards/rejected": -0.22404105961322784, + "step": 11228 + }, + { + "epoch": 7.765560165975104, + "grad_norm": 5.003486156463623, + "learning_rate": 1.2413554633471647e-05, + "log_odds_chosen": 10.819863319396973, + "log_odds_ratio": -2.375691110501066e-05, + "logits/chosen": 0.34223243594169617, + "logits/rejected": 0.2088853418827057, + "logps/chosen": -0.00014355707389768213, + "logps/rejected": -1.991088628768921, + "loss": 0.6244, + "nll_loss": 0.1560867726802826, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4355708117363974e-05, + "rewards/margins": 0.1990945041179657, + "rewards/rejected": -0.19910886883735657, + "step": 11229 + }, + { + "epoch": 7.7662517289073305, + "grad_norm": 6.905767917633057, + "learning_rate": 1.2409712617181497e-05, + "log_odds_chosen": 9.822664260864258, + "log_odds_ratio": -0.00029731536051258445, + "logits/chosen": -0.2679555416107178, + "logits/rejected": -0.2359420359134674, + "logps/chosen": -0.0014667949872091413, + "logps/rejected": -1.726415753364563, + "loss": 0.4251, + "nll_loss": 0.10625658184289932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014667949290014803, + "rewards/margins": 0.17249488830566406, + "rewards/rejected": -0.1726415753364563, + "step": 11230 + }, + { + "epoch": 7.766943291839557, + "grad_norm": 5.629059314727783, + "learning_rate": 1.2405870600891348e-05, + "log_odds_chosen": 12.017097473144531, + "log_odds_ratio": -1.483617234043777e-05, + "logits/chosen": -0.5283292531967163, + "logits/rejected": -0.5332249402999878, + "logps/chosen": -0.00043940573232248425, + "logps/rejected": -3.0613012313842773, + "loss": 0.444, + "nll_loss": 0.11100263893604279, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.394057032186538e-05, + "rewards/margins": 0.3060861825942993, + "rewards/rejected": -0.3061301112174988, + "step": 11231 + }, + { + "epoch": 7.767634854771784, + "grad_norm": 4.639513969421387, + "learning_rate": 1.2402028584601199e-05, + "log_odds_chosen": 11.180654525756836, + "log_odds_ratio": -0.0002833344624377787, + "logits/chosen": -0.13472601771354675, + "logits/rejected": -0.18717388808727264, + "logps/chosen": -0.0004253677325323224, + "logps/rejected": -2.7793374061584473, + "loss": 0.4613, + "nll_loss": 0.11529567092657089, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253677252563648e-05, + "rewards/margins": 0.2778911888599396, + "rewards/rejected": -0.2779337465763092, + "step": 11232 + }, + { + "epoch": 7.768326417704011, + "grad_norm": 4.344387531280518, + "learning_rate": 1.239818656831105e-05, + "log_odds_chosen": 10.438720703125, + "log_odds_ratio": -0.0005811832961626351, + "logits/chosen": -0.01854592189192772, + "logits/rejected": -0.04074572026729584, + "logps/chosen": -0.0006477055721916258, + "logps/rejected": -2.2782235145568848, + "loss": 0.3492, + "nll_loss": 0.08724640309810638, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.477056012954563e-05, + "rewards/margins": 0.22775757312774658, + "rewards/rejected": -0.22782233357429504, + "step": 11233 + }, + { + "epoch": 7.769017980636238, + "grad_norm": 4.033237934112549, + "learning_rate": 1.23943445520209e-05, + "log_odds_chosen": 10.35333251953125, + "log_odds_ratio": -6.425260653486475e-05, + "logits/chosen": -0.11870370805263519, + "logits/rejected": -0.32155144214630127, + "logps/chosen": -9.639868221711367e-05, + "logps/rejected": -1.346225619316101, + "loss": 0.4018, + "nll_loss": 0.10044016689062119, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.639867130317725e-06, + "rewards/margins": 0.13461291790008545, + "rewards/rejected": -0.13462257385253906, + "step": 11234 + }, + { + "epoch": 7.769709543568465, + "grad_norm": 3.540543794631958, + "learning_rate": 1.2390502535730753e-05, + "log_odds_chosen": 12.143965721130371, + "log_odds_ratio": -0.0001564951817272231, + "logits/chosen": 0.0017357002943754196, + "logits/rejected": -0.016939736902713776, + "logps/chosen": -0.00022795412223786116, + "logps/rejected": -3.7460548877716064, + "loss": 0.5938, + "nll_loss": 0.14844663441181183, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2795411496190354e-05, + "rewards/margins": 0.3745827078819275, + "rewards/rejected": -0.3746054768562317, + "step": 11235 + }, + { + "epoch": 7.7704011065006915, + "grad_norm": 5.323680400848389, + "learning_rate": 1.2386660519440604e-05, + "log_odds_chosen": 11.648907661437988, + "log_odds_ratio": -7.141516107367352e-05, + "logits/chosen": -0.12177214026451111, + "logits/rejected": -0.26459285616874695, + "logps/chosen": -0.0006559010944329202, + "logps/rejected": -2.6246397495269775, + "loss": 0.4249, + "nll_loss": 0.10621163249015808, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.559010216733441e-05, + "rewards/margins": 0.2623984217643738, + "rewards/rejected": -0.2624639868736267, + "step": 11236 + }, + { + "epoch": 7.771092669432918, + "grad_norm": 4.8484578132629395, + "learning_rate": 1.2382818503150453e-05, + "log_odds_chosen": 11.023386001586914, + "log_odds_ratio": -0.00012878225243184716, + "logits/chosen": -0.40300998091697693, + "logits/rejected": -0.4514096975326538, + "logps/chosen": -0.000475711131002754, + "logps/rejected": -2.070675849914551, + "loss": 0.4069, + "nll_loss": 0.10171553492546082, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.75711131002754e-05, + "rewards/margins": 0.20702001452445984, + "rewards/rejected": -0.2070675939321518, + "step": 11237 + }, + { + "epoch": 7.771784232365145, + "grad_norm": 4.760718822479248, + "learning_rate": 1.2378976486860305e-05, + "log_odds_chosen": 10.488458633422852, + "log_odds_ratio": -5.355846224119887e-05, + "logits/chosen": -0.1504298448562622, + "logits/rejected": -0.2084628939628601, + "logps/chosen": -0.0002689189277589321, + "logps/rejected": -2.0269899368286133, + "loss": 0.4227, + "nll_loss": 0.10567829012870789, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.689189204829745e-05, + "rewards/margins": 0.2026720941066742, + "rewards/rejected": -0.2026989907026291, + "step": 11238 + }, + { + "epoch": 7.772475795297372, + "grad_norm": 5.360637187957764, + "learning_rate": 1.2375134470570156e-05, + "log_odds_chosen": 9.567817687988281, + "log_odds_ratio": -0.0001716063270578161, + "logits/chosen": 0.03479413315653801, + "logits/rejected": -0.12412481009960175, + "logps/chosen": -0.00041789220995269716, + "logps/rejected": -1.7624620199203491, + "loss": 0.5261, + "nll_loss": 0.13150463998317719, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.178922245046124e-05, + "rewards/margins": 0.17620441317558289, + "rewards/rejected": -0.17624621093273163, + "step": 11239 + }, + { + "epoch": 7.773167358229599, + "grad_norm": 5.343230247497559, + "learning_rate": 1.2371292454280007e-05, + "log_odds_chosen": 9.847702026367188, + "log_odds_ratio": -0.00029629640630446374, + "logits/chosen": -0.32641664147377014, + "logits/rejected": -0.4043084979057312, + "logps/chosen": -0.0004543064278550446, + "logps/rejected": -1.8167530298233032, + "loss": 0.5669, + "nll_loss": 0.14169108867645264, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.543064278550446e-05, + "rewards/margins": 0.18162989616394043, + "rewards/rejected": -0.18167531490325928, + "step": 11240 + }, + { + "epoch": 7.773858921161826, + "grad_norm": 5.036294937133789, + "learning_rate": 1.2367450437989857e-05, + "log_odds_chosen": 11.093664169311523, + "log_odds_ratio": -0.00019268778851255774, + "logits/chosen": -0.466901034116745, + "logits/rejected": -0.5163452625274658, + "logps/chosen": -0.0002453480556141585, + "logps/rejected": -1.9668821096420288, + "loss": 0.6219, + "nll_loss": 0.15545853972434998, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.453480556141585e-05, + "rewards/margins": 0.19666367769241333, + "rewards/rejected": -0.1966882199048996, + "step": 11241 + }, + { + "epoch": 7.7745504840940525, + "grad_norm": 7.579738140106201, + "learning_rate": 1.2363608421699708e-05, + "log_odds_chosen": 11.08250904083252, + "log_odds_ratio": -0.00017796877364162356, + "logits/chosen": -0.28047680854797363, + "logits/rejected": -0.34084808826446533, + "logps/chosen": -0.00011370638094376773, + "logps/rejected": -2.112016201019287, + "loss": 0.5131, + "nll_loss": 0.12825888395309448, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1370637366781011e-05, + "rewards/margins": 0.21119023859500885, + "rewards/rejected": -0.21120160818099976, + "step": 11242 + }, + { + "epoch": 7.775242047026279, + "grad_norm": 6.279829978942871, + "learning_rate": 1.2359766405409559e-05, + "log_odds_chosen": 10.941543579101562, + "log_odds_ratio": -5.108896948513575e-05, + "logits/chosen": -0.5631160140037537, + "logits/rejected": -0.5826612114906311, + "logps/chosen": -0.00022564077517017722, + "logps/rejected": -1.958135962486267, + "loss": 0.5337, + "nll_loss": 0.1334185153245926, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2564077880815603e-05, + "rewards/margins": 0.19579105079174042, + "rewards/rejected": -0.1958136260509491, + "step": 11243 + }, + { + "epoch": 7.775933609958506, + "grad_norm": 6.054798603057861, + "learning_rate": 1.2355924389119411e-05, + "log_odds_chosen": 10.729474067687988, + "log_odds_ratio": -4.8919762775767595e-05, + "logits/chosen": -0.3799744248390198, + "logits/rejected": -0.4536796808242798, + "logps/chosen": -0.00012781617988366634, + "logps/rejected": -1.7896045446395874, + "loss": 0.5936, + "nll_loss": 0.1483994424343109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2781618352164514e-05, + "rewards/margins": 0.17894765734672546, + "rewards/rejected": -0.17896045744419098, + "step": 11244 + }, + { + "epoch": 7.776625172890733, + "grad_norm": 16.403644561767578, + "learning_rate": 1.2352082372829262e-05, + "log_odds_chosen": 10.198113441467285, + "log_odds_ratio": -0.000842939771246165, + "logits/chosen": -0.49508100748062134, + "logits/rejected": -0.5563443303108215, + "logps/chosen": -0.001958235399797559, + "logps/rejected": -2.2131824493408203, + "loss": 0.5583, + "nll_loss": 0.1394910216331482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019582355162128806, + "rewards/margins": 0.22112241387367249, + "rewards/rejected": -0.22131824493408203, + "step": 11245 + }, + { + "epoch": 7.77731673582296, + "grad_norm": 3.130303382873535, + "learning_rate": 1.2348240356539111e-05, + "log_odds_chosen": 10.778337478637695, + "log_odds_ratio": -3.900337833329104e-05, + "logits/chosen": -0.6491649150848389, + "logits/rejected": -0.6453468799591064, + "logps/chosen": -0.0001705015602055937, + "logps/rejected": -1.8707752227783203, + "loss": 0.2614, + "nll_loss": 0.06534810364246368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.705015529296361e-05, + "rewards/margins": 0.18706047534942627, + "rewards/rejected": -0.18707750737667084, + "step": 11246 + }, + { + "epoch": 7.778008298755187, + "grad_norm": 4.403022289276123, + "learning_rate": 1.2344398340248964e-05, + "log_odds_chosen": 9.740409851074219, + "log_odds_ratio": -0.0019135898910462856, + "logits/chosen": -0.7549378871917725, + "logits/rejected": -0.8666030168533325, + "logps/chosen": -0.0005426771240308881, + "logps/rejected": -1.688401460647583, + "loss": 0.5451, + "nll_loss": 0.136093869805336, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426771167549305e-05, + "rewards/margins": 0.16878588497638702, + "rewards/rejected": -0.16884015500545502, + "step": 11247 + }, + { + "epoch": 7.7786998616874135, + "grad_norm": 5.307613849639893, + "learning_rate": 1.2340556323958814e-05, + "log_odds_chosen": 10.75597095489502, + "log_odds_ratio": -3.23170970659703e-05, + "logits/chosen": -0.4456849694252014, + "logits/rejected": -0.46171778440475464, + "logps/chosen": -0.0001080541405826807, + "logps/rejected": -1.5207815170288086, + "loss": 0.3725, + "nll_loss": 0.0931098461151123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.080541369447019e-05, + "rewards/margins": 0.15206734836101532, + "rewards/rejected": -0.15207815170288086, + "step": 11248 + }, + { + "epoch": 7.77939142461964, + "grad_norm": 8.923099517822266, + "learning_rate": 1.2336714307668665e-05, + "log_odds_chosen": 10.36142635345459, + "log_odds_ratio": -7.614222704432905e-05, + "logits/chosen": -0.6055785417556763, + "logits/rejected": -0.6693324446678162, + "logps/chosen": -0.00045596505515277386, + "logps/rejected": -1.8207474946975708, + "loss": 1.2258, + "nll_loss": 0.3064342737197876, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5596505515277386e-05, + "rewards/margins": 0.1820291429758072, + "rewards/rejected": -0.18207474052906036, + "step": 11249 + }, + { + "epoch": 7.780082987551867, + "grad_norm": 9.660794258117676, + "learning_rate": 1.2332872291378516e-05, + "log_odds_chosen": 11.588338851928711, + "log_odds_ratio": -3.191693394910544e-05, + "logits/chosen": 0.021339019760489464, + "logits/rejected": -0.17270031571388245, + "logps/chosen": -0.00010125144763151184, + "logps/rejected": -2.145808219909668, + "loss": 0.373, + "nll_loss": 0.09324344992637634, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0125144399353303e-05, + "rewards/margins": 0.21457070112228394, + "rewards/rejected": -0.21458081901073456, + "step": 11250 + }, + { + "epoch": 7.780774550484094, + "grad_norm": 4.1214728355407715, + "learning_rate": 1.2329030275088367e-05, + "log_odds_chosen": 11.965429306030273, + "log_odds_ratio": -1.558518852107227e-05, + "logits/chosen": -0.23796215653419495, + "logits/rejected": -0.246348038315773, + "logps/chosen": -0.0002504836884327233, + "logps/rejected": -3.3777589797973633, + "loss": 0.5093, + "nll_loss": 0.1273270845413208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5048371753655374e-05, + "rewards/margins": 0.3377508819103241, + "rewards/rejected": -0.33777591586112976, + "step": 11251 + }, + { + "epoch": 7.781466113416321, + "grad_norm": 3.6593050956726074, + "learning_rate": 1.2325188258798217e-05, + "log_odds_chosen": 10.831826210021973, + "log_odds_ratio": -3.931194805772975e-05, + "logits/chosen": -0.3259609043598175, + "logits/rejected": -0.3795009255409241, + "logps/chosen": -0.00019056108430959284, + "logps/rejected": -2.411302089691162, + "loss": 0.4118, + "nll_loss": 0.10293841361999512, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9056107703363523e-05, + "rewards/margins": 0.241111159324646, + "rewards/rejected": -0.24113020300865173, + "step": 11252 + }, + { + "epoch": 7.782157676348548, + "grad_norm": 5.860049724578857, + "learning_rate": 1.232134624250807e-05, + "log_odds_chosen": 10.41196060180664, + "log_odds_ratio": -0.00012101135507691652, + "logits/chosen": -0.7500457167625427, + "logits/rejected": -0.8380433320999146, + "logps/chosen": -0.000306506990455091, + "logps/rejected": -2.0738277435302734, + "loss": 0.5564, + "nll_loss": 0.13908186554908752, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.065070268348791e-05, + "rewards/margins": 0.20735211670398712, + "rewards/rejected": -0.20738276839256287, + "step": 11253 + }, + { + "epoch": 7.782849239280774, + "grad_norm": 4.6908674240112305, + "learning_rate": 1.231750422621792e-05, + "log_odds_chosen": 11.748579025268555, + "log_odds_ratio": -1.6796086129033938e-05, + "logits/chosen": -0.45081204175949097, + "logits/rejected": -0.5256200432777405, + "logps/chosen": -0.000127775885630399, + "logps/rejected": -2.526742935180664, + "loss": 0.5556, + "nll_loss": 0.13889187574386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.277758929063566e-05, + "rewards/margins": 0.2526615262031555, + "rewards/rejected": -0.25267428159713745, + "step": 11254 + }, + { + "epoch": 7.783540802213001, + "grad_norm": 5.625558376312256, + "learning_rate": 1.2313662209927771e-05, + "log_odds_chosen": 11.430329322814941, + "log_odds_ratio": -4.6198027121135965e-05, + "logits/chosen": -0.3089427053928375, + "logits/rejected": -0.3261297047138214, + "logps/chosen": -0.000235933912335895, + "logps/rejected": -2.89105224609375, + "loss": 0.4137, + "nll_loss": 0.10342574119567871, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.35933912335895e-05, + "rewards/margins": 0.2890816330909729, + "rewards/rejected": -0.28910520672798157, + "step": 11255 + }, + { + "epoch": 7.784232365145228, + "grad_norm": 4.9262189865112305, + "learning_rate": 1.2309820193637622e-05, + "log_odds_chosen": 9.46239948272705, + "log_odds_ratio": -0.0007331681554205716, + "logits/chosen": -0.5546839833259583, + "logits/rejected": -0.7506892085075378, + "logps/chosen": -0.001192574854940176, + "logps/rejected": -1.8165473937988281, + "loss": 0.4424, + "nll_loss": 0.11051993072032928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001192574854940176, + "rewards/margins": 0.1815354973077774, + "rewards/rejected": -0.18165475130081177, + "step": 11256 + }, + { + "epoch": 7.784923928077455, + "grad_norm": 4.006134033203125, + "learning_rate": 1.2305978177347473e-05, + "log_odds_chosen": 11.54184341430664, + "log_odds_ratio": -1.7175989341922104e-05, + "logits/chosen": -0.20995602011680603, + "logits/rejected": -0.19702300429344177, + "logps/chosen": -0.00015768143930472434, + "logps/rejected": -2.5994372367858887, + "loss": 0.3523, + "nll_loss": 0.08806820958852768, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5768144294270314e-05, + "rewards/margins": 0.2599279582500458, + "rewards/rejected": -0.25994372367858887, + "step": 11257 + }, + { + "epoch": 7.785615491009682, + "grad_norm": 7.681565761566162, + "learning_rate": 1.2302136161057323e-05, + "log_odds_chosen": 11.597307205200195, + "log_odds_ratio": -0.00026457561762072146, + "logits/chosen": -0.3409126400947571, + "logits/rejected": -0.40174397826194763, + "logps/chosen": -0.00023098831297829747, + "logps/rejected": -3.1674602031707764, + "loss": 0.4596, + "nll_loss": 0.11487455666065216, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3098831661627628e-05, + "rewards/margins": 0.31672292947769165, + "rewards/rejected": -0.3167460262775421, + "step": 11258 + }, + { + "epoch": 7.786307053941909, + "grad_norm": 7.546180725097656, + "learning_rate": 1.2298294144767174e-05, + "log_odds_chosen": 10.36526107788086, + "log_odds_ratio": -5.682875053025782e-05, + "logits/chosen": -0.2339021861553192, + "logits/rejected": -0.2601647973060608, + "logps/chosen": -0.0003002375306095928, + "logps/rejected": -2.0971832275390625, + "loss": 0.5309, + "nll_loss": 0.13272957503795624, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0023751605767757e-05, + "rewards/margins": 0.20968829095363617, + "rewards/rejected": -0.20971833169460297, + "step": 11259 + }, + { + "epoch": 7.786998616874135, + "grad_norm": 7.056270122528076, + "learning_rate": 1.2294452128477025e-05, + "log_odds_chosen": 11.92055606842041, + "log_odds_ratio": -0.00018327760335523635, + "logits/chosen": -0.22537848353385925, + "logits/rejected": -0.3611562252044678, + "logps/chosen": -0.0003575455048121512, + "logps/rejected": -3.3726859092712402, + "loss": 0.4225, + "nll_loss": 0.1056140884757042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5754554119193926e-05, + "rewards/margins": 0.3372328579425812, + "rewards/rejected": -0.3372686207294464, + "step": 11260 + }, + { + "epoch": 7.787690179806362, + "grad_norm": 4.214644432067871, + "learning_rate": 1.2290610112186876e-05, + "log_odds_chosen": 10.311920166015625, + "log_odds_ratio": -0.0001279880088986829, + "logits/chosen": -0.09888796508312225, + "logits/rejected": -0.12999199330806732, + "logps/chosen": -0.00010510604624869302, + "logps/rejected": -1.3345422744750977, + "loss": 0.715, + "nll_loss": 0.17872878909111023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0510604624869302e-05, + "rewards/margins": 0.13344372808933258, + "rewards/rejected": -0.13345423340797424, + "step": 11261 + }, + { + "epoch": 7.788381742738589, + "grad_norm": 6.3505706787109375, + "learning_rate": 1.2286768095896726e-05, + "log_odds_chosen": 12.054739952087402, + "log_odds_ratio": -2.838961881934665e-05, + "logits/chosen": -0.29715636372566223, + "logits/rejected": -0.3512900471687317, + "logps/chosen": -0.00019150454318150878, + "logps/rejected": -2.992788791656494, + "loss": 0.3565, + "nll_loss": 0.08911924809217453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.915045504574664e-05, + "rewards/margins": 0.2992597222328186, + "rewards/rejected": -0.2992788851261139, + "step": 11262 + }, + { + "epoch": 7.789073305670816, + "grad_norm": 6.878966808319092, + "learning_rate": 1.2282926079606579e-05, + "log_odds_chosen": 10.905218124389648, + "log_odds_ratio": -3.369908517925069e-05, + "logits/chosen": -0.27406126260757446, + "logits/rejected": -0.3869612216949463, + "logps/chosen": -0.00012873244122602046, + "logps/rejected": -1.9237127304077148, + "loss": 0.3148, + "nll_loss": 0.07870644330978394, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.287324630538933e-05, + "rewards/margins": 0.19235840439796448, + "rewards/rejected": -0.19237127900123596, + "step": 11263 + }, + { + "epoch": 7.789764868603043, + "grad_norm": 8.763483047485352, + "learning_rate": 1.227908406331643e-05, + "log_odds_chosen": 10.882516860961914, + "log_odds_ratio": -4.163644916843623e-05, + "logits/chosen": -0.25387126207351685, + "logits/rejected": -0.27315399050712585, + "logps/chosen": -0.0001641416602069512, + "logps/rejected": -2.1190123558044434, + "loss": 0.4698, + "nll_loss": 0.11745646595954895, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.641416565689724e-05, + "rewards/margins": 0.21188482642173767, + "rewards/rejected": -0.2119012475013733, + "step": 11264 + }, + { + "epoch": 7.79045643153527, + "grad_norm": 3.134868860244751, + "learning_rate": 1.2275242047026279e-05, + "log_odds_chosen": 10.67641830444336, + "log_odds_ratio": -4.588044248521328e-05, + "logits/chosen": 0.09237219393253326, + "logits/rejected": 0.11966807395219803, + "logps/chosen": -0.0004638899117708206, + "logps/rejected": -2.1800992488861084, + "loss": 0.434, + "nll_loss": 0.10850457102060318, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6388988266699016e-05, + "rewards/margins": 0.21796353161334991, + "rewards/rejected": -0.21800991892814636, + "step": 11265 + }, + { + "epoch": 7.791147994467496, + "grad_norm": 4.402527332305908, + "learning_rate": 1.2271400030736131e-05, + "log_odds_chosen": 10.308174133300781, + "log_odds_ratio": -0.00018428012845106423, + "logits/chosen": -0.2486291527748108, + "logits/rejected": -0.38063308596611023, + "logps/chosen": -0.0004640338593162596, + "logps/rejected": -2.1923608779907227, + "loss": 0.3503, + "nll_loss": 0.08756566047668457, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6403387386817485e-05, + "rewards/margins": 0.21918968856334686, + "rewards/rejected": -0.2192360907793045, + "step": 11266 + }, + { + "epoch": 7.791839557399723, + "grad_norm": 5.159994125366211, + "learning_rate": 1.2267558014445982e-05, + "log_odds_chosen": 9.89212417602539, + "log_odds_ratio": -0.00020230353402439505, + "logits/chosen": -0.1726974993944168, + "logits/rejected": -0.2927550673484802, + "logps/chosen": -0.0005461431574076414, + "logps/rejected": -1.6139581203460693, + "loss": 0.4721, + "nll_loss": 0.11800789833068848, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4614312830381095e-05, + "rewards/margins": 0.16134120523929596, + "rewards/rejected": -0.1613958179950714, + "step": 11267 + }, + { + "epoch": 7.79253112033195, + "grad_norm": 4.365382671356201, + "learning_rate": 1.2263715998155833e-05, + "log_odds_chosen": 10.91610336303711, + "log_odds_ratio": -8.126306056510657e-05, + "logits/chosen": 0.11642088741064072, + "logits/rejected": 0.125750333070755, + "logps/chosen": -0.00019425661594141275, + "logps/rejected": -2.039365768432617, + "loss": 0.3607, + "nll_loss": 0.09015924483537674, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9425662685534917e-05, + "rewards/margins": 0.20391714572906494, + "rewards/rejected": -0.20393657684326172, + "step": 11268 + }, + { + "epoch": 7.793222683264177, + "grad_norm": 5.306851863861084, + "learning_rate": 1.2259873981865683e-05, + "log_odds_chosen": 11.79000186920166, + "log_odds_ratio": -6.93091715220362e-05, + "logits/chosen": -0.3146926760673523, + "logits/rejected": -0.21157246828079224, + "logps/chosen": -0.00012302336108405143, + "logps/rejected": -2.6283581256866455, + "loss": 0.3521, + "nll_loss": 0.08803052455186844, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2302336472203024e-05, + "rewards/margins": 0.26282352209091187, + "rewards/rejected": -0.2628358006477356, + "step": 11269 + }, + { + "epoch": 7.793914246196404, + "grad_norm": 3.641671895980835, + "learning_rate": 1.2256031965575534e-05, + "log_odds_chosen": 10.492412567138672, + "log_odds_ratio": -6.292194302659482e-05, + "logits/chosen": -0.20354798436164856, + "logits/rejected": -0.21678128838539124, + "logps/chosen": -0.0001462570216972381, + "logps/rejected": -1.646343469619751, + "loss": 0.2938, + "nll_loss": 0.07344779372215271, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.462570253352169e-05, + "rewards/margins": 0.16461974382400513, + "rewards/rejected": -0.1646343618631363, + "step": 11270 + }, + { + "epoch": 7.7946058091286305, + "grad_norm": 3.143221855163574, + "learning_rate": 1.2252189949285385e-05, + "log_odds_chosen": 10.551334381103516, + "log_odds_ratio": -0.00016412035620305687, + "logits/chosen": -0.6718156337738037, + "logits/rejected": -0.7369803190231323, + "logps/chosen": -0.00035478276549838483, + "logps/rejected": -2.0037951469421387, + "loss": 0.3303, + "nll_loss": 0.08257079869508743, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.547827873262577e-05, + "rewards/margins": 0.2003440409898758, + "rewards/rejected": -0.20037952065467834, + "step": 11271 + }, + { + "epoch": 7.795297372060857, + "grad_norm": 4.969653129577637, + "learning_rate": 1.2248347932995237e-05, + "log_odds_chosen": 11.730992317199707, + "log_odds_ratio": -2.987973130075261e-05, + "logits/chosen": -0.39510875940322876, + "logits/rejected": -0.46338707208633423, + "logps/chosen": -0.00012269796570762992, + "logps/rejected": -2.4157376289367676, + "loss": 0.4528, + "nll_loss": 0.11319657415151596, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2269796570762992e-05, + "rewards/margins": 0.24156148731708527, + "rewards/rejected": -0.2415737509727478, + "step": 11272 + }, + { + "epoch": 7.795988934993084, + "grad_norm": 5.745721340179443, + "learning_rate": 1.2244505916705088e-05, + "log_odds_chosen": 10.081501960754395, + "log_odds_ratio": -0.00018471617659088224, + "logits/chosen": -0.12037422508001328, + "logits/rejected": -0.16611051559448242, + "logps/chosen": -0.00048183047329075634, + "logps/rejected": -1.7776180505752563, + "loss": 0.4966, + "nll_loss": 0.12412483990192413, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.818305023945868e-05, + "rewards/margins": 0.17771361768245697, + "rewards/rejected": -0.17776180803775787, + "step": 11273 + }, + { + "epoch": 7.796680497925311, + "grad_norm": 5.706189155578613, + "learning_rate": 1.2240663900414937e-05, + "log_odds_chosen": 9.718204498291016, + "log_odds_ratio": -0.0006743724225088954, + "logits/chosen": 0.06922922283411026, + "logits/rejected": 0.03767970949411392, + "logps/chosen": -0.0011344578815624118, + "logps/rejected": -1.807583212852478, + "loss": 0.4193, + "nll_loss": 0.10475035011768341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011344579252181575, + "rewards/margins": 0.18064486980438232, + "rewards/rejected": -0.18075832724571228, + "step": 11274 + }, + { + "epoch": 7.797372060857538, + "grad_norm": 4.138829231262207, + "learning_rate": 1.223682188412479e-05, + "log_odds_chosen": 10.995828628540039, + "log_odds_ratio": -3.327604281366803e-05, + "logits/chosen": -0.8057668209075928, + "logits/rejected": -0.7984911203384399, + "logps/chosen": -0.0002747337566688657, + "logps/rejected": -2.0947799682617188, + "loss": 0.3938, + "nll_loss": 0.09843438118696213, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7473375666886568e-05, + "rewards/margins": 0.20945051312446594, + "rewards/rejected": -0.2094779908657074, + "step": 11275 + }, + { + "epoch": 7.798063623789765, + "grad_norm": 5.856276512145996, + "learning_rate": 1.223297986783464e-05, + "log_odds_chosen": 10.868249893188477, + "log_odds_ratio": -0.00011836976773338392, + "logits/chosen": -0.3161469101905823, + "logits/rejected": -0.42098069190979004, + "logps/chosen": -0.0003598304174374789, + "logps/rejected": -2.4453377723693848, + "loss": 0.4851, + "nll_loss": 0.12127453088760376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.598304101615213e-05, + "rewards/margins": 0.24449782073497772, + "rewards/rejected": -0.24453380703926086, + "step": 11276 + }, + { + "epoch": 7.7987551867219915, + "grad_norm": 5.088725566864014, + "learning_rate": 1.2229137851544491e-05, + "log_odds_chosen": 10.577735900878906, + "log_odds_ratio": -0.0001267546758754179, + "logits/chosen": -0.6664071083068848, + "logits/rejected": -0.6439728140830994, + "logps/chosen": -0.0004955183831043541, + "logps/rejected": -2.272801399230957, + "loss": 0.6415, + "nll_loss": 0.16036275029182434, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9551843403605744e-05, + "rewards/margins": 0.22723057866096497, + "rewards/rejected": -0.2272801399230957, + "step": 11277 + }, + { + "epoch": 7.799446749654218, + "grad_norm": 3.760568857192993, + "learning_rate": 1.2225295835254342e-05, + "log_odds_chosen": 11.093595504760742, + "log_odds_ratio": -3.8573536585317925e-05, + "logits/chosen": -0.03364332765340805, + "logits/rejected": 0.08477126061916351, + "logps/chosen": -0.00011093143257312477, + "logps/rejected": -2.148498773574829, + "loss": 0.3584, + "nll_loss": 0.08959043025970459, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1093143257312477e-05, + "rewards/margins": 0.21483880281448364, + "rewards/rejected": -0.21484988927841187, + "step": 11278 + }, + { + "epoch": 7.800138312586445, + "grad_norm": 4.837119102478027, + "learning_rate": 1.2221453818964193e-05, + "log_odds_chosen": 10.585123062133789, + "log_odds_ratio": -0.00012280464579816908, + "logits/chosen": -0.39586615562438965, + "logits/rejected": -0.3422686755657196, + "logps/chosen": -0.0003601500065997243, + "logps/rejected": -2.448976516723633, + "loss": 0.4123, + "nll_loss": 0.10305467993021011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.601500065997243e-05, + "rewards/margins": 0.2448616325855255, + "rewards/rejected": -0.24489764869213104, + "step": 11279 + }, + { + "epoch": 7.800829875518672, + "grad_norm": 6.671242713928223, + "learning_rate": 1.2217611802674043e-05, + "log_odds_chosen": 10.349712371826172, + "log_odds_ratio": -0.00029979500686749816, + "logits/chosen": -0.01447632908821106, + "logits/rejected": -0.006284177303314209, + "logps/chosen": -0.0008054234203882515, + "logps/rejected": -2.1930935382843018, + "loss": 0.5554, + "nll_loss": 0.13880924880504608, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.054233330767602e-05, + "rewards/margins": 0.2192288339138031, + "rewards/rejected": -0.21930935978889465, + "step": 11280 + }, + { + "epoch": 7.801521438450899, + "grad_norm": 4.729337692260742, + "learning_rate": 1.2213769786383896e-05, + "log_odds_chosen": 10.608678817749023, + "log_odds_ratio": -0.00032640784047544, + "logits/chosen": -0.19049939513206482, + "logits/rejected": -0.23994635045528412, + "logps/chosen": -0.00044982333201915026, + "logps/rejected": -2.493246078491211, + "loss": 0.4774, + "nll_loss": 0.11930903792381287, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.498233101912774e-05, + "rewards/margins": 0.24927963316440582, + "rewards/rejected": -0.24932461977005005, + "step": 11281 + }, + { + "epoch": 7.802213001383126, + "grad_norm": 4.366937637329102, + "learning_rate": 1.2209927770093747e-05, + "log_odds_chosen": 9.542795181274414, + "log_odds_ratio": -0.00035635344102047384, + "logits/chosen": -0.5525764226913452, + "logits/rejected": -0.411281943321228, + "logps/chosen": -0.00037306107697077096, + "logps/rejected": -1.736010193824768, + "loss": 0.3269, + "nll_loss": 0.08169664442539215, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7306112062651664e-05, + "rewards/margins": 0.17356370389461517, + "rewards/rejected": -0.17360101640224457, + "step": 11282 + }, + { + "epoch": 7.8029045643153525, + "grad_norm": 5.749885559082031, + "learning_rate": 1.2206085753803596e-05, + "log_odds_chosen": 10.277787208557129, + "log_odds_ratio": -0.000137269904371351, + "logits/chosen": -0.5405263900756836, + "logits/rejected": -0.5402116775512695, + "logps/chosen": -0.00047800407628528774, + "logps/rejected": -2.369875907897949, + "loss": 0.7689, + "nll_loss": 0.19220447540283203, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7800407628528774e-05, + "rewards/margins": 0.23693978786468506, + "rewards/rejected": -0.23698759078979492, + "step": 11283 + }, + { + "epoch": 7.803596127247579, + "grad_norm": 4.322267055511475, + "learning_rate": 1.2202243737513448e-05, + "log_odds_chosen": 11.628303527832031, + "log_odds_ratio": -1.104071725421818e-05, + "logits/chosen": 0.1335178166627884, + "logits/rejected": 0.13221508264541626, + "logps/chosen": -0.00010630176984705031, + "logps/rejected": -2.399397373199463, + "loss": 0.6508, + "nll_loss": 0.16270360350608826, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0630175893311389e-05, + "rewards/margins": 0.23992910981178284, + "rewards/rejected": -0.23993973433971405, + "step": 11284 + }, + { + "epoch": 7.804287690179806, + "grad_norm": 4.353506088256836, + "learning_rate": 1.2198401721223299e-05, + "log_odds_chosen": 11.208612442016602, + "log_odds_ratio": -5.571136352955364e-05, + "logits/chosen": -0.5028769969940186, + "logits/rejected": -0.4641305208206177, + "logps/chosen": -0.00043331741471774876, + "logps/rejected": -2.2058372497558594, + "loss": 0.3791, + "nll_loss": 0.09477502107620239, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.333174001658335e-05, + "rewards/margins": 0.220540389418602, + "rewards/rejected": -0.2205837368965149, + "step": 11285 + }, + { + "epoch": 7.804979253112033, + "grad_norm": 5.568900108337402, + "learning_rate": 1.219455970493315e-05, + "log_odds_chosen": 11.898628234863281, + "log_odds_ratio": -3.059850132558495e-05, + "logits/chosen": -0.48825883865356445, + "logits/rejected": -0.575466513633728, + "logps/chosen": -0.0002759605704341084, + "logps/rejected": -2.878927230834961, + "loss": 0.3277, + "nll_loss": 0.08191083371639252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.759605740720872e-05, + "rewards/margins": 0.2878651022911072, + "rewards/rejected": -0.2878926992416382, + "step": 11286 + }, + { + "epoch": 7.80567081604426, + "grad_norm": 3.559267997741699, + "learning_rate": 1.2190717688643e-05, + "log_odds_chosen": 10.472837448120117, + "log_odds_ratio": -8.084255387075245e-05, + "logits/chosen": -0.303886741399765, + "logits/rejected": -0.3993980884552002, + "logps/chosen": -0.0005313065485097468, + "logps/rejected": -2.0672004222869873, + "loss": 0.5227, + "nll_loss": 0.13066476583480835, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.313065412337892e-05, + "rewards/margins": 0.20666691660881042, + "rewards/rejected": -0.2067200392484665, + "step": 11287 + }, + { + "epoch": 7.806362378976487, + "grad_norm": 4.979940414428711, + "learning_rate": 1.2186875672352851e-05, + "log_odds_chosen": 10.651639938354492, + "log_odds_ratio": -0.00013395682617556304, + "logits/chosen": 0.0262556504458189, + "logits/rejected": -0.04236229509115219, + "logps/chosen": -0.0012710446026176214, + "logps/rejected": -2.7116594314575195, + "loss": 0.4917, + "nll_loss": 0.12290848791599274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012710444570984691, + "rewards/margins": 0.27103888988494873, + "rewards/rejected": -0.27116596698760986, + "step": 11288 + }, + { + "epoch": 7.8070539419087135, + "grad_norm": 5.531417369842529, + "learning_rate": 1.2183033656062702e-05, + "log_odds_chosen": 10.462217330932617, + "log_odds_ratio": -7.152614853112027e-05, + "logits/chosen": -0.1546785831451416, + "logits/rejected": -0.28727149963378906, + "logps/chosen": -0.0002609742514323443, + "logps/rejected": -1.9173094034194946, + "loss": 0.5325, + "nll_loss": 0.13312757015228271, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.609742477943655e-05, + "rewards/margins": 0.19170483946800232, + "rewards/rejected": -0.19173094630241394, + "step": 11289 + }, + { + "epoch": 7.80774550484094, + "grad_norm": 4.879218101501465, + "learning_rate": 1.2179191639772554e-05, + "log_odds_chosen": 10.708136558532715, + "log_odds_ratio": -0.0003337001253385097, + "logits/chosen": -0.17093488574028015, + "logits/rejected": -0.2471480518579483, + "logps/chosen": -0.00016180059174075723, + "logps/rejected": -1.9442338943481445, + "loss": 0.6707, + "nll_loss": 0.16764989495277405, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6180059901671484e-05, + "rewards/margins": 0.19440722465515137, + "rewards/rejected": -0.1944233924150467, + "step": 11290 + }, + { + "epoch": 7.808437067773167, + "grad_norm": 4.727686405181885, + "learning_rate": 1.2175349623482405e-05, + "log_odds_chosen": 10.129864692687988, + "log_odds_ratio": -0.00031297910027205944, + "logits/chosen": 0.06406151503324509, + "logits/rejected": 0.027005136013031006, + "logps/chosen": -0.000604638596996665, + "logps/rejected": -2.3089687824249268, + "loss": 0.6432, + "nll_loss": 0.16076260805130005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.046385897207074e-05, + "rewards/margins": 0.230836421251297, + "rewards/rejected": -0.23089689016342163, + "step": 11291 + }, + { + "epoch": 7.809128630705394, + "grad_norm": 4.830107688903809, + "learning_rate": 1.2171507607192254e-05, + "log_odds_chosen": 11.263401985168457, + "log_odds_ratio": -2.79559098999016e-05, + "logits/chosen": -0.01324993371963501, + "logits/rejected": -0.14258237183094025, + "logps/chosen": -7.992141763679683e-05, + "logps/rejected": -1.9093828201293945, + "loss": 0.4813, + "nll_loss": 0.12033096700906754, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.992141036083922e-06, + "rewards/margins": 0.19093027710914612, + "rewards/rejected": -0.19093827903270721, + "step": 11292 + }, + { + "epoch": 7.809820193637621, + "grad_norm": 10.839892387390137, + "learning_rate": 1.2167665590902105e-05, + "log_odds_chosen": 10.911890983581543, + "log_odds_ratio": -0.00012246929691173136, + "logits/chosen": -0.04008027911186218, + "logits/rejected": -0.20957855880260468, + "logps/chosen": -0.00028015434509143233, + "logps/rejected": -1.8444762229919434, + "loss": 0.6795, + "nll_loss": 0.16986694931983948, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.801543269015383e-05, + "rewards/margins": 0.18441961705684662, + "rewards/rejected": -0.18444763123989105, + "step": 11293 + }, + { + "epoch": 7.810511756569848, + "grad_norm": 4.458373069763184, + "learning_rate": 1.2163823574611957e-05, + "log_odds_chosen": 10.007013320922852, + "log_odds_ratio": -9.765625145519152e-05, + "logits/chosen": -0.30763986706733704, + "logits/rejected": -0.3580760359764099, + "logps/chosen": -0.0020079452078789473, + "logps/rejected": -2.068343162536621, + "loss": 0.4123, + "nll_loss": 0.10306943207979202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020079451496712863, + "rewards/margins": 0.206633523106575, + "rewards/rejected": -0.2068343162536621, + "step": 11294 + }, + { + "epoch": 7.8112033195020745, + "grad_norm": 10.859085083007812, + "learning_rate": 1.2159981558321808e-05, + "log_odds_chosen": 11.415952682495117, + "log_odds_ratio": -3.398352055228315e-05, + "logits/chosen": -0.49685126543045044, + "logits/rejected": -0.4071289300918579, + "logps/chosen": -0.00017499136447440833, + "logps/rejected": -2.3988001346588135, + "loss": 0.3024, + "nll_loss": 0.07560379803180695, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7499136447440833e-05, + "rewards/margins": 0.23986250162124634, + "rewards/rejected": -0.2398800104856491, + "step": 11295 + }, + { + "epoch": 7.811894882434301, + "grad_norm": 4.0313897132873535, + "learning_rate": 1.2156139542031659e-05, + "log_odds_chosen": 10.66956901550293, + "log_odds_ratio": -0.00013550990843214095, + "logits/chosen": -0.30885058641433716, + "logits/rejected": -0.44245588779449463, + "logps/chosen": -0.00030423677526414394, + "logps/rejected": -2.413630962371826, + "loss": 0.4288, + "nll_loss": 0.10718473792076111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0423678254010156e-05, + "rewards/margins": 0.24133266508579254, + "rewards/rejected": -0.24136309325695038, + "step": 11296 + }, + { + "epoch": 7.812586445366528, + "grad_norm": 10.214454650878906, + "learning_rate": 1.215229752574151e-05, + "log_odds_chosen": 11.460878372192383, + "log_odds_ratio": -2.284135189256631e-05, + "logits/chosen": -0.31639039516448975, + "logits/rejected": -0.3789241313934326, + "logps/chosen": -0.00014475402713287622, + "logps/rejected": -2.2148890495300293, + "loss": 0.5287, + "nll_loss": 0.13216793537139893, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4475403986580204e-05, + "rewards/margins": 0.22147443890571594, + "rewards/rejected": -0.22148890793323517, + "step": 11297 + }, + { + "epoch": 7.813278008298755, + "grad_norm": 6.10732889175415, + "learning_rate": 1.214845550945136e-05, + "log_odds_chosen": 11.131383895874023, + "log_odds_ratio": -4.605175490723923e-05, + "logits/chosen": -0.15626344084739685, + "logits/rejected": -0.3107757568359375, + "logps/chosen": -0.00013610447058454156, + "logps/rejected": -2.1935107707977295, + "loss": 0.2989, + "nll_loss": 0.07473208755254745, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3610448149847798e-05, + "rewards/margins": 0.21933746337890625, + "rewards/rejected": -0.21935108304023743, + "step": 11298 + }, + { + "epoch": 7.813969571230982, + "grad_norm": 4.278329372406006, + "learning_rate": 1.2144613493161211e-05, + "log_odds_chosen": 11.866846084594727, + "log_odds_ratio": -1.1192752936040051e-05, + "logits/chosen": -0.49567800760269165, + "logits/rejected": -0.5212503671646118, + "logps/chosen": -0.00011445317068137228, + "logps/rejected": -2.430196762084961, + "loss": 0.5736, + "nll_loss": 0.14339041709899902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.144531779573299e-05, + "rewards/margins": 0.24300822615623474, + "rewards/rejected": -0.24301967024803162, + "step": 11299 + }, + { + "epoch": 7.814661134163209, + "grad_norm": 4.524301528930664, + "learning_rate": 1.2140771476871063e-05, + "log_odds_chosen": 10.06701946258545, + "log_odds_ratio": -0.000978961936198175, + "logits/chosen": -0.029292069375514984, + "logits/rejected": 0.005871415138244629, + "logps/chosen": -0.0012817583046853542, + "logps/rejected": -2.1486034393310547, + "loss": 0.4852, + "nll_loss": 0.12119489163160324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012817583046853542, + "rewards/margins": 0.21473215520381927, + "rewards/rejected": -0.21486034989356995, + "step": 11300 + }, + { + "epoch": 7.8153526970954355, + "grad_norm": 4.445444107055664, + "learning_rate": 1.2136929460580914e-05, + "log_odds_chosen": 11.179705619812012, + "log_odds_ratio": -0.00019098444317933172, + "logits/chosen": 0.027649089694023132, + "logits/rejected": -0.013102950528264046, + "logps/chosen": -0.0003603753575589508, + "logps/rejected": -2.5527210235595703, + "loss": 0.4274, + "nll_loss": 0.10682663321495056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.603753793868236e-05, + "rewards/margins": 0.25523608922958374, + "rewards/rejected": -0.25527212023735046, + "step": 11301 + }, + { + "epoch": 7.816044260027662, + "grad_norm": 4.457228660583496, + "learning_rate": 1.2133087444290763e-05, + "log_odds_chosen": 8.977705001831055, + "log_odds_ratio": -0.00042235839646309614, + "logits/chosen": -0.599679172039032, + "logits/rejected": -0.5758978724479675, + "logps/chosen": -0.006479393225163221, + "logps/rejected": -1.8167825937271118, + "loss": 0.3143, + "nll_loss": 0.07853072136640549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006479393341578543, + "rewards/margins": 0.18103033304214478, + "rewards/rejected": -0.18167826533317566, + "step": 11302 + }, + { + "epoch": 7.816735822959889, + "grad_norm": 4.476196765899658, + "learning_rate": 1.2129245428000616e-05, + "log_odds_chosen": 10.765111923217773, + "log_odds_ratio": -4.8187474021688104e-05, + "logits/chosen": -0.2516087591648102, + "logits/rejected": -0.33950358629226685, + "logps/chosen": -0.00015329348389059305, + "logps/rejected": -1.8095844984054565, + "loss": 0.453, + "nll_loss": 0.11323701590299606, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5329349480452947e-05, + "rewards/margins": 0.18094313144683838, + "rewards/rejected": -0.18095846474170685, + "step": 11303 + }, + { + "epoch": 7.817427385892116, + "grad_norm": 3.8020856380462646, + "learning_rate": 1.2125403411710466e-05, + "log_odds_chosen": 11.011605262756348, + "log_odds_ratio": -3.462144377408549e-05, + "logits/chosen": -0.2966512441635132, + "logits/rejected": -0.3090302050113678, + "logps/chosen": -8.981661812867969e-05, + "logps/rejected": -1.5665464401245117, + "loss": 0.3049, + "nll_loss": 0.07622778415679932, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.98166217666585e-06, + "rewards/margins": 0.15664567053318024, + "rewards/rejected": -0.15665464103221893, + "step": 11304 + }, + { + "epoch": 7.818118948824343, + "grad_norm": 4.5586371421813965, + "learning_rate": 1.2121561395420317e-05, + "log_odds_chosen": 9.883445739746094, + "log_odds_ratio": -0.00030278839403763413, + "logits/chosen": -0.13593541085720062, + "logits/rejected": -0.10266265273094177, + "logps/chosen": -0.0006718160002492368, + "logps/rejected": -1.2286714315414429, + "loss": 0.3804, + "nll_loss": 0.09507386386394501, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.718160875607282e-05, + "rewards/margins": 0.12279996275901794, + "rewards/rejected": -0.122867152094841, + "step": 11305 + }, + { + "epoch": 7.81881051175657, + "grad_norm": 7.4004387855529785, + "learning_rate": 1.2117719379130168e-05, + "log_odds_chosen": 10.306143760681152, + "log_odds_ratio": -9.625229722587392e-05, + "logits/chosen": -0.35738950967788696, + "logits/rejected": -0.29458296298980713, + "logps/chosen": -0.0006742849946022034, + "logps/rejected": -2.2270631790161133, + "loss": 0.401, + "nll_loss": 0.1002361923456192, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.74285038257949e-05, + "rewards/margins": 0.22263889014720917, + "rewards/rejected": -0.22270631790161133, + "step": 11306 + }, + { + "epoch": 7.819502074688796, + "grad_norm": 5.4306440353393555, + "learning_rate": 1.2113877362840019e-05, + "log_odds_chosen": 10.84564208984375, + "log_odds_ratio": -0.000269198149908334, + "logits/chosen": -0.039085280150175095, + "logits/rejected": 0.03959418460726738, + "logps/chosen": -0.0005255647120065987, + "logps/rejected": -2.5015833377838135, + "loss": 0.3889, + "nll_loss": 0.09719796478748322, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255647556623444e-05, + "rewards/margins": 0.2501057982444763, + "rewards/rejected": -0.2501583397388458, + "step": 11307 + }, + { + "epoch": 7.820193637621023, + "grad_norm": 4.5981125831604, + "learning_rate": 1.211003534654987e-05, + "log_odds_chosen": 11.37891960144043, + "log_odds_ratio": -6.482828757725656e-05, + "logits/chosen": -0.5004502534866333, + "logits/rejected": -0.6703431010246277, + "logps/chosen": -0.00022166120470501482, + "logps/rejected": -2.438640594482422, + "loss": 0.4862, + "nll_loss": 0.1215510442852974, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2166121198097244e-05, + "rewards/margins": 0.24384190142154694, + "rewards/rejected": -0.24386407434940338, + "step": 11308 + }, + { + "epoch": 7.82088520055325, + "grad_norm": 6.168004035949707, + "learning_rate": 1.2106193330259722e-05, + "log_odds_chosen": 10.734867095947266, + "log_odds_ratio": -0.0005011963658034801, + "logits/chosen": -0.5463290810585022, + "logits/rejected": -0.5193485617637634, + "logps/chosen": -0.001047707861289382, + "logps/rejected": -2.4238944053649902, + "loss": 0.6156, + "nll_loss": 0.1538413017988205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010477077739778906, + "rewards/margins": 0.24228468537330627, + "rewards/rejected": -0.24238945543766022, + "step": 11309 + }, + { + "epoch": 7.821576763485477, + "grad_norm": 3.5904340744018555, + "learning_rate": 1.2102351313969573e-05, + "log_odds_chosen": 10.541328430175781, + "log_odds_ratio": -9.438677807338536e-05, + "logits/chosen": -0.26121988892555237, + "logits/rejected": -0.27295538783073425, + "logps/chosen": -0.0011980930576100945, + "logps/rejected": -2.52219820022583, + "loss": 0.4023, + "nll_loss": 0.1005641371011734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001198093086713925, + "rewards/margins": 0.25209999084472656, + "rewards/rejected": -0.2522197961807251, + "step": 11310 + }, + { + "epoch": 7.822268326417704, + "grad_norm": 4.74156379699707, + "learning_rate": 1.2098509297679422e-05, + "log_odds_chosen": 10.732425689697266, + "log_odds_ratio": -0.00021223133080638945, + "logits/chosen": -0.5578321218490601, + "logits/rejected": -0.6234152317047119, + "logps/chosen": -0.00027943385066464543, + "logps/rejected": -2.379189968109131, + "loss": 0.4482, + "nll_loss": 0.11202280968427658, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7943386157858185e-05, + "rewards/margins": 0.2378910481929779, + "rewards/rejected": -0.23791900277137756, + "step": 11311 + }, + { + "epoch": 7.822959889349931, + "grad_norm": 4.3947038650512695, + "learning_rate": 1.2094667281389274e-05, + "log_odds_chosen": 12.0952730178833, + "log_odds_ratio": -3.203017695341259e-05, + "logits/chosen": -0.13149462640285492, + "logits/rejected": -0.323131799697876, + "logps/chosen": -0.00017533727805130184, + "logps/rejected": -3.08715558052063, + "loss": 0.5112, + "nll_loss": 0.12779048085212708, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7533728168928064e-05, + "rewards/margins": 0.30869802832603455, + "rewards/rejected": -0.3087155520915985, + "step": 11312 + }, + { + "epoch": 7.823651452282157, + "grad_norm": 3.9417388439178467, + "learning_rate": 1.2090825265099125e-05, + "log_odds_chosen": 10.992733001708984, + "log_odds_ratio": -5.133766171638854e-05, + "logits/chosen": -0.1718611717224121, + "logits/rejected": -0.1951899528503418, + "logps/chosen": -0.00015353634080383927, + "logps/rejected": -2.26267409324646, + "loss": 0.3833, + "nll_loss": 0.09582962095737457, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5353634807979688e-05, + "rewards/margins": 0.22625204920768738, + "rewards/rejected": -0.22626741230487823, + "step": 11313 + }, + { + "epoch": 7.824343015214384, + "grad_norm": 3.576083183288574, + "learning_rate": 1.2086983248808976e-05, + "log_odds_chosen": 10.381665229797363, + "log_odds_ratio": -0.0003442099259700626, + "logits/chosen": -0.650234580039978, + "logits/rejected": -0.5741584300994873, + "logps/chosen": -0.00028753330116160214, + "logps/rejected": -1.8145102262496948, + "loss": 0.5317, + "nll_loss": 0.13288593292236328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8753329388564453e-05, + "rewards/margins": 0.18142227828502655, + "rewards/rejected": -0.18145102262496948, + "step": 11314 + }, + { + "epoch": 7.825034578146611, + "grad_norm": 3.879359245300293, + "learning_rate": 1.2083141232518826e-05, + "log_odds_chosen": 11.245022773742676, + "log_odds_ratio": -2.560452776378952e-05, + "logits/chosen": 0.10069958865642548, + "logits/rejected": 0.02351771481335163, + "logps/chosen": -0.00020363567455206066, + "logps/rejected": -2.3778505325317383, + "loss": 0.4817, + "nll_loss": 0.12041270732879639, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0363568182801828e-05, + "rewards/margins": 0.23776471614837646, + "rewards/rejected": -0.23778507113456726, + "step": 11315 + }, + { + "epoch": 7.825726141078838, + "grad_norm": 5.315532207489014, + "learning_rate": 1.2079299216228677e-05, + "log_odds_chosen": 11.995738983154297, + "log_odds_ratio": -8.50264259497635e-05, + "logits/chosen": -0.3942870497703552, + "logits/rejected": -0.4254767894744873, + "logps/chosen": -0.00015716595225967467, + "logps/rejected": -2.919708013534546, + "loss": 0.4898, + "nll_loss": 0.12245209515094757, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5716595953563228e-05, + "rewards/margins": 0.2919551134109497, + "rewards/rejected": -0.2919708490371704, + "step": 11316 + }, + { + "epoch": 7.826417704011065, + "grad_norm": 2.549464702606201, + "learning_rate": 1.2075457199938528e-05, + "log_odds_chosen": 12.045283317565918, + "log_odds_ratio": -2.14578649320174e-05, + "logits/chosen": -0.19537949562072754, + "logits/rejected": -0.22888311743736267, + "logps/chosen": -8.560554124414921e-05, + "logps/rejected": -2.7150845527648926, + "loss": 0.3413, + "nll_loss": 0.0853186771273613, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.560555215808563e-06, + "rewards/margins": 0.2714998722076416, + "rewards/rejected": -0.27150842547416687, + "step": 11317 + }, + { + "epoch": 7.827109266943292, + "grad_norm": 8.127381324768066, + "learning_rate": 1.207161518364838e-05, + "log_odds_chosen": 11.11913776397705, + "log_odds_ratio": -0.00015894531679805368, + "logits/chosen": -0.5177460312843323, + "logits/rejected": -0.5508327484130859, + "logps/chosen": -0.00036550668301060796, + "logps/rejected": -2.83428692817688, + "loss": 0.7238, + "nll_loss": 0.18092840909957886, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6550667573465034e-05, + "rewards/margins": 0.28339216113090515, + "rewards/rejected": -0.28342872858047485, + "step": 11318 + }, + { + "epoch": 7.827800829875518, + "grad_norm": 2.9939863681793213, + "learning_rate": 1.2067773167358231e-05, + "log_odds_chosen": 10.294910430908203, + "log_odds_ratio": -6.300478707998991e-05, + "logits/chosen": -0.2557978630065918, + "logits/rejected": -0.2889474332332611, + "logps/chosen": -0.00011574958625715226, + "logps/rejected": -1.292106032371521, + "loss": 0.2875, + "nll_loss": 0.07187668979167938, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1574958989513107e-05, + "rewards/margins": 0.12919902801513672, + "rewards/rejected": -0.12921059131622314, + "step": 11319 + }, + { + "epoch": 7.828492392807745, + "grad_norm": 4.445178031921387, + "learning_rate": 1.206393115106808e-05, + "log_odds_chosen": 10.54962158203125, + "log_odds_ratio": -8.115536911645904e-05, + "logits/chosen": -0.16899898648262024, + "logits/rejected": -0.19577506184577942, + "logps/chosen": -0.000257675041211769, + "logps/rejected": -1.8648037910461426, + "loss": 0.4215, + "nll_loss": 0.10536029934883118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5767503757379018e-05, + "rewards/margins": 0.1864546239376068, + "rewards/rejected": -0.18648038804531097, + "step": 11320 + }, + { + "epoch": 7.829183955739972, + "grad_norm": 3.650851011276245, + "learning_rate": 1.2060089134777933e-05, + "log_odds_chosen": 11.044684410095215, + "log_odds_ratio": -3.610683779697865e-05, + "logits/chosen": -0.7067569494247437, + "logits/rejected": -0.7932747602462769, + "logps/chosen": -0.00035222587757743895, + "logps/rejected": -2.135831356048584, + "loss": 0.4366, + "nll_loss": 0.10913494229316711, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5222587030148134e-05, + "rewards/margins": 0.21354791522026062, + "rewards/rejected": -0.21358314156532288, + "step": 11321 + }, + { + "epoch": 7.829875518672199, + "grad_norm": 4.814984321594238, + "learning_rate": 1.2056247118487783e-05, + "log_odds_chosen": 9.837475776672363, + "log_odds_ratio": -0.0007566395797766745, + "logits/chosen": 0.09012100845575333, + "logits/rejected": -0.02145160734653473, + "logps/chosen": -0.0023908771108835936, + "logps/rejected": -2.0067663192749023, + "loss": 0.5799, + "nll_loss": 0.14490315318107605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023908769071567804, + "rewards/margins": 0.20043756067752838, + "rewards/rejected": -0.20067663490772247, + "step": 11322 + }, + { + "epoch": 7.830567081604426, + "grad_norm": 6.39214563369751, + "learning_rate": 1.2052405102197634e-05, + "log_odds_chosen": 10.75027847290039, + "log_odds_ratio": -7.44945282349363e-05, + "logits/chosen": -0.36986637115478516, + "logits/rejected": -0.4756673574447632, + "logps/chosen": -0.00022458047897089273, + "logps/rejected": -1.8328531980514526, + "loss": 0.3638, + "nll_loss": 0.09094793349504471, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.245804716949351e-05, + "rewards/margins": 0.1832628697156906, + "rewards/rejected": -0.18328531086444855, + "step": 11323 + }, + { + "epoch": 7.8312586445366525, + "grad_norm": 4.306397914886475, + "learning_rate": 1.2048563085907485e-05, + "log_odds_chosen": 12.279438972473145, + "log_odds_ratio": -1.2914264516439289e-05, + "logits/chosen": -0.20310020446777344, + "logits/rejected": -0.26829591393470764, + "logps/chosen": -8.419219375355169e-05, + "logps/rejected": -2.8877882957458496, + "loss": 0.519, + "nll_loss": 0.12974447011947632, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.41921973915305e-06, + "rewards/margins": 0.2887704074382782, + "rewards/rejected": -0.2887788414955139, + "step": 11324 + }, + { + "epoch": 7.831950207468879, + "grad_norm": 3.6690616607666016, + "learning_rate": 1.2044721069617336e-05, + "log_odds_chosen": 11.022123336791992, + "log_odds_ratio": -4.5447537559084594e-05, + "logits/chosen": -0.512175977230072, + "logits/rejected": -0.6419370174407959, + "logps/chosen": -0.00017706479411572218, + "logps/rejected": -2.087106704711914, + "loss": 0.3042, + "nll_loss": 0.07603558897972107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7706479411572218e-05, + "rewards/margins": 0.20869295299053192, + "rewards/rejected": -0.2087106555700302, + "step": 11325 + }, + { + "epoch": 7.832641770401106, + "grad_norm": 10.011808395385742, + "learning_rate": 1.2040879053327186e-05, + "log_odds_chosen": 10.784045219421387, + "log_odds_ratio": -7.086223195074126e-05, + "logits/chosen": -0.5523691773414612, + "logits/rejected": -0.4052152931690216, + "logps/chosen": -0.0002722898207139224, + "logps/rejected": -2.209475040435791, + "loss": 0.4355, + "nll_loss": 0.10886941105127335, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7228981707594357e-05, + "rewards/margins": 0.22092029452323914, + "rewards/rejected": -0.2209475338459015, + "step": 11326 + }, + { + "epoch": 7.833333333333333, + "grad_norm": 4.256576061248779, + "learning_rate": 1.2037037037037037e-05, + "log_odds_chosen": 10.996548652648926, + "log_odds_ratio": -5.791713920189068e-05, + "logits/chosen": -0.4284515380859375, + "logits/rejected": -0.5301926136016846, + "logps/chosen": -0.00014487968292087317, + "logps/rejected": -1.900244951248169, + "loss": 0.3767, + "nll_loss": 0.09417097270488739, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4487968655885197e-05, + "rewards/margins": 0.19001001119613647, + "rewards/rejected": -0.1900244951248169, + "step": 11327 + }, + { + "epoch": 7.83402489626556, + "grad_norm": 4.8243632316589355, + "learning_rate": 1.203319502074689e-05, + "log_odds_chosen": 10.47626781463623, + "log_odds_ratio": -0.00010160038073081523, + "logits/chosen": -0.4039806127548218, + "logits/rejected": -0.32478436827659607, + "logps/chosen": -0.00019927705579902977, + "logps/rejected": -1.7954881191253662, + "loss": 0.4262, + "nll_loss": 0.1065467894077301, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.992770739889238e-05, + "rewards/margins": 0.1795288771390915, + "rewards/rejected": -0.17954879999160767, + "step": 11328 + }, + { + "epoch": 7.834716459197787, + "grad_norm": 3.5320465564727783, + "learning_rate": 1.2029353004456739e-05, + "log_odds_chosen": 9.576192855834961, + "log_odds_ratio": -0.0002370043657720089, + "logits/chosen": -0.48754382133483887, + "logits/rejected": -0.5002470016479492, + "logps/chosen": -0.00023099326062947512, + "logps/rejected": -0.8696978688240051, + "loss": 0.2745, + "nll_loss": 0.06859709322452545, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3099326426745392e-05, + "rewards/margins": 0.08694669604301453, + "rewards/rejected": -0.08696979284286499, + "step": 11329 + }, + { + "epoch": 7.8354080221300135, + "grad_norm": 3.702303171157837, + "learning_rate": 1.202551098816659e-05, + "log_odds_chosen": 11.211159706115723, + "log_odds_ratio": -3.524010753608309e-05, + "logits/chosen": -0.4746825695037842, + "logits/rejected": -0.6030235290527344, + "logps/chosen": -0.00015717296628281474, + "logps/rejected": -2.056490898132324, + "loss": 0.3888, + "nll_loss": 0.09720071405172348, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5717296264483593e-05, + "rewards/margins": 0.20563337206840515, + "rewards/rejected": -0.20564907789230347, + "step": 11330 + }, + { + "epoch": 7.83609958506224, + "grad_norm": 4.379319190979004, + "learning_rate": 1.2021668971876442e-05, + "log_odds_chosen": 9.556562423706055, + "log_odds_ratio": -0.00018041238945443183, + "logits/chosen": -0.22800880670547485, + "logits/rejected": -0.2613986134529114, + "logps/chosen": -0.0013600703096017241, + "logps/rejected": -1.9764504432678223, + "loss": 0.464, + "nll_loss": 0.11598940938711166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013600703096017241, + "rewards/margins": 0.1975090503692627, + "rewards/rejected": -0.19764505326747894, + "step": 11331 + }, + { + "epoch": 7.836791147994467, + "grad_norm": 3.46706223487854, + "learning_rate": 1.2017826955586292e-05, + "log_odds_chosen": 10.588600158691406, + "log_odds_ratio": -0.0007458007312379777, + "logits/chosen": -0.1496710330247879, + "logits/rejected": -0.21533580124378204, + "logps/chosen": -0.0007539233192801476, + "logps/rejected": -1.8086034059524536, + "loss": 0.37, + "nll_loss": 0.09242701530456543, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.539233774878085e-05, + "rewards/margins": 0.18078495562076569, + "rewards/rejected": -0.18086032569408417, + "step": 11332 + }, + { + "epoch": 7.837482710926694, + "grad_norm": 5.556495666503906, + "learning_rate": 1.2013984939296143e-05, + "log_odds_chosen": 10.02688217163086, + "log_odds_ratio": -0.0004603726847562939, + "logits/chosen": -0.47641515731811523, + "logits/rejected": -0.5979164242744446, + "logps/chosen": -0.000672024383675307, + "logps/rejected": -2.213634967803955, + "loss": 0.5368, + "nll_loss": 0.13416364789009094, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.720244709867984e-05, + "rewards/margins": 0.2212963104248047, + "rewards/rejected": -0.22136351466178894, + "step": 11333 + }, + { + "epoch": 7.838174273858921, + "grad_norm": 5.558751583099365, + "learning_rate": 1.2010142923005994e-05, + "log_odds_chosen": 10.478246688842773, + "log_odds_ratio": -0.00028890155954286456, + "logits/chosen": -0.2527915835380554, + "logits/rejected": -0.28625351190567017, + "logps/chosen": -0.0002432153996778652, + "logps/rejected": -2.022489309310913, + "loss": 0.522, + "nll_loss": 0.13046440482139587, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.432153996778652e-05, + "rewards/margins": 0.20222459733486176, + "rewards/rejected": -0.2022489309310913, + "step": 11334 + }, + { + "epoch": 7.838865836791148, + "grad_norm": 4.615329265594482, + "learning_rate": 1.2006300906715845e-05, + "log_odds_chosen": 12.855207443237305, + "log_odds_ratio": -4.950938546244288e-06, + "logits/chosen": -0.5159450769424438, + "logits/rejected": -0.5952770709991455, + "logps/chosen": -7.59071990614757e-05, + "logps/rejected": -3.2945661544799805, + "loss": 0.519, + "nll_loss": 0.12975125014781952, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.59072008804651e-06, + "rewards/margins": 0.32944902777671814, + "rewards/rejected": -0.3294565975666046, + "step": 11335 + }, + { + "epoch": 7.8395573997233745, + "grad_norm": 3.821167230606079, + "learning_rate": 1.2002458890425695e-05, + "log_odds_chosen": 9.323131561279297, + "log_odds_ratio": -0.0018681931542232633, + "logits/chosen": -0.29701679944992065, + "logits/rejected": -0.4008544683456421, + "logps/chosen": -0.0014447573339566588, + "logps/rejected": -1.4508062601089478, + "loss": 0.4377, + "nll_loss": 0.10923688858747482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014447573630604893, + "rewards/margins": 0.14493615925312042, + "rewards/rejected": -0.14508062601089478, + "step": 11336 + }, + { + "epoch": 7.840248962655601, + "grad_norm": 4.55873966217041, + "learning_rate": 1.1998616874135548e-05, + "log_odds_chosen": 11.785822868347168, + "log_odds_ratio": -1.9246745068812743e-05, + "logits/chosen": -0.3788655996322632, + "logits/rejected": -0.40937554836273193, + "logps/chosen": -0.00016271023196168244, + "logps/rejected": -2.8028204441070557, + "loss": 0.4659, + "nll_loss": 0.11648114025592804, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6271023923764005e-05, + "rewards/margins": 0.28026577830314636, + "rewards/rejected": -0.28028205037117004, + "step": 11337 + }, + { + "epoch": 7.840940525587828, + "grad_norm": 3.9258370399475098, + "learning_rate": 1.1994774857845397e-05, + "log_odds_chosen": 11.954495429992676, + "log_odds_ratio": -2.6376255846116692e-05, + "logits/chosen": -0.40627315640449524, + "logits/rejected": -0.5198106169700623, + "logps/chosen": -0.00019579721265472472, + "logps/rejected": -2.973041534423828, + "loss": 0.5688, + "nll_loss": 0.14218628406524658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.957972017407883e-05, + "rewards/margins": 0.2972846031188965, + "rewards/rejected": -0.2973041534423828, + "step": 11338 + }, + { + "epoch": 7.841632088520055, + "grad_norm": 4.359646797180176, + "learning_rate": 1.1990932841555248e-05, + "log_odds_chosen": 10.56201171875, + "log_odds_ratio": -0.0001327977515757084, + "logits/chosen": -0.327246755361557, + "logits/rejected": -0.3993592858314514, + "logps/chosen": -0.0003068206424359232, + "logps/rejected": -2.1173148155212402, + "loss": 0.3493, + "nll_loss": 0.08732067048549652, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0682065698783845e-05, + "rewards/margins": 0.21170082688331604, + "rewards/rejected": -0.21173149347305298, + "step": 11339 + }, + { + "epoch": 7.842323651452282, + "grad_norm": 4.125725269317627, + "learning_rate": 1.19870908252651e-05, + "log_odds_chosen": 9.908230781555176, + "log_odds_ratio": -0.00039624038618057966, + "logits/chosen": -0.7032617330551147, + "logits/rejected": -0.691260814666748, + "logps/chosen": -0.0010323630413040519, + "logps/rejected": -1.8295907974243164, + "loss": 0.6721, + "nll_loss": 0.16799086332321167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010323632159270346, + "rewards/margins": 0.18285587430000305, + "rewards/rejected": -0.18295909464359283, + "step": 11340 + }, + { + "epoch": 7.843015214384509, + "grad_norm": 4.721843242645264, + "learning_rate": 1.1983248808974951e-05, + "log_odds_chosen": 11.312594413757324, + "log_odds_ratio": -3.6237441236153245e-05, + "logits/chosen": -0.5396626591682434, + "logits/rejected": -0.5061202049255371, + "logps/chosen": -0.00019042623171117157, + "logps/rejected": -2.7666544914245605, + "loss": 0.595, + "nll_loss": 0.14875343441963196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.904262535390444e-05, + "rewards/margins": 0.2766464054584503, + "rewards/rejected": -0.27666544914245605, + "step": 11341 + }, + { + "epoch": 7.8437067773167355, + "grad_norm": 4.29473352432251, + "learning_rate": 1.1979406792684802e-05, + "log_odds_chosen": 10.837879180908203, + "log_odds_ratio": -0.0003252569295000285, + "logits/chosen": -0.20344379544258118, + "logits/rejected": -0.29107415676116943, + "logps/chosen": -0.000317359488690272, + "logps/rejected": -2.1068313121795654, + "loss": 0.3754, + "nll_loss": 0.0938163474202156, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1735948141431436e-05, + "rewards/margins": 0.21065139770507812, + "rewards/rejected": -0.21068313717842102, + "step": 11342 + }, + { + "epoch": 7.844398340248962, + "grad_norm": 4.473525524139404, + "learning_rate": 1.1975564776394652e-05, + "log_odds_chosen": 11.027624130249023, + "log_odds_ratio": -5.361594230635092e-05, + "logits/chosen": -0.7446596026420593, + "logits/rejected": -0.8093918561935425, + "logps/chosen": -0.0003591571585275233, + "logps/rejected": -2.716170310974121, + "loss": 0.7645, + "nll_loss": 0.191114604473114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.591571294236928e-05, + "rewards/margins": 0.27158111333847046, + "rewards/rejected": -0.27161702513694763, + "step": 11343 + }, + { + "epoch": 7.845089903181189, + "grad_norm": 4.6528639793396, + "learning_rate": 1.1971722760104503e-05, + "log_odds_chosen": 10.873839378356934, + "log_odds_ratio": -7.976902270456776e-05, + "logits/chosen": -0.08298560976982117, + "logits/rejected": -0.2112613320350647, + "logps/chosen": -0.0002032502816291526, + "logps/rejected": -2.1599695682525635, + "loss": 0.8459, + "nll_loss": 0.21146902441978455, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0325027435319498e-05, + "rewards/margins": 0.21597662568092346, + "rewards/rejected": -0.21599695086479187, + "step": 11344 + }, + { + "epoch": 7.845781466113416, + "grad_norm": 3.8563454151153564, + "learning_rate": 1.1967880743814354e-05, + "log_odds_chosen": 10.505448341369629, + "log_odds_ratio": -0.00021720759104937315, + "logits/chosen": -0.21184486150741577, + "logits/rejected": -0.3287862539291382, + "logps/chosen": -0.000897840247489512, + "logps/rejected": -3.0171875953674316, + "loss": 0.3882, + "nll_loss": 0.09702227264642715, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.978402183856815e-05, + "rewards/margins": 0.3016289472579956, + "rewards/rejected": -0.30171874165534973, + "step": 11345 + }, + { + "epoch": 7.846473029045643, + "grad_norm": 3.9811694622039795, + "learning_rate": 1.1964038727524206e-05, + "log_odds_chosen": 10.677752494812012, + "log_odds_ratio": -8.449415327049792e-05, + "logits/chosen": -0.6144614219665527, + "logits/rejected": -0.6651566624641418, + "logps/chosen": -0.00035767414374276996, + "logps/rejected": -2.4102203845977783, + "loss": 0.4091, + "nll_loss": 0.10226333886384964, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5767414374276996e-05, + "rewards/margins": 0.24098627269268036, + "rewards/rejected": -0.2410220354795456, + "step": 11346 + }, + { + "epoch": 7.84716459197787, + "grad_norm": 3.5016424655914307, + "learning_rate": 1.1960196711234057e-05, + "log_odds_chosen": 11.957420349121094, + "log_odds_ratio": -2.618016878841445e-05, + "logits/chosen": -0.2839650809764862, + "logits/rejected": -0.24141938984394073, + "logps/chosen": -0.00011954591900575906, + "logps/rejected": -2.8302571773529053, + "loss": 0.3573, + "nll_loss": 0.08932714909315109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1954592082474846e-05, + "rewards/margins": 0.2830137610435486, + "rewards/rejected": -0.28302574157714844, + "step": 11347 + }, + { + "epoch": 7.8478561549100965, + "grad_norm": 2.5719215869903564, + "learning_rate": 1.1956354694943906e-05, + "log_odds_chosen": 10.805449485778809, + "log_odds_ratio": -6.877508712932467e-05, + "logits/chosen": -0.31896281242370605, + "logits/rejected": -0.31459271907806396, + "logps/chosen": -0.00021419592667371035, + "logps/rejected": -1.801652431488037, + "loss": 0.2996, + "nll_loss": 0.0749046728014946, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1419591575977392e-05, + "rewards/margins": 0.1801438331604004, + "rewards/rejected": -0.18016526103019714, + "step": 11348 + }, + { + "epoch": 7.848547717842323, + "grad_norm": 3.6286022663116455, + "learning_rate": 1.1952512678653759e-05, + "log_odds_chosen": 11.099205017089844, + "log_odds_ratio": -4.535958214546554e-05, + "logits/chosen": 0.25572267174720764, + "logits/rejected": 0.12598130106925964, + "logps/chosen": -0.00010003315401263535, + "logps/rejected": -1.739362120628357, + "loss": 0.6519, + "nll_loss": 0.1629638522863388, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0003315765061416e-05, + "rewards/margins": 0.1739262342453003, + "rewards/rejected": -0.17393621802330017, + "step": 11349 + }, + { + "epoch": 7.84923928077455, + "grad_norm": 4.637201309204102, + "learning_rate": 1.194867066236361e-05, + "log_odds_chosen": 11.232458114624023, + "log_odds_ratio": -3.946627475670539e-05, + "logits/chosen": -0.4672451615333557, + "logits/rejected": -0.48350343108177185, + "logps/chosen": -5.887038423679769e-05, + "logps/rejected": -1.6665010452270508, + "loss": 0.5222, + "nll_loss": 0.13054095208644867, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887038241780829e-06, + "rewards/margins": 0.16664421558380127, + "rewards/rejected": -0.16665011644363403, + "step": 11350 + }, + { + "epoch": 7.849930843706777, + "grad_norm": 6.867190361022949, + "learning_rate": 1.194482864607346e-05, + "log_odds_chosen": 10.638004302978516, + "log_odds_ratio": -4.492142761591822e-05, + "logits/chosen": -0.371229887008667, + "logits/rejected": -0.43291574716567993, + "logps/chosen": -0.00024072341329883784, + "logps/rejected": -2.168043851852417, + "loss": 0.4847, + "nll_loss": 0.1211802065372467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4072340238490142e-05, + "rewards/margins": 0.21678031980991364, + "rewards/rejected": -0.2168044000864029, + "step": 11351 + }, + { + "epoch": 7.850622406639004, + "grad_norm": 5.103278636932373, + "learning_rate": 1.1940986629783311e-05, + "log_odds_chosen": 11.732542037963867, + "log_odds_ratio": -2.349566057091579e-05, + "logits/chosen": -0.06722603738307953, + "logits/rejected": -0.14607380330562592, + "logps/chosen": -0.00017894791380967945, + "logps/rejected": -3.0719339847564697, + "loss": 0.5151, + "nll_loss": 0.12876620888710022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7894790289574303e-05, + "rewards/margins": 0.30717551708221436, + "rewards/rejected": -0.307193398475647, + "step": 11352 + }, + { + "epoch": 7.851313969571231, + "grad_norm": 3.148937225341797, + "learning_rate": 1.1937144613493162e-05, + "log_odds_chosen": 9.59694766998291, + "log_odds_ratio": -0.00046361677232198417, + "logits/chosen": -0.16655105352401733, + "logits/rejected": -0.22968561947345734, + "logps/chosen": -0.0014130291528999805, + "logps/rejected": -1.6357417106628418, + "loss": 0.4493, + "nll_loss": 0.11227520555257797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014130290946923196, + "rewards/margins": 0.16343286633491516, + "rewards/rejected": -0.1635741889476776, + "step": 11353 + }, + { + "epoch": 7.8520055325034575, + "grad_norm": 2.4725232124328613, + "learning_rate": 1.1933302597203012e-05, + "log_odds_chosen": 10.359973907470703, + "log_odds_ratio": -7.92969367466867e-05, + "logits/chosen": -0.25961601734161377, + "logits/rejected": -0.32079118490219116, + "logps/chosen": -0.0001499430218245834, + "logps/rejected": -1.4932595491409302, + "loss": 0.2224, + "nll_loss": 0.0555829294025898, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4994302546256222e-05, + "rewards/margins": 0.14931097626686096, + "rewards/rejected": -0.14932596683502197, + "step": 11354 + }, + { + "epoch": 7.852697095435684, + "grad_norm": 5.656296730041504, + "learning_rate": 1.1929460580912865e-05, + "log_odds_chosen": 10.169926643371582, + "log_odds_ratio": -0.000213885388802737, + "logits/chosen": -0.06691788136959076, + "logits/rejected": -0.08190090954303741, + "logps/chosen": -0.004533851984888315, + "logps/rejected": -2.7446389198303223, + "loss": 0.3907, + "nll_loss": 0.0976482480764389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00045338517520576715, + "rewards/margins": 0.2740105390548706, + "rewards/rejected": -0.2744638919830322, + "step": 11355 + }, + { + "epoch": 7.853388658367911, + "grad_norm": 8.513496398925781, + "learning_rate": 1.1925618564622716e-05, + "log_odds_chosen": 10.300134658813477, + "log_odds_ratio": -0.00021386246953625232, + "logits/chosen": -0.4488566517829895, + "logits/rejected": -0.49245864152908325, + "logps/chosen": -0.00041282945312559605, + "logps/rejected": -2.0244665145874023, + "loss": 0.5869, + "nll_loss": 0.14670827984809875, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.128294676775113e-05, + "rewards/margins": 0.2024053931236267, + "rewards/rejected": -0.20244666934013367, + "step": 11356 + }, + { + "epoch": 7.854080221300138, + "grad_norm": 5.423783779144287, + "learning_rate": 1.1921776548332565e-05, + "log_odds_chosen": 11.50162124633789, + "log_odds_ratio": -3.0906550819054246e-05, + "logits/chosen": -0.38133561611175537, + "logits/rejected": -0.5150085091590881, + "logps/chosen": -0.00015852053184062243, + "logps/rejected": -2.555122137069702, + "loss": 0.5308, + "nll_loss": 0.13269172608852386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5852054275455885e-05, + "rewards/margins": 0.2554963529109955, + "rewards/rejected": -0.25551217794418335, + "step": 11357 + }, + { + "epoch": 7.854771784232365, + "grad_norm": 7.287617206573486, + "learning_rate": 1.1917934532042417e-05, + "log_odds_chosen": 12.808563232421875, + "log_odds_ratio": -9.047294952324592e-06, + "logits/chosen": -0.13653582334518433, + "logits/rejected": -0.24647362530231476, + "logps/chosen": -8.96514393389225e-05, + "logps/rejected": -3.2580180168151855, + "loss": 0.5415, + "nll_loss": 0.13537272810935974, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.96514393389225e-06, + "rewards/margins": 0.3257928788661957, + "rewards/rejected": -0.3258018493652344, + "step": 11358 + }, + { + "epoch": 7.855463347164592, + "grad_norm": 5.785372734069824, + "learning_rate": 1.1914092515752268e-05, + "log_odds_chosen": 10.555879592895508, + "log_odds_ratio": -0.00010297319386154413, + "logits/chosen": -0.3700430393218994, + "logits/rejected": -0.39971691370010376, + "logps/chosen": -0.0007725593168288469, + "logps/rejected": -1.972679853439331, + "loss": 0.3809, + "nll_loss": 0.0952107161283493, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.725592877250165e-05, + "rewards/margins": 0.19719073176383972, + "rewards/rejected": -0.19726797938346863, + "step": 11359 + }, + { + "epoch": 7.856154910096818, + "grad_norm": 12.34350872039795, + "learning_rate": 1.1910250499462119e-05, + "log_odds_chosen": 11.76555061340332, + "log_odds_ratio": -2.7817648515338078e-05, + "logits/chosen": -0.3621176481246948, + "logits/rejected": -0.19518381357192993, + "logps/chosen": -0.00010538271453697234, + "logps/rejected": -2.216698169708252, + "loss": 0.7454, + "nll_loss": 0.18635067343711853, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0538271453697234e-05, + "rewards/margins": 0.22165925800800323, + "rewards/rejected": -0.22166979312896729, + "step": 11360 + }, + { + "epoch": 7.856846473029045, + "grad_norm": 3.945906639099121, + "learning_rate": 1.190640848317197e-05, + "log_odds_chosen": 10.77137565612793, + "log_odds_ratio": -0.00023443363897968084, + "logits/chosen": -0.5106649398803711, + "logits/rejected": -0.676784873008728, + "logps/chosen": -0.0003039441944565624, + "logps/rejected": -2.170654058456421, + "loss": 0.389, + "nll_loss": 0.09721540659666061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.039441980945412e-05, + "rewards/margins": 0.21703502535820007, + "rewards/rejected": -0.21706542372703552, + "step": 11361 + }, + { + "epoch": 7.857538035961272, + "grad_norm": 6.773676872253418, + "learning_rate": 1.190256646688182e-05, + "log_odds_chosen": 10.748955726623535, + "log_odds_ratio": -0.0003274211485404521, + "logits/chosen": -0.5217385292053223, + "logits/rejected": -0.6142206192016602, + "logps/chosen": -0.0005004443228244781, + "logps/rejected": -2.384121894836426, + "loss": 0.6019, + "nll_loss": 0.15044429898262024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0044436648022383e-05, + "rewards/margins": 0.2383621484041214, + "rewards/rejected": -0.23841220140457153, + "step": 11362 + }, + { + "epoch": 7.858229598893499, + "grad_norm": 4.326676845550537, + "learning_rate": 1.189872445059167e-05, + "log_odds_chosen": 11.014705657958984, + "log_odds_ratio": -3.578926043701358e-05, + "logits/chosen": -0.1777215152978897, + "logits/rejected": -0.33752867579460144, + "logps/chosen": -0.00015030778013169765, + "logps/rejected": -1.9089435338974, + "loss": 0.5565, + "nll_loss": 0.13911172747612, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5030776921776123e-05, + "rewards/margins": 0.19087931513786316, + "rewards/rejected": -0.19089436531066895, + "step": 11363 + }, + { + "epoch": 7.858921161825726, + "grad_norm": 7.081151485443115, + "learning_rate": 1.1894882434301522e-05, + "log_odds_chosen": 10.904725074768066, + "log_odds_ratio": -4.0231516322819516e-05, + "logits/chosen": -0.6134161949157715, + "logits/rejected": -0.5148768424987793, + "logps/chosen": -0.00023824315576348454, + "logps/rejected": -2.381795644760132, + "loss": 0.3085, + "nll_loss": 0.07712505757808685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3824315576348454e-05, + "rewards/margins": 0.23815575242042542, + "rewards/rejected": -0.23817956447601318, + "step": 11364 + }, + { + "epoch": 7.8596127247579535, + "grad_norm": 5.808526039123535, + "learning_rate": 1.1891040418011374e-05, + "log_odds_chosen": 11.700054168701172, + "log_odds_ratio": -2.5583209207979962e-05, + "logits/chosen": -0.04865068197250366, + "logits/rejected": -0.04677605628967285, + "logps/chosen": -0.00012595993757713586, + "logps/rejected": -2.695266008377075, + "loss": 0.6089, + "nll_loss": 0.1522296518087387, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2595994121511467e-05, + "rewards/margins": 0.2695139944553375, + "rewards/rejected": -0.2695266008377075, + "step": 11365 + }, + { + "epoch": 7.86030428769018, + "grad_norm": 3.7067861557006836, + "learning_rate": 1.1887198401721223e-05, + "log_odds_chosen": 10.165199279785156, + "log_odds_ratio": -0.0002667968219611794, + "logits/chosen": -0.048863768577575684, + "logits/rejected": -0.08387540280818939, + "logps/chosen": -0.00018476907280273736, + "logps/rejected": -1.482744812965393, + "loss": 0.4206, + "nll_loss": 0.1051340401172638, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.847690873546526e-05, + "rewards/margins": 0.14825600385665894, + "rewards/rejected": -0.1482744812965393, + "step": 11366 + }, + { + "epoch": 7.860995850622407, + "grad_norm": 9.02211856842041, + "learning_rate": 1.1883356385431074e-05, + "log_odds_chosen": 11.678213119506836, + "log_odds_ratio": -1.4590928913094103e-05, + "logits/chosen": -0.4199620187282562, + "logits/rejected": -0.37431225180625916, + "logps/chosen": -9.888997010421008e-05, + "logps/rejected": -2.3296117782592773, + "loss": 0.4223, + "nll_loss": 0.10557062923908234, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.88899773801677e-06, + "rewards/margins": 0.23295128345489502, + "rewards/rejected": -0.23296119272708893, + "step": 11367 + }, + { + "epoch": 7.861687413554634, + "grad_norm": 5.323549747467041, + "learning_rate": 1.1879514369140926e-05, + "log_odds_chosen": 10.693358421325684, + "log_odds_ratio": -5.926968879066408e-05, + "logits/chosen": 0.18157647550106049, + "logits/rejected": 0.08172719180583954, + "logps/chosen": -0.0004564730334095657, + "logps/rejected": -2.4843926429748535, + "loss": 0.4597, + "nll_loss": 0.11491090804338455, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.564730261336081e-05, + "rewards/margins": 0.24839362502098083, + "rewards/rejected": -0.24843928217887878, + "step": 11368 + }, + { + "epoch": 7.862378976486861, + "grad_norm": 6.065701961517334, + "learning_rate": 1.1875672352850777e-05, + "log_odds_chosen": 10.91854476928711, + "log_odds_ratio": -8.575063839089125e-05, + "logits/chosen": -0.4183662533760071, + "logits/rejected": -0.4504457712173462, + "logps/chosen": -0.0009298054501414299, + "logps/rejected": -2.360944986343384, + "loss": 0.6333, + "nll_loss": 0.1583259552717209, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.298054646933451e-05, + "rewards/margins": 0.23600150644779205, + "rewards/rejected": -0.23609450459480286, + "step": 11369 + }, + { + "epoch": 7.863070539419088, + "grad_norm": 5.207459449768066, + "learning_rate": 1.1871830336560628e-05, + "log_odds_chosen": 10.09914779663086, + "log_odds_ratio": -0.00018985196948051453, + "logits/chosen": -0.2728763222694397, + "logits/rejected": -0.29989945888519287, + "logps/chosen": -0.0006764763966202736, + "logps/rejected": -1.8139231204986572, + "loss": 0.4903, + "nll_loss": 0.12256253510713577, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.764764111721888e-05, + "rewards/margins": 0.18132466077804565, + "rewards/rejected": -0.18139231204986572, + "step": 11370 + }, + { + "epoch": 7.8637621023513145, + "grad_norm": 4.561739921569824, + "learning_rate": 1.1867988320270478e-05, + "log_odds_chosen": 10.708756446838379, + "log_odds_ratio": -0.00019307366164866835, + "logits/chosen": -0.18084324896335602, + "logits/rejected": -0.2568657100200653, + "logps/chosen": -0.0005610042135231197, + "logps/rejected": -2.330329179763794, + "loss": 0.5474, + "nll_loss": 0.13683900237083435, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6100419897120446e-05, + "rewards/margins": 0.23297682404518127, + "rewards/rejected": -0.23303291201591492, + "step": 11371 + }, + { + "epoch": 7.864453665283541, + "grad_norm": 4.787899494171143, + "learning_rate": 1.186414630398033e-05, + "log_odds_chosen": 10.986349105834961, + "log_odds_ratio": -5.588722706306726e-05, + "logits/chosen": -0.3971659541130066, + "logits/rejected": -0.41987258195877075, + "logps/chosen": -0.00015680750948376954, + "logps/rejected": -1.9517314434051514, + "loss": 0.5272, + "nll_loss": 0.1318061649799347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5680752767366357e-05, + "rewards/margins": 0.1951574832201004, + "rewards/rejected": -0.19517315924167633, + "step": 11372 + }, + { + "epoch": 7.865145228215768, + "grad_norm": 4.8560285568237305, + "learning_rate": 1.186030428769018e-05, + "log_odds_chosen": 10.537019729614258, + "log_odds_ratio": -0.0001434181467629969, + "logits/chosen": -0.12688449025154114, + "logits/rejected": -0.23663949966430664, + "logps/chosen": -0.0009801515843719244, + "logps/rejected": -2.249166488647461, + "loss": 0.4461, + "nll_loss": 0.11150926351547241, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.801516716834158e-05, + "rewards/margins": 0.2248186469078064, + "rewards/rejected": -0.22491665184497833, + "step": 11373 + }, + { + "epoch": 7.865836791147995, + "grad_norm": 3.163818359375, + "learning_rate": 1.1856462271400032e-05, + "log_odds_chosen": 10.34457015991211, + "log_odds_ratio": -0.0002107964246533811, + "logits/chosen": -0.2862035930156708, + "logits/rejected": -0.2810376286506653, + "logps/chosen": -0.0007940607611089945, + "logps/rejected": -1.9568583965301514, + "loss": 0.5205, + "nll_loss": 0.13010820746421814, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.940607611089945e-05, + "rewards/margins": 0.19560644030570984, + "rewards/rejected": -0.19568583369255066, + "step": 11374 + }, + { + "epoch": 7.866528354080222, + "grad_norm": 3.7396209239959717, + "learning_rate": 1.1852620255109881e-05, + "log_odds_chosen": 11.540573120117188, + "log_odds_ratio": -2.3659078578930348e-05, + "logits/chosen": -0.1099567711353302, + "logits/rejected": -0.20732180774211884, + "logps/chosen": -0.00012644784874282777, + "logps/rejected": -2.3760976791381836, + "loss": 0.3376, + "nll_loss": 0.08440861850976944, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2644785783777479e-05, + "rewards/margins": 0.23759713768959045, + "rewards/rejected": -0.23760978877544403, + "step": 11375 + }, + { + "epoch": 7.867219917012449, + "grad_norm": 5.468509197235107, + "learning_rate": 1.1848778238819732e-05, + "log_odds_chosen": 11.563232421875, + "log_odds_ratio": -3.333418135298416e-05, + "logits/chosen": -0.07910732924938202, + "logits/rejected": -0.14191415905952454, + "logps/chosen": -0.00014625617768615484, + "logps/rejected": -2.604708194732666, + "loss": 0.5986, + "nll_loss": 0.1496472954750061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4625617950514425e-05, + "rewards/margins": 0.2604562044143677, + "rewards/rejected": -0.26047080755233765, + "step": 11376 + }, + { + "epoch": 7.867911479944675, + "grad_norm": 11.16819953918457, + "learning_rate": 1.1844936222529585e-05, + "log_odds_chosen": 10.969552040100098, + "log_odds_ratio": -0.0002762842341326177, + "logits/chosen": -0.4043792486190796, + "logits/rejected": -0.5222166180610657, + "logps/chosen": -0.0009811146883293986, + "logps/rejected": -2.2527077198028564, + "loss": 0.4811, + "nll_loss": 0.12023838609457016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.811147174332291e-05, + "rewards/margins": 0.22517266869544983, + "rewards/rejected": -0.22527077794075012, + "step": 11377 + }, + { + "epoch": 7.868603042876902, + "grad_norm": 3.129385232925415, + "learning_rate": 1.1841094206239435e-05, + "log_odds_chosen": 10.737401008605957, + "log_odds_ratio": -5.994061575620435e-05, + "logits/chosen": -0.6261686086654663, + "logits/rejected": -0.6428536176681519, + "logps/chosen": -0.0005035304930061102, + "logps/rejected": -2.0155394077301025, + "loss": 0.3526, + "nll_loss": 0.08814893662929535, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035305002820678e-05, + "rewards/margins": 0.20150357484817505, + "rewards/rejected": -0.20155392587184906, + "step": 11378 + }, + { + "epoch": 7.869294605809129, + "grad_norm": 3.615654468536377, + "learning_rate": 1.1837252189949286e-05, + "log_odds_chosen": 9.873108863830566, + "log_odds_ratio": -0.000126995742903091, + "logits/chosen": -0.11937469989061356, + "logits/rejected": -0.16411955654621124, + "logps/chosen": -0.00027166143991053104, + "logps/rejected": -1.670121192932129, + "loss": 0.7555, + "nll_loss": 0.18886922299861908, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.716614653763827e-05, + "rewards/margins": 0.16698496043682098, + "rewards/rejected": -0.16701211035251617, + "step": 11379 + }, + { + "epoch": 7.869986168741356, + "grad_norm": 59.96883773803711, + "learning_rate": 1.1833410173659137e-05, + "log_odds_chosen": 8.984199523925781, + "log_odds_ratio": -1.482587456703186, + "logits/chosen": -0.24020972847938538, + "logits/rejected": -0.35119855403900146, + "logps/chosen": -0.23727771639823914, + "logps/rejected": -1.865845799446106, + "loss": 1.033, + "nll_loss": 0.1099969893693924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.023727772757411003, + "rewards/margins": 0.1628568172454834, + "rewards/rejected": -0.18658457696437836, + "step": 11380 + }, + { + "epoch": 7.870677731673583, + "grad_norm": 3.3628134727478027, + "learning_rate": 1.1829568157368988e-05, + "log_odds_chosen": 10.520112037658691, + "log_odds_ratio": -0.00017830124124884605, + "logits/chosen": -0.5980362296104431, + "logits/rejected": -0.6402428150177002, + "logps/chosen": -0.0003178414481226355, + "logps/rejected": -2.241232395172119, + "loss": 0.3839, + "nll_loss": 0.09596016258001328, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.178414408466779e-05, + "rewards/margins": 0.22409147024154663, + "rewards/rejected": -0.2241232544183731, + "step": 11381 + }, + { + "epoch": 7.87136929460581, + "grad_norm": 5.686932563781738, + "learning_rate": 1.1825726141078838e-05, + "log_odds_chosen": 12.08934211730957, + "log_odds_ratio": -7.481579814339057e-06, + "logits/chosen": 0.09978464990854263, + "logits/rejected": 0.06304813921451569, + "logps/chosen": -0.0002518608816899359, + "logps/rejected": -3.102248430252075, + "loss": 0.4265, + "nll_loss": 0.10661499202251434, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.518608744139783e-05, + "rewards/margins": 0.31019964814186096, + "rewards/rejected": -0.31022483110427856, + "step": 11382 + }, + { + "epoch": 7.872060857538036, + "grad_norm": 4.011767864227295, + "learning_rate": 1.1821884124788691e-05, + "log_odds_chosen": 11.806448936462402, + "log_odds_ratio": -2.2543908926309086e-05, + "logits/chosen": -0.8501976728439331, + "logits/rejected": -0.9305237531661987, + "logps/chosen": -7.412520062644035e-05, + "logps/rejected": -2.3156652450561523, + "loss": 0.4158, + "nll_loss": 0.10394752770662308, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.412520062644035e-06, + "rewards/margins": 0.2315591275691986, + "rewards/rejected": -0.23156653344631195, + "step": 11383 + }, + { + "epoch": 7.872752420470263, + "grad_norm": 3.638526439666748, + "learning_rate": 1.181804210849854e-05, + "log_odds_chosen": 9.84005355834961, + "log_odds_ratio": -0.0012426018947735429, + "logits/chosen": -0.3885806202888489, + "logits/rejected": -0.2935905456542969, + "logps/chosen": -0.000531262659933418, + "logps/rejected": -1.8630025386810303, + "loss": 0.6088, + "nll_loss": 0.15207722783088684, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3126266720937565e-05, + "rewards/margins": 0.18624712526798248, + "rewards/rejected": -0.18630024790763855, + "step": 11384 + }, + { + "epoch": 7.87344398340249, + "grad_norm": 4.375177383422852, + "learning_rate": 1.181420009220839e-05, + "log_odds_chosen": 10.815048217773438, + "log_odds_ratio": -3.960235699196346e-05, + "logits/chosen": -0.04029744863510132, + "logits/rejected": -0.00986124575138092, + "logps/chosen": -0.00011397508205845952, + "logps/rejected": -1.61173415184021, + "loss": 0.4739, + "nll_loss": 0.11848108470439911, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1397508387744892e-05, + "rewards/margins": 0.16116203367710114, + "rewards/rejected": -0.16117341816425323, + "step": 11385 + }, + { + "epoch": 7.874135546334717, + "grad_norm": 6.68137264251709, + "learning_rate": 1.1810358075918243e-05, + "log_odds_chosen": 10.267988204956055, + "log_odds_ratio": -9.309072629548609e-05, + "logits/chosen": -0.5029177069664001, + "logits/rejected": -0.4623737335205078, + "logps/chosen": -0.0002573465171735734, + "logps/rejected": -1.8290235996246338, + "loss": 0.4549, + "nll_loss": 0.11370395123958588, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.573465280875098e-05, + "rewards/margins": 0.18287664651870728, + "rewards/rejected": -0.18290236592292786, + "step": 11386 + }, + { + "epoch": 7.874827109266944, + "grad_norm": 4.482590675354004, + "learning_rate": 1.1806516059628094e-05, + "log_odds_chosen": 11.114848136901855, + "log_odds_ratio": -0.00025937389000318944, + "logits/chosen": -0.4152233302593231, + "logits/rejected": -0.5423761606216431, + "logps/chosen": -0.0001527878048364073, + "logps/rejected": -2.3076977729797363, + "loss": 0.4969, + "nll_loss": 0.12419218569993973, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5278781575034373e-05, + "rewards/margins": 0.2307545244693756, + "rewards/rejected": -0.2307698130607605, + "step": 11387 + }, + { + "epoch": 7.875518672199171, + "grad_norm": 5.104621410369873, + "learning_rate": 1.1802674043337945e-05, + "log_odds_chosen": 11.1847562789917, + "log_odds_ratio": -1.6824447811814025e-05, + "logits/chosen": -0.5179380774497986, + "logits/rejected": -0.5166718363761902, + "logps/chosen": -0.00022529246052727103, + "logps/rejected": -2.265241861343384, + "loss": 0.4431, + "nll_loss": 0.11076471954584122, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2529246052727103e-05, + "rewards/margins": 0.2265016734600067, + "rewards/rejected": -0.22652418911457062, + "step": 11388 + }, + { + "epoch": 7.876210235131397, + "grad_norm": 4.958249092102051, + "learning_rate": 1.1798832027047795e-05, + "log_odds_chosen": 11.588720321655273, + "log_odds_ratio": -1.3179649613448419e-05, + "logits/chosen": -0.27237236499786377, + "logits/rejected": -0.2907797694206238, + "logps/chosen": -0.00013127666898071766, + "logps/rejected": -2.4104108810424805, + "loss": 0.4595, + "nll_loss": 0.11488554626703262, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3127668353263289e-05, + "rewards/margins": 0.24102795124053955, + "rewards/rejected": -0.24104109406471252, + "step": 11389 + }, + { + "epoch": 7.876901798063624, + "grad_norm": 4.1554412841796875, + "learning_rate": 1.1794990010757646e-05, + "log_odds_chosen": 11.154207229614258, + "log_odds_ratio": -3.690812081913464e-05, + "logits/chosen": -0.46767714619636536, + "logits/rejected": -0.6114166975021362, + "logps/chosen": -0.00019952871662098914, + "logps/rejected": -2.374128818511963, + "loss": 0.5106, + "nll_loss": 0.12764978408813477, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9952873117290437e-05, + "rewards/margins": 0.23739290237426758, + "rewards/rejected": -0.23741286993026733, + "step": 11390 + }, + { + "epoch": 7.877593360995851, + "grad_norm": 4.927827835083008, + "learning_rate": 1.1791147994467497e-05, + "log_odds_chosen": 10.432454109191895, + "log_odds_ratio": -0.000577417784370482, + "logits/chosen": -0.5227782726287842, + "logits/rejected": -0.4943715035915375, + "logps/chosen": -0.0006076360587030649, + "logps/rejected": -1.9413719177246094, + "loss": 0.4588, + "nll_loss": 0.11463840305805206, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0763602959923446e-05, + "rewards/margins": 0.19407644867897034, + "rewards/rejected": -0.19413720071315765, + "step": 11391 + }, + { + "epoch": 7.878284923928078, + "grad_norm": 8.029701232910156, + "learning_rate": 1.178730597817735e-05, + "log_odds_chosen": 10.086861610412598, + "log_odds_ratio": -0.0001571264147059992, + "logits/chosen": -0.3266010582447052, + "logits/rejected": -0.41374966502189636, + "logps/chosen": -0.0011758707696571946, + "logps/rejected": -1.8890432119369507, + "loss": 0.5781, + "nll_loss": 0.14450611174106598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001175870856968686, + "rewards/margins": 0.18878674507141113, + "rewards/rejected": -0.1889043152332306, + "step": 11392 + }, + { + "epoch": 7.878976486860305, + "grad_norm": 4.888491153717041, + "learning_rate": 1.1783463961887198e-05, + "log_odds_chosen": 11.464473724365234, + "log_odds_ratio": -1.954937215487007e-05, + "logits/chosen": -0.6128780841827393, + "logits/rejected": -0.7605924606323242, + "logps/chosen": -0.00020476209465414286, + "logps/rejected": -2.521747589111328, + "loss": 0.3513, + "nll_loss": 0.08781325817108154, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.047621092060581e-05, + "rewards/margins": 0.25215429067611694, + "rewards/rejected": -0.2521747648715973, + "step": 11393 + }, + { + "epoch": 7.8796680497925315, + "grad_norm": 4.10215950012207, + "learning_rate": 1.1779621945597049e-05, + "log_odds_chosen": 12.365216255187988, + "log_odds_ratio": -9.213494195137173e-06, + "logits/chosen": 0.05360027775168419, + "logits/rejected": 0.00046793476212769747, + "logps/chosen": -0.00018678676860872656, + "logps/rejected": -3.1028411388397217, + "loss": 0.3805, + "nll_loss": 0.09511671960353851, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.867867831606418e-05, + "rewards/margins": 0.3102654218673706, + "rewards/rejected": -0.3102841377258301, + "step": 11394 + }, + { + "epoch": 7.880359612724758, + "grad_norm": 3.1337502002716064, + "learning_rate": 1.17757799293069e-05, + "log_odds_chosen": 11.70418643951416, + "log_odds_ratio": -1.843475729401689e-05, + "logits/chosen": -0.10929186642169952, + "logits/rejected": -0.029111243784427643, + "logps/chosen": -0.0001291261869482696, + "logps/rejected": -2.3684635162353516, + "loss": 0.3893, + "nll_loss": 0.09732114523649216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.291261833102908e-05, + "rewards/margins": 0.23683345317840576, + "rewards/rejected": -0.23684635758399963, + "step": 11395 + }, + { + "epoch": 7.881051175656985, + "grad_norm": 3.5384857654571533, + "learning_rate": 1.1771937913016752e-05, + "log_odds_chosen": 11.739799499511719, + "log_odds_ratio": -1.185938799608266e-05, + "logits/chosen": 0.07834568619728088, + "logits/rejected": -0.056670159101486206, + "logps/chosen": -0.0001279369171243161, + "logps/rejected": -2.635584592819214, + "loss": 0.4849, + "nll_loss": 0.12123227119445801, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2793692803825252e-05, + "rewards/margins": 0.2635456621646881, + "rewards/rejected": -0.26355844736099243, + "step": 11396 + }, + { + "epoch": 7.881742738589212, + "grad_norm": 3.704191207885742, + "learning_rate": 1.1768095896726603e-05, + "log_odds_chosen": 9.989892959594727, + "log_odds_ratio": -0.00014886785356793553, + "logits/chosen": 0.043805141001939774, + "logits/rejected": -0.012445596978068352, + "logps/chosen": -0.00040336043457500637, + "logps/rejected": -1.742424726486206, + "loss": 0.2857, + "nll_loss": 0.07141406834125519, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0336042729904875e-05, + "rewards/margins": 0.17420212924480438, + "rewards/rejected": -0.17424246668815613, + "step": 11397 + }, + { + "epoch": 7.882434301521439, + "grad_norm": 4.47479772567749, + "learning_rate": 1.1764253880436454e-05, + "log_odds_chosen": 11.427066802978516, + "log_odds_ratio": -4.571495810523629e-05, + "logits/chosen": -0.20953892171382904, + "logits/rejected": -0.324074387550354, + "logps/chosen": -0.00020148059411440045, + "logps/rejected": -2.3615403175354004, + "loss": 0.5508, + "nll_loss": 0.13769517838954926, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0148057956248522e-05, + "rewards/margins": 0.2361339032649994, + "rewards/rejected": -0.23615404963493347, + "step": 11398 + }, + { + "epoch": 7.883125864453666, + "grad_norm": 5.660167694091797, + "learning_rate": 1.1760411864146305e-05, + "log_odds_chosen": 10.79905891418457, + "log_odds_ratio": -0.00018808482855092734, + "logits/chosen": 0.26020288467407227, + "logits/rejected": 0.17786680161952972, + "logps/chosen": -0.0012968253577128053, + "logps/rejected": -2.7913854122161865, + "loss": 0.4628, + "nll_loss": 0.11568672955036163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012968253577128053, + "rewards/margins": 0.2790088653564453, + "rewards/rejected": -0.27913856506347656, + "step": 11399 + }, + { + "epoch": 7.8838174273858925, + "grad_norm": 5.293552875518799, + "learning_rate": 1.1756569847856155e-05, + "log_odds_chosen": 9.490236282348633, + "log_odds_ratio": -0.0009365877485834062, + "logits/chosen": 0.11345387250185013, + "logits/rejected": -0.07768663018941879, + "logps/chosen": -0.00144152098800987, + "logps/rejected": -2.5067501068115234, + "loss": 0.5639, + "nll_loss": 0.14088603854179382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014415210171137005, + "rewards/margins": 0.25053083896636963, + "rewards/rejected": -0.2506750226020813, + "step": 11400 + }, + { + "epoch": 7.884508990318119, + "grad_norm": 4.563834190368652, + "learning_rate": 1.1752727831566006e-05, + "log_odds_chosen": 11.245931625366211, + "log_odds_ratio": -2.0148238036199473e-05, + "logits/chosen": -0.014509126543998718, + "logits/rejected": -0.10480161011219025, + "logps/chosen": -0.00014589951024390757, + "logps/rejected": -2.1419601440429688, + "loss": 0.5504, + "nll_loss": 0.13758864998817444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4589951206289697e-05, + "rewards/margins": 0.21418142318725586, + "rewards/rejected": -0.21419601142406464, + "step": 11401 + }, + { + "epoch": 7.885200553250346, + "grad_norm": 4.083811283111572, + "learning_rate": 1.1748885815275858e-05, + "log_odds_chosen": 9.566080093383789, + "log_odds_ratio": -0.0007488796254619956, + "logits/chosen": -0.2967919707298279, + "logits/rejected": -0.3396698832511902, + "logps/chosen": -0.0015874492237344384, + "logps/rejected": -2.4406490325927734, + "loss": 0.3039, + "nll_loss": 0.07590655982494354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015874492237344384, + "rewards/margins": 0.243906170129776, + "rewards/rejected": -0.24406489729881287, + "step": 11402 + }, + { + "epoch": 7.885892116182573, + "grad_norm": 5.50595760345459, + "learning_rate": 1.1745043798985708e-05, + "log_odds_chosen": 11.039468765258789, + "log_odds_ratio": -0.0003161336644552648, + "logits/chosen": -0.3856697082519531, + "logits/rejected": -0.3346801698207855, + "logps/chosen": -0.0003978805907536298, + "logps/rejected": -2.7548675537109375, + "loss": 0.67, + "nll_loss": 0.1674737185239792, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9788061258150265e-05, + "rewards/margins": 0.27544695138931274, + "rewards/rejected": -0.2754867672920227, + "step": 11403 + }, + { + "epoch": 7.8865836791148, + "grad_norm": 3.6710848808288574, + "learning_rate": 1.1741201782695558e-05, + "log_odds_chosen": 11.459981918334961, + "log_odds_ratio": -9.545421198708937e-05, + "logits/chosen": -0.16816915571689606, + "logits/rejected": -0.31154415011405945, + "logps/chosen": -0.0001600806281203404, + "logps/rejected": -2.341383934020996, + "loss": 0.5636, + "nll_loss": 0.1408993899822235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6008063539629802e-05, + "rewards/margins": 0.2341223806142807, + "rewards/rejected": -0.2341383695602417, + "step": 11404 + }, + { + "epoch": 7.887275242047027, + "grad_norm": 4.168887615203857, + "learning_rate": 1.173735976640541e-05, + "log_odds_chosen": 11.432533264160156, + "log_odds_ratio": -3.0281080398708582e-05, + "logits/chosen": -0.03569936007261276, + "logits/rejected": -0.014538850635290146, + "logps/chosen": -0.00017260480672121048, + "logps/rejected": -2.6162784099578857, + "loss": 0.5044, + "nll_loss": 0.1261000633239746, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.726048139971681e-05, + "rewards/margins": 0.26161056756973267, + "rewards/rejected": -0.26162785291671753, + "step": 11405 + }, + { + "epoch": 7.8879668049792535, + "grad_norm": 10.902813911437988, + "learning_rate": 1.1733517750115261e-05, + "log_odds_chosen": 11.470321655273438, + "log_odds_ratio": -1.4454016309173312e-05, + "logits/chosen": 0.037685878574848175, + "logits/rejected": -0.08560698479413986, + "logps/chosen": -0.00012334572966210544, + "logps/rejected": -2.1695070266723633, + "loss": 0.5225, + "nll_loss": 0.13062696158885956, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2334573511907365e-05, + "rewards/margins": 0.21693839132785797, + "rewards/rejected": -0.21695071458816528, + "step": 11406 + }, + { + "epoch": 7.88865836791148, + "grad_norm": 4.911862850189209, + "learning_rate": 1.1729675733825112e-05, + "log_odds_chosen": 11.952144622802734, + "log_odds_ratio": -2.8422375180525705e-05, + "logits/chosen": -0.16157597303390503, + "logits/rejected": -0.2448599487543106, + "logps/chosen": -0.00015069168875925243, + "logps/rejected": -2.9311952590942383, + "loss": 0.4916, + "nll_loss": 0.12288607656955719, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5069169421622064e-05, + "rewards/margins": 0.29310446977615356, + "rewards/rejected": -0.29311954975128174, + "step": 11407 + }, + { + "epoch": 7.889349930843707, + "grad_norm": 3.9067697525024414, + "learning_rate": 1.1725833717534963e-05, + "log_odds_chosen": 11.630581855773926, + "log_odds_ratio": -8.880384848453104e-05, + "logits/chosen": -0.16571418941020966, + "logits/rejected": -0.17225387692451477, + "logps/chosen": -0.00026298040756955743, + "logps/rejected": -3.0806257724761963, + "loss": 0.4761, + "nll_loss": 0.11901277303695679, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6298041120753624e-05, + "rewards/margins": 0.30803629755973816, + "rewards/rejected": -0.3080625832080841, + "step": 11408 + }, + { + "epoch": 7.890041493775934, + "grad_norm": 4.061779499053955, + "learning_rate": 1.1721991701244814e-05, + "log_odds_chosen": 10.276396751403809, + "log_odds_ratio": -0.0003999832842964679, + "logits/chosen": -0.6998578310012817, + "logits/rejected": -0.5479745864868164, + "logps/chosen": -0.00022533259470947087, + "logps/rejected": -1.7632088661193848, + "loss": 0.3925, + "nll_loss": 0.09808278828859329, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2533258743351325e-05, + "rewards/margins": 0.1762983649969101, + "rewards/rejected": -0.176320880651474, + "step": 11409 + }, + { + "epoch": 7.890733056708161, + "grad_norm": 4.482332706451416, + "learning_rate": 1.1718149684954664e-05, + "log_odds_chosen": 11.261518478393555, + "log_odds_ratio": -4.342007377999835e-05, + "logits/chosen": -0.4291597008705139, + "logits/rejected": -0.3953316807746887, + "logps/chosen": -0.0003815246745944023, + "logps/rejected": -2.932671308517456, + "loss": 0.4683, + "nll_loss": 0.1170596033334732, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8152469642227516e-05, + "rewards/margins": 0.29322898387908936, + "rewards/rejected": -0.2932671308517456, + "step": 11410 + }, + { + "epoch": 7.891424619640388, + "grad_norm": 5.705649375915527, + "learning_rate": 1.1714307668664517e-05, + "log_odds_chosen": 12.284934997558594, + "log_odds_ratio": -1.5169678590609692e-05, + "logits/chosen": -0.4257548153400421, + "logits/rejected": -0.39802616834640503, + "logps/chosen": -0.00021122126781847328, + "logps/rejected": -2.713761568069458, + "loss": 0.4794, + "nll_loss": 0.11984308063983917, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1122126781847328e-05, + "rewards/margins": 0.2713550329208374, + "rewards/rejected": -0.2713761627674103, + "step": 11411 + }, + { + "epoch": 7.8921161825726145, + "grad_norm": 6.233856201171875, + "learning_rate": 1.1710465652374366e-05, + "log_odds_chosen": 11.601268768310547, + "log_odds_ratio": -2.2449883545050398e-05, + "logits/chosen": -0.3950972855091095, + "logits/rejected": -0.38946732878685, + "logps/chosen": -9.428364137420431e-05, + "logps/rejected": -2.0098214149475098, + "loss": 0.5088, + "nll_loss": 0.12719620764255524, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.428363227925729e-06, + "rewards/margins": 0.20097270607948303, + "rewards/rejected": -0.20098212361335754, + "step": 11412 + }, + { + "epoch": 7.892807745504841, + "grad_norm": 8.967583656311035, + "learning_rate": 1.1706623636084217e-05, + "log_odds_chosen": 13.042219161987305, + "log_odds_ratio": -4.923728283756645e-06, + "logits/chosen": -0.48502588272094727, + "logits/rejected": -0.5291862487792969, + "logps/chosen": -0.00027994526317343116, + "logps/rejected": -4.361847400665283, + "loss": 0.676, + "nll_loss": 0.16900193691253662, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7994525225949474e-05, + "rewards/margins": 0.4361567497253418, + "rewards/rejected": -0.43618476390838623, + "step": 11413 + }, + { + "epoch": 7.893499308437068, + "grad_norm": 4.976434230804443, + "learning_rate": 1.170278161979407e-05, + "log_odds_chosen": 11.235292434692383, + "log_odds_ratio": -5.498766404343769e-05, + "logits/chosen": -0.390159547328949, + "logits/rejected": -0.4429699182510376, + "logps/chosen": -0.0002683410421013832, + "logps/rejected": -2.61037015914917, + "loss": 0.5119, + "nll_loss": 0.12796220183372498, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.68341045739362e-05, + "rewards/margins": 0.26101019978523254, + "rewards/rejected": -0.26103702187538147, + "step": 11414 + }, + { + "epoch": 7.894190871369295, + "grad_norm": 4.873762130737305, + "learning_rate": 1.169893960350392e-05, + "log_odds_chosen": 11.534000396728516, + "log_odds_ratio": -2.1693673261324875e-05, + "logits/chosen": -0.5807253122329712, + "logits/rejected": -0.6946268677711487, + "logps/chosen": -0.00021547307551372796, + "logps/rejected": -2.3493354320526123, + "loss": 0.5295, + "nll_loss": 0.1323809027671814, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1547308278968558e-05, + "rewards/margins": 0.2349119931459427, + "rewards/rejected": -0.234933540225029, + "step": 11415 + }, + { + "epoch": 7.894882434301522, + "grad_norm": 4.372593879699707, + "learning_rate": 1.169509758721377e-05, + "log_odds_chosen": 11.154789924621582, + "log_odds_ratio": -0.0002964198647532612, + "logits/chosen": -0.18853700160980225, + "logits/rejected": -0.14731237292289734, + "logps/chosen": -0.00035180928534828126, + "logps/rejected": -2.643852710723877, + "loss": 0.4617, + "nll_loss": 0.11539193987846375, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5180928534828126e-05, + "rewards/margins": 0.2643500864505768, + "rewards/rejected": -0.26438528299331665, + "step": 11416 + }, + { + "epoch": 7.895573997233749, + "grad_norm": 13.949653625488281, + "learning_rate": 1.1691255570923621e-05, + "log_odds_chosen": 13.114603042602539, + "log_odds_ratio": -8.687659828865435e-06, + "logits/chosen": -0.3029056191444397, + "logits/rejected": -0.41169899702072144, + "logps/chosen": -7.216949597932398e-05, + "logps/rejected": -3.5470874309539795, + "loss": 0.6499, + "nll_loss": 0.16247348487377167, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.216949597932398e-06, + "rewards/margins": 0.35470154881477356, + "rewards/rejected": -0.3547087609767914, + "step": 11417 + }, + { + "epoch": 7.8962655601659755, + "grad_norm": 5.432498931884766, + "learning_rate": 1.1687413554633472e-05, + "log_odds_chosen": 11.624490737915039, + "log_odds_ratio": -3.6980825825594366e-05, + "logits/chosen": 0.11878293752670288, + "logits/rejected": -0.020380035042762756, + "logps/chosen": -0.00031964771915227175, + "logps/rejected": -2.654505491256714, + "loss": 0.5729, + "nll_loss": 0.1432090550661087, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.196476973243989e-05, + "rewards/margins": 0.2654185891151428, + "rewards/rejected": -0.26545053720474243, + "step": 11418 + }, + { + "epoch": 7.896957123098202, + "grad_norm": 3.285291910171509, + "learning_rate": 1.1683571538343323e-05, + "log_odds_chosen": 11.328643798828125, + "log_odds_ratio": -4.0556675230618566e-05, + "logits/chosen": -0.32337823510169983, + "logits/rejected": -0.32799458503723145, + "logps/chosen": -0.00010209741594735533, + "logps/rejected": -2.072064161300659, + "loss": 0.437, + "nll_loss": 0.1092538833618164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0209741958533414e-05, + "rewards/margins": 0.2071962058544159, + "rewards/rejected": -0.20720641314983368, + "step": 11419 + }, + { + "epoch": 7.897648686030429, + "grad_norm": 5.5434465408325195, + "learning_rate": 1.1679729522053175e-05, + "log_odds_chosen": 11.171856880187988, + "log_odds_ratio": -6.727038999088109e-05, + "logits/chosen": -0.48523566126823425, + "logits/rejected": -0.4981197714805603, + "logps/chosen": -0.00019483445794321597, + "logps/rejected": -2.566240072250366, + "loss": 0.4962, + "nll_loss": 0.12404817342758179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9483444702927954e-05, + "rewards/margins": 0.25660449266433716, + "rewards/rejected": -0.2566240131855011, + "step": 11420 + }, + { + "epoch": 7.898340248962656, + "grad_norm": 7.740990161895752, + "learning_rate": 1.1675887505763024e-05, + "log_odds_chosen": 11.712762832641602, + "log_odds_ratio": -2.097015931212809e-05, + "logits/chosen": -0.6795220375061035, + "logits/rejected": -0.6612729430198669, + "logps/chosen": -0.00015862970030866563, + "logps/rejected": -2.8680872917175293, + "loss": 0.4811, + "nll_loss": 0.12026054412126541, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.586297366884537e-05, + "rewards/margins": 0.2867928743362427, + "rewards/rejected": -0.28680872917175293, + "step": 11421 + }, + { + "epoch": 7.899031811894883, + "grad_norm": 4.974034786224365, + "learning_rate": 1.1672045489472875e-05, + "log_odds_chosen": 10.817819595336914, + "log_odds_ratio": -6.758319068467245e-05, + "logits/chosen": -0.35871621966362, + "logits/rejected": -0.3858879804611206, + "logps/chosen": -0.0001772976538632065, + "logps/rejected": -2.009894371032715, + "loss": 0.3896, + "nll_loss": 0.09739303588867188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.772976429492701e-05, + "rewards/margins": 0.20097172260284424, + "rewards/rejected": -0.20098945498466492, + "step": 11422 + }, + { + "epoch": 7.89972337482711, + "grad_norm": 7.3034210205078125, + "learning_rate": 1.1668203473182728e-05, + "log_odds_chosen": 9.981136322021484, + "log_odds_ratio": -9.297035285271704e-05, + "logits/chosen": -0.38603436946868896, + "logits/rejected": -0.4565512537956238, + "logps/chosen": -0.00041265651816502213, + "logps/rejected": -1.6070398092269897, + "loss": 0.3876, + "nll_loss": 0.09689034521579742, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1265651816502213e-05, + "rewards/margins": 0.1606627255678177, + "rewards/rejected": -0.16070398688316345, + "step": 11423 + }, + { + "epoch": 7.9004149377593365, + "grad_norm": 3.654689073562622, + "learning_rate": 1.1664361456892578e-05, + "log_odds_chosen": 10.258405685424805, + "log_odds_ratio": -0.0008943437715061009, + "logits/chosen": -0.12104588747024536, + "logits/rejected": -0.20898351073265076, + "logps/chosen": -0.008668285794556141, + "logps/rejected": -2.51485013961792, + "loss": 0.3651, + "nll_loss": 0.09118712693452835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008668285445310175, + "rewards/margins": 0.2506181597709656, + "rewards/rejected": -0.25148501992225647, + "step": 11424 + }, + { + "epoch": 7.901106500691563, + "grad_norm": 4.506947994232178, + "learning_rate": 1.1660519440602429e-05, + "log_odds_chosen": 10.826162338256836, + "log_odds_ratio": -3.588577237678692e-05, + "logits/chosen": -0.24709933996200562, + "logits/rejected": -0.34494447708129883, + "logps/chosen": -0.00024828611640259624, + "logps/rejected": -1.8355225324630737, + "loss": 0.3383, + "nll_loss": 0.08458095788955688, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4828612367855385e-05, + "rewards/margins": 0.18352742493152618, + "rewards/rejected": -0.18355226516723633, + "step": 11425 + }, + { + "epoch": 7.90179806362379, + "grad_norm": 6.3836469650268555, + "learning_rate": 1.165667742431228e-05, + "log_odds_chosen": 10.288522720336914, + "log_odds_ratio": -0.00035393572761677206, + "logits/chosen": -0.7220645546913147, + "logits/rejected": -0.8198657631874084, + "logps/chosen": -0.0012157809687778354, + "logps/rejected": -2.153752565383911, + "loss": 0.5226, + "nll_loss": 0.13060925900936127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001215781012433581, + "rewards/margins": 0.21525368094444275, + "rewards/rejected": -0.21537525951862335, + "step": 11426 + }, + { + "epoch": 7.902489626556017, + "grad_norm": 3.665544271469116, + "learning_rate": 1.165283540802213e-05, + "log_odds_chosen": 10.943013191223145, + "log_odds_ratio": -8.478549716528505e-05, + "logits/chosen": -0.548704981803894, + "logits/rejected": -0.5903621315956116, + "logps/chosen": -0.00020362951909191906, + "logps/rejected": -2.2660512924194336, + "loss": 0.3336, + "nll_loss": 0.08338891714811325, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0362949726404622e-05, + "rewards/margins": 0.226584792137146, + "rewards/rejected": -0.2266051471233368, + "step": 11427 + }, + { + "epoch": 7.903181189488244, + "grad_norm": 5.852066516876221, + "learning_rate": 1.1648993391731981e-05, + "log_odds_chosen": 10.770855903625488, + "log_odds_ratio": -9.15360651561059e-05, + "logits/chosen": -0.3121373951435089, + "logits/rejected": -0.42780694365501404, + "logps/chosen": -0.0005003748228773475, + "logps/rejected": -2.425102710723877, + "loss": 0.5866, + "nll_loss": 0.14663726091384888, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0037480832543224e-05, + "rewards/margins": 0.2424602508544922, + "rewards/rejected": -0.24251027405261993, + "step": 11428 + }, + { + "epoch": 7.903872752420471, + "grad_norm": 3.679690361022949, + "learning_rate": 1.1645151375441832e-05, + "log_odds_chosen": 10.845489501953125, + "log_odds_ratio": -3.326304431539029e-05, + "logits/chosen": -0.5469524264335632, + "logits/rejected": -0.5827398896217346, + "logps/chosen": -0.00013200710236560553, + "logps/rejected": -1.939971685409546, + "loss": 0.3076, + "nll_loss": 0.07689175009727478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3200711691752076e-05, + "rewards/margins": 0.19398397207260132, + "rewards/rejected": -0.19399715960025787, + "step": 11429 + }, + { + "epoch": 7.904564315352697, + "grad_norm": 4.049372673034668, + "learning_rate": 1.1641309359151683e-05, + "log_odds_chosen": 11.037715911865234, + "log_odds_ratio": -0.00010179554374190047, + "logits/chosen": -0.06870530545711517, + "logits/rejected": -0.08930937945842743, + "logps/chosen": -0.00024262507213279605, + "logps/rejected": -2.683316230773926, + "loss": 0.43, + "nll_loss": 0.1074962466955185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4262508304673247e-05, + "rewards/margins": 0.2683073580265045, + "rewards/rejected": -0.2683316171169281, + "step": 11430 + }, + { + "epoch": 7.905255878284924, + "grad_norm": 4.094974994659424, + "learning_rate": 1.1637467342861534e-05, + "log_odds_chosen": 12.618879318237305, + "log_odds_ratio": -7.0655078161507845e-06, + "logits/chosen": -0.16843059659004211, + "logits/rejected": -0.24288104474544525, + "logps/chosen": -9.244784450856969e-05, + "logps/rejected": -2.8020706176757812, + "loss": 0.3937, + "nll_loss": 0.09841499477624893, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.244784450856969e-06, + "rewards/margins": 0.28019779920578003, + "rewards/rejected": -0.2802070379257202, + "step": 11431 + }, + { + "epoch": 7.905947441217151, + "grad_norm": 7.394796848297119, + "learning_rate": 1.1633625326571384e-05, + "log_odds_chosen": 12.214794158935547, + "log_odds_ratio": -5.331547981768381e-06, + "logits/chosen": -0.6335046291351318, + "logits/rejected": -0.6970090866088867, + "logps/chosen": -6.661218503722921e-05, + "logps/rejected": -2.4878671169281006, + "loss": 0.6625, + "nll_loss": 0.1656339168548584, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6612192313186824e-06, + "rewards/margins": 0.24878007173538208, + "rewards/rejected": -0.24878671765327454, + "step": 11432 + }, + { + "epoch": 7.906639004149378, + "grad_norm": 4.621971130371094, + "learning_rate": 1.1629783310281237e-05, + "log_odds_chosen": 11.147924423217773, + "log_odds_ratio": -2.468354068696499e-05, + "logits/chosen": -0.40830671787261963, + "logits/rejected": -0.459160178899765, + "logps/chosen": -0.00010135288175661117, + "logps/rejected": -1.8910176753997803, + "loss": 0.4126, + "nll_loss": 0.1031472310423851, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0135287993762176e-05, + "rewards/margins": 0.18909165263175964, + "rewards/rejected": -0.18910178542137146, + "step": 11433 + }, + { + "epoch": 7.907330567081605, + "grad_norm": 5.314410209655762, + "learning_rate": 1.1625941293991088e-05, + "log_odds_chosen": 10.044839859008789, + "log_odds_ratio": -0.00013832849799655378, + "logits/chosen": -0.4927716851234436, + "logits/rejected": -0.4732247292995453, + "logps/chosen": -0.00022110360441729426, + "logps/rejected": -1.7093604803085327, + "loss": 0.4769, + "nll_loss": 0.11922001838684082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.211036189692095e-05, + "rewards/margins": 0.1709139347076416, + "rewards/rejected": -0.17093604803085327, + "step": 11434 + }, + { + "epoch": 7.908022130013832, + "grad_norm": 3.985020399093628, + "learning_rate": 1.1622099277700937e-05, + "log_odds_chosen": 11.439200401306152, + "log_odds_ratio": -3.155437298119068e-05, + "logits/chosen": -0.3941129744052887, + "logits/rejected": -0.39420852065086365, + "logps/chosen": -0.0002207858196925372, + "logps/rejected": -2.724975347518921, + "loss": 0.839, + "nll_loss": 0.20973655581474304, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.20785823330516e-05, + "rewards/margins": 0.2724754810333252, + "rewards/rejected": -0.2724975347518921, + "step": 11435 + }, + { + "epoch": 7.908713692946058, + "grad_norm": 5.979078769683838, + "learning_rate": 1.1618257261410789e-05, + "log_odds_chosen": 11.143678665161133, + "log_odds_ratio": -3.0088162020547315e-05, + "logits/chosen": -0.5770678520202637, + "logits/rejected": -0.6415504813194275, + "logps/chosen": -8.087012975011021e-05, + "logps/rejected": -1.7887535095214844, + "loss": 0.438, + "nll_loss": 0.10950732231140137, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.08701224741526e-06, + "rewards/margins": 0.17886726558208466, + "rewards/rejected": -0.17887535691261292, + "step": 11436 + }, + { + "epoch": 7.909405255878285, + "grad_norm": 3.349957227706909, + "learning_rate": 1.161441524512064e-05, + "log_odds_chosen": 10.178857803344727, + "log_odds_ratio": -0.0002570028882473707, + "logits/chosen": -0.3043663799762726, + "logits/rejected": -0.26984646916389465, + "logps/chosen": -0.0008028277661651373, + "logps/rejected": -1.7114442586898804, + "loss": 0.2807, + "nll_loss": 0.07015819102525711, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.028277079574764e-05, + "rewards/margins": 0.17106413841247559, + "rewards/rejected": -0.17114444077014923, + "step": 11437 + }, + { + "epoch": 7.910096818810512, + "grad_norm": 10.283522605895996, + "learning_rate": 1.161057322883049e-05, + "log_odds_chosen": 9.67963981628418, + "log_odds_ratio": -0.0006223957170732319, + "logits/chosen": -0.5854220986366272, + "logits/rejected": -0.598517894744873, + "logps/chosen": -0.0002304011140950024, + "logps/rejected": -0.9623869061470032, + "loss": 0.4476, + "nll_loss": 0.11184649169445038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3040109226712957e-05, + "rewards/margins": 0.0962156429886818, + "rewards/rejected": -0.09623868018388748, + "step": 11438 + }, + { + "epoch": 7.910788381742739, + "grad_norm": 7.8791913986206055, + "learning_rate": 1.1606731212540341e-05, + "log_odds_chosen": 11.597796440124512, + "log_odds_ratio": -2.790990583889652e-05, + "logits/chosen": -0.503038763999939, + "logits/rejected": -0.5723360776901245, + "logps/chosen": -9.536659490549937e-05, + "logps/rejected": -2.3894646167755127, + "loss": 0.5268, + "nll_loss": 0.13169816136360168, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536659490549937e-06, + "rewards/margins": 0.2389369159936905, + "rewards/rejected": -0.23894645273685455, + "step": 11439 + }, + { + "epoch": 7.911479944674966, + "grad_norm": 3.3033556938171387, + "learning_rate": 1.1602889196250192e-05, + "log_odds_chosen": 10.922914505004883, + "log_odds_ratio": -3.322264092275873e-05, + "logits/chosen": -0.14358104765415192, + "logits/rejected": -0.15931063890457153, + "logps/chosen": -0.00041416287422180176, + "logps/rejected": -2.0859086513519287, + "loss": 0.4858, + "nll_loss": 0.12145288288593292, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.141628960496746e-05, + "rewards/margins": 0.20854943990707397, + "rewards/rejected": -0.20859088003635406, + "step": 11440 + }, + { + "epoch": 7.912171507607193, + "grad_norm": 3.984626293182373, + "learning_rate": 1.1599047179960043e-05, + "log_odds_chosen": 10.762500762939453, + "log_odds_ratio": -0.00010356571146985516, + "logits/chosen": -0.728722333908081, + "logits/rejected": -0.72029709815979, + "logps/chosen": -0.0002596940321382135, + "logps/rejected": -1.7457057237625122, + "loss": 0.4164, + "nll_loss": 0.10409042239189148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.596940248622559e-05, + "rewards/margins": 0.17454461753368378, + "rewards/rejected": -0.17457059025764465, + "step": 11441 + }, + { + "epoch": 7.912863070539419, + "grad_norm": 4.8249006271362305, + "learning_rate": 1.1595205163669895e-05, + "log_odds_chosen": 11.676660537719727, + "log_odds_ratio": -5.567781408899464e-05, + "logits/chosen": -0.23649048805236816, + "logits/rejected": -0.27683085203170776, + "logps/chosen": -0.0002755652240011841, + "logps/rejected": -3.1232707500457764, + "loss": 0.5273, + "nll_loss": 0.13182684779167175, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.755652167252265e-05, + "rewards/margins": 0.31229954957962036, + "rewards/rejected": -0.3123270869255066, + "step": 11442 + }, + { + "epoch": 7.913554633471646, + "grad_norm": 6.4415669441223145, + "learning_rate": 1.1591363147379746e-05, + "log_odds_chosen": 10.647878646850586, + "log_odds_ratio": -9.721822425490245e-05, + "logits/chosen": -0.35493314266204834, + "logits/rejected": -0.2665443420410156, + "logps/chosen": -0.00035465031396597624, + "logps/rejected": -2.031789541244507, + "loss": 0.4099, + "nll_loss": 0.10246631503105164, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.546503285178915e-05, + "rewards/margins": 0.20314347743988037, + "rewards/rejected": -0.20317894220352173, + "step": 11443 + }, + { + "epoch": 7.914246196403873, + "grad_norm": 3.6676669120788574, + "learning_rate": 1.1587521131089597e-05, + "log_odds_chosen": 11.052009582519531, + "log_odds_ratio": -0.00010791603563120589, + "logits/chosen": -0.5329593420028687, + "logits/rejected": -0.4889289140701294, + "logps/chosen": -0.0007934218156151474, + "logps/rejected": -2.691516637802124, + "loss": 0.4453, + "nll_loss": 0.11130984127521515, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.934217865113169e-05, + "rewards/margins": 0.26907235383987427, + "rewards/rejected": -0.2691516876220703, + "step": 11444 + }, + { + "epoch": 7.9149377593361, + "grad_norm": 2.9632012844085693, + "learning_rate": 1.1583679114799447e-05, + "log_odds_chosen": 10.19129753112793, + "log_odds_ratio": -0.00015468697529286146, + "logits/chosen": -0.7022165060043335, + "logits/rejected": -0.6874955892562866, + "logps/chosen": -0.00013341064914129674, + "logps/rejected": -1.3488829135894775, + "loss": 0.4193, + "nll_loss": 0.10479914397001266, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3341065823624376e-05, + "rewards/margins": 0.13487495481967926, + "rewards/rejected": -0.13488830626010895, + "step": 11445 + }, + { + "epoch": 7.915629322268327, + "grad_norm": 4.115353584289551, + "learning_rate": 1.1579837098509298e-05, + "log_odds_chosen": 11.92996883392334, + "log_odds_ratio": -0.00016364931070711464, + "logits/chosen": 0.059397876262664795, + "logits/rejected": -0.04412535950541496, + "logps/chosen": -0.00012382738350424916, + "logps/rejected": -2.920637369155884, + "loss": 0.6305, + "nll_loss": 0.15759626030921936, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2382738532323856e-05, + "rewards/margins": 0.2920513451099396, + "rewards/rejected": -0.29206374287605286, + "step": 11446 + }, + { + "epoch": 7.9163208852005535, + "grad_norm": 4.740466117858887, + "learning_rate": 1.1575995082219149e-05, + "log_odds_chosen": 10.352497100830078, + "log_odds_ratio": -0.00018813650240190327, + "logits/chosen": 0.11943431943655014, + "logits/rejected": 0.10199232399463654, + "logps/chosen": -0.0015034006210044026, + "logps/rejected": -2.6411702632904053, + "loss": 0.5293, + "nll_loss": 0.13230976462364197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015034008538350463, + "rewards/margins": 0.26396670937538147, + "rewards/rejected": -0.264117032289505, + "step": 11447 + }, + { + "epoch": 7.91701244813278, + "grad_norm": 4.00980806350708, + "learning_rate": 1.1572153065929001e-05, + "log_odds_chosen": 11.186810493469238, + "log_odds_ratio": -0.00021705195831600577, + "logits/chosen": -0.3363361656665802, + "logits/rejected": -0.3838905096054077, + "logps/chosen": -0.0002448104496579617, + "logps/rejected": -2.4751431941986084, + "loss": 0.3961, + "nll_loss": 0.09899172931909561, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4481045329594053e-05, + "rewards/margins": 0.2474898397922516, + "rewards/rejected": -0.24751430749893188, + "step": 11448 + }, + { + "epoch": 7.917704011065007, + "grad_norm": 5.31357479095459, + "learning_rate": 1.156831104963885e-05, + "log_odds_chosen": 10.24382495880127, + "log_odds_ratio": -0.00011630626977421343, + "logits/chosen": -0.18191811442375183, + "logits/rejected": -0.13572078943252563, + "logps/chosen": -0.0009642197983339429, + "logps/rejected": -2.7066733837127686, + "loss": 0.6845, + "nll_loss": 0.1711220145225525, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.64219871093519e-05, + "rewards/margins": 0.27057090401649475, + "rewards/rejected": -0.27066731452941895, + "step": 11449 + }, + { + "epoch": 7.918395573997234, + "grad_norm": 7.6785197257995605, + "learning_rate": 1.1564469033348701e-05, + "log_odds_chosen": 10.965532302856445, + "log_odds_ratio": -3.787028981605545e-05, + "logits/chosen": -0.17806333303451538, + "logits/rejected": -0.3148902654647827, + "logps/chosen": -0.00010926988761639223, + "logps/rejected": -1.8761992454528809, + "loss": 0.2852, + "nll_loss": 0.07128973305225372, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0926989489234984e-05, + "rewards/margins": 0.1876089870929718, + "rewards/rejected": -0.18761992454528809, + "step": 11450 + }, + { + "epoch": 7.919087136929461, + "grad_norm": 6.197747707366943, + "learning_rate": 1.1560627017058554e-05, + "log_odds_chosen": 11.333625793457031, + "log_odds_ratio": -9.571119153406471e-05, + "logits/chosen": -0.09422504901885986, + "logits/rejected": -0.2055082619190216, + "logps/chosen": -0.00024049320199992508, + "logps/rejected": -2.4040274620056152, + "loss": 0.4777, + "nll_loss": 0.11942285299301147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.404932092758827e-05, + "rewards/margins": 0.24037869274616241, + "rewards/rejected": -0.24040274322032928, + "step": 11451 + }, + { + "epoch": 7.919778699861688, + "grad_norm": 13.796125411987305, + "learning_rate": 1.1556785000768404e-05, + "log_odds_chosen": 11.29486083984375, + "log_odds_ratio": -7.908118277555332e-05, + "logits/chosen": -0.1983284056186676, + "logits/rejected": -0.22550822794437408, + "logps/chosen": -0.00020663285977207124, + "logps/rejected": -2.398308038711548, + "loss": 0.4954, + "nll_loss": 0.12384440749883652, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.066328306682408e-05, + "rewards/margins": 0.23981015384197235, + "rewards/rejected": -0.23983082175254822, + "step": 11452 + }, + { + "epoch": 7.9204702627939145, + "grad_norm": 6.0768208503723145, + "learning_rate": 1.1552942984478255e-05, + "log_odds_chosen": 12.187479019165039, + "log_odds_ratio": -2.8749112971127033e-05, + "logits/chosen": -0.4303026497364044, + "logits/rejected": -0.3580453097820282, + "logps/chosen": -0.00015580881154164672, + "logps/rejected": -2.984548568725586, + "loss": 0.5069, + "nll_loss": 0.12673157453536987, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5580882973154075e-05, + "rewards/margins": 0.29843926429748535, + "rewards/rejected": -0.29845482110977173, + "step": 11453 + }, + { + "epoch": 7.921161825726141, + "grad_norm": 6.099323749542236, + "learning_rate": 1.1549100968188106e-05, + "log_odds_chosen": 10.10976791381836, + "log_odds_ratio": -9.947900980478153e-05, + "logits/chosen": -0.38822758197784424, + "logits/rejected": -0.3949180841445923, + "logps/chosen": -0.0005124437739141285, + "logps/rejected": -1.9129855632781982, + "loss": 0.3069, + "nll_loss": 0.07670585811138153, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1244373025838286e-05, + "rewards/margins": 0.1912473440170288, + "rewards/rejected": -0.19129857420921326, + "step": 11454 + }, + { + "epoch": 7.921853388658368, + "grad_norm": 5.255620956420898, + "learning_rate": 1.1545258951897957e-05, + "log_odds_chosen": 10.686015129089355, + "log_odds_ratio": -0.00011767195246648043, + "logits/chosen": -0.05838357284665108, + "logits/rejected": 0.05113779753446579, + "logps/chosen": -0.0008657700382173061, + "logps/rejected": -2.244718074798584, + "loss": 0.6678, + "nll_loss": 0.16692709922790527, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.657699800096452e-05, + "rewards/margins": 0.22438523173332214, + "rewards/rejected": -0.2244718074798584, + "step": 11455 + }, + { + "epoch": 7.922544951590595, + "grad_norm": 4.817359447479248, + "learning_rate": 1.1541416935607807e-05, + "log_odds_chosen": 9.26960563659668, + "log_odds_ratio": -0.0009645846439525485, + "logits/chosen": -0.24357624351978302, + "logits/rejected": -0.23238223791122437, + "logps/chosen": -0.008362173102796078, + "logps/rejected": -1.7482798099517822, + "loss": 0.7388, + "nll_loss": 0.18461009860038757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008362172520719469, + "rewards/margins": 0.17399176955223083, + "rewards/rejected": -0.17482797801494598, + "step": 11456 + }, + { + "epoch": 7.923236514522822, + "grad_norm": 5.3235673904418945, + "learning_rate": 1.153757491931766e-05, + "log_odds_chosen": 10.89983081817627, + "log_odds_ratio": -0.00013438466703519225, + "logits/chosen": -0.2583634853363037, + "logits/rejected": -0.10850981622934341, + "logps/chosen": -0.00022484775399789214, + "logps/rejected": -2.3494505882263184, + "loss": 0.4482, + "nll_loss": 0.11204710602760315, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2484775399789214e-05, + "rewards/margins": 0.2349225878715515, + "rewards/rejected": -0.23494507372379303, + "step": 11457 + }, + { + "epoch": 7.923928077455049, + "grad_norm": 6.07865047454834, + "learning_rate": 1.1533732903027509e-05, + "log_odds_chosen": 11.569040298461914, + "log_odds_ratio": -2.4782182663329877e-05, + "logits/chosen": -0.3904721140861511, + "logits/rejected": -0.507940411567688, + "logps/chosen": -0.00013616510841529816, + "logps/rejected": -2.530514717102051, + "loss": 0.6587, + "nll_loss": 0.16466355323791504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3616510841529816e-05, + "rewards/margins": 0.25303786993026733, + "rewards/rejected": -0.2530514597892761, + "step": 11458 + }, + { + "epoch": 7.9246196403872755, + "grad_norm": 8.31247615814209, + "learning_rate": 1.152989088673736e-05, + "log_odds_chosen": 11.501436233520508, + "log_odds_ratio": -2.56904440902872e-05, + "logits/chosen": -0.6161126494407654, + "logits/rejected": -0.640153169631958, + "logps/chosen": -0.00013021615450270474, + "logps/rejected": -2.619170665740967, + "loss": 0.4468, + "nll_loss": 0.11169376969337463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3021613995078951e-05, + "rewards/margins": 0.2619040310382843, + "rewards/rejected": -0.2619170546531677, + "step": 11459 + }, + { + "epoch": 7.925311203319502, + "grad_norm": 6.786872386932373, + "learning_rate": 1.1526048870447212e-05, + "log_odds_chosen": 10.474227905273438, + "log_odds_ratio": -6.774486973881721e-05, + "logits/chosen": -0.0007668398320674896, + "logits/rejected": -0.2563415765762329, + "logps/chosen": -0.00016562627570237964, + "logps/rejected": -1.8405344486236572, + "loss": 0.3454, + "nll_loss": 0.08634229004383087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6562627934035845e-05, + "rewards/margins": 0.18403686583042145, + "rewards/rejected": -0.184053435921669, + "step": 11460 + }, + { + "epoch": 7.926002766251729, + "grad_norm": 3.9103434085845947, + "learning_rate": 1.1522206854157063e-05, + "log_odds_chosen": 11.24027156829834, + "log_odds_ratio": -9.854403469944373e-05, + "logits/chosen": -0.39807748794555664, + "logits/rejected": -0.4943687915802002, + "logps/chosen": -0.00030215978040359914, + "logps/rejected": -2.5182738304138184, + "loss": 0.4634, + "nll_loss": 0.11585089564323425, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.021598058694508e-05, + "rewards/margins": 0.2517971694469452, + "rewards/rejected": -0.2518273890018463, + "step": 11461 + }, + { + "epoch": 7.926694329183956, + "grad_norm": 4.208470344543457, + "learning_rate": 1.1518364837866914e-05, + "log_odds_chosen": 11.54410171508789, + "log_odds_ratio": -1.839596006902866e-05, + "logits/chosen": -0.344673216342926, + "logits/rejected": -0.4512706995010376, + "logps/chosen": -0.00018507882487028837, + "logps/rejected": -2.8429622650146484, + "loss": 0.5383, + "nll_loss": 0.13457489013671875, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8507882487028837e-05, + "rewards/margins": 0.2842777371406555, + "rewards/rejected": -0.2842962443828583, + "step": 11462 + }, + { + "epoch": 7.927385892116183, + "grad_norm": 6.069661617279053, + "learning_rate": 1.1514522821576763e-05, + "log_odds_chosen": 11.567630767822266, + "log_odds_ratio": -3.671016020234674e-05, + "logits/chosen": 0.17913606762886047, + "logits/rejected": 0.18328267335891724, + "logps/chosen": -0.00011550368799362332, + "logps/rejected": -2.375983238220215, + "loss": 0.3726, + "nll_loss": 0.09314057976007462, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1550368981261272e-05, + "rewards/margins": 0.23758679628372192, + "rewards/rejected": -0.23759834468364716, + "step": 11463 + }, + { + "epoch": 7.92807745504841, + "grad_norm": 7.161730766296387, + "learning_rate": 1.1510680805286615e-05, + "log_odds_chosen": 13.012365341186523, + "log_odds_ratio": -6.403481165762059e-06, + "logits/chosen": -0.21157574653625488, + "logits/rejected": -0.28957462310791016, + "logps/chosen": -7.955124601721764e-05, + "logps/rejected": -3.544642925262451, + "loss": 0.53, + "nll_loss": 0.13249525427818298, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.955125511216465e-06, + "rewards/margins": 0.3544563353061676, + "rewards/rejected": -0.3544643223285675, + "step": 11464 + }, + { + "epoch": 7.9287690179806365, + "grad_norm": 4.447282314300537, + "learning_rate": 1.1506838788996466e-05, + "log_odds_chosen": 10.233051300048828, + "log_odds_ratio": -7.13659028406255e-05, + "logits/chosen": -0.11875317990779877, + "logits/rejected": -0.3084731101989746, + "logps/chosen": -0.00024481149739585817, + "logps/rejected": -1.5526336431503296, + "loss": 0.5617, + "nll_loss": 0.14041319489479065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.448115083097946e-05, + "rewards/margins": 0.15523889660835266, + "rewards/rejected": -0.15526337921619415, + "step": 11465 + }, + { + "epoch": 7.929460580912863, + "grad_norm": 4.517125129699707, + "learning_rate": 1.1502996772706317e-05, + "log_odds_chosen": 10.96858024597168, + "log_odds_ratio": -5.081095878267661e-05, + "logits/chosen": -0.31112051010131836, + "logits/rejected": -0.3540710210800171, + "logps/chosen": -0.0004644246364478022, + "logps/rejected": -2.4904298782348633, + "loss": 0.4102, + "nll_loss": 0.10255465656518936, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.644246291718446e-05, + "rewards/margins": 0.2489965558052063, + "rewards/rejected": -0.24904300272464752, + "step": 11466 + }, + { + "epoch": 7.93015214384509, + "grad_norm": 6.432483196258545, + "learning_rate": 1.1499154756416167e-05, + "log_odds_chosen": 10.742692947387695, + "log_odds_ratio": -0.00017364558880217373, + "logits/chosen": -0.46216145157814026, + "logits/rejected": -0.44457200169563293, + "logps/chosen": -0.00041976282955147326, + "logps/rejected": -2.02523136138916, + "loss": 0.6607, + "nll_loss": 0.16514545679092407, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1976287320721895e-05, + "rewards/margins": 0.20248116552829742, + "rewards/rejected": -0.2025231420993805, + "step": 11467 + }, + { + "epoch": 7.930843706777317, + "grad_norm": 4.329329967498779, + "learning_rate": 1.1495312740126018e-05, + "log_odds_chosen": 11.087435722351074, + "log_odds_ratio": -3.9717786421533674e-05, + "logits/chosen": -0.37949714064598083, + "logits/rejected": -0.4751831293106079, + "logps/chosen": -0.00022894766880199313, + "logps/rejected": -2.1983184814453125, + "loss": 0.4495, + "nll_loss": 0.11236318945884705, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.289476469741203e-05, + "rewards/margins": 0.21980898082256317, + "rewards/rejected": -0.21983186900615692, + "step": 11468 + }, + { + "epoch": 7.931535269709544, + "grad_norm": 4.523159503936768, + "learning_rate": 1.1491470723835869e-05, + "log_odds_chosen": 11.431248664855957, + "log_odds_ratio": -4.009368785773404e-05, + "logits/chosen": -0.3916298747062683, + "logits/rejected": -0.44648686051368713, + "logps/chosen": -0.00023831524595152587, + "logps/rejected": -2.716752529144287, + "loss": 0.39, + "nll_loss": 0.09749448299407959, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.383152605034411e-05, + "rewards/margins": 0.27165141701698303, + "rewards/rejected": -0.2716752588748932, + "step": 11469 + }, + { + "epoch": 7.932226832641771, + "grad_norm": 3.75484037399292, + "learning_rate": 1.1487628707545721e-05, + "log_odds_chosen": 11.424932479858398, + "log_odds_ratio": -9.248127753380686e-05, + "logits/chosen": -0.4830518662929535, + "logits/rejected": -0.5308735370635986, + "logps/chosen": -0.00013168362784199417, + "logps/rejected": -2.41013765335083, + "loss": 0.3641, + "nll_loss": 0.09101329743862152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3168361874704715e-05, + "rewards/margins": 0.24100060760974884, + "rewards/rejected": -0.241013765335083, + "step": 11470 + }, + { + "epoch": 7.9329183955739975, + "grad_norm": 3.4767870903015137, + "learning_rate": 1.1483786691255572e-05, + "log_odds_chosen": 11.029151916503906, + "log_odds_ratio": -8.472451736452058e-05, + "logits/chosen": -0.412309855222702, + "logits/rejected": -0.39536869525909424, + "logps/chosen": -0.000157872709678486, + "logps/rejected": -1.6841447353363037, + "loss": 0.2727, + "nll_loss": 0.06816992908716202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5787269148859195e-05, + "rewards/margins": 0.1683986932039261, + "rewards/rejected": -0.16841447353363037, + "step": 11471 + }, + { + "epoch": 7.933609958506224, + "grad_norm": 4.748891830444336, + "learning_rate": 1.1479944674965421e-05, + "log_odds_chosen": 11.620475769042969, + "log_odds_ratio": -3.167948671034537e-05, + "logits/chosen": -0.4358367323875427, + "logits/rejected": -0.48927396535873413, + "logps/chosen": -0.00023318035528063774, + "logps/rejected": -3.058469533920288, + "loss": 0.8373, + "nll_loss": 0.2093205600976944, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.331803807464894e-05, + "rewards/margins": 0.3058236241340637, + "rewards/rejected": -0.3058469593524933, + "step": 11472 + }, + { + "epoch": 7.934301521438451, + "grad_norm": 3.9937808513641357, + "learning_rate": 1.1476102658675274e-05, + "log_odds_chosen": 11.555363655090332, + "log_odds_ratio": -1.470993083785288e-05, + "logits/chosen": 0.05103334039449692, + "logits/rejected": 0.05239225924015045, + "logps/chosen": -0.0001619577524252236, + "logps/rejected": -2.7972123622894287, + "loss": 0.5296, + "nll_loss": 0.13239486515522003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6195774151128717e-05, + "rewards/margins": 0.2797050178050995, + "rewards/rejected": -0.279721200466156, + "step": 11473 + }, + { + "epoch": 7.934993084370678, + "grad_norm": 3.7396841049194336, + "learning_rate": 1.1472260642385124e-05, + "log_odds_chosen": 11.454257011413574, + "log_odds_ratio": -0.00012060473090969026, + "logits/chosen": 0.004928797483444214, + "logits/rejected": -0.13088713586330414, + "logps/chosen": -0.00043558087782002985, + "logps/rejected": -3.108194351196289, + "loss": 0.4087, + "nll_loss": 0.10216562449932098, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.355808414402418e-05, + "rewards/margins": 0.31077584624290466, + "rewards/rejected": -0.3108194172382355, + "step": 11474 + }, + { + "epoch": 7.935684647302905, + "grad_norm": 6.406213760375977, + "learning_rate": 1.1468418626094975e-05, + "log_odds_chosen": 10.826868057250977, + "log_odds_ratio": -3.152354838675819e-05, + "logits/chosen": -0.38109397888183594, + "logits/rejected": -0.46262824535369873, + "logps/chosen": -0.0002834878396242857, + "logps/rejected": -2.1760125160217285, + "loss": 0.6058, + "nll_loss": 0.1514561027288437, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8348786145215854e-05, + "rewards/margins": 0.21757292747497559, + "rewards/rejected": -0.21760126948356628, + "step": 11475 + }, + { + "epoch": 7.936376210235132, + "grad_norm": 4.276910305023193, + "learning_rate": 1.1464576609804826e-05, + "log_odds_chosen": 11.250327110290527, + "log_odds_ratio": -3.175999518134631e-05, + "logits/chosen": -0.11398166418075562, + "logits/rejected": -0.13943475484848022, + "logps/chosen": -0.00012849734048359096, + "logps/rejected": -2.0981101989746094, + "loss": 0.354, + "nll_loss": 0.08849801123142242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2849733138864394e-05, + "rewards/margins": 0.2097981572151184, + "rewards/rejected": -0.2098110169172287, + "step": 11476 + }, + { + "epoch": 7.9370677731673585, + "grad_norm": 4.719145774841309, + "learning_rate": 1.1460734593514677e-05, + "log_odds_chosen": 11.08926773071289, + "log_odds_ratio": -2.3812872314010747e-05, + "logits/chosen": -0.00807216577231884, + "logits/rejected": -0.039079755544662476, + "logps/chosen": -0.00039097192347981036, + "logps/rejected": -2.996049404144287, + "loss": 0.853, + "nll_loss": 0.21324507892131805, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.909719453076832e-05, + "rewards/margins": 0.2995658218860626, + "rewards/rejected": -0.2996049225330353, + "step": 11477 + }, + { + "epoch": 7.937759336099585, + "grad_norm": 16.17727279663086, + "learning_rate": 1.1456892577224527e-05, + "log_odds_chosen": 10.801301002502441, + "log_odds_ratio": -5.282912024995312e-05, + "logits/chosen": -0.3326207399368286, + "logits/rejected": -0.3832235634326935, + "logps/chosen": -0.0002026814327109605, + "logps/rejected": -2.0088541507720947, + "loss": 0.4545, + "nll_loss": 0.11361575871706009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0268143998691812e-05, + "rewards/margins": 0.20086514949798584, + "rewards/rejected": -0.20088540017604828, + "step": 11478 + }, + { + "epoch": 7.938450899031812, + "grad_norm": 5.010890007019043, + "learning_rate": 1.145305056093438e-05, + "log_odds_chosen": 9.897591590881348, + "log_odds_ratio": -0.0007988571305759251, + "logits/chosen": -0.5147840976715088, + "logits/rejected": -0.4357095956802368, + "logps/chosen": -0.0008524827426299453, + "logps/rejected": -2.4883193969726562, + "loss": 0.6784, + "nll_loss": 0.16951148211956024, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.524827717337757e-05, + "rewards/margins": 0.24874672293663025, + "rewards/rejected": -0.24883195757865906, + "step": 11479 + }, + { + "epoch": 7.939142461964039, + "grad_norm": 4.151998996734619, + "learning_rate": 1.144920854464423e-05, + "log_odds_chosen": 9.914112091064453, + "log_odds_ratio": -0.0006920411833561957, + "logits/chosen": -0.42303866147994995, + "logits/rejected": -0.44322729110717773, + "logps/chosen": -0.0007232284406200051, + "logps/rejected": -1.6944925785064697, + "loss": 0.4771, + "nll_loss": 0.11921073496341705, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.232284406200051e-05, + "rewards/margins": 0.169376939535141, + "rewards/rejected": -0.16944925487041473, + "step": 11480 + }, + { + "epoch": 7.939834024896266, + "grad_norm": 4.5900702476501465, + "learning_rate": 1.144536652835408e-05, + "log_odds_chosen": 11.515774726867676, + "log_odds_ratio": -0.0001623444986762479, + "logits/chosen": -0.46452170610427856, + "logits/rejected": -0.5852384567260742, + "logps/chosen": -0.0002695045550353825, + "logps/rejected": -3.310964584350586, + "loss": 0.4925, + "nll_loss": 0.12309806793928146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.695045441214461e-05, + "rewards/margins": 0.3310695290565491, + "rewards/rejected": -0.33109647035598755, + "step": 11481 + }, + { + "epoch": 7.940525587828493, + "grad_norm": 4.539413928985596, + "learning_rate": 1.1441524512063932e-05, + "log_odds_chosen": 12.16585922241211, + "log_odds_ratio": -3.725421993294731e-05, + "logits/chosen": -0.1982676088809967, + "logits/rejected": -0.30366623401641846, + "logps/chosen": -0.0003364937729202211, + "logps/rejected": -3.962510108947754, + "loss": 0.5444, + "nll_loss": 0.13608859479427338, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3649375836830586e-05, + "rewards/margins": 0.39621734619140625, + "rewards/rejected": -0.39625105261802673, + "step": 11482 + }, + { + "epoch": 7.941217150760719, + "grad_norm": 5.325148105621338, + "learning_rate": 1.1437682495773783e-05, + "log_odds_chosen": 10.951173782348633, + "log_odds_ratio": -3.835307143162936e-05, + "logits/chosen": -0.5989179015159607, + "logits/rejected": -0.6143508553504944, + "logps/chosen": -0.0007528235437348485, + "logps/rejected": -2.59660005569458, + "loss": 0.7433, + "nll_loss": 0.18583013117313385, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.528235437348485e-05, + "rewards/margins": 0.2595847249031067, + "rewards/rejected": -0.259660005569458, + "step": 11483 + }, + { + "epoch": 7.941908713692946, + "grad_norm": 3.9619648456573486, + "learning_rate": 1.1433840479483633e-05, + "log_odds_chosen": 10.706184387207031, + "log_odds_ratio": -7.730752258794382e-05, + "logits/chosen": -0.5238598585128784, + "logits/rejected": -0.5505596995353699, + "logps/chosen": -0.0002677328302524984, + "logps/rejected": -1.9348207712173462, + "loss": 0.3211, + "nll_loss": 0.08026138693094254, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6773284844239242e-05, + "rewards/margins": 0.19345532357692719, + "rewards/rejected": -0.19348207116127014, + "step": 11484 + }, + { + "epoch": 7.942600276625173, + "grad_norm": 3.4554922580718994, + "learning_rate": 1.1429998463193484e-05, + "log_odds_chosen": 10.798149108886719, + "log_odds_ratio": -7.70141341490671e-05, + "logits/chosen": -0.0674898773431778, + "logits/rejected": -0.22109772264957428, + "logps/chosen": -0.00019740698917303234, + "logps/rejected": -1.794944405555725, + "loss": 0.3689, + "nll_loss": 0.09222354739904404, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9740698917303234e-05, + "rewards/margins": 0.17947471141815186, + "rewards/rejected": -0.1794944405555725, + "step": 11485 + }, + { + "epoch": 7.9432918395574, + "grad_norm": 10.894437789916992, + "learning_rate": 1.1426156446903335e-05, + "log_odds_chosen": 11.126977920532227, + "log_odds_ratio": -2.2043872377253138e-05, + "logits/chosen": 0.04185080528259277, + "logits/rejected": -0.05076335370540619, + "logps/chosen": -0.0001953870232682675, + "logps/rejected": -2.524343252182007, + "loss": 0.5484, + "nll_loss": 0.13709130883216858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9538700144039467e-05, + "rewards/margins": 0.2524147927761078, + "rewards/rejected": -0.25243431329727173, + "step": 11486 + }, + { + "epoch": 7.943983402489627, + "grad_norm": 3.130251407623291, + "learning_rate": 1.1422314430613186e-05, + "log_odds_chosen": 11.048722267150879, + "log_odds_ratio": -2.3887419956736267e-05, + "logits/chosen": 0.43735024333000183, + "logits/rejected": 0.42700478434562683, + "logps/chosen": -0.00016354123363271356, + "logps/rejected": -2.004218816757202, + "loss": 0.3407, + "nll_loss": 0.08516620099544525, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6354124454664998e-05, + "rewards/margins": 0.2004055380821228, + "rewards/rejected": -0.20042188465595245, + "step": 11487 + }, + { + "epoch": 7.944674965421854, + "grad_norm": 3.934866428375244, + "learning_rate": 1.1418472414323038e-05, + "log_odds_chosen": 11.085222244262695, + "log_odds_ratio": -4.9376852985005826e-05, + "logits/chosen": -0.007519755512475967, + "logits/rejected": -0.07623255252838135, + "logps/chosen": -9.085766214411706e-05, + "logps/rejected": -2.0956532955169678, + "loss": 0.4641, + "nll_loss": 0.11601592600345612, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.085766578209586e-06, + "rewards/margins": 0.20955625176429749, + "rewards/rejected": -0.20956535637378693, + "step": 11488 + }, + { + "epoch": 7.94536652835408, + "grad_norm": 3.9306583404541016, + "learning_rate": 1.1414630398032889e-05, + "log_odds_chosen": 9.938104629516602, + "log_odds_ratio": -0.00024783535627648234, + "logits/chosen": -0.18199634552001953, + "logits/rejected": -0.19213847815990448, + "logps/chosen": -0.000775124819483608, + "logps/rejected": -1.7354786396026611, + "loss": 0.4551, + "nll_loss": 0.11375893652439117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.751248631393537e-05, + "rewards/margins": 0.17347033321857452, + "rewards/rejected": -0.17354784905910492, + "step": 11489 + }, + { + "epoch": 7.946058091286307, + "grad_norm": 5.256687164306641, + "learning_rate": 1.141078838174274e-05, + "log_odds_chosen": 11.401453018188477, + "log_odds_ratio": -2.7039030101150274e-05, + "logits/chosen": -0.0590481162071228, + "logits/rejected": -0.1423584222793579, + "logps/chosen": -0.00030657826573587954, + "logps/rejected": -2.694973945617676, + "loss": 0.498, + "nll_loss": 0.12448520213365555, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0657829483971e-05, + "rewards/margins": 0.269466757774353, + "rewards/rejected": -0.2694973945617676, + "step": 11490 + }, + { + "epoch": 7.946749654218534, + "grad_norm": 4.1328582763671875, + "learning_rate": 1.140694636545259e-05, + "log_odds_chosen": 9.960987091064453, + "log_odds_ratio": -9.83233330771327e-05, + "logits/chosen": -0.2254234403371811, + "logits/rejected": -0.3161728084087372, + "logps/chosen": -0.0005620485171675682, + "logps/rejected": -1.6886545419692993, + "loss": 0.3165, + "nll_loss": 0.07911548018455505, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620485171675682e-05, + "rewards/margins": 0.16880926489830017, + "rewards/rejected": -0.16886545717716217, + "step": 11491 + }, + { + "epoch": 7.947441217150761, + "grad_norm": 5.220170497894287, + "learning_rate": 1.1403104349162441e-05, + "log_odds_chosen": 9.741656303405762, + "log_odds_ratio": -0.00037368456833064556, + "logits/chosen": 0.35090339183807373, + "logits/rejected": 0.47903770208358765, + "logps/chosen": -0.0005976616521365941, + "logps/rejected": -1.4262577295303345, + "loss": 0.5657, + "nll_loss": 0.14137572050094604, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.976616375846788e-05, + "rewards/margins": 0.1425660103559494, + "rewards/rejected": -0.14262576401233673, + "step": 11492 + }, + { + "epoch": 7.948132780082988, + "grad_norm": 5.341917514801025, + "learning_rate": 1.1399262332872292e-05, + "log_odds_chosen": 11.33029842376709, + "log_odds_ratio": -7.657535024918616e-05, + "logits/chosen": -0.37551289796829224, + "logits/rejected": -0.34889504313468933, + "logps/chosen": -0.0002113444497808814, + "logps/rejected": -2.9321506023406982, + "loss": 0.4439, + "nll_loss": 0.11097773909568787, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.113444497808814e-05, + "rewards/margins": 0.2931939363479614, + "rewards/rejected": -0.2932150661945343, + "step": 11493 + }, + { + "epoch": 7.948824343015215, + "grad_norm": 3.6530838012695312, + "learning_rate": 1.1395420316582144e-05, + "log_odds_chosen": 11.197941780090332, + "log_odds_ratio": -5.698140012100339e-05, + "logits/chosen": -0.18177399039268494, + "logits/rejected": -0.34647101163864136, + "logps/chosen": -0.000269897049292922, + "logps/rejected": -2.4861698150634766, + "loss": 0.3696, + "nll_loss": 0.09239768236875534, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.698970456549432e-05, + "rewards/margins": 0.24859000742435455, + "rewards/rejected": -0.2486169934272766, + "step": 11494 + }, + { + "epoch": 7.949515905947441, + "grad_norm": 4.900040149688721, + "learning_rate": 1.1391578300291993e-05, + "log_odds_chosen": 11.157360076904297, + "log_odds_ratio": -2.2148213247419335e-05, + "logits/chosen": -0.04906994849443436, + "logits/rejected": -0.3092145323753357, + "logps/chosen": -0.0004341888125054538, + "logps/rejected": -2.4475364685058594, + "loss": 0.5372, + "nll_loss": 0.1343008279800415, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3418887798907235e-05, + "rewards/margins": 0.24471025168895721, + "rewards/rejected": -0.2447536587715149, + "step": 11495 + }, + { + "epoch": 7.950207468879668, + "grad_norm": 8.374804496765137, + "learning_rate": 1.1387736284001844e-05, + "log_odds_chosen": 10.95303726196289, + "log_odds_ratio": -2.8403139367583208e-05, + "logits/chosen": -0.3107652962207794, + "logits/rejected": -0.33756834268569946, + "logps/chosen": -0.00012624255032278597, + "logps/rejected": -1.8674782514572144, + "loss": 0.5196, + "nll_loss": 0.12989729642868042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2624254850379657e-05, + "rewards/margins": 0.18673519790172577, + "rewards/rejected": -0.18674781918525696, + "step": 11496 + }, + { + "epoch": 7.950899031811895, + "grad_norm": 3.8882741928100586, + "learning_rate": 1.1383894267711695e-05, + "log_odds_chosen": 10.171780586242676, + "log_odds_ratio": -0.0004873498110100627, + "logits/chosen": -0.41246718168258667, + "logits/rejected": -0.4909096956253052, + "logps/chosen": -0.0005409514997154474, + "logps/rejected": -2.4792728424072266, + "loss": 0.4081, + "nll_loss": 0.10198444873094559, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4095151426736265e-05, + "rewards/margins": 0.2478732019662857, + "rewards/rejected": -0.24792727828025818, + "step": 11497 + }, + { + "epoch": 7.951590594744122, + "grad_norm": 3.493499994277954, + "learning_rate": 1.1380052251421547e-05, + "log_odds_chosen": 10.759477615356445, + "log_odds_ratio": -0.0001155830395873636, + "logits/chosen": -0.48726120591163635, + "logits/rejected": -0.47793930768966675, + "logps/chosen": -0.00021786931029055268, + "logps/rejected": -2.2457547187805176, + "loss": 0.3185, + "nll_loss": 0.07962332665920258, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1786931029055268e-05, + "rewards/margins": 0.22455370426177979, + "rewards/rejected": -0.224575474858284, + "step": 11498 + }, + { + "epoch": 7.952282157676349, + "grad_norm": 9.014684677124023, + "learning_rate": 1.1376210235131398e-05, + "log_odds_chosen": 10.160914421081543, + "log_odds_ratio": -0.00027200212934985757, + "logits/chosen": 0.3355557322502136, + "logits/rejected": 0.2384757101535797, + "logps/chosen": -0.0005482410779222846, + "logps/rejected": -1.730358362197876, + "loss": 0.7167, + "nll_loss": 0.17913565039634705, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.482410779222846e-05, + "rewards/margins": 0.17298100888729095, + "rewards/rejected": -0.17303583025932312, + "step": 11499 + }, + { + "epoch": 7.9529737206085755, + "grad_norm": 4.0301032066345215, + "learning_rate": 1.1372368218841247e-05, + "log_odds_chosen": 11.374445915222168, + "log_odds_ratio": -5.591554509010166e-05, + "logits/chosen": -0.13466550409793854, + "logits/rejected": -0.2583548128604889, + "logps/chosen": -0.00019826041534543037, + "logps/rejected": -2.5781736373901367, + "loss": 0.4475, + "nll_loss": 0.11187275499105453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.98260422621388e-05, + "rewards/margins": 0.2577975392341614, + "rewards/rejected": -0.2578173875808716, + "step": 11500 + }, + { + "epoch": 7.953665283540802, + "grad_norm": 5.895437717437744, + "learning_rate": 1.13685262025511e-05, + "log_odds_chosen": 10.141064643859863, + "log_odds_ratio": -0.00096508814021945, + "logits/chosen": -0.25293049216270447, + "logits/rejected": -0.2667233347892761, + "logps/chosen": -0.001047789235599339, + "logps/rejected": -2.1944336891174316, + "loss": 0.3557, + "nll_loss": 0.08882546424865723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010477892647031695, + "rewards/margins": 0.21933861076831818, + "rewards/rejected": -0.21944338083267212, + "step": 11501 + }, + { + "epoch": 7.954356846473029, + "grad_norm": 5.355554580688477, + "learning_rate": 1.136468418626095e-05, + "log_odds_chosen": 11.279321670532227, + "log_odds_ratio": -0.0001632093481021002, + "logits/chosen": -0.2464003562927246, + "logits/rejected": -0.24605363607406616, + "logps/chosen": -0.00020973285427317023, + "logps/rejected": -2.0839450359344482, + "loss": 0.5622, + "nll_loss": 0.1405428946018219, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0973287973902188e-05, + "rewards/margins": 0.2083735316991806, + "rewards/rejected": -0.20839449763298035, + "step": 11502 + }, + { + "epoch": 7.955048409405256, + "grad_norm": 5.61575174331665, + "learning_rate": 1.1360842169970801e-05, + "log_odds_chosen": 10.429574966430664, + "log_odds_ratio": -0.00010323894093744457, + "logits/chosen": -0.3633131980895996, + "logits/rejected": -0.46927785873413086, + "logps/chosen": -0.0003013019450008869, + "logps/rejected": -1.9691195487976074, + "loss": 0.6195, + "nll_loss": 0.15487197041511536, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.013019340869505e-05, + "rewards/margins": 0.19688181579113007, + "rewards/rejected": -0.19691196084022522, + "step": 11503 + }, + { + "epoch": 7.955739972337483, + "grad_norm": 5.102035045623779, + "learning_rate": 1.1357000153680652e-05, + "log_odds_chosen": 10.669812202453613, + "log_odds_ratio": -3.363552241353318e-05, + "logits/chosen": 0.0006479024887084961, + "logits/rejected": -0.08887091279029846, + "logps/chosen": -0.0002845787676051259, + "logps/rejected": -2.112110137939453, + "loss": 0.5355, + "nll_loss": 0.13386455178260803, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8457874577725306e-05, + "rewards/margins": 0.21118253469467163, + "rewards/rejected": -0.21121101081371307, + "step": 11504 + }, + { + "epoch": 7.95643153526971, + "grad_norm": 6.779682159423828, + "learning_rate": 1.1353158137390503e-05, + "log_odds_chosen": 11.504022598266602, + "log_odds_ratio": -3.1273968488676473e-05, + "logits/chosen": -0.15041357278823853, + "logits/rejected": -0.26987728476524353, + "logps/chosen": -0.0007018953328952193, + "logps/rejected": -3.0383384227752686, + "loss": 0.8323, + "nll_loss": 0.2080681324005127, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.018954056547955e-05, + "rewards/margins": 0.30376365780830383, + "rewards/rejected": -0.30383384227752686, + "step": 11505 + }, + { + "epoch": 7.9571230982019365, + "grad_norm": 3.7123119831085205, + "learning_rate": 1.1349316121100353e-05, + "log_odds_chosen": 10.654646873474121, + "log_odds_ratio": -4.022525536129251e-05, + "logits/chosen": -0.7917324900627136, + "logits/rejected": -0.7620425820350647, + "logps/chosen": -0.00012117061851313338, + "logps/rejected": -1.6779022216796875, + "loss": 0.3672, + "nll_loss": 0.09179645776748657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2117062397010159e-05, + "rewards/margins": 0.1677781045436859, + "rewards/rejected": -0.1677902340888977, + "step": 11506 + }, + { + "epoch": 7.957814661134163, + "grad_norm": 5.274533748626709, + "learning_rate": 1.1345474104810206e-05, + "log_odds_chosen": 11.018901824951172, + "log_odds_ratio": -3.798465695581399e-05, + "logits/chosen": -0.3230525255203247, + "logits/rejected": -0.3524434566497803, + "logps/chosen": -0.0002959494886454195, + "logps/rejected": -2.0793910026550293, + "loss": 0.58, + "nll_loss": 0.14500178396701813, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.959495031973347e-05, + "rewards/margins": 0.20790952444076538, + "rewards/rejected": -0.20793911814689636, + "step": 11507 + }, + { + "epoch": 7.95850622406639, + "grad_norm": 3.123476028442383, + "learning_rate": 1.1341632088520057e-05, + "log_odds_chosen": 10.636898040771484, + "log_odds_ratio": -0.00014882789400871843, + "logits/chosen": -0.3683475852012634, + "logits/rejected": -0.4598306715488434, + "logps/chosen": -0.0013843890046700835, + "logps/rejected": -2.420956611633301, + "loss": 0.2991, + "nll_loss": 0.07475338876247406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013843891792930663, + "rewards/margins": 0.24195721745491028, + "rewards/rejected": -0.24209564924240112, + "step": 11508 + }, + { + "epoch": 7.959197786998617, + "grad_norm": 4.339691638946533, + "learning_rate": 1.1337790072229906e-05, + "log_odds_chosen": 11.068500518798828, + "log_odds_ratio": -0.00010646334703778848, + "logits/chosen": -0.5828951001167297, + "logits/rejected": -0.6474270820617676, + "logps/chosen": -0.00015746541612315923, + "logps/rejected": -1.9267382621765137, + "loss": 0.4962, + "nll_loss": 0.12404583394527435, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5746541976113804e-05, + "rewards/margins": 0.19265806674957275, + "rewards/rejected": -0.19267383217811584, + "step": 11509 + }, + { + "epoch": 7.959889349930844, + "grad_norm": 4.105745792388916, + "learning_rate": 1.1333948055939758e-05, + "log_odds_chosen": 11.290447235107422, + "log_odds_ratio": -2.614833283587359e-05, + "logits/chosen": -0.07597924023866653, + "logits/rejected": -0.1646428108215332, + "logps/chosen": -0.00018273312889505178, + "logps/rejected": -2.607384204864502, + "loss": 0.5353, + "nll_loss": 0.13381241261959076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8273312889505178e-05, + "rewards/margins": 0.2607201337814331, + "rewards/rejected": -0.26073840260505676, + "step": 11510 + }, + { + "epoch": 7.960580912863071, + "grad_norm": 5.290945529937744, + "learning_rate": 1.1330106039649609e-05, + "log_odds_chosen": 11.960050582885742, + "log_odds_ratio": -8.36965955386404e-06, + "logits/chosen": -0.1975208818912506, + "logits/rejected": -0.19062384963035583, + "logps/chosen": -9.22078761504963e-05, + "logps/rejected": -2.4583163261413574, + "loss": 0.4986, + "nll_loss": 0.12464690953493118, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.220788342645392e-06, + "rewards/margins": 0.24582241475582123, + "rewards/rejected": -0.2458316683769226, + "step": 11511 + }, + { + "epoch": 7.9612724757952975, + "grad_norm": 4.686517238616943, + "learning_rate": 1.132626402335946e-05, + "log_odds_chosen": 9.780715942382812, + "log_odds_ratio": -0.0007018402102403343, + "logits/chosen": -0.33034276962280273, + "logits/rejected": -0.3925611078739166, + "logps/chosen": -0.0009610651759430766, + "logps/rejected": -1.9885149002075195, + "loss": 0.4466, + "nll_loss": 0.11156748980283737, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.610652341507375e-05, + "rewards/margins": 0.1987553834915161, + "rewards/rejected": -0.19885149598121643, + "step": 11512 + }, + { + "epoch": 7.961964038727524, + "grad_norm": 5.430578231811523, + "learning_rate": 1.132242200706931e-05, + "log_odds_chosen": 11.476561546325684, + "log_odds_ratio": -0.0001910024438984692, + "logits/chosen": -0.09294851869344711, + "logits/rejected": -0.2029493749141693, + "logps/chosen": -0.00015575737052131444, + "logps/rejected": -2.6696534156799316, + "loss": 0.5014, + "nll_loss": 0.1253352165222168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5575737052131444e-05, + "rewards/margins": 0.26694974303245544, + "rewards/rejected": -0.2669653296470642, + "step": 11513 + }, + { + "epoch": 7.962655601659751, + "grad_norm": 4.263656139373779, + "learning_rate": 1.1318579990779161e-05, + "log_odds_chosen": 9.819611549377441, + "log_odds_ratio": -0.00027837679954245687, + "logits/chosen": -0.3863220810890198, + "logits/rejected": -0.32077890634536743, + "logps/chosen": -0.00024438605760224164, + "logps/rejected": -1.1592479944229126, + "loss": 0.2405, + "nll_loss": 0.06010923534631729, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4438606487819925e-05, + "rewards/margins": 0.11590036749839783, + "rewards/rejected": -0.11592480540275574, + "step": 11514 + }, + { + "epoch": 7.963347164591978, + "grad_norm": 3.0869855880737305, + "learning_rate": 1.1314737974489012e-05, + "log_odds_chosen": 11.091203689575195, + "log_odds_ratio": -3.3251842978643253e-05, + "logits/chosen": -0.4070943593978882, + "logits/rejected": -0.37126943469047546, + "logps/chosen": -0.00011688778613461182, + "logps/rejected": -2.079681396484375, + "loss": 0.2682, + "nll_loss": 0.06704328954219818, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1688778613461182e-05, + "rewards/margins": 0.2079564481973648, + "rewards/rejected": -0.20796814560890198, + "step": 11515 + }, + { + "epoch": 7.964038727524205, + "grad_norm": 4.022365093231201, + "learning_rate": 1.1310895958198864e-05, + "log_odds_chosen": 11.098390579223633, + "log_odds_ratio": -0.00019448368402663618, + "logits/chosen": -0.3664087653160095, + "logits/rejected": -0.4097881019115448, + "logps/chosen": -0.0002823981922119856, + "logps/rejected": -2.2339224815368652, + "loss": 0.388, + "nll_loss": 0.09698802977800369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8239821403985843e-05, + "rewards/margins": 0.22336401045322418, + "rewards/rejected": -0.22339224815368652, + "step": 11516 + }, + { + "epoch": 7.964730290456432, + "grad_norm": 3.9575083255767822, + "learning_rate": 1.1307053941908715e-05, + "log_odds_chosen": 10.273996353149414, + "log_odds_ratio": -0.00010742698214016855, + "logits/chosen": -0.360485315322876, + "logits/rejected": -0.3785179853439331, + "logps/chosen": -0.0003668889112304896, + "logps/rejected": -1.7709505558013916, + "loss": 0.4295, + "nll_loss": 0.10736609995365143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6688892578240484e-05, + "rewards/margins": 0.1770583689212799, + "rewards/rejected": -0.17709505558013916, + "step": 11517 + }, + { + "epoch": 7.9654218533886585, + "grad_norm": 8.95025634765625, + "learning_rate": 1.1303211925618564e-05, + "log_odds_chosen": 11.682476997375488, + "log_odds_ratio": -3.4601594961714e-05, + "logits/chosen": -0.20466911792755127, + "logits/rejected": -0.18964964151382446, + "logps/chosen": -0.00021859840489923954, + "logps/rejected": -2.6859614849090576, + "loss": 0.4589, + "nll_loss": 0.11471740901470184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1859839762328193e-05, + "rewards/margins": 0.2685742974281311, + "rewards/rejected": -0.26859617233276367, + "step": 11518 + }, + { + "epoch": 7.966113416320885, + "grad_norm": 17.208377838134766, + "learning_rate": 1.1299369909328416e-05, + "log_odds_chosen": 11.69450855255127, + "log_odds_ratio": -1.2062869245710317e-05, + "logits/chosen": -0.25031498074531555, + "logits/rejected": -0.3548222780227661, + "logps/chosen": -8.374622120754793e-05, + "logps/rejected": -2.2749221324920654, + "loss": 0.4229, + "nll_loss": 0.10573307424783707, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.374622666451614e-06, + "rewards/margins": 0.2274838387966156, + "rewards/rejected": -0.22749219834804535, + "step": 11519 + }, + { + "epoch": 7.966804979253112, + "grad_norm": 4.069872856140137, + "learning_rate": 1.1295527893038267e-05, + "log_odds_chosen": 10.876702308654785, + "log_odds_ratio": -0.0001423271605744958, + "logits/chosen": -0.374772310256958, + "logits/rejected": -0.35536468029022217, + "logps/chosen": -0.00019855469872709364, + "logps/rejected": -2.4102046489715576, + "loss": 0.3834, + "nll_loss": 0.09582436829805374, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9855469872709364e-05, + "rewards/margins": 0.24100060760974884, + "rewards/rejected": -0.24102044105529785, + "step": 11520 + }, + { + "epoch": 7.967496542185339, + "grad_norm": 4.0037360191345215, + "learning_rate": 1.1291685876748118e-05, + "log_odds_chosen": 10.799403190612793, + "log_odds_ratio": -6.53384777251631e-05, + "logits/chosen": -0.4056221842765808, + "logits/rejected": -0.4481501579284668, + "logps/chosen": -0.0011118586407974362, + "logps/rejected": -3.182638645172119, + "loss": 0.4393, + "nll_loss": 0.10982559621334076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011118586553493515, + "rewards/margins": 0.31815269589424133, + "rewards/rejected": -0.3182638883590698, + "step": 11521 + }, + { + "epoch": 7.968188105117566, + "grad_norm": 6.190622806549072, + "learning_rate": 1.1287843860457969e-05, + "log_odds_chosen": 9.207051277160645, + "log_odds_ratio": -0.0008154031820595264, + "logits/chosen": -0.22212673723697662, + "logits/rejected": -0.2690417170524597, + "logps/chosen": -0.0015860882122069597, + "logps/rejected": -1.5020241737365723, + "loss": 0.7381, + "nll_loss": 0.18443426489830017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015860881831031293, + "rewards/margins": 0.1500438153743744, + "rewards/rejected": -0.1502024084329605, + "step": 11522 + }, + { + "epoch": 7.968879668049793, + "grad_norm": 3.886345386505127, + "learning_rate": 1.128400184416782e-05, + "log_odds_chosen": 10.890275955200195, + "log_odds_ratio": -9.133804269367829e-05, + "logits/chosen": -0.529086172580719, + "logits/rejected": -0.5643143057823181, + "logps/chosen": -0.00021087020286358893, + "logps/rejected": -2.2759876251220703, + "loss": 0.7748, + "nll_loss": 0.19368351995944977, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1087022105348296e-05, + "rewards/margins": 0.22757765650749207, + "rewards/rejected": -0.22759875655174255, + "step": 11523 + }, + { + "epoch": 7.9695712309820195, + "grad_norm": 13.72425651550293, + "learning_rate": 1.128015982787767e-05, + "log_odds_chosen": 12.068330764770508, + "log_odds_ratio": -1.0651285265339538e-05, + "logits/chosen": -0.16793814301490784, + "logits/rejected": -0.34160295128822327, + "logps/chosen": -0.00011844123218907043, + "logps/rejected": -2.5251269340515137, + "loss": 0.6213, + "nll_loss": 0.15533268451690674, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1844123946502805e-05, + "rewards/margins": 0.25250083208084106, + "rewards/rejected": -0.25251269340515137, + "step": 11524 + }, + { + "epoch": 7.970262793914246, + "grad_norm": 5.1223344802856445, + "learning_rate": 1.1276317811587523e-05, + "log_odds_chosen": 11.770263671875, + "log_odds_ratio": -1.9887751477654092e-05, + "logits/chosen": -0.21339300274848938, + "logits/rejected": -0.23123294115066528, + "logps/chosen": -0.00027726683765649796, + "logps/rejected": -3.490894079208374, + "loss": 0.7112, + "nll_loss": 0.17779025435447693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7726682674256153e-05, + "rewards/margins": 0.3490616977214813, + "rewards/rejected": -0.3490894138813019, + "step": 11525 + }, + { + "epoch": 7.970954356846473, + "grad_norm": 4.5738091468811035, + "learning_rate": 1.1272475795297373e-05, + "log_odds_chosen": 11.602575302124023, + "log_odds_ratio": -3.434224709053524e-05, + "logits/chosen": -0.14012958109378815, + "logits/rejected": -0.2598492503166199, + "logps/chosen": -0.00012942799367010593, + "logps/rejected": -2.3440585136413574, + "loss": 0.4871, + "nll_loss": 0.12176584452390671, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2942798093718011e-05, + "rewards/margins": 0.234392911195755, + "rewards/rejected": -0.23440584540367126, + "step": 11526 + }, + { + "epoch": 7.9716459197787, + "grad_norm": 3.709731101989746, + "learning_rate": 1.1268633779007222e-05, + "log_odds_chosen": 10.255630493164062, + "log_odds_ratio": -0.00013107730774208903, + "logits/chosen": -0.6991348266601562, + "logits/rejected": -0.7393057346343994, + "logps/chosen": -0.00039511447539553046, + "logps/rejected": -2.147977352142334, + "loss": 0.331, + "nll_loss": 0.08272667229175568, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9511447539553046e-05, + "rewards/margins": 0.2147582471370697, + "rewards/rejected": -0.2147977650165558, + "step": 11527 + }, + { + "epoch": 7.972337482710927, + "grad_norm": 4.13797664642334, + "learning_rate": 1.1264791762717075e-05, + "log_odds_chosen": 9.618062973022461, + "log_odds_ratio": -0.00018889813509304076, + "logits/chosen": -0.588722288608551, + "logits/rejected": -0.5970766544342041, + "logps/chosen": -0.00038104315171949565, + "logps/rejected": -1.406938910484314, + "loss": 0.4677, + "nll_loss": 0.11689627170562744, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.810431371675804e-05, + "rewards/margins": 0.1406557857990265, + "rewards/rejected": -0.14069388806819916, + "step": 11528 + }, + { + "epoch": 7.973029045643154, + "grad_norm": 3.487117290496826, + "learning_rate": 1.1260949746426926e-05, + "log_odds_chosen": 10.02828311920166, + "log_odds_ratio": -0.00018125462520401925, + "logits/chosen": -0.33877819776535034, + "logits/rejected": -0.404274046421051, + "logps/chosen": -0.0007607677252963185, + "logps/rejected": -1.83676278591156, + "loss": 0.2745, + "nll_loss": 0.06860494613647461, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.607677252963185e-05, + "rewards/margins": 0.18360021710395813, + "rewards/rejected": -0.18367627263069153, + "step": 11529 + }, + { + "epoch": 7.9737206085753805, + "grad_norm": 3.9003255367279053, + "learning_rate": 1.1257107730136776e-05, + "log_odds_chosen": 9.846776962280273, + "log_odds_ratio": -0.00033953096135519445, + "logits/chosen": -0.7263999581336975, + "logits/rejected": -0.733031690120697, + "logps/chosen": -0.0005431174067780375, + "logps/rejected": -1.4025123119354248, + "loss": 0.3954, + "nll_loss": 0.09882722795009613, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.431173849501647e-05, + "rewards/margins": 0.14019691944122314, + "rewards/rejected": -0.14025121927261353, + "step": 11530 + }, + { + "epoch": 7.974412171507607, + "grad_norm": 4.604633331298828, + "learning_rate": 1.1253265713846627e-05, + "log_odds_chosen": 11.10637378692627, + "log_odds_ratio": -9.243666136171669e-05, + "logits/chosen": -0.06658074259757996, + "logits/rejected": -0.21274365484714508, + "logps/chosen": -0.00020368752302601933, + "logps/rejected": -2.292241096496582, + "loss": 0.5003, + "nll_loss": 0.12505874037742615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0368752302601933e-05, + "rewards/margins": 0.2292037308216095, + "rewards/rejected": -0.2292240858078003, + "step": 11531 + }, + { + "epoch": 7.975103734439834, + "grad_norm": 4.5084638595581055, + "learning_rate": 1.1249423697556478e-05, + "log_odds_chosen": 11.897830963134766, + "log_odds_ratio": -1.5846128007979132e-05, + "logits/chosen": -0.37234407663345337, + "logits/rejected": -0.38094252347946167, + "logps/chosen": -0.00034496927401050925, + "logps/rejected": -2.6722660064697266, + "loss": 0.6085, + "nll_loss": 0.15213480591773987, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.449693031143397e-05, + "rewards/margins": 0.26719212532043457, + "rewards/rejected": -0.26722660660743713, + "step": 11532 + }, + { + "epoch": 7.975795297372061, + "grad_norm": 3.4178168773651123, + "learning_rate": 1.1245581681266329e-05, + "log_odds_chosen": 12.06242561340332, + "log_odds_ratio": -1.9183351469109766e-05, + "logits/chosen": -0.19393685460090637, + "logits/rejected": -0.22952347993850708, + "logps/chosen": -8.642624743515626e-05, + "logps/rejected": -2.72680926322937, + "loss": 0.3382, + "nll_loss": 0.08455608785152435, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.642624379717745e-06, + "rewards/margins": 0.27267229557037354, + "rewards/rejected": -0.27268096804618835, + "step": 11533 + }, + { + "epoch": 7.976486860304288, + "grad_norm": 4.270136833190918, + "learning_rate": 1.124173966497618e-05, + "log_odds_chosen": 12.044026374816895, + "log_odds_ratio": -1.63989097927697e-05, + "logits/chosen": -0.0951736569404602, + "logits/rejected": -0.19301308691501617, + "logps/chosen": -0.0001634317304706201, + "logps/rejected": -2.8399291038513184, + "loss": 0.4765, + "nll_loss": 0.11912365257740021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6343172319466248e-05, + "rewards/margins": 0.28397655487060547, + "rewards/rejected": -0.2839928865432739, + "step": 11534 + }, + { + "epoch": 7.977178423236515, + "grad_norm": 25.706132888793945, + "learning_rate": 1.1237897648686032e-05, + "log_odds_chosen": 11.003204345703125, + "log_odds_ratio": -0.0006372515927068889, + "logits/chosen": -0.24361053109169006, + "logits/rejected": -0.17340120673179626, + "logps/chosen": -0.0016507849795743823, + "logps/rejected": -2.280179500579834, + "loss": 0.4292, + "nll_loss": 0.10722636431455612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016507849795743823, + "rewards/margins": 0.22785286605358124, + "rewards/rejected": -0.22801794111728668, + "step": 11535 + }, + { + "epoch": 7.977869986168741, + "grad_norm": 5.740812301635742, + "learning_rate": 1.1234055632395883e-05, + "log_odds_chosen": 10.96840763092041, + "log_odds_ratio": -0.00022587321291211993, + "logits/chosen": -0.12754343450069427, + "logits/rejected": -0.25832250714302063, + "logps/chosen": -0.00031263873097486794, + "logps/rejected": -2.5789904594421387, + "loss": 0.6194, + "nll_loss": 0.15482082962989807, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1263873097486794e-05, + "rewards/margins": 0.2578677833080292, + "rewards/rejected": -0.25789904594421387, + "step": 11536 + }, + { + "epoch": 7.978561549100968, + "grad_norm": 4.151486396789551, + "learning_rate": 1.1230213616105732e-05, + "log_odds_chosen": 10.758419036865234, + "log_odds_ratio": -5.185231566429138e-05, + "logits/chosen": -0.14621517062187195, + "logits/rejected": -0.19490604102611542, + "logps/chosen": -0.0005299575277604163, + "logps/rejected": -2.638571262359619, + "loss": 0.6106, + "nll_loss": 0.15265339612960815, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299575423123315e-05, + "rewards/margins": 0.2638041377067566, + "rewards/rejected": -0.2638571262359619, + "step": 11537 + }, + { + "epoch": 7.979253112033195, + "grad_norm": 17.210065841674805, + "learning_rate": 1.1226371599815584e-05, + "log_odds_chosen": 11.015816688537598, + "log_odds_ratio": -5.2646784752141684e-05, + "logits/chosen": -0.12005338817834854, + "logits/rejected": -0.23084372282028198, + "logps/chosen": -0.00026947163860313594, + "logps/rejected": -1.9214720726013184, + "loss": 0.3624, + "nll_loss": 0.09059273451566696, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6947163860313594e-05, + "rewards/margins": 0.1921202689409256, + "rewards/rejected": -0.19214721024036407, + "step": 11538 + }, + { + "epoch": 7.979944674965422, + "grad_norm": 4.7975568771362305, + "learning_rate": 1.1222529583525435e-05, + "log_odds_chosen": 10.247209548950195, + "log_odds_ratio": -0.00016718886035960168, + "logits/chosen": -0.5553616881370544, + "logits/rejected": -0.7003533840179443, + "logps/chosen": -0.0002022799162659794, + "logps/rejected": -1.9307901859283447, + "loss": 0.499, + "nll_loss": 0.12474517524242401, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.022799162659794e-05, + "rewards/margins": 0.1930587887763977, + "rewards/rejected": -0.19307900965213776, + "step": 11539 + }, + { + "epoch": 7.980636237897649, + "grad_norm": 5.050432205200195, + "learning_rate": 1.1218687567235286e-05, + "log_odds_chosen": 10.200136184692383, + "log_odds_ratio": -0.000156211550347507, + "logits/chosen": -0.23092946410179138, + "logits/rejected": -0.2568545341491699, + "logps/chosen": -0.0002525176969356835, + "logps/rejected": -1.5876084566116333, + "loss": 0.4569, + "nll_loss": 0.11419893801212311, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5251771148759872e-05, + "rewards/margins": 0.15873560309410095, + "rewards/rejected": -0.15876084566116333, + "step": 11540 + }, + { + "epoch": 7.981327800829876, + "grad_norm": 4.645444869995117, + "learning_rate": 1.1214845550945136e-05, + "log_odds_chosen": 11.110513687133789, + "log_odds_ratio": -5.435135244624689e-05, + "logits/chosen": -0.19689714908599854, + "logits/rejected": -0.21087950468063354, + "logps/chosen": -0.00016647031588945538, + "logps/rejected": -2.2465925216674805, + "loss": 0.4227, + "nll_loss": 0.105674147605896, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.664703268033918e-05, + "rewards/margins": 0.22464261949062347, + "rewards/rejected": -0.224659264087677, + "step": 11541 + }, + { + "epoch": 7.982019363762102, + "grad_norm": 4.564971923828125, + "learning_rate": 1.1211003534654987e-05, + "log_odds_chosen": 11.38237190246582, + "log_odds_ratio": -7.819420716259629e-05, + "logits/chosen": -0.06043093651533127, + "logits/rejected": -0.18942205607891083, + "logps/chosen": -0.00024105122429318726, + "logps/rejected": -2.574265480041504, + "loss": 0.3603, + "nll_loss": 0.09007608890533447, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4105120246531442e-05, + "rewards/margins": 0.2574024498462677, + "rewards/rejected": -0.25742655992507935, + "step": 11542 + }, + { + "epoch": 7.982710926694329, + "grad_norm": 5.620864391326904, + "learning_rate": 1.1207161518364838e-05, + "log_odds_chosen": 10.171491622924805, + "log_odds_ratio": -0.00013046340609434992, + "logits/chosen": -0.28354907035827637, + "logits/rejected": -0.3111303448677063, + "logps/chosen": -0.00016116946062538773, + "logps/rejected": -1.7502444982528687, + "loss": 0.5933, + "nll_loss": 0.14830997586250305, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6116946426336654e-05, + "rewards/margins": 0.17500832676887512, + "rewards/rejected": -0.17502444982528687, + "step": 11543 + }, + { + "epoch": 7.983402489626556, + "grad_norm": 5.144567966461182, + "learning_rate": 1.120331950207469e-05, + "log_odds_chosen": 11.948741912841797, + "log_odds_ratio": -3.4696993679972365e-05, + "logits/chosen": -0.47868379950523376, + "logits/rejected": -0.5196173787117004, + "logps/chosen": -0.00011416654160711914, + "logps/rejected": -2.7108850479125977, + "loss": 0.507, + "nll_loss": 0.12674714624881744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1416654160711914e-05, + "rewards/margins": 0.27107712626457214, + "rewards/rejected": -0.27108854055404663, + "step": 11544 + }, + { + "epoch": 7.984094052558783, + "grad_norm": 5.7527618408203125, + "learning_rate": 1.1199477485784541e-05, + "log_odds_chosen": 10.331581115722656, + "log_odds_ratio": -0.0002976985997520387, + "logits/chosen": -0.42527955770492554, + "logits/rejected": -0.5214613676071167, + "logps/chosen": -0.0005123602459207177, + "logps/rejected": -1.9454360008239746, + "loss": 0.4936, + "nll_loss": 0.12336038053035736, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.123602750245482e-05, + "rewards/margins": 0.19449235498905182, + "rewards/rejected": -0.19454360008239746, + "step": 11545 + }, + { + "epoch": 7.98478561549101, + "grad_norm": 7.524501323699951, + "learning_rate": 1.119563546949439e-05, + "log_odds_chosen": 10.434951782226562, + "log_odds_ratio": -0.00023595344100613147, + "logits/chosen": -0.13727258145809174, + "logits/rejected": -0.21067550778388977, + "logps/chosen": -0.0011036460055038333, + "logps/rejected": -2.0079808235168457, + "loss": 0.4717, + "nll_loss": 0.11789001524448395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001103645990951918, + "rewards/margins": 0.2006877362728119, + "rewards/rejected": -0.20079809427261353, + "step": 11546 + }, + { + "epoch": 7.985477178423237, + "grad_norm": 5.442744731903076, + "learning_rate": 1.1191793453204243e-05, + "log_odds_chosen": 10.40377426147461, + "log_odds_ratio": -0.0002690352266654372, + "logits/chosen": -0.4835624098777771, + "logits/rejected": -0.5982956886291504, + "logps/chosen": -0.0005080453120172024, + "logps/rejected": -2.0457310676574707, + "loss": 0.6759, + "nll_loss": 0.1689365804195404, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.080453775008209e-05, + "rewards/margins": 0.20452231168746948, + "rewards/rejected": -0.2045731246471405, + "step": 11547 + }, + { + "epoch": 7.986168741355463, + "grad_norm": 3.2058820724487305, + "learning_rate": 1.1187951436914093e-05, + "log_odds_chosen": 10.801993370056152, + "log_odds_ratio": -0.00011381193326087669, + "logits/chosen": -0.1311768740415573, + "logits/rejected": -0.2777857482433319, + "logps/chosen": -0.00014527339953929186, + "logps/rejected": -2.0049283504486084, + "loss": 0.4059, + "nll_loss": 0.10146152973175049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4527340681524947e-05, + "rewards/margins": 0.20047834515571594, + "rewards/rejected": -0.20049285888671875, + "step": 11548 + }, + { + "epoch": 7.98686030428769, + "grad_norm": 12.101853370666504, + "learning_rate": 1.1184109420623944e-05, + "log_odds_chosen": 12.284296035766602, + "log_odds_ratio": -4.364710912341252e-05, + "logits/chosen": -0.0765245109796524, + "logits/rejected": -0.23267234861850739, + "logps/chosen": -0.00017167633632197976, + "logps/rejected": -3.3040640354156494, + "loss": 0.4213, + "nll_loss": 0.10531854629516602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7167634723591618e-05, + "rewards/margins": 0.33038923144340515, + "rewards/rejected": -0.33040642738342285, + "step": 11549 + }, + { + "epoch": 7.987551867219917, + "grad_norm": 6.506622314453125, + "learning_rate": 1.1180267404333795e-05, + "log_odds_chosen": 10.512768745422363, + "log_odds_ratio": -8.126518514472991e-05, + "logits/chosen": -0.21105435490608215, + "logits/rejected": -0.20511992275714874, + "logps/chosen": -0.00044915868784300983, + "logps/rejected": -1.9867548942565918, + "loss": 0.4599, + "nll_loss": 0.11497482657432556, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.491586878430098e-05, + "rewards/margins": 0.19863058626651764, + "rewards/rejected": -0.1986754983663559, + "step": 11550 + }, + { + "epoch": 7.988243430152144, + "grad_norm": 4.189328193664551, + "learning_rate": 1.1176425388043646e-05, + "log_odds_chosen": 11.506170272827148, + "log_odds_ratio": -2.5864368581096642e-05, + "logits/chosen": -0.3027508556842804, + "logits/rejected": -0.3504965305328369, + "logps/chosen": -0.00014351757999975234, + "logps/rejected": -2.5394859313964844, + "loss": 0.3577, + "nll_loss": 0.08942808210849762, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4351757272379473e-05, + "rewards/margins": 0.25393423438072205, + "rewards/rejected": -0.2539485991001129, + "step": 11551 + }, + { + "epoch": 7.988934993084371, + "grad_norm": 5.709794521331787, + "learning_rate": 1.1172583371753496e-05, + "log_odds_chosen": 11.556946754455566, + "log_odds_ratio": -6.319672684185207e-05, + "logits/chosen": -0.22862932085990906, + "logits/rejected": -0.19877254962921143, + "logps/chosen": -0.0001135072743636556, + "logps/rejected": -2.5816869735717773, + "loss": 0.5891, + "nll_loss": 0.1472797989845276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1350726708769798e-05, + "rewards/margins": 0.2581573724746704, + "rewards/rejected": -0.2581687271595001, + "step": 11552 + }, + { + "epoch": 7.9896265560165975, + "grad_norm": 5.9907073974609375, + "learning_rate": 1.1168741355463349e-05, + "log_odds_chosen": 10.62165641784668, + "log_odds_ratio": -9.188729745801538e-05, + "logits/chosen": -0.8795843720436096, + "logits/rejected": -0.9018745422363281, + "logps/chosen": -0.00021737195493187755, + "logps/rejected": -2.174501657485962, + "loss": 0.4843, + "nll_loss": 0.12106455862522125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1737196220783517e-05, + "rewards/margins": 0.21742843091487885, + "rewards/rejected": -0.21745017170906067, + "step": 11553 + }, + { + "epoch": 7.990318118948824, + "grad_norm": 4.190145015716553, + "learning_rate": 1.11648993391732e-05, + "log_odds_chosen": 11.428888320922852, + "log_odds_ratio": -5.247345688985661e-05, + "logits/chosen": -0.016565974801778793, + "logits/rejected": -0.2098257839679718, + "logps/chosen": -9.872991358861327e-05, + "logps/rejected": -2.324761390686035, + "loss": 0.7625, + "nll_loss": 0.19062167406082153, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.87299245025497e-06, + "rewards/margins": 0.23246626555919647, + "rewards/rejected": -0.232476145029068, + "step": 11554 + }, + { + "epoch": 7.991009681881051, + "grad_norm": 2.4398210048675537, + "learning_rate": 1.1161057322883049e-05, + "log_odds_chosen": 11.650123596191406, + "log_odds_ratio": -3.244515391997993e-05, + "logits/chosen": -0.3551556468009949, + "logits/rejected": -0.37677669525146484, + "logps/chosen": -0.0009142399067059159, + "logps/rejected": -3.184720516204834, + "loss": 0.4227, + "nll_loss": 0.10568135976791382, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.142398630501702e-05, + "rewards/margins": 0.3183806240558624, + "rewards/rejected": -0.3184720277786255, + "step": 11555 + }, + { + "epoch": 7.991701244813278, + "grad_norm": 5.6060261726379395, + "learning_rate": 1.1157215306592901e-05, + "log_odds_chosen": 9.99009895324707, + "log_odds_ratio": -0.00025115531752817333, + "logits/chosen": -0.47244876623153687, + "logits/rejected": -0.5255929231643677, + "logps/chosen": -0.0007966295816004276, + "logps/rejected": -1.9693882465362549, + "loss": 0.477, + "nll_loss": 0.11922810226678848, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.96629537944682e-05, + "rewards/margins": 0.19685915112495422, + "rewards/rejected": -0.19693884253501892, + "step": 11556 + }, + { + "epoch": 7.992392807745505, + "grad_norm": 5.761258602142334, + "learning_rate": 1.1153373290302752e-05, + "log_odds_chosen": 10.994587898254395, + "log_odds_ratio": -0.00020646223856601864, + "logits/chosen": -0.40414923429489136, + "logits/rejected": -0.5490624904632568, + "logps/chosen": -0.0001353785046376288, + "logps/rejected": -2.2475452423095703, + "loss": 0.5792, + "nll_loss": 0.1447751373052597, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3537851373257581e-05, + "rewards/margins": 0.22474099695682526, + "rewards/rejected": -0.22475454211235046, + "step": 11557 + }, + { + "epoch": 7.993084370677732, + "grad_norm": 3.093449831008911, + "learning_rate": 1.1149531274012602e-05, + "log_odds_chosen": 10.990373611450195, + "log_odds_ratio": -5.954828520771116e-05, + "logits/chosen": -0.8087414503097534, + "logits/rejected": -0.8039386868476868, + "logps/chosen": -0.0004817190929315984, + "logps/rejected": -2.7298450469970703, + "loss": 0.3396, + "nll_loss": 0.08489243686199188, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8171910748351365e-05, + "rewards/margins": 0.27293631434440613, + "rewards/rejected": -0.27298450469970703, + "step": 11558 + }, + { + "epoch": 7.9937759336099585, + "grad_norm": 4.10735559463501, + "learning_rate": 1.1145689257722453e-05, + "log_odds_chosen": 10.438919067382812, + "log_odds_ratio": -0.0001241380232386291, + "logits/chosen": -0.345724493265152, + "logits/rejected": -0.38152217864990234, + "logps/chosen": -0.0003483956679701805, + "logps/rejected": -2.2857847213745117, + "loss": 0.4543, + "nll_loss": 0.11357006430625916, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.48395697074011e-05, + "rewards/margins": 0.22854363918304443, + "rewards/rejected": -0.22857847809791565, + "step": 11559 + }, + { + "epoch": 7.994467496542185, + "grad_norm": 6.2611403465271, + "learning_rate": 1.1141847241432304e-05, + "log_odds_chosen": 11.2708101272583, + "log_odds_ratio": -9.53746130107902e-05, + "logits/chosen": -0.23702967166900635, + "logits/rejected": -0.3147750496864319, + "logps/chosen": -0.00017018525977618992, + "logps/rejected": -2.328094482421875, + "loss": 0.5765, + "nll_loss": 0.1441277414560318, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7018526705214754e-05, + "rewards/margins": 0.23279240727424622, + "rewards/rejected": -0.23280943930149078, + "step": 11560 + }, + { + "epoch": 7.995159059474412, + "grad_norm": 4.762275695800781, + "learning_rate": 1.1138005225142155e-05, + "log_odds_chosen": 11.396519660949707, + "log_odds_ratio": -8.356718171853572e-05, + "logits/chosen": -0.6899176239967346, + "logits/rejected": -0.7288756370544434, + "logps/chosen": -0.00012063531903550029, + "logps/rejected": -2.24245023727417, + "loss": 0.4872, + "nll_loss": 0.12178252637386322, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.206353226734791e-05, + "rewards/margins": 0.22423294186592102, + "rewards/rejected": -0.22424501180648804, + "step": 11561 + }, + { + "epoch": 7.995850622406639, + "grad_norm": 5.18743371963501, + "learning_rate": 1.1134163208852007e-05, + "log_odds_chosen": 11.00822925567627, + "log_odds_ratio": -0.0007812322000972927, + "logits/chosen": -0.6710682511329651, + "logits/rejected": -0.7657569050788879, + "logps/chosen": -0.002024431247264147, + "logps/rejected": -2.8388352394104004, + "loss": 0.4816, + "nll_loss": 0.12031008303165436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020244311599526554, + "rewards/margins": 0.28368109464645386, + "rewards/rejected": -0.28388354182243347, + "step": 11562 + }, + { + "epoch": 7.996542185338866, + "grad_norm": 4.819424152374268, + "learning_rate": 1.1130321192561858e-05, + "log_odds_chosen": 11.201433181762695, + "log_odds_ratio": -3.710830060299486e-05, + "logits/chosen": -0.07250084728002548, + "logits/rejected": -0.17073743045330048, + "logps/chosen": -0.0001514070318080485, + "logps/rejected": -2.1362361907958984, + "loss": 0.5593, + "nll_loss": 0.13982567191123962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.514070390840061e-05, + "rewards/margins": 0.21360847353935242, + "rewards/rejected": -0.21362361311912537, + "step": 11563 + }, + { + "epoch": 7.997233748271093, + "grad_norm": 4.8403215408325195, + "learning_rate": 1.1126479176271707e-05, + "log_odds_chosen": 13.158977508544922, + "log_odds_ratio": -3.391482550796354e-06, + "logits/chosen": -0.4475821852684021, + "logits/rejected": -0.48769521713256836, + "logps/chosen": -7.10982785676606e-05, + "logps/rejected": -3.243312358856201, + "loss": 0.4071, + "nll_loss": 0.10178689658641815, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.109828402462881e-06, + "rewards/margins": 0.3243241310119629, + "rewards/rejected": -0.32433122396469116, + "step": 11564 + }, + { + "epoch": 7.9979253112033195, + "grad_norm": 5.4192938804626465, + "learning_rate": 1.1122637159981558e-05, + "log_odds_chosen": 11.397504806518555, + "log_odds_ratio": -4.034879748360254e-05, + "logits/chosen": -0.5433164834976196, + "logits/rejected": -0.5988404154777527, + "logps/chosen": -7.839765021344647e-05, + "logps/rejected": -2.162886142730713, + "loss": 0.4798, + "nll_loss": 0.11994602531194687, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.839765203243587e-06, + "rewards/margins": 0.2162807583808899, + "rewards/rejected": -0.21628859639167786, + "step": 11565 + }, + { + "epoch": 7.998616874135546, + "grad_norm": 3.662543535232544, + "learning_rate": 1.111879514369141e-05, + "log_odds_chosen": 12.392351150512695, + "log_odds_ratio": -4.640197403205093e-06, + "logits/chosen": -0.6322042346000671, + "logits/rejected": -0.6288639903068542, + "logps/chosen": -0.0001787141227396205, + "logps/rejected": -3.1459133625030518, + "loss": 0.5516, + "nll_loss": 0.13790521025657654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7871414456749335e-05, + "rewards/margins": 0.3145734965801239, + "rewards/rejected": -0.31459134817123413, + "step": 11566 + }, + { + "epoch": 7.999308437067773, + "grad_norm": 8.122488021850586, + "learning_rate": 1.1114953127401261e-05, + "log_odds_chosen": 10.237462043762207, + "log_odds_ratio": -0.00034276203950867057, + "logits/chosen": -0.3474234938621521, + "logits/rejected": -0.34457603096961975, + "logps/chosen": -0.0008376427576877177, + "logps/rejected": -2.554757595062256, + "loss": 1.0159, + "nll_loss": 0.2539416253566742, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.376428013434634e-05, + "rewards/margins": 0.25539201498031616, + "rewards/rejected": -0.2554757595062256, + "step": 11567 + }, + { + "epoch": 8.0, + "grad_norm": 3.228456497192383, + "learning_rate": 1.1111111111111112e-05, + "log_odds_chosen": 11.486684799194336, + "log_odds_ratio": -0.0001133008481701836, + "logits/chosen": -0.6037741899490356, + "logits/rejected": -0.4470462501049042, + "logps/chosen": -0.0001116130079026334, + "logps/rejected": -2.2419252395629883, + "loss": 0.3898, + "nll_loss": 0.09744127839803696, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1161301699758042e-05, + "rewards/margins": 0.22418135404586792, + "rewards/rejected": -0.2241925150156021, + "step": 11568 + }, + { + "epoch": 8.000691562932227, + "grad_norm": 2.2350728511810303, + "learning_rate": 1.1107269094820962e-05, + "log_odds_chosen": 10.858540534973145, + "log_odds_ratio": -3.378919791430235e-05, + "logits/chosen": -0.45352715253829956, + "logits/rejected": -0.36443424224853516, + "logps/chosen": -0.00014449376612901688, + "logps/rejected": -2.170830726623535, + "loss": 0.249, + "nll_loss": 0.06224460154771805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4449376976699568e-05, + "rewards/margins": 0.217068612575531, + "rewards/rejected": -0.21708306670188904, + "step": 11569 + }, + { + "epoch": 8.001383125864454, + "grad_norm": 3.7147507667541504, + "learning_rate": 1.1103427078530813e-05, + "log_odds_chosen": 12.093175888061523, + "log_odds_ratio": -1.1225498383282684e-05, + "logits/chosen": -0.36110997200012207, + "logits/rejected": -0.3920789659023285, + "logps/chosen": -0.00011191416706424206, + "logps/rejected": -2.748927116394043, + "loss": 0.4191, + "nll_loss": 0.10478252172470093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1191416888323147e-05, + "rewards/margins": 0.2748815417289734, + "rewards/rejected": -0.27489274740219116, + "step": 11570 + }, + { + "epoch": 8.00207468879668, + "grad_norm": 3.585357427597046, + "learning_rate": 1.1099585062240664e-05, + "log_odds_chosen": 10.27100944519043, + "log_odds_ratio": -0.00012903407332487404, + "logits/chosen": -0.7512014508247375, + "logits/rejected": -0.7946118116378784, + "logps/chosen": -0.000788436271250248, + "logps/rejected": -2.2089595794677734, + "loss": 0.4822, + "nll_loss": 0.12052592635154724, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.884363731136546e-05, + "rewards/margins": 0.22081711888313293, + "rewards/rejected": -0.22089596092700958, + "step": 11571 + }, + { + "epoch": 8.002766251728907, + "grad_norm": 5.002890110015869, + "learning_rate": 1.1095743045950516e-05, + "log_odds_chosen": 11.039785385131836, + "log_odds_ratio": -2.9156521122786216e-05, + "logits/chosen": -0.3379184603691101, + "logits/rejected": -0.4358202815055847, + "logps/chosen": -0.0001397759042447433, + "logps/rejected": -1.9672892093658447, + "loss": 0.4984, + "nll_loss": 0.12460320442914963, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3977590242575388e-05, + "rewards/margins": 0.19671493768692017, + "rewards/rejected": -0.19672891497612, + "step": 11572 + }, + { + "epoch": 8.003457814661134, + "grad_norm": 4.253098011016846, + "learning_rate": 1.1091901029660365e-05, + "log_odds_chosen": 11.585793495178223, + "log_odds_ratio": -2.382851562288124e-05, + "logits/chosen": -0.2774587869644165, + "logits/rejected": -0.35317111015319824, + "logps/chosen": -0.0001064272946678102, + "logps/rejected": -2.1052513122558594, + "loss": 0.4296, + "nll_loss": 0.10740029066801071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0642728739185259e-05, + "rewards/margins": 0.21051448583602905, + "rewards/rejected": -0.21052512526512146, + "step": 11573 + }, + { + "epoch": 8.004149377593361, + "grad_norm": 6.317983627319336, + "learning_rate": 1.1088059013370216e-05, + "log_odds_chosen": 10.828872680664062, + "log_odds_ratio": -6.566552474396303e-05, + "logits/chosen": -0.26200076937675476, + "logits/rejected": -0.33439376950263977, + "logps/chosen": -0.0003057863796129823, + "logps/rejected": -2.421590566635132, + "loss": 0.3395, + "nll_loss": 0.084869883954525, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.057863796129823e-05, + "rewards/margins": 0.24212849140167236, + "rewards/rejected": -0.24215905368328094, + "step": 11574 + }, + { + "epoch": 8.004840940525588, + "grad_norm": 4.240973949432373, + "learning_rate": 1.1084216997080069e-05, + "log_odds_chosen": 9.857244491577148, + "log_odds_ratio": -0.00043547729728743434, + "logits/chosen": -0.46123895049095154, + "logits/rejected": -0.5853416323661804, + "logps/chosen": -0.00031145193497650325, + "logps/rejected": -1.161023736000061, + "loss": 0.2557, + "nll_loss": 0.06387601792812347, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.114519495284185e-05, + "rewards/margins": 0.11607123166322708, + "rewards/rejected": -0.11610237509012222, + "step": 11575 + }, + { + "epoch": 8.005532503457815, + "grad_norm": 4.478937149047852, + "learning_rate": 1.108037498078992e-05, + "log_odds_chosen": 12.100873947143555, + "log_odds_ratio": -1.816160511225462e-05, + "logits/chosen": -0.15364758670330048, + "logits/rejected": -0.18345826864242554, + "logps/chosen": -0.0001433866418665275, + "logps/rejected": -2.913017749786377, + "loss": 0.5397, + "nll_loss": 0.1349209100008011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4338663277158048e-05, + "rewards/margins": 0.2912874221801758, + "rewards/rejected": -0.29130178689956665, + "step": 11576 + }, + { + "epoch": 8.006224066390041, + "grad_norm": 5.591915607452393, + "learning_rate": 1.107653296449977e-05, + "log_odds_chosen": 10.005435943603516, + "log_odds_ratio": -0.0005105708260089159, + "logits/chosen": -0.32908353209495544, + "logits/rejected": -0.39447811245918274, + "logps/chosen": -0.0005771131254732609, + "logps/rejected": -1.6710118055343628, + "loss": 0.4533, + "nll_loss": 0.11326804757118225, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771130963694304e-05, + "rewards/margins": 0.16704347729682922, + "rewards/rejected": -0.1671011745929718, + "step": 11577 + }, + { + "epoch": 8.006915629322268, + "grad_norm": 3.091831684112549, + "learning_rate": 1.107269094820962e-05, + "log_odds_chosen": 10.920646667480469, + "log_odds_ratio": -0.00010340339940739796, + "logits/chosen": -0.3442881107330322, + "logits/rejected": -0.4258328676223755, + "logps/chosen": -0.00026276629068888724, + "logps/rejected": -1.8977267742156982, + "loss": 0.3592, + "nll_loss": 0.08978812396526337, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.627663161547389e-05, + "rewards/margins": 0.1897464096546173, + "rewards/rejected": -0.18977268040180206, + "step": 11578 + }, + { + "epoch": 8.007607192254495, + "grad_norm": 6.157090663909912, + "learning_rate": 1.1068848931919472e-05, + "log_odds_chosen": 11.511394500732422, + "log_odds_ratio": -6.123785715317354e-05, + "logits/chosen": -0.3286557197570801, + "logits/rejected": -0.4205593466758728, + "logps/chosen": -0.00011942606943193823, + "logps/rejected": -2.7054905891418457, + "loss": 0.3561, + "nll_loss": 0.08902223408222198, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1942607670789585e-05, + "rewards/margins": 0.2705371379852295, + "rewards/rejected": -0.27054905891418457, + "step": 11579 + }, + { + "epoch": 8.008298755186722, + "grad_norm": 3.5800986289978027, + "learning_rate": 1.1065006915629322e-05, + "log_odds_chosen": 11.658208847045898, + "log_odds_ratio": -2.0560266420943663e-05, + "logits/chosen": -0.27686649560928345, + "logits/rejected": -0.360891729593277, + "logps/chosen": -0.00014447391731664538, + "logps/rejected": -2.26849627494812, + "loss": 0.4374, + "nll_loss": 0.10934217274188995, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4447391549765598e-05, + "rewards/margins": 0.2268351912498474, + "rewards/rejected": -0.22684964537620544, + "step": 11580 + }, + { + "epoch": 8.008990318118949, + "grad_norm": 3.4170472621917725, + "learning_rate": 1.1061164899339175e-05, + "log_odds_chosen": 10.878726959228516, + "log_odds_ratio": -4.549963341560215e-05, + "logits/chosen": -0.6824355125427246, + "logits/rejected": -0.683419942855835, + "logps/chosen": -0.00018226999964099377, + "logps/rejected": -2.0756149291992188, + "loss": 0.3822, + "nll_loss": 0.09554344415664673, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.82270014192909e-05, + "rewards/margins": 0.2075432538986206, + "rewards/rejected": -0.20756149291992188, + "step": 11581 + }, + { + "epoch": 8.009681881051176, + "grad_norm": 3.8616960048675537, + "learning_rate": 1.1057322883049026e-05, + "log_odds_chosen": 10.877504348754883, + "log_odds_ratio": -4.084506872459315e-05, + "logits/chosen": -0.381029337644577, + "logits/rejected": -0.5061492323875427, + "logps/chosen": -0.00019211815379094332, + "logps/rejected": -1.7320504188537598, + "loss": 0.4776, + "nll_loss": 0.11939746141433716, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9211815015296452e-05, + "rewards/margins": 0.1731858253479004, + "rewards/rejected": -0.17320504784584045, + "step": 11582 + }, + { + "epoch": 8.010373443983402, + "grad_norm": 2.913362979888916, + "learning_rate": 1.1053480866758875e-05, + "log_odds_chosen": 11.55898666381836, + "log_odds_ratio": -1.3246997696114704e-05, + "logits/chosen": 0.10358059406280518, + "logits/rejected": -0.024889543652534485, + "logps/chosen": -0.00011885564163094386, + "logps/rejected": -2.18052339553833, + "loss": 0.3903, + "nll_loss": 0.09757485240697861, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1885564163094386e-05, + "rewards/margins": 0.21804048120975494, + "rewards/rejected": -0.21805237233638763, + "step": 11583 + }, + { + "epoch": 8.01106500691563, + "grad_norm": 4.308811664581299, + "learning_rate": 1.1049638850468727e-05, + "log_odds_chosen": 10.301643371582031, + "log_odds_ratio": -9.870291978586465e-05, + "logits/chosen": -0.1822156012058258, + "logits/rejected": -0.27328962087631226, + "logps/chosen": -0.0004364804772194475, + "logps/rejected": -1.9258673191070557, + "loss": 0.4504, + "nll_loss": 0.11259178072214127, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.364804772194475e-05, + "rewards/margins": 0.19254308938980103, + "rewards/rejected": -0.192586749792099, + "step": 11584 + }, + { + "epoch": 8.011756569847856, + "grad_norm": 4.623989582061768, + "learning_rate": 1.1045796834178578e-05, + "log_odds_chosen": 11.376053810119629, + "log_odds_ratio": -2.161969678127207e-05, + "logits/chosen": -0.04305008053779602, + "logits/rejected": -0.11707738041877747, + "logps/chosen": -0.00014561483112629503, + "logps/rejected": -2.256037950515747, + "loss": 0.6169, + "nll_loss": 0.15423165261745453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4561482203134801e-05, + "rewards/margins": 0.22558923065662384, + "rewards/rejected": -0.22560378909111023, + "step": 11585 + }, + { + "epoch": 8.012448132780083, + "grad_norm": 2.276597023010254, + "learning_rate": 1.1041954817888429e-05, + "log_odds_chosen": 10.119415283203125, + "log_odds_ratio": -0.0006033480749465525, + "logits/chosen": -0.6786028742790222, + "logits/rejected": -0.6111587285995483, + "logps/chosen": -0.0020572494249790907, + "logps/rejected": -1.9731730222702026, + "loss": 0.2592, + "nll_loss": 0.06475097686052322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002057249512290582, + "rewards/margins": 0.1971115618944168, + "rewards/rejected": -0.19731728732585907, + "step": 11586 + }, + { + "epoch": 8.01313969571231, + "grad_norm": 4.214800834655762, + "learning_rate": 1.103811280159828e-05, + "log_odds_chosen": 9.672806739807129, + "log_odds_ratio": -0.00034011263051070273, + "logits/chosen": 0.0970701351761818, + "logits/rejected": -0.09030620008707047, + "logps/chosen": -0.0003722485271282494, + "logps/rejected": -1.773955225944519, + "loss": 0.9936, + "nll_loss": 0.24835583567619324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7224854168016464e-05, + "rewards/margins": 0.17735828459262848, + "rewards/rejected": -0.1773955225944519, + "step": 11587 + }, + { + "epoch": 8.013831258644537, + "grad_norm": 3.8448140621185303, + "learning_rate": 1.103427078530813e-05, + "log_odds_chosen": 12.254363059997559, + "log_odds_ratio": -3.8563590351259336e-05, + "logits/chosen": -0.16270017623901367, + "logits/rejected": -0.21075886487960815, + "logps/chosen": -0.00015512807294726372, + "logps/rejected": -3.3416941165924072, + "loss": 0.5497, + "nll_loss": 0.13743028044700623, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5512807294726372e-05, + "rewards/margins": 0.3341538906097412, + "rewards/rejected": -0.3341693878173828, + "step": 11588 + }, + { + "epoch": 8.014522821576763, + "grad_norm": 3.889132022857666, + "learning_rate": 1.103042876901798e-05, + "log_odds_chosen": 9.956747055053711, + "log_odds_ratio": -0.0002764341770671308, + "logits/chosen": -0.5464695692062378, + "logits/rejected": -0.6339938044548035, + "logps/chosen": -0.00048362798406742513, + "logps/rejected": -1.6153647899627686, + "loss": 0.5563, + "nll_loss": 0.1390453428030014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8362799134338275e-05, + "rewards/margins": 0.1614881306886673, + "rewards/rejected": -0.16153648495674133, + "step": 11589 + }, + { + "epoch": 8.01521438450899, + "grad_norm": 3.216763496398926, + "learning_rate": 1.1026586752727833e-05, + "log_odds_chosen": 11.178112983703613, + "log_odds_ratio": -3.872377419611439e-05, + "logits/chosen": -0.12826332449913025, + "logits/rejected": -0.20491275191307068, + "logps/chosen": -0.00033394325873814523, + "logps/rejected": -2.4093754291534424, + "loss": 0.5163, + "nll_loss": 0.1290750652551651, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.339432805660181e-05, + "rewards/margins": 0.24090415239334106, + "rewards/rejected": -0.24093753099441528, + "step": 11590 + }, + { + "epoch": 8.015905947441217, + "grad_norm": 3.4779105186462402, + "learning_rate": 1.1022744736437684e-05, + "log_odds_chosen": 9.287093162536621, + "log_odds_ratio": -0.00034964943188242614, + "logits/chosen": -0.42299336194992065, + "logits/rejected": -0.4615306258201599, + "logps/chosen": -0.0003623334923759103, + "logps/rejected": -1.330190658569336, + "loss": 0.3426, + "nll_loss": 0.08560502529144287, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6233352147974074e-05, + "rewards/margins": 0.13298282027244568, + "rewards/rejected": -0.13301905989646912, + "step": 11591 + }, + { + "epoch": 8.016597510373444, + "grad_norm": 3.7570581436157227, + "learning_rate": 1.1018902720147533e-05, + "log_odds_chosen": 10.141767501831055, + "log_odds_ratio": -0.0001211938142660074, + "logits/chosen": -0.6092631816864014, + "logits/rejected": -0.642663300037384, + "logps/chosen": -0.0007956069894134998, + "logps/rejected": -1.4864659309387207, + "loss": 0.3201, + "nll_loss": 0.08002170920372009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.956070476211607e-05, + "rewards/margins": 0.1485670506954193, + "rewards/rejected": -0.14864660799503326, + "step": 11592 + }, + { + "epoch": 8.01728907330567, + "grad_norm": 4.438507556915283, + "learning_rate": 1.1015060703857385e-05, + "log_odds_chosen": 11.716581344604492, + "log_odds_ratio": -4.336609708843753e-05, + "logits/chosen": -0.5998443365097046, + "logits/rejected": -0.6375141739845276, + "logps/chosen": -0.00041599958785809577, + "logps/rejected": -2.9002466201782227, + "loss": 0.4021, + "nll_loss": 0.10051152110099792, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.15999602410011e-05, + "rewards/margins": 0.2899830639362335, + "rewards/rejected": -0.29002466797828674, + "step": 11593 + }, + { + "epoch": 8.017980636237898, + "grad_norm": 5.6682000160217285, + "learning_rate": 1.1011218687567236e-05, + "log_odds_chosen": 10.26571273803711, + "log_odds_ratio": -0.00014328473480418324, + "logits/chosen": -0.3404668867588043, + "logits/rejected": -0.18539004027843475, + "logps/chosen": -0.00023215243709273636, + "logps/rejected": -1.9225413799285889, + "loss": 0.5967, + "nll_loss": 0.14915236830711365, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3215243345475756e-05, + "rewards/margins": 0.1922309398651123, + "rewards/rejected": -0.19225415587425232, + "step": 11594 + }, + { + "epoch": 8.018672199170124, + "grad_norm": 5.593873977661133, + "learning_rate": 1.1007376671277087e-05, + "log_odds_chosen": 9.951410293579102, + "log_odds_ratio": -9.344021964352578e-05, + "logits/chosen": -0.2194986343383789, + "logits/rejected": -0.21711423993110657, + "logps/chosen": -0.00020417847554199398, + "logps/rejected": -1.3823903799057007, + "loss": 0.4061, + "nll_loss": 0.1015174463391304, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.041784864559304e-05, + "rewards/margins": 0.13821862637996674, + "rewards/rejected": -0.1382390409708023, + "step": 11595 + }, + { + "epoch": 8.019363762102351, + "grad_norm": 7.501003742218018, + "learning_rate": 1.1003534654986938e-05, + "log_odds_chosen": 10.992705345153809, + "log_odds_ratio": -5.0389258831273764e-05, + "logits/chosen": -0.22574585676193237, + "logits/rejected": -0.3487429618835449, + "logps/chosen": -0.000133042354718782, + "logps/rejected": -2.1215598583221436, + "loss": 0.2911, + "nll_loss": 0.07276320457458496, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3304234016686678e-05, + "rewards/margins": 0.212142676115036, + "rewards/rejected": -0.21215596795082092, + "step": 11596 + }, + { + "epoch": 8.020055325034578, + "grad_norm": 5.872107982635498, + "learning_rate": 1.0999692638696788e-05, + "log_odds_chosen": 11.554754257202148, + "log_odds_ratio": -4.173180786892772e-05, + "logits/chosen": -0.061662398278713226, + "logits/rejected": -0.3210103213787079, + "logps/chosen": -0.0003299083618912846, + "logps/rejected": -2.537262439727783, + "loss": 0.5086, + "nll_loss": 0.12715497612953186, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.299083618912846e-05, + "rewards/margins": 0.25369325280189514, + "rewards/rejected": -0.25372621417045593, + "step": 11597 + }, + { + "epoch": 8.020746887966805, + "grad_norm": 6.822277069091797, + "learning_rate": 1.099585062240664e-05, + "log_odds_chosen": 11.166653633117676, + "log_odds_ratio": -5.960985799902119e-05, + "logits/chosen": -0.6631227135658264, + "logits/rejected": -0.6951757669448853, + "logps/chosen": -0.00016014205175451934, + "logps/rejected": -2.3508334159851074, + "loss": 0.2911, + "nll_loss": 0.07277705520391464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6014204447856173e-05, + "rewards/margins": 0.23506733775138855, + "rewards/rejected": -0.23508334159851074, + "step": 11598 + }, + { + "epoch": 8.021438450899032, + "grad_norm": 4.478630065917969, + "learning_rate": 1.099200860611649e-05, + "log_odds_chosen": 10.090757369995117, + "log_odds_ratio": -0.0001920961367432028, + "logits/chosen": -0.14601203799247742, + "logits/rejected": -0.1455862820148468, + "logps/chosen": -0.0005073798238299787, + "logps/rejected": -1.910022258758545, + "loss": 0.375, + "nll_loss": 0.09372103214263916, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0737984565785155e-05, + "rewards/margins": 0.19095146656036377, + "rewards/rejected": -0.19100221991539001, + "step": 11599 + }, + { + "epoch": 8.022130013831259, + "grad_norm": 4.48646879196167, + "learning_rate": 1.0988166589826342e-05, + "log_odds_chosen": 10.446651458740234, + "log_odds_ratio": -0.0001280968717765063, + "logits/chosen": -0.07451249659061432, + "logits/rejected": -0.23958072066307068, + "logps/chosen": -0.0002140738070011139, + "logps/rejected": -1.84334397315979, + "loss": 0.47, + "nll_loss": 0.11749942600727081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.140738070011139e-05, + "rewards/margins": 0.18431299924850464, + "rewards/rejected": -0.184334397315979, + "step": 11600 + }, + { + "epoch": 8.022821576763485, + "grad_norm": 4.529657363891602, + "learning_rate": 1.0984324573536191e-05, + "log_odds_chosen": 10.769058227539062, + "log_odds_ratio": -0.00019904434157069772, + "logits/chosen": -0.4270152747631073, + "logits/rejected": -0.3482168912887573, + "logps/chosen": -0.0003587648388929665, + "logps/rejected": -1.9207621812820435, + "loss": 0.4951, + "nll_loss": 0.12376239895820618, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.587648461689241e-05, + "rewards/margins": 0.1920403391122818, + "rewards/rejected": -0.19207622110843658, + "step": 11601 + }, + { + "epoch": 8.023513139695712, + "grad_norm": 3.555155038833618, + "learning_rate": 1.0980482557246042e-05, + "log_odds_chosen": 11.555574417114258, + "log_odds_ratio": -2.083612525893841e-05, + "logits/chosen": -0.5681159496307373, + "logits/rejected": -0.6570307612419128, + "logps/chosen": -9.780601249076426e-05, + "logps/rejected": -2.4376511573791504, + "loss": 0.3922, + "nll_loss": 0.09804081916809082, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.780602340470068e-06, + "rewards/margins": 0.24375534057617188, + "rewards/rejected": -0.24376511573791504, + "step": 11602 + }, + { + "epoch": 8.024204702627939, + "grad_norm": 4.330643177032471, + "learning_rate": 1.0976640540955895e-05, + "log_odds_chosen": 9.720133781433105, + "log_odds_ratio": -0.00024881906574591994, + "logits/chosen": -0.27257040143013, + "logits/rejected": -0.4565042555332184, + "logps/chosen": -0.00029421134968288243, + "logps/rejected": -1.327944040298462, + "loss": 0.5498, + "nll_loss": 0.13742215931415558, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9421134968288243e-05, + "rewards/margins": 0.13276498019695282, + "rewards/rejected": -0.13279439508914948, + "step": 11603 + }, + { + "epoch": 8.024896265560166, + "grad_norm": 3.577317476272583, + "learning_rate": 1.0972798524665745e-05, + "log_odds_chosen": 12.49842643737793, + "log_odds_ratio": -9.71730059973197e-06, + "logits/chosen": -0.5571168065071106, + "logits/rejected": -0.5459440350532532, + "logps/chosen": -0.00013112853048369288, + "logps/rejected": -3.0554847717285156, + "loss": 0.5315, + "nll_loss": 0.132865309715271, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3112854503560811e-05, + "rewards/margins": 0.30553534626960754, + "rewards/rejected": -0.30554845929145813, + "step": 11604 + }, + { + "epoch": 8.025587828492393, + "grad_norm": 3.6991682052612305, + "learning_rate": 1.0968956508375596e-05, + "log_odds_chosen": 10.687396049499512, + "log_odds_ratio": -6.707415741402656e-05, + "logits/chosen": -0.47288778424263, + "logits/rejected": -0.5687136054039001, + "logps/chosen": -0.0001317668065894395, + "logps/rejected": -1.7559115886688232, + "loss": 0.4231, + "nll_loss": 0.10575664043426514, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3176681022741832e-05, + "rewards/margins": 0.17557796835899353, + "rewards/rejected": -0.1755911409854889, + "step": 11605 + }, + { + "epoch": 8.02627939142462, + "grad_norm": 3.841230630874634, + "learning_rate": 1.0965114492085447e-05, + "log_odds_chosen": 11.022579193115234, + "log_odds_ratio": -9.507785580353811e-05, + "logits/chosen": -0.3446550965309143, + "logits/rejected": -0.4020662009716034, + "logps/chosen": -0.000423995777964592, + "logps/rejected": -2.345393657684326, + "loss": 0.4796, + "nll_loss": 0.11990146338939667, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2399580706842244e-05, + "rewards/margins": 0.23449698090553284, + "rewards/rejected": -0.23453938961029053, + "step": 11606 + }, + { + "epoch": 8.026970954356846, + "grad_norm": 3.552696704864502, + "learning_rate": 1.0961272475795298e-05, + "log_odds_chosen": 9.273900032043457, + "log_odds_ratio": -0.0002788471174426377, + "logits/chosen": -0.521811306476593, + "logits/rejected": -0.546200156211853, + "logps/chosen": -0.00033644909854047, + "logps/rejected": -1.2243123054504395, + "loss": 0.3082, + "nll_loss": 0.07701878994703293, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.364490476087667e-05, + "rewards/margins": 0.12239758670330048, + "rewards/rejected": -0.12243123352527618, + "step": 11607 + }, + { + "epoch": 8.027662517289073, + "grad_norm": 5.155054569244385, + "learning_rate": 1.0957430459505148e-05, + "log_odds_chosen": 10.779470443725586, + "log_odds_ratio": -3.2731564715504646e-05, + "logits/chosen": -0.2122465968132019, + "logits/rejected": -0.29279085993766785, + "logps/chosen": -0.00015573952987324446, + "logps/rejected": -1.9129401445388794, + "loss": 0.4411, + "nll_loss": 0.11027791351079941, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.557395444251597e-05, + "rewards/margins": 0.19127842783927917, + "rewards/rejected": -0.19129401445388794, + "step": 11608 + }, + { + "epoch": 8.0283540802213, + "grad_norm": 3.937246799468994, + "learning_rate": 1.0953588443215e-05, + "log_odds_chosen": 11.833447456359863, + "log_odds_ratio": -9.855545067694038e-05, + "logits/chosen": -0.40071818232536316, + "logits/rejected": -0.5153113007545471, + "logps/chosen": -0.00011262780026299879, + "logps/rejected": -2.7883224487304688, + "loss": 0.4929, + "nll_loss": 0.12320946156978607, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.126278039009776e-05, + "rewards/margins": 0.2788209617137909, + "rewards/rejected": -0.27883222699165344, + "step": 11609 + }, + { + "epoch": 8.029045643153527, + "grad_norm": 2.6097638607025146, + "learning_rate": 1.094974642692485e-05, + "log_odds_chosen": 10.482734680175781, + "log_odds_ratio": -0.0008700615144334733, + "logits/chosen": -0.10049141943454742, + "logits/rejected": -0.0984073355793953, + "logps/chosen": -0.0010803381446748972, + "logps/rejected": -2.508087396621704, + "loss": 0.2925, + "nll_loss": 0.0730450376868248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010803381883306429, + "rewards/margins": 0.2507007122039795, + "rewards/rejected": -0.2508087754249573, + "step": 11610 + }, + { + "epoch": 8.029737206085754, + "grad_norm": 2.400596857070923, + "learning_rate": 1.09459044106347e-05, + "log_odds_chosen": 10.24705982208252, + "log_odds_ratio": -8.93109681783244e-05, + "logits/chosen": -0.3052368462085724, + "logits/rejected": -0.37610214948654175, + "logps/chosen": -0.0007598986267112195, + "logps/rejected": -1.868806004524231, + "loss": 0.271, + "nll_loss": 0.06773454695940018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.598986121593043e-05, + "rewards/margins": 0.1868046224117279, + "rewards/rejected": -0.18688060343265533, + "step": 11611 + }, + { + "epoch": 8.03042876901798, + "grad_norm": 4.1927971839904785, + "learning_rate": 1.0942062394344553e-05, + "log_odds_chosen": 11.035076141357422, + "log_odds_ratio": -4.5136461267247796e-05, + "logits/chosen": -0.28807422518730164, + "logits/rejected": -0.36113497614860535, + "logps/chosen": -0.00015464363968931139, + "logps/rejected": -2.043996810913086, + "loss": 0.5135, + "nll_loss": 0.12838131189346313, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5464365787920542e-05, + "rewards/margins": 0.2043842226266861, + "rewards/rejected": -0.20439967513084412, + "step": 11612 + }, + { + "epoch": 8.031120331950207, + "grad_norm": 4.350894451141357, + "learning_rate": 1.0938220378054404e-05, + "log_odds_chosen": 9.81594467163086, + "log_odds_ratio": -0.04112391918897629, + "logits/chosen": -0.26520606875419617, + "logits/rejected": -0.19644969701766968, + "logps/chosen": -0.007886898703873158, + "logps/rejected": -2.090251922607422, + "loss": 0.422, + "nll_loss": 0.10137651860713959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000788689823821187, + "rewards/margins": 0.20823650062084198, + "rewards/rejected": -0.20902520418167114, + "step": 11613 + }, + { + "epoch": 8.031811894882434, + "grad_norm": 9.433053970336914, + "learning_rate": 1.0934378361764255e-05, + "log_odds_chosen": 10.647979736328125, + "log_odds_ratio": -8.212029933929443e-05, + "logits/chosen": -0.6354914903640747, + "logits/rejected": -0.6734293103218079, + "logps/chosen": -0.00031259743263944983, + "logps/rejected": -2.2240993976593018, + "loss": 0.4741, + "nll_loss": 0.11851217597723007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.125974035356194e-05, + "rewards/margins": 0.222378671169281, + "rewards/rejected": -0.2224099338054657, + "step": 11614 + }, + { + "epoch": 8.032503457814661, + "grad_norm": 3.092810869216919, + "learning_rate": 1.0930536345474105e-05, + "log_odds_chosen": 11.151169776916504, + "log_odds_ratio": -0.0006351221818476915, + "logits/chosen": -0.5462251901626587, + "logits/rejected": -0.5252736806869507, + "logps/chosen": -0.006568577140569687, + "logps/rejected": -3.098931312561035, + "loss": 0.6262, + "nll_loss": 0.1564791053533554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000656857795547694, + "rewards/margins": 0.3092362582683563, + "rewards/rejected": -0.3098931312561035, + "step": 11615 + }, + { + "epoch": 8.033195020746888, + "grad_norm": 2.6533544063568115, + "learning_rate": 1.0926694329183956e-05, + "log_odds_chosen": 9.996108055114746, + "log_odds_ratio": -0.00024949904764071107, + "logits/chosen": -0.19736546277999878, + "logits/rejected": -0.3198302388191223, + "logps/chosen": -0.00030988510116003454, + "logps/rejected": -1.627938985824585, + "loss": 0.262, + "nll_loss": 0.06546853482723236, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.098850720562041e-05, + "rewards/margins": 0.16276292502880096, + "rewards/rejected": -0.16279390454292297, + "step": 11616 + }, + { + "epoch": 8.033886583679115, + "grad_norm": 4.698807716369629, + "learning_rate": 1.0922852312893807e-05, + "log_odds_chosen": 11.338987350463867, + "log_odds_ratio": -0.00014085850853007287, + "logits/chosen": -0.3571838438510895, + "logits/rejected": -0.34271275997161865, + "logps/chosen": -0.0004037006292492151, + "logps/rejected": -2.928548574447632, + "loss": 0.5348, + "nll_loss": 0.1336820274591446, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0370068745687604e-05, + "rewards/margins": 0.2928144931793213, + "rewards/rejected": -0.2928548753261566, + "step": 11617 + }, + { + "epoch": 8.034578146611342, + "grad_norm": 4.942789077758789, + "learning_rate": 1.091901029660366e-05, + "log_odds_chosen": 11.588337898254395, + "log_odds_ratio": -3.798071702476591e-05, + "logits/chosen": -0.36027687788009644, + "logits/rejected": -0.5589436292648315, + "logps/chosen": -0.00031201704405248165, + "logps/rejected": -2.9800496101379395, + "loss": 0.4361, + "nll_loss": 0.1090322956442833, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.12017000396736e-05, + "rewards/margins": 0.29797375202178955, + "rewards/rejected": -0.29800495505332947, + "step": 11618 + }, + { + "epoch": 8.035269709543568, + "grad_norm": 4.74745512008667, + "learning_rate": 1.0915168280313508e-05, + "log_odds_chosen": 11.68956184387207, + "log_odds_ratio": -3.099131572525948e-05, + "logits/chosen": -0.10821807384490967, + "logits/rejected": -0.2176653891801834, + "logps/chosen": -0.0001303794706473127, + "logps/rejected": -2.342233657836914, + "loss": 0.3688, + "nll_loss": 0.09219465404748917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3037947610428091e-05, + "rewards/margins": 0.23421034216880798, + "rewards/rejected": -0.2342233657836914, + "step": 11619 + }, + { + "epoch": 8.035961272475795, + "grad_norm": 4.976463317871094, + "learning_rate": 1.0911326264023359e-05, + "log_odds_chosen": 10.253108024597168, + "log_odds_ratio": -0.0005621587042696774, + "logits/chosen": -0.11551451683044434, + "logits/rejected": -0.1854763776063919, + "logps/chosen": -0.0005382780218496919, + "logps/rejected": -1.9607380628585815, + "loss": 0.3469, + "nll_loss": 0.08666163682937622, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.382780364016071e-05, + "rewards/margins": 0.1960199624300003, + "rewards/rejected": -0.19607380032539368, + "step": 11620 + }, + { + "epoch": 8.036652835408022, + "grad_norm": 4.694047451019287, + "learning_rate": 1.0907484247733212e-05, + "log_odds_chosen": 10.585967063903809, + "log_odds_ratio": -0.0002062514831777662, + "logits/chosen": -0.21767069399356842, + "logits/rejected": -0.3120375871658325, + "logps/chosen": -0.00042118871351704, + "logps/rejected": -2.0939784049987793, + "loss": 0.3806, + "nll_loss": 0.0951206386089325, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.211887426208705e-05, + "rewards/margins": 0.20935575664043427, + "rewards/rejected": -0.20939785242080688, + "step": 11621 + }, + { + "epoch": 8.037344398340249, + "grad_norm": 3.249535083770752, + "learning_rate": 1.0903642231443062e-05, + "log_odds_chosen": 11.063423156738281, + "log_odds_ratio": -9.601256169844419e-05, + "logits/chosen": 0.0025239139795303345, + "logits/rejected": 0.01908857375383377, + "logps/chosen": -0.00028602141537703574, + "logps/rejected": -2.2366976737976074, + "loss": 0.5118, + "nll_loss": 0.12793602049350739, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8602142265299335e-05, + "rewards/margins": 0.22364118695259094, + "rewards/rejected": -0.22366978228092194, + "step": 11622 + }, + { + "epoch": 8.038035961272476, + "grad_norm": 4.789508819580078, + "learning_rate": 1.0899800215152913e-05, + "log_odds_chosen": 10.925281524658203, + "log_odds_ratio": -0.00011090299813076854, + "logits/chosen": 0.2372182309627533, + "logits/rejected": 0.08504534512758255, + "logps/chosen": -0.00030619502649642527, + "logps/rejected": -2.3250906467437744, + "loss": 0.5231, + "nll_loss": 0.13077004253864288, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.061950337723829e-05, + "rewards/margins": 0.23247846961021423, + "rewards/rejected": -0.2325090765953064, + "step": 11623 + }, + { + "epoch": 8.038727524204702, + "grad_norm": 3.4549219608306885, + "learning_rate": 1.0895958198862764e-05, + "log_odds_chosen": 11.175882339477539, + "log_odds_ratio": -3.460890002315864e-05, + "logits/chosen": 0.020357206463813782, + "logits/rejected": -0.09108800441026688, + "logps/chosen": -0.0002967551117762923, + "logps/rejected": -2.847583770751953, + "loss": 0.3917, + "nll_loss": 0.09792071580886841, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9675511541427113e-05, + "rewards/margins": 0.2847287058830261, + "rewards/rejected": -0.2847583591938019, + "step": 11624 + }, + { + "epoch": 8.03941908713693, + "grad_norm": 9.085065841674805, + "learning_rate": 1.0892116182572615e-05, + "log_odds_chosen": 11.052441596984863, + "log_odds_ratio": -3.7479036109289154e-05, + "logits/chosen": -0.21325267851352692, + "logits/rejected": -0.2510700225830078, + "logps/chosen": -0.00045145026524551213, + "logps/rejected": -2.528874635696411, + "loss": 0.3844, + "nll_loss": 0.09608972817659378, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.51450287073385e-05, + "rewards/margins": 0.25284233689308167, + "rewards/rejected": -0.25288745760917664, + "step": 11625 + }, + { + "epoch": 8.040110650069156, + "grad_norm": 7.780580997467041, + "learning_rate": 1.0888274166282465e-05, + "log_odds_chosen": 11.271392822265625, + "log_odds_ratio": -1.960910958587192e-05, + "logits/chosen": -0.30678537487983704, + "logits/rejected": -0.32438284158706665, + "logps/chosen": -0.00010147874127142131, + "logps/rejected": -1.9354403018951416, + "loss": 0.3865, + "nll_loss": 0.09662692248821259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0147874490940012e-05, + "rewards/margins": 0.19353388249874115, + "rewards/rejected": -0.19354403018951416, + "step": 11626 + }, + { + "epoch": 8.040802213001383, + "grad_norm": 2.9626879692077637, + "learning_rate": 1.0884432149992318e-05, + "log_odds_chosen": 10.288896560668945, + "log_odds_ratio": -0.00017442651733290404, + "logits/chosen": 0.11750302463769913, + "logits/rejected": 0.08454018831253052, + "logps/chosen": -0.00024051466607488692, + "logps/rejected": -1.6254515647888184, + "loss": 0.2825, + "nll_loss": 0.07061274349689484, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4051467335084453e-05, + "rewards/margins": 0.1625211089849472, + "rewards/rejected": -0.16254517436027527, + "step": 11627 + }, + { + "epoch": 8.04149377593361, + "grad_norm": 3.7291109561920166, + "learning_rate": 1.0880590133702168e-05, + "log_odds_chosen": 10.420429229736328, + "log_odds_ratio": -7.376746361842379e-05, + "logits/chosen": 0.1731017827987671, + "logits/rejected": 0.1459406614303589, + "logps/chosen": -0.0007401591865345836, + "logps/rejected": -1.845339059829712, + "loss": 0.4723, + "nll_loss": 0.11805613338947296, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.401593029499054e-05, + "rewards/margins": 0.1844598799943924, + "rewards/rejected": -0.18453389406204224, + "step": 11628 + }, + { + "epoch": 8.042185338865837, + "grad_norm": 4.236103534698486, + "learning_rate": 1.0876748117412018e-05, + "log_odds_chosen": 10.347618103027344, + "log_odds_ratio": -7.569259469164535e-05, + "logits/chosen": 0.058949798345565796, + "logits/rejected": 0.04264757037162781, + "logps/chosen": -0.0009270127629861236, + "logps/rejected": -2.4099678993225098, + "loss": 0.5477, + "nll_loss": 0.13691957294940948, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.270127338822931e-05, + "rewards/margins": 0.2409040927886963, + "rewards/rejected": -0.24099679291248322, + "step": 11629 + }, + { + "epoch": 8.042876901798063, + "grad_norm": 4.982583045959473, + "learning_rate": 1.087290610112187e-05, + "log_odds_chosen": 11.669830322265625, + "log_odds_ratio": -2.4999659217428416e-05, + "logits/chosen": -0.3383077085018158, + "logits/rejected": -0.3936065435409546, + "logps/chosen": -0.0004458319745026529, + "logps/rejected": -3.0715909004211426, + "loss": 0.5125, + "nll_loss": 0.12813270092010498, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4583201088244095e-05, + "rewards/margins": 0.30711451172828674, + "rewards/rejected": -0.3071591258049011, + "step": 11630 + }, + { + "epoch": 8.04356846473029, + "grad_norm": 4.099587440490723, + "learning_rate": 1.086906408483172e-05, + "log_odds_chosen": 10.979185104370117, + "log_odds_ratio": -0.000795874570030719, + "logits/chosen": -0.07905671745538712, + "logits/rejected": -0.26628753542900085, + "logps/chosen": -0.0004789835074916482, + "logps/rejected": -2.364879846572876, + "loss": 0.3142, + "nll_loss": 0.07848026603460312, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.78983492939733e-05, + "rewards/margins": 0.23644009232521057, + "rewards/rejected": -0.2364879995584488, + "step": 11631 + }, + { + "epoch": 8.044260027662517, + "grad_norm": 7.567932605743408, + "learning_rate": 1.0865222068541571e-05, + "log_odds_chosen": 11.234302520751953, + "log_odds_ratio": -2.353546369704418e-05, + "logits/chosen": -0.24551396071910858, + "logits/rejected": -0.1941598355770111, + "logps/chosen": -0.00015664119564462453, + "logps/rejected": -2.4318954944610596, + "loss": 0.4114, + "nll_loss": 0.10283748060464859, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5664119928260334e-05, + "rewards/margins": 0.24317388236522675, + "rewards/rejected": -0.24318955838680267, + "step": 11632 + }, + { + "epoch": 8.044951590594744, + "grad_norm": 4.48465633392334, + "learning_rate": 1.0861380052251422e-05, + "log_odds_chosen": 11.189987182617188, + "log_odds_ratio": -6.808263424318284e-05, + "logits/chosen": -0.851357638835907, + "logits/rejected": -0.8038666248321533, + "logps/chosen": -0.0001597387745277956, + "logps/rejected": -2.1322896480560303, + "loss": 0.419, + "nll_loss": 0.10473240911960602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5973879271768965e-05, + "rewards/margins": 0.2132129967212677, + "rewards/rejected": -0.2132289707660675, + "step": 11633 + }, + { + "epoch": 8.04564315352697, + "grad_norm": 3.913274049758911, + "learning_rate": 1.0857538035961273e-05, + "log_odds_chosen": 12.231964111328125, + "log_odds_ratio": -1.1904116945515852e-05, + "logits/chosen": -0.18402579426765442, + "logits/rejected": -0.18470388650894165, + "logps/chosen": -0.00018358533270657063, + "logps/rejected": -2.8793082237243652, + "loss": 0.3135, + "nll_loss": 0.07838185131549835, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8358534362050705e-05, + "rewards/margins": 0.2879124879837036, + "rewards/rejected": -0.28793084621429443, + "step": 11634 + }, + { + "epoch": 8.046334716459198, + "grad_norm": 7.306576728820801, + "learning_rate": 1.0853696019671124e-05, + "log_odds_chosen": 11.634334564208984, + "log_odds_ratio": -1.9609975424828008e-05, + "logits/chosen": -0.11489503085613251, + "logits/rejected": -0.18257029354572296, + "logps/chosen": -0.0002044460125034675, + "logps/rejected": -2.74857234954834, + "loss": 0.5027, + "nll_loss": 0.125673308968544, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.044460052275099e-05, + "rewards/margins": 0.27483680844306946, + "rewards/rejected": -0.2748572528362274, + "step": 11635 + }, + { + "epoch": 8.047026279391424, + "grad_norm": 7.451847076416016, + "learning_rate": 1.0849854003380974e-05, + "log_odds_chosen": 11.932010650634766, + "log_odds_ratio": -5.420656452770345e-05, + "logits/chosen": -0.017487986013293266, + "logits/rejected": -0.21278372406959534, + "logps/chosen": -0.00030589461675845087, + "logps/rejected": -3.496171474456787, + "loss": 0.6613, + "nll_loss": 0.1653130054473877, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.058946458622813e-05, + "rewards/margins": 0.349586546421051, + "rewards/rejected": -0.3496171236038208, + "step": 11636 + }, + { + "epoch": 8.047717842323651, + "grad_norm": 4.815349102020264, + "learning_rate": 1.0846011987090827e-05, + "log_odds_chosen": 9.847448348999023, + "log_odds_ratio": -0.0005425678100436926, + "logits/chosen": -0.18823321163654327, + "logits/rejected": -0.3128855526447296, + "logps/chosen": -0.0012195882154628634, + "logps/rejected": -2.4391961097717285, + "loss": 0.5404, + "nll_loss": 0.13504886627197266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012195881572552025, + "rewards/margins": 0.2437976449728012, + "rewards/rejected": -0.24391961097717285, + "step": 11637 + }, + { + "epoch": 8.048409405255878, + "grad_norm": 3.5785138607025146, + "learning_rate": 1.0842169970800676e-05, + "log_odds_chosen": 10.383599281311035, + "log_odds_ratio": -0.00015447475016117096, + "logits/chosen": -0.44339680671691895, + "logits/rejected": -0.4116237759590149, + "logps/chosen": -0.00036477501271292567, + "logps/rejected": -1.862234354019165, + "loss": 0.4705, + "nll_loss": 0.11760586500167847, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.647750418167561e-05, + "rewards/margins": 0.18618696928024292, + "rewards/rejected": -0.18622344732284546, + "step": 11638 + }, + { + "epoch": 8.049100968188105, + "grad_norm": 4.362621784210205, + "learning_rate": 1.0838327954510527e-05, + "log_odds_chosen": 10.884719848632812, + "log_odds_ratio": -5.54381767869927e-05, + "logits/chosen": -0.3651423156261444, + "logits/rejected": -0.37535524368286133, + "logps/chosen": -0.0001369999663438648, + "logps/rejected": -1.898221731185913, + "loss": 0.54, + "nll_loss": 0.13500377535820007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3699996088689659e-05, + "rewards/margins": 0.18980847299098969, + "rewards/rejected": -0.18982218205928802, + "step": 11639 + }, + { + "epoch": 8.049792531120332, + "grad_norm": 3.383798837661743, + "learning_rate": 1.0834485938220379e-05, + "log_odds_chosen": 11.25387191772461, + "log_odds_ratio": -8.179190626833588e-05, + "logits/chosen": -0.4336949586868286, + "logits/rejected": -0.5522239804267883, + "logps/chosen": -0.00037895014975219965, + "logps/rejected": -2.568528652191162, + "loss": 0.2993, + "nll_loss": 0.07481571286916733, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.789501715800725e-05, + "rewards/margins": 0.25681498646736145, + "rewards/rejected": -0.2568528652191162, + "step": 11640 + }, + { + "epoch": 8.050484094052559, + "grad_norm": 2.910865306854248, + "learning_rate": 1.083064392193023e-05, + "log_odds_chosen": 9.588981628417969, + "log_odds_ratio": -0.0007639298564754426, + "logits/chosen": 0.09794837981462479, + "logits/rejected": -0.042546890676021576, + "logps/chosen": -0.0018523625330999494, + "logps/rejected": -1.5158963203430176, + "loss": 0.2927, + "nll_loss": 0.07309460639953613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018523624748922884, + "rewards/margins": 0.15140441060066223, + "rewards/rejected": -0.15158963203430176, + "step": 11641 + }, + { + "epoch": 8.051175656984785, + "grad_norm": 2.5796313285827637, + "learning_rate": 1.082680190564008e-05, + "log_odds_chosen": 9.901168823242188, + "log_odds_ratio": -9.957759903045371e-05, + "logits/chosen": -0.4514090418815613, + "logits/rejected": -0.5148862600326538, + "logps/chosen": -0.00025001997710205615, + "logps/rejected": -1.448334813117981, + "loss": 0.2457, + "nll_loss": 0.06141233444213867, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5001998437801376e-05, + "rewards/margins": 0.14480847120285034, + "rewards/rejected": -0.14483347535133362, + "step": 11642 + }, + { + "epoch": 8.051867219917012, + "grad_norm": 3.1253342628479004, + "learning_rate": 1.0822959889349931e-05, + "log_odds_chosen": 9.646997451782227, + "log_odds_ratio": -0.0008379703504033387, + "logits/chosen": -0.213422030210495, + "logits/rejected": -0.14800477027893066, + "logps/chosen": -0.0015627527609467506, + "logps/rejected": -1.773625373840332, + "loss": 0.2836, + "nll_loss": 0.07080502808094025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015627527318429202, + "rewards/margins": 0.17720626294612885, + "rewards/rejected": -0.17736253142356873, + "step": 11643 + }, + { + "epoch": 8.052558782849239, + "grad_norm": 4.140681266784668, + "learning_rate": 1.0819117873059782e-05, + "log_odds_chosen": 11.40479564666748, + "log_odds_ratio": -0.000390512403100729, + "logits/chosen": -0.0796457827091217, + "logits/rejected": -0.1556369662284851, + "logps/chosen": -0.0013447202509269118, + "logps/rejected": -3.1734018325805664, + "loss": 0.3751, + "nll_loss": 0.09372792392969131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013447203673422337, + "rewards/margins": 0.3172057271003723, + "rewards/rejected": -0.3173401951789856, + "step": 11644 + }, + { + "epoch": 8.053250345781466, + "grad_norm": 4.3886919021606445, + "learning_rate": 1.0815275856769633e-05, + "log_odds_chosen": 11.26523494720459, + "log_odds_ratio": -4.273475133231841e-05, + "logits/chosen": -0.4076208174228668, + "logits/rejected": -0.5760940909385681, + "logps/chosen": -0.00014619230933021754, + "logps/rejected": -2.109987258911133, + "loss": 0.276, + "nll_loss": 0.06899204105138779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4619231478718575e-05, + "rewards/margins": 0.21098411083221436, + "rewards/rejected": -0.21099872887134552, + "step": 11645 + }, + { + "epoch": 8.053941908713693, + "grad_norm": 6.297887802124023, + "learning_rate": 1.0811433840479485e-05, + "log_odds_chosen": 11.587589263916016, + "log_odds_ratio": -1.1658419680316001e-05, + "logits/chosen": -0.5171922445297241, + "logits/rejected": -0.5611253976821899, + "logps/chosen": -8.005322160897776e-05, + "logps/rejected": -2.2642323970794678, + "loss": 0.5005, + "nll_loss": 0.12512831389904022, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.005322342796717e-06, + "rewards/margins": 0.2264152467250824, + "rewards/rejected": -0.2264232635498047, + "step": 11646 + }, + { + "epoch": 8.05463347164592, + "grad_norm": 4.215669631958008, + "learning_rate": 1.0807591824189334e-05, + "log_odds_chosen": 11.566927909851074, + "log_odds_ratio": -2.2225858629099093e-05, + "logits/chosen": -0.3866409957408905, + "logits/rejected": -0.3898853659629822, + "logps/chosen": -0.0001798073499230668, + "logps/rejected": -2.6164865493774414, + "loss": 0.3943, + "nll_loss": 0.09856823086738586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.798073571990244e-05, + "rewards/margins": 0.26163071393966675, + "rewards/rejected": -0.26164865493774414, + "step": 11647 + }, + { + "epoch": 8.055325034578146, + "grad_norm": 5.187557697296143, + "learning_rate": 1.0803749807899185e-05, + "log_odds_chosen": 10.51015567779541, + "log_odds_ratio": -6.061392923584208e-05, + "logits/chosen": -0.3999212682247162, + "logits/rejected": -0.41192543506622314, + "logps/chosen": -0.00026394022279419005, + "logps/rejected": -2.035250663757324, + "loss": 0.4287, + "nll_loss": 0.10716467350721359, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6394021915621124e-05, + "rewards/margins": 0.2034986913204193, + "rewards/rejected": -0.20352506637573242, + "step": 11648 + }, + { + "epoch": 8.056016597510373, + "grad_norm": 4.2619547843933105, + "learning_rate": 1.0799907791609038e-05, + "log_odds_chosen": 11.489641189575195, + "log_odds_ratio": -5.318426701705903e-05, + "logits/chosen": -0.5062676668167114, + "logits/rejected": -0.6034945845603943, + "logps/chosen": -0.00047905201790854335, + "logps/rejected": -2.9666566848754883, + "loss": 0.432, + "nll_loss": 0.108005091547966, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.790520324604586e-05, + "rewards/margins": 0.2966177463531494, + "rewards/rejected": -0.29666566848754883, + "step": 11649 + }, + { + "epoch": 8.0567081604426, + "grad_norm": 3.2921361923217773, + "learning_rate": 1.0796065775318888e-05, + "log_odds_chosen": 10.716009140014648, + "log_odds_ratio": -5.8777186495717615e-05, + "logits/chosen": 0.06899416446685791, + "logits/rejected": 0.008984297513961792, + "logps/chosen": -0.0003209186252206564, + "logps/rejected": -2.0981249809265137, + "loss": 0.3706, + "nll_loss": 0.09265469014644623, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.209186252206564e-05, + "rewards/margins": 0.20978042483329773, + "rewards/rejected": -0.2098124921321869, + "step": 11650 + }, + { + "epoch": 8.057399723374827, + "grad_norm": 18.162288665771484, + "learning_rate": 1.0792223759028739e-05, + "log_odds_chosen": 11.46948528289795, + "log_odds_ratio": -0.0001158723680418916, + "logits/chosen": -0.14668020606040955, + "logits/rejected": -0.4037422835826874, + "logps/chosen": -0.0009772483026608825, + "logps/rejected": -2.8705010414123535, + "loss": 0.4352, + "nll_loss": 0.10878217220306396, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.772483463166282e-05, + "rewards/margins": 0.2869523763656616, + "rewards/rejected": -0.2870500981807709, + "step": 11651 + }, + { + "epoch": 8.058091286307054, + "grad_norm": 9.27110767364502, + "learning_rate": 1.078838174273859e-05, + "log_odds_chosen": 11.206290245056152, + "log_odds_ratio": -2.393356589891482e-05, + "logits/chosen": 0.10491567105054855, + "logits/rejected": 0.07313147187232971, + "logps/chosen": -0.00015125813661143184, + "logps/rejected": -2.252528190612793, + "loss": 0.6418, + "nll_loss": 0.16045981645584106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5125813661143184e-05, + "rewards/margins": 0.22523772716522217, + "rewards/rejected": -0.22525283694267273, + "step": 11652 + }, + { + "epoch": 8.05878284923928, + "grad_norm": 7.424502849578857, + "learning_rate": 1.078453972644844e-05, + "log_odds_chosen": 10.65821361541748, + "log_odds_ratio": -0.00013696661335416138, + "logits/chosen": -0.32704007625579834, + "logits/rejected": -0.2945024371147156, + "logps/chosen": -0.00035628239857032895, + "logps/rejected": -2.109651565551758, + "loss": 0.4554, + "nll_loss": 0.11382496356964111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5628239857032895e-05, + "rewards/margins": 0.2109295278787613, + "rewards/rejected": -0.21096515655517578, + "step": 11653 + }, + { + "epoch": 8.059474412171507, + "grad_norm": 3.443453073501587, + "learning_rate": 1.0780697710158291e-05, + "log_odds_chosen": 10.71337890625, + "log_odds_ratio": -5.269534085527994e-05, + "logits/chosen": -0.31750330328941345, + "logits/rejected": -0.41674119234085083, + "logps/chosen": -0.00018927460769191384, + "logps/rejected": -2.038935661315918, + "loss": 0.4133, + "nll_loss": 0.10332353413105011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8927461496787146e-05, + "rewards/margins": 0.2038746178150177, + "rewards/rejected": -0.20389357209205627, + "step": 11654 + }, + { + "epoch": 8.060165975103734, + "grad_norm": 5.964850902557373, + "learning_rate": 1.0776855693868144e-05, + "log_odds_chosen": 11.15042495727539, + "log_odds_ratio": -0.0005366685218177736, + "logits/chosen": -0.2146221250295639, + "logits/rejected": -0.2757033109664917, + "logps/chosen": -0.0006667596171610057, + "logps/rejected": -2.569218397140503, + "loss": 0.4874, + "nll_loss": 0.1217995136976242, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.667595880571753e-05, + "rewards/margins": 0.2568551301956177, + "rewards/rejected": -0.25692182779312134, + "step": 11655 + }, + { + "epoch": 8.060857538035961, + "grad_norm": 5.175530433654785, + "learning_rate": 1.0773013677577993e-05, + "log_odds_chosen": 11.816387176513672, + "log_odds_ratio": -1.1562333384063095e-05, + "logits/chosen": -0.252761572599411, + "logits/rejected": -0.30724823474884033, + "logps/chosen": -0.00015155701839830726, + "logps/rejected": -2.734372138977051, + "loss": 0.4865, + "nll_loss": 0.12162323296070099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5155701476032846e-05, + "rewards/margins": 0.2734220623970032, + "rewards/rejected": -0.2734372317790985, + "step": 11656 + }, + { + "epoch": 8.061549100968188, + "grad_norm": 4.897181987762451, + "learning_rate": 1.0769171661287844e-05, + "log_odds_chosen": 12.112031936645508, + "log_odds_ratio": -8.372231604880653e-06, + "logits/chosen": -0.08842036128044128, + "logits/rejected": -0.07480307668447495, + "logps/chosen": -0.00013405829668045044, + "logps/rejected": -2.833430767059326, + "loss": 0.3035, + "nll_loss": 0.07588332146406174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3405830941337626e-05, + "rewards/margins": 0.28332969546318054, + "rewards/rejected": -0.2833430767059326, + "step": 11657 + }, + { + "epoch": 8.062240663900415, + "grad_norm": 3.7266387939453125, + "learning_rate": 1.0765329644997696e-05, + "log_odds_chosen": 11.5302095413208, + "log_odds_ratio": -0.0002347620902583003, + "logits/chosen": -0.3410811126232147, + "logits/rejected": -0.37336307764053345, + "logps/chosen": -0.0015355985378846526, + "logps/rejected": -2.878851890563965, + "loss": 0.3232, + "nll_loss": 0.08078762888908386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015355987125076354, + "rewards/margins": 0.2877316176891327, + "rewards/rejected": -0.2878851890563965, + "step": 11658 + }, + { + "epoch": 8.062932226832642, + "grad_norm": 7.588277339935303, + "learning_rate": 1.0761487628707547e-05, + "log_odds_chosen": 10.4693603515625, + "log_odds_ratio": -0.0007120502414181828, + "logits/chosen": -0.262089341878891, + "logits/rejected": -0.2863765358924866, + "logps/chosen": -0.0006862751324661076, + "logps/rejected": -2.2005093097686768, + "loss": 0.5006, + "nll_loss": 0.12508998811244965, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.86275161569938e-05, + "rewards/margins": 0.2199822962284088, + "rewards/rejected": -0.22005091607570648, + "step": 11659 + }, + { + "epoch": 8.063623789764868, + "grad_norm": 3.875340223312378, + "learning_rate": 1.0757645612417398e-05, + "log_odds_chosen": 11.094024658203125, + "log_odds_ratio": -0.00012229704589117318, + "logits/chosen": -0.7080512046813965, + "logits/rejected": -0.675679624080658, + "logps/chosen": -0.00026382546639069915, + "logps/rejected": -2.685532331466675, + "loss": 0.3727, + "nll_loss": 0.09317460656166077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.638254954945296e-05, + "rewards/margins": 0.2685268521308899, + "rewards/rejected": -0.2685532569885254, + "step": 11660 + }, + { + "epoch": 8.064315352697095, + "grad_norm": 5.578856945037842, + "learning_rate": 1.0753803596127248e-05, + "log_odds_chosen": 11.5849609375, + "log_odds_ratio": -4.599348903866485e-05, + "logits/chosen": -0.713058590888977, + "logits/rejected": -0.7514088153839111, + "logps/chosen": -0.00018928774807136506, + "logps/rejected": -2.5326950550079346, + "loss": 0.4545, + "nll_loss": 0.11361332982778549, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8928774807136506e-05, + "rewards/margins": 0.2532505691051483, + "rewards/rejected": -0.2532695233821869, + "step": 11661 + }, + { + "epoch": 8.065006915629322, + "grad_norm": 3.122255325317383, + "learning_rate": 1.0749961579837099e-05, + "log_odds_chosen": 10.60268783569336, + "log_odds_ratio": -0.0003385727177374065, + "logits/chosen": -0.8399189710617065, + "logits/rejected": -0.8972233533859253, + "logps/chosen": -0.0006615786114707589, + "logps/rejected": -1.813633918762207, + "loss": 0.3471, + "nll_loss": 0.08674322068691254, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.615785969188437e-05, + "rewards/margins": 0.18129724264144897, + "rewards/rejected": -0.18136340379714966, + "step": 11662 + }, + { + "epoch": 8.065698478561549, + "grad_norm": 3.3046131134033203, + "learning_rate": 1.074611956354695e-05, + "log_odds_chosen": 11.259008407592773, + "log_odds_ratio": -0.00023068346490617841, + "logits/chosen": -0.5720511674880981, + "logits/rejected": -0.6492300033569336, + "logps/chosen": -0.00022080892813391984, + "logps/rejected": -2.3607053756713867, + "loss": 0.3182, + "nll_loss": 0.07953804731369019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2080894268583506e-05, + "rewards/margins": 0.23604848980903625, + "rewards/rejected": -0.23607054352760315, + "step": 11663 + }, + { + "epoch": 8.066390041493776, + "grad_norm": 3.43304443359375, + "learning_rate": 1.0742277547256802e-05, + "log_odds_chosen": 10.118446350097656, + "log_odds_ratio": -0.00044802669435739517, + "logits/chosen": -0.23905214667320251, + "logits/rejected": -0.16716815531253815, + "logps/chosen": -0.0006772215710952878, + "logps/rejected": -2.0158321857452393, + "loss": 0.2561, + "nll_loss": 0.06397796422243118, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.772215419914573e-05, + "rewards/margins": 0.2015155404806137, + "rewards/rejected": -0.20158325135707855, + "step": 11664 + }, + { + "epoch": 8.067081604426003, + "grad_norm": 7.999740123748779, + "learning_rate": 1.0738435530966651e-05, + "log_odds_chosen": 11.912637710571289, + "log_odds_ratio": -4.0913459088187665e-05, + "logits/chosen": -0.07696963846683502, + "logits/rejected": -0.15583869814872742, + "logps/chosen": -0.00018690910656005144, + "logps/rejected": -2.847935676574707, + "loss": 1.0301, + "nll_loss": 0.2575136423110962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.869090920081362e-05, + "rewards/margins": 0.28477486968040466, + "rewards/rejected": -0.28479355573654175, + "step": 11665 + }, + { + "epoch": 8.06777316735823, + "grad_norm": 3.754718065261841, + "learning_rate": 1.0734593514676502e-05, + "log_odds_chosen": 10.807878494262695, + "log_odds_ratio": -0.00012588589743245393, + "logits/chosen": -0.5574550628662109, + "logits/rejected": -0.6791661977767944, + "logps/chosen": -0.0003695531631819904, + "logps/rejected": -2.385138511657715, + "loss": 0.5487, + "nll_loss": 0.1371651589870453, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6955319956177846e-05, + "rewards/margins": 0.23847690224647522, + "rewards/rejected": -0.23851384222507477, + "step": 11666 + }, + { + "epoch": 8.068464730290456, + "grad_norm": 3.555729866027832, + "learning_rate": 1.0730751498386353e-05, + "log_odds_chosen": 11.32563304901123, + "log_odds_ratio": -3.7144818634260446e-05, + "logits/chosen": -0.3752025365829468, + "logits/rejected": -0.4483998417854309, + "logps/chosen": -0.0004869193071499467, + "logps/rejected": -2.7447586059570312, + "loss": 0.4554, + "nll_loss": 0.11384133249521255, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.869193071499467e-05, + "rewards/margins": 0.2744271755218506, + "rewards/rejected": -0.2744758725166321, + "step": 11667 + }, + { + "epoch": 8.069156293222683, + "grad_norm": 3.514678716659546, + "learning_rate": 1.0726909482096205e-05, + "log_odds_chosen": 9.528676986694336, + "log_odds_ratio": -0.0005053476197645068, + "logits/chosen": -0.4819263219833374, + "logits/rejected": -0.46735963225364685, + "logps/chosen": -0.0002845790295396, + "logps/rejected": -1.1177395582199097, + "loss": 0.3643, + "nll_loss": 0.09101204574108124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8457903681555763e-05, + "rewards/margins": 0.11174549907445908, + "rewards/rejected": -0.11177396774291992, + "step": 11668 + }, + { + "epoch": 8.06984785615491, + "grad_norm": 11.719605445861816, + "learning_rate": 1.0723067465806056e-05, + "log_odds_chosen": 10.538747787475586, + "log_odds_ratio": -0.00031500202021561563, + "logits/chosen": -0.5664748549461365, + "logits/rejected": -0.5411901473999023, + "logps/chosen": -0.0004703707236330956, + "logps/rejected": -2.0027015209198, + "loss": 0.4376, + "nll_loss": 0.1093745082616806, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.703707236330956e-05, + "rewards/margins": 0.20022311806678772, + "rewards/rejected": -0.2002701610326767, + "step": 11669 + }, + { + "epoch": 8.070539419087137, + "grad_norm": 4.784857273101807, + "learning_rate": 1.0719225449515907e-05, + "log_odds_chosen": 11.857826232910156, + "log_odds_ratio": -5.775447789346799e-05, + "logits/chosen": -0.38018327951431274, + "logits/rejected": -0.41651451587677, + "logps/chosen": -0.0003240426303818822, + "logps/rejected": -2.7629175186157227, + "loss": 0.549, + "nll_loss": 0.13724809885025024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.240426303818822e-05, + "rewards/margins": 0.2762593626976013, + "rewards/rejected": -0.27629178762435913, + "step": 11670 + }, + { + "epoch": 8.071230982019364, + "grad_norm": 3.8318252563476562, + "learning_rate": 1.0715383433225757e-05, + "log_odds_chosen": 10.338508605957031, + "log_odds_ratio": -7.135741179808974e-05, + "logits/chosen": -0.6009719371795654, + "logits/rejected": -0.47979089617729187, + "logps/chosen": -0.00045763261732645333, + "logps/rejected": -2.244736909866333, + "loss": 0.3491, + "nll_loss": 0.08727248758077621, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.576325954985805e-05, + "rewards/margins": 0.22442790865898132, + "rewards/rejected": -0.22447368502616882, + "step": 11671 + }, + { + "epoch": 8.07192254495159, + "grad_norm": 4.182869911193848, + "learning_rate": 1.0711541416935608e-05, + "log_odds_chosen": 11.189848899841309, + "log_odds_ratio": -0.0003522265760693699, + "logits/chosen": -0.47810447216033936, + "logits/rejected": -0.5015765428543091, + "logps/chosen": -0.0004364237829577178, + "logps/rejected": -2.4150683879852295, + "loss": 0.409, + "nll_loss": 0.10220624506473541, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.36423797509633e-05, + "rewards/margins": 0.24146321415901184, + "rewards/rejected": -0.24150684475898743, + "step": 11672 + }, + { + "epoch": 8.072614107883817, + "grad_norm": 3.8744325637817383, + "learning_rate": 1.0707699400645459e-05, + "log_odds_chosen": 10.744927406311035, + "log_odds_ratio": -0.00014370067219715565, + "logits/chosen": -0.34662389755249023, + "logits/rejected": -0.30861902236938477, + "logps/chosen": -0.00019246491137892008, + "logps/rejected": -2.105988025665283, + "loss": 0.4266, + "nll_loss": 0.1066313087940216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9246490410296246e-05, + "rewards/margins": 0.2105795443058014, + "rewards/rejected": -0.21059879660606384, + "step": 11673 + }, + { + "epoch": 8.073305670816044, + "grad_norm": 3.939030885696411, + "learning_rate": 1.0703857384355311e-05, + "log_odds_chosen": 10.826810836791992, + "log_odds_ratio": -9.675160981714725e-05, + "logits/chosen": -0.5049489736557007, + "logits/rejected": -0.5568135380744934, + "logps/chosen": -0.00012390982010401785, + "logps/rejected": -1.9049142599105835, + "loss": 0.3574, + "nll_loss": 0.08934393525123596, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2390981282806024e-05, + "rewards/margins": 0.19047902524471283, + "rewards/rejected": -0.1904914230108261, + "step": 11674 + }, + { + "epoch": 8.07399723374827, + "grad_norm": 3.718947172164917, + "learning_rate": 1.070001536806516e-05, + "log_odds_chosen": 12.584778785705566, + "log_odds_ratio": -1.6311325452988967e-05, + "logits/chosen": -0.4680939018726349, + "logits/rejected": -0.4668935239315033, + "logps/chosen": -0.0004223872674629092, + "logps/rejected": -3.424896240234375, + "loss": 0.3788, + "nll_loss": 0.09469722211360931, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2238727473886684e-05, + "rewards/margins": 0.3424473702907562, + "rewards/rejected": -0.342489629983902, + "step": 11675 + }, + { + "epoch": 8.074688796680498, + "grad_norm": 3.831510305404663, + "learning_rate": 1.0696173351775011e-05, + "log_odds_chosen": 10.228010177612305, + "log_odds_ratio": -0.00012409157352522016, + "logits/chosen": -0.6946989297866821, + "logits/rejected": -0.7984243035316467, + "logps/chosen": -0.00046772955101914704, + "logps/rejected": -2.4283576011657715, + "loss": 0.3698, + "nll_loss": 0.09244365990161896, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677295873989351e-05, + "rewards/margins": 0.24278900027275085, + "rewards/rejected": -0.24283577501773834, + "step": 11676 + }, + { + "epoch": 8.075380359612724, + "grad_norm": 5.196197986602783, + "learning_rate": 1.0692331335484864e-05, + "log_odds_chosen": 10.614950180053711, + "log_odds_ratio": -0.001403479604050517, + "logits/chosen": -0.3574877977371216, + "logits/rejected": -0.4411139190196991, + "logps/chosen": -0.0010486284736543894, + "logps/rejected": -2.6322503089904785, + "loss": 0.9034, + "nll_loss": 0.22569890320301056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010486285464139655, + "rewards/margins": 0.263120174407959, + "rewards/rejected": -0.2632250189781189, + "step": 11677 + }, + { + "epoch": 8.076071922544951, + "grad_norm": 3.0540876388549805, + "learning_rate": 1.0688489319194714e-05, + "log_odds_chosen": 9.706781387329102, + "log_odds_ratio": -0.0003603575169108808, + "logits/chosen": -0.5334009528160095, + "logits/rejected": -0.6449951529502869, + "logps/chosen": -0.0004149775777477771, + "logps/rejected": -1.623328447341919, + "loss": 0.3817, + "nll_loss": 0.09539544582366943, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.149776214035228e-05, + "rewards/margins": 0.16229134798049927, + "rewards/rejected": -0.16233286261558533, + "step": 11678 + }, + { + "epoch": 8.076763485477178, + "grad_norm": 5.293995380401611, + "learning_rate": 1.0684647302904565e-05, + "log_odds_chosen": 9.589630126953125, + "log_odds_ratio": -0.00028994199237786233, + "logits/chosen": -0.38811492919921875, + "logits/rejected": -0.5318203568458557, + "logps/chosen": -0.0005249874666333199, + "logps/rejected": -1.6922428607940674, + "loss": 0.4026, + "nll_loss": 0.10061775892972946, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.249874811852351e-05, + "rewards/margins": 0.1691717952489853, + "rewards/rejected": -0.16922429203987122, + "step": 11679 + }, + { + "epoch": 8.077455048409405, + "grad_norm": 4.3679351806640625, + "learning_rate": 1.0680805286614416e-05, + "log_odds_chosen": 11.298325538635254, + "log_odds_ratio": -4.026488750241697e-05, + "logits/chosen": -0.27658116817474365, + "logits/rejected": -0.30183690786361694, + "logps/chosen": -0.00022356417321134359, + "logps/rejected": -2.728652000427246, + "loss": 0.5673, + "nll_loss": 0.1418299674987793, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2356416593538597e-05, + "rewards/margins": 0.2728428542613983, + "rewards/rejected": -0.2728652060031891, + "step": 11680 + }, + { + "epoch": 8.078146611341632, + "grad_norm": 5.592859268188477, + "learning_rate": 1.0676963270324267e-05, + "log_odds_chosen": 11.06486701965332, + "log_odds_ratio": -0.00012340693501755595, + "logits/chosen": -0.41180622577667236, + "logits/rejected": -0.5484762191772461, + "logps/chosen": -0.00026425684336572886, + "logps/rejected": -2.107800245285034, + "loss": 0.574, + "nll_loss": 0.1434963345527649, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.642568688315805e-05, + "rewards/margins": 0.21075361967086792, + "rewards/rejected": -0.2107800394296646, + "step": 11681 + }, + { + "epoch": 8.078838174273859, + "grad_norm": 4.267911434173584, + "learning_rate": 1.0673121254034117e-05, + "log_odds_chosen": 10.70009994506836, + "log_odds_ratio": -5.717628664569929e-05, + "logits/chosen": -0.38904839754104614, + "logits/rejected": -0.5367807149887085, + "logps/chosen": -0.0003111464611720294, + "logps/rejected": -2.428642749786377, + "loss": 0.4344, + "nll_loss": 0.10860622674226761, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.11146468447987e-05, + "rewards/margins": 0.24283316731452942, + "rewards/rejected": -0.24286428093910217, + "step": 11682 + }, + { + "epoch": 8.079529737206085, + "grad_norm": 9.669475555419922, + "learning_rate": 1.066927923774397e-05, + "log_odds_chosen": 11.366908073425293, + "log_odds_ratio": -5.51422344869934e-05, + "logits/chosen": -0.20215241611003876, + "logits/rejected": -0.30782490968704224, + "logps/chosen": -0.0003663312818389386, + "logps/rejected": -2.961658000946045, + "loss": 0.5382, + "nll_loss": 0.1345456838607788, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.663312963908538e-05, + "rewards/margins": 0.29612916707992554, + "rewards/rejected": -0.29616579413414, + "step": 11683 + }, + { + "epoch": 8.080221300138312, + "grad_norm": 9.101593017578125, + "learning_rate": 1.0665437221453819e-05, + "log_odds_chosen": 11.405898094177246, + "log_odds_ratio": -3.301673132227734e-05, + "logits/chosen": -0.060617730021476746, + "logits/rejected": -0.19241876900196075, + "logps/chosen": -0.00026630048523657024, + "logps/rejected": -2.7963948249816895, + "loss": 0.3653, + "nll_loss": 0.09131920337677002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6630050342646427e-05, + "rewards/margins": 0.27961283922195435, + "rewards/rejected": -0.27963948249816895, + "step": 11684 + }, + { + "epoch": 8.08091286307054, + "grad_norm": 6.0880208015441895, + "learning_rate": 1.066159520516367e-05, + "log_odds_chosen": 10.40422248840332, + "log_odds_ratio": -0.00013111547741573304, + "logits/chosen": -0.4507831335067749, + "logits/rejected": -0.4514143466949463, + "logps/chosen": -0.0001635592634556815, + "logps/rejected": -1.5886892080307007, + "loss": 0.3797, + "nll_loss": 0.09490390866994858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6355925254174508e-05, + "rewards/margins": 0.15885257720947266, + "rewards/rejected": -0.1588689237833023, + "step": 11685 + }, + { + "epoch": 8.081604426002766, + "grad_norm": 3.816469669342041, + "learning_rate": 1.0657753188873522e-05, + "log_odds_chosen": 11.360533714294434, + "log_odds_ratio": -2.9774164431728423e-05, + "logits/chosen": -0.1263056844472885, + "logits/rejected": -0.3316514492034912, + "logps/chosen": -0.00037145017995499074, + "logps/rejected": -2.7097506523132324, + "loss": 0.809, + "nll_loss": 0.2022462636232376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.714502236107364e-05, + "rewards/margins": 0.2709379196166992, + "rewards/rejected": -0.2709750831127167, + "step": 11686 + }, + { + "epoch": 8.082295988934993, + "grad_norm": 4.535207748413086, + "learning_rate": 1.0653911172583373e-05, + "log_odds_chosen": 8.963878631591797, + "log_odds_ratio": -0.0005296789458952844, + "logits/chosen": -0.18249739706516266, + "logits/rejected": -0.3741540014743805, + "logps/chosen": -0.0005585459875874221, + "logps/rejected": -1.2982432842254639, + "loss": 0.2973, + "nll_loss": 0.07427582889795303, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5854594393167645e-05, + "rewards/margins": 0.1297684609889984, + "rewards/rejected": -0.12982431054115295, + "step": 11687 + }, + { + "epoch": 8.08298755186722, + "grad_norm": 4.2169270515441895, + "learning_rate": 1.0650069156293224e-05, + "log_odds_chosen": 11.26722526550293, + "log_odds_ratio": -3.825961539405398e-05, + "logits/chosen": 0.1320866346359253, + "logits/rejected": 0.07569442689418793, + "logps/chosen": -0.0002050643670372665, + "logps/rejected": -2.474332571029663, + "loss": 0.41, + "nll_loss": 0.10250428318977356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0506438886513934e-05, + "rewards/margins": 0.2474127560853958, + "rewards/rejected": -0.24743324518203735, + "step": 11688 + }, + { + "epoch": 8.083679114799446, + "grad_norm": 4.922435760498047, + "learning_rate": 1.0646227140003074e-05, + "log_odds_chosen": 11.057992935180664, + "log_odds_ratio": -0.00010158990335185081, + "logits/chosen": -0.4223814904689789, + "logits/rejected": -0.2926499843597412, + "logps/chosen": -0.0004958102363161743, + "logps/rejected": -2.3514440059661865, + "loss": 0.3024, + "nll_loss": 0.07559921592473984, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.958102363161743e-05, + "rewards/margins": 0.2350948005914688, + "rewards/rejected": -0.23514439165592194, + "step": 11689 + }, + { + "epoch": 8.084370677731673, + "grad_norm": 5.644584655761719, + "learning_rate": 1.0642385123712925e-05, + "log_odds_chosen": 11.906888961791992, + "log_odds_ratio": -2.213385232607834e-05, + "logits/chosen": -0.28304755687713623, + "logits/rejected": -0.35353371500968933, + "logps/chosen": -0.00018962196190841496, + "logps/rejected": -3.090592384338379, + "loss": 0.311, + "nll_loss": 0.0777548998594284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8962196918437257e-05, + "rewards/margins": 0.3090403079986572, + "rewards/rejected": -0.3090592622756958, + "step": 11690 + }, + { + "epoch": 8.0850622406639, + "grad_norm": 2.3281097412109375, + "learning_rate": 1.0638543107422776e-05, + "log_odds_chosen": 10.885601997375488, + "log_odds_ratio": -0.0003212861774954945, + "logits/chosen": -0.32111576199531555, + "logits/rejected": -0.3752807378768921, + "logps/chosen": -0.0005567088956013322, + "logps/rejected": -2.0498268604278564, + "loss": 0.2634, + "nll_loss": 0.06582079827785492, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5670887377345935e-05, + "rewards/margins": 0.2049270123243332, + "rewards/rejected": -0.2049826830625534, + "step": 11691 + }, + { + "epoch": 8.085753803596127, + "grad_norm": 3.4586286544799805, + "learning_rate": 1.0634701091132628e-05, + "log_odds_chosen": 12.256976127624512, + "log_odds_ratio": -2.0153183868387714e-05, + "logits/chosen": -0.8171830773353577, + "logits/rejected": -0.8613120913505554, + "logps/chosen": -0.00020997013780288398, + "logps/rejected": -3.2046985626220703, + "loss": 0.4156, + "nll_loss": 0.10389473289251328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0997013052692637e-05, + "rewards/margins": 0.3204488754272461, + "rewards/rejected": -0.32046985626220703, + "step": 11692 + }, + { + "epoch": 8.086445366528354, + "grad_norm": 4.121342658996582, + "learning_rate": 1.0630859074842477e-05, + "log_odds_chosen": 11.76333236694336, + "log_odds_ratio": -2.062761996057816e-05, + "logits/chosen": -0.19090475142002106, + "logits/rejected": -0.24660451710224152, + "logps/chosen": -0.0002958507393486798, + "logps/rejected": -2.9039571285247803, + "loss": 0.4254, + "nll_loss": 0.10634127259254456, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9585071388282813e-05, + "rewards/margins": 0.29036611318588257, + "rewards/rejected": -0.29039567708969116, + "step": 11693 + }, + { + "epoch": 8.08713692946058, + "grad_norm": 3.8869848251342773, + "learning_rate": 1.0627017058552328e-05, + "log_odds_chosen": 10.896076202392578, + "log_odds_ratio": -5.0996481149923056e-05, + "logits/chosen": 0.1542137861251831, + "logits/rejected": 0.10924919694662094, + "logps/chosen": -0.000141911645187065, + "logps/rejected": -1.7987464666366577, + "loss": 0.4506, + "nll_loss": 0.11265307664871216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4191165973898023e-05, + "rewards/margins": 0.17986047267913818, + "rewards/rejected": -0.17987464368343353, + "step": 11694 + }, + { + "epoch": 8.087828492392807, + "grad_norm": 3.6346919536590576, + "learning_rate": 1.062317504226218e-05, + "log_odds_chosen": 11.227770805358887, + "log_odds_ratio": -0.00011380699288565665, + "logits/chosen": -0.29328569769859314, + "logits/rejected": -0.30168992280960083, + "logps/chosen": -0.0001262957084691152, + "logps/rejected": -1.8149688243865967, + "loss": 0.3976, + "nll_loss": 0.09939021617174149, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2629571756406222e-05, + "rewards/margins": 0.18148425221443176, + "rewards/rejected": -0.18149688839912415, + "step": 11695 + }, + { + "epoch": 8.088520055325034, + "grad_norm": 4.028033256530762, + "learning_rate": 1.0619333025972031e-05, + "log_odds_chosen": 10.170938491821289, + "log_odds_ratio": -0.0011964394943788648, + "logits/chosen": -0.5307091474533081, + "logits/rejected": -0.575198769569397, + "logps/chosen": -0.000592139782384038, + "logps/rejected": -1.9726135730743408, + "loss": 0.4069, + "nll_loss": 0.10161113739013672, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.921397314523347e-05, + "rewards/margins": 0.1972021460533142, + "rewards/rejected": -0.19726136326789856, + "step": 11696 + }, + { + "epoch": 8.089211618257261, + "grad_norm": 6.926070213317871, + "learning_rate": 1.0615491009681882e-05, + "log_odds_chosen": 9.902064323425293, + "log_odds_ratio": -0.16591793298721313, + "logits/chosen": -0.3648636043071747, + "logits/rejected": -0.27038756012916565, + "logps/chosen": -0.03061908297240734, + "logps/rejected": -2.4003472328186035, + "loss": 0.5452, + "nll_loss": 0.11971628665924072, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0030619085300713778, + "rewards/margins": 0.23697280883789062, + "rewards/rejected": -0.24003471434116364, + "step": 11697 + }, + { + "epoch": 8.089903181189488, + "grad_norm": 4.866793155670166, + "learning_rate": 1.0611648993391731e-05, + "log_odds_chosen": 11.041983604431152, + "log_odds_ratio": -5.963775038253516e-05, + "logits/chosen": -0.2963463068008423, + "logits/rejected": -0.3301407992839813, + "logps/chosen": -0.000261253968346864, + "logps/rejected": -2.1881203651428223, + "loss": 0.5279, + "nll_loss": 0.13197532296180725, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.612539719848428e-05, + "rewards/margins": 0.21878591179847717, + "rewards/rejected": -0.21881204843521118, + "step": 11698 + }, + { + "epoch": 8.090594744121715, + "grad_norm": 4.243380546569824, + "learning_rate": 1.0607806977101584e-05, + "log_odds_chosen": 11.479931831359863, + "log_odds_ratio": -1.3241216947790235e-05, + "logits/chosen": -0.5409437417984009, + "logits/rejected": -0.5891100168228149, + "logps/chosen": -0.00010321621084585786, + "logps/rejected": -2.011333465576172, + "loss": 0.4353, + "nll_loss": 0.10882383584976196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0321620720787905e-05, + "rewards/margins": 0.20112305879592896, + "rewards/rejected": -0.2011333703994751, + "step": 11699 + }, + { + "epoch": 8.091286307053942, + "grad_norm": 4.312553405761719, + "learning_rate": 1.0603964960811434e-05, + "log_odds_chosen": 11.484989166259766, + "log_odds_ratio": -0.00010771736560855061, + "logits/chosen": -0.011333595961332321, + "logits/rejected": -0.0748065859079361, + "logps/chosen": -0.00029604186420328915, + "logps/rejected": -2.8591153621673584, + "loss": 0.547, + "nll_loss": 0.13673458993434906, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9604187147924677e-05, + "rewards/margins": 0.2858819365501404, + "rewards/rejected": -0.28591153025627136, + "step": 11700 + }, + { + "epoch": 8.091977869986168, + "grad_norm": 3.610816717147827, + "learning_rate": 1.0600122944521285e-05, + "log_odds_chosen": 11.068406105041504, + "log_odds_ratio": -0.00010744159953901544, + "logits/chosen": -0.2816433012485504, + "logits/rejected": -0.34033089876174927, + "logps/chosen": -0.00041612557834014297, + "logps/rejected": -2.609286308288574, + "loss": 0.3272, + "nll_loss": 0.08178265392780304, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.161255492363125e-05, + "rewards/margins": 0.2608869969844818, + "rewards/rejected": -0.2609286308288574, + "step": 11701 + }, + { + "epoch": 8.092669432918395, + "grad_norm": 2.7508389949798584, + "learning_rate": 1.0596280928231136e-05, + "log_odds_chosen": 10.45711612701416, + "log_odds_ratio": -0.0003854866372421384, + "logits/chosen": -0.2440710961818695, + "logits/rejected": -0.21395137906074524, + "logps/chosen": -0.0003028765204362571, + "logps/rejected": -1.627083420753479, + "loss": 0.209, + "nll_loss": 0.05221348628401756, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.028765058843419e-05, + "rewards/margins": 0.162678062915802, + "rewards/rejected": -0.1627083420753479, + "step": 11702 + }, + { + "epoch": 8.093360995850622, + "grad_norm": 3.8575940132141113, + "learning_rate": 1.0592438911940986e-05, + "log_odds_chosen": 10.117890357971191, + "log_odds_ratio": -0.0002173526445403695, + "logits/chosen": -0.1967553347349167, + "logits/rejected": -0.24321748316287994, + "logps/chosen": -0.000661263766232878, + "logps/rejected": -1.8736224174499512, + "loss": 0.4698, + "nll_loss": 0.11741577088832855, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.61263766232878e-05, + "rewards/margins": 0.18729612231254578, + "rewards/rejected": -0.18736225366592407, + "step": 11703 + }, + { + "epoch": 8.094052558782849, + "grad_norm": 3.7447421550750732, + "learning_rate": 1.0588596895650837e-05, + "log_odds_chosen": 10.763427734375, + "log_odds_ratio": -7.86112723289989e-05, + "logits/chosen": -0.45996594429016113, + "logits/rejected": -0.4717139005661011, + "logps/chosen": -0.00015374028589576483, + "logps/rejected": -1.995914101600647, + "loss": 0.3841, + "nll_loss": 0.09600993990898132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5374029317172244e-05, + "rewards/margins": 0.19957603514194489, + "rewards/rejected": -0.19959142804145813, + "step": 11704 + }, + { + "epoch": 8.094744121715076, + "grad_norm": 3.0664539337158203, + "learning_rate": 1.058475487936069e-05, + "log_odds_chosen": 10.705331802368164, + "log_odds_ratio": -0.00010342212772229686, + "logits/chosen": -0.12351376563310623, + "logits/rejected": -0.19361796975135803, + "logps/chosen": -0.0003090745012741536, + "logps/rejected": -2.340331554412842, + "loss": 0.2656, + "nll_loss": 0.06640031188726425, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.09074493998196e-05, + "rewards/margins": 0.2340022623538971, + "rewards/rejected": -0.23403316736221313, + "step": 11705 + }, + { + "epoch": 8.095435684647303, + "grad_norm": 3.6997134685516357, + "learning_rate": 1.058091286307054e-05, + "log_odds_chosen": 11.018940925598145, + "log_odds_ratio": -2.4768874936853535e-05, + "logits/chosen": 0.04698491469025612, + "logits/rejected": 0.04826957359910011, + "logps/chosen": -0.0004916464095003903, + "logps/rejected": -2.697220802307129, + "loss": 0.5134, + "nll_loss": 0.1283407062292099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9164649681188166e-05, + "rewards/margins": 0.2696729302406311, + "rewards/rejected": -0.2697220742702484, + "step": 11706 + }, + { + "epoch": 8.09612724757953, + "grad_norm": 3.7170534133911133, + "learning_rate": 1.057707084678039e-05, + "log_odds_chosen": 10.96964168548584, + "log_odds_ratio": -6.920520536368713e-05, + "logits/chosen": -0.34352627396583557, + "logits/rejected": -0.24258866906166077, + "logps/chosen": -0.0002988358319271356, + "logps/rejected": -2.604693651199341, + "loss": 0.3497, + "nll_loss": 0.08741586655378342, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.988358392030932e-05, + "rewards/margins": 0.26043951511383057, + "rewards/rejected": -0.26046937704086304, + "step": 11707 + }, + { + "epoch": 8.096818810511756, + "grad_norm": 3.525240421295166, + "learning_rate": 1.0573228830490242e-05, + "log_odds_chosen": 10.831472396850586, + "log_odds_ratio": -0.00013157624925952405, + "logits/chosen": -0.2283937633037567, + "logits/rejected": -0.3139522075653076, + "logps/chosen": -0.00024043480516411364, + "logps/rejected": -2.343863010406494, + "loss": 0.406, + "nll_loss": 0.10149849951267242, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4043480152613483e-05, + "rewards/margins": 0.23436225950717926, + "rewards/rejected": -0.23438629508018494, + "step": 11708 + }, + { + "epoch": 8.097510373443983, + "grad_norm": 7.458436965942383, + "learning_rate": 1.0569386814200093e-05, + "log_odds_chosen": 11.120165824890137, + "log_odds_ratio": -2.6500281819608063e-05, + "logits/chosen": -0.23689518868923187, + "logits/rejected": -0.25085359811782837, + "logps/chosen": -0.00014912855112925172, + "logps/rejected": -1.925016164779663, + "loss": 0.3739, + "nll_loss": 0.09347623586654663, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4912855476723053e-05, + "rewards/margins": 0.1924867033958435, + "rewards/rejected": -0.19250163435935974, + "step": 11709 + }, + { + "epoch": 8.09820193637621, + "grad_norm": 3.490351438522339, + "learning_rate": 1.0565544797909943e-05, + "log_odds_chosen": 10.826065063476562, + "log_odds_ratio": -0.00010091800504596904, + "logits/chosen": -0.027560945600271225, + "logits/rejected": -0.059178732335567474, + "logps/chosen": -0.00033488537883386016, + "logps/rejected": -2.706929922103882, + "loss": 0.311, + "nll_loss": 0.07775228470563889, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3488537155790254e-05, + "rewards/margins": 0.27065950632095337, + "rewards/rejected": -0.27069300413131714, + "step": 11710 + }, + { + "epoch": 8.098893499308437, + "grad_norm": 2.662122964859009, + "learning_rate": 1.0561702781619794e-05, + "log_odds_chosen": 10.835779190063477, + "log_odds_ratio": -4.204745346214622e-05, + "logits/chosen": -0.11458122730255127, + "logits/rejected": -0.1769244521856308, + "logps/chosen": -0.00015519153384957463, + "logps/rejected": -1.9499348402023315, + "loss": 0.3402, + "nll_loss": 0.0850481316447258, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5519153748755343e-05, + "rewards/margins": 0.19497796893119812, + "rewards/rejected": -0.19499346613883972, + "step": 11711 + }, + { + "epoch": 8.099585062240664, + "grad_norm": 3.402977228164673, + "learning_rate": 1.0557860765329645e-05, + "log_odds_chosen": 10.551286697387695, + "log_odds_ratio": -0.00014364722301252186, + "logits/chosen": -0.33826518058776855, + "logits/rejected": -0.4199278950691223, + "logps/chosen": -0.0012256636982783675, + "logps/rejected": -2.1779050827026367, + "loss": 0.4171, + "nll_loss": 0.10425589978694916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001225663727382198, + "rewards/margins": 0.21766793727874756, + "rewards/rejected": -0.21779048442840576, + "step": 11712 + }, + { + "epoch": 8.10027662517289, + "grad_norm": 5.266214847564697, + "learning_rate": 1.0554018749039496e-05, + "log_odds_chosen": 12.025720596313477, + "log_odds_ratio": -2.9008931960561313e-05, + "logits/chosen": -0.4864197075366974, + "logits/rejected": -0.503054678440094, + "logps/chosen": -0.00017336485325358808, + "logps/rejected": -2.8456907272338867, + "loss": 0.539, + "nll_loss": 0.13474708795547485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.733648605295457e-05, + "rewards/margins": 0.284551739692688, + "rewards/rejected": -0.2845690846443176, + "step": 11713 + }, + { + "epoch": 8.100968188105117, + "grad_norm": 3.9011898040771484, + "learning_rate": 1.0550176732749348e-05, + "log_odds_chosen": 10.942675590515137, + "log_odds_ratio": -2.8809481591451913e-05, + "logits/chosen": 0.13352540135383606, + "logits/rejected": 0.006145041435956955, + "logps/chosen": -0.0001652721839491278, + "logps/rejected": -2.090564727783203, + "loss": 0.3707, + "nll_loss": 0.09268441796302795, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6527217667317018e-05, + "rewards/margins": 0.20903997123241425, + "rewards/rejected": -0.20905649662017822, + "step": 11714 + }, + { + "epoch": 8.101659751037344, + "grad_norm": 7.787944316864014, + "learning_rate": 1.0546334716459199e-05, + "log_odds_chosen": 12.608068466186523, + "log_odds_ratio": -2.1099880541441962e-05, + "logits/chosen": 0.07174703478813171, + "logits/rejected": 0.07831723988056183, + "logps/chosen": -0.000200631155166775, + "logps/rejected": -3.4012465476989746, + "loss": 0.3049, + "nll_loss": 0.0762215405702591, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0063114789081737e-05, + "rewards/margins": 0.3401045799255371, + "rewards/rejected": -0.3401246666908264, + "step": 11715 + }, + { + "epoch": 8.10235131396957, + "grad_norm": 4.928945064544678, + "learning_rate": 1.0542492700169048e-05, + "log_odds_chosen": 10.747015953063965, + "log_odds_ratio": -0.000226613599807024, + "logits/chosen": -0.14238809049129486, + "logits/rejected": -0.19443312287330627, + "logps/chosen": -0.0008222081232815981, + "logps/rejected": -2.1754188537597656, + "loss": 0.5648, + "nll_loss": 0.1411823034286499, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.222080941777676e-05, + "rewards/margins": 0.21745967864990234, + "rewards/rejected": -0.21754190325737, + "step": 11716 + }, + { + "epoch": 8.103042876901798, + "grad_norm": 3.7822341918945312, + "learning_rate": 1.05386506838789e-05, + "log_odds_chosen": 10.048130989074707, + "log_odds_ratio": -0.00016685103764757514, + "logits/chosen": -0.23168587684631348, + "logits/rejected": -0.19154129922389984, + "logps/chosen": -0.0003420043212827295, + "logps/rejected": -1.6004074811935425, + "loss": 0.4462, + "nll_loss": 0.11152322590351105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.420043140067719e-05, + "rewards/margins": 0.1600065529346466, + "rewards/rejected": -0.1600407361984253, + "step": 11717 + }, + { + "epoch": 8.103734439834025, + "grad_norm": 4.209904193878174, + "learning_rate": 1.0534808667588751e-05, + "log_odds_chosen": 11.270818710327148, + "log_odds_ratio": -5.3001502237748355e-05, + "logits/chosen": -0.6203676462173462, + "logits/rejected": -0.6190280914306641, + "logps/chosen": -0.00022040428302716464, + "logps/rejected": -2.402357578277588, + "loss": 0.3598, + "nll_loss": 0.08995695412158966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2040429030312225e-05, + "rewards/margins": 0.24021372199058533, + "rewards/rejected": -0.24023577570915222, + "step": 11718 + }, + { + "epoch": 8.104426002766251, + "grad_norm": 4.452395439147949, + "learning_rate": 1.0530966651298602e-05, + "log_odds_chosen": 11.229473114013672, + "log_odds_ratio": -0.00017697452858556062, + "logits/chosen": -0.3201093375682831, + "logits/rejected": -0.4116101861000061, + "logps/chosen": -0.00024741722154431045, + "logps/rejected": -2.6428608894348145, + "loss": 0.6095, + "nll_loss": 0.15235979855060577, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4741722882026806e-05, + "rewards/margins": 0.2642613649368286, + "rewards/rejected": -0.2642861008644104, + "step": 11719 + }, + { + "epoch": 8.105117565698478, + "grad_norm": 3.6376991271972656, + "learning_rate": 1.0527124635008454e-05, + "log_odds_chosen": 11.011824607849121, + "log_odds_ratio": -9.336704533779994e-05, + "logits/chosen": -0.18999135494232178, + "logits/rejected": -0.1827242523431778, + "logps/chosen": -0.0004461828211788088, + "logps/rejected": -2.6694254875183105, + "loss": 0.3557, + "nll_loss": 0.08890706300735474, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.461828211788088e-05, + "rewards/margins": 0.26689794659614563, + "rewards/rejected": -0.26694256067276, + "step": 11720 + }, + { + "epoch": 8.105809128630705, + "grad_norm": 3.3639659881591797, + "learning_rate": 1.0523282618718303e-05, + "log_odds_chosen": 11.908004760742188, + "log_odds_ratio": -4.112864189664833e-05, + "logits/chosen": -0.1534234881401062, + "logits/rejected": -0.15365737676620483, + "logps/chosen": -0.00016780171426944435, + "logps/rejected": -2.8708534240722656, + "loss": 0.3309, + "nll_loss": 0.08273102343082428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.678017360973172e-05, + "rewards/margins": 0.28706854581832886, + "rewards/rejected": -0.2870853543281555, + "step": 11721 + }, + { + "epoch": 8.106500691562932, + "grad_norm": 8.023885726928711, + "learning_rate": 1.0519440602428154e-05, + "log_odds_chosen": 9.650749206542969, + "log_odds_ratio": -0.00020404420502018183, + "logits/chosen": -0.5126906037330627, + "logits/rejected": -0.6327559351921082, + "logps/chosen": -0.0004856606828980148, + "logps/rejected": -1.758907675743103, + "loss": 0.4059, + "nll_loss": 0.10146161913871765, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8566071200184524e-05, + "rewards/margins": 0.17584219574928284, + "rewards/rejected": -0.1758907586336136, + "step": 11722 + }, + { + "epoch": 8.107192254495159, + "grad_norm": 3.2019126415252686, + "learning_rate": 1.0515598586138007e-05, + "log_odds_chosen": 10.740392684936523, + "log_odds_ratio": -3.777168603846803e-05, + "logits/chosen": -0.8093961477279663, + "logits/rejected": -0.7486404180526733, + "logps/chosen": -0.0002348105190321803, + "logps/rejected": -2.1447434425354004, + "loss": 0.3787, + "nll_loss": 0.09465982019901276, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.348105226701591e-05, + "rewards/margins": 0.214450865983963, + "rewards/rejected": -0.21447435021400452, + "step": 11723 + }, + { + "epoch": 8.107883817427386, + "grad_norm": 5.206589221954346, + "learning_rate": 1.0511756569847857e-05, + "log_odds_chosen": 11.088725090026855, + "log_odds_ratio": -0.0001943162496900186, + "logits/chosen": -0.5015290379524231, + "logits/rejected": -0.41111627221107483, + "logps/chosen": -0.0004091473820153624, + "logps/rejected": -2.845104217529297, + "loss": 0.3381, + "nll_loss": 0.08451039344072342, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.091473965672776e-05, + "rewards/margins": 0.28446948528289795, + "rewards/rejected": -0.28451040387153625, + "step": 11724 + }, + { + "epoch": 8.108575380359612, + "grad_norm": 3.697836399078369, + "learning_rate": 1.0507914553557708e-05, + "log_odds_chosen": 10.652294158935547, + "log_odds_ratio": -5.1158724090782925e-05, + "logits/chosen": -0.9492626190185547, + "logits/rejected": -0.9893613457679749, + "logps/chosen": -0.0005272809648886323, + "logps/rejected": -2.086449146270752, + "loss": 0.4128, + "nll_loss": 0.10319921374320984, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2728093578480184e-05, + "rewards/margins": 0.20859217643737793, + "rewards/rejected": -0.20864489674568176, + "step": 11725 + }, + { + "epoch": 8.10926694329184, + "grad_norm": 5.2985920906066895, + "learning_rate": 1.0504072537267559e-05, + "log_odds_chosen": 10.933512687683105, + "log_odds_ratio": -0.00010600912355585024, + "logits/chosen": -0.6199065446853638, + "logits/rejected": -0.6506941318511963, + "logps/chosen": -0.0006066207424737513, + "logps/rejected": -2.5779573917388916, + "loss": 0.3398, + "nll_loss": 0.0849461555480957, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066207424737513e-05, + "rewards/margins": 0.25773507356643677, + "rewards/rejected": -0.2577957510948181, + "step": 11726 + }, + { + "epoch": 8.109958506224066, + "grad_norm": 4.998779296875, + "learning_rate": 1.050023052097741e-05, + "log_odds_chosen": 10.645483016967773, + "log_odds_ratio": -9.701005183160305e-05, + "logits/chosen": -0.3984414041042328, + "logits/rejected": -0.38287946581840515, + "logps/chosen": -0.0003364400181453675, + "logps/rejected": -2.164236068725586, + "loss": 0.3282, + "nll_loss": 0.08203479647636414, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.364400254213251e-05, + "rewards/margins": 0.2163899540901184, + "rewards/rejected": -0.2164236158132553, + "step": 11727 + }, + { + "epoch": 8.110650069156293, + "grad_norm": 4.733262062072754, + "learning_rate": 1.049638850468726e-05, + "log_odds_chosen": 11.274560928344727, + "log_odds_ratio": -0.0001574632478877902, + "logits/chosen": -0.10809072107076645, + "logits/rejected": -0.18218806385993958, + "logps/chosen": -0.0002483499119989574, + "logps/rejected": -3.040714740753174, + "loss": 0.5118, + "nll_loss": 0.12793704867362976, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.483499156369362e-05, + "rewards/margins": 0.304046630859375, + "rewards/rejected": -0.30407148599624634, + "step": 11728 + }, + { + "epoch": 8.11134163208852, + "grad_norm": 8.088932037353516, + "learning_rate": 1.0492546488397113e-05, + "log_odds_chosen": 11.483865737915039, + "log_odds_ratio": -0.00010575917985988781, + "logits/chosen": -0.4476820230484009, + "logits/rejected": -0.37150460481643677, + "logps/chosen": -0.0007105075637809932, + "logps/rejected": -2.8520753383636475, + "loss": 0.3618, + "nll_loss": 0.09043564647436142, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.105076656443998e-05, + "rewards/margins": 0.28513649106025696, + "rewards/rejected": -0.2852075397968292, + "step": 11729 + }, + { + "epoch": 8.112033195020746, + "grad_norm": 6.1691460609436035, + "learning_rate": 1.0488704472106962e-05, + "log_odds_chosen": 12.182022094726562, + "log_odds_ratio": -2.4681070499354973e-05, + "logits/chosen": -0.4261326193809509, + "logits/rejected": -0.5570498704910278, + "logps/chosen": -0.0003534106654115021, + "logps/rejected": -3.1881463527679443, + "loss": 0.5829, + "nll_loss": 0.1457168161869049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.534106508595869e-05, + "rewards/margins": 0.318779319524765, + "rewards/rejected": -0.3188146650791168, + "step": 11730 + }, + { + "epoch": 8.112724757952973, + "grad_norm": 3.71748948097229, + "learning_rate": 1.0484862455816813e-05, + "log_odds_chosen": 10.750641822814941, + "log_odds_ratio": -0.00017214790568687022, + "logits/chosen": -0.31991609930992126, + "logits/rejected": -0.3900543749332428, + "logps/chosen": -0.0003686299023684114, + "logps/rejected": -2.6714460849761963, + "loss": 0.3827, + "nll_loss": 0.09565538167953491, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6862991692032665e-05, + "rewards/margins": 0.267107754945755, + "rewards/rejected": -0.2671446204185486, + "step": 11731 + }, + { + "epoch": 8.1134163208852, + "grad_norm": 2.332948923110962, + "learning_rate": 1.0481020439526663e-05, + "log_odds_chosen": 10.546846389770508, + "log_odds_ratio": -0.0001390865072607994, + "logits/chosen": -0.4906228482723236, + "logits/rejected": -0.4684491455554962, + "logps/chosen": -0.00023612199584022164, + "logps/rejected": -1.8974788188934326, + "loss": 0.3942, + "nll_loss": 0.09854131937026978, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3612201403011568e-05, + "rewards/margins": 0.18972426652908325, + "rewards/rejected": -0.1897478997707367, + "step": 11732 + }, + { + "epoch": 8.114107883817427, + "grad_norm": 4.659921169281006, + "learning_rate": 1.0477178423236516e-05, + "log_odds_chosen": 11.114786148071289, + "log_odds_ratio": -0.00022430458921007812, + "logits/chosen": 0.07960556447505951, + "logits/rejected": -0.11168298870325089, + "logps/chosen": -0.00036845554132014513, + "logps/rejected": -2.572969913482666, + "loss": 0.6721, + "nll_loss": 0.1679985523223877, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6845554859610274e-05, + "rewards/margins": 0.25726014375686646, + "rewards/rejected": -0.25729697942733765, + "step": 11733 + }, + { + "epoch": 8.114799446749654, + "grad_norm": 3.753039598464966, + "learning_rate": 1.0473336406946366e-05, + "log_odds_chosen": 10.989168167114258, + "log_odds_ratio": -8.060281106736511e-05, + "logits/chosen": -0.5705875754356384, + "logits/rejected": -0.6338287591934204, + "logps/chosen": -0.0005902528646402061, + "logps/rejected": -2.472515821456909, + "loss": 0.2849, + "nll_loss": 0.07122041285037994, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9025289374403656e-05, + "rewards/margins": 0.24719256162643433, + "rewards/rejected": -0.24725157022476196, + "step": 11734 + }, + { + "epoch": 8.11549100968188, + "grad_norm": 4.227781295776367, + "learning_rate": 1.0469494390656216e-05, + "log_odds_chosen": 10.857701301574707, + "log_odds_ratio": -5.3189338359516114e-05, + "logits/chosen": -0.3402118384838104, + "logits/rejected": -0.32114389538764954, + "logps/chosen": -0.00019546672410797328, + "logps/rejected": -2.3145198822021484, + "loss": 0.4392, + "nll_loss": 0.10979792475700378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9546674593584612e-05, + "rewards/margins": 0.2314324676990509, + "rewards/rejected": -0.23145200312137604, + "step": 11735 + }, + { + "epoch": 8.116182572614107, + "grad_norm": 4.631288051605225, + "learning_rate": 1.0465652374366068e-05, + "log_odds_chosen": 11.65530776977539, + "log_odds_ratio": -0.00029073667246848345, + "logits/chosen": -0.27442625164985657, + "logits/rejected": -0.4939839839935303, + "logps/chosen": -0.0006930760573595762, + "logps/rejected": -3.0342812538146973, + "loss": 0.4844, + "nll_loss": 0.1210765540599823, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.930760719114915e-05, + "rewards/margins": 0.303358793258667, + "rewards/rejected": -0.30342811346054077, + "step": 11736 + }, + { + "epoch": 8.116874135546334, + "grad_norm": 2.9287948608398438, + "learning_rate": 1.0461810358075919e-05, + "log_odds_chosen": 11.522528648376465, + "log_odds_ratio": -1.7534979633637704e-05, + "logits/chosen": -0.5429355502128601, + "logits/rejected": -0.614828884601593, + "logps/chosen": -0.0002202479518018663, + "logps/rejected": -2.4436118602752686, + "loss": 0.323, + "nll_loss": 0.080753393471241, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.202479481638875e-05, + "rewards/margins": 0.24433915317058563, + "rewards/rejected": -0.24436119198799133, + "step": 11737 + }, + { + "epoch": 8.117565698478561, + "grad_norm": 5.3048577308654785, + "learning_rate": 1.045796834178577e-05, + "log_odds_chosen": 11.234204292297363, + "log_odds_ratio": -1.8929702491732314e-05, + "logits/chosen": -0.5729963779449463, + "logits/rejected": -0.6361346244812012, + "logps/chosen": -9.631240391172469e-05, + "logps/rejected": -1.866896390914917, + "loss": 0.6894, + "nll_loss": 0.17235279083251953, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.63124148256611e-06, + "rewards/margins": 0.18668000400066376, + "rewards/rejected": -0.18668964505195618, + "step": 11738 + }, + { + "epoch": 8.118257261410788, + "grad_norm": 4.327181816101074, + "learning_rate": 1.045412632549562e-05, + "log_odds_chosen": 10.270843505859375, + "log_odds_ratio": -0.0002234416315332055, + "logits/chosen": -0.4312435984611511, + "logits/rejected": -0.469840943813324, + "logps/chosen": -0.0014292667619884014, + "logps/rejected": -2.169156789779663, + "loss": 0.4497, + "nll_loss": 0.11241503059864044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001429266994819045, + "rewards/margins": 0.21677275002002716, + "rewards/rejected": -0.21691568195819855, + "step": 11739 + }, + { + "epoch": 8.118948824343015, + "grad_norm": 4.3350396156311035, + "learning_rate": 1.0450284309205471e-05, + "log_odds_chosen": 10.924783706665039, + "log_odds_ratio": -0.000957698212005198, + "logits/chosen": 0.08049260079860687, + "logits/rejected": -0.008040845394134521, + "logps/chosen": -0.00039710375131107867, + "logps/rejected": -2.2647242546081543, + "loss": 0.5589, + "nll_loss": 0.13964161276817322, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9710379496682435e-05, + "rewards/margins": 0.22643274068832397, + "rewards/rejected": -0.22647245228290558, + "step": 11740 + }, + { + "epoch": 8.119640387275242, + "grad_norm": 3.162846326828003, + "learning_rate": 1.0446442292915322e-05, + "log_odds_chosen": 12.0403470993042, + "log_odds_ratio": -8.135210009641014e-06, + "logits/chosen": -0.2058313935995102, + "logits/rejected": -0.23427899181842804, + "logps/chosen": -6.742282857885584e-05, + "logps/rejected": -2.2653045654296875, + "loss": 0.4328, + "nll_loss": 0.10819646716117859, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.742283403582405e-06, + "rewards/margins": 0.22652371227741241, + "rewards/rejected": -0.22653046250343323, + "step": 11741 + }, + { + "epoch": 8.120331950207468, + "grad_norm": 3.899838447570801, + "learning_rate": 1.0442600276625174e-05, + "log_odds_chosen": 10.268180847167969, + "log_odds_ratio": -0.00013161652896087617, + "logits/chosen": -0.6410156488418579, + "logits/rejected": -0.6552188396453857, + "logps/chosen": -0.00023809485719539225, + "logps/rejected": -1.8581407070159912, + "loss": 0.4474, + "nll_loss": 0.11183653771877289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.380948353675194e-05, + "rewards/margins": 0.1857902556657791, + "rewards/rejected": -0.18581408262252808, + "step": 11742 + }, + { + "epoch": 8.121023513139695, + "grad_norm": 3.759852647781372, + "learning_rate": 1.0438758260335025e-05, + "log_odds_chosen": 10.801055908203125, + "log_odds_ratio": -8.971167699201033e-05, + "logits/chosen": -0.24352969229221344, + "logits/rejected": -0.27986443042755127, + "logps/chosen": -0.0002285518276039511, + "logps/rejected": -2.148073196411133, + "loss": 0.4463, + "nll_loss": 0.11155528575181961, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2855185306980275e-05, + "rewards/margins": 0.21478447318077087, + "rewards/rejected": -0.21480733156204224, + "step": 11743 + }, + { + "epoch": 8.121715076071922, + "grad_norm": 3.330958127975464, + "learning_rate": 1.0434916244044874e-05, + "log_odds_chosen": 11.091012001037598, + "log_odds_ratio": -0.00026707968208938837, + "logits/chosen": -0.41995424032211304, + "logits/rejected": -0.457638144493103, + "logps/chosen": -0.00033980689477175474, + "logps/rejected": -2.834537982940674, + "loss": 0.3658, + "nll_loss": 0.09141948074102402, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.398069384275004e-05, + "rewards/margins": 0.28341981768608093, + "rewards/rejected": -0.2834537923336029, + "step": 11744 + }, + { + "epoch": 8.122406639004149, + "grad_norm": 4.29794454574585, + "learning_rate": 1.0431074227754726e-05, + "log_odds_chosen": 11.204475402832031, + "log_odds_ratio": -2.0994948499719612e-05, + "logits/chosen": -0.2526164650917053, + "logits/rejected": -0.24609719216823578, + "logps/chosen": -0.0009924235055223107, + "logps/rejected": -2.580258369445801, + "loss": 0.3958, + "nll_loss": 0.09894296526908875, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.924236655933782e-05, + "rewards/margins": 0.2579265832901001, + "rewards/rejected": -0.2580258250236511, + "step": 11745 + }, + { + "epoch": 8.123098201936376, + "grad_norm": 3.5008950233459473, + "learning_rate": 1.0427232211464577e-05, + "log_odds_chosen": 10.998022079467773, + "log_odds_ratio": -0.00012253725435584784, + "logits/chosen": -0.020180311053991318, + "logits/rejected": -0.07521750032901764, + "logps/chosen": -0.0004845786024816334, + "logps/rejected": -2.175794839859009, + "loss": 0.4081, + "nll_loss": 0.10201320797204971, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.845785952056758e-05, + "rewards/margins": 0.21753105521202087, + "rewards/rejected": -0.21757948398590088, + "step": 11746 + }, + { + "epoch": 8.123789764868603, + "grad_norm": 4.073617935180664, + "learning_rate": 1.0423390195174428e-05, + "log_odds_chosen": 10.413125038146973, + "log_odds_ratio": -0.00023272483667824417, + "logits/chosen": -0.308295339345932, + "logits/rejected": -0.3603138327598572, + "logps/chosen": -0.000880507577676326, + "logps/rejected": -2.106022834777832, + "loss": 0.3743, + "nll_loss": 0.09354668110609055, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.805075049167499e-05, + "rewards/margins": 0.21051423251628876, + "rewards/rejected": -0.2106022834777832, + "step": 11747 + }, + { + "epoch": 8.12448132780083, + "grad_norm": 5.366794109344482, + "learning_rate": 1.0419548178884279e-05, + "log_odds_chosen": 11.902491569519043, + "log_odds_ratio": -1.66897243616404e-05, + "logits/chosen": -0.07255479693412781, + "logits/rejected": -0.2560787796974182, + "logps/chosen": -0.00012625358067452908, + "logps/rejected": -2.5823984146118164, + "loss": 0.4061, + "nll_loss": 0.10153479874134064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2625358067452908e-05, + "rewards/margins": 0.25822722911834717, + "rewards/rejected": -0.25823986530303955, + "step": 11748 + }, + { + "epoch": 8.125172890733056, + "grad_norm": 5.0338215827941895, + "learning_rate": 1.041570616259413e-05, + "log_odds_chosen": 11.396980285644531, + "log_odds_ratio": -7.926767284516245e-05, + "logits/chosen": -0.045376040041446686, + "logits/rejected": -0.08236894011497498, + "logps/chosen": -0.00019739723938982934, + "logps/rejected": -2.5047426223754883, + "loss": 0.4258, + "nll_loss": 0.1064419075846672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9739725757972337e-05, + "rewards/margins": 0.25045451521873474, + "rewards/rejected": -0.2504742741584778, + "step": 11749 + }, + { + "epoch": 8.125864453665283, + "grad_norm": 2.8154377937316895, + "learning_rate": 1.041186414630398e-05, + "log_odds_chosen": 10.108574867248535, + "log_odds_ratio": -7.602085679536685e-05, + "logits/chosen": -0.5656677484512329, + "logits/rejected": -0.6253648996353149, + "logps/chosen": -0.00017004036635626107, + "logps/rejected": -1.509115219116211, + "loss": 0.3699, + "nll_loss": 0.09246040880680084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.700403845461551e-05, + "rewards/margins": 0.15089452266693115, + "rewards/rejected": -0.15091153979301453, + "step": 11750 + }, + { + "epoch": 8.12655601659751, + "grad_norm": 5.296730041503906, + "learning_rate": 1.0408022130013833e-05, + "log_odds_chosen": 11.02202033996582, + "log_odds_ratio": -4.4179709220770746e-05, + "logits/chosen": -0.4976728558540344, + "logits/rejected": -0.6354691386222839, + "logps/chosen": -0.00023194911773316562, + "logps/rejected": -2.3813910484313965, + "loss": 0.4758, + "nll_loss": 0.11895264685153961, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3194912500912324e-05, + "rewards/margins": 0.23811590671539307, + "rewards/rejected": -0.2381390929222107, + "step": 11751 + }, + { + "epoch": 8.127247579529737, + "grad_norm": 3.8053126335144043, + "learning_rate": 1.0404180113723683e-05, + "log_odds_chosen": 10.760663986206055, + "log_odds_ratio": -0.00013347077765502036, + "logits/chosen": 0.2298506796360016, + "logits/rejected": 0.12136253714561462, + "logps/chosen": -0.00028582217055372894, + "logps/rejected": -2.100955009460449, + "loss": 0.4429, + "nll_loss": 0.11070973426103592, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8582218874362297e-05, + "rewards/margins": 0.21006692945957184, + "rewards/rejected": -0.21009549498558044, + "step": 11752 + }, + { + "epoch": 8.127939142461964, + "grad_norm": 3.647557497024536, + "learning_rate": 1.0400338097433532e-05, + "log_odds_chosen": 11.35352897644043, + "log_odds_ratio": -6.096455399529077e-05, + "logits/chosen": -0.39178648591041565, + "logits/rejected": -0.45617157220840454, + "logps/chosen": -0.0001079787325579673, + "logps/rejected": -2.3015360832214355, + "loss": 0.3426, + "nll_loss": 0.08565562218427658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.079787307389779e-05, + "rewards/margins": 0.23014283180236816, + "rewards/rejected": -0.2301536202430725, + "step": 11753 + }, + { + "epoch": 8.12863070539419, + "grad_norm": 4.891421794891357, + "learning_rate": 1.0396496081143385e-05, + "log_odds_chosen": 10.965896606445312, + "log_odds_ratio": -0.00021092222596053034, + "logits/chosen": -0.4915197491645813, + "logits/rejected": -0.5347434878349304, + "logps/chosen": -0.0016437186859548092, + "logps/rejected": -2.8035781383514404, + "loss": 0.5808, + "nll_loss": 0.14518901705741882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016437187150586396, + "rewards/margins": 0.2801934480667114, + "rewards/rejected": -0.28035783767700195, + "step": 11754 + }, + { + "epoch": 8.129322268326417, + "grad_norm": 3.891859531402588, + "learning_rate": 1.0392654064853236e-05, + "log_odds_chosen": 12.413590431213379, + "log_odds_ratio": -2.2598998839384876e-05, + "logits/chosen": -0.8400213718414307, + "logits/rejected": -0.8548598289489746, + "logps/chosen": -0.00011090834595961496, + "logps/rejected": -3.0911455154418945, + "loss": 0.3709, + "nll_loss": 0.09272802621126175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1090835869254079e-05, + "rewards/margins": 0.30910348892211914, + "rewards/rejected": -0.30911457538604736, + "step": 11755 + }, + { + "epoch": 8.130013831258644, + "grad_norm": 5.259171962738037, + "learning_rate": 1.0388812048563086e-05, + "log_odds_chosen": 11.260395050048828, + "log_odds_ratio": -0.00016276906535495073, + "logits/chosen": -0.04953872784972191, + "logits/rejected": -0.14904235303401947, + "logps/chosen": -0.00020939150999765843, + "logps/rejected": -2.425943613052368, + "loss": 0.4882, + "nll_loss": 0.12202227860689163, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0939150999765843e-05, + "rewards/margins": 0.24257344007492065, + "rewards/rejected": -0.24259436130523682, + "step": 11756 + }, + { + "epoch": 8.130705394190871, + "grad_norm": 3.7498621940612793, + "learning_rate": 1.0384970032272937e-05, + "log_odds_chosen": 10.983440399169922, + "log_odds_ratio": -6.46675398456864e-05, + "logits/chosen": -0.5816982984542847, + "logits/rejected": -0.5811514854431152, + "logps/chosen": -0.00016490585403516889, + "logps/rejected": -1.9317435026168823, + "loss": 0.3925, + "nll_loss": 0.0981072410941124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6490585039719008e-05, + "rewards/margins": 0.1931578516960144, + "rewards/rejected": -0.1931743323802948, + "step": 11757 + }, + { + "epoch": 8.131396957123098, + "grad_norm": 3.3982486724853516, + "learning_rate": 1.0381128015982788e-05, + "log_odds_chosen": 11.659255027770996, + "log_odds_ratio": -2.207270881626755e-05, + "logits/chosen": -0.3394581079483032, + "logits/rejected": -0.31356391310691833, + "logps/chosen": -0.00010923718218691647, + "logps/rejected": -2.487722873687744, + "loss": 0.3847, + "nll_loss": 0.09616082906723022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0923718946287408e-05, + "rewards/margins": 0.248761385679245, + "rewards/rejected": -0.24877230823040009, + "step": 11758 + }, + { + "epoch": 8.132088520055325, + "grad_norm": 5.193324565887451, + "learning_rate": 1.0377285999692639e-05, + "log_odds_chosen": 10.180469512939453, + "log_odds_ratio": -0.00010024676885223016, + "logits/chosen": -0.20359095931053162, + "logits/rejected": -0.1900109499692917, + "logps/chosen": -0.0003616532776504755, + "logps/rejected": -2.0986833572387695, + "loss": 0.4011, + "nll_loss": 0.1002582311630249, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.616532922023907e-05, + "rewards/margins": 0.20983219146728516, + "rewards/rejected": -0.20986835658550262, + "step": 11759 + }, + { + "epoch": 8.132780082987551, + "grad_norm": 8.418069839477539, + "learning_rate": 1.0373443983402491e-05, + "log_odds_chosen": 10.925003051757812, + "log_odds_ratio": -0.00010719512647483498, + "logits/chosen": -0.6948219537734985, + "logits/rejected": -0.6730424761772156, + "logps/chosen": -0.0001794162963051349, + "logps/rejected": -2.2363123893737793, + "loss": 0.4189, + "nll_loss": 0.10472062230110168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.794162926671561e-05, + "rewards/margins": 0.22361332178115845, + "rewards/rejected": -0.22363126277923584, + "step": 11760 + }, + { + "epoch": 8.133471645919778, + "grad_norm": 4.210556507110596, + "learning_rate": 1.0369601967112342e-05, + "log_odds_chosen": 11.479969024658203, + "log_odds_ratio": -4.065928442287259e-05, + "logits/chosen": -0.10444462299346924, + "logits/rejected": -0.20772212743759155, + "logps/chosen": -0.00046929556992836297, + "logps/rejected": -2.8166074752807617, + "loss": 0.3729, + "nll_loss": 0.09321242570877075, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6929555537644774e-05, + "rewards/margins": 0.281613826751709, + "rewards/rejected": -0.2816607356071472, + "step": 11761 + }, + { + "epoch": 8.134163208852005, + "grad_norm": 3.6201324462890625, + "learning_rate": 1.0365759950822191e-05, + "log_odds_chosen": 9.515316009521484, + "log_odds_ratio": -0.00029265222838148475, + "logits/chosen": -0.45242300629615784, + "logits/rejected": -0.5208587646484375, + "logps/chosen": -0.0007963213138282299, + "logps/rejected": -1.312931776046753, + "loss": 0.7563, + "nll_loss": 0.18905624747276306, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.963214011397213e-05, + "rewards/margins": 0.13121354579925537, + "rewards/rejected": -0.1312931925058365, + "step": 11762 + }, + { + "epoch": 8.134854771784232, + "grad_norm": 5.176267147064209, + "learning_rate": 1.0361917934532043e-05, + "log_odds_chosen": 11.17673110961914, + "log_odds_ratio": -0.00012910067744087428, + "logits/chosen": -0.6295480728149414, + "logits/rejected": -0.5572465658187866, + "logps/chosen": -0.0006317183724604547, + "logps/rejected": -3.2958240509033203, + "loss": 0.2742, + "nll_loss": 0.06854239851236343, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.317184306681156e-05, + "rewards/margins": 0.32951924204826355, + "rewards/rejected": -0.32958242297172546, + "step": 11763 + }, + { + "epoch": 8.135546334716459, + "grad_norm": 3.88042950630188, + "learning_rate": 1.0358075918241894e-05, + "log_odds_chosen": 10.987863540649414, + "log_odds_ratio": -2.5229761376976967e-05, + "logits/chosen": 0.14450721442699432, + "logits/rejected": 0.16441746056079865, + "logps/chosen": -0.00013737456174567342, + "logps/rejected": -1.970505952835083, + "loss": 0.4267, + "nll_loss": 0.1066756397485733, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3737457265960984e-05, + "rewards/margins": 0.19703687727451324, + "rewards/rejected": -0.19705060124397278, + "step": 11764 + }, + { + "epoch": 8.136237897648686, + "grad_norm": 4.437648773193359, + "learning_rate": 1.0354233901951745e-05, + "log_odds_chosen": 11.371580123901367, + "log_odds_ratio": -4.398940654937178e-05, + "logits/chosen": -0.5579978227615356, + "logits/rejected": -0.44501692056655884, + "logps/chosen": -0.00014620760339312255, + "logps/rejected": -2.3657233715057373, + "loss": 0.3917, + "nll_loss": 0.09792998433113098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4620760339312255e-05, + "rewards/margins": 0.2365577071905136, + "rewards/rejected": -0.23657235503196716, + "step": 11765 + }, + { + "epoch": 8.136929460580912, + "grad_norm": 3.7708580493927, + "learning_rate": 1.0350391885661596e-05, + "log_odds_chosen": 11.193588256835938, + "log_odds_ratio": -2.5481427655904554e-05, + "logits/chosen": 0.14303867518901825, + "logits/rejected": 0.02036203444004059, + "logps/chosen": -0.00014616517000831664, + "logps/rejected": -2.056412696838379, + "loss": 0.404, + "nll_loss": 0.10100395977497101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4616516637033783e-05, + "rewards/margins": 0.20562666654586792, + "rewards/rejected": -0.2056412696838379, + "step": 11766 + }, + { + "epoch": 8.13762102351314, + "grad_norm": 8.705947875976562, + "learning_rate": 1.0346549869371446e-05, + "log_odds_chosen": 10.947639465332031, + "log_odds_ratio": -5.8245903346687555e-05, + "logits/chosen": -0.5470505952835083, + "logits/rejected": -0.5812299251556396, + "logps/chosen": -0.0004906191607005894, + "logps/rejected": -2.086725950241089, + "loss": 0.3744, + "nll_loss": 0.09358873963356018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.90619167976547e-05, + "rewards/margins": 0.20862354338169098, + "rewards/rejected": -0.20867261290550232, + "step": 11767 + }, + { + "epoch": 8.138312586445366, + "grad_norm": 4.619115829467773, + "learning_rate": 1.0342707853081297e-05, + "log_odds_chosen": 10.422161102294922, + "log_odds_ratio": -6.12240910413675e-05, + "logits/chosen": -0.33057886362075806, + "logits/rejected": -0.3734843134880066, + "logps/chosen": -0.00027954988763667643, + "logps/rejected": -2.0615577697753906, + "loss": 0.3589, + "nll_loss": 0.08972762525081635, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7954987672274e-05, + "rewards/margins": 0.2061278223991394, + "rewards/rejected": -0.20615577697753906, + "step": 11768 + }, + { + "epoch": 8.139004149377593, + "grad_norm": 4.015398025512695, + "learning_rate": 1.0338865836791148e-05, + "log_odds_chosen": 11.448203086853027, + "log_odds_ratio": -2.4008486434468068e-05, + "logits/chosen": 0.008959844708442688, + "logits/rejected": -0.04505334049463272, + "logps/chosen": -0.00021311917225830257, + "logps/rejected": -2.0293219089508057, + "loss": 0.389, + "nll_loss": 0.09724324196577072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.13119183172239e-05, + "rewards/margins": 0.2029108852148056, + "rewards/rejected": -0.2029321789741516, + "step": 11769 + }, + { + "epoch": 8.13969571230982, + "grad_norm": 3.8475868701934814, + "learning_rate": 1.0335023820501e-05, + "log_odds_chosen": 10.870960235595703, + "log_odds_ratio": -0.00019746023463085294, + "logits/chosen": -0.06472350656986237, + "logits/rejected": -0.24817217886447906, + "logps/chosen": -0.0004481807118281722, + "logps/rejected": -2.510279417037964, + "loss": 0.5035, + "nll_loss": 0.12584780156612396, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4818069000029936e-05, + "rewards/margins": 0.2509831488132477, + "rewards/rejected": -0.2510279417037964, + "step": 11770 + }, + { + "epoch": 8.140387275242047, + "grad_norm": 3.4504826068878174, + "learning_rate": 1.0331181804210851e-05, + "log_odds_chosen": 10.947843551635742, + "log_odds_ratio": -2.3020382286631502e-05, + "logits/chosen": -0.10515400767326355, + "logits/rejected": -0.16172391176223755, + "logps/chosen": -0.0002008742158068344, + "logps/rejected": -2.12304425239563, + "loss": 0.3276, + "nll_loss": 0.08189594745635986, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.008742194448132e-05, + "rewards/margins": 0.21228432655334473, + "rewards/rejected": -0.21230441331863403, + "step": 11771 + }, + { + "epoch": 8.141078838174273, + "grad_norm": 3.5445170402526855, + "learning_rate": 1.03273397879207e-05, + "log_odds_chosen": 10.88604736328125, + "log_odds_ratio": -8.225615601986647e-05, + "logits/chosen": -0.18684306740760803, + "logits/rejected": -0.23621304333209991, + "logps/chosen": -0.00030686456011608243, + "logps/rejected": -2.4081804752349854, + "loss": 0.3665, + "nll_loss": 0.09162560850381851, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.068646037718281e-05, + "rewards/margins": 0.2407873570919037, + "rewards/rejected": -0.24081803858280182, + "step": 11772 + }, + { + "epoch": 8.1417704011065, + "grad_norm": 6.290334224700928, + "learning_rate": 1.0323497771630552e-05, + "log_odds_chosen": 12.10903549194336, + "log_odds_ratio": -1.729245559545234e-05, + "logits/chosen": 0.04256100207567215, + "logits/rejected": -0.14866912364959717, + "logps/chosen": -0.00031669961754232645, + "logps/rejected": -3.1162285804748535, + "loss": 0.5985, + "nll_loss": 0.14962761104106903, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1669962481828406e-05, + "rewards/margins": 0.31159117817878723, + "rewards/rejected": -0.31162285804748535, + "step": 11773 + }, + { + "epoch": 8.142461964038727, + "grad_norm": 3.9987449645996094, + "learning_rate": 1.0319655755340403e-05, + "log_odds_chosen": 11.783027648925781, + "log_odds_ratio": -2.3419324861606583e-05, + "logits/chosen": -0.1818915605545044, + "logits/rejected": -0.1939907670021057, + "logps/chosen": -0.00046360495616681874, + "logps/rejected": -3.1576924324035645, + "loss": 0.4404, + "nll_loss": 0.11008903384208679, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6360495616681874e-05, + "rewards/margins": 0.31572288274765015, + "rewards/rejected": -0.3157692551612854, + "step": 11774 + }, + { + "epoch": 8.143153526970954, + "grad_norm": 6.004032611846924, + "learning_rate": 1.0315813739050254e-05, + "log_odds_chosen": 10.846492767333984, + "log_odds_ratio": -4.8165969928959385e-05, + "logits/chosen": -0.6579508185386658, + "logits/rejected": -0.6603182554244995, + "logps/chosen": -0.00030509906355291605, + "logps/rejected": -2.0657896995544434, + "loss": 0.5789, + "nll_loss": 0.14472922682762146, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0509905627695844e-05, + "rewards/margins": 0.20654848217964172, + "rewards/rejected": -0.20657898485660553, + "step": 11775 + }, + { + "epoch": 8.14384508990318, + "grad_norm": 3.6259446144104004, + "learning_rate": 1.0311971722760105e-05, + "log_odds_chosen": 11.277198791503906, + "log_odds_ratio": -7.718206325080246e-05, + "logits/chosen": -0.22679950296878815, + "logits/rejected": -0.28733691573143005, + "logps/chosen": -0.0002462563570588827, + "logps/rejected": -2.8088607788085938, + "loss": 0.4148, + "nll_loss": 0.103690966963768, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4625636797281913e-05, + "rewards/margins": 0.28086143732070923, + "rewards/rejected": -0.28088608384132385, + "step": 11776 + }, + { + "epoch": 8.144536652835408, + "grad_norm": 2.8234851360321045, + "learning_rate": 1.0308129706469955e-05, + "log_odds_chosen": 11.608818054199219, + "log_odds_ratio": -2.148692510672845e-05, + "logits/chosen": 0.08555248379707336, + "logits/rejected": 0.060075029730796814, + "logps/chosen": -0.00011031079338863492, + "logps/rejected": -2.2806997299194336, + "loss": 0.5941, + "nll_loss": 0.14852266013622284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.103107842936879e-05, + "rewards/margins": 0.22805896401405334, + "rewards/rejected": -0.2280699908733368, + "step": 11777 + }, + { + "epoch": 8.145228215767634, + "grad_norm": 3.5794787406921387, + "learning_rate": 1.0304287690179806e-05, + "log_odds_chosen": 10.891388893127441, + "log_odds_ratio": -0.00013585921260528266, + "logits/chosen": -0.7135385274887085, + "logits/rejected": -0.84055495262146, + "logps/chosen": -0.0002605140907689929, + "logps/rejected": -2.4400527477264404, + "loss": 0.4378, + "nll_loss": 0.10942918062210083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6051407985505648e-05, + "rewards/margins": 0.24397921562194824, + "rewards/rejected": -0.2440052628517151, + "step": 11778 + }, + { + "epoch": 8.145919778699861, + "grad_norm": 4.064663887023926, + "learning_rate": 1.0300445673889659e-05, + "log_odds_chosen": 11.208548545837402, + "log_odds_ratio": -8.20230197859928e-05, + "logits/chosen": -0.3107730448246002, + "logits/rejected": -0.275856614112854, + "logps/chosen": -0.0007718256092630327, + "logps/rejected": -2.843167304992676, + "loss": 0.4666, + "nll_loss": 0.11663761734962463, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.718256529187784e-05, + "rewards/margins": 0.2842395603656769, + "rewards/rejected": -0.284316748380661, + "step": 11779 + }, + { + "epoch": 8.146611341632088, + "grad_norm": 4.965521812438965, + "learning_rate": 1.029660365759951e-05, + "log_odds_chosen": 10.105997085571289, + "log_odds_ratio": -6.436584226321429e-05, + "logits/chosen": -0.10426747053861618, + "logits/rejected": -0.13834770023822784, + "logps/chosen": -0.0003076042339671403, + "logps/rejected": -1.3770906925201416, + "loss": 0.3954, + "nll_loss": 0.0988457053899765, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.076042412430979e-05, + "rewards/margins": 0.13767831027507782, + "rewards/rejected": -0.13770908117294312, + "step": 11780 + }, + { + "epoch": 8.147302904564315, + "grad_norm": 2.708247423171997, + "learning_rate": 1.0292761641309358e-05, + "log_odds_chosen": 11.923017501831055, + "log_odds_ratio": -8.847984645399265e-06, + "logits/chosen": -0.5788747072219849, + "logits/rejected": -0.492117702960968, + "logps/chosen": -6.703606777591631e-05, + "logps/rejected": -2.3570947647094727, + "loss": 0.2904, + "nll_loss": 0.07258927077054977, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.70360623189481e-06, + "rewards/margins": 0.23570279777050018, + "rewards/rejected": -0.23570948839187622, + "step": 11781 + }, + { + "epoch": 8.147994467496542, + "grad_norm": 3.8277664184570312, + "learning_rate": 1.0288919625019211e-05, + "log_odds_chosen": 10.717927932739258, + "log_odds_ratio": -0.00020918138034176081, + "logits/chosen": -0.47696831822395325, + "logits/rejected": -0.43856728076934814, + "logps/chosen": -0.00017271784599870443, + "logps/rejected": -2.05072283744812, + "loss": 0.3742, + "nll_loss": 0.0935308188199997, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7271784599870443e-05, + "rewards/margins": 0.20505502820014954, + "rewards/rejected": -0.205072283744812, + "step": 11782 + }, + { + "epoch": 8.148686030428768, + "grad_norm": 4.970156192779541, + "learning_rate": 1.0285077608729062e-05, + "log_odds_chosen": 10.575085639953613, + "log_odds_ratio": -0.0001494882453698665, + "logits/chosen": -0.24861939251422882, + "logits/rejected": -0.3763630986213684, + "logps/chosen": -0.0002744381199590862, + "logps/rejected": -2.2138054370880127, + "loss": 0.7953, + "nll_loss": 0.1987987905740738, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7443811632110737e-05, + "rewards/margins": 0.22135311365127563, + "rewards/rejected": -0.2213805466890335, + "step": 11783 + }, + { + "epoch": 8.149377593360995, + "grad_norm": 3.446476459503174, + "learning_rate": 1.0281235592438912e-05, + "log_odds_chosen": 10.981772422790527, + "log_odds_ratio": -3.398286207811907e-05, + "logits/chosen": -0.15307289361953735, + "logits/rejected": -0.32064151763916016, + "logps/chosen": -0.000214852683711797, + "logps/rejected": -2.250171422958374, + "loss": 0.536, + "nll_loss": 0.13399583101272583, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1485269826371223e-05, + "rewards/margins": 0.2249956727027893, + "rewards/rejected": -0.22501714527606964, + "step": 11784 + }, + { + "epoch": 8.150069156293222, + "grad_norm": 5.106137275695801, + "learning_rate": 1.0277393576148763e-05, + "log_odds_chosen": 11.902192115783691, + "log_odds_ratio": -1.2729897207464091e-05, + "logits/chosen": -0.40789902210235596, + "logits/rejected": -0.5072999000549316, + "logps/chosen": -0.00012040400179103017, + "logps/rejected": -2.754049777984619, + "loss": 0.3974, + "nll_loss": 0.09935708343982697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2040400179103017e-05, + "rewards/margins": 0.27539294958114624, + "rewards/rejected": -0.27540498971939087, + "step": 11785 + }, + { + "epoch": 8.150760719225449, + "grad_norm": 4.746187210083008, + "learning_rate": 1.0273551559858614e-05, + "log_odds_chosen": 11.31079387664795, + "log_odds_ratio": -2.847578070941381e-05, + "logits/chosen": -0.35697272419929504, + "logits/rejected": -0.35685810446739197, + "logps/chosen": -0.00015442268340848386, + "logps/rejected": -2.295431137084961, + "loss": 0.5456, + "nll_loss": 0.13639333844184875, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.544227052363567e-05, + "rewards/margins": 0.22952768206596375, + "rewards/rejected": -0.22954311966896057, + "step": 11786 + }, + { + "epoch": 8.151452282157676, + "grad_norm": 4.14506196975708, + "learning_rate": 1.0269709543568465e-05, + "log_odds_chosen": 10.621984481811523, + "log_odds_ratio": -3.840686258627102e-05, + "logits/chosen": -0.2479134351015091, + "logits/rejected": -0.2759229242801666, + "logps/chosen": -0.00012764088751282543, + "logps/rejected": -1.6436896324157715, + "loss": 0.5525, + "nll_loss": 0.1381143033504486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2764088751282543e-05, + "rewards/margins": 0.16435620188713074, + "rewards/rejected": -0.16436897218227386, + "step": 11787 + }, + { + "epoch": 8.152143845089903, + "grad_norm": 6.618087291717529, + "learning_rate": 1.0265867527278317e-05, + "log_odds_chosen": 11.717941284179688, + "log_odds_ratio": -2.240051981061697e-05, + "logits/chosen": -0.7674741744995117, + "logits/rejected": -0.7641814947128296, + "logps/chosen": -7.586412539239973e-05, + "logps/rejected": -1.8555504083633423, + "loss": 0.3964, + "nll_loss": 0.09909586608409882, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.586412266391562e-06, + "rewards/margins": 0.18554747104644775, + "rewards/rejected": -0.18555505573749542, + "step": 11788 + }, + { + "epoch": 8.15283540802213, + "grad_norm": 8.497203826904297, + "learning_rate": 1.0262025510988168e-05, + "log_odds_chosen": 11.106819152832031, + "log_odds_ratio": -4.6539134928025305e-05, + "logits/chosen": -0.480145663022995, + "logits/rejected": -0.5249185562133789, + "logps/chosen": -0.0002152361412299797, + "logps/rejected": -2.2641043663024902, + "loss": 0.4401, + "nll_loss": 0.11001075804233551, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1523612304008566e-05, + "rewards/margins": 0.22638891637325287, + "rewards/rejected": -0.22641046345233917, + "step": 11789 + }, + { + "epoch": 8.153526970954356, + "grad_norm": 2.5081300735473633, + "learning_rate": 1.0258183494698017e-05, + "log_odds_chosen": 11.968205451965332, + "log_odds_ratio": -1.1951849955949001e-05, + "logits/chosen": -0.3167341351509094, + "logits/rejected": -0.3897658586502075, + "logps/chosen": -5.4685951909050345e-05, + "logps/rejected": -1.9660568237304688, + "loss": 0.3649, + "nll_loss": 0.09123249351978302, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468594736157684e-06, + "rewards/margins": 0.19660022854804993, + "rewards/rejected": -0.19660569727420807, + "step": 11790 + }, + { + "epoch": 8.154218533886583, + "grad_norm": 4.652139663696289, + "learning_rate": 1.025434147840787e-05, + "log_odds_chosen": 11.587353706359863, + "log_odds_ratio": -2.519033478165511e-05, + "logits/chosen": -0.056919172406196594, + "logits/rejected": -0.08620971441268921, + "logps/chosen": -0.00015185258234851062, + "logps/rejected": -2.5194754600524902, + "loss": 0.7231, + "nll_loss": 0.1807647943496704, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5185258234851062e-05, + "rewards/margins": 0.25193238258361816, + "rewards/rejected": -0.2519475221633911, + "step": 11791 + }, + { + "epoch": 8.15491009681881, + "grad_norm": 5.727940082550049, + "learning_rate": 1.025049946211772e-05, + "log_odds_chosen": 11.416126251220703, + "log_odds_ratio": -5.229043017607182e-05, + "logits/chosen": -0.43798014521598816, + "logits/rejected": -0.37286901473999023, + "logps/chosen": -0.0001776377612259239, + "logps/rejected": -2.1930556297302246, + "loss": 0.5775, + "nll_loss": 0.14437736570835114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7763777577783912e-05, + "rewards/margins": 0.21928778290748596, + "rewards/rejected": -0.21930554509162903, + "step": 11792 + }, + { + "epoch": 8.155601659751037, + "grad_norm": 3.599702835083008, + "learning_rate": 1.0246657445827571e-05, + "log_odds_chosen": 11.529845237731934, + "log_odds_ratio": -3.249730434617959e-05, + "logits/chosen": -0.5446562767028809, + "logits/rejected": -0.558387041091919, + "logps/chosen": -0.0001430368865840137, + "logps/rejected": -2.637044906616211, + "loss": 0.3239, + "nll_loss": 0.08098269253969193, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4303689567896072e-05, + "rewards/margins": 0.26369020342826843, + "rewards/rejected": -0.26370447874069214, + "step": 11793 + }, + { + "epoch": 8.156293222683264, + "grad_norm": 4.702301025390625, + "learning_rate": 1.0242815429537422e-05, + "log_odds_chosen": 10.420896530151367, + "log_odds_ratio": -6.696030322927982e-05, + "logits/chosen": 0.09832927584648132, + "logits/rejected": 0.023856177926063538, + "logps/chosen": -0.00029260682640597224, + "logps/rejected": -1.9983750581741333, + "loss": 0.4929, + "nll_loss": 0.12321722507476807, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9260681913001463e-05, + "rewards/margins": 0.1998082399368286, + "rewards/rejected": -0.19983749091625214, + "step": 11794 + }, + { + "epoch": 8.15698478561549, + "grad_norm": 2.956678628921509, + "learning_rate": 1.0238973413247272e-05, + "log_odds_chosen": 11.952880859375, + "log_odds_ratio": -1.3823173503624275e-05, + "logits/chosen": -0.6629164218902588, + "logits/rejected": -0.788197934627533, + "logps/chosen": -0.00010116046178154647, + "logps/rejected": -2.5265512466430664, + "loss": 0.3091, + "nll_loss": 0.07727310061454773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.011604763334617e-05, + "rewards/margins": 0.25264501571655273, + "rewards/rejected": -0.25265511870384216, + "step": 11795 + }, + { + "epoch": 8.157676348547717, + "grad_norm": 4.642671585083008, + "learning_rate": 1.0235131396957123e-05, + "log_odds_chosen": 10.813591003417969, + "log_odds_ratio": -0.0005754618323408067, + "logits/chosen": -0.5008108019828796, + "logits/rejected": -0.49642693996429443, + "logps/chosen": -0.0010430947877466679, + "logps/rejected": -2.2011709213256836, + "loss": 0.3081, + "nll_loss": 0.07697095721960068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010430948168504983, + "rewards/margins": 0.22001276910305023, + "rewards/rejected": -0.22011709213256836, + "step": 11796 + }, + { + "epoch": 8.158367911479944, + "grad_norm": 3.9972987174987793, + "learning_rate": 1.0231289380666976e-05, + "log_odds_chosen": 11.959649085998535, + "log_odds_ratio": -1.605860234121792e-05, + "logits/chosen": -0.7508334517478943, + "logits/rejected": -0.7808016538619995, + "logps/chosen": -0.0001648375764489174, + "logps/rejected": -2.869148015975952, + "loss": 0.3852, + "nll_loss": 0.09630319476127625, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6483760191476904e-05, + "rewards/margins": 0.28689831495285034, + "rewards/rejected": -0.2869148254394531, + "step": 11797 + }, + { + "epoch": 8.159059474412171, + "grad_norm": 3.9052696228027344, + "learning_rate": 1.0227447364376826e-05, + "log_odds_chosen": 12.224464416503906, + "log_odds_ratio": -1.2303040421102196e-05, + "logits/chosen": -0.33660537004470825, + "logits/rejected": -0.3174796402454376, + "logps/chosen": -7.784018816892058e-05, + "logps/rejected": -2.7565410137176514, + "loss": 0.3949, + "nll_loss": 0.09873456507921219, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.784018634993117e-06, + "rewards/margins": 0.2756463289260864, + "rewards/rejected": -0.2756541073322296, + "step": 11798 + }, + { + "epoch": 8.159751037344398, + "grad_norm": 4.492676734924316, + "learning_rate": 1.0223605348086675e-05, + "log_odds_chosen": 10.744736671447754, + "log_odds_ratio": -0.0003734507772605866, + "logits/chosen": -0.0019226372241973877, + "logits/rejected": 0.035294584929943085, + "logps/chosen": -0.0003265690465923399, + "logps/rejected": -1.884582757949829, + "loss": 0.506, + "nll_loss": 0.1264553815126419, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.26569024764467e-05, + "rewards/margins": 0.18842563033103943, + "rewards/rejected": -0.18845829367637634, + "step": 11799 + }, + { + "epoch": 8.160442600276625, + "grad_norm": 4.888298988342285, + "learning_rate": 1.0219763331796526e-05, + "log_odds_chosen": 11.357194900512695, + "log_odds_ratio": -3.0228675313992426e-05, + "logits/chosen": -0.4045562148094177, + "logits/rejected": -0.4352848529815674, + "logps/chosen": -0.00028314441442489624, + "logps/rejected": -2.7511138916015625, + "loss": 0.31, + "nll_loss": 0.0774860605597496, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8314443625276908e-05, + "rewards/margins": 0.275083065032959, + "rewards/rejected": -0.2751113772392273, + "step": 11800 + }, + { + "epoch": 8.161134163208851, + "grad_norm": 4.62258768081665, + "learning_rate": 1.0215921315506379e-05, + "log_odds_chosen": 10.585798263549805, + "log_odds_ratio": -3.910541272489354e-05, + "logits/chosen": -0.291620135307312, + "logits/rejected": -0.32577189803123474, + "logps/chosen": -0.00016927148681133986, + "logps/rejected": -1.7979787588119507, + "loss": 0.513, + "nll_loss": 0.12825287878513336, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6927147953538224e-05, + "rewards/margins": 0.1797809600830078, + "rewards/rejected": -0.17979787290096283, + "step": 11801 + }, + { + "epoch": 8.161825726141078, + "grad_norm": 3.9092464447021484, + "learning_rate": 1.021207929921623e-05, + "log_odds_chosen": 10.713225364685059, + "log_odds_ratio": -7.490997086279094e-05, + "logits/chosen": -0.1884932816028595, + "logits/rejected": -0.263356477022171, + "logps/chosen": -0.00043695775093510747, + "logps/rejected": -1.96100914478302, + "loss": 0.4515, + "nll_loss": 0.11286258697509766, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3695774365914986e-05, + "rewards/margins": 0.19605720043182373, + "rewards/rejected": -0.19610090553760529, + "step": 11802 + }, + { + "epoch": 8.162517289073305, + "grad_norm": 4.2609171867370605, + "learning_rate": 1.020823728292608e-05, + "log_odds_chosen": 10.838077545166016, + "log_odds_ratio": -0.000666751351673156, + "logits/chosen": -0.4899050295352936, + "logits/rejected": -0.5162637829780579, + "logps/chosen": -0.002191039966419339, + "logps/rejected": -2.339308977127075, + "loss": 0.398, + "nll_loss": 0.09942437708377838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021910399664193392, + "rewards/margins": 0.23371180891990662, + "rewards/rejected": -0.23393090069293976, + "step": 11803 + }, + { + "epoch": 8.163208852005532, + "grad_norm": 3.8477578163146973, + "learning_rate": 1.020439526663593e-05, + "log_odds_chosen": 11.547567367553711, + "log_odds_ratio": -5.723809954361059e-05, + "logits/chosen": -0.16270583868026733, + "logits/rejected": -0.2292848378419876, + "logps/chosen": -0.0004535773186944425, + "logps/rejected": -2.771484375, + "loss": 0.8456, + "nll_loss": 0.2113933265209198, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.535773041425273e-05, + "rewards/margins": 0.27710309624671936, + "rewards/rejected": -0.27714845538139343, + "step": 11804 + }, + { + "epoch": 8.163900414937759, + "grad_norm": 5.081116676330566, + "learning_rate": 1.0200553250345782e-05, + "log_odds_chosen": 10.498542785644531, + "log_odds_ratio": -0.00021043805463705212, + "logits/chosen": -0.15183137357234955, + "logits/rejected": -0.26076340675354004, + "logps/chosen": -0.0002863018016796559, + "logps/rejected": -1.7621605396270752, + "loss": 0.6046, + "nll_loss": 0.15112774074077606, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.863018016796559e-05, + "rewards/margins": 0.1761874258518219, + "rewards/rejected": -0.1762160360813141, + "step": 11805 + }, + { + "epoch": 8.164591977869986, + "grad_norm": 3.674516201019287, + "learning_rate": 1.0196711234055632e-05, + "log_odds_chosen": 11.024900436401367, + "log_odds_ratio": -2.3138207325246185e-05, + "logits/chosen": -0.15182772278785706, + "logits/rejected": -0.2801783084869385, + "logps/chosen": -0.000141732714837417, + "logps/rejected": -2.197587490081787, + "loss": 0.4128, + "nll_loss": 0.10320456326007843, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.417327075614594e-05, + "rewards/margins": 0.21974456310272217, + "rewards/rejected": -0.2197587490081787, + "step": 11806 + }, + { + "epoch": 8.165283540802212, + "grad_norm": 3.9621241092681885, + "learning_rate": 1.0192869217765485e-05, + "log_odds_chosen": 10.270439147949219, + "log_odds_ratio": -7.926029502414167e-05, + "logits/chosen": -0.06136815622448921, + "logits/rejected": 0.0402841717004776, + "logps/chosen": -0.0002291160635650158, + "logps/rejected": -1.7038178443908691, + "loss": 0.4296, + "nll_loss": 0.10739271342754364, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.291160672029946e-05, + "rewards/margins": 0.17035886645317078, + "rewards/rejected": -0.17038178443908691, + "step": 11807 + }, + { + "epoch": 8.16597510373444, + "grad_norm": 4.1792216300964355, + "learning_rate": 1.0189027201475334e-05, + "log_odds_chosen": 9.855060577392578, + "log_odds_ratio": -0.0003239849756937474, + "logits/chosen": -0.07540471851825714, + "logits/rejected": -0.09573408961296082, + "logps/chosen": -0.0012772049522027373, + "logps/rejected": -2.435047149658203, + "loss": 0.5607, + "nll_loss": 0.14014235138893127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012772050104103982, + "rewards/margins": 0.2433769702911377, + "rewards/rejected": -0.24350470304489136, + "step": 11808 + }, + { + "epoch": 8.166666666666666, + "grad_norm": 3.7249915599823, + "learning_rate": 1.0185185185185185e-05, + "log_odds_chosen": 9.077827453613281, + "log_odds_ratio": -0.00032695557456463575, + "logits/chosen": -0.1378357857465744, + "logits/rejected": -0.1385318785905838, + "logps/chosen": -0.0005659713642671704, + "logps/rejected": -0.9379556179046631, + "loss": 0.2906, + "nll_loss": 0.07261990010738373, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6597142247483134e-05, + "rewards/margins": 0.09373895823955536, + "rewards/rejected": -0.09379556775093079, + "step": 11809 + }, + { + "epoch": 8.167358229598893, + "grad_norm": 3.8937172889709473, + "learning_rate": 1.0181343168895037e-05, + "log_odds_chosen": 10.944185256958008, + "log_odds_ratio": -3.4075532312272117e-05, + "logits/chosen": -0.16298729181289673, + "logits/rejected": -0.24571256339550018, + "logps/chosen": -0.00010787440987769514, + "logps/rejected": -1.5944535732269287, + "loss": 0.3732, + "nll_loss": 0.09330610185861588, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0787441169668455e-05, + "rewards/margins": 0.15943457186222076, + "rewards/rejected": -0.1594453603029251, + "step": 11810 + }, + { + "epoch": 8.16804979253112, + "grad_norm": 4.463415145874023, + "learning_rate": 1.0177501152604888e-05, + "log_odds_chosen": 11.398051261901855, + "log_odds_ratio": -2.5497522074147128e-05, + "logits/chosen": -0.048560142517089844, + "logits/rejected": 0.1563296765089035, + "logps/chosen": -0.00015461770817637444, + "logps/rejected": -2.366382122039795, + "loss": 0.3542, + "nll_loss": 0.08854615688323975, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5461770090041682e-05, + "rewards/margins": 0.23662272095680237, + "rewards/rejected": -0.23663818836212158, + "step": 11811 + }, + { + "epoch": 8.168741355463347, + "grad_norm": 3.4059031009674072, + "learning_rate": 1.0173659136314738e-05, + "log_odds_chosen": 9.915818214416504, + "log_odds_ratio": -0.00022760103456676006, + "logits/chosen": -0.2256205677986145, + "logits/rejected": -0.451595664024353, + "logps/chosen": -0.0004684239684138447, + "logps/rejected": -1.9310569763183594, + "loss": 0.2849, + "nll_loss": 0.07119649648666382, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.684239684138447e-05, + "rewards/margins": 0.19305887818336487, + "rewards/rejected": -0.19310569763183594, + "step": 11812 + }, + { + "epoch": 8.169432918395573, + "grad_norm": 4.045708656311035, + "learning_rate": 1.016981712002459e-05, + "log_odds_chosen": 11.007131576538086, + "log_odds_ratio": -5.7747245591599494e-05, + "logits/chosen": 0.21243897080421448, + "logits/rejected": 0.13054439425468445, + "logps/chosen": -0.0003317214432172477, + "logps/rejected": -2.272037982940674, + "loss": 0.4716, + "nll_loss": 0.1179007962346077, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.317214213893749e-05, + "rewards/margins": 0.22717060148715973, + "rewards/rejected": -0.22720378637313843, + "step": 11813 + }, + { + "epoch": 8.1701244813278, + "grad_norm": 2.7341315746307373, + "learning_rate": 1.016597510373444e-05, + "log_odds_chosen": 10.3163423538208, + "log_odds_ratio": -5.366649565985426e-05, + "logits/chosen": -0.20679514110088348, + "logits/rejected": -0.19088438153266907, + "logps/chosen": -0.0001653216895647347, + "logps/rejected": -1.5532350540161133, + "loss": 0.229, + "nll_loss": 0.05723622441291809, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.653216895647347e-05, + "rewards/margins": 0.15530698001384735, + "rewards/rejected": -0.15532350540161133, + "step": 11814 + }, + { + "epoch": 8.170816044260027, + "grad_norm": 5.030986309051514, + "learning_rate": 1.016213308744429e-05, + "log_odds_chosen": 10.275716781616211, + "log_odds_ratio": -0.00022884247300680727, + "logits/chosen": -0.5482270121574402, + "logits/rejected": -0.47892725467681885, + "logps/chosen": -0.00023503063130192459, + "logps/rejected": -1.9703631401062012, + "loss": 0.4739, + "nll_loss": 0.11845801770687103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.350306385778822e-05, + "rewards/margins": 0.19701281189918518, + "rewards/rejected": -0.19703632593154907, + "step": 11815 + }, + { + "epoch": 8.171507607192254, + "grad_norm": 12.4885892868042, + "learning_rate": 1.0158291071154143e-05, + "log_odds_chosen": 11.943331718444824, + "log_odds_ratio": -9.822726497077383e-06, + "logits/chosen": -0.07970777153968811, + "logits/rejected": -0.1332622617483139, + "logps/chosen": -8.568631892558187e-05, + "logps/rejected": -2.324699878692627, + "loss": 0.5231, + "nll_loss": 0.13078296184539795, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.568631528760307e-06, + "rewards/margins": 0.23246142268180847, + "rewards/rejected": -0.23246999084949493, + "step": 11816 + }, + { + "epoch": 8.17219917012448, + "grad_norm": 5.055882930755615, + "learning_rate": 1.0154449054863994e-05, + "log_odds_chosen": 9.738774299621582, + "log_odds_ratio": -0.00040705245919525623, + "logits/chosen": -0.6552140712738037, + "logits/rejected": -0.7386389970779419, + "logps/chosen": -0.0006381264538504183, + "logps/rejected": -1.8834278583526611, + "loss": 0.4624, + "nll_loss": 0.11555864661931992, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381265120580792e-05, + "rewards/margins": 0.1882789582014084, + "rewards/rejected": -0.18834277987480164, + "step": 11817 + }, + { + "epoch": 8.172890733056708, + "grad_norm": 4.6499505043029785, + "learning_rate": 1.0150607038573843e-05, + "log_odds_chosen": 10.724308967590332, + "log_odds_ratio": -5.115863314131275e-05, + "logits/chosen": -0.25859251618385315, + "logits/rejected": -0.3702784776687622, + "logps/chosen": -0.00018527252541389316, + "logps/rejected": -2.147641897201538, + "loss": 0.4386, + "nll_loss": 0.10964687168598175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8527252905187197e-05, + "rewards/margins": 0.2147456556558609, + "rewards/rejected": -0.21476417779922485, + "step": 11818 + }, + { + "epoch": 8.173582295988934, + "grad_norm": 4.096492290496826, + "learning_rate": 1.0146765022283695e-05, + "log_odds_chosen": 11.613139152526855, + "log_odds_ratio": -4.364008054835722e-05, + "logits/chosen": -0.44727540016174316, + "logits/rejected": -0.5089828372001648, + "logps/chosen": -0.0002530687488615513, + "logps/rejected": -2.85605525970459, + "loss": 0.3856, + "nll_loss": 0.09638633579015732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.530687561375089e-05, + "rewards/margins": 0.28558024764060974, + "rewards/rejected": -0.2856055498123169, + "step": 11819 + }, + { + "epoch": 8.174273858921161, + "grad_norm": 3.698554039001465, + "learning_rate": 1.0142923005993546e-05, + "log_odds_chosen": 10.253783226013184, + "log_odds_ratio": -9.055588452611119e-05, + "logits/chosen": -0.10288023203611374, + "logits/rejected": -0.14574682712554932, + "logps/chosen": -0.0001656345120863989, + "logps/rejected": -1.596636414527893, + "loss": 0.3126, + "nll_loss": 0.07813283801078796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.656345193623565e-05, + "rewards/margins": 0.15964707732200623, + "rewards/rejected": -0.1596636325120926, + "step": 11820 + }, + { + "epoch": 8.174965421853388, + "grad_norm": 3.5334091186523438, + "learning_rate": 1.0139080989703397e-05, + "log_odds_chosen": 11.575165748596191, + "log_odds_ratio": -3.546190419001505e-05, + "logits/chosen": -0.6529729962348938, + "logits/rejected": -0.7235559225082397, + "logps/chosen": -0.000588984985370189, + "logps/rejected": -3.3183703422546387, + "loss": 0.7238, + "nll_loss": 0.1809358298778534, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8898498537018895e-05, + "rewards/margins": 0.3317781686782837, + "rewards/rejected": -0.3318370580673218, + "step": 11821 + }, + { + "epoch": 8.175656984785615, + "grad_norm": 4.298321723937988, + "learning_rate": 1.0135238973413248e-05, + "log_odds_chosen": 11.861610412597656, + "log_odds_ratio": -2.6712465114542283e-05, + "logits/chosen": -0.22402334213256836, + "logits/rejected": -0.23995819687843323, + "logps/chosen": -0.0014007949503138661, + "logps/rejected": -3.094560384750366, + "loss": 0.4677, + "nll_loss": 0.11691083014011383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001400795008521527, + "rewards/margins": 0.309315949678421, + "rewards/rejected": -0.3094560205936432, + "step": 11822 + }, + { + "epoch": 8.176348547717842, + "grad_norm": 3.889859437942505, + "learning_rate": 1.0131396957123098e-05, + "log_odds_chosen": 10.959208488464355, + "log_odds_ratio": -5.500488987308927e-05, + "logits/chosen": -0.42340776324272156, + "logits/rejected": -0.37429279088974, + "logps/chosen": -0.000169272898347117, + "logps/rejected": -2.243509292602539, + "loss": 0.3862, + "nll_loss": 0.09653585404157639, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.69272898347117e-05, + "rewards/margins": 0.2243340015411377, + "rewards/rejected": -0.2243509292602539, + "step": 11823 + }, + { + "epoch": 8.177040110650069, + "grad_norm": 4.201392650604248, + "learning_rate": 1.012755494083295e-05, + "log_odds_chosen": 11.622950553894043, + "log_odds_ratio": -2.326399044250138e-05, + "logits/chosen": -0.8622159361839294, + "logits/rejected": -0.8740267157554626, + "logps/chosen": -0.00017339483019895852, + "logps/rejected": -2.517303943634033, + "loss": 0.5086, + "nll_loss": 0.12715020775794983, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7339485566481017e-05, + "rewards/margins": 0.2517130374908447, + "rewards/rejected": -0.25173038244247437, + "step": 11824 + }, + { + "epoch": 8.177731673582295, + "grad_norm": 4.291811943054199, + "learning_rate": 1.0123712924542802e-05, + "log_odds_chosen": 9.529434204101562, + "log_odds_ratio": -0.0008333018631674349, + "logits/chosen": -0.5183630585670471, + "logits/rejected": -0.45871567726135254, + "logps/chosen": -0.0013324617175385356, + "logps/rejected": -1.9355536699295044, + "loss": 0.2921, + "nll_loss": 0.07294148206710815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013324618339538574, + "rewards/margins": 0.1934221237897873, + "rewards/rejected": -0.19355536997318268, + "step": 11825 + }, + { + "epoch": 8.178423236514522, + "grad_norm": 4.345532417297363, + "learning_rate": 1.0119870908252652e-05, + "log_odds_chosen": 11.120061874389648, + "log_odds_ratio": -3.411036595935002e-05, + "logits/chosen": -0.23412591218948364, + "logits/rejected": -0.2507272958755493, + "logps/chosen": -0.00027076120022684336, + "logps/rejected": -2.612562656402588, + "loss": 0.4705, + "nll_loss": 0.11761318892240524, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7076119295088574e-05, + "rewards/margins": 0.26122918725013733, + "rewards/rejected": -0.26125624775886536, + "step": 11826 + }, + { + "epoch": 8.179114799446749, + "grad_norm": 5.8843793869018555, + "learning_rate": 1.0116028891962501e-05, + "log_odds_chosen": 11.541424751281738, + "log_odds_ratio": -1.2509261068771593e-05, + "logits/chosen": -0.5906883478164673, + "logits/rejected": -0.5607191324234009, + "logps/chosen": -8.202745812013745e-05, + "logps/rejected": -2.0678231716156006, + "loss": 0.4322, + "nll_loss": 0.10804738849401474, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.202745448215865e-06, + "rewards/margins": 0.20677411556243896, + "rewards/rejected": -0.20678231120109558, + "step": 11827 + }, + { + "epoch": 8.179806362378976, + "grad_norm": 3.337306261062622, + "learning_rate": 1.0112186875672354e-05, + "log_odds_chosen": 11.85464096069336, + "log_odds_ratio": -1.6896887245820835e-05, + "logits/chosen": -0.39175254106521606, + "logits/rejected": -0.4269852936267853, + "logps/chosen": -8.84580222191289e-05, + "logps/rejected": -2.1351940631866455, + "loss": 0.2785, + "nll_loss": 0.06961159408092499, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.84580185811501e-06, + "rewards/margins": 0.21351055800914764, + "rewards/rejected": -0.2135194092988968, + "step": 11828 + }, + { + "epoch": 8.180497925311203, + "grad_norm": 2.875051736831665, + "learning_rate": 1.0108344859382205e-05, + "log_odds_chosen": 9.571345329284668, + "log_odds_ratio": -7.962647941894829e-05, + "logits/chosen": -0.3000252842903137, + "logits/rejected": -0.42375290393829346, + "logps/chosen": -0.0002875003847293556, + "logps/rejected": -1.5732154846191406, + "loss": 0.3661, + "nll_loss": 0.0915074497461319, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8750037017744035e-05, + "rewards/margins": 0.15729279816150665, + "rewards/rejected": -0.15732155740261078, + "step": 11829 + }, + { + "epoch": 8.18118948824343, + "grad_norm": 3.728848457336426, + "learning_rate": 1.0104502843092055e-05, + "log_odds_chosen": 10.975574493408203, + "log_odds_ratio": -2.4418277462245896e-05, + "logits/chosen": -0.514509379863739, + "logits/rejected": -0.6360599994659424, + "logps/chosen": -0.00021895149257034063, + "logps/rejected": -2.1837289333343506, + "loss": 0.3772, + "nll_loss": 0.09429260343313217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.189514816564042e-05, + "rewards/margins": 0.21835100650787354, + "rewards/rejected": -0.2183728963136673, + "step": 11830 + }, + { + "epoch": 8.181881051175656, + "grad_norm": 3.2154626846313477, + "learning_rate": 1.0100660826801906e-05, + "log_odds_chosen": 10.822830200195312, + "log_odds_ratio": -0.00029166098101995885, + "logits/chosen": -0.773431658744812, + "logits/rejected": -0.7771180868148804, + "logps/chosen": -0.00030548672657459974, + "logps/rejected": -2.0690057277679443, + "loss": 0.3367, + "nll_loss": 0.08415533602237701, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.054867556784302e-05, + "rewards/margins": 0.20687001943588257, + "rewards/rejected": -0.20690058171749115, + "step": 11831 + }, + { + "epoch": 8.182572614107883, + "grad_norm": 4.847975730895996, + "learning_rate": 1.0096818810511757e-05, + "log_odds_chosen": 11.216750144958496, + "log_odds_ratio": -9.346687147626653e-05, + "logits/chosen": -0.18628649413585663, + "logits/rejected": -0.23374198377132416, + "logps/chosen": -0.00024281159858219326, + "logps/rejected": -2.7525458335876465, + "loss": 0.2698, + "nll_loss": 0.06744711846113205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4281160222017206e-05, + "rewards/margins": 0.2752303183078766, + "rewards/rejected": -0.27525460720062256, + "step": 11832 + }, + { + "epoch": 8.18326417704011, + "grad_norm": 2.793700695037842, + "learning_rate": 1.0092976794221608e-05, + "log_odds_chosen": 10.602350234985352, + "log_odds_ratio": -7.463981455657631e-05, + "logits/chosen": -0.7802792191505432, + "logits/rejected": -0.8679121732711792, + "logps/chosen": -0.0009457082487642765, + "logps/rejected": -2.089808464050293, + "loss": 0.3161, + "nll_loss": 0.07902882993221283, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.457083797315136e-05, + "rewards/margins": 0.2088862806558609, + "rewards/rejected": -0.20898084342479706, + "step": 11833 + }, + { + "epoch": 8.183955739972337, + "grad_norm": 4.563675880432129, + "learning_rate": 1.0089134777931458e-05, + "log_odds_chosen": 10.466887474060059, + "log_odds_ratio": -0.00010899512562900782, + "logits/chosen": -0.5160598158836365, + "logits/rejected": -0.5109041929244995, + "logps/chosen": -0.000562231638468802, + "logps/rejected": -2.570701837539673, + "loss": 0.339, + "nll_loss": 0.08474425971508026, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.622316894005053e-05, + "rewards/margins": 0.2570139467716217, + "rewards/rejected": -0.2570701837539673, + "step": 11834 + }, + { + "epoch": 8.184647302904564, + "grad_norm": 5.127349853515625, + "learning_rate": 1.008529276164131e-05, + "log_odds_chosen": 11.212685585021973, + "log_odds_ratio": -0.0007525036344304681, + "logits/chosen": -0.4909210205078125, + "logits/rejected": -0.4616711735725403, + "logps/chosen": -0.00021634719450958073, + "logps/rejected": -2.375697135925293, + "loss": 0.6213, + "nll_loss": 0.1552608758211136, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1634719814755954e-05, + "rewards/margins": 0.2375481128692627, + "rewards/rejected": -0.23756971955299377, + "step": 11835 + }, + { + "epoch": 8.18533886583679, + "grad_norm": 4.297598838806152, + "learning_rate": 1.008145074535116e-05, + "log_odds_chosen": 11.013813018798828, + "log_odds_ratio": -0.000144320132676512, + "logits/chosen": -0.34821152687072754, + "logits/rejected": -0.40879786014556885, + "logps/chosen": -0.0002897988888435066, + "logps/rejected": -2.404247760772705, + "loss": 0.4883, + "nll_loss": 0.12206760048866272, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8979891794733703e-05, + "rewards/margins": 0.24039578437805176, + "rewards/rejected": -0.24042478203773499, + "step": 11836 + }, + { + "epoch": 8.186030428769017, + "grad_norm": 4.018962860107422, + "learning_rate": 1.007760872906101e-05, + "log_odds_chosen": 11.19547176361084, + "log_odds_ratio": -4.441758574103005e-05, + "logits/chosen": -0.26014795899391174, + "logits/rejected": -0.3397113084793091, + "logps/chosen": -0.0002606114430818707, + "logps/rejected": -2.4648585319519043, + "loss": 0.6568, + "nll_loss": 0.1642070859670639, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.606114503578283e-05, + "rewards/margins": 0.24645981192588806, + "rewards/rejected": -0.2464858591556549, + "step": 11837 + }, + { + "epoch": 8.186721991701244, + "grad_norm": 3.1762855052948, + "learning_rate": 1.0073766712770863e-05, + "log_odds_chosen": 10.405686378479004, + "log_odds_ratio": -9.969981329049915e-05, + "logits/chosen": -0.5910043716430664, + "logits/rejected": -0.6282913088798523, + "logps/chosen": -0.0003152275749016553, + "logps/rejected": -1.9180370569229126, + "loss": 0.2665, + "nll_loss": 0.06660556048154831, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.152275530737825e-05, + "rewards/margins": 0.1917721927165985, + "rewards/rejected": -0.1918036937713623, + "step": 11838 + }, + { + "epoch": 8.187413554633471, + "grad_norm": 3.890634059906006, + "learning_rate": 1.0069924696480714e-05, + "log_odds_chosen": 11.562515258789062, + "log_odds_ratio": -3.290794847998768e-05, + "logits/chosen": -0.5644373893737793, + "logits/rejected": -0.6337707042694092, + "logps/chosen": -0.00021835914230905473, + "logps/rejected": -2.628633975982666, + "loss": 0.456, + "nll_loss": 0.11399047076702118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.183591277571395e-05, + "rewards/margins": 0.2628415822982788, + "rewards/rejected": -0.2628633975982666, + "step": 11839 + }, + { + "epoch": 8.188105117565698, + "grad_norm": 4.618536472320557, + "learning_rate": 1.0066082680190565e-05, + "log_odds_chosen": 10.990549087524414, + "log_odds_ratio": -0.001142018474638462, + "logits/chosen": -0.260355144739151, + "logits/rejected": -0.3478155732154846, + "logps/chosen": -0.0024526086635887623, + "logps/rejected": -2.606916904449463, + "loss": 0.4936, + "nll_loss": 0.12329496443271637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002452608314342797, + "rewards/margins": 0.2604464292526245, + "rewards/rejected": -0.26069170236587524, + "step": 11840 + }, + { + "epoch": 8.188796680497925, + "grad_norm": 3.341639518737793, + "learning_rate": 1.0062240663900415e-05, + "log_odds_chosen": 10.685967445373535, + "log_odds_ratio": -6.907777424203232e-05, + "logits/chosen": -0.27254483103752136, + "logits/rejected": -0.35793501138687134, + "logps/chosen": -0.0011382971424609423, + "logps/rejected": -2.0838351249694824, + "loss": 0.3415, + "nll_loss": 0.0853566825389862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011382971570128575, + "rewards/margins": 0.20826967060565948, + "rewards/rejected": -0.2083835005760193, + "step": 11841 + }, + { + "epoch": 8.189488243430151, + "grad_norm": 3.463982105255127, + "learning_rate": 1.0058398647610266e-05, + "log_odds_chosen": 11.680423736572266, + "log_odds_ratio": -5.199678707867861e-05, + "logits/chosen": -0.4965265989303589, + "logits/rejected": -0.6189468502998352, + "logps/chosen": -0.0002663609047885984, + "logps/rejected": -2.63663911819458, + "loss": 0.3655, + "nll_loss": 0.09136730432510376, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6636087568476796e-05, + "rewards/margins": 0.2636372745037079, + "rewards/rejected": -0.2636639177799225, + "step": 11842 + }, + { + "epoch": 8.190179806362378, + "grad_norm": 3.801323175430298, + "learning_rate": 1.0054556631320117e-05, + "log_odds_chosen": 11.148916244506836, + "log_odds_ratio": -2.395886076556053e-05, + "logits/chosen": -0.11259734630584717, + "logits/rejected": -0.19178740680217743, + "logps/chosen": -0.00017104196012951434, + "logps/rejected": -2.371026039123535, + "loss": 0.2818, + "nll_loss": 0.07044067978858948, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.710419383016415e-05, + "rewards/margins": 0.2370855212211609, + "rewards/rejected": -0.23710262775421143, + "step": 11843 + }, + { + "epoch": 8.190871369294605, + "grad_norm": 5.517773151397705, + "learning_rate": 1.005071461502997e-05, + "log_odds_chosen": 11.084335327148438, + "log_odds_ratio": -5.177776256459765e-05, + "logits/chosen": -0.18667830526828766, + "logits/rejected": -0.1939568817615509, + "logps/chosen": -0.0010915655875578523, + "logps/rejected": -2.410806655883789, + "loss": 0.6075, + "nll_loss": 0.15186257660388947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010915656457655132, + "rewards/margins": 0.24097150564193726, + "rewards/rejected": -0.2410806566476822, + "step": 11844 + }, + { + "epoch": 8.191562932226832, + "grad_norm": 3.830199718475342, + "learning_rate": 1.0046872598739818e-05, + "log_odds_chosen": 9.771222114562988, + "log_odds_ratio": -0.0008265036158263683, + "logits/chosen": -0.10115376114845276, + "logits/rejected": -0.15087993443012238, + "logps/chosen": -0.0015982795739546418, + "logps/rejected": -2.151283025741577, + "loss": 0.4022, + "nll_loss": 0.10046719759702682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015982796321623027, + "rewards/margins": 0.2149684727191925, + "rewards/rejected": -0.21512830257415771, + "step": 11845 + }, + { + "epoch": 8.192254495159059, + "grad_norm": 4.922761917114258, + "learning_rate": 1.0043030582449669e-05, + "log_odds_chosen": 10.53628921508789, + "log_odds_ratio": -0.00016840444004628807, + "logits/chosen": -0.5491094589233398, + "logits/rejected": -0.5219364762306213, + "logps/chosen": -0.0001730314252199605, + "logps/rejected": -1.8352140188217163, + "loss": 0.4915, + "nll_loss": 0.122856006026268, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7303143977187574e-05, + "rewards/margins": 0.1835041046142578, + "rewards/rejected": -0.18352138996124268, + "step": 11846 + }, + { + "epoch": 8.192946058091286, + "grad_norm": 2.727844715118408, + "learning_rate": 1.0039188566159521e-05, + "log_odds_chosen": 10.407556533813477, + "log_odds_ratio": -0.0002446332073304802, + "logits/chosen": -0.0866495743393898, + "logits/rejected": -0.3335264325141907, + "logps/chosen": -0.0008980625425465405, + "logps/rejected": -1.89154052734375, + "loss": 0.3546, + "nll_loss": 0.0886288732290268, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.98062571650371e-05, + "rewards/margins": 0.18906424939632416, + "rewards/rejected": -0.18915404379367828, + "step": 11847 + }, + { + "epoch": 8.193637621023512, + "grad_norm": 2.52756404876709, + "learning_rate": 1.0035346549869372e-05, + "log_odds_chosen": 10.452568054199219, + "log_odds_ratio": -8.982956933323294e-05, + "logits/chosen": -0.6266956329345703, + "logits/rejected": -0.5948130488395691, + "logps/chosen": -0.00014999463746789843, + "logps/rejected": -1.5260772705078125, + "loss": 0.2274, + "nll_loss": 0.05684291943907738, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4999463019194081e-05, + "rewards/margins": 0.1525927186012268, + "rewards/rejected": -0.1526077389717102, + "step": 11848 + }, + { + "epoch": 8.19432918395574, + "grad_norm": 3.984170436859131, + "learning_rate": 1.0031504533579223e-05, + "log_odds_chosen": 10.623090744018555, + "log_odds_ratio": -0.00012865487951785326, + "logits/chosen": -0.19675840437412262, + "logits/rejected": -0.18537622690200806, + "logps/chosen": -0.0013972955057397485, + "logps/rejected": -2.636220932006836, + "loss": 0.564, + "nll_loss": 0.1409960240125656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013972955639474094, + "rewards/margins": 0.26348239183425903, + "rewards/rejected": -0.26362210512161255, + "step": 11849 + }, + { + "epoch": 8.195020746887966, + "grad_norm": 8.14512825012207, + "learning_rate": 1.0027662517289074e-05, + "log_odds_chosen": 10.018821716308594, + "log_odds_ratio": -0.0001869121624622494, + "logits/chosen": -0.4175964295864105, + "logits/rejected": -0.5103138089179993, + "logps/chosen": -0.0005602188175544143, + "logps/rejected": -2.2952799797058105, + "loss": 0.3597, + "nll_loss": 0.08990222215652466, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.602188321063295e-05, + "rewards/margins": 0.22947199642658234, + "rewards/rejected": -0.22952800989151, + "step": 11850 + }, + { + "epoch": 8.195712309820193, + "grad_norm": 4.538632869720459, + "learning_rate": 1.0023820500998924e-05, + "log_odds_chosen": 11.071784973144531, + "log_odds_ratio": -7.922316581243649e-05, + "logits/chosen": 0.10132365673780441, + "logits/rejected": 0.0524822473526001, + "logps/chosen": -0.00031479087192565203, + "logps/rejected": -2.6277666091918945, + "loss": 0.8658, + "nll_loss": 0.21643231809139252, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.147908500977792e-05, + "rewards/margins": 0.262745201587677, + "rewards/rejected": -0.2627766728401184, + "step": 11851 + }, + { + "epoch": 8.19640387275242, + "grad_norm": 3.5381200313568115, + "learning_rate": 1.0019978484708775e-05, + "log_odds_chosen": 11.354914665222168, + "log_odds_ratio": -2.0675848645623773e-05, + "logits/chosen": -0.44369906187057495, + "logits/rejected": -0.4636278450489044, + "logps/chosen": -9.953975677490234e-05, + "logps/rejected": -2.1649391651153564, + "loss": 0.3316, + "nll_loss": 0.08289721608161926, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.953975677490234e-06, + "rewards/margins": 0.2164839655160904, + "rewards/rejected": -0.21649391949176788, + "step": 11852 + }, + { + "epoch": 8.197095435684647, + "grad_norm": 4.758200645446777, + "learning_rate": 1.0016136468418628e-05, + "log_odds_chosen": 11.802242279052734, + "log_odds_ratio": -6.090774331823923e-05, + "logits/chosen": -0.6279882192611694, + "logits/rejected": -0.6225122213363647, + "logps/chosen": -6.893193494761363e-05, + "logps/rejected": -2.4928269386291504, + "loss": 0.4056, + "nll_loss": 0.10139544308185577, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.893194040458184e-06, + "rewards/margins": 0.249275803565979, + "rewards/rejected": -0.24928270280361176, + "step": 11853 + }, + { + "epoch": 8.197786998616873, + "grad_norm": 8.527070999145508, + "learning_rate": 1.0012294452128477e-05, + "log_odds_chosen": 10.675111770629883, + "log_odds_ratio": -6.840465357527137e-05, + "logits/chosen": -0.4798826575279236, + "logits/rejected": -0.4432487189769745, + "logps/chosen": -9.258472709916532e-05, + "logps/rejected": -1.6617554426193237, + "loss": 0.377, + "nll_loss": 0.09423164278268814, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.258472346118651e-06, + "rewards/margins": 0.166166290640831, + "rewards/rejected": -0.16617554426193237, + "step": 11854 + }, + { + "epoch": 8.1984785615491, + "grad_norm": 3.8203048706054688, + "learning_rate": 1.0008452435838327e-05, + "log_odds_chosen": 11.196329116821289, + "log_odds_ratio": -5.840366429765709e-05, + "logits/chosen": -0.23118522763252258, + "logits/rejected": -0.3228698968887329, + "logps/chosen": -0.00047525044647045434, + "logps/rejected": -2.8010623455047607, + "loss": 0.5395, + "nll_loss": 0.13487224280834198, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.752504173666239e-05, + "rewards/margins": 0.28005874156951904, + "rewards/rejected": -0.28010624647140503, + "step": 11855 + }, + { + "epoch": 8.199170124481327, + "grad_norm": 6.984753608703613, + "learning_rate": 1.000461041954818e-05, + "log_odds_chosen": 10.723838806152344, + "log_odds_ratio": -0.00010693888179957867, + "logits/chosen": -0.3761310875415802, + "logits/rejected": -0.38185182213783264, + "logps/chosen": -0.00027034140657633543, + "logps/rejected": -1.8784973621368408, + "loss": 0.7063, + "nll_loss": 0.17655616998672485, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7034140657633543e-05, + "rewards/margins": 0.18782271444797516, + "rewards/rejected": -0.187849760055542, + "step": 11856 + }, + { + "epoch": 8.199861687413554, + "grad_norm": 5.063896656036377, + "learning_rate": 1.000076840325803e-05, + "log_odds_chosen": 9.568683624267578, + "log_odds_ratio": -0.00012912966485600919, + "logits/chosen": -0.13418522477149963, + "logits/rejected": -0.1199626699090004, + "logps/chosen": -0.0005637831054627895, + "logps/rejected": -1.5717370510101318, + "loss": 0.7314, + "nll_loss": 0.18283309042453766, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6378310546278954e-05, + "rewards/margins": 0.1571173220872879, + "rewards/rejected": -0.15717370808124542, + "step": 11857 + }, + { + "epoch": 8.20055325034578, + "grad_norm": 3.49157452583313, + "learning_rate": 9.996926386967881e-06, + "log_odds_chosen": 11.587803840637207, + "log_odds_ratio": -2.791120277834125e-05, + "logits/chosen": -0.040826573967933655, + "logits/rejected": -0.03154802322387695, + "logps/chosen": -0.00015233646263368428, + "logps/rejected": -2.730283260345459, + "loss": 0.3362, + "nll_loss": 0.08404259383678436, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.523364699096419e-05, + "rewards/margins": 0.27301308512687683, + "rewards/rejected": -0.27302831411361694, + "step": 11858 + }, + { + "epoch": 8.201244813278008, + "grad_norm": 5.375125408172607, + "learning_rate": 9.993084370677732e-06, + "log_odds_chosen": 10.799838066101074, + "log_odds_ratio": -8.351242286153138e-05, + "logits/chosen": -0.0013588108122348785, + "logits/rejected": -0.020535770803689957, + "logps/chosen": -0.0009846636094152927, + "logps/rejected": -3.3954825401306152, + "loss": 0.4605, + "nll_loss": 0.11510814726352692, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.846636385191232e-05, + "rewards/margins": 0.33944979310035706, + "rewards/rejected": -0.3395482897758484, + "step": 11859 + }, + { + "epoch": 8.201936376210234, + "grad_norm": 3.087932825088501, + "learning_rate": 9.989242354387583e-06, + "log_odds_chosen": 10.207304954528809, + "log_odds_ratio": -0.0001233558141393587, + "logits/chosen": -0.006505733355879784, + "logits/rejected": -0.06693039834499359, + "logps/chosen": -0.00036496983375400305, + "logps/rejected": -1.86687433719635, + "loss": 0.4002, + "nll_loss": 0.10004688054323196, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.649698555818759e-05, + "rewards/margins": 0.18665093183517456, + "rewards/rejected": -0.1866874396800995, + "step": 11860 + }, + { + "epoch": 8.202627939142461, + "grad_norm": 3.8856632709503174, + "learning_rate": 9.985400338097434e-06, + "log_odds_chosen": 12.092461585998535, + "log_odds_ratio": -1.748041540849954e-05, + "logits/chosen": -0.11215893179178238, + "logits/rejected": -0.11910742521286011, + "logps/chosen": -0.00010045425733551383, + "logps/rejected": -2.9239437580108643, + "loss": 0.4678, + "nll_loss": 0.11693894863128662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0045426279248204e-05, + "rewards/margins": 0.2923843562602997, + "rewards/rejected": -0.29239436984062195, + "step": 11861 + }, + { + "epoch": 8.203319502074688, + "grad_norm": 3.417696475982666, + "learning_rate": 9.981558321807286e-06, + "log_odds_chosen": 10.846441268920898, + "log_odds_ratio": -6.147399835754186e-05, + "logits/chosen": -0.2771728038787842, + "logits/rejected": -0.28151994943618774, + "logps/chosen": -0.00015563865599688143, + "logps/rejected": -1.9300942420959473, + "loss": 0.3209, + "nll_loss": 0.08022533357143402, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5563866327283904e-05, + "rewards/margins": 0.19299384951591492, + "rewards/rejected": -0.1930094212293625, + "step": 11862 + }, + { + "epoch": 8.204011065006915, + "grad_norm": 4.589306354522705, + "learning_rate": 9.977716305517137e-06, + "log_odds_chosen": 11.178730964660645, + "log_odds_ratio": -6.520461465697736e-05, + "logits/chosen": -0.3614737093448639, + "logits/rejected": -0.45247286558151245, + "logps/chosen": -0.0003307850274723023, + "logps/rejected": -2.6263513565063477, + "loss": 0.721, + "nll_loss": 0.18025082349777222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3078504202421755e-05, + "rewards/margins": 0.2626020908355713, + "rewards/rejected": -0.26263514161109924, + "step": 11863 + }, + { + "epoch": 8.204702627939142, + "grad_norm": 5.1506028175354, + "learning_rate": 9.973874289226986e-06, + "log_odds_chosen": 11.342752456665039, + "log_odds_ratio": -5.206582136452198e-05, + "logits/chosen": -0.552399754524231, + "logits/rejected": -0.5551222562789917, + "logps/chosen": -0.0006311584729701281, + "logps/rejected": -2.6455674171447754, + "loss": 0.4314, + "nll_loss": 0.10783309489488602, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.31158472970128e-05, + "rewards/margins": 0.2644936144351959, + "rewards/rejected": -0.26455673575401306, + "step": 11864 + }, + { + "epoch": 8.205394190871369, + "grad_norm": 3.7225213050842285, + "learning_rate": 9.970032272936838e-06, + "log_odds_chosen": 11.683008193969727, + "log_odds_ratio": -1.5529356460319832e-05, + "logits/chosen": -0.21142897009849548, + "logits/rejected": -0.3459831476211548, + "logps/chosen": -0.00011245267523918301, + "logps/rejected": -2.5953619480133057, + "loss": 0.3665, + "nll_loss": 0.09162183105945587, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.124526716012042e-05, + "rewards/margins": 0.259524941444397, + "rewards/rejected": -0.25953617691993713, + "step": 11865 + }, + { + "epoch": 8.206085753803595, + "grad_norm": 4.879708766937256, + "learning_rate": 9.966190256646689e-06, + "log_odds_chosen": 10.546676635742188, + "log_odds_ratio": -0.00018046073091682047, + "logits/chosen": -0.7415679693222046, + "logits/rejected": -0.7752382755279541, + "logps/chosen": -0.00023197535483632237, + "logps/rejected": -2.147796154022217, + "loss": 0.3633, + "nll_loss": 0.09080185741186142, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3197535483632237e-05, + "rewards/margins": 0.21475641429424286, + "rewards/rejected": -0.21477960050106049, + "step": 11866 + }, + { + "epoch": 8.206777316735822, + "grad_norm": 3.5352513790130615, + "learning_rate": 9.96234824035654e-06, + "log_odds_chosen": 11.506108283996582, + "log_odds_ratio": -3.419344648136757e-05, + "logits/chosen": -0.3386998474597931, + "logits/rejected": -0.42703479528427124, + "logps/chosen": -0.0004458025505300611, + "logps/rejected": -2.6201348304748535, + "loss": 0.475, + "nll_loss": 0.1187388151884079, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.458025432541035e-05, + "rewards/margins": 0.2619689106941223, + "rewards/rejected": -0.2620134949684143, + "step": 11867 + }, + { + "epoch": 8.207468879668049, + "grad_norm": 3.62316632270813, + "learning_rate": 9.95850622406639e-06, + "log_odds_chosen": 10.394129753112793, + "log_odds_ratio": -8.255482680397108e-05, + "logits/chosen": -0.5799174904823303, + "logits/rejected": -0.6252142190933228, + "logps/chosen": -0.00030348976724781096, + "logps/rejected": -1.5779904127120972, + "loss": 0.4337, + "nll_loss": 0.10842124372720718, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.034897417819593e-05, + "rewards/margins": 0.15776869654655457, + "rewards/rejected": -0.15779903531074524, + "step": 11868 + }, + { + "epoch": 8.208160442600276, + "grad_norm": 4.204887390136719, + "learning_rate": 9.954664207776241e-06, + "log_odds_chosen": 10.332489013671875, + "log_odds_ratio": -0.00021437757823150605, + "logits/chosen": 0.18167400360107422, + "logits/rejected": -0.0008254246786236763, + "logps/chosen": -0.00041375181172043085, + "logps/rejected": -2.1173388957977295, + "loss": 0.5232, + "nll_loss": 0.1307739019393921, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.137518408242613e-05, + "rewards/margins": 0.21169252693653107, + "rewards/rejected": -0.2117338925600052, + "step": 11869 + }, + { + "epoch": 8.208852005532503, + "grad_norm": 3.426561117172241, + "learning_rate": 9.950822191486092e-06, + "log_odds_chosen": 10.906770706176758, + "log_odds_ratio": -6.29575879429467e-05, + "logits/chosen": -0.3955892324447632, + "logits/rejected": -0.34222662448883057, + "logps/chosen": -0.0001728545903461054, + "logps/rejected": -1.9308866262435913, + "loss": 0.3435, + "nll_loss": 0.08586615324020386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.72854597622063e-05, + "rewards/margins": 0.1930713802576065, + "rewards/rejected": -0.19308865070343018, + "step": 11870 + }, + { + "epoch": 8.20954356846473, + "grad_norm": 4.162632942199707, + "learning_rate": 9.946980175195943e-06, + "log_odds_chosen": 10.008193969726562, + "log_odds_ratio": -0.00013287433830555528, + "logits/chosen": 0.018637903034687042, + "logits/rejected": 0.04746834561228752, + "logps/chosen": -0.0002809230354614556, + "logps/rejected": -1.9701550006866455, + "loss": 0.6517, + "nll_loss": 0.16291363537311554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8092303182347678e-05, + "rewards/margins": 0.19698740541934967, + "rewards/rejected": -0.19701550900936127, + "step": 11871 + }, + { + "epoch": 8.210235131396956, + "grad_norm": 4.263104438781738, + "learning_rate": 9.943138158905795e-06, + "log_odds_chosen": 10.590243339538574, + "log_odds_ratio": -5.595001857727766e-05, + "logits/chosen": -0.37323158979415894, + "logits/rejected": -0.36012914776802063, + "logps/chosen": -0.0002597020356915891, + "logps/rejected": -2.14939546585083, + "loss": 0.3621, + "nll_loss": 0.09050886332988739, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.597020284156315e-05, + "rewards/margins": 0.21491357684135437, + "rewards/rejected": -0.21493953466415405, + "step": 11872 + }, + { + "epoch": 8.210926694329183, + "grad_norm": 8.134347915649414, + "learning_rate": 9.939296142615644e-06, + "log_odds_chosen": 10.484792709350586, + "log_odds_ratio": -0.00042060474515892565, + "logits/chosen": 0.01173505187034607, + "logits/rejected": -0.018582195043563843, + "logps/chosen": -0.0008123047882691026, + "logps/rejected": -2.4332172870635986, + "loss": 0.3267, + "nll_loss": 0.08162355422973633, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.123047882691026e-05, + "rewards/margins": 0.24324052035808563, + "rewards/rejected": -0.2433217465877533, + "step": 11873 + }, + { + "epoch": 8.21161825726141, + "grad_norm": 6.031350135803223, + "learning_rate": 9.935454126325495e-06, + "log_odds_chosen": 10.102872848510742, + "log_odds_ratio": -0.00033000472467392683, + "logits/chosen": -0.744584321975708, + "logits/rejected": -0.69932621717453, + "logps/chosen": -0.0010207390878349543, + "logps/rejected": -1.9441288709640503, + "loss": 0.3996, + "nll_loss": 0.09986548870801926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010207389277638867, + "rewards/margins": 0.19431079924106598, + "rewards/rejected": -0.19441288709640503, + "step": 11874 + }, + { + "epoch": 8.212309820193637, + "grad_norm": 5.100654125213623, + "learning_rate": 9.931612110035348e-06, + "log_odds_chosen": 10.475208282470703, + "log_odds_ratio": -5.977584078209475e-05, + "logits/chosen": -0.6865031719207764, + "logits/rejected": -0.6750398874282837, + "logps/chosen": -0.0001411703269695863, + "logps/rejected": -1.342596411705017, + "loss": 0.2567, + "nll_loss": 0.06417740136384964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4117033060756512e-05, + "rewards/margins": 0.13424552977085114, + "rewards/rejected": -0.1342596411705017, + "step": 11875 + }, + { + "epoch": 8.213001383125864, + "grad_norm": 3.6606369018554688, + "learning_rate": 9.927770093745198e-06, + "log_odds_chosen": 11.791744232177734, + "log_odds_ratio": -1.4413297321880236e-05, + "logits/chosen": 0.10309985280036926, + "logits/rejected": 0.07292437553405762, + "logps/chosen": -0.00011298189929220825, + "logps/rejected": -2.626086711883545, + "loss": 0.4187, + "nll_loss": 0.10467715561389923, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1298190656816587e-05, + "rewards/margins": 0.26259738206863403, + "rewards/rejected": -0.26260867714881897, + "step": 11876 + }, + { + "epoch": 8.21369294605809, + "grad_norm": 5.350027084350586, + "learning_rate": 9.923928077455049e-06, + "log_odds_chosen": 11.024895668029785, + "log_odds_ratio": -4.6858815039740875e-05, + "logits/chosen": -0.15542323887348175, + "logits/rejected": -0.19625607132911682, + "logps/chosen": -0.00010433487477712333, + "logps/rejected": -1.9152851104736328, + "loss": 0.3122, + "nll_loss": 0.07805629074573517, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0433487659611274e-05, + "rewards/margins": 0.19151808321475983, + "rewards/rejected": -0.19152851402759552, + "step": 11877 + }, + { + "epoch": 8.214384508990317, + "grad_norm": 4.7614946365356445, + "learning_rate": 9.9200860611649e-06, + "log_odds_chosen": 10.522577285766602, + "log_odds_ratio": -0.00020592294458765537, + "logits/chosen": 0.07700219005346298, + "logits/rejected": -0.1007341742515564, + "logps/chosen": -0.001401687623001635, + "logps/rejected": -2.5692789554595947, + "loss": 0.3943, + "nll_loss": 0.09854703396558762, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001401687623001635, + "rewards/margins": 0.2567877173423767, + "rewards/rejected": -0.25692787766456604, + "step": 11878 + }, + { + "epoch": 8.215076071922544, + "grad_norm": 4.957317352294922, + "learning_rate": 9.91624404487475e-06, + "log_odds_chosen": 9.691752433776855, + "log_odds_ratio": -0.0007342268945649266, + "logits/chosen": -0.5804744958877563, + "logits/rejected": -0.5840776562690735, + "logps/chosen": -0.00027017746469937265, + "logps/rejected": -1.3753609657287598, + "loss": 0.5321, + "nll_loss": 0.1329451948404312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7017747925128788e-05, + "rewards/margins": 0.1375090777873993, + "rewards/rejected": -0.13753610849380493, + "step": 11879 + }, + { + "epoch": 8.215767634854771, + "grad_norm": 3.693619728088379, + "learning_rate": 9.912402028584601e-06, + "log_odds_chosen": 10.603633880615234, + "log_odds_ratio": -7.273280061781406e-05, + "logits/chosen": -0.18116453289985657, + "logits/rejected": -0.2575952410697937, + "logps/chosen": -0.0001808809902286157, + "logps/rejected": -1.8459020853042603, + "loss": 0.3505, + "nll_loss": 0.0876234918832779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8088101569446735e-05, + "rewards/margins": 0.18457213044166565, + "rewards/rejected": -0.18459022045135498, + "step": 11880 + }, + { + "epoch": 8.216459197786998, + "grad_norm": 5.794073104858398, + "learning_rate": 9.908560012294454e-06, + "log_odds_chosen": 9.234406471252441, + "log_odds_ratio": -0.0003990632831119001, + "logits/chosen": -0.40051859617233276, + "logits/rejected": -0.3610343933105469, + "logps/chosen": -0.0010065014939755201, + "logps/rejected": -1.7072646617889404, + "loss": 0.6433, + "nll_loss": 0.16079066693782806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010065016249427572, + "rewards/margins": 0.17062580585479736, + "rewards/rejected": -0.1707264631986618, + "step": 11881 + }, + { + "epoch": 8.217150760719225, + "grad_norm": 4.333235740661621, + "learning_rate": 9.904717996004303e-06, + "log_odds_chosen": 11.285673141479492, + "log_odds_ratio": -1.546269231766928e-05, + "logits/chosen": 0.07247123122215271, + "logits/rejected": -0.045315831899642944, + "logps/chosen": -9.770147153176367e-05, + "logps/rejected": -1.9647116661071777, + "loss": 0.4386, + "nll_loss": 0.1096573919057846, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.770147698873188e-06, + "rewards/margins": 0.19646139442920685, + "rewards/rejected": -0.19647115468978882, + "step": 11882 + }, + { + "epoch": 8.217842323651452, + "grad_norm": 3.874656915664673, + "learning_rate": 9.900875979714154e-06, + "log_odds_chosen": 11.24226188659668, + "log_odds_ratio": -2.1476273104781285e-05, + "logits/chosen": -0.3501582145690918, + "logits/rejected": -0.3311619162559509, + "logps/chosen": -0.00010106174158863723, + "logps/rejected": -2.1260154247283936, + "loss": 0.4227, + "nll_loss": 0.10567951202392578, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0106174158863723e-05, + "rewards/margins": 0.21259145438671112, + "rewards/rejected": -0.21260155737400055, + "step": 11883 + }, + { + "epoch": 8.218533886583678, + "grad_norm": 3.4754481315612793, + "learning_rate": 9.897033963424006e-06, + "log_odds_chosen": 10.466971397399902, + "log_odds_ratio": -5.804416286991909e-05, + "logits/chosen": -0.14578090608119965, + "logits/rejected": -0.08390910178422928, + "logps/chosen": -0.00012252731539774686, + "logps/rejected": -1.4804524183273315, + "loss": 0.3364, + "nll_loss": 0.08408783376216888, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2252730812178925e-05, + "rewards/margins": 0.1480329930782318, + "rewards/rejected": -0.14804525673389435, + "step": 11884 + }, + { + "epoch": 8.219225449515905, + "grad_norm": 3.501004457473755, + "learning_rate": 9.893191947133857e-06, + "log_odds_chosen": 11.709239959716797, + "log_odds_ratio": -2.7292315280647017e-05, + "logits/chosen": -0.1410721093416214, + "logits/rejected": -0.23302824795246124, + "logps/chosen": -8.871190948411822e-05, + "logps/rejected": -2.1929562091827393, + "loss": 0.4125, + "nll_loss": 0.10312415659427643, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.871191312209703e-06, + "rewards/margins": 0.21928676962852478, + "rewards/rejected": -0.2192956507205963, + "step": 11885 + }, + { + "epoch": 8.219917012448132, + "grad_norm": 3.4106521606445312, + "learning_rate": 9.889349930843707e-06, + "log_odds_chosen": 10.613020896911621, + "log_odds_ratio": -0.00017831180593930185, + "logits/chosen": -0.6792394518852234, + "logits/rejected": -0.6880941987037659, + "logps/chosen": -0.0004885156522504985, + "logps/rejected": -2.28318190574646, + "loss": 0.2814, + "nll_loss": 0.07033880054950714, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.885156522504985e-05, + "rewards/margins": 0.22826935350894928, + "rewards/rejected": -0.2283182144165039, + "step": 11886 + }, + { + "epoch": 8.220608575380359, + "grad_norm": 3.315471887588501, + "learning_rate": 9.885507914553558e-06, + "log_odds_chosen": 10.854877471923828, + "log_odds_ratio": -6.311023025773466e-05, + "logits/chosen": -0.26056307554244995, + "logits/rejected": -0.2880066931247711, + "logps/chosen": -0.0003195895696990192, + "logps/rejected": -2.209442377090454, + "loss": 0.4086, + "nll_loss": 0.10214497148990631, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1958959880284965e-05, + "rewards/margins": 0.22091226279735565, + "rewards/rejected": -0.22094422578811646, + "step": 11887 + }, + { + "epoch": 8.221300138312586, + "grad_norm": 4.172388553619385, + "learning_rate": 9.881665898263409e-06, + "log_odds_chosen": 10.32114315032959, + "log_odds_ratio": -0.00030060680001042783, + "logits/chosen": 0.08563702553510666, + "logits/rejected": 0.1317209154367447, + "logps/chosen": -0.0005976824322715402, + "logps/rejected": -2.283992052078247, + "loss": 0.3728, + "nll_loss": 0.09316124022006989, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9768248320324346e-05, + "rewards/margins": 0.22833941876888275, + "rewards/rejected": -0.22839921712875366, + "step": 11888 + }, + { + "epoch": 8.221991701244812, + "grad_norm": 7.285219192504883, + "learning_rate": 9.87782388197326e-06, + "log_odds_chosen": 11.31580924987793, + "log_odds_ratio": -4.5658751332666725e-05, + "logits/chosen": -0.7076231241226196, + "logits/rejected": -0.6245047450065613, + "logps/chosen": -0.0002565417962614447, + "logps/rejected": -2.7010302543640137, + "loss": 0.4708, + "nll_loss": 0.11768751591444016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5654178898548707e-05, + "rewards/margins": 0.2700774073600769, + "rewards/rejected": -0.2701030671596527, + "step": 11889 + }, + { + "epoch": 8.22268326417704, + "grad_norm": 3.4534096717834473, + "learning_rate": 9.873981865683112e-06, + "log_odds_chosen": 12.416353225708008, + "log_odds_ratio": -3.0102659366093576e-05, + "logits/chosen": -0.34680306911468506, + "logits/rejected": -0.42320072650909424, + "logps/chosen": -0.00020526333537418395, + "logps/rejected": -3.8194408416748047, + "loss": 0.4224, + "nll_loss": 0.10560045391321182, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052633135463111e-05, + "rewards/margins": 0.3819235563278198, + "rewards/rejected": -0.38194406032562256, + "step": 11890 + }, + { + "epoch": 8.223374827109266, + "grad_norm": 7.042224407196045, + "learning_rate": 9.870139849392961e-06, + "log_odds_chosen": 10.453751564025879, + "log_odds_ratio": -0.00010059493797598407, + "logits/chosen": 0.1438857614994049, + "logits/rejected": 0.07333517074584961, + "logps/chosen": -0.00031427317298948765, + "logps/rejected": -2.2300024032592773, + "loss": 0.2914, + "nll_loss": 0.07283707708120346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1427316571353e-05, + "rewards/margins": 0.22296880185604095, + "rewards/rejected": -0.22300024330615997, + "step": 11891 + }, + { + "epoch": 8.224066390041493, + "grad_norm": 3.609309673309326, + "learning_rate": 9.866297833102812e-06, + "log_odds_chosen": 11.430818557739258, + "log_odds_ratio": -3.3167856599902734e-05, + "logits/chosen": -0.3859490156173706, + "logits/rejected": -0.3728792369365692, + "logps/chosen": -0.00021129933884367347, + "logps/rejected": -1.9585065841674805, + "loss": 0.5518, + "nll_loss": 0.13795900344848633, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1129933884367347e-05, + "rewards/margins": 0.19582951068878174, + "rewards/rejected": -0.195850670337677, + "step": 11892 + }, + { + "epoch": 8.22475795297372, + "grad_norm": 3.3586835861206055, + "learning_rate": 9.862455816812664e-06, + "log_odds_chosen": 11.46454906463623, + "log_odds_ratio": -2.6788418836076744e-05, + "logits/chosen": -0.2561917304992676, + "logits/rejected": -0.2784116268157959, + "logps/chosen": -0.00014413880126085132, + "logps/rejected": -2.368454933166504, + "loss": 0.4054, + "nll_loss": 0.10135869681835175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4413879398489371e-05, + "rewards/margins": 0.23683109879493713, + "rewards/rejected": -0.23684552311897278, + "step": 11893 + }, + { + "epoch": 8.225449515905947, + "grad_norm": 5.3997344970703125, + "learning_rate": 9.858613800522515e-06, + "log_odds_chosen": 11.586647987365723, + "log_odds_ratio": -2.1225545424385928e-05, + "logits/chosen": -0.052325744181871414, + "logits/rejected": -0.13196569681167603, + "logps/chosen": -0.0001532369205961004, + "logps/rejected": -2.433154344558716, + "loss": 0.4923, + "nll_loss": 0.12306039035320282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5323690604418516e-05, + "rewards/margins": 0.24330011010169983, + "rewards/rejected": -0.2433154433965683, + "step": 11894 + }, + { + "epoch": 8.226141078838173, + "grad_norm": 11.203163146972656, + "learning_rate": 9.854771784232366e-06, + "log_odds_chosen": 11.251202583312988, + "log_odds_ratio": -3.209187707398087e-05, + "logits/chosen": -0.17335253953933716, + "logits/rejected": -0.24422144889831543, + "logps/chosen": -0.0001083131501218304, + "logps/rejected": -1.97250497341156, + "loss": 0.5254, + "nll_loss": 0.13133439421653748, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.083131519408198e-05, + "rewards/margins": 0.19723966717720032, + "rewards/rejected": -0.19725048542022705, + "step": 11895 + }, + { + "epoch": 8.2268326417704, + "grad_norm": 4.20961332321167, + "learning_rate": 9.850929767942217e-06, + "log_odds_chosen": 10.151579856872559, + "log_odds_ratio": -0.00021857497631572187, + "logits/chosen": -0.5564976930618286, + "logits/rejected": -0.7172648906707764, + "logps/chosen": -0.0008203632314689457, + "logps/rejected": -2.225661277770996, + "loss": 0.529, + "nll_loss": 0.13222664594650269, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.203632751246914e-05, + "rewards/margins": 0.22248411178588867, + "rewards/rejected": -0.2225661277770996, + "step": 11896 + }, + { + "epoch": 8.227524204702627, + "grad_norm": 2.7075605392456055, + "learning_rate": 9.847087751652067e-06, + "log_odds_chosen": 10.783187866210938, + "log_odds_ratio": -0.00012845300079789013, + "logits/chosen": -0.42407190799713135, + "logits/rejected": -0.36330926418304443, + "logps/chosen": -0.00020437348575796932, + "logps/rejected": -1.8607637882232666, + "loss": 0.3263, + "nll_loss": 0.08155211806297302, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.043734821199905e-05, + "rewards/margins": 0.18605592846870422, + "rewards/rejected": -0.18607638776302338, + "step": 11897 + }, + { + "epoch": 8.228215767634854, + "grad_norm": 5.69032096862793, + "learning_rate": 9.843245735361918e-06, + "log_odds_chosen": 12.672260284423828, + "log_odds_ratio": -1.0369312803959474e-05, + "logits/chosen": -0.3008144199848175, + "logits/rejected": -0.40587708353996277, + "logps/chosen": -0.00024107444914989173, + "logps/rejected": -3.753671646118164, + "loss": 0.5438, + "nll_loss": 0.13596031069755554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.410744309599977e-05, + "rewards/margins": 0.37534308433532715, + "rewards/rejected": -0.375367134809494, + "step": 11898 + }, + { + "epoch": 8.22890733056708, + "grad_norm": 7.97053337097168, + "learning_rate": 9.83940371907177e-06, + "log_odds_chosen": 10.980829238891602, + "log_odds_ratio": -3.558182652341202e-05, + "logits/chosen": -0.22462347149848938, + "logits/rejected": -0.21821101009845734, + "logps/chosen": -0.0003172093420289457, + "logps/rejected": -2.1453938484191895, + "loss": 0.5254, + "nll_loss": 0.1313534528017044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.172093420289457e-05, + "rewards/margins": 0.21450765430927277, + "rewards/rejected": -0.21453937888145447, + "step": 11899 + }, + { + "epoch": 8.229598893499308, + "grad_norm": 4.776434898376465, + "learning_rate": 9.83556170278162e-06, + "log_odds_chosen": 10.950285911560059, + "log_odds_ratio": -8.446057472610846e-05, + "logits/chosen": -0.3791242837905884, + "logits/rejected": -0.37221699953079224, + "logps/chosen": -0.0003215722390450537, + "logps/rejected": -2.2435872554779053, + "loss": 0.5553, + "nll_loss": 0.13881456851959229, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2157226087292656e-05, + "rewards/margins": 0.22432658076286316, + "rewards/rejected": -0.22435873746871948, + "step": 11900 + }, + { + "epoch": 8.230290456431534, + "grad_norm": 4.019122123718262, + "learning_rate": 9.83171968649147e-06, + "log_odds_chosen": 11.55584716796875, + "log_odds_ratio": -1.9348452042322606e-05, + "logits/chosen": -0.2871738374233246, + "logits/rejected": -0.4227055311203003, + "logps/chosen": -0.0002205895580118522, + "logps/rejected": -2.533057928085327, + "loss": 0.3716, + "nll_loss": 0.09290733933448792, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2058957256376743e-05, + "rewards/margins": 0.2532837390899658, + "rewards/rejected": -0.2533057928085327, + "step": 11901 + }, + { + "epoch": 8.230982019363761, + "grad_norm": 4.368720054626465, + "learning_rate": 9.827877670201321e-06, + "log_odds_chosen": 10.433951377868652, + "log_odds_ratio": -0.0005253016715869308, + "logits/chosen": -0.41565749049186707, + "logits/rejected": -0.5145907402038574, + "logps/chosen": -0.0009341444238089025, + "logps/rejected": -2.0665760040283203, + "loss": 0.8229, + "nll_loss": 0.20566663146018982, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.341444092569873e-05, + "rewards/margins": 0.20656421780586243, + "rewards/rejected": -0.20665761828422546, + "step": 11902 + }, + { + "epoch": 8.231673582295988, + "grad_norm": 3.7045254707336426, + "learning_rate": 9.824035653911174e-06, + "log_odds_chosen": 10.184175491333008, + "log_odds_ratio": -8.395993791054934e-05, + "logits/chosen": -0.3897843062877655, + "logits/rejected": -0.37134748697280884, + "logps/chosen": -0.0006803659489378333, + "logps/rejected": -2.4080569744110107, + "loss": 0.4519, + "nll_loss": 0.11295488476753235, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.80365992593579e-05, + "rewards/margins": 0.2407376766204834, + "rewards/rejected": -0.2408057004213333, + "step": 11903 + }, + { + "epoch": 8.232365145228215, + "grad_norm": 8.1160249710083, + "learning_rate": 9.820193637621024e-06, + "log_odds_chosen": 10.831419944763184, + "log_odds_ratio": -4.790275852428749e-05, + "logits/chosen": -0.32159915566444397, + "logits/rejected": -0.4292929172515869, + "logps/chosen": -0.00021727457351516932, + "logps/rejected": -1.9313918352127075, + "loss": 0.3378, + "nll_loss": 0.08443906158208847, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1727457351516932e-05, + "rewards/margins": 0.1931174397468567, + "rewards/rejected": -0.1931391954421997, + "step": 11904 + }, + { + "epoch": 8.233056708160442, + "grad_norm": 8.284672737121582, + "learning_rate": 9.816351621330875e-06, + "log_odds_chosen": 12.164765357971191, + "log_odds_ratio": -8.79414346854901e-06, + "logits/chosen": -0.1950538158416748, + "logits/rejected": -0.3341953158378601, + "logps/chosen": -0.00016817479627206922, + "logps/rejected": -3.2107934951782227, + "loss": 0.6467, + "nll_loss": 0.1616656333208084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.681747926340904e-05, + "rewards/margins": 0.32106253504753113, + "rewards/rejected": -0.3210793435573578, + "step": 11905 + }, + { + "epoch": 8.233748271092669, + "grad_norm": 7.224637508392334, + "learning_rate": 9.812509605040726e-06, + "log_odds_chosen": 11.125343322753906, + "log_odds_ratio": -0.00014493428170681, + "logits/chosen": 0.05868227779865265, + "logits/rejected": 0.05458486080169678, + "logps/chosen": -0.00013728538760915399, + "logps/rejected": -2.3505802154541016, + "loss": 0.3263, + "nll_loss": 0.08156958967447281, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3728540579904802e-05, + "rewards/margins": 0.23504430055618286, + "rewards/rejected": -0.2350580394268036, + "step": 11906 + }, + { + "epoch": 8.234439834024895, + "grad_norm": 4.543659210205078, + "learning_rate": 9.808667588750577e-06, + "log_odds_chosen": 11.368104934692383, + "log_odds_ratio": -0.00010937463230220601, + "logits/chosen": -0.16547857224941254, + "logits/rejected": -0.19322216510772705, + "logps/chosen": -0.0007860024925321341, + "logps/rejected": -2.766275405883789, + "loss": 0.4289, + "nll_loss": 0.10720409452915192, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.86002492532134e-05, + "rewards/margins": 0.27654892206192017, + "rewards/rejected": -0.2766275405883789, + "step": 11907 + }, + { + "epoch": 8.235131396957122, + "grad_norm": 3.6898257732391357, + "learning_rate": 9.804825572460427e-06, + "log_odds_chosen": 10.989175796508789, + "log_odds_ratio": -0.00010870847472688183, + "logits/chosen": -0.31423258781433105, + "logits/rejected": -0.5556448101997375, + "logps/chosen": -0.00040482316398993134, + "logps/rejected": -2.564761161804199, + "loss": 0.8335, + "nll_loss": 0.2083652764558792, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.048231494380161e-05, + "rewards/margins": 0.25643566250801086, + "rewards/rejected": -0.25647613406181335, + "step": 11908 + }, + { + "epoch": 8.235822959889349, + "grad_norm": 6.863066673278809, + "learning_rate": 9.80098355617028e-06, + "log_odds_chosen": 11.990863800048828, + "log_odds_ratio": -2.5272656785091385e-05, + "logits/chosen": -0.513546347618103, + "logits/rejected": -0.5886914134025574, + "logps/chosen": -0.00012314581545069814, + "logps/rejected": -2.846043348312378, + "loss": 0.2662, + "nll_loss": 0.06654417514801025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2314581908867694e-05, + "rewards/margins": 0.28459203243255615, + "rewards/rejected": -0.28460434079170227, + "step": 11909 + }, + { + "epoch": 8.236514522821576, + "grad_norm": 3.6549644470214844, + "learning_rate": 9.797141539880129e-06, + "log_odds_chosen": 10.573244094848633, + "log_odds_ratio": -0.00011436867498559877, + "logits/chosen": 0.0248430036008358, + "logits/rejected": 0.11080904304981232, + "logps/chosen": -0.00022818223806098104, + "logps/rejected": -2.2123470306396484, + "loss": 0.3748, + "nll_loss": 0.0936833918094635, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2818225261289626e-05, + "rewards/margins": 0.22121189534664154, + "rewards/rejected": -0.22123470902442932, + "step": 11910 + }, + { + "epoch": 8.237206085753803, + "grad_norm": 4.8388495445251465, + "learning_rate": 9.79329952358998e-06, + "log_odds_chosen": 11.364583969116211, + "log_odds_ratio": -0.00023915823840070516, + "logits/chosen": -0.8164856433868408, + "logits/rejected": -0.8062664270401001, + "logps/chosen": -0.0001275077520404011, + "logps/rejected": -2.420968532562256, + "loss": 0.3621, + "nll_loss": 0.09050973504781723, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2750776477332693e-05, + "rewards/margins": 0.2420840859413147, + "rewards/rejected": -0.24209685623645782, + "step": 11911 + }, + { + "epoch": 8.23789764868603, + "grad_norm": 3.9634106159210205, + "learning_rate": 9.789457507299832e-06, + "log_odds_chosen": 10.325061798095703, + "log_odds_ratio": -6.946113717276603e-05, + "logits/chosen": -0.23887857794761658, + "logits/rejected": -0.2788199484348297, + "logps/chosen": -0.0012760156532749534, + "logps/rejected": -2.2283194065093994, + "loss": 0.4579, + "nll_loss": 0.11447641253471375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012760156823787838, + "rewards/margins": 0.22270435094833374, + "rewards/rejected": -0.22283194959163666, + "step": 11912 + }, + { + "epoch": 8.238589211618256, + "grad_norm": 3.8492555618286133, + "learning_rate": 9.785615491009683e-06, + "log_odds_chosen": 11.542254447937012, + "log_odds_ratio": -1.5774694475112483e-05, + "logits/chosen": -0.5774122476577759, + "logits/rejected": -0.5654475092887878, + "logps/chosen": -7.090227882144973e-05, + "logps/rejected": -2.098196029663086, + "loss": 0.3943, + "nll_loss": 0.09858129918575287, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090227882144973e-06, + "rewards/margins": 0.2098124921321869, + "rewards/rejected": -0.20981959998607635, + "step": 11913 + }, + { + "epoch": 8.239280774550483, + "grad_norm": 5.6373982429504395, + "learning_rate": 9.781773474719534e-06, + "log_odds_chosen": 11.087379455566406, + "log_odds_ratio": -8.546755998395383e-05, + "logits/chosen": -0.6590429544448853, + "logits/rejected": -0.7686599493026733, + "logps/chosen": -0.00012145326763857156, + "logps/rejected": -2.06939959526062, + "loss": 0.3423, + "nll_loss": 0.08557528257369995, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2145326763857156e-05, + "rewards/margins": 0.2069278210401535, + "rewards/rejected": -0.2069399654865265, + "step": 11914 + }, + { + "epoch": 8.23997233748271, + "grad_norm": 4.372389793395996, + "learning_rate": 9.777931458429384e-06, + "log_odds_chosen": 10.3779935836792, + "log_odds_ratio": -6.900944572407752e-05, + "logits/chosen": -0.49039220809936523, + "logits/rejected": -0.45434218645095825, + "logps/chosen": -0.00029394644661806524, + "logps/rejected": -2.24226450920105, + "loss": 0.4261, + "nll_loss": 0.10651591420173645, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9394643206615e-05, + "rewards/margins": 0.22419705986976624, + "rewards/rejected": -0.2242264598608017, + "step": 11915 + }, + { + "epoch": 8.240663900414937, + "grad_norm": 3.81577730178833, + "learning_rate": 9.774089442139235e-06, + "log_odds_chosen": 11.480928421020508, + "log_odds_ratio": -0.00013029044202994555, + "logits/chosen": -0.25032010674476624, + "logits/rejected": -0.3787113130092621, + "logps/chosen": -0.00018976113642565906, + "logps/rejected": -2.105654239654541, + "loss": 0.3038, + "nll_loss": 0.0759335458278656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8976112187374383e-05, + "rewards/margins": 0.21054644882678986, + "rewards/rejected": -0.210565447807312, + "step": 11916 + }, + { + "epoch": 8.241355463347164, + "grad_norm": 3.03281569480896, + "learning_rate": 9.770247425849086e-06, + "log_odds_chosen": 10.569976806640625, + "log_odds_ratio": -0.00010669581388356164, + "logits/chosen": -0.13931097090244293, + "logits/rejected": -0.2053048312664032, + "logps/chosen": -0.00015964414342306554, + "logps/rejected": -1.923472285270691, + "loss": 0.2408, + "nll_loss": 0.06017881631851196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5964415069902316e-05, + "rewards/margins": 0.19233126938343048, + "rewards/rejected": -0.1923472285270691, + "step": 11917 + }, + { + "epoch": 8.24204702627939, + "grad_norm": 3.186953544616699, + "learning_rate": 9.766405409558938e-06, + "log_odds_chosen": 11.287572860717773, + "log_odds_ratio": -3.111410478595644e-05, + "logits/chosen": 0.07891267538070679, + "logits/rejected": 0.013783074915409088, + "logps/chosen": -0.00014697012375108898, + "logps/rejected": -2.314755439758301, + "loss": 0.3065, + "nll_loss": 0.07661764323711395, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4697012375108898e-05, + "rewards/margins": 0.23146085441112518, + "rewards/rejected": -0.23147554695606232, + "step": 11918 + }, + { + "epoch": 8.242738589211617, + "grad_norm": 5.805723190307617, + "learning_rate": 9.762563393268787e-06, + "log_odds_chosen": 12.481376647949219, + "log_odds_ratio": -7.375199857051484e-06, + "logits/chosen": -0.5111141800880432, + "logits/rejected": -0.5841240286827087, + "logps/chosen": -5.368373604142107e-05, + "logps/rejected": -2.6199378967285156, + "loss": 0.4566, + "nll_loss": 0.11414913088083267, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.368373422243167e-06, + "rewards/margins": 0.26198846101760864, + "rewards/rejected": -0.2619938254356384, + "step": 11919 + }, + { + "epoch": 8.243430152143844, + "grad_norm": 3.241328716278076, + "learning_rate": 9.758721376978638e-06, + "log_odds_chosen": 11.063311576843262, + "log_odds_ratio": -0.00022662435367237777, + "logits/chosen": -0.35234570503234863, + "logits/rejected": -0.3682654798030853, + "logps/chosen": -0.00032121199183166027, + "logps/rejected": -2.4346401691436768, + "loss": 0.2577, + "nll_loss": 0.06439036130905151, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.212119918316603e-05, + "rewards/margins": 0.24343189597129822, + "rewards/rejected": -0.24346402287483215, + "step": 11920 + }, + { + "epoch": 8.244121715076071, + "grad_norm": 3.0784521102905273, + "learning_rate": 9.75487936068849e-06, + "log_odds_chosen": 11.698726654052734, + "log_odds_ratio": -2.0082336050109006e-05, + "logits/chosen": -0.3633999228477478, + "logits/rejected": -0.40448373556137085, + "logps/chosen": -0.0005937843234278262, + "logps/rejected": -3.2323977947235107, + "loss": 0.3206, + "nll_loss": 0.08015991002321243, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.937842797720805e-05, + "rewards/margins": 0.3231803774833679, + "rewards/rejected": -0.3232397735118866, + "step": 11921 + }, + { + "epoch": 8.244813278008298, + "grad_norm": 3.568092107772827, + "learning_rate": 9.751037344398341e-06, + "log_odds_chosen": 11.089823722839355, + "log_odds_ratio": -0.00012849707854911685, + "logits/chosen": -0.09959299862384796, + "logits/rejected": -0.22197115421295166, + "logps/chosen": -0.0003033549874089658, + "logps/rejected": -2.257047176361084, + "loss": 0.4518, + "nll_loss": 0.11293190717697144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0335499104694463e-05, + "rewards/margins": 0.22567439079284668, + "rewards/rejected": -0.22570469975471497, + "step": 11922 + }, + { + "epoch": 8.245504840940525, + "grad_norm": 5.759087562561035, + "learning_rate": 9.747195328108192e-06, + "log_odds_chosen": 12.131702423095703, + "log_odds_ratio": -1.2354894352029078e-05, + "logits/chosen": -0.3387818932533264, + "logits/rejected": -0.459004282951355, + "logps/chosen": -0.00013360743469092995, + "logps/rejected": -2.852360725402832, + "loss": 0.5861, + "nll_loss": 0.14653277397155762, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3360744560486637e-05, + "rewards/margins": 0.28522270917892456, + "rewards/rejected": -0.28523609042167664, + "step": 11923 + }, + { + "epoch": 8.246196403872752, + "grad_norm": 4.073774814605713, + "learning_rate": 9.743353311818043e-06, + "log_odds_chosen": 11.734428405761719, + "log_odds_ratio": -1.382894697599113e-05, + "logits/chosen": -0.06801869720220566, + "logits/rejected": -0.16279873251914978, + "logps/chosen": -0.00010149933223146945, + "logps/rejected": -2.4640071392059326, + "loss": 0.4536, + "nll_loss": 0.11338850855827332, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0149933586944826e-05, + "rewards/margins": 0.24639055132865906, + "rewards/rejected": -0.24640071392059326, + "step": 11924 + }, + { + "epoch": 8.24688796680498, + "grad_norm": 4.897947788238525, + "learning_rate": 9.739511295527893e-06, + "log_odds_chosen": 10.829876899719238, + "log_odds_ratio": -8.704655920155346e-05, + "logits/chosen": 0.05027832090854645, + "logits/rejected": 0.09646852314472198, + "logps/chosen": -0.0003285640850663185, + "logps/rejected": -2.5738582611083984, + "loss": 0.343, + "nll_loss": 0.08574408292770386, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.285640923422761e-05, + "rewards/margins": 0.2573529779911041, + "rewards/rejected": -0.25738582015037537, + "step": 11925 + }, + { + "epoch": 8.247579529737205, + "grad_norm": 4.184667587280273, + "learning_rate": 9.735669279237744e-06, + "log_odds_chosen": 10.841010093688965, + "log_odds_ratio": -4.2419906094437465e-05, + "logits/chosen": -0.17443953454494476, + "logits/rejected": -0.2786588668823242, + "logps/chosen": -0.00013494440645445138, + "logps/rejected": -1.8008975982666016, + "loss": 0.4378, + "nll_loss": 0.10945791751146317, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.349444119114196e-05, + "rewards/margins": 0.18007627129554749, + "rewards/rejected": -0.18008975684642792, + "step": 11926 + }, + { + "epoch": 8.248271092669434, + "grad_norm": 5.399971961975098, + "learning_rate": 9.731827262947597e-06, + "log_odds_chosen": 10.012310028076172, + "log_odds_ratio": -0.0003816061362158507, + "logits/chosen": -0.12254303693771362, + "logits/rejected": -0.08368848264217377, + "logps/chosen": -0.0005873021436855197, + "logps/rejected": -2.2701234817504883, + "loss": 0.4698, + "nll_loss": 0.11741822957992554, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8730216551339254e-05, + "rewards/margins": 0.22695361077785492, + "rewards/rejected": -0.22701233625411987, + "step": 11927 + }, + { + "epoch": 8.248962655601659, + "grad_norm": 4.6173930168151855, + "learning_rate": 9.727985246657446e-06, + "log_odds_chosen": 11.417407989501953, + "log_odds_ratio": -0.00016668268654029816, + "logits/chosen": -0.17062698304653168, + "logits/rejected": -0.256955087184906, + "logps/chosen": -0.0002462010888848454, + "logps/rejected": -2.1261165142059326, + "loss": 0.3565, + "nll_loss": 0.08911935985088348, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4620108888484538e-05, + "rewards/margins": 0.21258702874183655, + "rewards/rejected": -0.21261164546012878, + "step": 11928 + }, + { + "epoch": 8.249654218533887, + "grad_norm": 6.097728252410889, + "learning_rate": 9.724143230367296e-06, + "log_odds_chosen": 11.014291763305664, + "log_odds_ratio": -4.301685112295672e-05, + "logits/chosen": -0.14947861433029175, + "logits/rejected": -0.22163355350494385, + "logps/chosen": -0.00014046431169845164, + "logps/rejected": -2.1883559226989746, + "loss": 0.5594, + "nll_loss": 0.1398484706878662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4046430806047283e-05, + "rewards/margins": 0.21882155537605286, + "rewards/rejected": -0.21883559226989746, + "step": 11929 + }, + { + "epoch": 8.250345781466113, + "grad_norm": 3.72836971282959, + "learning_rate": 9.720301214077149e-06, + "log_odds_chosen": 11.393030166625977, + "log_odds_ratio": -1.6560083167860284e-05, + "logits/chosen": -0.37463024258613586, + "logits/rejected": -0.41027507185935974, + "logps/chosen": -0.00012916355626657605, + "logps/rejected": -2.406658172607422, + "loss": 0.4886, + "nll_loss": 0.1221555545926094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2916356354253367e-05, + "rewards/margins": 0.2406529039144516, + "rewards/rejected": -0.24066582322120667, + "step": 11930 + }, + { + "epoch": 8.251037344398341, + "grad_norm": 3.9670143127441406, + "learning_rate": 9.716459197787e-06, + "log_odds_chosen": 11.113236427307129, + "log_odds_ratio": -2.2357296984409913e-05, + "logits/chosen": -0.28098994493484497, + "logits/rejected": -0.2765815854072571, + "logps/chosen": -9.308641165262088e-05, + "logps/rejected": -1.8025569915771484, + "loss": 0.3587, + "nll_loss": 0.08968042582273483, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.308640983363148e-06, + "rewards/margins": 0.18024641275405884, + "rewards/rejected": -0.180255725979805, + "step": 11931 + }, + { + "epoch": 8.251728907330566, + "grad_norm": 8.630131721496582, + "learning_rate": 9.71261718149685e-06, + "log_odds_chosen": 9.948583602905273, + "log_odds_ratio": -9.74518625298515e-05, + "logits/chosen": -0.2843390703201294, + "logits/rejected": -0.3486748933792114, + "logps/chosen": -0.000242379741393961, + "logps/rejected": -1.5226190090179443, + "loss": 0.4809, + "nll_loss": 0.12021724879741669, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4237977413577028e-05, + "rewards/margins": 0.1522376537322998, + "rewards/rejected": -0.152261883020401, + "step": 11932 + }, + { + "epoch": 8.252420470262795, + "grad_norm": 4.932256698608398, + "learning_rate": 9.708775165206701e-06, + "log_odds_chosen": 12.347237586975098, + "log_odds_ratio": -1.0626763469190337e-05, + "logits/chosen": -0.16214779019355774, + "logits/rejected": -0.29443246126174927, + "logps/chosen": -0.0003793642681557685, + "logps/rejected": -3.688446044921875, + "loss": 0.4671, + "nll_loss": 0.11676811426877975, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7936428270768374e-05, + "rewards/margins": 0.3688066303730011, + "rewards/rejected": -0.368844598531723, + "step": 11933 + }, + { + "epoch": 8.25311203319502, + "grad_norm": 4.48406982421875, + "learning_rate": 9.704933148916552e-06, + "log_odds_chosen": 11.195205688476562, + "log_odds_ratio": -8.202636672649533e-05, + "logits/chosen": -0.11493605375289917, + "logits/rejected": -0.2281782329082489, + "logps/chosen": -0.00037876551505178213, + "logps/rejected": -2.323265552520752, + "loss": 0.3885, + "nll_loss": 0.09712628275156021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.787655441556126e-05, + "rewards/margins": 0.2322886884212494, + "rewards/rejected": -0.23232656717300415, + "step": 11934 + }, + { + "epoch": 8.253803596127248, + "grad_norm": 4.716032981872559, + "learning_rate": 9.701091132626403e-06, + "log_odds_chosen": 10.351265907287598, + "log_odds_ratio": -9.08208021428436e-05, + "logits/chosen": -0.022960372269153595, + "logits/rejected": -0.1691531538963318, + "logps/chosen": -0.00030095313559286296, + "logps/rejected": -1.9174537658691406, + "loss": 0.3216, + "nll_loss": 0.08038497716188431, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0095314286882058e-05, + "rewards/margins": 0.191715270280838, + "rewards/rejected": -0.19174537062644958, + "step": 11935 + }, + { + "epoch": 8.254495159059474, + "grad_norm": 4.077927112579346, + "learning_rate": 9.697249116336253e-06, + "log_odds_chosen": 10.980993270874023, + "log_odds_ratio": -6.214114546310157e-05, + "logits/chosen": -0.43654143810272217, + "logits/rejected": -0.4607735872268677, + "logps/chosen": -0.00015193871513474733, + "logps/rejected": -1.98684561252594, + "loss": 0.4099, + "nll_loss": 0.10245777666568756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5193872059171554e-05, + "rewards/margins": 0.19866937398910522, + "rewards/rejected": -0.19868457317352295, + "step": 11936 + }, + { + "epoch": 8.255186721991702, + "grad_norm": 3.7622458934783936, + "learning_rate": 9.693407100046104e-06, + "log_odds_chosen": 10.723194122314453, + "log_odds_ratio": -6.372190546244383e-05, + "logits/chosen": -0.36659157276153564, + "logits/rejected": -0.41019150614738464, + "logps/chosen": -0.0001748724898789078, + "logps/rejected": -2.0019383430480957, + "loss": 0.3297, + "nll_loss": 0.08241233229637146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.748724935168866e-05, + "rewards/margins": 0.20017635822296143, + "rewards/rejected": -0.2001938372850418, + "step": 11937 + }, + { + "epoch": 8.255878284923927, + "grad_norm": 3.4987776279449463, + "learning_rate": 9.689565083755955e-06, + "log_odds_chosen": 11.87031364440918, + "log_odds_ratio": -4.831314799957909e-05, + "logits/chosen": -0.13452598452568054, + "logits/rejected": -0.32432645559310913, + "logps/chosen": -0.00027610603137873113, + "logps/rejected": -2.5127694606781006, + "loss": 0.3822, + "nll_loss": 0.09553464502096176, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7610603865468875e-05, + "rewards/margins": 0.2512493431568146, + "rewards/rejected": -0.25127696990966797, + "step": 11938 + }, + { + "epoch": 8.256569847856156, + "grad_norm": 3.179476737976074, + "learning_rate": 9.685723067465806e-06, + "log_odds_chosen": 11.044832229614258, + "log_odds_ratio": -0.0004461394564714283, + "logits/chosen": -0.328361451625824, + "logits/rejected": -0.29957258701324463, + "logps/chosen": -0.0006780716357752681, + "logps/rejected": -2.9000000953674316, + "loss": 0.2553, + "nll_loss": 0.06378158181905746, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.780715921195224e-05, + "rewards/margins": 0.2899321913719177, + "rewards/rejected": -0.28999999165534973, + "step": 11939 + }, + { + "epoch": 8.25726141078838, + "grad_norm": 3.719775915145874, + "learning_rate": 9.681881051175658e-06, + "log_odds_chosen": 11.379352569580078, + "log_odds_ratio": -4.088229616172612e-05, + "logits/chosen": -0.5665051341056824, + "logits/rejected": -0.6805068850517273, + "logps/chosen": -8.75107798492536e-05, + "logps/rejected": -1.9972556829452515, + "loss": 0.4335, + "nll_loss": 0.10838184505701065, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.751078894420061e-06, + "rewards/margins": 0.19971680641174316, + "rewards/rejected": -0.19972556829452515, + "step": 11940 + }, + { + "epoch": 8.25795297372061, + "grad_norm": 4.267411708831787, + "learning_rate": 9.678039034885509e-06, + "log_odds_chosen": 11.939834594726562, + "log_odds_ratio": -2.9243588869576342e-05, + "logits/chosen": -0.52231764793396, + "logits/rejected": -0.5219714641571045, + "logps/chosen": -0.00013408969971351326, + "logps/rejected": -2.323073387145996, + "loss": 0.4224, + "nll_loss": 0.1055847555398941, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3408969607553445e-05, + "rewards/margins": 0.23229394853115082, + "rewards/rejected": -0.2323073446750641, + "step": 11941 + }, + { + "epoch": 8.258644536652836, + "grad_norm": 3.8554868698120117, + "learning_rate": 9.674197018595358e-06, + "log_odds_chosen": 12.19814682006836, + "log_odds_ratio": -1.4316083252197132e-05, + "logits/chosen": -0.8943642377853394, + "logits/rejected": -0.9774724245071411, + "logps/chosen": -0.00010931852739304304, + "logps/rejected": -2.5393433570861816, + "loss": 0.3261, + "nll_loss": 0.08151516318321228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0931853466900066e-05, + "rewards/margins": 0.2539233863353729, + "rewards/rejected": -0.2539343237876892, + "step": 11942 + }, + { + "epoch": 8.259336099585063, + "grad_norm": 5.9453864097595215, + "learning_rate": 9.67035500230521e-06, + "log_odds_chosen": 10.217976570129395, + "log_odds_ratio": -0.0001286189362872392, + "logits/chosen": -0.23656044900417328, + "logits/rejected": -0.308110773563385, + "logps/chosen": -0.0004473762819543481, + "logps/rejected": -1.6824361085891724, + "loss": 0.4505, + "nll_loss": 0.11260005086660385, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4737626012647524e-05, + "rewards/margins": 0.1681988686323166, + "rewards/rejected": -0.1682436168193817, + "step": 11943 + }, + { + "epoch": 8.26002766251729, + "grad_norm": 3.4335649013519287, + "learning_rate": 9.666512986015061e-06, + "log_odds_chosen": 9.835412979125977, + "log_odds_ratio": -7.321751763811335e-05, + "logits/chosen": -0.7244390845298767, + "logits/rejected": -0.7772824168205261, + "logps/chosen": -0.0006460523582063615, + "logps/rejected": -1.6226214170455933, + "loss": 0.2635, + "nll_loss": 0.06585706025362015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.460522854467854e-05, + "rewards/margins": 0.1621975302696228, + "rewards/rejected": -0.16226214170455933, + "step": 11944 + }, + { + "epoch": 8.260719225449517, + "grad_norm": 4.816346645355225, + "learning_rate": 9.662670969724912e-06, + "log_odds_chosen": 11.229515075683594, + "log_odds_ratio": -3.3624597563175485e-05, + "logits/chosen": -0.19780333340168, + "logits/rejected": -0.20678195357322693, + "logps/chosen": -0.00016987889830488712, + "logps/rejected": -2.421178102493286, + "loss": 0.5191, + "nll_loss": 0.12976379692554474, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.698788946669083e-05, + "rewards/margins": 0.24210083484649658, + "rewards/rejected": -0.24211782217025757, + "step": 11945 + }, + { + "epoch": 8.261410788381744, + "grad_norm": 3.3699300289154053, + "learning_rate": 9.658828953434763e-06, + "log_odds_chosen": 10.996805191040039, + "log_odds_ratio": -0.00020430742006283253, + "logits/chosen": -0.3011920750141144, + "logits/rejected": -0.26979607343673706, + "logps/chosen": -0.00016290269559249282, + "logps/rejected": -1.726262092590332, + "loss": 0.3178, + "nll_loss": 0.07941769808530807, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6290270650642924e-05, + "rewards/margins": 0.17260992527008057, + "rewards/rejected": -0.17262621223926544, + "step": 11946 + }, + { + "epoch": 8.26210235131397, + "grad_norm": 5.184370517730713, + "learning_rate": 9.654986937144613e-06, + "log_odds_chosen": 9.627692222595215, + "log_odds_ratio": -0.000175231893081218, + "logits/chosen": -0.6078090071678162, + "logits/rejected": -0.6238219141960144, + "logps/chosen": -0.00043194310273975134, + "logps/rejected": -1.395906686782837, + "loss": 0.3988, + "nll_loss": 0.0996830016374588, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.319431172916666e-05, + "rewards/margins": 0.1395474672317505, + "rewards/rejected": -0.13959068059921265, + "step": 11947 + }, + { + "epoch": 8.262793914246197, + "grad_norm": 3.9152684211730957, + "learning_rate": 9.651144920854464e-06, + "log_odds_chosen": 11.434577941894531, + "log_odds_ratio": -5.2444524044403806e-05, + "logits/chosen": 0.019659768790006638, + "logits/rejected": -0.13808220624923706, + "logps/chosen": -0.0010777817806228995, + "logps/rejected": -3.294294595718384, + "loss": 0.3711, + "nll_loss": 0.09276201575994492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010777818533824757, + "rewards/margins": 0.3293216824531555, + "rewards/rejected": -0.3294294476509094, + "step": 11948 + }, + { + "epoch": 8.263485477178424, + "grad_norm": 4.5004143714904785, + "learning_rate": 9.647302904564317e-06, + "log_odds_chosen": 10.837656021118164, + "log_odds_ratio": -0.00035609930637292564, + "logits/chosen": 0.02208644151687622, + "logits/rejected": -0.09548585116863251, + "logps/chosen": -0.0006469839718192816, + "logps/rejected": -2.8256313800811768, + "loss": 0.4955, + "nll_loss": 0.1238352507352829, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.469839718192816e-05, + "rewards/margins": 0.28249841928482056, + "rewards/rejected": -0.28256314992904663, + "step": 11949 + }, + { + "epoch": 8.264177040110651, + "grad_norm": 5.343628406524658, + "learning_rate": 9.643460888274167e-06, + "log_odds_chosen": 11.494791030883789, + "log_odds_ratio": -4.55570625490509e-05, + "logits/chosen": -0.24458926916122437, + "logits/rejected": -0.27908527851104736, + "logps/chosen": -0.00032541013206355274, + "logps/rejected": -2.9635403156280518, + "loss": 0.3951, + "nll_loss": 0.09877359867095947, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2541014661546797e-05, + "rewards/margins": 0.2963215112686157, + "rewards/rejected": -0.2963540256023407, + "step": 11950 + }, + { + "epoch": 8.264868603042878, + "grad_norm": 3.323399066925049, + "learning_rate": 9.639618871984018e-06, + "log_odds_chosen": 10.727117538452148, + "log_odds_ratio": -5.0795613788068295e-05, + "logits/chosen": -0.5654752254486084, + "logits/rejected": -0.6004953980445862, + "logps/chosen": -0.00021166335500311106, + "logps/rejected": -1.810473918914795, + "loss": 0.3586, + "nll_loss": 0.08963505923748016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1166335500311106e-05, + "rewards/margins": 0.18102622032165527, + "rewards/rejected": -0.18104739487171173, + "step": 11951 + }, + { + "epoch": 8.265560165975105, + "grad_norm": 4.638492584228516, + "learning_rate": 9.635776855693869e-06, + "log_odds_chosen": 10.843294143676758, + "log_odds_ratio": -3.520100290188566e-05, + "logits/chosen": -0.6220878958702087, + "logits/rejected": -0.688339114189148, + "logps/chosen": -0.00038481189403682947, + "logps/rejected": -2.357971668243408, + "loss": 0.3556, + "nll_loss": 0.08889587223529816, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.84811864932999e-05, + "rewards/margins": 0.23575866222381592, + "rewards/rejected": -0.23579715192317963, + "step": 11952 + }, + { + "epoch": 8.266251728907331, + "grad_norm": 3.359079360961914, + "learning_rate": 9.63193483940372e-06, + "log_odds_chosen": 11.392254829406738, + "log_odds_ratio": -2.3173997760750353e-05, + "logits/chosen": -0.22304686903953552, + "logits/rejected": -0.45449167490005493, + "logps/chosen": -0.000344922038493678, + "logps/rejected": -2.812790870666504, + "loss": 0.3852, + "nll_loss": 0.09629756212234497, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4492208214942366e-05, + "rewards/margins": 0.28124457597732544, + "rewards/rejected": -0.2812790870666504, + "step": 11953 + }, + { + "epoch": 8.266943291839558, + "grad_norm": 7.765052795410156, + "learning_rate": 9.62809282311357e-06, + "log_odds_chosen": 11.814998626708984, + "log_odds_ratio": -0.00015538069419562817, + "logits/chosen": -0.08499948680400848, + "logits/rejected": -0.2897992730140686, + "logps/chosen": -0.0007674909429624677, + "logps/rejected": -3.324516773223877, + "loss": 0.5177, + "nll_loss": 0.12940450012683868, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.674910739297047e-05, + "rewards/margins": 0.3323749303817749, + "rewards/rejected": -0.3324517011642456, + "step": 11954 + }, + { + "epoch": 8.267634854771785, + "grad_norm": 4.8267974853515625, + "learning_rate": 9.624250806823423e-06, + "log_odds_chosen": 10.045418739318848, + "log_odds_ratio": -9.528405644232407e-05, + "logits/chosen": -0.7230570316314697, + "logits/rejected": -0.7842307090759277, + "logps/chosen": -0.00027150430832989514, + "logps/rejected": -1.7861135005950928, + "loss": 0.4108, + "nll_loss": 0.10269634425640106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7150430469191633e-05, + "rewards/margins": 0.17858418822288513, + "rewards/rejected": -0.17861135303974152, + "step": 11955 + }, + { + "epoch": 8.268326417704012, + "grad_norm": 5.728063106536865, + "learning_rate": 9.620408790533272e-06, + "log_odds_chosen": 10.704045295715332, + "log_odds_ratio": -0.00032331154216080904, + "logits/chosen": -0.1421336978673935, + "logits/rejected": -0.20743423700332642, + "logps/chosen": -0.0004792292893398553, + "logps/rejected": -2.226273775100708, + "loss": 0.473, + "nll_loss": 0.11822532117366791, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7922931116772816e-05, + "rewards/margins": 0.2225794643163681, + "rewards/rejected": -0.22262738645076752, + "step": 11956 + }, + { + "epoch": 8.269017980636239, + "grad_norm": 3.7819995880126953, + "learning_rate": 9.616566774243123e-06, + "log_odds_chosen": 10.83586311340332, + "log_odds_ratio": -3.1590192520525306e-05, + "logits/chosen": -0.4727594554424286, + "logits/rejected": -0.6352878212928772, + "logps/chosen": -6.83098696754314e-05, + "logps/rejected": -1.3699564933776855, + "loss": 0.3469, + "nll_loss": 0.08671838790178299, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.830986421846319e-06, + "rewards/margins": 0.1369888186454773, + "rewards/rejected": -0.13699564337730408, + "step": 11957 + }, + { + "epoch": 8.269709543568466, + "grad_norm": 3.127134323120117, + "learning_rate": 9.612724757952975e-06, + "log_odds_chosen": 10.934288024902344, + "log_odds_ratio": -4.619704486685805e-05, + "logits/chosen": -0.042971763759851456, + "logits/rejected": -0.12262189388275146, + "logps/chosen": -0.00011522687418619171, + "logps/rejected": -1.725632667541504, + "loss": 0.4098, + "nll_loss": 0.10243922472000122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1522687600518111e-05, + "rewards/margins": 0.17255175113677979, + "rewards/rejected": -0.17256325483322144, + "step": 11958 + }, + { + "epoch": 8.270401106500692, + "grad_norm": 3.787686824798584, + "learning_rate": 9.608882741662826e-06, + "log_odds_chosen": 11.660400390625, + "log_odds_ratio": -7.071129948599264e-05, + "logits/chosen": -0.18112653493881226, + "logits/rejected": -0.2788428068161011, + "logps/chosen": -0.0006761641707271338, + "logps/rejected": -3.0301501750946045, + "loss": 0.3963, + "nll_loss": 0.09906087815761566, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.76164127071388e-05, + "rewards/margins": 0.30294740200042725, + "rewards/rejected": -0.30301499366760254, + "step": 11959 + }, + { + "epoch": 8.27109266943292, + "grad_norm": 4.528950214385986, + "learning_rate": 9.605040725372676e-06, + "log_odds_chosen": 11.360909461975098, + "log_odds_ratio": -0.0002581155567895621, + "logits/chosen": -0.05907332897186279, + "logits/rejected": -0.15207792818546295, + "logps/chosen": -0.00123701267875731, + "logps/rejected": -2.7253646850585938, + "loss": 0.3377, + "nll_loss": 0.08440046012401581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001237012620549649, + "rewards/margins": 0.2724127769470215, + "rewards/rejected": -0.2725364863872528, + "step": 11960 + }, + { + "epoch": 8.271784232365146, + "grad_norm": 3.4399218559265137, + "learning_rate": 9.601198709082527e-06, + "log_odds_chosen": 11.670557022094727, + "log_odds_ratio": -2.467823651386425e-05, + "logits/chosen": -0.6992796659469604, + "logits/rejected": -0.7787087559700012, + "logps/chosen": -0.00024264826788567007, + "logps/rejected": -2.8075666427612305, + "loss": 0.3266, + "nll_loss": 0.08164522796869278, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4264827516162768e-05, + "rewards/margins": 0.2807323932647705, + "rewards/rejected": -0.2807566523551941, + "step": 11961 + }, + { + "epoch": 8.272475795297373, + "grad_norm": 6.1883625984191895, + "learning_rate": 9.597356692792378e-06, + "log_odds_chosen": 12.197917938232422, + "log_odds_ratio": -1.1628735592239536e-05, + "logits/chosen": -0.4272370934486389, + "logits/rejected": -0.34923166036605835, + "logps/chosen": -0.00018850239575840533, + "logps/rejected": -3.2359747886657715, + "loss": 0.3713, + "nll_loss": 0.09283263236284256, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8850239939638413e-05, + "rewards/margins": 0.3235785961151123, + "rewards/rejected": -0.3235974609851837, + "step": 11962 + }, + { + "epoch": 8.2731673582296, + "grad_norm": 3.554027557373047, + "learning_rate": 9.593514676502229e-06, + "log_odds_chosen": 10.515666961669922, + "log_odds_ratio": -6.209921411937103e-05, + "logits/chosen": -0.540285587310791, + "logits/rejected": -0.5418251752853394, + "logps/chosen": -0.000453216111054644, + "logps/rejected": -2.0480105876922607, + "loss": 0.3462, + "nll_loss": 0.08654274046421051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.532161256065592e-05, + "rewards/margins": 0.2047557532787323, + "rewards/rejected": -0.2048010528087616, + "step": 11963 + }, + { + "epoch": 8.273858921161827, + "grad_norm": 3.581557035446167, + "learning_rate": 9.589672660212081e-06, + "log_odds_chosen": 11.442468643188477, + "log_odds_ratio": -0.0001763361506164074, + "logits/chosen": -0.5076885223388672, + "logits/rejected": -0.5351912975311279, + "logps/chosen": -0.0005444286507554352, + "logps/rejected": -2.9354920387268066, + "loss": 0.4353, + "nll_loss": 0.10881911218166351, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.444286216516048e-05, + "rewards/margins": 0.2934947609901428, + "rewards/rejected": -0.29354918003082275, + "step": 11964 + }, + { + "epoch": 8.274550484094053, + "grad_norm": 9.163952827453613, + "learning_rate": 9.58583064392193e-06, + "log_odds_chosen": 10.351091384887695, + "log_odds_ratio": -8.336950850207359e-05, + "logits/chosen": -0.029397621750831604, + "logits/rejected": -0.03363652527332306, + "logps/chosen": -0.0003193440497852862, + "logps/rejected": -2.0989863872528076, + "loss": 0.6588, + "nll_loss": 0.1646943837404251, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.19344071613159e-05, + "rewards/margins": 0.2098666876554489, + "rewards/rejected": -0.20989862084388733, + "step": 11965 + }, + { + "epoch": 8.27524204702628, + "grad_norm": 3.1572184562683105, + "learning_rate": 9.581988627631781e-06, + "log_odds_chosen": 11.891756057739258, + "log_odds_ratio": -1.1338936019456014e-05, + "logits/chosen": -0.7405799627304077, + "logits/rejected": -0.771040141582489, + "logps/chosen": -9.116944420384243e-05, + "logps/rejected": -2.6080727577209473, + "loss": 0.3485, + "nll_loss": 0.08713450282812119, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.116944056586362e-06, + "rewards/margins": 0.2607981562614441, + "rewards/rejected": -0.2608072757720947, + "step": 11966 + }, + { + "epoch": 8.275933609958507, + "grad_norm": 3.5829575061798096, + "learning_rate": 9.578146611341633e-06, + "log_odds_chosen": 9.52750015258789, + "log_odds_ratio": -0.0012475146213546395, + "logits/chosen": -0.6426373720169067, + "logits/rejected": -0.6256951689720154, + "logps/chosen": -0.0017292017582803965, + "logps/rejected": -1.389811635017395, + "loss": 0.4721, + "nll_loss": 0.11789209395647049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017292017582803965, + "rewards/margins": 0.1388082504272461, + "rewards/rejected": -0.1389811635017395, + "step": 11967 + }, + { + "epoch": 8.276625172890734, + "grad_norm": 4.077126979827881, + "learning_rate": 9.574304595051484e-06, + "log_odds_chosen": 10.81316089630127, + "log_odds_ratio": -4.982240352546796e-05, + "logits/chosen": -0.6174009442329407, + "logits/rejected": -0.5894420146942139, + "logps/chosen": -0.0002926184970419854, + "logps/rejected": -2.6595776081085205, + "loss": 0.3411, + "nll_loss": 0.08527268469333649, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9261851523187943e-05, + "rewards/margins": 0.2659285068511963, + "rewards/rejected": -0.2659577429294586, + "step": 11968 + }, + { + "epoch": 8.27731673582296, + "grad_norm": 5.055628299713135, + "learning_rate": 9.570462578761335e-06, + "log_odds_chosen": 11.854629516601562, + "log_odds_ratio": -1.4586242286895867e-05, + "logits/chosen": -0.12202761322259903, + "logits/rejected": -0.17216485738754272, + "logps/chosen": -0.00012257893104106188, + "logps/rejected": -2.690403938293457, + "loss": 0.3756, + "nll_loss": 0.09390611946582794, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.225789401360089e-05, + "rewards/margins": 0.2690281569957733, + "rewards/rejected": -0.26904040575027466, + "step": 11969 + }, + { + "epoch": 8.278008298755188, + "grad_norm": 3.672074556350708, + "learning_rate": 9.566620562471184e-06, + "log_odds_chosen": 10.932299613952637, + "log_odds_ratio": -5.1668266678461805e-05, + "logits/chosen": -0.8195986747741699, + "logits/rejected": -0.771497368812561, + "logps/chosen": -0.0001285741018364206, + "logps/rejected": -1.9971344470977783, + "loss": 0.4194, + "nll_loss": 0.10485148429870605, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2857411093136761e-05, + "rewards/margins": 0.19970059394836426, + "rewards/rejected": -0.19971343874931335, + "step": 11970 + }, + { + "epoch": 8.278699861687414, + "grad_norm": 5.524552822113037, + "learning_rate": 9.562778546181036e-06, + "log_odds_chosen": 10.681053161621094, + "log_odds_ratio": -6.293236219789833e-05, + "logits/chosen": -0.4877323508262634, + "logits/rejected": -0.4758765697479248, + "logps/chosen": -0.00020377582404762506, + "logps/rejected": -1.9765567779541016, + "loss": 0.5573, + "nll_loss": 0.13932693004608154, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0377581677166745e-05, + "rewards/margins": 0.19763529300689697, + "rewards/rejected": -0.19765567779541016, + "step": 11971 + }, + { + "epoch": 8.279391424619641, + "grad_norm": 3.4568281173706055, + "learning_rate": 9.558936529890887e-06, + "log_odds_chosen": 10.67101764678955, + "log_odds_ratio": -6.874544487800449e-05, + "logits/chosen": -0.13481301069259644, + "logits/rejected": -0.16933226585388184, + "logps/chosen": -0.0008989893249236047, + "logps/rejected": -1.8208454847335815, + "loss": 0.4204, + "nll_loss": 0.10510402172803879, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.989893103716895e-05, + "rewards/margins": 0.18199466168880463, + "rewards/rejected": -0.18208454549312592, + "step": 11972 + }, + { + "epoch": 8.280082987551868, + "grad_norm": 3.5602478981018066, + "learning_rate": 9.555094513600738e-06, + "log_odds_chosen": 11.59362506866455, + "log_odds_ratio": -0.00014446736895479262, + "logits/chosen": 0.11925359070301056, + "logits/rejected": 0.03051447868347168, + "logps/chosen": -0.00034636116470210254, + "logps/rejected": -2.8364691734313965, + "loss": 0.3448, + "nll_loss": 0.08617541939020157, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.463611574261449e-05, + "rewards/margins": 0.28361228108406067, + "rewards/rejected": -0.28364691138267517, + "step": 11973 + }, + { + "epoch": 8.280774550484095, + "grad_norm": 4.688289165496826, + "learning_rate": 9.551252497310589e-06, + "log_odds_chosen": 11.239917755126953, + "log_odds_ratio": -3.473057586234063e-05, + "logits/chosen": -0.2402084618806839, + "logits/rejected": -0.17532745003700256, + "logps/chosen": -0.0004617611411958933, + "logps/rejected": -3.1051254272460938, + "loss": 1.0618, + "nll_loss": 0.26544082164764404, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.617611557478085e-05, + "rewards/margins": 0.3104664087295532, + "rewards/rejected": -0.31051257252693176, + "step": 11974 + }, + { + "epoch": 8.281466113416322, + "grad_norm": 3.4036073684692383, + "learning_rate": 9.54741048102044e-06, + "log_odds_chosen": 10.610368728637695, + "log_odds_ratio": -9.449348726775497e-05, + "logits/chosen": -0.4558260142803192, + "logits/rejected": -0.4234117865562439, + "logps/chosen": -0.00013078105985186994, + "logps/rejected": -1.9758434295654297, + "loss": 0.3565, + "nll_loss": 0.08910534530878067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3078107258479577e-05, + "rewards/margins": 0.1975712776184082, + "rewards/rejected": -0.1975843459367752, + "step": 11975 + }, + { + "epoch": 8.282157676348548, + "grad_norm": 4.087696075439453, + "learning_rate": 9.54356846473029e-06, + "log_odds_chosen": 10.31620979309082, + "log_odds_ratio": -0.00024037774710450321, + "logits/chosen": -0.2854808568954468, + "logits/rejected": -0.3649406433105469, + "logps/chosen": -0.0005136193940415978, + "logps/rejected": -1.529449462890625, + "loss": 0.4878, + "nll_loss": 0.12192420661449432, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1361945224925876e-05, + "rewards/margins": 0.15289360284805298, + "rewards/rejected": -0.15294496715068817, + "step": 11976 + }, + { + "epoch": 8.282849239280775, + "grad_norm": 3.270496368408203, + "learning_rate": 9.539726448440143e-06, + "log_odds_chosen": 11.231451034545898, + "log_odds_ratio": -0.00034369956119917333, + "logits/chosen": -0.326479434967041, + "logits/rejected": -0.439314067363739, + "logps/chosen": -0.00022332128719426692, + "logps/rejected": -2.6155924797058105, + "loss": 0.3325, + "nll_loss": 0.08309418708086014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.233212762803305e-05, + "rewards/margins": 0.26153692603111267, + "rewards/rejected": -0.26155924797058105, + "step": 11977 + }, + { + "epoch": 8.283540802213002, + "grad_norm": 4.652988910675049, + "learning_rate": 9.535884432149993e-06, + "log_odds_chosen": 10.786218643188477, + "log_odds_ratio": -6.878216663608328e-05, + "logits/chosen": -0.687461256980896, + "logits/rejected": -0.6254911422729492, + "logps/chosen": -0.000608769478276372, + "logps/rejected": -2.4494590759277344, + "loss": 0.4639, + "nll_loss": 0.1159728467464447, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0876947827637196e-05, + "rewards/margins": 0.24488502740859985, + "rewards/rejected": -0.24494591355323792, + "step": 11978 + }, + { + "epoch": 8.284232365145229, + "grad_norm": 3.475490093231201, + "learning_rate": 9.532042415859842e-06, + "log_odds_chosen": 11.721963882446289, + "log_odds_ratio": -1.3145630873623304e-05, + "logits/chosen": -0.3036603331565857, + "logits/rejected": -0.3121589124202728, + "logps/chosen": -0.0003267270512878895, + "logps/rejected": -2.862849473953247, + "loss": 0.4222, + "nll_loss": 0.1055576428771019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.26727022184059e-05, + "rewards/margins": 0.28625229001045227, + "rewards/rejected": -0.2862849533557892, + "step": 11979 + }, + { + "epoch": 8.284923928077456, + "grad_norm": 2.267528772354126, + "learning_rate": 9.528200399569695e-06, + "log_odds_chosen": 10.153470039367676, + "log_odds_ratio": -8.534399967174977e-05, + "logits/chosen": -0.3335803747177124, + "logits/rejected": -0.3047267496585846, + "logps/chosen": -0.00017864150868263096, + "logps/rejected": -1.5260097980499268, + "loss": 0.2074, + "nll_loss": 0.05183638632297516, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7864151232060976e-05, + "rewards/margins": 0.15258312225341797, + "rewards/rejected": -0.1526009887456894, + "step": 11980 + }, + { + "epoch": 8.285615491009683, + "grad_norm": 2.8518567085266113, + "learning_rate": 9.524358383279546e-06, + "log_odds_chosen": 11.519308090209961, + "log_odds_ratio": -2.236364525742829e-05, + "logits/chosen": -0.3141467571258545, + "logits/rejected": -0.3175681531429291, + "logps/chosen": -0.00032692833337932825, + "logps/rejected": -2.522468090057373, + "loss": 0.3041, + "nll_loss": 0.07602708041667938, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.269283479312435e-05, + "rewards/margins": 0.25221410393714905, + "rewards/rejected": -0.25224679708480835, + "step": 11981 + }, + { + "epoch": 8.28630705394191, + "grad_norm": 5.877493858337402, + "learning_rate": 9.520516366989396e-06, + "log_odds_chosen": 11.447668075561523, + "log_odds_ratio": -2.5962668587453663e-05, + "logits/chosen": -0.521180272102356, + "logits/rejected": -0.587950587272644, + "logps/chosen": -0.0001497505436418578, + "logps/rejected": -2.5814712047576904, + "loss": 0.4525, + "nll_loss": 0.11313392221927643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4975053090893198e-05, + "rewards/margins": 0.2581321597099304, + "rewards/rejected": -0.25814712047576904, + "step": 11982 + }, + { + "epoch": 8.286998616874136, + "grad_norm": 4.329975605010986, + "learning_rate": 9.516674350699247e-06, + "log_odds_chosen": 11.1446533203125, + "log_odds_ratio": -4.236675886204466e-05, + "logits/chosen": -0.2646929919719696, + "logits/rejected": -0.2811059355735779, + "logps/chosen": -0.00033514917595312, + "logps/rejected": -2.479694128036499, + "loss": 0.3655, + "nll_loss": 0.09138160943984985, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3514916140120476e-05, + "rewards/margins": 0.24793589115142822, + "rewards/rejected": -0.24796943366527557, + "step": 11983 + }, + { + "epoch": 8.287690179806363, + "grad_norm": 3.3373169898986816, + "learning_rate": 9.512832334409098e-06, + "log_odds_chosen": 11.226622581481934, + "log_odds_ratio": -0.00012851627252530307, + "logits/chosen": -0.06570761650800705, + "logits/rejected": -0.13167516887187958, + "logps/chosen": -0.0010037928586825728, + "logps/rejected": -2.534026861190796, + "loss": 0.3234, + "nll_loss": 0.08082625269889832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001003792931442149, + "rewards/margins": 0.25330230593681335, + "rewards/rejected": -0.2534027099609375, + "step": 11984 + }, + { + "epoch": 8.28838174273859, + "grad_norm": 2.9590377807617188, + "learning_rate": 9.508990318118949e-06, + "log_odds_chosen": 9.680686950683594, + "log_odds_ratio": -0.00016874133143574, + "logits/chosen": -0.16577889025211334, + "logits/rejected": -0.15216651558876038, + "logps/chosen": -0.00026318003074266016, + "logps/rejected": -1.310761570930481, + "loss": 0.3329, + "nll_loss": 0.08320856094360352, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.631800452945754e-05, + "rewards/margins": 0.13104984164237976, + "rewards/rejected": -0.1310761570930481, + "step": 11985 + }, + { + "epoch": 8.289073305670817, + "grad_norm": 5.105093955993652, + "learning_rate": 9.505148301828801e-06, + "log_odds_chosen": 10.774724960327148, + "log_odds_ratio": -0.0006012396770529449, + "logits/chosen": -0.34266242384910583, + "logits/rejected": -0.4288986623287201, + "logps/chosen": -0.00046030114754103124, + "logps/rejected": -2.547994613647461, + "loss": 0.4747, + "nll_loss": 0.11861294507980347, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6030119847273454e-05, + "rewards/margins": 0.2547534108161926, + "rewards/rejected": -0.2547994554042816, + "step": 11986 + }, + { + "epoch": 8.289764868603044, + "grad_norm": 3.635981798171997, + "learning_rate": 9.501306285538652e-06, + "log_odds_chosen": 12.766622543334961, + "log_odds_ratio": -1.744990368024446e-05, + "logits/chosen": -0.020735815167427063, + "logits/rejected": -0.05121392011642456, + "logps/chosen": -0.00016500083438586444, + "logps/rejected": -3.8408167362213135, + "loss": 0.3746, + "nll_loss": 0.09365012496709824, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6500083802384324e-05, + "rewards/margins": 0.384065181016922, + "rewards/rejected": -0.3840816915035248, + "step": 11987 + }, + { + "epoch": 8.29045643153527, + "grad_norm": 3.4699249267578125, + "learning_rate": 9.4974642692485e-06, + "log_odds_chosen": 10.973389625549316, + "log_odds_ratio": -4.201291085337289e-05, + "logits/chosen": -0.5190303325653076, + "logits/rejected": -0.5497217178344727, + "logps/chosen": -0.0002159866999136284, + "logps/rejected": -1.9042290449142456, + "loss": 0.3383, + "nll_loss": 0.08458288013935089, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1598672901745886e-05, + "rewards/margins": 0.19040131568908691, + "rewards/rejected": -0.190422922372818, + "step": 11988 + }, + { + "epoch": 8.291147994467497, + "grad_norm": 3.639089345932007, + "learning_rate": 9.493622252958353e-06, + "log_odds_chosen": 10.550485610961914, + "log_odds_ratio": -0.0005758869810961187, + "logits/chosen": -0.05049346387386322, + "logits/rejected": -0.134282186627388, + "logps/chosen": -0.0009018158307299018, + "logps/rejected": -2.437182903289795, + "loss": 0.469, + "nll_loss": 0.11719910055398941, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.018159471452236e-05, + "rewards/margins": 0.24362812936306, + "rewards/rejected": -0.24371829628944397, + "step": 11989 + }, + { + "epoch": 8.291839557399724, + "grad_norm": 5.109975814819336, + "learning_rate": 9.489780236668204e-06, + "log_odds_chosen": 11.503591537475586, + "log_odds_ratio": -7.810613897163421e-05, + "logits/chosen": -0.27875831723213196, + "logits/rejected": -0.29320570826530457, + "logps/chosen": -0.0001523328828625381, + "logps/rejected": -2.7631423473358154, + "loss": 0.3234, + "nll_loss": 0.08085079491138458, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5233287740556989e-05, + "rewards/margins": 0.27629899978637695, + "rewards/rejected": -0.27631425857543945, + "step": 11990 + }, + { + "epoch": 8.292531120331951, + "grad_norm": 8.194153785705566, + "learning_rate": 9.485938220378055e-06, + "log_odds_chosen": 11.782495498657227, + "log_odds_ratio": -2.081832462863531e-05, + "logits/chosen": -0.14003007113933563, + "logits/rejected": -0.1627955138683319, + "logps/chosen": -0.00012124376371502876, + "logps/rejected": -2.629612922668457, + "loss": 0.4105, + "nll_loss": 0.1026250571012497, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2124375643907115e-05, + "rewards/margins": 0.2629491686820984, + "rewards/rejected": -0.2629612982273102, + "step": 11991 + }, + { + "epoch": 8.293222683264178, + "grad_norm": 11.853130340576172, + "learning_rate": 9.482096204087906e-06, + "log_odds_chosen": 10.236540794372559, + "log_odds_ratio": -0.00012168378452770412, + "logits/chosen": -0.143966943025589, + "logits/rejected": -0.17248134315013885, + "logps/chosen": -0.00038400760968215764, + "logps/rejected": -1.9316377639770508, + "loss": 0.5801, + "nll_loss": 0.1450154036283493, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8400761695811525e-05, + "rewards/margins": 0.19312536716461182, + "rewards/rejected": -0.19316376745700836, + "step": 11992 + }, + { + "epoch": 8.293914246196405, + "grad_norm": 6.01344108581543, + "learning_rate": 9.478254187797756e-06, + "log_odds_chosen": 10.616826057434082, + "log_odds_ratio": -0.0004952213494107127, + "logits/chosen": 0.25140225887298584, + "logits/rejected": 0.23632827401161194, + "logps/chosen": -0.0021340660750865936, + "logps/rejected": -2.39619779586792, + "loss": 0.6051, + "nll_loss": 0.1512296199798584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021340660168789327, + "rewards/margins": 0.23940636217594147, + "rewards/rejected": -0.23961979150772095, + "step": 11993 + }, + { + "epoch": 8.294605809128631, + "grad_norm": 3.8722987174987793, + "learning_rate": 9.474412171507607e-06, + "log_odds_chosen": 11.499201774597168, + "log_odds_ratio": -1.2345096365606878e-05, + "logits/chosen": 0.2737366557121277, + "logits/rejected": 0.10590272396802902, + "logps/chosen": -0.00012341570982243866, + "logps/rejected": -2.270833969116211, + "loss": 0.9109, + "nll_loss": 0.22772641479969025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2341570254648104e-05, + "rewards/margins": 0.22707104682922363, + "rewards/rejected": -0.22708338499069214, + "step": 11994 + }, + { + "epoch": 8.295297372060858, + "grad_norm": 4.4436492919921875, + "learning_rate": 9.47057015521746e-06, + "log_odds_chosen": 10.22150707244873, + "log_odds_ratio": -0.00022509277914650738, + "logits/chosen": 0.5555198192596436, + "logits/rejected": 0.45036596059799194, + "logps/chosen": -0.0005974264349788427, + "logps/rejected": -1.5443403720855713, + "loss": 0.3662, + "nll_loss": 0.09152275323867798, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974264786345884e-05, + "rewards/margins": 0.15437428653240204, + "rewards/rejected": -0.15443404018878937, + "step": 11995 + }, + { + "epoch": 8.295988934993085, + "grad_norm": 3.988182783126831, + "learning_rate": 9.46672813892731e-06, + "log_odds_chosen": 11.658870697021484, + "log_odds_ratio": -1.8149998140870593e-05, + "logits/chosen": 0.2656916379928589, + "logits/rejected": 0.17560166120529175, + "logps/chosen": -0.0001451415300834924, + "logps/rejected": -2.2492456436157227, + "loss": 0.4222, + "nll_loss": 0.10554390400648117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.451415300834924e-05, + "rewards/margins": 0.22491006553173065, + "rewards/rejected": -0.22492457926273346, + "step": 11996 + }, + { + "epoch": 8.296680497925312, + "grad_norm": 4.026516914367676, + "learning_rate": 9.462886122637161e-06, + "log_odds_chosen": 11.03490924835205, + "log_odds_ratio": -5.12512560817413e-05, + "logits/chosen": 0.1875906139612198, + "logits/rejected": -0.0007145889103412628, + "logps/chosen": -0.00017578538972884417, + "logps/rejected": -2.1576361656188965, + "loss": 0.4374, + "nll_loss": 0.10933278501033783, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7578538972884417e-05, + "rewards/margins": 0.21574603021144867, + "rewards/rejected": -0.2157636135816574, + "step": 11997 + }, + { + "epoch": 8.297372060857539, + "grad_norm": 3.844303846359253, + "learning_rate": 9.459044106347012e-06, + "log_odds_chosen": 11.463105201721191, + "log_odds_ratio": -3.170102718286216e-05, + "logits/chosen": -0.10093332827091217, + "logits/rejected": -0.18920347094535828, + "logps/chosen": -0.0001887732360046357, + "logps/rejected": -2.466240882873535, + "loss": 0.3886, + "nll_loss": 0.09715703874826431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8877322872867808e-05, + "rewards/margins": 0.24660521745681763, + "rewards/rejected": -0.24662411212921143, + "step": 11998 + }, + { + "epoch": 8.298063623789766, + "grad_norm": 4.110239028930664, + "learning_rate": 9.455202090056862e-06, + "log_odds_chosen": 11.454631805419922, + "log_odds_ratio": -1.3583087820734363e-05, + "logits/chosen": -0.5618329644203186, + "logits/rejected": -0.5980328917503357, + "logps/chosen": -6.422075966838747e-05, + "logps/rejected": -1.8239201307296753, + "loss": 0.3823, + "nll_loss": 0.09557496011257172, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.422075784939807e-06, + "rewards/margins": 0.18238559365272522, + "rewards/rejected": -0.18239201605319977, + "step": 11999 + }, + { + "epoch": 8.298755186721992, + "grad_norm": 2.6601874828338623, + "learning_rate": 9.451360073766713e-06, + "log_odds_chosen": 9.818875312805176, + "log_odds_ratio": -0.00027578973094932735, + "logits/chosen": -0.2035187929868698, + "logits/rejected": -0.24134671688079834, + "logps/chosen": -0.0003357850364409387, + "logps/rejected": -1.7158150672912598, + "loss": 0.2818, + "nll_loss": 0.07043261080980301, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.357850437168963e-05, + "rewards/margins": 0.17154793441295624, + "rewards/rejected": -0.17158152163028717, + "step": 12000 + }, + { + "epoch": 8.29944674965422, + "grad_norm": 3.5226309299468994, + "learning_rate": 9.447518057476566e-06, + "log_odds_chosen": 11.647101402282715, + "log_odds_ratio": -3.9287675463128835e-05, + "logits/chosen": -0.18191269040107727, + "logits/rejected": -0.2964354455471039, + "logps/chosen": -0.00018018539412878454, + "logps/rejected": -2.9349350929260254, + "loss": 0.4718, + "nll_loss": 0.11795572191476822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8018541595665738e-05, + "rewards/margins": 0.29347550868988037, + "rewards/rejected": -0.29349350929260254, + "step": 12001 + }, + { + "epoch": 8.300138312586446, + "grad_norm": 4.153374195098877, + "learning_rate": 9.443676041186415e-06, + "log_odds_chosen": 10.61652946472168, + "log_odds_ratio": -0.0006325379363261163, + "logits/chosen": -0.34415724873542786, + "logits/rejected": -0.33158639073371887, + "logps/chosen": -0.00023147767933551222, + "logps/rejected": -1.5006442070007324, + "loss": 0.3987, + "nll_loss": 0.09960442036390305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3147767933551222e-05, + "rewards/margins": 0.15004128217697144, + "rewards/rejected": -0.15006442368030548, + "step": 12002 + }, + { + "epoch": 8.300829875518673, + "grad_norm": 4.306608200073242, + "learning_rate": 9.439834024896265e-06, + "log_odds_chosen": 11.898919105529785, + "log_odds_ratio": -1.5399793483084068e-05, + "logits/chosen": -0.06470970064401627, + "logits/rejected": -0.22172331809997559, + "logps/chosen": -6.676278280792758e-05, + "logps/rejected": -2.223916530609131, + "loss": 0.3876, + "nll_loss": 0.09691078960895538, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.676278644590639e-06, + "rewards/margins": 0.2223849892616272, + "rewards/rejected": -0.22239167988300323, + "step": 12003 + }, + { + "epoch": 8.3015214384509, + "grad_norm": 7.656002521514893, + "learning_rate": 9.435992008606116e-06, + "log_odds_chosen": 12.206582069396973, + "log_odds_ratio": -0.00027584240888245404, + "logits/chosen": -0.37336617708206177, + "logits/rejected": -0.26509660482406616, + "logps/chosen": -0.00016218278324231505, + "logps/rejected": -3.6707310676574707, + "loss": 0.6351, + "nll_loss": 0.15875668823719025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6218276869039983e-05, + "rewards/margins": 0.3670569062232971, + "rewards/rejected": -0.36707305908203125, + "step": 12004 + }, + { + "epoch": 8.302213001383127, + "grad_norm": 4.507673263549805, + "learning_rate": 9.432149992315969e-06, + "log_odds_chosen": 11.556356430053711, + "log_odds_ratio": -0.00019117892952635884, + "logits/chosen": -0.03574218600988388, + "logits/rejected": -0.04328570514917374, + "logps/chosen": -0.0006461284938268363, + "logps/rejected": -2.985804319381714, + "loss": 0.6046, + "nll_loss": 0.1511344313621521, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.46128537482582e-05, + "rewards/margins": 0.29851582646369934, + "rewards/rejected": -0.2985804080963135, + "step": 12005 + }, + { + "epoch": 8.302904564315353, + "grad_norm": 6.191859245300293, + "learning_rate": 9.42830797602582e-06, + "log_odds_chosen": 11.82589340209961, + "log_odds_ratio": -1.3909155313740484e-05, + "logits/chosen": -0.29082420468330383, + "logits/rejected": -0.13369691371917725, + "logps/chosen": -0.00014884091797284782, + "logps/rejected": -2.8574018478393555, + "loss": 0.5995, + "nll_loss": 0.1498684287071228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4884091797284782e-05, + "rewards/margins": 0.28572532534599304, + "rewards/rejected": -0.2857401967048645, + "step": 12006 + }, + { + "epoch": 8.30359612724758, + "grad_norm": 5.1775946617126465, + "learning_rate": 9.424465959735668e-06, + "log_odds_chosen": 10.76055908203125, + "log_odds_ratio": -2.980104909511283e-05, + "logits/chosen": -0.07235311716794968, + "logits/rejected": -0.1714707314968109, + "logps/chosen": -0.0002315481542609632, + "logps/rejected": -1.8887486457824707, + "loss": 0.6075, + "nll_loss": 0.1518632173538208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3154816517489962e-05, + "rewards/margins": 0.1888517141342163, + "rewards/rejected": -0.18887485563755035, + "step": 12007 + }, + { + "epoch": 8.304287690179807, + "grad_norm": 5.869711875915527, + "learning_rate": 9.420623943445521e-06, + "log_odds_chosen": 11.506105422973633, + "log_odds_ratio": -1.4619786270486657e-05, + "logits/chosen": -0.4481998085975647, + "logits/rejected": -0.4345892667770386, + "logps/chosen": -5.6978064094437286e-05, + "logps/rejected": -1.6851485967636108, + "loss": 0.3266, + "nll_loss": 0.0816439688205719, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697806500393199e-06, + "rewards/margins": 0.16850917041301727, + "rewards/rejected": -0.16851487755775452, + "step": 12008 + }, + { + "epoch": 8.304979253112034, + "grad_norm": 4.347225189208984, + "learning_rate": 9.416781927155372e-06, + "log_odds_chosen": 11.164071083068848, + "log_odds_ratio": -0.00010318847489543259, + "logits/chosen": -0.27535247802734375, + "logits/rejected": -0.3589295744895935, + "logps/chosen": -0.00023351672280114144, + "logps/rejected": -2.745594024658203, + "loss": 0.674, + "nll_loss": 0.16848233342170715, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3351673007709906e-05, + "rewards/margins": 0.2745360732078552, + "rewards/rejected": -0.2745594382286072, + "step": 12009 + }, + { + "epoch": 8.30567081604426, + "grad_norm": 4.195399284362793, + "learning_rate": 9.412939910865222e-06, + "log_odds_chosen": 11.876922607421875, + "log_odds_ratio": -1.9107839762000367e-05, + "logits/chosen": -0.33225470781326294, + "logits/rejected": -0.32006558775901794, + "logps/chosen": -9.441829752177e-05, + "logps/rejected": -2.646901845932007, + "loss": 0.5167, + "nll_loss": 0.1291753351688385, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.44183011597488e-06, + "rewards/margins": 0.26468074321746826, + "rewards/rejected": -0.26469019055366516, + "step": 12010 + }, + { + "epoch": 8.306362378976488, + "grad_norm": 3.7133564949035645, + "learning_rate": 9.409097894575073e-06, + "log_odds_chosen": 9.273460388183594, + "log_odds_ratio": -0.0001292635570280254, + "logits/chosen": -0.6664369106292725, + "logits/rejected": -0.6849371194839478, + "logps/chosen": -0.0003915390116162598, + "logps/rejected": -1.3386191129684448, + "loss": 0.3377, + "nll_loss": 0.0844026431441307, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9153903344413266e-05, + "rewards/margins": 0.1338227391242981, + "rewards/rejected": -0.13386189937591553, + "step": 12011 + }, + { + "epoch": 8.307053941908714, + "grad_norm": 8.380539894104004, + "learning_rate": 9.405255878284924e-06, + "log_odds_chosen": 10.739407539367676, + "log_odds_ratio": -9.787101589608938e-05, + "logits/chosen": -0.2542285919189453, + "logits/rejected": -0.3180106282234192, + "logps/chosen": -0.0004297649720683694, + "logps/rejected": -2.015817403793335, + "loss": 0.3486, + "nll_loss": 0.08714074641466141, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.297649866202846e-05, + "rewards/margins": 0.20153875648975372, + "rewards/rejected": -0.20158173143863678, + "step": 12012 + }, + { + "epoch": 8.307745504840941, + "grad_norm": 3.912259101867676, + "learning_rate": 9.401413861994775e-06, + "log_odds_chosen": 11.8102445602417, + "log_odds_ratio": -4.580942550092004e-05, + "logits/chosen": -0.34821221232414246, + "logits/rejected": -0.39427390694618225, + "logps/chosen": -0.00019697409879881889, + "logps/rejected": -2.733593463897705, + "loss": 0.3726, + "nll_loss": 0.09314112365245819, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9697408788488247e-05, + "rewards/margins": 0.2733396887779236, + "rewards/rejected": -0.27335938811302185, + "step": 12013 + }, + { + "epoch": 8.308437067773168, + "grad_norm": 3.243351459503174, + "learning_rate": 9.397571845704627e-06, + "log_odds_chosen": 10.895149230957031, + "log_odds_ratio": -0.0001612679334357381, + "logits/chosen": -0.4643056392669678, + "logits/rejected": -0.5565376281738281, + "logps/chosen": -0.00031255075009539723, + "logps/rejected": -2.2396113872528076, + "loss": 0.7204, + "nll_loss": 0.18007400631904602, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1255076464731246e-05, + "rewards/margins": 0.22392991185188293, + "rewards/rejected": -0.22396114468574524, + "step": 12014 + }, + { + "epoch": 8.309128630705395, + "grad_norm": 3.78821063041687, + "learning_rate": 9.393729829414478e-06, + "log_odds_chosen": 9.914217948913574, + "log_odds_ratio": -0.00014684694178868085, + "logits/chosen": -0.14503690600395203, + "logits/rejected": -0.16446837782859802, + "logps/chosen": -0.00018237555923406035, + "logps/rejected": -1.2952816486358643, + "loss": 0.7321, + "nll_loss": 0.18300293385982513, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8237555195810273e-05, + "rewards/margins": 0.12950992584228516, + "rewards/rejected": -0.12952816486358643, + "step": 12015 + }, + { + "epoch": 8.309820193637622, + "grad_norm": 4.330719470977783, + "learning_rate": 9.389887813124327e-06, + "log_odds_chosen": 11.473482131958008, + "log_odds_ratio": -0.0002337690384592861, + "logits/chosen": 0.139499232172966, + "logits/rejected": 0.1419997215270996, + "logps/chosen": -0.0003186473622918129, + "logps/rejected": -2.680612564086914, + "loss": 0.4573, + "nll_loss": 0.11430097371339798, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.186473622918129e-05, + "rewards/margins": 0.2680293917655945, + "rewards/rejected": -0.2680612802505493, + "step": 12016 + }, + { + "epoch": 8.310511756569849, + "grad_norm": 3.5915465354919434, + "learning_rate": 9.38604579683418e-06, + "log_odds_chosen": 10.803346633911133, + "log_odds_ratio": -3.7507430533878505e-05, + "logits/chosen": -0.41293051838874817, + "logits/rejected": -0.4520162343978882, + "logps/chosen": -0.0001572552282596007, + "logps/rejected": -1.758723258972168, + "loss": 0.4038, + "nll_loss": 0.10093595087528229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.572552355355583e-05, + "rewards/margins": 0.17585662007331848, + "rewards/rejected": -0.1758723258972168, + "step": 12017 + }, + { + "epoch": 8.311203319502075, + "grad_norm": 5.969757556915283, + "learning_rate": 9.38220378054403e-06, + "log_odds_chosen": 11.04709529876709, + "log_odds_ratio": -0.00011035658826585859, + "logits/chosen": -0.6205735802650452, + "logits/rejected": -0.8208602070808411, + "logps/chosen": -0.00020124486763961613, + "logps/rejected": -2.5319573879241943, + "loss": 0.3032, + "nll_loss": 0.07580114901065826, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0124487491557375e-05, + "rewards/margins": 0.25317561626434326, + "rewards/rejected": -0.25319573283195496, + "step": 12018 + }, + { + "epoch": 8.311894882434302, + "grad_norm": 5.246040344238281, + "learning_rate": 9.37836176425388e-06, + "log_odds_chosen": 11.330859184265137, + "log_odds_ratio": -2.4554348783567548e-05, + "logits/chosen": -0.47599172592163086, + "logits/rejected": -0.4993157386779785, + "logps/chosen": -0.0002337495534447953, + "logps/rejected": -2.5536375045776367, + "loss": 0.5799, + "nll_loss": 0.144969642162323, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3374956072075292e-05, + "rewards/margins": 0.2553403973579407, + "rewards/rejected": -0.2553637623786926, + "step": 12019 + }, + { + "epoch": 8.312586445366529, + "grad_norm": 4.8827714920043945, + "learning_rate": 9.374519747963732e-06, + "log_odds_chosen": 10.16954231262207, + "log_odds_ratio": -0.00012112972763134167, + "logits/chosen": -0.686266303062439, + "logits/rejected": -0.7174581289291382, + "logps/chosen": -0.00019134815374854952, + "logps/rejected": -1.2021300792694092, + "loss": 0.2649, + "nll_loss": 0.06621464341878891, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9134817193844356e-05, + "rewards/margins": 0.12019386887550354, + "rewards/rejected": -0.12021300196647644, + "step": 12020 + }, + { + "epoch": 8.313278008298756, + "grad_norm": 5.487241744995117, + "learning_rate": 9.370677731673582e-06, + "log_odds_chosen": 10.528274536132812, + "log_odds_ratio": -4.080952930962667e-05, + "logits/chosen": -0.6552819609642029, + "logits/rejected": -0.6959894299507141, + "logps/chosen": -0.00021928038040641695, + "logps/rejected": -1.752156376838684, + "loss": 0.3278, + "nll_loss": 0.08195218443870544, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1928039132035337e-05, + "rewards/margins": 0.17519372701644897, + "rewards/rejected": -0.17521564662456512, + "step": 12021 + }, + { + "epoch": 8.313969571230983, + "grad_norm": 8.090567588806152, + "learning_rate": 9.366835715383433e-06, + "log_odds_chosen": 10.119335174560547, + "log_odds_ratio": -0.00010075949830934405, + "logits/chosen": -0.4302787184715271, + "logits/rejected": -0.5251643061637878, + "logps/chosen": -0.00014466950960922986, + "logps/rejected": -1.6406134366989136, + "loss": 0.6859, + "nll_loss": 0.17146024107933044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4466952052316628e-05, + "rewards/margins": 0.16404689848423004, + "rewards/rejected": -0.16406136751174927, + "step": 12022 + }, + { + "epoch": 8.31466113416321, + "grad_norm": 6.585088729858398, + "learning_rate": 9.362993699093286e-06, + "log_odds_chosen": 10.886041641235352, + "log_odds_ratio": -0.0005051199696026742, + "logits/chosen": -0.18099522590637207, + "logits/rejected": -0.47358912229537964, + "logps/chosen": -0.000296997808618471, + "logps/rejected": -2.187826633453369, + "loss": 0.6549, + "nll_loss": 0.16368287801742554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9699782317038625e-05, + "rewards/margins": 0.21875298023223877, + "rewards/rejected": -0.2187826931476593, + "step": 12023 + }, + { + "epoch": 8.315352697095436, + "grad_norm": 3.961249828338623, + "learning_rate": 9.359151682803136e-06, + "log_odds_chosen": 11.450935363769531, + "log_odds_ratio": -2.176938869524747e-05, + "logits/chosen": -0.3161793351173401, + "logits/rejected": -0.284781277179718, + "logps/chosen": -0.00032644631573930383, + "logps/rejected": -2.914153814315796, + "loss": 0.5379, + "nll_loss": 0.1344832479953766, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.264463157393038e-05, + "rewards/margins": 0.29138273000717163, + "rewards/rejected": -0.29141539335250854, + "step": 12024 + }, + { + "epoch": 8.316044260027663, + "grad_norm": 4.829967975616455, + "learning_rate": 9.355309666512985e-06, + "log_odds_chosen": 11.148256301879883, + "log_odds_ratio": -3.4924247302114964e-05, + "logits/chosen": 0.11780108511447906, + "logits/rejected": 0.09679309278726578, + "logps/chosen": -0.0003267015272285789, + "logps/rejected": -2.400546073913574, + "loss": 0.4474, + "nll_loss": 0.11185356974601746, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.267015563324094e-05, + "rewards/margins": 0.24002191424369812, + "rewards/rejected": -0.24005459249019623, + "step": 12025 + }, + { + "epoch": 8.31673582295989, + "grad_norm": 2.4630846977233887, + "learning_rate": 9.351467650222838e-06, + "log_odds_chosen": 10.963375091552734, + "log_odds_ratio": -0.000224971110583283, + "logits/chosen": -0.5358527898788452, + "logits/rejected": -0.5666942596435547, + "logps/chosen": -0.0004893578588962555, + "logps/rejected": -2.31632137298584, + "loss": 0.3522, + "nll_loss": 0.08801663666963577, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8935784434434026e-05, + "rewards/margins": 0.23158320784568787, + "rewards/rejected": -0.23163215816020966, + "step": 12026 + }, + { + "epoch": 8.317427385892117, + "grad_norm": 5.256807804107666, + "learning_rate": 9.347625633932689e-06, + "log_odds_chosen": 11.870455741882324, + "log_odds_ratio": -1.347633315162966e-05, + "logits/chosen": -0.4692208468914032, + "logits/rejected": -0.47148019075393677, + "logps/chosen": -8.793309825705364e-05, + "logps/rejected": -2.2496893405914307, + "loss": 0.3668, + "nll_loss": 0.09170769155025482, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.793309461907484e-06, + "rewards/margins": 0.22496017813682556, + "rewards/rejected": -0.22496894001960754, + "step": 12027 + }, + { + "epoch": 8.318118948824344, + "grad_norm": 3.6092746257781982, + "learning_rate": 9.34378361764254e-06, + "log_odds_chosen": 11.170661926269531, + "log_odds_ratio": -3.2713978725951165e-05, + "logits/chosen": -0.5941171050071716, + "logits/rejected": -0.7027862071990967, + "logps/chosen": -0.00018664052186068147, + "logps/rejected": -2.2522315979003906, + "loss": 0.4077, + "nll_loss": 0.10191075503826141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8664051822270267e-05, + "rewards/margins": 0.2252044975757599, + "rewards/rejected": -0.22522315382957458, + "step": 12028 + }, + { + "epoch": 8.31881051175657, + "grad_norm": 3.9987618923187256, + "learning_rate": 9.33994160135239e-06, + "log_odds_chosen": 11.089741706848145, + "log_odds_ratio": -7.078353519318625e-05, + "logits/chosen": 0.07066502422094345, + "logits/rejected": 0.05900406092405319, + "logps/chosen": -0.0052088359370827675, + "logps/rejected": -2.619210720062256, + "loss": 0.4813, + "nll_loss": 0.12032265961170197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005208835937082767, + "rewards/margins": 0.2614002227783203, + "rewards/rejected": -0.26192110776901245, + "step": 12029 + }, + { + "epoch": 8.319502074688797, + "grad_norm": 3.9945900440216064, + "learning_rate": 9.33609958506224e-06, + "log_odds_chosen": 11.662487030029297, + "log_odds_ratio": -2.972657239297405e-05, + "logits/chosen": -0.2970031201839447, + "logits/rejected": -0.3440643548965454, + "logps/chosen": -0.0003341581905260682, + "logps/rejected": -2.6654715538024902, + "loss": 0.4138, + "nll_loss": 0.10345922410488129, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.34158175974153e-05, + "rewards/margins": 0.26651373505592346, + "rewards/rejected": -0.26654714345932007, + "step": 12030 + }, + { + "epoch": 8.320193637621024, + "grad_norm": 4.668849468231201, + "learning_rate": 9.332257568772092e-06, + "log_odds_chosen": 10.378423690795898, + "log_odds_ratio": -0.00013369829684961587, + "logits/chosen": -0.42694246768951416, + "logits/rejected": -0.46096310019493103, + "logps/chosen": -0.0009023980237543583, + "logps/rejected": -1.9191800355911255, + "loss": 0.4479, + "nll_loss": 0.11196404695510864, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.023980237543583e-05, + "rewards/margins": 0.19182777404785156, + "rewards/rejected": -0.1919180154800415, + "step": 12031 + }, + { + "epoch": 8.320885200553251, + "grad_norm": 3.3766138553619385, + "learning_rate": 9.328415552481944e-06, + "log_odds_chosen": 11.727277755737305, + "log_odds_ratio": -3.624118471634574e-05, + "logits/chosen": -0.2877175211906433, + "logits/rejected": -0.28738832473754883, + "logps/chosen": -0.00013960532669443637, + "logps/rejected": -2.453561305999756, + "loss": 0.3224, + "nll_loss": 0.08059151470661163, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3960533578938339e-05, + "rewards/margins": 0.24534213542938232, + "rewards/rejected": -0.24535611271858215, + "step": 12032 + }, + { + "epoch": 8.321576763485478, + "grad_norm": 4.295798301696777, + "learning_rate": 9.324573536191795e-06, + "log_odds_chosen": 10.742616653442383, + "log_odds_ratio": -0.00027178574237041175, + "logits/chosen": -0.2917238473892212, + "logits/rejected": -0.33401742577552795, + "logps/chosen": -0.0009895612020045519, + "logps/rejected": -2.305877208709717, + "loss": 0.3827, + "nll_loss": 0.09564106166362762, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.895613038679585e-05, + "rewards/margins": 0.23048877716064453, + "rewards/rejected": -0.23058775067329407, + "step": 12033 + }, + { + "epoch": 8.322268326417705, + "grad_norm": 4.387558937072754, + "learning_rate": 9.320731519901644e-06, + "log_odds_chosen": 11.680660247802734, + "log_odds_ratio": -1.2796385817637201e-05, + "logits/chosen": -0.5847553014755249, + "logits/rejected": -0.6642088890075684, + "logps/chosen": -0.00010414030111860484, + "logps/rejected": -2.4134974479675293, + "loss": 0.5157, + "nll_loss": 0.1289164125919342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0414029020466842e-05, + "rewards/margins": 0.24133934080600739, + "rewards/rejected": -0.24134975671768188, + "step": 12034 + }, + { + "epoch": 8.322959889349931, + "grad_norm": 7.636205673217773, + "learning_rate": 9.316889503611496e-06, + "log_odds_chosen": 11.007831573486328, + "log_odds_ratio": -5.737036553910002e-05, + "logits/chosen": -0.4497582018375397, + "logits/rejected": -0.41020840406417847, + "logps/chosen": -7.495034515159205e-05, + "logps/rejected": -1.4946386814117432, + "loss": 0.2984, + "nll_loss": 0.0746031254529953, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.495034878957085e-06, + "rewards/margins": 0.14945638179779053, + "rewards/rejected": -0.14946386218070984, + "step": 12035 + }, + { + "epoch": 8.323651452282158, + "grad_norm": 4.195898532867432, + "learning_rate": 9.313047487321347e-06, + "log_odds_chosen": 11.578995704650879, + "log_odds_ratio": -1.3797025530948304e-05, + "logits/chosen": -0.23090381920337677, + "logits/rejected": -0.2601828873157501, + "logps/chosen": -8.795601752353832e-05, + "logps/rejected": -2.304961681365967, + "loss": 0.4116, + "nll_loss": 0.10290920734405518, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.795602298050653e-06, + "rewards/margins": 0.2304873913526535, + "rewards/rejected": -0.23049618303775787, + "step": 12036 + }, + { + "epoch": 8.324343015214385, + "grad_norm": 5.19864559173584, + "learning_rate": 9.309205471031198e-06, + "log_odds_chosen": 11.066790580749512, + "log_odds_ratio": -4.9645372200757265e-05, + "logits/chosen": -0.3808850646018982, + "logits/rejected": -0.4125874936580658, + "logps/chosen": -0.00017530072364024818, + "logps/rejected": -2.204151153564453, + "loss": 0.8283, + "nll_loss": 0.2070685774087906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7530072000226937e-05, + "rewards/margins": 0.22039756178855896, + "rewards/rejected": -0.22041510045528412, + "step": 12037 + }, + { + "epoch": 8.325034578146612, + "grad_norm": 3.293100118637085, + "learning_rate": 9.305363454741048e-06, + "log_odds_chosen": 11.432401657104492, + "log_odds_ratio": -3.613935405155644e-05, + "logits/chosen": -0.44712987542152405, + "logits/rejected": -0.4820421636104584, + "logps/chosen": -0.00016187304572667927, + "logps/rejected": -2.1475753784179688, + "loss": 0.348, + "nll_loss": 0.08698876202106476, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6187303117476404e-05, + "rewards/margins": 0.2147413194179535, + "rewards/rejected": -0.2147575169801712, + "step": 12038 + }, + { + "epoch": 8.325726141078839, + "grad_norm": 3.3491287231445312, + "learning_rate": 9.3015214384509e-06, + "log_odds_chosen": 11.12563705444336, + "log_odds_ratio": -0.00013439056056085974, + "logits/chosen": -0.35802096128463745, + "logits/rejected": -0.316034197807312, + "logps/chosen": -0.000603663909714669, + "logps/rejected": -2.807803153991699, + "loss": 0.8425, + "nll_loss": 0.2106177806854248, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0366393881849945e-05, + "rewards/margins": 0.28071993589401245, + "rewards/rejected": -0.2807803452014923, + "step": 12039 + }, + { + "epoch": 8.326417704011066, + "grad_norm": 5.166214942932129, + "learning_rate": 9.29767942216075e-06, + "log_odds_chosen": 10.950658798217773, + "log_odds_ratio": -2.832374957506545e-05, + "logits/chosen": -0.5255546569824219, + "logits/rejected": -0.5025449395179749, + "logps/chosen": -0.00038333734846673906, + "logps/rejected": -2.604123115539551, + "loss": 0.7587, + "nll_loss": 0.1896737813949585, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.833373193629086e-05, + "rewards/margins": 0.2603739798069, + "rewards/rejected": -0.260412335395813, + "step": 12040 + }, + { + "epoch": 8.327109266943292, + "grad_norm": 3.9560062885284424, + "learning_rate": 9.2938374058706e-06, + "log_odds_chosen": 11.090538024902344, + "log_odds_ratio": -7.66869488870725e-05, + "logits/chosen": -0.6735488176345825, + "logits/rejected": -0.6175107955932617, + "logps/chosen": -0.00021816727530676872, + "logps/rejected": -2.164520502090454, + "loss": 0.4316, + "nll_loss": 0.10789860039949417, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.181672607548535e-05, + "rewards/margins": 0.21643024682998657, + "rewards/rejected": -0.21645204722881317, + "step": 12041 + }, + { + "epoch": 8.32780082987552, + "grad_norm": 3.482357978820801, + "learning_rate": 9.289995389580453e-06, + "log_odds_chosen": 11.901061058044434, + "log_odds_ratio": -2.272317578899674e-05, + "logits/chosen": -0.21799947321414948, + "logits/rejected": -0.3493354916572571, + "logps/chosen": -0.0001254864619113505, + "logps/rejected": -2.394380569458008, + "loss": 0.3969, + "nll_loss": 0.09922779351472855, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2548645827337168e-05, + "rewards/margins": 0.23942552506923676, + "rewards/rejected": -0.23943805694580078, + "step": 12042 + }, + { + "epoch": 8.328492392807746, + "grad_norm": 12.327674865722656, + "learning_rate": 9.286153373290304e-06, + "log_odds_chosen": 10.347898483276367, + "log_odds_ratio": -7.437378371832892e-05, + "logits/chosen": -0.21823422610759735, + "logits/rejected": -0.22773411870002747, + "logps/chosen": -0.00033282540971413255, + "logps/rejected": -1.9625000953674316, + "loss": 0.3499, + "nll_loss": 0.08747636526823044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.32825438817963e-05, + "rewards/margins": 0.19621673226356506, + "rewards/rejected": -0.19625002145767212, + "step": 12043 + }, + { + "epoch": 8.329183955739973, + "grad_norm": 3.386772394180298, + "learning_rate": 9.282311357000153e-06, + "log_odds_chosen": 10.630339622497559, + "log_odds_ratio": -0.0003122264170087874, + "logits/chosen": -0.2610680162906647, + "logits/rejected": -0.30401110649108887, + "logps/chosen": -0.00017395528266206384, + "logps/rejected": -2.0399298667907715, + "loss": 0.3553, + "nll_loss": 0.08878161013126373, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7395528630004264e-05, + "rewards/margins": 0.2039755880832672, + "rewards/rejected": -0.20399296283721924, + "step": 12044 + }, + { + "epoch": 8.3298755186722, + "grad_norm": 4.99088191986084, + "learning_rate": 9.278469340710005e-06, + "log_odds_chosen": 10.918338775634766, + "log_odds_ratio": -0.00010383578046457842, + "logits/chosen": 0.016888275742530823, + "logits/rejected": -0.03546109050512314, + "logps/chosen": -0.00026136101223528385, + "logps/rejected": -2.3230907917022705, + "loss": 0.3535, + "nll_loss": 0.08835355192422867, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6136101951124147e-05, + "rewards/margins": 0.23228295147418976, + "rewards/rejected": -0.23230908811092377, + "step": 12045 + }, + { + "epoch": 8.330567081604427, + "grad_norm": 3.7360870838165283, + "learning_rate": 9.274627324419856e-06, + "log_odds_chosen": 10.23141098022461, + "log_odds_ratio": -0.00010504803503863513, + "logits/chosen": -0.5646274089813232, + "logits/rejected": -0.5441697835922241, + "logps/chosen": -0.00023329338000621647, + "logps/rejected": -1.9220106601715088, + "loss": 0.456, + "nll_loss": 0.11398179829120636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.332933945581317e-05, + "rewards/margins": 0.19217772781848907, + "rewards/rejected": -0.19220104813575745, + "step": 12046 + }, + { + "epoch": 8.331258644536653, + "grad_norm": 5.112025260925293, + "learning_rate": 9.270785308129707e-06, + "log_odds_chosen": 10.852737426757812, + "log_odds_ratio": -0.0002522445283830166, + "logits/chosen": -0.5581848621368408, + "logits/rejected": -0.5673970580101013, + "logps/chosen": -0.0002991710207425058, + "logps/rejected": -2.4401395320892334, + "loss": 0.556, + "nll_loss": 0.13898655772209167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9917100619059056e-05, + "rewards/margins": 0.24398404359817505, + "rewards/rejected": -0.2440139502286911, + "step": 12047 + }, + { + "epoch": 8.33195020746888, + "grad_norm": 4.515886306762695, + "learning_rate": 9.266943291839558e-06, + "log_odds_chosen": 10.767580032348633, + "log_odds_ratio": -7.623255078215152e-05, + "logits/chosen": -0.25946271419525146, + "logits/rejected": -0.3516313433647156, + "logps/chosen": -0.00024921749718487263, + "logps/rejected": -2.1053853034973145, + "loss": 0.5781, + "nll_loss": 0.14452941715717316, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.492174826329574e-05, + "rewards/margins": 0.21051359176635742, + "rewards/rejected": -0.21053853631019592, + "step": 12048 + }, + { + "epoch": 8.332641770401107, + "grad_norm": 5.479881763458252, + "learning_rate": 9.263101275549408e-06, + "log_odds_chosen": 10.361333847045898, + "log_odds_ratio": -0.00015310835442505777, + "logits/chosen": -0.43369877338409424, + "logits/rejected": -0.3672742545604706, + "logps/chosen": -0.0005247532390058041, + "logps/rejected": -2.2179088592529297, + "loss": 0.4675, + "nll_loss": 0.11685863882303238, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.247531953500584e-05, + "rewards/margins": 0.22173842787742615, + "rewards/rejected": -0.22179089486598969, + "step": 12049 + }, + { + "epoch": 8.333333333333334, + "grad_norm": 4.073111534118652, + "learning_rate": 9.259259259259259e-06, + "log_odds_chosen": 11.245848655700684, + "log_odds_ratio": -4.935469405609183e-05, + "logits/chosen": 0.07112825661897659, + "logits/rejected": 0.03870762512087822, + "logps/chosen": -0.00018877757247537374, + "logps/rejected": -2.5663628578186035, + "loss": 0.4235, + "nll_loss": 0.10587833821773529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.887775579234585e-05, + "rewards/margins": 0.2566174268722534, + "rewards/rejected": -0.25663629174232483, + "step": 12050 + }, + { + "epoch": 8.33402489626556, + "grad_norm": 3.9776432514190674, + "learning_rate": 9.255417242969112e-06, + "log_odds_chosen": 11.619805335998535, + "log_odds_ratio": -1.539092045277357e-05, + "logits/chosen": -0.29793232679367065, + "logits/rejected": -0.3029605746269226, + "logps/chosen": -0.00016180785314645618, + "logps/rejected": -2.5110225677490234, + "loss": 0.3194, + "nll_loss": 0.07984956353902817, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.61807856784435e-05, + "rewards/margins": 0.2510860562324524, + "rewards/rejected": -0.2511022388935089, + "step": 12051 + }, + { + "epoch": 8.334716459197788, + "grad_norm": 3.287205219268799, + "learning_rate": 9.251575226678962e-06, + "log_odds_chosen": 9.773767471313477, + "log_odds_ratio": -0.0002140636497642845, + "logits/chosen": -0.22501704096794128, + "logits/rejected": -0.22273699939250946, + "logps/chosen": -0.00026829185662791133, + "logps/rejected": -1.6013988256454468, + "loss": 0.3525, + "nll_loss": 0.08809895068407059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6829187845578417e-05, + "rewards/margins": 0.16011305153369904, + "rewards/rejected": -0.16013988852500916, + "step": 12052 + }, + { + "epoch": 8.335408022130014, + "grad_norm": 3.6830883026123047, + "learning_rate": 9.247733210388811e-06, + "log_odds_chosen": 11.520283699035645, + "log_odds_ratio": -1.649722980801016e-05, + "logits/chosen": -0.09420540928840637, + "logits/rejected": -0.09450474381446838, + "logps/chosen": -0.0001776470453478396, + "logps/rejected": -2.614943027496338, + "loss": 0.44, + "nll_loss": 0.11000753939151764, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.776470526237972e-05, + "rewards/margins": 0.2614765465259552, + "rewards/rejected": -0.26149433851242065, + "step": 12053 + }, + { + "epoch": 8.336099585062241, + "grad_norm": 3.466386318206787, + "learning_rate": 9.243891194098664e-06, + "log_odds_chosen": 11.200641632080078, + "log_odds_ratio": -1.9975921532022767e-05, + "logits/chosen": -0.3815682828426361, + "logits/rejected": -0.4218658208847046, + "logps/chosen": -0.00012150880502304062, + "logps/rejected": -2.148578643798828, + "loss": 0.3755, + "nll_loss": 0.09387080371379852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2150880138506182e-05, + "rewards/margins": 0.21484573185443878, + "rewards/rejected": -0.21485787630081177, + "step": 12054 + }, + { + "epoch": 8.336791147994468, + "grad_norm": 8.70840835571289, + "learning_rate": 9.240049177808515e-06, + "log_odds_chosen": 10.134420394897461, + "log_odds_ratio": -0.0004330216906964779, + "logits/chosen": -0.3002817630767822, + "logits/rejected": -0.43083691596984863, + "logps/chosen": -0.000349003414157778, + "logps/rejected": -1.3802651166915894, + "loss": 0.3248, + "nll_loss": 0.08115517348051071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.490033850539476e-05, + "rewards/margins": 0.13799162209033966, + "rewards/rejected": -0.13802652060985565, + "step": 12055 + }, + { + "epoch": 8.337482710926695, + "grad_norm": 3.6529688835144043, + "learning_rate": 9.236207161518365e-06, + "log_odds_chosen": 9.931303024291992, + "log_odds_ratio": -0.0006245232652872801, + "logits/chosen": -0.2061011642217636, + "logits/rejected": -0.22699670493602753, + "logps/chosen": -0.0006724453414790332, + "logps/rejected": -1.889383316040039, + "loss": 0.5147, + "nll_loss": 0.128619983792305, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.724453851347789e-05, + "rewards/margins": 0.1888710856437683, + "rewards/rejected": -0.18893831968307495, + "step": 12056 + }, + { + "epoch": 8.338174273858922, + "grad_norm": 4.977734565734863, + "learning_rate": 9.232365145228216e-06, + "log_odds_chosen": 11.125265121459961, + "log_odds_ratio": -0.00022959726629778743, + "logits/chosen": -0.04439122974872589, + "logits/rejected": -0.02409200370311737, + "logps/chosen": -0.00017756447778083384, + "logps/rejected": -2.2842795848846436, + "loss": 0.5022, + "nll_loss": 0.12553563714027405, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7756448869477026e-05, + "rewards/margins": 0.22841018438339233, + "rewards/rejected": -0.2284279465675354, + "step": 12057 + }, + { + "epoch": 8.338865836791149, + "grad_norm": 4.1805853843688965, + "learning_rate": 9.228523128938067e-06, + "log_odds_chosen": 11.242300987243652, + "log_odds_ratio": -6.137518357718363e-05, + "logits/chosen": -0.17572718858718872, + "logits/rejected": -0.16212934255599976, + "logps/chosen": -0.0001616746449144557, + "logps/rejected": -2.141157865524292, + "loss": 0.3299, + "nll_loss": 0.08248007297515869, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6167465219041333e-05, + "rewards/margins": 0.21409964561462402, + "rewards/rejected": -0.21411579847335815, + "step": 12058 + }, + { + "epoch": 8.339557399723375, + "grad_norm": 3.5841095447540283, + "learning_rate": 9.224681112647918e-06, + "log_odds_chosen": 11.254419326782227, + "log_odds_ratio": -0.0021393040660768747, + "logits/chosen": -0.2600412964820862, + "logits/rejected": -0.31797146797180176, + "logps/chosen": -0.016505086794495583, + "logps/rejected": -3.2776453495025635, + "loss": 0.4008, + "nll_loss": 0.09999529272317886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001650508726015687, + "rewards/margins": 0.32611405849456787, + "rewards/rejected": -0.3277645409107208, + "step": 12059 + }, + { + "epoch": 8.340248962655602, + "grad_norm": 5.044835090637207, + "learning_rate": 9.22083909635777e-06, + "log_odds_chosen": 11.485102653503418, + "log_odds_ratio": -1.8132033801521175e-05, + "logits/chosen": -0.4359765648841858, + "logits/rejected": -0.48265624046325684, + "logps/chosen": -0.0002121072611771524, + "logps/rejected": -2.7514710426330566, + "loss": 0.4768, + "nll_loss": 0.119210384786129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.121072611771524e-05, + "rewards/margins": 0.2751259207725525, + "rewards/rejected": -0.27514714002609253, + "step": 12060 + }, + { + "epoch": 8.340940525587829, + "grad_norm": 5.355807304382324, + "learning_rate": 9.21699708006762e-06, + "log_odds_chosen": 11.519990921020508, + "log_odds_ratio": -1.8206472304882482e-05, + "logits/chosen": -0.1301870346069336, + "logits/rejected": -0.20199596881866455, + "logps/chosen": -0.00019549021089915186, + "logps/rejected": -2.5161893367767334, + "loss": 0.4364, + "nll_loss": 0.10910181701183319, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.954902290890459e-05, + "rewards/margins": 0.25159937143325806, + "rewards/rejected": -0.2516189217567444, + "step": 12061 + }, + { + "epoch": 8.341632088520056, + "grad_norm": 5.413180828094482, + "learning_rate": 9.21315506377747e-06, + "log_odds_chosen": 10.790306091308594, + "log_odds_ratio": -6.160003977129236e-05, + "logits/chosen": -0.14251375198364258, + "logits/rejected": -0.2059217244386673, + "logps/chosen": -0.00035422618384473026, + "logps/rejected": -2.222874164581299, + "loss": 0.3951, + "nll_loss": 0.09876702725887299, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.542262129485607e-05, + "rewards/margins": 0.2222519963979721, + "rewards/rejected": -0.22228741645812988, + "step": 12062 + }, + { + "epoch": 8.342323651452283, + "grad_norm": 3.652357339859009, + "learning_rate": 9.209313047487322e-06, + "log_odds_chosen": 10.979130744934082, + "log_odds_ratio": -0.0004109518777113408, + "logits/chosen": -0.09868557006120682, + "logits/rejected": -0.17369824647903442, + "logps/chosen": -0.0015244006644934416, + "logps/rejected": -2.420994997024536, + "loss": 0.4998, + "nll_loss": 0.12490478903055191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015244007227011025, + "rewards/margins": 0.24194706976413727, + "rewards/rejected": -0.24209947884082794, + "step": 12063 + }, + { + "epoch": 8.34301521438451, + "grad_norm": 3.9147958755493164, + "learning_rate": 9.205471031197173e-06, + "log_odds_chosen": 10.266362190246582, + "log_odds_ratio": -0.00010355141421314329, + "logits/chosen": -0.11190681904554367, + "logits/rejected": -0.11449551582336426, + "logps/chosen": -0.0005471897311508656, + "logps/rejected": -2.3507325649261475, + "loss": 0.4864, + "nll_loss": 0.12158728390932083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.471897384268232e-05, + "rewards/margins": 0.2350185364484787, + "rewards/rejected": -0.2350732535123825, + "step": 12064 + }, + { + "epoch": 8.343706777316736, + "grad_norm": 4.411325931549072, + "learning_rate": 9.201629014907024e-06, + "log_odds_chosen": 11.177576065063477, + "log_odds_ratio": -3.77266296709422e-05, + "logits/chosen": -0.574793815612793, + "logits/rejected": -0.5641138553619385, + "logps/chosen": -0.00012600421905517578, + "logps/rejected": -1.910329818725586, + "loss": 0.4758, + "nll_loss": 0.11893565207719803, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.26004224512144e-05, + "rewards/margins": 0.19102038443088531, + "rewards/rejected": -0.19103297591209412, + "step": 12065 + }, + { + "epoch": 8.344398340248963, + "grad_norm": 6.895480632781982, + "learning_rate": 9.197786998616875e-06, + "log_odds_chosen": 11.2932710647583, + "log_odds_ratio": -3.7191490264376625e-05, + "logits/chosen": -0.4668288230895996, + "logits/rejected": -0.6262189149856567, + "logps/chosen": -0.00012896041152998805, + "logps/rejected": -2.0346803665161133, + "loss": 0.429, + "nll_loss": 0.10724306106567383, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2896041880594566e-05, + "rewards/margins": 0.2034551203250885, + "rewards/rejected": -0.20346803963184357, + "step": 12066 + }, + { + "epoch": 8.34508990318119, + "grad_norm": 3.7116177082061768, + "learning_rate": 9.193944982326725e-06, + "log_odds_chosen": 10.76824951171875, + "log_odds_ratio": -7.783662294968963e-05, + "logits/chosen": -0.4730428457260132, + "logits/rejected": -0.5344309210777283, + "logps/chosen": -0.00038928643334656954, + "logps/rejected": -1.9175119400024414, + "loss": 0.4823, + "nll_loss": 0.12056785821914673, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8928643334656954e-05, + "rewards/margins": 0.19171224534511566, + "rewards/rejected": -0.19175118207931519, + "step": 12067 + }, + { + "epoch": 8.345781466113417, + "grad_norm": 3.591780662536621, + "learning_rate": 9.190102966036576e-06, + "log_odds_chosen": 10.766321182250977, + "log_odds_ratio": -4.374091440695338e-05, + "logits/chosen": -0.4310253858566284, + "logits/rejected": -0.47409093379974365, + "logps/chosen": -0.00013906153617426753, + "logps/rejected": -1.8614153861999512, + "loss": 0.3285, + "nll_loss": 0.08212421089410782, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3906153981224634e-05, + "rewards/margins": 0.18612763285636902, + "rewards/rejected": -0.18614153563976288, + "step": 12068 + }, + { + "epoch": 8.346473029045644, + "grad_norm": 4.2191009521484375, + "learning_rate": 9.186260949746428e-06, + "log_odds_chosen": 11.604708671569824, + "log_odds_ratio": -1.1452235412434675e-05, + "logits/chosen": -0.24239104986190796, + "logits/rejected": -0.2985646426677704, + "logps/chosen": -0.00012208960833959281, + "logps/rejected": -2.358849048614502, + "loss": 0.368, + "nll_loss": 0.09199665486812592, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.22089604701614e-05, + "rewards/margins": 0.23587268590927124, + "rewards/rejected": -0.2358849048614502, + "step": 12069 + }, + { + "epoch": 8.34716459197787, + "grad_norm": 2.925367593765259, + "learning_rate": 9.18241893345628e-06, + "log_odds_chosen": 10.485984802246094, + "log_odds_ratio": -6.372129428200424e-05, + "logits/chosen": -0.42998701333999634, + "logits/rejected": -0.4588412344455719, + "logps/chosen": -0.00015813851496204734, + "logps/rejected": -1.2758333683013916, + "loss": 0.3471, + "nll_loss": 0.08675874769687653, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5813851860002615e-05, + "rewards/margins": 0.12756752967834473, + "rewards/rejected": -0.1275833398103714, + "step": 12070 + }, + { + "epoch": 8.347856154910097, + "grad_norm": 5.613522529602051, + "learning_rate": 9.178576917166128e-06, + "log_odds_chosen": 11.866766929626465, + "log_odds_ratio": -1.0126213965122588e-05, + "logits/chosen": -0.06285068392753601, + "logits/rejected": -0.13774362206459045, + "logps/chosen": -0.00015430677740368992, + "logps/rejected": -2.7546706199645996, + "loss": 0.5627, + "nll_loss": 0.1406654268503189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5430678104166873e-05, + "rewards/margins": 0.27545166015625, + "rewards/rejected": -0.27546706795692444, + "step": 12071 + }, + { + "epoch": 8.348547717842324, + "grad_norm": 5.118077278137207, + "learning_rate": 9.174734900875979e-06, + "log_odds_chosen": 11.430276870727539, + "log_odds_ratio": -5.91300122323446e-05, + "logits/chosen": -0.19762642681598663, + "logits/rejected": -0.2530553340911865, + "logps/chosen": -0.00019286992028355598, + "logps/rejected": -2.1932029724121094, + "loss": 0.4997, + "nll_loss": 0.12492159754037857, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.92869920283556e-05, + "rewards/margins": 0.2193010300397873, + "rewards/rejected": -0.21932031214237213, + "step": 12072 + }, + { + "epoch": 8.349239280774551, + "grad_norm": 4.854573726654053, + "learning_rate": 9.170892884585831e-06, + "log_odds_chosen": 11.556985855102539, + "log_odds_ratio": -5.447504372568801e-05, + "logits/chosen": -0.3260948359966278, + "logits/rejected": -0.3692905008792877, + "logps/chosen": -0.00039603933691978455, + "logps/rejected": -2.43080735206604, + "loss": 0.7249, + "nll_loss": 0.18123196065425873, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.960393587476574e-05, + "rewards/margins": 0.24304112792015076, + "rewards/rejected": -0.243080735206604, + "step": 12073 + }, + { + "epoch": 8.349930843706778, + "grad_norm": 4.689001083374023, + "learning_rate": 9.167050868295682e-06, + "log_odds_chosen": 10.60562515258789, + "log_odds_ratio": -6.536449654959142e-05, + "logits/chosen": -0.5237111449241638, + "logits/rejected": -0.4365498125553131, + "logps/chosen": -0.00016227777814492583, + "logps/rejected": -1.987557291984558, + "loss": 0.2749, + "nll_loss": 0.0687284991145134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.62277756317053e-05, + "rewards/margins": 0.19873949885368347, + "rewards/rejected": -0.19875574111938477, + "step": 12074 + }, + { + "epoch": 8.350622406639005, + "grad_norm": 3.7940571308135986, + "learning_rate": 9.163208852005533e-06, + "log_odds_chosen": 10.647010803222656, + "log_odds_ratio": -0.00011371282016625628, + "logits/chosen": -0.14266344904899597, + "logits/rejected": -0.1224905252456665, + "logps/chosen": -0.0002618691651150584, + "logps/rejected": -1.930260419845581, + "loss": 0.3824, + "nll_loss": 0.09559597074985504, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6186917239101604e-05, + "rewards/margins": 0.19299986958503723, + "rewards/rejected": -0.19302606582641602, + "step": 12075 + }, + { + "epoch": 8.351313969571232, + "grad_norm": 3.318660020828247, + "learning_rate": 9.159366835715384e-06, + "log_odds_chosen": 9.893108367919922, + "log_odds_ratio": -0.0004447439860086888, + "logits/chosen": -0.5518521070480347, + "logits/rejected": -0.5224223136901855, + "logps/chosen": -0.00021021733118686825, + "logps/rejected": -1.7410143613815308, + "loss": 0.3573, + "nll_loss": 0.08928947895765305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1021733118686825e-05, + "rewards/margins": 0.17408041656017303, + "rewards/rejected": -0.17410144209861755, + "step": 12076 + }, + { + "epoch": 8.352005532503458, + "grad_norm": 5.724148750305176, + "learning_rate": 9.155524819425234e-06, + "log_odds_chosen": 11.430328369140625, + "log_odds_ratio": -3.109716999460943e-05, + "logits/chosen": -0.3521016538143158, + "logits/rejected": -0.31222349405288696, + "logps/chosen": -9.736038191476837e-05, + "logps/rejected": -2.2274422645568848, + "loss": 0.3271, + "nll_loss": 0.08176403492689133, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.736038919072598e-06, + "rewards/margins": 0.2227345108985901, + "rewards/rejected": -0.22274425625801086, + "step": 12077 + }, + { + "epoch": 8.352697095435685, + "grad_norm": 4.420742988586426, + "learning_rate": 9.151682803135085e-06, + "log_odds_chosen": 11.0013427734375, + "log_odds_ratio": -3.3046468161046505e-05, + "logits/chosen": -0.07796618342399597, + "logits/rejected": 0.006830569356679916, + "logps/chosen": -0.00027067813789471984, + "logps/rejected": -2.153871536254883, + "loss": 0.7758, + "nll_loss": 0.19394733011722565, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7067813789471984e-05, + "rewards/margins": 0.21536009013652802, + "rewards/rejected": -0.21538715064525604, + "step": 12078 + }, + { + "epoch": 8.353388658367912, + "grad_norm": 6.765323162078857, + "learning_rate": 9.147840786844938e-06, + "log_odds_chosen": 10.374045372009277, + "log_odds_ratio": -0.0005182506865821779, + "logits/chosen": -0.1451987773180008, + "logits/rejected": -0.16935515403747559, + "logps/chosen": -0.0005734489532187581, + "logps/rejected": -2.156761407852173, + "loss": 0.6643, + "nll_loss": 0.16601383686065674, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734489968745038e-05, + "rewards/margins": 0.2156187891960144, + "rewards/rejected": -0.21567615866661072, + "step": 12079 + }, + { + "epoch": 8.354080221300139, + "grad_norm": 3.6350746154785156, + "learning_rate": 9.143998770554787e-06, + "log_odds_chosen": 10.419280052185059, + "log_odds_ratio": -0.00027461652643978596, + "logits/chosen": -0.03309977054595947, + "logits/rejected": 0.003689005970954895, + "logps/chosen": -0.0006293084588833153, + "logps/rejected": -2.2337255477905273, + "loss": 0.4066, + "nll_loss": 0.10161426663398743, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.293084879871458e-05, + "rewards/margins": 0.22330963611602783, + "rewards/rejected": -0.22337256371974945, + "step": 12080 + }, + { + "epoch": 8.354771784232366, + "grad_norm": 11.4356050491333, + "learning_rate": 9.140156754264637e-06, + "log_odds_chosen": 11.893506050109863, + "log_odds_ratio": -1.4665483831777237e-05, + "logits/chosen": -0.024583622813224792, + "logits/rejected": -0.10612979531288147, + "logps/chosen": -0.0001170150499092415, + "logps/rejected": -2.6500325202941895, + "loss": 0.4769, + "nll_loss": 0.11923235654830933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.170150517282309e-05, + "rewards/margins": 0.26499155163764954, + "rewards/rejected": -0.2650032639503479, + "step": 12081 + }, + { + "epoch": 8.355463347164592, + "grad_norm": 9.346952438354492, + "learning_rate": 9.13631473797449e-06, + "log_odds_chosen": 10.354605674743652, + "log_odds_ratio": -4.912112854071893e-05, + "logits/chosen": -0.4560277462005615, + "logits/rejected": -0.4287424683570862, + "logps/chosen": -0.00019161278032697737, + "logps/rejected": -1.747044324874878, + "loss": 0.3942, + "nll_loss": 0.09854578971862793, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9161278032697737e-05, + "rewards/margins": 0.1746852695941925, + "rewards/rejected": -0.1747044324874878, + "step": 12082 + }, + { + "epoch": 8.35615491009682, + "grad_norm": 3.4023470878601074, + "learning_rate": 9.13247272168434e-06, + "log_odds_chosen": 11.008105278015137, + "log_odds_ratio": -6.754696369171143e-05, + "logits/chosen": -0.6563997268676758, + "logits/rejected": -0.7640130519866943, + "logps/chosen": -7.45670186006464e-05, + "logps/rejected": -1.789588451385498, + "loss": 0.319, + "nll_loss": 0.07973403483629227, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.456702405761462e-06, + "rewards/margins": 0.17895139753818512, + "rewards/rejected": -0.17895883321762085, + "step": 12083 + }, + { + "epoch": 8.356846473029046, + "grad_norm": 4.0229315757751465, + "learning_rate": 9.128630705394191e-06, + "log_odds_chosen": 11.00053596496582, + "log_odds_ratio": -9.482467430643737e-05, + "logits/chosen": -0.7036505341529846, + "logits/rejected": -0.7025102376937866, + "logps/chosen": -0.0002775189932435751, + "logps/rejected": -2.4160661697387695, + "loss": 0.3879, + "nll_loss": 0.09697068482637405, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.775189932435751e-05, + "rewards/margins": 0.2415788620710373, + "rewards/rejected": -0.24160662293434143, + "step": 12084 + }, + { + "epoch": 8.357538035961273, + "grad_norm": 4.352490425109863, + "learning_rate": 9.124788689104042e-06, + "log_odds_chosen": 10.189447402954102, + "log_odds_ratio": -0.0006835731328465044, + "logits/chosen": -0.39894899725914, + "logits/rejected": -0.5076238512992859, + "logps/chosen": -0.0012036004336550832, + "logps/rejected": -1.9045734405517578, + "loss": 0.5762, + "nll_loss": 0.14398697018623352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012036004045512527, + "rewards/margins": 0.19033700227737427, + "rewards/rejected": -0.19045734405517578, + "step": 12085 + }, + { + "epoch": 8.3582295988935, + "grad_norm": 5.3557658195495605, + "learning_rate": 9.120946672813893e-06, + "log_odds_chosen": 11.98507308959961, + "log_odds_ratio": -0.00010645409201970324, + "logits/chosen": -0.4352482259273529, + "logits/rejected": -0.5673301815986633, + "logps/chosen": -0.00016585568664595485, + "logps/rejected": -2.974811553955078, + "loss": 0.7435, + "nll_loss": 0.1858687847852707, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6585569028393365e-05, + "rewards/margins": 0.2974645495414734, + "rewards/rejected": -0.29748111963272095, + "step": 12086 + }, + { + "epoch": 8.358921161825727, + "grad_norm": 5.833132743835449, + "learning_rate": 9.117104656523744e-06, + "log_odds_chosen": 11.723255157470703, + "log_odds_ratio": -1.0014520739787258e-05, + "logits/chosen": 0.12011925131082535, + "logits/rejected": 0.16198879480361938, + "logps/chosen": -0.00010595491039566696, + "logps/rejected": -2.3934383392333984, + "loss": 0.4981, + "nll_loss": 0.12452814728021622, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0595492312859278e-05, + "rewards/margins": 0.23933324217796326, + "rewards/rejected": -0.2393438220024109, + "step": 12087 + }, + { + "epoch": 8.359612724757953, + "grad_norm": 5.850451469421387, + "learning_rate": 9.113262640233596e-06, + "log_odds_chosen": 10.695171356201172, + "log_odds_ratio": -0.00017267999646719545, + "logits/chosen": -0.3322739601135254, + "logits/rejected": -0.3321690261363983, + "logps/chosen": -0.00021232443396002054, + "logps/rejected": -2.0772128105163574, + "loss": 0.3142, + "nll_loss": 0.07852254807949066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.123244121321477e-05, + "rewards/margins": 0.20770004391670227, + "rewards/rejected": -0.2077212780714035, + "step": 12088 + }, + { + "epoch": 8.36030428769018, + "grad_norm": 4.204518795013428, + "learning_rate": 9.109420623943445e-06, + "log_odds_chosen": 11.224333763122559, + "log_odds_ratio": -2.6536186851444654e-05, + "logits/chosen": -0.47312721610069275, + "logits/rejected": -0.5358306169509888, + "logps/chosen": -0.0001297694689128548, + "logps/rejected": -2.1915783882141113, + "loss": 0.379, + "nll_loss": 0.09475453197956085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2976946891285479e-05, + "rewards/margins": 0.21914485096931458, + "rewards/rejected": -0.21915782988071442, + "step": 12089 + }, + { + "epoch": 8.360995850622407, + "grad_norm": 2.6959381103515625, + "learning_rate": 9.105578607653296e-06, + "log_odds_chosen": 10.964401245117188, + "log_odds_ratio": -4.955555414198898e-05, + "logits/chosen": -0.19748425483703613, + "logits/rejected": -0.23469477891921997, + "logps/chosen": -0.00014199868019204587, + "logps/rejected": -1.934066891670227, + "loss": 0.3309, + "nll_loss": 0.08271662145853043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4199868019204587e-05, + "rewards/margins": 0.19339248538017273, + "rewards/rejected": -0.19340670108795166, + "step": 12090 + }, + { + "epoch": 8.361687413554634, + "grad_norm": 3.581792116165161, + "learning_rate": 9.101736591363148e-06, + "log_odds_chosen": 10.874795913696289, + "log_odds_ratio": -0.0003300320531707257, + "logits/chosen": -0.12171950191259384, + "logits/rejected": 0.01747075468301773, + "logps/chosen": -0.00027409259928390384, + "logps/rejected": -1.9720582962036133, + "loss": 0.377, + "nll_loss": 0.09421323239803314, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.740925810940098e-05, + "rewards/margins": 0.1971784085035324, + "rewards/rejected": -0.1972058117389679, + "step": 12091 + }, + { + "epoch": 8.36237897648686, + "grad_norm": 5.888037204742432, + "learning_rate": 9.097894575072999e-06, + "log_odds_chosen": 9.601099014282227, + "log_odds_ratio": -0.0004326591733843088, + "logits/chosen": -0.6578595638275146, + "logits/rejected": -0.747802734375, + "logps/chosen": -0.0004255325475241989, + "logps/rejected": -1.3348298072814941, + "loss": 0.3399, + "nll_loss": 0.08492371439933777, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.255325620761141e-05, + "rewards/margins": 0.13344043493270874, + "rewards/rejected": -0.13348299264907837, + "step": 12092 + }, + { + "epoch": 8.363070539419088, + "grad_norm": 3.4145381450653076, + "learning_rate": 9.09405255878285e-06, + "log_odds_chosen": 11.088571548461914, + "log_odds_ratio": -6.40248617855832e-05, + "logits/chosen": 0.2030135989189148, + "logits/rejected": 0.13781681656837463, + "logps/chosen": -0.0005639658775180578, + "logps/rejected": -2.5252761840820312, + "loss": 0.4239, + "nll_loss": 0.10596401244401932, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.639659138978459e-05, + "rewards/margins": 0.2524712085723877, + "rewards/rejected": -0.2525276243686676, + "step": 12093 + }, + { + "epoch": 8.363762102351314, + "grad_norm": 3.536792039871216, + "learning_rate": 9.0902105424927e-06, + "log_odds_chosen": 10.259236335754395, + "log_odds_ratio": -0.0006960731698200107, + "logits/chosen": -0.039343543350696564, + "logits/rejected": -0.08427160978317261, + "logps/chosen": -0.0003419583954382688, + "logps/rejected": -1.974465012550354, + "loss": 0.4182, + "nll_loss": 0.10447600483894348, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.419584027142264e-05, + "rewards/margins": 0.19741231203079224, + "rewards/rejected": -0.19744651019573212, + "step": 12094 + }, + { + "epoch": 8.364453665283541, + "grad_norm": 5.547048568725586, + "learning_rate": 9.086368526202551e-06, + "log_odds_chosen": 10.491537094116211, + "log_odds_ratio": -0.00026805573725141585, + "logits/chosen": -0.1757640242576599, + "logits/rejected": -0.14906059205532074, + "logps/chosen": -0.002750436309725046, + "logps/rejected": -2.2830355167388916, + "loss": 0.7167, + "nll_loss": 0.17914150655269623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00027504368335939944, + "rewards/margins": 0.22802849113941193, + "rewards/rejected": -0.22830355167388916, + "step": 12095 + }, + { + "epoch": 8.365145228215768, + "grad_norm": 3.5837619304656982, + "learning_rate": 9.082526509912402e-06, + "log_odds_chosen": 11.178289413452148, + "log_odds_ratio": -0.0002664781059138477, + "logits/chosen": -0.34205400943756104, + "logits/rejected": -0.44289296865463257, + "logps/chosen": -0.00012246904952917248, + "logps/rejected": -1.9099770784378052, + "loss": 0.4891, + "nll_loss": 0.12223700433969498, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2246905498614069e-05, + "rewards/margins": 0.19098547101020813, + "rewards/rejected": -0.19099771976470947, + "step": 12096 + }, + { + "epoch": 8.365836791147995, + "grad_norm": 3.8647124767303467, + "learning_rate": 9.078684493622255e-06, + "log_odds_chosen": 10.128311157226562, + "log_odds_ratio": -7.842542981961742e-05, + "logits/chosen": -0.407065749168396, + "logits/rejected": -0.47137102484703064, + "logps/chosen": -0.00037784138112328947, + "logps/rejected": -1.7541797161102295, + "loss": 0.4091, + "nll_loss": 0.10227620601654053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7784142477903515e-05, + "rewards/margins": 0.17538020014762878, + "rewards/rejected": -0.17541798949241638, + "step": 12097 + }, + { + "epoch": 8.366528354080222, + "grad_norm": 5.806434154510498, + "learning_rate": 9.074842477332105e-06, + "log_odds_chosen": 10.017135620117188, + "log_odds_ratio": -0.0001657155662542209, + "logits/chosen": -0.41675513982772827, + "logits/rejected": -0.4195055067539215, + "logps/chosen": -0.0003376719541847706, + "logps/rejected": -1.8514747619628906, + "loss": 0.3504, + "nll_loss": 0.08758871257305145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3767199056455866e-05, + "rewards/margins": 0.18511369824409485, + "rewards/rejected": -0.1851474642753601, + "step": 12098 + }, + { + "epoch": 8.367219917012449, + "grad_norm": 3.287838935852051, + "learning_rate": 9.071000461041954e-06, + "log_odds_chosen": 10.787469863891602, + "log_odds_ratio": -0.00012153637362644076, + "logits/chosen": -0.18966057896614075, + "logits/rejected": -0.2829188108444214, + "logps/chosen": -0.0003287080326117575, + "logps/rejected": -2.6297342777252197, + "loss": 0.534, + "nll_loss": 0.13347730040550232, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2870804716367275e-05, + "rewards/margins": 0.26294058561325073, + "rewards/rejected": -0.262973427772522, + "step": 12099 + }, + { + "epoch": 8.367911479944675, + "grad_norm": 4.554553031921387, + "learning_rate": 9.067158444751807e-06, + "log_odds_chosen": 11.362350463867188, + "log_odds_ratio": -2.019827297772281e-05, + "logits/chosen": -0.17572999000549316, + "logits/rejected": -0.281780481338501, + "logps/chosen": -0.00023074873024597764, + "logps/rejected": -2.5494611263275146, + "loss": 0.35, + "nll_loss": 0.08750632405281067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307487557118293e-05, + "rewards/margins": 0.2549230456352234, + "rewards/rejected": -0.25494611263275146, + "step": 12100 + }, + { + "epoch": 8.368603042876902, + "grad_norm": 5.976508140563965, + "learning_rate": 9.063316428461658e-06, + "log_odds_chosen": 11.16000747680664, + "log_odds_ratio": -4.418138269102201e-05, + "logits/chosen": -0.12001601606607437, + "logits/rejected": -0.13546162843704224, + "logps/chosen": -0.0001884956145659089, + "logps/rejected": -2.1030831336975098, + "loss": 0.4924, + "nll_loss": 0.12308388948440552, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.884956145659089e-05, + "rewards/margins": 0.21028946340084076, + "rewards/rejected": -0.21030831336975098, + "step": 12101 + }, + { + "epoch": 8.369294605809129, + "grad_norm": 4.149490833282471, + "learning_rate": 9.059474412171508e-06, + "log_odds_chosen": 11.068801879882812, + "log_odds_ratio": -0.00016131362644955516, + "logits/chosen": 0.06716214120388031, + "logits/rejected": 6.300210952758789e-05, + "logps/chosen": -0.00033710466232150793, + "logps/rejected": -2.0708696842193604, + "loss": 0.4826, + "nll_loss": 0.12063352763652802, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.371046477695927e-05, + "rewards/margins": 0.2070532739162445, + "rewards/rejected": -0.2070869505405426, + "step": 12102 + }, + { + "epoch": 8.369986168741356, + "grad_norm": 4.572751522064209, + "learning_rate": 9.055632395881359e-06, + "log_odds_chosen": 10.715896606445312, + "log_odds_ratio": -0.0006129711982794106, + "logits/chosen": -0.6699153184890747, + "logits/rejected": -0.7459685802459717, + "logps/chosen": -0.0007518876809626818, + "logps/rejected": -2.247579574584961, + "loss": 0.4651, + "nll_loss": 0.11621610075235367, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.518876373069361e-05, + "rewards/margins": 0.2246827781200409, + "rewards/rejected": -0.22475793957710266, + "step": 12103 + }, + { + "epoch": 8.370677731673583, + "grad_norm": 3.426314353942871, + "learning_rate": 9.05179037959121e-06, + "log_odds_chosen": 10.887182235717773, + "log_odds_ratio": -5.992503429297358e-05, + "logits/chosen": -0.32155394554138184, + "logits/rejected": -0.3380813002586365, + "logps/chosen": -0.0007114798063412309, + "logps/rejected": -2.13638973236084, + "loss": 0.3702, + "nll_loss": 0.0925537571310997, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.114798063412309e-05, + "rewards/margins": 0.2135678231716156, + "rewards/rejected": -0.21363899111747742, + "step": 12104 + }, + { + "epoch": 8.37136929460581, + "grad_norm": 3.439755916595459, + "learning_rate": 9.04794836330106e-06, + "log_odds_chosen": 10.813738822937012, + "log_odds_ratio": -3.219860082026571e-05, + "logits/chosen": -0.46938440203666687, + "logits/rejected": -0.43001264333724976, + "logps/chosen": -0.0005872396286576986, + "logps/rejected": -2.688671588897705, + "loss": 0.5214, + "nll_loss": 0.13035404682159424, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8723959227791056e-05, + "rewards/margins": 0.26880842447280884, + "rewards/rejected": -0.2688671946525574, + "step": 12105 + }, + { + "epoch": 8.372060857538036, + "grad_norm": 4.733785629272461, + "learning_rate": 9.044106347010911e-06, + "log_odds_chosen": 9.735095977783203, + "log_odds_ratio": -0.0002063530555460602, + "logits/chosen": -0.41500332951545715, + "logits/rejected": -0.36877870559692383, + "logps/chosen": -0.00025138130877166986, + "logps/rejected": -1.0509212017059326, + "loss": 0.3176, + "nll_loss": 0.07937469333410263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.513812796678394e-05, + "rewards/margins": 0.10506697744131088, + "rewards/rejected": -0.1050921157002449, + "step": 12106 + }, + { + "epoch": 8.372752420470263, + "grad_norm": 3.8326022624969482, + "learning_rate": 9.040264330720764e-06, + "log_odds_chosen": 11.010136604309082, + "log_odds_ratio": -4.120486482861452e-05, + "logits/chosen": -0.9739433526992798, + "logits/rejected": -0.9723539352416992, + "logps/chosen": -0.000342509156325832, + "logps/rejected": -2.3533740043640137, + "loss": 0.3739, + "nll_loss": 0.09346066415309906, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.425091927056201e-05, + "rewards/margins": 0.23530316352844238, + "rewards/rejected": -0.23533740639686584, + "step": 12107 + }, + { + "epoch": 8.37344398340249, + "grad_norm": 3.7740235328674316, + "learning_rate": 9.036422314430613e-06, + "log_odds_chosen": 11.705621719360352, + "log_odds_ratio": -0.00019673358474392444, + "logits/chosen": -0.33428582549095154, + "logits/rejected": -0.41250723600387573, + "logps/chosen": -0.00018244172679260373, + "logps/rejected": -2.9301788806915283, + "loss": 0.307, + "nll_loss": 0.07673575729131699, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8244172679260373e-05, + "rewards/margins": 0.29299962520599365, + "rewards/rejected": -0.2930178940296173, + "step": 12108 + }, + { + "epoch": 8.374135546334717, + "grad_norm": 7.275375843048096, + "learning_rate": 9.032580298140464e-06, + "log_odds_chosen": 11.373159408569336, + "log_odds_ratio": -4.282902227714658e-05, + "logits/chosen": -0.09751863777637482, + "logits/rejected": -0.10958105325698853, + "logps/chosen": -0.0001243318838533014, + "logps/rejected": -2.1905131340026855, + "loss": 0.6857, + "nll_loss": 0.17140938341617584, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2433189112925902e-05, + "rewards/margins": 0.21903890371322632, + "rewards/rejected": -0.219051331281662, + "step": 12109 + }, + { + "epoch": 8.374827109266944, + "grad_norm": 2.739427328109741, + "learning_rate": 9.028738281850316e-06, + "log_odds_chosen": 13.150906562805176, + "log_odds_ratio": -3.6712751807499444e-06, + "logits/chosen": -0.6241665482521057, + "logits/rejected": -0.6573482751846313, + "logps/chosen": -8.547461038688198e-05, + "logps/rejected": -3.4761362075805664, + "loss": 0.3557, + "nll_loss": 0.08893192559480667, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.547461220587138e-06, + "rewards/margins": 0.3476050794124603, + "rewards/rejected": -0.3476136326789856, + "step": 12110 + }, + { + "epoch": 8.37551867219917, + "grad_norm": 4.139987945556641, + "learning_rate": 9.024896265560167e-06, + "log_odds_chosen": 10.556331634521484, + "log_odds_ratio": -0.00036771217128261924, + "logits/chosen": -0.1802615076303482, + "logits/rejected": -0.2879849076271057, + "logps/chosen": -0.0007980070076882839, + "logps/rejected": -2.4222426414489746, + "loss": 0.4299, + "nll_loss": 0.10743677616119385, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.980070222401991e-05, + "rewards/margins": 0.24214446544647217, + "rewards/rejected": -0.2422242909669876, + "step": 12111 + }, + { + "epoch": 8.376210235131397, + "grad_norm": 6.3887481689453125, + "learning_rate": 9.021054249270017e-06, + "log_odds_chosen": 10.783723831176758, + "log_odds_ratio": -0.00010180518438573927, + "logits/chosen": -0.34942975640296936, + "logits/rejected": -0.3650428056716919, + "logps/chosen": -0.00045497252722270787, + "logps/rejected": -2.3204410076141357, + "loss": 0.6587, + "nll_loss": 0.16466113924980164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.549725417746231e-05, + "rewards/margins": 0.23199859261512756, + "rewards/rejected": -0.23204410076141357, + "step": 12112 + }, + { + "epoch": 8.376901798063624, + "grad_norm": 3.7242636680603027, + "learning_rate": 9.017212232979868e-06, + "log_odds_chosen": 11.271821975708008, + "log_odds_ratio": -3.3475887903477997e-05, + "logits/chosen": -0.5573599338531494, + "logits/rejected": -0.6018040180206299, + "logps/chosen": -0.0002326086541870609, + "logps/rejected": -2.2966699600219727, + "loss": 0.3278, + "nll_loss": 0.08195127546787262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.326086541870609e-05, + "rewards/margins": 0.22964373230934143, + "rewards/rejected": -0.22966697812080383, + "step": 12113 + }, + { + "epoch": 8.377593360995851, + "grad_norm": 3.9293277263641357, + "learning_rate": 9.013370216689719e-06, + "log_odds_chosen": 12.213785171508789, + "log_odds_ratio": -1.0291758371749893e-05, + "logits/chosen": -0.31658226251602173, + "logits/rejected": -0.580470085144043, + "logps/chosen": -0.00015860905114095658, + "logps/rejected": -3.0600228309631348, + "loss": 0.4878, + "nll_loss": 0.1219446212053299, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5860903658904135e-05, + "rewards/margins": 0.3059864640235901, + "rewards/rejected": -0.30600231885910034, + "step": 12114 + }, + { + "epoch": 8.378284923928078, + "grad_norm": 5.134614944458008, + "learning_rate": 9.00952820039957e-06, + "log_odds_chosen": 12.008981704711914, + "log_odds_ratio": -6.621122156502679e-05, + "logits/chosen": -0.10655619204044342, + "logits/rejected": -0.10315324366092682, + "logps/chosen": -0.00018991855904459953, + "logps/rejected": -3.2541890144348145, + "loss": 0.5297, + "nll_loss": 0.13242211937904358, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8991855540662073e-05, + "rewards/margins": 0.32539990544319153, + "rewards/rejected": -0.3254188895225525, + "step": 12115 + }, + { + "epoch": 8.378976486860305, + "grad_norm": 2.5658793449401855, + "learning_rate": 9.005686184109422e-06, + "log_odds_chosen": 11.466197967529297, + "log_odds_ratio": -2.4516677513020113e-05, + "logits/chosen": -0.6814978122711182, + "logits/rejected": -0.7252025604248047, + "logps/chosen": -9.871042129816487e-05, + "logps/rejected": -1.9790711402893066, + "loss": 0.3263, + "nll_loss": 0.08158230036497116, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.871042493614368e-06, + "rewards/margins": 0.19789722561836243, + "rewards/rejected": -0.19790711998939514, + "step": 12116 + }, + { + "epoch": 8.379668049792532, + "grad_norm": 3.70289945602417, + "learning_rate": 9.001844167819271e-06, + "log_odds_chosen": 11.636565208435059, + "log_odds_ratio": -2.1965597625239752e-05, + "logits/chosen": 0.254207968711853, + "logits/rejected": 0.1469896137714386, + "logps/chosen": -0.00010575105989119038, + "logps/rejected": -2.418914318084717, + "loss": 0.437, + "nll_loss": 0.1092444434762001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0575105989119038e-05, + "rewards/margins": 0.24188083410263062, + "rewards/rejected": -0.24189142882823944, + "step": 12117 + }, + { + "epoch": 8.380359612724758, + "grad_norm": 2.544630765914917, + "learning_rate": 8.998002151529122e-06, + "log_odds_chosen": 10.39298152923584, + "log_odds_ratio": -8.388479909626767e-05, + "logits/chosen": -0.4096953272819519, + "logits/rejected": -0.47978413105010986, + "logps/chosen": -0.0002411601017229259, + "logps/rejected": -1.8249914646148682, + "loss": 0.3276, + "nll_loss": 0.08188360184431076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4116008717101067e-05, + "rewards/margins": 0.1824750453233719, + "rewards/rejected": -0.18249915540218353, + "step": 12118 + }, + { + "epoch": 8.381051175656985, + "grad_norm": 7.59868860244751, + "learning_rate": 8.994160135238974e-06, + "log_odds_chosen": 12.390218734741211, + "log_odds_ratio": -2.03878698812332e-05, + "logits/chosen": -0.06851515173912048, + "logits/rejected": -0.19629979133605957, + "logps/chosen": -0.00019201249233447015, + "logps/rejected": -3.3422369956970215, + "loss": 0.7047, + "nll_loss": 0.17616420984268188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9201250324840657e-05, + "rewards/margins": 0.3342045247554779, + "rewards/rejected": -0.3342237174510956, + "step": 12119 + }, + { + "epoch": 8.381742738589212, + "grad_norm": 3.9883933067321777, + "learning_rate": 8.990318118948825e-06, + "log_odds_chosen": 10.478074073791504, + "log_odds_ratio": -0.00011548143811523914, + "logits/chosen": -0.2359369695186615, + "logits/rejected": -0.3406507968902588, + "logps/chosen": -0.0005425603594630957, + "logps/rejected": -2.217989683151245, + "loss": 0.2968, + "nll_loss": 0.0741945430636406, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.425603376352228e-05, + "rewards/margins": 0.22174471616744995, + "rewards/rejected": -0.22179898619651794, + "step": 12120 + }, + { + "epoch": 8.382434301521439, + "grad_norm": 4.174715995788574, + "learning_rate": 8.986476102658676e-06, + "log_odds_chosen": 10.57568359375, + "log_odds_ratio": -0.00012932793470099568, + "logits/chosen": -0.03034919500350952, + "logits/rejected": -0.0810474306344986, + "logps/chosen": -0.00015235492901410908, + "logps/rejected": -1.7275258302688599, + "loss": 0.3434, + "nll_loss": 0.08582790195941925, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5235491446219385e-05, + "rewards/margins": 0.17273734509944916, + "rewards/rejected": -0.17275258898735046, + "step": 12121 + }, + { + "epoch": 8.383125864453666, + "grad_norm": 3.964179754257202, + "learning_rate": 8.982634086368527e-06, + "log_odds_chosen": 11.0924072265625, + "log_odds_ratio": -6.431773363146931e-05, + "logits/chosen": -0.1575760841369629, + "logits/rejected": -0.22044521570205688, + "logps/chosen": -0.0001404537761118263, + "logps/rejected": -2.251370906829834, + "loss": 0.5174, + "nll_loss": 0.1293363720178604, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.404537761118263e-05, + "rewards/margins": 0.22512304782867432, + "rewards/rejected": -0.22513708472251892, + "step": 12122 + }, + { + "epoch": 8.383817427385893, + "grad_norm": 5.138609886169434, + "learning_rate": 8.978792070078377e-06, + "log_odds_chosen": 10.380916595458984, + "log_odds_ratio": -0.0002986867038998753, + "logits/chosen": -0.2631905972957611, + "logits/rejected": -0.38608020544052124, + "logps/chosen": -0.0035598543472588062, + "logps/rejected": -2.1883394718170166, + "loss": 0.4938, + "nll_loss": 0.12340797483921051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003559854521881789, + "rewards/margins": 0.21847794950008392, + "rewards/rejected": -0.21883393824100494, + "step": 12123 + }, + { + "epoch": 8.38450899031812, + "grad_norm": 7.429013729095459, + "learning_rate": 8.974950053788228e-06, + "log_odds_chosen": 9.923589706420898, + "log_odds_ratio": -0.12461341172456741, + "logits/chosen": -0.33789655566215515, + "logits/rejected": -0.371481329202652, + "logps/chosen": -0.02460256591439247, + "logps/rejected": -2.5953493118286133, + "loss": 0.7909, + "nll_loss": 0.18525590002536774, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0024602566845715046, + "rewards/margins": 0.25707465410232544, + "rewards/rejected": -0.25953492522239685, + "step": 12124 + }, + { + "epoch": 8.385200553250346, + "grad_norm": 5.318549633026123, + "learning_rate": 8.97110803749808e-06, + "log_odds_chosen": 9.908432960510254, + "log_odds_ratio": -0.00018403760623186827, + "logits/chosen": -0.3189855217933655, + "logits/rejected": -0.25400811433792114, + "logps/chosen": -0.0002738266484811902, + "logps/rejected": -1.2519633769989014, + "loss": 0.3783, + "nll_loss": 0.09455760568380356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7382666303310543e-05, + "rewards/margins": 0.12516896426677704, + "rewards/rejected": -0.12519635260105133, + "step": 12125 + }, + { + "epoch": 8.385892116182573, + "grad_norm": 4.526805400848389, + "learning_rate": 8.96726602120793e-06, + "log_odds_chosen": 11.152012825012207, + "log_odds_ratio": -4.143865226069465e-05, + "logits/chosen": 0.08569681644439697, + "logits/rejected": 0.12167654931545258, + "logps/chosen": -0.00021132684196345508, + "logps/rejected": -2.5041356086730957, + "loss": 0.3601, + "nll_loss": 0.0900086835026741, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.113268601533491e-05, + "rewards/margins": 0.25039243698120117, + "rewards/rejected": -0.25041356682777405, + "step": 12126 + }, + { + "epoch": 8.3865836791148, + "grad_norm": 3.170875072479248, + "learning_rate": 8.96342400491778e-06, + "log_odds_chosen": 11.103178977966309, + "log_odds_ratio": -0.00010288170597050339, + "logits/chosen": -0.45769423246383667, + "logits/rejected": -0.5105969905853271, + "logps/chosen": -0.0003910820232704282, + "logps/rejected": -2.262617349624634, + "loss": 0.2931, + "nll_loss": 0.07327709347009659, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.910820305463858e-05, + "rewards/margins": 0.22622261941432953, + "rewards/rejected": -0.22626172006130219, + "step": 12127 + }, + { + "epoch": 8.387275242047027, + "grad_norm": 3.580827236175537, + "learning_rate": 8.959581988627633e-06, + "log_odds_chosen": 11.565444946289062, + "log_odds_ratio": -1.3538083294406533e-05, + "logits/chosen": -0.4459022283554077, + "logits/rejected": -0.5162770748138428, + "logps/chosen": -0.00011571186769288033, + "logps/rejected": -2.497537851333618, + "loss": 0.3723, + "nll_loss": 0.09307833015918732, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1571187314984854e-05, + "rewards/margins": 0.24974222481250763, + "rewards/rejected": -0.24975380301475525, + "step": 12128 + }, + { + "epoch": 8.387966804979254, + "grad_norm": 3.24259614944458, + "learning_rate": 8.955739972337484e-06, + "log_odds_chosen": 11.701902389526367, + "log_odds_ratio": -3.0087014238233678e-05, + "logits/chosen": -0.4742031991481781, + "logits/rejected": -0.48803767561912537, + "logps/chosen": -0.00012482488818932325, + "logps/rejected": -2.4717555046081543, + "loss": 0.5772, + "nll_loss": 0.14430077373981476, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2482489182730205e-05, + "rewards/margins": 0.2471630871295929, + "rewards/rejected": -0.24717557430267334, + "step": 12129 + }, + { + "epoch": 8.38865836791148, + "grad_norm": 16.22124671936035, + "learning_rate": 8.951897956047334e-06, + "log_odds_chosen": 11.777971267700195, + "log_odds_ratio": -0.00025148893473669887, + "logits/chosen": -0.3254454731941223, + "logits/rejected": -0.3738795816898346, + "logps/chosen": -0.000138016912387684, + "logps/rejected": -2.7632956504821777, + "loss": 0.4057, + "nll_loss": 0.10140404105186462, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3801690329273697e-05, + "rewards/margins": 0.2763157784938812, + "rewards/rejected": -0.27632957696914673, + "step": 12130 + }, + { + "epoch": 8.389349930843707, + "grad_norm": 6.9681315422058105, + "learning_rate": 8.948055939757185e-06, + "log_odds_chosen": 9.457818984985352, + "log_odds_ratio": -0.003858291544020176, + "logits/chosen": 0.024935171008110046, + "logits/rejected": -0.09445832669734955, + "logps/chosen": -0.0016933679580688477, + "logps/rejected": -1.6940945386886597, + "loss": 0.6565, + "nll_loss": 0.1637456715106964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016933679580688477, + "rewards/margins": 0.16924011707305908, + "rewards/rejected": -0.16940946877002716, + "step": 12131 + }, + { + "epoch": 8.390041493775934, + "grad_norm": 3.176541566848755, + "learning_rate": 8.944213923467036e-06, + "log_odds_chosen": 11.606234550476074, + "log_odds_ratio": -3.1693620258010924e-05, + "logits/chosen": -0.545473575592041, + "logits/rejected": -0.634528636932373, + "logps/chosen": -0.00034183548996225, + "logps/rejected": -2.784149408340454, + "loss": 0.3399, + "nll_loss": 0.08497949689626694, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4183547541033477e-05, + "rewards/margins": 0.27838078141212463, + "rewards/rejected": -0.2784149646759033, + "step": 12132 + }, + { + "epoch": 8.39073305670816, + "grad_norm": 3.890603542327881, + "learning_rate": 8.940371907176887e-06, + "log_odds_chosen": 10.679001808166504, + "log_odds_ratio": -4.887073737336323e-05, + "logits/chosen": -0.487888365983963, + "logits/rejected": -0.7082811594009399, + "logps/chosen": -0.00012446560140233487, + "logps/rejected": -1.8423631191253662, + "loss": 0.6429, + "nll_loss": 0.1607137769460678, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2446559594536666e-05, + "rewards/margins": 0.18422386050224304, + "rewards/rejected": -0.1842363178730011, + "step": 12133 + }, + { + "epoch": 8.391424619640388, + "grad_norm": 3.3028998374938965, + "learning_rate": 8.936529890886739e-06, + "log_odds_chosen": 11.168549537658691, + "log_odds_ratio": -0.00025892583653330803, + "logits/chosen": -0.12940393388271332, + "logits/rejected": -0.15886016190052032, + "logps/chosen": -0.0007486994145438075, + "logps/rejected": -2.3878705501556396, + "loss": 0.4677, + "nll_loss": 0.11690366268157959, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.48699385439977e-05, + "rewards/margins": 0.23871219158172607, + "rewards/rejected": -0.23878705501556396, + "step": 12134 + }, + { + "epoch": 8.392116182572614, + "grad_norm": 2.5992722511291504, + "learning_rate": 8.932687874596588e-06, + "log_odds_chosen": 10.669702529907227, + "log_odds_ratio": -4.008759788121097e-05, + "logits/chosen": -0.40739911794662476, + "logits/rejected": -0.3629014492034912, + "logps/chosen": -0.00013733016385231167, + "logps/rejected": -1.8446671962738037, + "loss": 0.2705, + "nll_loss": 0.06761143356561661, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3733016203332227e-05, + "rewards/margins": 0.18445296585559845, + "rewards/rejected": -0.18446670472621918, + "step": 12135 + }, + { + "epoch": 8.392807745504841, + "grad_norm": 4.764731407165527, + "learning_rate": 8.928845858306439e-06, + "log_odds_chosen": 11.26877498626709, + "log_odds_ratio": -0.00014127125905361027, + "logits/chosen": -0.03872055560350418, + "logits/rejected": 0.004581443965435028, + "logps/chosen": -0.00023471549502573907, + "logps/rejected": -2.5994601249694824, + "loss": 0.3531, + "nll_loss": 0.08826877921819687, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3471548047382385e-05, + "rewards/margins": 0.2599225342273712, + "rewards/rejected": -0.2599460184574127, + "step": 12136 + }, + { + "epoch": 8.393499308437068, + "grad_norm": 3.465942144393921, + "learning_rate": 8.925003842016291e-06, + "log_odds_chosen": 11.4708251953125, + "log_odds_ratio": -3.860402648570016e-05, + "logits/chosen": -0.20086407661437988, + "logits/rejected": -0.37650150060653687, + "logps/chosen": -0.0001465227105654776, + "logps/rejected": -2.322443962097168, + "loss": 0.2577, + "nll_loss": 0.0644330084323883, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4652272511739284e-05, + "rewards/margins": 0.23222973942756653, + "rewards/rejected": -0.23224438726902008, + "step": 12137 + }, + { + "epoch": 8.394190871369295, + "grad_norm": 17.046611785888672, + "learning_rate": 8.921161825726142e-06, + "log_odds_chosen": 11.655769348144531, + "log_odds_ratio": -2.2001067918608896e-05, + "logits/chosen": -0.15086019039154053, + "logits/rejected": -0.19133687019348145, + "logps/chosen": -0.00010712641233112663, + "logps/rejected": -2.574110269546509, + "loss": 0.528, + "nll_loss": 0.13198719918727875, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0712641596910544e-05, + "rewards/margins": 0.2574003338813782, + "rewards/rejected": -0.25741100311279297, + "step": 12138 + }, + { + "epoch": 8.394882434301522, + "grad_norm": 3.9673593044281006, + "learning_rate": 8.917319809435993e-06, + "log_odds_chosen": 10.512715339660645, + "log_odds_ratio": -6.370164919644594e-05, + "logits/chosen": 0.008853770792484283, + "logits/rejected": -0.06970393657684326, + "logps/chosen": -0.00030971781234256923, + "logps/rejected": -2.1695480346679688, + "loss": 0.5843, + "nll_loss": 0.14605872333049774, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.097178341704421e-05, + "rewards/margins": 0.21692386269569397, + "rewards/rejected": -0.2169548124074936, + "step": 12139 + }, + { + "epoch": 8.395573997233749, + "grad_norm": 4.223468780517578, + "learning_rate": 8.913477793145844e-06, + "log_odds_chosen": 12.047952651977539, + "log_odds_ratio": -1.451267598895356e-05, + "logits/chosen": -0.08651173114776611, + "logits/rejected": -0.259906530380249, + "logps/chosen": -6.24086387688294e-05, + "logps/rejected": -2.3703420162200928, + "loss": 0.3786, + "nll_loss": 0.09465420246124268, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.240864422579762e-06, + "rewards/margins": 0.23702794313430786, + "rewards/rejected": -0.23703420162200928, + "step": 12140 + }, + { + "epoch": 8.396265560165975, + "grad_norm": 4.868855953216553, + "learning_rate": 8.909635776855694e-06, + "log_odds_chosen": 12.190462112426758, + "log_odds_ratio": -1.316713314736262e-05, + "logits/chosen": -0.4914645552635193, + "logits/rejected": -0.46773529052734375, + "logps/chosen": -0.00014875730266794562, + "logps/rejected": -3.121089458465576, + "loss": 0.4575, + "nll_loss": 0.11437132209539413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4875729902996682e-05, + "rewards/margins": 0.3120940923690796, + "rewards/rejected": -0.31210893392562866, + "step": 12141 + }, + { + "epoch": 8.396957123098202, + "grad_norm": 4.057255744934082, + "learning_rate": 8.905793760565545e-06, + "log_odds_chosen": 11.078142166137695, + "log_odds_ratio": -0.0001451470161555335, + "logits/chosen": -0.03570991009473801, + "logits/rejected": -0.21599245071411133, + "logps/chosen": -0.00032920308876782656, + "logps/rejected": -2.516672134399414, + "loss": 0.4433, + "nll_loss": 0.11080868542194366, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.292031033197418e-05, + "rewards/margins": 0.25163429975509644, + "rewards/rejected": -0.25166723132133484, + "step": 12142 + }, + { + "epoch": 8.39764868603043, + "grad_norm": 4.454241752624512, + "learning_rate": 8.901951744275396e-06, + "log_odds_chosen": 10.203397750854492, + "log_odds_ratio": -0.00020603620214387774, + "logits/chosen": -0.5759111046791077, + "logits/rejected": -0.5808447003364563, + "logps/chosen": -0.00016467072418890893, + "logps/rejected": -1.8405020236968994, + "loss": 0.5661, + "nll_loss": 0.14150398969650269, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6467072782688774e-05, + "rewards/margins": 0.18403376638889313, + "rewards/rejected": -0.18405021727085114, + "step": 12143 + }, + { + "epoch": 8.398340248962656, + "grad_norm": 3.6221299171447754, + "learning_rate": 8.898109727985248e-06, + "log_odds_chosen": 10.22618293762207, + "log_odds_ratio": -0.00020153902005404234, + "logits/chosen": -0.3695487380027771, + "logits/rejected": -0.38564279675483704, + "logps/chosen": -0.000372840411728248, + "logps/rejected": -1.8671324253082275, + "loss": 0.4263, + "nll_loss": 0.10655222088098526, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.728403680725023e-05, + "rewards/margins": 0.18667596578598022, + "rewards/rejected": -0.18671324849128723, + "step": 12144 + }, + { + "epoch": 8.399031811894883, + "grad_norm": 4.070497512817383, + "learning_rate": 8.894267711695097e-06, + "log_odds_chosen": 11.722437858581543, + "log_odds_ratio": -6.260615919018164e-05, + "logits/chosen": -0.11679200828075409, + "logits/rejected": -0.408610463142395, + "logps/chosen": -0.0001105478877434507, + "logps/rejected": -2.658935070037842, + "loss": 0.4122, + "nll_loss": 0.1030396968126297, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1054788046749309e-05, + "rewards/margins": 0.2658824324607849, + "rewards/rejected": -0.26589351892471313, + "step": 12145 + }, + { + "epoch": 8.39972337482711, + "grad_norm": 4.5460205078125, + "learning_rate": 8.890425695404948e-06, + "log_odds_chosen": 11.706779479980469, + "log_odds_ratio": -5.225447239354253e-05, + "logits/chosen": -0.19363567233085632, + "logits/rejected": -0.23461072146892548, + "logps/chosen": -0.00026260357117280364, + "logps/rejected": -2.646803855895996, + "loss": 0.5108, + "nll_loss": 0.1277005672454834, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6260357117280364e-05, + "rewards/margins": 0.26465412974357605, + "rewards/rejected": -0.2646803855895996, + "step": 12146 + }, + { + "epoch": 8.400414937759336, + "grad_norm": 5.4933929443359375, + "learning_rate": 8.8865836791148e-06, + "log_odds_chosen": 11.374542236328125, + "log_odds_ratio": -5.557585245696828e-05, + "logits/chosen": -0.22197853028774261, + "logits/rejected": -0.28390344977378845, + "logps/chosen": -0.00048701392370276153, + "logps/rejected": -3.2011351585388184, + "loss": 0.4014, + "nll_loss": 0.10034617781639099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8701393097871915e-05, + "rewards/margins": 0.32006484270095825, + "rewards/rejected": -0.32011356949806213, + "step": 12147 + }, + { + "epoch": 8.401106500691563, + "grad_norm": 2.9592678546905518, + "learning_rate": 8.882741662824651e-06, + "log_odds_chosen": 10.759661674499512, + "log_odds_ratio": -4.308186180423945e-05, + "logits/chosen": -0.39755889773368835, + "logits/rejected": -0.39187881350517273, + "logps/chosen": -0.00021549042139668018, + "logps/rejected": -2.106257200241089, + "loss": 0.3276, + "nll_loss": 0.08189596235752106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1549041775870137e-05, + "rewards/margins": 0.21060419082641602, + "rewards/rejected": -0.21062573790550232, + "step": 12148 + }, + { + "epoch": 8.40179806362379, + "grad_norm": 3.577422857284546, + "learning_rate": 8.878899646534502e-06, + "log_odds_chosen": 10.447932243347168, + "log_odds_ratio": -0.0003077391884289682, + "logits/chosen": -0.12919040024280548, + "logits/rejected": -0.12657864391803741, + "logps/chosen": -0.0006332011544145644, + "logps/rejected": -2.045531749725342, + "loss": 0.3966, + "nll_loss": 0.09911017119884491, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.332011253107339e-05, + "rewards/margins": 0.20448985695838928, + "rewards/rejected": -0.20455315709114075, + "step": 12149 + }, + { + "epoch": 8.402489626556017, + "grad_norm": 8.003704071044922, + "learning_rate": 8.875057630244353e-06, + "log_odds_chosen": 11.208322525024414, + "log_odds_ratio": -3.253010436310433e-05, + "logits/chosen": -0.5645467638969421, + "logits/rejected": -0.4849031865596771, + "logps/chosen": -0.0001778160803951323, + "logps/rejected": -2.103116035461426, + "loss": 0.4208, + "nll_loss": 0.10519391298294067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.778160731191747e-05, + "rewards/margins": 0.21029382944107056, + "rewards/rejected": -0.21031160652637482, + "step": 12150 + }, + { + "epoch": 8.403181189488244, + "grad_norm": 5.889824867248535, + "learning_rate": 8.871215613954203e-06, + "log_odds_chosen": 10.591680526733398, + "log_odds_ratio": -0.00012240585056133568, + "logits/chosen": -0.0922984853386879, + "logits/rejected": -0.10059482604265213, + "logps/chosen": -0.00026184759917669, + "logps/rejected": -1.976510763168335, + "loss": 0.5141, + "nll_loss": 0.12850183248519897, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6184758098679595e-05, + "rewards/margins": 0.19762490689754486, + "rewards/rejected": -0.19765108823776245, + "step": 12151 + }, + { + "epoch": 8.40387275242047, + "grad_norm": 4.742496967315674, + "learning_rate": 8.867373597664054e-06, + "log_odds_chosen": 12.126361846923828, + "log_odds_ratio": -1.0117659257957712e-05, + "logits/chosen": -0.13543446362018585, + "logits/rejected": -0.10724420845508575, + "logps/chosen": -0.0001919452624861151, + "logps/rejected": -3.2606234550476074, + "loss": 0.4523, + "nll_loss": 0.11307486891746521, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.919452552101575e-05, + "rewards/margins": 0.32604315876960754, + "rewards/rejected": -0.3260623514652252, + "step": 12152 + }, + { + "epoch": 8.404564315352697, + "grad_norm": 3.5070595741271973, + "learning_rate": 8.863531581373907e-06, + "log_odds_chosen": 11.187559127807617, + "log_odds_ratio": -3.286149149062112e-05, + "logits/chosen": -0.2311168909072876, + "logits/rejected": -0.33597952127456665, + "logps/chosen": -0.0003408733173273504, + "logps/rejected": -2.568341016769409, + "loss": 0.4203, + "nll_loss": 0.10506104677915573, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4087330277543515e-05, + "rewards/margins": 0.2568000257015228, + "rewards/rejected": -0.25683411955833435, + "step": 12153 + }, + { + "epoch": 8.405255878284924, + "grad_norm": 3.7609317302703857, + "learning_rate": 8.859689565083756e-06, + "log_odds_chosen": 10.210521697998047, + "log_odds_ratio": -0.0002451605396345258, + "logits/chosen": -0.42259418964385986, + "logits/rejected": -0.5031062960624695, + "logps/chosen": -0.0001644161675358191, + "logps/rejected": -1.500652551651001, + "loss": 0.4255, + "nll_loss": 0.10636003315448761, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.644161602598615e-05, + "rewards/margins": 0.15004882216453552, + "rewards/rejected": -0.15006527304649353, + "step": 12154 + }, + { + "epoch": 8.405947441217151, + "grad_norm": 4.2350358963012695, + "learning_rate": 8.855847548793606e-06, + "log_odds_chosen": 9.83108139038086, + "log_odds_ratio": -0.00022007252846378833, + "logits/chosen": -0.23014146089553833, + "logits/rejected": -0.30746909976005554, + "logps/chosen": -0.00015861319843679667, + "logps/rejected": -1.3929765224456787, + "loss": 0.3984, + "nll_loss": 0.09957799315452576, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5861318388488144e-05, + "rewards/margins": 0.13928177952766418, + "rewards/rejected": -0.13929766416549683, + "step": 12155 + }, + { + "epoch": 8.406639004149378, + "grad_norm": 4.1333160400390625, + "learning_rate": 8.852005532503459e-06, + "log_odds_chosen": 10.633649826049805, + "log_odds_ratio": -0.0001436761813238263, + "logits/chosen": -0.21853643655776978, + "logits/rejected": -0.16347083449363708, + "logps/chosen": -0.00018755270866677165, + "logps/rejected": -2.1546311378479004, + "loss": 0.468, + "nll_loss": 0.1169951781630516, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8755272321868688e-05, + "rewards/margins": 0.21544435620307922, + "rewards/rejected": -0.21546313166618347, + "step": 12156 + }, + { + "epoch": 8.407330567081605, + "grad_norm": 3.858767509460449, + "learning_rate": 8.84816351621331e-06, + "log_odds_chosen": 12.503584861755371, + "log_odds_ratio": -0.0003335903456900269, + "logits/chosen": -0.7343727946281433, + "logits/rejected": -0.6697561740875244, + "logps/chosen": -0.0002777479530777782, + "logps/rejected": -2.963214874267578, + "loss": 0.3465, + "nll_loss": 0.08658871054649353, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7774796762969345e-05, + "rewards/margins": 0.296293705701828, + "rewards/rejected": -0.29632145166397095, + "step": 12157 + }, + { + "epoch": 8.408022130013832, + "grad_norm": 4.064150333404541, + "learning_rate": 8.84432149992316e-06, + "log_odds_chosen": 11.826985359191895, + "log_odds_ratio": -4.684683881350793e-05, + "logits/chosen": -0.5308201909065247, + "logits/rejected": -0.6423518061637878, + "logps/chosen": -0.00013899643090553582, + "logps/rejected": -2.4273645877838135, + "loss": 0.368, + "nll_loss": 0.09199005365371704, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3899643818149343e-05, + "rewards/margins": 0.2427225559949875, + "rewards/rejected": -0.24273645877838135, + "step": 12158 + }, + { + "epoch": 8.408713692946058, + "grad_norm": 3.600189447402954, + "learning_rate": 8.840479483633011e-06, + "log_odds_chosen": 11.107683181762695, + "log_odds_ratio": -9.071611566469073e-05, + "logits/chosen": -0.22912006080150604, + "logits/rejected": -0.08014730364084244, + "logps/chosen": -0.0002447239530738443, + "logps/rejected": -2.6281778812408447, + "loss": 0.3179, + "nll_loss": 0.07946532219648361, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4472396034980193e-05, + "rewards/margins": 0.26279330253601074, + "rewards/rejected": -0.2628178000450134, + "step": 12159 + }, + { + "epoch": 8.409405255878285, + "grad_norm": 4.881404876708984, + "learning_rate": 8.836637467342862e-06, + "log_odds_chosen": 11.663158416748047, + "log_odds_ratio": -2.5977791665354744e-05, + "logits/chosen": -0.43095946311950684, + "logits/rejected": -0.5290078520774841, + "logps/chosen": -6.659415521426126e-05, + "logps/rejected": -2.0811989307403564, + "loss": 0.592, + "nll_loss": 0.14798519015312195, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.659415248577716e-06, + "rewards/margins": 0.20811323821544647, + "rewards/rejected": -0.20811989903450012, + "step": 12160 + }, + { + "epoch": 8.410096818810512, + "grad_norm": 5.0877861976623535, + "learning_rate": 8.832795451052713e-06, + "log_odds_chosen": 10.184471130371094, + "log_odds_ratio": -0.0008171540684998035, + "logits/chosen": 0.12613186240196228, + "logits/rejected": 0.050111062824726105, + "logps/chosen": -0.000566254137083888, + "logps/rejected": -2.0396904945373535, + "loss": 0.4631, + "nll_loss": 0.11569279432296753, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.662541661877185e-05, + "rewards/margins": 0.20391243696212769, + "rewards/rejected": -0.2039690613746643, + "step": 12161 + }, + { + "epoch": 8.410788381742739, + "grad_norm": 4.5020036697387695, + "learning_rate": 8.828953434762565e-06, + "log_odds_chosen": 11.530824661254883, + "log_odds_ratio": -5.652059189742431e-05, + "logits/chosen": -0.2417091429233551, + "logits/rejected": -0.24491730332374573, + "logps/chosen": -0.0006334069184958935, + "logps/rejected": -3.11706805229187, + "loss": 0.5545, + "nll_loss": 0.13860823214054108, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.334069621516392e-05, + "rewards/margins": 0.31164348125457764, + "rewards/rejected": -0.3117068409919739, + "step": 12162 + }, + { + "epoch": 8.411479944674966, + "grad_norm": 4.6040167808532715, + "learning_rate": 8.825111418472414e-06, + "log_odds_chosen": 11.503437995910645, + "log_odds_ratio": -1.1247922884649597e-05, + "logits/chosen": -0.41219836473464966, + "logits/rejected": -0.45347335934638977, + "logps/chosen": -0.00012556483852677047, + "logps/rejected": -2.360114574432373, + "loss": 0.4967, + "nll_loss": 0.12417368590831757, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.255648567166645e-05, + "rewards/margins": 0.23599891364574432, + "rewards/rejected": -0.23601147532463074, + "step": 12163 + }, + { + "epoch": 8.412171507607193, + "grad_norm": 4.140271186828613, + "learning_rate": 8.821269402182265e-06, + "log_odds_chosen": 11.344810485839844, + "log_odds_ratio": -0.00012324423005338758, + "logits/chosen": -0.49369677901268005, + "logits/rejected": -0.614727795124054, + "logps/chosen": -0.0002669915556907654, + "logps/rejected": -2.55715012550354, + "loss": 0.32, + "nll_loss": 0.07999327778816223, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6699155569076538e-05, + "rewards/margins": 0.255688339471817, + "rewards/rejected": -0.255715012550354, + "step": 12164 + }, + { + "epoch": 8.41286307053942, + "grad_norm": 4.0589776039123535, + "learning_rate": 8.817427385892117e-06, + "log_odds_chosen": 11.506391525268555, + "log_odds_ratio": -3.021215343324002e-05, + "logits/chosen": -0.32307207584381104, + "logits/rejected": -0.4154369831085205, + "logps/chosen": -7.630437175976112e-05, + "logps/rejected": -2.0438153743743896, + "loss": 0.4018, + "nll_loss": 0.10045389086008072, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630436812178232e-06, + "rewards/margins": 0.20437388122081757, + "rewards/rejected": -0.20438152551651, + "step": 12165 + }, + { + "epoch": 8.413554633471646, + "grad_norm": 3.8586294651031494, + "learning_rate": 8.813585369601968e-06, + "log_odds_chosen": 10.577253341674805, + "log_odds_ratio": -5.563483136938885e-05, + "logits/chosen": 0.056054629385471344, + "logits/rejected": -0.09274060279130936, + "logps/chosen": -0.0007029641419649124, + "logps/rejected": -2.80169415473938, + "loss": 0.5042, + "nll_loss": 0.12604326009750366, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.029641710687429e-05, + "rewards/margins": 0.28009912371635437, + "rewards/rejected": -0.28016942739486694, + "step": 12166 + }, + { + "epoch": 8.414246196403873, + "grad_norm": 2.5449228286743164, + "learning_rate": 8.809743353311819e-06, + "log_odds_chosen": 10.405815124511719, + "log_odds_ratio": -0.00010505613317945972, + "logits/chosen": -0.5005052089691162, + "logits/rejected": -0.5660674571990967, + "logps/chosen": -0.001792833092622459, + "logps/rejected": -1.830446720123291, + "loss": 0.2082, + "nll_loss": 0.05203032121062279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017928332090377808, + "rewards/margins": 0.18286538124084473, + "rewards/rejected": -0.1830446720123291, + "step": 12167 + }, + { + "epoch": 8.4149377593361, + "grad_norm": 4.84822416305542, + "learning_rate": 8.80590133702167e-06, + "log_odds_chosen": 11.884775161743164, + "log_odds_ratio": -7.349726365646347e-05, + "logits/chosen": -0.3754521608352661, + "logits/rejected": -0.4655861556529999, + "logps/chosen": -0.000315560755552724, + "logps/rejected": -3.4204797744750977, + "loss": 0.5375, + "nll_loss": 0.13436771929264069, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.155607919325121e-05, + "rewards/margins": 0.34201645851135254, + "rewards/rejected": -0.3420480191707611, + "step": 12168 + }, + { + "epoch": 8.415629322268327, + "grad_norm": 4.6386847496032715, + "learning_rate": 8.80205932073152e-06, + "log_odds_chosen": 10.568868637084961, + "log_odds_ratio": -4.5589913497678936e-05, + "logits/chosen": 0.06592310965061188, + "logits/rejected": 0.012271258980035782, + "logps/chosen": -0.0003107009397353977, + "logps/rejected": -2.078974485397339, + "loss": 0.4368, + "nll_loss": 0.10918490588665009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.107009251834825e-05, + "rewards/margins": 0.207866370677948, + "rewards/rejected": -0.20789743959903717, + "step": 12169 + }, + { + "epoch": 8.416320885200554, + "grad_norm": 3.8307695388793945, + "learning_rate": 8.798217304441371e-06, + "log_odds_chosen": 11.827062606811523, + "log_odds_ratio": -0.00010294328239979222, + "logits/chosen": -0.36065948009490967, + "logits/rejected": -0.27619484066963196, + "logps/chosen": -0.0002090797497658059, + "logps/rejected": -2.57730770111084, + "loss": 0.4453, + "nll_loss": 0.11131422221660614, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.090797534037847e-05, + "rewards/margins": 0.2577098608016968, + "rewards/rejected": -0.25773078203201294, + "step": 12170 + }, + { + "epoch": 8.41701244813278, + "grad_norm": 2.962205648422241, + "learning_rate": 8.794375288151224e-06, + "log_odds_chosen": 11.268369674682617, + "log_odds_ratio": -1.8425016605760902e-05, + "logits/chosen": 0.2704715430736542, + "logits/rejected": 0.19724391400814056, + "logps/chosen": -0.00040048419032245874, + "logps/rejected": -2.1504323482513428, + "loss": 0.4012, + "nll_loss": 0.10029654204845428, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.00484204874374e-05, + "rewards/margins": 0.21500319242477417, + "rewards/rejected": -0.21504324674606323, + "step": 12171 + }, + { + "epoch": 8.417704011065007, + "grad_norm": 3.4812543392181396, + "learning_rate": 8.790533271861073e-06, + "log_odds_chosen": 11.892035484313965, + "log_odds_ratio": -1.1453821571194567e-05, + "logits/chosen": -0.08979454636573792, + "logits/rejected": -0.12990817427635193, + "logps/chosen": -7.508957060053945e-05, + "logps/rejected": -2.176224708557129, + "loss": 0.484, + "nll_loss": 0.12100017070770264, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.508956969104474e-06, + "rewards/margins": 0.21761493384838104, + "rewards/rejected": -0.21762245893478394, + "step": 12172 + }, + { + "epoch": 8.418395573997234, + "grad_norm": 3.4574899673461914, + "learning_rate": 8.786691255570923e-06, + "log_odds_chosen": 10.994365692138672, + "log_odds_ratio": -4.289807839086279e-05, + "logits/chosen": -0.12744206190109253, + "logits/rejected": -0.32379990816116333, + "logps/chosen": -0.0004646638408303261, + "logps/rejected": -2.8650026321411133, + "loss": 0.3915, + "nll_loss": 0.09787625074386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6466382627841085e-05, + "rewards/margins": 0.2864537835121155, + "rewards/rejected": -0.2865002751350403, + "step": 12173 + }, + { + "epoch": 8.41908713692946, + "grad_norm": 3.1096792221069336, + "learning_rate": 8.782849239280774e-06, + "log_odds_chosen": 11.081042289733887, + "log_odds_ratio": -3.763330460060388e-05, + "logits/chosen": -0.5980318784713745, + "logits/rejected": -0.5375435948371887, + "logps/chosen": -0.00017640799342188984, + "logps/rejected": -1.9923516511917114, + "loss": 0.3995, + "nll_loss": 0.09987182170152664, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7640801161178388e-05, + "rewards/margins": 0.1992175281047821, + "rewards/rejected": -0.19923515617847443, + "step": 12174 + }, + { + "epoch": 8.419778699861688, + "grad_norm": 3.380220890045166, + "learning_rate": 8.779007222990627e-06, + "log_odds_chosen": 11.464609146118164, + "log_odds_ratio": -2.2556834665010683e-05, + "logits/chosen": -0.016593724489212036, + "logits/rejected": -0.14954808354377747, + "logps/chosen": -0.00011275989527348429, + "logps/rejected": -2.324753999710083, + "loss": 0.3875, + "nll_loss": 0.09687241911888123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1275988072156906e-05, + "rewards/margins": 0.23246413469314575, + "rewards/rejected": -0.2324753999710083, + "step": 12175 + }, + { + "epoch": 8.420470262793915, + "grad_norm": 5.193109512329102, + "learning_rate": 8.775165206700477e-06, + "log_odds_chosen": 9.941858291625977, + "log_odds_ratio": -0.00018944896874018013, + "logits/chosen": 0.13896964490413666, + "logits/rejected": -0.0418696403503418, + "logps/chosen": -0.0012561215553432703, + "logps/rejected": -2.5737171173095703, + "loss": 0.6994, + "nll_loss": 0.17483317852020264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012561216135509312, + "rewards/margins": 0.25724610686302185, + "rewards/rejected": -0.257371723651886, + "step": 12176 + }, + { + "epoch": 8.421161825726141, + "grad_norm": 7.516174793243408, + "learning_rate": 8.771323190410326e-06, + "log_odds_chosen": 12.087610244750977, + "log_odds_ratio": -1.127936047851108e-05, + "logits/chosen": 0.2538374364376068, + "logits/rejected": 0.08998441696166992, + "logps/chosen": -0.00012017204426229, + "logps/rejected": -2.966963529586792, + "loss": 0.6934, + "nll_loss": 0.1733420491218567, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2017204426229e-05, + "rewards/margins": 0.2966843247413635, + "rewards/rejected": -0.29669636487960815, + "step": 12177 + }, + { + "epoch": 8.421853388658368, + "grad_norm": 4.537851810455322, + "learning_rate": 8.767481174120179e-06, + "log_odds_chosen": 12.000404357910156, + "log_odds_ratio": -2.539575143600814e-05, + "logits/chosen": -0.07242706418037415, + "logits/rejected": -0.17881931364536285, + "logps/chosen": -0.0002449548337608576, + "logps/rejected": -3.37038254737854, + "loss": 0.4343, + "nll_loss": 0.10857339203357697, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.44954844674794e-05, + "rewards/margins": 0.33701375126838684, + "rewards/rejected": -0.3370382785797119, + "step": 12178 + }, + { + "epoch": 8.422544951590595, + "grad_norm": 5.332724571228027, + "learning_rate": 8.76363915783003e-06, + "log_odds_chosen": 11.437095642089844, + "log_odds_ratio": -3.303804624010809e-05, + "logits/chosen": 0.06925775110721588, + "logits/rejected": -0.05285045504570007, + "logps/chosen": -0.0002176310954382643, + "logps/rejected": -2.8794593811035156, + "loss": 0.4062, + "nll_loss": 0.10154794156551361, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.176310954382643e-05, + "rewards/margins": 0.2879241704940796, + "rewards/rejected": -0.2879459261894226, + "step": 12179 + }, + { + "epoch": 8.423236514522822, + "grad_norm": 3.739887237548828, + "learning_rate": 8.75979714153988e-06, + "log_odds_chosen": 10.907541275024414, + "log_odds_ratio": -0.00019406666979193687, + "logits/chosen": 0.007940517738461494, + "logits/rejected": -0.0989658460021019, + "logps/chosen": -0.0011361661599949002, + "logps/rejected": -2.5060086250305176, + "loss": 0.4233, + "nll_loss": 0.1058058887720108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011361661017872393, + "rewards/margins": 0.2504872679710388, + "rewards/rejected": -0.2506008744239807, + "step": 12180 + }, + { + "epoch": 8.423928077455049, + "grad_norm": 4.398841857910156, + "learning_rate": 8.755955125249731e-06, + "log_odds_chosen": 11.400894165039062, + "log_odds_ratio": -0.00013864760694559664, + "logits/chosen": -0.18514317274093628, + "logits/rejected": -0.3388518691062927, + "logps/chosen": -0.0001667686883592978, + "logps/rejected": -2.3160228729248047, + "loss": 0.4045, + "nll_loss": 0.10111506283283234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6676869563525543e-05, + "rewards/margins": 0.23158562183380127, + "rewards/rejected": -0.23160231113433838, + "step": 12181 + }, + { + "epoch": 8.424619640387276, + "grad_norm": 3.549445629119873, + "learning_rate": 8.752113108959582e-06, + "log_odds_chosen": 11.261871337890625, + "log_odds_ratio": -8.981912833405659e-05, + "logits/chosen": 0.05491316318511963, + "logits/rejected": 0.14074671268463135, + "logps/chosen": -0.0008438285440206528, + "logps/rejected": -2.6764464378356934, + "loss": 0.3137, + "nll_loss": 0.07841696590185165, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.438285294687375e-05, + "rewards/margins": 0.26756027340888977, + "rewards/rejected": -0.26764464378356934, + "step": 12182 + }, + { + "epoch": 8.425311203319502, + "grad_norm": 4.180157661437988, + "learning_rate": 8.748271092669432e-06, + "log_odds_chosen": 11.801605224609375, + "log_odds_ratio": -1.6300808056257665e-05, + "logits/chosen": -0.31147193908691406, + "logits/rejected": -0.3165523111820221, + "logps/chosen": -0.0001688749180175364, + "logps/rejected": -2.610840320587158, + "loss": 0.3341, + "nll_loss": 0.08351986855268478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6887490346562117e-05, + "rewards/margins": 0.26106715202331543, + "rewards/rejected": -0.26108402013778687, + "step": 12183 + }, + { + "epoch": 8.42600276625173, + "grad_norm": 3.374314069747925, + "learning_rate": 8.744429076379285e-06, + "log_odds_chosen": 11.43653392791748, + "log_odds_ratio": -1.3258251783554442e-05, + "logits/chosen": 0.20424631237983704, + "logits/rejected": 0.13409529626369476, + "logps/chosen": -6.864860915811732e-05, + "logps/rejected": -1.869539499282837, + "loss": 0.3789, + "nll_loss": 0.09472458064556122, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.864860552013852e-06, + "rewards/margins": 0.1869470775127411, + "rewards/rejected": -0.18695393204689026, + "step": 12184 + }, + { + "epoch": 8.426694329183956, + "grad_norm": 5.52797269821167, + "learning_rate": 8.740587060089136e-06, + "log_odds_chosen": 12.257134437561035, + "log_odds_ratio": -4.6418386773439124e-05, + "logits/chosen": 0.3385429084300995, + "logits/rejected": 0.2501536011695862, + "logps/chosen": -0.0001583172706887126, + "logps/rejected": -3.3235623836517334, + "loss": 0.6013, + "nll_loss": 0.15032291412353516, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5831728887860663e-05, + "rewards/margins": 0.33234041929244995, + "rewards/rejected": -0.3323562443256378, + "step": 12185 + }, + { + "epoch": 8.427385892116183, + "grad_norm": 3.664227247238159, + "learning_rate": 8.736745043798986e-06, + "log_odds_chosen": 10.88051700592041, + "log_odds_ratio": -4.372180410427973e-05, + "logits/chosen": -0.19316840171813965, + "logits/rejected": -0.2230408787727356, + "logps/chosen": -0.00013133355241734535, + "logps/rejected": -1.4515959024429321, + "loss": 0.3532, + "nll_loss": 0.08829930424690247, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3133354514138773e-05, + "rewards/margins": 0.1451464593410492, + "rewards/rejected": -0.14515958726406097, + "step": 12186 + }, + { + "epoch": 8.42807745504841, + "grad_norm": 5.888396263122559, + "learning_rate": 8.732903027508837e-06, + "log_odds_chosen": 11.336963653564453, + "log_odds_ratio": -2.995860813825857e-05, + "logits/chosen": -0.3141791522502899, + "logits/rejected": -0.24508805572986603, + "logps/chosen": -0.0001898179471027106, + "logps/rejected": -2.411524772644043, + "loss": 0.5331, + "nll_loss": 0.13326820731163025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.898179471027106e-05, + "rewards/margins": 0.24113348126411438, + "rewards/rejected": -0.24115246534347534, + "step": 12187 + }, + { + "epoch": 8.428769017980636, + "grad_norm": 4.525623321533203, + "learning_rate": 8.729061011218688e-06, + "log_odds_chosen": 11.285069465637207, + "log_odds_ratio": -2.8658303563133813e-05, + "logits/chosen": -0.325903058052063, + "logits/rejected": -0.3379442095756531, + "logps/chosen": -0.0001579608942847699, + "logps/rejected": -2.2226176261901855, + "loss": 0.5298, + "nll_loss": 0.13244321942329407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.579608942847699e-05, + "rewards/margins": 0.2222459614276886, + "rewards/rejected": -0.22226177155971527, + "step": 12188 + }, + { + "epoch": 8.429460580912863, + "grad_norm": 2.9861581325531006, + "learning_rate": 8.725218994928539e-06, + "log_odds_chosen": 11.143623352050781, + "log_odds_ratio": -0.00014747746172361076, + "logits/chosen": -0.3942258059978485, + "logits/rejected": -0.37120938301086426, + "logps/chosen": -0.00024196562299039215, + "logps/rejected": -2.6612982749938965, + "loss": 0.349, + "nll_loss": 0.08722300827503204, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4196562662837096e-05, + "rewards/margins": 0.26610565185546875, + "rewards/rejected": -0.26612985134124756, + "step": 12189 + }, + { + "epoch": 8.43015214384509, + "grad_norm": 4.748229503631592, + "learning_rate": 8.721376978638391e-06, + "log_odds_chosen": 10.968669891357422, + "log_odds_ratio": -7.720799476373941e-05, + "logits/chosen": 0.15589505434036255, + "logits/rejected": 0.08062975853681564, + "logps/chosen": -0.00024540620506741107, + "logps/rejected": -2.3836829662323, + "loss": 0.5417, + "nll_loss": 0.13542327284812927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4540620870538987e-05, + "rewards/margins": 0.238343745470047, + "rewards/rejected": -0.23836830258369446, + "step": 12190 + }, + { + "epoch": 8.430843706777317, + "grad_norm": 4.7469377517700195, + "learning_rate": 8.71753496234824e-06, + "log_odds_chosen": 10.221165657043457, + "log_odds_ratio": -0.00020709558157250285, + "logits/chosen": -0.17670588195323944, + "logits/rejected": -0.38517507910728455, + "logps/chosen": -0.001517834491096437, + "logps/rejected": -2.492877244949341, + "loss": 0.3678, + "nll_loss": 0.09192745387554169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015178346075117588, + "rewards/margins": 0.24913595616817474, + "rewards/rejected": -0.24928775429725647, + "step": 12191 + }, + { + "epoch": 8.431535269709544, + "grad_norm": 11.059328079223633, + "learning_rate": 8.713692946058091e-06, + "log_odds_chosen": 11.756535530090332, + "log_odds_ratio": -7.888920663390309e-05, + "logits/chosen": 0.21010424196720123, + "logits/rejected": 0.19032058119773865, + "logps/chosen": -0.0002800583897624165, + "logps/rejected": -3.1366214752197266, + "loss": 0.6249, + "nll_loss": 0.15621690452098846, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8005841159028932e-05, + "rewards/margins": 0.31363415718078613, + "rewards/rejected": -0.31366217136383057, + "step": 12192 + }, + { + "epoch": 8.43222683264177, + "grad_norm": 3.9881293773651123, + "learning_rate": 8.709850929767943e-06, + "log_odds_chosen": 10.353635787963867, + "log_odds_ratio": -0.0001635959924897179, + "logits/chosen": -0.20906057953834534, + "logits/rejected": -0.1871834546327591, + "logps/chosen": -0.00031968281837180257, + "logps/rejected": -1.9287995100021362, + "loss": 0.44, + "nll_loss": 0.10998797416687012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.196828401996754e-05, + "rewards/margins": 0.19284798204898834, + "rewards/rejected": -0.19287994503974915, + "step": 12193 + }, + { + "epoch": 8.432918395573997, + "grad_norm": 8.100662231445312, + "learning_rate": 8.706008913477794e-06, + "log_odds_chosen": 11.065887451171875, + "log_odds_ratio": -6.525550998048857e-05, + "logits/chosen": 0.10948903858661652, + "logits/rejected": 0.01994427479803562, + "logps/chosen": -0.0005141894216649234, + "logps/rejected": -2.825793504714966, + "loss": 0.4932, + "nll_loss": 0.12329844385385513, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1418937800917774e-05, + "rewards/margins": 0.2825279235839844, + "rewards/rejected": -0.28257936239242554, + "step": 12194 + }, + { + "epoch": 8.433609958506224, + "grad_norm": 4.43437385559082, + "learning_rate": 8.702166897187645e-06, + "log_odds_chosen": 12.670177459716797, + "log_odds_ratio": -9.009807399706915e-06, + "logits/chosen": -0.007829040288925171, + "logits/rejected": -0.13075649738311768, + "logps/chosen": -0.00011624552280409262, + "logps/rejected": -3.3971920013427734, + "loss": 0.6189, + "nll_loss": 0.15472808480262756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1624552826106083e-05, + "rewards/margins": 0.339707612991333, + "rewards/rejected": -0.3397192358970642, + "step": 12195 + }, + { + "epoch": 8.434301521438451, + "grad_norm": 4.17750358581543, + "learning_rate": 8.698324880897496e-06, + "log_odds_chosen": 10.271980285644531, + "log_odds_ratio": -0.00017558068793732673, + "logits/chosen": -0.05984572321176529, + "logits/rejected": -0.07835595309734344, + "logps/chosen": -0.00019501293718349189, + "logps/rejected": -1.7530505657196045, + "loss": 0.4146, + "nll_loss": 0.10362517088651657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.950129444594495e-05, + "rewards/margins": 0.17528554797172546, + "rewards/rejected": -0.1753050684928894, + "step": 12196 + }, + { + "epoch": 8.434993084370678, + "grad_norm": 2.6712281703948975, + "learning_rate": 8.694482864607346e-06, + "log_odds_chosen": 11.466583251953125, + "log_odds_ratio": -4.071402145200409e-05, + "logits/chosen": -0.17353668808937073, + "logits/rejected": -0.24364444613456726, + "logps/chosen": -0.00044437983888201416, + "logps/rejected": -2.8944315910339355, + "loss": 0.2417, + "nll_loss": 0.06041009724140167, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4437983888201416e-05, + "rewards/margins": 0.289398729801178, + "rewards/rejected": -0.28944316506385803, + "step": 12197 + }, + { + "epoch": 8.435684647302905, + "grad_norm": 5.480512619018555, + "learning_rate": 8.690640848317197e-06, + "log_odds_chosen": 11.018938064575195, + "log_odds_ratio": -7.402783376164734e-05, + "logits/chosen": -0.02111491560935974, + "logits/rejected": -0.09096451848745346, + "logps/chosen": -0.0005470228497870266, + "logps/rejected": -2.91145658493042, + "loss": 0.4897, + "nll_loss": 0.12242485582828522, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4702290071872994e-05, + "rewards/margins": 0.2910909950733185, + "rewards/rejected": -0.2911456823348999, + "step": 12198 + }, + { + "epoch": 8.436376210235132, + "grad_norm": 3.627214193344116, + "learning_rate": 8.68679883202705e-06, + "log_odds_chosen": 9.854841232299805, + "log_odds_ratio": -0.0004699954588431865, + "logits/chosen": -0.14234337210655212, + "logits/rejected": -0.1919478327035904, + "logps/chosen": -0.0007636388181708753, + "logps/rejected": -1.9873713254928589, + "loss": 0.354, + "nll_loss": 0.08845454454421997, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.636388181708753e-05, + "rewards/margins": 0.19866077601909637, + "rewards/rejected": -0.19873715937137604, + "step": 12199 + }, + { + "epoch": 8.437067773167358, + "grad_norm": 3.840066432952881, + "learning_rate": 8.682956815736899e-06, + "log_odds_chosen": 10.535361289978027, + "log_odds_ratio": -9.704182593850419e-05, + "logits/chosen": -0.348332941532135, + "logits/rejected": -0.3617256283760071, + "logps/chosen": -0.0003035268746316433, + "logps/rejected": -1.9288250207901, + "loss": 0.4512, + "nll_loss": 0.1127912625670433, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.035268855455797e-05, + "rewards/margins": 0.19285213947296143, + "rewards/rejected": -0.1928825080394745, + "step": 12200 + }, + { + "epoch": 8.437759336099585, + "grad_norm": 5.707167148590088, + "learning_rate": 8.67911479944675e-06, + "log_odds_chosen": 10.545770645141602, + "log_odds_ratio": -6.985733489273116e-05, + "logits/chosen": 0.03148500621318817, + "logits/rejected": 0.056927334517240524, + "logps/chosen": -0.0003280769451521337, + "logps/rejected": -2.1979610919952393, + "loss": 0.3663, + "nll_loss": 0.0915786474943161, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.280769306002185e-05, + "rewards/margins": 0.21976329386234283, + "rewards/rejected": -0.2197960913181305, + "step": 12201 + }, + { + "epoch": 8.438450899031812, + "grad_norm": 4.823429584503174, + "learning_rate": 8.675272783156602e-06, + "log_odds_chosen": 11.148094177246094, + "log_odds_ratio": -0.00015547266229987144, + "logits/chosen": 0.022853679955005646, + "logits/rejected": -0.06651067733764648, + "logps/chosen": -0.0002881488180719316, + "logps/rejected": -2.530770778656006, + "loss": 0.3408, + "nll_loss": 0.08519028127193451, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.881488217099104e-05, + "rewards/margins": 0.25304824113845825, + "rewards/rejected": -0.25307705998420715, + "step": 12202 + }, + { + "epoch": 8.439142461964039, + "grad_norm": 5.328279495239258, + "learning_rate": 8.671430766866453e-06, + "log_odds_chosen": 10.745915412902832, + "log_odds_ratio": -6.916501297382638e-05, + "logits/chosen": 0.02286948636174202, + "logits/rejected": -0.07234127074480057, + "logps/chosen": -0.00023049935407470912, + "logps/rejected": -2.2224698066711426, + "loss": 0.3664, + "nll_loss": 0.0915989875793457, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3049935407470912e-05, + "rewards/margins": 0.2222239375114441, + "rewards/rejected": -0.22224700450897217, + "step": 12203 + }, + { + "epoch": 8.439834024896266, + "grad_norm": 4.388050556182861, + "learning_rate": 8.667588750576303e-06, + "log_odds_chosen": 11.873761177062988, + "log_odds_ratio": -9.016584954224527e-06, + "logits/chosen": -0.25353333353996277, + "logits/rejected": -0.5208718776702881, + "logps/chosen": -0.0001195125951198861, + "logps/rejected": -2.624521017074585, + "loss": 0.5946, + "nll_loss": 0.14865511655807495, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1951260603382252e-05, + "rewards/margins": 0.26244014501571655, + "rewards/rejected": -0.26245206594467163, + "step": 12204 + }, + { + "epoch": 8.440525587828493, + "grad_norm": 3.7123875617980957, + "learning_rate": 8.663746734286154e-06, + "log_odds_chosen": 10.771041870117188, + "log_odds_ratio": -6.33986564935185e-05, + "logits/chosen": -0.1201104074716568, + "logits/rejected": -0.16946665942668915, + "logps/chosen": -0.000875930767506361, + "logps/rejected": -2.485856771469116, + "loss": 0.4159, + "nll_loss": 0.10397932678461075, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.759308548178524e-05, + "rewards/margins": 0.2484980821609497, + "rewards/rejected": -0.24858567118644714, + "step": 12205 + }, + { + "epoch": 8.44121715076072, + "grad_norm": 2.749152660369873, + "learning_rate": 8.659904717996005e-06, + "log_odds_chosen": 11.550658226013184, + "log_odds_ratio": -3.068416481255554e-05, + "logits/chosen": -0.594039261341095, + "logits/rejected": -0.49890458583831787, + "logps/chosen": -0.0001349577505607158, + "logps/rejected": -2.299560308456421, + "loss": 0.4535, + "nll_loss": 0.11336538195610046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.349577541986946e-05, + "rewards/margins": 0.22994254529476166, + "rewards/rejected": -0.2299560308456421, + "step": 12206 + }, + { + "epoch": 8.441908713692946, + "grad_norm": 7.287164211273193, + "learning_rate": 8.656062701705856e-06, + "log_odds_chosen": 11.473183631896973, + "log_odds_ratio": -1.6418533050455153e-05, + "logits/chosen": -0.10909983515739441, + "logits/rejected": -0.20523229241371155, + "logps/chosen": -0.0001011871499940753, + "logps/rejected": -2.2168242931365967, + "loss": 0.6241, + "nll_loss": 0.15602414309978485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.011871518130647e-05, + "rewards/margins": 0.22167231142520905, + "rewards/rejected": -0.22168241441249847, + "step": 12207 + }, + { + "epoch": 8.442600276625173, + "grad_norm": 3.5397424697875977, + "learning_rate": 8.652220685415706e-06, + "log_odds_chosen": 9.55927848815918, + "log_odds_ratio": -0.0002645787608344108, + "logits/chosen": -0.10018275678157806, + "logits/rejected": -0.09783291816711426, + "logps/chosen": -0.00026651995722204447, + "logps/rejected": -1.4151195287704468, + "loss": 0.2867, + "nll_loss": 0.07166013866662979, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6651994630810805e-05, + "rewards/margins": 0.1414853036403656, + "rewards/rejected": -0.1415119469165802, + "step": 12208 + }, + { + "epoch": 8.4432918395574, + "grad_norm": 3.645803451538086, + "learning_rate": 8.648378669125557e-06, + "log_odds_chosen": 10.460966110229492, + "log_odds_ratio": -0.0001973931648535654, + "logits/chosen": -0.31744301319122314, + "logits/rejected": -0.2794133424758911, + "logps/chosen": -0.00026282010367140174, + "logps/rejected": -1.6829901933670044, + "loss": 0.4669, + "nll_loss": 0.11669819056987762, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6282012186129577e-05, + "rewards/margins": 0.1682727336883545, + "rewards/rejected": -0.16829901933670044, + "step": 12209 + }, + { + "epoch": 8.443983402489627, + "grad_norm": 6.920041084289551, + "learning_rate": 8.644536652835408e-06, + "log_odds_chosen": 10.067420959472656, + "log_odds_ratio": -0.0001373344857711345, + "logits/chosen": -0.1907285749912262, + "logits/rejected": -0.2630813419818878, + "logps/chosen": -0.00036663428181782365, + "logps/rejected": -1.7894313335418701, + "loss": 0.4426, + "nll_loss": 0.11062738299369812, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.666342672659084e-05, + "rewards/margins": 0.17890647053718567, + "rewards/rejected": -0.17894312739372253, + "step": 12210 + }, + { + "epoch": 8.444674965421854, + "grad_norm": 3.9832401275634766, + "learning_rate": 8.640694636545259e-06, + "log_odds_chosen": 12.233833312988281, + "log_odds_ratio": -9.003819286590442e-06, + "logits/chosen": -0.12104113399982452, + "logits/rejected": -0.3305937647819519, + "logps/chosen": -9.159260662272573e-05, + "logps/rejected": -2.7944068908691406, + "loss": 0.3574, + "nll_loss": 0.0893465206027031, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.159261026070453e-06, + "rewards/margins": 0.2794315218925476, + "rewards/rejected": -0.27944067120552063, + "step": 12211 + }, + { + "epoch": 8.44536652835408, + "grad_norm": 4.182587623596191, + "learning_rate": 8.636852620255111e-06, + "log_odds_chosen": 11.980340003967285, + "log_odds_ratio": -1.8716213162406348e-05, + "logits/chosen": -0.16231654584407806, + "logits/rejected": -0.24108725786209106, + "logps/chosen": -6.173996371217072e-05, + "logps/rejected": -2.3546369075775146, + "loss": 0.2665, + "nll_loss": 0.06663516908884048, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173996553116012e-06, + "rewards/margins": 0.23545750975608826, + "rewards/rejected": -0.2354636788368225, + "step": 12212 + }, + { + "epoch": 8.446058091286307, + "grad_norm": 5.585501670837402, + "learning_rate": 8.633010603964962e-06, + "log_odds_chosen": 12.061046600341797, + "log_odds_ratio": -4.808824087376706e-05, + "logits/chosen": 0.07987240701913834, + "logits/rejected": 0.04905199632048607, + "logps/chosen": -0.000453970511443913, + "logps/rejected": -3.1471643447875977, + "loss": 0.4296, + "nll_loss": 0.10740132629871368, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5397049689199775e-05, + "rewards/margins": 0.31467103958129883, + "rewards/rejected": -0.3147164583206177, + "step": 12213 + }, + { + "epoch": 8.446749654218534, + "grad_norm": 3.547139883041382, + "learning_rate": 8.62916858767481e-06, + "log_odds_chosen": 11.334451675415039, + "log_odds_ratio": -0.0001582876720931381, + "logits/chosen": -0.045442111790180206, + "logits/rejected": -0.13509945571422577, + "logps/chosen": -0.00022221980907488614, + "logps/rejected": -2.3844339847564697, + "loss": 0.4707, + "nll_loss": 0.11766263842582703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2221982362680137e-05, + "rewards/margins": 0.23842118680477142, + "rewards/rejected": -0.23844340443611145, + "step": 12214 + }, + { + "epoch": 8.447441217150761, + "grad_norm": 3.663975477218628, + "learning_rate": 8.625326571384663e-06, + "log_odds_chosen": 10.991447448730469, + "log_odds_ratio": -0.00013837986625730991, + "logits/chosen": 0.10032883286476135, + "logits/rejected": 0.15407739579677582, + "logps/chosen": -0.0005728535470552742, + "logps/rejected": -2.9996249675750732, + "loss": 0.8703, + "nll_loss": 0.2175583392381668, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.72853532503359e-05, + "rewards/margins": 0.2999052405357361, + "rewards/rejected": -0.2999625504016876, + "step": 12215 + }, + { + "epoch": 8.448132780082988, + "grad_norm": 4.488317012786865, + "learning_rate": 8.621484555094514e-06, + "log_odds_chosen": 11.125843048095703, + "log_odds_ratio": -0.00010230368934571743, + "logits/chosen": -0.05891960486769676, + "logits/rejected": -0.1504005342721939, + "logps/chosen": -0.0002756164758466184, + "logps/rejected": -2.4272990226745605, + "loss": 0.5428, + "nll_loss": 0.135688915848732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.756165122264065e-05, + "rewards/margins": 0.24270235002040863, + "rewards/rejected": -0.24272991716861725, + "step": 12216 + }, + { + "epoch": 8.448824343015215, + "grad_norm": 4.28665018081665, + "learning_rate": 8.617642538804365e-06, + "log_odds_chosen": 10.961759567260742, + "log_odds_ratio": -6.393397052306682e-05, + "logits/chosen": -0.5306499004364014, + "logits/rejected": -0.4578840136528015, + "logps/chosen": -0.00016053387662395835, + "logps/rejected": -1.6820318698883057, + "loss": 0.4358, + "nll_loss": 0.10893905162811279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6053387298597954e-05, + "rewards/margins": 0.16818714141845703, + "rewards/rejected": -0.168203204870224, + "step": 12217 + }, + { + "epoch": 8.449515905947441, + "grad_norm": 4.335683345794678, + "learning_rate": 8.613800522514215e-06, + "log_odds_chosen": 10.537773132324219, + "log_odds_ratio": -0.00010391612886451185, + "logits/chosen": -0.15171320736408234, + "logits/rejected": -0.08037220686674118, + "logps/chosen": -0.000234838473261334, + "logps/rejected": -2.0352041721343994, + "loss": 0.576, + "nll_loss": 0.14398017525672913, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3483846234739758e-05, + "rewards/margins": 0.20349694788455963, + "rewards/rejected": -0.20352043211460114, + "step": 12218 + }, + { + "epoch": 8.450207468879668, + "grad_norm": 2.9263315200805664, + "learning_rate": 8.609958506224066e-06, + "log_odds_chosen": 10.841743469238281, + "log_odds_ratio": -3.680678128148429e-05, + "logits/chosen": -0.35723212361335754, + "logits/rejected": -0.3755517899990082, + "logps/chosen": -0.00010235553054371849, + "logps/rejected": -1.8037569522857666, + "loss": 0.3004, + "nll_loss": 0.07508653402328491, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.023555341816973e-05, + "rewards/margins": 0.18036547303199768, + "rewards/rejected": -0.18037571012973785, + "step": 12219 + }, + { + "epoch": 8.450899031811895, + "grad_norm": 3.9375154972076416, + "learning_rate": 8.606116489933917e-06, + "log_odds_chosen": 11.44171142578125, + "log_odds_ratio": -2.285877963004168e-05, + "logits/chosen": -0.1019626185297966, + "logits/rejected": -0.05697731673717499, + "logps/chosen": -0.00022979540517553687, + "logps/rejected": -2.6647655963897705, + "loss": 0.3392, + "nll_loss": 0.08479012548923492, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2979538698564284e-05, + "rewards/margins": 0.2664535641670227, + "rewards/rejected": -0.2664765417575836, + "step": 12220 + }, + { + "epoch": 8.451590594744122, + "grad_norm": 2.777099370956421, + "learning_rate": 8.60227447364377e-06, + "log_odds_chosen": 10.597506523132324, + "log_odds_ratio": -0.0005935538210906088, + "logits/chosen": -0.14212588965892792, + "logits/rejected": -0.21859464049339294, + "logps/chosen": -0.00168894964735955, + "logps/rejected": -2.2412731647491455, + "loss": 0.3362, + "nll_loss": 0.0839921236038208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001688949705567211, + "rewards/margins": 0.22395843267440796, + "rewards/rejected": -0.22412732243537903, + "step": 12221 + }, + { + "epoch": 8.452282157676349, + "grad_norm": 4.140081405639648, + "learning_rate": 8.59843245735362e-06, + "log_odds_chosen": 10.09188175201416, + "log_odds_ratio": -0.00014964811271056533, + "logits/chosen": -0.27937549352645874, + "logits/rejected": -0.32379117608070374, + "logps/chosen": -0.0002269076940137893, + "logps/rejected": -1.7309942245483398, + "loss": 0.3503, + "nll_loss": 0.08755438029766083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2690768673783168e-05, + "rewards/margins": 0.17307671904563904, + "rewards/rejected": -0.17309942841529846, + "step": 12222 + }, + { + "epoch": 8.452973720608576, + "grad_norm": 3.208634376525879, + "learning_rate": 8.59459044106347e-06, + "log_odds_chosen": 10.598793029785156, + "log_odds_ratio": -0.0008950430783443153, + "logits/chosen": -0.35770896077156067, + "logits/rejected": -0.44755396246910095, + "logps/chosen": -0.00039982280577532947, + "logps/rejected": -1.6339364051818848, + "loss": 0.3351, + "nll_loss": 0.08368603885173798, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.998228203272447e-05, + "rewards/margins": 0.16335365176200867, + "rewards/rejected": -0.16339364647865295, + "step": 12223 + }, + { + "epoch": 8.453665283540802, + "grad_norm": 5.284857749938965, + "learning_rate": 8.590748424773322e-06, + "log_odds_chosen": 11.325607299804688, + "log_odds_ratio": -8.094254735624418e-05, + "logits/chosen": -0.6272037029266357, + "logits/rejected": -0.5712891817092896, + "logps/chosen": -0.00017863322864286602, + "logps/rejected": -2.5380496978759766, + "loss": 0.408, + "nll_loss": 0.10198329389095306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7863323591882363e-05, + "rewards/margins": 0.253787100315094, + "rewards/rejected": -0.2538049519062042, + "step": 12224 + }, + { + "epoch": 8.45435684647303, + "grad_norm": 7.783411979675293, + "learning_rate": 8.586906408483172e-06, + "log_odds_chosen": 10.21047306060791, + "log_odds_ratio": -7.494394958484918e-05, + "logits/chosen": -0.2875575125217438, + "logits/rejected": -0.3262416124343872, + "logps/chosen": -0.0003216335317119956, + "logps/rejected": -2.103597402572632, + "loss": 0.3015, + "nll_loss": 0.07537385821342468, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2163356081582606e-05, + "rewards/margins": 0.21032759547233582, + "rewards/rejected": -0.21035973727703094, + "step": 12225 + }, + { + "epoch": 8.455048409405256, + "grad_norm": 3.302433729171753, + "learning_rate": 8.583064392193023e-06, + "log_odds_chosen": 10.463157653808594, + "log_odds_ratio": -0.00010431169357616454, + "logits/chosen": -0.1801649034023285, + "logits/rejected": -0.28582602739334106, + "logps/chosen": -0.00015797067317180336, + "logps/rejected": -1.7762954235076904, + "loss": 0.3563, + "nll_loss": 0.08906436711549759, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5797068044776097e-05, + "rewards/margins": 0.1776137501001358, + "rewards/rejected": -0.17762956023216248, + "step": 12226 + }, + { + "epoch": 8.455739972337483, + "grad_norm": 3.054396629333496, + "learning_rate": 8.579222375902874e-06, + "log_odds_chosen": 11.380236625671387, + "log_odds_ratio": -2.2073149011703208e-05, + "logits/chosen": -0.28868213295936584, + "logits/rejected": -0.34390684962272644, + "logps/chosen": -0.00012567838712129742, + "logps/rejected": -2.3157641887664795, + "loss": 0.4203, + "nll_loss": 0.10508093237876892, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.256783798453398e-05, + "rewards/margins": 0.23156385123729706, + "rewards/rejected": -0.23157641291618347, + "step": 12227 + }, + { + "epoch": 8.45643153526971, + "grad_norm": 5.460083961486816, + "learning_rate": 8.575380359612725e-06, + "log_odds_chosen": 11.746459007263184, + "log_odds_ratio": -9.499966108705848e-05, + "logits/chosen": -0.4380587637424469, + "logits/rejected": -0.5310646891593933, + "logps/chosen": -0.0008287655073218048, + "logps/rejected": -2.680570602416992, + "loss": 0.3497, + "nll_loss": 0.0874042958021164, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.287656237371266e-05, + "rewards/margins": 0.2679741680622101, + "rewards/rejected": -0.26805704832077026, + "step": 12228 + }, + { + "epoch": 8.457123098201937, + "grad_norm": 4.851430892944336, + "learning_rate": 8.571538343322575e-06, + "log_odds_chosen": 10.439013481140137, + "log_odds_ratio": -7.416032894980162e-05, + "logits/chosen": -0.2315261960029602, + "logits/rejected": -0.3399898111820221, + "logps/chosen": -0.00019020246691070497, + "logps/rejected": -1.8271386623382568, + "loss": 0.4617, + "nll_loss": 0.11541399359703064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9020246327272616e-05, + "rewards/margins": 0.18269485235214233, + "rewards/rejected": -0.18271386623382568, + "step": 12229 + }, + { + "epoch": 8.457814661134163, + "grad_norm": 2.786869764328003, + "learning_rate": 8.567696327032428e-06, + "log_odds_chosen": 11.525938987731934, + "log_odds_ratio": -4.216610614093952e-05, + "logits/chosen": -0.24258199334144592, + "logits/rejected": -0.24917370080947876, + "logps/chosen": -0.00016670665354467928, + "logps/rejected": -2.475355625152588, + "loss": 0.3162, + "nll_loss": 0.07905568182468414, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6670664990670048e-05, + "rewards/margins": 0.2475188970565796, + "rewards/rejected": -0.2475355714559555, + "step": 12230 + }, + { + "epoch": 8.45850622406639, + "grad_norm": 3.303145170211792, + "learning_rate": 8.563854310742279e-06, + "log_odds_chosen": 10.78154468536377, + "log_odds_ratio": -0.00016814623086247593, + "logits/chosen": -0.4072422683238983, + "logits/rejected": -0.4645257592201233, + "logps/chosen": -0.0005194892291910946, + "logps/rejected": -1.976096749305725, + "loss": 0.5252, + "nll_loss": 0.13128185272216797, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1948918553534895e-05, + "rewards/margins": 0.197557732462883, + "rewards/rejected": -0.19760967791080475, + "step": 12231 + }, + { + "epoch": 8.459197786998617, + "grad_norm": 6.349554538726807, + "learning_rate": 8.56001229445213e-06, + "log_odds_chosen": 10.750853538513184, + "log_odds_ratio": -0.00018462100706528872, + "logits/chosen": -0.2011597901582718, + "logits/rejected": -0.3059113621711731, + "logps/chosen": -0.0006470892112702131, + "logps/rejected": -2.489020586013794, + "loss": 0.3602, + "nll_loss": 0.09003066271543503, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470891821663827e-05, + "rewards/margins": 0.24883735179901123, + "rewards/rejected": -0.2489020824432373, + "step": 12232 + }, + { + "epoch": 8.459889349930844, + "grad_norm": 3.7937495708465576, + "learning_rate": 8.55617027816198e-06, + "log_odds_chosen": 13.031977653503418, + "log_odds_ratio": -5.436918399936985e-06, + "logits/chosen": -0.5057649612426758, + "logits/rejected": -0.5373073816299438, + "logps/chosen": -8.866170537658036e-05, + "logps/rejected": -3.4923675060272217, + "loss": 0.3489, + "nll_loss": 0.08722809702157974, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.866170901455916e-06, + "rewards/margins": 0.3492278754711151, + "rewards/rejected": -0.34923672676086426, + "step": 12233 + }, + { + "epoch": 8.46058091286307, + "grad_norm": 3.9468917846679688, + "learning_rate": 8.552328261871831e-06, + "log_odds_chosen": 10.901199340820312, + "log_odds_ratio": -2.931928975158371e-05, + "logits/chosen": -0.45410242676734924, + "logits/rejected": -0.3756451904773712, + "logps/chosen": -0.00015654783055651933, + "logps/rejected": -2.104238986968994, + "loss": 0.2744, + "nll_loss": 0.06858496367931366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5654783055651933e-05, + "rewards/margins": 0.2104082554578781, + "rewards/rejected": -0.21042391657829285, + "step": 12234 + }, + { + "epoch": 8.461272475795298, + "grad_norm": 3.589395046234131, + "learning_rate": 8.548486245581682e-06, + "log_odds_chosen": 10.89686393737793, + "log_odds_ratio": -0.0001275877293664962, + "logits/chosen": -0.20068512856960297, + "logits/rejected": -0.2796490788459778, + "logps/chosen": -0.00015666541003156453, + "logps/rejected": -1.6563934087753296, + "loss": 0.369, + "nll_loss": 0.0922326073050499, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5666542822145857e-05, + "rewards/margins": 0.16562367975711823, + "rewards/rejected": -0.16563934087753296, + "step": 12235 + }, + { + "epoch": 8.461964038727524, + "grad_norm": 3.8936188220977783, + "learning_rate": 8.544644229291534e-06, + "log_odds_chosen": 11.288116455078125, + "log_odds_ratio": -0.00013722201401833445, + "logits/chosen": -0.5005241632461548, + "logits/rejected": -0.5129424929618835, + "logps/chosen": -0.00038747471990063787, + "logps/rejected": -3.04249906539917, + "loss": 0.4177, + "nll_loss": 0.10440607368946075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8747471990063787e-05, + "rewards/margins": 0.3042111396789551, + "rewards/rejected": -0.30424991250038147, + "step": 12236 + }, + { + "epoch": 8.462655601659751, + "grad_norm": 4.5463151931762695, + "learning_rate": 8.540802213001383e-06, + "log_odds_chosen": 11.708345413208008, + "log_odds_ratio": -2.0565257727866992e-05, + "logits/chosen": -0.37573695182800293, + "logits/rejected": -0.3132202923297882, + "logps/chosen": -0.00018604497017804533, + "logps/rejected": -2.4184212684631348, + "loss": 0.4605, + "nll_loss": 0.11511382460594177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8604498109198175e-05, + "rewards/margins": 0.24182350933551788, + "rewards/rejected": -0.241842120885849, + "step": 12237 + }, + { + "epoch": 8.463347164591978, + "grad_norm": 4.1725077629089355, + "learning_rate": 8.536960196711234e-06, + "log_odds_chosen": 11.375772476196289, + "log_odds_ratio": -1.633433930692263e-05, + "logits/chosen": -0.03759506717324257, + "logits/rejected": -0.0693630501627922, + "logps/chosen": -0.00016117203631438315, + "logps/rejected": -2.4761452674865723, + "loss": 0.4233, + "nll_loss": 0.10581202805042267, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6117204722831957e-05, + "rewards/margins": 0.24759840965270996, + "rewards/rejected": -0.2476145327091217, + "step": 12238 + }, + { + "epoch": 8.464038727524205, + "grad_norm": 3.870225429534912, + "learning_rate": 8.533118180421086e-06, + "log_odds_chosen": 11.548178672790527, + "log_odds_ratio": -8.487018931191415e-05, + "logits/chosen": -0.5760559439659119, + "logits/rejected": -0.5758537650108337, + "logps/chosen": -0.00016482733190059662, + "logps/rejected": -2.121696949005127, + "loss": 0.3367, + "nll_loss": 0.0841749906539917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.64827324624639e-05, + "rewards/margins": 0.21215322613716125, + "rewards/rejected": -0.21216970682144165, + "step": 12239 + }, + { + "epoch": 8.464730290456432, + "grad_norm": 4.679924964904785, + "learning_rate": 8.529276164130937e-06, + "log_odds_chosen": 11.397396087646484, + "log_odds_ratio": -2.749465056695044e-05, + "logits/chosen": -0.2636311650276184, + "logits/rejected": -0.4466463625431061, + "logps/chosen": -0.00019256738596595824, + "logps/rejected": -2.625746250152588, + "loss": 0.527, + "nll_loss": 0.1317574381828308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9256738596595824e-05, + "rewards/margins": 0.2625553607940674, + "rewards/rejected": -0.26257461309432983, + "step": 12240 + }, + { + "epoch": 8.465421853388658, + "grad_norm": 6.500254154205322, + "learning_rate": 8.525434147840788e-06, + "log_odds_chosen": 10.750042915344238, + "log_odds_ratio": -0.0003058892616536468, + "logits/chosen": -0.0664311945438385, + "logits/rejected": -0.08879110962152481, + "logps/chosen": -0.00020295722060836852, + "logps/rejected": -2.48722505569458, + "loss": 0.5886, + "nll_loss": 0.1471274346113205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.029572169703897e-05, + "rewards/margins": 0.24870222806930542, + "rewards/rejected": -0.24872252345085144, + "step": 12241 + }, + { + "epoch": 8.466113416320885, + "grad_norm": 3.87945556640625, + "learning_rate": 8.521592131550637e-06, + "log_odds_chosen": 11.44169807434082, + "log_odds_ratio": -1.966490162885748e-05, + "logits/chosen": -0.4701942205429077, + "logits/rejected": -0.46853071451187134, + "logps/chosen": -0.00022171103046275675, + "logps/rejected": -2.206536293029785, + "loss": 0.3678, + "nll_loss": 0.09194795042276382, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2171103410073556e-05, + "rewards/margins": 0.22063148021697998, + "rewards/rejected": -0.22065363824367523, + "step": 12242 + }, + { + "epoch": 8.466804979253112, + "grad_norm": 32.3742790222168, + "learning_rate": 8.51775011526049e-06, + "log_odds_chosen": 11.439393997192383, + "log_odds_ratio": -0.00010279798152623698, + "logits/chosen": -0.4229426980018616, + "logits/rejected": -0.45810258388519287, + "logps/chosen": -0.0003110724501311779, + "logps/rejected": -2.6357650756835938, + "loss": 0.4091, + "nll_loss": 0.1022646576166153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.110724355792627e-05, + "rewards/margins": 0.2635453939437866, + "rewards/rejected": -0.2635765075683594, + "step": 12243 + }, + { + "epoch": 8.467496542185339, + "grad_norm": 4.420064926147461, + "learning_rate": 8.51390809897034e-06, + "log_odds_chosen": 10.127920150756836, + "log_odds_ratio": -0.00014040838868822902, + "logits/chosen": -0.4244319200515747, + "logits/rejected": -0.4691627025604248, + "logps/chosen": -0.00036072367220185697, + "logps/rejected": -1.6990998983383179, + "loss": 0.5634, + "nll_loss": 0.14082545042037964, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6072371585760266e-05, + "rewards/margins": 0.1698739230632782, + "rewards/rejected": -0.1699099987745285, + "step": 12244 + }, + { + "epoch": 8.468188105117566, + "grad_norm": 3.776829242706299, + "learning_rate": 8.51006608268019e-06, + "log_odds_chosen": 9.505712509155273, + "log_odds_ratio": -0.0007945825927890837, + "logits/chosen": -0.6400420069694519, + "logits/rejected": -0.6537680625915527, + "logps/chosen": -0.001891196588985622, + "logps/rejected": -1.7540403604507446, + "loss": 0.4024, + "nll_loss": 0.10051442682743073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001891196588985622, + "rewards/margins": 0.17521491646766663, + "rewards/rejected": -0.17540404200553894, + "step": 12245 + }, + { + "epoch": 8.468879668049793, + "grad_norm": 3.0698983669281006, + "learning_rate": 8.506224066390042e-06, + "log_odds_chosen": 12.119697570800781, + "log_odds_ratio": -0.000320388819091022, + "logits/chosen": -0.15294787287712097, + "logits/rejected": -0.17274241149425507, + "logps/chosen": -0.0005173084791749716, + "logps/rejected": -3.080294609069824, + "loss": 0.3448, + "nll_loss": 0.0861569195985794, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.17308471899014e-05, + "rewards/margins": 0.30797770619392395, + "rewards/rejected": -0.3080294728279114, + "step": 12246 + }, + { + "epoch": 8.46957123098202, + "grad_norm": 3.754049777984619, + "learning_rate": 8.502382050099892e-06, + "log_odds_chosen": 9.665637016296387, + "log_odds_ratio": -0.0006061159074306488, + "logits/chosen": -0.5966587662696838, + "logits/rejected": -0.6343376040458679, + "logps/chosen": -0.0006555309519171715, + "logps/rejected": -2.0075156688690186, + "loss": 0.3651, + "nll_loss": 0.09120656549930573, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.555309664690867e-05, + "rewards/margins": 0.2006860226392746, + "rewards/rejected": -0.20075158774852753, + "step": 12247 + }, + { + "epoch": 8.470262793914246, + "grad_norm": 2.940383195877075, + "learning_rate": 8.498540033809743e-06, + "log_odds_chosen": 9.661136627197266, + "log_odds_ratio": -0.00047493373858742416, + "logits/chosen": -0.20162512362003326, + "logits/rejected": -0.3301006853580475, + "logps/chosen": -0.00038299331208691, + "logps/rejected": -1.3793359994888306, + "loss": 0.2561, + "nll_loss": 0.0639653131365776, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8299327570712194e-05, + "rewards/margins": 0.13789530098438263, + "rewards/rejected": -0.137933611869812, + "step": 12248 + }, + { + "epoch": 8.470954356846473, + "grad_norm": 3.431257486343384, + "learning_rate": 8.494698017519595e-06, + "log_odds_chosen": 11.056364059448242, + "log_odds_ratio": -7.521332008764148e-05, + "logits/chosen": 0.04458847641944885, + "logits/rejected": -0.13365104794502258, + "logps/chosen": -0.0005594642134383321, + "logps/rejected": -2.1823577880859375, + "loss": 0.3501, + "nll_loss": 0.08752588927745819, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5946424254216254e-05, + "rewards/margins": 0.2181798219680786, + "rewards/rejected": -0.21823576092720032, + "step": 12249 + }, + { + "epoch": 8.4716459197787, + "grad_norm": 4.6607489585876465, + "learning_rate": 8.490856001229446e-06, + "log_odds_chosen": 12.409748077392578, + "log_odds_ratio": -8.075324331002776e-06, + "logits/chosen": -0.3750876486301422, + "logits/rejected": -0.30534347891807556, + "logps/chosen": -0.00014612097584176809, + "logps/rejected": -3.4359819889068604, + "loss": 0.4619, + "nll_loss": 0.11548404395580292, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.461209831177257e-05, + "rewards/margins": 0.3435836136341095, + "rewards/rejected": -0.34359821677207947, + "step": 12250 + }, + { + "epoch": 8.472337482710927, + "grad_norm": 4.8379340171813965, + "learning_rate": 8.487013984939295e-06, + "log_odds_chosen": 12.72007942199707, + "log_odds_ratio": -4.733223249786533e-06, + "logits/chosen": -0.7242316603660583, + "logits/rejected": -0.8033230304718018, + "logps/chosen": -3.626089164754376e-05, + "logps/rejected": -2.414877414703369, + "loss": 0.5539, + "nll_loss": 0.138469398021698, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.626089210229111e-06, + "rewards/margins": 0.24148410558700562, + "rewards/rejected": -0.24148772656917572, + "step": 12251 + }, + { + "epoch": 8.473029045643154, + "grad_norm": 4.147072792053223, + "learning_rate": 8.483171968649148e-06, + "log_odds_chosen": 10.668270111083984, + "log_odds_ratio": -8.216902642743662e-05, + "logits/chosen": -0.13350696861743927, + "logits/rejected": -0.16221120953559875, + "logps/chosen": -0.0008802133379504085, + "logps/rejected": -1.8316550254821777, + "loss": 1.1092, + "nll_loss": 0.2772865295410156, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.802134107099846e-05, + "rewards/margins": 0.18307748436927795, + "rewards/rejected": -0.18316550552845, + "step": 12252 + }, + { + "epoch": 8.47372060857538, + "grad_norm": 3.817070484161377, + "learning_rate": 8.479329952358998e-06, + "log_odds_chosen": 11.888955116271973, + "log_odds_ratio": -2.0903258700855076e-05, + "logits/chosen": 0.04838571697473526, + "logits/rejected": -0.07474128156900406, + "logps/chosen": -0.00016068377590272576, + "logps/rejected": -2.808671712875366, + "loss": 0.4464, + "nll_loss": 0.111586794257164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.606837940926198e-05, + "rewards/margins": 0.2808510959148407, + "rewards/rejected": -0.28086715936660767, + "step": 12253 + }, + { + "epoch": 8.474412171507607, + "grad_norm": 3.8038136959075928, + "learning_rate": 8.47548793606885e-06, + "log_odds_chosen": 10.744874000549316, + "log_odds_ratio": -4.887772956863046e-05, + "logits/chosen": -0.0147324800491333, + "logits/rejected": -0.019203737378120422, + "logps/chosen": -0.0002447717997711152, + "logps/rejected": -1.7378864288330078, + "loss": 0.33, + "nll_loss": 0.08249451965093613, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.447717997711152e-05, + "rewards/margins": 0.1737641543149948, + "rewards/rejected": -0.1737886369228363, + "step": 12254 + }, + { + "epoch": 8.475103734439834, + "grad_norm": 3.671657085418701, + "learning_rate": 8.4716459197787e-06, + "log_odds_chosen": 11.238716125488281, + "log_odds_ratio": -5.318321200320497e-05, + "logits/chosen": -0.7400655150413513, + "logits/rejected": -0.7886126041412354, + "logps/chosen": -0.00022681456175632775, + "logps/rejected": -2.474215507507324, + "loss": 0.3976, + "nll_loss": 0.09940271824598312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2681457267026417e-05, + "rewards/margins": 0.24739889800548553, + "rewards/rejected": -0.24742157757282257, + "step": 12255 + }, + { + "epoch": 8.475795297372061, + "grad_norm": 11.087566375732422, + "learning_rate": 8.46780390348855e-06, + "log_odds_chosen": 10.086482048034668, + "log_odds_ratio": -0.014995497651398182, + "logits/chosen": -0.5288323163986206, + "logits/rejected": -0.5677878856658936, + "logps/chosen": -0.002039603190496564, + "logps/rejected": -2.0050981044769287, + "loss": 0.4747, + "nll_loss": 0.11717545986175537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020396032778080553, + "rewards/margins": 0.20030583441257477, + "rewards/rejected": -0.20050980150699615, + "step": 12256 + }, + { + "epoch": 8.476486860304288, + "grad_norm": 4.343221187591553, + "learning_rate": 8.463961887198401e-06, + "log_odds_chosen": 11.159956932067871, + "log_odds_ratio": -0.00019973155576735735, + "logits/chosen": -0.03564649820327759, + "logits/rejected": -0.13567349314689636, + "logps/chosen": -0.0003570501576177776, + "logps/rejected": -2.8839199542999268, + "loss": 0.3822, + "nll_loss": 0.09552636742591858, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5705019399756566e-05, + "rewards/margins": 0.28835630416870117, + "rewards/rejected": -0.28839200735092163, + "step": 12257 + }, + { + "epoch": 8.477178423236515, + "grad_norm": 4.364232540130615, + "learning_rate": 8.460119870908254e-06, + "log_odds_chosen": 12.289931297302246, + "log_odds_ratio": -3.1001669412944466e-05, + "logits/chosen": -0.1956077218055725, + "logits/rejected": -0.26861050724983215, + "logps/chosen": -0.00019162609532941133, + "logps/rejected": -3.37749981880188, + "loss": 0.5817, + "nll_loss": 0.14541320502758026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9162609532941133e-05, + "rewards/margins": 0.3377308249473572, + "rewards/rejected": -0.3377499580383301, + "step": 12258 + }, + { + "epoch": 8.477869986168741, + "grad_norm": 4.493581771850586, + "learning_rate": 8.456277854618105e-06, + "log_odds_chosen": 12.043193817138672, + "log_odds_ratio": -1.669879929977469e-05, + "logits/chosen": -0.35270121693611145, + "logits/rejected": -0.35970330238342285, + "logps/chosen": -0.00017171446233987808, + "logps/rejected": -2.586911201477051, + "loss": 0.408, + "nll_loss": 0.10200444608926773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.717144732538145e-05, + "rewards/margins": 0.2586739659309387, + "rewards/rejected": -0.25869113206863403, + "step": 12259 + }, + { + "epoch": 8.478561549100968, + "grad_norm": 3.1778171062469482, + "learning_rate": 8.452435838327954e-06, + "log_odds_chosen": 11.451273918151855, + "log_odds_ratio": -3.4607517591211945e-05, + "logits/chosen": -0.5733155012130737, + "logits/rejected": -0.5721181035041809, + "logps/chosen": -0.00013759022112935781, + "logps/rejected": -2.4554004669189453, + "loss": 0.4866, + "nll_loss": 0.12163795530796051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3759023204329424e-05, + "rewards/margins": 0.2455262839794159, + "rewards/rejected": -0.245540052652359, + "step": 12260 + }, + { + "epoch": 8.479253112033195, + "grad_norm": 3.6349940299987793, + "learning_rate": 8.448593822037806e-06, + "log_odds_chosen": 10.85304069519043, + "log_odds_ratio": -5.069684630143456e-05, + "logits/chosen": -0.5185079574584961, + "logits/rejected": -0.5641573071479797, + "logps/chosen": -0.0011122021824121475, + "logps/rejected": -2.387133836746216, + "loss": 0.3159, + "nll_loss": 0.07897399365901947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001112202153308317, + "rewards/margins": 0.2386021614074707, + "rewards/rejected": -0.23871338367462158, + "step": 12261 + }, + { + "epoch": 8.479944674965422, + "grad_norm": 5.675334930419922, + "learning_rate": 8.444751805747657e-06, + "log_odds_chosen": 11.700498580932617, + "log_odds_ratio": -2.6974110369337723e-05, + "logits/chosen": -0.15723538398742676, + "logits/rejected": -0.28113627433776855, + "logps/chosen": -0.0002666892542038113, + "logps/rejected": -2.8769774436950684, + "loss": 0.6429, + "nll_loss": 0.16072174906730652, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6668923965189606e-05, + "rewards/margins": 0.2876710891723633, + "rewards/rejected": -0.2876977324485779, + "step": 12262 + }, + { + "epoch": 8.480636237897649, + "grad_norm": 2.913564443588257, + "learning_rate": 8.440909789457508e-06, + "log_odds_chosen": 10.883523941040039, + "log_odds_ratio": -0.0002211461978731677, + "logits/chosen": -0.4537699818611145, + "logits/rejected": -0.4931557774543762, + "logps/chosen": -0.0014517331728711724, + "logps/rejected": -3.1377670764923096, + "loss": 0.3183, + "nll_loss": 0.07955101877450943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014517331146635115, + "rewards/margins": 0.313631534576416, + "rewards/rejected": -0.3137767016887665, + "step": 12263 + }, + { + "epoch": 8.481327800829876, + "grad_norm": 4.738100528717041, + "learning_rate": 8.437067773167358e-06, + "log_odds_chosen": 11.844022750854492, + "log_odds_ratio": -1.6696354578016326e-05, + "logits/chosen": -0.5999171733856201, + "logits/rejected": -0.7666305899620056, + "logps/chosen": -0.00015944114420562983, + "logps/rejected": -2.4665656089782715, + "loss": 0.4528, + "nll_loss": 0.1132088303565979, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5944115148158744e-05, + "rewards/margins": 0.2466406524181366, + "rewards/rejected": -0.24665658175945282, + "step": 12264 + }, + { + "epoch": 8.482019363762102, + "grad_norm": 4.688370704650879, + "learning_rate": 8.43322575687721e-06, + "log_odds_chosen": 10.04583740234375, + "log_odds_ratio": -0.0032382027711719275, + "logits/chosen": -0.5382931232452393, + "logits/rejected": -0.5006886124610901, + "logps/chosen": -0.014946999959647655, + "logps/rejected": -2.335589647293091, + "loss": 0.55, + "nll_loss": 0.13716506958007812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001494700089097023, + "rewards/margins": 0.23206427693367004, + "rewards/rejected": -0.23355896770954132, + "step": 12265 + }, + { + "epoch": 8.48271092669433, + "grad_norm": 6.848077297210693, + "learning_rate": 8.42938374058706e-06, + "log_odds_chosen": 12.40252685546875, + "log_odds_ratio": -2.2604503101320006e-05, + "logits/chosen": -0.4898621439933777, + "logits/rejected": -0.5430005788803101, + "logps/chosen": -0.00012176691961940378, + "logps/rejected": -3.148735284805298, + "loss": 0.3172, + "nll_loss": 0.07930289208889008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2176690688647795e-05, + "rewards/margins": 0.31486135721206665, + "rewards/rejected": -0.3148735463619232, + "step": 12266 + }, + { + "epoch": 8.483402489626556, + "grad_norm": 2.9791226387023926, + "learning_rate": 8.425541724296912e-06, + "log_odds_chosen": 11.094881057739258, + "log_odds_ratio": -3.153106808895245e-05, + "logits/chosen": -0.7539383172988892, + "logits/rejected": -0.6733986139297485, + "logps/chosen": -0.00010487916006240994, + "logps/rejected": -1.760202169418335, + "loss": 0.3915, + "nll_loss": 0.09787617623806, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0487916370038874e-05, + "rewards/margins": 0.17600972950458527, + "rewards/rejected": -0.17602021992206573, + "step": 12267 + }, + { + "epoch": 8.484094052558783, + "grad_norm": 3.049551248550415, + "learning_rate": 8.421699708006763e-06, + "log_odds_chosen": 11.18005657196045, + "log_odds_ratio": -7.208783063106239e-05, + "logits/chosen": -0.48468202352523804, + "logits/rejected": -0.5185460448265076, + "logps/chosen": -0.00017760792979970574, + "logps/rejected": -2.4330902099609375, + "loss": 0.242, + "nll_loss": 0.06049405038356781, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7760794435162097e-05, + "rewards/margins": 0.24329127371311188, + "rewards/rejected": -0.24330900609493256, + "step": 12268 + }, + { + "epoch": 8.48478561549101, + "grad_norm": 4.436456203460693, + "learning_rate": 8.417857691716612e-06, + "log_odds_chosen": 11.085650444030762, + "log_odds_ratio": -0.0005416136118583381, + "logits/chosen": -0.3808354437351227, + "logits/rejected": -0.33489638566970825, + "logps/chosen": -0.0010177572257816792, + "logps/rejected": -2.746601104736328, + "loss": 0.3895, + "nll_loss": 0.0973270907998085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010177572403335944, + "rewards/margins": 0.27455833554267883, + "rewards/rejected": -0.2746601104736328, + "step": 12269 + }, + { + "epoch": 8.485477178423237, + "grad_norm": 4.066561222076416, + "learning_rate": 8.414015675426465e-06, + "log_odds_chosen": 12.0043363571167, + "log_odds_ratio": -8.750682354730088e-06, + "logits/chosen": -0.24367359280586243, + "logits/rejected": -0.44787317514419556, + "logps/chosen": -0.00016268889885395765, + "logps/rejected": -2.643892288208008, + "loss": 0.4556, + "nll_loss": 0.11390087008476257, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6268888430204242e-05, + "rewards/margins": 0.26437294483184814, + "rewards/rejected": -0.2643892168998718, + "step": 12270 + }, + { + "epoch": 8.486168741355463, + "grad_norm": 4.154919147491455, + "learning_rate": 8.410173659136315e-06, + "log_odds_chosen": 11.176225662231445, + "log_odds_ratio": -0.0002369789290241897, + "logits/chosen": -0.6168784499168396, + "logits/rejected": -0.749509871006012, + "logps/chosen": -0.00018601972260512412, + "logps/rejected": -2.156191825866699, + "loss": 0.4435, + "nll_loss": 0.11084267497062683, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.860197153291665e-05, + "rewards/margins": 0.2156006097793579, + "rewards/rejected": -0.21561920642852783, + "step": 12271 + }, + { + "epoch": 8.48686030428769, + "grad_norm": 3.97359037399292, + "learning_rate": 8.406331642846166e-06, + "log_odds_chosen": 9.596004486083984, + "log_odds_ratio": -0.00012900654110126197, + "logits/chosen": -0.5155652761459351, + "logits/rejected": -0.501971960067749, + "logps/chosen": -0.00044222682481631637, + "logps/rejected": -1.644789695739746, + "loss": 0.35, + "nll_loss": 0.0874941423535347, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422268466441892e-05, + "rewards/margins": 0.1644347608089447, + "rewards/rejected": -0.16447898745536804, + "step": 12272 + }, + { + "epoch": 8.487551867219917, + "grad_norm": 3.0364789962768555, + "learning_rate": 8.402489626556017e-06, + "log_odds_chosen": 10.494122505187988, + "log_odds_ratio": -0.00013410058454610407, + "logits/chosen": -0.7801151871681213, + "logits/rejected": -0.8372578620910645, + "logps/chosen": -0.00021797572844661772, + "logps/rejected": -1.7526401281356812, + "loss": 0.3125, + "nll_loss": 0.07811284065246582, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.179757211706601e-05, + "rewards/margins": 0.17524223029613495, + "rewards/rejected": -0.17526403069496155, + "step": 12273 + }, + { + "epoch": 8.488243430152144, + "grad_norm": 4.63698148727417, + "learning_rate": 8.398647610265868e-06, + "log_odds_chosen": 12.364485740661621, + "log_odds_ratio": -1.7829099306254648e-05, + "logits/chosen": -0.2129366248846054, + "logits/rejected": -0.28443700075149536, + "logps/chosen": -0.000122582889162004, + "logps/rejected": -3.121785879135132, + "loss": 0.3408, + "nll_loss": 0.08520200848579407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2258290553290863e-05, + "rewards/margins": 0.312166303396225, + "rewards/rejected": -0.3121785819530487, + "step": 12274 + }, + { + "epoch": 8.48893499308437, + "grad_norm": 2.922107458114624, + "learning_rate": 8.394805593975718e-06, + "log_odds_chosen": 10.744071960449219, + "log_odds_ratio": -4.883888686890714e-05, + "logits/chosen": -0.6283242106437683, + "logits/rejected": -0.6851826310157776, + "logps/chosen": -0.0001939169887918979, + "logps/rejected": -1.8735204935073853, + "loss": 0.251, + "nll_loss": 0.06274637579917908, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9391700334381312e-05, + "rewards/margins": 0.1873326599597931, + "rewards/rejected": -0.18735206127166748, + "step": 12275 + }, + { + "epoch": 8.489626556016598, + "grad_norm": 4.272983074188232, + "learning_rate": 8.390963577685569e-06, + "log_odds_chosen": 9.681724548339844, + "log_odds_ratio": -0.0021691133733838797, + "logits/chosen": 0.08250885456800461, + "logits/rejected": -0.03361310809850693, + "logps/chosen": -0.015918850898742676, + "logps/rejected": -1.5145361423492432, + "loss": 0.5234, + "nll_loss": 0.13062867522239685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015918852295726538, + "rewards/margins": 0.14986173808574677, + "rewards/rejected": -0.1514536291360855, + "step": 12276 + }, + { + "epoch": 8.490318118948824, + "grad_norm": 4.874604225158691, + "learning_rate": 8.387121561395422e-06, + "log_odds_chosen": 10.615398406982422, + "log_odds_ratio": -0.00016952301666606218, + "logits/chosen": -0.39670851826667786, + "logits/rejected": -0.50009685754776, + "logps/chosen": -0.00037242763210088015, + "logps/rejected": -2.066460609436035, + "loss": 0.4313, + "nll_loss": 0.10781387984752655, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7242763937683776e-05, + "rewards/margins": 0.20660880208015442, + "rewards/rejected": -0.20664605498313904, + "step": 12277 + }, + { + "epoch": 8.491009681881051, + "grad_norm": 2.926595449447632, + "learning_rate": 8.383279545105272e-06, + "log_odds_chosen": 11.713717460632324, + "log_odds_ratio": -3.113495040452108e-05, + "logits/chosen": -0.7721793055534363, + "logits/rejected": -0.7168105244636536, + "logps/chosen": -9.897611744236201e-05, + "logps/rejected": -2.2081406116485596, + "loss": 0.337, + "nll_loss": 0.08423657715320587, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.897612471831962e-06, + "rewards/margins": 0.2208041548728943, + "rewards/rejected": -0.220814049243927, + "step": 12278 + }, + { + "epoch": 8.491701244813278, + "grad_norm": 5.314805030822754, + "learning_rate": 8.379437528815121e-06, + "log_odds_chosen": 11.87393569946289, + "log_odds_ratio": -9.592822607373819e-06, + "logits/chosen": -0.386319637298584, + "logits/rejected": -0.4082978367805481, + "logps/chosen": -0.00014236682909540832, + "logps/rejected": -2.774528980255127, + "loss": 0.4811, + "nll_loss": 0.12028633803129196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4236682545742951e-05, + "rewards/margins": 0.2774386703968048, + "rewards/rejected": -0.27745291590690613, + "step": 12279 + }, + { + "epoch": 8.492392807745505, + "grad_norm": 2.6736106872558594, + "learning_rate": 8.375595512524974e-06, + "log_odds_chosen": 9.523404121398926, + "log_odds_ratio": -0.0007579984958283603, + "logits/chosen": 0.22026318311691284, + "logits/rejected": 0.17569872736930847, + "logps/chosen": -0.0019360886653885245, + "logps/rejected": -1.5031076669692993, + "loss": 0.3019, + "nll_loss": 0.07539349049329758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00019360888109076768, + "rewards/margins": 0.1501171588897705, + "rewards/rejected": -0.15031076967716217, + "step": 12280 + }, + { + "epoch": 8.493084370677732, + "grad_norm": 5.25847864151001, + "learning_rate": 8.371753496234825e-06, + "log_odds_chosen": 10.68351936340332, + "log_odds_ratio": -0.0001417723105987534, + "logits/chosen": -0.12427209317684174, + "logits/rejected": -0.21619272232055664, + "logps/chosen": -0.0007611039909534156, + "logps/rejected": -2.130582094192505, + "loss": 0.5091, + "nll_loss": 0.12725578248500824, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.611038745380938e-05, + "rewards/margins": 0.21298208832740784, + "rewards/rejected": -0.2130582183599472, + "step": 12281 + }, + { + "epoch": 8.493775933609959, + "grad_norm": 3.5506532192230225, + "learning_rate": 8.367911479944675e-06, + "log_odds_chosen": 11.635926246643066, + "log_odds_ratio": -2.9104781788191758e-05, + "logits/chosen": -0.15105272829532623, + "logits/rejected": -0.13993534445762634, + "logps/chosen": -0.0004803336050827056, + "logps/rejected": -2.7633471488952637, + "loss": 0.4275, + "nll_loss": 0.10686735808849335, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8033361963462085e-05, + "rewards/margins": 0.27628669142723083, + "rewards/rejected": -0.2763347029685974, + "step": 12282 + }, + { + "epoch": 8.494467496542185, + "grad_norm": 4.606646537780762, + "learning_rate": 8.364069463654526e-06, + "log_odds_chosen": 9.82073974609375, + "log_odds_ratio": -0.0006549949757754803, + "logits/chosen": -0.16509506106376648, + "logits/rejected": -0.18839670717716217, + "logps/chosen": -0.0017432832391932607, + "logps/rejected": -1.6615170240402222, + "loss": 0.49, + "nll_loss": 0.12244555354118347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017432833556085825, + "rewards/margins": 0.1659773886203766, + "rewards/rejected": -0.16615170240402222, + "step": 12283 + }, + { + "epoch": 8.495159059474412, + "grad_norm": 3.7970452308654785, + "learning_rate": 8.360227447364377e-06, + "log_odds_chosen": 10.787665367126465, + "log_odds_ratio": -6.754439527867362e-05, + "logits/chosen": -0.34210094809532166, + "logits/rejected": -0.29848796129226685, + "logps/chosen": -0.0002140133292414248, + "logps/rejected": -2.0320956707000732, + "loss": 0.4188, + "nll_loss": 0.10468335449695587, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1401334379334003e-05, + "rewards/margins": 0.20318818092346191, + "rewards/rejected": -0.20320956408977509, + "step": 12284 + }, + { + "epoch": 8.495850622406639, + "grad_norm": 4.580048561096191, + "learning_rate": 8.356385431074228e-06, + "log_odds_chosen": 11.143314361572266, + "log_odds_ratio": -2.657229924807325e-05, + "logits/chosen": 0.09083308279514313, + "logits/rejected": 0.03352555260062218, + "logps/chosen": -0.0001280966680496931, + "logps/rejected": -2.0760304927825928, + "loss": 0.5341, + "nll_loss": 0.13351541757583618, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2809668078261893e-05, + "rewards/margins": 0.2075902372598648, + "rewards/rejected": -0.20760303735733032, + "step": 12285 + }, + { + "epoch": 8.496542185338866, + "grad_norm": 3.6384947299957275, + "learning_rate": 8.35254341478408e-06, + "log_odds_chosen": 11.60141372680664, + "log_odds_ratio": -1.9051440176554024e-05, + "logits/chosen": 0.24498462677001953, + "logits/rejected": 0.22010770440101624, + "logps/chosen": -0.00018078883294947445, + "logps/rejected": -2.8363616466522217, + "loss": 0.4146, + "nll_loss": 0.10365588217973709, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8078884750138968e-05, + "rewards/margins": 0.28361809253692627, + "rewards/rejected": -0.2836361527442932, + "step": 12286 + }, + { + "epoch": 8.497233748271093, + "grad_norm": 2.5925114154815674, + "learning_rate": 8.34870139849393e-06, + "log_odds_chosen": 10.944217681884766, + "log_odds_ratio": -2.410522210993804e-05, + "logits/chosen": -0.44311681389808655, + "logits/rejected": -0.48636582493782043, + "logps/chosen": -0.0001314889086643234, + "logps/rejected": -1.9731330871582031, + "loss": 0.2769, + "nll_loss": 0.06922472268342972, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.314889141212916e-05, + "rewards/margins": 0.19730015099048615, + "rewards/rejected": -0.1973133087158203, + "step": 12287 + }, + { + "epoch": 8.49792531120332, + "grad_norm": 5.674478530883789, + "learning_rate": 8.34485938220378e-06, + "log_odds_chosen": 10.447786331176758, + "log_odds_ratio": -6.981974001973867e-05, + "logits/chosen": -0.27241989970207214, + "logits/rejected": -0.2633300721645355, + "logps/chosen": -0.00027206639060750604, + "logps/rejected": -2.0152056217193604, + "loss": 0.5358, + "nll_loss": 0.13393688201904297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.720664269872941e-05, + "rewards/margins": 0.20149333775043488, + "rewards/rejected": -0.20152056217193604, + "step": 12288 + }, + { + "epoch": 8.498616874135546, + "grad_norm": 3.292074203491211, + "learning_rate": 8.341017365913632e-06, + "log_odds_chosen": 10.503924369812012, + "log_odds_ratio": -3.558215394150466e-05, + "logits/chosen": 0.45011278986930847, + "logits/rejected": 0.3444060683250427, + "logps/chosen": -0.00011096706293756142, + "logps/rejected": -1.5259891748428345, + "loss": 0.314, + "nll_loss": 0.07848691940307617, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1096706657554023e-05, + "rewards/margins": 0.15258783102035522, + "rewards/rejected": -0.15259891748428345, + "step": 12289 + }, + { + "epoch": 8.499308437067773, + "grad_norm": 5.755599021911621, + "learning_rate": 8.337175349623483e-06, + "log_odds_chosen": 11.601394653320312, + "log_odds_ratio": -7.088996062520891e-05, + "logits/chosen": 0.17589890956878662, + "logits/rejected": 0.13562311232089996, + "logps/chosen": -0.00022538311895914376, + "logps/rejected": -2.678291082382202, + "loss": 0.3043, + "nll_loss": 0.07607081532478333, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2538310076924972e-05, + "rewards/margins": 0.2678065598011017, + "rewards/rejected": -0.26782912015914917, + "step": 12290 + }, + { + "epoch": 8.5, + "grad_norm": 3.1782124042510986, + "learning_rate": 8.333333333333334e-06, + "log_odds_chosen": 11.137289047241211, + "log_odds_ratio": -0.00018118553271051496, + "logits/chosen": -0.3635497987270355, + "logits/rejected": -0.47213447093963623, + "logps/chosen": -0.0005629704101011157, + "logps/rejected": -2.5133163928985596, + "loss": 0.3656, + "nll_loss": 0.09139396250247955, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.629704537568614e-05, + "rewards/margins": 0.25127536058425903, + "rewards/rejected": -0.251331627368927, + "step": 12291 + }, + { + "epoch": 8.500691562932227, + "grad_norm": 5.488527774810791, + "learning_rate": 8.329491317043184e-06, + "log_odds_chosen": 11.514001846313477, + "log_odds_ratio": -2.5029741664184257e-05, + "logits/chosen": -0.24020123481750488, + "logits/rejected": -0.2714656591415405, + "logps/chosen": -0.00017623655730858445, + "logps/rejected": -2.542510986328125, + "loss": 0.4081, + "nll_loss": 0.10201875120401382, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7623655367060564e-05, + "rewards/margins": 0.2542335093021393, + "rewards/rejected": -0.2542511224746704, + "step": 12292 + }, + { + "epoch": 8.501383125864454, + "grad_norm": 4.107009410858154, + "learning_rate": 8.325649300753035e-06, + "log_odds_chosen": 12.135652542114258, + "log_odds_ratio": -7.428465323755518e-05, + "logits/chosen": -0.044987812638282776, + "logits/rejected": -0.0901426374912262, + "logps/chosen": -0.00034369496279396117, + "logps/rejected": -2.7795896530151367, + "loss": 0.3878, + "nll_loss": 0.09694696962833405, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.436949918977916e-05, + "rewards/margins": 0.2779245972633362, + "rewards/rejected": -0.2779589593410492, + "step": 12293 + }, + { + "epoch": 8.50207468879668, + "grad_norm": 4.193845748901367, + "learning_rate": 8.321807284462886e-06, + "log_odds_chosen": 11.168009757995605, + "log_odds_ratio": -5.719634282286279e-05, + "logits/chosen": 0.2666996121406555, + "logits/rejected": 0.18223300576210022, + "logps/chosen": -0.00015161471674218774, + "logps/rejected": -2.2643349170684814, + "loss": 0.4812, + "nll_loss": 0.12029269337654114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5161472219915595e-05, + "rewards/margins": 0.22641833126544952, + "rewards/rejected": -0.22643348574638367, + "step": 12294 + }, + { + "epoch": 8.502766251728907, + "grad_norm": 5.293142795562744, + "learning_rate": 8.317965268172738e-06, + "log_odds_chosen": 10.646807670593262, + "log_odds_ratio": -0.00019855228310916573, + "logits/chosen": -0.18091386556625366, + "logits/rejected": -0.22599759697914124, + "logps/chosen": -0.0007174762431532145, + "logps/rejected": -2.800795316696167, + "loss": 0.606, + "nll_loss": 0.15148796141147614, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.17476214049384e-05, + "rewards/margins": 0.2800077795982361, + "rewards/rejected": -0.28007954359054565, + "step": 12295 + }, + { + "epoch": 8.503457814661134, + "grad_norm": 4.917719841003418, + "learning_rate": 8.31412325188259e-06, + "log_odds_chosen": 11.533758163452148, + "log_odds_ratio": -0.0001370185927953571, + "logits/chosen": -0.42198553681373596, + "logits/rejected": -0.43564262986183167, + "logps/chosen": -0.000686085200868547, + "logps/rejected": -3.2868759632110596, + "loss": 0.5574, + "nll_loss": 0.1393275260925293, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.860852590762079e-05, + "rewards/margins": 0.32861900329589844, + "rewards/rejected": -0.3286876082420349, + "step": 12296 + }, + { + "epoch": 8.504149377593361, + "grad_norm": 2.980889081954956, + "learning_rate": 8.310281235592438e-06, + "log_odds_chosen": 11.35621452331543, + "log_odds_ratio": -4.0140759665519e-05, + "logits/chosen": -0.21690016984939575, + "logits/rejected": -0.2787854075431824, + "logps/chosen": -0.00018471668590791523, + "logps/rejected": -2.469820022583008, + "loss": 0.4044, + "nll_loss": 0.10108595341444016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8471670045983046e-05, + "rewards/margins": 0.2469635307788849, + "rewards/rejected": -0.24698200821876526, + "step": 12297 + }, + { + "epoch": 8.504840940525588, + "grad_norm": 3.8085482120513916, + "learning_rate": 8.30643921930229e-06, + "log_odds_chosen": 10.582088470458984, + "log_odds_ratio": -0.00012150261318311095, + "logits/chosen": -0.16991454362869263, + "logits/rejected": -0.16436061263084412, + "logps/chosen": -0.0006639507482759655, + "logps/rejected": -2.382948398590088, + "loss": 0.326, + "nll_loss": 0.08148948848247528, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.639507773797959e-05, + "rewards/margins": 0.238228440284729, + "rewards/rejected": -0.2382948398590088, + "step": 12298 + }, + { + "epoch": 8.505532503457815, + "grad_norm": 3.6274943351745605, + "learning_rate": 8.302597203012141e-06, + "log_odds_chosen": 11.005544662475586, + "log_odds_ratio": -0.00012421110295690596, + "logits/chosen": 0.21192803978919983, + "logits/rejected": 0.2692212462425232, + "logps/chosen": -0.0002638070727698505, + "logps/rejected": -2.1027445793151855, + "loss": 0.2937, + "nll_loss": 0.07340645045042038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6380705094197765e-05, + "rewards/margins": 0.21024811267852783, + "rewards/rejected": -0.21027448773384094, + "step": 12299 + }, + { + "epoch": 8.506224066390041, + "grad_norm": 3.427682638168335, + "learning_rate": 8.298755186721992e-06, + "log_odds_chosen": 11.169326782226562, + "log_odds_ratio": -4.729488864541054e-05, + "logits/chosen": 0.011243600398302078, + "logits/rejected": -0.03214600309729576, + "logps/chosen": -0.0001485542015871033, + "logps/rejected": -2.084603786468506, + "loss": 0.4336, + "nll_loss": 0.10838790237903595, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.485541997681139e-05, + "rewards/margins": 0.20844553411006927, + "rewards/rejected": -0.20846039056777954, + "step": 12300 + }, + { + "epoch": 8.506915629322268, + "grad_norm": 4.2835493087768555, + "learning_rate": 8.294913170431843e-06, + "log_odds_chosen": 9.640701293945312, + "log_odds_ratio": -0.0003793557989411056, + "logits/chosen": -0.517144501209259, + "logits/rejected": -0.5247098207473755, + "logps/chosen": -0.00040619890205562115, + "logps/rejected": -1.758765697479248, + "loss": 0.3626, + "nll_loss": 0.09061383455991745, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.061988875037059e-05, + "rewards/margins": 0.17583593726158142, + "rewards/rejected": -0.17587657272815704, + "step": 12301 + }, + { + "epoch": 8.507607192254495, + "grad_norm": 4.868540287017822, + "learning_rate": 8.291071154141694e-06, + "log_odds_chosen": 10.661447525024414, + "log_odds_ratio": -6.143888458609581e-05, + "logits/chosen": -0.4865027368068695, + "logits/rejected": -0.5694153904914856, + "logps/chosen": -0.0001912845327751711, + "logps/rejected": -2.037360191345215, + "loss": 0.4801, + "nll_loss": 0.12001441419124603, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.912845254992135e-05, + "rewards/margins": 0.20371690392494202, + "rewards/rejected": -0.20373603701591492, + "step": 12302 + }, + { + "epoch": 8.508298755186722, + "grad_norm": 3.5946638584136963, + "learning_rate": 8.287229137851544e-06, + "log_odds_chosen": 11.281814575195312, + "log_odds_ratio": -4.3780812120530754e-05, + "logits/chosen": -0.24768759310245514, + "logits/rejected": -0.3600189685821533, + "logps/chosen": -0.00018627429381012917, + "logps/rejected": -1.897853970527649, + "loss": 0.3271, + "nll_loss": 0.08176817744970322, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8627430108608678e-05, + "rewards/margins": 0.1897667795419693, + "rewards/rejected": -0.1897854208946228, + "step": 12303 + }, + { + "epoch": 8.508990318118949, + "grad_norm": 3.5438852310180664, + "learning_rate": 8.283387121561397e-06, + "log_odds_chosen": 10.485623359680176, + "log_odds_ratio": -4.5006632717559114e-05, + "logits/chosen": -0.20240357518196106, + "logits/rejected": -0.17878305912017822, + "logps/chosen": -0.00014959985855966806, + "logps/rejected": -1.6814618110656738, + "loss": 0.3431, + "nll_loss": 0.08578226715326309, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4959986401663627e-05, + "rewards/margins": 0.16813123226165771, + "rewards/rejected": -0.16814617812633514, + "step": 12304 + }, + { + "epoch": 8.509681881051176, + "grad_norm": 4.398319244384766, + "learning_rate": 8.279545105271248e-06, + "log_odds_chosen": 12.11357593536377, + "log_odds_ratio": -2.4059154384303838e-05, + "logits/chosen": -0.024562709033489227, + "logits/rejected": -0.2101573646068573, + "logps/chosen": -0.00010782388562802225, + "logps/rejected": -3.0146188735961914, + "loss": 0.3632, + "nll_loss": 0.09080375730991364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0782388926600106e-05, + "rewards/margins": 0.3014511168003082, + "rewards/rejected": -0.3014618754386902, + "step": 12305 + }, + { + "epoch": 8.510373443983402, + "grad_norm": 2.8083410263061523, + "learning_rate": 8.275703088981097e-06, + "log_odds_chosen": 11.353227615356445, + "log_odds_ratio": -2.210174898209516e-05, + "logits/chosen": -0.3386459946632385, + "logits/rejected": -0.2464192658662796, + "logps/chosen": -0.0001623683492653072, + "logps/rejected": -1.993700623512268, + "loss": 0.242, + "nll_loss": 0.06050754711031914, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6236834198934957e-05, + "rewards/margins": 0.19935382902622223, + "rewards/rejected": -0.19937007129192352, + "step": 12306 + }, + { + "epoch": 8.51106500691563, + "grad_norm": 4.819612503051758, + "learning_rate": 8.271861072690949e-06, + "log_odds_chosen": 10.617549896240234, + "log_odds_ratio": -0.00035102281253784895, + "logits/chosen": -0.1268101930618286, + "logits/rejected": -0.16175130009651184, + "logps/chosen": -0.0003959328751079738, + "logps/rejected": -2.089851140975952, + "loss": 0.6655, + "nll_loss": 0.1663326919078827, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.959328751079738e-05, + "rewards/margins": 0.20894552767276764, + "rewards/rejected": -0.2089851051568985, + "step": 12307 + }, + { + "epoch": 8.511756569847856, + "grad_norm": 6.782039165496826, + "learning_rate": 8.2680190564008e-06, + "log_odds_chosen": 11.941649436950684, + "log_odds_ratio": -7.390565588138998e-05, + "logits/chosen": -0.27633628249168396, + "logits/rejected": -0.2620106041431427, + "logps/chosen": -7.525588443968445e-05, + "logps/rejected": -2.5158262252807617, + "loss": 0.5836, + "nll_loss": 0.14588358998298645, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.525588443968445e-06, + "rewards/margins": 0.2515750825405121, + "rewards/rejected": -0.25158262252807617, + "step": 12308 + }, + { + "epoch": 8.512448132780083, + "grad_norm": 4.102380752563477, + "learning_rate": 8.26417704011065e-06, + "log_odds_chosen": 11.285173416137695, + "log_odds_ratio": -2.9311428079381585e-05, + "logits/chosen": -0.19089141488075256, + "logits/rejected": -0.20214393734931946, + "logps/chosen": -0.0001509404683019966, + "logps/rejected": -2.247863531112671, + "loss": 0.7208, + "nll_loss": 0.18019136786460876, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5094046830199659e-05, + "rewards/margins": 0.22477126121520996, + "rewards/rejected": -0.22478634119033813, + "step": 12309 + }, + { + "epoch": 8.51313969571231, + "grad_norm": 3.3171002864837646, + "learning_rate": 8.260335023820501e-06, + "log_odds_chosen": 9.499252319335938, + "log_odds_ratio": -0.0002143854071618989, + "logits/chosen": -0.07529734075069427, + "logits/rejected": -0.044407326728105545, + "logps/chosen": -0.0005177122075110674, + "logps/rejected": -1.8591790199279785, + "loss": 0.2827, + "nll_loss": 0.07066380232572556, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.17712214787025e-05, + "rewards/margins": 0.185866117477417, + "rewards/rejected": -0.1859178990125656, + "step": 12310 + }, + { + "epoch": 8.513831258644537, + "grad_norm": 3.6642651557922363, + "learning_rate": 8.256493007530352e-06, + "log_odds_chosen": 10.917474746704102, + "log_odds_ratio": -5.3249630582286045e-05, + "logits/chosen": -0.39446714520454407, + "logits/rejected": -0.44384223222732544, + "logps/chosen": -0.0001516881602583453, + "logps/rejected": -1.9198907613754272, + "loss": 0.3015, + "nll_loss": 0.07537909597158432, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5168816389632411e-05, + "rewards/margins": 0.19197390973567963, + "rewards/rejected": -0.19198909401893616, + "step": 12311 + }, + { + "epoch": 8.514522821576763, + "grad_norm": 3.984049081802368, + "learning_rate": 8.252650991240203e-06, + "log_odds_chosen": 11.000242233276367, + "log_odds_ratio": -5.1191036618547514e-05, + "logits/chosen": -0.6422182321548462, + "logits/rejected": -0.51802659034729, + "logps/chosen": -0.0002157797134714201, + "logps/rejected": -2.0283071994781494, + "loss": 0.4572, + "nll_loss": 0.11429233103990555, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.157797098334413e-05, + "rewards/margins": 0.20280912518501282, + "rewards/rejected": -0.2028307020664215, + "step": 12312 + }, + { + "epoch": 8.51521438450899, + "grad_norm": 8.821842193603516, + "learning_rate": 8.248808974950054e-06, + "log_odds_chosen": 10.934852600097656, + "log_odds_ratio": -7.927478145575151e-05, + "logits/chosen": -0.536807656288147, + "logits/rejected": -0.5345730185508728, + "logps/chosen": -0.00025932028074748814, + "logps/rejected": -2.190347194671631, + "loss": 0.252, + "nll_loss": 0.06299175322055817, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5932029529940337e-05, + "rewards/margins": 0.21900878846645355, + "rewards/rejected": -0.21903470158576965, + "step": 12313 + }, + { + "epoch": 8.515905947441217, + "grad_norm": 2.862980365753174, + "learning_rate": 8.244966958659906e-06, + "log_odds_chosen": 11.223052024841309, + "log_odds_ratio": -1.99354635697091e-05, + "logits/chosen": -0.40606260299682617, + "logits/rejected": -0.47642767429351807, + "logps/chosen": -9.023462189361453e-05, + "logps/rejected": -1.9665813446044922, + "loss": 0.3914, + "nll_loss": 0.09783907979726791, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.023462553159334e-06, + "rewards/margins": 0.19664910435676575, + "rewards/rejected": -0.19665813446044922, + "step": 12314 + }, + { + "epoch": 8.516597510373444, + "grad_norm": 6.2338738441467285, + "learning_rate": 8.241124942369755e-06, + "log_odds_chosen": 11.054450988769531, + "log_odds_ratio": -3.191823998349719e-05, + "logits/chosen": -0.6752179265022278, + "logits/rejected": -0.6671661734580994, + "logps/chosen": -0.0002096227981382981, + "logps/rejected": -1.9763360023498535, + "loss": 0.5009, + "nll_loss": 0.12522666156291962, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.096227945003193e-05, + "rewards/margins": 0.19761261343955994, + "rewards/rejected": -0.19763359427452087, + "step": 12315 + }, + { + "epoch": 8.51728907330567, + "grad_norm": 3.3995327949523926, + "learning_rate": 8.237282926079606e-06, + "log_odds_chosen": 10.937870025634766, + "log_odds_ratio": -0.00079381960676983, + "logits/chosen": -0.6280683279037476, + "logits/rejected": -0.671973705291748, + "logps/chosen": -0.0005241141188889742, + "logps/rejected": -2.157975435256958, + "loss": 0.2516, + "nll_loss": 0.06282602250576019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2411414799280465e-05, + "rewards/margins": 0.21574516594409943, + "rewards/rejected": -0.2157975733280182, + "step": 12316 + }, + { + "epoch": 8.517980636237898, + "grad_norm": 3.2070159912109375, + "learning_rate": 8.233440909789458e-06, + "log_odds_chosen": 11.502121925354004, + "log_odds_ratio": -2.8572507289936766e-05, + "logits/chosen": -0.16348521411418915, + "logits/rejected": -0.14277753233909607, + "logps/chosen": -0.00018423848086968064, + "logps/rejected": -2.4327659606933594, + "loss": 0.3631, + "nll_loss": 0.09076198935508728, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8423846995574422e-05, + "rewards/margins": 0.24325817823410034, + "rewards/rejected": -0.24327659606933594, + "step": 12317 + }, + { + "epoch": 8.518672199170124, + "grad_norm": 5.480223178863525, + "learning_rate": 8.229598893499309e-06, + "log_odds_chosen": 10.304766654968262, + "log_odds_ratio": -0.00010444460349390283, + "logits/chosen": -0.5530600547790527, + "logits/rejected": -0.5032128691673279, + "logps/chosen": -0.00040447936044074595, + "logps/rejected": -2.0766894817352295, + "loss": 0.3791, + "nll_loss": 0.09476219862699509, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0447936044074595e-05, + "rewards/margins": 0.20762848854064941, + "rewards/rejected": -0.2076689451932907, + "step": 12318 + }, + { + "epoch": 8.519363762102351, + "grad_norm": 4.5215559005737305, + "learning_rate": 8.22575687720916e-06, + "log_odds_chosen": 10.32512092590332, + "log_odds_ratio": -0.00013688394392374903, + "logits/chosen": -0.22447320818901062, + "logits/rejected": -0.2803284227848053, + "logps/chosen": -0.00019440040341578424, + "logps/rejected": -1.9294097423553467, + "loss": 0.5061, + "nll_loss": 0.1265106052160263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9440039977780543e-05, + "rewards/margins": 0.19292153418064117, + "rewards/rejected": -0.19294098019599915, + "step": 12319 + }, + { + "epoch": 8.520055325034578, + "grad_norm": 5.495118618011475, + "learning_rate": 8.22191486091901e-06, + "log_odds_chosen": 10.946943283081055, + "log_odds_ratio": -0.00013044103980064392, + "logits/chosen": -0.2992880046367645, + "logits/rejected": -0.4046541154384613, + "logps/chosen": -0.00020025305275339633, + "logps/rejected": -2.3770759105682373, + "loss": 0.4413, + "nll_loss": 0.11030249297618866, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0025305275339633e-05, + "rewards/margins": 0.2376875877380371, + "rewards/rejected": -0.23770761489868164, + "step": 12320 + }, + { + "epoch": 8.520746887966805, + "grad_norm": 3.188117742538452, + "learning_rate": 8.218072844628861e-06, + "log_odds_chosen": 9.897478103637695, + "log_odds_ratio": -0.00010892859427258372, + "logits/chosen": -0.3278999328613281, + "logits/rejected": -0.38061243295669556, + "logps/chosen": -0.0004890036070719361, + "logps/rejected": -1.8024688959121704, + "loss": 0.3539, + "nll_loss": 0.08847436308860779, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.890035779681057e-05, + "rewards/margins": 0.18019799888134003, + "rewards/rejected": -0.18024688959121704, + "step": 12321 + }, + { + "epoch": 8.521438450899032, + "grad_norm": 3.4911656379699707, + "learning_rate": 8.214230828338712e-06, + "log_odds_chosen": 11.001504898071289, + "log_odds_ratio": -5.02866787428502e-05, + "logits/chosen": -0.5928372144699097, + "logits/rejected": -0.5494281053543091, + "logps/chosen": -0.00030572060495615005, + "logps/rejected": -2.429985284805298, + "loss": 0.3272, + "nll_loss": 0.08180222660303116, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0572060495615005e-05, + "rewards/margins": 0.24296796321868896, + "rewards/rejected": -0.24299854040145874, + "step": 12322 + }, + { + "epoch": 8.522130013831259, + "grad_norm": 3.057826280593872, + "learning_rate": 8.210388812048564e-06, + "log_odds_chosen": 11.435529708862305, + "log_odds_ratio": -3.5779325116891414e-05, + "logits/chosen": -0.6360045671463013, + "logits/rejected": -0.7269700765609741, + "logps/chosen": -0.00019725115271285176, + "logps/rejected": -2.423240900039673, + "loss": 0.3774, + "nll_loss": 0.09433731436729431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.972511745407246e-05, + "rewards/margins": 0.24230436980724335, + "rewards/rejected": -0.2423241138458252, + "step": 12323 + }, + { + "epoch": 8.522821576763485, + "grad_norm": 4.978005886077881, + "learning_rate": 8.206546795758415e-06, + "log_odds_chosen": 10.862587928771973, + "log_odds_ratio": -0.00013348314678296447, + "logits/chosen": -0.23941171169281006, + "logits/rejected": -0.3022950291633606, + "logps/chosen": -0.00032461649971082807, + "logps/rejected": -2.654205560684204, + "loss": 0.3983, + "nll_loss": 0.0995587706565857, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2461648515891284e-05, + "rewards/margins": 0.2653881013393402, + "rewards/rejected": -0.2654205560684204, + "step": 12324 + }, + { + "epoch": 8.523513139695712, + "grad_norm": 3.5846354961395264, + "learning_rate": 8.202704779468264e-06, + "log_odds_chosen": 11.656807899475098, + "log_odds_ratio": -5.0690625357674435e-05, + "logits/chosen": -0.4642685651779175, + "logits/rejected": -0.5309992432594299, + "logps/chosen": -0.00020530421170406044, + "logps/rejected": -3.100234270095825, + "loss": 0.3337, + "nll_loss": 0.08341825753450394, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.053042408078909e-05, + "rewards/margins": 0.3100028932094574, + "rewards/rejected": -0.3100234270095825, + "step": 12325 + }, + { + "epoch": 8.524204702627939, + "grad_norm": 4.919602394104004, + "learning_rate": 8.198862763178117e-06, + "log_odds_chosen": 12.687283515930176, + "log_odds_ratio": -2.108301669068169e-05, + "logits/chosen": -0.6012700796127319, + "logits/rejected": -0.7129382491111755, + "logps/chosen": -0.00016982763190753758, + "logps/rejected": -3.3141398429870605, + "loss": 0.3157, + "nll_loss": 0.07891141623258591, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6982761735562235e-05, + "rewards/margins": 0.33139699697494507, + "rewards/rejected": -0.33141398429870605, + "step": 12326 + }, + { + "epoch": 8.524896265560166, + "grad_norm": 11.470624923706055, + "learning_rate": 8.195020746887967e-06, + "log_odds_chosen": 11.43114948272705, + "log_odds_ratio": -2.383892933721654e-05, + "logits/chosen": -0.47737187147140503, + "logits/rejected": -0.5778495073318481, + "logps/chosen": -0.00020170229254290462, + "logps/rejected": -2.5206422805786133, + "loss": 0.4786, + "nll_loss": 0.11964017152786255, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0170229618088342e-05, + "rewards/margins": 0.25204405188560486, + "rewards/rejected": -0.25206422805786133, + "step": 12327 + }, + { + "epoch": 8.525587828492393, + "grad_norm": 3.273254871368408, + "learning_rate": 8.191178730597818e-06, + "log_odds_chosen": 11.17822265625, + "log_odds_ratio": -2.2545445972355083e-05, + "logits/chosen": -0.7379517555236816, + "logits/rejected": -0.8989492654800415, + "logps/chosen": -0.0003359833499416709, + "logps/rejected": -2.4224159717559814, + "loss": 0.3547, + "nll_loss": 0.08866336941719055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3598338632145897e-05, + "rewards/margins": 0.24220798909664154, + "rewards/rejected": -0.24224157631397247, + "step": 12328 + }, + { + "epoch": 8.52627939142462, + "grad_norm": 3.759458065032959, + "learning_rate": 8.187336714307669e-06, + "log_odds_chosen": 11.058753967285156, + "log_odds_ratio": -4.0378348785452545e-05, + "logits/chosen": -0.06687570363283157, + "logits/rejected": -0.15267179906368256, + "logps/chosen": -0.00019187794532626867, + "logps/rejected": -2.1204071044921875, + "loss": 0.4475, + "nll_loss": 0.11186422407627106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.918779526022263e-05, + "rewards/margins": 0.21202154457569122, + "rewards/rejected": -0.2120407074689865, + "step": 12329 + }, + { + "epoch": 8.526970954356846, + "grad_norm": 4.646742343902588, + "learning_rate": 8.18349469801752e-06, + "log_odds_chosen": 10.960451126098633, + "log_odds_ratio": -5.372767918743193e-05, + "logits/chosen": -0.49918264150619507, + "logits/rejected": -0.4375155568122864, + "logps/chosen": -0.00021653309522662312, + "logps/rejected": -2.144824981689453, + "loss": 0.7104, + "nll_loss": 0.1775887906551361, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1653311705449596e-05, + "rewards/margins": 0.2144608497619629, + "rewards/rejected": -0.21448248624801636, + "step": 12330 + }, + { + "epoch": 8.527662517289073, + "grad_norm": 5.321842670440674, + "learning_rate": 8.17965268172737e-06, + "log_odds_chosen": 10.420870780944824, + "log_odds_ratio": -0.0002663970517460257, + "logits/chosen": -0.40803062915802, + "logits/rejected": -0.29475855827331543, + "logps/chosen": -0.0004102752427570522, + "logps/rejected": -1.6895699501037598, + "loss": 0.673, + "nll_loss": 0.16821430623531342, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.102752427570522e-05, + "rewards/margins": 0.16891595721244812, + "rewards/rejected": -0.16895699501037598, + "step": 12331 + }, + { + "epoch": 8.5283540802213, + "grad_norm": 12.990723609924316, + "learning_rate": 8.175810665437223e-06, + "log_odds_chosen": 11.02590560913086, + "log_odds_ratio": -5.349262210074812e-05, + "logits/chosen": -0.6384226083755493, + "logits/rejected": -0.6647124290466309, + "logps/chosen": -0.000241591376834549, + "logps/rejected": -2.1901700496673584, + "loss": 0.4627, + "nll_loss": 0.11568032205104828, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.415913695585914e-05, + "rewards/margins": 0.21899282932281494, + "rewards/rejected": -0.21901699900627136, + "step": 12332 + }, + { + "epoch": 8.529045643153527, + "grad_norm": 3.829329490661621, + "learning_rate": 8.171968649147074e-06, + "log_odds_chosen": 11.376579284667969, + "log_odds_ratio": -2.9566979719675146e-05, + "logits/chosen": -0.5997239351272583, + "logits/rejected": -0.5821545124053955, + "logps/chosen": -7.834136340534315e-05, + "logps/rejected": -2.0063858032226562, + "loss": 0.4316, + "nll_loss": 0.107905812561512, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.834136340534315e-06, + "rewards/margins": 0.2006307691335678, + "rewards/rejected": -0.20063860714435577, + "step": 12333 + }, + { + "epoch": 8.529737206085754, + "grad_norm": 3.409250497817993, + "learning_rate": 8.168126632856923e-06, + "log_odds_chosen": 11.758556365966797, + "log_odds_ratio": -4.655357770388946e-05, + "logits/chosen": -0.16191110014915466, + "logits/rejected": -0.19495658576488495, + "logps/chosen": -0.00020725368813145906, + "logps/rejected": -2.755053997039795, + "loss": 0.623, + "nll_loss": 0.15575700998306274, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0725368813145906e-05, + "rewards/margins": 0.2754846513271332, + "rewards/rejected": -0.275505393743515, + "step": 12334 + }, + { + "epoch": 8.53042876901798, + "grad_norm": 2.668457269668579, + "learning_rate": 8.164284616566775e-06, + "log_odds_chosen": 10.386791229248047, + "log_odds_ratio": -0.00017962889978662133, + "logits/chosen": -0.37340259552001953, + "logits/rejected": -0.33036869764328003, + "logps/chosen": -0.0003662610542960465, + "logps/rejected": -2.257016658782959, + "loss": 0.2609, + "nll_loss": 0.06521536409854889, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6626108339987695e-05, + "rewards/margins": 0.22566506266593933, + "rewards/rejected": -0.22570167481899261, + "step": 12335 + }, + { + "epoch": 8.531120331950207, + "grad_norm": 3.2178878784179688, + "learning_rate": 8.160442600276626e-06, + "log_odds_chosen": 10.688413619995117, + "log_odds_ratio": -8.053887722780928e-05, + "logits/chosen": -0.1574697643518448, + "logits/rejected": -0.30061471462249756, + "logps/chosen": -0.00027518076240085065, + "logps/rejected": -1.84926438331604, + "loss": 0.4135, + "nll_loss": 0.10337050259113312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7518077331478707e-05, + "rewards/margins": 0.18489891290664673, + "rewards/rejected": -0.18492642045021057, + "step": 12336 + }, + { + "epoch": 8.531811894882434, + "grad_norm": 2.9085309505462646, + "learning_rate": 8.156600583986477e-06, + "log_odds_chosen": 10.503604888916016, + "log_odds_ratio": -0.00011857294884976, + "logits/chosen": -0.28567206859588623, + "logits/rejected": -0.28339099884033203, + "logps/chosen": -0.00027978868456557393, + "logps/rejected": -1.8618590831756592, + "loss": 0.6276, + "nll_loss": 0.15689247846603394, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7978869184153154e-05, + "rewards/margins": 0.1861579269170761, + "rewards/rejected": -0.18618589639663696, + "step": 12337 + }, + { + "epoch": 8.532503457814661, + "grad_norm": 4.562682628631592, + "learning_rate": 8.152758567696327e-06, + "log_odds_chosen": 11.258023262023926, + "log_odds_ratio": -2.551450779719744e-05, + "logits/chosen": -0.6869363784790039, + "logits/rejected": -0.7892690896987915, + "logps/chosen": -0.0001631429768167436, + "logps/rejected": -2.302780866622925, + "loss": 0.4823, + "nll_loss": 0.12058058381080627, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.631429768167436e-05, + "rewards/margins": 0.23026177287101746, + "rewards/rejected": -0.23027808964252472, + "step": 12338 + }, + { + "epoch": 8.533195020746888, + "grad_norm": 3.6148369312286377, + "learning_rate": 8.148916551406178e-06, + "log_odds_chosen": 11.666459083557129, + "log_odds_ratio": -3.3464657462900504e-05, + "logits/chosen": 0.041404321789741516, + "logits/rejected": 0.026137858629226685, + "logps/chosen": -0.0001358877052552998, + "logps/rejected": -2.480074882507324, + "loss": 0.3857, + "nll_loss": 0.0964311957359314, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3588771253125742e-05, + "rewards/margins": 0.24799390137195587, + "rewards/rejected": -0.24800749123096466, + "step": 12339 + }, + { + "epoch": 8.533886583679115, + "grad_norm": 3.3950388431549072, + "learning_rate": 8.145074535116029e-06, + "log_odds_chosen": 10.787850379943848, + "log_odds_ratio": -4.8465499276062474e-05, + "logits/chosen": -0.3860059082508087, + "logits/rejected": -0.459553599357605, + "logps/chosen": -0.0003434290993027389, + "logps/rejected": -2.387206792831421, + "loss": 0.2738, + "nll_loss": 0.06845402717590332, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.434290920267813e-05, + "rewards/margins": 0.23868635296821594, + "rewards/rejected": -0.23872068524360657, + "step": 12340 + }, + { + "epoch": 8.534578146611342, + "grad_norm": 3.6412434577941895, + "learning_rate": 8.141232518825881e-06, + "log_odds_chosen": 9.9017333984375, + "log_odds_ratio": -0.00012677724589593709, + "logits/chosen": -0.21874283254146576, + "logits/rejected": -0.2888812720775604, + "logps/chosen": -0.0003149479744024575, + "logps/rejected": -1.8529309034347534, + "loss": 0.343, + "nll_loss": 0.08573202043771744, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.149479744024575e-05, + "rewards/margins": 0.18526160717010498, + "rewards/rejected": -0.18529310822486877, + "step": 12341 + }, + { + "epoch": 8.535269709543568, + "grad_norm": 3.061228036880493, + "learning_rate": 8.137390502535732e-06, + "log_odds_chosen": 11.397375106811523, + "log_odds_ratio": -1.8148359231418e-05, + "logits/chosen": -0.2281343787908554, + "logits/rejected": -0.18515226244926453, + "logps/chosen": -8.230004459619522e-05, + "logps/rejected": -1.8191204071044922, + "loss": 0.3075, + "nll_loss": 0.07687129825353622, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.230003913922701e-06, + "rewards/margins": 0.18190382421016693, + "rewards/rejected": -0.18191204965114594, + "step": 12342 + }, + { + "epoch": 8.535961272475795, + "grad_norm": 5.672790050506592, + "learning_rate": 8.133548486245581e-06, + "log_odds_chosen": 12.381575584411621, + "log_odds_ratio": -1.670279016252607e-05, + "logits/chosen": -0.1853232979774475, + "logits/rejected": -0.25712287425994873, + "logps/chosen": -0.00012582400813698769, + "logps/rejected": -3.195413827896118, + "loss": 0.6336, + "nll_loss": 0.15838681161403656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2582400813698769e-05, + "rewards/margins": 0.3195287883281708, + "rewards/rejected": -0.31954139471054077, + "step": 12343 + }, + { + "epoch": 8.536652835408022, + "grad_norm": 4.7994160652160645, + "learning_rate": 8.129706469955432e-06, + "log_odds_chosen": 11.000398635864258, + "log_odds_ratio": -9.112850239034742e-05, + "logits/chosen": 0.0034987851977348328, + "logits/rejected": -0.10294229537248611, + "logps/chosen": -0.0002761443320196122, + "logps/rejected": -2.4395899772644043, + "loss": 0.4132, + "nll_loss": 0.10329889506101608, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7614434657152742e-05, + "rewards/margins": 0.243931382894516, + "rewards/rejected": -0.2439589947462082, + "step": 12344 + }, + { + "epoch": 8.537344398340249, + "grad_norm": 3.5451295375823975, + "learning_rate": 8.125864453665284e-06, + "log_odds_chosen": 11.421430587768555, + "log_odds_ratio": -2.3883238100097515e-05, + "logits/chosen": -0.3302974998950958, + "logits/rejected": -0.44120854139328003, + "logps/chosen": -0.00019329342467244714, + "logps/rejected": -2.6955575942993164, + "loss": 0.4013, + "nll_loss": 0.10033205151557922, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9329343558638357e-05, + "rewards/margins": 0.26953643560409546, + "rewards/rejected": -0.2695557773113251, + "step": 12345 + }, + { + "epoch": 8.538035961272476, + "grad_norm": 4.600787162780762, + "learning_rate": 8.122022437375135e-06, + "log_odds_chosen": 11.199943542480469, + "log_odds_ratio": -4.997500218451023e-05, + "logits/chosen": -0.07723156362771988, + "logits/rejected": -0.1380307674407959, + "logps/chosen": -0.00027827589656226337, + "logps/rejected": -2.495893716812134, + "loss": 0.4573, + "nll_loss": 0.11431986093521118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.782759111141786e-05, + "rewards/margins": 0.24956156313419342, + "rewards/rejected": -0.24958938360214233, + "step": 12346 + }, + { + "epoch": 8.538727524204702, + "grad_norm": 4.425629615783691, + "learning_rate": 8.118180421084986e-06, + "log_odds_chosen": 11.156241416931152, + "log_odds_ratio": -3.892029417329468e-05, + "logits/chosen": -0.3224028944969177, + "logits/rejected": -0.3680450916290283, + "logps/chosen": -0.0006028384086675942, + "logps/rejected": -2.400455951690674, + "loss": 0.6719, + "nll_loss": 0.16797758638858795, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0283848142717034e-05, + "rewards/margins": 0.23998533189296722, + "rewards/rejected": -0.24004562199115753, + "step": 12347 + }, + { + "epoch": 8.53941908713693, + "grad_norm": 7.704127311706543, + "learning_rate": 8.114338404794837e-06, + "log_odds_chosen": 11.423298835754395, + "log_odds_ratio": -0.0003145392402075231, + "logits/chosen": -0.03623528778553009, + "logits/rejected": -0.10272793471813202, + "logps/chosen": -0.00033639915636740625, + "logps/rejected": -3.0414867401123047, + "loss": 0.5372, + "nll_loss": 0.1342613697052002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.363991709193215e-05, + "rewards/margins": 0.3041149973869324, + "rewards/rejected": -0.30414867401123047, + "step": 12348 + }, + { + "epoch": 8.540110650069156, + "grad_norm": 6.523242473602295, + "learning_rate": 8.110496388504687e-06, + "log_odds_chosen": 11.448200225830078, + "log_odds_ratio": -0.00010262696741847321, + "logits/chosen": -0.054167747497558594, + "logits/rejected": -0.14434875547885895, + "logps/chosen": -0.00013683061115443707, + "logps/rejected": -2.6697921752929688, + "loss": 0.5282, + "nll_loss": 0.13205023109912872, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3683061297342647e-05, + "rewards/margins": 0.2669655382633209, + "rewards/rejected": -0.2669792175292969, + "step": 12349 + }, + { + "epoch": 8.540802213001383, + "grad_norm": 8.026880264282227, + "learning_rate": 8.106654372214538e-06, + "log_odds_chosen": 10.558284759521484, + "log_odds_ratio": -0.00018640939379110932, + "logits/chosen": -0.33718621730804443, + "logits/rejected": -0.38743701577186584, + "logps/chosen": -0.0006922598695382476, + "logps/rejected": -2.594275712966919, + "loss": 0.5186, + "nll_loss": 0.12963704764842987, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.922599277459085e-05, + "rewards/margins": 0.25935834646224976, + "rewards/rejected": -0.25942760705947876, + "step": 12350 + }, + { + "epoch": 8.54149377593361, + "grad_norm": 2.8525102138519287, + "learning_rate": 8.10281235592439e-06, + "log_odds_chosen": 11.669790267944336, + "log_odds_ratio": -7.162422116380185e-05, + "logits/chosen": 0.1007673367857933, + "logits/rejected": -0.023373395204544067, + "logps/chosen": -0.00022936250024940819, + "logps/rejected": -2.460688352584839, + "loss": 0.4003, + "nll_loss": 0.10007898509502411, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2936252207728103e-05, + "rewards/margins": 0.24604588747024536, + "rewards/rejected": -0.2460688352584839, + "step": 12351 + }, + { + "epoch": 8.542185338865837, + "grad_norm": 2.9962687492370605, + "learning_rate": 8.09897033963424e-06, + "log_odds_chosen": 12.462955474853516, + "log_odds_ratio": -8.526945748599246e-06, + "logits/chosen": -0.02684374898672104, + "logits/rejected": 0.05897844582796097, + "logps/chosen": -8.549378253519535e-05, + "logps/rejected": -2.8761844635009766, + "loss": 0.348, + "nll_loss": 0.0869932547211647, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.549377525923774e-06, + "rewards/margins": 0.28760990500450134, + "rewards/rejected": -0.2876184284687042, + "step": 12352 + }, + { + "epoch": 8.542876901798063, + "grad_norm": 3.766939878463745, + "learning_rate": 8.09512832334409e-06, + "log_odds_chosen": 11.607439041137695, + "log_odds_ratio": -2.3744345526210964e-05, + "logits/chosen": -0.41923362016677856, + "logits/rejected": -0.3867270052433014, + "logps/chosen": -0.00019827390497084707, + "logps/rejected": -2.717073440551758, + "loss": 0.4329, + "nll_loss": 0.10822376608848572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9827390133286826e-05, + "rewards/margins": 0.27168750762939453, + "rewards/rejected": -0.27170735597610474, + "step": 12353 + }, + { + "epoch": 8.54356846473029, + "grad_norm": 3.8533971309661865, + "learning_rate": 8.091286307053943e-06, + "log_odds_chosen": 12.08335018157959, + "log_odds_ratio": -1.4224663573259022e-05, + "logits/chosen": -0.22680625319480896, + "logits/rejected": -0.25222718715667725, + "logps/chosen": -0.0003561171470209956, + "logps/rejected": -3.0477356910705566, + "loss": 0.433, + "nll_loss": 0.10824976861476898, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5611716157291085e-05, + "rewards/margins": 0.30473795533180237, + "rewards/rejected": -0.30477356910705566, + "step": 12354 + }, + { + "epoch": 8.544260027662517, + "grad_norm": 3.2876503467559814, + "learning_rate": 8.087444290763794e-06, + "log_odds_chosen": 10.656986236572266, + "log_odds_ratio": -0.0011162598384544253, + "logits/chosen": -0.17624646425247192, + "logits/rejected": -0.17000812292099, + "logps/chosen": -0.0006880313740111887, + "logps/rejected": -2.123157024383545, + "loss": 0.3177, + "nll_loss": 0.07931582629680634, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.880313594592735e-05, + "rewards/margins": 0.21224690973758698, + "rewards/rejected": -0.21231570839881897, + "step": 12355 + }, + { + "epoch": 8.544951590594744, + "grad_norm": 4.662578105926514, + "learning_rate": 8.083602274473644e-06, + "log_odds_chosen": 11.871183395385742, + "log_odds_ratio": -3.7982386857038364e-05, + "logits/chosen": -0.0023404359817504883, + "logits/rejected": -0.0741780549287796, + "logps/chosen": -7.023196667432785e-05, + "logps/rejected": -2.2987864017486572, + "loss": 0.4072, + "nll_loss": 0.10178625583648682, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.023197667876957e-06, + "rewards/margins": 0.22987163066864014, + "rewards/rejected": -0.22987863421440125, + "step": 12356 + }, + { + "epoch": 8.54564315352697, + "grad_norm": 4.404334545135498, + "learning_rate": 8.079760258183495e-06, + "log_odds_chosen": 10.278458595275879, + "log_odds_ratio": -0.00012774733477272093, + "logits/chosen": 0.058876536786556244, + "logits/rejected": 0.03073546290397644, + "logps/chosen": -0.000667063519358635, + "logps/rejected": -2.4232096672058105, + "loss": 0.2532, + "nll_loss": 0.06329509615898132, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.670635048067197e-05, + "rewards/margins": 0.24225425720214844, + "rewards/rejected": -0.2423209697008133, + "step": 12357 + }, + { + "epoch": 8.546334716459198, + "grad_norm": 3.8350791931152344, + "learning_rate": 8.075918241893346e-06, + "log_odds_chosen": 10.941924095153809, + "log_odds_ratio": -6.48950444883667e-05, + "logits/chosen": -0.28380313515663147, + "logits/rejected": -0.3791557848453522, + "logps/chosen": -0.00013279700942803174, + "logps/rejected": -2.2448134422302246, + "loss": 0.3658, + "nll_loss": 0.0914371907711029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3279700397106353e-05, + "rewards/margins": 0.22446808218955994, + "rewards/rejected": -0.22448134422302246, + "step": 12358 + }, + { + "epoch": 8.547026279391424, + "grad_norm": 2.915459394454956, + "learning_rate": 8.072076225603197e-06, + "log_odds_chosen": 10.384876251220703, + "log_odds_ratio": -0.00010581470269244164, + "logits/chosen": 0.02290746383368969, + "logits/rejected": -0.03376729041337967, + "logps/chosen": -0.0005225928616710007, + "logps/rejected": -2.035266876220703, + "loss": 0.3995, + "nll_loss": 0.09986265748739243, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2259281801525503e-05, + "rewards/margins": 0.20347443222999573, + "rewards/rejected": -0.20352670550346375, + "step": 12359 + }, + { + "epoch": 8.547717842323651, + "grad_norm": 4.734531402587891, + "learning_rate": 8.068234209313049e-06, + "log_odds_chosen": 10.698156356811523, + "log_odds_ratio": -5.0863098294939846e-05, + "logits/chosen": -0.4898606836795807, + "logits/rejected": -0.5427123308181763, + "logps/chosen": -0.00018635543528944254, + "logps/rejected": -1.9825899600982666, + "loss": 0.4297, + "nll_loss": 0.10742726922035217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8635542801348493e-05, + "rewards/margins": 0.19824035465717316, + "rewards/rejected": -0.19825901091098785, + "step": 12360 + }, + { + "epoch": 8.548409405255878, + "grad_norm": 4.701021671295166, + "learning_rate": 8.064392193022898e-06, + "log_odds_chosen": 10.699748992919922, + "log_odds_ratio": -4.7124533011810854e-05, + "logits/chosen": -0.43253517150878906, + "logits/rejected": -0.4224990904331207, + "logps/chosen": -0.00014709580864291638, + "logps/rejected": -1.9485095739364624, + "loss": 0.4198, + "nll_loss": 0.10495181381702423, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4709580682392698e-05, + "rewards/margins": 0.19483624398708344, + "rewards/rejected": -0.19485095143318176, + "step": 12361 + }, + { + "epoch": 8.549100968188105, + "grad_norm": 4.619813919067383, + "learning_rate": 8.060550176732749e-06, + "log_odds_chosen": 11.04226303100586, + "log_odds_ratio": -2.6943056582240388e-05, + "logits/chosen": -0.5407166481018066, + "logits/rejected": -0.5664966106414795, + "logps/chosen": -0.000211311416933313, + "logps/rejected": -2.360203504562378, + "loss": 0.4539, + "nll_loss": 0.11346258223056793, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.11311416933313e-05, + "rewards/margins": 0.2359992116689682, + "rewards/rejected": -0.23602034151554108, + "step": 12362 + }, + { + "epoch": 8.549792531120332, + "grad_norm": 4.517255783081055, + "learning_rate": 8.056708160442601e-06, + "log_odds_chosen": 10.63552474975586, + "log_odds_ratio": -7.231361814774573e-05, + "logits/chosen": -0.549342930316925, + "logits/rejected": -0.6850473880767822, + "logps/chosen": -0.0008130917558446527, + "logps/rejected": -2.4010677337646484, + "loss": 0.4628, + "nll_loss": 0.11569619923830032, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.130916830850765e-05, + "rewards/margins": 0.24002546072006226, + "rewards/rejected": -0.2401067614555359, + "step": 12363 + }, + { + "epoch": 8.550484094052559, + "grad_norm": 5.180215835571289, + "learning_rate": 8.052866144152452e-06, + "log_odds_chosen": 10.763911247253418, + "log_odds_ratio": -0.00020632933592423797, + "logits/chosen": -0.12438270449638367, + "logits/rejected": -0.02698398008942604, + "logps/chosen": -0.00023761746706441045, + "logps/rejected": -2.5075294971466064, + "loss": 0.4828, + "nll_loss": 0.1206858828663826, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3761747797834687e-05, + "rewards/margins": 0.25072920322418213, + "rewards/rejected": -0.2507529556751251, + "step": 12364 + }, + { + "epoch": 8.551175656984785, + "grad_norm": 3.4681193828582764, + "learning_rate": 8.049024127862303e-06, + "log_odds_chosen": 10.928182601928711, + "log_odds_ratio": -3.5743283660849556e-05, + "logits/chosen": -0.16697700321674347, + "logits/rejected": -0.2656247615814209, + "logps/chosen": -0.00037262385012581944, + "logps/rejected": -2.7088029384613037, + "loss": 0.3534, + "nll_loss": 0.08834241330623627, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.726238355739042e-05, + "rewards/margins": 0.2708430290222168, + "rewards/rejected": -0.2708802819252014, + "step": 12365 + }, + { + "epoch": 8.551867219917012, + "grad_norm": 3.508986234664917, + "learning_rate": 8.045182111572153e-06, + "log_odds_chosen": 11.227447509765625, + "log_odds_ratio": -2.6481051463633776e-05, + "logits/chosen": -0.0640401542186737, + "logits/rejected": -0.12471738457679749, + "logps/chosen": -0.00022163873654790223, + "logps/rejected": -2.4728031158447266, + "loss": 0.3749, + "nll_loss": 0.09372153133153915, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2163874746183865e-05, + "rewards/margins": 0.24725812673568726, + "rewards/rejected": -0.2472802996635437, + "step": 12366 + }, + { + "epoch": 8.552558782849239, + "grad_norm": 4.076083183288574, + "learning_rate": 8.041340095282004e-06, + "log_odds_chosen": 11.609827995300293, + "log_odds_ratio": -2.9761686164420098e-05, + "logits/chosen": -0.3336213231086731, + "logits/rejected": -0.4385530352592468, + "logps/chosen": -7.464332156814635e-05, + "logps/rejected": -1.9322714805603027, + "loss": 0.37, + "nll_loss": 0.09250232577323914, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.464332156814635e-06, + "rewards/margins": 0.19321969151496887, + "rewards/rejected": -0.1932271420955658, + "step": 12367 + }, + { + "epoch": 8.553250345781466, + "grad_norm": 4.22585391998291, + "learning_rate": 8.037498078991855e-06, + "log_odds_chosen": 11.770487785339355, + "log_odds_ratio": -1.6943175069172867e-05, + "logits/chosen": -0.1061343103647232, + "logits/rejected": -0.073185995221138, + "logps/chosen": -9.793087519938126e-05, + "logps/rejected": -2.4531030654907227, + "loss": 0.397, + "nll_loss": 0.09924229234457016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.793087883736007e-06, + "rewards/margins": 0.2453005015850067, + "rewards/rejected": -0.24531030654907227, + "step": 12368 + }, + { + "epoch": 8.553941908713693, + "grad_norm": 3.291067123413086, + "learning_rate": 8.033656062701707e-06, + "log_odds_chosen": 10.750139236450195, + "log_odds_ratio": -7.862604252295569e-05, + "logits/chosen": -0.17375710606575012, + "logits/rejected": -0.277298241853714, + "logps/chosen": -0.0002167547499993816, + "logps/rejected": -2.1705241203308105, + "loss": 0.552, + "nll_loss": 0.13799560070037842, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1675476091331802e-05, + "rewards/margins": 0.21703073382377625, + "rewards/rejected": -0.2170524150133133, + "step": 12369 + }, + { + "epoch": 8.55463347164592, + "grad_norm": 3.480898857116699, + "learning_rate": 8.029814046411558e-06, + "log_odds_chosen": 10.99404525756836, + "log_odds_ratio": -4.4477874325821176e-05, + "logits/chosen": -0.33993351459503174, + "logits/rejected": -0.3758736848831177, + "logps/chosen": -0.00010217801172984764, + "logps/rejected": -1.725305438041687, + "loss": 0.3707, + "nll_loss": 0.09266746044158936, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0217800991085824e-05, + "rewards/margins": 0.17252033948898315, + "rewards/rejected": -0.17253056168556213, + "step": 12370 + }, + { + "epoch": 8.555325034578146, + "grad_norm": 4.511864185333252, + "learning_rate": 8.025972030121407e-06, + "log_odds_chosen": 11.547143936157227, + "log_odds_ratio": -2.7884903829544783e-05, + "logits/chosen": -0.5886281132698059, + "logits/rejected": -0.5377705097198486, + "logps/chosen": -0.0010218716925010085, + "logps/rejected": -3.1817193031311035, + "loss": 0.5387, + "nll_loss": 0.1346650868654251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001021871721604839, + "rewards/margins": 0.3180697560310364, + "rewards/rejected": -0.3181719481945038, + "step": 12371 + }, + { + "epoch": 8.556016597510373, + "grad_norm": 7.103489398956299, + "learning_rate": 8.02213001383126e-06, + "log_odds_chosen": 11.430948257446289, + "log_odds_ratio": -3.2747379009379074e-05, + "logits/chosen": -0.3653043210506439, + "logits/rejected": -0.35591912269592285, + "logps/chosen": -6.450832006521523e-05, + "logps/rejected": -1.9181818962097168, + "loss": 0.3688, + "nll_loss": 0.09220267832279205, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.450833097915165e-06, + "rewards/margins": 0.19181175529956818, + "rewards/rejected": -0.1918182075023651, + "step": 12372 + }, + { + "epoch": 8.5567081604426, + "grad_norm": 2.6456801891326904, + "learning_rate": 8.01828799754111e-06, + "log_odds_chosen": 10.765140533447266, + "log_odds_ratio": -0.00013235537335276604, + "logits/chosen": -0.3317301571369171, + "logits/rejected": -0.4527689516544342, + "logps/chosen": -0.00024052447406575084, + "logps/rejected": -1.942337989807129, + "loss": 0.2734, + "nll_loss": 0.0683254674077034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4052449589362368e-05, + "rewards/margins": 0.1942097395658493, + "rewards/rejected": -0.19423377513885498, + "step": 12373 + }, + { + "epoch": 8.557399723374827, + "grad_norm": 3.8534891605377197, + "learning_rate": 8.014445981250961e-06, + "log_odds_chosen": 11.574892044067383, + "log_odds_ratio": -6.876789120724425e-05, + "logits/chosen": 0.031613051891326904, + "logits/rejected": -0.07511621713638306, + "logps/chosen": -0.00015929131768643856, + "logps/rejected": -2.676453113555908, + "loss": 0.565, + "nll_loss": 0.14123935997486115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5929130313452333e-05, + "rewards/margins": 0.26762938499450684, + "rewards/rejected": -0.26764532923698425, + "step": 12374 + }, + { + "epoch": 8.558091286307054, + "grad_norm": 3.723045587539673, + "learning_rate": 8.010603964960812e-06, + "log_odds_chosen": 11.224947929382324, + "log_odds_ratio": -0.0001394870487274602, + "logits/chosen": -0.09539203345775604, + "logits/rejected": -0.027994796633720398, + "logps/chosen": -0.00020092641352675855, + "logps/rejected": -2.439518690109253, + "loss": 0.4812, + "nll_loss": 0.12027867138385773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0092640625080094e-05, + "rewards/margins": 0.24393177032470703, + "rewards/rejected": -0.24395185708999634, + "step": 12375 + }, + { + "epoch": 8.55878284923928, + "grad_norm": 3.302299737930298, + "learning_rate": 8.006761948670663e-06, + "log_odds_chosen": 11.917454719543457, + "log_odds_ratio": -1.1386916412448045e-05, + "logits/chosen": -0.380159467458725, + "logits/rejected": -0.4698712229728699, + "logps/chosen": -9.014501847559586e-05, + "logps/rejected": -2.5528717041015625, + "loss": 0.4428, + "nll_loss": 0.11068766564130783, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.014502211357467e-06, + "rewards/margins": 0.25527817010879517, + "rewards/rejected": -0.25528717041015625, + "step": 12376 + }, + { + "epoch": 8.559474412171507, + "grad_norm": 3.8763320446014404, + "learning_rate": 8.002919932380513e-06, + "log_odds_chosen": 10.3782320022583, + "log_odds_ratio": -9.551684343023226e-05, + "logits/chosen": -0.3191932737827301, + "logits/rejected": -0.4106774926185608, + "logps/chosen": -0.00038932557799853384, + "logps/rejected": -1.5302492380142212, + "loss": 0.3755, + "nll_loss": 0.09385703504085541, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8932557799853384e-05, + "rewards/margins": 0.15298599004745483, + "rewards/rejected": -0.15302491188049316, + "step": 12377 + }, + { + "epoch": 8.560165975103734, + "grad_norm": 5.742145538330078, + "learning_rate": 7.999077916090364e-06, + "log_odds_chosen": 11.48661994934082, + "log_odds_ratio": -1.9364917534403503e-05, + "logits/chosen": -0.6679335236549377, + "logits/rejected": -0.7200274467468262, + "logps/chosen": -0.00029462837846949697, + "logps/rejected": -2.473931312561035, + "loss": 0.4403, + "nll_loss": 0.11007969826459885, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9462837119353935e-05, + "rewards/margins": 0.24736365675926208, + "rewards/rejected": -0.24739313125610352, + "step": 12378 + }, + { + "epoch": 8.560857538035961, + "grad_norm": 3.9970247745513916, + "learning_rate": 7.995235899800217e-06, + "log_odds_chosen": 11.762286186218262, + "log_odds_ratio": -1.555400376673788e-05, + "logits/chosen": -0.34955230355262756, + "logits/rejected": -0.38188910484313965, + "logps/chosen": -0.00016597509966231883, + "logps/rejected": -2.812894344329834, + "loss": 0.4296, + "nll_loss": 0.10739898681640625, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.659750887483824e-05, + "rewards/margins": 0.281272828578949, + "rewards/rejected": -0.2812894284725189, + "step": 12379 + }, + { + "epoch": 8.561549100968188, + "grad_norm": 2.7898218631744385, + "learning_rate": 7.991393883510066e-06, + "log_odds_chosen": 10.549398422241211, + "log_odds_ratio": -0.00034211043384857476, + "logits/chosen": -0.32482385635375977, + "logits/rejected": -0.42106741666793823, + "logps/chosen": -0.0004134580958634615, + "logps/rejected": -1.7813361883163452, + "loss": 0.2325, + "nll_loss": 0.05810131877660751, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.134581104153767e-05, + "rewards/margins": 0.17809228599071503, + "rewards/rejected": -0.17813362181186676, + "step": 12380 + }, + { + "epoch": 8.562240663900415, + "grad_norm": 4.60001802444458, + "learning_rate": 7.987551867219916e-06, + "log_odds_chosen": 9.886676788330078, + "log_odds_ratio": -0.00040853844257071614, + "logits/chosen": -0.5294456481933594, + "logits/rejected": -0.574863612651825, + "logps/chosen": -0.0002222563634859398, + "logps/rejected": -1.5985755920410156, + "loss": 0.5867, + "nll_loss": 0.14664258062839508, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.222563671239186e-05, + "rewards/margins": 0.1598353236913681, + "rewards/rejected": -0.15985754132270813, + "step": 12381 + }, + { + "epoch": 8.562932226832642, + "grad_norm": 3.649996042251587, + "learning_rate": 7.983709850929769e-06, + "log_odds_chosen": 11.81763744354248, + "log_odds_ratio": -1.9539475033525378e-05, + "logits/chosen": -0.33109086751937866, + "logits/rejected": -0.2591181993484497, + "logps/chosen": -0.00014043958799447864, + "logps/rejected": -2.5570220947265625, + "loss": 0.3677, + "nll_loss": 0.0919288769364357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4043957889953163e-05, + "rewards/margins": 0.2556881606578827, + "rewards/rejected": -0.2557021975517273, + "step": 12382 + }, + { + "epoch": 8.563623789764868, + "grad_norm": 3.148855447769165, + "learning_rate": 7.97986783463962e-06, + "log_odds_chosen": 11.481773376464844, + "log_odds_ratio": -3.30365655827336e-05, + "logits/chosen": -0.32694217562675476, + "logits/rejected": -0.2874327301979065, + "logps/chosen": -0.00010169432789552957, + "logps/rejected": -2.180878162384033, + "loss": 0.327, + "nll_loss": 0.08175890147686005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0169432243856136e-05, + "rewards/margins": 0.2180776447057724, + "rewards/rejected": -0.2180878221988678, + "step": 12383 + }, + { + "epoch": 8.564315352697095, + "grad_norm": 3.285156011581421, + "learning_rate": 7.97602581834947e-06, + "log_odds_chosen": 11.242053985595703, + "log_odds_ratio": -3.248647408327088e-05, + "logits/chosen": -0.43714556097984314, + "logits/rejected": -0.5591869354248047, + "logps/chosen": -0.000421356875449419, + "logps/rejected": -2.650657892227173, + "loss": 0.3332, + "nll_loss": 0.0832899734377861, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213568536215462e-05, + "rewards/margins": 0.2650236487388611, + "rewards/rejected": -0.2650657892227173, + "step": 12384 + }, + { + "epoch": 8.565006915629322, + "grad_norm": 4.97734260559082, + "learning_rate": 7.972183802059321e-06, + "log_odds_chosen": 10.468923568725586, + "log_odds_ratio": -0.00040179112693294883, + "logits/chosen": -0.8248478174209595, + "logits/rejected": -0.7545952796936035, + "logps/chosen": -0.0006409147172234952, + "logps/rejected": -2.1354901790618896, + "loss": 0.4791, + "nll_loss": 0.11974728107452393, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.409147317754105e-05, + "rewards/margins": 0.21348492801189423, + "rewards/rejected": -0.21354901790618896, + "step": 12385 + }, + { + "epoch": 8.565698478561549, + "grad_norm": 3.5596377849578857, + "learning_rate": 7.968341785769172e-06, + "log_odds_chosen": 11.15113353729248, + "log_odds_ratio": -2.0991963538108394e-05, + "logits/chosen": -0.4224829375743866, + "logits/rejected": -0.3148638904094696, + "logps/chosen": -7.999094668775797e-05, + "logps/rejected": -1.7266626358032227, + "loss": 0.3593, + "nll_loss": 0.08982215076684952, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.999095032573678e-06, + "rewards/margins": 0.1726582646369934, + "rewards/rejected": -0.1726662665605545, + "step": 12386 + }, + { + "epoch": 8.566390041493776, + "grad_norm": 3.6879234313964844, + "learning_rate": 7.964499769479023e-06, + "log_odds_chosen": 9.973072052001953, + "log_odds_ratio": -0.000349164882209152, + "logits/chosen": -0.3515580892562866, + "logits/rejected": -0.3518614172935486, + "logps/chosen": -0.0003501469036564231, + "logps/rejected": -1.7841719388961792, + "loss": 0.5321, + "nll_loss": 0.13299128413200378, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.501469473121688e-05, + "rewards/margins": 0.17838218808174133, + "rewards/rejected": -0.1784171760082245, + "step": 12387 + }, + { + "epoch": 8.567081604426003, + "grad_norm": 5.057744026184082, + "learning_rate": 7.960657753188875e-06, + "log_odds_chosen": 10.082660675048828, + "log_odds_ratio": -0.11251819878816605, + "logits/chosen": 0.10367824137210846, + "logits/rejected": 0.042181506752967834, + "logps/chosen": -0.01484946720302105, + "logps/rejected": -1.9130041599273682, + "loss": 0.4188, + "nll_loss": 0.09344511479139328, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0014849468134343624, + "rewards/margins": 0.1898154616355896, + "rewards/rejected": -0.1913003921508789, + "step": 12388 + }, + { + "epoch": 8.56777316735823, + "grad_norm": 5.113132953643799, + "learning_rate": 7.956815736898724e-06, + "log_odds_chosen": 11.004137992858887, + "log_odds_ratio": -4.713025555247441e-05, + "logits/chosen": -0.3097335696220398, + "logits/rejected": -0.28606897592544556, + "logps/chosen": -0.0001457059697713703, + "logps/rejected": -2.219068765640259, + "loss": 0.5757, + "nll_loss": 0.14392338693141937, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.457059715903597e-05, + "rewards/margins": 0.2218923270702362, + "rewards/rejected": -0.2219068855047226, + "step": 12389 + }, + { + "epoch": 8.568464730290456, + "grad_norm": 3.081252098083496, + "learning_rate": 7.952973720608575e-06, + "log_odds_chosen": 9.218973159790039, + "log_odds_ratio": -0.0005203372566029429, + "logits/chosen": -0.31809869408607483, + "logits/rejected": -0.4148636758327484, + "logps/chosen": -0.00035823852522298694, + "logps/rejected": -1.401037335395813, + "loss": 0.4117, + "nll_loss": 0.10286275297403336, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.582385033951141e-05, + "rewards/margins": 0.1400679051876068, + "rewards/rejected": -0.14010372757911682, + "step": 12390 + }, + { + "epoch": 8.569156293222683, + "grad_norm": 3.4991044998168945, + "learning_rate": 7.949131704318427e-06, + "log_odds_chosen": 10.65394115447998, + "log_odds_ratio": -0.00010200730321230367, + "logits/chosen": -0.5947825312614441, + "logits/rejected": -0.6780396699905396, + "logps/chosen": -0.00046277031651698053, + "logps/rejected": -2.273961067199707, + "loss": 0.3214, + "nll_loss": 0.08033182471990585, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627702946891077e-05, + "rewards/margins": 0.22734983265399933, + "rewards/rejected": -0.22739610075950623, + "step": 12391 + }, + { + "epoch": 8.56984785615491, + "grad_norm": 5.116530895233154, + "learning_rate": 7.945289688028278e-06, + "log_odds_chosen": 10.812143325805664, + "log_odds_ratio": -0.0005723558133468032, + "logits/chosen": -0.3249131739139557, + "logits/rejected": -0.38664811849594116, + "logps/chosen": -0.0001360624737571925, + "logps/rejected": -1.6817561388015747, + "loss": 0.3159, + "nll_loss": 0.07892321795225143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3606247193820309e-05, + "rewards/margins": 0.168162003159523, + "rewards/rejected": -0.168175607919693, + "step": 12392 + }, + { + "epoch": 8.570539419087137, + "grad_norm": 3.7217743396759033, + "learning_rate": 7.941447671738129e-06, + "log_odds_chosen": 11.626294136047363, + "log_odds_ratio": -1.4052928236196749e-05, + "logits/chosen": -0.10477419197559357, + "logits/rejected": -0.15939825773239136, + "logps/chosen": -9.7850919701159e-05, + "logps/rejected": -2.1122097969055176, + "loss": 0.4629, + "nll_loss": 0.11572445929050446, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.78509160631802e-06, + "rewards/margins": 0.2112111747264862, + "rewards/rejected": -0.21122097969055176, + "step": 12393 + }, + { + "epoch": 8.571230982019364, + "grad_norm": 3.8822696208953857, + "learning_rate": 7.93760565544798e-06, + "log_odds_chosen": 11.469244003295898, + "log_odds_ratio": -3.676761480164714e-05, + "logits/chosen": -0.7538586258888245, + "logits/rejected": -0.7865060567855835, + "logps/chosen": -0.00016852424596436322, + "logps/rejected": -2.6194863319396973, + "loss": 0.3714, + "nll_loss": 0.09284963458776474, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.685242386884056e-05, + "rewards/margins": 0.26193177700042725, + "rewards/rejected": -0.2619486451148987, + "step": 12394 + }, + { + "epoch": 8.57192254495159, + "grad_norm": 4.259050369262695, + "learning_rate": 7.93376363915783e-06, + "log_odds_chosen": 10.146892547607422, + "log_odds_ratio": -0.0005833891336806118, + "logits/chosen": 0.22315584123134613, + "logits/rejected": -0.10273627936840057, + "logps/chosen": -0.0004596480284817517, + "logps/rejected": -2.2382736206054688, + "loss": 0.4593, + "nll_loss": 0.11477246880531311, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.596479993779212e-05, + "rewards/margins": 0.22378139197826385, + "rewards/rejected": -0.22382736206054688, + "step": 12395 + }, + { + "epoch": 8.572614107883817, + "grad_norm": 4.730365753173828, + "learning_rate": 7.929921622867681e-06, + "log_odds_chosen": 10.769886016845703, + "log_odds_ratio": -5.231639079283923e-05, + "logits/chosen": -0.2158849686384201, + "logits/rejected": -0.21605895459651947, + "logps/chosen": -0.0001321196323260665, + "logps/rejected": -1.861050009727478, + "loss": 0.4505, + "nll_loss": 0.11261654645204544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3211963050707709e-05, + "rewards/margins": 0.18609178066253662, + "rewards/rejected": -0.18610499799251556, + "step": 12396 + }, + { + "epoch": 8.573305670816044, + "grad_norm": 3.3090145587921143, + "learning_rate": 7.926079606577533e-06, + "log_odds_chosen": 12.224320411682129, + "log_odds_ratio": -7.869349246902857e-06, + "logits/chosen": -0.3469293713569641, + "logits/rejected": -0.39128121733665466, + "logps/chosen": -0.00012052787496941164, + "logps/rejected": -3.163099765777588, + "loss": 0.5273, + "nll_loss": 0.13183526694774628, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2052787496941164e-05, + "rewards/margins": 0.3162979483604431, + "rewards/rejected": -0.31630995869636536, + "step": 12397 + }, + { + "epoch": 8.57399723374827, + "grad_norm": 3.909712553024292, + "learning_rate": 7.922237590287383e-06, + "log_odds_chosen": 10.657337188720703, + "log_odds_ratio": -7.064934470690787e-05, + "logits/chosen": -0.21469080448150635, + "logits/rejected": -0.3312116861343384, + "logps/chosen": -0.0002643395564518869, + "logps/rejected": -1.6290345191955566, + "loss": 0.4238, + "nll_loss": 0.10594627261161804, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.643395600898657e-05, + "rewards/margins": 0.16287702322006226, + "rewards/rejected": -0.16290345788002014, + "step": 12398 + }, + { + "epoch": 8.574688796680498, + "grad_norm": 4.151533603668213, + "learning_rate": 7.918395573997233e-06, + "log_odds_chosen": 11.33218765258789, + "log_odds_ratio": -2.8675931389443576e-05, + "logits/chosen": -0.005869865417480469, + "logits/rejected": -0.07197088748216629, + "logps/chosen": -0.00016606459394097328, + "logps/rejected": -2.044290781021118, + "loss": 0.3302, + "nll_loss": 0.082536980509758, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6606458302703686e-05, + "rewards/margins": 0.20441249012947083, + "rewards/rejected": -0.20442909002304077, + "step": 12399 + }, + { + "epoch": 8.575380359612724, + "grad_norm": 3.3870725631713867, + "learning_rate": 7.914553557707086e-06, + "log_odds_chosen": 11.454788208007812, + "log_odds_ratio": -1.4353579899761826e-05, + "logits/chosen": -0.4412459433078766, + "logits/rejected": -0.455695241689682, + "logps/chosen": -4.6074765123194084e-05, + "logps/rejected": -1.6434885263442993, + "loss": 0.4064, + "nll_loss": 0.10160781443119049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.607476967066759e-06, + "rewards/margins": 0.16434425115585327, + "rewards/rejected": -0.16434885561466217, + "step": 12400 + }, + { + "epoch": 8.576071922544951, + "grad_norm": 7.673307418823242, + "learning_rate": 7.910711541416936e-06, + "log_odds_chosen": 9.517881393432617, + "log_odds_ratio": -0.00014684451161883771, + "logits/chosen": -0.3621896505355835, + "logits/rejected": -0.4492979049682617, + "logps/chosen": -0.0006431568181142211, + "logps/rejected": -1.7415471076965332, + "loss": 0.75, + "nll_loss": 0.18749095499515533, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.431568181142211e-05, + "rewards/margins": 0.17409038543701172, + "rewards/rejected": -0.17415471374988556, + "step": 12401 + }, + { + "epoch": 8.576763485477178, + "grad_norm": 2.8077375888824463, + "learning_rate": 7.906869525126787e-06, + "log_odds_chosen": 10.993677139282227, + "log_odds_ratio": -6.793371721869335e-05, + "logits/chosen": -0.23306405544281006, + "logits/rejected": -0.24650588631629944, + "logps/chosen": -7.878048199927434e-05, + "logps/rejected": -1.7447535991668701, + "loss": 0.2995, + "nll_loss": 0.07486958801746368, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.878048563725315e-06, + "rewards/margins": 0.17446748912334442, + "rewards/rejected": -0.17447537183761597, + "step": 12402 + }, + { + "epoch": 8.577455048409405, + "grad_norm": 3.4173429012298584, + "learning_rate": 7.903027508836638e-06, + "log_odds_chosen": 10.148531913757324, + "log_odds_ratio": -0.0002441834658384323, + "logits/chosen": -0.2620766758918762, + "logits/rejected": -0.38100454211235046, + "logps/chosen": -0.0008849852601997554, + "logps/rejected": -2.099001169204712, + "loss": 0.3004, + "nll_loss": 0.07506556063890457, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.849853475112468e-05, + "rewards/margins": 0.20981164276599884, + "rewards/rejected": -0.2099001407623291, + "step": 12403 + }, + { + "epoch": 8.578146611341632, + "grad_norm": 3.674800395965576, + "learning_rate": 7.899185492546489e-06, + "log_odds_chosen": 9.341479301452637, + "log_odds_ratio": -0.00031686053262092173, + "logits/chosen": 0.1067897230386734, + "logits/rejected": 0.0006720144301652908, + "logps/chosen": -0.00023755063011776656, + "logps/rejected": -1.1609251499176025, + "loss": 0.5396, + "nll_loss": 0.1348668336868286, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3755063011776656e-05, + "rewards/margins": 0.1160687655210495, + "rewards/rejected": -0.11609251797199249, + "step": 12404 + }, + { + "epoch": 8.578838174273859, + "grad_norm": 3.9436371326446533, + "learning_rate": 7.89534347625634e-06, + "log_odds_chosen": 11.42556381225586, + "log_odds_ratio": -0.00021511407976504415, + "logits/chosen": 0.05667072534561157, + "logits/rejected": -0.06897382438182831, + "logps/chosen": -0.0002811176818795502, + "logps/rejected": -2.468977928161621, + "loss": 0.4235, + "nll_loss": 0.10585974156856537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8111768187955022e-05, + "rewards/margins": 0.24686969816684723, + "rewards/rejected": -0.24689781665802002, + "step": 12405 + }, + { + "epoch": 8.579529737206085, + "grad_norm": 5.153789043426514, + "learning_rate": 7.891501459966192e-06, + "log_odds_chosen": 11.876541137695312, + "log_odds_ratio": -0.00033157187863253057, + "logits/chosen": -0.26482778787612915, + "logits/rejected": -0.08884061872959137, + "logps/chosen": -0.0004525747208390385, + "logps/rejected": -3.064087390899658, + "loss": 0.5616, + "nll_loss": 0.14036419987678528, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.525747135630809e-05, + "rewards/margins": 0.3063634932041168, + "rewards/rejected": -0.30640873312950134, + "step": 12406 + }, + { + "epoch": 8.580221300138312, + "grad_norm": 4.294795513153076, + "learning_rate": 7.887659443676041e-06, + "log_odds_chosen": 9.854249954223633, + "log_odds_ratio": -0.0002540835994295776, + "logits/chosen": -0.417694628238678, + "logits/rejected": -0.4679708480834961, + "logps/chosen": -0.0001627085730433464, + "logps/rejected": -1.143477201461792, + "loss": 0.3042, + "nll_loss": 0.07602757960557938, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.627085657673888e-05, + "rewards/margins": 0.11433145403862, + "rewards/rejected": -0.11434773355722427, + "step": 12407 + }, + { + "epoch": 8.58091286307054, + "grad_norm": 6.635833740234375, + "learning_rate": 7.883817427385892e-06, + "log_odds_chosen": 12.390829086303711, + "log_odds_ratio": -5.070034239906818e-05, + "logits/chosen": -0.33184492588043213, + "logits/rejected": -0.35565727949142456, + "logps/chosen": -0.00014887124416418374, + "logps/rejected": -3.6356828212738037, + "loss": 0.5374, + "nll_loss": 0.1343454271554947, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4887124962115195e-05, + "rewards/margins": 0.36355340480804443, + "rewards/rejected": -0.3635683059692383, + "step": 12408 + }, + { + "epoch": 8.581604426002766, + "grad_norm": 4.665489673614502, + "learning_rate": 7.879975411095744e-06, + "log_odds_chosen": 10.724321365356445, + "log_odds_ratio": -7.045797974569723e-05, + "logits/chosen": -0.4190210700035095, + "logits/rejected": -0.4892667531967163, + "logps/chosen": -0.00021666633256245404, + "logps/rejected": -2.175072431564331, + "loss": 0.3865, + "nll_loss": 0.09662678092718124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1666633983841166e-05, + "rewards/margins": 0.21748557686805725, + "rewards/rejected": -0.2175072431564331, + "step": 12409 + }, + { + "epoch": 8.582295988934993, + "grad_norm": 3.4573514461517334, + "learning_rate": 7.876133394805595e-06, + "log_odds_chosen": 11.061342239379883, + "log_odds_ratio": -1.9485076336422935e-05, + "logits/chosen": -0.38122037053108215, + "logits/rejected": -0.3857944905757904, + "logps/chosen": -0.00016124022658914328, + "logps/rejected": -2.0994670391082764, + "loss": 0.3621, + "nll_loss": 0.0905316025018692, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6124022295116447e-05, + "rewards/margins": 0.20993059873580933, + "rewards/rejected": -0.20994670689105988, + "step": 12410 + }, + { + "epoch": 8.58298755186722, + "grad_norm": 4.032299518585205, + "learning_rate": 7.872291378515446e-06, + "log_odds_chosen": 11.768798828125, + "log_odds_ratio": -1.0957606718875468e-05, + "logits/chosen": -0.13458450138568878, + "logits/rejected": -0.19058941304683685, + "logps/chosen": -9.453172970097512e-05, + "logps/rejected": -2.56960129737854, + "loss": 0.5784, + "nll_loss": 0.14460241794586182, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.453172424400691e-06, + "rewards/margins": 0.2569506764411926, + "rewards/rejected": -0.2569601535797119, + "step": 12411 + }, + { + "epoch": 8.583679114799446, + "grad_norm": 7.356479644775391, + "learning_rate": 7.868449362225295e-06, + "log_odds_chosen": 10.914276123046875, + "log_odds_ratio": -2.452870467095636e-05, + "logits/chosen": -0.6373881101608276, + "logits/rejected": -0.7422344088554382, + "logps/chosen": -0.00029046228155493736, + "logps/rejected": -2.442608594894409, + "loss": 0.5577, + "nll_loss": 0.13943205773830414, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9046228519291617e-05, + "rewards/margins": 0.24423182010650635, + "rewards/rejected": -0.24426086246967316, + "step": 12412 + }, + { + "epoch": 8.584370677731673, + "grad_norm": 2.993687391281128, + "learning_rate": 7.864607345935147e-06, + "log_odds_chosen": 10.779678344726562, + "log_odds_ratio": -0.00016207742737606168, + "logits/chosen": -0.6277985572814941, + "logits/rejected": -0.5838664770126343, + "logps/chosen": -0.000505428877659142, + "logps/rejected": -2.434213638305664, + "loss": 0.3336, + "nll_loss": 0.08338885009288788, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.054288340033963e-05, + "rewards/margins": 0.24337083101272583, + "rewards/rejected": -0.24342137575149536, + "step": 12413 + }, + { + "epoch": 8.5850622406639, + "grad_norm": 3.3739326000213623, + "learning_rate": 7.860765329644998e-06, + "log_odds_chosen": 11.435413360595703, + "log_odds_ratio": -6.362981366692111e-05, + "logits/chosen": -0.43177902698516846, + "logits/rejected": -0.5248620510101318, + "logps/chosen": -0.00018691572768148035, + "logps/rejected": -2.1464920043945312, + "loss": 0.5922, + "nll_loss": 0.14805501699447632, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8691573131945916e-05, + "rewards/margins": 0.21463051438331604, + "rewards/rejected": -0.2146492302417755, + "step": 12414 + }, + { + "epoch": 8.585753803596127, + "grad_norm": 4.994083881378174, + "learning_rate": 7.856923313354849e-06, + "log_odds_chosen": 10.883642196655273, + "log_odds_ratio": -0.00011137684487039223, + "logits/chosen": 0.15582235157489777, + "logits/rejected": 0.09947144240140915, + "logps/chosen": -0.00015272808377631009, + "logps/rejected": -2.2074105739593506, + "loss": 0.7234, + "nll_loss": 0.1808408498764038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.527280983282253e-05, + "rewards/margins": 0.22072578966617584, + "rewards/rejected": -0.22074106335639954, + "step": 12415 + }, + { + "epoch": 8.586445366528354, + "grad_norm": 4.330446720123291, + "learning_rate": 7.853081297064701e-06, + "log_odds_chosen": 11.47640609741211, + "log_odds_ratio": -2.2898813767824322e-05, + "logits/chosen": -0.5861948728561401, + "logits/rejected": -0.5868821740150452, + "logps/chosen": -0.0001652640785323456, + "logps/rejected": -2.2207837104797363, + "loss": 0.6718, + "nll_loss": 0.16793853044509888, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6526406398043036e-05, + "rewards/margins": 0.2220618724822998, + "rewards/rejected": -0.22207841277122498, + "step": 12416 + }, + { + "epoch": 8.58713692946058, + "grad_norm": 4.56218957901001, + "learning_rate": 7.84923928077455e-06, + "log_odds_chosen": 9.333669662475586, + "log_odds_ratio": -0.00046048639342188835, + "logits/chosen": -0.4840073883533478, + "logits/rejected": -0.40228593349456787, + "logps/chosen": -0.00044993084156885743, + "logps/rejected": -1.4231839179992676, + "loss": 0.3605, + "nll_loss": 0.09007444977760315, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4993084884481505e-05, + "rewards/margins": 0.14227339625358582, + "rewards/rejected": -0.14231839776039124, + "step": 12417 + }, + { + "epoch": 8.587828492392807, + "grad_norm": 2.649446964263916, + "learning_rate": 7.845397264484401e-06, + "log_odds_chosen": 11.239232063293457, + "log_odds_ratio": -2.4983983166748658e-05, + "logits/chosen": -0.27129191160202026, + "logits/rejected": -0.3342203199863434, + "logps/chosen": -0.00010748470958787948, + "logps/rejected": -2.064061164855957, + "loss": 0.3006, + "nll_loss": 0.07513561099767685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0748471140686888e-05, + "rewards/margins": 0.20639537274837494, + "rewards/rejected": -0.2064061164855957, + "step": 12418 + }, + { + "epoch": 8.588520055325034, + "grad_norm": 2.885756492614746, + "learning_rate": 7.841555248194253e-06, + "log_odds_chosen": 10.988302230834961, + "log_odds_ratio": -7.131589518394321e-05, + "logits/chosen": -0.6593400835990906, + "logits/rejected": -0.646030068397522, + "logps/chosen": -0.00020226562628522515, + "logps/rejected": -2.248378276824951, + "loss": 0.2897, + "nll_loss": 0.07242387533187866, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0226561900926754e-05, + "rewards/margins": 0.22481761872768402, + "rewards/rejected": -0.22483783960342407, + "step": 12419 + }, + { + "epoch": 8.589211618257261, + "grad_norm": 3.9579591751098633, + "learning_rate": 7.837713231904104e-06, + "log_odds_chosen": 10.152627944946289, + "log_odds_ratio": -0.00012383170542307198, + "logits/chosen": -0.15010769665241241, + "logits/rejected": -0.30715739727020264, + "logps/chosen": -0.0010937991319224238, + "logps/rejected": -1.7039655447006226, + "loss": 0.3133, + "nll_loss": 0.07831807434558868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010937990737147629, + "rewards/margins": 0.17028717696666718, + "rewards/rejected": -0.1703965663909912, + "step": 12420 + }, + { + "epoch": 8.589903181189488, + "grad_norm": 11.929691314697266, + "learning_rate": 7.833871215613955e-06, + "log_odds_chosen": 11.3349609375, + "log_odds_ratio": -3.4468917874619365e-05, + "logits/chosen": -0.407043993473053, + "logits/rejected": -0.4227757155895233, + "logps/chosen": -9.850895730778575e-05, + "logps/rejected": -2.127613067626953, + "loss": 0.3972, + "nll_loss": 0.09930659830570221, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.850896276475396e-06, + "rewards/margins": 0.21275146305561066, + "rewards/rejected": -0.2127612978219986, + "step": 12421 + }, + { + "epoch": 8.590594744121715, + "grad_norm": 5.26082706451416, + "learning_rate": 7.830029199323806e-06, + "log_odds_chosen": 12.367096900939941, + "log_odds_ratio": -1.268644609808689e-05, + "logits/chosen": -0.6866305470466614, + "logits/rejected": -0.640007495880127, + "logps/chosen": -0.0001297138660447672, + "logps/rejected": -3.2089452743530273, + "loss": 0.3444, + "nll_loss": 0.08609630912542343, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2971387150173541e-05, + "rewards/margins": 0.32088154554367065, + "rewards/rejected": -0.3208945393562317, + "step": 12422 + }, + { + "epoch": 8.591286307053942, + "grad_norm": 4.022059917449951, + "learning_rate": 7.826187183033656e-06, + "log_odds_chosen": 11.207496643066406, + "log_odds_ratio": -2.807136843330227e-05, + "logits/chosen": -0.17429150640964508, + "logits/rejected": -0.2393246293067932, + "logps/chosen": -0.00024039827985689044, + "logps/rejected": -2.346452236175537, + "loss": 0.4454, + "nll_loss": 0.11134126782417297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4039827621891163e-05, + "rewards/margins": 0.23462116718292236, + "rewards/rejected": -0.23464521765708923, + "step": 12423 + }, + { + "epoch": 8.591977869986168, + "grad_norm": 7.19039249420166, + "learning_rate": 7.822345166743507e-06, + "log_odds_chosen": 11.800032615661621, + "log_odds_ratio": -1.4056697182240896e-05, + "logits/chosen": -0.503092348575592, + "logits/rejected": -0.5850523710250854, + "logps/chosen": -0.00042195821879431605, + "logps/rejected": -2.707442045211792, + "loss": 0.4701, + "nll_loss": 0.11752810329198837, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219581751385704e-05, + "rewards/margins": 0.2707020044326782, + "rewards/rejected": -0.2707442045211792, + "step": 12424 + }, + { + "epoch": 8.592669432918395, + "grad_norm": 3.98699951171875, + "learning_rate": 7.81850315045336e-06, + "log_odds_chosen": 9.630369186401367, + "log_odds_ratio": -0.000857133767567575, + "logits/chosen": -0.7769922018051147, + "logits/rejected": -0.6913413405418396, + "logps/chosen": -0.0009730067104101181, + "logps/rejected": -1.8855459690093994, + "loss": 0.3904, + "nll_loss": 0.09750308841466904, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.730067540658638e-05, + "rewards/margins": 0.18845731019973755, + "rewards/rejected": -0.18855461478233337, + "step": 12425 + }, + { + "epoch": 8.593360995850622, + "grad_norm": 2.6930856704711914, + "learning_rate": 7.814661134163209e-06, + "log_odds_chosen": 11.874320030212402, + "log_odds_ratio": -4.8003526899265125e-05, + "logits/chosen": -0.37165212631225586, + "logits/rejected": -0.37712541222572327, + "logps/chosen": -0.0001580321550136432, + "logps/rejected": -2.784151554107666, + "loss": 0.2959, + "nll_loss": 0.0739690363407135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5803216228960082e-05, + "rewards/margins": 0.27839934825897217, + "rewards/rejected": -0.27841517329216003, + "step": 12426 + }, + { + "epoch": 8.594052558782849, + "grad_norm": 2.8228485584259033, + "learning_rate": 7.81081911787306e-06, + "log_odds_chosen": 10.419713973999023, + "log_odds_ratio": -7.389950769720599e-05, + "logits/chosen": 0.11854930967092514, + "logits/rejected": 0.037088390439748764, + "logps/chosen": -0.00019729827181436121, + "logps/rejected": -1.8497958183288574, + "loss": 0.3051, + "nll_loss": 0.07627741247415543, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9729828636627644e-05, + "rewards/margins": 0.18495984375476837, + "rewards/rejected": -0.18497957289218903, + "step": 12427 + }, + { + "epoch": 8.594744121715076, + "grad_norm": 3.9206323623657227, + "learning_rate": 7.806977101582912e-06, + "log_odds_chosen": 12.833907127380371, + "log_odds_ratio": -7.3549958869989496e-06, + "logits/chosen": -0.41607871651649475, + "logits/rejected": -0.4357336461544037, + "logps/chosen": -0.0001064469397533685, + "logps/rejected": -3.4070348739624023, + "loss": 0.3021, + "nll_loss": 0.07551757246255875, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0644693247741088e-05, + "rewards/margins": 0.3406928479671478, + "rewards/rejected": -0.34070348739624023, + "step": 12428 + }, + { + "epoch": 8.595435684647303, + "grad_norm": 4.900330543518066, + "learning_rate": 7.803135085292763e-06, + "log_odds_chosen": 10.996088981628418, + "log_odds_ratio": -4.514288957579993e-05, + "logits/chosen": -0.7172442078590393, + "logits/rejected": -0.7768953442573547, + "logps/chosen": -0.00010613269114401191, + "logps/rejected": -1.6444897651672363, + "loss": 0.3359, + "nll_loss": 0.08397156745195389, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0613269296300132e-05, + "rewards/margins": 0.1644383668899536, + "rewards/rejected": -0.16444897651672363, + "step": 12429 + }, + { + "epoch": 8.59612724757953, + "grad_norm": 3.610318899154663, + "learning_rate": 7.799293069002613e-06, + "log_odds_chosen": 11.238482475280762, + "log_odds_ratio": -9.320876415586099e-05, + "logits/chosen": -0.23434729874134064, + "logits/rejected": -0.25972285866737366, + "logps/chosen": -0.00015973681001923978, + "logps/rejected": -2.5857834815979004, + "loss": 0.3783, + "nll_loss": 0.09457194060087204, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.597368100192398e-05, + "rewards/margins": 0.2585623562335968, + "rewards/rejected": -0.2585783302783966, + "step": 12430 + }, + { + "epoch": 8.596818810511756, + "grad_norm": 2.846229314804077, + "learning_rate": 7.795451052712464e-06, + "log_odds_chosen": 11.087169647216797, + "log_odds_ratio": -2.728665822360199e-05, + "logits/chosen": -0.5193180441856384, + "logits/rejected": -0.49915337562561035, + "logps/chosen": -0.00019525145762600005, + "logps/rejected": -2.261341094970703, + "loss": 0.2822, + "nll_loss": 0.07054363936185837, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.952514321601484e-05, + "rewards/margins": 0.22611457109451294, + "rewards/rejected": -0.22613409161567688, + "step": 12431 + }, + { + "epoch": 8.597510373443983, + "grad_norm": 5.272435188293457, + "learning_rate": 7.791609036422315e-06, + "log_odds_chosen": 11.377605438232422, + "log_odds_ratio": -2.1794385247631e-05, + "logits/chosen": -0.6591401696205139, + "logits/rejected": -0.6738142371177673, + "logps/chosen": -0.00019038034952245653, + "logps/rejected": -2.741790771484375, + "loss": 0.4957, + "nll_loss": 0.12391241639852524, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9038036043639295e-05, + "rewards/margins": 0.2741600275039673, + "rewards/rejected": -0.274179071187973, + "step": 12432 + }, + { + "epoch": 8.59820193637621, + "grad_norm": 3.132481336593628, + "learning_rate": 7.787767020132166e-06, + "log_odds_chosen": 10.9615478515625, + "log_odds_ratio": -8.768655243329704e-05, + "logits/chosen": -0.422199010848999, + "logits/rejected": -0.4062215983867645, + "logps/chosen": -0.0003607200342230499, + "logps/rejected": -2.757406234741211, + "loss": 0.4979, + "nll_loss": 0.1244717612862587, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.607200414990075e-05, + "rewards/margins": 0.2757045328617096, + "rewards/rejected": -0.2757406234741211, + "step": 12433 + }, + { + "epoch": 8.598893499308437, + "grad_norm": 3.0860936641693115, + "learning_rate": 7.783925003842018e-06, + "log_odds_chosen": 10.253957748413086, + "log_odds_ratio": -8.260829781647772e-05, + "logits/chosen": -0.5182685852050781, + "logits/rejected": -0.5591882467269897, + "logps/chosen": -0.000137411494506523, + "logps/rejected": -1.4636437892913818, + "loss": 0.3903, + "nll_loss": 0.09755666553974152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3741150723944884e-05, + "rewards/margins": 0.14635063707828522, + "rewards/rejected": -0.14636439085006714, + "step": 12434 + }, + { + "epoch": 8.599585062240664, + "grad_norm": 4.983059883117676, + "learning_rate": 7.780082987551867e-06, + "log_odds_chosen": 11.465124130249023, + "log_odds_ratio": -2.0820034478674643e-05, + "logits/chosen": 0.12352912127971649, + "logits/rejected": 0.09706210345029831, + "logps/chosen": -0.0013850387185811996, + "logps/rejected": -2.9979348182678223, + "loss": 0.5263, + "nll_loss": 0.13156157732009888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013850386312697083, + "rewards/margins": 0.299655020236969, + "rewards/rejected": -0.2997935116291046, + "step": 12435 + }, + { + "epoch": 8.60027662517289, + "grad_norm": 4.760752201080322, + "learning_rate": 7.776240971261718e-06, + "log_odds_chosen": 11.416793823242188, + "log_odds_ratio": -8.722698112251237e-05, + "logits/chosen": -0.33795422315597534, + "logits/rejected": -0.3896262049674988, + "logps/chosen": -0.0003545653016772121, + "logps/rejected": -2.9029064178466797, + "loss": 0.6167, + "nll_loss": 0.15416058897972107, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.545653453329578e-05, + "rewards/margins": 0.29025518894195557, + "rewards/rejected": -0.2902906537055969, + "step": 12436 + }, + { + "epoch": 8.600968188105117, + "grad_norm": 26.122861862182617, + "learning_rate": 7.77239895497157e-06, + "log_odds_chosen": 10.64809513092041, + "log_odds_ratio": -8.275601430796087e-05, + "logits/chosen": -0.2887398600578308, + "logits/rejected": -0.355773001909256, + "logps/chosen": -0.00025288446340709925, + "logps/rejected": -2.215735912322998, + "loss": 0.4121, + "nll_loss": 0.10302183032035828, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5288447432103567e-05, + "rewards/margins": 0.22154831886291504, + "rewards/rejected": -0.221573606133461, + "step": 12437 + }, + { + "epoch": 8.601659751037344, + "grad_norm": 3.66646671295166, + "learning_rate": 7.768556938681421e-06, + "log_odds_chosen": 12.302300453186035, + "log_odds_ratio": -8.563475603295956e-06, + "logits/chosen": -0.06317726522684097, + "logits/rejected": -0.10501933842897415, + "logps/chosen": -0.00013157639477867633, + "logps/rejected": -3.0213968753814697, + "loss": 0.4363, + "nll_loss": 0.10907794535160065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3157639841665514e-05, + "rewards/margins": 0.30212652683258057, + "rewards/rejected": -0.3021396994590759, + "step": 12438 + }, + { + "epoch": 8.60235131396957, + "grad_norm": 4.6148152351379395, + "learning_rate": 7.764714922391272e-06, + "log_odds_chosen": 11.005189895629883, + "log_odds_ratio": -3.72265130863525e-05, + "logits/chosen": -0.02495836466550827, + "logits/rejected": -0.11913137137889862, + "logps/chosen": -0.0002928634639829397, + "logps/rejected": -2.1169567108154297, + "loss": 0.6361, + "nll_loss": 0.1590229868888855, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.928634603449609e-05, + "rewards/margins": 0.21166637539863586, + "rewards/rejected": -0.21169567108154297, + "step": 12439 + }, + { + "epoch": 8.603042876901798, + "grad_norm": 2.781829833984375, + "learning_rate": 7.760872906101122e-06, + "log_odds_chosen": 10.781734466552734, + "log_odds_ratio": -9.079500159714371e-05, + "logits/chosen": -0.6345353722572327, + "logits/rejected": -0.6050483584403992, + "logps/chosen": -0.000219700435991399, + "logps/rejected": -1.947325587272644, + "loss": 0.1992, + "nll_loss": 0.04979780316352844, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.197004323534202e-05, + "rewards/margins": 0.19471058249473572, + "rewards/rejected": -0.19473256170749664, + "step": 12440 + }, + { + "epoch": 8.603734439834025, + "grad_norm": 3.6431546211242676, + "learning_rate": 7.757030889810973e-06, + "log_odds_chosen": 11.355770111083984, + "log_odds_ratio": -3.7065976357553154e-05, + "logits/chosen": -0.2684091329574585, + "logits/rejected": -0.22216904163360596, + "logps/chosen": -0.00013720057904720306, + "logps/rejected": -2.2910001277923584, + "loss": 0.4009, + "nll_loss": 0.10022085905075073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3720056813326664e-05, + "rewards/margins": 0.22908629477024078, + "rewards/rejected": -0.22910000383853912, + "step": 12441 + }, + { + "epoch": 8.604426002766251, + "grad_norm": 4.933355808258057, + "learning_rate": 7.753188873520824e-06, + "log_odds_chosen": 10.542715072631836, + "log_odds_ratio": -6.371807830873877e-05, + "logits/chosen": -0.36058422923088074, + "logits/rejected": -0.39982569217681885, + "logps/chosen": -0.0005256114527583122, + "logps/rejected": -2.4589436054229736, + "loss": 0.5143, + "nll_loss": 0.12855909764766693, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.256115036900155e-05, + "rewards/margins": 0.24584180116653442, + "rewards/rejected": -0.24589437246322632, + "step": 12442 + }, + { + "epoch": 8.605117565698478, + "grad_norm": 3.6169495582580566, + "learning_rate": 7.749346857230676e-06, + "log_odds_chosen": 10.901447296142578, + "log_odds_ratio": -0.00013576316996477544, + "logits/chosen": -0.3425856828689575, + "logits/rejected": -0.526218831539154, + "logps/chosen": -0.00019560789223760366, + "logps/rejected": -1.9449979066848755, + "loss": 0.3021, + "nll_loss": 0.07550856471061707, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9560789951356128e-05, + "rewards/margins": 0.19448024034500122, + "rewards/rejected": -0.19449979066848755, + "step": 12443 + }, + { + "epoch": 8.605809128630705, + "grad_norm": 7.017158508300781, + "learning_rate": 7.745504840940525e-06, + "log_odds_chosen": 11.732519149780273, + "log_odds_ratio": -1.1716860171873122e-05, + "logits/chosen": -0.661844789981842, + "logits/rejected": -0.7151435613632202, + "logps/chosen": -9.2258196673356e-05, + "logps/rejected": -2.1086206436157227, + "loss": 0.4627, + "nll_loss": 0.11568126827478409, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2258196673356e-06, + "rewards/margins": 0.21085286140441895, + "rewards/rejected": -0.21086208522319794, + "step": 12444 + }, + { + "epoch": 8.606500691562932, + "grad_norm": 4.3740010261535645, + "learning_rate": 7.741662824650376e-06, + "log_odds_chosen": 11.738712310791016, + "log_odds_ratio": -1.8135553546017036e-05, + "logits/chosen": -0.4909563958644867, + "logits/rejected": -0.5950556397438049, + "logps/chosen": -8.437960059382021e-05, + "logps/rejected": -2.2165842056274414, + "loss": 0.3897, + "nll_loss": 0.09741343557834625, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.437959877483081e-06, + "rewards/margins": 0.22164995968341827, + "rewards/rejected": -0.22165840864181519, + "step": 12445 + }, + { + "epoch": 8.607192254495159, + "grad_norm": 3.511035680770874, + "learning_rate": 7.737820808360227e-06, + "log_odds_chosen": 11.519828796386719, + "log_odds_ratio": -0.0002825894916895777, + "logits/chosen": -0.09426072239875793, + "logits/rejected": -0.2573559582233429, + "logps/chosen": -0.0005895392969250679, + "logps/rejected": -3.521434783935547, + "loss": 0.4748, + "nll_loss": 0.1186603531241417, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895393042010255e-05, + "rewards/margins": 0.35208457708358765, + "rewards/rejected": -0.3521435260772705, + "step": 12446 + }, + { + "epoch": 8.607883817427386, + "grad_norm": 2.761916160583496, + "learning_rate": 7.73397879207008e-06, + "log_odds_chosen": 10.481441497802734, + "log_odds_ratio": -6.497966387541965e-05, + "logits/chosen": -0.3832981586456299, + "logits/rejected": -0.512840211391449, + "logps/chosen": -0.0011071816552430391, + "logps/rejected": -2.6986961364746094, + "loss": 0.2443, + "nll_loss": 0.061074674129486084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011071816697949544, + "rewards/margins": 0.2697589099407196, + "rewards/rejected": -0.2698696255683899, + "step": 12447 + }, + { + "epoch": 8.608575380359612, + "grad_norm": 4.164094924926758, + "learning_rate": 7.73013677577993e-06, + "log_odds_chosen": 9.988368034362793, + "log_odds_ratio": -0.00010692431533243507, + "logits/chosen": -0.6198857426643372, + "logits/rejected": -0.5629023313522339, + "logps/chosen": -0.00011031327449018136, + "logps/rejected": -1.316125512123108, + "loss": 0.368, + "nll_loss": 0.09198827296495438, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1031327630917076e-05, + "rewards/margins": 0.1316015124320984, + "rewards/rejected": -0.13161253929138184, + "step": 12448 + }, + { + "epoch": 8.60926694329184, + "grad_norm": 3.2900867462158203, + "learning_rate": 7.72629475948978e-06, + "log_odds_chosen": 11.8999662399292, + "log_odds_ratio": -1.8995482605532743e-05, + "logits/chosen": -0.4648793041706085, + "logits/rejected": -0.4841955900192261, + "logps/chosen": -0.00017940175894182175, + "logps/rejected": -2.465615749359131, + "loss": 0.3498, + "nll_loss": 0.08744871616363525, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7940175894182175e-05, + "rewards/margins": 0.24654364585876465, + "rewards/rejected": -0.24656157195568085, + "step": 12449 + }, + { + "epoch": 8.609958506224066, + "grad_norm": 3.0412774085998535, + "learning_rate": 7.722452743199632e-06, + "log_odds_chosen": 11.757244110107422, + "log_odds_ratio": -1.554191840114072e-05, + "logits/chosen": -0.47457292675971985, + "logits/rejected": -0.43818992376327515, + "logps/chosen": -8.676251309225336e-05, + "logps/rejected": -2.4316704273223877, + "loss": 0.3544, + "nll_loss": 0.08859597146511078, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.676252036821097e-06, + "rewards/margins": 0.24315837025642395, + "rewards/rejected": -0.24316704273223877, + "step": 12450 + }, + { + "epoch": 8.610650069156293, + "grad_norm": 3.6737210750579834, + "learning_rate": 7.718610726909482e-06, + "log_odds_chosen": 11.129951477050781, + "log_odds_ratio": -8.118825644487515e-05, + "logits/chosen": -0.49961328506469727, + "logits/rejected": -0.5508215427398682, + "logps/chosen": -0.001525634783320129, + "logps/rejected": -2.4258229732513428, + "loss": 0.4255, + "nll_loss": 0.10636596381664276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015256348706316203, + "rewards/margins": 0.24242974817752838, + "rewards/rejected": -0.242582306265831, + "step": 12451 + }, + { + "epoch": 8.61134163208852, + "grad_norm": 3.468428134918213, + "learning_rate": 7.714768710619333e-06, + "log_odds_chosen": 11.673049926757812, + "log_odds_ratio": -3.096312502748333e-05, + "logits/chosen": 0.007082067430019379, + "logits/rejected": -0.08783157914876938, + "logps/chosen": -0.00016287853941321373, + "logps/rejected": -2.8624157905578613, + "loss": 0.4315, + "nll_loss": 0.10786047577857971, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6287855032715015e-05, + "rewards/margins": 0.2862253189086914, + "rewards/rejected": -0.2862415909767151, + "step": 12452 + }, + { + "epoch": 8.612033195020746, + "grad_norm": 3.6790342330932617, + "learning_rate": 7.710926694329184e-06, + "log_odds_chosen": 10.868727684020996, + "log_odds_ratio": -0.012142255902290344, + "logits/chosen": -0.2326192855834961, + "logits/rejected": -0.2559518814086914, + "logps/chosen": -0.005303188692778349, + "logps/rejected": -2.8267157077789307, + "loss": 0.4519, + "nll_loss": 0.11177083104848862, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005303188809193671, + "rewards/margins": 0.28214123845100403, + "rewards/rejected": -0.28267157077789307, + "step": 12453 + }, + { + "epoch": 8.612724757952973, + "grad_norm": 4.647916316986084, + "learning_rate": 7.707084678039035e-06, + "log_odds_chosen": 9.514710426330566, + "log_odds_ratio": -0.00031602318631485105, + "logits/chosen": -0.09073175489902496, + "logits/rejected": -0.037744827568531036, + "logps/chosen": -0.00106034183409065, + "logps/rejected": -1.9035536050796509, + "loss": 0.3582, + "nll_loss": 0.08952006697654724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010603418195387349, + "rewards/margins": 0.19024932384490967, + "rewards/rejected": -0.1903553605079651, + "step": 12454 + }, + { + "epoch": 8.6134163208852, + "grad_norm": 4.053160190582275, + "learning_rate": 7.703242661748885e-06, + "log_odds_chosen": 11.832403182983398, + "log_odds_ratio": -1.3382677025219891e-05, + "logits/chosen": -0.3503800928592682, + "logits/rejected": -0.3921029567718506, + "logps/chosen": -0.00010898220352828503, + "logps/rejected": -2.5034584999084473, + "loss": 0.5187, + "nll_loss": 0.12968364357948303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0898222171817906e-05, + "rewards/margins": 0.2503349781036377, + "rewards/rejected": -0.2503458261489868, + "step": 12455 + }, + { + "epoch": 8.614107883817427, + "grad_norm": 3.5278303623199463, + "learning_rate": 7.699400645458738e-06, + "log_odds_chosen": 11.763395309448242, + "log_odds_ratio": -1.0844772987184115e-05, + "logits/chosen": -0.5048444867134094, + "logits/rejected": -0.5952221751213074, + "logps/chosen": -0.00012985311332158744, + "logps/rejected": -2.556190013885498, + "loss": 0.3979, + "nll_loss": 0.09947910159826279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2985312423552386e-05, + "rewards/margins": 0.2556060254573822, + "rewards/rejected": -0.25561901926994324, + "step": 12456 + }, + { + "epoch": 8.614799446749654, + "grad_norm": 4.416454315185547, + "learning_rate": 7.695558629168589e-06, + "log_odds_chosen": 10.293468475341797, + "log_odds_ratio": -0.00017848135030362755, + "logits/chosen": -0.4820539355278015, + "logits/rejected": -0.4339297115802765, + "logps/chosen": -0.0003156516177114099, + "logps/rejected": -1.7361714839935303, + "loss": 0.2258, + "nll_loss": 0.056426484137773514, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.156516686431132e-05, + "rewards/margins": 0.17358556389808655, + "rewards/rejected": -0.1736171394586563, + "step": 12457 + }, + { + "epoch": 8.61549100968188, + "grad_norm": 3.6610524654388428, + "learning_rate": 7.691716612878438e-06, + "log_odds_chosen": 11.170249938964844, + "log_odds_ratio": -3.065021155634895e-05, + "logits/chosen": -0.13752108812332153, + "logits/rejected": -0.06596602499485016, + "logps/chosen": -0.000118286392535083, + "logps/rejected": -2.0699398517608643, + "loss": 0.3302, + "nll_loss": 0.082545205950737, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1828638889710419e-05, + "rewards/margins": 0.20698216557502747, + "rewards/rejected": -0.20699399709701538, + "step": 12458 + }, + { + "epoch": 8.616182572614107, + "grad_norm": 3.788586378097534, + "learning_rate": 7.68787459658829e-06, + "log_odds_chosen": 10.646763801574707, + "log_odds_ratio": -0.00024764935369603336, + "logits/chosen": -0.470620334148407, + "logits/rejected": -0.593694269657135, + "logps/chosen": -0.0006167700048536062, + "logps/rejected": -2.0630569458007812, + "loss": 0.2904, + "nll_loss": 0.07256941497325897, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.167699757497758e-05, + "rewards/margins": 0.20624405145645142, + "rewards/rejected": -0.20630571246147156, + "step": 12459 + }, + { + "epoch": 8.616874135546334, + "grad_norm": 2.969237804412842, + "learning_rate": 7.68403258029814e-06, + "log_odds_chosen": 10.207712173461914, + "log_odds_ratio": -0.00015455170068889856, + "logits/chosen": -0.785926878452301, + "logits/rejected": -0.7482253909111023, + "logps/chosen": -0.00015500865993089974, + "logps/rejected": -1.4396579265594482, + "loss": 0.3058, + "nll_loss": 0.07643543183803558, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5500865629292093e-05, + "rewards/margins": 0.14395028352737427, + "rewards/rejected": -0.14396579563617706, + "step": 12460 + }, + { + "epoch": 8.617565698478561, + "grad_norm": 4.449199676513672, + "learning_rate": 7.680190564007992e-06, + "log_odds_chosen": 11.42137336730957, + "log_odds_ratio": -2.0058974769199267e-05, + "logits/chosen": -0.28403031826019287, + "logits/rejected": -0.32174474000930786, + "logps/chosen": -0.0007978876237757504, + "logps/rejected": -2.9671578407287598, + "loss": 0.3856, + "nll_loss": 0.0964074581861496, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.978876965353265e-05, + "rewards/margins": 0.2966359853744507, + "rewards/rejected": -0.29671579599380493, + "step": 12461 + }, + { + "epoch": 8.618257261410788, + "grad_norm": 2.8202242851257324, + "learning_rate": 7.676348547717842e-06, + "log_odds_chosen": 10.440109252929688, + "log_odds_ratio": -0.00012012768274871632, + "logits/chosen": -0.6182264089584351, + "logits/rejected": -0.645176351070404, + "logps/chosen": -0.00036001091939397156, + "logps/rejected": -2.136899948120117, + "loss": 0.3381, + "nll_loss": 0.08450530469417572, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6001096304971725e-05, + "rewards/margins": 0.21365398168563843, + "rewards/rejected": -0.21368998289108276, + "step": 12462 + }, + { + "epoch": 8.618948824343015, + "grad_norm": 5.376323223114014, + "learning_rate": 7.672506531427693e-06, + "log_odds_chosen": 10.457123756408691, + "log_odds_ratio": -0.00016366233467124403, + "logits/chosen": -0.18849360942840576, + "logits/rejected": -0.015208382159471512, + "logps/chosen": -0.00011773478763643652, + "logps/rejected": -1.2152167558670044, + "loss": 0.2718, + "nll_loss": 0.06792550534009933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.177347803604789e-05, + "rewards/margins": 0.12150990962982178, + "rewards/rejected": -0.12152168154716492, + "step": 12463 + }, + { + "epoch": 8.619640387275242, + "grad_norm": 5.826288223266602, + "learning_rate": 7.668664515137544e-06, + "log_odds_chosen": 11.298666954040527, + "log_odds_ratio": -3.8496054912684485e-05, + "logits/chosen": -0.18737944960594177, + "logits/rejected": -0.30055534839630127, + "logps/chosen": -0.0005287721287459135, + "logps/rejected": -2.9021079540252686, + "loss": 0.5334, + "nll_loss": 0.13334029912948608, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.287720705382526e-05, + "rewards/margins": 0.2901579439640045, + "rewards/rejected": -0.2902108132839203, + "step": 12464 + }, + { + "epoch": 8.620331950207468, + "grad_norm": 2.5242791175842285, + "learning_rate": 7.664822498847396e-06, + "log_odds_chosen": 10.83395004272461, + "log_odds_ratio": -0.0002878225641325116, + "logits/chosen": 0.11042840778827667, + "logits/rejected": -0.012141779065132141, + "logps/chosen": -0.0005985196912661195, + "logps/rejected": -2.4075498580932617, + "loss": 0.2705, + "nll_loss": 0.0676034688949585, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985196912661195e-05, + "rewards/margins": 0.24069511890411377, + "rewards/rejected": -0.24075497686862946, + "step": 12465 + }, + { + "epoch": 8.621023513139695, + "grad_norm": 3.6309618949890137, + "learning_rate": 7.660980482557247e-06, + "log_odds_chosen": 10.47065258026123, + "log_odds_ratio": -0.00021392585767898709, + "logits/chosen": -0.21913385391235352, + "logits/rejected": -0.24686682224273682, + "logps/chosen": -0.00020930226310156286, + "logps/rejected": -1.8738446235656738, + "loss": 0.3847, + "nll_loss": 0.0961625725030899, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0930227037752047e-05, + "rewards/margins": 0.18736353516578674, + "rewards/rejected": -0.1873844414949417, + "step": 12466 + }, + { + "epoch": 8.621715076071922, + "grad_norm": 5.189695835113525, + "learning_rate": 7.657138466267098e-06, + "log_odds_chosen": 10.602476119995117, + "log_odds_ratio": -0.00012163497740402818, + "logits/chosen": -0.5747342705726624, + "logits/rejected": -0.7193750739097595, + "logps/chosen": -0.00024523091269657016, + "logps/rejected": -2.1552419662475586, + "loss": 0.5453, + "nll_loss": 0.13631293177604675, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4523091269657016e-05, + "rewards/margins": 0.2154996693134308, + "rewards/rejected": -0.21552419662475586, + "step": 12467 + }, + { + "epoch": 8.622406639004149, + "grad_norm": 3.0300586223602295, + "learning_rate": 7.653296449976949e-06, + "log_odds_chosen": 9.845947265625, + "log_odds_ratio": -0.0002649944508448243, + "logits/chosen": 0.23362484574317932, + "logits/rejected": 0.2671985626220703, + "logps/chosen": -0.0010978097561746836, + "logps/rejected": -1.7431879043579102, + "loss": 0.2776, + "nll_loss": 0.06936274468898773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010978097998304293, + "rewards/margins": 0.17420899868011475, + "rewards/rejected": -0.17431879043579102, + "step": 12468 + }, + { + "epoch": 8.623098201936376, + "grad_norm": 3.6477975845336914, + "learning_rate": 7.6494544336868e-06, + "log_odds_chosen": 10.568387031555176, + "log_odds_ratio": -0.00020521630358416587, + "logits/chosen": -0.5410515666007996, + "logits/rejected": -0.6348446011543274, + "logps/chosen": -0.0006153516587801278, + "logps/rejected": -2.5193915367126465, + "loss": 0.4283, + "nll_loss": 0.10704678297042847, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153517460916191e-05, + "rewards/margins": 0.2518776059150696, + "rewards/rejected": -0.25193914771080017, + "step": 12469 + }, + { + "epoch": 8.623789764868603, + "grad_norm": 4.447237968444824, + "learning_rate": 7.64561241739665e-06, + "log_odds_chosen": 10.911580085754395, + "log_odds_ratio": -2.6519053790252656e-05, + "logits/chosen": -0.6546154618263245, + "logits/rejected": -0.6488785147666931, + "logps/chosen": -9.79092437773943e-05, + "logps/rejected": -1.491811752319336, + "loss": 0.2256, + "nll_loss": 0.05638564005494118, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.790924195840489e-06, + "rewards/margins": 0.149171382188797, + "rewards/rejected": -0.14918117225170135, + "step": 12470 + }, + { + "epoch": 8.62448132780083, + "grad_norm": 4.240222454071045, + "learning_rate": 7.641770401106502e-06, + "log_odds_chosen": 11.76026725769043, + "log_odds_ratio": -1.119616899813991e-05, + "logits/chosen": -0.9003368616104126, + "logits/rejected": -0.8908242583274841, + "logps/chosen": -0.00013467471580952406, + "logps/rejected": -2.6712076663970947, + "loss": 0.4495, + "nll_loss": 0.1123720034956932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3467471035255585e-05, + "rewards/margins": 0.2671073079109192, + "rewards/rejected": -0.2671207785606384, + "step": 12471 + }, + { + "epoch": 8.625172890733056, + "grad_norm": 3.913267135620117, + "learning_rate": 7.637928384816352e-06, + "log_odds_chosen": 10.158397674560547, + "log_odds_ratio": -0.0004838298773393035, + "logits/chosen": 0.24459408223628998, + "logits/rejected": 0.2145857959985733, + "logps/chosen": -0.0006363195134326816, + "logps/rejected": -1.689408779144287, + "loss": 0.5827, + "nll_loss": 0.14563320577144623, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.363195279845968e-05, + "rewards/margins": 0.1688772737979889, + "rewards/rejected": -0.16894090175628662, + "step": 12472 + }, + { + "epoch": 8.625864453665283, + "grad_norm": 5.289096355438232, + "learning_rate": 7.634086368526202e-06, + "log_odds_chosen": 10.315433502197266, + "log_odds_ratio": -0.00011007794819306582, + "logits/chosen": -0.5646550059318542, + "logits/rejected": -0.6940526962280273, + "logps/chosen": -0.00021625487715937197, + "logps/rejected": -1.8233420848846436, + "loss": 0.4642, + "nll_loss": 0.11602815240621567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.162548844353296e-05, + "rewards/margins": 0.18231257796287537, + "rewards/rejected": -0.18233419954776764, + "step": 12473 + }, + { + "epoch": 8.62655601659751, + "grad_norm": 6.1395263671875, + "learning_rate": 7.630244352236055e-06, + "log_odds_chosen": 11.674334526062012, + "log_odds_ratio": -3.4381027944618836e-05, + "logits/chosen": -0.43784981966018677, + "logits/rejected": -0.528411865234375, + "logps/chosen": -0.0001351241226075217, + "logps/rejected": -2.595203399658203, + "loss": 0.3261, + "nll_loss": 0.08151505142450333, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.351241189695429e-05, + "rewards/margins": 0.25950679183006287, + "rewards/rejected": -0.2595203220844269, + "step": 12474 + }, + { + "epoch": 8.627247579529737, + "grad_norm": 3.8196938037872314, + "learning_rate": 7.626402335945905e-06, + "log_odds_chosen": 11.671865463256836, + "log_odds_ratio": -2.4040429707383737e-05, + "logits/chosen": -0.07806392014026642, + "logits/rejected": -0.16425377130508423, + "logps/chosen": -9.739068627823144e-05, + "logps/rejected": -2.3208045959472656, + "loss": 0.4907, + "nll_loss": 0.12267977744340897, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.739068445924204e-06, + "rewards/margins": 0.2320706844329834, + "rewards/rejected": -0.23208042979240417, + "step": 12475 + }, + { + "epoch": 8.627939142461964, + "grad_norm": 4.208126068115234, + "learning_rate": 7.622560319655755e-06, + "log_odds_chosen": 11.09755802154541, + "log_odds_ratio": -1.8838678442989476e-05, + "logits/chosen": -0.17204993963241577, + "logits/rejected": -0.23128736019134521, + "logps/chosen": -0.00017000493244268, + "logps/rejected": -2.1367721557617188, + "loss": 0.4703, + "nll_loss": 0.11758065223693848, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7000493244268e-05, + "rewards/margins": 0.21366021037101746, + "rewards/rejected": -0.21367719769477844, + "step": 12476 + }, + { + "epoch": 8.62863070539419, + "grad_norm": 2.9325435161590576, + "learning_rate": 7.618718303365607e-06, + "log_odds_chosen": 10.483333587646484, + "log_odds_ratio": -0.0002429414598736912, + "logits/chosen": -0.5756587386131287, + "logits/rejected": -0.5522986054420471, + "logps/chosen": -0.000315562792820856, + "logps/rejected": -2.3459036350250244, + "loss": 0.283, + "nll_loss": 0.07073624432086945, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.15562792820856e-05, + "rewards/margins": 0.2345588207244873, + "rewards/rejected": -0.23459038138389587, + "step": 12477 + }, + { + "epoch": 8.629322268326417, + "grad_norm": 2.7504851818084717, + "learning_rate": 7.614876287075458e-06, + "log_odds_chosen": 11.237836837768555, + "log_odds_ratio": -2.547233634686563e-05, + "logits/chosen": -0.08669843524694443, + "logits/rejected": -0.11949992179870605, + "logps/chosen": -0.00014983415894676, + "logps/rejected": -2.472243309020996, + "loss": 0.2966, + "nll_loss": 0.07414659857749939, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4983415894676e-05, + "rewards/margins": 0.247209370136261, + "rewards/rejected": -0.2472243309020996, + "step": 12478 + }, + { + "epoch": 8.630013831258644, + "grad_norm": 4.2703857421875, + "learning_rate": 7.6110342707853085e-06, + "log_odds_chosen": 10.029302597045898, + "log_odds_ratio": -5.834795592818409e-05, + "logits/chosen": -0.7825497984886169, + "logits/rejected": -0.7867951393127441, + "logps/chosen": -0.00041046348633244634, + "logps/rejected": -1.5852978229522705, + "loss": 0.5751, + "nll_loss": 0.14375720918178558, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104634717805311e-05, + "rewards/margins": 0.15848875045776367, + "rewards/rejected": -0.15852978825569153, + "step": 12479 + }, + { + "epoch": 8.630705394190871, + "grad_norm": 3.0581610202789307, + "learning_rate": 7.607192254495158e-06, + "log_odds_chosen": 10.007383346557617, + "log_odds_ratio": -0.0001696431718301028, + "logits/chosen": -0.7570608854293823, + "logits/rejected": -0.7735745310783386, + "logps/chosen": -0.0006320398533716798, + "logps/rejected": -1.4598944187164307, + "loss": 0.3194, + "nll_loss": 0.07982651144266129, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.320398097159341e-05, + "rewards/margins": 0.14592623710632324, + "rewards/rejected": -0.14598944783210754, + "step": 12480 + }, + { + "epoch": 8.631396957123098, + "grad_norm": 2.993604898452759, + "learning_rate": 7.603350238205011e-06, + "log_odds_chosen": 10.759903907775879, + "log_odds_ratio": -7.590532186441123e-05, + "logits/chosen": -0.4560757577419281, + "logits/rejected": -0.37916862964630127, + "logps/chosen": -0.0006725385319441557, + "logps/rejected": -1.7399346828460693, + "loss": 0.3037, + "nll_loss": 0.0759168341755867, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.725385901518166e-05, + "rewards/margins": 0.1739262193441391, + "rewards/rejected": -0.17399348318576813, + "step": 12481 + }, + { + "epoch": 8.632088520055325, + "grad_norm": 2.7968311309814453, + "learning_rate": 7.599508221914861e-06, + "log_odds_chosen": 9.725410461425781, + "log_odds_ratio": -0.0001912424631882459, + "logits/chosen": -0.5142273902893066, + "logits/rejected": -0.44338592886924744, + "logps/chosen": -0.0003131648409180343, + "logps/rejected": -1.332704782485962, + "loss": 0.2163, + "nll_loss": 0.05405230447649956, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1316485546994954e-05, + "rewards/margins": 0.13323916494846344, + "rewards/rejected": -0.1332704722881317, + "step": 12482 + }, + { + "epoch": 8.632780082987551, + "grad_norm": 3.0606510639190674, + "learning_rate": 7.5956662056247115e-06, + "log_odds_chosen": 10.806468963623047, + "log_odds_ratio": -7.89956102380529e-05, + "logits/chosen": 0.03566619008779526, + "logits/rejected": 0.007338635623455048, + "logps/chosen": -0.0015425796154886484, + "logps/rejected": -3.2979660034179688, + "loss": 0.3353, + "nll_loss": 0.0838153064250946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015425795572809875, + "rewards/margins": 0.3296423554420471, + "rewards/rejected": -0.32979661226272583, + "step": 12483 + }, + { + "epoch": 8.633471645919778, + "grad_norm": 2.7655177116394043, + "learning_rate": 7.591824189334563e-06, + "log_odds_chosen": 11.027166366577148, + "log_odds_ratio": -0.0002024203713517636, + "logits/chosen": -0.3868097960948944, + "logits/rejected": -0.4246591329574585, + "logps/chosen": -0.0002740652998909354, + "logps/rejected": -2.0678699016571045, + "loss": 0.2639, + "nll_loss": 0.06594347208738327, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.740652962529566e-05, + "rewards/margins": 0.20675958693027496, + "rewards/rejected": -0.20678699016571045, + "step": 12484 + }, + { + "epoch": 8.634163208852005, + "grad_norm": 3.056570529937744, + "learning_rate": 7.587982173044414e-06, + "log_odds_chosen": 10.932531356811523, + "log_odds_ratio": -3.319044117233716e-05, + "logits/chosen": -0.4801771640777588, + "logits/rejected": -0.5469212532043457, + "logps/chosen": -0.00012442399747669697, + "logps/rejected": -1.8965985774993896, + "loss": 0.3506, + "nll_loss": 0.08763975650072098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2442400475265458e-05, + "rewards/margins": 0.18964743614196777, + "rewards/rejected": -0.18965987861156464, + "step": 12485 + }, + { + "epoch": 8.634854771784232, + "grad_norm": 3.596604108810425, + "learning_rate": 7.5841401567542645e-06, + "log_odds_chosen": 11.202720642089844, + "log_odds_ratio": -2.8192072932142764e-05, + "logits/chosen": -0.016135483980178833, + "logits/rejected": -0.15050634741783142, + "logps/chosen": -0.000246469076955691, + "logps/rejected": -2.520927667617798, + "loss": 0.381, + "nll_loss": 0.0952383279800415, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.464690805936698e-05, + "rewards/margins": 0.25206810235977173, + "rewards/rejected": -0.25209277868270874, + "step": 12486 + }, + { + "epoch": 8.635546334716459, + "grad_norm": 3.749194383621216, + "learning_rate": 7.580298140464116e-06, + "log_odds_chosen": 10.0772705078125, + "log_odds_ratio": -7.941433432279155e-05, + "logits/chosen": -0.6670262217521667, + "logits/rejected": -0.6328946352005005, + "logps/chosen": -0.00027387653244659305, + "logps/rejected": -1.8500633239746094, + "loss": 0.4957, + "nll_loss": 0.1239088922739029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7387653972255066e-05, + "rewards/margins": 0.1849789321422577, + "rewards/rejected": -0.18500632047653198, + "step": 12487 + }, + { + "epoch": 8.636237897648686, + "grad_norm": 4.95552396774292, + "learning_rate": 7.576456124173967e-06, + "log_odds_chosen": 11.116426467895508, + "log_odds_ratio": -2.903983295254875e-05, + "logits/chosen": 0.04311956465244293, + "logits/rejected": -0.02329292893409729, + "logps/chosen": -0.0002852858742699027, + "logps/rejected": -2.4899492263793945, + "loss": 0.4551, + "nll_loss": 0.11377936601638794, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8528589609777555e-05, + "rewards/margins": 0.24896641075611115, + "rewards/rejected": -0.24899493157863617, + "step": 12488 + }, + { + "epoch": 8.636929460580912, + "grad_norm": 3.235852003097534, + "learning_rate": 7.572614107883818e-06, + "log_odds_chosen": 9.979238510131836, + "log_odds_ratio": -0.00012453217641450465, + "logits/chosen": -0.32398855686187744, + "logits/rejected": -0.4147062301635742, + "logps/chosen": -0.000752622087020427, + "logps/rejected": -2.2185888290405273, + "loss": 0.4353, + "nll_loss": 0.10880966484546661, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.526220724685118e-05, + "rewards/margins": 0.22178363800048828, + "rewards/rejected": -0.2218588888645172, + "step": 12489 + }, + { + "epoch": 8.63762102351314, + "grad_norm": 3.885465383529663, + "learning_rate": 7.568772091593669e-06, + "log_odds_chosen": 10.54115104675293, + "log_odds_ratio": -7.24983838154003e-05, + "logits/chosen": -0.3686515688896179, + "logits/rejected": -0.37892991304397583, + "logps/chosen": -0.0004790807724930346, + "logps/rejected": -1.7787805795669556, + "loss": 0.2843, + "nll_loss": 0.0710686445236206, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.790807724930346e-05, + "rewards/margins": 0.17783014476299286, + "rewards/rejected": -0.17787805199623108, + "step": 12490 + }, + { + "epoch": 8.638312586445366, + "grad_norm": 3.279143810272217, + "learning_rate": 7.56493007530352e-06, + "log_odds_chosen": 11.464825630187988, + "log_odds_ratio": -2.6816604076884687e-05, + "logits/chosen": -0.3882400691509247, + "logits/rejected": -0.4565013647079468, + "logps/chosen": -0.00019119751232210547, + "logps/rejected": -2.661956548690796, + "loss": 0.3415, + "nll_loss": 0.08537627756595612, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9119750504614785e-05, + "rewards/margins": 0.2661765515804291, + "rewards/rejected": -0.2661956548690796, + "step": 12491 + }, + { + "epoch": 8.639004149377593, + "grad_norm": 3.959516763687134, + "learning_rate": 7.56108805901337e-06, + "log_odds_chosen": 11.49631118774414, + "log_odds_ratio": -1.8636765162227675e-05, + "logits/chosen": -0.13141991198062897, + "logits/rejected": -0.11028458178043365, + "logps/chosen": -0.0002770294086076319, + "logps/rejected": -3.0129501819610596, + "loss": 0.4554, + "nll_loss": 0.11384393274784088, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.770293940557167e-05, + "rewards/margins": 0.3012672960758209, + "rewards/rejected": -0.30129504203796387, + "step": 12492 + }, + { + "epoch": 8.63969571230982, + "grad_norm": 6.257438659667969, + "learning_rate": 7.557246042723222e-06, + "log_odds_chosen": 11.744972229003906, + "log_odds_ratio": -1.1943647223233711e-05, + "logits/chosen": -0.3867034316062927, + "logits/rejected": -0.39720794558525085, + "logps/chosen": -0.00018167459347750992, + "logps/rejected": -2.951838970184326, + "loss": 0.3992, + "nll_loss": 0.09978760778903961, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8167460439144634e-05, + "rewards/margins": 0.2951657176017761, + "rewards/rejected": -0.2951838970184326, + "step": 12493 + }, + { + "epoch": 8.640387275242047, + "grad_norm": 3.343430519104004, + "learning_rate": 7.553404026433072e-06, + "log_odds_chosen": 10.571279525756836, + "log_odds_ratio": -0.0001090109144570306, + "logits/chosen": 0.21396251022815704, + "logits/rejected": 0.18011826276779175, + "logps/chosen": -0.0004061561485286802, + "logps/rejected": -1.7455267906188965, + "loss": 0.5452, + "nll_loss": 0.13628709316253662, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0615617763251066e-05, + "rewards/margins": 0.1745120733976364, + "rewards/rejected": -0.17455267906188965, + "step": 12494 + }, + { + "epoch": 8.641078838174273, + "grad_norm": 2.921513557434082, + "learning_rate": 7.549562010142923e-06, + "log_odds_chosen": 10.715484619140625, + "log_odds_ratio": -0.00015988711675163358, + "logits/chosen": -0.2499525099992752, + "logits/rejected": -0.32474803924560547, + "logps/chosen": -0.00015378088573925197, + "logps/rejected": -2.1522469520568848, + "loss": 0.3132, + "nll_loss": 0.07827220112085342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.537808930152096e-05, + "rewards/margins": 0.21520933508872986, + "rewards/rejected": -0.21522469818592072, + "step": 12495 + }, + { + "epoch": 8.6417704011065, + "grad_norm": 5.468051433563232, + "learning_rate": 7.545719993852775e-06, + "log_odds_chosen": 10.28244400024414, + "log_odds_ratio": -6.366943853208795e-05, + "logits/chosen": -0.24660497903823853, + "logits/rejected": -0.3208356201648712, + "logps/chosen": -0.00017482020484749228, + "logps/rejected": -1.4915902614593506, + "loss": 0.6028, + "nll_loss": 0.15068833529949188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7482019757153466e-05, + "rewards/margins": 0.14914155006408691, + "rewards/rejected": -0.1491590291261673, + "step": 12496 + }, + { + "epoch": 8.642461964038727, + "grad_norm": 4.540300369262695, + "learning_rate": 7.541877977562625e-06, + "log_odds_chosen": 11.03828239440918, + "log_odds_ratio": -0.00013013739953748882, + "logits/chosen": -0.259831964969635, + "logits/rejected": -0.3700653612613678, + "logps/chosen": -0.00030625227373093367, + "logps/rejected": -2.2334890365600586, + "loss": 0.3429, + "nll_loss": 0.08570323884487152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0625225917901844e-05, + "rewards/margins": 0.22331829369068146, + "rewards/rejected": -0.22334891557693481, + "step": 12497 + }, + { + "epoch": 8.643153526970954, + "grad_norm": 8.347272872924805, + "learning_rate": 7.538035961272476e-06, + "log_odds_chosen": 11.745546340942383, + "log_odds_ratio": -1.4129373994364869e-05, + "logits/chosen": -0.32230523228645325, + "logits/rejected": -0.39761868119239807, + "logps/chosen": -6.494563422165811e-05, + "logps/rejected": -2.194420099258423, + "loss": 0.3645, + "nll_loss": 0.09113363921642303, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.494563422165811e-06, + "rewards/margins": 0.21943552792072296, + "rewards/rejected": -0.21944202482700348, + "step": 12498 + }, + { + "epoch": 8.64384508990318, + "grad_norm": 4.28175687789917, + "learning_rate": 7.534193944982328e-06, + "log_odds_chosen": 10.019571304321289, + "log_odds_ratio": -9.600758494343609e-05, + "logits/chosen": -0.4364526867866516, + "logits/rejected": -0.5668889284133911, + "logps/chosen": -0.00032136833760887384, + "logps/rejected": -1.647362470626831, + "loss": 0.4576, + "nll_loss": 0.11438850313425064, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.213683521607891e-05, + "rewards/margins": 0.1647041141986847, + "rewards/rejected": -0.16473624110221863, + "step": 12499 + }, + { + "epoch": 8.644536652835408, + "grad_norm": 3.8962275981903076, + "learning_rate": 7.5303519286921784e-06, + "log_odds_chosen": 10.956960678100586, + "log_odds_ratio": -0.0005306452512741089, + "logits/chosen": -0.4991099238395691, + "logits/rejected": -0.6036314964294434, + "logps/chosen": -0.0008679937454871833, + "logps/rejected": -2.1128909587860107, + "loss": 0.4405, + "nll_loss": 0.11007068306207657, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.67993658175692e-05, + "rewards/margins": 0.21120230853557587, + "rewards/rejected": -0.21128910779953003, + "step": 12500 + }, + { + "epoch": 8.645228215767634, + "grad_norm": 3.9985406398773193, + "learning_rate": 7.526509912402028e-06, + "log_odds_chosen": 11.414043426513672, + "log_odds_ratio": -1.5188716133707203e-05, + "logits/chosen": -0.35912656784057617, + "logits/rejected": -0.4726759195327759, + "logps/chosen": -0.00020685499475803226, + "logps/rejected": -2.3544654846191406, + "loss": 0.3269, + "nll_loss": 0.0817258358001709, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.068549656542018e-05, + "rewards/margins": 0.23542584478855133, + "rewards/rejected": -0.23544654250144958, + "step": 12501 + }, + { + "epoch": 8.645919778699861, + "grad_norm": 3.2609992027282715, + "learning_rate": 7.522667896111881e-06, + "log_odds_chosen": 10.814139366149902, + "log_odds_ratio": -0.00016953656449913979, + "logits/chosen": -0.33186572790145874, + "logits/rejected": -0.43221697211265564, + "logps/chosen": -0.0002584143658168614, + "logps/rejected": -2.140547275543213, + "loss": 0.446, + "nll_loss": 0.11149090528488159, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.584143658168614e-05, + "rewards/margins": 0.21402889490127563, + "rewards/rejected": -0.21405473351478577, + "step": 12502 + }, + { + "epoch": 8.646611341632088, + "grad_norm": 5.537511825561523, + "learning_rate": 7.518825879821731e-06, + "log_odds_chosen": 11.373611450195312, + "log_odds_ratio": -5.9118709032190964e-05, + "logits/chosen": -0.6136024594306946, + "logits/rejected": -0.6121117472648621, + "logps/chosen": -0.0002486594021320343, + "logps/rejected": -2.7007157802581787, + "loss": 0.2574, + "nll_loss": 0.06435317546129227, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4865941668394953e-05, + "rewards/margins": 0.2700467109680176, + "rewards/rejected": -0.2700715959072113, + "step": 12503 + }, + { + "epoch": 8.647302904564315, + "grad_norm": 2.877756357192993, + "learning_rate": 7.514983863531581e-06, + "log_odds_chosen": 11.185656547546387, + "log_odds_ratio": -0.000456160691101104, + "logits/chosen": -0.34598487615585327, + "logits/rejected": -0.3764715790748596, + "logps/chosen": -0.00172577821649611, + "logps/rejected": -3.0296967029571533, + "loss": 0.3978, + "nll_loss": 0.09940056502819061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001725778274703771, + "rewards/margins": 0.3027970790863037, + "rewards/rejected": -0.30296963453292847, + "step": 12504 + }, + { + "epoch": 8.647994467496542, + "grad_norm": 7.3781232833862305, + "learning_rate": 7.511141847241433e-06, + "log_odds_chosen": 11.257057189941406, + "log_odds_ratio": -0.00010700400889618322, + "logits/chosen": 0.1673416644334793, + "logits/rejected": 0.12245422601699829, + "logps/chosen": -0.0006069620721973479, + "logps/rejected": -2.762889862060547, + "loss": 0.6208, + "nll_loss": 0.15518996119499207, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0696205764543265e-05, + "rewards/margins": 0.27622830867767334, + "rewards/rejected": -0.2762889862060547, + "step": 12505 + }, + { + "epoch": 8.648686030428768, + "grad_norm": 2.924121618270874, + "learning_rate": 7.507299830951284e-06, + "log_odds_chosen": 9.39946174621582, + "log_odds_ratio": -0.0004493095329962671, + "logits/chosen": -0.41169384121894836, + "logits/rejected": -0.5052452087402344, + "logps/chosen": -0.001000140910036862, + "logps/rejected": -1.6869672536849976, + "loss": 0.2957, + "nll_loss": 0.07387848198413849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010001410555560142, + "rewards/margins": 0.16859671473503113, + "rewards/rejected": -0.16869673132896423, + "step": 12506 + }, + { + "epoch": 8.649377593360995, + "grad_norm": 5.006665229797363, + "learning_rate": 7.5034578146611345e-06, + "log_odds_chosen": 11.246997833251953, + "log_odds_ratio": -0.0006229121354408562, + "logits/chosen": -0.15815459191799164, + "logits/rejected": -0.07593107223510742, + "logps/chosen": -0.0007676490349695086, + "logps/rejected": -2.4148197174072266, + "loss": 0.6833, + "nll_loss": 0.17075814306735992, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.676490349695086e-05, + "rewards/margins": 0.24140521883964539, + "rewards/rejected": -0.2414819896221161, + "step": 12507 + }, + { + "epoch": 8.650069156293222, + "grad_norm": 4.329807758331299, + "learning_rate": 7.499615798370986e-06, + "log_odds_chosen": 11.328744888305664, + "log_odds_ratio": -1.9021983462153003e-05, + "logits/chosen": -0.06117581948637962, + "logits/rejected": -0.043100398033857346, + "logps/chosen": -0.000164288270752877, + "logps/rejected": -2.2955570220947266, + "loss": 0.3944, + "nll_loss": 0.09859944134950638, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6428828530479223e-05, + "rewards/margins": 0.22953930497169495, + "rewards/rejected": -0.22955572605133057, + "step": 12508 + }, + { + "epoch": 8.650760719225449, + "grad_norm": 3.991814613342285, + "learning_rate": 7.495773782080837e-06, + "log_odds_chosen": 10.413236618041992, + "log_odds_ratio": -5.779110506409779e-05, + "logits/chosen": -0.7478570342063904, + "logits/rejected": -0.7675559520721436, + "logps/chosen": -0.00017067301087081432, + "logps/rejected": -1.0780225992202759, + "loss": 0.2589, + "nll_loss": 0.06472624838352203, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7067301087081432e-05, + "rewards/margins": 0.1077851951122284, + "rewards/rejected": -0.10780227184295654, + "step": 12509 + }, + { + "epoch": 8.651452282157676, + "grad_norm": 2.4446229934692383, + "learning_rate": 7.491931765790687e-06, + "log_odds_chosen": 10.83348274230957, + "log_odds_ratio": -5.711670019081794e-05, + "logits/chosen": -0.6774818897247314, + "logits/rejected": -0.7664629817008972, + "logps/chosen": -0.0003899620787706226, + "logps/rejected": -2.597978115081787, + "loss": 0.3238, + "nll_loss": 0.08095055818557739, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.899620787706226e-05, + "rewards/margins": 0.2597588300704956, + "rewards/rejected": -0.2597978115081787, + "step": 12510 + }, + { + "epoch": 8.652143845089903, + "grad_norm": 5.429533958435059, + "learning_rate": 7.488089749500539e-06, + "log_odds_chosen": 9.524100303649902, + "log_odds_ratio": -0.00021248232224024832, + "logits/chosen": -0.29696527123451233, + "logits/rejected": -0.3293287456035614, + "logps/chosen": -0.0004965565167367458, + "logps/rejected": -1.6754616498947144, + "loss": 0.4853, + "nll_loss": 0.12129941582679749, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9655653128866106e-05, + "rewards/margins": 0.16749650239944458, + "rewards/rejected": -0.16754615306854248, + "step": 12511 + }, + { + "epoch": 8.65283540802213, + "grad_norm": 4.308308124542236, + "learning_rate": 7.484247733210389e-06, + "log_odds_chosen": 10.372257232666016, + "log_odds_ratio": -0.000601473671849817, + "logits/chosen": -0.281741201877594, + "logits/rejected": -0.30346786975860596, + "logps/chosen": -0.0005873222835361958, + "logps/rejected": -2.0079917907714844, + "loss": 0.4027, + "nll_loss": 0.10062563419342041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8732228353619576e-05, + "rewards/margins": 0.20074047148227692, + "rewards/rejected": -0.20079921185970306, + "step": 12512 + }, + { + "epoch": 8.653526970954356, + "grad_norm": 4.821245193481445, + "learning_rate": 7.48040571692024e-06, + "log_odds_chosen": 11.229162216186523, + "log_odds_ratio": -5.2564311772584915e-05, + "logits/chosen": 0.06918393075466156, + "logits/rejected": 0.17258884012699127, + "logps/chosen": -0.0002505563898012042, + "logps/rejected": -2.54415225982666, + "loss": 0.5032, + "nll_loss": 0.12579584121704102, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5055640435311943e-05, + "rewards/margins": 0.254390150308609, + "rewards/rejected": -0.25441521406173706, + "step": 12513 + }, + { + "epoch": 8.654218533886583, + "grad_norm": 3.9665966033935547, + "learning_rate": 7.476563700630091e-06, + "log_odds_chosen": 11.276213645935059, + "log_odds_ratio": -3.955683496315032e-05, + "logits/chosen": -0.1911214292049408, + "logits/rejected": -0.22925563156604767, + "logps/chosen": -0.00015900543075986207, + "logps/rejected": -2.275547742843628, + "loss": 0.388, + "nll_loss": 0.09700003266334534, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5900543075986207e-05, + "rewards/margins": 0.2275388538837433, + "rewards/rejected": -0.22755476832389832, + "step": 12514 + }, + { + "epoch": 8.65491009681881, + "grad_norm": 3.5284528732299805, + "learning_rate": 7.472721684339942e-06, + "log_odds_chosen": 11.778959274291992, + "log_odds_ratio": -1.4203714272298384e-05, + "logits/chosen": -0.2765466570854187, + "logits/rejected": -0.3749210834503174, + "logps/chosen": -0.00013069694978184998, + "logps/rejected": -2.9017252922058105, + "loss": 0.4733, + "nll_loss": 0.11832163482904434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3069693522993475e-05, + "rewards/margins": 0.2901594340801239, + "rewards/rejected": -0.2901725172996521, + "step": 12515 + }, + { + "epoch": 8.655601659751037, + "grad_norm": 3.512288808822632, + "learning_rate": 7.468879668049793e-06, + "log_odds_chosen": 10.894959449768066, + "log_odds_ratio": -5.7100933190668e-05, + "logits/chosen": -0.3321828544139862, + "logits/rejected": -0.42700040340423584, + "logps/chosen": -0.00021454865054693073, + "logps/rejected": -2.0859456062316895, + "loss": 0.3536, + "nll_loss": 0.08839131146669388, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1454865418490954e-05, + "rewards/margins": 0.2085730880498886, + "rewards/rejected": -0.20859453082084656, + "step": 12516 + }, + { + "epoch": 8.656293222683264, + "grad_norm": 4.475125789642334, + "learning_rate": 7.465037651759643e-06, + "log_odds_chosen": 10.695079803466797, + "log_odds_ratio": -9.16783683351241e-05, + "logits/chosen": -0.2723444104194641, + "logits/rejected": -0.42437297105789185, + "logps/chosen": -0.00010329029464628547, + "logps/rejected": -1.5851585865020752, + "loss": 0.315, + "nll_loss": 0.07873716205358505, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.032903037412325e-05, + "rewards/margins": 0.15850552916526794, + "rewards/rejected": -0.15851587057113647, + "step": 12517 + }, + { + "epoch": 8.65698478561549, + "grad_norm": 2.8404202461242676, + "learning_rate": 7.461195635469495e-06, + "log_odds_chosen": 10.811230659484863, + "log_odds_ratio": -8.040317334234715e-05, + "logits/chosen": -0.255277156829834, + "logits/rejected": -0.32991933822631836, + "logps/chosen": -0.00025593285681679845, + "logps/rejected": -2.292391300201416, + "loss": 0.3287, + "nll_loss": 0.08215835690498352, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5593286409275606e-05, + "rewards/margins": 0.22921353578567505, + "rewards/rejected": -0.22923913598060608, + "step": 12518 + }, + { + "epoch": 8.657676348547717, + "grad_norm": 5.891508102416992, + "learning_rate": 7.457353619179345e-06, + "log_odds_chosen": 11.82068920135498, + "log_odds_ratio": -3.0011773560545407e-05, + "logits/chosen": -0.4721341133117676, + "logits/rejected": -0.41853195428848267, + "logps/chosen": -0.0001907099795062095, + "logps/rejected": -3.0076072216033936, + "loss": 0.3828, + "nll_loss": 0.0956956297159195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.907099795062095e-05, + "rewards/margins": 0.30074167251586914, + "rewards/rejected": -0.30076074600219727, + "step": 12519 + }, + { + "epoch": 8.658367911479944, + "grad_norm": 4.65853214263916, + "learning_rate": 7.453511602889196e-06, + "log_odds_chosen": 10.887121200561523, + "log_odds_ratio": -0.0004176338261459023, + "logits/chosen": -0.2970947027206421, + "logits/rejected": -0.3491339087486267, + "logps/chosen": -0.0006700168596580625, + "logps/rejected": -2.4534764289855957, + "loss": 0.4927, + "nll_loss": 0.12313016504049301, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.700168160023168e-05, + "rewards/margins": 0.2452806532382965, + "rewards/rejected": -0.24534766376018524, + "step": 12520 + }, + { + "epoch": 8.659059474412171, + "grad_norm": 3.566943645477295, + "learning_rate": 7.4496695865990475e-06, + "log_odds_chosen": 11.55712604522705, + "log_odds_ratio": -8.315506420331076e-05, + "logits/chosen": -0.3606160283088684, + "logits/rejected": -0.4622876048088074, + "logps/chosen": -0.0003821479913312942, + "logps/rejected": -2.8081679344177246, + "loss": 0.5489, + "nll_loss": 0.1372266411781311, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.821479913312942e-05, + "rewards/margins": 0.28077858686447144, + "rewards/rejected": -0.28081679344177246, + "step": 12521 + }, + { + "epoch": 8.659751037344398, + "grad_norm": 2.726372241973877, + "learning_rate": 7.445827570308898e-06, + "log_odds_chosen": 10.538716316223145, + "log_odds_ratio": -6.737098738085479e-05, + "logits/chosen": -0.3288825452327728, + "logits/rejected": -0.35905057191848755, + "logps/chosen": -0.00027229191618971527, + "logps/rejected": -2.214806318283081, + "loss": 0.3153, + "nll_loss": 0.07881700247526169, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7229190891375765e-05, + "rewards/margins": 0.22145341336727142, + "rewards/rejected": -0.22148065268993378, + "step": 12522 + }, + { + "epoch": 8.660442600276625, + "grad_norm": 3.5521833896636963, + "learning_rate": 7.441985554018749e-06, + "log_odds_chosen": 11.483410835266113, + "log_odds_ratio": -1.6197574950638227e-05, + "logits/chosen": -0.2110901176929474, + "logits/rejected": -0.4438256323337555, + "logps/chosen": -0.00015359108510892838, + "logps/rejected": -2.654276132583618, + "loss": 0.4356, + "nll_loss": 0.10890985280275345, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.535910996608436e-05, + "rewards/margins": 0.26541227102279663, + "rewards/rejected": -0.2654276192188263, + "step": 12523 + }, + { + "epoch": 8.661134163208851, + "grad_norm": 5.966548442840576, + "learning_rate": 7.438143537728601e-06, + "log_odds_chosen": 10.445170402526855, + "log_odds_ratio": -0.0004751587985083461, + "logits/chosen": -0.10279648751020432, + "logits/rejected": -0.05743744224309921, + "logps/chosen": -0.0004982667742297053, + "logps/rejected": -2.129284381866455, + "loss": 0.4002, + "nll_loss": 0.10000036656856537, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9826678150566295e-05, + "rewards/margins": 0.21287861466407776, + "rewards/rejected": -0.21292844414710999, + "step": 12524 + }, + { + "epoch": 8.661825726141078, + "grad_norm": 4.023740768432617, + "learning_rate": 7.434301521438451e-06, + "log_odds_chosen": 10.345528602600098, + "log_odds_ratio": -0.0005008649313822389, + "logits/chosen": -0.14824295043945312, + "logits/rejected": -0.12089388072490692, + "logps/chosen": -0.00037331614294089377, + "logps/rejected": -2.3994011878967285, + "loss": 0.566, + "nll_loss": 0.14145886898040771, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.733161065611057e-05, + "rewards/margins": 0.2399027794599533, + "rewards/rejected": -0.2399401217699051, + "step": 12525 + }, + { + "epoch": 8.662517289073305, + "grad_norm": 5.07864236831665, + "learning_rate": 7.430459505148301e-06, + "log_odds_chosen": 11.224198341369629, + "log_odds_ratio": -3.23234053212218e-05, + "logits/chosen": -0.24662211537361145, + "logits/rejected": -0.1978820562362671, + "logps/chosen": -0.00024225276138167828, + "logps/rejected": -2.860351085662842, + "loss": 0.5755, + "nll_loss": 0.14386588335037231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4225275410572067e-05, + "rewards/margins": 0.28601086139678955, + "rewards/rejected": -0.28603509068489075, + "step": 12526 + }, + { + "epoch": 8.663208852005532, + "grad_norm": 4.163898944854736, + "learning_rate": 7.426617488858154e-06, + "log_odds_chosen": 10.051204681396484, + "log_odds_ratio": -6.197369657456875e-05, + "logits/chosen": 0.10701654106378555, + "logits/rejected": 0.10469865053892136, + "logps/chosen": -0.00023373853764496744, + "logps/rejected": -1.4638707637786865, + "loss": 0.795, + "nll_loss": 0.19875499606132507, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3373853764496744e-05, + "rewards/margins": 0.14636372029781342, + "rewards/rejected": -0.14638708531856537, + "step": 12527 + }, + { + "epoch": 8.663900414937759, + "grad_norm": 2.7802116870880127, + "learning_rate": 7.422775472568004e-06, + "log_odds_chosen": 9.961128234863281, + "log_odds_ratio": -0.00017989228945225477, + "logits/chosen": -0.06987164914608002, + "logits/rejected": -0.024553827941417694, + "logps/chosen": -0.00034464691998437047, + "logps/rejected": -1.683363676071167, + "loss": 0.3131, + "nll_loss": 0.07825967669487, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4464690543245524e-05, + "rewards/margins": 0.16830191016197205, + "rewards/rejected": -0.16833636164665222, + "step": 12528 + }, + { + "epoch": 8.664591977869986, + "grad_norm": 4.402554512023926, + "learning_rate": 7.418933456277854e-06, + "log_odds_chosen": 10.956361770629883, + "log_odds_ratio": -7.46956720831804e-05, + "logits/chosen": -0.07751287519931793, + "logits/rejected": -0.07672972977161407, + "logps/chosen": -0.00017957479576580226, + "logps/rejected": -2.2911131381988525, + "loss": 0.4873, + "nll_loss": 0.12182983011007309, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7957479940378107e-05, + "rewards/margins": 0.22909337282180786, + "rewards/rejected": -0.22911131381988525, + "step": 12529 + }, + { + "epoch": 8.665283540802212, + "grad_norm": 3.342729330062866, + "learning_rate": 7.415091439987706e-06, + "log_odds_chosen": 10.63759994506836, + "log_odds_ratio": -3.1314044463215396e-05, + "logits/chosen": -0.37202584743499756, + "logits/rejected": -0.3920147716999054, + "logps/chosen": -0.00014479250239674002, + "logps/rejected": -1.799759030342102, + "loss": 0.3138, + "nll_loss": 0.0784500390291214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4479250239674002e-05, + "rewards/margins": 0.1799614429473877, + "rewards/rejected": -0.17997589707374573, + "step": 12530 + }, + { + "epoch": 8.66597510373444, + "grad_norm": 4.784072399139404, + "learning_rate": 7.411249423697557e-06, + "log_odds_chosen": 11.285941123962402, + "log_odds_ratio": -2.2382810129784048e-05, + "logits/chosen": -0.5358566641807556, + "logits/rejected": -0.5093519687652588, + "logps/chosen": -0.0001689097553025931, + "logps/rejected": -2.4816975593566895, + "loss": 0.3534, + "nll_loss": 0.08835049718618393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.689097553025931e-05, + "rewards/margins": 0.24815288186073303, + "rewards/rejected": -0.24816977977752686, + "step": 12531 + }, + { + "epoch": 8.666666666666666, + "grad_norm": 3.1711583137512207, + "learning_rate": 7.4074074074074075e-06, + "log_odds_chosen": 11.662884712219238, + "log_odds_ratio": -3.922557152691297e-05, + "logits/chosen": -0.03454257547855377, + "logits/rejected": -0.17196756601333618, + "logps/chosen": -0.00015968605293892324, + "logps/rejected": -2.495405673980713, + "loss": 0.3412, + "nll_loss": 0.085299551486969, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5968604202498682e-05, + "rewards/margins": 0.24952462315559387, + "rewards/rejected": -0.2495405673980713, + "step": 12532 + }, + { + "epoch": 8.667358229598893, + "grad_norm": 5.585811138153076, + "learning_rate": 7.403565391117259e-06, + "log_odds_chosen": 10.636640548706055, + "log_odds_ratio": -0.0001833289279602468, + "logits/chosen": -0.36438095569610596, + "logits/rejected": -0.3280951976776123, + "logps/chosen": -0.0007131825550459325, + "logps/rejected": -2.577727794647217, + "loss": 0.489, + "nll_loss": 0.1222197413444519, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.131825987016782e-05, + "rewards/margins": 0.25770145654678345, + "rewards/rejected": -0.2577727735042572, + "step": 12533 + }, + { + "epoch": 8.66804979253112, + "grad_norm": 3.481321334838867, + "learning_rate": 7.39972337482711e-06, + "log_odds_chosen": 11.821352005004883, + "log_odds_ratio": -3.541969272191636e-05, + "logits/chosen": -0.10815463215112686, + "logits/rejected": -0.0630636140704155, + "logps/chosen": -0.00010432667477289215, + "logps/rejected": -2.1872663497924805, + "loss": 0.3699, + "nll_loss": 0.09248305857181549, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0432667295390274e-05, + "rewards/margins": 0.21871618926525116, + "rewards/rejected": -0.21872663497924805, + "step": 12534 + }, + { + "epoch": 8.668741355463347, + "grad_norm": 3.907029867172241, + "learning_rate": 7.3958813585369606e-06, + "log_odds_chosen": 10.354106903076172, + "log_odds_ratio": -0.00018895111861638725, + "logits/chosen": -0.3448829650878906, + "logits/rejected": -0.3576931357383728, + "logps/chosen": -0.00028838051366619766, + "logps/rejected": -1.6199371814727783, + "loss": 0.2629, + "nll_loss": 0.0656980574131012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.88380488200346e-05, + "rewards/margins": 0.16196487843990326, + "rewards/rejected": -0.16199371218681335, + "step": 12535 + }, + { + "epoch": 8.669432918395573, + "grad_norm": 3.7974905967712402, + "learning_rate": 7.392039342246812e-06, + "log_odds_chosen": 10.901939392089844, + "log_odds_ratio": -2.7674408556777053e-05, + "logits/chosen": -0.11420188844203949, + "logits/rejected": -0.16965460777282715, + "logps/chosen": -0.0002406880958005786, + "logps/rejected": -2.3652772903442383, + "loss": 0.403, + "nll_loss": 0.10075122117996216, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.406880958005786e-05, + "rewards/margins": 0.23650366067886353, + "rewards/rejected": -0.2365277260541916, + "step": 12536 + }, + { + "epoch": 8.6701244813278, + "grad_norm": 4.031060218811035, + "learning_rate": 7.388197325956663e-06, + "log_odds_chosen": 10.959932327270508, + "log_odds_ratio": -2.7098983991891146e-05, + "logits/chosen": -0.4994911253452301, + "logits/rejected": -0.4914383590221405, + "logps/chosen": -0.0001192599447676912, + "logps/rejected": -2.019474506378174, + "loss": 0.4702, + "nll_loss": 0.11753655225038528, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1925993021577597e-05, + "rewards/margins": 0.2019355297088623, + "rewards/rejected": -0.20194746553897858, + "step": 12537 + }, + { + "epoch": 8.670816044260027, + "grad_norm": 6.080068111419678, + "learning_rate": 7.384355309666513e-06, + "log_odds_chosen": 11.134516716003418, + "log_odds_ratio": -3.303651828900911e-05, + "logits/chosen": -0.47941821813583374, + "logits/rejected": -0.5384718179702759, + "logps/chosen": -9.550550021231174e-05, + "logps/rejected": -1.8790864944458008, + "loss": 0.5679, + "nll_loss": 0.1419667899608612, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.550550203130115e-06, + "rewards/margins": 0.18789908289909363, + "rewards/rejected": -0.18790864944458008, + "step": 12538 + }, + { + "epoch": 8.671507607192254, + "grad_norm": 7.047338485717773, + "learning_rate": 7.380513293376365e-06, + "log_odds_chosen": 11.69442367553711, + "log_odds_ratio": -2.412005778751336e-05, + "logits/chosen": -0.36900442838668823, + "logits/rejected": -0.37385451793670654, + "logps/chosen": -0.00014486766303889453, + "logps/rejected": -2.316293478012085, + "loss": 0.3567, + "nll_loss": 0.08917532861232758, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4486766303889453e-05, + "rewards/margins": 0.2316148579120636, + "rewards/rejected": -0.23162934184074402, + "step": 12539 + }, + { + "epoch": 8.67219917012448, + "grad_norm": 4.273880481719971, + "learning_rate": 7.376671277086215e-06, + "log_odds_chosen": 11.082413673400879, + "log_odds_ratio": -0.00023529511236120015, + "logits/chosen": 0.06696252524852753, + "logits/rejected": -0.06390891969203949, + "logps/chosen": -0.000319934100843966, + "logps/rejected": -2.3815979957580566, + "loss": 0.3856, + "nll_loss": 0.0963764488697052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1993411539588124e-05, + "rewards/margins": 0.23812782764434814, + "rewards/rejected": -0.23815982043743134, + "step": 12540 + }, + { + "epoch": 8.672890733056708, + "grad_norm": 3.5581562519073486, + "learning_rate": 7.372829260796066e-06, + "log_odds_chosen": 9.663544654846191, + "log_odds_ratio": -0.0004422256606630981, + "logits/chosen": -0.39682891964912415, + "logits/rejected": -0.4791784882545471, + "logps/chosen": -0.0005750549025833607, + "logps/rejected": -1.5449265241622925, + "loss": 0.4687, + "nll_loss": 0.11713872104883194, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.750549098593183e-05, + "rewards/margins": 0.1544351577758789, + "rewards/rejected": -0.15449264645576477, + "step": 12541 + }, + { + "epoch": 8.673582295988934, + "grad_norm": 3.255485773086548, + "learning_rate": 7.3689872445059175e-06, + "log_odds_chosen": 10.975412368774414, + "log_odds_ratio": -0.00011924972204724327, + "logits/chosen": -0.16594725847244263, + "logits/rejected": -0.31802070140838623, + "logps/chosen": -0.0004657926328945905, + "logps/rejected": -2.780015468597412, + "loss": 0.4166, + "nll_loss": 0.10414905846118927, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.657926183426753e-05, + "rewards/margins": 0.27795499563217163, + "rewards/rejected": -0.2780015766620636, + "step": 12542 + }, + { + "epoch": 8.674273858921161, + "grad_norm": 4.244312286376953, + "learning_rate": 7.365145228215768e-06, + "log_odds_chosen": 11.83520793914795, + "log_odds_ratio": -1.8101507521350868e-05, + "logits/chosen": -0.24233081936836243, + "logits/rejected": -0.35883381962776184, + "logps/chosen": -9.870299254544079e-05, + "logps/rejected": -2.3833212852478027, + "loss": 0.471, + "nll_loss": 0.11775927245616913, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.870300345937721e-06, + "rewards/margins": 0.23832225799560547, + "rewards/rejected": -0.2383321225643158, + "step": 12543 + }, + { + "epoch": 8.674965421853388, + "grad_norm": 6.95550537109375, + "learning_rate": 7.361303211925619e-06, + "log_odds_chosen": 12.086908340454102, + "log_odds_ratio": -1.8966737115988508e-05, + "logits/chosen": -0.5800905823707581, + "logits/rejected": -0.5021611452102661, + "logps/chosen": -0.00015917661949060857, + "logps/rejected": -3.2165818214416504, + "loss": 0.365, + "nll_loss": 0.09124217927455902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5917659766273573e-05, + "rewards/margins": 0.32164227962493896, + "rewards/rejected": -0.321658194065094, + "step": 12544 + }, + { + "epoch": 8.675656984785615, + "grad_norm": 4.134158611297607, + "learning_rate": 7.357461195635471e-06, + "log_odds_chosen": 10.438495635986328, + "log_odds_ratio": -0.00031156576005741954, + "logits/chosen": -0.5812631845474243, + "logits/rejected": -0.6295356154441833, + "logps/chosen": -0.00029764368082396686, + "logps/rejected": -2.0201539993286133, + "loss": 0.3774, + "nll_loss": 0.09431064873933792, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9764367354800925e-05, + "rewards/margins": 0.20198562741279602, + "rewards/rejected": -0.20201539993286133, + "step": 12545 + }, + { + "epoch": 8.676348547717842, + "grad_norm": 3.0808298587799072, + "learning_rate": 7.353619179345321e-06, + "log_odds_chosen": 11.856780052185059, + "log_odds_ratio": -1.4338037544803228e-05, + "logits/chosen": -0.8203387260437012, + "logits/rejected": -0.9521452188491821, + "logps/chosen": -0.00018739672668743879, + "logps/rejected": -2.838536262512207, + "loss": 0.3641, + "nll_loss": 0.09101735055446625, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.873967266874388e-05, + "rewards/margins": 0.28383487462997437, + "rewards/rejected": -0.28385359048843384, + "step": 12546 + }, + { + "epoch": 8.677040110650069, + "grad_norm": 4.475383281707764, + "learning_rate": 7.349777163055171e-06, + "log_odds_chosen": 10.019126892089844, + "log_odds_ratio": -0.0007663845317438245, + "logits/chosen": 0.10257889330387115, + "logits/rejected": -0.01933739334344864, + "logps/chosen": -0.0010240648407489061, + "logps/rejected": -2.111617088317871, + "loss": 0.8074, + "nll_loss": 0.20177187025547028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010240648407489061, + "rewards/margins": 0.2110593020915985, + "rewards/rejected": -0.21116170287132263, + "step": 12547 + }, + { + "epoch": 8.677731673582295, + "grad_norm": 4.1379852294921875, + "learning_rate": 7.345935146765022e-06, + "log_odds_chosen": 11.253612518310547, + "log_odds_ratio": -8.35497266962193e-05, + "logits/chosen": -0.14084036648273468, + "logits/rejected": -0.2441224902868271, + "logps/chosen": -0.0010256430832669139, + "logps/rejected": -2.3779947757720947, + "loss": 0.4041, + "nll_loss": 0.1010219156742096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010256430687149987, + "rewards/margins": 0.23769691586494446, + "rewards/rejected": -0.2377994805574417, + "step": 12548 + }, + { + "epoch": 8.678423236514522, + "grad_norm": 4.599795818328857, + "learning_rate": 7.342093130474874e-06, + "log_odds_chosen": 9.960695266723633, + "log_odds_ratio": -0.0007999525987543166, + "logits/chosen": -0.3448615074157715, + "logits/rejected": -0.4812205135822296, + "logps/chosen": -0.00048607925418764353, + "logps/rejected": -1.8350410461425781, + "loss": 0.4098, + "nll_loss": 0.10236094892024994, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8607926146360114e-05, + "rewards/margins": 0.18345551192760468, + "rewards/rejected": -0.183504119515419, + "step": 12549 + }, + { + "epoch": 8.679114799446749, + "grad_norm": 2.339559555053711, + "learning_rate": 7.338251114184724e-06, + "log_odds_chosen": 10.132892608642578, + "log_odds_ratio": -0.00018708023708313704, + "logits/chosen": -0.37915366888046265, + "logits/rejected": -0.3266010582447052, + "logps/chosen": -0.00019164435798302293, + "logps/rejected": -1.3973734378814697, + "loss": 0.2702, + "nll_loss": 0.06754133850336075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9164435798302293e-05, + "rewards/margins": 0.1397181898355484, + "rewards/rejected": -0.1397373378276825, + "step": 12550 + }, + { + "epoch": 8.679806362378976, + "grad_norm": 5.420063018798828, + "learning_rate": 7.334409097894575e-06, + "log_odds_chosen": 10.726099014282227, + "log_odds_ratio": -0.00015335682837758213, + "logits/chosen": -0.07302002608776093, + "logits/rejected": -0.20137712359428406, + "logps/chosen": -0.0007101158262230456, + "logps/rejected": -2.376372814178467, + "loss": 0.4888, + "nll_loss": 0.1221751719713211, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.10115855326876e-05, + "rewards/margins": 0.2375662922859192, + "rewards/rejected": -0.23763728141784668, + "step": 12551 + }, + { + "epoch": 8.680497925311203, + "grad_norm": 3.888152837753296, + "learning_rate": 7.330567081604427e-06, + "log_odds_chosen": 9.247932434082031, + "log_odds_ratio": -0.00015723289106972516, + "logits/chosen": -0.3507700562477112, + "logits/rejected": -0.39326196908950806, + "logps/chosen": -0.0003475480480119586, + "logps/rejected": -1.3601428270339966, + "loss": 0.4583, + "nll_loss": 0.11455010622739792, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.475480116321705e-05, + "rewards/margins": 0.1359795182943344, + "rewards/rejected": -0.13601426780223846, + "step": 12552 + }, + { + "epoch": 8.68118948824343, + "grad_norm": 3.6204445362091064, + "learning_rate": 7.3267250653142774e-06, + "log_odds_chosen": 11.143669128417969, + "log_odds_ratio": -4.348631773609668e-05, + "logits/chosen": -0.22094419598579407, + "logits/rejected": -0.3634456992149353, + "logps/chosen": -0.0003180534695275128, + "logps/rejected": -2.687262535095215, + "loss": 0.3249, + "nll_loss": 0.08122783154249191, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1805349863134325e-05, + "rewards/margins": 0.2686944603919983, + "rewards/rejected": -0.26872625946998596, + "step": 12553 + }, + { + "epoch": 8.681881051175656, + "grad_norm": 2.4743692874908447, + "learning_rate": 7.322883049024127e-06, + "log_odds_chosen": 10.48563003540039, + "log_odds_ratio": -3.777826714213006e-05, + "logits/chosen": -0.24010899662971497, + "logits/rejected": -0.2702690660953522, + "logps/chosen": -0.0003850167558994144, + "logps/rejected": -1.9075738191604614, + "loss": 0.2904, + "nll_loss": 0.07258377224206924, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.850167558994144e-05, + "rewards/margins": 0.1907188892364502, + "rewards/rejected": -0.1907573938369751, + "step": 12554 + }, + { + "epoch": 8.682572614107883, + "grad_norm": 4.934008598327637, + "learning_rate": 7.31904103273398e-06, + "log_odds_chosen": 11.115760803222656, + "log_odds_ratio": -3.487409048830159e-05, + "logits/chosen": -0.39205920696258545, + "logits/rejected": -0.44652342796325684, + "logps/chosen": -0.0001003592333290726, + "logps/rejected": -1.8912320137023926, + "loss": 0.4261, + "nll_loss": 0.10652204602956772, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0035922969109379e-05, + "rewards/margins": 0.18911318480968475, + "rewards/rejected": -0.18912319839000702, + "step": 12555 + }, + { + "epoch": 8.68326417704011, + "grad_norm": 7.027980804443359, + "learning_rate": 7.31519901644383e-06, + "log_odds_chosen": 10.794315338134766, + "log_odds_ratio": -3.511262548272498e-05, + "logits/chosen": -0.06356941163539886, + "logits/rejected": -0.16646791994571686, + "logps/chosen": -0.00010867961827898398, + "logps/rejected": -1.8001667261123657, + "loss": 0.8285, + "nll_loss": 0.20711015164852142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0867961464100517e-05, + "rewards/margins": 0.18000580370426178, + "rewards/rejected": -0.1800166666507721, + "step": 12556 + }, + { + "epoch": 8.683955739972337, + "grad_norm": 4.8774943351745605, + "learning_rate": 7.3113570001536804e-06, + "log_odds_chosen": 11.061723709106445, + "log_odds_ratio": -4.698024713434279e-05, + "logits/chosen": 0.021711044013500214, + "logits/rejected": -0.04401899129152298, + "logps/chosen": -0.00020374648738652468, + "logps/rejected": -2.322935104370117, + "loss": 0.334, + "nll_loss": 0.08349205553531647, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0374651285237633e-05, + "rewards/margins": 0.2322731465101242, + "rewards/rejected": -0.2322934865951538, + "step": 12557 + }, + { + "epoch": 8.684647302904564, + "grad_norm": 5.135361194610596, + "learning_rate": 7.307514983863532e-06, + "log_odds_chosen": 9.509974479675293, + "log_odds_ratio": -0.0002750680723693222, + "logits/chosen": -0.3695323169231415, + "logits/rejected": -0.41089409589767456, + "logps/chosen": -0.0005933195352554321, + "logps/rejected": -1.8128838539123535, + "loss": 0.4796, + "nll_loss": 0.11986302584409714, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9331960073905066e-05, + "rewards/margins": 0.18122906982898712, + "rewards/rejected": -0.18128840625286102, + "step": 12558 + }, + { + "epoch": 8.68533886583679, + "grad_norm": 5.54158878326416, + "learning_rate": 7.303672967573383e-06, + "log_odds_chosen": 11.576949119567871, + "log_odds_ratio": -3.356914021424018e-05, + "logits/chosen": -0.33821767568588257, + "logits/rejected": -0.2222682237625122, + "logps/chosen": -0.0006055298144929111, + "logps/rejected": -2.9858524799346924, + "loss": 0.4759, + "nll_loss": 0.11898404359817505, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0552989452844486e-05, + "rewards/margins": 0.2985247075557709, + "rewards/rejected": -0.2985852360725403, + "step": 12559 + }, + { + "epoch": 8.686030428769017, + "grad_norm": 3.1131091117858887, + "learning_rate": 7.2998309512832335e-06, + "log_odds_chosen": 9.934978485107422, + "log_odds_ratio": -0.00014396195183508098, + "logits/chosen": -0.383375346660614, + "logits/rejected": -0.39323872327804565, + "logps/chosen": -0.00020214408868923783, + "logps/rejected": -1.5136477947235107, + "loss": 0.3916, + "nll_loss": 0.09787482768297195, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0214409232721664e-05, + "rewards/margins": 0.15134456753730774, + "rewards/rejected": -0.1513647884130478, + "step": 12560 + }, + { + "epoch": 8.686721991701244, + "grad_norm": 2.7549195289611816, + "learning_rate": 7.295988934993085e-06, + "log_odds_chosen": 10.395133972167969, + "log_odds_ratio": -0.0001060390131897293, + "logits/chosen": 0.09680791199207306, + "logits/rejected": -0.08237051218748093, + "logps/chosen": -0.00030865223379805684, + "logps/rejected": -2.023832321166992, + "loss": 0.2334, + "nll_loss": 0.058332134038209915, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.086522701778449e-05, + "rewards/margins": 0.2023523598909378, + "rewards/rejected": -0.20238322019577026, + "step": 12561 + }, + { + "epoch": 8.687413554633471, + "grad_norm": 4.050837516784668, + "learning_rate": 7.292146918702936e-06, + "log_odds_chosen": 11.533906936645508, + "log_odds_ratio": -4.815634747501463e-05, + "logits/chosen": -0.41314229369163513, + "logits/rejected": -0.3989812135696411, + "logps/chosen": -0.00024749228032305837, + "logps/rejected": -2.437464714050293, + "loss": 0.413, + "nll_loss": 0.10324068367481232, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4749231670284644e-05, + "rewards/margins": 0.24372172355651855, + "rewards/rejected": -0.24374648928642273, + "step": 12562 + }, + { + "epoch": 8.688105117565698, + "grad_norm": 4.586285591125488, + "learning_rate": 7.288304902412786e-06, + "log_odds_chosen": 12.125259399414062, + "log_odds_ratio": -1.427874667569995e-05, + "logits/chosen": -0.2432543784379959, + "logits/rejected": -0.3548405170440674, + "logps/chosen": -0.00017908416339196265, + "logps/rejected": -3.028646469116211, + "loss": 0.6192, + "nll_loss": 0.15479278564453125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7908416339196265e-05, + "rewards/margins": 0.3028467297554016, + "rewards/rejected": -0.3028646409511566, + "step": 12563 + }, + { + "epoch": 8.688796680497925, + "grad_norm": 3.127497434616089, + "learning_rate": 7.284462886122638e-06, + "log_odds_chosen": 11.781786918640137, + "log_odds_ratio": -2.0596940885297954e-05, + "logits/chosen": -0.7333821654319763, + "logits/rejected": -0.80907142162323, + "logps/chosen": -0.0001434225996490568, + "logps/rejected": -2.65181565284729, + "loss": 0.4069, + "nll_loss": 0.10172779858112335, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4342261238198262e-05, + "rewards/margins": 0.265167236328125, + "rewards/rejected": -0.2651815712451935, + "step": 12564 + }, + { + "epoch": 8.689488243430151, + "grad_norm": 5.145379066467285, + "learning_rate": 7.280620869832488e-06, + "log_odds_chosen": 11.970072746276855, + "log_odds_ratio": -1.3783266695099883e-05, + "logits/chosen": -0.2855966091156006, + "logits/rejected": -0.2714526653289795, + "logps/chosen": -0.0001378589222440496, + "logps/rejected": -2.928068161010742, + "loss": 0.5744, + "nll_loss": 0.1435983031988144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3785893315798603e-05, + "rewards/margins": 0.29279303550720215, + "rewards/rejected": -0.29280680418014526, + "step": 12565 + }, + { + "epoch": 8.690179806362378, + "grad_norm": 4.036627769470215, + "learning_rate": 7.276778853542339e-06, + "log_odds_chosen": 10.717005729675293, + "log_odds_ratio": -0.0003459883155301213, + "logits/chosen": -0.26304930448532104, + "logits/rejected": -0.363567054271698, + "logps/chosen": -0.00023860861256252974, + "logps/rejected": -1.7904492616653442, + "loss": 0.4647, + "nll_loss": 0.11615101993083954, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3860862711444497e-05, + "rewards/margins": 0.17902106046676636, + "rewards/rejected": -0.1790449321269989, + "step": 12566 + }, + { + "epoch": 8.690871369294605, + "grad_norm": 4.8369011878967285, + "learning_rate": 7.2729368372521905e-06, + "log_odds_chosen": 12.138534545898438, + "log_odds_ratio": -1.8096015992341563e-05, + "logits/chosen": -0.3726271688938141, + "logits/rejected": -0.3865164518356323, + "logps/chosen": -0.0001636394445085898, + "logps/rejected": -2.8079988956451416, + "loss": 0.4224, + "nll_loss": 0.1056041345000267, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6363945178454742e-05, + "rewards/margins": 0.2807835340499878, + "rewards/rejected": -0.28079989552497864, + "step": 12567 + }, + { + "epoch": 8.691562932226832, + "grad_norm": 4.219342231750488, + "learning_rate": 7.269094820962041e-06, + "log_odds_chosen": 11.014745712280273, + "log_odds_ratio": -4.224685108056292e-05, + "logits/chosen": -0.633184015750885, + "logits/rejected": -0.5657342076301575, + "logps/chosen": -0.00012159225298091769, + "logps/rejected": -2.0499179363250732, + "loss": 0.3539, + "nll_loss": 0.08847401291131973, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.215922566188965e-05, + "rewards/margins": 0.20497965812683105, + "rewards/rejected": -0.20499181747436523, + "step": 12568 + }, + { + "epoch": 8.692254495159059, + "grad_norm": 3.421389579772949, + "learning_rate": 7.265252804671892e-06, + "log_odds_chosen": 11.144598007202148, + "log_odds_ratio": -2.0327761376393028e-05, + "logits/chosen": 0.20896287262439728, + "logits/rejected": 0.19816726446151733, + "logps/chosen": -0.0002626884379424155, + "logps/rejected": -2.649585485458374, + "loss": 0.4227, + "nll_loss": 0.10567466914653778, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.626884452183731e-05, + "rewards/margins": 0.2649322748184204, + "rewards/rejected": -0.26495856046676636, + "step": 12569 + }, + { + "epoch": 8.692946058091286, + "grad_norm": 3.9324991703033447, + "learning_rate": 7.2614107883817436e-06, + "log_odds_chosen": 12.014698028564453, + "log_odds_ratio": -1.5872808944550343e-05, + "logits/chosen": -0.4532574415206909, + "logits/rejected": -0.5123213529586792, + "logps/chosen": -9.749118908075616e-05, + "logps/rejected": -2.4674978256225586, + "loss": 0.4644, + "nll_loss": 0.11609979718923569, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.749119271873496e-06, + "rewards/margins": 0.24674005806446075, + "rewards/rejected": -0.24674980342388153, + "step": 12570 + }, + { + "epoch": 8.693637621023512, + "grad_norm": 5.530608177185059, + "learning_rate": 7.257568772091594e-06, + "log_odds_chosen": 9.991994857788086, + "log_odds_ratio": -0.0002486288140062243, + "logits/chosen": -0.33492887020111084, + "logits/rejected": -0.41823023557662964, + "logps/chosen": -0.0004774140543304384, + "logps/rejected": -2.2885494232177734, + "loss": 0.4417, + "nll_loss": 0.11040748655796051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.774140688823536e-05, + "rewards/margins": 0.2288072109222412, + "rewards/rejected": -0.2288549542427063, + "step": 12571 + }, + { + "epoch": 8.69432918395574, + "grad_norm": 3.9434962272644043, + "learning_rate": 7.253726755801444e-06, + "log_odds_chosen": 11.857610702514648, + "log_odds_ratio": -2.52670215559192e-05, + "logits/chosen": -0.10745374858379364, + "logits/rejected": -0.15593905746936798, + "logps/chosen": -0.00013630901230499148, + "logps/rejected": -2.7091565132141113, + "loss": 0.5198, + "nll_loss": 0.12993690371513367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.363090177619597e-05, + "rewards/margins": 0.27090200781822205, + "rewards/rejected": -0.2709156572818756, + "step": 12572 + }, + { + "epoch": 8.695020746887966, + "grad_norm": 4.2418212890625, + "learning_rate": 7.249884739511297e-06, + "log_odds_chosen": 11.451732635498047, + "log_odds_ratio": -3.787030072999187e-05, + "logits/chosen": -0.5961735844612122, + "logits/rejected": -0.6251358389854431, + "logps/chosen": -0.0001301948941545561, + "logps/rejected": -2.368989944458008, + "loss": 0.4418, + "nll_loss": 0.11045637726783752, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.301948941545561e-05, + "rewards/margins": 0.23688596487045288, + "rewards/rejected": -0.2368989884853363, + "step": 12573 + }, + { + "epoch": 8.695712309820193, + "grad_norm": 3.2620646953582764, + "learning_rate": 7.2460427232211466e-06, + "log_odds_chosen": 11.303452491760254, + "log_odds_ratio": -3.886825652443804e-05, + "logits/chosen": -0.9809278249740601, + "logits/rejected": -0.7991894483566284, + "logps/chosen": -0.0001622609415790066, + "logps/rejected": -1.9842360019683838, + "loss": 0.3051, + "nll_loss": 0.07626000046730042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6226094885496423e-05, + "rewards/margins": 0.198407381772995, + "rewards/rejected": -0.1984236091375351, + "step": 12574 + }, + { + "epoch": 8.69640387275242, + "grad_norm": 2.8854849338531494, + "learning_rate": 7.242200706930997e-06, + "log_odds_chosen": 10.891356468200684, + "log_odds_ratio": -0.00012532320397440344, + "logits/chosen": -0.41142168641090393, + "logits/rejected": -0.4985794425010681, + "logps/chosen": -0.0002292045101057738, + "logps/rejected": -2.070760726928711, + "loss": 0.267, + "nll_loss": 0.06673512607812881, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.29204506467795e-05, + "rewards/margins": 0.20705315470695496, + "rewards/rejected": -0.2070760726928711, + "step": 12575 + }, + { + "epoch": 8.697095435684647, + "grad_norm": 4.4359211921691895, + "learning_rate": 7.238358690640849e-06, + "log_odds_chosen": 10.509967803955078, + "log_odds_ratio": -0.00018874961824622005, + "logits/chosen": 0.07059525698423386, + "logits/rejected": -0.06952603161334991, + "logps/chosen": -0.00029744295170530677, + "logps/rejected": -2.1389076709747314, + "loss": 0.5862, + "nll_loss": 0.14652371406555176, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9744294806732796e-05, + "rewards/margins": 0.21386101841926575, + "rewards/rejected": -0.21389076113700867, + "step": 12576 + }, + { + "epoch": 8.697786998616873, + "grad_norm": 4.180637836456299, + "learning_rate": 7.2345166743507e-06, + "log_odds_chosen": 11.173707962036133, + "log_odds_ratio": -3.307564475107938e-05, + "logits/chosen": -0.3616059124469757, + "logits/rejected": -0.38579240441322327, + "logps/chosen": -0.000232769685680978, + "logps/rejected": -2.3975720405578613, + "loss": 0.4217, + "nll_loss": 0.1054258793592453, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3276967112906277e-05, + "rewards/margins": 0.23973393440246582, + "rewards/rejected": -0.2397572249174118, + "step": 12577 + }, + { + "epoch": 8.6984785615491, + "grad_norm": 4.1263346672058105, + "learning_rate": 7.23067465806055e-06, + "log_odds_chosen": 10.084461212158203, + "log_odds_ratio": -0.000205255564651452, + "logits/chosen": -0.4047377109527588, + "logits/rejected": -0.41672807931900024, + "logps/chosen": -0.0005328357219696045, + "logps/rejected": -2.0177836418151855, + "loss": 0.4105, + "nll_loss": 0.1026056632399559, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3283569286577404e-05, + "rewards/margins": 0.2017250657081604, + "rewards/rejected": -0.2017783522605896, + "step": 12578 + }, + { + "epoch": 8.699170124481327, + "grad_norm": 4.116734981536865, + "learning_rate": 7.226832641770402e-06, + "log_odds_chosen": 11.684539794921875, + "log_odds_ratio": -3.806072345469147e-05, + "logits/chosen": -0.21267051994800568, + "logits/rejected": -0.277524471282959, + "logps/chosen": -0.00014213379472494125, + "logps/rejected": -2.839292049407959, + "loss": 0.6086, + "nll_loss": 0.1521454155445099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4213380381988827e-05, + "rewards/margins": 0.2839149534702301, + "rewards/rejected": -0.28392916917800903, + "step": 12579 + }, + { + "epoch": 8.699861687413554, + "grad_norm": 4.5581135749816895, + "learning_rate": 7.222990625480253e-06, + "log_odds_chosen": 10.256507873535156, + "log_odds_ratio": -0.0004174423520453274, + "logits/chosen": 0.030440326780080795, + "logits/rejected": -0.15859153866767883, + "logps/chosen": -0.0005254342686384916, + "logps/rejected": -1.7349371910095215, + "loss": 0.458, + "nll_loss": 0.11446496844291687, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.254342977423221e-05, + "rewards/margins": 0.17344118654727936, + "rewards/rejected": -0.17349371314048767, + "step": 12580 + }, + { + "epoch": 8.70055325034578, + "grad_norm": 3.6485166549682617, + "learning_rate": 7.2191486091901035e-06, + "log_odds_chosen": 10.942009925842285, + "log_odds_ratio": -6.669512367807329e-05, + "logits/chosen": -0.26107847690582275, + "logits/rejected": -0.24480968713760376, + "logps/chosen": -0.0002021636173594743, + "logps/rejected": -2.267293930053711, + "loss": 0.3822, + "nll_loss": 0.09555569291114807, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0216362827341072e-05, + "rewards/margins": 0.22670917212963104, + "rewards/rejected": -0.2267293930053711, + "step": 12581 + }, + { + "epoch": 8.701244813278008, + "grad_norm": 5.63314962387085, + "learning_rate": 7.215306592899953e-06, + "log_odds_chosen": 11.545077323913574, + "log_odds_ratio": -1.9420609532971866e-05, + "logits/chosen": -0.17932385206222534, + "logits/rejected": -0.29974794387817383, + "logps/chosen": -0.00010438306344440207, + "logps/rejected": -2.3992764949798584, + "loss": 0.4362, + "nll_loss": 0.10905801504850388, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0438306162541267e-05, + "rewards/margins": 0.23991720378398895, + "rewards/rejected": -0.23992764949798584, + "step": 12582 + }, + { + "epoch": 8.701936376210234, + "grad_norm": 8.555891036987305, + "learning_rate": 7.211464576609806e-06, + "log_odds_chosen": 11.477520942687988, + "log_odds_ratio": -1.730481380946003e-05, + "logits/chosen": -0.5060890316963196, + "logits/rejected": -0.5176623463630676, + "logps/chosen": -0.00015273058670572937, + "logps/rejected": -2.328230619430542, + "loss": 0.3748, + "nll_loss": 0.0936933383345604, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5273057215381414e-05, + "rewards/margins": 0.23280777037143707, + "rewards/rejected": -0.23282305896282196, + "step": 12583 + }, + { + "epoch": 8.702627939142461, + "grad_norm": 6.84820556640625, + "learning_rate": 7.207622560319656e-06, + "log_odds_chosen": 10.907526969909668, + "log_odds_ratio": -6.034345642547123e-05, + "logits/chosen": -0.2660806477069855, + "logits/rejected": -0.34288960695266724, + "logps/chosen": -0.0004112754249945283, + "logps/rejected": -2.787398338317871, + "loss": 0.4983, + "nll_loss": 0.12456180900335312, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.11275468650274e-05, + "rewards/margins": 0.27869871258735657, + "rewards/rejected": -0.2787398397922516, + "step": 12584 + }, + { + "epoch": 8.703319502074688, + "grad_norm": 5.089015007019043, + "learning_rate": 7.2037805440295065e-06, + "log_odds_chosen": 10.05128288269043, + "log_odds_ratio": -0.00040466885548084974, + "logits/chosen": -0.9034023880958557, + "logits/rejected": -0.8430752754211426, + "logps/chosen": -0.00016636928194202483, + "logps/rejected": -1.6874971389770508, + "loss": 0.5554, + "nll_loss": 0.13880708813667297, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6636928194202483e-05, + "rewards/margins": 0.16873309016227722, + "rewards/rejected": -0.16874971985816956, + "step": 12585 + }, + { + "epoch": 8.704011065006915, + "grad_norm": 5.3731465339660645, + "learning_rate": 7.199938527739358e-06, + "log_odds_chosen": 11.04498291015625, + "log_odds_ratio": -8.862379036145285e-05, + "logits/chosen": -0.3212606906890869, + "logits/rejected": -0.44569075107574463, + "logps/chosen": -0.0004522545204963535, + "logps/rejected": -2.747040271759033, + "loss": 0.4199, + "nll_loss": 0.10495775192975998, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5225449866848066e-05, + "rewards/margins": 0.27465879917144775, + "rewards/rejected": -0.2747040390968323, + "step": 12586 + }, + { + "epoch": 8.704702627939142, + "grad_norm": 3.756265640258789, + "learning_rate": 7.196096511449209e-06, + "log_odds_chosen": 9.995338439941406, + "log_odds_ratio": -8.589846402173862e-05, + "logits/chosen": -0.2591727674007416, + "logits/rejected": -0.32403162121772766, + "logps/chosen": -0.00024264455714728683, + "logps/rejected": -1.6683635711669922, + "loss": 0.3217, + "nll_loss": 0.08042872697114944, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4264456442324445e-05, + "rewards/margins": 0.16681209206581116, + "rewards/rejected": -0.16683635115623474, + "step": 12587 + }, + { + "epoch": 8.705394190871369, + "grad_norm": 3.1371610164642334, + "learning_rate": 7.19225449515906e-06, + "log_odds_chosen": 9.871097564697266, + "log_odds_ratio": -0.00022018066374585032, + "logits/chosen": -0.3875494599342346, + "logits/rejected": -0.42681199312210083, + "logps/chosen": -0.0001549256849102676, + "logps/rejected": -1.2135441303253174, + "loss": 0.2897, + "nll_loss": 0.07240563631057739, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.549256921862252e-05, + "rewards/margins": 0.12133892625570297, + "rewards/rejected": -0.12135441601276398, + "step": 12588 + }, + { + "epoch": 8.706085753803595, + "grad_norm": 8.206844329833984, + "learning_rate": 7.188412478868911e-06, + "log_odds_chosen": 9.889083862304688, + "log_odds_ratio": -7.846077642170712e-05, + "logits/chosen": -0.5176295638084412, + "logits/rejected": -0.6087726950645447, + "logps/chosen": -0.0003467805508989841, + "logps/rejected": -1.7795064449310303, + "loss": 0.3239, + "nll_loss": 0.08095990866422653, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4678054362302646e-05, + "rewards/margins": 0.17791596055030823, + "rewards/rejected": -0.1779506504535675, + "step": 12589 + }, + { + "epoch": 8.706777316735822, + "grad_norm": 3.7519938945770264, + "learning_rate": 7.184570462578762e-06, + "log_odds_chosen": 10.750160217285156, + "log_odds_ratio": -0.00017055787611752748, + "logits/chosen": -0.15037044882774353, + "logits/rejected": -0.26256707310676575, + "logps/chosen": -0.000679384043905884, + "logps/rejected": -2.163872003555298, + "loss": 0.3447, + "nll_loss": 0.08616575598716736, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.793840293539688e-05, + "rewards/margins": 0.2163192480802536, + "rewards/rejected": -0.21638718247413635, + "step": 12590 + }, + { + "epoch": 8.707468879668049, + "grad_norm": 3.6036159992218018, + "learning_rate": 7.180728446288612e-06, + "log_odds_chosen": 10.451128959655762, + "log_odds_ratio": -9.225706162396818e-05, + "logits/chosen": -0.20861420035362244, + "logits/rejected": -0.27226394414901733, + "logps/chosen": -0.00017163812299259007, + "logps/rejected": -2.0080366134643555, + "loss": 0.5543, + "nll_loss": 0.13857188820838928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7163811207865365e-05, + "rewards/margins": 0.2007865309715271, + "rewards/rejected": -0.20080366730690002, + "step": 12591 + }, + { + "epoch": 8.708160442600276, + "grad_norm": 3.12872314453125, + "learning_rate": 7.176886429998464e-06, + "log_odds_chosen": 11.708080291748047, + "log_odds_ratio": -1.2221262295497581e-05, + "logits/chosen": -0.8714234232902527, + "logits/rejected": -0.8446594476699829, + "logps/chosen": -0.0001258108823094517, + "logps/rejected": -2.541940927505493, + "loss": 0.2771, + "nll_loss": 0.0692775622010231, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2581089322338812e-05, + "rewards/margins": 0.25418150424957275, + "rewards/rejected": -0.25419408082962036, + "step": 12592 + }, + { + "epoch": 8.708852005532503, + "grad_norm": 3.5991594791412354, + "learning_rate": 7.173044413708314e-06, + "log_odds_chosen": 10.483360290527344, + "log_odds_ratio": -0.00011691125109791756, + "logits/chosen": -0.4403286278247833, + "logits/rejected": -0.5139305591583252, + "logps/chosen": -0.00017070303147193044, + "logps/rejected": -2.086862087249756, + "loss": 0.4266, + "nll_loss": 0.10663561522960663, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7070302419597283e-05, + "rewards/margins": 0.20866911113262177, + "rewards/rejected": -0.2086862176656723, + "step": 12593 + }, + { + "epoch": 8.70954356846473, + "grad_norm": 5.748128890991211, + "learning_rate": 7.169202397418165e-06, + "log_odds_chosen": 9.889494895935059, + "log_odds_ratio": -0.0004682771395891905, + "logits/chosen": -0.1965133547782898, + "logits/rejected": -0.24669188261032104, + "logps/chosen": -0.00048058730317279696, + "logps/rejected": -1.8298323154449463, + "loss": 0.334, + "nll_loss": 0.08344646543264389, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8058729589683935e-05, + "rewards/margins": 0.1829351782798767, + "rewards/rejected": -0.18298323452472687, + "step": 12594 + }, + { + "epoch": 8.710235131396956, + "grad_norm": 2.409346103668213, + "learning_rate": 7.1653603811280165e-06, + "log_odds_chosen": 10.281041145324707, + "log_odds_ratio": -8.93459000508301e-05, + "logits/chosen": -0.09198392927646637, + "logits/rejected": -0.20935139060020447, + "logps/chosen": -0.00020214702817611396, + "logps/rejected": -1.562282681465149, + "loss": 0.2675, + "nll_loss": 0.06685630977153778, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0214702090015635e-05, + "rewards/margins": 0.15620805323123932, + "rewards/rejected": -0.15622827410697937, + "step": 12595 + }, + { + "epoch": 8.710926694329183, + "grad_norm": 3.746424674987793, + "learning_rate": 7.161518364837867e-06, + "log_odds_chosen": 10.644408226013184, + "log_odds_ratio": -0.0003350527840666473, + "logits/chosen": -0.25374066829681396, + "logits/rejected": -0.3007930517196655, + "logps/chosen": -0.0003922658215742558, + "logps/rejected": -1.6606199741363525, + "loss": 0.3313, + "nll_loss": 0.08277983963489532, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9226582885021344e-05, + "rewards/margins": 0.16602277755737305, + "rewards/rejected": -0.16606199741363525, + "step": 12596 + }, + { + "epoch": 8.71161825726141, + "grad_norm": 4.901035785675049, + "learning_rate": 7.157676348547718e-06, + "log_odds_chosen": 11.265356063842773, + "log_odds_ratio": -2.715396294661332e-05, + "logits/chosen": 0.05210549384355545, + "logits/rejected": -0.05895791947841644, + "logps/chosen": -0.00033751592854969203, + "logps/rejected": -2.655134916305542, + "loss": 0.6153, + "nll_loss": 0.15382874011993408, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.375159576535225e-05, + "rewards/margins": 0.2654797434806824, + "rewards/rejected": -0.26551347970962524, + "step": 12597 + }, + { + "epoch": 8.712309820193637, + "grad_norm": 5.053715705871582, + "learning_rate": 7.15383433225757e-06, + "log_odds_chosen": 11.885496139526367, + "log_odds_ratio": -4.7778376028873026e-05, + "logits/chosen": -0.03668589144945145, + "logits/rejected": -0.13197988271713257, + "logps/chosen": -0.00018731525051407516, + "logps/rejected": -2.8411545753479004, + "loss": 0.4438, + "nll_loss": 0.11095211654901505, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.87315272341948e-05, + "rewards/margins": 0.28409671783447266, + "rewards/rejected": -0.2841154634952545, + "step": 12598 + }, + { + "epoch": 8.713001383125864, + "grad_norm": 3.501352071762085, + "learning_rate": 7.14999231596742e-06, + "log_odds_chosen": 10.418107986450195, + "log_odds_ratio": -4.948129935655743e-05, + "logits/chosen": -0.17310462892055511, + "logits/rejected": -0.11154043674468994, + "logps/chosen": -0.0005186637281440198, + "logps/rejected": -1.961129903793335, + "loss": 0.4102, + "nll_loss": 0.10254395008087158, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1866372814401984e-05, + "rewards/margins": 0.1960611343383789, + "rewards/rejected": -0.1961129903793335, + "step": 12599 + }, + { + "epoch": 8.71369294605809, + "grad_norm": 3.993454694747925, + "learning_rate": 7.14615029967727e-06, + "log_odds_chosen": 11.306490898132324, + "log_odds_ratio": -1.9935872842324898e-05, + "logits/chosen": -0.36342853307724, + "logits/rejected": -0.5179613828659058, + "logps/chosen": -0.00020630184735637158, + "logps/rejected": -2.2232353687286377, + "loss": 0.3613, + "nll_loss": 0.09033246338367462, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0630184735637158e-05, + "rewards/margins": 0.22230291366577148, + "rewards/rejected": -0.22232355177402496, + "step": 12600 + }, + { + "epoch": 8.714384508990317, + "grad_norm": 3.8765039443969727, + "learning_rate": 7.142308283387123e-06, + "log_odds_chosen": 11.331442832946777, + "log_odds_ratio": -6.65429761284031e-05, + "logits/chosen": -0.6077317595481873, + "logits/rejected": -0.58716881275177, + "logps/chosen": -0.0002417774376226589, + "logps/rejected": -2.399134635925293, + "loss": 0.4247, + "nll_loss": 0.10616884380578995, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4177745217457414e-05, + "rewards/margins": 0.23988929390907288, + "rewards/rejected": -0.2399134635925293, + "step": 12601 + }, + { + "epoch": 8.715076071922544, + "grad_norm": 4.196615219116211, + "learning_rate": 7.138466267096973e-06, + "log_odds_chosen": 10.872209548950195, + "log_odds_ratio": -5.1147879275958985e-05, + "logits/chosen": 0.13419261574745178, + "logits/rejected": 0.06465723365545273, + "logps/chosen": -0.00032623554579913616, + "logps/rejected": -1.8758091926574707, + "loss": 0.6551, + "nll_loss": 0.1637628972530365, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.26235567627009e-05, + "rewards/margins": 0.18754829466342926, + "rewards/rejected": -0.1875808984041214, + "step": 12602 + }, + { + "epoch": 8.715767634854771, + "grad_norm": 3.0449070930480957, + "learning_rate": 7.134624250806823e-06, + "log_odds_chosen": 12.087318420410156, + "log_odds_ratio": -3.8094327464932576e-05, + "logits/chosen": -0.8015158176422119, + "logits/rejected": -0.8395053148269653, + "logps/chosen": -9.241785301128402e-05, + "logps/rejected": -2.986556053161621, + "loss": 0.3298, + "nll_loss": 0.08245350420475006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.241785846825223e-06, + "rewards/margins": 0.29864639043807983, + "rewards/rejected": -0.29865562915802, + "step": 12603 + }, + { + "epoch": 8.716459197786998, + "grad_norm": 4.552496433258057, + "learning_rate": 7.130782234516675e-06, + "log_odds_chosen": 11.790977478027344, + "log_odds_ratio": -1.52910051838262e-05, + "logits/chosen": -0.12492383271455765, + "logits/rejected": 0.024242829531431198, + "logps/chosen": -6.92198591423221e-05, + "logps/rejected": -2.2918920516967773, + "loss": 0.2776, + "nll_loss": 0.06939011067152023, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.921985914232209e-06, + "rewards/margins": 0.22918227314949036, + "rewards/rejected": -0.2291892021894455, + "step": 12604 + }, + { + "epoch": 8.717150760719225, + "grad_norm": 2.587273359298706, + "learning_rate": 7.126940218226526e-06, + "log_odds_chosen": 11.168664932250977, + "log_odds_ratio": -9.771065379027277e-05, + "logits/chosen": -0.48414313793182373, + "logits/rejected": -0.2710132300853729, + "logps/chosen": -0.00022592968889512122, + "logps/rejected": -2.34855318069458, + "loss": 0.274, + "nll_loss": 0.06848978996276855, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2592970708501525e-05, + "rewards/margins": 0.2348327338695526, + "rewards/rejected": -0.23485532402992249, + "step": 12605 + }, + { + "epoch": 8.717842323651452, + "grad_norm": 3.0871849060058594, + "learning_rate": 7.1230982019363765e-06, + "log_odds_chosen": 11.11591911315918, + "log_odds_ratio": -0.0003519427846185863, + "logits/chosen": -0.18353600800037384, + "logits/rejected": -0.18372249603271484, + "logps/chosen": -0.0002869067538995296, + "logps/rejected": -2.1926016807556152, + "loss": 0.3918, + "nll_loss": 0.09791938215494156, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.869067611754872e-05, + "rewards/margins": 0.2192314714193344, + "rewards/rejected": -0.21926017105579376, + "step": 12606 + }, + { + "epoch": 8.718533886583678, + "grad_norm": 4.727206707000732, + "learning_rate": 7.119256185646228e-06, + "log_odds_chosen": 12.682234764099121, + "log_odds_ratio": -4.2280080378986895e-06, + "logits/chosen": -0.4567912817001343, + "logits/rejected": -0.41550421714782715, + "logps/chosen": -7.959036156535149e-05, + "logps/rejected": -3.1277332305908203, + "loss": 0.4349, + "nll_loss": 0.10871396958827972, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.959036338434089e-06, + "rewards/margins": 0.31276535987854004, + "rewards/rejected": -0.31277331709861755, + "step": 12607 + }, + { + "epoch": 8.719225449515905, + "grad_norm": 4.500866413116455, + "learning_rate": 7.115414169356079e-06, + "log_odds_chosen": 10.519453048706055, + "log_odds_ratio": -0.00015784561401233077, + "logits/chosen": -0.0011763647198677063, + "logits/rejected": -0.051256291568279266, + "logps/chosen": -0.0008152094087563455, + "logps/rejected": -2.0021238327026367, + "loss": 0.4842, + "nll_loss": 0.12104595452547073, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.152094233082607e-05, + "rewards/margins": 0.2001308798789978, + "rewards/rejected": -0.20021241903305054, + "step": 12608 + }, + { + "epoch": 8.719917012448132, + "grad_norm": 5.224221706390381, + "learning_rate": 7.111572153065929e-06, + "log_odds_chosen": 11.925086975097656, + "log_odds_ratio": -9.902300917019602e-06, + "logits/chosen": -0.7283359169960022, + "logits/rejected": -0.7884764075279236, + "logps/chosen": -8.355738100362942e-05, + "logps/rejected": -2.3458425998687744, + "loss": 0.8125, + "nll_loss": 0.20313361287117004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.355738827958703e-06, + "rewards/margins": 0.23457591235637665, + "rewards/rejected": -0.2345842719078064, + "step": 12609 + }, + { + "epoch": 8.720608575380359, + "grad_norm": 3.826673746109009, + "learning_rate": 7.107730136775781e-06, + "log_odds_chosen": 10.73037052154541, + "log_odds_ratio": -8.004449045984074e-05, + "logits/chosen": -0.19814665615558624, + "logits/rejected": -0.29370230436325073, + "logps/chosen": -0.00048773473827168345, + "logps/rejected": -2.245574712753296, + "loss": 0.4769, + "nll_loss": 0.11920658499002457, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.877347237197682e-05, + "rewards/margins": 0.22450870275497437, + "rewards/rejected": -0.22455745935440063, + "step": 12610 + }, + { + "epoch": 8.721300138312586, + "grad_norm": 4.613309860229492, + "learning_rate": 7.103888120485631e-06, + "log_odds_chosen": 10.130619049072266, + "log_odds_ratio": -9.19510202948004e-05, + "logits/chosen": 0.07331767678260803, + "logits/rejected": -0.019994735717773438, + "logps/chosen": -0.00010537736670812592, + "logps/rejected": -1.35574471950531, + "loss": 0.4954, + "nll_loss": 0.12383987754583359, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0537736670812592e-05, + "rewards/margins": 0.1355639398097992, + "rewards/rejected": -0.13557447493076324, + "step": 12611 + }, + { + "epoch": 8.721991701244812, + "grad_norm": 10.10327434539795, + "learning_rate": 7.100046104195482e-06, + "log_odds_chosen": 11.801833152770996, + "log_odds_ratio": -3.427134288358502e-05, + "logits/chosen": -0.4082002639770508, + "logits/rejected": -0.5625779628753662, + "logps/chosen": -0.0001247481704922393, + "logps/rejected": -2.620378255844116, + "loss": 0.4185, + "nll_loss": 0.10463136434555054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2474816685426049e-05, + "rewards/margins": 0.2620253562927246, + "rewards/rejected": -0.26203781366348267, + "step": 12612 + }, + { + "epoch": 8.72268326417704, + "grad_norm": 3.1714866161346436, + "learning_rate": 7.0962040879053326e-06, + "log_odds_chosen": 10.744078636169434, + "log_odds_ratio": -4.0337850805372e-05, + "logits/chosen": -0.4073140025138855, + "logits/rejected": -0.4945356547832489, + "logps/chosen": -0.00017250381642952561, + "logps/rejected": -1.7121458053588867, + "loss": 0.3845, + "nll_loss": 0.09612710773944855, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.725038055155892e-05, + "rewards/margins": 0.1711973249912262, + "rewards/rejected": -0.17121456563472748, + "step": 12613 + }, + { + "epoch": 8.723374827109266, + "grad_norm": 3.5619633197784424, + "learning_rate": 7.092362071615184e-06, + "log_odds_chosen": 11.23532485961914, + "log_odds_ratio": -4.3949934479314834e-05, + "logits/chosen": -0.22376228868961334, + "logits/rejected": -0.18069423735141754, + "logps/chosen": -0.0001710898068267852, + "logps/rejected": -2.4304232597351074, + "loss": 0.3353, + "nll_loss": 0.08382796496152878, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7108981410274282e-05, + "rewards/margins": 0.24302522838115692, + "rewards/rejected": -0.24304234981536865, + "step": 12614 + }, + { + "epoch": 8.724066390041493, + "grad_norm": 4.019108295440674, + "learning_rate": 7.088520055325035e-06, + "log_odds_chosen": 11.954331398010254, + "log_odds_ratio": -3.5667791962623596e-05, + "logits/chosen": -0.6766592860221863, + "logits/rejected": -0.7944373488426208, + "logps/chosen": -0.00015910463116597384, + "logps/rejected": -2.911555767059326, + "loss": 0.4366, + "nll_loss": 0.10915455222129822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5910463844193146e-05, + "rewards/margins": 0.2911396622657776, + "rewards/rejected": -0.2911555767059326, + "step": 12615 + }, + { + "epoch": 8.72475795297372, + "grad_norm": 4.540794372558594, + "learning_rate": 7.084678039034885e-06, + "log_odds_chosen": 12.210094451904297, + "log_odds_ratio": -1.2035889085382223e-05, + "logits/chosen": -0.42616957426071167, + "logits/rejected": -0.4792676568031311, + "logps/chosen": -4.8736692406237125e-05, + "logps/rejected": -2.242910861968994, + "loss": 0.337, + "nll_loss": 0.08424394577741623, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8736692406237125e-06, + "rewards/margins": 0.22428621351718903, + "rewards/rejected": -0.22429108619689941, + "step": 12616 + }, + { + "epoch": 8.725449515905947, + "grad_norm": 3.611435890197754, + "learning_rate": 7.080836022744737e-06, + "log_odds_chosen": 12.544656753540039, + "log_odds_ratio": -9.19141893973574e-06, + "logits/chosen": -0.47252994775772095, + "logits/rejected": -0.5143290758132935, + "logps/chosen": -0.00015017985424492508, + "logps/rejected": -3.355624198913574, + "loss": 0.3825, + "nll_loss": 0.09561695158481598, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5017985788290389e-05, + "rewards/margins": 0.33554738759994507, + "rewards/rejected": -0.33556240797042847, + "step": 12617 + }, + { + "epoch": 8.726141078838173, + "grad_norm": 5.643194675445557, + "learning_rate": 7.076994006454587e-06, + "log_odds_chosen": 11.000103950500488, + "log_odds_ratio": -0.008125041611492634, + "logits/chosen": -0.12783555686473846, + "logits/rejected": -0.19143040478229523, + "logps/chosen": -0.00514611043035984, + "logps/rejected": -2.533423900604248, + "loss": 0.5958, + "nll_loss": 0.14812599122524261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005146110197529197, + "rewards/margins": 0.2528277635574341, + "rewards/rejected": -0.2533423900604248, + "step": 12618 + }, + { + "epoch": 8.7268326417704, + "grad_norm": 5.0216803550720215, + "learning_rate": 7.073151990164438e-06, + "log_odds_chosen": 10.841806411743164, + "log_odds_ratio": -0.00014175310207065195, + "logits/chosen": -0.37150660157203674, + "logits/rejected": -0.4800848066806793, + "logps/chosen": -0.00025622386601753533, + "logps/rejected": -2.5595288276672363, + "loss": 0.5147, + "nll_loss": 0.12865594029426575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5622386601753533e-05, + "rewards/margins": 0.25592726469039917, + "rewards/rejected": -0.2559528946876526, + "step": 12619 + }, + { + "epoch": 8.727524204702627, + "grad_norm": 3.8709161281585693, + "learning_rate": 7.0693099738742895e-06, + "log_odds_chosen": 10.722715377807617, + "log_odds_ratio": -0.00010744819155661389, + "logits/chosen": -0.04501526430249214, + "logits/rejected": -0.27509522438049316, + "logps/chosen": -0.00015670969150960445, + "logps/rejected": -1.9246792793273926, + "loss": 0.5193, + "nll_loss": 0.12981583178043365, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5670970242354088e-05, + "rewards/margins": 0.19245225191116333, + "rewards/rejected": -0.19246794283390045, + "step": 12620 + }, + { + "epoch": 8.728215767634854, + "grad_norm": 3.180833339691162, + "learning_rate": 7.06546795758414e-06, + "log_odds_chosen": 10.660358428955078, + "log_odds_ratio": -8.569403871661052e-05, + "logits/chosen": -0.3871857523918152, + "logits/rejected": -0.3466258943080902, + "logps/chosen": -0.0003636727633420378, + "logps/rejected": -2.124570608139038, + "loss": 0.3908, + "nll_loss": 0.0976894348859787, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.636727706179954e-05, + "rewards/margins": 0.21242070198059082, + "rewards/rejected": -0.2124570608139038, + "step": 12621 + }, + { + "epoch": 8.72890733056708, + "grad_norm": 4.775147914886475, + "learning_rate": 7.061625941293991e-06, + "log_odds_chosen": 10.252630233764648, + "log_odds_ratio": -0.0007642924902029335, + "logits/chosen": -0.6822867393493652, + "logits/rejected": -0.7102404832839966, + "logps/chosen": -0.0007607060251757503, + "logps/rejected": -2.434619426727295, + "loss": 0.2722, + "nll_loss": 0.06797634810209274, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.607060251757503e-05, + "rewards/margins": 0.24338586628437042, + "rewards/rejected": -0.24346192181110382, + "step": 12622 + }, + { + "epoch": 8.729598893499308, + "grad_norm": 3.3071470260620117, + "learning_rate": 7.057783925003843e-06, + "log_odds_chosen": 11.80417251586914, + "log_odds_ratio": -4.535269908956252e-05, + "logits/chosen": -0.32505127787590027, + "logits/rejected": -0.33316338062286377, + "logps/chosen": -0.00013507023686543107, + "logps/rejected": -2.901522397994995, + "loss": 0.3179, + "nll_loss": 0.07946906983852386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3507024050340988e-05, + "rewards/margins": 0.29013872146606445, + "rewards/rejected": -0.2901522219181061, + "step": 12623 + }, + { + "epoch": 8.730290456431534, + "grad_norm": 2.9452579021453857, + "learning_rate": 7.053941908713693e-06, + "log_odds_chosen": 9.710962295532227, + "log_odds_ratio": -0.0014099262189120054, + "logits/chosen": -0.1288309246301651, + "logits/rejected": -0.1669916808605194, + "logps/chosen": -0.002055376535281539, + "logps/rejected": -2.0088515281677246, + "loss": 0.3109, + "nll_loss": 0.07759317755699158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00020553766808006912, + "rewards/margins": 0.20067963004112244, + "rewards/rejected": -0.20088517665863037, + "step": 12624 + }, + { + "epoch": 8.730982019363761, + "grad_norm": 2.780109167098999, + "learning_rate": 7.050099892423544e-06, + "log_odds_chosen": 10.963064193725586, + "log_odds_ratio": -2.1850135453860275e-05, + "logits/chosen": -0.32372623682022095, + "logits/rejected": -0.37397193908691406, + "logps/chosen": -0.0004387985682114959, + "logps/rejected": -2.7941770553588867, + "loss": 0.4054, + "nll_loss": 0.1013399064540863, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3879856093553826e-05, + "rewards/margins": 0.27937382459640503, + "rewards/rejected": -0.2794176936149597, + "step": 12625 + }, + { + "epoch": 8.731673582295988, + "grad_norm": 4.202791213989258, + "learning_rate": 7.046257876133396e-06, + "log_odds_chosen": 11.598773002624512, + "log_odds_ratio": -1.712938137643505e-05, + "logits/chosen": 0.002315342426300049, + "logits/rejected": -0.0762111097574234, + "logps/chosen": -0.00010078588093165308, + "logps/rejected": -2.4241111278533936, + "loss": 0.5578, + "nll_loss": 0.13943594694137573, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.007858918455895e-05, + "rewards/margins": 0.24240103363990784, + "rewards/rejected": -0.24241109192371368, + "step": 12626 + }, + { + "epoch": 8.732365145228215, + "grad_norm": 4.474009990692139, + "learning_rate": 7.0424158598432464e-06, + "log_odds_chosen": 10.89125919342041, + "log_odds_ratio": -0.00019299234554637223, + "logits/chosen": -0.2681261897087097, + "logits/rejected": -0.3043631315231323, + "logps/chosen": -0.001370918471366167, + "logps/rejected": -1.9742841720581055, + "loss": 0.798, + "nll_loss": 0.1994776874780655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013709186168853194, + "rewards/margins": 0.1972913146018982, + "rewards/rejected": -0.1974284052848816, + "step": 12627 + }, + { + "epoch": 8.733056708160442, + "grad_norm": 3.0392186641693115, + "learning_rate": 7.038573843553096e-06, + "log_odds_chosen": 11.702457427978516, + "log_odds_ratio": -1.5169207472354174e-05, + "logits/chosen": -0.5146140456199646, + "logits/rejected": -0.5965772867202759, + "logps/chosen": -7.573777838842943e-05, + "logps/rejected": -2.1300292015075684, + "loss": 0.3362, + "nll_loss": 0.08405561745166779, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.573778020741884e-06, + "rewards/margins": 0.21299535036087036, + "rewards/rejected": -0.21300292015075684, + "step": 12628 + }, + { + "epoch": 8.733748271092669, + "grad_norm": 4.198177337646484, + "learning_rate": 7.034731827262949e-06, + "log_odds_chosen": 10.71436882019043, + "log_odds_ratio": -0.00033096273546107113, + "logits/chosen": -0.3540208041667938, + "logits/rejected": -0.417741596698761, + "logps/chosen": -0.00041849075932987034, + "logps/rejected": -2.053122043609619, + "loss": 0.4197, + "nll_loss": 0.10489177703857422, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.184907447779551e-05, + "rewards/margins": 0.20527033507823944, + "rewards/rejected": -0.20531219244003296, + "step": 12629 + }, + { + "epoch": 8.734439834024897, + "grad_norm": 6.847844123840332, + "learning_rate": 7.030889810972799e-06, + "log_odds_chosen": 11.515414237976074, + "log_odds_ratio": -5.265471190796234e-05, + "logits/chosen": -0.12134791165590286, + "logits/rejected": -0.2235623151063919, + "logps/chosen": -9.148490789812058e-05, + "logps/rejected": -2.0303359031677246, + "loss": 0.5226, + "nll_loss": 0.13064485788345337, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.14849169930676e-06, + "rewards/margins": 0.2030244767665863, + "rewards/rejected": -0.20303361117839813, + "step": 12630 + }, + { + "epoch": 8.735131396957122, + "grad_norm": 3.86293625831604, + "learning_rate": 7.0270477946826494e-06, + "log_odds_chosen": 10.796789169311523, + "log_odds_ratio": -5.433675687527284e-05, + "logits/chosen": -0.15211236476898193, + "logits/rejected": -0.2016526758670807, + "logps/chosen": -0.0005663117044605315, + "logps/rejected": -2.7149319648742676, + "loss": 0.5026, + "nll_loss": 0.12565679848194122, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.663117190124467e-05, + "rewards/margins": 0.27143657207489014, + "rewards/rejected": -0.27149319648742676, + "step": 12631 + }, + { + "epoch": 8.73582295988935, + "grad_norm": 5.464266777038574, + "learning_rate": 7.023205778392501e-06, + "log_odds_chosen": 10.550954818725586, + "log_odds_ratio": -7.085293327691033e-05, + "logits/chosen": -0.4823170304298401, + "logits/rejected": -0.607326328754425, + "logps/chosen": -0.00017116402159444988, + "logps/rejected": -1.851062536239624, + "loss": 0.4346, + "nll_loss": 0.10864056646823883, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.711640288704075e-05, + "rewards/margins": 0.1850891411304474, + "rewards/rejected": -0.18510624766349792, + "step": 12632 + }, + { + "epoch": 8.736514522821576, + "grad_norm": 2.564517021179199, + "learning_rate": 7.019363762102352e-06, + "log_odds_chosen": 10.802752494812012, + "log_odds_ratio": -5.0851380365202203e-05, + "logits/chosen": -0.13559579849243164, + "logits/rejected": -0.07748744636774063, + "logps/chosen": -0.0002571075165178627, + "logps/rejected": -2.224565267562866, + "loss": 0.3544, + "nll_loss": 0.08859890699386597, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.571075310697779e-05, + "rewards/margins": 0.22243082523345947, + "rewards/rejected": -0.22245654463768005, + "step": 12633 + }, + { + "epoch": 8.737206085753805, + "grad_norm": 2.7891175746917725, + "learning_rate": 7.0155217458122025e-06, + "log_odds_chosen": 10.613808631896973, + "log_odds_ratio": -3.704680784721859e-05, + "logits/chosen": -0.3028711974620819, + "logits/rejected": -0.13343878090381622, + "logps/chosen": -0.00017175314133055508, + "logps/rejected": -1.9700030088424683, + "loss": 0.3105, + "nll_loss": 0.07763022929430008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7175312677863985e-05, + "rewards/margins": 0.19698314368724823, + "rewards/rejected": -0.19700030982494354, + "step": 12634 + }, + { + "epoch": 8.73789764868603, + "grad_norm": 3.843463182449341, + "learning_rate": 7.011679729522054e-06, + "log_odds_chosen": 11.393573760986328, + "log_odds_ratio": -0.00019181430980097502, + "logits/chosen": -0.39797934889793396, + "logits/rejected": -0.5349745154380798, + "logps/chosen": -0.00023400085046887398, + "logps/rejected": -2.7963550090789795, + "loss": 0.3211, + "nll_loss": 0.08026115596294403, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3400087229674682e-05, + "rewards/margins": 0.27961212396621704, + "rewards/rejected": -0.279635488986969, + "step": 12635 + }, + { + "epoch": 8.738589211618258, + "grad_norm": 3.8218467235565186, + "learning_rate": 7.007837713231905e-06, + "log_odds_chosen": 11.811062812805176, + "log_odds_ratio": -8.913544661481865e-06, + "logits/chosen": -0.33981579542160034, + "logits/rejected": -0.4165121912956238, + "logps/chosen": -0.00015810728655196726, + "logps/rejected": -2.7615065574645996, + "loss": 0.3937, + "nll_loss": 0.09841378778219223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5810728655196726e-05, + "rewards/margins": 0.2761348485946655, + "rewards/rejected": -0.276150643825531, + "step": 12636 + }, + { + "epoch": 8.739280774550483, + "grad_norm": 3.311710834503174, + "learning_rate": 7.003995696941755e-06, + "log_odds_chosen": 11.325023651123047, + "log_odds_ratio": -3.8699912693118677e-05, + "logits/chosen": -0.4233155846595764, + "logits/rejected": -0.43373045325279236, + "logps/chosen": -0.000263809080934152, + "logps/rejected": -2.1461169719696045, + "loss": 0.3791, + "nll_loss": 0.09477078914642334, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6380910640000366e-05, + "rewards/margins": 0.2145853191614151, + "rewards/rejected": -0.2146117091178894, + "step": 12637 + }, + { + "epoch": 8.739972337482712, + "grad_norm": 4.0727715492248535, + "learning_rate": 7.000153680651607e-06, + "log_odds_chosen": 11.421920776367188, + "log_odds_ratio": -5.327256803866476e-05, + "logits/chosen": -0.4592892527580261, + "logits/rejected": -0.5124595165252686, + "logps/chosen": -0.00022929380065761507, + "logps/rejected": -2.3530163764953613, + "loss": 0.4648, + "nll_loss": 0.11619154363870621, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2929380065761507e-05, + "rewards/margins": 0.23527869582176208, + "rewards/rejected": -0.2353016436100006, + "step": 12638 + }, + { + "epoch": 8.740663900414937, + "grad_norm": 6.244078159332275, + "learning_rate": 6.996311664361457e-06, + "log_odds_chosen": 11.745616912841797, + "log_odds_ratio": -9.599408076610416e-05, + "logits/chosen": -0.004837028216570616, + "logits/rejected": 0.05525718629360199, + "logps/chosen": -0.0002785644610412419, + "logps/rejected": -3.0801918506622314, + "loss": 0.3365, + "nll_loss": 0.08412357419729233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7856445740326308e-05, + "rewards/margins": 0.3079913258552551, + "rewards/rejected": -0.3080191910266876, + "step": 12639 + }, + { + "epoch": 8.741355463347166, + "grad_norm": 3.8222501277923584, + "learning_rate": 6.992469648071308e-06, + "log_odds_chosen": 12.153668403625488, + "log_odds_ratio": -1.307290222030133e-05, + "logits/chosen": -0.1858936995267868, + "logits/rejected": -0.28293725848197937, + "logps/chosen": -0.00013134724576957524, + "logps/rejected": -2.8148396015167236, + "loss": 0.6301, + "nll_loss": 0.15752094984054565, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3134726032149047e-05, + "rewards/margins": 0.2814708352088928, + "rewards/rejected": -0.2814839482307434, + "step": 12640 + }, + { + "epoch": 8.74204702627939, + "grad_norm": 3.784421682357788, + "learning_rate": 6.9886276317811595e-06, + "log_odds_chosen": 11.424051284790039, + "log_odds_ratio": -7.20424868632108e-05, + "logits/chosen": -0.2345329374074936, + "logits/rejected": -0.20033228397369385, + "logps/chosen": -0.00031157504417933524, + "logps/rejected": -3.2700071334838867, + "loss": 0.3724, + "nll_loss": 0.09310246258974075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1157505873125046e-05, + "rewards/margins": 0.32696956396102905, + "rewards/rejected": -0.3270007371902466, + "step": 12641 + }, + { + "epoch": 8.74273858921162, + "grad_norm": 3.0729806423187256, + "learning_rate": 6.98478561549101e-06, + "log_odds_chosen": 12.01394271850586, + "log_odds_ratio": -2.7902418878511526e-05, + "logits/chosen": -0.21632501482963562, + "logits/rejected": -0.30397987365722656, + "logps/chosen": -0.00028304714942350984, + "logps/rejected": -3.192939519882202, + "loss": 0.4473, + "nll_loss": 0.11181334406137466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.830471385095734e-05, + "rewards/margins": 0.3192656636238098, + "rewards/rejected": -0.31929394602775574, + "step": 12642 + }, + { + "epoch": 8.743430152143844, + "grad_norm": 4.0299458503723145, + "learning_rate": 6.980943599200861e-06, + "log_odds_chosen": 12.166967391967773, + "log_odds_ratio": -1.8124801499652676e-05, + "logits/chosen": -0.19768285751342773, + "logits/rejected": -0.19605018198490143, + "logps/chosen": -8.931868069339544e-05, + "logps/rejected": -2.820674419403076, + "loss": 0.4265, + "nll_loss": 0.10661443322896957, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.931868251238484e-06, + "rewards/margins": 0.2820585072040558, + "rewards/rejected": -0.2820674479007721, + "step": 12643 + }, + { + "epoch": 8.744121715076073, + "grad_norm": 4.370787143707275, + "learning_rate": 6.9771015829107126e-06, + "log_odds_chosen": 9.561683654785156, + "log_odds_ratio": -0.0007459899061359465, + "logits/chosen": -0.4039541184902191, + "logits/rejected": -0.4170818328857422, + "logps/chosen": -0.0006070085219107568, + "logps/rejected": -1.8454440832138062, + "loss": 0.5765, + "nll_loss": 0.14405278861522675, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.070085510145873e-05, + "rewards/margins": 0.18448373675346375, + "rewards/rejected": -0.1845444142818451, + "step": 12644 + }, + { + "epoch": 8.744813278008298, + "grad_norm": 3.183621406555176, + "learning_rate": 6.973259566620563e-06, + "log_odds_chosen": 11.528457641601562, + "log_odds_ratio": -2.550122735556215e-05, + "logits/chosen": -0.5070438981056213, + "logits/rejected": -0.5771378874778748, + "logps/chosen": -0.00021793476480524987, + "logps/rejected": -2.513467311859131, + "loss": 0.3878, + "nll_loss": 0.09694357216358185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.179347757191863e-05, + "rewards/margins": 0.25132498145103455, + "rewards/rejected": -0.25134676694869995, + "step": 12645 + }, + { + "epoch": 8.745504840940526, + "grad_norm": 4.302793025970459, + "learning_rate": 6.969417550330413e-06, + "log_odds_chosen": 10.690472602844238, + "log_odds_ratio": -0.00014246918726712465, + "logits/chosen": 0.21797120571136475, + "logits/rejected": 0.06780949980020523, + "logps/chosen": -0.0004303157329559326, + "logps/rejected": -2.490640878677368, + "loss": 0.4077, + "nll_loss": 0.10190653800964355, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.303157766116783e-05, + "rewards/margins": 0.24902105331420898, + "rewards/rejected": -0.24906408786773682, + "step": 12646 + }, + { + "epoch": 8.746196403872752, + "grad_norm": 4.100922107696533, + "learning_rate": 6.965575534040264e-06, + "log_odds_chosen": 9.01432991027832, + "log_odds_ratio": -0.0014760670019313693, + "logits/chosen": -0.33062905073165894, + "logits/rejected": -0.38164985179901123, + "logps/chosen": -0.0009245839901268482, + "logps/rejected": -1.5423380136489868, + "loss": 0.3171, + "nll_loss": 0.07912150025367737, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.245839464711025e-05, + "rewards/margins": 0.1541413515806198, + "rewards/rejected": -0.15423379838466644, + "step": 12647 + }, + { + "epoch": 8.74688796680498, + "grad_norm": 10.031594276428223, + "learning_rate": 6.9617335177501156e-06, + "log_odds_chosen": 11.902109146118164, + "log_odds_ratio": -2.4353628759854473e-05, + "logits/chosen": -0.010901231318712234, + "logits/rejected": -0.11041456460952759, + "logps/chosen": -0.0003525819047354162, + "logps/rejected": -3.04818058013916, + "loss": 0.6412, + "nll_loss": 0.1602867841720581, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.525819556671195e-05, + "rewards/margins": 0.30478280782699585, + "rewards/rejected": -0.3048180639743805, + "step": 12648 + }, + { + "epoch": 8.747579529737205, + "grad_norm": 2.9875195026397705, + "learning_rate": 6.957891501459966e-06, + "log_odds_chosen": 10.417152404785156, + "log_odds_ratio": -0.0002167121128877625, + "logits/chosen": -0.4560806155204773, + "logits/rejected": -0.5182000994682312, + "logps/chosen": -0.00053954723989591, + "logps/rejected": -2.046635389328003, + "loss": 0.3537, + "nll_loss": 0.0883985161781311, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3954725444782525e-05, + "rewards/margins": 0.20460957288742065, + "rewards/rejected": -0.20466352999210358, + "step": 12649 + }, + { + "epoch": 8.748271092669434, + "grad_norm": 3.9878203868865967, + "learning_rate": 6.954049485169817e-06, + "log_odds_chosen": 11.966552734375, + "log_odds_ratio": -5.745194721384905e-05, + "logits/chosen": -0.5101600289344788, + "logits/rejected": -0.6078588962554932, + "logps/chosen": -0.00016526752733625472, + "logps/rejected": -3.0164387226104736, + "loss": 0.3714, + "nll_loss": 0.09284399449825287, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6526753825019114e-05, + "rewards/margins": 0.30162736773490906, + "rewards/rejected": -0.30164390802383423, + "step": 12650 + }, + { + "epoch": 8.748962655601659, + "grad_norm": 3.7651588916778564, + "learning_rate": 6.950207468879669e-06, + "log_odds_chosen": 11.419900894165039, + "log_odds_ratio": -2.6444653485668823e-05, + "logits/chosen": -0.3522256016731262, + "logits/rejected": -0.42689967155456543, + "logps/chosen": -0.00010521085641812533, + "logps/rejected": -2.185702323913574, + "loss": 0.2686, + "nll_loss": 0.06714515388011932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0521085641812533e-05, + "rewards/margins": 0.21855969727039337, + "rewards/rejected": -0.21857021749019623, + "step": 12651 + }, + { + "epoch": 8.749654218533887, + "grad_norm": 2.433018922805786, + "learning_rate": 6.946365452589519e-06, + "log_odds_chosen": 10.421948432922363, + "log_odds_ratio": -0.00015964567137416452, + "logits/chosen": -0.5980747938156128, + "logits/rejected": -0.6149604320526123, + "logps/chosen": -0.0002093097718898207, + "logps/rejected": -1.6080046892166138, + "loss": 0.2819, + "nll_loss": 0.07044967263936996, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0930976461386308e-05, + "rewards/margins": 0.16077953577041626, + "rewards/rejected": -0.16080045700073242, + "step": 12652 + }, + { + "epoch": 8.750345781466113, + "grad_norm": 3.1320106983184814, + "learning_rate": 6.942523436299369e-06, + "log_odds_chosen": 10.990400314331055, + "log_odds_ratio": -0.00020457607752177864, + "logits/chosen": -0.2756420969963074, + "logits/rejected": -0.2904963195323944, + "logps/chosen": -0.00015380124386865646, + "logps/rejected": -1.4968993663787842, + "loss": 0.3821, + "nll_loss": 0.09550082683563232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5380124750663526e-05, + "rewards/margins": 0.14967454969882965, + "rewards/rejected": -0.1496899425983429, + "step": 12653 + }, + { + "epoch": 8.751037344398341, + "grad_norm": 5.653360366821289, + "learning_rate": 6.938681420009222e-06, + "log_odds_chosen": 11.22561264038086, + "log_odds_ratio": -8.661947504151613e-05, + "logits/chosen": -0.484736829996109, + "logits/rejected": -0.5598161220550537, + "logps/chosen": -0.0004911787691526115, + "logps/rejected": -3.0218851566314697, + "loss": 0.9148, + "nll_loss": 0.22868333756923676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9117879825644195e-05, + "rewards/margins": 0.30213940143585205, + "rewards/rejected": -0.302188515663147, + "step": 12654 + }, + { + "epoch": 8.751728907330566, + "grad_norm": 3.8202199935913086, + "learning_rate": 6.934839403719072e-06, + "log_odds_chosen": 11.486774444580078, + "log_odds_ratio": -0.0001702620356809348, + "logits/chosen": -0.17967629432678223, + "logits/rejected": -0.17935806512832642, + "logps/chosen": -0.00038216577377170324, + "logps/rejected": -2.837064266204834, + "loss": 0.3708, + "nll_loss": 0.0926813930273056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8216578104766086e-05, + "rewards/margins": 0.2836682200431824, + "rewards/rejected": -0.2837064266204834, + "step": 12655 + }, + { + "epoch": 8.752420470262795, + "grad_norm": 3.8114304542541504, + "learning_rate": 6.930997387428922e-06, + "log_odds_chosen": 10.966930389404297, + "log_odds_ratio": -3.6104695027461275e-05, + "logits/chosen": -0.33328402042388916, + "logits/rejected": -0.40690383315086365, + "logps/chosen": -0.00014368303527589887, + "logps/rejected": -1.8688400983810425, + "loss": 0.4061, + "nll_loss": 0.1015223041176796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4368303709488828e-05, + "rewards/margins": 0.18686963617801666, + "rewards/rejected": -0.18688401579856873, + "step": 12656 + }, + { + "epoch": 8.75311203319502, + "grad_norm": 3.9191980361938477, + "learning_rate": 6.927155371138774e-06, + "log_odds_chosen": 12.820165634155273, + "log_odds_ratio": -7.132274731702637e-06, + "logits/chosen": -0.6197792887687683, + "logits/rejected": -0.637600302696228, + "logps/chosen": -8.839755173539743e-05, + "logps/rejected": -3.3203587532043457, + "loss": 0.3296, + "nll_loss": 0.0824105441570282, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.839757356327027e-06, + "rewards/margins": 0.33202704787254333, + "rewards/rejected": -0.3320358693599701, + "step": 12657 + }, + { + "epoch": 8.753803596127248, + "grad_norm": 4.134014129638672, + "learning_rate": 6.923313354848625e-06, + "log_odds_chosen": 11.503602981567383, + "log_odds_ratio": -2.0928862795699388e-05, + "logits/chosen": -0.5688618421554565, + "logits/rejected": -0.6299799680709839, + "logps/chosen": -0.00018121409812010825, + "logps/rejected": -1.9686450958251953, + "loss": 0.3294, + "nll_loss": 0.08235260099172592, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8121409084415063e-05, + "rewards/margins": 0.1968463659286499, + "rewards/rejected": -0.196864515542984, + "step": 12658 + }, + { + "epoch": 8.754495159059474, + "grad_norm": 3.557575225830078, + "learning_rate": 6.9194713385584755e-06, + "log_odds_chosen": 11.22537612915039, + "log_odds_ratio": -3.842857404379174e-05, + "logits/chosen": -0.21988216042518616, + "logits/rejected": -0.36144596338272095, + "logps/chosen": -0.0005637517315335572, + "logps/rejected": -2.552690267562866, + "loss": 0.3812, + "nll_loss": 0.09529484063386917, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.637517460854724e-05, + "rewards/margins": 0.255212664604187, + "rewards/rejected": -0.25526905059814453, + "step": 12659 + }, + { + "epoch": 8.755186721991702, + "grad_norm": 3.9204976558685303, + "learning_rate": 6.915629322268327e-06, + "log_odds_chosen": 12.630598068237305, + "log_odds_ratio": -1.0285146345268004e-05, + "logits/chosen": -0.6050891280174255, + "logits/rejected": -0.6181715130805969, + "logps/chosen": -0.00013520887296181172, + "logps/rejected": -3.6140971183776855, + "loss": 0.3421, + "nll_loss": 0.08551633358001709, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3520888387574814e-05, + "rewards/margins": 0.3613961637020111, + "rewards/rejected": -0.3614096939563751, + "step": 12660 + }, + { + "epoch": 8.755878284923927, + "grad_norm": 2.353888750076294, + "learning_rate": 6.911787305978178e-06, + "log_odds_chosen": 8.957233428955078, + "log_odds_ratio": -0.0003664310206659138, + "logits/chosen": -0.26996999979019165, + "logits/rejected": -0.34410420060157776, + "logps/chosen": -0.000894219963811338, + "logps/rejected": -1.4619256258010864, + "loss": 0.2453, + "nll_loss": 0.0612957589328289, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.942200656747445e-05, + "rewards/margins": 0.1461031436920166, + "rewards/rejected": -0.14619258046150208, + "step": 12661 + }, + { + "epoch": 8.756569847856156, + "grad_norm": 4.985196590423584, + "learning_rate": 6.907945289688028e-06, + "log_odds_chosen": 11.134862899780273, + "log_odds_ratio": -4.339057340985164e-05, + "logits/chosen": -0.422914981842041, + "logits/rejected": -0.5093221068382263, + "logps/chosen": -0.0001525114494143054, + "logps/rejected": -2.2537786960601807, + "loss": 0.4558, + "nll_loss": 0.11395804584026337, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5251144759531599e-05, + "rewards/margins": 0.2253626137971878, + "rewards/rejected": -0.2253778576850891, + "step": 12662 + }, + { + "epoch": 8.75726141078838, + "grad_norm": 4.657219886779785, + "learning_rate": 6.90410327339788e-06, + "log_odds_chosen": 11.336843490600586, + "log_odds_ratio": -3.69651970686391e-05, + "logits/chosen": -0.07278013229370117, + "logits/rejected": -0.4056493639945984, + "logps/chosen": -0.00011815188190666959, + "logps/rejected": -1.9549498558044434, + "loss": 0.3547, + "nll_loss": 0.08867579698562622, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1815188372565899e-05, + "rewards/margins": 0.19548316299915314, + "rewards/rejected": -0.19549499452114105, + "step": 12663 + }, + { + "epoch": 8.75795297372061, + "grad_norm": 3.1503381729125977, + "learning_rate": 6.90026125710773e-06, + "log_odds_chosen": 11.625396728515625, + "log_odds_ratio": -2.158879578928463e-05, + "logits/chosen": -0.19551731646060944, + "logits/rejected": -0.27250638604164124, + "logps/chosen": -0.00017402239609509706, + "logps/rejected": -2.6546432971954346, + "loss": 0.2782, + "nll_loss": 0.06954455375671387, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7402238881913945e-05, + "rewards/margins": 0.2654469311237335, + "rewards/rejected": -0.26546433568000793, + "step": 12664 + }, + { + "epoch": 8.758644536652834, + "grad_norm": 6.812473297119141, + "learning_rate": 6.896419240817581e-06, + "log_odds_chosen": 12.126462936401367, + "log_odds_ratio": -2.7259564376436174e-05, + "logits/chosen": -0.6818879842758179, + "logits/rejected": -0.7184396982192993, + "logps/chosen": -0.00011000910308212042, + "logps/rejected": -2.533221960067749, + "loss": 0.3746, + "nll_loss": 0.09363803267478943, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1000910490110982e-05, + "rewards/margins": 0.2533112168312073, + "rewards/rejected": -0.25332218408584595, + "step": 12665 + }, + { + "epoch": 8.759336099585063, + "grad_norm": 3.495081901550293, + "learning_rate": 6.8925772245274324e-06, + "log_odds_chosen": 10.932785034179688, + "log_odds_ratio": -6.819087866460904e-05, + "logits/chosen": -0.6123518943786621, + "logits/rejected": -0.5373475551605225, + "logps/chosen": -0.0001256611431017518, + "logps/rejected": -2.137763738632202, + "loss": 0.337, + "nll_loss": 0.08423256874084473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2566115401568823e-05, + "rewards/margins": 0.21376380324363708, + "rewards/rejected": -0.2137763649225235, + "step": 12666 + }, + { + "epoch": 8.760027662517288, + "grad_norm": 2.8406386375427246, + "learning_rate": 6.888735208237283e-06, + "log_odds_chosen": 11.421842575073242, + "log_odds_ratio": -2.9473754693754017e-05, + "logits/chosen": -0.24375933408737183, + "logits/rejected": -0.04970206320285797, + "logps/chosen": -0.00015173686551861465, + "logps/rejected": -2.3240654468536377, + "loss": 0.3222, + "nll_loss": 0.0805358737707138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5173687643255107e-05, + "rewards/margins": 0.2323913723230362, + "rewards/rejected": -0.23240655660629272, + "step": 12667 + }, + { + "epoch": 8.760719225449517, + "grad_norm": 7.1928935050964355, + "learning_rate": 6.884893191947134e-06, + "log_odds_chosen": 11.925975799560547, + "log_odds_ratio": -1.749107650539372e-05, + "logits/chosen": -0.6639752984046936, + "logits/rejected": -0.6779666543006897, + "logps/chosen": -7.219088001875207e-05, + "logps/rejected": -2.313953399658203, + "loss": 0.3397, + "nll_loss": 0.08491578698158264, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.219087365228916e-06, + "rewards/margins": 0.23138810694217682, + "rewards/rejected": -0.23139533400535583, + "step": 12668 + }, + { + "epoch": 8.761410788381742, + "grad_norm": 5.088903427124023, + "learning_rate": 6.8810511756569855e-06, + "log_odds_chosen": 10.844564437866211, + "log_odds_ratio": -4.963607352692634e-05, + "logits/chosen": -0.06909926980733871, + "logits/rejected": -0.11633356660604477, + "logps/chosen": -0.00018518233264330775, + "logps/rejected": -2.1275336742401123, + "loss": 0.5095, + "nll_loss": 0.1273745894432068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8518232536735013e-05, + "rewards/margins": 0.21273484826087952, + "rewards/rejected": -0.21275337040424347, + "step": 12669 + }, + { + "epoch": 8.76210235131397, + "grad_norm": 5.377123832702637, + "learning_rate": 6.877209159366836e-06, + "log_odds_chosen": 12.353730201721191, + "log_odds_ratio": -8.520941264578141e-06, + "logits/chosen": -0.5854908227920532, + "logits/rejected": -0.6783852577209473, + "logps/chosen": -8.955941302701831e-05, + "logps/rejected": -2.8780431747436523, + "loss": 0.5907, + "nll_loss": 0.14767661690711975, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.95594075700501e-06, + "rewards/margins": 0.28779539465904236, + "rewards/rejected": -0.28780436515808105, + "step": 12670 + }, + { + "epoch": 8.762793914246195, + "grad_norm": 2.7313125133514404, + "learning_rate": 6.873367143076687e-06, + "log_odds_chosen": 11.512551307678223, + "log_odds_ratio": -2.1834759536432102e-05, + "logits/chosen": -0.3850710988044739, + "logits/rejected": -0.48258426785469055, + "logps/chosen": -0.00015986453217919916, + "logps/rejected": -2.486776351928711, + "loss": 0.2892, + "nll_loss": 0.07229001820087433, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5986453945515677e-05, + "rewards/margins": 0.24866165220737457, + "rewards/rejected": -0.24867764115333557, + "step": 12671 + }, + { + "epoch": 8.763485477178424, + "grad_norm": 3.895965814590454, + "learning_rate": 6.869525126786539e-06, + "log_odds_chosen": 10.890059471130371, + "log_odds_ratio": -4.000085391453467e-05, + "logits/chosen": -0.42519626021385193, + "logits/rejected": -0.6218904256820679, + "logps/chosen": -0.0002847716968972236, + "logps/rejected": -2.296088218688965, + "loss": 0.379, + "nll_loss": 0.09474711120128632, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8477168598328717e-05, + "rewards/margins": 0.229580357670784, + "rewards/rejected": -0.22960881888866425, + "step": 12672 + }, + { + "epoch": 8.76417704011065, + "grad_norm": 3.6324892044067383, + "learning_rate": 6.865683110496389e-06, + "log_odds_chosen": 11.511186599731445, + "log_odds_ratio": -2.8699216272798367e-05, + "logits/chosen": -0.6761150360107422, + "logits/rejected": -0.6927566528320312, + "logps/chosen": -0.0001163781707873568, + "logps/rejected": -1.9792041778564453, + "loss": 0.3162, + "nll_loss": 0.07904962450265884, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1637816896836739e-05, + "rewards/margins": 0.19790878891944885, + "rewards/rejected": -0.19792042672634125, + "step": 12673 + }, + { + "epoch": 8.764868603042878, + "grad_norm": 3.701308488845825, + "learning_rate": 6.861841094206239e-06, + "log_odds_chosen": 11.138769149780273, + "log_odds_ratio": -0.00015294156037271023, + "logits/chosen": -0.4651656150817871, + "logits/rejected": -0.5149001479148865, + "logps/chosen": -0.00016836769646033645, + "logps/rejected": -2.1938812732696533, + "loss": 0.3896, + "nll_loss": 0.09738843142986298, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.683676782704424e-05, + "rewards/margins": 0.21937128901481628, + "rewards/rejected": -0.21938814222812653, + "step": 12674 + }, + { + "epoch": 8.765560165975103, + "grad_norm": 4.318880081176758, + "learning_rate": 6.857999077916092e-06, + "log_odds_chosen": 11.026379585266113, + "log_odds_ratio": -0.00032751416438259184, + "logits/chosen": -0.38952934741973877, + "logits/rejected": -0.44622597098350525, + "logps/chosen": -0.001070479047484696, + "logps/rejected": -2.7773513793945312, + "loss": 0.3938, + "nll_loss": 0.09841464459896088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010704790474846959, + "rewards/margins": 0.277628093957901, + "rewards/rejected": -0.2777351438999176, + "step": 12675 + }, + { + "epoch": 8.766251728907331, + "grad_norm": 4.59893798828125, + "learning_rate": 6.854157061625942e-06, + "log_odds_chosen": 10.647390365600586, + "log_odds_ratio": -8.746745879761875e-05, + "logits/chosen": -0.607392430305481, + "logits/rejected": -0.6704075336456299, + "logps/chosen": -0.000778252724558115, + "logps/rejected": -2.5483808517456055, + "loss": 0.4577, + "nll_loss": 0.11440734565258026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.78252724558115e-05, + "rewards/margins": 0.2547602653503418, + "rewards/rejected": -0.25483807921409607, + "step": 12676 + }, + { + "epoch": 8.766943291839558, + "grad_norm": 3.0734097957611084, + "learning_rate": 6.850315045335792e-06, + "log_odds_chosen": 11.056943893432617, + "log_odds_ratio": -2.5260575057473034e-05, + "logits/chosen": -0.47094714641571045, + "logits/rejected": -0.5174641013145447, + "logps/chosen": -0.00048814882757142186, + "logps/rejected": -2.6240057945251465, + "loss": 0.3099, + "nll_loss": 0.07747091352939606, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8814887122716755e-05, + "rewards/margins": 0.26235175132751465, + "rewards/rejected": -0.2624005675315857, + "step": 12677 + }, + { + "epoch": 8.767634854771785, + "grad_norm": 3.880401849746704, + "learning_rate": 6.846473029045644e-06, + "log_odds_chosen": 11.165346145629883, + "log_odds_ratio": -3.870287036988884e-05, + "logits/chosen": -0.4656296372413635, + "logits/rejected": -0.5163675546646118, + "logps/chosen": -0.0003671708982437849, + "logps/rejected": -2.0970325469970703, + "loss": 0.4177, + "nll_loss": 0.10441119223833084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.671709418995306e-05, + "rewards/margins": 0.20966653525829315, + "rewards/rejected": -0.2097032368183136, + "step": 12678 + }, + { + "epoch": 8.768326417704012, + "grad_norm": 4.935536861419678, + "learning_rate": 6.842631012755495e-06, + "log_odds_chosen": 11.351346969604492, + "log_odds_ratio": -9.1105917817913e-05, + "logits/chosen": 0.022925205528736115, + "logits/rejected": -0.13815544545650482, + "logps/chosen": -0.00024218103499151766, + "logps/rejected": -2.6840310096740723, + "loss": 0.525, + "nll_loss": 0.1312437355518341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4218103135353886e-05, + "rewards/margins": 0.2683789134025574, + "rewards/rejected": -0.2684031128883362, + "step": 12679 + }, + { + "epoch": 8.769017980636239, + "grad_norm": 3.416334867477417, + "learning_rate": 6.8387889964653455e-06, + "log_odds_chosen": 11.296628952026367, + "log_odds_ratio": -0.0004493595624808222, + "logits/chosen": -0.4751221239566803, + "logits/rejected": -0.5552091598510742, + "logps/chosen": -0.00039027677848935127, + "logps/rejected": -2.32190203666687, + "loss": 0.5385, + "nll_loss": 0.1345841884613037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.902768366970122e-05, + "rewards/margins": 0.23215121030807495, + "rewards/rejected": -0.23219022154808044, + "step": 12680 + }, + { + "epoch": 8.769709543568466, + "grad_norm": 4.126625061035156, + "learning_rate": 6.834946980175195e-06, + "log_odds_chosen": 11.260299682617188, + "log_odds_ratio": -6.328269228106365e-05, + "logits/chosen": -0.07310329377651215, + "logits/rejected": 0.08745068311691284, + "logps/chosen": -0.00016332468658220023, + "logps/rejected": -2.020930051803589, + "loss": 0.3832, + "nll_loss": 0.09578186273574829, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.633246756682638e-05, + "rewards/margins": 0.20207667350769043, + "rewards/rejected": -0.2020930051803589, + "step": 12681 + }, + { + "epoch": 8.770401106500692, + "grad_norm": 5.452424049377441, + "learning_rate": 6.831104963885048e-06, + "log_odds_chosen": 12.215993881225586, + "log_odds_ratio": -1.5033554518595338e-05, + "logits/chosen": -0.1208108440041542, + "logits/rejected": -0.15237045288085938, + "logps/chosen": -0.0002600625157356262, + "logps/rejected": -3.15916109085083, + "loss": 0.6392, + "nll_loss": 0.15979792177677155, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6006251573562622e-05, + "rewards/margins": 0.3158901035785675, + "rewards/rejected": -0.31591612100601196, + "step": 12682 + }, + { + "epoch": 8.77109266943292, + "grad_norm": 5.0705342292785645, + "learning_rate": 6.827262947594898e-06, + "log_odds_chosen": 10.764846801757812, + "log_odds_ratio": -0.00030922293080948293, + "logits/chosen": -0.9068706035614014, + "logits/rejected": -0.8894181251525879, + "logps/chosen": -0.00034139491617679596, + "logps/rejected": -2.2270474433898926, + "loss": 0.4354, + "nll_loss": 0.1088085025548935, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.41394952556584e-05, + "rewards/margins": 0.22267059981822968, + "rewards/rejected": -0.22270473837852478, + "step": 12683 + }, + { + "epoch": 8.771784232365146, + "grad_norm": 3.2080435752868652, + "learning_rate": 6.8234209313047485e-06, + "log_odds_chosen": 11.981725692749023, + "log_odds_ratio": -1.5340634490712546e-05, + "logits/chosen": -0.30534428358078003, + "logits/rejected": -0.4273209869861603, + "logps/chosen": -0.0001579856761964038, + "logps/rejected": -2.5716822147369385, + "loss": 0.3241, + "nll_loss": 0.08102867007255554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.579856689204462e-05, + "rewards/margins": 0.25715240836143494, + "rewards/rejected": -0.2571682035923004, + "step": 12684 + }, + { + "epoch": 8.772475795297373, + "grad_norm": 2.746500015258789, + "learning_rate": 6.8195789150146e-06, + "log_odds_chosen": 11.367488861083984, + "log_odds_ratio": -4.743778481497429e-05, + "logits/chosen": -0.10710550844669342, + "logits/rejected": -0.1570693403482437, + "logps/chosen": -0.0003469569201115519, + "logps/rejected": -2.4717087745666504, + "loss": 0.2992, + "nll_loss": 0.07480612397193909, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.469569128355943e-05, + "rewards/margins": 0.2471361756324768, + "rewards/rejected": -0.24717086553573608, + "step": 12685 + }, + { + "epoch": 8.7731673582296, + "grad_norm": 3.9325008392333984, + "learning_rate": 6.815736898724451e-06, + "log_odds_chosen": 11.478471755981445, + "log_odds_ratio": -4.558680666377768e-05, + "logits/chosen": -0.07658401131629944, + "logits/rejected": -0.29487428069114685, + "logps/chosen": -0.00018664757953956723, + "logps/rejected": -2.660466194152832, + "loss": 0.456, + "nll_loss": 0.11400678753852844, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8664759409148246e-05, + "rewards/margins": 0.26602795720100403, + "rewards/rejected": -0.2660466432571411, + "step": 12686 + }, + { + "epoch": 8.773858921161827, + "grad_norm": 4.728855609893799, + "learning_rate": 6.8118948824343016e-06, + "log_odds_chosen": 11.943270683288574, + "log_odds_ratio": -2.3963817511685193e-05, + "logits/chosen": 0.271952748298645, + "logits/rejected": 0.23709207773208618, + "logps/chosen": -0.00030409041210077703, + "logps/rejected": -3.301213264465332, + "loss": 0.5569, + "nll_loss": 0.13921433687210083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0409040846279822e-05, + "rewards/margins": 0.3300909101963043, + "rewards/rejected": -0.33012130856513977, + "step": 12687 + }, + { + "epoch": 8.774550484094053, + "grad_norm": 4.821633815765381, + "learning_rate": 6.808052866144153e-06, + "log_odds_chosen": 10.78215503692627, + "log_odds_ratio": -0.00015306704153772444, + "logits/chosen": -0.5289968252182007, + "logits/rejected": -0.5817938446998596, + "logps/chosen": -0.00021679667406715453, + "logps/rejected": -1.9820137023925781, + "loss": 0.3593, + "nll_loss": 0.08979903161525726, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1679668861906976e-05, + "rewards/margins": 0.198179692029953, + "rewards/rejected": -0.19820138812065125, + "step": 12688 + }, + { + "epoch": 8.77524204702628, + "grad_norm": 4.591336727142334, + "learning_rate": 6.804210849854004e-06, + "log_odds_chosen": 11.119473457336426, + "log_odds_ratio": -3.22980122291483e-05, + "logits/chosen": -0.4589541554450989, + "logits/rejected": -0.4797210097312927, + "logps/chosen": -5.7230747188441455e-05, + "logps/rejected": -1.5906126499176025, + "loss": 0.433, + "nll_loss": 0.10825856029987335, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.723074991692556e-06, + "rewards/margins": 0.15905553102493286, + "rewards/rejected": -0.1590612530708313, + "step": 12689 + }, + { + "epoch": 8.775933609958507, + "grad_norm": 2.532083511352539, + "learning_rate": 6.800368833563854e-06, + "log_odds_chosen": 10.686437606811523, + "log_odds_ratio": -0.00012513159890659153, + "logits/chosen": -0.4632389545440674, + "logits/rejected": -0.608898401260376, + "logps/chosen": -0.0006360713741742074, + "logps/rejected": -1.9530318975448608, + "loss": 0.3341, + "nll_loss": 0.08351817727088928, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360714178299531e-05, + "rewards/margins": 0.1952395737171173, + "rewards/rejected": -0.19530320167541504, + "step": 12690 + }, + { + "epoch": 8.776625172890734, + "grad_norm": 4.333111763000488, + "learning_rate": 6.796526817273706e-06, + "log_odds_chosen": 10.954401969909668, + "log_odds_ratio": -5.6871689594117925e-05, + "logits/chosen": -0.2808159589767456, + "logits/rejected": -0.30360111594200134, + "logps/chosen": -0.0002845522831194103, + "logps/rejected": -2.3893260955810547, + "loss": 0.4027, + "nll_loss": 0.10066375136375427, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.845522976713255e-05, + "rewards/margins": 0.2389041632413864, + "rewards/rejected": -0.23893260955810547, + "step": 12691 + }, + { + "epoch": 8.77731673582296, + "grad_norm": 2.768235445022583, + "learning_rate": 6.792684800983556e-06, + "log_odds_chosen": 11.034823417663574, + "log_odds_ratio": -2.2319993149721995e-05, + "logits/chosen": -0.24669495224952698, + "logits/rejected": -0.46884268522262573, + "logps/chosen": -0.00020692654652521014, + "logps/rejected": -2.3289706707000732, + "loss": 0.3428, + "nll_loss": 0.08570310473442078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0692654288723134e-05, + "rewards/margins": 0.23287639021873474, + "rewards/rejected": -0.232897087931633, + "step": 12692 + }, + { + "epoch": 8.778008298755188, + "grad_norm": 2.6725335121154785, + "learning_rate": 6.788842784693407e-06, + "log_odds_chosen": 10.67829418182373, + "log_odds_ratio": -0.00024256770848296583, + "logits/chosen": -0.3352501690387726, + "logits/rejected": -0.44572025537490845, + "logps/chosen": -0.0055665490217506886, + "logps/rejected": -2.264678955078125, + "loss": 0.3241, + "nll_loss": 0.0809900239109993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005566548788920045, + "rewards/margins": 0.2259112298488617, + "rewards/rejected": -0.22646789252758026, + "step": 12693 + }, + { + "epoch": 8.778699861687414, + "grad_norm": 3.7822775840759277, + "learning_rate": 6.7850007684032585e-06, + "log_odds_chosen": 11.56524658203125, + "log_odds_ratio": -1.3682654753210954e-05, + "logits/chosen": -0.3407299816608429, + "logits/rejected": -0.44203171133995056, + "logps/chosen": -0.00014287997328210622, + "logps/rejected": -2.7055978775024414, + "loss": 0.5001, + "nll_loss": 0.12502838671207428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4287997146311682e-05, + "rewards/margins": 0.27054551243782043, + "rewards/rejected": -0.27055978775024414, + "step": 12694 + }, + { + "epoch": 8.779391424619641, + "grad_norm": 3.6747331619262695, + "learning_rate": 6.781158752113109e-06, + "log_odds_chosen": 10.785626411437988, + "log_odds_ratio": -0.00014876978821121156, + "logits/chosen": -0.5394725799560547, + "logits/rejected": -0.572901725769043, + "logps/chosen": -6.740719254594296e-05, + "logps/rejected": -1.3617208003997803, + "loss": 0.418, + "nll_loss": 0.10448471456766129, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.740719072695356e-06, + "rewards/margins": 0.1361653357744217, + "rewards/rejected": -0.1361720860004425, + "step": 12695 + }, + { + "epoch": 8.780082987551868, + "grad_norm": 2.931102752685547, + "learning_rate": 6.77731673582296e-06, + "log_odds_chosen": 10.604806900024414, + "log_odds_ratio": -3.93532682210207e-05, + "logits/chosen": -0.4118063449859619, + "logits/rejected": -0.4413941502571106, + "logps/chosen": -0.00024991604732349515, + "logps/rejected": -1.7044813632965088, + "loss": 0.299, + "nll_loss": 0.0747336596250534, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4991604732349515e-05, + "rewards/margins": 0.17042315006256104, + "rewards/rejected": -0.17044813930988312, + "step": 12696 + }, + { + "epoch": 8.780774550484095, + "grad_norm": 3.015228509902954, + "learning_rate": 6.773474719532812e-06, + "log_odds_chosen": 11.405709266662598, + "log_odds_ratio": -4.749331856146455e-05, + "logits/chosen": -0.34732571244239807, + "logits/rejected": -0.3698383867740631, + "logps/chosen": -0.00031884806230664253, + "logps/rejected": -2.771390438079834, + "loss": 0.4244, + "nll_loss": 0.10610456019639969, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1884806958260015e-05, + "rewards/margins": 0.2771071791648865, + "rewards/rejected": -0.2771390378475189, + "step": 12697 + }, + { + "epoch": 8.781466113416322, + "grad_norm": 3.2666025161743164, + "learning_rate": 6.769632703242662e-06, + "log_odds_chosen": 10.806560516357422, + "log_odds_ratio": -4.139018710702658e-05, + "logits/chosen": -0.33733034133911133, + "logits/rejected": -0.3503572344779968, + "logps/chosen": -0.00016613573825452477, + "logps/rejected": -2.0087902545928955, + "loss": 0.5032, + "nll_loss": 0.1257835030555725, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6613574189250357e-05, + "rewards/margins": 0.20086240768432617, + "rewards/rejected": -0.2008790373802185, + "step": 12698 + }, + { + "epoch": 8.782157676348548, + "grad_norm": 5.432217121124268, + "learning_rate": 6.765790686952512e-06, + "log_odds_chosen": 10.60544204711914, + "log_odds_ratio": -7.107143756002188e-05, + "logits/chosen": -0.37671369314193726, + "logits/rejected": -0.36169812083244324, + "logps/chosen": -0.0004219827533233911, + "logps/rejected": -2.247098445892334, + "loss": 0.305, + "nll_loss": 0.07624978572130203, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219827678753063e-05, + "rewards/margins": 0.22466763854026794, + "rewards/rejected": -0.22470983862876892, + "step": 12699 + }, + { + "epoch": 8.782849239280775, + "grad_norm": 3.230746269226074, + "learning_rate": 6.761948670662365e-06, + "log_odds_chosen": 11.212976455688477, + "log_odds_ratio": -2.6032004825538024e-05, + "logits/chosen": -0.11250437796115875, + "logits/rejected": -0.10483334958553314, + "logps/chosen": -0.00019747507758438587, + "logps/rejected": -1.8300416469573975, + "loss": 0.2308, + "nll_loss": 0.05768878012895584, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9747507394640706e-05, + "rewards/margins": 0.18298442661762238, + "rewards/rejected": -0.18300415575504303, + "step": 12700 + }, + { + "epoch": 8.783540802213002, + "grad_norm": 2.508547782897949, + "learning_rate": 6.758106654372215e-06, + "log_odds_chosen": 9.457145690917969, + "log_odds_ratio": -0.0013111267471686006, + "logits/chosen": -0.25359445810317993, + "logits/rejected": -0.09427622705698013, + "logps/chosen": -0.0009612166322767735, + "logps/rejected": -1.4299101829528809, + "loss": 0.3206, + "nll_loss": 0.08000911772251129, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.612165740691125e-05, + "rewards/margins": 0.1428948938846588, + "rewards/rejected": -0.14299100637435913, + "step": 12701 + }, + { + "epoch": 8.784232365145229, + "grad_norm": 4.50275993347168, + "learning_rate": 6.754264638082065e-06, + "log_odds_chosen": 11.06375789642334, + "log_odds_ratio": -3.1684601708548144e-05, + "logits/chosen": -0.05529871582984924, + "logits/rejected": -0.04770771414041519, + "logps/chosen": -0.00018408260075375438, + "logps/rejected": -2.2451171875, + "loss": 0.488, + "nll_loss": 0.12198619544506073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8408260075375438e-05, + "rewards/margins": 0.22449329495429993, + "rewards/rejected": -0.22451171278953552, + "step": 12702 + }, + { + "epoch": 8.784923928077456, + "grad_norm": 3.462139844894409, + "learning_rate": 6.750422621791917e-06, + "log_odds_chosen": 11.399433135986328, + "log_odds_ratio": -1.8106655261362903e-05, + "logits/chosen": -0.27013203501701355, + "logits/rejected": -0.31153810024261475, + "logps/chosen": -0.00017975937225855887, + "logps/rejected": -2.3728885650634766, + "loss": 0.4365, + "nll_loss": 0.10911567509174347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.797593904484529e-05, + "rewards/margins": 0.23727090656757355, + "rewards/rejected": -0.23728886246681213, + "step": 12703 + }, + { + "epoch": 8.785615491009683, + "grad_norm": 3.590181589126587, + "learning_rate": 6.746580605501768e-06, + "log_odds_chosen": 12.149531364440918, + "log_odds_ratio": -1.190086368296761e-05, + "logits/chosen": -0.37782835960388184, + "logits/rejected": -0.43353283405303955, + "logps/chosen": -7.747412018943578e-05, + "logps/rejected": -2.699915885925293, + "loss": 0.3816, + "nll_loss": 0.09540417790412903, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.747412382741459e-06, + "rewards/margins": 0.26998385787010193, + "rewards/rejected": -0.26999160647392273, + "step": 12704 + }, + { + "epoch": 8.78630705394191, + "grad_norm": 2.9558005332946777, + "learning_rate": 6.7427385892116184e-06, + "log_odds_chosen": 12.330540657043457, + "log_odds_ratio": -7.616674338351004e-06, + "logits/chosen": -0.32497966289520264, + "logits/rejected": -0.3744075894355774, + "logps/chosen": -0.00010802918404806405, + "logps/rejected": -3.1242334842681885, + "loss": 0.4138, + "nll_loss": 0.10344050079584122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0802918950503226e-05, + "rewards/margins": 0.3124125301837921, + "rewards/rejected": -0.31242334842681885, + "step": 12705 + }, + { + "epoch": 8.786998616874136, + "grad_norm": 3.709592580795288, + "learning_rate": 6.73889657292147e-06, + "log_odds_chosen": 9.82625961303711, + "log_odds_ratio": -0.0006991361733525991, + "logits/chosen": -0.666206955909729, + "logits/rejected": -0.7812671065330505, + "logps/chosen": -0.0008450027671642601, + "logps/rejected": -1.7990524768829346, + "loss": 0.3182, + "nll_loss": 0.0794852152466774, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.450027962680906e-05, + "rewards/margins": 0.17982074618339539, + "rewards/rejected": -0.1799052357673645, + "step": 12706 + }, + { + "epoch": 8.787690179806363, + "grad_norm": 3.0048162937164307, + "learning_rate": 6.735054556631321e-06, + "log_odds_chosen": 11.722905158996582, + "log_odds_ratio": -0.00030517796403728426, + "logits/chosen": -0.16083022952079773, + "logits/rejected": -0.28174495697021484, + "logps/chosen": -0.0002863854169845581, + "logps/rejected": -2.798520803451538, + "loss": 0.4008, + "nll_loss": 0.1001795083284378, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8638543881243095e-05, + "rewards/margins": 0.2798234224319458, + "rewards/rejected": -0.2798520624637604, + "step": 12707 + }, + { + "epoch": 8.78838174273859, + "grad_norm": 4.8236403465271, + "learning_rate": 6.731212540341171e-06, + "log_odds_chosen": 11.70523452758789, + "log_odds_ratio": -3.543759521562606e-05, + "logits/chosen": -0.1501210629940033, + "logits/rejected": -0.18344929814338684, + "logps/chosen": -0.0006368308095261455, + "logps/rejected": -3.0577917098999023, + "loss": 0.4492, + "nll_loss": 0.11230667680501938, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.368308095261455e-05, + "rewards/margins": 0.30571550130844116, + "rewards/rejected": -0.3057791590690613, + "step": 12708 + }, + { + "epoch": 8.789073305670817, + "grad_norm": 5.425579071044922, + "learning_rate": 6.727370524051023e-06, + "log_odds_chosen": 11.123497009277344, + "log_odds_ratio": -2.579005558800418e-05, + "logits/chosen": -0.5647867918014526, + "logits/rejected": -0.6334821581840515, + "logps/chosen": -0.00023311632685363293, + "logps/rejected": -2.4519762992858887, + "loss": 0.6806, + "nll_loss": 0.17014306783676147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3311633412959054e-05, + "rewards/margins": 0.2451743185520172, + "rewards/rejected": -0.2451976239681244, + "step": 12709 + }, + { + "epoch": 8.789764868603044, + "grad_norm": 2.829416513442993, + "learning_rate": 6.723528507760873e-06, + "log_odds_chosen": 12.006536483764648, + "log_odds_ratio": -7.780406122037675e-06, + "logits/chosen": -0.2859390377998352, + "logits/rejected": -0.3590567111968994, + "logps/chosen": -7.057748734951019e-05, + "logps/rejected": -2.3626785278320312, + "loss": 0.3268, + "nll_loss": 0.08168688416481018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.057748007355258e-06, + "rewards/margins": 0.2362608164548874, + "rewards/rejected": -0.23626789450645447, + "step": 12710 + }, + { + "epoch": 8.79045643153527, + "grad_norm": 2.6061692237854004, + "learning_rate": 6.719686491470724e-06, + "log_odds_chosen": 11.23437213897705, + "log_odds_ratio": -5.0204274884890765e-05, + "logits/chosen": -0.7914714813232422, + "logits/rejected": -0.7617835998535156, + "logps/chosen": -8.995120879262686e-05, + "logps/rejected": -2.085355520248413, + "loss": 0.3244, + "nll_loss": 0.08109920471906662, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995120879262686e-06, + "rewards/margins": 0.20852655172348022, + "rewards/rejected": -0.2085355520248413, + "step": 12711 + }, + { + "epoch": 8.791147994467497, + "grad_norm": 6.227917671203613, + "learning_rate": 6.715844475180575e-06, + "log_odds_chosen": 10.377006530761719, + "log_odds_ratio": -0.00018965032359119505, + "logits/chosen": -0.47637438774108887, + "logits/rejected": -0.36658811569213867, + "logps/chosen": -0.00047040573554113507, + "logps/rejected": -2.130073070526123, + "loss": 0.3822, + "nll_loss": 0.09552697837352753, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.704057573690079e-05, + "rewards/margins": 0.21296028792858124, + "rewards/rejected": -0.21300733089447021, + "step": 12712 + }, + { + "epoch": 8.791839557399724, + "grad_norm": 4.331904411315918, + "learning_rate": 6.712002458890426e-06, + "log_odds_chosen": 11.185213088989258, + "log_odds_ratio": -6.156775634735823e-05, + "logits/chosen": -0.012590788304805756, + "logits/rejected": -0.12466521561145782, + "logps/chosen": -0.0002501815906725824, + "logps/rejected": -2.258512258529663, + "loss": 0.4147, + "nll_loss": 0.10368071496486664, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.501816015865188e-05, + "rewards/margins": 0.2258262038230896, + "rewards/rejected": -0.22585120797157288, + "step": 12713 + }, + { + "epoch": 8.792531120331951, + "grad_norm": 3.270273447036743, + "learning_rate": 6.708160442600277e-06, + "log_odds_chosen": 11.703085899353027, + "log_odds_ratio": -1.3806956303596962e-05, + "logits/chosen": -0.054852619767189026, + "logits/rejected": -0.10522682964801788, + "logps/chosen": -8.263815107056871e-05, + "logps/rejected": -2.3079423904418945, + "loss": 0.3706, + "nll_loss": 0.09264373034238815, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.26381437946111e-06, + "rewards/margins": 0.23078598082065582, + "rewards/rejected": -0.2307942509651184, + "step": 12714 + }, + { + "epoch": 8.793222683264178, + "grad_norm": 3.0255978107452393, + "learning_rate": 6.704318426310127e-06, + "log_odds_chosen": 11.447072982788086, + "log_odds_ratio": -2.2637163056060672e-05, + "logits/chosen": -0.2060781866312027, + "logits/rejected": -0.2843821048736572, + "logps/chosen": -0.0001971060410141945, + "logps/rejected": -2.666531562805176, + "loss": 0.2886, + "nll_loss": 0.07215721905231476, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9710603737621568e-05, + "rewards/margins": 0.26663345098495483, + "rewards/rejected": -0.2666531503200531, + "step": 12715 + }, + { + "epoch": 8.793914246196405, + "grad_norm": 3.2624900341033936, + "learning_rate": 6.700476410019979e-06, + "log_odds_chosen": 11.005241394042969, + "log_odds_ratio": -4.519320646068081e-05, + "logits/chosen": 0.07528844475746155, + "logits/rejected": 0.016680315136909485, + "logps/chosen": -0.00014727868256159127, + "logps/rejected": -1.9385497570037842, + "loss": 0.323, + "nll_loss": 0.08074452728033066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.472786971135065e-05, + "rewards/margins": 0.19384025037288666, + "rewards/rejected": -0.19385498762130737, + "step": 12716 + }, + { + "epoch": 8.794605809128631, + "grad_norm": 6.010182857513428, + "learning_rate": 6.69663439372983e-06, + "log_odds_chosen": 10.442169189453125, + "log_odds_ratio": -7.135375926736742e-05, + "logits/chosen": 0.12811151146888733, + "logits/rejected": 0.1726951003074646, + "logps/chosen": -0.001846662606112659, + "logps/rejected": -2.3940958976745605, + "loss": 0.6773, + "nll_loss": 0.1693248599767685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018466624896973372, + "rewards/margins": 0.23922495543956757, + "rewards/rejected": -0.23940961062908173, + "step": 12717 + }, + { + "epoch": 8.795297372060858, + "grad_norm": 3.396695852279663, + "learning_rate": 6.69279237743968e-06, + "log_odds_chosen": 10.815861701965332, + "log_odds_ratio": -0.00030398424132727087, + "logits/chosen": -0.3280583620071411, + "logits/rejected": -0.3126670718193054, + "logps/chosen": -0.0002372421440668404, + "logps/rejected": -2.193223237991333, + "loss": 0.3949, + "nll_loss": 0.09870284795761108, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3724214770481922e-05, + "rewards/margins": 0.2192986160516739, + "rewards/rejected": -0.2193223387002945, + "step": 12718 + }, + { + "epoch": 8.795988934993085, + "grad_norm": 3.7363529205322266, + "learning_rate": 6.688950361149532e-06, + "log_odds_chosen": 10.347034454345703, + "log_odds_ratio": -0.00011987592733930796, + "logits/chosen": -0.3330230116844177, + "logits/rejected": -0.39492470026016235, + "logps/chosen": -0.0006288375006988645, + "logps/rejected": -2.307096481323242, + "loss": 0.3899, + "nll_loss": 0.09745712578296661, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.288375152507797e-05, + "rewards/margins": 0.2306467741727829, + "rewards/rejected": -0.23070964217185974, + "step": 12719 + }, + { + "epoch": 8.796680497925312, + "grad_norm": 4.139132022857666, + "learning_rate": 6.685108344859382e-06, + "log_odds_chosen": 12.102994918823242, + "log_odds_ratio": -5.405969568528235e-05, + "logits/chosen": -0.44276031851768494, + "logits/rejected": -0.5707353353500366, + "logps/chosen": -0.00011724488285835832, + "logps/rejected": -2.9865834712982178, + "loss": 0.2988, + "nll_loss": 0.07468465715646744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1724488103936892e-05, + "rewards/margins": 0.29864659905433655, + "rewards/rejected": -0.2986583113670349, + "step": 12720 + }, + { + "epoch": 8.797372060857539, + "grad_norm": 3.834527015686035, + "learning_rate": 6.681266328569233e-06, + "log_odds_chosen": 11.772808074951172, + "log_odds_ratio": -1.9629087546491064e-05, + "logits/chosen": -0.4522663354873657, + "logits/rejected": -0.4562579393386841, + "logps/chosen": -0.00013129066792316735, + "logps/rejected": -2.5432262420654297, + "loss": 0.3911, + "nll_loss": 0.09776200354099274, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3129067156114615e-05, + "rewards/margins": 0.2543094754219055, + "rewards/rejected": -0.2543226182460785, + "step": 12721 + }, + { + "epoch": 8.798063623789766, + "grad_norm": 4.2890238761901855, + "learning_rate": 6.6774243122790845e-06, + "log_odds_chosen": 11.038679122924805, + "log_odds_ratio": -0.00011169728531967849, + "logits/chosen": -0.16470174491405487, + "logits/rejected": -0.12332076579332352, + "logps/chosen": -0.00019009933748748153, + "logps/rejected": -2.4229607582092285, + "loss": 0.4674, + "nll_loss": 0.11683283746242523, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9009934476343915e-05, + "rewards/margins": 0.24227705597877502, + "rewards/rejected": -0.24229606986045837, + "step": 12722 + }, + { + "epoch": 8.798755186721992, + "grad_norm": 3.0468249320983887, + "learning_rate": 6.673582295988935e-06, + "log_odds_chosen": 9.727727890014648, + "log_odds_ratio": -0.00012284204422030598, + "logits/chosen": -0.4163380563259125, + "logits/rejected": -0.4225500822067261, + "logps/chosen": -0.0009680213988758624, + "logps/rejected": -2.3284566402435303, + "loss": 0.3026, + "nll_loss": 0.07564578950405121, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.680214861873537e-05, + "rewards/margins": 0.2327488511800766, + "rewards/rejected": -0.23284566402435303, + "step": 12723 + }, + { + "epoch": 8.79944674965422, + "grad_norm": 4.225685119628906, + "learning_rate": 6.669740279698786e-06, + "log_odds_chosen": 11.941052436828613, + "log_odds_ratio": -3.5983946872875094e-05, + "logits/chosen": -0.12287880480289459, + "logits/rejected": -0.22012335062026978, + "logps/chosen": -0.0004759762668982148, + "logps/rejected": -3.008145570755005, + "loss": 0.3765, + "nll_loss": 0.09410983324050903, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7597633965779096e-05, + "rewards/margins": 0.3007669746875763, + "rewards/rejected": -0.30081456899642944, + "step": 12724 + }, + { + "epoch": 8.800138312586446, + "grad_norm": 3.547755479812622, + "learning_rate": 6.665898263408638e-06, + "log_odds_chosen": 10.471291542053223, + "log_odds_ratio": -0.00019993323076050729, + "logits/chosen": -0.3355550169944763, + "logits/rejected": -0.39193442463874817, + "logps/chosen": -0.00028798278071917593, + "logps/rejected": -1.8607462644577026, + "loss": 0.3266, + "nll_loss": 0.08162867277860641, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8798280254704878e-05, + "rewards/margins": 0.1860458254814148, + "rewards/rejected": -0.1860746443271637, + "step": 12725 + }, + { + "epoch": 8.800829875518673, + "grad_norm": 3.7131428718566895, + "learning_rate": 6.662056247118488e-06, + "log_odds_chosen": 11.517807006835938, + "log_odds_ratio": -2.6031164452433586e-05, + "logits/chosen": -0.22525209188461304, + "logits/rejected": -0.2523791790008545, + "logps/chosen": -0.00043190247379243374, + "logps/rejected": -3.1265716552734375, + "loss": 0.4177, + "nll_loss": 0.1044142097234726, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3190248106839135e-05, + "rewards/margins": 0.31261399388313293, + "rewards/rejected": -0.3126571774482727, + "step": 12726 + }, + { + "epoch": 8.8015214384509, + "grad_norm": 3.570354700088501, + "learning_rate": 6.658214230828338e-06, + "log_odds_chosen": 10.523756980895996, + "log_odds_ratio": -0.00022319788695313036, + "logits/chosen": -0.08164535462856293, + "logits/rejected": -0.4064290523529053, + "logps/chosen": -0.0003809003974311054, + "logps/rejected": -1.9425835609436035, + "loss": 0.398, + "nll_loss": 0.09946616739034653, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809004192589782e-05, + "rewards/margins": 0.19422025978565216, + "rewards/rejected": -0.19425833225250244, + "step": 12727 + }, + { + "epoch": 8.802213001383127, + "grad_norm": 4.042540073394775, + "learning_rate": 6.654372214538191e-06, + "log_odds_chosen": 12.080122947692871, + "log_odds_ratio": -1.2566923942358699e-05, + "logits/chosen": -0.351513534784317, + "logits/rejected": -0.5006579756736755, + "logps/chosen": -0.00039561832090839744, + "logps/rejected": -3.51840877532959, + "loss": 0.4536, + "nll_loss": 0.11340173333883286, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9561837184010074e-05, + "rewards/margins": 0.3518013060092926, + "rewards/rejected": -0.3518408536911011, + "step": 12728 + }, + { + "epoch": 8.802904564315353, + "grad_norm": 4.987198829650879, + "learning_rate": 6.650530198248041e-06, + "log_odds_chosen": 11.424623489379883, + "log_odds_ratio": -5.1819573855027556e-05, + "logits/chosen": -0.5144933462142944, + "logits/rejected": -0.5797327756881714, + "logps/chosen": -0.0001781594182830304, + "logps/rejected": -2.4864256381988525, + "loss": 0.5341, + "nll_loss": 0.1335185170173645, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.78159425558988e-05, + "rewards/margins": 0.2486247718334198, + "rewards/rejected": -0.24864259362220764, + "step": 12729 + }, + { + "epoch": 8.80359612724758, + "grad_norm": 4.397002220153809, + "learning_rate": 6.646688181957891e-06, + "log_odds_chosen": 10.954423904418945, + "log_odds_ratio": -0.00013551233860198408, + "logits/chosen": 0.3836933672428131, + "logits/rejected": 0.2545219659805298, + "logps/chosen": -0.0003029437211807817, + "logps/rejected": -2.4139468669891357, + "loss": 0.6459, + "nll_loss": 0.16146302223205566, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0294373573269695e-05, + "rewards/margins": 0.24136443436145782, + "rewards/rejected": -0.24139472842216492, + "step": 12730 + }, + { + "epoch": 8.804287690179807, + "grad_norm": 3.042309284210205, + "learning_rate": 6.642846165667743e-06, + "log_odds_chosen": 11.329593658447266, + "log_odds_ratio": -2.613785363791976e-05, + "logits/chosen": -0.47953522205352783, + "logits/rejected": -0.503200888633728, + "logps/chosen": -0.00014091703633312136, + "logps/rejected": -2.2789580821990967, + "loss": 0.3789, + "nll_loss": 0.0947137251496315, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4091704542806838e-05, + "rewards/margins": 0.22788169980049133, + "rewards/rejected": -0.2278957962989807, + "step": 12731 + }, + { + "epoch": 8.804979253112034, + "grad_norm": 3.1978232860565186, + "learning_rate": 6.639004149377594e-06, + "log_odds_chosen": 11.014985084533691, + "log_odds_ratio": -0.0003254017501603812, + "logits/chosen": -0.42042186856269836, + "logits/rejected": -0.44931113719940186, + "logps/chosen": -0.000167686928762123, + "logps/rejected": -2.00655460357666, + "loss": 0.3797, + "nll_loss": 0.09488354623317719, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6768692148616537e-05, + "rewards/margins": 0.20063868165016174, + "rewards/rejected": -0.20065546035766602, + "step": 12732 + }, + { + "epoch": 8.80567081604426, + "grad_norm": 3.5881807804107666, + "learning_rate": 6.6351621330874445e-06, + "log_odds_chosen": 8.955921173095703, + "log_odds_ratio": -0.0038107852451503277, + "logits/chosen": -0.3033401370048523, + "logits/rejected": -0.18464888632297516, + "logps/chosen": -0.026263626292347908, + "logps/rejected": -1.458088994026184, + "loss": 0.3769, + "nll_loss": 0.09385372698307037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026263627223670483, + "rewards/margins": 0.14318254590034485, + "rewards/rejected": -0.14580890536308289, + "step": 12733 + }, + { + "epoch": 8.806362378976488, + "grad_norm": 4.713569164276123, + "learning_rate": 6.631320116797296e-06, + "log_odds_chosen": 12.279664993286133, + "log_odds_ratio": -0.00010816467693075538, + "logits/chosen": -0.08790473639965057, + "logits/rejected": -0.1655513495206833, + "logps/chosen": -0.00010481144272489473, + "logps/rejected": -2.963526487350464, + "loss": 0.6965, + "nll_loss": 0.17410698533058167, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0481143362994771e-05, + "rewards/margins": 0.2963421642780304, + "rewards/rejected": -0.2963526248931885, + "step": 12734 + }, + { + "epoch": 8.807053941908714, + "grad_norm": 5.424431800842285, + "learning_rate": 6.627478100507147e-06, + "log_odds_chosen": 11.423365592956543, + "log_odds_ratio": -1.666683965595439e-05, + "logits/chosen": 0.09181933850049973, + "logits/rejected": 0.11511439085006714, + "logps/chosen": -7.890022243373096e-05, + "logps/rejected": -1.944343090057373, + "loss": 0.549, + "nll_loss": 0.13723790645599365, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.890022970968857e-06, + "rewards/margins": 0.19442641735076904, + "rewards/rejected": -0.1944343000650406, + "step": 12735 + }, + { + "epoch": 8.807745504840941, + "grad_norm": 3.7672154903411865, + "learning_rate": 6.623636084216997e-06, + "log_odds_chosen": 10.905462265014648, + "log_odds_ratio": -6.981779006309807e-05, + "logits/chosen": -0.5699967741966248, + "logits/rejected": -0.6361503005027771, + "logps/chosen": -0.00023144498118199408, + "logps/rejected": -1.8278840780258179, + "loss": 0.304, + "nll_loss": 0.07600371539592743, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3144497390603647e-05, + "rewards/margins": 0.18276527523994446, + "rewards/rejected": -0.1827884167432785, + "step": 12736 + }, + { + "epoch": 8.808437067773168, + "grad_norm": 2.5110762119293213, + "learning_rate": 6.619794067926849e-06, + "log_odds_chosen": 11.414780616760254, + "log_odds_ratio": -1.6504767700098455e-05, + "logits/chosen": -0.24265910685062408, + "logits/rejected": -0.21113057434558868, + "logps/chosen": -0.0001499099307693541, + "logps/rejected": -2.345609664916992, + "loss": 0.2051, + "nll_loss": 0.05127387121319771, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4990991985541768e-05, + "rewards/margins": 0.2345459908246994, + "rewards/rejected": -0.23456096649169922, + "step": 12737 + }, + { + "epoch": 8.809128630705395, + "grad_norm": 3.5081794261932373, + "learning_rate": 6.615952051636699e-06, + "log_odds_chosen": 10.710822105407715, + "log_odds_ratio": -0.0003082406474277377, + "logits/chosen": -0.42299550771713257, + "logits/rejected": -0.458909809589386, + "logps/chosen": -0.00019325618632137775, + "logps/rejected": -2.1691081523895264, + "loss": 0.3748, + "nll_loss": 0.09366155415773392, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9325620087329298e-05, + "rewards/margins": 0.21689146757125854, + "rewards/rejected": -0.21691077947616577, + "step": 12738 + }, + { + "epoch": 8.809820193637622, + "grad_norm": 2.9431204795837402, + "learning_rate": 6.61211003534655e-06, + "log_odds_chosen": 11.093559265136719, + "log_odds_ratio": -0.00018852236098609865, + "logits/chosen": -0.7440529465675354, + "logits/rejected": -0.7985327243804932, + "logps/chosen": -0.0003106665099039674, + "logps/rejected": -2.407902240753174, + "loss": 0.3018, + "nll_loss": 0.07542867213487625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.106664735241793e-05, + "rewards/margins": 0.24075916409492493, + "rewards/rejected": -0.2407902181148529, + "step": 12739 + }, + { + "epoch": 8.810511756569849, + "grad_norm": 4.121976852416992, + "learning_rate": 6.608268019056401e-06, + "log_odds_chosen": 11.665343284606934, + "log_odds_ratio": -1.7321885025012307e-05, + "logits/chosen": -0.3306739330291748, + "logits/rejected": -0.3703592121601105, + "logps/chosen": -0.0002438789524603635, + "logps/rejected": -2.5956716537475586, + "loss": 0.3885, + "nll_loss": 0.097113698720932, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4387896701227874e-05, + "rewards/margins": 0.2595427930355072, + "rewards/rejected": -0.2595672011375427, + "step": 12740 + }, + { + "epoch": 8.811203319502075, + "grad_norm": 4.804633617401123, + "learning_rate": 6.604426002766252e-06, + "log_odds_chosen": 10.657184600830078, + "log_odds_ratio": -6.92599278409034e-05, + "logits/chosen": -0.5924953818321228, + "logits/rejected": -0.6714246273040771, + "logps/chosen": -0.0002521405986044556, + "logps/rejected": -2.042790174484253, + "loss": 0.3351, + "nll_loss": 0.08377320319414139, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.521405986044556e-05, + "rewards/margins": 0.20425380766391754, + "rewards/rejected": -0.20427900552749634, + "step": 12741 + }, + { + "epoch": 8.811894882434302, + "grad_norm": 8.727124214172363, + "learning_rate": 6.600583986476103e-06, + "log_odds_chosen": 9.931739807128906, + "log_odds_ratio": -0.0005722044734284282, + "logits/chosen": 0.05851912498474121, + "logits/rejected": -0.1667538285255432, + "logps/chosen": -0.0011412083404138684, + "logps/rejected": -2.185616970062256, + "loss": 0.4747, + "nll_loss": 0.11861248314380646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011412083404138684, + "rewards/margins": 0.21844759583473206, + "rewards/rejected": -0.21856170892715454, + "step": 12742 + }, + { + "epoch": 8.812586445366529, + "grad_norm": 2.942943811416626, + "learning_rate": 6.5967419701859545e-06, + "log_odds_chosen": 10.176290512084961, + "log_odds_ratio": -0.0004964149557054043, + "logits/chosen": -0.1795501857995987, + "logits/rejected": -0.2746792435646057, + "logps/chosen": -0.0007704338058829308, + "logps/rejected": -1.7060078382492065, + "loss": 0.2599, + "nll_loss": 0.06492795795202255, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.704339077463374e-05, + "rewards/margins": 0.1705237329006195, + "rewards/rejected": -0.1706007719039917, + "step": 12743 + }, + { + "epoch": 8.813278008298756, + "grad_norm": 3.4092957973480225, + "learning_rate": 6.592899953895805e-06, + "log_odds_chosen": 11.288472175598145, + "log_odds_ratio": -2.884104469558224e-05, + "logits/chosen": -0.457816481590271, + "logits/rejected": -0.5336398482322693, + "logps/chosen": -0.00020037713693454862, + "logps/rejected": -2.184605598449707, + "loss": 0.3271, + "nll_loss": 0.08177774399518967, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0037714421050623e-05, + "rewards/margins": 0.21844051778316498, + "rewards/rejected": -0.2184605598449707, + "step": 12744 + }, + { + "epoch": 8.813969571230983, + "grad_norm": 2.6864378452301025, + "learning_rate": 6.589057937605655e-06, + "log_odds_chosen": 11.065103530883789, + "log_odds_ratio": -4.73825384688098e-05, + "logits/chosen": -0.09832610189914703, + "logits/rejected": -0.10603950172662735, + "logps/chosen": -0.00018619374895934016, + "logps/rejected": -2.327572822570801, + "loss": 0.2336, + "nll_loss": 0.058385420590639114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8619373804540373e-05, + "rewards/margins": 0.2327386736869812, + "rewards/rejected": -0.2327573001384735, + "step": 12745 + }, + { + "epoch": 8.81466113416321, + "grad_norm": 3.6604156494140625, + "learning_rate": 6.585215921315508e-06, + "log_odds_chosen": 11.522943496704102, + "log_odds_ratio": -4.35113615822047e-05, + "logits/chosen": -0.1032416969537735, + "logits/rejected": -0.17721807956695557, + "logps/chosen": -0.00027219849289394915, + "logps/rejected": -3.0030527114868164, + "loss": 0.4215, + "nll_loss": 0.10537984222173691, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.721985219977796e-05, + "rewards/margins": 0.30027803778648376, + "rewards/rejected": -0.3003052771091461, + "step": 12746 + }, + { + "epoch": 8.815352697095436, + "grad_norm": 5.408121585845947, + "learning_rate": 6.5813739050253575e-06, + "log_odds_chosen": 10.814599990844727, + "log_odds_ratio": -0.0005240280297584832, + "logits/chosen": -0.16442151367664337, + "logits/rejected": -0.22039881348609924, + "logps/chosen": -0.00017183725140057504, + "logps/rejected": -1.8447155952453613, + "loss": 0.4989, + "nll_loss": 0.12467427551746368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7183725503855385e-05, + "rewards/margins": 0.18445438146591187, + "rewards/rejected": -0.18447156250476837, + "step": 12747 + }, + { + "epoch": 8.816044260027663, + "grad_norm": 5.701321125030518, + "learning_rate": 6.577531888735208e-06, + "log_odds_chosen": 10.8905611038208, + "log_odds_ratio": -2.500377013348043e-05, + "logits/chosen": -0.32780396938323975, + "logits/rejected": -0.30016857385635376, + "logps/chosen": -8.67684866534546e-05, + "logps/rejected": -1.7069416046142578, + "loss": 0.6302, + "nll_loss": 0.15754103660583496, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.676849574840162e-06, + "rewards/margins": 0.17068548500537872, + "rewards/rejected": -0.17069417238235474, + "step": 12748 + }, + { + "epoch": 8.81673582295989, + "grad_norm": 3.65494704246521, + "learning_rate": 6.573689872445059e-06, + "log_odds_chosen": 12.122621536254883, + "log_odds_ratio": -1.0510473657632247e-05, + "logits/chosen": -0.31138962507247925, + "logits/rejected": -0.33350175619125366, + "logps/chosen": -8.071251795627177e-05, + "logps/rejected": -2.522864818572998, + "loss": 0.4539, + "nll_loss": 0.11346264183521271, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.071252523222938e-06, + "rewards/margins": 0.2522784173488617, + "rewards/rejected": -0.25228649377822876, + "step": 12749 + }, + { + "epoch": 8.817427385892117, + "grad_norm": 3.0768041610717773, + "learning_rate": 6.569847856154911e-06, + "log_odds_chosen": 11.274198532104492, + "log_odds_ratio": -3.721012399182655e-05, + "logits/chosen": -0.5060542225837708, + "logits/rejected": -0.6420814990997314, + "logps/chosen": -0.00011597540287766606, + "logps/rejected": -2.2312769889831543, + "loss": 0.2978, + "nll_loss": 0.07445670664310455, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1597541742958128e-05, + "rewards/margins": 0.22311611473560333, + "rewards/rejected": -0.22312772274017334, + "step": 12750 + }, + { + "epoch": 8.818118948824344, + "grad_norm": 3.526170492172241, + "learning_rate": 6.566005839864761e-06, + "log_odds_chosen": 11.549579620361328, + "log_odds_ratio": -4.6264962293207645e-05, + "logits/chosen": -0.5068678259849548, + "logits/rejected": -0.526757001876831, + "logps/chosen": -0.00010017196473199874, + "logps/rejected": -2.3823142051696777, + "loss": 0.3506, + "nll_loss": 0.08764593303203583, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0017196473199874e-05, + "rewards/margins": 0.23822137713432312, + "rewards/rejected": -0.23823140561580658, + "step": 12751 + }, + { + "epoch": 8.81881051175657, + "grad_norm": 3.2240893840789795, + "learning_rate": 6.562163823574611e-06, + "log_odds_chosen": 10.802983283996582, + "log_odds_ratio": -5.024659913033247e-05, + "logits/chosen": -0.5198428630828857, + "logits/rejected": -0.5384092330932617, + "logps/chosen": -0.0004417779855430126, + "logps/rejected": -2.2304627895355225, + "loss": 0.3948, + "nll_loss": 0.09869439899921417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.417780655785464e-05, + "rewards/margins": 0.223002091050148, + "rewards/rejected": -0.22304627299308777, + "step": 12752 + }, + { + "epoch": 8.819502074688797, + "grad_norm": 3.056912422180176, + "learning_rate": 6.558321807284464e-06, + "log_odds_chosen": 11.202829360961914, + "log_odds_ratio": -3.9832044421928003e-05, + "logits/chosen": -0.4405937194824219, + "logits/rejected": -0.4304983913898468, + "logps/chosen": -0.0003462162858340889, + "logps/rejected": -2.4193732738494873, + "loss": 0.3596, + "nll_loss": 0.08990298211574554, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.462163294898346e-05, + "rewards/margins": 0.24190272390842438, + "rewards/rejected": -0.24193733930587769, + "step": 12753 + }, + { + "epoch": 8.820193637621024, + "grad_norm": 3.57071590423584, + "learning_rate": 6.554479790994314e-06, + "log_odds_chosen": 11.801494598388672, + "log_odds_ratio": -8.60178770381026e-05, + "logits/chosen": -0.14969930052757263, + "logits/rejected": -0.1381671130657196, + "logps/chosen": -0.00033189854002557695, + "logps/rejected": -2.458753824234009, + "loss": 0.2773, + "nll_loss": 0.0693252757191658, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.318985545774922e-05, + "rewards/margins": 0.24584218859672546, + "rewards/rejected": -0.24587538838386536, + "step": 12754 + }, + { + "epoch": 8.820885200553251, + "grad_norm": 4.084262847900391, + "learning_rate": 6.550637774704164e-06, + "log_odds_chosen": 11.506061553955078, + "log_odds_ratio": -5.4857049690326676e-05, + "logits/chosen": -0.5423182249069214, + "logits/rejected": -0.5082879662513733, + "logps/chosen": -0.00020827885600738227, + "logps/rejected": -2.6145410537719727, + "loss": 0.3825, + "nll_loss": 0.09560876339673996, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.082788705592975e-05, + "rewards/margins": 0.26143327355384827, + "rewards/rejected": -0.26145410537719727, + "step": 12755 + }, + { + "epoch": 8.821576763485478, + "grad_norm": 2.3327832221984863, + "learning_rate": 6.546795758414016e-06, + "log_odds_chosen": 11.193686485290527, + "log_odds_ratio": -5.154366954229772e-05, + "logits/chosen": -0.5911445617675781, + "logits/rejected": -0.733811616897583, + "logps/chosen": -0.0001743299071677029, + "logps/rejected": -2.031386613845825, + "loss": 0.253, + "nll_loss": 0.06324849277734756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7432992535759695e-05, + "rewards/margins": 0.20312124490737915, + "rewards/rejected": -0.20313867926597595, + "step": 12756 + }, + { + "epoch": 8.822268326417705, + "grad_norm": 4.580896854400635, + "learning_rate": 6.542953742123867e-06, + "log_odds_chosen": 9.616424560546875, + "log_odds_ratio": -0.0015286378329619765, + "logits/chosen": -0.2255147099494934, + "logits/rejected": -0.14628499746322632, + "logps/chosen": -0.0004408976819831878, + "logps/rejected": -1.315403938293457, + "loss": 0.5038, + "nll_loss": 0.12580369412899017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4089771108701825e-05, + "rewards/margins": 0.13149632513523102, + "rewards/rejected": -0.13154040277004242, + "step": 12757 + }, + { + "epoch": 8.822959889349931, + "grad_norm": 3.8236727714538574, + "learning_rate": 6.5391117258337174e-06, + "log_odds_chosen": 10.288022994995117, + "log_odds_ratio": -9.033164678839967e-05, + "logits/chosen": -0.2098008245229721, + "logits/rejected": -0.24087196588516235, + "logps/chosen": -0.0004149182641413063, + "logps/rejected": -2.0759353637695312, + "loss": 0.358, + "nll_loss": 0.08948257565498352, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.149182859691791e-05, + "rewards/margins": 0.20755203068256378, + "rewards/rejected": -0.20759351551532745, + "step": 12758 + }, + { + "epoch": 8.823651452282158, + "grad_norm": 3.7073469161987305, + "learning_rate": 6.535269709543569e-06, + "log_odds_chosen": 11.815654754638672, + "log_odds_ratio": -1.0503194062039256e-05, + "logits/chosen": -0.21260342001914978, + "logits/rejected": -0.1908436417579651, + "logps/chosen": -0.0004060858045704663, + "logps/rejected": -2.8777055740356445, + "loss": 0.3155, + "nll_loss": 0.078867606818676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0608578274259344e-05, + "rewards/margins": 0.28772997856140137, + "rewards/rejected": -0.2877705693244934, + "step": 12759 + }, + { + "epoch": 8.824343015214385, + "grad_norm": 3.740062952041626, + "learning_rate": 6.53142769325342e-06, + "log_odds_chosen": 10.717554092407227, + "log_odds_ratio": -5.8261815865989774e-05, + "logits/chosen": -0.02434053272008896, + "logits/rejected": -0.04800887778401375, + "logps/chosen": -0.0002340275823371485, + "logps/rejected": -1.6554569005966187, + "loss": 0.338, + "nll_loss": 0.08449292927980423, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.340275932510849e-05, + "rewards/margins": 0.16552230715751648, + "rewards/rejected": -0.16554570198059082, + "step": 12760 + }, + { + "epoch": 8.825034578146612, + "grad_norm": 4.184948921203613, + "learning_rate": 6.52758567696327e-06, + "log_odds_chosen": 10.76385498046875, + "log_odds_ratio": -3.3372751204296947e-05, + "logits/chosen": -0.340414822101593, + "logits/rejected": -0.31318366527557373, + "logps/chosen": -0.0007341946475207806, + "logps/rejected": -2.632049560546875, + "loss": 0.4692, + "nll_loss": 0.11730688810348511, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.341946911765262e-05, + "rewards/margins": 0.2631315290927887, + "rewards/rejected": -0.2632049322128296, + "step": 12761 + }, + { + "epoch": 8.825726141078839, + "grad_norm": 5.250175476074219, + "learning_rate": 6.523743660673122e-06, + "log_odds_chosen": 11.203939437866211, + "log_odds_ratio": -0.0003403636219445616, + "logits/chosen": -0.3742835819721222, + "logits/rejected": -0.33408549427986145, + "logps/chosen": -0.0002061006671283394, + "logps/rejected": -2.6237549781799316, + "loss": 0.4048, + "nll_loss": 0.10115813463926315, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0610064893844537e-05, + "rewards/margins": 0.26235488057136536, + "rewards/rejected": -0.26237550377845764, + "step": 12762 + }, + { + "epoch": 8.826417704011066, + "grad_norm": 10.360694885253906, + "learning_rate": 6.519901644382972e-06, + "log_odds_chosen": 12.885150909423828, + "log_odds_ratio": -1.928115489135962e-05, + "logits/chosen": -0.2504520118236542, + "logits/rejected": -0.33734339475631714, + "logps/chosen": -0.00021348144218791276, + "logps/rejected": -4.043447971343994, + "loss": 0.4204, + "nll_loss": 0.10510491579771042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1348147129174322e-05, + "rewards/margins": 0.4043235182762146, + "rewards/rejected": -0.4043447971343994, + "step": 12763 + }, + { + "epoch": 8.827109266943292, + "grad_norm": 3.3477675914764404, + "learning_rate": 6.516059628092823e-06, + "log_odds_chosen": 11.33936595916748, + "log_odds_ratio": -3.511543764034286e-05, + "logits/chosen": -0.41620469093322754, + "logits/rejected": -0.473169207572937, + "logps/chosen": -7.452804129570723e-05, + "logps/rejected": -1.7823110818862915, + "loss": 0.3579, + "nll_loss": 0.08947095274925232, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.452804311469663e-06, + "rewards/margins": 0.17822366952896118, + "rewards/rejected": -0.1782311052083969, + "step": 12764 + }, + { + "epoch": 8.82780082987552, + "grad_norm": 2.7336463928222656, + "learning_rate": 6.512217611802675e-06, + "log_odds_chosen": 10.976394653320312, + "log_odds_ratio": -5.622572643915191e-05, + "logits/chosen": -0.1379978358745575, + "logits/rejected": -0.2915462553501129, + "logps/chosen": -0.00033241885830648243, + "logps/rejected": -1.9764206409454346, + "loss": 0.3088, + "nll_loss": 0.07719646394252777, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3241885830648243e-05, + "rewards/margins": 0.1976088136434555, + "rewards/rejected": -0.19764205813407898, + "step": 12765 + }, + { + "epoch": 8.828492392807746, + "grad_norm": 3.080021858215332, + "learning_rate": 6.508375595512525e-06, + "log_odds_chosen": 10.0333833694458, + "log_odds_ratio": -0.00017765231314115226, + "logits/chosen": -0.5421645641326904, + "logits/rejected": -0.5306675434112549, + "logps/chosen": -0.00044594579958356917, + "logps/rejected": -1.6764360666275024, + "loss": 0.346, + "nll_loss": 0.0864863246679306, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.459457704797387e-05, + "rewards/margins": 0.16759900748729706, + "rewards/rejected": -0.16764359176158905, + "step": 12766 + }, + { + "epoch": 8.829183955739973, + "grad_norm": 2.9879536628723145, + "learning_rate": 6.504533579222376e-06, + "log_odds_chosen": 10.86699390411377, + "log_odds_ratio": -4.3034284317400306e-05, + "logits/chosen": -0.01258639246225357, + "logits/rejected": 0.09293460100889206, + "logps/chosen": -0.00016677375242579728, + "logps/rejected": -1.8913809061050415, + "loss": 0.3406, + "nll_loss": 0.08515028655529022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6677373423590325e-05, + "rewards/margins": 0.18912141025066376, + "rewards/rejected": -0.18913809955120087, + "step": 12767 + }, + { + "epoch": 8.8298755186722, + "grad_norm": 4.226680278778076, + "learning_rate": 6.5006915629322275e-06, + "log_odds_chosen": 11.947617530822754, + "log_odds_ratio": -2.5241959519917145e-05, + "logits/chosen": -0.3539278507232666, + "logits/rejected": -0.3582938313484192, + "logps/chosen": -0.00020406056137289852, + "logps/rejected": -3.0730202198028564, + "loss": 0.3761, + "nll_loss": 0.09401322901248932, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0406056137289852e-05, + "rewards/margins": 0.30728161334991455, + "rewards/rejected": -0.3073020279407501, + "step": 12768 + }, + { + "epoch": 8.830567081604427, + "grad_norm": 4.433466911315918, + "learning_rate": 6.496849546642078e-06, + "log_odds_chosen": 12.175585746765137, + "log_odds_ratio": -9.85627411864698e-06, + "logits/chosen": 0.19564247131347656, + "logits/rejected": 0.14580342173576355, + "logps/chosen": -0.00020560537814162672, + "logps/rejected": -2.9848077297210693, + "loss": 0.4718, + "nll_loss": 0.11795386672019958, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.056053745036479e-05, + "rewards/margins": 0.2984602153301239, + "rewards/rejected": -0.2984807789325714, + "step": 12769 + }, + { + "epoch": 8.831258644536653, + "grad_norm": 2.693063974380493, + "learning_rate": 6.493007530351929e-06, + "log_odds_chosen": 10.376236915588379, + "log_odds_ratio": -0.00017671348177827895, + "logits/chosen": 0.007927365601062775, + "logits/rejected": -0.03316565603017807, + "logps/chosen": -0.0006801652489230037, + "logps/rejected": -2.863428831100464, + "loss": 0.3297, + "nll_loss": 0.08241531997919083, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.801652489230037e-05, + "rewards/margins": 0.28627485036849976, + "rewards/rejected": -0.28634288907051086, + "step": 12770 + }, + { + "epoch": 8.83195020746888, + "grad_norm": 5.1559977531433105, + "learning_rate": 6.4891655140617806e-06, + "log_odds_chosen": 11.185357093811035, + "log_odds_ratio": -3.578070754883811e-05, + "logits/chosen": -0.4364149570465088, + "logits/rejected": -0.4735616445541382, + "logps/chosen": -0.00016242300625890493, + "logps/rejected": -2.062422275543213, + "loss": 0.3282, + "nll_loss": 0.08205679804086685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6242302081082016e-05, + "rewards/margins": 0.20622599124908447, + "rewards/rejected": -0.20624223351478577, + "step": 12771 + }, + { + "epoch": 8.832641770401107, + "grad_norm": 4.903589725494385, + "learning_rate": 6.485323497771631e-06, + "log_odds_chosen": 11.552923202514648, + "log_odds_ratio": -2.0057505025761202e-05, + "logits/chosen": -0.35818684101104736, + "logits/rejected": -0.38597989082336426, + "logps/chosen": -0.00014166624168865383, + "logps/rejected": -2.558972120285034, + "loss": 0.3711, + "nll_loss": 0.09278266131877899, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4166622349875979e-05, + "rewards/margins": 0.25588303804397583, + "rewards/rejected": -0.2558972239494324, + "step": 12772 + }, + { + "epoch": 8.833333333333334, + "grad_norm": 2.9457814693450928, + "learning_rate": 6.481481481481481e-06, + "log_odds_chosen": 11.662984848022461, + "log_odds_ratio": -4.007641473435797e-05, + "logits/chosen": -0.1337454915046692, + "logits/rejected": -0.2075233906507492, + "logps/chosen": -0.00017183000454679132, + "logps/rejected": -2.5259475708007812, + "loss": 0.3374, + "nll_loss": 0.08435782045125961, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7183001546072774e-05, + "rewards/margins": 0.25257760286331177, + "rewards/rejected": -0.2525947690010071, + "step": 12773 + }, + { + "epoch": 8.83402489626556, + "grad_norm": 3.8707709312438965, + "learning_rate": 6.477639465191334e-06, + "log_odds_chosen": 12.129270553588867, + "log_odds_ratio": -6.537245099025313e-06, + "logits/chosen": -0.6894609928131104, + "logits/rejected": -0.6343203186988831, + "logps/chosen": -0.00013332456001080573, + "logps/rejected": -2.8702447414398193, + "loss": 0.3374, + "nll_loss": 0.08435693383216858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3332456546777394e-05, + "rewards/margins": 0.28701114654541016, + "rewards/rejected": -0.28702446818351746, + "step": 12774 + }, + { + "epoch": 8.834716459197788, + "grad_norm": 3.372408628463745, + "learning_rate": 6.4737974489011836e-06, + "log_odds_chosen": 11.362260818481445, + "log_odds_ratio": -2.4923283490352333e-05, + "logits/chosen": -0.23244208097457886, + "logits/rejected": -0.30278605222702026, + "logps/chosen": -0.00016180599050130695, + "logps/rejected": -2.273146152496338, + "loss": 0.3145, + "nll_loss": 0.07862184941768646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6180598322534934e-05, + "rewards/margins": 0.22729843854904175, + "rewards/rejected": -0.22731462121009827, + "step": 12775 + }, + { + "epoch": 8.835408022130014, + "grad_norm": 4.276350498199463, + "learning_rate": 6.469955432611034e-06, + "log_odds_chosen": 11.451852798461914, + "log_odds_ratio": -0.00010747795749921352, + "logits/chosen": -0.5826900005340576, + "logits/rejected": -0.622979998588562, + "logps/chosen": -0.00019800482550635934, + "logps/rejected": -2.329267740249634, + "loss": 0.2783, + "nll_loss": 0.06955676525831223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9800481823040172e-05, + "rewards/margins": 0.2329069823026657, + "rewards/rejected": -0.23292678594589233, + "step": 12776 + }, + { + "epoch": 8.836099585062241, + "grad_norm": 4.371342182159424, + "learning_rate": 6.466113416320886e-06, + "log_odds_chosen": 11.968100547790527, + "log_odds_ratio": -1.7586673493497074e-05, + "logits/chosen": 0.07722847908735275, + "logits/rejected": 0.11953555047512054, + "logps/chosen": -9.393729851581156e-05, + "logps/rejected": -2.5000882148742676, + "loss": 0.4745, + "nll_loss": 0.11862242221832275, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.393729669682216e-06, + "rewards/margins": 0.24999943375587463, + "rewards/rejected": -0.25000882148742676, + "step": 12777 + }, + { + "epoch": 8.836791147994468, + "grad_norm": 3.7952606678009033, + "learning_rate": 6.462271400030737e-06, + "log_odds_chosen": 12.824507713317871, + "log_odds_ratio": -1.2694898032350466e-05, + "logits/chosen": 0.04276101291179657, + "logits/rejected": -0.013445567339658737, + "logps/chosen": -0.00018952536629512906, + "logps/rejected": -3.921670436859131, + "loss": 0.3081, + "nll_loss": 0.07701949775218964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.895253808470443e-05, + "rewards/margins": 0.39214810729026794, + "rewards/rejected": -0.3921670615673065, + "step": 12778 + }, + { + "epoch": 8.837482710926695, + "grad_norm": 3.1971945762634277, + "learning_rate": 6.458429383740587e-06, + "log_odds_chosen": 10.57689094543457, + "log_odds_ratio": -4.320785592426546e-05, + "logits/chosen": -0.3199876844882965, + "logits/rejected": -0.33395707607269287, + "logps/chosen": -0.0002134922833647579, + "logps/rejected": -1.9453153610229492, + "loss": 0.3245, + "nll_loss": 0.08111564069986343, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1349231246858835e-05, + "rewards/margins": 0.19451019167900085, + "rewards/rejected": -0.19453153014183044, + "step": 12779 + }, + { + "epoch": 8.838174273858922, + "grad_norm": 3.570117950439453, + "learning_rate": 6.454587367450439e-06, + "log_odds_chosen": 10.053936004638672, + "log_odds_ratio": -0.0001422764325980097, + "logits/chosen": -0.28322821855545044, + "logits/rejected": -0.3680499196052551, + "logps/chosen": -0.00044421886559575796, + "logps/rejected": -1.7309963703155518, + "loss": 0.3456, + "nll_loss": 0.08637740463018417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4421885831980035e-05, + "rewards/margins": 0.17305521667003632, + "rewards/rejected": -0.17309962213039398, + "step": 12780 + }, + { + "epoch": 8.838865836791149, + "grad_norm": 4.41160249710083, + "learning_rate": 6.45074535116029e-06, + "log_odds_chosen": 11.318617820739746, + "log_odds_ratio": -8.401113882428035e-05, + "logits/chosen": 0.19311122596263885, + "logits/rejected": 0.1457151174545288, + "logps/chosen": -0.00013428172678686678, + "logps/rejected": -2.4825916290283203, + "loss": 0.4355, + "nll_loss": 0.10887414216995239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3428171769191977e-05, + "rewards/margins": 0.2482457309961319, + "rewards/rejected": -0.24825915694236755, + "step": 12781 + }, + { + "epoch": 8.839557399723375, + "grad_norm": 3.3861706256866455, + "learning_rate": 6.44690333487014e-06, + "log_odds_chosen": 11.522031784057617, + "log_odds_ratio": -0.00010014892177423462, + "logits/chosen": -0.46342214941978455, + "logits/rejected": -0.5653108954429626, + "logps/chosen": -0.0008928571478463709, + "logps/rejected": -2.940377950668335, + "loss": 0.4646, + "nll_loss": 0.1161460429430008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.928572060540318e-05, + "rewards/margins": 0.29394853115081787, + "rewards/rejected": -0.294037789106369, + "step": 12782 + }, + { + "epoch": 8.840248962655602, + "grad_norm": 3.975029945373535, + "learning_rate": 6.44306131857999e-06, + "log_odds_chosen": 11.149683952331543, + "log_odds_ratio": -3.567495150491595e-05, + "logits/chosen": -0.31347280740737915, + "logits/rejected": -0.3032105565071106, + "logps/chosen": -0.0001983102411031723, + "logps/rejected": -1.9459149837493896, + "loss": 0.4983, + "nll_loss": 0.12457741796970367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9831026293104514e-05, + "rewards/margins": 0.19457167387008667, + "rewards/rejected": -0.19459150731563568, + "step": 12783 + }, + { + "epoch": 8.840940525587829, + "grad_norm": 4.7988600730896, + "learning_rate": 6.439219302289842e-06, + "log_odds_chosen": 12.046151161193848, + "log_odds_ratio": -4.2681695049395785e-05, + "logits/chosen": -0.053906239569187164, + "logits/rejected": -0.14985615015029907, + "logps/chosen": -0.00016439243336208165, + "logps/rejected": -2.840974807739258, + "loss": 0.5175, + "nll_loss": 0.12937091290950775, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.643924588279333e-05, + "rewards/margins": 0.28408104181289673, + "rewards/rejected": -0.28409749269485474, + "step": 12784 + }, + { + "epoch": 8.841632088520056, + "grad_norm": 4.1446146965026855, + "learning_rate": 6.435377285999693e-06, + "log_odds_chosen": 11.283458709716797, + "log_odds_ratio": -3.4393011446809396e-05, + "logits/chosen": -0.2070007473230362, + "logits/rejected": -0.3016629219055176, + "logps/chosen": -0.000497006403747946, + "logps/rejected": -2.5244784355163574, + "loss": 0.422, + "nll_loss": 0.10548632591962814, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.97006403747946e-05, + "rewards/margins": 0.25239813327789307, + "rewards/rejected": -0.25244784355163574, + "step": 12785 + }, + { + "epoch": 8.842323651452283, + "grad_norm": 4.659236907958984, + "learning_rate": 6.4315352697095435e-06, + "log_odds_chosen": 11.342458724975586, + "log_odds_ratio": -3.612410364439711e-05, + "logits/chosen": -0.4639470875263214, + "logits/rejected": -0.5213142037391663, + "logps/chosen": -0.00042212463449686766, + "logps/rejected": -2.7059743404388428, + "loss": 0.3976, + "nll_loss": 0.0993962287902832, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.221246490487829e-05, + "rewards/margins": 0.27055519819259644, + "rewards/rejected": -0.2705973982810974, + "step": 12786 + }, + { + "epoch": 8.84301521438451, + "grad_norm": 3.9992127418518066, + "learning_rate": 6.427693253419395e-06, + "log_odds_chosen": 10.961469650268555, + "log_odds_ratio": -3.790142363868654e-05, + "logits/chosen": -0.443245530128479, + "logits/rejected": -0.5575623512268066, + "logps/chosen": -0.0001961943635251373, + "logps/rejected": -1.908239722251892, + "loss": 0.3471, + "nll_loss": 0.08676640689373016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9619437807705253e-05, + "rewards/margins": 0.1908043622970581, + "rewards/rejected": -0.1908239722251892, + "step": 12787 + }, + { + "epoch": 8.843706777316736, + "grad_norm": 3.0610389709472656, + "learning_rate": 6.423851237129246e-06, + "log_odds_chosen": 11.528380393981934, + "log_odds_ratio": -2.1074079995742068e-05, + "logits/chosen": -0.31700849533081055, + "logits/rejected": -0.369879812002182, + "logps/chosen": -0.00015870729112066329, + "logps/rejected": -2.5586135387420654, + "loss": 0.373, + "nll_loss": 0.09325310587882996, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.587072983966209e-05, + "rewards/margins": 0.25584548711776733, + "rewards/rejected": -0.2558613419532776, + "step": 12788 + }, + { + "epoch": 8.844398340248963, + "grad_norm": 3.834096908569336, + "learning_rate": 6.420009220839096e-06, + "log_odds_chosen": 10.404363632202148, + "log_odds_ratio": -0.0003085200733039528, + "logits/chosen": -0.34495866298675537, + "logits/rejected": -0.43560492992401123, + "logps/chosen": -0.00025962202926166356, + "logps/rejected": -1.8464734554290771, + "loss": 0.4628, + "nll_loss": 0.11567828059196472, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5962202926166356e-05, + "rewards/margins": 0.1846213936805725, + "rewards/rejected": -0.1846473515033722, + "step": 12789 + }, + { + "epoch": 8.84508990318119, + "grad_norm": 3.1313297748565674, + "learning_rate": 6.416167204548948e-06, + "log_odds_chosen": 11.057754516601562, + "log_odds_ratio": -4.367155270301737e-05, + "logits/chosen": -0.10010676085948944, + "logits/rejected": -0.2379499077796936, + "logps/chosen": -0.00020684795163106173, + "logps/rejected": -2.0329058170318604, + "loss": 0.2781, + "nll_loss": 0.06952624022960663, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0684796254499815e-05, + "rewards/margins": 0.20326989889144897, + "rewards/rejected": -0.20329056680202484, + "step": 12790 + }, + { + "epoch": 8.845781466113417, + "grad_norm": 3.7589199542999268, + "learning_rate": 6.412325188258798e-06, + "log_odds_chosen": 10.544598579406738, + "log_odds_ratio": -6.38895871816203e-05, + "logits/chosen": -0.15169522166252136, + "logits/rejected": -0.2930341958999634, + "logps/chosen": -0.0003536183503456414, + "logps/rejected": -2.1912853717803955, + "loss": 0.3678, + "nll_loss": 0.09195493161678314, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.536183794494718e-05, + "rewards/margins": 0.2190931737422943, + "rewards/rejected": -0.2191285341978073, + "step": 12791 + }, + { + "epoch": 8.846473029045644, + "grad_norm": 3.619842767715454, + "learning_rate": 6.408483171968649e-06, + "log_odds_chosen": 11.351167678833008, + "log_odds_ratio": -4.215494482195936e-05, + "logits/chosen": -0.37345826625823975, + "logits/rejected": -0.4001314342021942, + "logps/chosen": -0.00016402498295065016, + "logps/rejected": -2.2864413261413574, + "loss": 0.3679, + "nll_loss": 0.09198301285505295, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6402498658862896e-05, + "rewards/margins": 0.2286277413368225, + "rewards/rejected": -0.22864416241645813, + "step": 12792 + }, + { + "epoch": 8.84716459197787, + "grad_norm": 4.5168561935424805, + "learning_rate": 6.4046411556785004e-06, + "log_odds_chosen": 10.029914855957031, + "log_odds_ratio": -0.00015833518409635872, + "logits/chosen": -0.16558289527893066, + "logits/rejected": -0.21536770462989807, + "logps/chosen": -0.0013712483923882246, + "logps/rejected": -2.173950672149658, + "loss": 0.3235, + "nll_loss": 0.08085213601589203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013712485088035464, + "rewards/margins": 0.21725796163082123, + "rewards/rejected": -0.21739508211612701, + "step": 12793 + }, + { + "epoch": 8.847856154910097, + "grad_norm": 5.88995885848999, + "learning_rate": 6.400799139388351e-06, + "log_odds_chosen": 11.599223136901855, + "log_odds_ratio": -0.00055232661543414, + "logits/chosen": -0.4300704598426819, + "logits/rejected": -0.5501849055290222, + "logps/chosen": -0.0002075859229080379, + "logps/rejected": -2.715794563293457, + "loss": 0.5537, + "nll_loss": 0.1383628249168396, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0758594473591074e-05, + "rewards/margins": 0.2715587019920349, + "rewards/rejected": -0.27157944440841675, + "step": 12794 + }, + { + "epoch": 8.848547717842324, + "grad_norm": 3.6452476978302, + "learning_rate": 6.396957123098202e-06, + "log_odds_chosen": 10.567415237426758, + "log_odds_ratio": -0.00012351528857834637, + "logits/chosen": 0.004386186599731445, + "logits/rejected": -0.0502280592918396, + "logps/chosen": -0.00024670836864970624, + "logps/rejected": -1.9211772680282593, + "loss": 0.4473, + "nll_loss": 0.11181392520666122, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4670836864970624e-05, + "rewards/margins": 0.19209304451942444, + "rewards/rejected": -0.19211772084236145, + "step": 12795 + }, + { + "epoch": 8.849239280774551, + "grad_norm": 3.6943931579589844, + "learning_rate": 6.3931151068080535e-06, + "log_odds_chosen": 11.638896942138672, + "log_odds_ratio": -1.964723560377024e-05, + "logits/chosen": -0.6820564866065979, + "logits/rejected": -0.7239702343940735, + "logps/chosen": -6.672356539638713e-05, + "logps/rejected": -2.1108269691467285, + "loss": 0.4618, + "nll_loss": 0.11546014249324799, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.672355993941892e-06, + "rewards/margins": 0.211076021194458, + "rewards/rejected": -0.21108269691467285, + "step": 12796 + }, + { + "epoch": 8.849930843706778, + "grad_norm": 2.904240369796753, + "learning_rate": 6.389273090517904e-06, + "log_odds_chosen": 10.31950569152832, + "log_odds_ratio": -0.00010807502258103341, + "logits/chosen": -0.19965432584285736, + "logits/rejected": -0.07110458612442017, + "logps/chosen": -0.0005260208854451776, + "logps/rejected": -1.9000871181488037, + "loss": 0.3618, + "nll_loss": 0.09044182300567627, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2602088544517756e-05, + "rewards/margins": 0.18995609879493713, + "rewards/rejected": -0.19000869989395142, + "step": 12797 + }, + { + "epoch": 8.850622406639005, + "grad_norm": 3.6862058639526367, + "learning_rate": 6.385431074227754e-06, + "log_odds_chosen": 9.927431106567383, + "log_odds_ratio": -0.001017512520775199, + "logits/chosen": -0.2530617415904999, + "logits/rejected": -0.2509405016899109, + "logps/chosen": -0.0012779454700648785, + "logps/rejected": -1.9349370002746582, + "loss": 0.4581, + "nll_loss": 0.11442754417657852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012779454118572176, + "rewards/margins": 0.1933659017086029, + "rewards/rejected": -0.19349370896816254, + "step": 12798 + }, + { + "epoch": 8.851313969571232, + "grad_norm": 4.178948402404785, + "learning_rate": 6.381589057937607e-06, + "log_odds_chosen": 10.891372680664062, + "log_odds_ratio": -4.565313793136738e-05, + "logits/chosen": -0.4360129237174988, + "logits/rejected": -0.5046184062957764, + "logps/chosen": -0.00018814804207067937, + "logps/rejected": -1.9857664108276367, + "loss": 0.4265, + "nll_loss": 0.10661087185144424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8814802388078533e-05, + "rewards/margins": 0.19855782389640808, + "rewards/rejected": -0.19857662916183472, + "step": 12799 + }, + { + "epoch": 8.852005532503458, + "grad_norm": 2.3786985874176025, + "learning_rate": 6.3777470416474565e-06, + "log_odds_chosen": 11.908330917358398, + "log_odds_ratio": -2.3679904188611545e-05, + "logits/chosen": -0.5502616763114929, + "logits/rejected": -0.5585063099861145, + "logps/chosen": -0.0001272763474844396, + "logps/rejected": -2.439074993133545, + "loss": 0.3383, + "nll_loss": 0.08457043021917343, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2727635294140782e-05, + "rewards/margins": 0.2438947856426239, + "rewards/rejected": -0.24390751123428345, + "step": 12800 + }, + { + "epoch": 8.852697095435685, + "grad_norm": 3.721696376800537, + "learning_rate": 6.373905025357307e-06, + "log_odds_chosen": 11.587570190429688, + "log_odds_ratio": -2.8774469683412462e-05, + "logits/chosen": -0.2774308919906616, + "logits/rejected": -0.40616902709007263, + "logps/chosen": -0.00034545804373919964, + "logps/rejected": -2.403249502182007, + "loss": 0.4794, + "nll_loss": 0.11984878778457642, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.454580291872844e-05, + "rewards/margins": 0.24029040336608887, + "rewards/rejected": -0.2403249442577362, + "step": 12801 + }, + { + "epoch": 8.853388658367912, + "grad_norm": 3.288451671600342, + "learning_rate": 6.370063009067159e-06, + "log_odds_chosen": 11.266716957092285, + "log_odds_ratio": -3.245002881158143e-05, + "logits/chosen": -0.1537206918001175, + "logits/rejected": -0.2285475730895996, + "logps/chosen": -0.00011662822362268344, + "logps/rejected": -2.271165370941162, + "loss": 0.4268, + "nll_loss": 0.10669860988855362, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1662822544167284e-05, + "rewards/margins": 0.22710487246513367, + "rewards/rejected": -0.22711655497550964, + "step": 12802 + }, + { + "epoch": 8.854080221300139, + "grad_norm": 3.4580912590026855, + "learning_rate": 6.36622099277701e-06, + "log_odds_chosen": 11.14916706085205, + "log_odds_ratio": -7.049908163025975e-05, + "logits/chosen": -0.3396326005458832, + "logits/rejected": -0.39334046840667725, + "logps/chosen": -0.00017810799181461334, + "logps/rejected": -2.1985089778900146, + "loss": 0.3566, + "nll_loss": 0.08914760500192642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7810798453865573e-05, + "rewards/margins": 0.21983309090137482, + "rewards/rejected": -0.21985091269016266, + "step": 12803 + }, + { + "epoch": 8.854771784232366, + "grad_norm": 4.225655555725098, + "learning_rate": 6.36237897648686e-06, + "log_odds_chosen": 10.638168334960938, + "log_odds_ratio": -0.00013483702787198126, + "logits/chosen": -0.5227504372596741, + "logits/rejected": -0.6066082715988159, + "logps/chosen": -0.00039914826629683375, + "logps/rejected": -2.0195770263671875, + "loss": 0.3202, + "nll_loss": 0.08004799485206604, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9914826629683375e-05, + "rewards/margins": 0.20191779732704163, + "rewards/rejected": -0.20195770263671875, + "step": 12804 + }, + { + "epoch": 8.855463347164592, + "grad_norm": 4.3255157470703125, + "learning_rate": 6.358536960196712e-06, + "log_odds_chosen": 11.599845886230469, + "log_odds_ratio": -2.2031070329830982e-05, + "logits/chosen": 0.013053441420197487, + "logits/rejected": -0.16108641028404236, + "logps/chosen": -0.00015875123790465295, + "logps/rejected": -2.578869581222534, + "loss": 0.399, + "nll_loss": 0.09974716603755951, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5875124518061057e-05, + "rewards/margins": 0.2578710913658142, + "rewards/rejected": -0.25788694620132446, + "step": 12805 + }, + { + "epoch": 8.85615491009682, + "grad_norm": 3.2311954498291016, + "learning_rate": 6.354694943906563e-06, + "log_odds_chosen": 11.382307052612305, + "log_odds_ratio": -5.089869591756724e-05, + "logits/chosen": -0.6852065324783325, + "logits/rejected": -0.7032715678215027, + "logps/chosen": -0.00012299341324251145, + "logps/rejected": -2.0894734859466553, + "loss": 0.3764, + "nll_loss": 0.09410445392131805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2299342415644787e-05, + "rewards/margins": 0.20893505215644836, + "rewards/rejected": -0.20894736051559448, + "step": 12806 + }, + { + "epoch": 8.856846473029046, + "grad_norm": 3.9020462036132812, + "learning_rate": 6.350852927616413e-06, + "log_odds_chosen": 10.981860160827637, + "log_odds_ratio": -0.001179091283120215, + "logits/chosen": -0.4362276494503021, + "logits/rejected": -0.36282485723495483, + "logps/chosen": -0.0008756135357543826, + "logps/rejected": -2.592684745788574, + "loss": 0.6029, + "nll_loss": 0.150602787733078, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.756135503062978e-05, + "rewards/margins": 0.2591809034347534, + "rewards/rejected": -0.25926846265792847, + "step": 12807 + }, + { + "epoch": 8.857538035961273, + "grad_norm": 3.7402217388153076, + "learning_rate": 6.347010911326265e-06, + "log_odds_chosen": 11.241284370422363, + "log_odds_ratio": -0.000701241078786552, + "logits/chosen": -0.2342221438884735, + "logits/rejected": -0.319629043340683, + "logps/chosen": -0.001087275566533208, + "logps/rejected": -2.5344743728637695, + "loss": 0.4757, + "nll_loss": 0.11884773522615433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010872755228774622, + "rewards/margins": 0.25333869457244873, + "rewards/rejected": -0.25344744324684143, + "step": 12808 + }, + { + "epoch": 8.8582295988935, + "grad_norm": 3.1583425998687744, + "learning_rate": 6.343168895036115e-06, + "log_odds_chosen": 11.552385330200195, + "log_odds_ratio": -2.8983889933442697e-05, + "logits/chosen": 0.15185663104057312, + "logits/rejected": -0.0315750315785408, + "logps/chosen": -0.001065196585841477, + "logps/rejected": -3.191751003265381, + "loss": 0.3775, + "nll_loss": 0.0943649560213089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010651966294972226, + "rewards/margins": 0.3190686106681824, + "rewards/rejected": -0.319175124168396, + "step": 12809 + }, + { + "epoch": 8.858921161825727, + "grad_norm": 5.184572696685791, + "learning_rate": 6.339326878745966e-06, + "log_odds_chosen": 11.042671203613281, + "log_odds_ratio": -2.5831781385932118e-05, + "logits/chosen": -0.3577697277069092, + "logits/rejected": -0.4123019576072693, + "logps/chosen": -0.00017461557581555098, + "logps/rejected": -2.0305471420288086, + "loss": 0.2783, + "nll_loss": 0.06958448886871338, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.746155794535298e-05, + "rewards/margins": 0.20303726196289062, + "rewards/rejected": -0.20305472612380981, + "step": 12810 + }, + { + "epoch": 8.859612724757953, + "grad_norm": 3.8851094245910645, + "learning_rate": 6.335484862455817e-06, + "log_odds_chosen": 12.024246215820312, + "log_odds_ratio": -3.257915159338154e-05, + "logits/chosen": -0.0674266517162323, + "logits/rejected": -0.004043757915496826, + "logps/chosen": -0.0001970323792193085, + "logps/rejected": -3.0870656967163086, + "loss": 0.3569, + "nll_loss": 0.08922448754310608, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.970323864952661e-05, + "rewards/margins": 0.30868688225746155, + "rewards/rejected": -0.3087065815925598, + "step": 12811 + }, + { + "epoch": 8.86030428769018, + "grad_norm": 3.7420690059661865, + "learning_rate": 6.331642846165668e-06, + "log_odds_chosen": 10.842025756835938, + "log_odds_ratio": -6.420222052838653e-05, + "logits/chosen": -0.5012819766998291, + "logits/rejected": -0.5137823820114136, + "logps/chosen": -0.00014037819346413016, + "logps/rejected": -1.833478569984436, + "loss": 0.3821, + "nll_loss": 0.09550876915454865, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4037818800716195e-05, + "rewards/margins": 0.18333381414413452, + "rewards/rejected": -0.18334785103797913, + "step": 12812 + }, + { + "epoch": 8.860995850622407, + "grad_norm": 3.35178279876709, + "learning_rate": 6.327800829875519e-06, + "log_odds_chosen": 10.516077041625977, + "log_odds_ratio": -0.0001255777315236628, + "logits/chosen": -0.52434903383255, + "logits/rejected": -0.5578416585922241, + "logps/chosen": -0.00040695726056583226, + "logps/rejected": -2.4701924324035645, + "loss": 0.391, + "nll_loss": 0.0977269783616066, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.069573333254084e-05, + "rewards/margins": 0.2469785511493683, + "rewards/rejected": -0.2470192313194275, + "step": 12813 + }, + { + "epoch": 8.861687413554634, + "grad_norm": 4.4756669998168945, + "learning_rate": 6.32395881358537e-06, + "log_odds_chosen": 12.155223846435547, + "log_odds_ratio": -6.89135049469769e-06, + "logits/chosen": -0.38654935359954834, + "logits/rejected": -0.5143874287605286, + "logps/chosen": -0.000187495126738213, + "logps/rejected": -3.0094921588897705, + "loss": 0.5471, + "nll_loss": 0.136766716837883, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.874951340141706e-05, + "rewards/margins": 0.3009304702281952, + "rewards/rejected": -0.30094921588897705, + "step": 12814 + }, + { + "epoch": 8.86237897648686, + "grad_norm": 3.7717156410217285, + "learning_rate": 6.320116797295221e-06, + "log_odds_chosen": 11.97378158569336, + "log_odds_ratio": -3.298856609035283e-05, + "logits/chosen": -0.763237714767456, + "logits/rejected": -0.8728320598602295, + "logps/chosen": -9.731641330290586e-05, + "logps/rejected": -2.331000804901123, + "loss": 0.4268, + "nll_loss": 0.10668745636940002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.731641512189526e-06, + "rewards/margins": 0.2330903708934784, + "rewards/rejected": -0.23310008645057678, + "step": 12815 + }, + { + "epoch": 8.863070539419088, + "grad_norm": 3.1342639923095703, + "learning_rate": 6.316274781005072e-06, + "log_odds_chosen": 10.921533584594727, + "log_odds_ratio": -2.9228267521830276e-05, + "logits/chosen": -0.3252316415309906, + "logits/rejected": -0.15217556059360504, + "logps/chosen": -0.0001484237000113353, + "logps/rejected": -1.9739879369735718, + "loss": 0.5619, + "nll_loss": 0.14046324789524078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4842370546830352e-05, + "rewards/margins": 0.19738394021987915, + "rewards/rejected": -0.19739878177642822, + "step": 12816 + }, + { + "epoch": 8.863762102351314, + "grad_norm": 3.5931262969970703, + "learning_rate": 6.312432764714922e-06, + "log_odds_chosen": 11.546354293823242, + "log_odds_ratio": -2.073409268632531e-05, + "logits/chosen": 0.14714400470256805, + "logits/rejected": 0.061776965856552124, + "logps/chosen": -0.0003727427392732352, + "logps/rejected": -2.9483630657196045, + "loss": 0.4372, + "nll_loss": 0.10930097848176956, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.727427974808961e-05, + "rewards/margins": 0.2947990596294403, + "rewards/rejected": -0.2948363423347473, + "step": 12817 + }, + { + "epoch": 8.864453665283541, + "grad_norm": 4.673916339874268, + "learning_rate": 6.308590748424774e-06, + "log_odds_chosen": 11.306009292602539, + "log_odds_ratio": -5.6018761824816465e-05, + "logits/chosen": -0.8934646844863892, + "logits/rejected": -1.0032732486724854, + "logps/chosen": -0.00013073139416519552, + "logps/rejected": -2.1071205139160156, + "loss": 0.559, + "nll_loss": 0.1397353559732437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3073140507913195e-05, + "rewards/margins": 0.21069897711277008, + "rewards/rejected": -0.21071207523345947, + "step": 12818 + }, + { + "epoch": 8.865145228215768, + "grad_norm": 3.451753616333008, + "learning_rate": 6.304748732134624e-06, + "log_odds_chosen": 11.227121353149414, + "log_odds_ratio": -0.00012289262667763978, + "logits/chosen": 0.05771773308515549, + "logits/rejected": -0.11866432428359985, + "logps/chosen": -0.00041187513852491975, + "logps/rejected": -2.3950722217559814, + "loss": 0.2846, + "nll_loss": 0.07113710045814514, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.11875153076835e-05, + "rewards/margins": 0.23946604132652283, + "rewards/rejected": -0.23950722813606262, + "step": 12819 + }, + { + "epoch": 8.865836791147995, + "grad_norm": 5.063889980316162, + "learning_rate": 6.300906715844475e-06, + "log_odds_chosen": 10.330509185791016, + "log_odds_ratio": -9.231100557371974e-05, + "logits/chosen": 0.4023495018482208, + "logits/rejected": 0.2766904830932617, + "logps/chosen": -0.00024465369642712176, + "logps/rejected": -2.0109057426452637, + "loss": 0.4294, + "nll_loss": 0.10734772682189941, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4465369278914295e-05, + "rewards/margins": 0.20106610655784607, + "rewards/rejected": -0.20109057426452637, + "step": 12820 + }, + { + "epoch": 8.866528354080222, + "grad_norm": 5.208076477050781, + "learning_rate": 6.2970646995543265e-06, + "log_odds_chosen": 10.887624740600586, + "log_odds_ratio": -4.503025775193237e-05, + "logits/chosen": -0.4728430509567261, + "logits/rejected": -0.36085015535354614, + "logps/chosen": -0.0001553138135932386, + "logps/rejected": -1.9350595474243164, + "loss": 0.368, + "nll_loss": 0.0919969379901886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5531382814515382e-05, + "rewards/margins": 0.1934904307126999, + "rewards/rejected": -0.19350595772266388, + "step": 12821 + }, + { + "epoch": 8.867219917012449, + "grad_norm": 3.113422393798828, + "learning_rate": 6.293222683264177e-06, + "log_odds_chosen": 9.91431999206543, + "log_odds_ratio": -0.0001638552057556808, + "logits/chosen": -0.30638688802719116, + "logits/rejected": -0.24746140837669373, + "logps/chosen": -0.0005692498525604606, + "logps/rejected": -1.891648769378662, + "loss": 0.3147, + "nll_loss": 0.07865750789642334, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6924989621620625e-05, + "rewards/margins": 0.1891079545021057, + "rewards/rejected": -0.1891648769378662, + "step": 12822 + }, + { + "epoch": 8.867911479944675, + "grad_norm": 7.623961925506592, + "learning_rate": 6.289380666974028e-06, + "log_odds_chosen": 11.233999252319336, + "log_odds_ratio": -1.57922477228567e-05, + "logits/chosen": -0.10346847772598267, + "logits/rejected": -0.01052796095609665, + "logps/chosen": -0.00010200871474808082, + "logps/rejected": -2.065570592880249, + "loss": 0.3413, + "nll_loss": 0.08533357083797455, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.020087074721232e-05, + "rewards/margins": 0.2065468579530716, + "rewards/rejected": -0.20655705034732819, + "step": 12823 + }, + { + "epoch": 8.868603042876902, + "grad_norm": 3.6400489807128906, + "learning_rate": 6.28553865068388e-06, + "log_odds_chosen": 12.271528244018555, + "log_odds_ratio": -3.0035564122954383e-05, + "logits/chosen": -0.13494722545146942, + "logits/rejected": -0.13900548219680786, + "logps/chosen": -0.0001601783442310989, + "logps/rejected": -2.7965166568756104, + "loss": 0.3943, + "nll_loss": 0.09856868535280228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.601783515070565e-05, + "rewards/margins": 0.2796356678009033, + "rewards/rejected": -0.2796516716480255, + "step": 12824 + }, + { + "epoch": 8.869294605809129, + "grad_norm": 4.36489725112915, + "learning_rate": 6.28169663439373e-06, + "log_odds_chosen": 10.686090469360352, + "log_odds_ratio": -0.00018325365090277046, + "logits/chosen": -0.00666133314371109, + "logits/rejected": 0.12504035234451294, + "logps/chosen": -0.0009015874238684773, + "logps/rejected": -2.6624984741210938, + "loss": 0.7697, + "nll_loss": 0.19239458441734314, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.015874820761383e-05, + "rewards/margins": 0.26615968346595764, + "rewards/rejected": -0.2662498354911804, + "step": 12825 + }, + { + "epoch": 8.869986168741356, + "grad_norm": 4.69269323348999, + "learning_rate": 6.27785461810358e-06, + "log_odds_chosen": 11.004969596862793, + "log_odds_ratio": -0.00010248890612274408, + "logits/chosen": -0.6516998410224915, + "logits/rejected": -0.6196410059928894, + "logps/chosen": -0.00013203633716329932, + "logps/rejected": -1.8846098184585571, + "loss": 0.322, + "nll_loss": 0.08050191402435303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3203634807723574e-05, + "rewards/margins": 0.18844778835773468, + "rewards/rejected": -0.18846097588539124, + "step": 12826 + }, + { + "epoch": 8.870677731673583, + "grad_norm": 3.3342418670654297, + "learning_rate": 6.274012601813433e-06, + "log_odds_chosen": 11.465997695922852, + "log_odds_ratio": -1.9525301468092948e-05, + "logits/chosen": -0.5683550834655762, + "logits/rejected": -0.691111147403717, + "logps/chosen": -9.050694643519819e-05, + "logps/rejected": -2.0195186138153076, + "loss": 0.391, + "nll_loss": 0.09774242341518402, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.050694643519819e-06, + "rewards/margins": 0.2019428014755249, + "rewards/rejected": -0.20195186138153076, + "step": 12827 + }, + { + "epoch": 8.87136929460581, + "grad_norm": 4.982881546020508, + "learning_rate": 6.270170585523283e-06, + "log_odds_chosen": 10.338197708129883, + "log_odds_ratio": -0.00015780533431097865, + "logits/chosen": -0.06611031293869019, + "logits/rejected": -0.04280729219317436, + "logps/chosen": -0.0048825982958078384, + "logps/rejected": -1.9869780540466309, + "loss": 0.5907, + "nll_loss": 0.14765596389770508, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00048825977137312293, + "rewards/margins": 0.19820955395698547, + "rewards/rejected": -0.19869780540466309, + "step": 12828 + }, + { + "epoch": 8.872060857538036, + "grad_norm": 4.2163848876953125, + "learning_rate": 6.266328569233133e-06, + "log_odds_chosen": 11.460411071777344, + "log_odds_ratio": -8.664889173815027e-05, + "logits/chosen": -0.27214065194129944, + "logits/rejected": -0.33706462383270264, + "logps/chosen": -0.00016919002518989146, + "logps/rejected": -2.4618775844573975, + "loss": 0.4722, + "nll_loss": 0.1180378869175911, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.691900433797855e-05, + "rewards/margins": 0.24617084860801697, + "rewards/rejected": -0.2461877465248108, + "step": 12829 + }, + { + "epoch": 8.872752420470263, + "grad_norm": 4.774320602416992, + "learning_rate": 6.262486552942985e-06, + "log_odds_chosen": 9.06969165802002, + "log_odds_ratio": -0.0004509476129896939, + "logits/chosen": -0.46754372119903564, + "logits/rejected": -0.3664955794811249, + "logps/chosen": -0.0012682451633736491, + "logps/rejected": -1.5781537294387817, + "loss": 0.5363, + "nll_loss": 0.13401776552200317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012682451051659882, + "rewards/margins": 0.15768855810165405, + "rewards/rejected": -0.1578153669834137, + "step": 12830 + }, + { + "epoch": 8.87344398340249, + "grad_norm": 4.465846538543701, + "learning_rate": 6.258644536652836e-06, + "log_odds_chosen": 11.783326148986816, + "log_odds_ratio": -4.0657272620592266e-05, + "logits/chosen": -0.37636798620224, + "logits/rejected": -0.3685925602912903, + "logps/chosen": -0.0005404252442531288, + "logps/rejected": -3.2265686988830566, + "loss": 0.3596, + "nll_loss": 0.08990206569433212, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.404253170127049e-05, + "rewards/margins": 0.3226028382778168, + "rewards/rejected": -0.32265686988830566, + "step": 12831 + }, + { + "epoch": 8.874135546334717, + "grad_norm": 5.006585597991943, + "learning_rate": 6.2548025203626864e-06, + "log_odds_chosen": 11.334182739257812, + "log_odds_ratio": -6.983600906096399e-05, + "logits/chosen": -0.3358827829360962, + "logits/rejected": -0.37939217686653137, + "logps/chosen": -0.00030704212258569896, + "logps/rejected": -2.3556134700775146, + "loss": 0.3131, + "nll_loss": 0.07827488332986832, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.070421371376142e-05, + "rewards/margins": 0.23553064465522766, + "rewards/rejected": -0.23556135594844818, + "step": 12832 + }, + { + "epoch": 8.874827109266944, + "grad_norm": 2.919361114501953, + "learning_rate": 6.250960504072538e-06, + "log_odds_chosen": 10.745523452758789, + "log_odds_ratio": -3.151583223370835e-05, + "logits/chosen": 0.00840643048286438, + "logits/rejected": -0.03765976428985596, + "logps/chosen": -0.00019609363516792655, + "logps/rejected": -1.8906185626983643, + "loss": 0.3046, + "nll_loss": 0.07615765929222107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9609364244388416e-05, + "rewards/margins": 0.18904224038124084, + "rewards/rejected": -0.18906186521053314, + "step": 12833 + }, + { + "epoch": 8.87551867219917, + "grad_norm": 4.858453273773193, + "learning_rate": 6.247118487782389e-06, + "log_odds_chosen": 11.026376724243164, + "log_odds_ratio": -0.0001252561341971159, + "logits/chosen": -0.1558229774236679, + "logits/rejected": -0.21528393030166626, + "logps/chosen": -0.00019361113663762808, + "logps/rejected": -1.9671748876571655, + "loss": 0.511, + "nll_loss": 0.12774495780467987, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9361112208571285e-05, + "rewards/margins": 0.1966981291770935, + "rewards/rejected": -0.1967175006866455, + "step": 12834 + }, + { + "epoch": 8.876210235131397, + "grad_norm": 3.224592447280884, + "learning_rate": 6.2432764714922395e-06, + "log_odds_chosen": 10.939685821533203, + "log_odds_ratio": -4.270240242476575e-05, + "logits/chosen": -0.08519141376018524, + "logits/rejected": -0.14094194769859314, + "logps/chosen": -0.00024868431501090527, + "logps/rejected": -2.2184906005859375, + "loss": 0.4215, + "nll_loss": 0.10537970066070557, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4868431864888407e-05, + "rewards/margins": 0.22182418406009674, + "rewards/rejected": -0.22184905409812927, + "step": 12835 + }, + { + "epoch": 8.876901798063624, + "grad_norm": 9.507604598999023, + "learning_rate": 6.23943445520209e-06, + "log_odds_chosen": 11.620631217956543, + "log_odds_ratio": -1.5148013517318759e-05, + "logits/chosen": -0.27090179920196533, + "logits/rejected": -0.4988439083099365, + "logps/chosen": -0.00016171421157196164, + "logps/rejected": -2.8916406631469727, + "loss": 1.2096, + "nll_loss": 0.30239495635032654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6171421520994045e-05, + "rewards/margins": 0.28914791345596313, + "rewards/rejected": -0.28916406631469727, + "step": 12836 + }, + { + "epoch": 8.877593360995851, + "grad_norm": 3.323967695236206, + "learning_rate": 6.235592438911941e-06, + "log_odds_chosen": 11.173929214477539, + "log_odds_ratio": -4.231243292451836e-05, + "logits/chosen": 0.00477018766105175, + "logits/rejected": 0.00024968013167381287, + "logps/chosen": -0.00010535558249102905, + "logps/rejected": -2.1376261711120605, + "loss": 0.3183, + "nll_loss": 0.07958222180604935, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0535557521507144e-05, + "rewards/margins": 0.21375209093093872, + "rewards/rejected": -0.21376262605190277, + "step": 12837 + }, + { + "epoch": 8.878284923928078, + "grad_norm": 3.3721134662628174, + "learning_rate": 6.231750422621793e-06, + "log_odds_chosen": 10.522093772888184, + "log_odds_ratio": -0.0001202807470690459, + "logits/chosen": 0.16521060466766357, + "logits/rejected": 0.08164684474468231, + "logps/chosen": -0.00020281919569242746, + "logps/rejected": -1.9066367149353027, + "loss": 0.4108, + "nll_loss": 0.10269135236740112, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.028192102443427e-05, + "rewards/margins": 0.19064339995384216, + "rewards/rejected": -0.1906636655330658, + "step": 12838 + }, + { + "epoch": 8.878976486860305, + "grad_norm": 7.772226810455322, + "learning_rate": 6.227908406331643e-06, + "log_odds_chosen": 11.112709999084473, + "log_odds_ratio": -8.9738801761996e-05, + "logits/chosen": -0.10383787751197815, + "logits/rejected": -0.08016571402549744, + "logps/chosen": -0.00013127163401804864, + "logps/rejected": -2.2397923469543457, + "loss": 0.3076, + "nll_loss": 0.07689593732357025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3127162674209103e-05, + "rewards/margins": 0.22396612167358398, + "rewards/rejected": -0.22397923469543457, + "step": 12839 + }, + { + "epoch": 8.879668049792532, + "grad_norm": 3.7768611907958984, + "learning_rate": 6.224066390041494e-06, + "log_odds_chosen": 11.829188346862793, + "log_odds_ratio": -2.597944694571197e-05, + "logits/chosen": -0.05140957981348038, + "logits/rejected": -0.1450553834438324, + "logps/chosen": -0.00020165527530480176, + "logps/rejected": -2.8034486770629883, + "loss": 0.5474, + "nll_loss": 0.13685037195682526, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0165527530480176e-05, + "rewards/margins": 0.28032469749450684, + "rewards/rejected": -0.2803449034690857, + "step": 12840 + }, + { + "epoch": 8.880359612724758, + "grad_norm": 2.841261863708496, + "learning_rate": 6.220224373751346e-06, + "log_odds_chosen": 10.983772277832031, + "log_odds_ratio": -0.00022373626416083425, + "logits/chosen": -0.1124921515583992, + "logits/rejected": -0.0917963832616806, + "logps/chosen": -0.00021983537590131164, + "logps/rejected": -2.258674144744873, + "loss": 0.2772, + "nll_loss": 0.06927579641342163, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.198353649873752e-05, + "rewards/margins": 0.22584544122219086, + "rewards/rejected": -0.22586743533611298, + "step": 12841 + }, + { + "epoch": 8.881051175656985, + "grad_norm": 2.910684823989868, + "learning_rate": 6.216382357461196e-06, + "log_odds_chosen": 11.847644805908203, + "log_odds_ratio": -9.29053385334555e-06, + "logits/chosen": -0.6474719643592834, + "logits/rejected": -0.6795064210891724, + "logps/chosen": -0.00010121862578671426, + "logps/rejected": -2.5809245109558105, + "loss": 0.4401, + "nll_loss": 0.11002403497695923, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0121862032974605e-05, + "rewards/margins": 0.2580823302268982, + "rewards/rejected": -0.2580924332141876, + "step": 12842 + }, + { + "epoch": 8.881742738589212, + "grad_norm": 3.9916775226593018, + "learning_rate": 6.212540341171047e-06, + "log_odds_chosen": 10.949371337890625, + "log_odds_ratio": -4.622457345249131e-05, + "logits/chosen": -0.677534282207489, + "logits/rejected": -0.6486817002296448, + "logps/chosen": -0.0003831333015114069, + "logps/rejected": -2.3778223991394043, + "loss": 0.5308, + "nll_loss": 0.13269220292568207, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.831333015114069e-05, + "rewards/margins": 0.23774391412734985, + "rewards/rejected": -0.23778222501277924, + "step": 12843 + }, + { + "epoch": 8.882434301521439, + "grad_norm": 2.8240256309509277, + "learning_rate": 6.208698324880897e-06, + "log_odds_chosen": 11.520731925964355, + "log_odds_ratio": -2.8301981728873216e-05, + "logits/chosen": -0.2797636389732361, + "logits/rejected": -0.33228588104248047, + "logps/chosen": -0.0004265240568201989, + "logps/rejected": -2.2186403274536133, + "loss": 0.333, + "nll_loss": 0.08325869590044022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.265240568201989e-05, + "rewards/margins": 0.2218213826417923, + "rewards/rejected": -0.22186404466629028, + "step": 12844 + }, + { + "epoch": 8.883125864453666, + "grad_norm": 4.2253522872924805, + "learning_rate": 6.204856308590749e-06, + "log_odds_chosen": 10.74767780303955, + "log_odds_ratio": -0.0001074980100383982, + "logits/chosen": -0.32336893677711487, + "logits/rejected": -0.3765081763267517, + "logps/chosen": -0.0002828908618539572, + "logps/rejected": -2.316620111465454, + "loss": 0.2209, + "nll_loss": 0.055208850651979446, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.828908691299148e-05, + "rewards/margins": 0.23163369297981262, + "rewards/rejected": -0.23166200518608093, + "step": 12845 + }, + { + "epoch": 8.883817427385893, + "grad_norm": 3.6948225498199463, + "learning_rate": 6.2010142923005995e-06, + "log_odds_chosen": 11.301068305969238, + "log_odds_ratio": -0.00011821203952422366, + "logits/chosen": -0.37012338638305664, + "logits/rejected": -0.3886184096336365, + "logps/chosen": -0.0001534484763396904, + "logps/rejected": -2.148512601852417, + "loss": 0.4827, + "nll_loss": 0.12065938860177994, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5344847270171158e-05, + "rewards/margins": 0.21483591198921204, + "rewards/rejected": -0.2148512601852417, + "step": 12846 + }, + { + "epoch": 8.88450899031812, + "grad_norm": 4.01563835144043, + "learning_rate": 6.19717227601045e-06, + "log_odds_chosen": 10.94062614440918, + "log_odds_ratio": -0.00011104773147962987, + "logits/chosen": -0.49986332654953003, + "logits/rejected": -0.4930412173271179, + "logps/chosen": -0.0003276771167293191, + "logps/rejected": -2.2818984985351562, + "loss": 0.5161, + "nll_loss": 0.12901024520397186, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.276771167293191e-05, + "rewards/margins": 0.22815708816051483, + "rewards/rejected": -0.2281898558139801, + "step": 12847 + }, + { + "epoch": 8.885200553250346, + "grad_norm": 7.741686820983887, + "learning_rate": 6.193330259720302e-06, + "log_odds_chosen": 10.345017433166504, + "log_odds_ratio": -0.0004354672273620963, + "logits/chosen": -0.23076802492141724, + "logits/rejected": -0.22433820366859436, + "logps/chosen": -0.001394479419104755, + "logps/rejected": -2.4289042949676514, + "loss": 0.3773, + "nll_loss": 0.0942818820476532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013944793317932636, + "rewards/margins": 0.24275097250938416, + "rewards/rejected": -0.24289043247699738, + "step": 12848 + }, + { + "epoch": 8.885892116182573, + "grad_norm": 4.3841328620910645, + "learning_rate": 6.1894882434301526e-06, + "log_odds_chosen": 11.631941795349121, + "log_odds_ratio": -1.916138717206195e-05, + "logits/chosen": 0.07189624011516571, + "logits/rejected": 0.013653043657541275, + "logps/chosen": -0.00021943646424915642, + "logps/rejected": -3.1260085105895996, + "loss": 0.4685, + "nll_loss": 0.11713487654924393, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1943644242128357e-05, + "rewards/margins": 0.312578946352005, + "rewards/rejected": -0.31260088086128235, + "step": 12849 + }, + { + "epoch": 8.8865836791148, + "grad_norm": 3.0561251640319824, + "learning_rate": 6.185646227140003e-06, + "log_odds_chosen": 11.04151725769043, + "log_odds_ratio": -0.00019640347454696894, + "logits/chosen": -0.4556815028190613, + "logits/rejected": -0.4344314634799957, + "logps/chosen": -0.0001518328208476305, + "logps/rejected": -2.3554883003234863, + "loss": 0.384, + "nll_loss": 0.09599006175994873, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.518328190286411e-05, + "rewards/margins": 0.23553365468978882, + "rewards/rejected": -0.23554883897304535, + "step": 12850 + }, + { + "epoch": 8.887275242047027, + "grad_norm": 3.5416200160980225, + "learning_rate": 6.181804210849854e-06, + "log_odds_chosen": 11.330231666564941, + "log_odds_ratio": -0.0001224998850375414, + "logits/chosen": -0.487703412771225, + "logits/rejected": -0.4873879551887512, + "logps/chosen": -0.00037789743510074914, + "logps/rejected": -2.9198246002197266, + "loss": 0.3969, + "nll_loss": 0.09920765459537506, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.778974132728763e-05, + "rewards/margins": 0.2919447124004364, + "rewards/rejected": -0.2919824719429016, + "step": 12851 + }, + { + "epoch": 8.887966804979254, + "grad_norm": 3.288649082183838, + "learning_rate": 6.177962194559706e-06, + "log_odds_chosen": 11.113425254821777, + "log_odds_ratio": -0.00010231428314000368, + "logits/chosen": -0.18255461752414703, + "logits/rejected": -0.15630057454109192, + "logps/chosen": -0.0004620938270818442, + "logps/rejected": -2.818892240524292, + "loss": 0.3652, + "nll_loss": 0.09128084778785706, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.62093812529929e-05, + "rewards/margins": 0.28184300661087036, + "rewards/rejected": -0.2818892300128937, + "step": 12852 + }, + { + "epoch": 8.88865836791148, + "grad_norm": 2.824305534362793, + "learning_rate": 6.1741201782695556e-06, + "log_odds_chosen": 11.38699722290039, + "log_odds_ratio": -2.6646030164556578e-05, + "logits/chosen": 0.15093719959259033, + "logits/rejected": 0.32247185707092285, + "logps/chosen": -0.000268961041001603, + "logps/rejected": -2.542618751525879, + "loss": 0.4033, + "nll_loss": 0.10083185136318207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6896104827756062e-05, + "rewards/margins": 0.2542349696159363, + "rewards/rejected": -0.25426188111305237, + "step": 12853 + }, + { + "epoch": 8.889349930843707, + "grad_norm": 3.5842673778533936, + "learning_rate": 6.170278161979407e-06, + "log_odds_chosen": 10.66506290435791, + "log_odds_ratio": -5.382444942370057e-05, + "logits/chosen": 0.23136094212532043, + "logits/rejected": 0.07855528593063354, + "logps/chosen": -0.00024186752852983773, + "logps/rejected": -2.076673984527588, + "loss": 0.4375, + "nll_loss": 0.10937762260437012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4186752852983773e-05, + "rewards/margins": 0.20764324069023132, + "rewards/rejected": -0.20766742527484894, + "step": 12854 + }, + { + "epoch": 8.890041493775934, + "grad_norm": 4.17619514465332, + "learning_rate": 6.166436145689258e-06, + "log_odds_chosen": 11.726327896118164, + "log_odds_ratio": -1.552937646920327e-05, + "logits/chosen": -0.06163576617836952, + "logits/rejected": -0.042286425828933716, + "logps/chosen": -0.00010107838170370087, + "logps/rejected": -2.2582149505615234, + "loss": 0.5613, + "nll_loss": 0.14033466577529907, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0107837624673266e-05, + "rewards/margins": 0.22581139206886292, + "rewards/rejected": -0.22582149505615234, + "step": 12855 + }, + { + "epoch": 8.89073305670816, + "grad_norm": 4.036785125732422, + "learning_rate": 6.162594129399109e-06, + "log_odds_chosen": 10.615226745605469, + "log_odds_ratio": -6.379517435561866e-05, + "logits/chosen": -0.08870018273591995, + "logits/rejected": -0.16170233488082886, + "logps/chosen": -0.00035349337849766016, + "logps/rejected": -2.0830063819885254, + "loss": 0.4075, + "nll_loss": 0.10186357796192169, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5349337849766016e-05, + "rewards/margins": 0.2082652598619461, + "rewards/rejected": -0.2083006203174591, + "step": 12856 + }, + { + "epoch": 8.891424619640388, + "grad_norm": 2.970618486404419, + "learning_rate": 6.15875211310896e-06, + "log_odds_chosen": 10.837800979614258, + "log_odds_ratio": -8.722803613636643e-05, + "logits/chosen": -0.0712822899222374, + "logits/rejected": -0.06109131500124931, + "logps/chosen": -0.0002337824844289571, + "logps/rejected": -2.4234533309936523, + "loss": 0.3319, + "nll_loss": 0.08297805488109589, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.337824844289571e-05, + "rewards/margins": 0.24232198297977448, + "rewards/rejected": -0.24234536290168762, + "step": 12857 + }, + { + "epoch": 8.892116182572614, + "grad_norm": 3.3171956539154053, + "learning_rate": 6.154910096818811e-06, + "log_odds_chosen": 12.331867218017578, + "log_odds_ratio": -8.634147889097221e-06, + "logits/chosen": 0.17678335309028625, + "logits/rejected": 0.09244874119758606, + "logps/chosen": -0.00012825954763684422, + "logps/rejected": -3.29720401763916, + "loss": 0.3602, + "nll_loss": 0.09005635976791382, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2825955309381243e-05, + "rewards/margins": 0.3297075927257538, + "rewards/rejected": -0.3297204077243805, + "step": 12858 + }, + { + "epoch": 8.892807745504841, + "grad_norm": 3.4692952632904053, + "learning_rate": 6.151068080528662e-06, + "log_odds_chosen": 10.557546615600586, + "log_odds_ratio": -3.8898957427591085e-05, + "logits/chosen": -0.34669697284698486, + "logits/rejected": -0.29970356822013855, + "logps/chosen": -0.000205686577828601, + "logps/rejected": -1.8281556367874146, + "loss": 0.3759, + "nll_loss": 0.09398093819618225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0568655600072816e-05, + "rewards/margins": 0.18279501795768738, + "rewards/rejected": -0.1828155666589737, + "step": 12859 + }, + { + "epoch": 8.893499308437068, + "grad_norm": 3.5237081050872803, + "learning_rate": 6.1472260642385125e-06, + "log_odds_chosen": 11.03484058380127, + "log_odds_ratio": -8.832294406602159e-05, + "logits/chosen": 0.07353997230529785, + "logits/rejected": 0.06155790761113167, + "logps/chosen": -0.0001543796097394079, + "logps/rejected": -1.8885478973388672, + "loss": 0.3609, + "nll_loss": 0.09021463990211487, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.543796133773867e-05, + "rewards/margins": 0.18883934617042542, + "rewards/rejected": -0.18885478377342224, + "step": 12860 + }, + { + "epoch": 8.894190871369295, + "grad_norm": 4.468094348907471, + "learning_rate": 6.143384047948363e-06, + "log_odds_chosen": 11.360898971557617, + "log_odds_ratio": -2.829604272847064e-05, + "logits/chosen": -0.1274232566356659, + "logits/rejected": -0.1843823790550232, + "logps/chosen": -5.869750748388469e-05, + "logps/rejected": -1.824514627456665, + "loss": 0.39, + "nll_loss": 0.0974905714392662, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.869750566489529e-06, + "rewards/margins": 0.18244561553001404, + "rewards/rejected": -0.18245148658752441, + "step": 12861 + }, + { + "epoch": 8.894882434301522, + "grad_norm": 4.219329357147217, + "learning_rate": 6.139542031658215e-06, + "log_odds_chosen": 12.523521423339844, + "log_odds_ratio": -6.953141564736143e-06, + "logits/chosen": -0.1573798656463623, + "logits/rejected": -0.21202057600021362, + "logps/chosen": -0.0001332549873040989, + "logps/rejected": -3.31657338142395, + "loss": 0.3527, + "nll_loss": 0.08818377554416656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3325498002814129e-05, + "rewards/margins": 0.3316440284252167, + "rewards/rejected": -0.331657350063324, + "step": 12862 + }, + { + "epoch": 8.895573997233749, + "grad_norm": 2.5606212615966797, + "learning_rate": 6.135700015368066e-06, + "log_odds_chosen": 11.821147918701172, + "log_odds_ratio": -2.5821833332884125e-05, + "logits/chosen": -0.3105715215206146, + "logits/rejected": -0.3597787022590637, + "logps/chosen": -2.3031490854918957e-05, + "logps/rejected": -1.2378180027008057, + "loss": 0.2508, + "nll_loss": 0.06269891560077667, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3031489035929553e-06, + "rewards/margins": 0.12377950549125671, + "rewards/rejected": -0.12378180772066116, + "step": 12863 + }, + { + "epoch": 8.896265560165975, + "grad_norm": 3.686124324798584, + "learning_rate": 6.131857999077916e-06, + "log_odds_chosen": 12.19543743133545, + "log_odds_ratio": -4.4658067054115236e-05, + "logits/chosen": -0.023852862417697906, + "logits/rejected": -0.09455503523349762, + "logps/chosen": -0.0002681456971913576, + "logps/rejected": -3.197145700454712, + "loss": 0.4735, + "nll_loss": 0.1183595210313797, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.681456862774212e-05, + "rewards/margins": 0.31968778371810913, + "rewards/rejected": -0.31971457600593567, + "step": 12864 + }, + { + "epoch": 8.896957123098202, + "grad_norm": 3.9430251121520996, + "learning_rate": 6.128015982787767e-06, + "log_odds_chosen": 12.069925308227539, + "log_odds_ratio": -1.630558108445257e-05, + "logits/chosen": -0.44029539823532104, + "logits/rejected": -0.5152474641799927, + "logps/chosen": -0.00014673490659333766, + "logps/rejected": -3.1295437812805176, + "loss": 0.2993, + "nll_loss": 0.07482878863811493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4673489204142243e-05, + "rewards/margins": 0.31293970346450806, + "rewards/rejected": -0.3129543364048004, + "step": 12865 + }, + { + "epoch": 8.89764868603043, + "grad_norm": 4.9491353034973145, + "learning_rate": 6.124173966497619e-06, + "log_odds_chosen": 11.404975891113281, + "log_odds_ratio": -7.579707744298503e-05, + "logits/chosen": -0.14094297587871552, + "logits/rejected": -0.07794803380966187, + "logps/chosen": -0.0006937507423572242, + "logps/rejected": -3.015080690383911, + "loss": 0.3813, + "nll_loss": 0.09531792253255844, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.937507714610547e-05, + "rewards/margins": 0.30143871903419495, + "rewards/rejected": -0.3015080690383911, + "step": 12866 + }, + { + "epoch": 8.898340248962656, + "grad_norm": 4.752613067626953, + "learning_rate": 6.120331950207469e-06, + "log_odds_chosen": 10.431466102600098, + "log_odds_ratio": -0.0002222591283498332, + "logits/chosen": -0.10291004180908203, + "logits/rejected": -0.18914756178855896, + "logps/chosen": -0.0002860078529920429, + "logps/rejected": -2.165668249130249, + "loss": 0.5033, + "nll_loss": 0.12581023573875427, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8600787118193693e-05, + "rewards/margins": 0.2165382206439972, + "rewards/rejected": -0.216566801071167, + "step": 12867 + }, + { + "epoch": 8.899031811894883, + "grad_norm": 3.4255270957946777, + "learning_rate": 6.11648993391732e-06, + "log_odds_chosen": 11.101449966430664, + "log_odds_ratio": -0.00017685459170024842, + "logits/chosen": -0.3344976305961609, + "logits/rejected": -0.224727064371109, + "logps/chosen": -0.00043847967754118145, + "logps/rejected": -2.806305408477783, + "loss": 0.3789, + "nll_loss": 0.09469691663980484, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.384796193335205e-05, + "rewards/margins": 0.28058671951293945, + "rewards/rejected": -0.28063055872917175, + "step": 12868 + }, + { + "epoch": 8.89972337482711, + "grad_norm": 3.2223756313323975, + "learning_rate": 6.112647917627171e-06, + "log_odds_chosen": 10.269891738891602, + "log_odds_ratio": -0.0005653017433360219, + "logits/chosen": 0.10738126188516617, + "logits/rejected": 0.05207332223653793, + "logps/chosen": -0.0010791353415697813, + "logps/rejected": -2.058807373046875, + "loss": 0.3766, + "nll_loss": 0.094081811606884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010791353270178661, + "rewards/margins": 0.20577283203601837, + "rewards/rejected": -0.20588073134422302, + "step": 12869 + }, + { + "epoch": 8.900414937759336, + "grad_norm": 4.243490219116211, + "learning_rate": 6.108805901337022e-06, + "log_odds_chosen": 10.89694595336914, + "log_odds_ratio": -5.5372696806443855e-05, + "logits/chosen": 0.15985409915447235, + "logits/rejected": 0.021556168794631958, + "logps/chosen": -0.00019277591491118073, + "logps/rejected": -1.7762912511825562, + "loss": 0.4085, + "nll_loss": 0.10212212800979614, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9277591491118073e-05, + "rewards/margins": 0.1776098608970642, + "rewards/rejected": -0.17762914299964905, + "step": 12870 + }, + { + "epoch": 8.901106500691563, + "grad_norm": 4.758657932281494, + "learning_rate": 6.104963885046873e-06, + "log_odds_chosen": 10.790678977966309, + "log_odds_ratio": -0.0005490690236911178, + "logits/chosen": 0.3262699544429779, + "logits/rejected": 0.3296666145324707, + "logps/chosen": -0.0004946886328980327, + "logps/rejected": -1.9456416368484497, + "loss": 0.4463, + "nll_loss": 0.11152751743793488, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9468868382973596e-05, + "rewards/margins": 0.1945146918296814, + "rewards/rejected": -0.19456416368484497, + "step": 12871 + }, + { + "epoch": 8.90179806362379, + "grad_norm": 8.041678428649902, + "learning_rate": 6.101121868756724e-06, + "log_odds_chosen": 9.12094783782959, + "log_odds_ratio": -0.3240659534931183, + "logits/chosen": -0.3996647596359253, + "logits/rejected": -0.38311266899108887, + "logps/chosen": -0.05655330419540405, + "logps/rejected": -1.757213830947876, + "loss": 0.4893, + "nll_loss": 0.08991822600364685, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.00565533060580492, + "rewards/margins": 0.17006604373455048, + "rewards/rejected": -0.17572136223316193, + "step": 12872 + }, + { + "epoch": 8.902489626556017, + "grad_norm": 3.2363626956939697, + "learning_rate": 6.097279852466575e-06, + "log_odds_chosen": 10.426342010498047, + "log_odds_ratio": -0.00019338764832355082, + "logits/chosen": -0.3495548963546753, + "logits/rejected": -0.34510067105293274, + "logps/chosen": -0.00038078470970503986, + "logps/rejected": -1.8046441078186035, + "loss": 0.3205, + "nll_loss": 0.08011390268802643, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.807847315329127e-05, + "rewards/margins": 0.18042632937431335, + "rewards/rejected": -0.18046441674232483, + "step": 12873 + }, + { + "epoch": 8.903181189488244, + "grad_norm": 3.247476577758789, + "learning_rate": 6.0934378361764255e-06, + "log_odds_chosen": 12.517592430114746, + "log_odds_ratio": -2.2141441149869934e-05, + "logits/chosen": -0.9256491661071777, + "logits/rejected": -1.013659119606018, + "logps/chosen": -9.961918112821877e-05, + "logps/rejected": -3.2004740238189697, + "loss": 0.4517, + "nll_loss": 0.11293157190084457, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.961918294720817e-06, + "rewards/margins": 0.3200374245643616, + "rewards/rejected": -0.32004737854003906, + "step": 12874 + }, + { + "epoch": 8.90387275242047, + "grad_norm": 3.782608985900879, + "learning_rate": 6.089595819886277e-06, + "log_odds_chosen": 9.190512657165527, + "log_odds_ratio": -0.0004990790039300919, + "logits/chosen": -0.016513854265213013, + "logits/rejected": -0.03796660900115967, + "logps/chosen": -0.005915326066315174, + "logps/rejected": -2.5319509506225586, + "loss": 0.3965, + "nll_loss": 0.09908261150121689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0005915326182730496, + "rewards/margins": 0.25260356068611145, + "rewards/rejected": -0.2531951069831848, + "step": 12875 + }, + { + "epoch": 8.904564315352697, + "grad_norm": 3.9237937927246094, + "learning_rate": 6.085753803596127e-06, + "log_odds_chosen": 11.165157318115234, + "log_odds_ratio": -0.00014351973368320614, + "logits/chosen": -0.1661158800125122, + "logits/rejected": -0.18346044421195984, + "logps/chosen": -0.00042894965736195445, + "logps/rejected": -2.540163278579712, + "loss": 0.4959, + "nll_loss": 0.12395624816417694, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.289496428100392e-05, + "rewards/margins": 0.25397342443466187, + "rewards/rejected": -0.25401633977890015, + "step": 12876 + }, + { + "epoch": 8.905255878284924, + "grad_norm": 5.909702777862549, + "learning_rate": 6.081911787305979e-06, + "log_odds_chosen": 9.985353469848633, + "log_odds_ratio": -0.00014786752581130713, + "logits/chosen": -0.551906168460846, + "logits/rejected": -0.3992077708244324, + "logps/chosen": -0.00029689978691749275, + "logps/rejected": -1.4320263862609863, + "loss": 0.5359, + "nll_loss": 0.1339660882949829, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9689979783142917e-05, + "rewards/margins": 0.143172949552536, + "rewards/rejected": -0.14320263266563416, + "step": 12877 + }, + { + "epoch": 8.905947441217151, + "grad_norm": 3.7521986961364746, + "learning_rate": 6.078069771015829e-06, + "log_odds_chosen": 11.077217102050781, + "log_odds_ratio": -6.529257370857522e-05, + "logits/chosen": -0.3501136004924774, + "logits/rejected": -0.46925801038742065, + "logps/chosen": -0.00012800595141015947, + "logps/rejected": -2.1673057079315186, + "loss": 0.361, + "nll_loss": 0.09023625403642654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2800595868611708e-05, + "rewards/margins": 0.21671777963638306, + "rewards/rejected": -0.21673056483268738, + "step": 12878 + }, + { + "epoch": 8.906639004149378, + "grad_norm": 4.921710968017578, + "learning_rate": 6.07422775472568e-06, + "log_odds_chosen": 10.498296737670898, + "log_odds_ratio": -0.00024876854149624705, + "logits/chosen": -0.10423548519611359, + "logits/rejected": -0.07139390707015991, + "logps/chosen": -0.0006196919712238014, + "logps/rejected": -1.8094336986541748, + "loss": 0.6675, + "nll_loss": 0.16684389114379883, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.19692073087208e-05, + "rewards/margins": 0.18088141083717346, + "rewards/rejected": -0.18094336986541748, + "step": 12879 + }, + { + "epoch": 8.907330567081605, + "grad_norm": 3.8833465576171875, + "learning_rate": 6.070385738435532e-06, + "log_odds_chosen": 10.646595001220703, + "log_odds_ratio": -4.954760515829548e-05, + "logits/chosen": -0.4792764484882355, + "logits/rejected": -0.5197547674179077, + "logps/chosen": -0.00021351524628698826, + "logps/rejected": -2.2753090858459473, + "loss": 0.3743, + "nll_loss": 0.09356006234884262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1351523173507303e-05, + "rewards/margins": 0.22750955820083618, + "rewards/rejected": -0.22753089666366577, + "step": 12880 + }, + { + "epoch": 8.908022130013832, + "grad_norm": 4.093209743499756, + "learning_rate": 6.066543722145382e-06, + "log_odds_chosen": 10.409040451049805, + "log_odds_ratio": -0.00010184202983509749, + "logits/chosen": -0.47333845496177673, + "logits/rejected": -0.42190152406692505, + "logps/chosen": -0.00014664071204606444, + "logps/rejected": -1.6970136165618896, + "loss": 0.3442, + "nll_loss": 0.086033895611763, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4664070477010682e-05, + "rewards/margins": 0.1696867197751999, + "rewards/rejected": -0.16970139741897583, + "step": 12881 + }, + { + "epoch": 8.908713692946058, + "grad_norm": 4.007850646972656, + "learning_rate": 6.062701705855233e-06, + "log_odds_chosen": 10.015172958374023, + "log_odds_ratio": -0.00034102267818525434, + "logits/chosen": -0.16968387365341187, + "logits/rejected": -0.1261231005191803, + "logps/chosen": -0.0008383641252294183, + "logps/rejected": -2.1441397666931152, + "loss": 0.471, + "nll_loss": 0.11772003024816513, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.383641397813335e-05, + "rewards/margins": 0.21433015167713165, + "rewards/rejected": -0.21441400051116943, + "step": 12882 + }, + { + "epoch": 8.909405255878285, + "grad_norm": 3.5645110607147217, + "learning_rate": 6.058859689565084e-06, + "log_odds_chosen": 10.257688522338867, + "log_odds_ratio": -9.336881339550018e-05, + "logits/chosen": -0.2140951156616211, + "logits/rejected": -0.1839694380760193, + "logps/chosen": -0.00048242363845929503, + "logps/rejected": -1.7746849060058594, + "loss": 0.421, + "nll_loss": 0.10525010526180267, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8242360207950696e-05, + "rewards/margins": 0.17742027342319489, + "rewards/rejected": -0.17746850848197937, + "step": 12883 + }, + { + "epoch": 8.910096818810512, + "grad_norm": 4.715599060058594, + "learning_rate": 6.055017673274935e-06, + "log_odds_chosen": 11.176240921020508, + "log_odds_ratio": -0.00010343264148104936, + "logits/chosen": 0.056480832397937775, + "logits/rejected": 0.10483216494321823, + "logps/chosen": -0.0003942837647628039, + "logps/rejected": -2.299161195755005, + "loss": 0.39, + "nll_loss": 0.09748789668083191, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.942838156945072e-05, + "rewards/margins": 0.22987669706344604, + "rewards/rejected": -0.22991611063480377, + "step": 12884 + }, + { + "epoch": 8.910788381742739, + "grad_norm": 3.4167327880859375, + "learning_rate": 6.051175656984786e-06, + "log_odds_chosen": 11.99148941040039, + "log_odds_ratio": -0.00018259882926940918, + "logits/chosen": -0.8361790776252747, + "logits/rejected": -0.8355551958084106, + "logps/chosen": -0.0002680736070033163, + "logps/rejected": -2.5528435707092285, + "loss": 0.2954, + "nll_loss": 0.07383642345666885, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6807359972735867e-05, + "rewards/margins": 0.2552575469017029, + "rewards/rejected": -0.2552843689918518, + "step": 12885 + }, + { + "epoch": 8.911479944674966, + "grad_norm": 3.4526193141937256, + "learning_rate": 6.047333640694637e-06, + "log_odds_chosen": 11.774279594421387, + "log_odds_ratio": -1.844509824877605e-05, + "logits/chosen": -0.26306766271591187, + "logits/rejected": -0.4063612222671509, + "logps/chosen": -0.000151776141137816, + "logps/rejected": -2.588733673095703, + "loss": 0.3469, + "nll_loss": 0.08673368394374847, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.517761393188266e-05, + "rewards/margins": 0.25885817408561707, + "rewards/rejected": -0.2588733434677124, + "step": 12886 + }, + { + "epoch": 8.912171507607193, + "grad_norm": 3.6353368759155273, + "learning_rate": 6.043491624404488e-06, + "log_odds_chosen": 11.046255111694336, + "log_odds_ratio": -2.924896944023203e-05, + "logits/chosen": -0.6610513925552368, + "logits/rejected": -0.7078690528869629, + "logps/chosen": -0.000441804644651711, + "logps/rejected": -2.542691230773926, + "loss": 0.3444, + "nll_loss": 0.08609253168106079, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.418046592036262e-05, + "rewards/margins": 0.254224956035614, + "rewards/rejected": -0.2542691230773926, + "step": 12887 + }, + { + "epoch": 8.91286307053942, + "grad_norm": 5.413662910461426, + "learning_rate": 6.0396496081143386e-06, + "log_odds_chosen": 11.483522415161133, + "log_odds_ratio": -1.6104526366689242e-05, + "logits/chosen": -0.2953591048717499, + "logits/rejected": -0.28520074486732483, + "logps/chosen": -0.00025644036941230297, + "logps/rejected": -2.4214723110198975, + "loss": 0.554, + "nll_loss": 0.13850894570350647, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.564403803262394e-05, + "rewards/margins": 0.2421215921640396, + "rewards/rejected": -0.24214723706245422, + "step": 12888 + }, + { + "epoch": 8.913554633471646, + "grad_norm": 4.65927791595459, + "learning_rate": 6.03580759182419e-06, + "log_odds_chosen": 11.934712409973145, + "log_odds_ratio": -1.6666734154568985e-05, + "logits/chosen": -0.4538784921169281, + "logits/rejected": -0.5382453799247742, + "logps/chosen": -0.00015001199790276587, + "logps/rejected": -2.846168279647827, + "loss": 0.3555, + "nll_loss": 0.0888776034116745, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.500120106356917e-05, + "rewards/margins": 0.2846018373966217, + "rewards/rejected": -0.2846168279647827, + "step": 12889 + }, + { + "epoch": 8.914246196403873, + "grad_norm": 3.734205961227417, + "learning_rate": 6.03196557553404e-06, + "log_odds_chosen": 9.649690628051758, + "log_odds_ratio": -0.0001534527982585132, + "logits/chosen": -0.6405388116836548, + "logits/rejected": -0.6261313557624817, + "logps/chosen": -0.0006668938440270722, + "logps/rejected": -1.7804850339889526, + "loss": 0.3567, + "nll_loss": 0.08914894610643387, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668939022347331e-05, + "rewards/margins": 0.17798182368278503, + "rewards/rejected": -0.1780485212802887, + "step": 12890 + }, + { + "epoch": 8.9149377593361, + "grad_norm": 4.171419143676758, + "learning_rate": 6.028123559243892e-06, + "log_odds_chosen": 10.836984634399414, + "log_odds_ratio": -8.64302710397169e-05, + "logits/chosen": -0.40992042422294617, + "logits/rejected": -0.5382879972457886, + "logps/chosen": -0.000171576117281802, + "logps/rejected": -1.7506659030914307, + "loss": 0.5129, + "nll_loss": 0.12822216749191284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.715761209197808e-05, + "rewards/margins": 0.17504942417144775, + "rewards/rejected": -0.17506657540798187, + "step": 12891 + }, + { + "epoch": 8.915629322268327, + "grad_norm": 3.3719029426574707, + "learning_rate": 6.024281542953742e-06, + "log_odds_chosen": 11.46414852142334, + "log_odds_ratio": -6.169763946672902e-05, + "logits/chosen": -0.3673262894153595, + "logits/rejected": -0.3972077965736389, + "logps/chosen": -0.0002595953119453043, + "logps/rejected": -2.636369228363037, + "loss": 0.3731, + "nll_loss": 0.09327857196331024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5959534468711354e-05, + "rewards/margins": 0.26361095905303955, + "rewards/rejected": -0.26363691687583923, + "step": 12892 + }, + { + "epoch": 8.916320885200554, + "grad_norm": 5.283071041107178, + "learning_rate": 6.020439526663593e-06, + "log_odds_chosen": 11.639951705932617, + "log_odds_ratio": -2.1493771782843396e-05, + "logits/chosen": -0.12732785940170288, + "logits/rejected": -0.19323810935020447, + "logps/chosen": -0.0001850973058026284, + "logps/rejected": -2.5360217094421387, + "loss": 0.4399, + "nll_loss": 0.10997996479272842, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.850973058026284e-05, + "rewards/margins": 0.2535836696624756, + "rewards/rejected": -0.25360217690467834, + "step": 12893 + }, + { + "epoch": 8.91701244813278, + "grad_norm": 2.961632013320923, + "learning_rate": 6.016597510373445e-06, + "log_odds_chosen": 10.04461669921875, + "log_odds_ratio": -0.0001737783313728869, + "logits/chosen": -0.37068772315979004, + "logits/rejected": -0.334428995847702, + "logps/chosen": -0.0004964639665558934, + "logps/rejected": -1.6151020526885986, + "loss": 0.2749, + "nll_loss": 0.0687069222331047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9646398110780865e-05, + "rewards/margins": 0.16146056354045868, + "rewards/rejected": -0.16151021420955658, + "step": 12894 + }, + { + "epoch": 8.917704011065007, + "grad_norm": 3.3943188190460205, + "learning_rate": 6.012755494083295e-06, + "log_odds_chosen": 11.50641918182373, + "log_odds_ratio": -5.9767960919998586e-05, + "logits/chosen": -0.20371156930923462, + "logits/rejected": -0.21763645112514496, + "logps/chosen": -0.00039010701584629714, + "logps/rejected": -2.814880132675171, + "loss": 0.4541, + "nll_loss": 0.11352216452360153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9010701584629714e-05, + "rewards/margins": 0.28144901990890503, + "rewards/rejected": -0.28148800134658813, + "step": 12895 + }, + { + "epoch": 8.918395573997234, + "grad_norm": 4.094331741333008, + "learning_rate": 6.008913477793146e-06, + "log_odds_chosen": 11.03071403503418, + "log_odds_ratio": -6.582721835002303e-05, + "logits/chosen": -0.2888537049293518, + "logits/rejected": -0.36299294233322144, + "logps/chosen": -0.00029856865876354277, + "logps/rejected": -2.29645037651062, + "loss": 0.4599, + "nll_loss": 0.11497573554515839, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.985686660395004e-05, + "rewards/margins": 0.2296151965856552, + "rewards/rejected": -0.2296450436115265, + "step": 12896 + }, + { + "epoch": 8.91908713692946, + "grad_norm": 3.5496981143951416, + "learning_rate": 6.005071461502997e-06, + "log_odds_chosen": 11.905342102050781, + "log_odds_ratio": -1.7123305951827206e-05, + "logits/chosen": -0.5121790170669556, + "logits/rejected": -0.6361386179924011, + "logps/chosen": -0.0001013822911772877, + "logps/rejected": -2.4408626556396484, + "loss": 0.3271, + "nll_loss": 0.08177856355905533, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.013822929962771e-05, + "rewards/margins": 0.24407611787319183, + "rewards/rejected": -0.24408625066280365, + "step": 12897 + }, + { + "epoch": 8.919778699861688, + "grad_norm": 4.878851890563965, + "learning_rate": 6.001229445212848e-06, + "log_odds_chosen": 11.991606712341309, + "log_odds_ratio": -1.579675154061988e-05, + "logits/chosen": -0.5167713165283203, + "logits/rejected": -0.4996938705444336, + "logps/chosen": -9.84660437097773e-05, + "logps/rejected": -2.709446430206299, + "loss": 0.4521, + "nll_loss": 0.11301855742931366, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.84660437097773e-06, + "rewards/margins": 0.2709348201751709, + "rewards/rejected": -0.27094465494155884, + "step": 12898 + }, + { + "epoch": 8.920470262793915, + "grad_norm": 2.981738805770874, + "learning_rate": 5.9973874289226985e-06, + "log_odds_chosen": 12.06153678894043, + "log_odds_ratio": -1.4296781955636106e-05, + "logits/chosen": -0.5645366907119751, + "logits/rejected": -0.4843648672103882, + "logps/chosen": -7.617931987624615e-05, + "logps/rejected": -2.453253746032715, + "loss": 0.2887, + "nll_loss": 0.07217483222484589, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.617932169523556e-06, + "rewards/margins": 0.2453177571296692, + "rewards/rejected": -0.24532538652420044, + "step": 12899 + }, + { + "epoch": 8.921161825726141, + "grad_norm": 4.18194580078125, + "learning_rate": 5.99354541263255e-06, + "log_odds_chosen": 10.102523803710938, + "log_odds_ratio": -0.00013515673344954848, + "logits/chosen": -0.22662414610385895, + "logits/rejected": -0.37085866928100586, + "logps/chosen": -0.00022467051167041063, + "logps/rejected": -1.9578008651733398, + "loss": 0.6993, + "nll_loss": 0.17481519281864166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2467052986030467e-05, + "rewards/margins": 0.19575762748718262, + "rewards/rejected": -0.19578008353710175, + "step": 12900 + }, + { + "epoch": 8.921853388658368, + "grad_norm": 3.408759832382202, + "learning_rate": 5.989703396342401e-06, + "log_odds_chosen": 11.4803466796875, + "log_odds_ratio": -2.0814597519347444e-05, + "logits/chosen": -0.6959565877914429, + "logits/rejected": -0.7700467705726624, + "logps/chosen": -0.00016696771490387619, + "logps/rejected": -2.2839138507843018, + "loss": 0.4165, + "nll_loss": 0.10413195192813873, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.669677294557914e-05, + "rewards/margins": 0.2283746898174286, + "rewards/rejected": -0.2283913791179657, + "step": 12901 + }, + { + "epoch": 8.922544951590595, + "grad_norm": 3.7199792861938477, + "learning_rate": 5.985861380052252e-06, + "log_odds_chosen": 12.063653945922852, + "log_odds_ratio": -1.393444836139679e-05, + "logits/chosen": -0.14315587282180786, + "logits/rejected": -0.15993037819862366, + "logps/chosen": -0.00010108323476742953, + "logps/rejected": -2.712588310241699, + "loss": 0.4464, + "nll_loss": 0.11160380393266678, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0108323294844013e-05, + "rewards/margins": 0.2712487280368805, + "rewards/rejected": -0.2712588310241699, + "step": 12902 + }, + { + "epoch": 8.923236514522822, + "grad_norm": 2.6532492637634277, + "learning_rate": 5.982019363762103e-06, + "log_odds_chosen": 11.91860580444336, + "log_odds_ratio": -0.0002539358683861792, + "logits/chosen": -0.8610085844993591, + "logits/rejected": -0.6951379776000977, + "logps/chosen": -0.0005537466495297849, + "logps/rejected": -2.798727512359619, + "loss": 0.2503, + "nll_loss": 0.06255386024713516, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.537466495297849e-05, + "rewards/margins": 0.2798174023628235, + "rewards/rejected": -0.2798727750778198, + "step": 12903 + }, + { + "epoch": 8.923928077455049, + "grad_norm": 3.5159571170806885, + "learning_rate": 5.978177347471953e-06, + "log_odds_chosen": 10.942304611206055, + "log_odds_ratio": -2.9865241231163964e-05, + "logits/chosen": -0.19039902091026306, + "logits/rejected": -0.3441365659236908, + "logps/chosen": -0.00013759138528257608, + "logps/rejected": -2.0489914417266846, + "loss": 0.3204, + "nll_loss": 0.08008623123168945, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.375913961965125e-05, + "rewards/margins": 0.20488540828227997, + "rewards/rejected": -0.2048991620540619, + "step": 12904 + }, + { + "epoch": 8.924619640387276, + "grad_norm": 4.046666622161865, + "learning_rate": 5.974335331181805e-06, + "log_odds_chosen": 10.682632446289062, + "log_odds_ratio": -0.00018207883113063872, + "logits/chosen": -0.4414520859718323, + "logits/rejected": -0.4927637279033661, + "logps/chosen": -0.00041065309778787196, + "logps/rejected": -2.2062034606933594, + "loss": 0.4563, + "nll_loss": 0.1140667051076889, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.106530832359567e-05, + "rewards/margins": 0.22057931125164032, + "rewards/rejected": -0.22062036395072937, + "step": 12905 + }, + { + "epoch": 8.925311203319502, + "grad_norm": 3.6008899211883545, + "learning_rate": 5.9704933148916554e-06, + "log_odds_chosen": 10.777645111083984, + "log_odds_ratio": -3.5626049793791026e-05, + "logits/chosen": -0.2412605583667755, + "logits/rejected": -0.3672165274620056, + "logps/chosen": -0.00024223854416050017, + "logps/rejected": -1.8778043985366821, + "loss": 0.3319, + "nll_loss": 0.08297750353813171, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4223854779847898e-05, + "rewards/margins": 0.1877562254667282, + "rewards/rejected": -0.1877804547548294, + "step": 12906 + }, + { + "epoch": 8.92600276625173, + "grad_norm": 4.40451717376709, + "learning_rate": 5.966651298601506e-06, + "log_odds_chosen": 10.959539413452148, + "log_odds_ratio": -6.162847421364859e-05, + "logits/chosen": -0.4150117337703705, + "logits/rejected": -0.5284802317619324, + "logps/chosen": -0.00032370290136896074, + "logps/rejected": -2.604623556137085, + "loss": 0.3389, + "nll_loss": 0.08471447229385376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.23702915920876e-05, + "rewards/margins": 0.2604300081729889, + "rewards/rejected": -0.26046237349510193, + "step": 12907 + }, + { + "epoch": 8.926694329183956, + "grad_norm": 4.132070064544678, + "learning_rate": 5.962809282311358e-06, + "log_odds_chosen": 10.724711418151855, + "log_odds_ratio": -0.00018126626673620194, + "logits/chosen": -0.2756832242012024, + "logits/rejected": -0.46245506405830383, + "logps/chosen": -0.00045413419138640165, + "logps/rejected": -2.5995662212371826, + "loss": 0.2985, + "nll_loss": 0.07460995763540268, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.541342059383169e-05, + "rewards/margins": 0.2599112391471863, + "rewards/rejected": -0.25995662808418274, + "step": 12908 + }, + { + "epoch": 8.927385892116183, + "grad_norm": 4.18914270401001, + "learning_rate": 5.9589672660212085e-06, + "log_odds_chosen": 10.33116340637207, + "log_odds_ratio": -0.00024307092826347798, + "logits/chosen": -0.40187928080558777, + "logits/rejected": -0.4286579489707947, + "logps/chosen": -0.0008695382857695222, + "logps/rejected": -1.8647468090057373, + "loss": 0.7077, + "nll_loss": 0.1768883615732193, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.695383439771831e-05, + "rewards/margins": 0.18638773262500763, + "rewards/rejected": -0.18647469580173492, + "step": 12909 + }, + { + "epoch": 8.92807745504841, + "grad_norm": 3.1195945739746094, + "learning_rate": 5.955125249731059e-06, + "log_odds_chosen": 9.846996307373047, + "log_odds_ratio": -0.00019851140677928925, + "logits/chosen": -0.24981671571731567, + "logits/rejected": -0.4254041314125061, + "logps/chosen": -0.00045403523836284876, + "logps/rejected": -1.7068804502487183, + "loss": 0.293, + "nll_loss": 0.07322686910629272, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5403528929455206e-05, + "rewards/margins": 0.1706426441669464, + "rewards/rejected": -0.17068806290626526, + "step": 12910 + }, + { + "epoch": 8.928769017980636, + "grad_norm": 4.41091775894165, + "learning_rate": 5.95128323344091e-06, + "log_odds_chosen": 11.76103401184082, + "log_odds_ratio": -3.7582372897304595e-05, + "logits/chosen": -0.11107562482357025, + "logits/rejected": -0.142415851354599, + "logps/chosen": -0.0002628167567308992, + "logps/rejected": -3.3587985038757324, + "loss": 0.487, + "nll_loss": 0.12175235152244568, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.628167567308992e-05, + "rewards/margins": 0.33585357666015625, + "rewards/rejected": -0.3358798623085022, + "step": 12911 + }, + { + "epoch": 8.929460580912863, + "grad_norm": 5.352042198181152, + "learning_rate": 5.947441217150761e-06, + "log_odds_chosen": 10.705965042114258, + "log_odds_ratio": -5.521774073713459e-05, + "logits/chosen": -0.138837069272995, + "logits/rejected": -0.13578133285045624, + "logps/chosen": -0.00019610003801062703, + "logps/rejected": -1.6561689376831055, + "loss": 0.6693, + "nll_loss": 0.16732990741729736, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9610004528658465e-05, + "rewards/margins": 0.16559728980064392, + "rewards/rejected": -0.16561688482761383, + "step": 12912 + }, + { + "epoch": 8.93015214384509, + "grad_norm": 2.649043560028076, + "learning_rate": 5.9435992008606115e-06, + "log_odds_chosen": 10.769306182861328, + "log_odds_ratio": -3.2309049856849015e-05, + "logits/chosen": -0.24757800996303558, + "logits/rejected": -0.2868199050426483, + "logps/chosen": -0.00011812502634711564, + "logps/rejected": -1.742016315460205, + "loss": 0.2712, + "nll_loss": 0.06779469549655914, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1812502634711564e-05, + "rewards/margins": 0.17418982088565826, + "rewards/rejected": -0.17420163750648499, + "step": 12913 + }, + { + "epoch": 8.930843706777317, + "grad_norm": 6.05476713180542, + "learning_rate": 5.939757184570463e-06, + "log_odds_chosen": 10.862494468688965, + "log_odds_ratio": -7.975931657711044e-05, + "logits/chosen": -0.015442818403244019, + "logits/rejected": -0.0983901172876358, + "logps/chosen": -0.00032114313216879964, + "logps/rejected": -2.0443334579467773, + "loss": 0.4352, + "nll_loss": 0.10878505557775497, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.21143124892842e-05, + "rewards/margins": 0.20440122485160828, + "rewards/rejected": -0.2044333517551422, + "step": 12914 + }, + { + "epoch": 8.931535269709544, + "grad_norm": 3.4305267333984375, + "learning_rate": 5.935915168280314e-06, + "log_odds_chosen": 11.25173282623291, + "log_odds_ratio": -5.024883648729883e-05, + "logits/chosen": -0.4715416431427002, + "logits/rejected": -0.3380431532859802, + "logps/chosen": -0.00018392120546195656, + "logps/rejected": -2.075536012649536, + "loss": 0.3506, + "nll_loss": 0.08765394985675812, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.839212200138718e-05, + "rewards/margins": 0.20753520727157593, + "rewards/rejected": -0.20755359530448914, + "step": 12915 + }, + { + "epoch": 8.93222683264177, + "grad_norm": 3.3291497230529785, + "learning_rate": 5.932073151990165e-06, + "log_odds_chosen": 10.705180168151855, + "log_odds_ratio": -0.00010199702228419483, + "logits/chosen": -0.5880506038665771, + "logits/rejected": -0.66464763879776, + "logps/chosen": -0.00012990215327590704, + "logps/rejected": -1.8533952236175537, + "loss": 0.4839, + "nll_loss": 0.12096136808395386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2990216418984346e-05, + "rewards/margins": 0.18532654643058777, + "rewards/rejected": -0.1853395253419876, + "step": 12916 + }, + { + "epoch": 8.932918395573997, + "grad_norm": 4.643276691436768, + "learning_rate": 5.928231135700016e-06, + "log_odds_chosen": 10.787064552307129, + "log_odds_ratio": -5.836702621309087e-05, + "logits/chosen": -0.8610040545463562, + "logits/rejected": -0.8191035389900208, + "logps/chosen": -0.00011820576037280262, + "logps/rejected": -1.7074048519134521, + "loss": 0.709, + "nll_loss": 0.17724096775054932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1820576219179202e-05, + "rewards/margins": 0.1707286536693573, + "rewards/rejected": -0.17074048519134521, + "step": 12917 + }, + { + "epoch": 8.933609958506224, + "grad_norm": 6.3906569480896, + "learning_rate": 5.924389119409866e-06, + "log_odds_chosen": 11.986827850341797, + "log_odds_ratio": -3.079729140154086e-05, + "logits/chosen": 0.005445163231343031, + "logits/rejected": -0.07006532698869705, + "logps/chosen": -0.00014014035696163774, + "logps/rejected": -2.962845802307129, + "loss": 0.5773, + "nll_loss": 0.1443236768245697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4014036423759535e-05, + "rewards/margins": 0.29627057909965515, + "rewards/rejected": -0.29628461599349976, + "step": 12918 + }, + { + "epoch": 8.934301521438451, + "grad_norm": 4.157391548156738, + "learning_rate": 5.920547103119718e-06, + "log_odds_chosen": 9.951608657836914, + "log_odds_ratio": -0.0008364100940525532, + "logits/chosen": 0.005071647465229034, + "logits/rejected": -0.21576163172721863, + "logps/chosen": -0.0008652383694425225, + "logps/rejected": -2.1721723079681396, + "loss": 0.5196, + "nll_loss": 0.12982165813446045, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.65238398546353e-05, + "rewards/margins": 0.21713072061538696, + "rewards/rejected": -0.21721722185611725, + "step": 12919 + }, + { + "epoch": 8.934993084370678, + "grad_norm": 3.5984549522399902, + "learning_rate": 5.9167050868295685e-06, + "log_odds_chosen": 10.075098037719727, + "log_odds_ratio": -0.00210709311068058, + "logits/chosen": -0.08690416812896729, + "logits/rejected": -0.06445138901472092, + "logps/chosen": -0.015951991081237793, + "logps/rejected": -1.8065953254699707, + "loss": 0.3356, + "nll_loss": 0.08369840681552887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015951991081237793, + "rewards/margins": 0.1790643334388733, + "rewards/rejected": -0.18065953254699707, + "step": 12920 + }, + { + "epoch": 8.935684647302905, + "grad_norm": 3.1239354610443115, + "learning_rate": 5.912863070539419e-06, + "log_odds_chosen": 11.789560317993164, + "log_odds_ratio": -1.7286456568399444e-05, + "logits/chosen": -0.07189354300498962, + "logits/rejected": -0.1427178978919983, + "logps/chosen": -9.551585389999673e-05, + "logps/rejected": -2.617459774017334, + "loss": 0.353, + "nll_loss": 0.08825767785310745, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.551585208100732e-06, + "rewards/margins": 0.2617364227771759, + "rewards/rejected": -0.26174598932266235, + "step": 12921 + }, + { + "epoch": 8.936376210235132, + "grad_norm": 2.6034464836120605, + "learning_rate": 5.90902105424927e-06, + "log_odds_chosen": 12.263644218444824, + "log_odds_ratio": -2.3179472918855026e-05, + "logits/chosen": -0.36033573746681213, + "logits/rejected": -0.4884033799171448, + "logps/chosen": -7.522512169089168e-05, + "logps/rejected": -2.77170467376709, + "loss": 0.2692, + "nll_loss": 0.06729036569595337, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.522512078139698e-06, + "rewards/margins": 0.27716296911239624, + "rewards/rejected": -0.27717047929763794, + "step": 12922 + }, + { + "epoch": 8.937067773167358, + "grad_norm": 3.0454115867614746, + "learning_rate": 5.9051790379591216e-06, + "log_odds_chosen": 11.782583236694336, + "log_odds_ratio": -3.901875970768742e-05, + "logits/chosen": -0.2533440887928009, + "logits/rejected": -0.3794962465763092, + "logps/chosen": -0.00028467184165492654, + "logps/rejected": -2.714261531829834, + "loss": 0.4794, + "nll_loss": 0.11983369290828705, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.846718234650325e-05, + "rewards/margins": 0.2713976800441742, + "rewards/rejected": -0.27142617106437683, + "step": 12923 + }, + { + "epoch": 8.937759336099585, + "grad_norm": 5.516876220703125, + "learning_rate": 5.901337021668972e-06, + "log_odds_chosen": 10.510817527770996, + "log_odds_ratio": -8.967219037003815e-05, + "logits/chosen": -0.07414241135120392, + "logits/rejected": -0.1742817759513855, + "logps/chosen": -0.0008987659821286798, + "logps/rejected": -1.9164701700210571, + "loss": 0.3752, + "nll_loss": 0.09378183633089066, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.987660112325102e-05, + "rewards/margins": 0.1915571391582489, + "rewards/rejected": -0.1916470229625702, + "step": 12924 + }, + { + "epoch": 8.938450899031812, + "grad_norm": 3.05340576171875, + "learning_rate": 5.897495005378823e-06, + "log_odds_chosen": 10.531975746154785, + "log_odds_ratio": -0.0001402997731929645, + "logits/chosen": -0.13580265641212463, + "logits/rejected": -0.17524084448814392, + "logps/chosen": -0.0002469943428877741, + "logps/rejected": -1.6168746948242188, + "loss": 0.2895, + "nll_loss": 0.07236262410879135, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4699435016373172e-05, + "rewards/margins": 0.161662757396698, + "rewards/rejected": -0.1616874635219574, + "step": 12925 + }, + { + "epoch": 8.939142461964039, + "grad_norm": 3.0517728328704834, + "learning_rate": 5.893652989088675e-06, + "log_odds_chosen": 10.63259506225586, + "log_odds_ratio": -0.00014863951946608722, + "logits/chosen": -0.848569929599762, + "logits/rejected": -0.7625840902328491, + "logps/chosen": -0.000918300764169544, + "logps/rejected": -2.0427727699279785, + "loss": 0.3004, + "nll_loss": 0.075095035135746, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.183008660329506e-05, + "rewards/margins": 0.20418545603752136, + "rewards/rejected": -0.20427730679512024, + "step": 12926 + }, + { + "epoch": 8.939834024896266, + "grad_norm": 2.812586545944214, + "learning_rate": 5.8898109727985246e-06, + "log_odds_chosen": 11.898021697998047, + "log_odds_ratio": -4.053749216836877e-05, + "logits/chosen": 0.04052726551890373, + "logits/rejected": -0.009697234258055687, + "logps/chosen": -0.0004591932229232043, + "logps/rejected": -3.321937084197998, + "loss": 0.3124, + "nll_loss": 0.07810840010643005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.591932884068228e-05, + "rewards/margins": 0.3321478068828583, + "rewards/rejected": -0.3321937024593353, + "step": 12927 + }, + { + "epoch": 8.940525587828493, + "grad_norm": 2.847050666809082, + "learning_rate": 5.885968956508376e-06, + "log_odds_chosen": 10.623669624328613, + "log_odds_ratio": -0.00014860433293506503, + "logits/chosen": -0.14876213669776917, + "logits/rejected": -0.23775899410247803, + "logps/chosen": -0.00025466305669397116, + "logps/rejected": -1.7008178234100342, + "loss": 0.287, + "nll_loss": 0.07172475010156631, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5466306396992877e-05, + "rewards/margins": 0.1700563132762909, + "rewards/rejected": -0.17008177936077118, + "step": 12928 + }, + { + "epoch": 8.94121715076072, + "grad_norm": 3.5438013076782227, + "learning_rate": 5.882126940218227e-06, + "log_odds_chosen": 11.95173454284668, + "log_odds_ratio": -2.073194264085032e-05, + "logits/chosen": -0.0442764014005661, + "logits/rejected": -0.09602615237236023, + "logps/chosen": -0.0001264129241462797, + "logps/rejected": -2.8235487937927246, + "loss": 0.4455, + "nll_loss": 0.11136940866708755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2641294233617373e-05, + "rewards/margins": 0.28234225511550903, + "rewards/rejected": -0.2823548913002014, + "step": 12929 + }, + { + "epoch": 8.941908713692946, + "grad_norm": 2.4485747814178467, + "learning_rate": 5.878284923928078e-06, + "log_odds_chosen": 11.85983657836914, + "log_odds_ratio": -1.5567180525977165e-05, + "logits/chosen": -0.5800382494926453, + "logits/rejected": -0.5141011476516724, + "logps/chosen": -0.0001387509546475485, + "logps/rejected": -2.8985164165496826, + "loss": 0.3206, + "nll_loss": 0.08015190809965134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3875096556148492e-05, + "rewards/margins": 0.2898377776145935, + "rewards/rejected": -0.28985166549682617, + "step": 12930 + }, + { + "epoch": 8.942600276625173, + "grad_norm": 5.132098197937012, + "learning_rate": 5.874442907637929e-06, + "log_odds_chosen": 11.904727935791016, + "log_odds_ratio": -1.5229367818392348e-05, + "logits/chosen": -0.3762471675872803, + "logits/rejected": -0.4908618927001953, + "logps/chosen": -0.00013014674186706543, + "logps/rejected": -2.568297863006592, + "loss": 0.3269, + "nll_loss": 0.08172107487916946, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3014674550504424e-05, + "rewards/margins": 0.2568167746067047, + "rewards/rejected": -0.25682979822158813, + "step": 12931 + }, + { + "epoch": 8.9432918395574, + "grad_norm": 6.083807945251465, + "learning_rate": 5.870600891347779e-06, + "log_odds_chosen": 11.309540748596191, + "log_odds_ratio": -6.240185030037537e-05, + "logits/chosen": -0.37128546833992004, + "logits/rejected": -0.2944900393486023, + "logps/chosen": -0.00011243198241572827, + "logps/rejected": -2.2674102783203125, + "loss": 0.653, + "nll_loss": 0.16324546933174133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1243198059673887e-05, + "rewards/margins": 0.22672978043556213, + "rewards/rejected": -0.2267410308122635, + "step": 12932 + }, + { + "epoch": 8.943983402489627, + "grad_norm": 6.254725456237793, + "learning_rate": 5.866758875057631e-06, + "log_odds_chosen": 11.77969741821289, + "log_odds_ratio": -1.0002793715102598e-05, + "logits/chosen": -0.018555432558059692, + "logits/rejected": -0.08299314975738525, + "logps/chosen": -0.00010570554877631366, + "logps/rejected": -2.568028450012207, + "loss": 0.5376, + "nll_loss": 0.13440899550914764, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0570555787126068e-05, + "rewards/margins": 0.2567923069000244, + "rewards/rejected": -0.25680285692214966, + "step": 12933 + }, + { + "epoch": 8.944674965421854, + "grad_norm": 3.648500680923462, + "learning_rate": 5.8629168587674815e-06, + "log_odds_chosen": 10.733579635620117, + "log_odds_ratio": -9.244455577572808e-05, + "logits/chosen": -0.45096123218536377, + "logits/rejected": -0.4225650727748871, + "logps/chosen": -0.0001773490075720474, + "logps/rejected": -1.7317311763763428, + "loss": 0.3204, + "nll_loss": 0.08009810745716095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7734901121002622e-05, + "rewards/margins": 0.17315536737442017, + "rewards/rejected": -0.17317311465740204, + "step": 12934 + }, + { + "epoch": 8.94536652835408, + "grad_norm": 5.030453205108643, + "learning_rate": 5.859074842477332e-06, + "log_odds_chosen": 11.375288009643555, + "log_odds_ratio": -2.013690937019419e-05, + "logits/chosen": -0.36561113595962524, + "logits/rejected": -0.45199379324913025, + "logps/chosen": -0.00012862158473581076, + "logps/rejected": -2.1877007484436035, + "loss": 0.3258, + "nll_loss": 0.08145881444215775, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2862159564974718e-05, + "rewards/margins": 0.21875722706317902, + "rewards/rejected": -0.2187700867652893, + "step": 12935 + }, + { + "epoch": 8.946058091286307, + "grad_norm": 4.5273942947387695, + "learning_rate": 5.855232826187183e-06, + "log_odds_chosen": 11.617217063903809, + "log_odds_ratio": -0.00019359974248800427, + "logits/chosen": -0.2258719503879547, + "logits/rejected": -0.3301970362663269, + "logps/chosen": -0.0002116190444212407, + "logps/rejected": -2.6876113414764404, + "loss": 0.4106, + "nll_loss": 0.10263660550117493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1161906261113472e-05, + "rewards/margins": 0.2687399685382843, + "rewards/rejected": -0.26876112818717957, + "step": 12936 + }, + { + "epoch": 8.946749654218534, + "grad_norm": 3.5262722969055176, + "learning_rate": 5.851390809897035e-06, + "log_odds_chosen": 11.726446151733398, + "log_odds_ratio": -1.7297075828537345e-05, + "logits/chosen": -0.7810917496681213, + "logits/rejected": -0.7564036846160889, + "logps/chosen": -0.00010531338193686679, + "logps/rejected": -2.2990102767944336, + "loss": 0.2811, + "nll_loss": 0.07026363164186478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.053133837558562e-05, + "rewards/margins": 0.22989048063755035, + "rewards/rejected": -0.2299010157585144, + "step": 12937 + }, + { + "epoch": 8.947441217150761, + "grad_norm": 3.5038387775421143, + "learning_rate": 5.847548793606885e-06, + "log_odds_chosen": 11.217157363891602, + "log_odds_ratio": -6.293215119512752e-05, + "logits/chosen": 0.015321101993322372, + "logits/rejected": -0.06858345121145248, + "logps/chosen": -0.0003072897670790553, + "logps/rejected": -2.6550493240356445, + "loss": 0.4326, + "nll_loss": 0.10815395414829254, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0728973797522485e-05, + "rewards/margins": 0.26547420024871826, + "rewards/rejected": -0.2655049264431, + "step": 12938 + }, + { + "epoch": 8.948132780082988, + "grad_norm": 6.250890254974365, + "learning_rate": 5.843706777316736e-06, + "log_odds_chosen": 10.85504150390625, + "log_odds_ratio": -4.487161641009152e-05, + "logits/chosen": -0.051928721368312836, + "logits/rejected": -0.1429467797279358, + "logps/chosen": -0.00021378413657657802, + "logps/rejected": -2.436502456665039, + "loss": 0.4156, + "nll_loss": 0.1038907915353775, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1378415112849325e-05, + "rewards/margins": 0.2436288595199585, + "rewards/rejected": -0.24365025758743286, + "step": 12939 + }, + { + "epoch": 8.948824343015215, + "grad_norm": 4.680706024169922, + "learning_rate": 5.839864761026588e-06, + "log_odds_chosen": 10.078415870666504, + "log_odds_ratio": -0.0001346963836112991, + "logits/chosen": -0.7340799570083618, + "logits/rejected": -0.7243325710296631, + "logps/chosen": -0.00021745695266872644, + "logps/rejected": -1.7046865224838257, + "loss": 0.398, + "nll_loss": 0.09948675334453583, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1745694539276883e-05, + "rewards/margins": 0.17044691741466522, + "rewards/rejected": -0.17046865820884705, + "step": 12940 + }, + { + "epoch": 8.949515905947441, + "grad_norm": 3.1285927295684814, + "learning_rate": 5.836022744736438e-06, + "log_odds_chosen": 10.486005783081055, + "log_odds_ratio": -0.0003326584701426327, + "logits/chosen": -0.3442254662513733, + "logits/rejected": -0.44644254446029663, + "logps/chosen": -0.0007124101975932717, + "logps/rejected": -2.2549710273742676, + "loss": 0.3062, + "nll_loss": 0.07652806490659714, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.124102558009326e-05, + "rewards/margins": 0.2254258692264557, + "rewards/rejected": -0.22549709677696228, + "step": 12941 + }, + { + "epoch": 8.950207468879668, + "grad_norm": 4.95887565612793, + "learning_rate": 5.832180728446289e-06, + "log_odds_chosen": 10.1839599609375, + "log_odds_ratio": -0.00018181573250330985, + "logits/chosen": -0.38977324962615967, + "logits/rejected": -0.43788349628448486, + "logps/chosen": -0.0007927245460450649, + "logps/rejected": -2.435636520385742, + "loss": 0.5751, + "nll_loss": 0.1437670737504959, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.927245314931497e-05, + "rewards/margins": 0.24348436295986176, + "rewards/rejected": -0.24356365203857422, + "step": 12942 + }, + { + "epoch": 8.950899031811895, + "grad_norm": 5.5691022872924805, + "learning_rate": 5.82833871215614e-06, + "log_odds_chosen": 10.578014373779297, + "log_odds_ratio": -0.00010544771066633984, + "logits/chosen": -0.3612707853317261, + "logits/rejected": -0.3483430743217468, + "logps/chosen": -0.00033310920116491616, + "logps/rejected": -1.9903244972229004, + "loss": 0.2752, + "nll_loss": 0.06878717243671417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3310920116491616e-05, + "rewards/margins": 0.19899912178516388, + "rewards/rejected": -0.19903242588043213, + "step": 12943 + }, + { + "epoch": 8.951590594744122, + "grad_norm": 4.931035041809082, + "learning_rate": 5.824496695865991e-06, + "log_odds_chosen": 11.544112205505371, + "log_odds_ratio": -8.371780131710693e-05, + "logits/chosen": -0.01711699366569519, + "logits/rejected": -0.12601056694984436, + "logps/chosen": -0.00022672172053717077, + "logps/rejected": -2.5129387378692627, + "loss": 0.5678, + "nll_loss": 0.14193536341190338, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2672171326121315e-05, + "rewards/margins": 0.25127118825912476, + "rewards/rejected": -0.2512938678264618, + "step": 12944 + }, + { + "epoch": 8.952282157676349, + "grad_norm": 3.42557430267334, + "learning_rate": 5.8206546795758414e-06, + "log_odds_chosen": 11.254566192626953, + "log_odds_ratio": -2.6210473151877522e-05, + "logits/chosen": -0.6701757311820984, + "logits/rejected": -0.7973129153251648, + "logps/chosen": -0.00014112208737060428, + "logps/rejected": -2.1129250526428223, + "loss": 0.3808, + "nll_loss": 0.09519369900226593, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4112210919847712e-05, + "rewards/margins": 0.21127840876579285, + "rewards/rejected": -0.21129250526428223, + "step": 12945 + }, + { + "epoch": 8.952973720608576, + "grad_norm": 4.1768012046813965, + "learning_rate": 5.816812663285692e-06, + "log_odds_chosen": 11.903830528259277, + "log_odds_ratio": -7.812883268343285e-06, + "logits/chosen": -0.0588909387588501, + "logits/rejected": -0.13018256425857544, + "logps/chosen": -6.235972978174686e-05, + "logps/rejected": -2.16908860206604, + "loss": 0.3494, + "nll_loss": 0.08734458684921265, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235973614820978e-06, + "rewards/margins": 0.21690261363983154, + "rewards/rejected": -0.21690885722637177, + "step": 12946 + }, + { + "epoch": 8.953665283540802, + "grad_norm": 2.765223979949951, + "learning_rate": 5.812970646995544e-06, + "log_odds_chosen": 11.580140113830566, + "log_odds_ratio": -6.168565596453846e-05, + "logits/chosen": -0.2464328408241272, + "logits/rejected": -0.3394399881362915, + "logps/chosen": -0.00020144270092714578, + "logps/rejected": -2.749572277069092, + "loss": 0.2704, + "nll_loss": 0.06760434806346893, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0144269001320936e-05, + "rewards/margins": 0.27493709325790405, + "rewards/rejected": -0.27495723962783813, + "step": 12947 + }, + { + "epoch": 8.95435684647303, + "grad_norm": 3.754606008529663, + "learning_rate": 5.8091286307053945e-06, + "log_odds_chosen": 12.030777931213379, + "log_odds_ratio": -0.0003125116927549243, + "logits/chosen": -0.49243322014808655, + "logits/rejected": -0.5307193994522095, + "logps/chosen": -0.00020017765928059816, + "logps/rejected": -2.820101737976074, + "loss": 0.3514, + "nll_loss": 0.0878116562962532, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.001776738325134e-05, + "rewards/margins": 0.2819901704788208, + "rewards/rejected": -0.28201019763946533, + "step": 12948 + }, + { + "epoch": 8.955048409405256, + "grad_norm": 4.734391689300537, + "learning_rate": 5.805286614415245e-06, + "log_odds_chosen": 11.878615379333496, + "log_odds_ratio": -1.0572795872576535e-05, + "logits/chosen": -0.26124829053878784, + "logits/rejected": -0.26312682032585144, + "logps/chosen": -0.00014705224020872265, + "logps/rejected": -2.939932107925415, + "loss": 0.4128, + "nll_loss": 0.10318911075592041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4705223293276504e-05, + "rewards/margins": 0.2939785122871399, + "rewards/rejected": -0.293993204832077, + "step": 12949 + }, + { + "epoch": 8.955739972337483, + "grad_norm": 3.333796977996826, + "learning_rate": 5.801444598125096e-06, + "log_odds_chosen": 9.838175773620605, + "log_odds_ratio": -0.001023567165248096, + "logits/chosen": -0.053210288286209106, + "logits/rejected": -0.2315722405910492, + "logps/chosen": -0.0015227515250444412, + "logps/rejected": -2.102912664413452, + "loss": 0.3932, + "nll_loss": 0.0982080027461052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015227515541482717, + "rewards/margins": 0.21013899147510529, + "rewards/rejected": -0.21029126644134521, + "step": 12950 + }, + { + "epoch": 8.95643153526971, + "grad_norm": 4.7104082107543945, + "learning_rate": 5.797602581834948e-06, + "log_odds_chosen": 12.069852828979492, + "log_odds_ratio": -2.0408857380971313e-05, + "logits/chosen": -0.7314295172691345, + "logits/rejected": -0.7798600196838379, + "logps/chosen": -7.370587263721973e-05, + "logps/rejected": -2.668787956237793, + "loss": 0.4181, + "nll_loss": 0.10451561212539673, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.370587354671443e-06, + "rewards/margins": 0.26687145233154297, + "rewards/rejected": -0.26687881350517273, + "step": 12951 + }, + { + "epoch": 8.957123098201937, + "grad_norm": 4.402284145355225, + "learning_rate": 5.793760565544798e-06, + "log_odds_chosen": 11.385189056396484, + "log_odds_ratio": -1.8733659089775756e-05, + "logits/chosen": -0.5349085330963135, + "logits/rejected": -0.529983639717102, + "logps/chosen": -0.00043169912532903254, + "logps/rejected": -2.567232370376587, + "loss": 0.6042, + "nll_loss": 0.1510457843542099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.31699154432863e-05, + "rewards/margins": 0.25668007135391235, + "rewards/rejected": -0.25672322511672974, + "step": 12952 + }, + { + "epoch": 8.957814661134163, + "grad_norm": 2.8857409954071045, + "learning_rate": 5.789918549254649e-06, + "log_odds_chosen": 11.362333297729492, + "log_odds_ratio": -0.00023769524705130607, + "logits/chosen": -0.3102943003177643, + "logits/rejected": -0.35927265882492065, + "logps/chosen": -0.00030940567376092076, + "logps/rejected": -2.809323310852051, + "loss": 0.3118, + "nll_loss": 0.0779205933213234, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0940565920900553e-05, + "rewards/margins": 0.28090137243270874, + "rewards/rejected": -0.28093230724334717, + "step": 12953 + }, + { + "epoch": 8.95850622406639, + "grad_norm": 3.743520498275757, + "learning_rate": 5.786076532964501e-06, + "log_odds_chosen": 11.816137313842773, + "log_odds_ratio": -1.3433329513645731e-05, + "logits/chosen": 0.0074203647673130035, + "logits/rejected": -0.024694515392184258, + "logps/chosen": -0.00023023040557745844, + "logps/rejected": -2.7371764183044434, + "loss": 0.4441, + "nll_loss": 0.11101721227169037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.302304346812889e-05, + "rewards/margins": 0.27369460463523865, + "rewards/rejected": -0.27371764183044434, + "step": 12954 + }, + { + "epoch": 8.959197786998617, + "grad_norm": 2.8320579528808594, + "learning_rate": 5.782234516674351e-06, + "log_odds_chosen": 10.388065338134766, + "log_odds_ratio": -0.00019531026191543788, + "logits/chosen": -0.14298123121261597, + "logits/rejected": 0.0031429678201675415, + "logps/chosen": -0.0002625812776386738, + "logps/rejected": -2.0366084575653076, + "loss": 0.2295, + "nll_loss": 0.05736586079001427, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6258127036271617e-05, + "rewards/margins": 0.2036345899105072, + "rewards/rejected": -0.20366084575653076, + "step": 12955 + }, + { + "epoch": 8.959889349930844, + "grad_norm": 2.7911875247955322, + "learning_rate": 5.778392500384202e-06, + "log_odds_chosen": 11.16344928741455, + "log_odds_ratio": -3.560550248948857e-05, + "logits/chosen": -0.5641869306564331, + "logits/rejected": -0.6493061780929565, + "logps/chosen": -9.963040793081746e-05, + "logps/rejected": -1.9015522003173828, + "loss": 0.3369, + "nll_loss": 0.08421522378921509, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.963041520677507e-06, + "rewards/margins": 0.19014525413513184, + "rewards/rejected": -0.19015520811080933, + "step": 12956 + }, + { + "epoch": 8.96058091286307, + "grad_norm": 3.4528636932373047, + "learning_rate": 5.774550484094053e-06, + "log_odds_chosen": 13.004242897033691, + "log_odds_ratio": -9.532086551189423e-06, + "logits/chosen": -0.02442549169063568, + "logits/rejected": -0.16204750537872314, + "logps/chosen": -9.38226148718968e-05, + "logps/rejected": -3.5031580924987793, + "loss": 0.4265, + "nll_loss": 0.10662779957056046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.382261850987561e-06, + "rewards/margins": 0.3503064215183258, + "rewards/rejected": -0.35031580924987793, + "step": 12957 + }, + { + "epoch": 8.961272475795298, + "grad_norm": 3.688526153564453, + "learning_rate": 5.770708467803904e-06, + "log_odds_chosen": 12.311029434204102, + "log_odds_ratio": -2.615476660139393e-05, + "logits/chosen": -0.21588446199893951, + "logits/rejected": -0.20584505796432495, + "logps/chosen": -0.00017990361084230244, + "logps/rejected": -3.441439151763916, + "loss": 0.5047, + "nll_loss": 0.12618131935596466, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7990361811826006e-05, + "rewards/margins": 0.3441259264945984, + "rewards/rejected": -0.34414392709732056, + "step": 12958 + }, + { + "epoch": 8.961964038727524, + "grad_norm": 3.9806787967681885, + "learning_rate": 5.7668664515137545e-06, + "log_odds_chosen": 12.16893196105957, + "log_odds_ratio": -2.0543651771731675e-05, + "logits/chosen": -0.4245246648788452, + "logits/rejected": -0.48382627964019775, + "logps/chosen": -0.00013065878010820597, + "logps/rejected": -2.8527469635009766, + "loss": 0.3881, + "nll_loss": 0.09702569246292114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3065878192719538e-05, + "rewards/margins": 0.2852616310119629, + "rewards/rejected": -0.2852747142314911, + "step": 12959 + }, + { + "epoch": 8.962655601659751, + "grad_norm": 4.678699970245361, + "learning_rate": 5.763024435223606e-06, + "log_odds_chosen": 11.23891830444336, + "log_odds_ratio": -0.00018691481091082096, + "logits/chosen": -0.15080493688583374, + "logits/rejected": -0.20680178701877594, + "logps/chosen": -0.0006821725401096046, + "logps/rejected": -2.517026901245117, + "loss": 0.4356, + "nll_loss": 0.10888192802667618, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.821725401096046e-05, + "rewards/margins": 0.25163447856903076, + "rewards/rejected": -0.2517027258872986, + "step": 12960 + }, + { + "epoch": 8.963347164591978, + "grad_norm": 3.943449020385742, + "learning_rate": 5.759182418933457e-06, + "log_odds_chosen": 11.836612701416016, + "log_odds_ratio": -2.155277252313681e-05, + "logits/chosen": 0.08393380045890808, + "logits/rejected": -0.059216536581516266, + "logps/chosen": -0.00016867309750523418, + "logps/rejected": -2.7192699909210205, + "loss": 0.3419, + "nll_loss": 0.08547375351190567, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.686731047811918e-05, + "rewards/margins": 0.2719101309776306, + "rewards/rejected": -0.27192699909210205, + "step": 12961 + }, + { + "epoch": 8.964038727524205, + "grad_norm": 4.497346878051758, + "learning_rate": 5.7553404026433075e-06, + "log_odds_chosen": 10.36648941040039, + "log_odds_ratio": -0.00037106170202605426, + "logits/chosen": -0.08332974463701248, + "logits/rejected": -0.0729660615324974, + "logps/chosen": -0.0007888587424531579, + "logps/rejected": -2.127197027206421, + "loss": 0.6249, + "nll_loss": 0.15619061887264252, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.88858815212734e-05, + "rewards/margins": 0.21264082193374634, + "rewards/rejected": -0.21271970868110657, + "step": 12962 + }, + { + "epoch": 8.964730290456432, + "grad_norm": 7.316700458526611, + "learning_rate": 5.751498386353158e-06, + "log_odds_chosen": 12.941807746887207, + "log_odds_ratio": -1.3672158274857793e-05, + "logits/chosen": 0.25198501348495483, + "logits/rejected": 0.14804057776927948, + "logps/chosen": -0.00013084606325719506, + "logps/rejected": -3.711289405822754, + "loss": 0.6218, + "nll_loss": 0.1554485559463501, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3084607417113148e-05, + "rewards/margins": 0.37111586332321167, + "rewards/rejected": -0.37112894654273987, + "step": 12963 + }, + { + "epoch": 8.965421853388658, + "grad_norm": 3.6101772785186768, + "learning_rate": 5.747656370063009e-06, + "log_odds_chosen": 11.11297607421875, + "log_odds_ratio": -3.442693196120672e-05, + "logits/chosen": -0.390951931476593, + "logits/rejected": -0.4710812568664551, + "logps/chosen": -0.002315881662070751, + "logps/rejected": -2.4413504600524902, + "loss": 0.3658, + "nll_loss": 0.09143579751253128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023158815747592598, + "rewards/margins": 0.24390347301959991, + "rewards/rejected": -0.2441350519657135, + "step": 12964 + }, + { + "epoch": 8.966113416320885, + "grad_norm": 3.953753709793091, + "learning_rate": 5.743814353772861e-06, + "log_odds_chosen": 11.519864082336426, + "log_odds_ratio": -7.848943641874939e-05, + "logits/chosen": -0.42134904861450195, + "logits/rejected": -0.43484270572662354, + "logps/chosen": -0.00011462083057267591, + "logps/rejected": -2.2720160484313965, + "loss": 0.469, + "nll_loss": 0.11724768579006195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1462083421065472e-05, + "rewards/margins": 0.22719016671180725, + "rewards/rejected": -0.22720161080360413, + "step": 12965 + }, + { + "epoch": 8.966804979253112, + "grad_norm": 3.7234745025634766, + "learning_rate": 5.7399723374827105e-06, + "log_odds_chosen": 11.299764633178711, + "log_odds_ratio": -9.398536349181086e-05, + "logits/chosen": -0.06496064364910126, + "logits/rejected": -0.0666482150554657, + "logps/chosen": -0.0002664439380168915, + "logps/rejected": -2.4187681674957275, + "loss": 0.3625, + "nll_loss": 0.09060367941856384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6644391255103983e-05, + "rewards/margins": 0.24185019731521606, + "rewards/rejected": -0.24187684059143066, + "step": 12966 + }, + { + "epoch": 8.967496542185339, + "grad_norm": 2.955653667449951, + "learning_rate": 5.736130321192562e-06, + "log_odds_chosen": 10.262022972106934, + "log_odds_ratio": -0.00018744800763670355, + "logits/chosen": -0.3373003304004669, + "logits/rejected": -0.4345345199108124, + "logps/chosen": -0.0005691086989827454, + "logps/rejected": -1.997305989265442, + "loss": 0.3906, + "nll_loss": 0.09762973338365555, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.69108706258703e-05, + "rewards/margins": 0.19967368245124817, + "rewards/rejected": -0.19973058998584747, + "step": 12967 + }, + { + "epoch": 8.968188105117566, + "grad_norm": 4.628073692321777, + "learning_rate": 5.732288304902413e-06, + "log_odds_chosen": 11.930381774902344, + "log_odds_ratio": -0.00014643717440776527, + "logits/chosen": 0.09199459105730057, + "logits/rejected": -0.00980144739151001, + "logps/chosen": -0.00012088009680155665, + "logps/rejected": -2.930823802947998, + "loss": 0.5376, + "nll_loss": 0.13438236713409424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2088009498256724e-05, + "rewards/margins": 0.2930702865123749, + "rewards/rejected": -0.2930823564529419, + "step": 12968 + }, + { + "epoch": 8.968879668049793, + "grad_norm": 2.5830917358398438, + "learning_rate": 5.728446288612264e-06, + "log_odds_chosen": 11.369787216186523, + "log_odds_ratio": -4.7115587221924216e-05, + "logits/chosen": -0.30060356855392456, + "logits/rejected": -0.2330998182296753, + "logps/chosen": -0.00012587365927174687, + "logps/rejected": -2.4167582988739014, + "loss": 0.2575, + "nll_loss": 0.06437969207763672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2587365745275747e-05, + "rewards/margins": 0.24166324734687805, + "rewards/rejected": -0.24167583882808685, + "step": 12969 + }, + { + "epoch": 8.96957123098202, + "grad_norm": 4.0085883140563965, + "learning_rate": 5.724604272322115e-06, + "log_odds_chosen": 12.000626564025879, + "log_odds_ratio": -1.0436234333610628e-05, + "logits/chosen": -0.11000185459852219, + "logits/rejected": -0.19354018568992615, + "logps/chosen": -0.00019879452884197235, + "logps/rejected": -3.1691694259643555, + "loss": 0.3915, + "nll_loss": 0.09788475185632706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.987945506698452e-05, + "rewards/margins": 0.31689709424972534, + "rewards/rejected": -0.31691697239875793, + "step": 12970 + }, + { + "epoch": 8.970262793914246, + "grad_norm": 5.15786075592041, + "learning_rate": 5.720762256031966e-06, + "log_odds_chosen": 11.258201599121094, + "log_odds_ratio": -3.323886630823836e-05, + "logits/chosen": 0.225187748670578, + "logits/rejected": 0.07851268351078033, + "logps/chosen": -0.0005183239700272679, + "logps/rejected": -2.8408079147338867, + "loss": 0.5503, + "nll_loss": 0.1375592052936554, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183239409234375e-05, + "rewards/margins": 0.2840290069580078, + "rewards/rejected": -0.2840808033943176, + "step": 12971 + }, + { + "epoch": 8.970954356846473, + "grad_norm": 3.935385227203369, + "learning_rate": 5.716920239741817e-06, + "log_odds_chosen": 12.17033863067627, + "log_odds_ratio": -1.7809870769269764e-05, + "logits/chosen": -0.10739308595657349, + "logits/rejected": -0.17841652035713196, + "logps/chosen": -0.00014362987712956965, + "logps/rejected": -3.1119933128356934, + "loss": 0.3589, + "nll_loss": 0.08972074836492538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4362988622451667e-05, + "rewards/margins": 0.31118500232696533, + "rewards/rejected": -0.3111993670463562, + "step": 12972 + }, + { + "epoch": 8.9716459197787, + "grad_norm": 4.132678508758545, + "learning_rate": 5.7130782234516675e-06, + "log_odds_chosen": 11.330833435058594, + "log_odds_ratio": -2.018288978433702e-05, + "logits/chosen": -0.0428299643099308, + "logits/rejected": -0.17370249330997467, + "logps/chosen": -0.00012135677388869226, + "logps/rejected": -2.0014662742614746, + "loss": 0.4309, + "nll_loss": 0.10771875083446503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2135677025071345e-05, + "rewards/margins": 0.20013447105884552, + "rewards/rejected": -0.2001466155052185, + "step": 12973 + }, + { + "epoch": 8.972337482710927, + "grad_norm": 3.055609703063965, + "learning_rate": 5.709236207161519e-06, + "log_odds_chosen": 10.535971641540527, + "log_odds_ratio": -8.944002911448479e-05, + "logits/chosen": -0.21757450699806213, + "logits/rejected": -0.28312134742736816, + "logps/chosen": -0.0004208739846944809, + "logps/rejected": -2.163292407989502, + "loss": 0.3287, + "nll_loss": 0.08217275142669678, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.208739846944809e-05, + "rewards/margins": 0.21628715097904205, + "rewards/rejected": -0.21632923185825348, + "step": 12974 + }, + { + "epoch": 8.973029045643154, + "grad_norm": 3.2214202880859375, + "learning_rate": 5.70539419087137e-06, + "log_odds_chosen": 10.623476028442383, + "log_odds_ratio": -0.0001413396093994379, + "logits/chosen": -0.4127468466758728, + "logits/rejected": -0.38557878136634827, + "logps/chosen": -0.0007369809318333864, + "logps/rejected": -2.2583162784576416, + "loss": 0.4966, + "nll_loss": 0.12413250654935837, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.369809463853016e-05, + "rewards/margins": 0.2257579118013382, + "rewards/rejected": -0.22583162784576416, + "step": 12975 + }, + { + "epoch": 8.97372060857538, + "grad_norm": 4.862353801727295, + "learning_rate": 5.701552174581221e-06, + "log_odds_chosen": 10.94636344909668, + "log_odds_ratio": -0.00038006139220669866, + "logits/chosen": -0.46704912185668945, + "logits/rejected": -0.5940245389938354, + "logps/chosen": -0.0037317401729524136, + "logps/rejected": -2.696951389312744, + "loss": 0.7203, + "nll_loss": 0.18004614114761353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00037317402893677354, + "rewards/margins": 0.2693219780921936, + "rewards/rejected": -0.2696951627731323, + "step": 12976 + }, + { + "epoch": 8.974412171507607, + "grad_norm": 3.3329851627349854, + "learning_rate": 5.697710158291072e-06, + "log_odds_chosen": 10.701688766479492, + "log_odds_ratio": -8.773025183472782e-05, + "logits/chosen": -0.247919499874115, + "logits/rejected": -0.2692440450191498, + "logps/chosen": -0.0001977643696591258, + "logps/rejected": -1.7542986869812012, + "loss": 0.3565, + "nll_loss": 0.08911775797605515, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.97764366021147e-05, + "rewards/margins": 0.17541009187698364, + "rewards/rejected": -0.17542988061904907, + "step": 12977 + }, + { + "epoch": 8.975103734439834, + "grad_norm": 3.1159744262695312, + "learning_rate": 5.693868142000922e-06, + "log_odds_chosen": 11.69921875, + "log_odds_ratio": -3.141276101814583e-05, + "logits/chosen": -0.11053688824176788, + "logits/rejected": -0.18538329005241394, + "logps/chosen": -0.00016071014397311956, + "logps/rejected": -2.599425792694092, + "loss": 0.3581, + "nll_loss": 0.08951176702976227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6071015124907717e-05, + "rewards/margins": 0.25992652773857117, + "rewards/rejected": -0.25994259119033813, + "step": 12978 + }, + { + "epoch": 8.975795297372061, + "grad_norm": 4.339756488800049, + "learning_rate": 5.690026125710774e-06, + "log_odds_chosen": 10.366867065429688, + "log_odds_ratio": -0.0003004825266543776, + "logits/chosen": -0.6165687441825867, + "logits/rejected": -0.7288893461227417, + "logps/chosen": -0.0002853489713743329, + "logps/rejected": -1.3389570713043213, + "loss": 0.3429, + "nll_loss": 0.08568733930587769, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8534897865029052e-05, + "rewards/margins": 0.13386717438697815, + "rewards/rejected": -0.13389572501182556, + "step": 12979 + }, + { + "epoch": 8.976486860304288, + "grad_norm": 4.901917934417725, + "learning_rate": 5.6861841094206236e-06, + "log_odds_chosen": 10.370695114135742, + "log_odds_ratio": -9.051861707121134e-05, + "logits/chosen": 0.14978300034999847, + "logits/rejected": 0.1547352373600006, + "logps/chosen": -0.00029195219394750893, + "logps/rejected": -1.8662596940994263, + "loss": 0.4795, + "nll_loss": 0.11985791474580765, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9195220122346655e-05, + "rewards/margins": 0.18659678101539612, + "rewards/rejected": -0.18662597239017487, + "step": 12980 + }, + { + "epoch": 8.977178423236515, + "grad_norm": 3.297598361968994, + "learning_rate": 5.682342093130475e-06, + "log_odds_chosen": 10.424062728881836, + "log_odds_ratio": -0.0006554003339260817, + "logits/chosen": -0.1414889097213745, + "logits/rejected": -0.14082834124565125, + "logps/chosen": -0.002491435967385769, + "logps/rejected": -2.1096932888031006, + "loss": 0.3112, + "nll_loss": 0.0777302086353302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000249143602559343, + "rewards/margins": 0.21072018146514893, + "rewards/rejected": -0.21096934378147125, + "step": 12981 + }, + { + "epoch": 8.977869986168741, + "grad_norm": 4.84626579284668, + "learning_rate": 5.678500076840326e-06, + "log_odds_chosen": 9.627571105957031, + "log_odds_ratio": -0.00032676331466063857, + "logits/chosen": -0.4661812484264374, + "logits/rejected": -0.4674217402935028, + "logps/chosen": -0.0007495335303246975, + "logps/rejected": -2.1007561683654785, + "loss": 0.4845, + "nll_loss": 0.12110137939453125, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.49533501220867e-05, + "rewards/margins": 0.2100006639957428, + "rewards/rejected": -0.21007561683654785, + "step": 12982 + }, + { + "epoch": 8.978561549100968, + "grad_norm": 3.3853838443756104, + "learning_rate": 5.674658060550177e-06, + "log_odds_chosen": 11.485723495483398, + "log_odds_ratio": -3.2207815820584074e-05, + "logits/chosen": -0.6942331790924072, + "logits/rejected": -0.7253660559654236, + "logps/chosen": -0.00016284678713418543, + "logps/rejected": -2.3455090522766113, + "loss": 0.4133, + "nll_loss": 0.10331219434738159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6284679077216424e-05, + "rewards/margins": 0.2345346212387085, + "rewards/rejected": -0.23455092310905457, + "step": 12983 + }, + { + "epoch": 8.979253112033195, + "grad_norm": 3.226949691772461, + "learning_rate": 5.670816044260028e-06, + "log_odds_chosen": 11.627418518066406, + "log_odds_ratio": -6.23153755441308e-05, + "logits/chosen": -0.03964445739984512, + "logits/rejected": 0.03879677504301071, + "logps/chosen": -0.0001272661320399493, + "logps/rejected": -2.3287415504455566, + "loss": 0.4016, + "nll_loss": 0.10040175914764404, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.272661302209599e-05, + "rewards/margins": 0.23286142945289612, + "rewards/rejected": -0.23287415504455566, + "step": 12984 + }, + { + "epoch": 8.979944674965422, + "grad_norm": 7.261765480041504, + "learning_rate": 5.666974027969879e-06, + "log_odds_chosen": 11.11042594909668, + "log_odds_ratio": -2.1344809283618815e-05, + "logits/chosen": -0.20489904284477234, + "logits/rejected": -0.22213873267173767, + "logps/chosen": -8.23369700810872e-05, + "logps/rejected": -1.7812402248382568, + "loss": 0.2939, + "nll_loss": 0.07347705215215683, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.233697371906601e-06, + "rewards/margins": 0.17811578512191772, + "rewards/rejected": -0.17812402546405792, + "step": 12985 + }, + { + "epoch": 8.980636237897649, + "grad_norm": 2.761765956878662, + "learning_rate": 5.66313201167973e-06, + "log_odds_chosen": 10.917032241821289, + "log_odds_ratio": -0.0002200824674218893, + "logits/chosen": -0.33595991134643555, + "logits/rejected": -0.3741520047187805, + "logps/chosen": -0.00033478077966719866, + "logps/rejected": -2.235827922821045, + "loss": 0.3161, + "nll_loss": 0.07900466024875641, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3478077966719866e-05, + "rewards/margins": 0.22354930639266968, + "rewards/rejected": -0.22358280420303345, + "step": 12986 + }, + { + "epoch": 8.981327800829876, + "grad_norm": 2.592590808868408, + "learning_rate": 5.6592899953895805e-06, + "log_odds_chosen": 10.91973876953125, + "log_odds_ratio": -3.753927012439817e-05, + "logits/chosen": -0.10205067694187164, + "logits/rejected": -0.08578318357467651, + "logps/chosen": -0.00017666578060016036, + "logps/rejected": -1.973038911819458, + "loss": 0.302, + "nll_loss": 0.07549074292182922, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.766657987900544e-05, + "rewards/margins": 0.19728624820709229, + "rewards/rejected": -0.1973038911819458, + "step": 12987 + }, + { + "epoch": 8.982019363762102, + "grad_norm": 3.359773635864258, + "learning_rate": 5.655447979099432e-06, + "log_odds_chosen": 10.698223114013672, + "log_odds_ratio": -0.000163710443302989, + "logits/chosen": -0.563290536403656, + "logits/rejected": -0.5860562324523926, + "logps/chosen": -0.0032191036734730005, + "logps/rejected": -1.9369779825210571, + "loss": 0.365, + "nll_loss": 0.09123671799898148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032191036734730005, + "rewards/margins": 0.19337588548660278, + "rewards/rejected": -0.19369781017303467, + "step": 12988 + }, + { + "epoch": 8.98271092669433, + "grad_norm": 3.8330631256103516, + "learning_rate": 5.651605962809282e-06, + "log_odds_chosen": 11.048598289489746, + "log_odds_ratio": -7.060886855470017e-05, + "logits/chosen": -0.23575690388679504, + "logits/rejected": -0.27460363507270813, + "logps/chosen": -0.00018293302855454385, + "logps/rejected": -2.244535446166992, + "loss": 0.3572, + "nll_loss": 0.08928923308849335, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8293303583050147e-05, + "rewards/margins": 0.2244352400302887, + "rewards/rejected": -0.22445355355739594, + "step": 12989 + }, + { + "epoch": 8.983402489626556, + "grad_norm": 4.441878795623779, + "learning_rate": 5.647763946519134e-06, + "log_odds_chosen": 11.518059730529785, + "log_odds_ratio": -0.00014070258475840092, + "logits/chosen": -0.4614870548248291, + "logits/rejected": -0.3396298587322235, + "logps/chosen": -0.0001907533296616748, + "logps/rejected": -2.7353930473327637, + "loss": 0.402, + "nll_loss": 0.10049546509981155, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.90753326023696e-05, + "rewards/margins": 0.27352023124694824, + "rewards/rejected": -0.27353930473327637, + "step": 12990 + }, + { + "epoch": 8.984094052558783, + "grad_norm": 4.48910665512085, + "learning_rate": 5.643921930228984e-06, + "log_odds_chosen": 11.144182205200195, + "log_odds_ratio": -2.1766394638689235e-05, + "logits/chosen": -0.4736829400062561, + "logits/rejected": -0.49978816509246826, + "logps/chosen": -0.0001298037386732176, + "logps/rejected": -2.041663646697998, + "loss": 0.4048, + "nll_loss": 0.10119934380054474, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2980374776816461e-05, + "rewards/margins": 0.2041533887386322, + "rewards/rejected": -0.20416638255119324, + "step": 12991 + }, + { + "epoch": 8.98478561549101, + "grad_norm": 3.2902517318725586, + "learning_rate": 5.640079913938835e-06, + "log_odds_chosen": 10.783985137939453, + "log_odds_ratio": -0.00022832911054138094, + "logits/chosen": -0.47414305806159973, + "logits/rejected": -0.5175694823265076, + "logps/chosen": -0.00024217108148150146, + "logps/rejected": -2.541477680206299, + "loss": 0.5231, + "nll_loss": 0.13074392080307007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4217108148150146e-05, + "rewards/margins": 0.2541235685348511, + "rewards/rejected": -0.2541477680206299, + "step": 12992 + }, + { + "epoch": 8.985477178423237, + "grad_norm": 12.170414924621582, + "learning_rate": 5.636237897648687e-06, + "log_odds_chosen": 10.079124450683594, + "log_odds_ratio": -0.0012567834928631783, + "logits/chosen": -0.8585034608840942, + "logits/rejected": -0.9505305886268616, + "logps/chosen": -0.031139731407165527, + "logps/rejected": -2.6075291633605957, + "loss": 0.3143, + "nll_loss": 0.07844439148902893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003113973420113325, + "rewards/margins": 0.25763893127441406, + "rewards/rejected": -0.26075291633605957, + "step": 12993 + }, + { + "epoch": 8.986168741355463, + "grad_norm": 3.859238386154175, + "learning_rate": 5.6323958813585375e-06, + "log_odds_chosen": 10.160091400146484, + "log_odds_ratio": -8.257082663476467e-05, + "logits/chosen": -0.5174241065979004, + "logits/rejected": -0.5387182235717773, + "logps/chosen": -0.00047265770263038576, + "logps/rejected": -1.6943714618682861, + "loss": 0.3584, + "nll_loss": 0.08958704769611359, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7265770263038576e-05, + "rewards/margins": 0.16938988864421844, + "rewards/rejected": -0.16943715512752533, + "step": 12994 + }, + { + "epoch": 8.98686030428769, + "grad_norm": 2.6806588172912598, + "learning_rate": 5.628553865068388e-06, + "log_odds_chosen": 11.655320167541504, + "log_odds_ratio": -1.0061345165013336e-05, + "logits/chosen": -0.09891670197248459, + "logits/rejected": -0.26903587579727173, + "logps/chosen": -5.585578765021637e-05, + "logps/rejected": -1.7518370151519775, + "loss": 0.3508, + "nll_loss": 0.08770234882831573, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5855789469205774e-06, + "rewards/margins": 0.17517811059951782, + "rewards/rejected": -0.1751837134361267, + "step": 12995 + }, + { + "epoch": 8.987551867219917, + "grad_norm": 5.6125264167785645, + "learning_rate": 5.624711848778239e-06, + "log_odds_chosen": 8.728017807006836, + "log_odds_ratio": -0.0006680641090497375, + "logits/chosen": -0.48296207189559937, + "logits/rejected": -0.5777981877326965, + "logps/chosen": -0.0025174610782414675, + "logps/rejected": -1.8701331615447998, + "loss": 0.3588, + "nll_loss": 0.08962175250053406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000251746125286445, + "rewards/margins": 0.18676157295703888, + "rewards/rejected": -0.18701332807540894, + "step": 12996 + }, + { + "epoch": 8.988243430152144, + "grad_norm": 2.7184691429138184, + "learning_rate": 5.62086983248809e-06, + "log_odds_chosen": 9.169109344482422, + "log_odds_ratio": -0.0022203708067536354, + "logits/chosen": -0.07939259707927704, + "logits/rejected": -0.1898723989725113, + "logps/chosen": -0.010505957528948784, + "logps/rejected": -1.6082786321640015, + "loss": 0.3542, + "nll_loss": 0.0883224830031395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001050595659762621, + "rewards/margins": 0.15977725386619568, + "rewards/rejected": -0.1608278453350067, + "step": 12997 + }, + { + "epoch": 8.98893499308437, + "grad_norm": 4.824005126953125, + "learning_rate": 5.617027816197941e-06, + "log_odds_chosen": 10.597808837890625, + "log_odds_ratio": -7.037734030745924e-05, + "logits/chosen": 0.06742497533559799, + "logits/rejected": 0.014721512794494629, + "logps/chosen": -0.00015499829896725714, + "logps/rejected": -1.6016286611557007, + "loss": 0.4814, + "nll_loss": 0.12033873051404953, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5499828805332072e-05, + "rewards/margins": 0.16014736890792847, + "rewards/rejected": -0.16016286611557007, + "step": 12998 + }, + { + "epoch": 8.989626556016598, + "grad_norm": 4.160258769989014, + "learning_rate": 5.613185799907792e-06, + "log_odds_chosen": 9.693705558776855, + "log_odds_ratio": -0.00022970604186411947, + "logits/chosen": 0.00805056095123291, + "logits/rejected": -0.2778843641281128, + "logps/chosen": -0.0010679198894649744, + "logps/rejected": -2.0614240169525146, + "loss": 0.3218, + "nll_loss": 0.08041678369045258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010679199476726353, + "rewards/margins": 0.20603561401367188, + "rewards/rejected": -0.20614241063594818, + "step": 12999 + }, + { + "epoch": 8.990318118948824, + "grad_norm": 4.214306354522705, + "learning_rate": 5.609343783617643e-06, + "log_odds_chosen": 11.489789009094238, + "log_odds_ratio": -1.7366832253173925e-05, + "logits/chosen": -0.06594070047140121, + "logits/rejected": -0.1417391449213028, + "logps/chosen": -0.0001669024204602465, + "logps/rejected": -2.746901273727417, + "loss": 0.7706, + "nll_loss": 0.1926540732383728, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6690242773620412e-05, + "rewards/margins": 0.2746734321117401, + "rewards/rejected": -0.2746901214122772, + "step": 13000 + }, + { + "epoch": 8.991009681881051, + "grad_norm": 3.0645954608917236, + "learning_rate": 5.6055017673274935e-06, + "log_odds_chosen": 10.695037841796875, + "log_odds_ratio": -0.00014979125990066677, + "logits/chosen": -0.7848923206329346, + "logits/rejected": -0.759530246257782, + "logps/chosen": -0.003960360772907734, + "logps/rejected": -2.8756608963012695, + "loss": 0.3462, + "nll_loss": 0.0865248367190361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003960360772907734, + "rewards/margins": 0.28717008233070374, + "rewards/rejected": -0.2875661253929138, + "step": 13001 + }, + { + "epoch": 8.991701244813278, + "grad_norm": 3.4911797046661377, + "learning_rate": 5.601659751037345e-06, + "log_odds_chosen": 10.644820213317871, + "log_odds_ratio": -0.0004768046783283353, + "logits/chosen": -0.4039542078971863, + "logits/rejected": -0.4295424818992615, + "logps/chosen": -0.0008888083975762129, + "logps/rejected": -2.218118667602539, + "loss": 0.401, + "nll_loss": 0.10020698606967926, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.888084266800433e-05, + "rewards/margins": 0.22172297537326813, + "rewards/rejected": -0.22181186079978943, + "step": 13002 + }, + { + "epoch": 8.992392807745505, + "grad_norm": 3.9166111946105957, + "learning_rate": 5.597817734747195e-06, + "log_odds_chosen": 11.036426544189453, + "log_odds_ratio": -4.672490103985183e-05, + "logits/chosen": -0.46782681345939636, + "logits/rejected": -0.45481324195861816, + "logps/chosen": -0.00010561967792455107, + "logps/rejected": -1.8134853839874268, + "loss": 0.3286, + "nll_loss": 0.0821574330329895, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0561967428657226e-05, + "rewards/margins": 0.18133798241615295, + "rewards/rejected": -0.1813485473394394, + "step": 13003 + }, + { + "epoch": 8.993084370677732, + "grad_norm": 3.3774895668029785, + "learning_rate": 5.593975718457047e-06, + "log_odds_chosen": 11.584017753601074, + "log_odds_ratio": -1.1614972208917607e-05, + "logits/chosen": -0.31898242235183716, + "logits/rejected": -0.4430631995201111, + "logps/chosen": -0.00012595205043908209, + "logps/rejected": -2.2793500423431396, + "loss": 0.3441, + "nll_loss": 0.08601472526788712, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2595206499099731e-05, + "rewards/margins": 0.22792242467403412, + "rewards/rejected": -0.22793501615524292, + "step": 13004 + }, + { + "epoch": 8.993775933609959, + "grad_norm": 4.269271373748779, + "learning_rate": 5.590133702166897e-06, + "log_odds_chosen": 11.4553861618042, + "log_odds_ratio": -0.0001780561578925699, + "logits/chosen": -0.18526721000671387, + "logits/rejected": -0.2101747989654541, + "logps/chosen": -0.00024178973399102688, + "logps/rejected": -2.9740517139434814, + "loss": 0.3912, + "nll_loss": 0.09779347479343414, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4178971216315404e-05, + "rewards/margins": 0.2973810136318207, + "rewards/rejected": -0.2974051833152771, + "step": 13005 + }, + { + "epoch": 8.994467496542185, + "grad_norm": 4.434983730316162, + "learning_rate": 5.586291685876748e-06, + "log_odds_chosen": 10.024709701538086, + "log_odds_ratio": -0.00012213116860948503, + "logits/chosen": -0.43611031770706177, + "logits/rejected": -0.5170964002609253, + "logps/chosen": -0.0002620690211188048, + "logps/rejected": -1.636785864830017, + "loss": 0.4045, + "nll_loss": 0.10110985487699509, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6206904294667765e-05, + "rewards/margins": 0.16365239024162292, + "rewards/rejected": -0.1636785864830017, + "step": 13006 + }, + { + "epoch": 8.995159059474412, + "grad_norm": 5.280633926391602, + "learning_rate": 5.5824496695866e-06, + "log_odds_chosen": 10.429874420166016, + "log_odds_ratio": -0.00018652115250006318, + "logits/chosen": -0.2633684575557709, + "logits/rejected": -0.3636382222175598, + "logps/chosen": -0.0003898479917552322, + "logps/rejected": -2.0011839866638184, + "loss": 0.3037, + "nll_loss": 0.0759064331650734, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8984795537544414e-05, + "rewards/margins": 0.20007941126823425, + "rewards/rejected": -0.20011840760707855, + "step": 13007 + }, + { + "epoch": 8.995850622406639, + "grad_norm": 3.260202169418335, + "learning_rate": 5.5786076532964505e-06, + "log_odds_chosen": 11.279440879821777, + "log_odds_ratio": -2.7851805498357862e-05, + "logits/chosen": -0.3012050986289978, + "logits/rejected": -0.44973263144493103, + "logps/chosen": -0.00024160815519280732, + "logps/rejected": -2.4623751640319824, + "loss": 0.3494, + "nll_loss": 0.0873558521270752, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4160815883078612e-05, + "rewards/margins": 0.24621334671974182, + "rewards/rejected": -0.24623751640319824, + "step": 13008 + }, + { + "epoch": 8.996542185338866, + "grad_norm": 4.038920879364014, + "learning_rate": 5.574765637006301e-06, + "log_odds_chosen": 12.036626815795898, + "log_odds_ratio": -0.0007205168949440122, + "logits/chosen": -0.6345183253288269, + "logits/rejected": -0.6824157238006592, + "logps/chosen": -0.001240055076777935, + "logps/rejected": -4.034633159637451, + "loss": 0.355, + "nll_loss": 0.08866976946592331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001240055134985596, + "rewards/margins": 0.40333932638168335, + "rewards/rejected": -0.40346336364746094, + "step": 13009 + }, + { + "epoch": 8.997233748271093, + "grad_norm": 5.614645481109619, + "learning_rate": 5.570923620716152e-06, + "log_odds_chosen": 10.99240493774414, + "log_odds_ratio": -3.525690408423543e-05, + "logits/chosen": -0.5974779725074768, + "logits/rejected": -0.5112378597259521, + "logps/chosen": -0.00010461565398145467, + "logps/rejected": -1.6510086059570312, + "loss": 0.44, + "nll_loss": 0.1099885106086731, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0461564670549706e-05, + "rewards/margins": 0.1650903970003128, + "rewards/rejected": -0.1651008576154709, + "step": 13010 + }, + { + "epoch": 8.99792531120332, + "grad_norm": 3.4298603534698486, + "learning_rate": 5.5670816044260036e-06, + "log_odds_chosen": 10.097305297851562, + "log_odds_ratio": -0.0002910443290602416, + "logits/chosen": -0.7691456079483032, + "logits/rejected": -0.8014342784881592, + "logps/chosen": -0.0001380304165650159, + "logps/rejected": -1.7263977527618408, + "loss": 0.4124, + "nll_loss": 0.10307244956493378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3803042747895233e-05, + "rewards/margins": 0.17262595891952515, + "rewards/rejected": -0.17263977229595184, + "step": 13011 + }, + { + "epoch": 8.998616874135546, + "grad_norm": 4.5642991065979, + "learning_rate": 5.5632395881358535e-06, + "log_odds_chosen": 11.386275291442871, + "log_odds_ratio": -2.4504495740984567e-05, + "logits/chosen": -0.16985617578029633, + "logits/rejected": -0.2218393087387085, + "logps/chosen": -0.00022014914429746568, + "logps/rejected": -2.3555777072906494, + "loss": 0.4621, + "nll_loss": 0.11551637947559357, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2014914065948687e-05, + "rewards/margins": 0.23553577065467834, + "rewards/rejected": -0.23555776476860046, + "step": 13012 + }, + { + "epoch": 8.999308437067773, + "grad_norm": 3.209639310836792, + "learning_rate": 5.559397571845705e-06, + "log_odds_chosen": 10.485466003417969, + "log_odds_ratio": -7.259511039592326e-05, + "logits/chosen": 0.09163352102041245, + "logits/rejected": 0.022199518978595734, + "logps/chosen": -0.0003781650448217988, + "logps/rejected": -2.1259493827819824, + "loss": 0.3894, + "nll_loss": 0.09734359383583069, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7816502299392596e-05, + "rewards/margins": 0.21255715191364288, + "rewards/rejected": -0.21259495615959167, + "step": 13013 + }, + { + "epoch": 9.0, + "grad_norm": 2.81518292427063, + "learning_rate": 5.555555555555556e-06, + "log_odds_chosen": 12.507894515991211, + "log_odds_ratio": -9.165652045339812e-06, + "logits/chosen": -0.10490019619464874, + "logits/rejected": -0.1608884483575821, + "logps/chosen": -8.094940858427435e-05, + "logps/rejected": -2.832207202911377, + "loss": 0.2795, + "nll_loss": 0.06986867636442184, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.094941222225316e-06, + "rewards/margins": 0.2832126319408417, + "rewards/rejected": -0.28322070837020874, + "step": 13014 + }, + { + "epoch": 9.000691562932227, + "grad_norm": 3.2384731769561768, + "learning_rate": 5.5517135392654066e-06, + "log_odds_chosen": 11.528509140014648, + "log_odds_ratio": -4.0053528209682554e-05, + "logits/chosen": -0.030156686902046204, + "logits/rejected": -0.05053587257862091, + "logps/chosen": -0.00010144417319679633, + "logps/rejected": -2.4233481884002686, + "loss": 0.3581, + "nll_loss": 0.0895245298743248, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0144417501578573e-05, + "rewards/margins": 0.24232468008995056, + "rewards/rejected": -0.24233481287956238, + "step": 13015 + }, + { + "epoch": 9.001383125864454, + "grad_norm": 4.159745693206787, + "learning_rate": 5.547871522975258e-06, + "log_odds_chosen": 11.156106948852539, + "log_odds_ratio": -0.00015267080743797123, + "logits/chosen": -0.6843860149383545, + "logits/rejected": -0.6840651631355286, + "logps/chosen": -9.612218127585948e-05, + "logps/rejected": -2.060917615890503, + "loss": 0.4, + "nll_loss": 0.0999767854809761, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.612218491383828e-06, + "rewards/margins": 0.20608215034008026, + "rewards/rejected": -0.2060917615890503, + "step": 13016 + }, + { + "epoch": 9.00207468879668, + "grad_norm": 3.164273262023926, + "learning_rate": 5.544029506685108e-06, + "log_odds_chosen": 11.69143295288086, + "log_odds_ratio": -3.219091013306752e-05, + "logits/chosen": -0.5757405161857605, + "logits/rejected": -0.7102214097976685, + "logps/chosen": -0.00013833888806402683, + "logps/rejected": -2.4703941345214844, + "loss": 0.3825, + "nll_loss": 0.09561428427696228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3833889170200564e-05, + "rewards/margins": 0.24702557921409607, + "rewards/rejected": -0.24703940749168396, + "step": 13017 + }, + { + "epoch": 9.002766251728907, + "grad_norm": 1.8051459789276123, + "learning_rate": 5.54018749039496e-06, + "log_odds_chosen": 10.524673461914062, + "log_odds_ratio": -4.670019916375168e-05, + "logits/chosen": -0.27058786153793335, + "logits/rejected": -0.11664856225252151, + "logps/chosen": -0.0010510836727917194, + "logps/rejected": -2.2729997634887695, + "loss": 0.2015, + "nll_loss": 0.05036091059446335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010510836000321433, + "rewards/margins": 0.2271948754787445, + "rewards/rejected": -0.2272999882698059, + "step": 13018 + }, + { + "epoch": 9.003457814661134, + "grad_norm": 6.395131587982178, + "learning_rate": 5.53634547410481e-06, + "log_odds_chosen": 11.191022872924805, + "log_odds_ratio": -2.956366733997129e-05, + "logits/chosen": -0.30440646409988403, + "logits/rejected": -0.4327055513858795, + "logps/chosen": -0.000211410311749205, + "logps/rejected": -2.4802041053771973, + "loss": 0.5894, + "nll_loss": 0.1473536342382431, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1141029719728976e-05, + "rewards/margins": 0.24799926578998566, + "rewards/rejected": -0.24802041053771973, + "step": 13019 + }, + { + "epoch": 9.004149377593361, + "grad_norm": 5.150073051452637, + "learning_rate": 5.532503457814661e-06, + "log_odds_chosen": 10.99140739440918, + "log_odds_ratio": -3.988608659710735e-05, + "logits/chosen": 0.22210730612277985, + "logits/rejected": 0.13139745593070984, + "logps/chosen": -0.0004938662750646472, + "logps/rejected": -2.522759437561035, + "loss": 0.3374, + "nll_loss": 0.08433938771486282, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.938662823406048e-05, + "rewards/margins": 0.2522265613079071, + "rewards/rejected": -0.2522759437561035, + "step": 13020 + }, + { + "epoch": 9.004840940525588, + "grad_norm": 3.9866042137145996, + "learning_rate": 5.528661441524513e-06, + "log_odds_chosen": 11.550115585327148, + "log_odds_ratio": -6.959481834201142e-05, + "logits/chosen": -0.09290512651205063, + "logits/rejected": -0.020532796159386635, + "logps/chosen": -0.00021088341600261629, + "logps/rejected": -2.310567617416382, + "loss": 0.4046, + "nll_loss": 0.10113376379013062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1088340872665867e-05, + "rewards/margins": 0.23103567957878113, + "rewards/rejected": -0.23105676472187042, + "step": 13021 + }, + { + "epoch": 9.005532503457815, + "grad_norm": 3.425347089767456, + "learning_rate": 5.5248194252343635e-06, + "log_odds_chosen": 10.409988403320312, + "log_odds_ratio": -4.699845885625109e-05, + "logits/chosen": -0.062008604407310486, + "logits/rejected": -0.146600604057312, + "logps/chosen": -0.0004828722740057856, + "logps/rejected": -2.3168044090270996, + "loss": 0.3594, + "nll_loss": 0.08984710276126862, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.828722740057856e-05, + "rewards/margins": 0.23163215816020966, + "rewards/rejected": -0.23168045282363892, + "step": 13022 + }, + { + "epoch": 9.006224066390041, + "grad_norm": 4.393455505371094, + "learning_rate": 5.520977408944214e-06, + "log_odds_chosen": 10.297155380249023, + "log_odds_ratio": -0.00019715650705620646, + "logits/chosen": -0.277144193649292, + "logits/rejected": -0.34454062581062317, + "logps/chosen": -0.0005988165503367782, + "logps/rejected": -2.442268133163452, + "loss": 0.2592, + "nll_loss": 0.06477570533752441, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988165139569901e-05, + "rewards/margins": 0.24416694045066833, + "rewards/rejected": -0.24422681331634521, + "step": 13023 + }, + { + "epoch": 9.006915629322268, + "grad_norm": 2.774665594100952, + "learning_rate": 5.517135392654065e-06, + "log_odds_chosen": 11.69587516784668, + "log_odds_ratio": -3.743657725863159e-05, + "logits/chosen": -0.456280916929245, + "logits/rejected": -0.44717827439308167, + "logps/chosen": -0.00011302022903691977, + "logps/rejected": -2.711245059967041, + "loss": 0.2757, + "nll_loss": 0.06891781836748123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1302023267489858e-05, + "rewards/margins": 0.27111321687698364, + "rewards/rejected": -0.27112454175949097, + "step": 13024 + }, + { + "epoch": 9.007607192254495, + "grad_norm": 3.6081957817077637, + "learning_rate": 5.513293376363917e-06, + "log_odds_chosen": 11.367313385009766, + "log_odds_ratio": -1.4467418623098638e-05, + "logits/chosen": -0.32773351669311523, + "logits/rejected": -0.39627397060394287, + "logps/chosen": -8.029278978938237e-05, + "logps/rejected": -1.8186954259872437, + "loss": 0.276, + "nll_loss": 0.06899484992027283, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.029278433241416e-06, + "rewards/margins": 0.1818615198135376, + "rewards/rejected": -0.18186955153942108, + "step": 13025 + }, + { + "epoch": 9.008298755186722, + "grad_norm": 4.3829755783081055, + "learning_rate": 5.5094513600737665e-06, + "log_odds_chosen": 11.095830917358398, + "log_odds_ratio": -0.00013280285929795355, + "logits/chosen": 0.15786179900169373, + "logits/rejected": 0.09668511897325516, + "logps/chosen": -0.00019103838712908328, + "logps/rejected": -2.1930274963378906, + "loss": 0.5074, + "nll_loss": 0.12683293223381042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9103837985312566e-05, + "rewards/margins": 0.21928367018699646, + "rewards/rejected": -0.21930275857448578, + "step": 13026 + }, + { + "epoch": 9.008990318118949, + "grad_norm": 4.688060760498047, + "learning_rate": 5.505609343783618e-06, + "log_odds_chosen": 11.868011474609375, + "log_odds_ratio": -1.2133230484323576e-05, + "logits/chosen": 0.03171950578689575, + "logits/rejected": -0.1849055290222168, + "logps/chosen": -6.48779678158462e-05, + "logps/rejected": -2.3685197830200195, + "loss": 0.3904, + "nll_loss": 0.09759525954723358, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4877967815846205e-06, + "rewards/margins": 0.2368454933166504, + "rewards/rejected": -0.2368519902229309, + "step": 13027 + }, + { + "epoch": 9.009681881051176, + "grad_norm": 2.7641689777374268, + "learning_rate": 5.501767327493469e-06, + "log_odds_chosen": 10.573009490966797, + "log_odds_ratio": -8.692661504028365e-05, + "logits/chosen": -0.66283118724823, + "logits/rejected": -0.6663856506347656, + "logps/chosen": -0.00019317277474328876, + "logps/rejected": -2.2193799018859863, + "loss": 0.2742, + "nll_loss": 0.06855355948209763, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9317278201924637e-05, + "rewards/margins": 0.22191870212554932, + "rewards/rejected": -0.22193799912929535, + "step": 13028 + }, + { + "epoch": 9.010373443983402, + "grad_norm": 2.9909303188323975, + "learning_rate": 5.49792531120332e-06, + "log_odds_chosen": 10.86269760131836, + "log_odds_ratio": -8.339952910318971e-05, + "logits/chosen": -0.3977804183959961, + "logits/rejected": -0.3863662779331207, + "logps/chosen": -0.0002721707278396934, + "logps/rejected": -2.121858596801758, + "loss": 0.3169, + "nll_loss": 0.07920723408460617, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7217072783969343e-05, + "rewards/margins": 0.21215865015983582, + "rewards/rejected": -0.21218587458133698, + "step": 13029 + }, + { + "epoch": 9.01106500691563, + "grad_norm": 4.653593063354492, + "learning_rate": 5.494083294913171e-06, + "log_odds_chosen": 11.301478385925293, + "log_odds_ratio": -0.0001647967437747866, + "logits/chosen": 0.008434537798166275, + "logits/rejected": -0.12119042873382568, + "logps/chosen": -0.00045101603609509766, + "logps/rejected": -2.783571720123291, + "loss": 0.3445, + "nll_loss": 0.08611457049846649, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.510160579229705e-05, + "rewards/margins": 0.278312087059021, + "rewards/rejected": -0.2783571779727936, + "step": 13030 + }, + { + "epoch": 9.011756569847856, + "grad_norm": 3.588960647583008, + "learning_rate": 5.490241278623021e-06, + "log_odds_chosen": 11.11030387878418, + "log_odds_ratio": -9.362775017507374e-05, + "logits/chosen": 0.3267763555049896, + "logits/rejected": 0.33334988355636597, + "logps/chosen": -0.00014919544628355652, + "logps/rejected": -2.088637351989746, + "loss": 0.2984, + "nll_loss": 0.07458983361721039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4919545719749294e-05, + "rewards/margins": 0.20884880423545837, + "rewards/rejected": -0.2088637351989746, + "step": 13031 + }, + { + "epoch": 9.012448132780083, + "grad_norm": 3.110220432281494, + "learning_rate": 5.486399262332873e-06, + "log_odds_chosen": 11.047928810119629, + "log_odds_ratio": -3.0999708542367443e-05, + "logits/chosen": 0.04424513876438141, + "logits/rejected": 0.02165381610393524, + "logps/chosen": -0.00016195660282392055, + "logps/rejected": -2.224316120147705, + "loss": 0.3611, + "nll_loss": 0.0902659147977829, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6195661373785697e-05, + "rewards/margins": 0.22241541743278503, + "rewards/rejected": -0.22243162989616394, + "step": 13032 + }, + { + "epoch": 9.01313969571231, + "grad_norm": 2.3600432872772217, + "learning_rate": 5.4825572460427234e-06, + "log_odds_chosen": 11.666213989257812, + "log_odds_ratio": -2.090056659653783e-05, + "logits/chosen": -0.1270778626203537, + "logits/rejected": -0.2530510723590851, + "logps/chosen": -0.00011234758858336136, + "logps/rejected": -2.5693368911743164, + "loss": 0.2981, + "nll_loss": 0.074515700340271, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1234758858336136e-05, + "rewards/margins": 0.25692248344421387, + "rewards/rejected": -0.25693371891975403, + "step": 13033 + }, + { + "epoch": 9.013831258644537, + "grad_norm": 3.420464515686035, + "learning_rate": 5.478715229752574e-06, + "log_odds_chosen": 10.560396194458008, + "log_odds_ratio": -5.9101683291373774e-05, + "logits/chosen": -0.43361103534698486, + "logits/rejected": -0.454063355922699, + "logps/chosen": -0.0002974130620714277, + "logps/rejected": -1.8523221015930176, + "loss": 0.356, + "nll_loss": 0.08900587260723114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.974130620714277e-05, + "rewards/margins": 0.1852024793624878, + "rewards/rejected": -0.1852322220802307, + "step": 13034 + }, + { + "epoch": 9.014522821576763, + "grad_norm": 3.225816488265991, + "learning_rate": 5.474873213462425e-06, + "log_odds_chosen": 11.012438774108887, + "log_odds_ratio": -0.0002742533397395164, + "logits/chosen": -0.07121704518795013, + "logits/rejected": -0.1961957961320877, + "logps/chosen": -0.0002828654833137989, + "logps/rejected": -1.8338875770568848, + "loss": 0.3048, + "nll_loss": 0.0761764869093895, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8286549422773533e-05, + "rewards/margins": 0.18336045742034912, + "rewards/rejected": -0.18338875472545624, + "step": 13035 + }, + { + "epoch": 9.01521438450899, + "grad_norm": 4.2216949462890625, + "learning_rate": 5.4710311971722765e-06, + "log_odds_chosen": 10.79727840423584, + "log_odds_ratio": -8.785985846770927e-05, + "logits/chosen": -0.09870100021362305, + "logits/rejected": -0.03432294726371765, + "logps/chosen": -0.00015546029317192733, + "logps/rejected": -2.134243965148926, + "loss": 0.4346, + "nll_loss": 0.10864897072315216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5546029317192733e-05, + "rewards/margins": 0.21340885758399963, + "rewards/rejected": -0.21342439949512482, + "step": 13036 + }, + { + "epoch": 9.015905947441217, + "grad_norm": 4.209107398986816, + "learning_rate": 5.467189180882127e-06, + "log_odds_chosen": 11.64953899383545, + "log_odds_ratio": -3.766633017221466e-05, + "logits/chosen": 0.06325505673885345, + "logits/rejected": -0.04017649218440056, + "logps/chosen": -0.0003921979514416307, + "logps/rejected": -3.406096935272217, + "loss": 0.4285, + "nll_loss": 0.10713205486536026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.921979805454612e-05, + "rewards/margins": 0.34057047963142395, + "rewards/rejected": -0.34060966968536377, + "step": 13037 + }, + { + "epoch": 9.016597510373444, + "grad_norm": 3.5881240367889404, + "learning_rate": 5.463347164591978e-06, + "log_odds_chosen": 10.89795970916748, + "log_odds_ratio": -9.123482595896348e-05, + "logits/chosen": 0.0009332895278930664, + "logits/rejected": 0.009780220687389374, + "logps/chosen": -0.0001413720747223124, + "logps/rejected": -1.919877290725708, + "loss": 0.3057, + "nll_loss": 0.07642700523138046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4137207472231239e-05, + "rewards/margins": 0.19197361171245575, + "rewards/rejected": -0.19198772311210632, + "step": 13038 + }, + { + "epoch": 9.01728907330567, + "grad_norm": 3.7098147869110107, + "learning_rate": 5.45950514830183e-06, + "log_odds_chosen": 11.856633186340332, + "log_odds_ratio": -0.000278906780295074, + "logits/chosen": 0.05790846049785614, + "logits/rejected": -0.055482715368270874, + "logps/chosen": -0.00029186869505792856, + "logps/rejected": -3.1621522903442383, + "loss": 0.5305, + "nll_loss": 0.13260848820209503, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.918687096098438e-05, + "rewards/margins": 0.3161860406398773, + "rewards/rejected": -0.3162152171134949, + "step": 13039 + }, + { + "epoch": 9.017980636237898, + "grad_norm": 4.042640209197998, + "learning_rate": 5.4556631320116795e-06, + "log_odds_chosen": 10.286334991455078, + "log_odds_ratio": -0.00013100498472340405, + "logits/chosen": 0.03478764742612839, + "logits/rejected": 0.015952982008457184, + "logps/chosen": -0.0009522599866613746, + "logps/rejected": -2.5293731689453125, + "loss": 0.5226, + "nll_loss": 0.13063320517539978, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.522600885247812e-05, + "rewards/margins": 0.25284209847450256, + "rewards/rejected": -0.25293731689453125, + "step": 13040 + }, + { + "epoch": 9.018672199170124, + "grad_norm": 2.555697202682495, + "learning_rate": 5.451821115721531e-06, + "log_odds_chosen": 10.420969009399414, + "log_odds_ratio": -0.00016010666149668396, + "logits/chosen": -0.09627027064561844, + "logits/rejected": 0.1425366997718811, + "logps/chosen": -0.00023016006161924452, + "logps/rejected": -1.4496179819107056, + "loss": 0.3197, + "nll_loss": 0.07989896088838577, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3016007617115974e-05, + "rewards/margins": 0.14493878185749054, + "rewards/rejected": -0.14496180415153503, + "step": 13041 + }, + { + "epoch": 9.019363762102351, + "grad_norm": 3.8921778202056885, + "learning_rate": 5.447979099431382e-06, + "log_odds_chosen": 11.134313583374023, + "log_odds_ratio": -0.0004263078444637358, + "logits/chosen": -0.24421606957912445, + "logits/rejected": -0.28580600023269653, + "logps/chosen": -0.000531858648173511, + "logps/rejected": -1.8364176750183105, + "loss": 0.3845, + "nll_loss": 0.09608376026153564, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318586408975534e-05, + "rewards/margins": 0.18358857929706573, + "rewards/rejected": -0.18364176154136658, + "step": 13042 + }, + { + "epoch": 9.020055325034578, + "grad_norm": 3.8458752632141113, + "learning_rate": 5.444137083141233e-06, + "log_odds_chosen": 10.768726348876953, + "log_odds_ratio": -0.00013082656369078904, + "logits/chosen": -0.16428546607494354, + "logits/rejected": -0.25296562910079956, + "logps/chosen": -0.0003512292169034481, + "logps/rejected": -2.459240674972534, + "loss": 0.4308, + "nll_loss": 0.10768191516399384, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.512292096274905e-05, + "rewards/margins": 0.24588894844055176, + "rewards/rejected": -0.24592408537864685, + "step": 13043 + }, + { + "epoch": 9.020746887966805, + "grad_norm": 3.0180575847625732, + "learning_rate": 5.440295066851084e-06, + "log_odds_chosen": 9.483352661132812, + "log_odds_ratio": -0.0007811450632289052, + "logits/chosen": 0.11073106527328491, + "logits/rejected": 0.10512968897819519, + "logps/chosen": -0.0007253906223922968, + "logps/rejected": -1.383448839187622, + "loss": 0.2585, + "nll_loss": 0.06454954296350479, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.253907097037882e-05, + "rewards/margins": 0.13827234506607056, + "rewards/rejected": -0.1383448839187622, + "step": 13044 + }, + { + "epoch": 9.021438450899032, + "grad_norm": 3.089290142059326, + "learning_rate": 5.436453050560935e-06, + "log_odds_chosen": 11.264272689819336, + "log_odds_ratio": -3.018009738298133e-05, + "logits/chosen": -0.6477086544036865, + "logits/rejected": -0.7271855473518372, + "logps/chosen": -0.0001471658470109105, + "logps/rejected": -2.383556365966797, + "loss": 0.3152, + "nll_loss": 0.07880601286888123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4716585610585753e-05, + "rewards/margins": 0.23834092915058136, + "rewards/rejected": -0.2383556365966797, + "step": 13045 + }, + { + "epoch": 9.022130013831259, + "grad_norm": 3.0523979663848877, + "learning_rate": 5.432611034270786e-06, + "log_odds_chosen": 10.005661010742188, + "log_odds_ratio": -8.830070873955265e-05, + "logits/chosen": -0.1184106096625328, + "logits/rejected": -0.22679013013839722, + "logps/chosen": -0.00018152232223656029, + "logps/rejected": -1.5227150917053223, + "loss": 0.2984, + "nll_loss": 0.07458432018756866, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8152231859858148e-05, + "rewards/margins": 0.15225335955619812, + "rewards/rejected": -0.15227152407169342, + "step": 13046 + }, + { + "epoch": 9.022821576763485, + "grad_norm": 3.5876870155334473, + "learning_rate": 5.4287690179806365e-06, + "log_odds_chosen": 11.028236389160156, + "log_odds_ratio": -4.377803634270094e-05, + "logits/chosen": -0.12041618674993515, + "logits/rejected": -0.06218274310231209, + "logps/chosen": -0.0003131921112071723, + "logps/rejected": -1.9908089637756348, + "loss": 0.329, + "nll_loss": 0.0822470411658287, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.131921403110027e-05, + "rewards/margins": 0.19904956221580505, + "rewards/rejected": -0.19908089935779572, + "step": 13047 + }, + { + "epoch": 9.023513139695712, + "grad_norm": 3.6984646320343018, + "learning_rate": 5.424927001690487e-06, + "log_odds_chosen": 11.58115005493164, + "log_odds_ratio": -2.4297107302118093e-05, + "logits/chosen": -0.13553261756896973, + "logits/rejected": -0.31182727217674255, + "logps/chosen": -0.00016632108599878848, + "logps/rejected": -2.4116837978363037, + "loss": 0.429, + "nll_loss": 0.10723809152841568, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.663210969127249e-05, + "rewards/margins": 0.24115175008773804, + "rewards/rejected": -0.24116836488246918, + "step": 13048 + }, + { + "epoch": 9.024204702627939, + "grad_norm": 3.875378370285034, + "learning_rate": 5.421084985400338e-06, + "log_odds_chosen": 12.607780456542969, + "log_odds_ratio": -6.794036835344741e-06, + "logits/chosen": -0.46647220849990845, + "logits/rejected": -0.51015305519104, + "logps/chosen": -5.174192483536899e-05, + "logps/rejected": -2.7071480751037598, + "loss": 0.3713, + "nll_loss": 0.09283040463924408, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1741926654358394e-06, + "rewards/margins": 0.2707096338272095, + "rewards/rejected": -0.27071481943130493, + "step": 13049 + }, + { + "epoch": 9.024896265560166, + "grad_norm": 3.5087029933929443, + "learning_rate": 5.4172429691101896e-06, + "log_odds_chosen": 9.820684432983398, + "log_odds_ratio": -0.0006471116794273257, + "logits/chosen": -0.47990888357162476, + "logits/rejected": -0.5877619981765747, + "logps/chosen": -0.00029866749537177384, + "logps/rejected": -1.5530221462249756, + "loss": 0.406, + "nll_loss": 0.10142374038696289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9866749173379503e-05, + "rewards/margins": 0.15527234971523285, + "rewards/rejected": -0.1553022265434265, + "step": 13050 + }, + { + "epoch": 9.025587828492393, + "grad_norm": 2.8552744388580322, + "learning_rate": 5.41340095282004e-06, + "log_odds_chosen": 11.862985610961914, + "log_odds_ratio": -1.448189141228795e-05, + "logits/chosen": -0.4123150110244751, + "logits/rejected": -0.469348281621933, + "logps/chosen": -0.00014005962293595076, + "logps/rejected": -2.778240919113159, + "loss": 0.4193, + "nll_loss": 0.10481908917427063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4005961020302493e-05, + "rewards/margins": 0.27781009674072266, + "rewards/rejected": -0.2778240740299225, + "step": 13051 + }, + { + "epoch": 9.02627939142462, + "grad_norm": 3.7326252460479736, + "learning_rate": 5.409558936529891e-06, + "log_odds_chosen": 11.56201457977295, + "log_odds_ratio": -5.6688295444473624e-05, + "logits/chosen": -0.7447749376296997, + "logits/rejected": -0.6999694108963013, + "logps/chosen": -0.0016155238263309002, + "logps/rejected": -2.88490629196167, + "loss": 0.4791, + "nll_loss": 0.1197793036699295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001615523942746222, + "rewards/margins": 0.2883290648460388, + "rewards/rejected": -0.2884906232357025, + "step": 13052 + }, + { + "epoch": 9.026970954356846, + "grad_norm": 2.2243173122406006, + "learning_rate": 5.405716920239743e-06, + "log_odds_chosen": 11.60845947265625, + "log_odds_ratio": -3.414329330553301e-05, + "logits/chosen": -0.517898440361023, + "logits/rejected": -0.5478456616401672, + "logps/chosen": -0.0010220286203548312, + "logps/rejected": -2.5593619346618652, + "loss": 0.3012, + "nll_loss": 0.07529132813215256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010220285912510008, + "rewards/margins": 0.2558339834213257, + "rewards/rejected": -0.2559362053871155, + "step": 13053 + }, + { + "epoch": 9.027662517289073, + "grad_norm": 2.877593755722046, + "learning_rate": 5.4018749039495926e-06, + "log_odds_chosen": 10.34636402130127, + "log_odds_ratio": -0.00028239202219992876, + "logits/chosen": -0.515480101108551, + "logits/rejected": -0.5283478498458862, + "logps/chosen": -0.00042652367847040296, + "logps/rejected": -1.8192906379699707, + "loss": 0.2366, + "nll_loss": 0.05912820249795914, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.265236930223182e-05, + "rewards/margins": 0.18188641965389252, + "rewards/rejected": -0.18192905187606812, + "step": 13054 + }, + { + "epoch": 9.0283540802213, + "grad_norm": 3.929666519165039, + "learning_rate": 5.398032887659444e-06, + "log_odds_chosen": 11.522621154785156, + "log_odds_ratio": -2.0546456653391942e-05, + "logits/chosen": -0.435081422328949, + "logits/rejected": -0.45330917835235596, + "logps/chosen": -0.00013197583029977977, + "logps/rejected": -2.1118600368499756, + "loss": 0.5275, + "nll_loss": 0.13188423216342926, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3197583029977977e-05, + "rewards/margins": 0.21117281913757324, + "rewards/rejected": -0.211186021566391, + "step": 13055 + }, + { + "epoch": 9.029045643153527, + "grad_norm": 2.5256550312042236, + "learning_rate": 5.394190871369295e-06, + "log_odds_chosen": 11.52824592590332, + "log_odds_ratio": -3.128191747236997e-05, + "logits/chosen": -0.18849243223667145, + "logits/rejected": -0.24364817142486572, + "logps/chosen": -0.00010533664317335933, + "logps/rejected": -2.4289488792419434, + "loss": 0.2592, + "nll_loss": 0.06480279564857483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0533663953538053e-05, + "rewards/margins": 0.24288436770439148, + "rewards/rejected": -0.24289490282535553, + "step": 13056 + }, + { + "epoch": 9.029737206085754, + "grad_norm": 4.501963138580322, + "learning_rate": 5.390348855079146e-06, + "log_odds_chosen": 12.487890243530273, + "log_odds_ratio": -1.3090188076603226e-05, + "logits/chosen": -0.2066900134086609, + "logits/rejected": -0.2767907679080963, + "logps/chosen": -0.00013198704982642084, + "logps/rejected": -3.3000569343566895, + "loss": 0.4018, + "nll_loss": 0.10044016689062119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3198705346439965e-05, + "rewards/margins": 0.32999250292778015, + "rewards/rejected": -0.3300057053565979, + "step": 13057 + }, + { + "epoch": 9.03042876901798, + "grad_norm": 3.2746903896331787, + "learning_rate": 5.386506838788996e-06, + "log_odds_chosen": 11.492822647094727, + "log_odds_ratio": -7.269456546055153e-05, + "logits/chosen": -0.29028403759002686, + "logits/rejected": -0.36464041471481323, + "logps/chosen": -6.488826329587027e-05, + "logps/rejected": -2.028392791748047, + "loss": 0.4216, + "nll_loss": 0.10539723932743073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.488827239081729e-06, + "rewards/margins": 0.20283278822898865, + "rewards/rejected": -0.20283928513526917, + "step": 13058 + }, + { + "epoch": 9.031120331950207, + "grad_norm": 3.6708459854125977, + "learning_rate": 5.382664822498848e-06, + "log_odds_chosen": 10.951431274414062, + "log_odds_ratio": -0.00013642846897710115, + "logits/chosen": -0.4781211018562317, + "logits/rejected": -0.5032169818878174, + "logps/chosen": -0.000443106924649328, + "logps/rejected": -2.230484962463379, + "loss": 0.455, + "nll_loss": 0.11373548209667206, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4310694647720084e-05, + "rewards/margins": 0.22300422191619873, + "rewards/rejected": -0.22304850816726685, + "step": 13059 + }, + { + "epoch": 9.031811894882434, + "grad_norm": 3.364274024963379, + "learning_rate": 5.378822806208699e-06, + "log_odds_chosen": 9.455132484436035, + "log_odds_ratio": -0.0005840727826580405, + "logits/chosen": -0.2446010559797287, + "logits/rejected": -0.2981387674808502, + "logps/chosen": -0.0006788469036109746, + "logps/rejected": -1.457331895828247, + "loss": 0.2991, + "nll_loss": 0.07472015917301178, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.788469181628898e-05, + "rewards/margins": 0.14566530287265778, + "rewards/rejected": -0.14573319256305695, + "step": 13060 + }, + { + "epoch": 9.032503457814661, + "grad_norm": 4.478184223175049, + "learning_rate": 5.3749807899185495e-06, + "log_odds_chosen": 11.661033630371094, + "log_odds_ratio": -0.0003269641601946205, + "logits/chosen": -0.2188836634159088, + "logits/rejected": -0.2645830512046814, + "logps/chosen": -0.00020673639664892107, + "logps/rejected": -2.8120455741882324, + "loss": 0.5693, + "nll_loss": 0.1423034816980362, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.067364039248787e-05, + "rewards/margins": 0.2811838984489441, + "rewards/rejected": -0.28120458126068115, + "step": 13061 + }, + { + "epoch": 9.033195020746888, + "grad_norm": 3.9616754055023193, + "learning_rate": 5.371138773628401e-06, + "log_odds_chosen": 11.736493110656738, + "log_odds_ratio": -2.007854709518142e-05, + "logits/chosen": -0.30342715978622437, + "logits/rejected": -0.29140985012054443, + "logps/chosen": -0.0002348808920942247, + "logps/rejected": -2.836967945098877, + "loss": 0.427, + "nll_loss": 0.10674826800823212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.348808993701823e-05, + "rewards/margins": 0.2836732864379883, + "rewards/rejected": -0.2836967706680298, + "step": 13062 + }, + { + "epoch": 9.033886583679115, + "grad_norm": 4.665407657623291, + "learning_rate": 5.367296757338251e-06, + "log_odds_chosen": 10.088314056396484, + "log_odds_ratio": -0.00013019969628658146, + "logits/chosen": -0.29994332790374756, + "logits/rejected": -0.46783965826034546, + "logps/chosen": -0.000256081490078941, + "logps/rejected": -1.404195785522461, + "loss": 0.3107, + "nll_loss": 0.0776708796620369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.560814937169198e-05, + "rewards/margins": 0.14039397239685059, + "rewards/rejected": -0.1404195874929428, + "step": 13063 + }, + { + "epoch": 9.034578146611342, + "grad_norm": 2.985196113586426, + "learning_rate": 5.363454741048103e-06, + "log_odds_chosen": 11.143171310424805, + "log_odds_ratio": -2.7591235266299918e-05, + "logits/chosen": -0.31959080696105957, + "logits/rejected": -0.38788843154907227, + "logps/chosen": -0.00014333522995002568, + "logps/rejected": -2.1929399967193604, + "loss": 0.2743, + "nll_loss": 0.06858177483081818, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4333522813103627e-05, + "rewards/margins": 0.2192796766757965, + "rewards/rejected": -0.21929402649402618, + "step": 13064 + }, + { + "epoch": 9.035269709543568, + "grad_norm": 2.820173740386963, + "learning_rate": 5.359612724757953e-06, + "log_odds_chosen": 10.983320236206055, + "log_odds_ratio": -5.282166239339858e-05, + "logits/chosen": -0.05022948235273361, + "logits/rejected": -0.16276401281356812, + "logps/chosen": -0.00013071924331597984, + "logps/rejected": -1.7682867050170898, + "loss": 0.3098, + "nll_loss": 0.07744050770998001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3071924513496924e-05, + "rewards/margins": 0.17681559920310974, + "rewards/rejected": -0.17682868242263794, + "step": 13065 + }, + { + "epoch": 9.035961272475795, + "grad_norm": 3.91086483001709, + "learning_rate": 5.355770708467804e-06, + "log_odds_chosen": 11.58726692199707, + "log_odds_ratio": -2.9941369575681165e-05, + "logits/chosen": -0.25834226608276367, + "logits/rejected": -0.2564094662666321, + "logps/chosen": -0.00023409070854540914, + "logps/rejected": -2.926124334335327, + "loss": 0.6506, + "nll_loss": 0.16264963150024414, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.34090730373282e-05, + "rewards/margins": 0.2925890386104584, + "rewards/rejected": -0.2926124632358551, + "step": 13066 + }, + { + "epoch": 9.036652835408022, + "grad_norm": 4.259881973266602, + "learning_rate": 5.351928692177656e-06, + "log_odds_chosen": 11.248711585998535, + "log_odds_ratio": -2.3729864551569335e-05, + "logits/chosen": -0.3928593099117279, + "logits/rejected": -0.42217206954956055, + "logps/chosen": -0.0002243789640488103, + "logps/rejected": -2.3699398040771484, + "loss": 0.5108, + "nll_loss": 0.1276947259902954, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.243789640488103e-05, + "rewards/margins": 0.23697157204151154, + "rewards/rejected": -0.23699399828910828, + "step": 13067 + }, + { + "epoch": 9.037344398340249, + "grad_norm": 2.519014835357666, + "learning_rate": 5.348086675887506e-06, + "log_odds_chosen": 10.962300300598145, + "log_odds_ratio": -4.053633165312931e-05, + "logits/chosen": -0.6078428030014038, + "logits/rejected": -0.5253631472587585, + "logps/chosen": -0.00018711041775532067, + "logps/rejected": -1.7675940990447998, + "loss": 0.3109, + "nll_loss": 0.07772395014762878, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8711041775532067e-05, + "rewards/margins": 0.17674070596694946, + "rewards/rejected": -0.17675942182540894, + "step": 13068 + }, + { + "epoch": 9.038035961272476, + "grad_norm": 3.7536327838897705, + "learning_rate": 5.344244659597357e-06, + "log_odds_chosen": 11.013205528259277, + "log_odds_ratio": -3.441493754507974e-05, + "logits/chosen": 0.10737214982509613, + "logits/rejected": 0.1350289285182953, + "logps/chosen": -9.220686479238793e-05, + "logps/rejected": -1.5986688137054443, + "loss": 0.3746, + "nll_loss": 0.09365054965019226, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.220686479238793e-06, + "rewards/margins": 0.15985766053199768, + "rewards/rejected": -0.15986689925193787, + "step": 13069 + }, + { + "epoch": 9.038727524204702, + "grad_norm": 2.6560962200164795, + "learning_rate": 5.340402643307208e-06, + "log_odds_chosen": 11.294528007507324, + "log_odds_ratio": -2.282520836160984e-05, + "logits/chosen": -0.31131410598754883, + "logits/rejected": -0.3638339340686798, + "logps/chosen": -0.00020951607439201325, + "logps/rejected": -2.445250988006592, + "loss": 0.2748, + "nll_loss": 0.06870685517787933, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0951607439201325e-05, + "rewards/margins": 0.24450412392616272, + "rewards/rejected": -0.24452508985996246, + "step": 13070 + }, + { + "epoch": 9.03941908713693, + "grad_norm": 3.578831195831299, + "learning_rate": 5.336560627017059e-06, + "log_odds_chosen": 10.841608047485352, + "log_odds_ratio": -4.9118079914478585e-05, + "logits/chosen": 0.0027485936880111694, + "logits/rejected": 0.10918466746807098, + "logps/chosen": -0.0004803585179615766, + "logps/rejected": -2.3801283836364746, + "loss": 0.5241, + "nll_loss": 0.1310272514820099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8035850340966135e-05, + "rewards/margins": 0.23796480894088745, + "rewards/rejected": -0.23801285028457642, + "step": 13071 + }, + { + "epoch": 9.040110650069156, + "grad_norm": 3.7686567306518555, + "learning_rate": 5.3327186107269094e-06, + "log_odds_chosen": 12.269804954528809, + "log_odds_ratio": -3.954406929551624e-05, + "logits/chosen": -0.22742539644241333, + "logits/rejected": -0.4627268314361572, + "logps/chosen": -0.0001714062091195956, + "logps/rejected": -3.400301218032837, + "loss": 0.3694, + "nll_loss": 0.09234748780727386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.714062091195956e-05, + "rewards/margins": 0.34001296758651733, + "rewards/rejected": -0.34003013372421265, + "step": 13072 + }, + { + "epoch": 9.040802213001383, + "grad_norm": 6.61577844619751, + "learning_rate": 5.328876594436761e-06, + "log_odds_chosen": 10.836341857910156, + "log_odds_ratio": -0.0006617381004616618, + "logits/chosen": -0.4405108392238617, + "logits/rejected": -0.4111618995666504, + "logps/chosen": -0.000346881482983008, + "logps/rejected": -2.331427574157715, + "loss": 0.4129, + "nll_loss": 0.10316643863916397, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4688149753492326e-05, + "rewards/margins": 0.23310808837413788, + "rewards/rejected": -0.23314279317855835, + "step": 13073 + }, + { + "epoch": 9.04149377593361, + "grad_norm": 3.9309210777282715, + "learning_rate": 5.325034578146612e-06, + "log_odds_chosen": 11.050765991210938, + "log_odds_ratio": -3.2668547646608204e-05, + "logits/chosen": -0.4415837228298187, + "logits/rejected": -0.43602824211120605, + "logps/chosen": -0.00020155491074547172, + "logps/rejected": -2.247512102127075, + "loss": 0.5909, + "nll_loss": 0.14771312475204468, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.015549034695141e-05, + "rewards/margins": 0.22473105788230896, + "rewards/rejected": -0.22475121915340424, + "step": 13074 + }, + { + "epoch": 9.042185338865837, + "grad_norm": 6.616016864776611, + "learning_rate": 5.3211925618564625e-06, + "log_odds_chosen": 10.741582870483398, + "log_odds_ratio": -0.00016240161494351923, + "logits/chosen": 0.0491463765501976, + "logits/rejected": -0.027135292068123817, + "logps/chosen": -0.0006349672912620008, + "logps/rejected": -2.457874059677124, + "loss": 0.5645, + "nll_loss": 0.14110106229782104, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.349672912620008e-05, + "rewards/margins": 0.2457239180803299, + "rewards/rejected": -0.24578741192817688, + "step": 13075 + }, + { + "epoch": 9.042876901798063, + "grad_norm": 3.3583385944366455, + "learning_rate": 5.317350545566314e-06, + "log_odds_chosen": 11.328731536865234, + "log_odds_ratio": -3.160857886541635e-05, + "logits/chosen": -0.21437186002731323, + "logits/rejected": -0.386541485786438, + "logps/chosen": -0.00045217963634058833, + "logps/rejected": -2.5081796646118164, + "loss": 0.2882, + "nll_loss": 0.07204939424991608, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.521796290646307e-05, + "rewards/margins": 0.25077277421951294, + "rewards/rejected": -0.2508179843425751, + "step": 13076 + }, + { + "epoch": 9.04356846473029, + "grad_norm": 4.046693801879883, + "learning_rate": 5.313508529276164e-06, + "log_odds_chosen": 10.828814506530762, + "log_odds_ratio": -0.00013855035649612546, + "logits/chosen": -0.19150567054748535, + "logits/rejected": -0.30324339866638184, + "logps/chosen": -0.00014190759975463152, + "logps/rejected": -1.6376926898956299, + "loss": 0.5934, + "nll_loss": 0.14834515750408173, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4190760339261033e-05, + "rewards/margins": 0.16375507414340973, + "rewards/rejected": -0.16376927495002747, + "step": 13077 + }, + { + "epoch": 9.044260027662517, + "grad_norm": 2.2931673526763916, + "learning_rate": 5.309666512986016e-06, + "log_odds_chosen": 10.644186019897461, + "log_odds_ratio": -0.00012147890083724633, + "logits/chosen": 0.014885544776916504, + "logits/rejected": 0.026649564504623413, + "logps/chosen": -0.00023539473477285355, + "logps/rejected": -1.762995719909668, + "loss": 0.2362, + "nll_loss": 0.05904865637421608, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3539472749689594e-05, + "rewards/margins": 0.1762760579586029, + "rewards/rejected": -0.176299586892128, + "step": 13078 + }, + { + "epoch": 9.044951590594744, + "grad_norm": 4.323233127593994, + "learning_rate": 5.3058244966958655e-06, + "log_odds_chosen": 11.343955993652344, + "log_odds_ratio": -8.940276165958494e-05, + "logits/chosen": -0.11328444629907608, + "logits/rejected": -0.14247748255729675, + "logps/chosen": -0.00016477785538882017, + "logps/rejected": -2.03590726852417, + "loss": 0.4436, + "nll_loss": 0.11090175062417984, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6477784811286256e-05, + "rewards/margins": 0.20357424020767212, + "rewards/rejected": -0.20359072089195251, + "step": 13079 + }, + { + "epoch": 9.04564315352697, + "grad_norm": 2.719088077545166, + "learning_rate": 5.301982480405717e-06, + "log_odds_chosen": 12.002786636352539, + "log_odds_ratio": -1.0852253581106197e-05, + "logits/chosen": -0.6455560326576233, + "logits/rejected": -0.6803206205368042, + "logps/chosen": -0.00010349987132940441, + "logps/rejected": -2.305574417114258, + "loss": 0.3786, + "nll_loss": 0.09465332329273224, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0349987860536203e-05, + "rewards/margins": 0.23054710030555725, + "rewards/rejected": -0.23055744171142578, + "step": 13080 + }, + { + "epoch": 9.046334716459198, + "grad_norm": 2.3111789226531982, + "learning_rate": 5.298140464115568e-06, + "log_odds_chosen": 10.059361457824707, + "log_odds_ratio": -0.00012542004697024822, + "logits/chosen": -0.4591970145702362, + "logits/rejected": -0.5288703441619873, + "logps/chosen": -0.0004874311271123588, + "logps/rejected": -1.520819067955017, + "loss": 0.2362, + "nll_loss": 0.059025026857852936, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8743109800852835e-05, + "rewards/margins": 0.15203317999839783, + "rewards/rejected": -0.1520819216966629, + "step": 13081 + }, + { + "epoch": 9.047026279391424, + "grad_norm": 2.565124750137329, + "learning_rate": 5.294298447825419e-06, + "log_odds_chosen": 11.172361373901367, + "log_odds_ratio": -0.00011450420424807817, + "logits/chosen": 0.21182212233543396, + "logits/rejected": 0.255759596824646, + "logps/chosen": -0.00019564552349038422, + "logps/rejected": -2.238527297973633, + "loss": 0.3252, + "nll_loss": 0.08128926157951355, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956455162144266e-05, + "rewards/margins": 0.22383317351341248, + "rewards/rejected": -0.2238527238368988, + "step": 13082 + }, + { + "epoch": 9.047717842323651, + "grad_norm": 3.543975353240967, + "learning_rate": 5.29045643153527e-06, + "log_odds_chosen": 11.253551483154297, + "log_odds_ratio": -6.646641122642905e-05, + "logits/chosen": -0.13842296600341797, + "logits/rejected": -0.11984425783157349, + "logps/chosen": -0.00011901521065738052, + "logps/rejected": -2.047550678253174, + "loss": 0.4295, + "nll_loss": 0.10736589878797531, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1901522157131694e-05, + "rewards/margins": 0.2047431468963623, + "rewards/rejected": -0.2047550529241562, + "step": 13083 + }, + { + "epoch": 9.048409405255878, + "grad_norm": 2.7371878623962402, + "learning_rate": 5.286614415245121e-06, + "log_odds_chosen": 11.49665641784668, + "log_odds_ratio": -8.669508679304272e-05, + "logits/chosen": -0.06901225447654724, + "logits/rejected": -0.1418294459581375, + "logps/chosen": -0.0002866098075173795, + "logps/rejected": -2.9433188438415527, + "loss": 0.4201, + "nll_loss": 0.10502439737319946, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.866097929654643e-05, + "rewards/margins": 0.2943032383918762, + "rewards/rejected": -0.2943318784236908, + "step": 13084 + }, + { + "epoch": 9.049100968188105, + "grad_norm": 2.7726447582244873, + "learning_rate": 5.282772398954972e-06, + "log_odds_chosen": 9.871931076049805, + "log_odds_ratio": -0.00035949063021689653, + "logits/chosen": -0.4832724332809448, + "logits/rejected": -0.5116129517555237, + "logps/chosen": -0.0005433057667687535, + "logps/rejected": -2.2153072357177734, + "loss": 0.2505, + "nll_loss": 0.06258294731378555, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.43305795872584e-05, + "rewards/margins": 0.22147642076015472, + "rewards/rejected": -0.2215307503938675, + "step": 13085 + }, + { + "epoch": 9.049792531120332, + "grad_norm": 2.650989294052124, + "learning_rate": 5.2789303826648225e-06, + "log_odds_chosen": 10.100696563720703, + "log_odds_ratio": -0.0004444028891157359, + "logits/chosen": -0.4472510814666748, + "logits/rejected": -0.5792377591133118, + "logps/chosen": -0.0005354660097509623, + "logps/rejected": -1.691986322402954, + "loss": 0.2374, + "nll_loss": 0.059308335185050964, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.354660243028775e-05, + "rewards/margins": 0.1691451072692871, + "rewards/rejected": -0.1691986471414566, + "step": 13086 + }, + { + "epoch": 9.050484094052559, + "grad_norm": 4.133640289306641, + "learning_rate": 5.275088366374674e-06, + "log_odds_chosen": 12.123001098632812, + "log_odds_ratio": -1.3905494597565848e-05, + "logits/chosen": -0.06451994925737381, + "logits/rejected": -0.2188749611377716, + "logps/chosen": -0.00024088873760774732, + "logps/rejected": -2.9247636795043945, + "loss": 0.3953, + "nll_loss": 0.09881335496902466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.408887303317897e-05, + "rewards/margins": 0.29245227575302124, + "rewards/rejected": -0.2924763560295105, + "step": 13087 + }, + { + "epoch": 9.051175656984785, + "grad_norm": 4.221945285797119, + "learning_rate": 5.271246350084524e-06, + "log_odds_chosen": 11.77324390411377, + "log_odds_ratio": -1.764004264259711e-05, + "logits/chosen": -0.13490182161331177, + "logits/rejected": -0.20364238321781158, + "logps/chosen": -0.00011886367428814992, + "logps/rejected": -2.5822014808654785, + "loss": 0.4196, + "nll_loss": 0.10489566624164581, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1886368156410754e-05, + "rewards/margins": 0.2582082748413086, + "rewards/rejected": -0.2582201659679413, + "step": 13088 + }, + { + "epoch": 9.051867219917012, + "grad_norm": 2.730008840560913, + "learning_rate": 5.2674043337943756e-06, + "log_odds_chosen": 10.970198631286621, + "log_odds_ratio": -7.255510718096048e-05, + "logits/chosen": -0.4563295841217041, + "logits/rejected": -0.6076275110244751, + "logps/chosen": -0.0002196189743699506, + "logps/rejected": -2.3138020038604736, + "loss": 0.3483, + "nll_loss": 0.08706316351890564, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.196189780079294e-05, + "rewards/margins": 0.23135823011398315, + "rewards/rejected": -0.23138019442558289, + "step": 13089 + }, + { + "epoch": 9.052558782849239, + "grad_norm": 3.694523334503174, + "learning_rate": 5.263562317504227e-06, + "log_odds_chosen": 11.171448707580566, + "log_odds_ratio": -8.31487777759321e-05, + "logits/chosen": -0.5866619944572449, + "logits/rejected": -0.6402585506439209, + "logps/chosen": -0.0007456215098500252, + "logps/rejected": -2.4630119800567627, + "loss": 0.5786, + "nll_loss": 0.14464695751667023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.456215826096013e-05, + "rewards/margins": 0.24622663855552673, + "rewards/rejected": -0.24630121886730194, + "step": 13090 + }, + { + "epoch": 9.053250345781466, + "grad_norm": 3.026980400085449, + "learning_rate": 5.259720301214077e-06, + "log_odds_chosen": 11.604578018188477, + "log_odds_ratio": -0.00010914913582382724, + "logits/chosen": -0.5673521757125854, + "logits/rejected": -0.5239881277084351, + "logps/chosen": -0.0001174298522528261, + "logps/rejected": -2.4955615997314453, + "loss": 0.2728, + "nll_loss": 0.06819754093885422, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1742986316676252e-05, + "rewards/margins": 0.24954447150230408, + "rewards/rejected": -0.24955618381500244, + "step": 13091 + }, + { + "epoch": 9.053941908713693, + "grad_norm": 3.0807971954345703, + "learning_rate": 5.255878284923929e-06, + "log_odds_chosen": 10.801340103149414, + "log_odds_ratio": -8.183487079804763e-05, + "logits/chosen": -0.5854331254959106, + "logits/rejected": -0.6655092239379883, + "logps/chosen": -0.0002837886568158865, + "logps/rejected": -1.8841159343719482, + "loss": 0.365, + "nll_loss": 0.09124018251895905, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.837886495399289e-05, + "rewards/margins": 0.18838322162628174, + "rewards/rejected": -0.18841159343719482, + "step": 13092 + }, + { + "epoch": 9.05463347164592, + "grad_norm": 3.904637098312378, + "learning_rate": 5.252036268633779e-06, + "log_odds_chosen": 10.611612319946289, + "log_odds_ratio": -9.759830572875217e-05, + "logits/chosen": -0.5094362497329712, + "logits/rejected": -0.4815782904624939, + "logps/chosen": -0.00036221236223354936, + "logps/rejected": -2.191467046737671, + "loss": 0.306, + "nll_loss": 0.0764811560511589, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.622123767854646e-05, + "rewards/margins": 0.21911050379276276, + "rewards/rejected": -0.2191467136144638, + "step": 13093 + }, + { + "epoch": 9.055325034578146, + "grad_norm": 3.0631561279296875, + "learning_rate": 5.24819425234363e-06, + "log_odds_chosen": 11.680587768554688, + "log_odds_ratio": -3.716135324793868e-05, + "logits/chosen": 0.16301950812339783, + "logits/rejected": 0.12410911917686462, + "logps/chosen": -0.00011040831304853782, + "logps/rejected": -2.468747615814209, + "loss": 0.2997, + "nll_loss": 0.07492666691541672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1040832760045305e-05, + "rewards/margins": 0.2468637228012085, + "rewards/rejected": -0.24687474966049194, + "step": 13094 + }, + { + "epoch": 9.056016597510373, + "grad_norm": 4.222609996795654, + "learning_rate": 5.244352236053481e-06, + "log_odds_chosen": 11.704336166381836, + "log_odds_ratio": -2.5846133212326095e-05, + "logits/chosen": -0.27149274945259094, + "logits/rejected": -0.33308032155036926, + "logps/chosen": -0.0001536675845272839, + "logps/rejected": -2.5176734924316406, + "loss": 0.5543, + "nll_loss": 0.13858075439929962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.536675881652627e-05, + "rewards/margins": 0.25175195932388306, + "rewards/rejected": -0.2517673373222351, + "step": 13095 + }, + { + "epoch": 9.0567081604426, + "grad_norm": 3.1188364028930664, + "learning_rate": 5.240510219763332e-06, + "log_odds_chosen": 11.076004028320312, + "log_odds_ratio": -0.00010368539369665086, + "logits/chosen": -0.10039699822664261, + "logits/rejected": -0.10477086156606674, + "logps/chosen": -0.00041912851156666875, + "logps/rejected": -2.0893921852111816, + "loss": 0.3091, + "nll_loss": 0.07725635170936584, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1912851884262636e-05, + "rewards/margins": 0.20889730751514435, + "rewards/rejected": -0.20893922448158264, + "step": 13096 + }, + { + "epoch": 9.057399723374827, + "grad_norm": 2.7609245777130127, + "learning_rate": 5.236668203473183e-06, + "log_odds_chosen": 11.29232406616211, + "log_odds_ratio": -3.3745109249139205e-05, + "logits/chosen": -0.541816234588623, + "logits/rejected": -0.6653363108634949, + "logps/chosen": -0.00010187992302235216, + "logps/rejected": -2.0963993072509766, + "loss": 0.3485, + "nll_loss": 0.08711716532707214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0187993211729918e-05, + "rewards/margins": 0.20962974429130554, + "rewards/rejected": -0.20963993668556213, + "step": 13097 + }, + { + "epoch": 9.058091286307054, + "grad_norm": 2.5463993549346924, + "learning_rate": 5.232826187183034e-06, + "log_odds_chosen": 10.777527809143066, + "log_odds_ratio": -2.8020123863825575e-05, + "logits/chosen": -0.14532683789730072, + "logits/rejected": -0.19010743498802185, + "logps/chosen": -0.00010921778448391706, + "logps/rejected": -1.6891964673995972, + "loss": 0.3606, + "nll_loss": 0.0901351198554039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0921778084593825e-05, + "rewards/margins": 0.1689087301492691, + "rewards/rejected": -0.1689196527004242, + "step": 13098 + }, + { + "epoch": 9.05878284923928, + "grad_norm": 3.5195703506469727, + "learning_rate": 5.228984170892885e-06, + "log_odds_chosen": 11.336797714233398, + "log_odds_ratio": -5.716394298360683e-05, + "logits/chosen": -0.2846332788467407, + "logits/rejected": -0.3512307405471802, + "logps/chosen": -0.0002162289310945198, + "logps/rejected": -2.9113054275512695, + "loss": 0.3919, + "nll_loss": 0.097966268658638, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.16228927456541e-05, + "rewards/margins": 0.2911089062690735, + "rewards/rejected": -0.29113057255744934, + "step": 13099 + }, + { + "epoch": 9.059474412171507, + "grad_norm": 4.029845237731934, + "learning_rate": 5.2251421546027355e-06, + "log_odds_chosen": 10.34531021118164, + "log_odds_ratio": -0.00023824439267627895, + "logits/chosen": -0.09753906726837158, + "logits/rejected": -0.09223097562789917, + "logps/chosen": -0.0006019758293405175, + "logps/rejected": -2.266861915588379, + "loss": 0.391, + "nll_loss": 0.09771972894668579, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019758075126447e-05, + "rewards/margins": 0.22662599384784698, + "rewards/rejected": -0.22668620944023132, + "step": 13100 + }, + { + "epoch": 9.060165975103734, + "grad_norm": 3.6992623805999756, + "learning_rate": 5.221300138312587e-06, + "log_odds_chosen": 11.187959671020508, + "log_odds_ratio": -7.363592885667458e-05, + "logits/chosen": -0.398265540599823, + "logits/rejected": -0.5765001773834229, + "logps/chosen": -0.00021470579667948186, + "logps/rejected": -2.195138454437256, + "loss": 0.4704, + "nll_loss": 0.1176011860370636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.147058148693759e-05, + "rewards/margins": 0.2194923758506775, + "rewards/rejected": -0.21951386332511902, + "step": 13101 + }, + { + "epoch": 9.060857538035961, + "grad_norm": 3.011972427368164, + "learning_rate": 5.217458122022437e-06, + "log_odds_chosen": 11.178058624267578, + "log_odds_ratio": -4.639428516384214e-05, + "logits/chosen": -0.18648342788219452, + "logits/rejected": -0.3156924545764923, + "logps/chosen": -0.00020562413556035608, + "logps/rejected": -2.582714080810547, + "loss": 0.3531, + "nll_loss": 0.08826884627342224, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.056241464742925e-05, + "rewards/margins": 0.2582508325576782, + "rewards/rejected": -0.25827139616012573, + "step": 13102 + }, + { + "epoch": 9.061549100968188, + "grad_norm": 2.6720283031463623, + "learning_rate": 5.213616105732289e-06, + "log_odds_chosen": 12.21937370300293, + "log_odds_ratio": -1.6247211533482186e-05, + "logits/chosen": -0.47814512252807617, + "logits/rejected": -0.4694201946258545, + "logps/chosen": -9.942967881215736e-05, + "logps/rejected": -2.6553704738616943, + "loss": 0.2414, + "nll_loss": 0.06034007668495178, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.942967153619975e-06, + "rewards/margins": 0.26552706956863403, + "rewards/rejected": -0.2655370533466339, + "step": 13103 + }, + { + "epoch": 9.062240663900415, + "grad_norm": 3.425598621368408, + "learning_rate": 5.209774089442139e-06, + "log_odds_chosen": 10.777713775634766, + "log_odds_ratio": -0.00031900242902338505, + "logits/chosen": -0.09612531960010529, + "logits/rejected": -0.16886131465435028, + "logps/chosen": -0.00017488611047156155, + "logps/rejected": -2.1556687355041504, + "loss": 0.318, + "nll_loss": 0.07945883274078369, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7488609955762513e-05, + "rewards/margins": 0.21554937958717346, + "rewards/rejected": -0.21556688845157623, + "step": 13104 + }, + { + "epoch": 9.062932226832642, + "grad_norm": 2.207106113433838, + "learning_rate": 5.20593207315199e-06, + "log_odds_chosen": 11.121923446655273, + "log_odds_ratio": -0.00023766764206811786, + "logits/chosen": -0.37215495109558105, + "logits/rejected": -0.3522205948829651, + "logps/chosen": -9.444890747545287e-05, + "logps/rejected": -1.9948663711547852, + "loss": 0.2259, + "nll_loss": 0.056441545486450195, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.444891475141048e-06, + "rewards/margins": 0.1994771957397461, + "rewards/rejected": -0.1994866281747818, + "step": 13105 + }, + { + "epoch": 9.063623789764868, + "grad_norm": 3.165987491607666, + "learning_rate": 5.202090056861842e-06, + "log_odds_chosen": 12.515090942382812, + "log_odds_ratio": -5.959595910098869e-06, + "logits/chosen": -0.3708084523677826, + "logits/rejected": -0.4040341377258301, + "logps/chosen": -0.00014049882884137332, + "logps/rejected": -3.0284299850463867, + "loss": 0.3313, + "nll_loss": 0.08281341195106506, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4049882338440511e-05, + "rewards/margins": 0.30282896757125854, + "rewards/rejected": -0.30284303426742554, + "step": 13106 + }, + { + "epoch": 9.064315352697095, + "grad_norm": 2.9175662994384766, + "learning_rate": 5.1982480405716924e-06, + "log_odds_chosen": 10.615434646606445, + "log_odds_ratio": -0.00035317084984853864, + "logits/chosen": -0.5332492589950562, + "logits/rejected": -0.5264109373092651, + "logps/chosen": -0.0006139426259323955, + "logps/rejected": -1.8538782596588135, + "loss": 0.3253, + "nll_loss": 0.08128499984741211, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.139426113804802e-05, + "rewards/margins": 0.18532642722129822, + "rewards/rejected": -0.18538782000541687, + "step": 13107 + }, + { + "epoch": 9.065006915629322, + "grad_norm": 3.3031039237976074, + "learning_rate": 5.194406024281543e-06, + "log_odds_chosen": 11.36837387084961, + "log_odds_ratio": -6.764855788787827e-05, + "logits/chosen": -0.11974788457155228, + "logits/rejected": -0.12750250101089478, + "logps/chosen": -0.0002460972755216062, + "logps/rejected": -2.46195912361145, + "loss": 0.3839, + "nll_loss": 0.09596128761768341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4609729734947905e-05, + "rewards/margins": 0.24617129564285278, + "rewards/rejected": -0.24619589745998383, + "step": 13108 + }, + { + "epoch": 9.065698478561549, + "grad_norm": 3.527372121810913, + "learning_rate": 5.190564007991394e-06, + "log_odds_chosen": 10.913864135742188, + "log_odds_ratio": -0.00026111333863809705, + "logits/chosen": -0.5446960926055908, + "logits/rejected": -0.5455598831176758, + "logps/chosen": -0.0002539431443437934, + "logps/rejected": -2.0499162673950195, + "loss": 0.4883, + "nll_loss": 0.12203717231750488, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.539431443437934e-05, + "rewards/margins": 0.2049662470817566, + "rewards/rejected": -0.2049916386604309, + "step": 13109 + }, + { + "epoch": 9.066390041493776, + "grad_norm": 3.0204710960388184, + "learning_rate": 5.1867219917012455e-06, + "log_odds_chosen": 10.103961944580078, + "log_odds_ratio": -0.00032514857593923807, + "logits/chosen": -0.10731053352355957, + "logits/rejected": -0.01982625015079975, + "logps/chosen": -0.0006700665107928216, + "logps/rejected": -1.8548423051834106, + "loss": 0.347, + "nll_loss": 0.08671282231807709, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.700665835523978e-05, + "rewards/margins": 0.18541721999645233, + "rewards/rejected": -0.18548423051834106, + "step": 13110 + }, + { + "epoch": 9.067081604426003, + "grad_norm": 5.251509189605713, + "learning_rate": 5.1828799754110954e-06, + "log_odds_chosen": 12.076827049255371, + "log_odds_ratio": -2.3212494852486998e-05, + "logits/chosen": -0.38703128695487976, + "logits/rejected": -0.3221339285373688, + "logps/chosen": -0.0004165376885794103, + "logps/rejected": -3.2787702083587646, + "loss": 0.4495, + "nll_loss": 0.1123625785112381, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1653765947557986e-05, + "rewards/margins": 0.3278353810310364, + "rewards/rejected": -0.3278770446777344, + "step": 13111 + }, + { + "epoch": 9.06777316735823, + "grad_norm": 4.941395282745361, + "learning_rate": 5.179037959120947e-06, + "log_odds_chosen": 11.142715454101562, + "log_odds_ratio": -3.0218645406421274e-05, + "logits/chosen": -0.4054286479949951, + "logits/rejected": -0.5167216062545776, + "logps/chosen": -0.0003387325559742749, + "logps/rejected": -2.3566412925720215, + "loss": 0.4717, + "nll_loss": 0.11792398989200592, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.38732534146402e-05, + "rewards/margins": 0.23563024401664734, + "rewards/rejected": -0.23566412925720215, + "step": 13112 + }, + { + "epoch": 9.068464730290456, + "grad_norm": 2.290341377258301, + "learning_rate": 5.175195942830798e-06, + "log_odds_chosen": 11.20633602142334, + "log_odds_ratio": -3.111179103143513e-05, + "logits/chosen": -0.12001897394657135, + "logits/rejected": -0.14916636049747467, + "logps/chosen": -0.00010382429172750562, + "logps/rejected": -1.8613578081130981, + "loss": 0.2732, + "nll_loss": 0.06830594688653946, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0382428627053741e-05, + "rewards/margins": 0.18612539768218994, + "rewards/rejected": -0.18613578379154205, + "step": 13113 + }, + { + "epoch": 9.069156293222683, + "grad_norm": 3.792060613632202, + "learning_rate": 5.1713539265406485e-06, + "log_odds_chosen": 11.503861427307129, + "log_odds_ratio": -0.0001394737046211958, + "logits/chosen": 0.13606256246566772, + "logits/rejected": 0.025217028334736824, + "logps/chosen": -0.00030634726863354445, + "logps/rejected": -2.5279359817504883, + "loss": 0.3845, + "nll_loss": 0.0961097702383995, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0634731956524774e-05, + "rewards/margins": 0.25276297330856323, + "rewards/rejected": -0.2527935802936554, + "step": 13114 + }, + { + "epoch": 9.06984785615491, + "grad_norm": 3.5024185180664062, + "learning_rate": 5.1675119102505e-06, + "log_odds_chosen": 10.771469116210938, + "log_odds_ratio": -4.546689524431713e-05, + "logits/chosen": 0.012300148606300354, + "logits/rejected": -0.0769965648651123, + "logps/chosen": -0.00017290910182055086, + "logps/rejected": -2.2188332080841064, + "loss": 0.3947, + "nll_loss": 0.09866972267627716, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7290909454459324e-05, + "rewards/margins": 0.22186604142189026, + "rewards/rejected": -0.22188332676887512, + "step": 13115 + }, + { + "epoch": 9.070539419087137, + "grad_norm": 2.8086256980895996, + "learning_rate": 5.16366989396035e-06, + "log_odds_chosen": 11.772309303283691, + "log_odds_ratio": -5.0502352678449824e-05, + "logits/chosen": 0.15161627531051636, + "logits/rejected": 0.05611416697502136, + "logps/chosen": -0.00022595847258344293, + "logps/rejected": -2.136870861053467, + "loss": 0.3632, + "nll_loss": 0.09080210328102112, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2595848349737935e-05, + "rewards/margins": 0.21366450190544128, + "rewards/rejected": -0.21368709206581116, + "step": 13116 + }, + { + "epoch": 9.071230982019364, + "grad_norm": 3.9946985244750977, + "learning_rate": 5.159827877670202e-06, + "log_odds_chosen": 10.807271957397461, + "log_odds_ratio": -4.892574725090526e-05, + "logits/chosen": -0.18946993350982666, + "logits/rejected": -0.34398671984672546, + "logps/chosen": -0.0002714378642849624, + "logps/rejected": -2.0007245540618896, + "loss": 0.5261, + "nll_loss": 0.13151350617408752, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.714378570090048e-05, + "rewards/margins": 0.20004534721374512, + "rewards/rejected": -0.2000724822282791, + "step": 13117 + }, + { + "epoch": 9.07192254495159, + "grad_norm": 3.284456491470337, + "learning_rate": 5.155985861380052e-06, + "log_odds_chosen": 11.461698532104492, + "log_odds_ratio": -6.98392977938056e-05, + "logits/chosen": -0.2301444262266159, + "logits/rejected": -0.2811543345451355, + "logps/chosen": -0.00048389926087111235, + "logps/rejected": -3.2208995819091797, + "loss": 0.2865, + "nll_loss": 0.07162515819072723, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.838992754230276e-05, + "rewards/margins": 0.3220415711402893, + "rewards/rejected": -0.3220899701118469, + "step": 13118 + }, + { + "epoch": 9.072614107883817, + "grad_norm": 3.5966992378234863, + "learning_rate": 5.152143845089903e-06, + "log_odds_chosen": 11.999691009521484, + "log_odds_ratio": -2.1546753487200476e-05, + "logits/chosen": -0.48742491006851196, + "logits/rejected": -0.4489961266517639, + "logps/chosen": -8.737298776395619e-05, + "logps/rejected": -2.584155797958374, + "loss": 0.4487, + "nll_loss": 0.11216401308774948, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.7372991401935e-06, + "rewards/margins": 0.2584068477153778, + "rewards/rejected": -0.2584155797958374, + "step": 13119 + }, + { + "epoch": 9.073305670816044, + "grad_norm": 4.022125720977783, + "learning_rate": 5.148301828799755e-06, + "log_odds_chosen": 10.40102767944336, + "log_odds_ratio": -7.439900218741968e-05, + "logits/chosen": -0.015573695302009583, + "logits/rejected": -0.04067067801952362, + "logps/chosen": -0.00011290111433481798, + "logps/rejected": -1.432818055152893, + "loss": 0.3738, + "nll_loss": 0.09345272183418274, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.129011252487544e-05, + "rewards/margins": 0.14327052235603333, + "rewards/rejected": -0.14328181743621826, + "step": 13120 + }, + { + "epoch": 9.07399723374827, + "grad_norm": 2.8626561164855957, + "learning_rate": 5.1444598125096055e-06, + "log_odds_chosen": 11.430521965026855, + "log_odds_ratio": -4.875660306424834e-05, + "logits/chosen": -0.724206268787384, + "logits/rejected": -0.7216789126396179, + "logps/chosen": -0.00043527281377464533, + "logps/rejected": -2.789267063140869, + "loss": 0.3303, + "nll_loss": 0.08256605267524719, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.352728137746453e-05, + "rewards/margins": 0.278883159160614, + "rewards/rejected": -0.27892670035362244, + "step": 13121 + }, + { + "epoch": 9.074688796680498, + "grad_norm": 3.03627347946167, + "learning_rate": 5.140617796219456e-06, + "log_odds_chosen": 11.022279739379883, + "log_odds_ratio": -0.0005119434790685773, + "logits/chosen": -0.2759706377983093, + "logits/rejected": -0.35239100456237793, + "logps/chosen": -0.00046582252252846956, + "logps/rejected": -2.379591464996338, + "loss": 0.2766, + "nll_loss": 0.06909601390361786, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6582252252846956e-05, + "rewards/margins": 0.23791258037090302, + "rewards/rejected": -0.2379591464996338, + "step": 13122 + }, + { + "epoch": 9.075380359612724, + "grad_norm": 3.4199564456939697, + "learning_rate": 5.136775779929307e-06, + "log_odds_chosen": 11.351811408996582, + "log_odds_ratio": -1.5767138393130153e-05, + "logits/chosen": -0.3166934847831726, + "logits/rejected": -0.365016907453537, + "logps/chosen": -0.0001815901923691854, + "logps/rejected": -2.5316357612609863, + "loss": 0.4853, + "nll_loss": 0.1213144063949585, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8159018509322777e-05, + "rewards/margins": 0.2531454265117645, + "rewards/rejected": -0.25316357612609863, + "step": 13123 + }, + { + "epoch": 9.076071922544951, + "grad_norm": 3.119173049926758, + "learning_rate": 5.1329337636391586e-06, + "log_odds_chosen": 9.569600105285645, + "log_odds_ratio": -0.000530701712705195, + "logits/chosen": 0.002615414559841156, + "logits/rejected": -0.06705156713724136, + "logps/chosen": -0.0006330714095383883, + "logps/rejected": -1.9746882915496826, + "loss": 0.3692, + "nll_loss": 0.09225521981716156, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.33071394986473e-05, + "rewards/margins": 0.19740553200244904, + "rewards/rejected": -0.1974688470363617, + "step": 13124 + }, + { + "epoch": 9.076763485477178, + "grad_norm": 2.911562919616699, + "learning_rate": 5.1290917473490085e-06, + "log_odds_chosen": 11.062809944152832, + "log_odds_ratio": -0.00010248189209960401, + "logits/chosen": -0.22816266119480133, + "logits/rejected": -0.2339855432510376, + "logps/chosen": -0.00022922157950233668, + "logps/rejected": -2.2725064754486084, + "loss": 0.3558, + "nll_loss": 0.08894093334674835, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.292215867782943e-05, + "rewards/margins": 0.22722773253917694, + "rewards/rejected": -0.22725063562393188, + "step": 13125 + }, + { + "epoch": 9.077455048409405, + "grad_norm": 2.968526840209961, + "learning_rate": 5.12524973105886e-06, + "log_odds_chosen": 12.123152732849121, + "log_odds_ratio": -2.1198087779339403e-05, + "logits/chosen": -0.08643770217895508, + "logits/rejected": -0.24279722571372986, + "logps/chosen": -0.0001828512322390452, + "logps/rejected": -3.424088954925537, + "loss": 0.3092, + "nll_loss": 0.07728907465934753, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8285125406691805e-05, + "rewards/margins": 0.34239059686660767, + "rewards/rejected": -0.3424088954925537, + "step": 13126 + }, + { + "epoch": 9.078146611341632, + "grad_norm": 3.580775499343872, + "learning_rate": 5.121407714768711e-06, + "log_odds_chosen": 10.601216316223145, + "log_odds_ratio": -9.730371675686911e-05, + "logits/chosen": -0.46868231892585754, + "logits/rejected": -0.5867648720741272, + "logps/chosen": -0.00010927829134743661, + "logps/rejected": -1.7784924507141113, + "loss": 0.487, + "nll_loss": 0.12175001204013824, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0927829862339422e-05, + "rewards/margins": 0.17783832550048828, + "rewards/rejected": -0.17784924805164337, + "step": 13127 + }, + { + "epoch": 9.078838174273859, + "grad_norm": 4.234866619110107, + "learning_rate": 5.1175656984785616e-06, + "log_odds_chosen": 10.49764347076416, + "log_odds_ratio": -9.967401274479926e-05, + "logits/chosen": -0.34242355823516846, + "logits/rejected": -0.4020652174949646, + "logps/chosen": -0.00039785588160157204, + "logps/rejected": -2.255194902420044, + "loss": 0.8858, + "nll_loss": 0.22144310176372528, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.978558743256144e-05, + "rewards/margins": 0.22547970712184906, + "rewards/rejected": -0.22551949322223663, + "step": 13128 + }, + { + "epoch": 9.079529737206085, + "grad_norm": 3.9162516593933105, + "learning_rate": 5.113723682188413e-06, + "log_odds_chosen": 10.959028244018555, + "log_odds_ratio": -0.00017648242646828294, + "logits/chosen": 0.050031401216983795, + "logits/rejected": 0.008972518146038055, + "logps/chosen": -0.00042688497342169285, + "logps/rejected": -2.5868098735809326, + "loss": 0.4281, + "nll_loss": 0.10699784755706787, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2688498069765046e-05, + "rewards/margins": 0.25863829255104065, + "rewards/rejected": -0.25868096947669983, + "step": 13129 + }, + { + "epoch": 9.080221300138312, + "grad_norm": 2.9817802906036377, + "learning_rate": 5.109881665898263e-06, + "log_odds_chosen": 10.926042556762695, + "log_odds_ratio": -2.3100288672139868e-05, + "logits/chosen": -0.11011086404323578, + "logits/rejected": -0.3051503300666809, + "logps/chosen": -0.00021605490474030375, + "logps/rejected": -2.295247793197632, + "loss": 0.3411, + "nll_loss": 0.08526698499917984, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.160549229301978e-05, + "rewards/margins": 0.22950318455696106, + "rewards/rejected": -0.22952479124069214, + "step": 13130 + }, + { + "epoch": 9.08091286307054, + "grad_norm": 3.780095100402832, + "learning_rate": 5.106039649608115e-06, + "log_odds_chosen": 9.921340942382812, + "log_odds_ratio": -0.00042483158176764846, + "logits/chosen": -0.1498485803604126, + "logits/rejected": -0.18959610164165497, + "logps/chosen": -0.00021947725326754153, + "logps/rejected": -1.544329285621643, + "loss": 0.4839, + "nll_loss": 0.12092307209968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.194772423536051e-05, + "rewards/margins": 0.15441098809242249, + "rewards/rejected": -0.15443293750286102, + "step": 13131 + }, + { + "epoch": 9.081604426002766, + "grad_norm": 2.6842169761657715, + "learning_rate": 5.102197633317965e-06, + "log_odds_chosen": 11.452657699584961, + "log_odds_ratio": -5.78801627852954e-05, + "logits/chosen": -0.08300793170928955, + "logits/rejected": -0.11368023604154587, + "logps/chosen": -0.00013762382150162011, + "logps/rejected": -2.3229103088378906, + "loss": 0.3209, + "nll_loss": 0.08022750169038773, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3762381968263071e-05, + "rewards/margins": 0.2322772890329361, + "rewards/rejected": -0.2322910577058792, + "step": 13132 + }, + { + "epoch": 9.082295988934993, + "grad_norm": 3.5657460689544678, + "learning_rate": 5.098355617027816e-06, + "log_odds_chosen": 11.11960506439209, + "log_odds_ratio": -3.4535565646365285e-05, + "logits/chosen": -0.436089426279068, + "logits/rejected": -0.398897647857666, + "logps/chosen": -0.00014532770728692412, + "logps/rejected": -2.1216166019439697, + "loss": 0.4049, + "nll_loss": 0.10122960805892944, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4532770364894532e-05, + "rewards/margins": 0.21214714646339417, + "rewards/rejected": -0.21216166019439697, + "step": 13133 + }, + { + "epoch": 9.08298755186722, + "grad_norm": 3.125889539718628, + "learning_rate": 5.094513600737667e-06, + "log_odds_chosen": 10.308817863464355, + "log_odds_ratio": -9.981192124541849e-05, + "logits/chosen": -0.19146594405174255, + "logits/rejected": -0.33207714557647705, + "logps/chosen": -0.0005562923615798354, + "logps/rejected": -2.1228606700897217, + "loss": 0.2483, + "nll_loss": 0.06205949932336807, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.562922888202593e-05, + "rewards/margins": 0.21223042905330658, + "rewards/rejected": -0.2122860550880432, + "step": 13134 + }, + { + "epoch": 9.083679114799446, + "grad_norm": 3.330157995223999, + "learning_rate": 5.0906715844475185e-06, + "log_odds_chosen": 11.04733943939209, + "log_odds_ratio": -3.28706628351938e-05, + "logits/chosen": -0.291248083114624, + "logits/rejected": -0.4195294678211212, + "logps/chosen": -0.00020840237266384065, + "logps/rejected": -1.991461992263794, + "loss": 0.3979, + "nll_loss": 0.09946480393409729, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0840237993979827e-05, + "rewards/margins": 0.19912534952163696, + "rewards/rejected": -0.19914621114730835, + "step": 13135 + }, + { + "epoch": 9.084370677731673, + "grad_norm": 3.6042749881744385, + "learning_rate": 5.086829568157369e-06, + "log_odds_chosen": 11.078657150268555, + "log_odds_ratio": -2.63779529632302e-05, + "logits/chosen": -0.4319020211696625, + "logits/rejected": -0.4092617928981781, + "logps/chosen": -0.00011186770279891789, + "logps/rejected": -1.9281851053237915, + "loss": 0.3121, + "nll_loss": 0.07801743596792221, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1186770279891789e-05, + "rewards/margins": 0.19280733168125153, + "rewards/rejected": -0.1928185224533081, + "step": 13136 + }, + { + "epoch": 9.0850622406639, + "grad_norm": 3.3793747425079346, + "learning_rate": 5.08298755186722e-06, + "log_odds_chosen": 10.268448829650879, + "log_odds_ratio": -0.00013268145266920328, + "logits/chosen": -0.17872902750968933, + "logits/rejected": -0.25412702560424805, + "logps/chosen": -0.0003009192587342113, + "logps/rejected": -1.6723623275756836, + "loss": 0.3279, + "nll_loss": 0.08195911347866058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0091925509623252e-05, + "rewards/margins": 0.16720612347126007, + "rewards/rejected": -0.16723620891571045, + "step": 13137 + }, + { + "epoch": 9.085753803596127, + "grad_norm": 2.3682475090026855, + "learning_rate": 5.079145535577072e-06, + "log_odds_chosen": 11.593698501586914, + "log_odds_ratio": -3.967937300330959e-05, + "logits/chosen": -0.3585340976715088, + "logits/rejected": -0.4492180347442627, + "logps/chosen": -0.00010811621905304492, + "logps/rejected": -2.1206421852111816, + "loss": 0.3031, + "nll_loss": 0.07576078921556473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0811621905304492e-05, + "rewards/margins": 0.21205341815948486, + "rewards/rejected": -0.2120642215013504, + "step": 13138 + }, + { + "epoch": 9.086445366528354, + "grad_norm": 4.587029457092285, + "learning_rate": 5.0753035192869215e-06, + "log_odds_chosen": 11.464735984802246, + "log_odds_ratio": -2.5043180357897654e-05, + "logits/chosen": -0.31821760535240173, + "logits/rejected": -0.39988356828689575, + "logps/chosen": -0.00012989738024771214, + "logps/rejected": -2.334730386734009, + "loss": 0.3182, + "nll_loss": 0.0795440748333931, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2989738024771214e-05, + "rewards/margins": 0.23346003890037537, + "rewards/rejected": -0.2334730327129364, + "step": 13139 + }, + { + "epoch": 9.08713692946058, + "grad_norm": 3.3115270137786865, + "learning_rate": 5.071461502996773e-06, + "log_odds_chosen": 10.78053092956543, + "log_odds_ratio": -6.271836900850758e-05, + "logits/chosen": 0.04377426207065582, + "logits/rejected": 0.06366972625255585, + "logps/chosen": -0.0012491923989728093, + "logps/rejected": -1.8240324258804321, + "loss": 0.3577, + "nll_loss": 0.08942988514900208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012491924280766398, + "rewards/margins": 0.1822783201932907, + "rewards/rejected": -0.18240323662757874, + "step": 13140 + }, + { + "epoch": 9.087828492392807, + "grad_norm": 3.5181050300598145, + "learning_rate": 5.067619486706624e-06, + "log_odds_chosen": 10.943486213684082, + "log_odds_ratio": -4.695288589573465e-05, + "logits/chosen": -0.0279490128159523, + "logits/rejected": -0.07990120351314545, + "logps/chosen": -0.00024129982921294868, + "logps/rejected": -2.2386720180511475, + "loss": 0.3133, + "nll_loss": 0.07832954823970795, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4129982193699107e-05, + "rewards/margins": 0.2238430678844452, + "rewards/rejected": -0.22386720776557922, + "step": 13141 + }, + { + "epoch": 9.088520055325034, + "grad_norm": 3.6358225345611572, + "learning_rate": 5.063777470416475e-06, + "log_odds_chosen": 11.821958541870117, + "log_odds_ratio": -3.9039121475070715e-05, + "logits/chosen": -0.1069912388920784, + "logits/rejected": -0.16369011998176575, + "logps/chosen": -0.0002085616288240999, + "logps/rejected": -2.8206725120544434, + "loss": 0.4536, + "nll_loss": 0.11340697109699249, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.085616324620787e-05, + "rewards/margins": 0.282046377658844, + "rewards/rejected": -0.2820672392845154, + "step": 13142 + }, + { + "epoch": 9.089211618257261, + "grad_norm": 3.2715563774108887, + "learning_rate": 5.059935454126326e-06, + "log_odds_chosen": 10.589996337890625, + "log_odds_ratio": -4.2793963075382635e-05, + "logits/chosen": -0.4457647502422333, + "logits/rejected": -0.509307861328125, + "logps/chosen": -0.00015700332005508244, + "logps/rejected": -1.5440524816513062, + "loss": 0.3215, + "nll_loss": 0.08037039637565613, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5700332369306125e-05, + "rewards/margins": 0.15438956022262573, + "rewards/rejected": -0.15440526604652405, + "step": 13143 + }, + { + "epoch": 9.089903181189488, + "grad_norm": 3.2407965660095215, + "learning_rate": 5.056093437836177e-06, + "log_odds_chosen": 10.927106857299805, + "log_odds_ratio": -5.0967435527127236e-05, + "logits/chosen": 0.08164151012897491, + "logits/rejected": 0.02284158021211624, + "logps/chosen": -0.00013107166159898043, + "logps/rejected": -1.689274549484253, + "loss": 0.2775, + "nll_loss": 0.06936167180538177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3107166523695923e-05, + "rewards/margins": 0.16891434788703918, + "rewards/rejected": -0.16892746090888977, + "step": 13144 + }, + { + "epoch": 9.090594744121715, + "grad_norm": 4.398402690887451, + "learning_rate": 5.052251421546028e-06, + "log_odds_chosen": 11.465459823608398, + "log_odds_ratio": -0.00012407473695930094, + "logits/chosen": 0.3121594488620758, + "logits/rejected": 0.27814939618110657, + "logps/chosen": -0.0003415195969864726, + "logps/rejected": -3.200613021850586, + "loss": 0.4124, + "nll_loss": 0.10309255868196487, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.41519589710515e-05, + "rewards/margins": 0.3200271427631378, + "rewards/rejected": -0.3200612962245941, + "step": 13145 + }, + { + "epoch": 9.091286307053942, + "grad_norm": 4.4590253829956055, + "learning_rate": 5.0484094052558784e-06, + "log_odds_chosen": 10.064178466796875, + "log_odds_ratio": -0.00011093214561697096, + "logits/chosen": -0.11966148763895035, + "logits/rejected": -0.21258839964866638, + "logps/chosen": -0.0003168184484820813, + "logps/rejected": -1.9933526515960693, + "loss": 0.3697, + "nll_loss": 0.09242478013038635, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1681847758591175e-05, + "rewards/margins": 0.19930359721183777, + "rewards/rejected": -0.1993352770805359, + "step": 13146 + }, + { + "epoch": 9.091977869986168, + "grad_norm": 3.343506097793579, + "learning_rate": 5.044567388965729e-06, + "log_odds_chosen": 12.137199401855469, + "log_odds_ratio": -2.5066479793167673e-05, + "logits/chosen": -0.35906705260276794, + "logits/rejected": -0.27062171697616577, + "logps/chosen": -0.00017379832570441067, + "logps/rejected": -3.39198637008667, + "loss": 0.379, + "nll_loss": 0.09473510086536407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.737983438943047e-05, + "rewards/margins": 0.33918124437332153, + "rewards/rejected": -0.33919864892959595, + "step": 13147 + }, + { + "epoch": 9.092669432918395, + "grad_norm": 5.016630172729492, + "learning_rate": 5.04072537267558e-06, + "log_odds_chosen": 11.218782424926758, + "log_odds_ratio": -4.880976484855637e-05, + "logits/chosen": -0.1608268916606903, + "logits/rejected": -0.1783805787563324, + "logps/chosen": -0.0001921278308145702, + "logps/rejected": -2.267690896987915, + "loss": 0.4646, + "nll_loss": 0.11615432053804398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9212782717659138e-05, + "rewards/margins": 0.22674988210201263, + "rewards/rejected": -0.2267691045999527, + "step": 13148 + }, + { + "epoch": 9.093360995850622, + "grad_norm": 2.3774566650390625, + "learning_rate": 5.0368833563854315e-06, + "log_odds_chosen": 10.962896347045898, + "log_odds_ratio": -0.00010562760871835053, + "logits/chosen": -0.27799174189567566, + "logits/rejected": -0.29090049862861633, + "logps/chosen": -0.00019772813539020717, + "logps/rejected": -2.3116509914398193, + "loss": 0.2353, + "nll_loss": 0.05882162228226662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.977281499421224e-05, + "rewards/margins": 0.23114533722400665, + "rewards/rejected": -0.2311651110649109, + "step": 13149 + }, + { + "epoch": 9.094052558782849, + "grad_norm": 3.0054092407226562, + "learning_rate": 5.033041340095282e-06, + "log_odds_chosen": 11.95867919921875, + "log_odds_ratio": -1.787081237125676e-05, + "logits/chosen": -0.2743851840496063, + "logits/rejected": -0.32335007190704346, + "logps/chosen": -0.00018235544848721474, + "logps/rejected": -2.9181389808654785, + "loss": 0.3871, + "nll_loss": 0.09678283333778381, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.823554339352995e-05, + "rewards/margins": 0.29179567098617554, + "rewards/rejected": -0.2918138802051544, + "step": 13150 + }, + { + "epoch": 9.094744121715076, + "grad_norm": 4.452502727508545, + "learning_rate": 5.029199323805133e-06, + "log_odds_chosen": 11.339390754699707, + "log_odds_ratio": -6.690513691864908e-05, + "logits/chosen": -0.48545175790786743, + "logits/rejected": -0.5180833339691162, + "logps/chosen": -0.0004081081715412438, + "logps/rejected": -2.6610803604125977, + "loss": 0.5438, + "nll_loss": 0.13594885170459747, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0810820792103186e-05, + "rewards/margins": 0.2660672068595886, + "rewards/rejected": -0.26610803604125977, + "step": 13151 + }, + { + "epoch": 9.095435684647303, + "grad_norm": 6.563195705413818, + "learning_rate": 5.025357307514985e-06, + "log_odds_chosen": 10.881711959838867, + "log_odds_ratio": -0.0003529912792146206, + "logits/chosen": 0.21874408423900604, + "logits/rejected": 0.029261693358421326, + "logps/chosen": -0.0005761877982877195, + "logps/rejected": -1.6720936298370361, + "loss": 0.6063, + "nll_loss": 0.15154193341732025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.761878128396347e-05, + "rewards/margins": 0.16715176403522491, + "rewards/rejected": -0.16720937192440033, + "step": 13152 + }, + { + "epoch": 9.09612724757953, + "grad_norm": 4.052469730377197, + "learning_rate": 5.0215152912248345e-06, + "log_odds_chosen": 11.643571853637695, + "log_odds_ratio": -0.00046770076733082533, + "logits/chosen": 0.14311496913433075, + "logits/rejected": 0.12904202938079834, + "logps/chosen": -0.0011221003951504827, + "logps/rejected": -3.477919101715088, + "loss": 0.4618, + "nll_loss": 0.11539174616336823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011221005115658045, + "rewards/margins": 0.3476797044277191, + "rewards/rejected": -0.3477919101715088, + "step": 13153 + }, + { + "epoch": 9.096818810511756, + "grad_norm": 3.6318225860595703, + "learning_rate": 5.017673274934686e-06, + "log_odds_chosen": 11.56639289855957, + "log_odds_ratio": -6.830410711700097e-05, + "logits/chosen": -0.017568401992321014, + "logits/rejected": -0.012221388518810272, + "logps/chosen": -0.0001570779422763735, + "logps/rejected": -2.9654502868652344, + "loss": 0.3252, + "nll_loss": 0.08129463344812393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5707795682828873e-05, + "rewards/margins": 0.2965293526649475, + "rewards/rejected": -0.2965450584888458, + "step": 13154 + }, + { + "epoch": 9.097510373443983, + "grad_norm": 3.365553855895996, + "learning_rate": 5.013831258644537e-06, + "log_odds_chosen": 10.493221282958984, + "log_odds_ratio": -0.0002885025169234723, + "logits/chosen": 0.04266492277383804, + "logits/rejected": -0.0294787660241127, + "logps/chosen": -0.00023267159122042358, + "logps/rejected": -1.9413899183273315, + "loss": 0.4053, + "nll_loss": 0.10130857676267624, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3267159122042358e-05, + "rewards/margins": 0.19411572813987732, + "rewards/rejected": -0.1941390037536621, + "step": 13155 + }, + { + "epoch": 9.09820193637621, + "grad_norm": 5.188926696777344, + "learning_rate": 5.009989242354388e-06, + "log_odds_chosen": 10.942852973937988, + "log_odds_ratio": -7.25128993508406e-05, + "logits/chosen": -0.48505446314811707, + "logits/rejected": -0.45248520374298096, + "logps/chosen": -0.00043346683378331363, + "logps/rejected": -2.1067748069763184, + "loss": 0.4581, + "nll_loss": 0.11450614780187607, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.334668119554408e-05, + "rewards/margins": 0.21063414216041565, + "rewards/rejected": -0.21067747473716736, + "step": 13156 + }, + { + "epoch": 9.098893499308437, + "grad_norm": 3.068840980529785, + "learning_rate": 5.006147226064238e-06, + "log_odds_chosen": 11.091489791870117, + "log_odds_ratio": -5.561709258472547e-05, + "logits/chosen": -0.3738434612751007, + "logits/rejected": -0.33890044689178467, + "logps/chosen": -0.00016683740250300616, + "logps/rejected": -1.8817553520202637, + "loss": 0.378, + "nll_loss": 0.09448930621147156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6683739886502735e-05, + "rewards/margins": 0.18815885484218597, + "rewards/rejected": -0.1881755292415619, + "step": 13157 + }, + { + "epoch": 9.099585062240664, + "grad_norm": 2.2089033126831055, + "learning_rate": 5.00230520977409e-06, + "log_odds_chosen": 10.12149429321289, + "log_odds_ratio": -0.0003154563601128757, + "logits/chosen": -0.10125580430030823, + "logits/rejected": -0.0849694237112999, + "logps/chosen": -0.0005034382920712233, + "logps/rejected": -2.024165630340576, + "loss": 0.2075, + "nll_loss": 0.0518321767449379, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.03438277519308e-05, + "rewards/margins": 0.20236621797084808, + "rewards/rejected": -0.2024165689945221, + "step": 13158 + }, + { + "epoch": 9.10027662517289, + "grad_norm": 3.7857158184051514, + "learning_rate": 4.998463193483941e-06, + "log_odds_chosen": 11.10672378540039, + "log_odds_ratio": -5.2406474424060434e-05, + "logits/chosen": -0.22684435546398163, + "logits/rejected": -0.28922390937805176, + "logps/chosen": -0.0003105810610577464, + "logps/rejected": -2.401808977127075, + "loss": 0.4286, + "nll_loss": 0.1071481853723526, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.105810901615769e-05, + "rewards/margins": 0.2401498556137085, + "rewards/rejected": -0.24018090963363647, + "step": 13159 + }, + { + "epoch": 9.100968188105117, + "grad_norm": 4.0806450843811035, + "learning_rate": 4.9946211771937915e-06, + "log_odds_chosen": 11.757749557495117, + "log_odds_ratio": -1.2237173905305099e-05, + "logits/chosen": -0.09956353902816772, + "logits/rejected": -0.11438395828008652, + "logps/chosen": -8.435586642008275e-05, + "logps/rejected": -2.281965970993042, + "loss": 0.517, + "nll_loss": 0.12925131618976593, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.435587005806156e-06, + "rewards/margins": 0.228188157081604, + "rewards/rejected": -0.22819659113883972, + "step": 13160 + }, + { + "epoch": 9.101659751037344, + "grad_norm": 3.7898600101470947, + "learning_rate": 4.990779160903643e-06, + "log_odds_chosen": 10.907726287841797, + "log_odds_ratio": -4.189980973023921e-05, + "logits/chosen": -0.566761314868927, + "logits/rejected": -0.46380579471588135, + "logps/chosen": -0.00017752411076799035, + "logps/rejected": -1.85300874710083, + "loss": 0.4109, + "nll_loss": 0.10271905362606049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7752410713001154e-05, + "rewards/margins": 0.1852831244468689, + "rewards/rejected": -0.18530087172985077, + "step": 13161 + }, + { + "epoch": 9.10235131396957, + "grad_norm": 4.558443546295166, + "learning_rate": 4.986937144613493e-06, + "log_odds_chosen": 10.4443359375, + "log_odds_ratio": -4.984636325389147e-05, + "logits/chosen": -0.07645373046398163, + "logits/rejected": -0.17390292882919312, + "logps/chosen": -0.00016142117965500802, + "logps/rejected": -1.8318181037902832, + "loss": 0.3211, + "nll_loss": 0.08027378469705582, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.614211760170292e-05, + "rewards/margins": 0.18316569924354553, + "rewards/rejected": -0.18318183720111847, + "step": 13162 + }, + { + "epoch": 9.103042876901798, + "grad_norm": 3.671687602996826, + "learning_rate": 4.9830951283233446e-06, + "log_odds_chosen": 12.03889274597168, + "log_odds_ratio": -1.1940827789658215e-05, + "logits/chosen": 0.09454180300235748, + "logits/rejected": 0.05508112162351608, + "logps/chosen": -0.0002764679375104606, + "logps/rejected": -2.8818044662475586, + "loss": 0.4997, + "nll_loss": 0.12493018805980682, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.764679265965242e-05, + "rewards/margins": 0.2881527543067932, + "rewards/rejected": -0.288180410861969, + "step": 13163 + }, + { + "epoch": 9.103734439834025, + "grad_norm": 2.8568713665008545, + "learning_rate": 4.979253112033195e-06, + "log_odds_chosen": 10.179363250732422, + "log_odds_ratio": -7.92900609667413e-05, + "logits/chosen": -0.549415647983551, + "logits/rejected": -0.526592493057251, + "logps/chosen": -0.0002810688456520438, + "logps/rejected": -1.7964861392974854, + "loss": 0.3399, + "nll_loss": 0.08496960997581482, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.81068842014065e-05, + "rewards/margins": 0.17962051928043365, + "rewards/rejected": -0.17964863777160645, + "step": 13164 + }, + { + "epoch": 9.104426002766251, + "grad_norm": 3.650968313217163, + "learning_rate": 4.975411095743046e-06, + "log_odds_chosen": 10.66606616973877, + "log_odds_ratio": -0.00035028919228352606, + "logits/chosen": -0.08267174661159515, + "logits/rejected": -0.2358940839767456, + "logps/chosen": -0.00016879210306797177, + "logps/rejected": -2.1179189682006836, + "loss": 0.431, + "nll_loss": 0.10771405696868896, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6879210306797177e-05, + "rewards/margins": 0.2117750495672226, + "rewards/rejected": -0.21179193258285522, + "step": 13165 + }, + { + "epoch": 9.105117565698478, + "grad_norm": 4.6579060554504395, + "learning_rate": 4.971569079452898e-06, + "log_odds_chosen": 12.230474472045898, + "log_odds_ratio": -2.4434060833300464e-05, + "logits/chosen": -0.38378095626831055, + "logits/rejected": -0.3854064345359802, + "logps/chosen": -0.00015963416080921888, + "logps/rejected": -3.438502311706543, + "loss": 0.2897, + "nll_loss": 0.07242448627948761, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5963418263709173e-05, + "rewards/margins": 0.3438342809677124, + "rewards/rejected": -0.3438502550125122, + "step": 13166 + }, + { + "epoch": 9.105809128630705, + "grad_norm": 3.5084073543548584, + "learning_rate": 4.9677270631627475e-06, + "log_odds_chosen": 11.561970710754395, + "log_odds_ratio": -2.341391154914163e-05, + "logits/chosen": -0.4982847571372986, + "logits/rejected": -0.4246971011161804, + "logps/chosen": -0.0001479636412113905, + "logps/rejected": -2.591989517211914, + "loss": 0.3273, + "nll_loss": 0.08181080222129822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4796363757341169e-05, + "rewards/margins": 0.25918418169021606, + "rewards/rejected": -0.25919896364212036, + "step": 13167 + }, + { + "epoch": 9.106500691562932, + "grad_norm": 4.859031677246094, + "learning_rate": 4.963885046872599e-06, + "log_odds_chosen": 10.677106857299805, + "log_odds_ratio": -0.0009081160533241928, + "logits/chosen": 0.2539621591567993, + "logits/rejected": 0.10869896411895752, + "logps/chosen": -0.0006138522294349968, + "logps/rejected": -2.1311261653900146, + "loss": 0.2616, + "nll_loss": 0.06530681252479553, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138522439869121e-05, + "rewards/margins": 0.2130512297153473, + "rewards/rejected": -0.21311262249946594, + "step": 13168 + }, + { + "epoch": 9.107192254495159, + "grad_norm": 3.615055561065674, + "learning_rate": 4.96004303058245e-06, + "log_odds_chosen": 10.297002792358398, + "log_odds_ratio": -0.00012715287448372692, + "logits/chosen": -0.30406704545021057, + "logits/rejected": -0.18537336587905884, + "logps/chosen": -0.0002870440948754549, + "logps/rejected": -1.7042102813720703, + "loss": 0.371, + "nll_loss": 0.09272780269384384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.870440948754549e-05, + "rewards/margins": 0.17039233446121216, + "rewards/rejected": -0.1704210340976715, + "step": 13169 + }, + { + "epoch": 9.107883817427386, + "grad_norm": 3.216033697128296, + "learning_rate": 4.956201014292301e-06, + "log_odds_chosen": 12.333643913269043, + "log_odds_ratio": -7.912468390713912e-06, + "logits/chosen": -0.3236329257488251, + "logits/rejected": -0.3933505117893219, + "logps/chosen": -5.795392280560918e-05, + "logps/rejected": -2.314310073852539, + "loss": 0.3152, + "nll_loss": 0.0788043662905693, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795392553409329e-06, + "rewards/margins": 0.2314252108335495, + "rewards/rejected": -0.2314310073852539, + "step": 13170 + }, + { + "epoch": 9.108575380359612, + "grad_norm": 4.9232282638549805, + "learning_rate": 4.952358998002151e-06, + "log_odds_chosen": 11.705574035644531, + "log_odds_ratio": -2.032737756962888e-05, + "logits/chosen": 0.01616286300122738, + "logits/rejected": -0.04003433510661125, + "logps/chosen": -0.0003002825251314789, + "logps/rejected": -2.7152099609375, + "loss": 0.667, + "nll_loss": 0.16675525903701782, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0028253604541533e-05, + "rewards/margins": 0.27149099111557007, + "rewards/rejected": -0.2715210020542145, + "step": 13171 + }, + { + "epoch": 9.10926694329184, + "grad_norm": 3.5715765953063965, + "learning_rate": 4.948516981712003e-06, + "log_odds_chosen": 11.402990341186523, + "log_odds_ratio": -6.371325434884056e-05, + "logits/chosen": -0.6012564897537231, + "logits/rejected": -0.49459028244018555, + "logps/chosen": -0.00019830641394946724, + "logps/rejected": -2.2350080013275146, + "loss": 0.4016, + "nll_loss": 0.10040025413036346, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9830642486340366e-05, + "rewards/margins": 0.2234809547662735, + "rewards/rejected": -0.2235007882118225, + "step": 13172 + }, + { + "epoch": 9.109958506224066, + "grad_norm": 3.1697309017181396, + "learning_rate": 4.944674965421854e-06, + "log_odds_chosen": 10.932571411132812, + "log_odds_ratio": -8.260000322479755e-05, + "logits/chosen": -0.2529147267341614, + "logits/rejected": -0.2343870997428894, + "logps/chosen": -0.004691335838288069, + "logps/rejected": -2.201251268386841, + "loss": 0.355, + "nll_loss": 0.08873645216226578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00046913354890421033, + "rewards/margins": 0.21965602040290833, + "rewards/rejected": -0.22012513875961304, + "step": 13173 + }, + { + "epoch": 9.110650069156293, + "grad_norm": 3.2608823776245117, + "learning_rate": 4.9408329491317045e-06, + "log_odds_chosen": 11.447783470153809, + "log_odds_ratio": -4.559377339319326e-05, + "logits/chosen": 0.0027496833354234695, + "logits/rejected": -0.034812696278095245, + "logps/chosen": -0.000319063343340531, + "logps/rejected": -2.4481163024902344, + "loss": 0.2811, + "nll_loss": 0.07026031613349915, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1906336516840383e-05, + "rewards/margins": 0.24477970600128174, + "rewards/rejected": -0.24481162428855896, + "step": 13174 + }, + { + "epoch": 9.11134163208852, + "grad_norm": 4.426057815551758, + "learning_rate": 4.936990932841556e-06, + "log_odds_chosen": 11.010347366333008, + "log_odds_ratio": -6.925136403879151e-05, + "logits/chosen": -0.05647280812263489, + "logits/rejected": -0.2984941899776459, + "logps/chosen": -0.00027949127252213657, + "logps/rejected": -2.3876566886901855, + "loss": 0.4398, + "nll_loss": 0.10994251072406769, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7949130526394583e-05, + "rewards/margins": 0.23873771727085114, + "rewards/rejected": -0.2387656718492508, + "step": 13175 + }, + { + "epoch": 9.112033195020746, + "grad_norm": 3.035013198852539, + "learning_rate": 4.933148916551406e-06, + "log_odds_chosen": 12.202170372009277, + "log_odds_ratio": -7.800666026014369e-06, + "logits/chosen": -0.616619884967804, + "logits/rejected": -0.5678720474243164, + "logps/chosen": -9.361472621094435e-05, + "logps/rejected": -2.7422537803649902, + "loss": 0.3129, + "nll_loss": 0.07821905612945557, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.361472621094435e-06, + "rewards/margins": 0.27421602606773376, + "rewards/rejected": -0.2742254137992859, + "step": 13176 + }, + { + "epoch": 9.112724757952973, + "grad_norm": 3.3646538257598877, + "learning_rate": 4.929306900261258e-06, + "log_odds_chosen": 10.746342658996582, + "log_odds_ratio": -4.711302608484402e-05, + "logits/chosen": -0.28519192337989807, + "logits/rejected": -0.3103817403316498, + "logps/chosen": -0.0002904360298998654, + "logps/rejected": -2.317279100418091, + "loss": 0.4137, + "nll_loss": 0.1034327819943428, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.90436037175823e-05, + "rewards/margins": 0.23169885575771332, + "rewards/rejected": -0.23172789812088013, + "step": 13177 + }, + { + "epoch": 9.1134163208852, + "grad_norm": 3.402470588684082, + "learning_rate": 4.925464883971108e-06, + "log_odds_chosen": 11.85820198059082, + "log_odds_ratio": -1.4866004676150624e-05, + "logits/chosen": 0.08311102539300919, + "logits/rejected": 0.03828234225511551, + "logps/chosen": -0.00020142002904321998, + "logps/rejected": -2.738119602203369, + "loss": 0.5828, + "nll_loss": 0.14569005370140076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0142002540524118e-05, + "rewards/margins": 0.2737918198108673, + "rewards/rejected": -0.2738119661808014, + "step": 13178 + }, + { + "epoch": 9.114107883817427, + "grad_norm": 3.4675073623657227, + "learning_rate": 4.921622867680959e-06, + "log_odds_chosen": 10.453381538391113, + "log_odds_ratio": -0.0006085903150960803, + "logits/chosen": -0.28540289402008057, + "logits/rejected": -0.2581024467945099, + "logps/chosen": -0.003210814204066992, + "logps/rejected": -2.185744047164917, + "loss": 0.3239, + "nll_loss": 0.08090663701295853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00032108143204823136, + "rewards/margins": 0.21825332939624786, + "rewards/rejected": -0.2185744196176529, + "step": 13179 + }, + { + "epoch": 9.114799446749654, + "grad_norm": 4.592959403991699, + "learning_rate": 4.91778085139081e-06, + "log_odds_chosen": 11.726934432983398, + "log_odds_ratio": -1.3600827514892444e-05, + "logits/chosen": -0.33622393012046814, + "logits/rejected": -0.4734468460083008, + "logps/chosen": -0.0001402581692673266, + "logps/rejected": -2.4596095085144043, + "loss": 0.5269, + "nll_loss": 0.13171550631523132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.40258171086316e-05, + "rewards/margins": 0.24594691395759583, + "rewards/rejected": -0.24596095085144043, + "step": 13180 + }, + { + "epoch": 9.11549100968188, + "grad_norm": 3.1724741458892822, + "learning_rate": 4.913938835100661e-06, + "log_odds_chosen": 11.557546615600586, + "log_odds_ratio": -1.7893340555019677e-05, + "logits/chosen": -0.4319803714752197, + "logits/rejected": -0.42902466654777527, + "logps/chosen": -0.00011146764154545963, + "logps/rejected": -2.4004411697387695, + "loss": 0.3218, + "nll_loss": 0.08044193685054779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1146764336444903e-05, + "rewards/margins": 0.24003298580646515, + "rewards/rejected": -0.24004413187503815, + "step": 13181 + }, + { + "epoch": 9.116182572614107, + "grad_norm": 2.474640130996704, + "learning_rate": 4.910096818810512e-06, + "log_odds_chosen": 11.198797225952148, + "log_odds_ratio": -3.0988761864136904e-05, + "logits/chosen": 0.05575866997241974, + "logits/rejected": 0.0315190851688385, + "logps/chosen": -0.00020474701886996627, + "logps/rejected": -2.4687418937683105, + "loss": 0.2955, + "nll_loss": 0.0738632082939148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0474701159400865e-05, + "rewards/margins": 0.24685370922088623, + "rewards/rejected": -0.24687419831752777, + "step": 13182 + }, + { + "epoch": 9.116874135546334, + "grad_norm": 2.3888325691223145, + "learning_rate": 4.906254802520363e-06, + "log_odds_chosen": 10.87182903289795, + "log_odds_ratio": -0.0003233412862755358, + "logits/chosen": 0.07042770087718964, + "logits/rejected": 0.014010794460773468, + "logps/chosen": -0.00014998050755821168, + "logps/rejected": -1.9851473569869995, + "loss": 0.2403, + "nll_loss": 0.06004884093999863, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4998049664427526e-05, + "rewards/margins": 0.19849973917007446, + "rewards/rejected": -0.19851475954055786, + "step": 13183 + }, + { + "epoch": 9.117565698478561, + "grad_norm": 2.9431097507476807, + "learning_rate": 4.902412786230214e-06, + "log_odds_chosen": 11.711980819702148, + "log_odds_ratio": -2.999811840709299e-05, + "logits/chosen": 0.02263740450143814, + "logits/rejected": -0.1297261267900467, + "logps/chosen": -0.00025718723190948367, + "logps/rejected": -3.303467273712158, + "loss": 0.3269, + "nll_loss": 0.08172940462827682, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.571872028056532e-05, + "rewards/margins": 0.3303210139274597, + "rewards/rejected": -0.3303467333316803, + "step": 13184 + }, + { + "epoch": 9.118257261410788, + "grad_norm": 3.024400472640991, + "learning_rate": 4.898570769940064e-06, + "log_odds_chosen": 10.90009593963623, + "log_odds_ratio": -0.00043232255848124623, + "logits/chosen": -0.43155479431152344, + "logits/rejected": -0.404721736907959, + "logps/chosen": -0.0004337151476647705, + "logps/rejected": -3.1928770542144775, + "loss": 0.352, + "nll_loss": 0.08794493973255157, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337151403888129e-05, + "rewards/margins": 0.3192443251609802, + "rewards/rejected": -0.3192877173423767, + "step": 13185 + }, + { + "epoch": 9.118948824343015, + "grad_norm": 2.188103437423706, + "learning_rate": 4.894728753649916e-06, + "log_odds_chosen": 10.891100883483887, + "log_odds_ratio": -0.000127013074234128, + "logits/chosen": -0.31894204020500183, + "logits/rejected": -0.2324240505695343, + "logps/chosen": -0.00011675543646560982, + "logps/rejected": -1.966536521911621, + "loss": 0.2551, + "nll_loss": 0.06376811861991882, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1675543646560982e-05, + "rewards/margins": 0.1966419816017151, + "rewards/rejected": -0.19665364921092987, + "step": 13186 + }, + { + "epoch": 9.119640387275242, + "grad_norm": 3.5622682571411133, + "learning_rate": 4.890886737359767e-06, + "log_odds_chosen": 11.323587417602539, + "log_odds_ratio": -2.4846201995387673e-05, + "logits/chosen": -0.1945473849773407, + "logits/rejected": -0.25932082533836365, + "logps/chosen": -9.580461482983083e-05, + "logps/rejected": -1.8983120918273926, + "loss": 0.2771, + "nll_loss": 0.06927736103534698, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.580460755387321e-06, + "rewards/margins": 0.18982163071632385, + "rewards/rejected": -0.1898311972618103, + "step": 13187 + }, + { + "epoch": 9.120331950207468, + "grad_norm": 4.116031646728516, + "learning_rate": 4.8870447210696175e-06, + "log_odds_chosen": 11.892321586608887, + "log_odds_ratio": -1.5779898603796028e-05, + "logits/chosen": -0.2089371383190155, + "logits/rejected": -0.15336598455905914, + "logps/chosen": -0.00032184182782657444, + "logps/rejected": -3.1460373401641846, + "loss": 0.376, + "nll_loss": 0.09399942308664322, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2184183510253206e-05, + "rewards/margins": 0.3145715594291687, + "rewards/rejected": -0.3146037459373474, + "step": 13188 + }, + { + "epoch": 9.121023513139695, + "grad_norm": 2.5508615970611572, + "learning_rate": 4.883202704779469e-06, + "log_odds_chosen": 12.145647048950195, + "log_odds_ratio": -9.136807420873083e-06, + "logits/chosen": -0.24097317457199097, + "logits/rejected": -0.23421518504619598, + "logps/chosen": -0.00011886593711096793, + "logps/rejected": -2.9510018825531006, + "loss": 0.3036, + "nll_loss": 0.07589846849441528, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1886593711096793e-05, + "rewards/margins": 0.2950882911682129, + "rewards/rejected": -0.2951001524925232, + "step": 13189 + }, + { + "epoch": 9.121715076071922, + "grad_norm": 4.2100443840026855, + "learning_rate": 4.879360688489319e-06, + "log_odds_chosen": 11.170172691345215, + "log_odds_ratio": -8.050798351177946e-05, + "logits/chosen": -0.2680250406265259, + "logits/rejected": -0.38149750232696533, + "logps/chosen": -0.00017637033306527883, + "logps/rejected": -1.7586069107055664, + "loss": 0.432, + "nll_loss": 0.10799022018909454, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.763703221513424e-05, + "rewards/margins": 0.17584306001663208, + "rewards/rejected": -0.1758607029914856, + "step": 13190 + }, + { + "epoch": 9.122406639004149, + "grad_norm": 3.2198972702026367, + "learning_rate": 4.875518672199171e-06, + "log_odds_chosen": 10.57756233215332, + "log_odds_ratio": -0.00038010356365703046, + "logits/chosen": -0.11352365463972092, + "logits/rejected": -0.20239883661270142, + "logps/chosen": -0.0013985616387799382, + "logps/rejected": -2.335310459136963, + "loss": 0.2675, + "nll_loss": 0.06683464348316193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001398561871610582, + "rewards/margins": 0.2333911955356598, + "rewards/rejected": -0.23353107273578644, + "step": 13191 + }, + { + "epoch": 9.123098201936376, + "grad_norm": 4.147787570953369, + "learning_rate": 4.871676655909021e-06, + "log_odds_chosen": 11.222441673278809, + "log_odds_ratio": -6.227093399502337e-05, + "logits/chosen": -0.44622766971588135, + "logits/rejected": -0.5303174257278442, + "logps/chosen": -0.00029116583755239844, + "logps/rejected": -2.246415138244629, + "loss": 0.5198, + "nll_loss": 0.12995260953903198, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9116585210431367e-05, + "rewards/margins": 0.22461241483688354, + "rewards/rejected": -0.22464153170585632, + "step": 13192 + }, + { + "epoch": 9.123789764868603, + "grad_norm": 3.82718825340271, + "learning_rate": 4.867834639618872e-06, + "log_odds_chosen": 11.536949157714844, + "log_odds_ratio": -1.4661351087852381e-05, + "logits/chosen": 0.2355799823999405, + "logits/rejected": 0.16792237758636475, + "logps/chosen": -0.00010456145537318662, + "logps/rejected": -2.4021620750427246, + "loss": 0.5924, + "nll_loss": 0.14808820188045502, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.045614499162184e-05, + "rewards/margins": 0.2402057647705078, + "rewards/rejected": -0.2402162104845047, + "step": 13193 + }, + { + "epoch": 9.12448132780083, + "grad_norm": 3.064208745956421, + "learning_rate": 4.863992623328723e-06, + "log_odds_chosen": 12.178282737731934, + "log_odds_ratio": -7.479978648916585e-06, + "logits/chosen": -0.19847072660923004, + "logits/rejected": -0.23306681215763092, + "logps/chosen": -7.786977948853746e-05, + "logps/rejected": -2.460477352142334, + "loss": 0.3654, + "nll_loss": 0.09134089946746826, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.786978130752686e-06, + "rewards/margins": 0.24603994190692902, + "rewards/rejected": -0.2460477352142334, + "step": 13194 + }, + { + "epoch": 9.125172890733056, + "grad_norm": 5.66312313079834, + "learning_rate": 4.8601506070385745e-06, + "log_odds_chosen": 11.594490051269531, + "log_odds_ratio": -6.0059661336708814e-05, + "logits/chosen": -0.4265395402908325, + "logits/rejected": -0.4704775810241699, + "logps/chosen": -0.0002761242794804275, + "logps/rejected": -2.5915656089782715, + "loss": 0.487, + "nll_loss": 0.12174142897129059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7612426492851228e-05, + "rewards/margins": 0.25912895798683167, + "rewards/rejected": -0.25915655493736267, + "step": 13195 + }, + { + "epoch": 9.125864453665283, + "grad_norm": 3.052619457244873, + "learning_rate": 4.856308590748425e-06, + "log_odds_chosen": 11.270345687866211, + "log_odds_ratio": -5.994427192490548e-05, + "logits/chosen": -0.34307360649108887, + "logits/rejected": -0.411067932844162, + "logps/chosen": -0.0003244389081373811, + "logps/rejected": -2.9828009605407715, + "loss": 0.3114, + "nll_loss": 0.07783990353345871, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.244388790335506e-05, + "rewards/margins": 0.29824766516685486, + "rewards/rejected": -0.29828011989593506, + "step": 13196 + }, + { + "epoch": 9.12655601659751, + "grad_norm": 3.9561896324157715, + "learning_rate": 4.852466574458276e-06, + "log_odds_chosen": 11.210367202758789, + "log_odds_ratio": -2.6617461116984487e-05, + "logits/chosen": -0.15798631310462952, + "logits/rejected": -0.2608197033405304, + "logps/chosen": -0.00025633175391703844, + "logps/rejected": -2.435305595397949, + "loss": 0.3488, + "nll_loss": 0.0871882438659668, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5633176846895367e-05, + "rewards/margins": 0.24350491166114807, + "rewards/rejected": -0.2435305416584015, + "step": 13197 + }, + { + "epoch": 9.127247579529737, + "grad_norm": 3.7001283168792725, + "learning_rate": 4.848624558168127e-06, + "log_odds_chosen": 11.743751525878906, + "log_odds_ratio": -6.15029493928887e-05, + "logits/chosen": -0.11739799380302429, + "logits/rejected": -0.03368782252073288, + "logps/chosen": -0.00014788135013077408, + "logps/rejected": -2.913625717163086, + "loss": 0.3259, + "nll_loss": 0.08147196471691132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4788134649279527e-05, + "rewards/margins": 0.29134780168533325, + "rewards/rejected": -0.29136258363723755, + "step": 13198 + }, + { + "epoch": 9.127939142461964, + "grad_norm": 3.7062315940856934, + "learning_rate": 4.8447825418779775e-06, + "log_odds_chosen": 11.067384719848633, + "log_odds_ratio": -4.263326627551578e-05, + "logits/chosen": 0.023071758449077606, + "logits/rejected": -0.03538735210895538, + "logps/chosen": -0.0001302417367696762, + "logps/rejected": -2.1587891578674316, + "loss": 0.3631, + "nll_loss": 0.09078304469585419, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3024174222664442e-05, + "rewards/margins": 0.21586589515209198, + "rewards/rejected": -0.2158789038658142, + "step": 13199 + }, + { + "epoch": 9.12863070539419, + "grad_norm": 3.1669230461120605, + "learning_rate": 4.840940525587829e-06, + "log_odds_chosen": 11.683211326599121, + "log_odds_ratio": -1.102572787203826e-05, + "logits/chosen": -0.33253878355026245, + "logits/rejected": -0.23485800623893738, + "logps/chosen": -7.140888192225248e-05, + "logps/rejected": -2.051136016845703, + "loss": 0.2974, + "nll_loss": 0.0743558332324028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.140888556023128e-06, + "rewards/margins": 0.2051064670085907, + "rewards/rejected": -0.20511361956596375, + "step": 13200 + }, + { + "epoch": 9.129322268326417, + "grad_norm": 2.7919909954071045, + "learning_rate": 4.837098509297679e-06, + "log_odds_chosen": 10.918241500854492, + "log_odds_ratio": -2.582272827567067e-05, + "logits/chosen": -0.52049720287323, + "logits/rejected": -0.577045202255249, + "logps/chosen": -0.00011689725215546787, + "logps/rejected": -1.9347944259643555, + "loss": 0.2784, + "nll_loss": 0.0695895105600357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1689724487951025e-05, + "rewards/margins": 0.19346776604652405, + "rewards/rejected": -0.19347944855690002, + "step": 13201 + }, + { + "epoch": 9.130013831258644, + "grad_norm": 3.1376986503601074, + "learning_rate": 4.8332564930075305e-06, + "log_odds_chosen": 11.279829025268555, + "log_odds_ratio": -4.477910260902718e-05, + "logits/chosen": 0.17851456999778748, + "logits/rejected": 0.10664086788892746, + "logps/chosen": -0.00012507177598308772, + "logps/rejected": -2.1952924728393555, + "loss": 0.2443, + "nll_loss": 0.06107247620820999, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2507178325904533e-05, + "rewards/margins": 0.21951673924922943, + "rewards/rejected": -0.21952924132347107, + "step": 13202 + }, + { + "epoch": 9.130705394190871, + "grad_norm": 2.9854328632354736, + "learning_rate": 4.829414476717381e-06, + "log_odds_chosen": 10.503463745117188, + "log_odds_ratio": -0.0001236013777088374, + "logits/chosen": -0.40685075521469116, + "logits/rejected": -0.43401819467544556, + "logps/chosen": -0.0003842499863822013, + "logps/rejected": -2.254061698913574, + "loss": 0.3579, + "nll_loss": 0.08946387469768524, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.842500154860318e-05, + "rewards/margins": 0.22536775469779968, + "rewards/rejected": -0.22540615499019623, + "step": 13203 + }, + { + "epoch": 9.131396957123098, + "grad_norm": 3.4810919761657715, + "learning_rate": 4.825572460427232e-06, + "log_odds_chosen": 11.446053504943848, + "log_odds_ratio": -3.7420730222947896e-05, + "logits/chosen": -0.3691715598106384, + "logits/rejected": -0.3924041986465454, + "logps/chosen": -0.00024286792904604226, + "logps/rejected": -2.8767917156219482, + "loss": 0.3574, + "nll_loss": 0.08934895694255829, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4286793632199988e-05, + "rewards/margins": 0.2876548767089844, + "rewards/rejected": -0.28767916560173035, + "step": 13204 + }, + { + "epoch": 9.132088520055325, + "grad_norm": 3.536254644393921, + "learning_rate": 4.821730444137084e-06, + "log_odds_chosen": 11.507623672485352, + "log_odds_ratio": -2.183894866902847e-05, + "logits/chosen": 0.04829786717891693, + "logits/rejected": -0.0014524534344673157, + "logps/chosen": -0.00013335171388462186, + "logps/rejected": -2.441422462463379, + "loss": 0.359, + "nll_loss": 0.08974535018205643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3335172297956888e-05, + "rewards/margins": 0.24412892758846283, + "rewards/rejected": -0.24414226412773132, + "step": 13205 + }, + { + "epoch": 9.132780082987551, + "grad_norm": 3.27201509475708, + "learning_rate": 4.817888427846934e-06, + "log_odds_chosen": 11.764434814453125, + "log_odds_ratio": -0.00012924049224238843, + "logits/chosen": 0.09308671951293945, + "logits/rejected": -0.0931982696056366, + "logps/chosen": -0.0001480036007706076, + "logps/rejected": -2.6317474842071533, + "loss": 0.3179, + "nll_loss": 0.07945744693279266, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4800360077060759e-05, + "rewards/margins": 0.26315996050834656, + "rewards/rejected": -0.26317477226257324, + "step": 13206 + }, + { + "epoch": 9.133471645919778, + "grad_norm": 3.4189751148223877, + "learning_rate": 4.814046411556785e-06, + "log_odds_chosen": 11.023716926574707, + "log_odds_ratio": -3.465122790657915e-05, + "logits/chosen": -0.3066454827785492, + "logits/rejected": -0.2448291927576065, + "logps/chosen": -0.0004443526268005371, + "logps/rejected": -2.4091989994049072, + "loss": 0.5667, + "nll_loss": 0.14166930317878723, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.443526268005371e-05, + "rewards/margins": 0.2408754825592041, + "rewards/rejected": -0.24091993272304535, + "step": 13207 + }, + { + "epoch": 9.134163208852005, + "grad_norm": 4.831146240234375, + "learning_rate": 4.810204395266636e-06, + "log_odds_chosen": 12.028083801269531, + "log_odds_ratio": -4.996353527531028e-05, + "logits/chosen": 0.03542035073041916, + "logits/rejected": 0.009335717186331749, + "logps/chosen": -0.000172814674442634, + "logps/rejected": -2.9869327545166016, + "loss": 0.5162, + "nll_loss": 0.12903599441051483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7281467080465518e-05, + "rewards/margins": 0.2986759841442108, + "rewards/rejected": -0.2986932694911957, + "step": 13208 + }, + { + "epoch": 9.134854771784232, + "grad_norm": 3.973660469055176, + "learning_rate": 4.8063623789764875e-06, + "log_odds_chosen": 11.09183120727539, + "log_odds_ratio": -2.030867472058162e-05, + "logits/chosen": -0.3006454408168793, + "logits/rejected": -0.3231848478317261, + "logps/chosen": -0.00012072438403265551, + "logps/rejected": -1.8342299461364746, + "loss": 0.4669, + "nll_loss": 0.11673028767108917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2072438948962372e-05, + "rewards/margins": 0.18341092765331268, + "rewards/rejected": -0.1834229826927185, + "step": 13209 + }, + { + "epoch": 9.135546334716459, + "grad_norm": 3.219853401184082, + "learning_rate": 4.802520362686338e-06, + "log_odds_chosen": 10.024149894714355, + "log_odds_ratio": -0.0005982537404634058, + "logits/chosen": -0.10521053522825241, + "logits/rejected": -0.18743349611759186, + "logps/chosen": -0.0005603090394288301, + "logps/rejected": -1.577427864074707, + "loss": 0.2468, + "nll_loss": 0.0616336427628994, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6030909036053345e-05, + "rewards/margins": 0.1576867550611496, + "rewards/rejected": -0.15774278342723846, + "step": 13210 + }, + { + "epoch": 9.136237897648686, + "grad_norm": 3.0521786212921143, + "learning_rate": 4.798678346396189e-06, + "log_odds_chosen": 12.086573600769043, + "log_odds_ratio": -4.3436888518044725e-05, + "logits/chosen": 0.36556896567344666, + "logits/rejected": 0.10947079211473465, + "logps/chosen": -0.00013567760470323265, + "logps/rejected": -3.012777328491211, + "loss": 0.3374, + "nll_loss": 0.08434304594993591, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3567760106525384e-05, + "rewards/margins": 0.3012641370296478, + "rewards/rejected": -0.30127769708633423, + "step": 13211 + }, + { + "epoch": 9.136929460580912, + "grad_norm": 3.5349905490875244, + "learning_rate": 4.794836330106041e-06, + "log_odds_chosen": 11.848288536071777, + "log_odds_ratio": -0.0001640029513509944, + "logits/chosen": -0.2809010148048401, + "logits/rejected": -0.46270230412483215, + "logps/chosen": -0.0003104759962297976, + "logps/rejected": -2.47737717628479, + "loss": 0.3626, + "nll_loss": 0.09063741564750671, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1047602533362806e-05, + "rewards/margins": 0.24770666658878326, + "rewards/rejected": -0.24773772060871124, + "step": 13212 + }, + { + "epoch": 9.13762102351314, + "grad_norm": 4.55886697769165, + "learning_rate": 4.7909943138158905e-06, + "log_odds_chosen": 11.779890060424805, + "log_odds_ratio": -1.923369745782111e-05, + "logits/chosen": -0.012806426733732224, + "logits/rejected": -0.1646498739719391, + "logps/chosen": -0.00022397295106202364, + "logps/rejected": -2.825650453567505, + "loss": 0.4991, + "nll_loss": 0.12476147711277008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2397296561393887e-05, + "rewards/margins": 0.2825426459312439, + "rewards/rejected": -0.28256505727767944, + "step": 13213 + }, + { + "epoch": 9.138312586445366, + "grad_norm": 2.7805097103118896, + "learning_rate": 4.787152297525742e-06, + "log_odds_chosen": 11.518321990966797, + "log_odds_ratio": -4.23527744715102e-05, + "logits/chosen": 0.10589182376861572, + "logits/rejected": -0.06496121734380722, + "logps/chosen": -0.0002213009138358757, + "logps/rejected": -2.6526756286621094, + "loss": 0.3069, + "nll_loss": 0.07672608643770218, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2130094293970615e-05, + "rewards/margins": 0.2652454376220703, + "rewards/rejected": -0.265267550945282, + "step": 13214 + }, + { + "epoch": 9.139004149377593, + "grad_norm": 3.2939071655273438, + "learning_rate": 4.783310281235592e-06, + "log_odds_chosen": 10.641448020935059, + "log_odds_ratio": -0.00021939512225799263, + "logits/chosen": -0.47907182574272156, + "logits/rejected": -0.47848352789878845, + "logps/chosen": -0.0005785435787402093, + "logps/rejected": -2.2549679279327393, + "loss": 0.4797, + "nll_loss": 0.11991085112094879, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7854358601616696e-05, + "rewards/margins": 0.2254389524459839, + "rewards/rejected": -0.2254967987537384, + "step": 13215 + }, + { + "epoch": 9.13969571230982, + "grad_norm": 2.7339344024658203, + "learning_rate": 4.7794682649454436e-06, + "log_odds_chosen": 12.13232135772705, + "log_odds_ratio": -1.1469980563560966e-05, + "logits/chosen": -0.4271049201488495, + "logits/rejected": -0.41360318660736084, + "logps/chosen": -0.00045071684871800244, + "logps/rejected": -2.8040518760681152, + "loss": 0.3522, + "nll_loss": 0.08804069459438324, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.507168705458753e-05, + "rewards/margins": 0.2803601324558258, + "rewards/rejected": -0.2804052233695984, + "step": 13216 + }, + { + "epoch": 9.140387275242047, + "grad_norm": 3.3445003032684326, + "learning_rate": 4.775626248655294e-06, + "log_odds_chosen": 11.834087371826172, + "log_odds_ratio": -3.875298716593534e-05, + "logits/chosen": -0.34143972396850586, + "logits/rejected": -0.41385823488235474, + "logps/chosen": -0.0002676868753042072, + "logps/rejected": -2.8256959915161133, + "loss": 0.6308, + "nll_loss": 0.15769058465957642, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.676868643902708e-05, + "rewards/margins": 0.2825428247451782, + "rewards/rejected": -0.28256961703300476, + "step": 13217 + }, + { + "epoch": 9.141078838174273, + "grad_norm": 2.49408221244812, + "learning_rate": 4.771784232365145e-06, + "log_odds_chosen": 11.425577163696289, + "log_odds_ratio": -2.738785588007886e-05, + "logits/chosen": -0.13537722826004028, + "logits/rejected": -0.1677563190460205, + "logps/chosen": -0.0001892523287096992, + "logps/rejected": -2.5062432289123535, + "loss": 0.2329, + "nll_loss": 0.05821177735924721, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8925233234767802e-05, + "rewards/margins": 0.25060540437698364, + "rewards/rejected": -0.25062432885169983, + "step": 13218 + }, + { + "epoch": 9.1417704011065, + "grad_norm": 3.0973517894744873, + "learning_rate": 4.767942216074997e-06, + "log_odds_chosen": 10.915397644042969, + "log_odds_ratio": -2.8263664717087522e-05, + "logits/chosen": -0.27408403158187866, + "logits/rejected": -0.3834518790245056, + "logps/chosen": -0.00018645863747224212, + "logps/rejected": -2.13749098777771, + "loss": 0.2723, + "nll_loss": 0.06806787848472595, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8645861928234808e-05, + "rewards/margins": 0.21373045444488525, + "rewards/rejected": -0.21374909579753876, + "step": 13219 + }, + { + "epoch": 9.142461964038727, + "grad_norm": 4.3579206466674805, + "learning_rate": 4.764100199784847e-06, + "log_odds_chosen": 11.356622695922852, + "log_odds_ratio": -4.268779593985528e-05, + "logits/chosen": -0.3055403232574463, + "logits/rejected": -0.40657907724380493, + "logps/chosen": -0.00023026179405860603, + "logps/rejected": -2.4457850456237793, + "loss": 0.3781, + "nll_loss": 0.09453283250331879, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3026179405860603e-05, + "rewards/margins": 0.2445555031299591, + "rewards/rejected": -0.2445785105228424, + "step": 13220 + }, + { + "epoch": 9.143153526970954, + "grad_norm": 2.9954302310943604, + "learning_rate": 4.760258183494698e-06, + "log_odds_chosen": 10.43281364440918, + "log_odds_ratio": -6.213808956090361e-05, + "logits/chosen": -0.08556319773197174, + "logits/rejected": -0.05594261735677719, + "logps/chosen": -0.000477094785310328, + "logps/rejected": -2.231978416442871, + "loss": 0.2928, + "nll_loss": 0.07320213317871094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7709479986224324e-05, + "rewards/margins": 0.2231501340866089, + "rewards/rejected": -0.2231978476047516, + "step": 13221 + }, + { + "epoch": 9.14384508990318, + "grad_norm": 3.0216176509857178, + "learning_rate": 4.756416167204549e-06, + "log_odds_chosen": 11.871658325195312, + "log_odds_ratio": -9.193705773213878e-05, + "logits/chosen": -0.07090628892183304, + "logits/rejected": -0.13959215581417084, + "logps/chosen": -0.00019610798335634172, + "logps/rejected": -3.1729345321655273, + "loss": 0.251, + "nll_loss": 0.06273071467876434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9610799427027814e-05, + "rewards/margins": 0.3172738552093506, + "rewards/rejected": -0.3172934651374817, + "step": 13222 + }, + { + "epoch": 9.144536652835408, + "grad_norm": 2.8475561141967773, + "learning_rate": 4.7525741509144005e-06, + "log_odds_chosen": 10.512632369995117, + "log_odds_ratio": -3.802761057158932e-05, + "logits/chosen": -0.2520372271537781, + "logits/rejected": -0.31475120782852173, + "logps/chosen": -0.00022900404292158782, + "logps/rejected": -1.776663064956665, + "loss": 0.302, + "nll_loss": 0.07548577338457108, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2900405383552425e-05, + "rewards/margins": 0.17764338850975037, + "rewards/rejected": -0.1776663064956665, + "step": 13223 + }, + { + "epoch": 9.145228215767634, + "grad_norm": 4.809469223022461, + "learning_rate": 4.74873213462425e-06, + "log_odds_chosen": 10.421841621398926, + "log_odds_ratio": -0.0001059464702848345, + "logits/chosen": 0.10107007622718811, + "logits/rejected": 0.04501248896121979, + "logps/chosen": -0.0002126324106939137, + "logps/rejected": -2.0651650428771973, + "loss": 0.4475, + "nll_loss": 0.11186723411083221, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1263243979774415e-05, + "rewards/margins": 0.2064952552318573, + "rewards/rejected": -0.20651650428771973, + "step": 13224 + }, + { + "epoch": 9.145919778699861, + "grad_norm": 4.251705169677734, + "learning_rate": 4.744890118334102e-06, + "log_odds_chosen": 11.55975341796875, + "log_odds_ratio": -3.190529241692275e-05, + "logits/chosen": -0.27173903584480286, + "logits/rejected": -0.3221912384033203, + "logps/chosen": -0.0002216800203314051, + "logps/rejected": -2.4351489543914795, + "loss": 0.3814, + "nll_loss": 0.09534931182861328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.216800203314051e-05, + "rewards/margins": 0.2434927225112915, + "rewards/rejected": -0.24351489543914795, + "step": 13225 + }, + { + "epoch": 9.146611341632088, + "grad_norm": 4.158973217010498, + "learning_rate": 4.741048102043953e-06, + "log_odds_chosen": 10.777334213256836, + "log_odds_ratio": -7.380648457910866e-05, + "logits/chosen": -0.21886664628982544, + "logits/rejected": -0.2848142981529236, + "logps/chosen": -0.0001596217043697834, + "logps/rejected": -2.1103477478027344, + "loss": 0.3993, + "nll_loss": 0.09982932358980179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.596217043697834e-05, + "rewards/margins": 0.21101883053779602, + "rewards/rejected": -0.21103478968143463, + "step": 13226 + }, + { + "epoch": 9.147302904564315, + "grad_norm": 3.8205955028533936, + "learning_rate": 4.7372060857538035e-06, + "log_odds_chosen": 10.89959716796875, + "log_odds_ratio": -5.4202715546125546e-05, + "logits/chosen": -0.03708948194980621, + "logits/rejected": -0.12110021710395813, + "logps/chosen": -0.0002881829859688878, + "logps/rejected": -2.0008976459503174, + "loss": 0.3845, + "nll_loss": 0.09611043334007263, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.88182982330909e-05, + "rewards/margins": 0.20006093382835388, + "rewards/rejected": -0.20008975267410278, + "step": 13227 + }, + { + "epoch": 9.147994467496542, + "grad_norm": 3.11427640914917, + "learning_rate": 4.733364069463655e-06, + "log_odds_chosen": 10.746748924255371, + "log_odds_ratio": -0.00014942459529265761, + "logits/chosen": -0.20317408442497253, + "logits/rejected": -0.17767639458179474, + "logps/chosen": -0.0002456876100040972, + "logps/rejected": -2.1270229816436768, + "loss": 0.3072, + "nll_loss": 0.07679583132266998, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4568758817622438e-05, + "rewards/margins": 0.2126777470111847, + "rewards/rejected": -0.21270230412483215, + "step": 13228 + }, + { + "epoch": 9.148686030428768, + "grad_norm": 2.43300199508667, + "learning_rate": 4.729522053173506e-06, + "log_odds_chosen": 9.599930763244629, + "log_odds_ratio": -0.0009399011032655835, + "logits/chosen": -0.5691751837730408, + "logits/rejected": -0.6450884938240051, + "logps/chosen": -0.0008461083052679896, + "logps/rejected": -1.8899251222610474, + "loss": 0.2957, + "nll_loss": 0.07382595539093018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.461083780275658e-05, + "rewards/margins": 0.1889079213142395, + "rewards/rejected": -0.18899253010749817, + "step": 13229 + }, + { + "epoch": 9.149377593360995, + "grad_norm": 3.4470419883728027, + "learning_rate": 4.725680036883357e-06, + "log_odds_chosen": 11.664287567138672, + "log_odds_ratio": -4.2229145037708804e-05, + "logits/chosen": 0.18879088759422302, + "logits/rejected": 0.10272565484046936, + "logps/chosen": -0.00011901649122592062, + "logps/rejected": -2.4768638610839844, + "loss": 0.4301, + "nll_loss": 0.10751504451036453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1901649486389942e-05, + "rewards/margins": 0.24767446517944336, + "rewards/rejected": -0.24768637120723724, + "step": 13230 + }, + { + "epoch": 9.150069156293222, + "grad_norm": 3.0610811710357666, + "learning_rate": 4.721838020593207e-06, + "log_odds_chosen": 10.91439437866211, + "log_odds_ratio": -0.0006155165028758347, + "logits/chosen": -0.028582245111465454, + "logits/rejected": -0.03339751809835434, + "logps/chosen": -0.0004672374634537846, + "logps/rejected": -2.9429073333740234, + "loss": 0.3093, + "nll_loss": 0.07725828886032104, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.672374780056998e-05, + "rewards/margins": 0.29424402117729187, + "rewards/rejected": -0.2942907214164734, + "step": 13231 + }, + { + "epoch": 9.150760719225449, + "grad_norm": 4.357141494750977, + "learning_rate": 4.717996004303058e-06, + "log_odds_chosen": 11.859966278076172, + "log_odds_ratio": -1.0671762538549956e-05, + "logits/chosen": 0.17385049164295197, + "logits/rejected": 0.0562586784362793, + "logps/chosen": -8.415168122155592e-05, + "logps/rejected": -2.2523984909057617, + "loss": 0.4487, + "nll_loss": 0.1121809184551239, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.415167940256651e-06, + "rewards/margins": 0.22523143887519836, + "rewards/rejected": -0.2252398431301117, + "step": 13232 + }, + { + "epoch": 9.151452282157676, + "grad_norm": 2.952986240386963, + "learning_rate": 4.71415398801291e-06, + "log_odds_chosen": 10.926140785217285, + "log_odds_ratio": -0.0020880952943116426, + "logits/chosen": -0.3445931673049927, + "logits/rejected": -0.3721742630004883, + "logps/chosen": -0.01039061602205038, + "logps/rejected": -2.7033534049987793, + "loss": 0.3446, + "nll_loss": 0.0859515517950058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010390614625066519, + "rewards/margins": 0.2692962884902954, + "rewards/rejected": -0.2703353464603424, + "step": 13233 + }, + { + "epoch": 9.152143845089903, + "grad_norm": 2.7857892513275146, + "learning_rate": 4.7103119717227604e-06, + "log_odds_chosen": 11.978271484375, + "log_odds_ratio": -2.0148203475400805e-05, + "logits/chosen": -0.4585052728652954, + "logits/rejected": -0.5114193558692932, + "logps/chosen": -0.00017187956837005913, + "logps/rejected": -2.7586655616760254, + "loss": 0.4332, + "nll_loss": 0.10829560458660126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7187956473208033e-05, + "rewards/margins": 0.2758493423461914, + "rewards/rejected": -0.2758665382862091, + "step": 13234 + }, + { + "epoch": 9.15283540802213, + "grad_norm": 4.290009021759033, + "learning_rate": 4.706469955432611e-06, + "log_odds_chosen": 9.976953506469727, + "log_odds_ratio": -0.00010702211875468493, + "logits/chosen": -0.26274943351745605, + "logits/rejected": -0.33820897340774536, + "logps/chosen": -0.0008673262782394886, + "logps/rejected": -2.315030574798584, + "loss": 0.6599, + "nll_loss": 0.16497503221035004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.67326307343319e-05, + "rewards/margins": 0.23141631484031677, + "rewards/rejected": -0.23150306940078735, + "step": 13235 + }, + { + "epoch": 9.153526970954356, + "grad_norm": 2.912217378616333, + "learning_rate": 4.702627939142462e-06, + "log_odds_chosen": 11.16337776184082, + "log_odds_ratio": -1.8966689822264016e-05, + "logits/chosen": -0.6119264364242554, + "logits/rejected": -0.5414671897888184, + "logps/chosen": -0.00011048486339859664, + "logps/rejected": -1.7643336057662964, + "loss": 0.3305, + "nll_loss": 0.0826151967048645, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1048487067455426e-05, + "rewards/margins": 0.1764223277568817, + "rewards/rejected": -0.17643338441848755, + "step": 13236 + }, + { + "epoch": 9.154218533886583, + "grad_norm": 2.99233341217041, + "learning_rate": 4.6987859228523135e-06, + "log_odds_chosen": 10.353715896606445, + "log_odds_ratio": -0.00020194250100757927, + "logits/chosen": -0.7038260102272034, + "logits/rejected": -0.6964368224143982, + "logps/chosen": -0.0006942212348803878, + "logps/rejected": -2.1969351768493652, + "loss": 0.3749, + "nll_loss": 0.09371718019247055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.942212348803878e-05, + "rewards/margins": 0.2196240872144699, + "rewards/rejected": -0.21969351172447205, + "step": 13237 + }, + { + "epoch": 9.15491009681881, + "grad_norm": 2.7351162433624268, + "learning_rate": 4.6949439065621634e-06, + "log_odds_chosen": 11.356271743774414, + "log_odds_ratio": -8.974706724984571e-05, + "logits/chosen": -0.26438623666763306, + "logits/rejected": -0.2929072380065918, + "logps/chosen": -0.0002658174198586494, + "logps/rejected": -2.7258095741271973, + "loss": 0.2832, + "nll_loss": 0.07078118622303009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6581741622067057e-05, + "rewards/margins": 0.2725543677806854, + "rewards/rejected": -0.27258095145225525, + "step": 13238 + }, + { + "epoch": 9.155601659751037, + "grad_norm": 4.187948226928711, + "learning_rate": 4.691101890272015e-06, + "log_odds_chosen": 12.833627700805664, + "log_odds_ratio": -1.6834528651088476e-05, + "logits/chosen": -0.151898592710495, + "logits/rejected": -0.24054181575775146, + "logps/chosen": -0.0004079265345353633, + "logps/rejected": -4.010830402374268, + "loss": 0.4739, + "nll_loss": 0.11846268177032471, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.079265636391938e-05, + "rewards/margins": 0.40104228258132935, + "rewards/rejected": -0.4010831117630005, + "step": 13239 + }, + { + "epoch": 9.156293222683264, + "grad_norm": 3.280129909515381, + "learning_rate": 4.687259873981866e-06, + "log_odds_chosen": 10.422073364257812, + "log_odds_ratio": -0.0007630666368640959, + "logits/chosen": -0.3538857102394104, + "logits/rejected": -0.34520047903060913, + "logps/chosen": -0.0005645141354762018, + "logps/rejected": -2.46002459526062, + "loss": 0.5715, + "nll_loss": 0.14279311895370483, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6451412092428654e-05, + "rewards/margins": 0.245946004986763, + "rewards/rejected": -0.2460024654865265, + "step": 13240 + }, + { + "epoch": 9.15698478561549, + "grad_norm": 3.231613874435425, + "learning_rate": 4.6834178576917165e-06, + "log_odds_chosen": 10.341863632202148, + "log_odds_ratio": -0.0001687141921138391, + "logits/chosen": -0.25097715854644775, + "logits/rejected": -0.28076058626174927, + "logps/chosen": -0.00027557072462514043, + "logps/rejected": -1.966200351715088, + "loss": 0.3784, + "nll_loss": 0.09458751976490021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7557072826311924e-05, + "rewards/margins": 0.19659249484539032, + "rewards/rejected": -0.19662004709243774, + "step": 13241 + }, + { + "epoch": 9.157676348547717, + "grad_norm": 2.4268250465393066, + "learning_rate": 4.679575841401568e-06, + "log_odds_chosen": 9.504556655883789, + "log_odds_ratio": -0.00011133919178973883, + "logits/chosen": -0.1775788515806198, + "logits/rejected": -0.1743740439414978, + "logps/chosen": -0.00029458932112902403, + "logps/rejected": -1.212559700012207, + "loss": 0.3033, + "nll_loss": 0.07581616938114166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9458933568093926e-05, + "rewards/margins": 0.12122651189565659, + "rewards/rejected": -0.12125596404075623, + "step": 13242 + }, + { + "epoch": 9.158367911479944, + "grad_norm": 4.34454345703125, + "learning_rate": 4.675733825111419e-06, + "log_odds_chosen": 10.734809875488281, + "log_odds_ratio": -0.00022274142247624695, + "logits/chosen": -0.38664209842681885, + "logits/rejected": -0.3307802379131317, + "logps/chosen": -0.0003078333684243262, + "logps/rejected": -2.6354517936706543, + "loss": 0.3499, + "nll_loss": 0.08745646476745605, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0783339752815664e-05, + "rewards/margins": 0.2635143995285034, + "rewards/rejected": -0.2635451555252075, + "step": 13243 + }, + { + "epoch": 9.159059474412171, + "grad_norm": 4.3627495765686035, + "learning_rate": 4.67189180882127e-06, + "log_odds_chosen": 10.740904808044434, + "log_odds_ratio": -9.477129788137972e-05, + "logits/chosen": -0.17556828260421753, + "logits/rejected": -0.27577683329582214, + "logps/chosen": -0.00023830297868698835, + "logps/rejected": -1.8701783418655396, + "loss": 0.3844, + "nll_loss": 0.09609566628932953, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3830298232496716e-05, + "rewards/margins": 0.18699400126934052, + "rewards/rejected": -0.18701782822608948, + "step": 13244 + }, + { + "epoch": 9.159751037344398, + "grad_norm": 3.9714725017547607, + "learning_rate": 4.66804979253112e-06, + "log_odds_chosen": 11.494601249694824, + "log_odds_ratio": -1.8003996956394985e-05, + "logits/chosen": -0.4515254497528076, + "logits/rejected": -0.48374801874160767, + "logps/chosen": -0.0001993764890357852, + "logps/rejected": -2.5851736068725586, + "loss": 0.4451, + "nll_loss": 0.11128557473421097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.993764817598276e-05, + "rewards/margins": 0.2584974467754364, + "rewards/rejected": -0.25851738452911377, + "step": 13245 + }, + { + "epoch": 9.160442600276625, + "grad_norm": 1.8536094427108765, + "learning_rate": 4.664207776240972e-06, + "log_odds_chosen": 10.494796752929688, + "log_odds_ratio": -0.0001465547102270648, + "logits/chosen": -0.7109159231185913, + "logits/rejected": -0.7627403736114502, + "logps/chosen": -0.00023228241479955614, + "logps/rejected": -1.7776033878326416, + "loss": 0.2253, + "nll_loss": 0.05630149319767952, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3228241843753494e-05, + "rewards/margins": 0.17773711681365967, + "rewards/rejected": -0.17776033282279968, + "step": 13246 + }, + { + "epoch": 9.161134163208851, + "grad_norm": 3.3924193382263184, + "learning_rate": 4.660365759950822e-06, + "log_odds_chosen": 9.826598167419434, + "log_odds_ratio": -0.0005562108126468956, + "logits/chosen": -0.6887165307998657, + "logits/rejected": -0.7289565801620483, + "logps/chosen": -0.0017379340715706348, + "logps/rejected": -2.0645713806152344, + "loss": 0.3568, + "nll_loss": 0.08914747089147568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017379340715706348, + "rewards/margins": 0.2062833607196808, + "rewards/rejected": -0.20645713806152344, + "step": 13247 + }, + { + "epoch": 9.161825726141078, + "grad_norm": 3.9670066833496094, + "learning_rate": 4.6565237436606735e-06, + "log_odds_chosen": 10.267412185668945, + "log_odds_ratio": -0.0028830033261328936, + "logits/chosen": -0.37665268778800964, + "logits/rejected": -0.3267573118209839, + "logps/chosen": -0.021847128868103027, + "logps/rejected": -2.6090478897094727, + "loss": 0.294, + "nll_loss": 0.0732061117887497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002184713026508689, + "rewards/margins": 0.2587200403213501, + "rewards/rejected": -0.2609047591686249, + "step": 13248 + }, + { + "epoch": 9.162517289073305, + "grad_norm": 3.581749200820923, + "learning_rate": 4.652681727370524e-06, + "log_odds_chosen": 10.095499992370605, + "log_odds_ratio": -8.256595174316317e-05, + "logits/chosen": 0.1312323659658432, + "logits/rejected": 0.16536569595336914, + "logps/chosen": -0.0003430332290008664, + "logps/rejected": -1.7387230396270752, + "loss": 0.4115, + "nll_loss": 0.10287502408027649, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4303324355278164e-05, + "rewards/margins": 0.17383801937103271, + "rewards/rejected": -0.17387232184410095, + "step": 13249 + }, + { + "epoch": 9.163208852005532, + "grad_norm": 3.686554431915283, + "learning_rate": 4.648839711080375e-06, + "log_odds_chosen": 11.71548080444336, + "log_odds_ratio": -2.5854322302620858e-05, + "logits/chosen": 0.076650470495224, + "logits/rejected": -0.05228927731513977, + "logps/chosen": -8.165779581759125e-05, + "logps/rejected": -2.2370681762695312, + "loss": 0.2813, + "nll_loss": 0.0703134834766388, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.165779945557006e-06, + "rewards/margins": 0.2236986607313156, + "rewards/rejected": -0.22370684146881104, + "step": 13250 + }, + { + "epoch": 9.163900414937759, + "grad_norm": 4.031307697296143, + "learning_rate": 4.6449976947902266e-06, + "log_odds_chosen": 11.770303726196289, + "log_odds_ratio": -3.01509444398107e-05, + "logits/chosen": -0.37642228603363037, + "logits/rejected": -0.41639071702957153, + "logps/chosen": -0.00029356483719311655, + "logps/rejected": -3.3386707305908203, + "loss": 0.6717, + "nll_loss": 0.16791526973247528, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9356484446907416e-05, + "rewards/margins": 0.33383771777153015, + "rewards/rejected": -0.33386707305908203, + "step": 13251 + }, + { + "epoch": 9.164591977869986, + "grad_norm": 3.6375107765197754, + "learning_rate": 4.6411556785000765e-06, + "log_odds_chosen": 10.574867248535156, + "log_odds_ratio": -8.866946154739708e-05, + "logits/chosen": -0.46668383479118347, + "logits/rejected": -0.4627547860145569, + "logps/chosen": -0.000216979649849236, + "logps/rejected": -2.068161725997925, + "loss": 0.3352, + "nll_loss": 0.08379573374986649, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.169796425732784e-05, + "rewards/margins": 0.20679447054862976, + "rewards/rejected": -0.2068161815404892, + "step": 13252 + }, + { + "epoch": 9.165283540802212, + "grad_norm": 4.423868656158447, + "learning_rate": 4.637313662209928e-06, + "log_odds_chosen": 10.797757148742676, + "log_odds_ratio": -5.5326938309008256e-05, + "logits/chosen": 0.00524507462978363, + "logits/rejected": 0.0028075315058231354, + "logps/chosen": -0.00048614057595841587, + "logps/rejected": -2.4063234329223633, + "loss": 0.5175, + "nll_loss": 0.12936095893383026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8614056140650064e-05, + "rewards/margins": 0.24058371782302856, + "rewards/rejected": -0.24063235521316528, + "step": 13253 + }, + { + "epoch": 9.16597510373444, + "grad_norm": 4.309619903564453, + "learning_rate": 4.633471645919779e-06, + "log_odds_chosen": 11.022064208984375, + "log_odds_ratio": -0.00012898718705400825, + "logits/chosen": -0.18947575986385345, + "logits/rejected": -0.22546103596687317, + "logps/chosen": -0.0002583316236268729, + "logps/rejected": -2.0356574058532715, + "loss": 1.0224, + "nll_loss": 0.25558555126190186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5833163817878813e-05, + "rewards/margins": 0.2035399079322815, + "rewards/rejected": -0.20356574654579163, + "step": 13254 + }, + { + "epoch": 9.166666666666666, + "grad_norm": 2.9900362491607666, + "learning_rate": 4.6296296296296296e-06, + "log_odds_chosen": 11.046536445617676, + "log_odds_ratio": -0.00019564780814107507, + "logits/chosen": -0.1926698386669159, + "logits/rejected": -0.20588555932044983, + "logps/chosen": -0.00046490365639328957, + "logps/rejected": -2.1357336044311523, + "loss": 0.2424, + "nll_loss": 0.06057025492191315, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6490364184137434e-05, + "rewards/margins": 0.2135268747806549, + "rewards/rejected": -0.21357333660125732, + "step": 13255 + }, + { + "epoch": 9.167358229598893, + "grad_norm": 2.9802370071411133, + "learning_rate": 4.625787613339481e-06, + "log_odds_chosen": 10.443476676940918, + "log_odds_ratio": -0.00010125573317054659, + "logits/chosen": -0.32919472455978394, + "logits/rejected": -0.3984687626361847, + "logps/chosen": -0.0003405904571991414, + "logps/rejected": -1.5294239521026611, + "loss": 0.3343, + "nll_loss": 0.08357033878564835, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.405904499231838e-05, + "rewards/margins": 0.1529083251953125, + "rewards/rejected": -0.15294238924980164, + "step": 13256 + }, + { + "epoch": 9.16804979253112, + "grad_norm": 2.9573252201080322, + "learning_rate": 4.621945597049332e-06, + "log_odds_chosen": 10.908576965332031, + "log_odds_ratio": -0.0001610955805517733, + "logits/chosen": 0.013558404520154, + "logits/rejected": 0.03549904748797417, + "logps/chosen": -0.0005097885732538998, + "logps/rejected": -2.575289249420166, + "loss": 0.3081, + "nll_loss": 0.07699886709451675, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0978858780581504e-05, + "rewards/margins": 0.2574779689311981, + "rewards/rejected": -0.25752896070480347, + "step": 13257 + }, + { + "epoch": 9.168741355463347, + "grad_norm": 3.787930965423584, + "learning_rate": 4.618103580759183e-06, + "log_odds_chosen": 10.715057373046875, + "log_odds_ratio": -8.827356941765174e-05, + "logits/chosen": -0.6768827438354492, + "logits/rejected": -0.727750301361084, + "logps/chosen": -0.00017680115706752986, + "logps/rejected": -1.949446678161621, + "loss": 0.4487, + "nll_loss": 0.11216624081134796, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7680114979157224e-05, + "rewards/margins": 0.19492697715759277, + "rewards/rejected": -0.19494464993476868, + "step": 13258 + }, + { + "epoch": 9.169432918395573, + "grad_norm": 3.8608124256134033, + "learning_rate": 4.614261564469033e-06, + "log_odds_chosen": 11.478191375732422, + "log_odds_ratio": -1.4476814612862654e-05, + "logits/chosen": -0.4526783227920532, + "logits/rejected": -0.45420515537261963, + "logps/chosen": -7.430824916809797e-05, + "logps/rejected": -1.9516966342926025, + "loss": 0.29, + "nll_loss": 0.07249864190816879, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.430824098264566e-06, + "rewards/margins": 0.19516223669052124, + "rewards/rejected": -0.19516965746879578, + "step": 13259 + }, + { + "epoch": 9.1701244813278, + "grad_norm": 2.8934109210968018, + "learning_rate": 4.610419548178885e-06, + "log_odds_chosen": 10.930181503295898, + "log_odds_ratio": -9.478238644078374e-05, + "logits/chosen": -0.47720345854759216, + "logits/rejected": -0.492647647857666, + "logps/chosen": -0.00046956821461208165, + "logps/rejected": -2.098984479904175, + "loss": 0.3205, + "nll_loss": 0.080109603703022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.695682582678273e-05, + "rewards/margins": 0.2098514884710312, + "rewards/rejected": -0.2098984569311142, + "step": 13260 + }, + { + "epoch": 9.170816044260027, + "grad_norm": 4.492011070251465, + "learning_rate": 4.606577531888735e-06, + "log_odds_chosen": 11.076122283935547, + "log_odds_ratio": -7.659607945242897e-05, + "logits/chosen": -0.44519495964050293, + "logits/rejected": -0.4828508794307709, + "logps/chosen": -0.00019283223082311451, + "logps/rejected": -1.979741096496582, + "loss": 0.368, + "nll_loss": 0.09198319911956787, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.928322308231145e-05, + "rewards/margins": 0.19795484840869904, + "rewards/rejected": -0.19797411561012268, + "step": 13261 + }, + { + "epoch": 9.171507607192254, + "grad_norm": 3.5539863109588623, + "learning_rate": 4.6027355155985865e-06, + "log_odds_chosen": 10.251945495605469, + "log_odds_ratio": -0.00012411485658958554, + "logits/chosen": -0.4729505777359009, + "logits/rejected": -0.5075870156288147, + "logps/chosen": -0.00024340019444935024, + "logps/rejected": -1.953089714050293, + "loss": 0.3644, + "nll_loss": 0.0910826325416565, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4340020900126547e-05, + "rewards/margins": 0.1952846348285675, + "rewards/rejected": -0.19530898332595825, + "step": 13262 + }, + { + "epoch": 9.17219917012448, + "grad_norm": 4.126911640167236, + "learning_rate": 4.598893499308437e-06, + "log_odds_chosen": 10.471588134765625, + "log_odds_ratio": -3.9286449464270845e-05, + "logits/chosen": -0.24379703402519226, + "logits/rejected": -0.3949686884880066, + "logps/chosen": -0.00031498580938205123, + "logps/rejected": -2.1726913452148438, + "loss": 0.4379, + "nll_loss": 0.10947485268115997, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1498577300226316e-05, + "rewards/margins": 0.2172376811504364, + "rewards/rejected": -0.2172691524028778, + "step": 13263 + }, + { + "epoch": 9.172890733056708, + "grad_norm": 4.387599945068359, + "learning_rate": 4.595051483018288e-06, + "log_odds_chosen": 12.87693977355957, + "log_odds_ratio": -7.407785233226605e-06, + "logits/chosen": -0.02308526635169983, + "logits/rejected": -0.10359898954629898, + "logps/chosen": -6.886209303047508e-05, + "logps/rejected": -3.2710678577423096, + "loss": 0.4755, + "nll_loss": 0.11887811124324799, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.886210030643269e-06, + "rewards/margins": 0.32709988951683044, + "rewards/rejected": -0.327106773853302, + "step": 13264 + }, + { + "epoch": 9.173582295988934, + "grad_norm": 2.787508964538574, + "learning_rate": 4.59120946672814e-06, + "log_odds_chosen": 10.15397834777832, + "log_odds_ratio": -9.300727106165141e-05, + "logits/chosen": -0.5829552412033081, + "logits/rejected": -0.643100380897522, + "logps/chosen": -0.0002786455152090639, + "logps/rejected": -1.7501752376556396, + "loss": 0.2358, + "nll_loss": 0.05893716216087341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.786455297609791e-05, + "rewards/margins": 0.17498967051506042, + "rewards/rejected": -0.17501753568649292, + "step": 13265 + }, + { + "epoch": 9.174273858921161, + "grad_norm": 3.6737401485443115, + "learning_rate": 4.5873674504379895e-06, + "log_odds_chosen": 10.786094665527344, + "log_odds_ratio": -6.616486643906683e-05, + "logits/chosen": -0.32876649498939514, + "logits/rejected": -0.3970365524291992, + "logps/chosen": -0.00031801205477677286, + "logps/rejected": -2.0390477180480957, + "loss": 0.7942, + "nll_loss": 0.19853723049163818, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.180120620527305e-05, + "rewards/margins": 0.20387297868728638, + "rewards/rejected": -0.20390477776527405, + "step": 13266 + }, + { + "epoch": 9.174965421853388, + "grad_norm": 2.290215015411377, + "learning_rate": 4.583525434147841e-06, + "log_odds_chosen": 10.114067077636719, + "log_odds_ratio": -0.0001489850110374391, + "logits/chosen": -0.42826682329177856, + "logits/rejected": -0.4314981698989868, + "logps/chosen": -0.00025290303165093064, + "logps/rejected": -1.6824004650115967, + "loss": 0.261, + "nll_loss": 0.06524519622325897, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.529030098230578e-05, + "rewards/margins": 0.16821476817131042, + "rewards/rejected": -0.1682400405406952, + "step": 13267 + }, + { + "epoch": 9.175656984785615, + "grad_norm": 4.102094650268555, + "learning_rate": 4.579683417857692e-06, + "log_odds_chosen": 11.35265064239502, + "log_odds_ratio": -4.903479202766903e-05, + "logits/chosen": -0.20633630454540253, + "logits/rejected": -0.19792324304580688, + "logps/chosen": -0.0001682281435932964, + "logps/rejected": -2.393183946609497, + "loss": 0.433, + "nll_loss": 0.10825060307979584, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.682281435932964e-05, + "rewards/margins": 0.23930156230926514, + "rewards/rejected": -0.23931840062141418, + "step": 13268 + }, + { + "epoch": 9.176348547717842, + "grad_norm": 3.2632980346679688, + "learning_rate": 4.575841401567543e-06, + "log_odds_chosen": 10.976491928100586, + "log_odds_ratio": -3.518206358421594e-05, + "logits/chosen": -0.20566090941429138, + "logits/rejected": -0.23544615507125854, + "logps/chosen": -0.0001013718792819418, + "logps/rejected": -1.7854700088500977, + "loss": 0.3001, + "nll_loss": 0.07501350343227386, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.013718792819418e-05, + "rewards/margins": 0.17853687703609467, + "rewards/rejected": -0.17854700982570648, + "step": 13269 + }, + { + "epoch": 9.177040110650069, + "grad_norm": 3.7404978275299072, + "learning_rate": 4.571999385277393e-06, + "log_odds_chosen": 10.493355751037598, + "log_odds_ratio": -0.00016397205763496459, + "logits/chosen": 0.13229244947433472, + "logits/rejected": 0.10793633759021759, + "logps/chosen": -0.0002441149263177067, + "logps/rejected": -1.7673245668411255, + "loss": 0.5835, + "nll_loss": 0.1458517611026764, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.441149263177067e-05, + "rewards/margins": 0.17670804262161255, + "rewards/rejected": -0.17673246562480927, + "step": 13270 + }, + { + "epoch": 9.177731673582295, + "grad_norm": 3.478229284286499, + "learning_rate": 4.568157368987245e-06, + "log_odds_chosen": 12.566650390625, + "log_odds_ratio": -1.7088979802792892e-05, + "logits/chosen": -0.2346772700548172, + "logits/rejected": -0.2684975266456604, + "logps/chosen": -0.00018270526197738945, + "logps/rejected": -3.8987607955932617, + "loss": 0.3757, + "nll_loss": 0.09392865002155304, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8270526197738945e-05, + "rewards/margins": 0.38985779881477356, + "rewards/rejected": -0.3898760676383972, + "step": 13271 + }, + { + "epoch": 9.178423236514522, + "grad_norm": 3.211944103240967, + "learning_rate": 4.564315352697096e-06, + "log_odds_chosen": 9.982566833496094, + "log_odds_ratio": -0.00025987907429225743, + "logits/chosen": -0.03721824660897255, + "logits/rejected": -0.15641994774341583, + "logps/chosen": -0.00024055814719758928, + "logps/rejected": -1.7016408443450928, + "loss": 0.3644, + "nll_loss": 0.0910676121711731, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4055814719758928e-05, + "rewards/margins": 0.17014002799987793, + "rewards/rejected": -0.170164093375206, + "step": 13272 + }, + { + "epoch": 9.179114799446749, + "grad_norm": 2.8588945865631104, + "learning_rate": 4.5604733364069464e-06, + "log_odds_chosen": 11.598443984985352, + "log_odds_ratio": -2.767339719866868e-05, + "logits/chosen": -0.44341886043548584, + "logits/rejected": -0.48085397481918335, + "logps/chosen": -0.00019865854119416326, + "logps/rejected": -2.748101234436035, + "loss": 0.2194, + "nll_loss": 0.0548417866230011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9865852664224803e-05, + "rewards/margins": 0.2747902572154999, + "rewards/rejected": -0.2748101055622101, + "step": 13273 + }, + { + "epoch": 9.179806362378976, + "grad_norm": 2.5782322883605957, + "learning_rate": 4.556631320116798e-06, + "log_odds_chosen": 10.460678100585938, + "log_odds_ratio": -0.0001101654561352916, + "logits/chosen": 0.017034312710165977, + "logits/rejected": 0.014345534145832062, + "logps/chosen": -0.00030546486959792674, + "logps/rejected": -1.9944336414337158, + "loss": 0.2767, + "nll_loss": 0.06916406750679016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.054648914257996e-05, + "rewards/margins": 0.19941282272338867, + "rewards/rejected": -0.19944337010383606, + "step": 13274 + }, + { + "epoch": 9.180497925311203, + "grad_norm": 4.89523458480835, + "learning_rate": 4.552789303826648e-06, + "log_odds_chosen": 11.174288749694824, + "log_odds_ratio": -0.00012297074135858566, + "logits/chosen": 0.06360907107591629, + "logits/rejected": -0.05227748304605484, + "logps/chosen": -0.00015693520253989846, + "logps/rejected": -2.271845817565918, + "loss": 0.4063, + "nll_loss": 0.1015675738453865, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5693520253989846e-05, + "rewards/margins": 0.22716888785362244, + "rewards/rejected": -0.22718459367752075, + "step": 13275 + }, + { + "epoch": 9.18118948824343, + "grad_norm": 2.1220638751983643, + "learning_rate": 4.5489472875364995e-06, + "log_odds_chosen": 11.140460014343262, + "log_odds_ratio": -8.715956209925935e-05, + "logits/chosen": 0.07599927484989166, + "logits/rejected": 0.04757479950785637, + "logps/chosen": -0.00021662801736965775, + "logps/rejected": -2.2675039768218994, + "loss": 0.2438, + "nll_loss": 0.0609496645629406, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1662805011146702e-05, + "rewards/margins": 0.22672873735427856, + "rewards/rejected": -0.22675038874149323, + "step": 13276 + }, + { + "epoch": 9.181881051175656, + "grad_norm": 3.8795008659362793, + "learning_rate": 4.54510527124635e-06, + "log_odds_chosen": 10.69237232208252, + "log_odds_ratio": -0.00011646945495158434, + "logits/chosen": -0.2491566240787506, + "logits/rejected": -0.28201591968536377, + "logps/chosen": -0.0005097347311675549, + "logps/rejected": -2.259579658508301, + "loss": 0.4445, + "nll_loss": 0.11111761629581451, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.09734709339682e-05, + "rewards/margins": 0.22590696811676025, + "rewards/rejected": -0.2259579747915268, + "step": 13277 + }, + { + "epoch": 9.182572614107883, + "grad_norm": 3.0771408081054688, + "learning_rate": 4.541263254956201e-06, + "log_odds_chosen": 11.507431030273438, + "log_odds_ratio": -1.9733077351702377e-05, + "logits/chosen": -0.7390185594558716, + "logits/rejected": -0.6592539548873901, + "logps/chosen": -0.00012037526903441176, + "logps/rejected": -2.3912291526794434, + "loss": 0.2545, + "nll_loss": 0.06361792236566544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2037526175845414e-05, + "rewards/margins": 0.23911088705062866, + "rewards/rejected": -0.23912294209003448, + "step": 13278 + }, + { + "epoch": 9.18326417704011, + "grad_norm": 3.7033040523529053, + "learning_rate": 4.537421238666053e-06, + "log_odds_chosen": 12.416447639465332, + "log_odds_ratio": -1.0927147741313092e-05, + "logits/chosen": -0.19545245170593262, + "logits/rejected": -0.265900194644928, + "logps/chosen": -0.0001679717533988878, + "logps/rejected": -3.451341152191162, + "loss": 0.3186, + "nll_loss": 0.07965927571058273, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.679717388469726e-05, + "rewards/margins": 0.34511736035346985, + "rewards/rejected": -0.3451341390609741, + "step": 13279 + }, + { + "epoch": 9.183955739972337, + "grad_norm": 3.743194341659546, + "learning_rate": 4.533579222375903e-06, + "log_odds_chosen": 10.992870330810547, + "log_odds_ratio": -0.00026691905804909766, + "logits/chosen": -0.2858811914920807, + "logits/rejected": -0.30002665519714355, + "logps/chosen": -0.00018830844783224165, + "logps/rejected": -1.7395424842834473, + "loss": 0.4446, + "nll_loss": 0.11112523823976517, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8830845874617808e-05, + "rewards/margins": 0.1739354282617569, + "rewards/rejected": -0.17395423352718353, + "step": 13280 + }, + { + "epoch": 9.184647302904564, + "grad_norm": 3.806933879852295, + "learning_rate": 4.529737206085754e-06, + "log_odds_chosen": 10.361151695251465, + "log_odds_ratio": -0.0004903482622466981, + "logits/chosen": -0.4798329472541809, + "logits/rejected": -0.44972285628318787, + "logps/chosen": -0.0005268111126497388, + "logps/rejected": -2.0253071784973145, + "loss": 0.415, + "nll_loss": 0.10369092226028442, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.268111635814421e-05, + "rewards/margins": 0.2024780511856079, + "rewards/rejected": -0.20253071188926697, + "step": 13281 + }, + { + "epoch": 9.18533886583679, + "grad_norm": 3.293419361114502, + "learning_rate": 4.525895189795605e-06, + "log_odds_chosen": 11.961520195007324, + "log_odds_ratio": -1.2666841030295473e-05, + "logits/chosen": -0.01091938465833664, + "logits/rejected": -0.1974954605102539, + "logps/chosen": -0.0001375441934214905, + "logps/rejected": -2.760972738265991, + "loss": 0.3582, + "nll_loss": 0.08954711258411407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3754419342149049e-05, + "rewards/margins": 0.2760835289955139, + "rewards/rejected": -0.27609729766845703, + "step": 13282 + }, + { + "epoch": 9.186030428769017, + "grad_norm": 3.072108745574951, + "learning_rate": 4.522053173505456e-06, + "log_odds_chosen": 11.435327529907227, + "log_odds_ratio": -5.203645196161233e-05, + "logits/chosen": -0.22686409950256348, + "logits/rejected": -0.29669320583343506, + "logps/chosen": -0.0002450290776323527, + "logps/rejected": -2.336642265319824, + "loss": 0.2849, + "nll_loss": 0.07121407985687256, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.450290776323527e-05, + "rewards/margins": 0.23363973200321198, + "rewards/rejected": -0.23366422951221466, + "step": 13283 + }, + { + "epoch": 9.186721991701244, + "grad_norm": 3.901740789413452, + "learning_rate": 4.518211157215306e-06, + "log_odds_chosen": 11.893566131591797, + "log_odds_ratio": -2.3749666070216335e-05, + "logits/chosen": -0.12406025826931, + "logits/rejected": -0.11168057471513748, + "logps/chosen": -0.0004723105812445283, + "logps/rejected": -3.253361225128174, + "loss": 0.3863, + "nll_loss": 0.09658458828926086, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7231056669261307e-05, + "rewards/margins": 0.32528889179229736, + "rewards/rejected": -0.3253360986709595, + "step": 13284 + }, + { + "epoch": 9.187413554633471, + "grad_norm": 4.156968593597412, + "learning_rate": 4.514369140925158e-06, + "log_odds_chosen": 11.90109634399414, + "log_odds_ratio": -3.2485702831763774e-05, + "logits/chosen": -0.5516144037246704, + "logits/rejected": -0.6592393517494202, + "logps/chosen": -0.0002664893982000649, + "logps/rejected": -3.0378479957580566, + "loss": 0.3442, + "nll_loss": 0.08603926748037338, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6648936909623444e-05, + "rewards/margins": 0.3037582039833069, + "rewards/rejected": -0.3037848174571991, + "step": 13285 + }, + { + "epoch": 9.188105117565698, + "grad_norm": 3.408931016921997, + "learning_rate": 4.510527124635009e-06, + "log_odds_chosen": 11.743860244750977, + "log_odds_ratio": -2.8227870643604547e-05, + "logits/chosen": -0.17038024961948395, + "logits/rejected": -0.19767898321151733, + "logps/chosen": -0.00032378954347223043, + "logps/rejected": -3.1211838722229004, + "loss": 0.2861, + "nll_loss": 0.07151287794113159, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.237895725760609e-05, + "rewards/margins": 0.3120860159397125, + "rewards/rejected": -0.31211841106414795, + "step": 13286 + }, + { + "epoch": 9.188796680497925, + "grad_norm": 3.6295013427734375, + "learning_rate": 4.5066851083448595e-06, + "log_odds_chosen": 10.94865894317627, + "log_odds_ratio": -7.326849299715832e-05, + "logits/chosen": 0.1300758719444275, + "logits/rejected": 0.01392633467912674, + "logps/chosen": -0.00020176218822598457, + "logps/rejected": -2.1468212604522705, + "loss": 0.3371, + "nll_loss": 0.08427339792251587, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.017621955019422e-05, + "rewards/margins": 0.21466195583343506, + "rewards/rejected": -0.21468213200569153, + "step": 13287 + }, + { + "epoch": 9.189488243430151, + "grad_norm": 3.9760258197784424, + "learning_rate": 4.502843092054711e-06, + "log_odds_chosen": 11.105104446411133, + "log_odds_ratio": -5.5100786994444206e-05, + "logits/chosen": -0.17760206758975983, + "logits/rejected": -0.240696519613266, + "logps/chosen": -0.00025667899171821773, + "logps/rejected": -2.6248059272766113, + "loss": 0.4704, + "nll_loss": 0.11759471893310547, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5667901354609057e-05, + "rewards/margins": 0.26245489716529846, + "rewards/rejected": -0.26248058676719666, + "step": 13288 + }, + { + "epoch": 9.190179806362378, + "grad_norm": 2.7358086109161377, + "learning_rate": 4.499001075764561e-06, + "log_odds_chosen": 12.274123191833496, + "log_odds_ratio": -8.188303581846412e-06, + "logits/chosen": -0.38431859016418457, + "logits/rejected": -0.46995633840560913, + "logps/chosen": -0.00018513904069550335, + "logps/rejected": -3.022996425628662, + "loss": 0.2276, + "nll_loss": 0.056895844638347626, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8513903341954574e-05, + "rewards/margins": 0.3022811710834503, + "rewards/rejected": -0.3022996783256531, + "step": 13289 + }, + { + "epoch": 9.190871369294605, + "grad_norm": 2.785196542739868, + "learning_rate": 4.4951590594744126e-06, + "log_odds_chosen": 10.875653266906738, + "log_odds_ratio": -0.0002187418140238151, + "logits/chosen": -0.19698688387870789, + "logits/rejected": -0.34480729699134827, + "logps/chosen": -0.0002780867216642946, + "logps/rejected": -2.1473608016967773, + "loss": 0.2618, + "nll_loss": 0.06543861329555511, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7808673621620983e-05, + "rewards/margins": 0.21470826864242554, + "rewards/rejected": -0.21473607420921326, + "step": 13290 + }, + { + "epoch": 9.191562932226832, + "grad_norm": 3.181849241256714, + "learning_rate": 4.491317043184263e-06, + "log_odds_chosen": 12.038965225219727, + "log_odds_ratio": -1.9076065655099228e-05, + "logits/chosen": -0.6734594106674194, + "logits/rejected": -0.8279802203178406, + "logps/chosen": -8.055789658101276e-05, + "logps/rejected": -2.6196889877319336, + "loss": 0.3114, + "nll_loss": 0.07784523814916611, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.055790203798097e-06, + "rewards/margins": 0.26196086406707764, + "rewards/rejected": -0.2619689106941223, + "step": 13291 + }, + { + "epoch": 9.192254495159059, + "grad_norm": 4.373355865478516, + "learning_rate": 4.487475026894114e-06, + "log_odds_chosen": 11.565893173217773, + "log_odds_ratio": -2.1030267816968262e-05, + "logits/chosen": -0.22011518478393555, + "logits/rejected": -0.19861875474452972, + "logps/chosen": -0.0004119708319194615, + "logps/rejected": -2.8756916522979736, + "loss": 0.4777, + "nll_loss": 0.11942359805107117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119707955396734e-05, + "rewards/margins": 0.28752797842025757, + "rewards/rejected": -0.28756919503211975, + "step": 13292 + }, + { + "epoch": 9.192946058091286, + "grad_norm": 2.9223194122314453, + "learning_rate": 4.483633010603965e-06, + "log_odds_chosen": 11.167993545532227, + "log_odds_ratio": -2.5012381229316816e-05, + "logits/chosen": 0.07321306318044662, + "logits/rejected": -0.00747278705239296, + "logps/chosen": -0.00012964705820195377, + "logps/rejected": -2.1343207359313965, + "loss": 0.3096, + "nll_loss": 0.07740969210863113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2964705092599615e-05, + "rewards/margins": 0.2134191244840622, + "rewards/rejected": -0.21343207359313965, + "step": 13293 + }, + { + "epoch": 9.193637621023512, + "grad_norm": 2.6296606063842773, + "learning_rate": 4.479790994313816e-06, + "log_odds_chosen": 10.825248718261719, + "log_odds_ratio": -4.6251479943748564e-05, + "logits/chosen": -0.4593941271305084, + "logits/rejected": -0.4678587019443512, + "logps/chosen": -0.0001595508656464517, + "logps/rejected": -1.4237827062606812, + "loss": 0.234, + "nll_loss": 0.058507852256298065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5955087292240933e-05, + "rewards/margins": 0.1423623263835907, + "rewards/rejected": -0.14237827062606812, + "step": 13294 + }, + { + "epoch": 9.19432918395574, + "grad_norm": 4.109340190887451, + "learning_rate": 4.475948978023667e-06, + "log_odds_chosen": 12.034053802490234, + "log_odds_ratio": -3.0341378078446724e-05, + "logits/chosen": 0.08396705240011215, + "logits/rejected": 0.06960800290107727, + "logps/chosen": -0.00012110539682907984, + "logps/rejected": -2.9148576259613037, + "loss": 0.4502, + "nll_loss": 0.11255712807178497, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2110540410503745e-05, + "rewards/margins": 0.2914736568927765, + "rewards/rejected": -0.2914857566356659, + "step": 13295 + }, + { + "epoch": 9.195020746887966, + "grad_norm": 4.959160327911377, + "learning_rate": 4.472106961733518e-06, + "log_odds_chosen": 11.456915855407715, + "log_odds_ratio": -2.070013215416111e-05, + "logits/chosen": 0.10722736269235611, + "logits/rejected": -0.04543411731719971, + "logps/chosen": -0.0005359066999517381, + "logps/rejected": -2.7827417850494385, + "loss": 0.61, + "nll_loss": 0.15250572562217712, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.359066926757805e-05, + "rewards/margins": 0.27822059392929077, + "rewards/rejected": -0.27827420830726624, + "step": 13296 + }, + { + "epoch": 9.195712309820193, + "grad_norm": 3.350621461868286, + "learning_rate": 4.4682649454433695e-06, + "log_odds_chosen": 11.722689628601074, + "log_odds_ratio": -1.1843771972053219e-05, + "logits/chosen": -0.07416524738073349, + "logits/rejected": -0.33439549803733826, + "logps/chosen": -0.000422697514295578, + "logps/rejected": -3.2973790168762207, + "loss": 0.266, + "nll_loss": 0.06649604439735413, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.226975215715356e-05, + "rewards/margins": 0.32969561219215393, + "rewards/rejected": -0.3297378718852997, + "step": 13297 + }, + { + "epoch": 9.19640387275242, + "grad_norm": 3.1698036193847656, + "learning_rate": 4.464422929153219e-06, + "log_odds_chosen": 12.69196891784668, + "log_odds_ratio": -1.0068518349726219e-05, + "logits/chosen": -0.200626403093338, + "logits/rejected": -0.22722738981246948, + "logps/chosen": -7.099766662577167e-05, + "logps/rejected": -3.128727912902832, + "loss": 0.4568, + "nll_loss": 0.11419538408517838, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.099766662577167e-06, + "rewards/margins": 0.3128657042980194, + "rewards/rejected": -0.31287282705307007, + "step": 13298 + }, + { + "epoch": 9.197095435684647, + "grad_norm": 2.4214847087860107, + "learning_rate": 4.460580912863071e-06, + "log_odds_chosen": 11.50594711303711, + "log_odds_ratio": -2.9842894946341403e-05, + "logits/chosen": -0.27305135130882263, + "logits/rejected": -0.24569541215896606, + "logps/chosen": -8.768929546931759e-05, + "logps/rejected": -2.1344499588012695, + "loss": 0.2693, + "nll_loss": 0.06731890141963959, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.768928637437057e-06, + "rewards/margins": 0.21343624591827393, + "rewards/rejected": -0.2134450376033783, + "step": 13299 + }, + { + "epoch": 9.197786998616873, + "grad_norm": 3.382467031478882, + "learning_rate": 4.456738896572922e-06, + "log_odds_chosen": 10.984672546386719, + "log_odds_ratio": -5.946517194388434e-05, + "logits/chosen": 0.5020791292190552, + "logits/rejected": 0.43189650774002075, + "logps/chosen": -0.0009921141900122166, + "logps/rejected": -2.1780381202697754, + "loss": 0.4387, + "nll_loss": 0.10966768860816956, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.921141463564709e-05, + "rewards/margins": 0.21770460903644562, + "rewards/rejected": -0.21780380606651306, + "step": 13300 + }, + { + "epoch": 9.1984785615491, + "grad_norm": 3.9634034633636475, + "learning_rate": 4.4528968802827725e-06, + "log_odds_chosen": 10.432125091552734, + "log_odds_ratio": -0.0005259269964881241, + "logits/chosen": -0.11210166662931442, + "logits/rejected": -0.1380247175693512, + "logps/chosen": -0.0006339678075164557, + "logps/rejected": -2.1972599029541016, + "loss": 0.4485, + "nll_loss": 0.11206686496734619, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.339678657241166e-05, + "rewards/margins": 0.2196625918149948, + "rewards/rejected": -0.21972598135471344, + "step": 13301 + }, + { + "epoch": 9.199170124481327, + "grad_norm": 3.054126262664795, + "learning_rate": 4.449054863992624e-06, + "log_odds_chosen": 10.148401260375977, + "log_odds_ratio": -0.0006780875264666975, + "logits/chosen": -0.3775310516357422, + "logits/rejected": -0.4329449534416199, + "logps/chosen": -0.00037620688090100884, + "logps/rejected": -1.6183583736419678, + "loss": 0.2996, + "nll_loss": 0.07482132315635681, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7620688090100884e-05, + "rewards/margins": 0.16179822385311127, + "rewards/rejected": -0.16183583438396454, + "step": 13302 + }, + { + "epoch": 9.199861687413554, + "grad_norm": 3.3058624267578125, + "learning_rate": 4.445212847702474e-06, + "log_odds_chosen": 11.110430717468262, + "log_odds_ratio": -0.00015114396228455007, + "logits/chosen": -0.46796417236328125, + "logits/rejected": -0.47808900475502014, + "logps/chosen": -7.569074659841135e-05, + "logps/rejected": -1.7901885509490967, + "loss": 0.4575, + "nll_loss": 0.11437175422906876, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.569075023639016e-06, + "rewards/margins": 0.1790112853050232, + "rewards/rejected": -0.17901885509490967, + "step": 13303 + }, + { + "epoch": 9.20055325034578, + "grad_norm": 2.1154303550720215, + "learning_rate": 4.441370831412326e-06, + "log_odds_chosen": 10.566178321838379, + "log_odds_ratio": -0.00013299538113642484, + "logits/chosen": 0.21447968482971191, + "logits/rejected": 0.19621631503105164, + "logps/chosen": -0.00033608500962145627, + "logps/rejected": -1.8940387964248657, + "loss": 0.2311, + "nll_loss": 0.05776310712099075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3608499506954104e-05, + "rewards/margins": 0.1893702745437622, + "rewards/rejected": -0.18940389156341553, + "step": 13304 + }, + { + "epoch": 9.201244813278008, + "grad_norm": 4.0034098625183105, + "learning_rate": 4.437528815122176e-06, + "log_odds_chosen": 11.385026931762695, + "log_odds_ratio": -3.283590922364965e-05, + "logits/chosen": -0.2957315444946289, + "logits/rejected": -0.40880489349365234, + "logps/chosen": -0.0003526700893417001, + "logps/rejected": -2.501044511795044, + "loss": 0.5239, + "nll_loss": 0.13096894323825836, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.526701038936153e-05, + "rewards/margins": 0.25006917119026184, + "rewards/rejected": -0.25010445713996887, + "step": 13305 + }, + { + "epoch": 9.201936376210234, + "grad_norm": 4.126883029937744, + "learning_rate": 4.433686798832027e-06, + "log_odds_chosen": 12.719415664672852, + "log_odds_ratio": -5.7122347243421245e-06, + "logits/chosen": -0.20841145515441895, + "logits/rejected": -0.295016348361969, + "logps/chosen": -0.00010687931353459135, + "logps/rejected": -3.5534002780914307, + "loss": 0.3601, + "nll_loss": 0.09003311395645142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0687931535358075e-05, + "rewards/margins": 0.35532933473587036, + "rewards/rejected": -0.35534003376960754, + "step": 13306 + }, + { + "epoch": 9.202627939142461, + "grad_norm": 3.3515937328338623, + "learning_rate": 4.429844782541878e-06, + "log_odds_chosen": 10.834237098693848, + "log_odds_ratio": -0.0002241548936581239, + "logits/chosen": -0.34491413831710815, + "logits/rejected": -0.36780351400375366, + "logps/chosen": -0.00019741068535950035, + "logps/rejected": -1.9140173196792603, + "loss": 0.31, + "nll_loss": 0.07748197019100189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9741069991141558e-05, + "rewards/margins": 0.19138199090957642, + "rewards/rejected": -0.19140173494815826, + "step": 13307 + }, + { + "epoch": 9.203319502074688, + "grad_norm": 3.089019775390625, + "learning_rate": 4.4260027662517294e-06, + "log_odds_chosen": 10.662595748901367, + "log_odds_ratio": -3.3396681828889996e-05, + "logits/chosen": -0.2811889350414276, + "logits/rejected": -0.3584701418876648, + "logps/chosen": -0.00044508109567686915, + "logps/rejected": -2.1020030975341797, + "loss": 0.3703, + "nll_loss": 0.09256477653980255, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4508109567686915e-05, + "rewards/margins": 0.21015578508377075, + "rewards/rejected": -0.21020029485225677, + "step": 13308 + }, + { + "epoch": 9.204011065006915, + "grad_norm": 4.094654560089111, + "learning_rate": 4.42216074996158e-06, + "log_odds_chosen": 11.015249252319336, + "log_odds_ratio": -4.134257687837817e-05, + "logits/chosen": 0.17103615403175354, + "logits/rejected": 0.05913734436035156, + "logps/chosen": -0.00017921102698892355, + "logps/rejected": -2.214588165283203, + "loss": 0.468, + "nll_loss": 0.11699028313159943, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.79211056092754e-05, + "rewards/margins": 0.2214408963918686, + "rewards/rejected": -0.2214588224887848, + "step": 13309 + }, + { + "epoch": 9.204702627939142, + "grad_norm": 2.4857873916625977, + "learning_rate": 4.418318733671431e-06, + "log_odds_chosen": 10.965414047241211, + "log_odds_ratio": -0.0001001676864689216, + "logits/chosen": -0.0682789534330368, + "logits/rejected": -0.13583692908287048, + "logps/chosen": -0.000550756580196321, + "logps/rejected": -1.8792402744293213, + "loss": 0.2672, + "nll_loss": 0.06679284572601318, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.507566311280243e-05, + "rewards/margins": 0.18786895275115967, + "rewards/rejected": -0.18792402744293213, + "step": 13310 + }, + { + "epoch": 9.205394190871369, + "grad_norm": 3.659666061401367, + "learning_rate": 4.4144767173812825e-06, + "log_odds_chosen": 11.955492973327637, + "log_odds_ratio": -2.0881549062323757e-05, + "logits/chosen": -0.3871539235115051, + "logits/rejected": -0.38239070773124695, + "logps/chosen": -7.013310823822394e-05, + "logps/rejected": -2.347487211227417, + "loss": 0.3603, + "nll_loss": 0.09007039666175842, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.013311005721334e-06, + "rewards/margins": 0.2347417175769806, + "rewards/rejected": -0.2347487509250641, + "step": 13311 + }, + { + "epoch": 9.206085753803595, + "grad_norm": 5.377493381500244, + "learning_rate": 4.4106347010911324e-06, + "log_odds_chosen": 12.305624008178711, + "log_odds_ratio": -9.869532732409425e-06, + "logits/chosen": 0.05969385802745819, + "logits/rejected": 0.022483449429273605, + "logps/chosen": -9.224964014720172e-05, + "logps/rejected": -2.647648811340332, + "loss": 0.5416, + "nll_loss": 0.13539725542068481, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224963832821231e-06, + "rewards/margins": 0.2647556662559509, + "rewards/rejected": -0.2647649049758911, + "step": 13312 + }, + { + "epoch": 9.206777316735822, + "grad_norm": 2.8959801197052, + "learning_rate": 4.406792684800984e-06, + "log_odds_chosen": 10.231405258178711, + "log_odds_ratio": -0.00010332741658203304, + "logits/chosen": -0.26172447204589844, + "logits/rejected": -0.3718671202659607, + "logps/chosen": -0.00044131246977485716, + "logps/rejected": -1.9781062602996826, + "loss": 0.3332, + "nll_loss": 0.08329068124294281, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.413124770508148e-05, + "rewards/margins": 0.1977664977312088, + "rewards/rejected": -0.19781062006950378, + "step": 13313 + }, + { + "epoch": 9.207468879668049, + "grad_norm": 4.0241217613220215, + "learning_rate": 4.402950668510835e-06, + "log_odds_chosen": 11.831079483032227, + "log_odds_ratio": -3.080906390096061e-05, + "logits/chosen": 0.3046078085899353, + "logits/rejected": 0.1909351944923401, + "logps/chosen": -0.00017704170022625476, + "logps/rejected": -3.0421605110168457, + "loss": 0.5471, + "nll_loss": 0.13676731288433075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7704169295029715e-05, + "rewards/margins": 0.30419835448265076, + "rewards/rejected": -0.30421608686447144, + "step": 13314 + }, + { + "epoch": 9.208160442600276, + "grad_norm": 3.4288268089294434, + "learning_rate": 4.3991086522206855e-06, + "log_odds_chosen": 11.901636123657227, + "log_odds_ratio": -3.4411190426908433e-05, + "logits/chosen": -0.17353087663650513, + "logits/rejected": -0.27299991250038147, + "logps/chosen": -0.00021824287250638008, + "logps/rejected": -2.9237382411956787, + "loss": 0.4169, + "nll_loss": 0.10421036183834076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.182428761443589e-05, + "rewards/margins": 0.29235199093818665, + "rewards/rejected": -0.2923738360404968, + "step": 13315 + }, + { + "epoch": 9.208852005532503, + "grad_norm": 4.281277656555176, + "learning_rate": 4.395266635930536e-06, + "log_odds_chosen": 10.813922882080078, + "log_odds_ratio": -0.00017469703743699938, + "logits/chosen": -0.1653359830379486, + "logits/rejected": -0.20505306124687195, + "logps/chosen": -0.0006260251393541694, + "logps/rejected": -2.2798168659210205, + "loss": 0.3885, + "nll_loss": 0.09710270166397095, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.260250665945932e-05, + "rewards/margins": 0.2279190868139267, + "rewards/rejected": -0.22798168659210205, + "step": 13316 + }, + { + "epoch": 9.20954356846473, + "grad_norm": 4.29088020324707, + "learning_rate": 4.391424619640387e-06, + "log_odds_chosen": 12.491415977478027, + "log_odds_ratio": -6.058309281797847e-06, + "logits/chosen": 0.06783643364906311, + "logits/rejected": 0.06438513845205307, + "logps/chosen": -0.00016462888743262738, + "logps/rejected": -3.220424175262451, + "loss": 0.4483, + "nll_loss": 0.11207491159439087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.646288910706062e-05, + "rewards/margins": 0.32202598452568054, + "rewards/rejected": -0.32204243540763855, + "step": 13317 + }, + { + "epoch": 9.210235131396956, + "grad_norm": 2.623915433883667, + "learning_rate": 4.387582603350239e-06, + "log_odds_chosen": 10.996440887451172, + "log_odds_ratio": -3.61188140232116e-05, + "logits/chosen": -0.515557050704956, + "logits/rejected": -0.6479478478431702, + "logps/chosen": -0.00018121147877536714, + "logps/rejected": -2.1568994522094727, + "loss": 0.2525, + "nll_loss": 0.06312118470668793, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.812115078791976e-05, + "rewards/margins": 0.2156718373298645, + "rewards/rejected": -0.21568995714187622, + "step": 13318 + }, + { + "epoch": 9.210926694329183, + "grad_norm": 4.4565510749816895, + "learning_rate": 4.383740587060089e-06, + "log_odds_chosen": 11.984901428222656, + "log_odds_ratio": -1.0817630936799105e-05, + "logits/chosen": -0.5716065168380737, + "logits/rejected": -0.678408682346344, + "logps/chosen": -0.00011433548934292048, + "logps/rejected": -2.6132736206054688, + "loss": 0.443, + "nll_loss": 0.11074677109718323, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1433548934292048e-05, + "rewards/margins": 0.2613159418106079, + "rewards/rejected": -0.2613273859024048, + "step": 13319 + }, + { + "epoch": 9.21161825726141, + "grad_norm": 2.0655133724212646, + "learning_rate": 4.37989857076994e-06, + "log_odds_chosen": 10.456897735595703, + "log_odds_ratio": -6.552055856445804e-05, + "logits/chosen": -0.38059210777282715, + "logits/rejected": -0.4759252667427063, + "logps/chosen": -0.00036717430339194834, + "logps/rejected": -2.2479348182678223, + "loss": 0.1991, + "nll_loss": 0.049757011234760284, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.671742888400331e-05, + "rewards/margins": 0.22475677728652954, + "rewards/rejected": -0.22479349374771118, + "step": 13320 + }, + { + "epoch": 9.212309820193637, + "grad_norm": 4.373488903045654, + "learning_rate": 4.376056554479791e-06, + "log_odds_chosen": 10.9266357421875, + "log_odds_ratio": -4.441215787664987e-05, + "logits/chosen": 0.042728446424007416, + "logits/rejected": -0.03302290290594101, + "logps/chosen": -0.00030370999593287706, + "logps/rejected": -1.8644747734069824, + "loss": 0.436, + "nll_loss": 0.10898858308792114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0371000320883468e-05, + "rewards/margins": 0.1864171326160431, + "rewards/rejected": -0.18644748628139496, + "step": 13321 + }, + { + "epoch": 9.213001383125864, + "grad_norm": 2.570322036743164, + "learning_rate": 4.3722145381896425e-06, + "log_odds_chosen": 10.678181648254395, + "log_odds_ratio": -3.701847163029015e-05, + "logits/chosen": -0.30406653881073, + "logits/rejected": -0.30958351492881775, + "logps/chosen": -0.00010654401557985693, + "logps/rejected": -1.6126346588134766, + "loss": 0.2565, + "nll_loss": 0.06412629783153534, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0654401194187813e-05, + "rewards/margins": 0.16125282645225525, + "rewards/rejected": -0.16126346588134766, + "step": 13322 + }, + { + "epoch": 9.21369294605809, + "grad_norm": 3.3682193756103516, + "learning_rate": 4.368372521899493e-06, + "log_odds_chosen": 11.65042495727539, + "log_odds_ratio": -7.514456228818744e-05, + "logits/chosen": -0.3465287983417511, + "logits/rejected": -0.3993972837924957, + "logps/chosen": -0.00011759632616303861, + "logps/rejected": -2.6790201663970947, + "loss": 0.3229, + "nll_loss": 0.08072992414236069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.17596318887081e-05, + "rewards/margins": 0.26789024472236633, + "rewards/rejected": -0.2679020166397095, + "step": 13323 + }, + { + "epoch": 9.214384508990317, + "grad_norm": 4.059265613555908, + "learning_rate": 4.364530505609344e-06, + "log_odds_chosen": 10.750175476074219, + "log_odds_ratio": -0.0003338758833706379, + "logits/chosen": -0.21408270299434662, + "logits/rejected": -0.2968137860298157, + "logps/chosen": -0.0005572262452915311, + "logps/rejected": -2.1982357501983643, + "loss": 0.5274, + "nll_loss": 0.1318206787109375, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.57226303499192e-05, + "rewards/margins": 0.21976785361766815, + "rewards/rejected": -0.21982358396053314, + "step": 13324 + }, + { + "epoch": 9.215076071922544, + "grad_norm": 3.622119665145874, + "learning_rate": 4.3606884893191956e-06, + "log_odds_chosen": 10.936710357666016, + "log_odds_ratio": -3.496746649034321e-05, + "logits/chosen": -0.4875425100326538, + "logits/rejected": -0.5060606598854065, + "logps/chosen": -0.00024507613852620125, + "logps/rejected": -2.3551130294799805, + "loss": 0.3314, + "nll_loss": 0.08283500373363495, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4507613488822244e-05, + "rewards/margins": 0.23548682034015656, + "rewards/rejected": -0.23551130294799805, + "step": 13325 + }, + { + "epoch": 9.215767634854771, + "grad_norm": 4.0741472244262695, + "learning_rate": 4.3568464730290455e-06, + "log_odds_chosen": 10.13569450378418, + "log_odds_ratio": -0.00016341662558261305, + "logits/chosen": -0.26546046137809753, + "logits/rejected": -0.41923052072525024, + "logps/chosen": -0.0007290366338565946, + "logps/rejected": -1.5351698398590088, + "loss": 0.4524, + "nll_loss": 0.11307627707719803, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.290366193046793e-05, + "rewards/margins": 0.1534440815448761, + "rewards/rejected": -0.1535169780254364, + "step": 13326 + }, + { + "epoch": 9.216459197786998, + "grad_norm": 4.307971477508545, + "learning_rate": 4.353004456738897e-06, + "log_odds_chosen": 10.221899032592773, + "log_odds_ratio": -0.00010868780373129994, + "logits/chosen": -0.07241721451282501, + "logits/rejected": -0.1888885498046875, + "logps/chosen": -0.000540388748049736, + "logps/rejected": -2.0440444946289062, + "loss": 0.456, + "nll_loss": 0.11398278921842575, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.403887917054817e-05, + "rewards/margins": 0.20435041189193726, + "rewards/rejected": -0.20440445840358734, + "step": 13327 + }, + { + "epoch": 9.217150760719225, + "grad_norm": 2.491574287414551, + "learning_rate": 4.349162440448748e-06, + "log_odds_chosen": 10.440887451171875, + "log_odds_ratio": -9.795461664907634e-05, + "logits/chosen": -0.1491602510213852, + "logits/rejected": -0.3474409282207489, + "logps/chosen": -0.00017148329061456025, + "logps/rejected": -1.7223557233810425, + "loss": 0.3691, + "nll_loss": 0.09227250516414642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7148329789051786e-05, + "rewards/margins": 0.1722184121608734, + "rewards/rejected": -0.17223556339740753, + "step": 13328 + }, + { + "epoch": 9.217842323651452, + "grad_norm": 3.437854528427124, + "learning_rate": 4.3453204241585986e-06, + "log_odds_chosen": 11.172818183898926, + "log_odds_ratio": -2.2042973796487786e-05, + "logits/chosen": 0.32086265087127686, + "logits/rejected": 0.015460759401321411, + "logps/chosen": -0.0001118913060054183, + "logps/rejected": -2.108009099960327, + "loss": 0.398, + "nll_loss": 0.09949032962322235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.118913041864289e-05, + "rewards/margins": 0.2107897251844406, + "rewards/rejected": -0.2108009159564972, + "step": 13329 + }, + { + "epoch": 9.218533886583678, + "grad_norm": 5.149376392364502, + "learning_rate": 4.341478407868449e-06, + "log_odds_chosen": 11.058161735534668, + "log_odds_ratio": -0.00018130963144358248, + "logits/chosen": -0.3922494053840637, + "logits/rejected": -0.32720354199409485, + "logps/chosen": -0.0002020241809077561, + "logps/rejected": -2.359410285949707, + "loss": 0.5601, + "nll_loss": 0.14000999927520752, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0202420273562893e-05, + "rewards/margins": 0.23592083156108856, + "rewards/rejected": -0.2359410524368286, + "step": 13330 + }, + { + "epoch": 9.219225449515905, + "grad_norm": 4.437367916107178, + "learning_rate": 4.337636391578301e-06, + "log_odds_chosen": 10.904949188232422, + "log_odds_ratio": -0.0001882653741631657, + "logits/chosen": -0.10703521966934204, + "logits/rejected": -0.24360337853431702, + "logps/chosen": -0.00041447189869359136, + "logps/rejected": -2.1529417037963867, + "loss": 0.6033, + "nll_loss": 0.15079988539218903, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.14471905969549e-05, + "rewards/margins": 0.21525274217128754, + "rewards/rejected": -0.21529419720172882, + "step": 13331 + }, + { + "epoch": 9.219917012448132, + "grad_norm": 2.943000316619873, + "learning_rate": 4.333794375288152e-06, + "log_odds_chosen": 10.67567253112793, + "log_odds_ratio": -4.084103420609608e-05, + "logits/chosen": 0.1298995167016983, + "logits/rejected": 0.08547534048557281, + "logps/chosen": -0.00013424924691207707, + "logps/rejected": -1.811115026473999, + "loss": 0.2824, + "nll_loss": 0.07060249149799347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3424925782601349e-05, + "rewards/margins": 0.1810980886220932, + "rewards/rejected": -0.18111151456832886, + "step": 13332 + }, + { + "epoch": 9.220608575380359, + "grad_norm": 3.3386802673339844, + "learning_rate": 4.329952358998002e-06, + "log_odds_chosen": 10.83498477935791, + "log_odds_ratio": -8.374982280656695e-05, + "logits/chosen": -0.31788182258605957, + "logits/rejected": -0.46187710762023926, + "logps/chosen": -0.00046914478298276663, + "logps/rejected": -2.571054458618164, + "loss": 0.3678, + "nll_loss": 0.09194758534431458, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6914479753468186e-05, + "rewards/margins": 0.2570585608482361, + "rewards/rejected": -0.2571054697036743, + "step": 13333 + }, + { + "epoch": 9.221300138312586, + "grad_norm": 2.7725768089294434, + "learning_rate": 4.326110342707853e-06, + "log_odds_chosen": 10.606285095214844, + "log_odds_ratio": -0.00010153828770853579, + "logits/chosen": -0.6269538402557373, + "logits/rejected": -0.6746143698692322, + "logps/chosen": -0.0003140957560390234, + "logps/rejected": -2.2495834827423096, + "loss": 0.3671, + "nll_loss": 0.09176291525363922, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1409577786689624e-05, + "rewards/margins": 0.22492694854736328, + "rewards/rejected": -0.22495834529399872, + "step": 13334 + }, + { + "epoch": 9.221991701244812, + "grad_norm": 3.7023515701293945, + "learning_rate": 4.322268326417704e-06, + "log_odds_chosen": 10.880495071411133, + "log_odds_ratio": -2.8478403692133725e-05, + "logits/chosen": -0.34293243288993835, + "logits/rejected": -0.37764379382133484, + "logps/chosen": -0.0004085856198798865, + "logps/rejected": -2.6674582958221436, + "loss": 0.3928, + "nll_loss": 0.09819929301738739, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085856198798865e-05, + "rewards/margins": 0.2667049765586853, + "rewards/rejected": -0.26674583554267883, + "step": 13335 + }, + { + "epoch": 9.22268326417704, + "grad_norm": 2.603764772415161, + "learning_rate": 4.3184263101275555e-06, + "log_odds_chosen": 11.418258666992188, + "log_odds_ratio": -1.953284845512826e-05, + "logits/chosen": -0.6696931719779968, + "logits/rejected": -0.7942060232162476, + "logps/chosen": -5.880265234736726e-05, + "logps/rejected": -1.7821133136749268, + "loss": 0.2921, + "nll_loss": 0.07302499562501907, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880265234736726e-06, + "rewards/margins": 0.1782054454088211, + "rewards/rejected": -0.17821133136749268, + "step": 13336 + }, + { + "epoch": 9.223374827109266, + "grad_norm": 2.756286859512329, + "learning_rate": 4.314584293837405e-06, + "log_odds_chosen": 12.973590850830078, + "log_odds_ratio": -6.149369710328756e-06, + "logits/chosen": -0.1303907334804535, + "logits/rejected": -0.09264279156923294, + "logps/chosen": -7.317406561924145e-05, + "logps/rejected": -3.3638851642608643, + "loss": 0.2846, + "nll_loss": 0.07113946974277496, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.317406925722025e-06, + "rewards/margins": 0.3363812267780304, + "rewards/rejected": -0.3363885283470154, + "step": 13337 + }, + { + "epoch": 9.224066390041493, + "grad_norm": 5.033344268798828, + "learning_rate": 4.310742277547257e-06, + "log_odds_chosen": 10.399462699890137, + "log_odds_ratio": -9.517098806099966e-05, + "logits/chosen": -0.491542786359787, + "logits/rejected": -0.5521076917648315, + "logps/chosen": -0.00017324337386526167, + "logps/rejected": -1.5467714071273804, + "loss": 0.2613, + "nll_loss": 0.06531417369842529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7324337022728287e-05, + "rewards/margins": 0.15465980768203735, + "rewards/rejected": -0.1546771377325058, + "step": 13338 + }, + { + "epoch": 9.22475795297372, + "grad_norm": 3.2574455738067627, + "learning_rate": 4.306900261257108e-06, + "log_odds_chosen": 11.016514778137207, + "log_odds_ratio": -3.3485335734440014e-05, + "logits/chosen": -0.7905449271202087, + "logits/rejected": -0.7238627076148987, + "logps/chosen": -0.00017339608166366816, + "logps/rejected": -2.120157480239868, + "loss": 0.2839, + "nll_loss": 0.07096044719219208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7339609257760458e-05, + "rewards/margins": 0.21199840307235718, + "rewards/rejected": -0.21201574802398682, + "step": 13339 + }, + { + "epoch": 9.225449515905947, + "grad_norm": 3.7507407665252686, + "learning_rate": 4.3030582449669585e-06, + "log_odds_chosen": 11.375577926635742, + "log_odds_ratio": -3.370269769220613e-05, + "logits/chosen": -0.35966721177101135, + "logits/rejected": -0.32348862290382385, + "logps/chosen": -0.00022426230134442449, + "logps/rejected": -2.071716785430908, + "loss": 0.3721, + "nll_loss": 0.09301963448524475, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2426227587857284e-05, + "rewards/margins": 0.20714925229549408, + "rewards/rejected": -0.20717167854309082, + "step": 13340 + }, + { + "epoch": 9.226141078838173, + "grad_norm": 3.5205812454223633, + "learning_rate": 4.29921622867681e-06, + "log_odds_chosen": 12.366856575012207, + "log_odds_ratio": -7.017895768512972e-06, + "logits/chosen": -0.059585437178611755, + "logits/rejected": 0.06428371369838715, + "logps/chosen": -0.00019455334404483438, + "logps/rejected": -3.157578945159912, + "loss": 0.3864, + "nll_loss": 0.09659731388092041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9455334040685557e-05, + "rewards/margins": 0.3157384395599365, + "rewards/rejected": -0.3157579302787781, + "step": 13341 + }, + { + "epoch": 9.2268326417704, + "grad_norm": 3.987541913986206, + "learning_rate": 4.295374212386661e-06, + "log_odds_chosen": 10.470842361450195, + "log_odds_ratio": -0.00010064824164146557, + "logits/chosen": -0.3783642053604126, + "logits/rejected": -0.4988144636154175, + "logps/chosen": -0.0002472895721439272, + "logps/rejected": -1.7773921489715576, + "loss": 0.2752, + "nll_loss": 0.06877979636192322, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.472895721439272e-05, + "rewards/margins": 0.17771446704864502, + "rewards/rejected": -0.1777392029762268, + "step": 13342 + }, + { + "epoch": 9.227524204702627, + "grad_norm": 6.074353218078613, + "learning_rate": 4.291532196096512e-06, + "log_odds_chosen": 11.384307861328125, + "log_odds_ratio": -0.0003111205587629229, + "logits/chosen": -0.09209015965461731, + "logits/rejected": -0.08078590035438538, + "logps/chosen": -0.0010177076328545809, + "logps/rejected": -2.2785487174987793, + "loss": 0.383, + "nll_loss": 0.09571006894111633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010177076183026657, + "rewards/margins": 0.22775310277938843, + "rewards/rejected": -0.2278548777103424, + "step": 13343 + }, + { + "epoch": 9.228215767634854, + "grad_norm": 3.827584743499756, + "learning_rate": 4.287690179806362e-06, + "log_odds_chosen": 10.490351676940918, + "log_odds_ratio": -4.755572081194259e-05, + "logits/chosen": -0.32035011053085327, + "logits/rejected": -0.34849417209625244, + "logps/chosen": -0.00032143102725967765, + "logps/rejected": -2.4039080142974854, + "loss": 0.4004, + "nll_loss": 0.10010391473770142, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.214309981558472e-05, + "rewards/margins": 0.24035868048667908, + "rewards/rejected": -0.240390807390213, + "step": 13344 + }, + { + "epoch": 9.22890733056708, + "grad_norm": 98.08912658691406, + "learning_rate": 4.283848163516214e-06, + "log_odds_chosen": 8.916812896728516, + "log_odds_ratio": -0.06944891810417175, + "logits/chosen": -0.22531327605247498, + "logits/rejected": -0.18448351323604584, + "logps/chosen": -0.0012186645762994885, + "logps/rejected": -1.728334665298462, + "loss": 0.6155, + "nll_loss": 0.14693495631217957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012186646927148104, + "rewards/margins": 0.17271161079406738, + "rewards/rejected": -0.17283347249031067, + "step": 13345 + }, + { + "epoch": 9.229598893499308, + "grad_norm": 3.4264307022094727, + "learning_rate": 4.280006147226065e-06, + "log_odds_chosen": 11.563746452331543, + "log_odds_ratio": -0.00013787433272227645, + "logits/chosen": -0.7487600445747375, + "logits/rejected": -0.8325913548469543, + "logps/chosen": -0.000709613086655736, + "logps/rejected": -3.6102347373962402, + "loss": 0.3832, + "nll_loss": 0.09579276293516159, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.096131594153121e-05, + "rewards/margins": 0.3609524965286255, + "rewards/rejected": -0.361023485660553, + "step": 13346 + }, + { + "epoch": 9.230290456431534, + "grad_norm": 2.988722801208496, + "learning_rate": 4.2761641309359154e-06, + "log_odds_chosen": 11.471531867980957, + "log_odds_ratio": -1.3612433576781768e-05, + "logits/chosen": -0.3343314528465271, + "logits/rejected": -0.35128065943717957, + "logps/chosen": -0.00019705847080331296, + "logps/rejected": -2.150318145751953, + "loss": 0.4472, + "nll_loss": 0.11179892718791962, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9705847080331296e-05, + "rewards/margins": 0.21501211822032928, + "rewards/rejected": -0.21503181755542755, + "step": 13347 + }, + { + "epoch": 9.230982019363761, + "grad_norm": 3.773630142211914, + "learning_rate": 4.272322114645767e-06, + "log_odds_chosen": 11.12164306640625, + "log_odds_ratio": -9.118324669543654e-05, + "logits/chosen": -0.21132421493530273, + "logits/rejected": -0.22750277817249298, + "logps/chosen": -0.0004267761541996151, + "logps/rejected": -3.0749218463897705, + "loss": 0.3305, + "nll_loss": 0.08262672275304794, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.267761323717423e-05, + "rewards/margins": 0.3074495196342468, + "rewards/rejected": -0.307492196559906, + "step": 13348 + }, + { + "epoch": 9.231673582295988, + "grad_norm": 2.9947738647460938, + "learning_rate": 4.268480098355617e-06, + "log_odds_chosen": 11.24973201751709, + "log_odds_ratio": -9.979541937354952e-05, + "logits/chosen": -0.3087311387062073, + "logits/rejected": -0.367544561624527, + "logps/chosen": -0.00039041758282110095, + "logps/rejected": -2.6293392181396484, + "loss": 0.3028, + "nll_loss": 0.07570140063762665, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9041759009705856e-05, + "rewards/margins": 0.2628948986530304, + "rewards/rejected": -0.2629339396953583, + "step": 13349 + }, + { + "epoch": 9.232365145228215, + "grad_norm": 2.789212465286255, + "learning_rate": 4.2646380820654685e-06, + "log_odds_chosen": 10.9905424118042, + "log_odds_ratio": -4.088755667908117e-05, + "logits/chosen": -0.40984347462654114, + "logits/rejected": -0.5168039798736572, + "logps/chosen": -0.00014427973655983806, + "logps/rejected": -1.7862883806228638, + "loss": 0.3145, + "nll_loss": 0.07861229032278061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4427974747377448e-05, + "rewards/margins": 0.17861440777778625, + "rewards/rejected": -0.1786288321018219, + "step": 13350 + }, + { + "epoch": 9.233056708160442, + "grad_norm": 4.028530597686768, + "learning_rate": 4.2607960657753184e-06, + "log_odds_chosen": 11.188385963439941, + "log_odds_ratio": -3.841559373540804e-05, + "logits/chosen": 0.10398241132497787, + "logits/rejected": 0.01779722422361374, + "logps/chosen": -0.00019596872152760625, + "logps/rejected": -2.461146116256714, + "loss": 0.4678, + "nll_loss": 0.11693590134382248, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9596871425164863e-05, + "rewards/margins": 0.24609503149986267, + "rewards/rejected": -0.2461146116256714, + "step": 13351 + }, + { + "epoch": 9.233748271092669, + "grad_norm": 2.9448742866516113, + "learning_rate": 4.25695404948517e-06, + "log_odds_chosen": 10.838666915893555, + "log_odds_ratio": -0.00013965301332063973, + "logits/chosen": 0.1105722114443779, + "logits/rejected": 0.08422739803791046, + "logps/chosen": -0.000485410651890561, + "logps/rejected": -2.761599063873291, + "loss": 0.3813, + "nll_loss": 0.09530912339687347, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.854106737184338e-05, + "rewards/margins": 0.276111364364624, + "rewards/rejected": -0.2761599123477936, + "step": 13352 + }, + { + "epoch": 9.234439834024895, + "grad_norm": 3.085446357727051, + "learning_rate": 4.253112033195021e-06, + "log_odds_chosen": 11.253827095031738, + "log_odds_ratio": -5.087409226689488e-05, + "logits/chosen": -0.0010985136032104492, + "logits/rejected": -0.08064904808998108, + "logps/chosen": -0.00032150166225619614, + "logps/rejected": -2.4000725746154785, + "loss": 0.3155, + "nll_loss": 0.07887643575668335, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.215016477042809e-05, + "rewards/margins": 0.23997509479522705, + "rewards/rejected": -0.24000725150108337, + "step": 13353 + }, + { + "epoch": 9.235131396957122, + "grad_norm": 3.1542537212371826, + "learning_rate": 4.2492700169048715e-06, + "log_odds_chosen": 10.027928352355957, + "log_odds_ratio": -7.808022201061249e-05, + "logits/chosen": -0.3447612524032593, + "logits/rejected": -0.37404900789260864, + "logps/chosen": -0.0002975011302623898, + "logps/rejected": -1.6374207735061646, + "loss": 0.2792, + "nll_loss": 0.06978316605091095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975011375383474e-05, + "rewards/margins": 0.16371232271194458, + "rewards/rejected": -0.1637420654296875, + "step": 13354 + }, + { + "epoch": 9.235822959889349, + "grad_norm": 3.949903726577759, + "learning_rate": 4.245428000614723e-06, + "log_odds_chosen": 12.837517738342285, + "log_odds_ratio": -7.425682724715443e-06, + "logits/chosen": -0.4023365378379822, + "logits/rejected": -0.4887845516204834, + "logps/chosen": -6.580314948223531e-05, + "logps/rejected": -3.1313557624816895, + "loss": 0.3661, + "nll_loss": 0.09151305258274078, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.580315130122472e-06, + "rewards/margins": 0.3131290078163147, + "rewards/rejected": -0.31313556432724, + "step": 13355 + }, + { + "epoch": 9.236514522821576, + "grad_norm": 2.7862908840179443, + "learning_rate": 4.241585984324574e-06, + "log_odds_chosen": 11.264383316040039, + "log_odds_ratio": -1.9450360923656262e-05, + "logits/chosen": -0.47059521079063416, + "logits/rejected": -0.518010139465332, + "logps/chosen": -0.00010168216249439865, + "logps/rejected": -1.9483394622802734, + "loss": 0.2726, + "nll_loss": 0.0681493729352951, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0168217158934567e-05, + "rewards/margins": 0.19482378661632538, + "rewards/rejected": -0.19483394920825958, + "step": 13356 + }, + { + "epoch": 9.237206085753803, + "grad_norm": 3.5049498081207275, + "learning_rate": 4.237743968034425e-06, + "log_odds_chosen": 10.488624572753906, + "log_odds_ratio": -0.00019752327352762222, + "logits/chosen": -0.39020171761512756, + "logits/rejected": -0.4895351529121399, + "logps/chosen": -0.000247740390477702, + "logps/rejected": -1.9443116188049316, + "loss": 0.3809, + "nll_loss": 0.09521554410457611, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4774039047770202e-05, + "rewards/margins": 0.1944064050912857, + "rewards/rejected": -0.19443117082118988, + "step": 13357 + }, + { + "epoch": 9.23789764868603, + "grad_norm": 4.490814208984375, + "learning_rate": 4.233901951744275e-06, + "log_odds_chosen": 11.172880172729492, + "log_odds_ratio": -8.803656965028495e-05, + "logits/chosen": -0.5653146505355835, + "logits/rejected": -0.534360408782959, + "logps/chosen": -0.0006889343494549394, + "logps/rejected": -2.551142692565918, + "loss": 0.4827, + "nll_loss": 0.1206570714712143, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.889343057991937e-05, + "rewards/margins": 0.2550453543663025, + "rewards/rejected": -0.25511425733566284, + "step": 13358 + }, + { + "epoch": 9.238589211618256, + "grad_norm": 2.8272547721862793, + "learning_rate": 4.230059935454127e-06, + "log_odds_chosen": 11.310001373291016, + "log_odds_ratio": -6.405496969819069e-05, + "logits/chosen": -0.3014225363731384, + "logits/rejected": -0.46181774139404297, + "logps/chosen": -0.00023675702686887234, + "logps/rejected": -2.3247127532958984, + "loss": 0.3121, + "nll_loss": 0.07801727950572968, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3675704142078757e-05, + "rewards/margins": 0.23244759440422058, + "rewards/rejected": -0.2324712574481964, + "step": 13359 + }, + { + "epoch": 9.239280774550483, + "grad_norm": 3.4311835765838623, + "learning_rate": 4.226217919163977e-06, + "log_odds_chosen": 11.17218017578125, + "log_odds_ratio": -0.00012092379620298743, + "logits/chosen": -0.7945163249969482, + "logits/rejected": -0.7751541137695312, + "logps/chosen": -0.00014527878374792635, + "logps/rejected": -1.9728879928588867, + "loss": 0.2791, + "nll_loss": 0.06976176053285599, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4527876373904292e-05, + "rewards/margins": 0.19727426767349243, + "rewards/rejected": -0.19728878140449524, + "step": 13360 + }, + { + "epoch": 9.23997233748271, + "grad_norm": 3.174574375152588, + "learning_rate": 4.2223759028738285e-06, + "log_odds_chosen": 11.811761856079102, + "log_odds_ratio": -1.3907579159422312e-05, + "logits/chosen": -0.6732967495918274, + "logits/rejected": -0.7310502529144287, + "logps/chosen": -0.00011698234447976574, + "logps/rejected": -2.339672327041626, + "loss": 0.3738, + "nll_loss": 0.09343670308589935, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1698235539370216e-05, + "rewards/margins": 0.23395554721355438, + "rewards/rejected": -0.23396724462509155, + "step": 13361 + }, + { + "epoch": 9.240663900414937, + "grad_norm": 7.092174053192139, + "learning_rate": 4.218533886583679e-06, + "log_odds_chosen": 10.788871765136719, + "log_odds_ratio": -5.844702900503762e-05, + "logits/chosen": -0.4904933273792267, + "logits/rejected": -0.5811038017272949, + "logps/chosen": -0.0001661910500843078, + "logps/rejected": -2.029539108276367, + "loss": 0.7281, + "nll_loss": 0.1820286214351654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.661910573602654e-05, + "rewards/margins": 0.2029373049736023, + "rewards/rejected": -0.20295390486717224, + "step": 13362 + }, + { + "epoch": 9.241355463347164, + "grad_norm": 4.031524658203125, + "learning_rate": 4.21469187029353e-06, + "log_odds_chosen": 11.320475578308105, + "log_odds_ratio": -0.00017843538080342114, + "logits/chosen": -0.1563161313533783, + "logits/rejected": -0.33736416697502136, + "logps/chosen": -0.00020673539256677032, + "logps/rejected": -2.886279582977295, + "loss": 0.4222, + "nll_loss": 0.10553856194019318, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0673540348070674e-05, + "rewards/margins": 0.28860729932785034, + "rewards/rejected": -0.2886279821395874, + "step": 13363 + }, + { + "epoch": 9.24204702627939, + "grad_norm": 3.389230489730835, + "learning_rate": 4.2108498540033816e-06, + "log_odds_chosen": 10.792014122009277, + "log_odds_ratio": -0.0001391873083775863, + "logits/chosen": -0.6205494999885559, + "logits/rejected": -0.654026210308075, + "logps/chosen": -0.00013583633699454367, + "logps/rejected": -1.9432293176651, + "loss": 0.3645, + "nll_loss": 0.09111414849758148, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.358363533654483e-05, + "rewards/margins": 0.19430936872959137, + "rewards/rejected": -0.19432294368743896, + "step": 13364 + }, + { + "epoch": 9.242738589211617, + "grad_norm": 2.394958257675171, + "learning_rate": 4.207007837713232e-06, + "log_odds_chosen": 10.103708267211914, + "log_odds_ratio": -0.00018401000124868006, + "logits/chosen": -0.25698402523994446, + "logits/rejected": -0.341422975063324, + "logps/chosen": -0.0002892519405577332, + "logps/rejected": -1.5689125061035156, + "loss": 0.2296, + "nll_loss": 0.05737714469432831, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.892519478336908e-05, + "rewards/margins": 0.15686233341693878, + "rewards/rejected": -0.15689125657081604, + "step": 13365 + }, + { + "epoch": 9.243430152143844, + "grad_norm": 3.7048909664154053, + "learning_rate": 4.203165821423083e-06, + "log_odds_chosen": 10.272027015686035, + "log_odds_ratio": -0.00011471907782834023, + "logits/chosen": -0.7627463936805725, + "logits/rejected": -0.7447300553321838, + "logps/chosen": -0.0012812871718779206, + "logps/rejected": -2.3495349884033203, + "loss": 0.3925, + "nll_loss": 0.09810735285282135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012812871136702597, + "rewards/margins": 0.23482537269592285, + "rewards/rejected": -0.23495349287986755, + "step": 13366 + }, + { + "epoch": 9.244121715076071, + "grad_norm": 3.1079623699188232, + "learning_rate": 4.199323805132934e-06, + "log_odds_chosen": 11.256549835205078, + "log_odds_ratio": -8.348859410034493e-05, + "logits/chosen": -0.49389970302581787, + "logits/rejected": -0.5464988946914673, + "logps/chosen": -0.0013827980728819966, + "logps/rejected": -2.9009451866149902, + "loss": 0.3342, + "nll_loss": 0.08353433012962341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001382798218401149, + "rewards/margins": 0.2899562418460846, + "rewards/rejected": -0.2900945246219635, + "step": 13367 + }, + { + "epoch": 9.244813278008298, + "grad_norm": 2.8402726650238037, + "learning_rate": 4.1954817888427846e-06, + "log_odds_chosen": 10.971306800842285, + "log_odds_ratio": -7.381623436231166e-05, + "logits/chosen": 0.45187243819236755, + "logits/rejected": 0.08216936886310577, + "logps/chosen": -0.0003103939234279096, + "logps/rejected": -2.2004811763763428, + "loss": 0.2843, + "nll_loss": 0.071077361702919, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.10393916151952e-05, + "rewards/margins": 0.22001707553863525, + "rewards/rejected": -0.22004812955856323, + "step": 13368 + }, + { + "epoch": 9.245504840940525, + "grad_norm": 3.845113515853882, + "learning_rate": 4.191639772552636e-06, + "log_odds_chosen": 12.36893081665039, + "log_odds_ratio": -8.689417882123962e-06, + "logits/chosen": -0.8230301141738892, + "logits/rejected": -0.818099856376648, + "logps/chosen": -0.00011422030365793034, + "logps/rejected": -2.9345250129699707, + "loss": 0.4345, + "nll_loss": 0.10861419141292572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1422031093388796e-05, + "rewards/margins": 0.29344111680984497, + "rewards/rejected": -0.29345250129699707, + "step": 13369 + }, + { + "epoch": 9.246196403872752, + "grad_norm": 3.3559730052948, + "learning_rate": 4.187797756262487e-06, + "log_odds_chosen": 11.219395637512207, + "log_odds_ratio": -6.94270056555979e-05, + "logits/chosen": -0.12866298854351044, + "logits/rejected": -0.1472199559211731, + "logps/chosen": -0.00026041181990876794, + "logps/rejected": -2.4518699645996094, + "loss": 0.2824, + "nll_loss": 0.07060521841049194, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6041183446068317e-05, + "rewards/margins": 0.24516098201274872, + "rewards/rejected": -0.24518701434135437, + "step": 13370 + }, + { + "epoch": 9.24688796680498, + "grad_norm": 2.970355749130249, + "learning_rate": 4.183955739972338e-06, + "log_odds_chosen": 11.566314697265625, + "log_odds_ratio": -2.120799763360992e-05, + "logits/chosen": -0.33343368768692017, + "logits/rejected": -0.4095820486545563, + "logps/chosen": -0.0002107325999531895, + "logps/rejected": -2.442662477493286, + "loss": 0.3911, + "nll_loss": 0.0977800190448761, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.107325963152107e-05, + "rewards/margins": 0.24424517154693604, + "rewards/rejected": -0.24426622688770294, + "step": 13371 + }, + { + "epoch": 9.247579529737205, + "grad_norm": 4.310222625732422, + "learning_rate": 4.180113723682188e-06, + "log_odds_chosen": 10.756083488464355, + "log_odds_ratio": -8.993431401904672e-05, + "logits/chosen": -0.3720986247062683, + "logits/rejected": -0.4264030456542969, + "logps/chosen": -0.0008469214662909508, + "logps/rejected": -2.52754807472229, + "loss": 0.4429, + "nll_loss": 0.11071190237998962, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.469213935313746e-05, + "rewards/margins": 0.25267013907432556, + "rewards/rejected": -0.252754807472229, + "step": 13372 + }, + { + "epoch": 9.248271092669434, + "grad_norm": 4.2071428298950195, + "learning_rate": 4.17627170739204e-06, + "log_odds_chosen": 12.32644271850586, + "log_odds_ratio": -9.070672604138963e-06, + "logits/chosen": -0.4278135895729065, + "logits/rejected": -0.4966719150543213, + "logps/chosen": -0.00012434230302460492, + "logps/rejected": -3.097768783569336, + "loss": 0.4411, + "nll_loss": 0.11027605831623077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2434231393854134e-05, + "rewards/margins": 0.30976441502571106, + "rewards/rejected": -0.3097768723964691, + "step": 13373 + }, + { + "epoch": 9.248962655601659, + "grad_norm": 3.6200366020202637, + "learning_rate": 4.17242969110189e-06, + "log_odds_chosen": 11.759748458862305, + "log_odds_ratio": -1.9820596207864583e-05, + "logits/chosen": -0.36331379413604736, + "logits/rejected": -0.3966585695743561, + "logps/chosen": -0.00013985123950988054, + "logps/rejected": -2.560699462890625, + "loss": 0.4649, + "nll_loss": 0.11621088534593582, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3985122677695472e-05, + "rewards/margins": 0.25605595111846924, + "rewards/rejected": -0.25606992840766907, + "step": 13374 + }, + { + "epoch": 9.249654218533887, + "grad_norm": 3.5906012058258057, + "learning_rate": 4.1685876748117415e-06, + "log_odds_chosen": 10.80752944946289, + "log_odds_ratio": -0.0004959495854564011, + "logits/chosen": -0.7887055277824402, + "logits/rejected": -0.6762387156486511, + "logps/chosen": -0.00022910605184733868, + "logps/rejected": -2.1070122718811035, + "loss": 0.3611, + "nll_loss": 0.09021367132663727, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2910604457138106e-05, + "rewards/margins": 0.2106783092021942, + "rewards/rejected": -0.21070122718811035, + "step": 13375 + }, + { + "epoch": 9.250345781466113, + "grad_norm": 2.9550132751464844, + "learning_rate": 4.164745658521592e-06, + "log_odds_chosen": 11.08054256439209, + "log_odds_ratio": -3.7290137697709724e-05, + "logits/chosen": -0.44283416867256165, + "logits/rejected": -0.4498756229877472, + "logps/chosen": -0.00014053418999537826, + "logps/rejected": -1.827393889427185, + "loss": 0.313, + "nll_loss": 0.07825109362602234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.405342118232511e-05, + "rewards/margins": 0.18272534012794495, + "rewards/rejected": -0.18273937702178955, + "step": 13376 + }, + { + "epoch": 9.251037344398341, + "grad_norm": 3.1746561527252197, + "learning_rate": 4.160903642231443e-06, + "log_odds_chosen": 11.631880760192871, + "log_odds_ratio": -2.470656909281388e-05, + "logits/chosen": -0.30462899804115295, + "logits/rejected": -0.4730372428894043, + "logps/chosen": -0.0001536312629468739, + "logps/rejected": -2.7632954120635986, + "loss": 0.3668, + "nll_loss": 0.09170671552419662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.536312629468739e-05, + "rewards/margins": 0.2763141691684723, + "rewards/rejected": -0.27632951736450195, + "step": 13377 + }, + { + "epoch": 9.251728907330566, + "grad_norm": 4.26964807510376, + "learning_rate": 4.157061625941295e-06, + "log_odds_chosen": 10.886106491088867, + "log_odds_ratio": -5.6561413657618687e-05, + "logits/chosen": -0.2692357897758484, + "logits/rejected": -0.25641119480133057, + "logps/chosen": -0.001114910002797842, + "logps/rejected": -2.8041157722473145, + "loss": 0.4504, + "nll_loss": 0.11258277297019958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011149099736940116, + "rewards/margins": 0.2803000807762146, + "rewards/rejected": -0.28041160106658936, + "step": 13378 + }, + { + "epoch": 9.252420470262795, + "grad_norm": 2.7620761394500732, + "learning_rate": 4.153219609651145e-06, + "log_odds_chosen": 11.351924896240234, + "log_odds_ratio": -8.459114178549498e-05, + "logits/chosen": -0.6529866456985474, + "logits/rejected": -0.5455139875411987, + "logps/chosen": -0.00045952797518111765, + "logps/rejected": -2.823627233505249, + "loss": 0.3881, + "nll_loss": 0.09702349454164505, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.595279824570753e-05, + "rewards/margins": 0.28231680393218994, + "rewards/rejected": -0.2823627293109894, + "step": 13379 + }, + { + "epoch": 9.25311203319502, + "grad_norm": 6.673590183258057, + "learning_rate": 4.149377593360996e-06, + "log_odds_chosen": 11.442913055419922, + "log_odds_ratio": -6.825349555583671e-05, + "logits/chosen": -0.5800237655639648, + "logits/rejected": -0.6468181610107422, + "logps/chosen": -8.98464786587283e-05, + "logps/rejected": -2.273469924926758, + "loss": 0.4706, + "nll_loss": 0.11765521764755249, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.98464804777177e-06, + "rewards/margins": 0.2273380309343338, + "rewards/rejected": -0.2273470014333725, + "step": 13380 + }, + { + "epoch": 9.253803596127248, + "grad_norm": 3.455115556716919, + "learning_rate": 4.145535577070847e-06, + "log_odds_chosen": 11.363089561462402, + "log_odds_ratio": -1.7025658962666057e-05, + "logits/chosen": -0.21496909856796265, + "logits/rejected": -0.279478520154953, + "logps/chosen": -0.00015081878518685699, + "logps/rejected": -2.4104347229003906, + "loss": 0.4615, + "nll_loss": 0.11536456644535065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.508187961007934e-05, + "rewards/margins": 0.24102838337421417, + "rewards/rejected": -0.24104347825050354, + "step": 13381 + }, + { + "epoch": 9.254495159059474, + "grad_norm": 11.020970344543457, + "learning_rate": 4.1416935607806984e-06, + "log_odds_chosen": 12.45634651184082, + "log_odds_ratio": -2.0810161004192196e-05, + "logits/chosen": -0.5012654662132263, + "logits/rejected": -0.5107147097587585, + "logps/chosen": -0.0001061395087162964, + "logps/rejected": -3.116454601287842, + "loss": 0.326, + "nll_loss": 0.08149020373821259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0613951417326462e-05, + "rewards/margins": 0.3116348385810852, + "rewards/rejected": -0.3116454482078552, + "step": 13382 + }, + { + "epoch": 9.255186721991702, + "grad_norm": 2.0821995735168457, + "learning_rate": 4.137851544490548e-06, + "log_odds_chosen": 11.173574447631836, + "log_odds_ratio": -0.00016994534234981984, + "logits/chosen": -0.4637315273284912, + "logits/rejected": -0.48671579360961914, + "logps/chosen": -0.0007016340969130397, + "logps/rejected": -2.4009807109832764, + "loss": 0.2577, + "nll_loss": 0.06439636647701263, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01634053257294e-05, + "rewards/margins": 0.24002791941165924, + "rewards/rejected": -0.24009808897972107, + "step": 13383 + }, + { + "epoch": 9.255878284923927, + "grad_norm": 3.860811948776245, + "learning_rate": 4.1340095282004e-06, + "log_odds_chosen": 11.584202766418457, + "log_odds_ratio": -3.2246229238808155e-05, + "logits/chosen": -0.4585683345794678, + "logits/rejected": -0.49702373147010803, + "logps/chosen": -6.798960384912789e-05, + "logps/rejected": -1.8565504550933838, + "loss": 0.4233, + "nll_loss": 0.10581757873296738, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.798960384912789e-06, + "rewards/margins": 0.18564824759960175, + "rewards/rejected": -0.18565505743026733, + "step": 13384 + }, + { + "epoch": 9.256569847856156, + "grad_norm": 3.1565005779266357, + "learning_rate": 4.130167511910251e-06, + "log_odds_chosen": 10.394317626953125, + "log_odds_ratio": -9.309701272286475e-05, + "logits/chosen": -0.19676190614700317, + "logits/rejected": -0.27044621109962463, + "logps/chosen": -0.0005752279539592564, + "logps/rejected": -2.072195053100586, + "loss": 0.3627, + "nll_loss": 0.09065388888120651, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7522796851117164e-05, + "rewards/margins": 0.20716197788715363, + "rewards/rejected": -0.20721949636936188, + "step": 13385 + }, + { + "epoch": 9.25726141078838, + "grad_norm": 5.39926815032959, + "learning_rate": 4.1263254956201014e-06, + "log_odds_chosen": 10.37912368774414, + "log_odds_ratio": -0.0001625988370506093, + "logits/chosen": -0.24881801009178162, + "logits/rejected": -0.22858017683029175, + "logps/chosen": -0.00021170491527300328, + "logps/rejected": -1.6691802740097046, + "loss": 0.4196, + "nll_loss": 0.10487478971481323, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.117049189109821e-05, + "rewards/margins": 0.16689686477184296, + "rewards/rejected": -0.16691802442073822, + "step": 13386 + }, + { + "epoch": 9.25795297372061, + "grad_norm": 3.983370304107666, + "learning_rate": 4.122483479329953e-06, + "log_odds_chosen": 10.968613624572754, + "log_odds_ratio": -3.0124385375529528e-05, + "logits/chosen": -0.5946142673492432, + "logits/rejected": -0.35130080580711365, + "logps/chosen": -0.00022189400624483824, + "logps/rejected": -2.337252616882324, + "loss": 0.7163, + "nll_loss": 0.179067462682724, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2189400624483824e-05, + "rewards/margins": 0.23370307683944702, + "rewards/rejected": -0.23372526466846466, + "step": 13387 + }, + { + "epoch": 9.258644536652836, + "grad_norm": 3.8471121788024902, + "learning_rate": 4.118641463039803e-06, + "log_odds_chosen": 9.967870712280273, + "log_odds_ratio": -0.00011659861047519371, + "logits/chosen": -0.24749401211738586, + "logits/rejected": -0.2510087788105011, + "logps/chosen": -0.0004633513162843883, + "logps/rejected": -1.5519118309020996, + "loss": 0.2452, + "nll_loss": 0.061281684786081314, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.633513162843883e-05, + "rewards/margins": 0.1551448553800583, + "rewards/rejected": -0.15519118309020996, + "step": 13388 + }, + { + "epoch": 9.259336099585063, + "grad_norm": 3.9011762142181396, + "learning_rate": 4.1147994467496545e-06, + "log_odds_chosen": 10.798189163208008, + "log_odds_ratio": -5.881582910660654e-05, + "logits/chosen": 0.12064599990844727, + "logits/rejected": 0.006398455239832401, + "logps/chosen": -0.0001932132727233693, + "logps/rejected": -1.985304832458496, + "loss": 0.4197, + "nll_loss": 0.10492676496505737, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.932132727233693e-05, + "rewards/margins": 0.19851118326187134, + "rewards/rejected": -0.19853049516677856, + "step": 13389 + }, + { + "epoch": 9.26002766251729, + "grad_norm": 2.5057811737060547, + "learning_rate": 4.110957430459505e-06, + "log_odds_chosen": 10.980243682861328, + "log_odds_ratio": -2.486979792593047e-05, + "logits/chosen": -0.4247814416885376, + "logits/rejected": -0.4757453501224518, + "logps/chosen": -0.00019985140534117818, + "logps/rejected": -2.2650866508483887, + "loss": 0.2637, + "nll_loss": 0.06593403965234756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9985140170319937e-05, + "rewards/margins": 0.22648866474628448, + "rewards/rejected": -0.22650866210460663, + "step": 13390 + }, + { + "epoch": 9.260719225449517, + "grad_norm": 3.2654213905334473, + "learning_rate": 4.107115414169356e-06, + "log_odds_chosen": 11.39018726348877, + "log_odds_ratio": -7.856798765715212e-05, + "logits/chosen": -0.42784425616264343, + "logits/rejected": -0.4002974033355713, + "logps/chosen": -0.00047765413182787597, + "logps/rejected": -3.1111772060394287, + "loss": 0.3503, + "nll_loss": 0.08757692575454712, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7765417548362166e-05, + "rewards/margins": 0.31106996536254883, + "rewards/rejected": -0.3111177086830139, + "step": 13391 + }, + { + "epoch": 9.261410788381744, + "grad_norm": 4.967043399810791, + "learning_rate": 4.103273397879208e-06, + "log_odds_chosen": 10.794997215270996, + "log_odds_ratio": -3.917513458873145e-05, + "logits/chosen": -0.70682692527771, + "logits/rejected": -0.7203770875930786, + "logps/chosen": -0.00022574425383936614, + "logps/rejected": -2.185096263885498, + "loss": 0.7308, + "nll_loss": 0.18269097805023193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2574426111532375e-05, + "rewards/margins": 0.21848703920841217, + "rewards/rejected": -0.21850961446762085, + "step": 13392 + }, + { + "epoch": 9.26210235131397, + "grad_norm": 3.9451189041137695, + "learning_rate": 4.099431381589058e-06, + "log_odds_chosen": 10.230953216552734, + "log_odds_ratio": -8.301199704874307e-05, + "logits/chosen": -0.3331199288368225, + "logits/rejected": -0.3650832176208496, + "logps/chosen": -0.0002603030006866902, + "logps/rejected": -1.8632946014404297, + "loss": 0.4404, + "nll_loss": 0.11009424924850464, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6030296794488095e-05, + "rewards/margins": 0.18630343675613403, + "rewards/rejected": -0.18632946908473969, + "step": 13393 + }, + { + "epoch": 9.262793914246197, + "grad_norm": 3.347769260406494, + "learning_rate": 4.095589365298909e-06, + "log_odds_chosen": 10.683270454406738, + "log_odds_ratio": -0.0002262951893499121, + "logits/chosen": -0.3697054386138916, + "logits/rejected": -0.37479496002197266, + "logps/chosen": -0.0003731002798303962, + "logps/rejected": -2.1226487159729004, + "loss": 0.3429, + "nll_loss": 0.08570367842912674, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.73100301658269e-05, + "rewards/margins": 0.21222756803035736, + "rewards/rejected": -0.21226486563682556, + "step": 13394 + }, + { + "epoch": 9.263485477178424, + "grad_norm": 3.057373523712158, + "learning_rate": 4.09174734900876e-06, + "log_odds_chosen": 11.90007495880127, + "log_odds_ratio": -9.3462695076596e-06, + "logits/chosen": -0.3630257248878479, + "logits/rejected": -0.3154976963996887, + "logps/chosen": -0.0001326186174992472, + "logps/rejected": -2.3629908561706543, + "loss": 0.3556, + "nll_loss": 0.08889217674732208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3261863387015183e-05, + "rewards/margins": 0.23628583550453186, + "rewards/rejected": -0.2362990826368332, + "step": 13395 + }, + { + "epoch": 9.264177040110651, + "grad_norm": 3.1586709022521973, + "learning_rate": 4.0879053327186115e-06, + "log_odds_chosen": 12.43212890625, + "log_odds_ratio": -1.1175688996445388e-05, + "logits/chosen": -0.3281235694885254, + "logits/rejected": -0.36359724402427673, + "logps/chosen": -0.00022547683329321444, + "logps/rejected": -3.7944984436035156, + "loss": 0.4362, + "nll_loss": 0.10905685275793076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2547683329321444e-05, + "rewards/margins": 0.3794272840023041, + "rewards/rejected": -0.37944984436035156, + "step": 13396 + }, + { + "epoch": 9.264868603042878, + "grad_norm": 3.0777478218078613, + "learning_rate": 4.084063316428461e-06, + "log_odds_chosen": 11.970759391784668, + "log_odds_ratio": -1.6800740922917612e-05, + "logits/chosen": -0.4665202796459198, + "logits/rejected": -0.6135318279266357, + "logps/chosen": -0.00027859132387675345, + "logps/rejected": -3.044900894165039, + "loss": 0.2766, + "nll_loss": 0.06914292275905609, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.785913056868594e-05, + "rewards/margins": 0.3044622540473938, + "rewards/rejected": -0.3044900894165039, + "step": 13397 + }, + { + "epoch": 9.265560165975105, + "grad_norm": 3.3075602054595947, + "learning_rate": 4.080221300138313e-06, + "log_odds_chosen": 11.618972778320312, + "log_odds_ratio": -1.8447863112669438e-05, + "logits/chosen": -0.35420989990234375, + "logits/rejected": -0.3924306333065033, + "logps/chosen": -8.088632603175938e-05, + "logps/rejected": -2.1522703170776367, + "loss": 0.3113, + "nll_loss": 0.07781346887350082, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.088632966973819e-06, + "rewards/margins": 0.21521896123886108, + "rewards/rejected": -0.21522706747055054, + "step": 13398 + }, + { + "epoch": 9.266251728907331, + "grad_norm": 3.825265407562256, + "learning_rate": 4.076379283848164e-06, + "log_odds_chosen": 10.867203712463379, + "log_odds_ratio": -0.00012594895088113844, + "logits/chosen": -0.16059978306293488, + "logits/rejected": -0.40944528579711914, + "logps/chosen": -0.0003734501078724861, + "logps/rejected": -2.837820291519165, + "loss": 0.4377, + "nll_loss": 0.10940054059028625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.734500933205709e-05, + "rewards/margins": 0.2837446928024292, + "rewards/rejected": -0.283782035112381, + "step": 13399 + }, + { + "epoch": 9.266943291839558, + "grad_norm": 4.893101692199707, + "learning_rate": 4.0725372675580145e-06, + "log_odds_chosen": 10.198366165161133, + "log_odds_ratio": -0.0001832096022553742, + "logits/chosen": 0.3699157238006592, + "logits/rejected": 0.3098328113555908, + "logps/chosen": -0.000413937697885558, + "logps/rejected": -1.8508220911026, + "loss": 0.4808, + "nll_loss": 0.12018544971942902, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.139377051615156e-05, + "rewards/margins": 0.18504083156585693, + "rewards/rejected": -0.18508222699165344, + "step": 13400 + }, + { + "epoch": 9.267634854771785, + "grad_norm": 4.754893779754639, + "learning_rate": 4.068695251267866e-06, + "log_odds_chosen": 10.338882446289062, + "log_odds_ratio": -0.00017796538304537535, + "logits/chosen": -0.13030719757080078, + "logits/rejected": -0.13309229910373688, + "logps/chosen": -0.0021453266963362694, + "logps/rejected": -2.7111756801605225, + "loss": 0.5487, + "nll_loss": 0.13715097308158875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021453265799209476, + "rewards/margins": 0.2709030508995056, + "rewards/rejected": -0.27111756801605225, + "step": 13401 + }, + { + "epoch": 9.268326417704012, + "grad_norm": 3.9337031841278076, + "learning_rate": 4.064853234977716e-06, + "log_odds_chosen": 10.759675979614258, + "log_odds_ratio": -3.709693919518031e-05, + "logits/chosen": -0.3434372842311859, + "logits/rejected": -0.2868998944759369, + "logps/chosen": -0.0002389464934822172, + "logps/rejected": -2.3696932792663574, + "loss": 0.251, + "nll_loss": 0.06274402141571045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3894648620625958e-05, + "rewards/margins": 0.23694540560245514, + "rewards/rejected": -0.23696933686733246, + "step": 13402 + }, + { + "epoch": 9.269017980636239, + "grad_norm": 3.7900757789611816, + "learning_rate": 4.0610112186875676e-06, + "log_odds_chosen": 10.866965293884277, + "log_odds_ratio": -2.9216182156233117e-05, + "logits/chosen": -0.07639175653457642, + "logits/rejected": -0.0732036828994751, + "logps/chosen": -0.00033576946589164436, + "logps/rejected": -2.242311477661133, + "loss": 0.3549, + "nll_loss": 0.08871279656887054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.35769473167602e-05, + "rewards/margins": 0.22419756650924683, + "rewards/rejected": -0.22423113882541656, + "step": 13403 + }, + { + "epoch": 9.269709543568466, + "grad_norm": 3.4664368629455566, + "learning_rate": 4.057169202397418e-06, + "log_odds_chosen": 11.006653785705566, + "log_odds_ratio": -6.016073530190624e-05, + "logits/chosen": -0.18965166807174683, + "logits/rejected": -0.1772252321243286, + "logps/chosen": -9.795861114980653e-05, + "logps/rejected": -1.912332534790039, + "loss": 0.3006, + "nll_loss": 0.07513882219791412, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.795860933081713e-06, + "rewards/margins": 0.19122344255447388, + "rewards/rejected": -0.19123321771621704, + "step": 13404 + }, + { + "epoch": 9.270401106500692, + "grad_norm": 3.510704278945923, + "learning_rate": 4.053327186107269e-06, + "log_odds_chosen": 12.442310333251953, + "log_odds_ratio": -7.347447535721585e-06, + "logits/chosen": -0.5683971643447876, + "logits/rejected": -0.56528240442276, + "logps/chosen": -0.00013179892266634852, + "logps/rejected": -3.196437120437622, + "loss": 0.4168, + "nll_loss": 0.10418683290481567, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3179893358028494e-05, + "rewards/margins": 0.31963056325912476, + "rewards/rejected": -0.3196437358856201, + "step": 13405 + }, + { + "epoch": 9.27109266943292, + "grad_norm": 3.9302003383636475, + "learning_rate": 4.04948516981712e-06, + "log_odds_chosen": 12.506831169128418, + "log_odds_ratio": -2.83784611383453e-05, + "logits/chosen": 0.01308729313313961, + "logits/rejected": -0.02067945897579193, + "logps/chosen": -0.00013831471733283252, + "logps/rejected": -2.956017017364502, + "loss": 0.442, + "nll_loss": 0.11050447821617126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3831473552272655e-05, + "rewards/margins": 0.2955878973007202, + "rewards/rejected": -0.2956017255783081, + "step": 13406 + }, + { + "epoch": 9.271784232365146, + "grad_norm": 4.038262844085693, + "learning_rate": 4.045643153526971e-06, + "log_odds_chosen": 10.31809139251709, + "log_odds_ratio": -7.249046757351607e-05, + "logits/chosen": -0.2748219966888428, + "logits/rejected": -0.2382151484489441, + "logps/chosen": -0.0004780899325851351, + "logps/rejected": -1.9250624179840088, + "loss": 0.2966, + "nll_loss": 0.07414360344409943, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.780899689649232e-05, + "rewards/margins": 0.19245845079421997, + "rewards/rejected": -0.19250623881816864, + "step": 13407 + }, + { + "epoch": 9.272475795297373, + "grad_norm": 2.4974985122680664, + "learning_rate": 4.041801137236822e-06, + "log_odds_chosen": 10.449653625488281, + "log_odds_ratio": -0.00027347650029696524, + "logits/chosen": -0.08837146311998367, + "logits/rejected": -0.08992377668619156, + "logps/chosen": -0.0009034351096488535, + "logps/rejected": -1.8143136501312256, + "loss": 0.3107, + "nll_loss": 0.07764752954244614, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.03435138752684e-05, + "rewards/margins": 0.1813410371541977, + "rewards/rejected": -0.181431382894516, + "step": 13408 + }, + { + "epoch": 9.2731673582296, + "grad_norm": 2.697535991668701, + "learning_rate": 4.037959120946673e-06, + "log_odds_chosen": 11.835368156433105, + "log_odds_ratio": -2.7249665436102077e-05, + "logits/chosen": -0.5644937753677368, + "logits/rejected": -0.5307819843292236, + "logps/chosen": -0.00026464249822311103, + "logps/rejected": -2.5975027084350586, + "loss": 0.3318, + "nll_loss": 0.08295246958732605, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6464249458513223e-05, + "rewards/margins": 0.25972384214401245, + "rewards/rejected": -0.2597503066062927, + "step": 13409 + }, + { + "epoch": 9.273858921161827, + "grad_norm": 4.584612846374512, + "learning_rate": 4.0341171046565245e-06, + "log_odds_chosen": 11.347679138183594, + "log_odds_ratio": -1.986040297197178e-05, + "logits/chosen": 0.06065264344215393, + "logits/rejected": -0.03498959168791771, + "logps/chosen": -0.00016907777171581984, + "logps/rejected": -2.6138739585876465, + "loss": 0.4471, + "nll_loss": 0.11177139729261398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6907777535379864e-05, + "rewards/margins": 0.2613704800605774, + "rewards/rejected": -0.2613874077796936, + "step": 13410 + }, + { + "epoch": 9.274550484094053, + "grad_norm": 2.4704439640045166, + "learning_rate": 4.030275088366374e-06, + "log_odds_chosen": 10.868306159973145, + "log_odds_ratio": -6.289570592343807e-05, + "logits/chosen": -0.2185719907283783, + "logits/rejected": -0.2263566255569458, + "logps/chosen": -0.0001450084673706442, + "logps/rejected": -1.8913393020629883, + "loss": 0.2828, + "nll_loss": 0.07068517059087753, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.45008461913676e-05, + "rewards/margins": 0.18911944329738617, + "rewards/rejected": -0.1891339123249054, + "step": 13411 + }, + { + "epoch": 9.27524204702628, + "grad_norm": 2.365954637527466, + "learning_rate": 4.026433072076226e-06, + "log_odds_chosen": 11.505110740661621, + "log_odds_ratio": -3.883875615429133e-05, + "logits/chosen": -0.2714047133922577, + "logits/rejected": -0.2261641025543213, + "logps/chosen": -0.00017098369426093996, + "logps/rejected": -2.8133089542388916, + "loss": 0.2827, + "nll_loss": 0.07066500186920166, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7098369426093996e-05, + "rewards/margins": 0.28131380677223206, + "rewards/rejected": -0.2813309133052826, + "step": 13412 + }, + { + "epoch": 9.275933609958507, + "grad_norm": 3.3742008209228516, + "learning_rate": 4.022591055786077e-06, + "log_odds_chosen": 12.555534362792969, + "log_odds_ratio": -2.3210215658764355e-05, + "logits/chosen": -0.03291553258895874, + "logits/rejected": -0.1688704490661621, + "logps/chosen": -0.00019250065088272095, + "logps/rejected": -3.8435990810394287, + "loss": 0.392, + "nll_loss": 0.09799201786518097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9250066543463618e-05, + "rewards/margins": 0.38434064388275146, + "rewards/rejected": -0.3843598961830139, + "step": 13413 + }, + { + "epoch": 9.276625172890734, + "grad_norm": 1.7878432273864746, + "learning_rate": 4.0187490394959275e-06, + "log_odds_chosen": 11.162397384643555, + "log_odds_ratio": -0.0003439478459767997, + "logits/chosen": -0.7669256925582886, + "logits/rejected": -0.7717230319976807, + "logps/chosen": -0.00041456997860223055, + "logps/rejected": -2.3134167194366455, + "loss": 0.2227, + "nll_loss": 0.05564933270215988, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1457002225797623e-05, + "rewards/margins": 0.2313002347946167, + "rewards/rejected": -0.2313416749238968, + "step": 13414 + }, + { + "epoch": 9.27731673582296, + "grad_norm": 2.39280104637146, + "learning_rate": 4.014907023205779e-06, + "log_odds_chosen": 10.64444351196289, + "log_odds_ratio": -8.962116407928988e-05, + "logits/chosen": -0.4091259241104126, + "logits/rejected": -0.4069325923919678, + "logps/chosen": -0.00017965941515285522, + "logps/rejected": -1.992423415184021, + "loss": 0.2297, + "nll_loss": 0.05740624666213989, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7965941879083402e-05, + "rewards/margins": 0.19922436773777008, + "rewards/rejected": -0.19924233853816986, + "step": 13415 + }, + { + "epoch": 9.278008298755188, + "grad_norm": 3.6200830936431885, + "learning_rate": 4.01106500691563e-06, + "log_odds_chosen": 10.999204635620117, + "log_odds_ratio": -0.00010778568685054779, + "logits/chosen": -0.10634081065654755, + "logits/rejected": -0.11936096847057343, + "logps/chosen": -0.0006000410066917539, + "logps/rejected": -2.684356689453125, + "loss": 0.3256, + "nll_loss": 0.08139465004205704, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000410212436691e-05, + "rewards/margins": 0.2683756649494171, + "rewards/rejected": -0.26843565702438354, + "step": 13416 + }, + { + "epoch": 9.278699861687414, + "grad_norm": 2.941218614578247, + "learning_rate": 4.007222990625481e-06, + "log_odds_chosen": 11.12997055053711, + "log_odds_ratio": -0.003518062410876155, + "logits/chosen": -0.28821274638175964, + "logits/rejected": -0.33975598216056824, + "logps/chosen": -0.002335771918296814, + "logps/rejected": -2.54252290725708, + "loss": 0.2761, + "nll_loss": 0.0686764121055603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00023357720056083053, + "rewards/margins": 0.25401872396469116, + "rewards/rejected": -0.25425228476524353, + "step": 13417 + }, + { + "epoch": 9.279391424619641, + "grad_norm": 2.9035792350769043, + "learning_rate": 4.003380974335331e-06, + "log_odds_chosen": 10.532563209533691, + "log_odds_ratio": -0.0002298601029906422, + "logits/chosen": -0.0022521987557411194, + "logits/rejected": 0.034229494631290436, + "logps/chosen": -0.0010790006490424275, + "logps/rejected": -2.0970377922058105, + "loss": 0.3064, + "nll_loss": 0.07658690214157104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010790007945615798, + "rewards/margins": 0.20959590375423431, + "rewards/rejected": -0.20970380306243896, + "step": 13418 + }, + { + "epoch": 9.280082987551868, + "grad_norm": 3.781830310821533, + "learning_rate": 3.999538958045182e-06, + "log_odds_chosen": 11.045536994934082, + "log_odds_ratio": -7.346242637140676e-05, + "logits/chosen": -0.298697829246521, + "logits/rejected": -0.25931185483932495, + "logps/chosen": -0.000305239693261683, + "logps/rejected": -2.1760785579681396, + "loss": 0.2925, + "nll_loss": 0.07311448454856873, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.052396641578525e-05, + "rewards/margins": 0.21757732331752777, + "rewards/rejected": -0.21760785579681396, + "step": 13419 + }, + { + "epoch": 9.280774550484095, + "grad_norm": 3.483137607574463, + "learning_rate": 3.995696941755033e-06, + "log_odds_chosen": 10.634360313415527, + "log_odds_ratio": -3.846009713015519e-05, + "logits/chosen": 0.11051935702562332, + "logits/rejected": 0.08204380422830582, + "logps/chosen": -8.226620411733165e-05, + "logps/rejected": -1.5227370262145996, + "loss": 0.2834, + "nll_loss": 0.07085447758436203, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.226620593632106e-06, + "rewards/margins": 0.15226547420024872, + "rewards/rejected": -0.15227369964122772, + "step": 13420 + }, + { + "epoch": 9.281466113416322, + "grad_norm": 1.9649827480316162, + "learning_rate": 3.9918549254648844e-06, + "log_odds_chosen": 9.993119239807129, + "log_odds_ratio": -0.0001120996312238276, + "logits/chosen": -0.22370699048042297, + "logits/rejected": -0.3054266571998596, + "logps/chosen": -0.00021666797692887485, + "logps/rejected": -1.4185731410980225, + "loss": 0.1858, + "nll_loss": 0.046433914452791214, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.166679951187689e-05, + "rewards/margins": 0.14183564484119415, + "rewards/rejected": -0.14185731112957, + "step": 13421 + }, + { + "epoch": 9.282157676348548, + "grad_norm": 4.2012104988098145, + "learning_rate": 3.988012909174735e-06, + "log_odds_chosen": 10.43075180053711, + "log_odds_ratio": -0.0003460758307483047, + "logits/chosen": -0.3779338598251343, + "logits/rejected": -0.47758790850639343, + "logps/chosen": -0.000670717447064817, + "logps/rejected": -2.635446548461914, + "loss": 0.927, + "nll_loss": 0.23170319199562073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.707174179609865e-05, + "rewards/margins": 0.2634775936603546, + "rewards/rejected": -0.26354464888572693, + "step": 13422 + }, + { + "epoch": 9.282849239280775, + "grad_norm": 3.3624515533447266, + "learning_rate": 3.984170892884586e-06, + "log_odds_chosen": 11.48715591430664, + "log_odds_ratio": -5.0667844334384426e-05, + "logits/chosen": -0.24923592805862427, + "logits/rejected": -0.28355079889297485, + "logps/chosen": -0.0003556256997399032, + "logps/rejected": -2.6016101837158203, + "loss": 0.4092, + "nll_loss": 0.10228350013494492, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5562567063607275e-05, + "rewards/margins": 0.26012542843818665, + "rewards/rejected": -0.26016098260879517, + "step": 13423 + }, + { + "epoch": 9.283540802213002, + "grad_norm": 3.0172131061553955, + "learning_rate": 3.9803288765944375e-06, + "log_odds_chosen": 10.21627426147461, + "log_odds_ratio": -0.00044269065256230533, + "logits/chosen": -0.38946762681007385, + "logits/rejected": -0.4330682158470154, + "logps/chosen": -0.0002675445284694433, + "logps/rejected": -1.7040836811065674, + "loss": 0.3104, + "nll_loss": 0.07755580544471741, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6754454665933736e-05, + "rewards/margins": 0.17038163542747498, + "rewards/rejected": -0.17040836811065674, + "step": 13424 + }, + { + "epoch": 9.284232365145229, + "grad_norm": 2.2918384075164795, + "learning_rate": 3.976486860304287e-06, + "log_odds_chosen": 10.539897918701172, + "log_odds_ratio": -0.00033111555967479944, + "logits/chosen": -0.2676827013492584, + "logits/rejected": -0.28647491335868835, + "logps/chosen": -0.0006111696711741388, + "logps/rejected": -1.587890625, + "loss": 0.2487, + "nll_loss": 0.062130894511938095, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.111696711741388e-05, + "rewards/margins": 0.15872792899608612, + "rewards/rejected": -0.15878905355930328, + "step": 13425 + }, + { + "epoch": 9.284923928077456, + "grad_norm": 3.063905715942383, + "learning_rate": 3.972644844014139e-06, + "log_odds_chosen": 11.524633407592773, + "log_odds_ratio": -2.583039713499602e-05, + "logits/chosen": -0.5895035266876221, + "logits/rejected": -0.6758289337158203, + "logps/chosen": -8.647509093862027e-05, + "logps/rejected": -2.294323444366455, + "loss": 0.3178, + "nll_loss": 0.07944169640541077, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.647508366266266e-06, + "rewards/margins": 0.22942368686199188, + "rewards/rejected": -0.2294323444366455, + "step": 13426 + }, + { + "epoch": 9.285615491009683, + "grad_norm": 2.2570669651031494, + "learning_rate": 3.96880282772399e-06, + "log_odds_chosen": 11.683237075805664, + "log_odds_ratio": -2.6745978175313212e-05, + "logits/chosen": -0.3830067217350006, + "logits/rejected": -0.3945922255516052, + "logps/chosen": -0.00012965469795744866, + "logps/rejected": -2.250619649887085, + "loss": 0.2252, + "nll_loss": 0.05630715191364288, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2965469068149105e-05, + "rewards/margins": 0.2250490039587021, + "rewards/rejected": -0.22506198287010193, + "step": 13427 + }, + { + "epoch": 9.28630705394191, + "grad_norm": 3.58709454536438, + "learning_rate": 3.9649608114338405e-06, + "log_odds_chosen": 11.472620964050293, + "log_odds_ratio": -3.926477438653819e-05, + "logits/chosen": 0.07597567141056061, + "logits/rejected": -0.1412590742111206, + "logps/chosen": -9.301173849962652e-05, + "logps/rejected": -1.7634772062301636, + "loss": 0.4221, + "nll_loss": 0.10552544891834259, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.301174031861592e-06, + "rewards/margins": 0.17633843421936035, + "rewards/rejected": -0.17634771764278412, + "step": 13428 + }, + { + "epoch": 9.286998616874136, + "grad_norm": 3.3155059814453125, + "learning_rate": 3.961118795143691e-06, + "log_odds_chosen": 11.708847045898438, + "log_odds_ratio": -3.0453265935648233e-05, + "logits/chosen": -0.400894433259964, + "logits/rejected": -0.3575477600097656, + "logps/chosen": -0.00024603097699582577, + "logps/rejected": -2.225172758102417, + "loss": 0.3595, + "nll_loss": 0.08986914902925491, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4603097699582577e-05, + "rewards/margins": 0.22249269485473633, + "rewards/rejected": -0.22251728177070618, + "step": 13429 + }, + { + "epoch": 9.287690179806363, + "grad_norm": 4.786239147186279, + "learning_rate": 3.957276778853543e-06, + "log_odds_chosen": 12.104966163635254, + "log_odds_ratio": -7.745972652628552e-06, + "logits/chosen": -0.4614717960357666, + "logits/rejected": -0.43710124492645264, + "logps/chosen": -0.00013096739712636918, + "logps/rejected": -2.6526381969451904, + "loss": 0.4942, + "nll_loss": 0.12354452908039093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3096740076434799e-05, + "rewards/margins": 0.2652507424354553, + "rewards/rejected": -0.2652638554573059, + "step": 13430 + }, + { + "epoch": 9.28838174273859, + "grad_norm": 4.398645401000977, + "learning_rate": 3.953434762563394e-06, + "log_odds_chosen": 11.697811126708984, + "log_odds_ratio": -2.1033920347690582e-05, + "logits/chosen": -0.3223825991153717, + "logits/rejected": -0.3768712878227234, + "logps/chosen": -0.00012502839672379196, + "logps/rejected": -2.744093894958496, + "loss": 0.7729, + "nll_loss": 0.19321200251579285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.250284185516648e-05, + "rewards/margins": 0.2743968963623047, + "rewards/rejected": -0.27440938353538513, + "step": 13431 + }, + { + "epoch": 9.289073305670817, + "grad_norm": 4.682480812072754, + "learning_rate": 3.949592746273244e-06, + "log_odds_chosen": 12.311955451965332, + "log_odds_ratio": -1.3734586900682189e-05, + "logits/chosen": -0.13822859525680542, + "logits/rejected": -0.2811431884765625, + "logps/chosen": -0.00023069609596859664, + "logps/rejected": -3.5613608360290527, + "loss": 0.5211, + "nll_loss": 0.1302732676267624, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3069609596859664e-05, + "rewards/margins": 0.3561130464076996, + "rewards/rejected": -0.35613611340522766, + "step": 13432 + }, + { + "epoch": 9.289764868603044, + "grad_norm": 3.0834381580352783, + "learning_rate": 3.945750729983096e-06, + "log_odds_chosen": 11.342191696166992, + "log_odds_ratio": -2.7563957701204345e-05, + "logits/chosen": -0.24458184838294983, + "logits/rejected": -0.3116031587123871, + "logps/chosen": -0.00011202124733245, + "logps/rejected": -2.335583209991455, + "loss": 0.4059, + "nll_loss": 0.10148320347070694, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.120212436944712e-05, + "rewards/margins": 0.2335471212863922, + "rewards/rejected": -0.23355832695960999, + "step": 13433 + }, + { + "epoch": 9.29045643153527, + "grad_norm": 2.614386558532715, + "learning_rate": 3.941908713692946e-06, + "log_odds_chosen": 10.352871894836426, + "log_odds_ratio": -0.00029394158627837896, + "logits/chosen": -0.45531344413757324, + "logits/rejected": -0.40713274478912354, + "logps/chosen": -0.0005957625689916313, + "logps/rejected": -1.8678221702575684, + "loss": 0.2736, + "nll_loss": 0.06837328523397446, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.957625762675889e-05, + "rewards/margins": 0.18672263622283936, + "rewards/rejected": -0.18678221106529236, + "step": 13434 + }, + { + "epoch": 9.291147994467497, + "grad_norm": 3.3317110538482666, + "learning_rate": 3.9380666974027975e-06, + "log_odds_chosen": 11.031869888305664, + "log_odds_ratio": -9.012289956444874e-05, + "logits/chosen": -0.5779318809509277, + "logits/rejected": -0.7733064889907837, + "logps/chosen": -0.0001790263195289299, + "logps/rejected": -1.7726452350616455, + "loss": 0.3714, + "nll_loss": 0.09283643215894699, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.790263195289299e-05, + "rewards/margins": 0.17724663019180298, + "rewards/rejected": -0.17726454138755798, + "step": 13435 + }, + { + "epoch": 9.291839557399724, + "grad_norm": 3.116892099380493, + "learning_rate": 3.934224681112647e-06, + "log_odds_chosen": 10.351808547973633, + "log_odds_ratio": -7.368012302322313e-05, + "logits/chosen": -0.32790011167526245, + "logits/rejected": -0.3104146718978882, + "logps/chosen": -0.000142537901410833, + "logps/rejected": -1.4875270128250122, + "loss": 0.3242, + "nll_loss": 0.08104795962572098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.42537901410833e-05, + "rewards/margins": 0.14873844385147095, + "rewards/rejected": -0.14875270426273346, + "step": 13436 + }, + { + "epoch": 9.292531120331951, + "grad_norm": 3.672302722930908, + "learning_rate": 3.930382664822499e-06, + "log_odds_chosen": 12.376562118530273, + "log_odds_ratio": -5.7372599258087575e-06, + "logits/chosen": -0.2653083801269531, + "logits/rejected": -0.30238568782806396, + "logps/chosen": -7.231077324831858e-05, + "logps/rejected": -2.5421056747436523, + "loss": 0.432, + "nll_loss": 0.10799235105514526, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.231077233882388e-06, + "rewards/margins": 0.25420331954956055, + "rewards/rejected": -0.25421056151390076, + "step": 13437 + }, + { + "epoch": 9.293222683264178, + "grad_norm": 3.056394100189209, + "learning_rate": 3.9265406485323505e-06, + "log_odds_chosen": 11.484671592712402, + "log_odds_ratio": -3.2231018849415705e-05, + "logits/chosen": -0.3656775653362274, + "logits/rejected": -0.36800307035446167, + "logps/chosen": -0.00019055130542255938, + "logps/rejected": -2.2714104652404785, + "loss": 0.3314, + "nll_loss": 0.08284640312194824, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9055129087064415e-05, + "rewards/margins": 0.2271220088005066, + "rewards/rejected": -0.22714105248451233, + "step": 13438 + }, + { + "epoch": 9.293914246196405, + "grad_norm": 3.0379509925842285, + "learning_rate": 3.9226986322422004e-06, + "log_odds_chosen": 10.669838905334473, + "log_odds_ratio": -0.00021413953800220042, + "logits/chosen": -0.21338048577308655, + "logits/rejected": -0.2577047348022461, + "logps/chosen": -0.0002600338775664568, + "logps/rejected": -2.0176901817321777, + "loss": 0.3005, + "nll_loss": 0.07510216534137726, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.600338848424144e-05, + "rewards/margins": 0.201743021607399, + "rewards/rejected": -0.20176903903484344, + "step": 13439 + }, + { + "epoch": 9.294605809128631, + "grad_norm": 4.63295841217041, + "learning_rate": 3.918856615952052e-06, + "log_odds_chosen": 11.448358535766602, + "log_odds_ratio": -1.9698167307069525e-05, + "logits/chosen": -0.5362762212753296, + "logits/rejected": -0.5343048572540283, + "logps/chosen": -0.00015766645083203912, + "logps/rejected": -2.31644868850708, + "loss": 0.4271, + "nll_loss": 0.10677941143512726, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5766647265991196e-05, + "rewards/margins": 0.23162910342216492, + "rewards/rejected": -0.231644868850708, + "step": 13440 + }, + { + "epoch": 9.295297372060858, + "grad_norm": 2.1920297145843506, + "learning_rate": 3.915014599661903e-06, + "log_odds_chosen": 11.539931297302246, + "log_odds_ratio": -9.448492346564308e-05, + "logits/chosen": -0.7264431118965149, + "logits/rejected": -0.8167514801025391, + "logps/chosen": -0.0003868629573844373, + "logps/rejected": -3.05387020111084, + "loss": 0.2399, + "nll_loss": 0.05997336655855179, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868629573844373e-05, + "rewards/margins": 0.30534833669662476, + "rewards/rejected": -0.305387020111084, + "step": 13441 + }, + { + "epoch": 9.295988934993085, + "grad_norm": 3.0261287689208984, + "learning_rate": 3.9111725833717535e-06, + "log_odds_chosen": 11.672271728515625, + "log_odds_ratio": -3.1384017347591e-05, + "logits/chosen": -0.5126137137413025, + "logits/rejected": -0.555162787437439, + "logps/chosen": -0.00015418983821291476, + "logps/rejected": -2.4113316535949707, + "loss": 0.397, + "nll_loss": 0.09924092143774033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5418983821291476e-05, + "rewards/margins": 0.24111774563789368, + "rewards/rejected": -0.24113315343856812, + "step": 13442 + }, + { + "epoch": 9.296680497925312, + "grad_norm": 4.1650567054748535, + "learning_rate": 3.907330567081604e-06, + "log_odds_chosen": 11.079200744628906, + "log_odds_ratio": -0.0002563607122283429, + "logits/chosen": -0.021878689527511597, + "logits/rejected": -0.13688160479068756, + "logps/chosen": -0.0005930270999670029, + "logps/rejected": -2.562225341796875, + "loss": 0.3913, + "nll_loss": 0.09778881818056107, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930270708631724e-05, + "rewards/margins": 0.25616323947906494, + "rewards/rejected": -0.25622254610061646, + "step": 13443 + }, + { + "epoch": 9.297372060857539, + "grad_norm": 3.347186803817749, + "learning_rate": 3.903488550791456e-06, + "log_odds_chosen": 11.050576210021973, + "log_odds_ratio": -9.413971565663815e-05, + "logits/chosen": -0.3477120101451874, + "logits/rejected": -0.35860568284988403, + "logps/chosen": -9.553891140967607e-05, + "logps/rejected": -1.656992793083191, + "loss": 0.355, + "nll_loss": 0.08875176310539246, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.553890777169727e-06, + "rewards/margins": 0.16568972170352936, + "rewards/rejected": -0.16569927334785461, + "step": 13444 + }, + { + "epoch": 9.298063623789766, + "grad_norm": 2.4853501319885254, + "learning_rate": 3.899646534501307e-06, + "log_odds_chosen": 10.950776100158691, + "log_odds_ratio": -5.783190135844052e-05, + "logits/chosen": -0.26249608397483826, + "logits/rejected": -0.2371029406785965, + "logps/chosen": -0.00018176852609030902, + "logps/rejected": -2.1492209434509277, + "loss": 0.2225, + "nll_loss": 0.05562479421496391, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8176853700424545e-05, + "rewards/margins": 0.21490392088890076, + "rewards/rejected": -0.21492210030555725, + "step": 13445 + }, + { + "epoch": 9.298755186721992, + "grad_norm": 2.5416414737701416, + "learning_rate": 3.895804518211157e-06, + "log_odds_chosen": 11.243158340454102, + "log_odds_ratio": -5.964957381365821e-05, + "logits/chosen": -0.11643625795841217, + "logits/rejected": -0.15829455852508545, + "logps/chosen": -0.00025903433561325073, + "logps/rejected": -2.2749953269958496, + "loss": 0.2535, + "nll_loss": 0.06337232142686844, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5903436835506e-05, + "rewards/margins": 0.22747361660003662, + "rewards/rejected": -0.22749951481819153, + "step": 13446 + }, + { + "epoch": 9.29944674965422, + "grad_norm": 4.396009922027588, + "learning_rate": 3.891962501921009e-06, + "log_odds_chosen": 11.116652488708496, + "log_odds_ratio": -3.810801717918366e-05, + "logits/chosen": -0.03421090170741081, + "logits/rejected": -0.05119268223643303, + "logps/chosen": -0.00012173371214885265, + "logps/rejected": -2.0002799034118652, + "loss": 0.4754, + "nll_loss": 0.1188463568687439, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2173370123491623e-05, + "rewards/margins": 0.2000158131122589, + "rewards/rejected": -0.20002800226211548, + "step": 13447 + }, + { + "epoch": 9.300138312586446, + "grad_norm": 4.451956272125244, + "learning_rate": 3.888120485630859e-06, + "log_odds_chosen": 11.3140869140625, + "log_odds_ratio": -3.882058445014991e-05, + "logits/chosen": -0.7956528663635254, + "logits/rejected": -0.8160718679428101, + "logps/chosen": -0.00028218800434842706, + "logps/rejected": -2.6495447158813477, + "loss": 0.3688, + "nll_loss": 0.09219682216644287, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8218801162438467e-05, + "rewards/margins": 0.2649262547492981, + "rewards/rejected": -0.26495444774627686, + "step": 13448 + }, + { + "epoch": 9.300829875518673, + "grad_norm": 2.387260913848877, + "learning_rate": 3.8842784693407105e-06, + "log_odds_chosen": 11.304975509643555, + "log_odds_ratio": -2.020270039793104e-05, + "logits/chosen": -0.2679097056388855, + "logits/rejected": -0.35723209381103516, + "logps/chosen": -0.00014827624545432627, + "logps/rejected": -2.125303268432617, + "loss": 0.234, + "nll_loss": 0.05850508436560631, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4827624909230508e-05, + "rewards/margins": 0.2125154733657837, + "rewards/rejected": -0.21253031492233276, + "step": 13449 + }, + { + "epoch": 9.3015214384509, + "grad_norm": 3.363272190093994, + "learning_rate": 3.880436453050561e-06, + "log_odds_chosen": 11.56545352935791, + "log_odds_ratio": -3.806597669608891e-05, + "logits/chosen": -0.5218194723129272, + "logits/rejected": -0.7576302289962769, + "logps/chosen": -0.0003641648218035698, + "logps/rejected": -2.0262107849121094, + "loss": 0.3822, + "nll_loss": 0.09554532915353775, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6416480725165457e-05, + "rewards/margins": 0.20258468389511108, + "rewards/rejected": -0.20262108743190765, + "step": 13450 + }, + { + "epoch": 9.302213001383127, + "grad_norm": 4.251006126403809, + "learning_rate": 3.876594436760412e-06, + "log_odds_chosen": 10.876567840576172, + "log_odds_ratio": -0.00013768920325674117, + "logits/chosen": -0.537798285484314, + "logits/rejected": -0.6344647407531738, + "logps/chosen": -0.00023333955323323607, + "logps/rejected": -2.351746082305908, + "loss": 0.3429, + "nll_loss": 0.08570535480976105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.333395605091937e-05, + "rewards/margins": 0.23515130579471588, + "rewards/rejected": -0.23517462611198425, + "step": 13451 + }, + { + "epoch": 9.302904564315353, + "grad_norm": 4.054286479949951, + "learning_rate": 3.872752420470263e-06, + "log_odds_chosen": 11.610682487487793, + "log_odds_ratio": -2.241161018901039e-05, + "logits/chosen": -0.3463277816772461, + "logits/rejected": -0.3877999782562256, + "logps/chosen": -0.0001846577797550708, + "logps/rejected": -2.8598146438598633, + "loss": 0.4372, + "nll_loss": 0.10929237306118011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8465776520315558e-05, + "rewards/margins": 0.2859629988670349, + "rewards/rejected": -0.2859814763069153, + "step": 13452 + }, + { + "epoch": 9.30359612724758, + "grad_norm": 3.626314163208008, + "learning_rate": 3.8689104041801135e-06, + "log_odds_chosen": 11.272414207458496, + "log_odds_ratio": -5.84806184633635e-05, + "logits/chosen": -0.5375299453735352, + "logits/rejected": -0.5248052477836609, + "logps/chosen": -0.00018739163351710886, + "logps/rejected": -2.3324246406555176, + "loss": 0.4116, + "nll_loss": 0.10289761424064636, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8739163351710886e-05, + "rewards/margins": 0.23322373628616333, + "rewards/rejected": -0.2332424819469452, + "step": 13453 + }, + { + "epoch": 9.304287690179807, + "grad_norm": 3.117380142211914, + "learning_rate": 3.865068387889965e-06, + "log_odds_chosen": 12.008508682250977, + "log_odds_ratio": -2.961501741083339e-05, + "logits/chosen": -0.3070840537548065, + "logits/rejected": -0.3590050935745239, + "logps/chosen": -0.00015943381004035473, + "logps/rejected": -2.8896145820617676, + "loss": 0.2863, + "nll_loss": 0.07156022638082504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.594338027643971e-05, + "rewards/margins": 0.2889455258846283, + "rewards/rejected": -0.2889614403247833, + "step": 13454 + }, + { + "epoch": 9.304979253112034, + "grad_norm": 5.104377746582031, + "learning_rate": 3.861226371599816e-06, + "log_odds_chosen": 11.075465202331543, + "log_odds_ratio": -0.0001627878227736801, + "logits/chosen": -0.1711142212152481, + "logits/rejected": -0.28870540857315063, + "logps/chosen": -0.00043384850141592324, + "logps/rejected": -2.306971549987793, + "loss": 0.6368, + "nll_loss": 0.1591799110174179, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3384850869188085e-05, + "rewards/margins": 0.230653777718544, + "rewards/rejected": -0.2306971549987793, + "step": 13455 + }, + { + "epoch": 9.30567081604426, + "grad_norm": 3.257077693939209, + "learning_rate": 3.8573843553096666e-06, + "log_odds_chosen": 11.359479904174805, + "log_odds_ratio": -4.4101354433223605e-05, + "logits/chosen": -0.15206146240234375, + "logits/rejected": -0.22837527096271515, + "logps/chosen": -0.00023172479995992035, + "logps/rejected": -2.734315872192383, + "loss": 0.2831, + "nll_loss": 0.07076961547136307, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3172480723587796e-05, + "rewards/margins": 0.2734084129333496, + "rewards/rejected": -0.27343159914016724, + "step": 13456 + }, + { + "epoch": 9.306362378976488, + "grad_norm": 3.722757577896118, + "learning_rate": 3.853542339019517e-06, + "log_odds_chosen": 11.233427047729492, + "log_odds_ratio": -0.0003971302940044552, + "logits/chosen": -0.14773187041282654, + "logits/rejected": -0.14477570354938507, + "logps/chosen": -0.0011931579792872071, + "logps/rejected": -2.492964029312134, + "loss": 0.3946, + "nll_loss": 0.0986117273569107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011931579501833767, + "rewards/margins": 0.24917706847190857, + "rewards/rejected": -0.2492963820695877, + "step": 13457 + }, + { + "epoch": 9.307053941908714, + "grad_norm": 2.218470573425293, + "learning_rate": 3.849700322729369e-06, + "log_odds_chosen": 10.07578182220459, + "log_odds_ratio": -0.00019474061264190823, + "logits/chosen": -0.2753535807132721, + "logits/rejected": -0.3422609567642212, + "logps/chosen": -0.0010008374229073524, + "logps/rejected": -2.065652847290039, + "loss": 0.2113, + "nll_loss": 0.052812688052654266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010008375102188438, + "rewards/margins": 0.2064652144908905, + "rewards/rejected": -0.20656529068946838, + "step": 13458 + }, + { + "epoch": 9.307745504840941, + "grad_norm": 4.743387699127197, + "learning_rate": 3.845858306439219e-06, + "log_odds_chosen": 11.158632278442383, + "log_odds_ratio": -2.11762326216558e-05, + "logits/chosen": -0.21728329360485077, + "logits/rejected": -0.2781408429145813, + "logps/chosen": -0.000218193992623128, + "logps/rejected": -2.3770692348480225, + "loss": 0.4833, + "nll_loss": 0.12082695960998535, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.181939998990856e-05, + "rewards/margins": 0.23768511414527893, + "rewards/rejected": -0.23770692944526672, + "step": 13459 + }, + { + "epoch": 9.308437067773168, + "grad_norm": 5.226717948913574, + "learning_rate": 3.84201629014907e-06, + "log_odds_chosen": 10.973608016967773, + "log_odds_ratio": -0.00014043868577573448, + "logits/chosen": -0.2824869453907013, + "logits/rejected": -0.3398590683937073, + "logps/chosen": -0.00011858497600769624, + "logps/rejected": -1.9236409664154053, + "loss": 0.6101, + "nll_loss": 0.1525149792432785, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1858497600769624e-05, + "rewards/margins": 0.19235223531723022, + "rewards/rejected": -0.19236409664154053, + "step": 13460 + }, + { + "epoch": 9.309128630705395, + "grad_norm": 2.705129384994507, + "learning_rate": 3.838174273858921e-06, + "log_odds_chosen": 10.589807510375977, + "log_odds_ratio": -0.00017924222629517317, + "logits/chosen": -0.7581220865249634, + "logits/rejected": -0.7678347229957581, + "logps/chosen": -0.00028634577756747603, + "logps/rejected": -2.0105509757995605, + "loss": 0.2845, + "nll_loss": 0.07111126184463501, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8634578484343365e-05, + "rewards/margins": 0.20102645456790924, + "rewards/rejected": -0.20105509459972382, + "step": 13461 + }, + { + "epoch": 9.309820193637622, + "grad_norm": 3.6058995723724365, + "learning_rate": 3.834332257568772e-06, + "log_odds_chosen": 10.95798397064209, + "log_odds_ratio": -5.1791306759696454e-05, + "logits/chosen": -0.6187546253204346, + "logits/rejected": -0.4810827672481537, + "logps/chosen": -0.00013288088666740805, + "logps/rejected": -1.8508840799331665, + "loss": 0.3569, + "nll_loss": 0.08922485262155533, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3288088666740805e-05, + "rewards/margins": 0.18507513403892517, + "rewards/rejected": -0.1850884109735489, + "step": 13462 + }, + { + "epoch": 9.310511756569849, + "grad_norm": 3.3807432651519775, + "learning_rate": 3.8304902412786235e-06, + "log_odds_chosen": 10.725987434387207, + "log_odds_ratio": -9.697769564809278e-05, + "logits/chosen": -0.062388740479946136, + "logits/rejected": -0.0435502827167511, + "logps/chosen": -0.00022250697657000273, + "logps/rejected": -1.8565727472305298, + "loss": 0.2896, + "nll_loss": 0.07238153368234634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2250696929404512e-05, + "rewards/margins": 0.1856350302696228, + "rewards/rejected": -0.18565726280212402, + "step": 13463 + }, + { + "epoch": 9.311203319502075, + "grad_norm": 3.468080520629883, + "learning_rate": 3.826648224988474e-06, + "log_odds_chosen": 10.823293685913086, + "log_odds_ratio": -0.00020776645396836102, + "logits/chosen": -0.33366209268569946, + "logits/rejected": -0.2669678330421448, + "logps/chosen": -0.00029635181999765337, + "logps/rejected": -2.1758363246917725, + "loss": 0.432, + "nll_loss": 0.10798037052154541, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9635182727361098e-05, + "rewards/margins": 0.2175540030002594, + "rewards/rejected": -0.21758362650871277, + "step": 13464 + }, + { + "epoch": 9.311894882434302, + "grad_norm": 2.700617551803589, + "learning_rate": 3.822806208698325e-06, + "log_odds_chosen": 13.142247200012207, + "log_odds_ratio": -2.230983409390319e-05, + "logits/chosen": -0.005779445171356201, + "logits/rejected": -0.09649817645549774, + "logps/chosen": -0.00014073318743612617, + "logps/rejected": -3.8190064430236816, + "loss": 0.3025, + "nll_loss": 0.07562220096588135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4073319107410498e-05, + "rewards/margins": 0.3818865418434143, + "rewards/rejected": -0.3819006085395813, + "step": 13465 + }, + { + "epoch": 9.312586445366529, + "grad_norm": 4.4598870277404785, + "learning_rate": 3.818964192408176e-06, + "log_odds_chosen": 11.616842269897461, + "log_odds_ratio": -2.9827209800714627e-05, + "logits/chosen": -0.1599445343017578, + "logits/rejected": -0.3907346725463867, + "logps/chosen": -0.00012314711057115346, + "logps/rejected": -2.275468587875366, + "loss": 0.4764, + "nll_loss": 0.11909401416778564, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2314711966610048e-05, + "rewards/margins": 0.22753453254699707, + "rewards/rejected": -0.22754687070846558, + "step": 13466 + }, + { + "epoch": 9.313278008298756, + "grad_norm": 5.46815824508667, + "learning_rate": 3.815122176118027e-06, + "log_odds_chosen": 12.737980842590332, + "log_odds_ratio": -5.220482762524625e-06, + "logits/chosen": -0.6087969541549683, + "logits/rejected": -0.6135598421096802, + "logps/chosen": -0.00015955566777847707, + "logps/rejected": -3.849729061126709, + "loss": 0.4696, + "nll_loss": 0.11739481985569, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.595556750544347e-05, + "rewards/margins": 0.3849569857120514, + "rewards/rejected": -0.3849729299545288, + "step": 13467 + }, + { + "epoch": 9.313969571230983, + "grad_norm": 8.528974533081055, + "learning_rate": 3.8112801598278777e-06, + "log_odds_chosen": 10.95968246459961, + "log_odds_ratio": -4.384573912830092e-05, + "logits/chosen": -0.18643268942832947, + "logits/rejected": -0.21873146295547485, + "logps/chosen": -0.00013041615602560341, + "logps/rejected": -1.957237720489502, + "loss": 0.3603, + "nll_loss": 0.09007404744625092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.304161469306564e-05, + "rewards/margins": 0.19571073353290558, + "rewards/rejected": -0.1957237720489502, + "step": 13468 + }, + { + "epoch": 9.31466113416321, + "grad_norm": 4.134918212890625, + "learning_rate": 3.807438143537729e-06, + "log_odds_chosen": 12.330034255981445, + "log_odds_ratio": -6.079011654946953e-06, + "logits/chosen": -0.008475244045257568, + "logits/rejected": 0.025699757039546967, + "logps/chosen": -9.574719297233969e-05, + "logps/rejected": -2.944679021835327, + "loss": 0.3604, + "nll_loss": 0.09009113162755966, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.57472002482973e-06, + "rewards/margins": 0.2944583296775818, + "rewards/rejected": -0.2944679260253906, + "step": 13469 + }, + { + "epoch": 9.315352697095436, + "grad_norm": 3.7410848140716553, + "learning_rate": 3.803596127247579e-06, + "log_odds_chosen": 11.439793586730957, + "log_odds_ratio": -1.828746280807536e-05, + "logits/chosen": -0.25906312465667725, + "logits/rejected": -0.34541481733322144, + "logps/chosen": -0.00023132732894737273, + "logps/rejected": -2.2295823097229004, + "loss": 0.4508, + "nll_loss": 0.1126941591501236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3132733986130916e-05, + "rewards/margins": 0.22293512523174286, + "rewards/rejected": -0.22295823693275452, + "step": 13470 + }, + { + "epoch": 9.316044260027663, + "grad_norm": 4.06542444229126, + "learning_rate": 3.7997541109574304e-06, + "log_odds_chosen": 11.705513000488281, + "log_odds_ratio": -2.102018697769381e-05, + "logits/chosen": -0.30078208446502686, + "logits/rejected": -0.12992164492607117, + "logps/chosen": -8.429591252934188e-05, + "logps/rejected": -2.435026168823242, + "loss": 0.3457, + "nll_loss": 0.0864274799823761, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429591616732068e-06, + "rewards/margins": 0.2434941828250885, + "rewards/rejected": -0.24350261688232422, + "step": 13471 + }, + { + "epoch": 9.31673582295989, + "grad_norm": 4.054144382476807, + "learning_rate": 3.7959120946672815e-06, + "log_odds_chosen": 10.893832206726074, + "log_odds_ratio": -8.980531129054725e-05, + "logits/chosen": -0.13907435536384583, + "logits/rejected": -0.008393503725528717, + "logps/chosen": -0.000236863037571311, + "logps/rejected": -2.596144199371338, + "loss": 0.5103, + "nll_loss": 0.1275601089000702, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3686305212322623e-05, + "rewards/margins": 0.259590744972229, + "rewards/rejected": -0.25961440801620483, + "step": 13472 + }, + { + "epoch": 9.317427385892117, + "grad_norm": 3.4377999305725098, + "learning_rate": 3.7920700783771323e-06, + "log_odds_chosen": 11.774809837341309, + "log_odds_ratio": -1.6027501260396093e-05, + "logits/chosen": -0.09235573559999466, + "logits/rejected": -0.14144612848758698, + "logps/chosen": -0.00010810409730765969, + "logps/rejected": -2.3190908432006836, + "loss": 0.3623, + "nll_loss": 0.09056393057107925, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0810409548867028e-05, + "rewards/margins": 0.2318982630968094, + "rewards/rejected": -0.23190905153751373, + "step": 13473 + }, + { + "epoch": 9.318118948824344, + "grad_norm": 3.6212639808654785, + "learning_rate": 3.7882280620869834e-06, + "log_odds_chosen": 10.931177139282227, + "log_odds_ratio": -3.4371048968750983e-05, + "logits/chosen": -0.64949631690979, + "logits/rejected": -0.6834389567375183, + "logps/chosen": -0.00039314801688306034, + "logps/rejected": -2.1690139770507812, + "loss": 0.4247, + "nll_loss": 0.10616564005613327, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.931480387109332e-05, + "rewards/margins": 0.21686206758022308, + "rewards/rejected": -0.21690139174461365, + "step": 13474 + }, + { + "epoch": 9.31881051175657, + "grad_norm": 3.314685106277466, + "learning_rate": 3.7843860457968346e-06, + "log_odds_chosen": 11.922871589660645, + "log_odds_ratio": -9.834478987613693e-06, + "logits/chosen": -0.37454918026924133, + "logits/rejected": -0.3413027226924896, + "logps/chosen": -8.132911898428574e-05, + "logps/rejected": -2.511720895767212, + "loss": 0.2946, + "nll_loss": 0.07365487515926361, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.132911716529634e-06, + "rewards/margins": 0.25116395950317383, + "rewards/rejected": -0.25117209553718567, + "step": 13475 + }, + { + "epoch": 9.319502074688797, + "grad_norm": 3.1171579360961914, + "learning_rate": 3.780544029506685e-06, + "log_odds_chosen": 11.269782066345215, + "log_odds_ratio": -7.540702790720388e-05, + "logits/chosen": -0.4870750308036804, + "logits/rejected": -0.5211501717567444, + "logps/chosen": -0.00012674767640419304, + "logps/rejected": -2.312211513519287, + "loss": 0.311, + "nll_loss": 0.0777503028512001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2674767276621424e-05, + "rewards/margins": 0.23120847344398499, + "rewards/rejected": -0.23122113943099976, + "step": 13476 + }, + { + "epoch": 9.320193637621024, + "grad_norm": 2.3931541442871094, + "learning_rate": 3.776702013216536e-06, + "log_odds_chosen": 10.98151969909668, + "log_odds_ratio": -3.289541564299725e-05, + "logits/chosen": -0.3848639130592346, + "logits/rejected": -0.37438511848449707, + "logps/chosen": -0.00015288082067854702, + "logps/rejected": -1.8499668836593628, + "loss": 0.3197, + "nll_loss": 0.0799197405576706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5288083886844106e-05, + "rewards/margins": 0.18498140573501587, + "rewards/rejected": -0.18499669432640076, + "step": 13477 + }, + { + "epoch": 9.320885200553251, + "grad_norm": 2.7862470149993896, + "learning_rate": 3.7728599969263873e-06, + "log_odds_chosen": 11.56342887878418, + "log_odds_ratio": -1.7754273358150385e-05, + "logits/chosen": -0.5026286244392395, + "logits/rejected": -0.6119534373283386, + "logps/chosen": -8.075307414401323e-05, + "logps/rejected": -2.1262624263763428, + "loss": 0.3213, + "nll_loss": 0.08032898604869843, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.075307960098144e-06, + "rewards/margins": 0.2126181721687317, + "rewards/rejected": -0.21262626349925995, + "step": 13478 + }, + { + "epoch": 9.321576763485478, + "grad_norm": 2.863743305206299, + "learning_rate": 3.769017980636238e-06, + "log_odds_chosen": 11.144573211669922, + "log_odds_ratio": -2.4799961465760134e-05, + "logits/chosen": -0.31004148721694946, + "logits/rejected": -0.3101674020290375, + "logps/chosen": -0.00019185274140909314, + "logps/rejected": -2.265904664993286, + "loss": 0.2478, + "nll_loss": 0.06194061040878296, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9185274140909314e-05, + "rewards/margins": 0.22657127678394318, + "rewards/rejected": -0.22659045457839966, + "step": 13479 + }, + { + "epoch": 9.322268326417705, + "grad_norm": 3.149684190750122, + "learning_rate": 3.7651759643460892e-06, + "log_odds_chosen": 11.424112319946289, + "log_odds_ratio": -4.159379022894427e-05, + "logits/chosen": -0.2655598521232605, + "logits/rejected": -0.37703606486320496, + "logps/chosen": -0.00010751900117611513, + "logps/rejected": -2.443114995956421, + "loss": 0.2764, + "nll_loss": 0.06908674538135529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0751899935712572e-05, + "rewards/margins": 0.2443007528781891, + "rewards/rejected": -0.24431152641773224, + "step": 13480 + }, + { + "epoch": 9.322959889349931, + "grad_norm": 4.372343063354492, + "learning_rate": 3.7613339480559404e-06, + "log_odds_chosen": 12.416967391967773, + "log_odds_ratio": -2.7587606382439844e-05, + "logits/chosen": 0.1218680888414383, + "logits/rejected": -0.008615031838417053, + "logps/chosen": -9.726442658575252e-05, + "logps/rejected": -2.9957611560821533, + "loss": 0.4528, + "nll_loss": 0.1131986677646637, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.726443749968894e-06, + "rewards/margins": 0.299566388130188, + "rewards/rejected": -0.29957613348960876, + "step": 13481 + }, + { + "epoch": 9.323651452282158, + "grad_norm": 4.135457992553711, + "learning_rate": 3.7574919317657907e-06, + "log_odds_chosen": 11.986932754516602, + "log_odds_ratio": -1.0640229447744787e-05, + "logits/chosen": -0.5449897050857544, + "logits/rejected": -0.5930085778236389, + "logps/chosen": -0.00015816489758435637, + "logps/rejected": -2.755146026611328, + "loss": 0.3185, + "nll_loss": 0.07961829006671906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.581649121362716e-05, + "rewards/margins": 0.27549877762794495, + "rewards/rejected": -0.2755146026611328, + "step": 13482 + }, + { + "epoch": 9.324343015214385, + "grad_norm": 3.3711698055267334, + "learning_rate": 3.753649915475642e-06, + "log_odds_chosen": 10.710826873779297, + "log_odds_ratio": -0.00037505064392462373, + "logits/chosen": -0.26216813921928406, + "logits/rejected": -0.3119490444660187, + "logps/chosen": -0.0003268842410761863, + "logps/rejected": -2.353797435760498, + "loss": 0.4368, + "nll_loss": 0.10915695130825043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.268842556281015e-05, + "rewards/margins": 0.23534703254699707, + "rewards/rejected": -0.23537972569465637, + "step": 13483 + }, + { + "epoch": 9.325034578146612, + "grad_norm": 3.8483965396881104, + "learning_rate": 3.749807899185493e-06, + "log_odds_chosen": 11.60401439666748, + "log_odds_ratio": -4.468131737667136e-05, + "logits/chosen": -0.21380922198295593, + "logits/rejected": -0.27483001351356506, + "logps/chosen": -0.00020263693295419216, + "logps/rejected": -2.752741813659668, + "loss": 0.2783, + "nll_loss": 0.0695706233382225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.026369475061074e-05, + "rewards/margins": 0.27525392174720764, + "rewards/rejected": -0.2752741873264313, + "step": 13484 + }, + { + "epoch": 9.325726141078839, + "grad_norm": 4.260168075561523, + "learning_rate": 3.7459658828953434e-06, + "log_odds_chosen": 12.041501998901367, + "log_odds_ratio": -2.186280835303478e-05, + "logits/chosen": -0.1144566684961319, + "logits/rejected": -0.17487488687038422, + "logps/chosen": -0.00019972564768977463, + "logps/rejected": -3.208420515060425, + "loss": 0.4325, + "nll_loss": 0.10811209678649902, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9972565496573225e-05, + "rewards/margins": 0.3208220899105072, + "rewards/rejected": -0.32084208726882935, + "step": 13485 + }, + { + "epoch": 9.326417704011066, + "grad_norm": 3.800992727279663, + "learning_rate": 3.7421238666051946e-06, + "log_odds_chosen": 10.691490173339844, + "log_odds_ratio": -0.0006114224088378251, + "logits/chosen": -0.18019437789916992, + "logits/rejected": -0.22118288278579712, + "logps/chosen": -0.001764062442816794, + "logps/rejected": -2.772888660430908, + "loss": 0.3505, + "nll_loss": 0.08756895363330841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017640624719206244, + "rewards/margins": 0.2771124839782715, + "rewards/rejected": -0.27728885412216187, + "step": 13486 + }, + { + "epoch": 9.327109266943292, + "grad_norm": 5.743640899658203, + "learning_rate": 3.7382818503150453e-06, + "log_odds_chosen": 10.67558765411377, + "log_odds_ratio": -7.596887007821351e-05, + "logits/chosen": -0.13736465573310852, + "logits/rejected": -0.2244989573955536, + "logps/chosen": -0.0007089751306921244, + "logps/rejected": -2.3339829444885254, + "loss": 0.4601, + "nll_loss": 0.11502114683389664, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089751306921244e-05, + "rewards/margins": 0.23332740366458893, + "rewards/rejected": -0.23339828848838806, + "step": 13487 + }, + { + "epoch": 9.32780082987552, + "grad_norm": 2.9741744995117188, + "learning_rate": 3.7344398340248965e-06, + "log_odds_chosen": 11.594825744628906, + "log_odds_ratio": -5.341541691450402e-05, + "logits/chosen": -0.24379974603652954, + "logits/rejected": -0.23881873488426208, + "logps/chosen": -0.0002033656492130831, + "logps/rejected": -3.0501716136932373, + "loss": 0.2306, + "nll_loss": 0.05764034017920494, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.033656528510619e-05, + "rewards/margins": 0.3049968183040619, + "rewards/rejected": -0.3050171732902527, + "step": 13488 + }, + { + "epoch": 9.328492392807746, + "grad_norm": 4.511500358581543, + "learning_rate": 3.7305978177347476e-06, + "log_odds_chosen": 11.463151931762695, + "log_odds_ratio": -0.000139111332828179, + "logits/chosen": -0.14151470363140106, + "logits/rejected": -0.196761816740036, + "logps/chosen": -0.00032821958302520216, + "logps/rejected": -3.2633323669433594, + "loss": 0.3561, + "nll_loss": 0.0890030488371849, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2821957574924454e-05, + "rewards/margins": 0.3263004422187805, + "rewards/rejected": -0.326333224773407, + "step": 13489 + }, + { + "epoch": 9.329183955739973, + "grad_norm": 4.139351844787598, + "learning_rate": 3.726755801444598e-06, + "log_odds_chosen": 11.706854820251465, + "log_odds_ratio": -2.9966075089760125e-05, + "logits/chosen": -0.14848864078521729, + "logits/rejected": -0.19505658745765686, + "logps/chosen": -0.00011898233060492203, + "logps/rejected": -2.731010675430298, + "loss": 0.4292, + "nll_loss": 0.10730849206447601, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1898233424290083e-05, + "rewards/margins": 0.2730891704559326, + "rewards/rejected": -0.2731010615825653, + "step": 13490 + }, + { + "epoch": 9.3298755186722, + "grad_norm": 2.797802448272705, + "learning_rate": 3.722913785154449e-06, + "log_odds_chosen": 9.388402938842773, + "log_odds_ratio": -0.00041358559974469244, + "logits/chosen": -0.6194337606430054, + "logits/rejected": -0.7150942087173462, + "logps/chosen": -0.0005718135507777333, + "logps/rejected": -1.6147042512893677, + "loss": 0.3182, + "nll_loss": 0.0794985443353653, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.718135798815638e-05, + "rewards/margins": 0.1614132523536682, + "rewards/rejected": -0.161470428109169, + "step": 13491 + }, + { + "epoch": 9.330567081604427, + "grad_norm": 2.23926043510437, + "learning_rate": 3.7190717688643003e-06, + "log_odds_chosen": 10.670015335083008, + "log_odds_ratio": -9.015700197778642e-05, + "logits/chosen": -0.3055238127708435, + "logits/rejected": -0.3231578767299652, + "logps/chosen": -0.00011767053365474567, + "logps/rejected": -1.6536319255828857, + "loss": 0.2644, + "nll_loss": 0.06609741598367691, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1767053365474567e-05, + "rewards/margins": 0.16535143554210663, + "rewards/rejected": -0.16536319255828857, + "step": 13492 + }, + { + "epoch": 9.331258644536653, + "grad_norm": 2.5508315563201904, + "learning_rate": 3.7152297525741506e-06, + "log_odds_chosen": 11.282350540161133, + "log_odds_ratio": -8.483113197144121e-05, + "logits/chosen": -0.23872199654579163, + "logits/rejected": -0.19816704094409943, + "logps/chosen": -0.00013775471597909927, + "logps/rejected": -1.9430524110794067, + "loss": 0.321, + "nll_loss": 0.08024564385414124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3775472325505689e-05, + "rewards/margins": 0.19429144263267517, + "rewards/rejected": -0.19430524110794067, + "step": 13493 + }, + { + "epoch": 9.33195020746888, + "grad_norm": 4.62529182434082, + "learning_rate": 3.711387736284002e-06, + "log_odds_chosen": 11.182456970214844, + "log_odds_ratio": -0.0008582415757700801, + "logits/chosen": 0.2925998568534851, + "logits/rejected": 0.2288024127483368, + "logps/chosen": -0.0004990738234482706, + "logps/rejected": -2.758685827255249, + "loss": 0.5416, + "nll_loss": 0.1353091448545456, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.990738307242282e-05, + "rewards/margins": 0.27581867575645447, + "rewards/rejected": -0.27586856484413147, + "step": 13494 + }, + { + "epoch": 9.332641770401107, + "grad_norm": 4.5020670890808105, + "learning_rate": 3.707545719993853e-06, + "log_odds_chosen": 12.771818161010742, + "log_odds_ratio": -7.950256986077875e-06, + "logits/chosen": -0.4980790615081787, + "logits/rejected": -0.5255163908004761, + "logps/chosen": -0.00014841003576293588, + "logps/rejected": -3.522858142852783, + "loss": 0.5237, + "nll_loss": 0.13092757761478424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4841003576293588e-05, + "rewards/margins": 0.3522709906101227, + "rewards/rejected": -0.35228586196899414, + "step": 13495 + }, + { + "epoch": 9.333333333333334, + "grad_norm": 2.8230652809143066, + "learning_rate": 3.7037037037037037e-06, + "log_odds_chosen": 10.333768844604492, + "log_odds_ratio": -9.811281051952392e-05, + "logits/chosen": -0.14630591869354248, + "logits/rejected": -0.13303673267364502, + "logps/chosen": -0.00028036831645295024, + "logps/rejected": -1.8686578273773193, + "loss": 0.294, + "nll_loss": 0.07350200414657593, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8036831281497143e-05, + "rewards/margins": 0.18683774769306183, + "rewards/rejected": -0.18686577677726746, + "step": 13496 + }, + { + "epoch": 9.33402489626556, + "grad_norm": 3.8429489135742188, + "learning_rate": 3.699861687413555e-06, + "log_odds_chosen": 11.658157348632812, + "log_odds_ratio": -3.7459019949892536e-05, + "logits/chosen": 0.11421719938516617, + "logits/rejected": 0.05861104279756546, + "logps/chosen": -0.00013772404054179788, + "logps/rejected": -2.393380641937256, + "loss": 0.4082, + "nll_loss": 0.10205702483654022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3772404599876609e-05, + "rewards/margins": 0.23932427167892456, + "rewards/rejected": -0.23933804035186768, + "step": 13497 + }, + { + "epoch": 9.334716459197788, + "grad_norm": 3.278484582901001, + "learning_rate": 3.696019671123406e-06, + "log_odds_chosen": 10.718673706054688, + "log_odds_ratio": -9.96225280687213e-05, + "logits/chosen": -0.35212522745132446, + "logits/rejected": -0.4008997976779938, + "logps/chosen": -0.0001456753961974755, + "logps/rejected": -1.6912918090820312, + "loss": 0.2434, + "nll_loss": 0.0608292818069458, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.456754034734331e-05, + "rewards/margins": 0.1691146194934845, + "rewards/rejected": -0.16912919282913208, + "step": 13498 + }, + { + "epoch": 9.335408022130014, + "grad_norm": 2.9812114238739014, + "learning_rate": 3.6921776548332564e-06, + "log_odds_chosen": 13.193338394165039, + "log_odds_ratio": -6.2142416936694644e-06, + "logits/chosen": -0.36012908816337585, + "logits/rejected": -0.4613358974456787, + "logps/chosen": -4.835828804061748e-05, + "logps/rejected": -3.165411949157715, + "loss": 0.3395, + "nll_loss": 0.08488430827856064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.835828804061748e-06, + "rewards/margins": 0.3165363371372223, + "rewards/rejected": -0.3165411651134491, + "step": 13499 + }, + { + "epoch": 9.336099585062241, + "grad_norm": 4.30682897567749, + "learning_rate": 3.6883356385431076e-06, + "log_odds_chosen": 11.123899459838867, + "log_odds_ratio": -2.2451797121902928e-05, + "logits/chosen": -0.2035236656665802, + "logits/rejected": -0.08595597743988037, + "logps/chosen": -0.00021680566715076566, + "logps/rejected": -2.085360288619995, + "loss": 0.496, + "nll_loss": 0.12400057911872864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1680567442672327e-05, + "rewards/margins": 0.20851436257362366, + "rewards/rejected": -0.2085360288619995, + "step": 13500 + }, + { + "epoch": 9.336791147994468, + "grad_norm": 3.1607930660247803, + "learning_rate": 3.6844936222529588e-06, + "log_odds_chosen": 10.761314392089844, + "log_odds_ratio": -9.914707334246486e-05, + "logits/chosen": -0.08080196380615234, + "logits/rejected": -0.11017942428588867, + "logps/chosen": -0.00016276085807476193, + "logps/rejected": -2.0921876430511475, + "loss": 0.2914, + "nll_loss": 0.07284814119338989, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.627608435228467e-05, + "rewards/margins": 0.20920248329639435, + "rewards/rejected": -0.20921877026557922, + "step": 13501 + }, + { + "epoch": 9.337482710926695, + "grad_norm": 2.8824057579040527, + "learning_rate": 3.6806516059628095e-06, + "log_odds_chosen": 10.971266746520996, + "log_odds_ratio": -3.601726348279044e-05, + "logits/chosen": -0.4004353880882263, + "logits/rejected": -0.43088674545288086, + "logps/chosen": -0.00027602817863225937, + "logps/rejected": -2.114922046661377, + "loss": 0.3238, + "nll_loss": 0.08093824237585068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.76028185908217e-05, + "rewards/margins": 0.21146461367607117, + "rewards/rejected": -0.21149222552776337, + "step": 13502 + }, + { + "epoch": 9.338174273858922, + "grad_norm": 7.601047992706299, + "learning_rate": 3.6768095896726607e-06, + "log_odds_chosen": 10.525729179382324, + "log_odds_ratio": -4.52158747066278e-05, + "logits/chosen": 0.05261586606502533, + "logits/rejected": 0.04327049106359482, + "logps/chosen": -0.0005245390348136425, + "logps/rejected": -2.4437553882598877, + "loss": 0.6654, + "nll_loss": 0.16633406281471252, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245389911578968e-05, + "rewards/margins": 0.24432307481765747, + "rewards/rejected": -0.2443755567073822, + "step": 13503 + }, + { + "epoch": 9.338865836791149, + "grad_norm": 3.025954008102417, + "learning_rate": 3.672967573382511e-06, + "log_odds_chosen": 10.200634002685547, + "log_odds_ratio": -0.00014758994802832603, + "logits/chosen": -0.06436628848314285, + "logits/rejected": -0.11504589766263962, + "logps/chosen": -0.0005985183524899185, + "logps/rejected": -1.6737840175628662, + "loss": 0.3746, + "nll_loss": 0.09363247454166412, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9851834521396086e-05, + "rewards/margins": 0.16731856763362885, + "rewards/rejected": -0.16737841069698334, + "step": 13504 + }, + { + "epoch": 9.339557399723375, + "grad_norm": 3.8002161979675293, + "learning_rate": 3.669125557092362e-06, + "log_odds_chosen": 11.298839569091797, + "log_odds_ratio": -3.492705582175404e-05, + "logits/chosen": -0.3517683744430542, + "logits/rejected": -0.37183061242103577, + "logps/chosen": -0.00021029937488492578, + "logps/rejected": -2.008366584777832, + "loss": 0.3739, + "nll_loss": 0.09347833693027496, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1029936760896817e-05, + "rewards/margins": 0.2008156180381775, + "rewards/rejected": -0.200836643576622, + "step": 13505 + }, + { + "epoch": 9.340248962655602, + "grad_norm": 3.177788734436035, + "learning_rate": 3.6652835408022133e-06, + "log_odds_chosen": 10.39039134979248, + "log_odds_ratio": -5.2843250159639865e-05, + "logits/chosen": -0.2426442801952362, + "logits/rejected": -0.34015002846717834, + "logps/chosen": -0.00031000032322481275, + "logps/rejected": -1.9575690031051636, + "loss": 0.3766, + "nll_loss": 0.09413687884807587, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1000032322481275e-05, + "rewards/margins": 0.19572588801383972, + "rewards/rejected": -0.19575689733028412, + "step": 13506 + }, + { + "epoch": 9.340940525587829, + "grad_norm": 2.606773853302002, + "learning_rate": 3.6614415245120637e-06, + "log_odds_chosen": 11.13302230834961, + "log_odds_ratio": -4.279446875443682e-05, + "logits/chosen": -0.6950966715812683, + "logits/rejected": -0.5016571879386902, + "logps/chosen": -0.00021491489314939827, + "logps/rejected": -2.5477190017700195, + "loss": 0.2904, + "nll_loss": 0.07259637862443924, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.149149077013135e-05, + "rewards/margins": 0.25475040078163147, + "rewards/rejected": -0.2547719180583954, + "step": 13507 + }, + { + "epoch": 9.341632088520056, + "grad_norm": 2.2499380111694336, + "learning_rate": 3.657599508221915e-06, + "log_odds_chosen": 11.097082138061523, + "log_odds_ratio": -0.0005031878827139735, + "logits/chosen": -0.5092487335205078, + "logits/rejected": -0.5142608284950256, + "logps/chosen": -0.00017486378783360124, + "logps/rejected": -1.891782283782959, + "loss": 0.2769, + "nll_loss": 0.06917122006416321, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.748637623677496e-05, + "rewards/margins": 0.18916073441505432, + "rewards/rejected": -0.1891782432794571, + "step": 13508 + }, + { + "epoch": 9.342323651452283, + "grad_norm": 3.7919974327087402, + "learning_rate": 3.653757491931766e-06, + "log_odds_chosen": 11.685168266296387, + "log_odds_ratio": -0.0007330195512622595, + "logits/chosen": -0.24235190451145172, + "logits/rejected": -0.3833863437175751, + "logps/chosen": -0.0025347373448312283, + "logps/rejected": -3.4914391040802, + "loss": 0.3644, + "nll_loss": 0.09102737158536911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002534736995585263, + "rewards/margins": 0.34889042377471924, + "rewards/rejected": -0.3491438925266266, + "step": 13509 + }, + { + "epoch": 9.34301521438451, + "grad_norm": 1.976906657218933, + "learning_rate": 3.6499154756416168e-06, + "log_odds_chosen": 9.503071784973145, + "log_odds_ratio": -0.0006813781219534576, + "logits/chosen": 0.028965137898921967, + "logits/rejected": -0.10996140539646149, + "logps/chosen": -0.0010135057382285595, + "logps/rejected": -1.596954584121704, + "loss": 0.1912, + "nll_loss": 0.047726936638355255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010135059710592031, + "rewards/margins": 0.1595941036939621, + "rewards/rejected": -0.15969546139240265, + "step": 13510 + }, + { + "epoch": 9.343706777316736, + "grad_norm": 2.922912120819092, + "learning_rate": 3.646073459351468e-06, + "log_odds_chosen": 10.952808380126953, + "log_odds_ratio": -0.00014453314361162484, + "logits/chosen": -0.34083905816078186, + "logits/rejected": -0.3925648629665375, + "logps/chosen": -0.00035240783472545445, + "logps/rejected": -2.0025248527526855, + "loss": 0.2737, + "nll_loss": 0.0683998242020607, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5240784200141206e-05, + "rewards/margins": 0.20021723210811615, + "rewards/rejected": -0.2002524733543396, + "step": 13511 + }, + { + "epoch": 9.344398340248963, + "grad_norm": 3.6394662857055664, + "learning_rate": 3.642231443061319e-06, + "log_odds_chosen": 9.833059310913086, + "log_odds_ratio": -0.0003041258896701038, + "logits/chosen": -0.010110607370734215, + "logits/rejected": -0.1232147365808487, + "logps/chosen": -0.0005445755086839199, + "logps/rejected": -1.7158136367797852, + "loss": 0.4296, + "nll_loss": 0.1073751449584961, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4457548685604706e-05, + "rewards/margins": 0.17152690887451172, + "rewards/rejected": -0.17158135771751404, + "step": 13512 + }, + { + "epoch": 9.34508990318119, + "grad_norm": 3.334577798843384, + "learning_rate": 3.6383894267711694e-06, + "log_odds_chosen": 10.367897033691406, + "log_odds_ratio": -0.0002346929832128808, + "logits/chosen": -0.906934380531311, + "logits/rejected": -0.8934662938117981, + "logps/chosen": -0.0001801918842829764, + "logps/rejected": -1.6105437278747559, + "loss": 0.5023, + "nll_loss": 0.12554189562797546, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.80191891558934e-05, + "rewards/margins": 0.16103635728359222, + "rewards/rejected": -0.16105437278747559, + "step": 13513 + }, + { + "epoch": 9.345781466113417, + "grad_norm": 5.024738788604736, + "learning_rate": 3.6345474104810206e-06, + "log_odds_chosen": 12.38737964630127, + "log_odds_ratio": -4.9194541134056635e-06, + "logits/chosen": -0.4815788269042969, + "logits/rejected": -0.5164158344268799, + "logps/chosen": -0.0001025611418299377, + "logps/rejected": -3.0465731620788574, + "loss": 0.694, + "nll_loss": 0.17351019382476807, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0256115274387412e-05, + "rewards/margins": 0.3046470582485199, + "rewards/rejected": -0.30465734004974365, + "step": 13514 + }, + { + "epoch": 9.346473029045644, + "grad_norm": 3.6809616088867188, + "learning_rate": 3.6307053941908718e-06, + "log_odds_chosen": 11.304495811462402, + "log_odds_ratio": -0.00010354755795560777, + "logits/chosen": -0.08116520941257477, + "logits/rejected": -0.15369859337806702, + "logps/chosen": -0.00045098719419911504, + "logps/rejected": -2.8794734477996826, + "loss": 0.4327, + "nll_loss": 0.10816633701324463, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.509872087510303e-05, + "rewards/margins": 0.28790223598480225, + "rewards/rejected": -0.2879473567008972, + "step": 13515 + }, + { + "epoch": 9.34716459197787, + "grad_norm": 3.3048770427703857, + "learning_rate": 3.626863377900722e-06, + "log_odds_chosen": 10.46618366241455, + "log_odds_ratio": -0.00015984366473276168, + "logits/chosen": -0.006419524550437927, + "logits/rejected": -0.06340011954307556, + "logps/chosen": -0.00023882980167400092, + "logps/rejected": -1.9816560745239258, + "loss": 0.3738, + "nll_loss": 0.09342585504055023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.388297980360221e-05, + "rewards/margins": 0.19814172387123108, + "rewards/rejected": -0.198165625333786, + "step": 13516 + }, + { + "epoch": 9.347856154910097, + "grad_norm": 3.3105244636535645, + "learning_rate": 3.6230213616105733e-06, + "log_odds_chosen": 10.370229721069336, + "log_odds_ratio": -0.0002457791124470532, + "logits/chosen": -0.4260295629501343, + "logits/rejected": -0.5149073600769043, + "logps/chosen": -0.0004372486437205225, + "logps/rejected": -1.9320878982543945, + "loss": 0.3102, + "nll_loss": 0.0775316059589386, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.372486364445649e-05, + "rewards/margins": 0.19316506385803223, + "rewards/rejected": -0.19320878386497498, + "step": 13517 + }, + { + "epoch": 9.348547717842324, + "grad_norm": 3.5203912258148193, + "learning_rate": 3.6191793453204245e-06, + "log_odds_chosen": 10.76219654083252, + "log_odds_ratio": -5.4000083764549345e-05, + "logits/chosen": -0.07482422888278961, + "logits/rejected": -0.11013496667146683, + "logps/chosen": -0.00028690596809610724, + "logps/rejected": -2.247020721435547, + "loss": 0.6432, + "nll_loss": 0.1607947051525116, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.869059971999377e-05, + "rewards/margins": 0.22467339038848877, + "rewards/rejected": -0.22470206022262573, + "step": 13518 + }, + { + "epoch": 9.349239280774551, + "grad_norm": 3.769413471221924, + "learning_rate": 3.615337329030275e-06, + "log_odds_chosen": 10.713618278503418, + "log_odds_ratio": -0.00011112882202723995, + "logits/chosen": -0.492983341217041, + "logits/rejected": -0.34525761008262634, + "logps/chosen": -0.00021097969147376716, + "logps/rejected": -1.9031600952148438, + "loss": 0.5288, + "nll_loss": 0.1321868896484375, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.109797060256824e-05, + "rewards/margins": 0.19029492139816284, + "rewards/rejected": -0.19031602144241333, + "step": 13519 + }, + { + "epoch": 9.349930843706778, + "grad_norm": 3.34236741065979, + "learning_rate": 3.6114953127401264e-06, + "log_odds_chosen": 12.383539199829102, + "log_odds_ratio": -1.4433892829401884e-05, + "logits/chosen": 0.05255540832877159, + "logits/rejected": -0.03551376610994339, + "logps/chosen": -6.500162271549925e-05, + "logps/rejected": -2.6960530281066895, + "loss": 0.3563, + "nll_loss": 0.08907345682382584, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.500162726297276e-06, + "rewards/margins": 0.2695987820625305, + "rewards/rejected": -0.26960527896881104, + "step": 13520 + }, + { + "epoch": 9.350622406639005, + "grad_norm": 2.60010027885437, + "learning_rate": 3.6076532964499767e-06, + "log_odds_chosen": 9.934788703918457, + "log_odds_ratio": -0.0001642591378185898, + "logits/chosen": -0.22894614934921265, + "logits/rejected": -0.28230470418930054, + "logps/chosen": -0.000867595721501857, + "logps/rejected": -1.7370736598968506, + "loss": 0.2212, + "nll_loss": 0.055284351110458374, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.675957360537723e-05, + "rewards/margins": 0.17362061142921448, + "rewards/rejected": -0.17370736598968506, + "step": 13521 + }, + { + "epoch": 9.351313969571232, + "grad_norm": 3.1682677268981934, + "learning_rate": 3.603811280159828e-06, + "log_odds_chosen": 10.130863189697266, + "log_odds_ratio": -0.0001920114445965737, + "logits/chosen": -0.4609912633895874, + "logits/rejected": -0.5363480448722839, + "logps/chosen": -0.0002744471130426973, + "logps/rejected": -1.774971604347229, + "loss": 0.274, + "nll_loss": 0.06847333163022995, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7444712031865492e-05, + "rewards/margins": 0.17746974527835846, + "rewards/rejected": -0.17749716341495514, + "step": 13522 + }, + { + "epoch": 9.352005532503458, + "grad_norm": 3.280946731567383, + "learning_rate": 3.599969263869679e-06, + "log_odds_chosen": 12.321030616760254, + "log_odds_ratio": -7.711152647971176e-06, + "logits/chosen": -0.28302809596061707, + "logits/rejected": -0.3848038911819458, + "logps/chosen": -0.0001048395351972431, + "logps/rejected": -2.958585262298584, + "loss": 0.3042, + "nll_loss": 0.07606004178524017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0483954611117952e-05, + "rewards/margins": 0.29584801197052, + "rewards/rejected": -0.2958585023880005, + "step": 13523 + }, + { + "epoch": 9.352697095435685, + "grad_norm": 7.865887641906738, + "learning_rate": 3.59612724757953e-06, + "log_odds_chosen": 11.32748031616211, + "log_odds_ratio": -1.6802272511995398e-05, + "logits/chosen": -0.3086088001728058, + "logits/rejected": -0.2242552787065506, + "logps/chosen": -0.000512672879267484, + "logps/rejected": -2.8091330528259277, + "loss": 0.6089, + "nll_loss": 0.15222682058811188, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1267288654344156e-05, + "rewards/margins": 0.280862033367157, + "rewards/rejected": -0.2809132933616638, + "step": 13524 + }, + { + "epoch": 9.353388658367912, + "grad_norm": 3.4927072525024414, + "learning_rate": 3.592285231289381e-06, + "log_odds_chosen": 10.934383392333984, + "log_odds_ratio": -0.0001778034056769684, + "logits/chosen": -0.05815959721803665, + "logits/rejected": -0.07613163441419601, + "logps/chosen": -0.0001998421794269234, + "logps/rejected": -2.3929872512817383, + "loss": 0.2906, + "nll_loss": 0.07262570410966873, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.998421794269234e-05, + "rewards/margins": 0.23927873373031616, + "rewards/rejected": -0.2392987310886383, + "step": 13525 + }, + { + "epoch": 9.354080221300139, + "grad_norm": 2.775463342666626, + "learning_rate": 3.588443214999232e-06, + "log_odds_chosen": 11.355659484863281, + "log_odds_ratio": -0.0002898540406022221, + "logits/chosen": -0.28086039423942566, + "logits/rejected": -0.30784451961517334, + "logps/chosen": -0.00010843494965229183, + "logps/rejected": -2.1209423542022705, + "loss": 0.3015, + "nll_loss": 0.07533597201108932, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0843496056622826e-05, + "rewards/margins": 0.2120833843946457, + "rewards/rejected": -0.21209421753883362, + "step": 13526 + }, + { + "epoch": 9.354771784232366, + "grad_norm": 3.300349235534668, + "learning_rate": 3.5846011987090825e-06, + "log_odds_chosen": 10.913179397583008, + "log_odds_ratio": -3.2291423849528655e-05, + "logits/chosen": -0.29905587434768677, + "logits/rejected": -0.41569286584854126, + "logps/chosen": -0.00027541533927433193, + "logps/rejected": -2.266528367996216, + "loss": 0.2815, + "nll_loss": 0.07037490606307983, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.754153683781624e-05, + "rewards/margins": 0.22662532329559326, + "rewards/rejected": -0.2266528606414795, + "step": 13527 + }, + { + "epoch": 9.355463347164592, + "grad_norm": 3.9342613220214844, + "learning_rate": 3.5807591824189336e-06, + "log_odds_chosen": 11.960512161254883, + "log_odds_ratio": -1.677579894021619e-05, + "logits/chosen": 0.3055367171764374, + "logits/rejected": 0.2745853662490845, + "logps/chosen": -0.0001570849126437679, + "logps/rejected": -2.8881330490112305, + "loss": 0.4841, + "nll_loss": 0.12102716416120529, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.570849235577043e-05, + "rewards/margins": 0.2887975871562958, + "rewards/rejected": -0.2888132929801941, + "step": 13528 + }, + { + "epoch": 9.35615491009682, + "grad_norm": 2.7532241344451904, + "learning_rate": 3.576917166128785e-06, + "log_odds_chosen": 10.52255630493164, + "log_odds_ratio": -0.00011265225475654006, + "logits/chosen": -0.18711841106414795, + "logits/rejected": -0.18893063068389893, + "logps/chosen": -0.0004456713213585317, + "logps/rejected": -1.7828774452209473, + "loss": 0.2779, + "nll_loss": 0.06947155296802521, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4567128497874364e-05, + "rewards/margins": 0.17824319005012512, + "rewards/rejected": -0.17828774452209473, + "step": 13529 + }, + { + "epoch": 9.356846473029046, + "grad_norm": 3.0096664428710938, + "learning_rate": 3.573075149838635e-06, + "log_odds_chosen": 10.857948303222656, + "log_odds_ratio": -4.2508509068284184e-05, + "logits/chosen": -0.3942891061306, + "logits/rejected": -0.47457262873649597, + "logps/chosen": -0.00019530697318259627, + "logps/rejected": -1.931833028793335, + "loss": 0.4284, + "nll_loss": 0.10710678994655609, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9530696590663865e-05, + "rewards/margins": 0.19316376745700836, + "rewards/rejected": -0.1931833028793335, + "step": 13530 + }, + { + "epoch": 9.357538035961273, + "grad_norm": 3.6303272247314453, + "learning_rate": 3.5692331335484863e-06, + "log_odds_chosen": 12.418657302856445, + "log_odds_ratio": -9.459313332627062e-06, + "logits/chosen": -0.06569956243038177, + "logits/rejected": -0.25190234184265137, + "logps/chosen": -5.5378652177751064e-05, + "logps/rejected": -2.470158100128174, + "loss": 0.5091, + "nll_loss": 0.12726396322250366, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.537865490623517e-06, + "rewards/margins": 0.24701027572155, + "rewards/rejected": -0.2470158189535141, + "step": 13531 + }, + { + "epoch": 9.3582295988935, + "grad_norm": 2.818519115447998, + "learning_rate": 3.5653911172583375e-06, + "log_odds_chosen": 10.561389923095703, + "log_odds_ratio": -8.231533138314262e-05, + "logits/chosen": -0.40165674686431885, + "logits/rejected": -0.47748011350631714, + "logps/chosen": -0.00016759091522544622, + "logps/rejected": -1.9250514507293701, + "loss": 0.3667, + "nll_loss": 0.09166901558637619, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6759091522544622e-05, + "rewards/margins": 0.1924883872270584, + "rewards/rejected": -0.1925051361322403, + "step": 13532 + }, + { + "epoch": 9.358921161825727, + "grad_norm": 2.5160977840423584, + "learning_rate": 3.5615491009681882e-06, + "log_odds_chosen": 10.444723129272461, + "log_odds_ratio": -0.00020006597333122045, + "logits/chosen": -0.13105201721191406, + "logits/rejected": -0.04617917537689209, + "logps/chosen": -0.0007053934969007969, + "logps/rejected": -1.9502530097961426, + "loss": 0.2094, + "nll_loss": 0.05233754962682724, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.053935405565426e-05, + "rewards/margins": 0.1949547529220581, + "rewards/rejected": -0.19502530992031097, + "step": 13533 + }, + { + "epoch": 9.359612724757953, + "grad_norm": 3.0994884967803955, + "learning_rate": 3.5577070846780394e-06, + "log_odds_chosen": 10.954906463623047, + "log_odds_ratio": -4.9613285227678716e-05, + "logits/chosen": -0.5060032606124878, + "logits/rejected": -0.5835158824920654, + "logps/chosen": -0.00023126085579860955, + "logps/rejected": -2.014693260192871, + "loss": 0.3486, + "nll_loss": 0.08713746815919876, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3126085579860955e-05, + "rewards/margins": 0.20144620537757874, + "rewards/rejected": -0.2014693170785904, + "step": 13534 + }, + { + "epoch": 9.36030428769018, + "grad_norm": 3.6979379653930664, + "learning_rate": 3.5538650683878906e-06, + "log_odds_chosen": 10.76142406463623, + "log_odds_ratio": -8.735790470382199e-05, + "logits/chosen": -0.24934163689613342, + "logits/rejected": -0.2001289278268814, + "logps/chosen": -0.0001878843322629109, + "logps/rejected": -2.0889670848846436, + "loss": 0.524, + "nll_loss": 0.13099606335163116, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8788434317684732e-05, + "rewards/margins": 0.20887792110443115, + "rewards/rejected": -0.2088966965675354, + "step": 13535 + }, + { + "epoch": 9.360995850622407, + "grad_norm": 3.5815634727478027, + "learning_rate": 3.550023052097741e-06, + "log_odds_chosen": 11.135117530822754, + "log_odds_ratio": -5.961426722933538e-05, + "logits/chosen": -0.32283806800842285, + "logits/rejected": -0.3588043749332428, + "logps/chosen": -0.0002041382249444723, + "logps/rejected": -2.0124354362487793, + "loss": 0.4955, + "nll_loss": 0.12385807931423187, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.041382140305359e-05, + "rewards/margins": 0.20122313499450684, + "rewards/rejected": -0.2012435495853424, + "step": 13536 + }, + { + "epoch": 9.361687413554634, + "grad_norm": 3.3385045528411865, + "learning_rate": 3.546181035807592e-06, + "log_odds_chosen": 10.644510269165039, + "log_odds_ratio": -4.055405588587746e-05, + "logits/chosen": 0.35951119661331177, + "logits/rejected": 0.1921553611755371, + "logps/chosen": -0.00018380882102064788, + "logps/rejected": -1.9298346042633057, + "loss": 0.2987, + "nll_loss": 0.07466232776641846, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.838088246586267e-05, + "rewards/margins": 0.1929650753736496, + "rewards/rejected": -0.192983478307724, + "step": 13537 + }, + { + "epoch": 9.36237897648686, + "grad_norm": 2.018005847930908, + "learning_rate": 3.5423390195174424e-06, + "log_odds_chosen": 10.15301513671875, + "log_odds_ratio": -0.00021057360572740436, + "logits/chosen": -0.2207718789577484, + "logits/rejected": -0.31473612785339355, + "logps/chosen": -0.00041511974995955825, + "logps/rejected": -2.0513081550598145, + "loss": 0.2247, + "nll_loss": 0.056156452745199203, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.151197936153039e-05, + "rewards/margins": 0.2050892859697342, + "rewards/rejected": -0.20513081550598145, + "step": 13538 + }, + { + "epoch": 9.363070539419088, + "grad_norm": 4.0689897537231445, + "learning_rate": 3.5384970032272936e-06, + "log_odds_chosen": 9.644641876220703, + "log_odds_ratio": -0.0006806966848671436, + "logits/chosen": -0.6376623511314392, + "logits/rejected": -0.7098978161811829, + "logps/chosen": -0.0008343122899532318, + "logps/rejected": -1.6593842506408691, + "loss": 0.3388, + "nll_loss": 0.08463947474956512, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.343123772647232e-05, + "rewards/margins": 0.16585499048233032, + "rewards/rejected": -0.16593842208385468, + "step": 13539 + }, + { + "epoch": 9.363762102351314, + "grad_norm": 3.441516876220703, + "learning_rate": 3.5346549869371448e-06, + "log_odds_chosen": 12.501133918762207, + "log_odds_ratio": -1.264120055566309e-05, + "logits/chosen": 0.14218124747276306, + "logits/rejected": 0.021220367401838303, + "logps/chosen": -0.00011060711403843015, + "logps/rejected": -3.2021191120147705, + "loss": 0.32, + "nll_loss": 0.07998687773942947, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1060712495236658e-05, + "rewards/margins": 0.3202008605003357, + "rewards/rejected": -0.32021191716194153, + "step": 13540 + }, + { + "epoch": 9.364453665283541, + "grad_norm": 3.1656370162963867, + "learning_rate": 3.5308129706469955e-06, + "log_odds_chosen": 9.825788497924805, + "log_odds_ratio": -0.0001689522177912295, + "logits/chosen": -0.1688910722732544, + "logits/rejected": -0.07189778238534927, + "logps/chosen": -0.0009861242724582553, + "logps/rejected": -1.7967033386230469, + "loss": 0.2544, + "nll_loss": 0.06358574330806732, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.861243597697467e-05, + "rewards/margins": 0.1795717179775238, + "rewards/rejected": -0.1796703338623047, + "step": 13541 + }, + { + "epoch": 9.365145228215768, + "grad_norm": 3.3450043201446533, + "learning_rate": 3.5269709543568467e-06, + "log_odds_chosen": 12.491061210632324, + "log_odds_ratio": -6.405562089639716e-06, + "logits/chosen": -0.25810593366622925, + "logits/rejected": -0.314283549785614, + "logps/chosen": -4.827147495234385e-05, + "logps/rejected": -2.621676445007324, + "loss": 0.4573, + "nll_loss": 0.11432111263275146, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.827148131880676e-06, + "rewards/margins": 0.26216280460357666, + "rewards/rejected": -0.26216763257980347, + "step": 13542 + }, + { + "epoch": 9.365836791147995, + "grad_norm": 2.7860682010650635, + "learning_rate": 3.523128938066698e-06, + "log_odds_chosen": 11.803909301757812, + "log_odds_ratio": -1.233218517882051e-05, + "logits/chosen": -0.45608076453208923, + "logits/rejected": -0.5399580597877502, + "logps/chosen": -5.7952653151005507e-05, + "logps/rejected": -1.9664535522460938, + "loss": 0.3966, + "nll_loss": 0.09915214031934738, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.79526476940373e-06, + "rewards/margins": 0.19663956761360168, + "rewards/rejected": -0.1966453492641449, + "step": 13543 + }, + { + "epoch": 9.366528354080222, + "grad_norm": 13.490571975708008, + "learning_rate": 3.519286921776548e-06, + "log_odds_chosen": 10.731107711791992, + "log_odds_ratio": -4.895938764093444e-05, + "logits/chosen": -0.1185360997915268, + "logits/rejected": -0.2674499750137329, + "logps/chosen": -0.00047680726856924593, + "logps/rejected": -2.3153059482574463, + "loss": 0.4476, + "nll_loss": 0.11190719902515411, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.768072540173307e-05, + "rewards/margins": 0.23148292303085327, + "rewards/rejected": -0.23153060674667358, + "step": 13544 + }, + { + "epoch": 9.367219917012449, + "grad_norm": 3.817535638809204, + "learning_rate": 3.5154449054863993e-06, + "log_odds_chosen": 10.996612548828125, + "log_odds_ratio": -0.0001471824652981013, + "logits/chosen": 0.03633652627468109, + "logits/rejected": -0.11121993511915207, + "logps/chosen": -0.00016094991588033736, + "logps/rejected": -2.0416789054870605, + "loss": 0.5539, + "nll_loss": 0.1384541392326355, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.609499304322526e-05, + "rewards/margins": 0.20415179431438446, + "rewards/rejected": -0.20416787266731262, + "step": 13545 + }, + { + "epoch": 9.367911479944675, + "grad_norm": 2.8662376403808594, + "learning_rate": 3.5116028891962505e-06, + "log_odds_chosen": 10.102569580078125, + "log_odds_ratio": -8.055663056438789e-05, + "logits/chosen": -0.33110880851745605, + "logits/rejected": -0.3435867130756378, + "logps/chosen": -0.00019017732120119035, + "logps/rejected": -1.3068668842315674, + "loss": 0.2805, + "nll_loss": 0.07012133300304413, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.901773430290632e-05, + "rewards/margins": 0.13066768646240234, + "rewards/rejected": -0.1306867003440857, + "step": 13546 + }, + { + "epoch": 9.368603042876902, + "grad_norm": 4.5606770515441895, + "learning_rate": 3.5077608729061013e-06, + "log_odds_chosen": 10.703825950622559, + "log_odds_ratio": -0.00022275917581282556, + "logits/chosen": -0.19523394107818604, + "logits/rejected": -0.2622426152229309, + "logps/chosen": -0.0007412299746647477, + "logps/rejected": -2.1645567417144775, + "loss": 0.5679, + "nll_loss": 0.14194399118423462, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.412301056319848e-05, + "rewards/margins": 0.21638154983520508, + "rewards/rejected": -0.21645568311214447, + "step": 13547 + }, + { + "epoch": 9.369294605809129, + "grad_norm": 2.658365249633789, + "learning_rate": 3.5039188566159524e-06, + "log_odds_chosen": 11.70455551147461, + "log_odds_ratio": -2.6133679057238623e-05, + "logits/chosen": -0.3863418698310852, + "logits/rejected": -0.41382431983947754, + "logps/chosen": -0.00024077700800262392, + "logps/rejected": -2.854800224304199, + "loss": 0.3191, + "nll_loss": 0.07977981120347977, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.407770080026239e-05, + "rewards/margins": 0.28545594215393066, + "rewards/rejected": -0.2854800224304199, + "step": 13548 + }, + { + "epoch": 9.369986168741356, + "grad_norm": 3.614183187484741, + "learning_rate": 3.5000768403258036e-06, + "log_odds_chosen": 10.417539596557617, + "log_odds_ratio": -0.0005081373383291066, + "logits/chosen": -0.12346908450126648, + "logits/rejected": -0.2217111736536026, + "logps/chosen": -0.0017699197633191943, + "logps/rejected": -2.1381521224975586, + "loss": 0.3942, + "nll_loss": 0.09849061071872711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017699197633191943, + "rewards/margins": 0.21363824605941772, + "rewards/rejected": -0.21381521224975586, + "step": 13549 + }, + { + "epoch": 9.370677731673583, + "grad_norm": 2.7907590866088867, + "learning_rate": 3.496234824035654e-06, + "log_odds_chosen": 10.472977638244629, + "log_odds_ratio": -0.00026426269323565066, + "logits/chosen": -0.19486777484416962, + "logits/rejected": -0.1657772958278656, + "logps/chosen": -0.0003529631649143994, + "logps/rejected": -1.6553034782409668, + "loss": 0.3223, + "nll_loss": 0.08055949956178665, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.529631794663146e-05, + "rewards/margins": 0.16549506783485413, + "rewards/rejected": -0.16553035378456116, + "step": 13550 + }, + { + "epoch": 9.37136929460581, + "grad_norm": 3.4401087760925293, + "learning_rate": 3.492392807745505e-06, + "log_odds_chosen": 11.224440574645996, + "log_odds_ratio": -9.510615927865729e-05, + "logits/chosen": -0.26112285256385803, + "logits/rejected": -0.26466330885887146, + "logps/chosen": -0.0003577464958652854, + "logps/rejected": -2.475301742553711, + "loss": 0.3851, + "nll_loss": 0.09625741094350815, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5774646676145494e-05, + "rewards/margins": 0.2474943995475769, + "rewards/rejected": -0.24753017723560333, + "step": 13551 + }, + { + "epoch": 9.372060857538036, + "grad_norm": 3.604793071746826, + "learning_rate": 3.4885507914553563e-06, + "log_odds_chosen": 11.031092643737793, + "log_odds_ratio": -0.0006941432366147637, + "logits/chosen": -0.0888419970870018, + "logits/rejected": -0.19537392258644104, + "logps/chosen": -0.0005400891532190144, + "logps/rejected": -2.552731990814209, + "loss": 0.8016, + "nll_loss": 0.20033778250217438, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4008916777092963e-05, + "rewards/margins": 0.2552191913127899, + "rewards/rejected": -0.2552731931209564, + "step": 13552 + }, + { + "epoch": 9.372752420470263, + "grad_norm": 2.4608335494995117, + "learning_rate": 3.4847087751652066e-06, + "log_odds_chosen": 11.443750381469727, + "log_odds_ratio": -1.4160999853629619e-05, + "logits/chosen": -0.269876092672348, + "logits/rejected": -0.3958069086074829, + "logps/chosen": -5.696548760170117e-05, + "logps/rejected": -1.4379843473434448, + "loss": 0.2563, + "nll_loss": 0.06408420950174332, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.696549123967998e-06, + "rewards/margins": 0.14379273355007172, + "rewards/rejected": -0.14379842579364777, + "step": 13553 + }, + { + "epoch": 9.37344398340249, + "grad_norm": 4.183465480804443, + "learning_rate": 3.4808667588750578e-06, + "log_odds_chosen": 10.022726058959961, + "log_odds_ratio": -0.0005112159997224808, + "logits/chosen": -0.361375629901886, + "logits/rejected": -0.33961185812950134, + "logps/chosen": -0.0018962868489325047, + "logps/rejected": -2.9116106033325195, + "loss": 0.3587, + "nll_loss": 0.0896359458565712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00018962868489325047, + "rewards/margins": 0.29097142815589905, + "rewards/rejected": -0.29116106033325195, + "step": 13554 + }, + { + "epoch": 9.374135546334717, + "grad_norm": 3.236473798751831, + "learning_rate": 3.4770247425849085e-06, + "log_odds_chosen": 11.414098739624023, + "log_odds_ratio": -3.5743047192227095e-05, + "logits/chosen": -0.10795624554157257, + "logits/rejected": -0.2149205058813095, + "logps/chosen": -0.00015265934052877128, + "logps/rejected": -2.235002279281616, + "loss": 0.4326, + "nll_loss": 0.108148954808712, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.526593587186653e-05, + "rewards/margins": 0.22348496317863464, + "rewards/rejected": -0.22350022196769714, + "step": 13555 + }, + { + "epoch": 9.374827109266944, + "grad_norm": 2.6837217807769775, + "learning_rate": 3.4731827262947597e-06, + "log_odds_chosen": 11.048116683959961, + "log_odds_ratio": -7.135642954381183e-05, + "logits/chosen": -0.4107449948787689, + "logits/rejected": -0.4265679717063904, + "logps/chosen": -0.00013635572395287454, + "logps/rejected": -1.8748329877853394, + "loss": 0.304, + "nll_loss": 0.0759982243180275, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3635572940984275e-05, + "rewards/margins": 0.18746967613697052, + "rewards/rejected": -0.1874833106994629, + "step": 13556 + }, + { + "epoch": 9.37551867219917, + "grad_norm": 3.1047263145446777, + "learning_rate": 3.469340710004611e-06, + "log_odds_chosen": 12.321937561035156, + "log_odds_ratio": -2.3595224774908274e-05, + "logits/chosen": -0.13789021968841553, + "logits/rejected": -0.3051018714904785, + "logps/chosen": -0.00010697266407078132, + "logps/rejected": -3.1257381439208984, + "loss": 0.3415, + "nll_loss": 0.08536285907030106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.069726567948237e-05, + "rewards/margins": 0.31256312131881714, + "rewards/rejected": -0.31257379055023193, + "step": 13557 + }, + { + "epoch": 9.376210235131397, + "grad_norm": 2.6061549186706543, + "learning_rate": 3.465498693714461e-06, + "log_odds_chosen": 10.358039855957031, + "log_odds_ratio": -0.00012019602581858635, + "logits/chosen": -0.07744506001472473, + "logits/rejected": -0.11549359560012817, + "logps/chosen": -0.0004092513117939234, + "logps/rejected": -1.9624541997909546, + "loss": 0.2435, + "nll_loss": 0.060864921659231186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.092513336217962e-05, + "rewards/margins": 0.1962045133113861, + "rewards/rejected": -0.19624543190002441, + "step": 13558 + }, + { + "epoch": 9.376901798063624, + "grad_norm": 3.5974655151367188, + "learning_rate": 3.4616566774243124e-06, + "log_odds_chosen": 10.450204849243164, + "log_odds_ratio": -0.00016834316193126142, + "logits/chosen": -0.2307269126176834, + "logits/rejected": -0.2927913963794708, + "logps/chosen": -0.00027439341647550464, + "logps/rejected": -1.9489752054214478, + "loss": 0.2947, + "nll_loss": 0.07364586740732193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7439338737167418e-05, + "rewards/margins": 0.19487008452415466, + "rewards/rejected": -0.19489750266075134, + "step": 13559 + }, + { + "epoch": 9.377593360995851, + "grad_norm": 3.8792929649353027, + "learning_rate": 3.4578146611341635e-06, + "log_odds_chosen": 11.759347915649414, + "log_odds_ratio": -1.827812593546696e-05, + "logits/chosen": -0.21874378621578217, + "logits/rejected": -0.2008637636899948, + "logps/chosen": -0.00014112150529399514, + "logps/rejected": -2.4289422035217285, + "loss": 0.4251, + "nll_loss": 0.10627898573875427, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4112150893197395e-05, + "rewards/margins": 0.24288010597229004, + "rewards/rejected": -0.2428942322731018, + "step": 13560 + }, + { + "epoch": 9.378284923928078, + "grad_norm": 2.9272608757019043, + "learning_rate": 3.453972644844014e-06, + "log_odds_chosen": 10.84752082824707, + "log_odds_ratio": -5.029505700804293e-05, + "logits/chosen": -0.13805429637432098, + "logits/rejected": -0.06397004425525665, + "logps/chosen": -0.00026461438392288983, + "logps/rejected": -2.0349204540252686, + "loss": 0.4125, + "nll_loss": 0.10310757160186768, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.646143730089534e-05, + "rewards/margins": 0.20346559584140778, + "rewards/rejected": -0.20349204540252686, + "step": 13561 + }, + { + "epoch": 9.378976486860305, + "grad_norm": 3.037715435028076, + "learning_rate": 3.450130628553865e-06, + "log_odds_chosen": 9.759491920471191, + "log_odds_ratio": -0.0010099021019414067, + "logits/chosen": -0.31357407569885254, + "logits/rejected": -0.20637246966362, + "logps/chosen": -0.0014631549129262567, + "logps/rejected": -1.0898208618164062, + "loss": 0.3014, + "nll_loss": 0.0752379521727562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001463155058445409, + "rewards/margins": 0.10883576422929764, + "rewards/rejected": -0.10898207128047943, + "step": 13562 + }, + { + "epoch": 9.379668049792532, + "grad_norm": 3.0630035400390625, + "learning_rate": 3.4462886122637162e-06, + "log_odds_chosen": 10.501922607421875, + "log_odds_ratio": -6.71866800985299e-05, + "logits/chosen": -0.47390761971473694, + "logits/rejected": -0.532874345779419, + "logps/chosen": -0.00016098772175610065, + "logps/rejected": -1.3961749076843262, + "loss": 0.339, + "nll_loss": 0.08475200831890106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6098772903205827e-05, + "rewards/margins": 0.13960139453411102, + "rewards/rejected": -0.13961750268936157, + "step": 13563 + }, + { + "epoch": 9.380359612724758, + "grad_norm": 2.68406081199646, + "learning_rate": 3.442446595973567e-06, + "log_odds_chosen": 10.071020126342773, + "log_odds_ratio": -6.841091817477718e-05, + "logits/chosen": -0.2392948567867279, + "logits/rejected": -0.16483944654464722, + "logps/chosen": -0.0002607560600154102, + "logps/rejected": -1.6807191371917725, + "loss": 0.2379, + "nll_loss": 0.05947023630142212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6075604182551615e-05, + "rewards/margins": 0.16804584860801697, + "rewards/rejected": -0.168071910738945, + "step": 13564 + }, + { + "epoch": 9.381051175656985, + "grad_norm": 3.561326503753662, + "learning_rate": 3.438604579683418e-06, + "log_odds_chosen": 10.621723175048828, + "log_odds_ratio": -6.102824772824533e-05, + "logits/chosen": -0.6733594536781311, + "logits/rejected": -0.7504304647445679, + "logps/chosen": -0.0002749867853708565, + "logps/rejected": -1.9878876209259033, + "loss": 0.4111, + "nll_loss": 0.10276590287685394, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.749867780948989e-05, + "rewards/margins": 0.19876126945018768, + "rewards/rejected": -0.19878877699375153, + "step": 13565 + }, + { + "epoch": 9.381742738589212, + "grad_norm": 3.828990936279297, + "learning_rate": 3.4347625633932693e-06, + "log_odds_chosen": 10.754388809204102, + "log_odds_ratio": -6.629389827139676e-05, + "logits/chosen": 0.1312224566936493, + "logits/rejected": 0.031041786074638367, + "logps/chosen": -0.00022781691222917289, + "logps/rejected": -2.1744894981384277, + "loss": 0.443, + "nll_loss": 0.11074355244636536, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2781690859119408e-05, + "rewards/margins": 0.21742618083953857, + "rewards/rejected": -0.21744894981384277, + "step": 13566 + }, + { + "epoch": 9.382434301521439, + "grad_norm": 4.1905012130737305, + "learning_rate": 3.4309205471031196e-06, + "log_odds_chosen": 11.427661895751953, + "log_odds_ratio": -2.659208985278383e-05, + "logits/chosen": -0.31396639347076416, + "logits/rejected": -0.3941642940044403, + "logps/chosen": -0.00021044274035375565, + "logps/rejected": -2.6817564964294434, + "loss": 0.3957, + "nll_loss": 0.09892689436674118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1044272216386162e-05, + "rewards/margins": 0.2681546211242676, + "rewards/rejected": -0.2681756615638733, + "step": 13567 + }, + { + "epoch": 9.383125864453666, + "grad_norm": 3.7993035316467285, + "learning_rate": 3.427078530812971e-06, + "log_odds_chosen": 10.846572875976562, + "log_odds_ratio": -3.981238114647567e-05, + "logits/chosen": -0.326119065284729, + "logits/rejected": -0.3857240378856659, + "logps/chosen": -0.0003091458638664335, + "logps/rejected": -1.9830150604248047, + "loss": 0.3483, + "nll_loss": 0.08706822991371155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.091458711423911e-05, + "rewards/margins": 0.19827060401439667, + "rewards/rejected": -0.1983015239238739, + "step": 13568 + }, + { + "epoch": 9.383817427385893, + "grad_norm": 3.793304443359375, + "learning_rate": 3.423236514522822e-06, + "log_odds_chosen": 11.753762245178223, + "log_odds_ratio": -0.00012055758270435035, + "logits/chosen": -0.368549644947052, + "logits/rejected": -0.4517800211906433, + "logps/chosen": -0.00020971563935745507, + "logps/rejected": -2.952338457107544, + "loss": 0.4279, + "nll_loss": 0.10697045177221298, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.097156539093703e-05, + "rewards/margins": 0.29521286487579346, + "rewards/rejected": -0.2952338457107544, + "step": 13569 + }, + { + "epoch": 9.38450899031812, + "grad_norm": 3.0754928588867188, + "learning_rate": 3.4193944982326727e-06, + "log_odds_chosen": 11.249407768249512, + "log_odds_ratio": -6.009052231092937e-05, + "logits/chosen": -0.5221173763275146, + "logits/rejected": -0.5405316352844238, + "logps/chosen": -0.0003820542187895626, + "logps/rejected": -2.40143084526062, + "loss": 0.3792, + "nll_loss": 0.09480046480894089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8205420423764735e-05, + "rewards/margins": 0.24010486900806427, + "rewards/rejected": -0.2401430904865265, + "step": 13570 + }, + { + "epoch": 9.385200553250346, + "grad_norm": 3.1722779273986816, + "learning_rate": 3.415552481942524e-06, + "log_odds_chosen": 11.105411529541016, + "log_odds_ratio": -4.717747287941165e-05, + "logits/chosen": -0.8160011768341064, + "logits/rejected": -0.8017624616622925, + "logps/chosen": -0.0002255855652038008, + "logps/rejected": -2.233698844909668, + "loss": 0.4063, + "nll_loss": 0.10156512260437012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.255855724797584e-05, + "rewards/margins": 0.22334732115268707, + "rewards/rejected": -0.22336986660957336, + "step": 13571 + }, + { + "epoch": 9.385892116182573, + "grad_norm": 3.226372241973877, + "learning_rate": 3.4117104656523742e-06, + "log_odds_chosen": 10.567256927490234, + "log_odds_ratio": -0.00023033078468870372, + "logits/chosen": -0.3000712990760803, + "logits/rejected": -0.2451561689376831, + "logps/chosen": -0.0003794012009166181, + "logps/rejected": -2.2354483604431152, + "loss": 0.4107, + "nll_loss": 0.1026480570435524, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.794012445723638e-05, + "rewards/margins": 0.223506897687912, + "rewards/rejected": -0.22354485094547272, + "step": 13572 + }, + { + "epoch": 9.3865836791148, + "grad_norm": 3.852189064025879, + "learning_rate": 3.4078684493622254e-06, + "log_odds_chosen": 11.41202163696289, + "log_odds_ratio": -1.611007610335946e-05, + "logits/chosen": -0.08447438478469849, + "logits/rejected": -0.17714518308639526, + "logps/chosen": -0.0001326056953985244, + "logps/rejected": -2.1824405193328857, + "loss": 0.4716, + "nll_loss": 0.11790943145751953, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3260570085549261e-05, + "rewards/margins": 0.21823078393936157, + "rewards/rejected": -0.2182440459728241, + "step": 13573 + }, + { + "epoch": 9.387275242047027, + "grad_norm": 4.368622303009033, + "learning_rate": 3.4040264330720766e-06, + "log_odds_chosen": 10.459915161132812, + "log_odds_ratio": -8.878041990101337e-05, + "logits/chosen": -0.2901434898376465, + "logits/rejected": -0.3067079782485962, + "logps/chosen": -0.00019604206318035722, + "logps/rejected": -1.8046751022338867, + "loss": 0.3493, + "nll_loss": 0.08731767535209656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9604207409429364e-05, + "rewards/margins": 0.18044789135456085, + "rewards/rejected": -0.18046751618385315, + "step": 13574 + }, + { + "epoch": 9.387966804979254, + "grad_norm": 2.562063694000244, + "learning_rate": 3.400184416781927e-06, + "log_odds_chosen": 11.21015453338623, + "log_odds_ratio": -6.06346657150425e-05, + "logits/chosen": -0.2250322699546814, + "logits/rejected": -0.30319520831108093, + "logps/chosen": -0.0002839433145709336, + "logps/rejected": -2.7567548751831055, + "loss": 0.2747, + "nll_loss": 0.0686735287308693, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8394333639880642e-05, + "rewards/margins": 0.27564704418182373, + "rewards/rejected": -0.2756754755973816, + "step": 13575 + }, + { + "epoch": 9.38865836791148, + "grad_norm": 3.084099531173706, + "learning_rate": 3.396342400491778e-06, + "log_odds_chosen": 10.3840913772583, + "log_odds_ratio": -0.00026426592376083136, + "logits/chosen": -0.00226283585652709, + "logits/rejected": -0.09975391626358032, + "logps/chosen": -0.00034463696647435427, + "logps/rejected": -2.0437979698181152, + "loss": 0.3703, + "nll_loss": 0.09254927933216095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.446369737503119e-05, + "rewards/margins": 0.20434533059597015, + "rewards/rejected": -0.20437979698181152, + "step": 13576 + }, + { + "epoch": 9.389349930843707, + "grad_norm": 4.571847438812256, + "learning_rate": 3.3925003842016292e-06, + "log_odds_chosen": 10.668195724487305, + "log_odds_ratio": -9.47575899772346e-05, + "logits/chosen": -0.3503793478012085, + "logits/rejected": -0.5184992551803589, + "logps/chosen": -0.00021498440764844418, + "logps/rejected": -2.1632885932922363, + "loss": 0.543, + "nll_loss": 0.13575081527233124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1498439309652895e-05, + "rewards/margins": 0.2163073569536209, + "rewards/rejected": -0.21632885932922363, + "step": 13577 + }, + { + "epoch": 9.390041493775934, + "grad_norm": 3.6178324222564697, + "learning_rate": 3.38865836791148e-06, + "log_odds_chosen": 11.287225723266602, + "log_odds_ratio": -4.2001665860880166e-05, + "logits/chosen": -0.2348712980747223, + "logits/rejected": -0.3508765697479248, + "logps/chosen": -9.699821384856477e-05, + "logps/rejected": -1.6754095554351807, + "loss": 0.3395, + "nll_loss": 0.08488013595342636, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.699820111563895e-06, + "rewards/margins": 0.16753125190734863, + "rewards/rejected": -0.16754095256328583, + "step": 13578 + }, + { + "epoch": 9.39073305670816, + "grad_norm": 2.393125295639038, + "learning_rate": 3.384816351621331e-06, + "log_odds_chosen": 10.588972091674805, + "log_odds_ratio": -6.092490002629347e-05, + "logits/chosen": -0.5662620663642883, + "logits/rejected": -0.5864916443824768, + "logps/chosen": -0.0003362030256539583, + "logps/rejected": -1.9976005554199219, + "loss": 0.3772, + "nll_loss": 0.09428730607032776, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3620304748183116e-05, + "rewards/margins": 0.1997264325618744, + "rewards/rejected": -0.1997600644826889, + "step": 13579 + }, + { + "epoch": 9.391424619640388, + "grad_norm": 3.167736053466797, + "learning_rate": 3.3809743353311823e-06, + "log_odds_chosen": 11.302907943725586, + "log_odds_ratio": -2.863583722501062e-05, + "logits/chosen": -0.6793009042739868, + "logits/rejected": -0.5627678632736206, + "logps/chosen": -0.0001633332867641002, + "logps/rejected": -2.2593603134155273, + "loss": 0.3868, + "nll_loss": 0.09669610857963562, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6333327948814258e-05, + "rewards/margins": 0.2259196937084198, + "rewards/rejected": -0.22593602538108826, + "step": 13580 + }, + { + "epoch": 9.392116182572614, + "grad_norm": 3.3908193111419678, + "learning_rate": 3.3771323190410327e-06, + "log_odds_chosen": 11.220902442932129, + "log_odds_ratio": -0.00010893019498325884, + "logits/chosen": 0.024650298058986664, + "logits/rejected": -0.0366368368268013, + "logps/chosen": -0.0008727542590349913, + "logps/rejected": -2.7380731105804443, + "loss": 0.4389, + "nll_loss": 0.10972367227077484, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.72754244483076e-05, + "rewards/margins": 0.27372005581855774, + "rewards/rejected": -0.2738073468208313, + "step": 13581 + }, + { + "epoch": 9.392807745504841, + "grad_norm": 2.68562650680542, + "learning_rate": 3.373290302750884e-06, + "log_odds_chosen": 10.752114295959473, + "log_odds_ratio": -5.1588660426205024e-05, + "logits/chosen": -0.15629924833774567, + "logits/rejected": -0.14297765493392944, + "logps/chosen": -0.0006921213353052735, + "logps/rejected": -2.4885218143463135, + "loss": 0.2704, + "nll_loss": 0.06759613752365112, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.921213935129344e-05, + "rewards/margins": 0.24878299236297607, + "rewards/rejected": -0.2488521933555603, + "step": 13582 + }, + { + "epoch": 9.393499308437068, + "grad_norm": 4.6912150382995605, + "learning_rate": 3.369448286460735e-06, + "log_odds_chosen": 11.584748268127441, + "log_odds_ratio": -0.00010558907524682581, + "logits/chosen": -0.40621644258499146, + "logits/rejected": -0.3990243673324585, + "logps/chosen": -0.00022080856433603913, + "logps/rejected": -2.1555638313293457, + "loss": 0.5407, + "nll_loss": 0.1351681500673294, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2080857888795435e-05, + "rewards/margins": 0.2155342996120453, + "rewards/rejected": -0.21555638313293457, + "step": 13583 + }, + { + "epoch": 9.394190871369295, + "grad_norm": 4.056237697601318, + "learning_rate": 3.3656062701705853e-06, + "log_odds_chosen": 10.888897895812988, + "log_odds_ratio": -0.00024027469044085592, + "logits/chosen": -0.4823153614997864, + "logits/rejected": -0.48899656534194946, + "logps/chosen": -0.00022984632232692093, + "logps/rejected": -2.356065034866333, + "loss": 0.4007, + "nll_loss": 0.10016251355409622, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.298463004990481e-05, + "rewards/margins": 0.23558352887630463, + "rewards/rejected": -0.23560652136802673, + "step": 13584 + }, + { + "epoch": 9.394882434301522, + "grad_norm": 3.921373128890991, + "learning_rate": 3.3617642538804365e-06, + "log_odds_chosen": 12.349092483520508, + "log_odds_ratio": -8.765801794652361e-06, + "logits/chosen": 0.1655922532081604, + "logits/rejected": 0.11520100384950638, + "logps/chosen": -0.0002407803403912112, + "logps/rejected": -3.638532876968384, + "loss": 0.3757, + "nll_loss": 0.09393280744552612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4078035494312644e-05, + "rewards/margins": 0.36382919549942017, + "rewards/rejected": -0.3638532757759094, + "step": 13585 + }, + { + "epoch": 9.395573997233749, + "grad_norm": 3.0867762565612793, + "learning_rate": 3.3579222375902877e-06, + "log_odds_chosen": 10.678717613220215, + "log_odds_ratio": -0.0002471938787493855, + "logits/chosen": -0.2576814889907837, + "logits/rejected": -0.2743965685367584, + "logps/chosen": -0.00034111557761207223, + "logps/rejected": -2.0099055767059326, + "loss": 0.3935, + "nll_loss": 0.09835696220397949, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.411155557841994e-05, + "rewards/margins": 0.20095646381378174, + "rewards/rejected": -0.20099057257175446, + "step": 13586 + }, + { + "epoch": 9.396265560165975, + "grad_norm": 3.657719612121582, + "learning_rate": 3.3540802213001384e-06, + "log_odds_chosen": 10.942753791809082, + "log_odds_ratio": -0.00029641768196597695, + "logits/chosen": -0.25797197222709656, + "logits/rejected": -0.23642107844352722, + "logps/chosen": -0.00017646155902184546, + "logps/rejected": -2.24796462059021, + "loss": 0.2469, + "nll_loss": 0.0617036335170269, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7646154446993023e-05, + "rewards/margins": 0.22477883100509644, + "rewards/rejected": -0.22479647397994995, + "step": 13587 + }, + { + "epoch": 9.396957123098202, + "grad_norm": 4.464614391326904, + "learning_rate": 3.3502382050099896e-06, + "log_odds_chosen": 10.541181564331055, + "log_odds_ratio": -0.00013117294292896986, + "logits/chosen": -0.20644135773181915, + "logits/rejected": -0.10292874276638031, + "logps/chosen": -0.0005109063349664211, + "logps/rejected": -2.394510269165039, + "loss": 0.8101, + "nll_loss": 0.20250877737998962, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.10906356794294e-05, + "rewards/margins": 0.23939993977546692, + "rewards/rejected": -0.23945102095603943, + "step": 13588 + }, + { + "epoch": 9.39764868603043, + "grad_norm": 2.5152103900909424, + "learning_rate": 3.34639618871984e-06, + "log_odds_chosen": 10.59862232208252, + "log_odds_ratio": -9.659049101173878e-05, + "logits/chosen": -0.029881253838539124, + "logits/rejected": -0.1297704428434372, + "logps/chosen": -0.00021189470135141164, + "logps/rejected": -1.72607421875, + "loss": 0.2238, + "nll_loss": 0.05594668164849281, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1189471226534806e-05, + "rewards/margins": 0.17258621752262115, + "rewards/rejected": -0.1726074069738388, + "step": 13589 + }, + { + "epoch": 9.398340248962656, + "grad_norm": 2.9175498485565186, + "learning_rate": 3.342554172429691e-06, + "log_odds_chosen": 10.26060676574707, + "log_odds_ratio": -0.0002207197976531461, + "logits/chosen": 0.13548356294631958, + "logits/rejected": 0.15024137496948242, + "logps/chosen": -0.0009417575201950967, + "logps/rejected": -1.7678678035736084, + "loss": 0.3158, + "nll_loss": 0.07892054319381714, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.417576075065881e-05, + "rewards/margins": 0.17669261991977692, + "rewards/rejected": -0.17678678035736084, + "step": 13590 + }, + { + "epoch": 9.399031811894883, + "grad_norm": 3.9631402492523193, + "learning_rate": 3.3387121561395423e-06, + "log_odds_chosen": 12.132984161376953, + "log_odds_ratio": -1.4265860045270529e-05, + "logits/chosen": 0.06901467591524124, + "logits/rejected": -0.20833554863929749, + "logps/chosen": -0.0004912492004223168, + "logps/rejected": -3.343538761138916, + "loss": 0.4169, + "nll_loss": 0.10422386229038239, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9124922952614725e-05, + "rewards/margins": 0.33430472016334534, + "rewards/rejected": -0.33435386419296265, + "step": 13591 + }, + { + "epoch": 9.39972337482711, + "grad_norm": 3.6970267295837402, + "learning_rate": 3.334870139849393e-06, + "log_odds_chosen": 11.283451080322266, + "log_odds_ratio": -4.216269371681847e-05, + "logits/chosen": -0.38629505038261414, + "logits/rejected": -0.41579392552375793, + "logps/chosen": -9.976735600503162e-05, + "logps/rejected": -2.153491973876953, + "loss": 0.4248, + "nll_loss": 0.10618871450424194, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.976735782402102e-06, + "rewards/margins": 0.21533921360969543, + "rewards/rejected": -0.2153491973876953, + "step": 13592 + }, + { + "epoch": 9.400414937759336, + "grad_norm": 5.877899646759033, + "learning_rate": 3.331028123559244e-06, + "log_odds_chosen": 9.553054809570312, + "log_odds_ratio": -0.0002016788930632174, + "logits/chosen": -0.7149819135665894, + "logits/rejected": -0.5551720261573792, + "logps/chosen": -0.0003842208825517446, + "logps/rejected": -1.6851762533187866, + "loss": 0.5968, + "nll_loss": 0.14917269349098206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8422091165557504e-05, + "rewards/margins": 0.16847920417785645, + "rewards/rejected": -0.168517604470253, + "step": 13593 + }, + { + "epoch": 9.401106500691563, + "grad_norm": 2.704509735107422, + "learning_rate": 3.3271861072690954e-06, + "log_odds_chosen": 10.636608123779297, + "log_odds_ratio": -0.00022637513757217675, + "logits/chosen": -0.517031192779541, + "logits/rejected": -0.5399819612503052, + "logps/chosen": -0.0005620784359052777, + "logps/rejected": -2.193096160888672, + "loss": 0.3281, + "nll_loss": 0.08199042081832886, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620783849735744e-05, + "rewards/margins": 0.21925342082977295, + "rewards/rejected": -0.21930962800979614, + "step": 13594 + }, + { + "epoch": 9.40179806362379, + "grad_norm": 3.62241530418396, + "learning_rate": 3.3233440909789457e-06, + "log_odds_chosen": 11.132572174072266, + "log_odds_ratio": -8.268863894045353e-05, + "logits/chosen": -0.5297135710716248, + "logits/rejected": -0.5522671937942505, + "logps/chosen": -0.0009948884835466743, + "logps/rejected": -2.285141944885254, + "loss": 0.3795, + "nll_loss": 0.09486062824726105, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.948885417543352e-05, + "rewards/margins": 0.22841474413871765, + "rewards/rejected": -0.22851420938968658, + "step": 13595 + }, + { + "epoch": 9.402489626556017, + "grad_norm": 3.3328349590301514, + "learning_rate": 3.319502074688797e-06, + "log_odds_chosen": 11.376626014709473, + "log_odds_ratio": -5.180388325243257e-05, + "logits/chosen": -0.4596288800239563, + "logits/rejected": -0.5163432359695435, + "logps/chosen": -0.00018895625544246286, + "logps/rejected": -2.4481046199798584, + "loss": 0.3318, + "nll_loss": 0.08293630927801132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8895625544246286e-05, + "rewards/margins": 0.24479156732559204, + "rewards/rejected": -0.24481046199798584, + "step": 13596 + }, + { + "epoch": 9.403181189488244, + "grad_norm": 2.345792770385742, + "learning_rate": 3.315660058398648e-06, + "log_odds_chosen": 10.897357940673828, + "log_odds_ratio": -6.9019639340695e-05, + "logits/chosen": -0.06716062873601913, + "logits/rejected": -0.031017400324344635, + "logps/chosen": -0.0003638725320342928, + "logps/rejected": -2.4356253147125244, + "loss": 0.281, + "nll_loss": 0.07024391740560532, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.638725320342928e-05, + "rewards/margins": 0.2435261458158493, + "rewards/rejected": -0.2435625195503235, + "step": 13597 + }, + { + "epoch": 9.40387275242047, + "grad_norm": 3.051527738571167, + "learning_rate": 3.3118180421084984e-06, + "log_odds_chosen": 10.573110580444336, + "log_odds_ratio": -6.965044303797185e-05, + "logits/chosen": -0.13037006556987762, + "logits/rejected": -0.13248313963413239, + "logps/chosen": -0.0001720719737932086, + "logps/rejected": -1.8427140712738037, + "loss": 0.2718, + "nll_loss": 0.06793493032455444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7207195924129337e-05, + "rewards/margins": 0.18425419926643372, + "rewards/rejected": -0.1842714101076126, + "step": 13598 + }, + { + "epoch": 9.404564315352697, + "grad_norm": 3.171217918395996, + "learning_rate": 3.3079760258183495e-06, + "log_odds_chosen": 11.250415802001953, + "log_odds_ratio": -1.9207192963222042e-05, + "logits/chosen": -0.4102725088596344, + "logits/rejected": -0.47221803665161133, + "logps/chosen": -0.00015858103870414197, + "logps/rejected": -2.2595107555389404, + "loss": 0.3371, + "nll_loss": 0.0842801183462143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5858104234212078e-05, + "rewards/margins": 0.2259352207183838, + "rewards/rejected": -0.22595107555389404, + "step": 13599 + }, + { + "epoch": 9.405255878284924, + "grad_norm": 2.929114580154419, + "learning_rate": 3.3041340095282007e-06, + "log_odds_chosen": 11.336652755737305, + "log_odds_ratio": -3.686985655804165e-05, + "logits/chosen": -0.05324409902095795, + "logits/rejected": -0.06757915019989014, + "logps/chosen": -0.00010137058416148648, + "logps/rejected": -1.7882221937179565, + "loss": 0.2707, + "nll_loss": 0.06766844540834427, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0137058779946528e-05, + "rewards/margins": 0.17881208658218384, + "rewards/rejected": -0.17882221937179565, + "step": 13600 + }, + { + "epoch": 9.405947441217151, + "grad_norm": 3.7205023765563965, + "learning_rate": 3.3002919932380515e-06, + "log_odds_chosen": 10.108559608459473, + "log_odds_ratio": -0.00035753127303905785, + "logits/chosen": -0.2789788246154785, + "logits/rejected": -0.31889981031417847, + "logps/chosen": -0.0004472219734452665, + "logps/rejected": -1.4498944282531738, + "loss": 0.2628, + "nll_loss": 0.0656713992357254, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4722200982505456e-05, + "rewards/margins": 0.14494472742080688, + "rewards/rejected": -0.14498944580554962, + "step": 13601 + }, + { + "epoch": 9.406639004149378, + "grad_norm": 3.527688503265381, + "learning_rate": 3.2964499769479026e-06, + "log_odds_chosen": 10.635601043701172, + "log_odds_ratio": -4.5054053771309555e-05, + "logits/chosen": -0.5978109240531921, + "logits/rejected": -0.6777760982513428, + "logps/chosen": -0.0001139695305027999, + "logps/rejected": -1.6677496433258057, + "loss": 0.3322, + "nll_loss": 0.08304670453071594, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1396952686482109e-05, + "rewards/margins": 0.1667635589838028, + "rewards/rejected": -0.1667749583721161, + "step": 13602 + }, + { + "epoch": 9.407330567081605, + "grad_norm": 3.7321369647979736, + "learning_rate": 3.292607960657754e-06, + "log_odds_chosen": 11.718954086303711, + "log_odds_ratio": -1.348754722130252e-05, + "logits/chosen": -0.35870301723480225, + "logits/rejected": -0.3781982660293579, + "logps/chosen": -0.00016278823022730649, + "logps/rejected": -2.7064929008483887, + "loss": 0.2841, + "nll_loss": 0.07101425528526306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.627882375032641e-05, + "rewards/margins": 0.2706330120563507, + "rewards/rejected": -0.2706492841243744, + "step": 13603 + }, + { + "epoch": 9.408022130013832, + "grad_norm": 4.767492771148682, + "learning_rate": 3.288765944367604e-06, + "log_odds_chosen": 12.546222686767578, + "log_odds_ratio": -6.613996902160579e-06, + "logits/chosen": -0.011980824172496796, + "logits/rejected": -0.09855447709560394, + "logps/chosen": -0.00015213618462439626, + "logps/rejected": -3.185060501098633, + "loss": 0.6174, + "nll_loss": 0.15435130894184113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5213619008136448e-05, + "rewards/margins": 0.3184908330440521, + "rewards/rejected": -0.31850606203079224, + "step": 13604 + }, + { + "epoch": 9.408713692946058, + "grad_norm": 1.7180169820785522, + "learning_rate": 3.2849239280774553e-06, + "log_odds_chosen": 11.02637004852295, + "log_odds_ratio": -3.8207799661904573e-05, + "logits/chosen": -0.1217493861913681, + "logits/rejected": -0.10812407732009888, + "logps/chosen": -4.604986315825954e-05, + "logps/rejected": -1.4926437139511108, + "loss": 0.2321, + "nll_loss": 0.05801209807395935, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.604986770573305e-06, + "rewards/margins": 0.1492597609758377, + "rewards/rejected": -0.1492643654346466, + "step": 13605 + }, + { + "epoch": 9.409405255878285, + "grad_norm": 5.552383899688721, + "learning_rate": 3.2810819117873056e-06, + "log_odds_chosen": 11.0220308303833, + "log_odds_ratio": -5.878361116629094e-05, + "logits/chosen": -0.24672137200832367, + "logits/rejected": -0.1756354570388794, + "logps/chosen": -0.00029366809758357704, + "logps/rejected": -2.6202406883239746, + "loss": 0.3598, + "nll_loss": 0.08994251489639282, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9366809030761942e-05, + "rewards/margins": 0.26199471950531006, + "rewards/rejected": -0.2620241045951843, + "step": 13606 + }, + { + "epoch": 9.410096818810512, + "grad_norm": 2.8825390338897705, + "learning_rate": 3.277239895497157e-06, + "log_odds_chosen": 11.942837715148926, + "log_odds_ratio": -0.000203387564397417, + "logits/chosen": -0.03993723541498184, + "logits/rejected": -0.06068715453147888, + "logps/chosen": -0.00022810781956650317, + "logps/rejected": -3.365323066711426, + "loss": 0.4158, + "nll_loss": 0.10393252968788147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.281078377563972e-05, + "rewards/margins": 0.33650949597358704, + "rewards/rejected": -0.336532324552536, + "step": 13607 + }, + { + "epoch": 9.410788381742739, + "grad_norm": 2.55307674407959, + "learning_rate": 3.273397879207008e-06, + "log_odds_chosen": 10.70759105682373, + "log_odds_ratio": -0.0005642443429678679, + "logits/chosen": -0.437193900346756, + "logits/rejected": -0.45698311924934387, + "logps/chosen": -0.0011037236545234919, + "logps/rejected": -2.3171181678771973, + "loss": 0.2515, + "nll_loss": 0.06281343102455139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011037235526600853, + "rewards/margins": 0.23160143196582794, + "rewards/rejected": -0.23171180486679077, + "step": 13608 + }, + { + "epoch": 9.411479944674966, + "grad_norm": 4.183548450469971, + "learning_rate": 3.2695558629168587e-06, + "log_odds_chosen": 11.013273239135742, + "log_odds_ratio": -0.00017840656801126897, + "logits/chosen": -0.7780969142913818, + "logits/rejected": -0.8504067063331604, + "logps/chosen": -0.00031670788303017616, + "logps/rejected": -2.138556480407715, + "loss": 0.3203, + "nll_loss": 0.08006297051906586, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1670788303017616e-05, + "rewards/margins": 0.21382398903369904, + "rewards/rejected": -0.21385566890239716, + "step": 13609 + }, + { + "epoch": 9.412171507607193, + "grad_norm": 3.3250057697296143, + "learning_rate": 3.26571384662671e-06, + "log_odds_chosen": 11.108063697814941, + "log_odds_ratio": -3.6314133467385545e-05, + "logits/chosen": -0.3911839425563812, + "logits/rejected": -0.39810705184936523, + "logps/chosen": -0.00027260807109996676, + "logps/rejected": -2.261627197265625, + "loss": 0.3394, + "nll_loss": 0.08484381437301636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7260806746198796e-05, + "rewards/margins": 0.22613544762134552, + "rewards/rejected": -0.22616273164749146, + "step": 13610 + }, + { + "epoch": 9.41286307053942, + "grad_norm": 5.000128746032715, + "learning_rate": 3.261871830336561e-06, + "log_odds_chosen": 10.38177490234375, + "log_odds_ratio": -6.147993553895503e-05, + "logits/chosen": -0.5801539421081543, + "logits/rejected": -0.686652660369873, + "logps/chosen": -0.000287293252767995, + "logps/rejected": -1.8929868936538696, + "loss": 0.3666, + "nll_loss": 0.09163911640644073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8729327823384665e-05, + "rewards/margins": 0.18926995992660522, + "rewards/rejected": -0.18929868936538696, + "step": 13611 + }, + { + "epoch": 9.413554633471646, + "grad_norm": 4.563615798950195, + "learning_rate": 3.2580298140464114e-06, + "log_odds_chosen": 11.843263626098633, + "log_odds_ratio": -0.00014839382492937148, + "logits/chosen": -0.28000980615615845, + "logits/rejected": -0.3588707447052002, + "logps/chosen": -0.00023143761791288853, + "logps/rejected": -3.1376800537109375, + "loss": 0.4461, + "nll_loss": 0.11149781942367554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.314376069989521e-05, + "rewards/margins": 0.31374484300613403, + "rewards/rejected": -0.3137679994106293, + "step": 13612 + }, + { + "epoch": 9.414246196403873, + "grad_norm": 3.096818447113037, + "learning_rate": 3.2541877977562626e-06, + "log_odds_chosen": 10.367470741271973, + "log_odds_ratio": -0.0001926126569742337, + "logits/chosen": -0.3956316411495209, + "logits/rejected": -0.5229665040969849, + "logps/chosen": -0.0002295288140885532, + "logps/rejected": -1.6280049085617065, + "loss": 0.4132, + "nll_loss": 0.1032683402299881, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.295288140885532e-05, + "rewards/margins": 0.16277754306793213, + "rewards/rejected": -0.16280049085617065, + "step": 13613 + }, + { + "epoch": 9.4149377593361, + "grad_norm": 4.1682939529418945, + "learning_rate": 3.2503457814661137e-06, + "log_odds_chosen": 11.22652530670166, + "log_odds_ratio": -3.537128213793039e-05, + "logits/chosen": -0.06997202336788177, + "logits/rejected": -0.05267438292503357, + "logps/chosen": -0.0002715398441068828, + "logps/rejected": -2.250588893890381, + "loss": 0.4923, + "nll_loss": 0.12307889759540558, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7153984774486162e-05, + "rewards/margins": 0.22503173351287842, + "rewards/rejected": -0.2250588983297348, + "step": 13614 + }, + { + "epoch": 9.415629322268327, + "grad_norm": 4.823745250701904, + "learning_rate": 3.2465037651759645e-06, + "log_odds_chosen": 11.2670316696167, + "log_odds_ratio": -0.11680711805820465, + "logits/chosen": -0.14811766147613525, + "logits/rejected": -0.19620651006698608, + "logps/chosen": -0.0237438902258873, + "logps/rejected": -2.5987982749938965, + "loss": 0.4275, + "nll_loss": 0.09518326818943024, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0023743887431919575, + "rewards/margins": 0.2575054466724396, + "rewards/rejected": -0.25987982749938965, + "step": 13615 + }, + { + "epoch": 9.416320885200554, + "grad_norm": 3.1150922775268555, + "learning_rate": 3.2426617488858157e-06, + "log_odds_chosen": 11.053929328918457, + "log_odds_ratio": -9.234283788828179e-05, + "logits/chosen": -0.2426833212375641, + "logits/rejected": -0.27916908264160156, + "logps/chosen": -0.0002188576472690329, + "logps/rejected": -2.3410890102386475, + "loss": 0.3349, + "nll_loss": 0.08371220529079437, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1885763999307528e-05, + "rewards/margins": 0.2340870201587677, + "rewards/rejected": -0.23410890996456146, + "step": 13616 + }, + { + "epoch": 9.41701244813278, + "grad_norm": 2.70302677154541, + "learning_rate": 3.238819732595667e-06, + "log_odds_chosen": 11.673267364501953, + "log_odds_ratio": -2.1553594706347212e-05, + "logits/chosen": -0.324625700712204, + "logits/rejected": -0.26671531796455383, + "logps/chosen": -6.083353218855336e-05, + "logps/rejected": -2.182375431060791, + "loss": 0.4002, + "nll_loss": 0.1000453308224678, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.083352673158515e-06, + "rewards/margins": 0.2182314693927765, + "rewards/rejected": -0.21823754906654358, + "step": 13617 + }, + { + "epoch": 9.417704011065007, + "grad_norm": 3.1561553478240967, + "learning_rate": 3.234977716305517e-06, + "log_odds_chosen": 11.751758575439453, + "log_odds_ratio": -1.6239227988990024e-05, + "logits/chosen": -0.387655109167099, + "logits/rejected": -0.47167304158210754, + "logps/chosen": -0.000126562881632708, + "logps/rejected": -2.393993616104126, + "loss": 0.2997, + "nll_loss": 0.07492394745349884, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2656288163270801e-05, + "rewards/margins": 0.23938670754432678, + "rewards/rejected": -0.23939937353134155, + "step": 13618 + }, + { + "epoch": 9.418395573997234, + "grad_norm": 3.0932955741882324, + "learning_rate": 3.2311357000153683e-06, + "log_odds_chosen": 11.690707206726074, + "log_odds_ratio": -4.184385761618614e-05, + "logits/chosen": -0.2753382921218872, + "logits/rejected": -0.4104476273059845, + "logps/chosen": -6.605208182008937e-05, + "logps/rejected": -2.1179964542388916, + "loss": 0.3442, + "nll_loss": 0.08604513108730316, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605208000109997e-06, + "rewards/margins": 0.21179303526878357, + "rewards/rejected": -0.21179965138435364, + "step": 13619 + }, + { + "epoch": 9.41908713692946, + "grad_norm": 3.613025665283203, + "learning_rate": 3.2272936837252195e-06, + "log_odds_chosen": 12.161299705505371, + "log_odds_ratio": -1.4612392988055944e-05, + "logits/chosen": 0.09467865526676178, + "logits/rejected": 0.08084909617900848, + "logps/chosen": -0.0002366297267144546, + "logps/rejected": -3.197540760040283, + "loss": 0.5081, + "nll_loss": 0.12701593339443207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3662971216253936e-05, + "rewards/margins": 0.31973040103912354, + "rewards/rejected": -0.31975409388542175, + "step": 13620 + }, + { + "epoch": 9.419778699861688, + "grad_norm": 4.123516082763672, + "learning_rate": 3.22345166743507e-06, + "log_odds_chosen": 11.306703567504883, + "log_odds_ratio": -2.3380784114124253e-05, + "logits/chosen": -0.33806487917900085, + "logits/rejected": -0.32109004259109497, + "logps/chosen": -0.0001318022550549358, + "logps/rejected": -2.3551292419433594, + "loss": 0.6867, + "nll_loss": 0.17167049646377563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.318022441409994e-05, + "rewards/margins": 0.23549973964691162, + "rewards/rejected": -0.23551294207572937, + "step": 13621 + }, + { + "epoch": 9.420470262793915, + "grad_norm": 3.9194085597991943, + "learning_rate": 3.219609651144921e-06, + "log_odds_chosen": 11.74150562286377, + "log_odds_ratio": -1.1522912245709449e-05, + "logits/chosen": -0.007314398884773254, + "logits/rejected": -0.030431300401687622, + "logps/chosen": -0.0001474516757298261, + "logps/rejected": -2.6600990295410156, + "loss": 0.4314, + "nll_loss": 0.10784394294023514, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4745167391083669e-05, + "rewards/margins": 0.26599520444869995, + "rewards/rejected": -0.2660099267959595, + "step": 13622 + }, + { + "epoch": 9.421161825726141, + "grad_norm": 4.476114273071289, + "learning_rate": 3.2157676348547718e-06, + "log_odds_chosen": 11.353679656982422, + "log_odds_ratio": -0.00018491236551199108, + "logits/chosen": -0.18393199145793915, + "logits/rejected": -0.2646377682685852, + "logps/chosen": -0.00029539887327700853, + "logps/rejected": -2.67562198638916, + "loss": 0.8482, + "nll_loss": 0.2120363414287567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9539889510488138e-05, + "rewards/margins": 0.2675326466560364, + "rewards/rejected": -0.2675621807575226, + "step": 13623 + }, + { + "epoch": 9.421853388658368, + "grad_norm": 3.667562246322632, + "learning_rate": 3.211925618564623e-06, + "log_odds_chosen": 10.851970672607422, + "log_odds_ratio": -0.00023015293118078262, + "logits/chosen": -0.2953568696975708, + "logits/rejected": -0.3751392662525177, + "logps/chosen": -0.0002698083408176899, + "logps/rejected": -2.3600590229034424, + "loss": 0.4117, + "nll_loss": 0.10289748758077621, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6980836992152035e-05, + "rewards/margins": 0.23597891628742218, + "rewards/rejected": -0.23600590229034424, + "step": 13624 + }, + { + "epoch": 9.422544951590595, + "grad_norm": 2.6978375911712646, + "learning_rate": 3.208083602274474e-06, + "log_odds_chosen": 11.028204917907715, + "log_odds_ratio": -4.765874473378062e-05, + "logits/chosen": -0.21027493476867676, + "logits/rejected": -0.31045985221862793, + "logps/chosen": -0.00022212699695955962, + "logps/rejected": -2.5450971126556396, + "loss": 0.2484, + "nll_loss": 0.06209544464945793, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2212700059753843e-05, + "rewards/margins": 0.2544875144958496, + "rewards/rejected": -0.25450971722602844, + "step": 13625 + }, + { + "epoch": 9.423236514522822, + "grad_norm": 3.2922449111938477, + "learning_rate": 3.2042415859843244e-06, + "log_odds_chosen": 9.94178581237793, + "log_odds_ratio": -0.0003998736501671374, + "logits/chosen": -0.3220575749874115, + "logits/rejected": -0.406940221786499, + "logps/chosen": -0.0004172759654466063, + "logps/rejected": -1.5480931997299194, + "loss": 0.4327, + "nll_loss": 0.10814134776592255, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.172759872744791e-05, + "rewards/margins": 0.15476760268211365, + "rewards/rejected": -0.15480931103229523, + "step": 13626 + }, + { + "epoch": 9.423928077455049, + "grad_norm": 3.3185558319091797, + "learning_rate": 3.2003995696941756e-06, + "log_odds_chosen": 11.532296180725098, + "log_odds_ratio": -1.9236800653743558e-05, + "logits/chosen": 0.012846432626247406, + "logits/rejected": -0.1282588094472885, + "logps/chosen": -9.658637281972915e-05, + "logps/rejected": -2.0812268257141113, + "loss": 0.3085, + "nll_loss": 0.07712876051664352, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.658637281972915e-06, + "rewards/margins": 0.20811301469802856, + "rewards/rejected": -0.20812267065048218, + "step": 13627 + }, + { + "epoch": 9.424619640387276, + "grad_norm": 2.815317153930664, + "learning_rate": 3.1965575534040268e-06, + "log_odds_chosen": 11.8434476852417, + "log_odds_ratio": -1.1227140930714086e-05, + "logits/chosen": -0.5030456781387329, + "logits/rejected": -0.6363354921340942, + "logps/chosen": -0.00013441324699670076, + "logps/rejected": -2.511301040649414, + "loss": 0.3466, + "nll_loss": 0.08665873855352402, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3441325791063718e-05, + "rewards/margins": 0.25111669301986694, + "rewards/rejected": -0.2511301040649414, + "step": 13628 + }, + { + "epoch": 9.425311203319502, + "grad_norm": 3.614622116088867, + "learning_rate": 3.192715537113877e-06, + "log_odds_chosen": 12.06743049621582, + "log_odds_ratio": -1.551922468934208e-05, + "logits/chosen": -0.4491950273513794, + "logits/rejected": -0.509068489074707, + "logps/chosen": -0.0001239084085682407, + "logps/rejected": -2.926715612411499, + "loss": 0.7057, + "nll_loss": 0.17642636597156525, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2390841220621951e-05, + "rewards/margins": 0.2926591634750366, + "rewards/rejected": -0.2926715910434723, + "step": 13629 + }, + { + "epoch": 9.42600276625173, + "grad_norm": 3.2747886180877686, + "learning_rate": 3.1888735208237283e-06, + "log_odds_chosen": 10.869483947753906, + "log_odds_ratio": -8.397969941142946e-05, + "logits/chosen": -0.2558768689632416, + "logits/rejected": -0.5263897180557251, + "logps/chosen": -0.0003626463876571506, + "logps/rejected": -2.1652169227600098, + "loss": 0.3101, + "nll_loss": 0.07751419395208359, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.626464240369387e-05, + "rewards/margins": 0.2164854258298874, + "rewards/rejected": -0.21652168035507202, + "step": 13630 + }, + { + "epoch": 9.426694329183956, + "grad_norm": 4.786688804626465, + "learning_rate": 3.1850315045335794e-06, + "log_odds_chosen": 10.476531982421875, + "log_odds_ratio": -0.00017410835425835103, + "logits/chosen": 0.17468611896038055, + "logits/rejected": 0.08674517273902893, + "logps/chosen": -0.0007101238006725907, + "logps/rejected": -2.145505905151367, + "loss": 0.4957, + "nll_loss": 0.12391538918018341, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.101238588802516e-05, + "rewards/margins": 0.21447956562042236, + "rewards/rejected": -0.21455058455467224, + "step": 13631 + }, + { + "epoch": 9.427385892116183, + "grad_norm": 5.189729690551758, + "learning_rate": 3.18118948824343e-06, + "log_odds_chosen": 11.972164154052734, + "log_odds_ratio": -1.0619540262268856e-05, + "logits/chosen": 0.05772440880537033, + "logits/rejected": 0.0032345205545425415, + "logps/chosen": -7.343491597566754e-05, + "logps/rejected": -2.3607099056243896, + "loss": 0.3151, + "nll_loss": 0.07877691835165024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.343490779021522e-06, + "rewards/margins": 0.2360636591911316, + "rewards/rejected": -0.23607099056243896, + "step": 13632 + }, + { + "epoch": 9.42807745504841, + "grad_norm": 3.6475706100463867, + "learning_rate": 3.1773474719532814e-06, + "log_odds_chosen": 12.083813667297363, + "log_odds_ratio": -1.8943410395877436e-05, + "logits/chosen": -0.002671957015991211, + "logits/rejected": -0.03937486559152603, + "logps/chosen": -0.00010588954319246113, + "logps/rejected": -2.9970786571502686, + "loss": 0.3612, + "nll_loss": 0.0903020054101944, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0588954864942934e-05, + "rewards/margins": 0.29969727993011475, + "rewards/rejected": -0.29970788955688477, + "step": 13633 + }, + { + "epoch": 9.428769017980636, + "grad_norm": 2.840242862701416, + "learning_rate": 3.1735054556631325e-06, + "log_odds_chosen": 10.816787719726562, + "log_odds_ratio": -0.0001226270542247221, + "logits/chosen": -0.1480427384376526, + "logits/rejected": -0.30245691537857056, + "logps/chosen": -0.0001767796347849071, + "logps/rejected": -2.009460926055908, + "loss": 0.2778, + "nll_loss": 0.06943796575069427, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7677964933682233e-05, + "rewards/margins": 0.20092841982841492, + "rewards/rejected": -0.20094609260559082, + "step": 13634 + }, + { + "epoch": 9.429460580912863, + "grad_norm": 4.10830545425415, + "learning_rate": 3.169663439372983e-06, + "log_odds_chosen": 11.465187072753906, + "log_odds_ratio": -1.9342684026923962e-05, + "logits/chosen": -0.08761780709028244, + "logits/rejected": -0.23344786465168, + "logps/chosen": -0.00020120250701438636, + "logps/rejected": -2.2758188247680664, + "loss": 0.6442, + "nll_loss": 0.16103589534759521, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0120251065236516e-05, + "rewards/margins": 0.22756177186965942, + "rewards/rejected": -0.22758188843727112, + "step": 13635 + }, + { + "epoch": 9.43015214384509, + "grad_norm": 3.0645132064819336, + "learning_rate": 3.165821423082834e-06, + "log_odds_chosen": 11.983230590820312, + "log_odds_ratio": -1.6645788491587155e-05, + "logits/chosen": -0.011146046221256256, + "logits/rejected": -0.09030141681432724, + "logps/chosen": -0.00019530183635652065, + "logps/rejected": -3.040992021560669, + "loss": 0.3178, + "nll_loss": 0.07945729047060013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9530183635652065e-05, + "rewards/margins": 0.30407968163490295, + "rewards/rejected": -0.3040992021560669, + "step": 13636 + }, + { + "epoch": 9.430843706777317, + "grad_norm": 2.893550157546997, + "learning_rate": 3.161979406792685e-06, + "log_odds_chosen": 10.124031066894531, + "log_odds_ratio": -9.303042315877974e-05, + "logits/chosen": -0.17404676973819733, + "logits/rejected": -0.3016105890274048, + "logps/chosen": -0.0003833910741377622, + "logps/rejected": -2.239431142807007, + "loss": 0.3076, + "nll_loss": 0.07688341289758682, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.833910886896774e-05, + "rewards/margins": 0.2239047884941101, + "rewards/rejected": -0.22394311428070068, + "step": 13637 + }, + { + "epoch": 9.431535269709544, + "grad_norm": 2.878291606903076, + "learning_rate": 3.158137390502536e-06, + "log_odds_chosen": 10.961097717285156, + "log_odds_ratio": -9.159816545434296e-05, + "logits/chosen": -0.39119625091552734, + "logits/rejected": -0.4431838393211365, + "logps/chosen": -0.0002546052564866841, + "logps/rejected": -2.232316255569458, + "loss": 0.337, + "nll_loss": 0.0842406302690506, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5460527467657812e-05, + "rewards/margins": 0.22320617735385895, + "rewards/rejected": -0.22323161363601685, + "step": 13638 + }, + { + "epoch": 9.43222683264177, + "grad_norm": 4.693680763244629, + "learning_rate": 3.154295374212387e-06, + "log_odds_chosen": 12.475543975830078, + "log_odds_ratio": -8.55696271173656e-06, + "logits/chosen": 0.04066818952560425, + "logits/rejected": -0.0014166105538606644, + "logps/chosen": -0.0001762946485541761, + "logps/rejected": -3.719608783721924, + "loss": 0.554, + "nll_loss": 0.13849200308322906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7629463400226086e-05, + "rewards/margins": 0.37194323539733887, + "rewards/rejected": -0.3719608783721924, + "step": 13639 + }, + { + "epoch": 9.432918395573997, + "grad_norm": 3.4750702381134033, + "learning_rate": 3.1504533579222375e-06, + "log_odds_chosen": 10.41798210144043, + "log_odds_ratio": -0.0006747535662725568, + "logits/chosen": -0.17689725756645203, + "logits/rejected": -0.1873033195734024, + "logps/chosen": -0.0006824568845331669, + "logps/rejected": -1.7990126609802246, + "loss": 0.3511, + "nll_loss": 0.08769867569208145, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.82456957292743e-05, + "rewards/margins": 0.17983302474021912, + "rewards/rejected": -0.17990127205848694, + "step": 13640 + }, + { + "epoch": 9.433609958506224, + "grad_norm": 3.9087653160095215, + "learning_rate": 3.1466113416320886e-06, + "log_odds_chosen": 9.801446914672852, + "log_odds_ratio": -0.00026155952946282923, + "logits/chosen": -0.31932926177978516, + "logits/rejected": -0.31377002596855164, + "logps/chosen": -0.000133975685457699, + "logps/rejected": -1.313755989074707, + "loss": 0.5769, + "nll_loss": 0.14420348405838013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.339756818197202e-05, + "rewards/margins": 0.13136222958564758, + "rewards/rejected": -0.13137562572956085, + "step": 13641 + }, + { + "epoch": 9.434301521438451, + "grad_norm": 3.930058240890503, + "learning_rate": 3.14276932534194e-06, + "log_odds_chosen": 9.756757736206055, + "log_odds_ratio": -0.0005803716485388577, + "logits/chosen": -0.3408500552177429, + "logits/rejected": -0.3672954738140106, + "logps/chosen": -0.0016345781041309237, + "logps/rejected": -1.7084448337554932, + "loss": 0.3289, + "nll_loss": 0.08217174559831619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016345782205462456, + "rewards/margins": 0.17068102955818176, + "rewards/rejected": -0.17084449529647827, + "step": 13642 + }, + { + "epoch": 9.434993084370678, + "grad_norm": 3.024298667907715, + "learning_rate": 3.13892730905179e-06, + "log_odds_chosen": 11.261756896972656, + "log_odds_ratio": -4.068774069310166e-05, + "logits/chosen": -0.28770825266838074, + "logits/rejected": -0.29424336552619934, + "logps/chosen": -0.00022447235824074596, + "logps/rejected": -2.1742238998413086, + "loss": 0.3234, + "nll_loss": 0.08083842694759369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2447236915468238e-05, + "rewards/margins": 0.2173999398946762, + "rewards/rejected": -0.21742239594459534, + "step": 13643 + }, + { + "epoch": 9.435684647302905, + "grad_norm": 2.7786617279052734, + "learning_rate": 3.1350852927616413e-06, + "log_odds_chosen": 11.020342826843262, + "log_odds_ratio": -2.372608651057817e-05, + "logits/chosen": -0.29740408062934875, + "logits/rejected": -0.3028227686882019, + "logps/chosen": -0.0001481901272200048, + "logps/rejected": -2.0162127017974854, + "loss": 0.3055, + "nll_loss": 0.07637762278318405, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4819013813394122e-05, + "rewards/margins": 0.20160646736621857, + "rewards/rejected": -0.20162126421928406, + "step": 13644 + }, + { + "epoch": 9.436376210235132, + "grad_norm": 4.810678958892822, + "learning_rate": 3.1312432764714925e-06, + "log_odds_chosen": 11.225969314575195, + "log_odds_ratio": -0.00010552228923188522, + "logits/chosen": 0.10643033683300018, + "logits/rejected": 0.022820040583610535, + "logps/chosen": -0.0006701986421830952, + "logps/rejected": -2.911445379257202, + "loss": 0.7676, + "nll_loss": 0.19187721610069275, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701986421830952e-05, + "rewards/margins": 0.29107752442359924, + "rewards/rejected": -0.29114454984664917, + "step": 13645 + }, + { + "epoch": 9.437067773167358, + "grad_norm": 2.9477415084838867, + "learning_rate": 3.1274012601813432e-06, + "log_odds_chosen": 11.44828987121582, + "log_odds_ratio": -2.495289118087385e-05, + "logits/chosen": -0.32208365201950073, + "logits/rejected": -0.2861567437648773, + "logps/chosen": -0.00014589079364668578, + "logps/rejected": -2.2691187858581543, + "loss": 0.407, + "nll_loss": 0.10175222903490067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.45890808198601e-05, + "rewards/margins": 0.22689726948738098, + "rewards/rejected": -0.22691187262535095, + "step": 13646 + }, + { + "epoch": 9.437759336099585, + "grad_norm": 6.134434700012207, + "learning_rate": 3.1235592438911944e-06, + "log_odds_chosen": 9.971586227416992, + "log_odds_ratio": -0.0002822284877765924, + "logits/chosen": 0.033015862107276917, + "logits/rejected": 0.017898201942443848, + "logps/chosen": -0.00030175555730238557, + "logps/rejected": -1.692743182182312, + "loss": 0.417, + "nll_loss": 0.10421252995729446, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0175559004419483e-05, + "rewards/margins": 0.16924414038658142, + "rewards/rejected": -0.16927431523799896, + "step": 13647 + }, + { + "epoch": 9.438450899031812, + "grad_norm": 2.9997198581695557, + "learning_rate": 3.119717227601045e-06, + "log_odds_chosen": 10.828164100646973, + "log_odds_ratio": -4.2587987991282716e-05, + "logits/chosen": -0.5107184052467346, + "logits/rejected": -0.566586971282959, + "logps/chosen": -0.0003297154908068478, + "logps/rejected": -2.3637232780456543, + "loss": 0.2313, + "nll_loss": 0.05781985819339752, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2971551263472065e-05, + "rewards/margins": 0.23633936047554016, + "rewards/rejected": -0.23637235164642334, + "step": 13648 + }, + { + "epoch": 9.439142461964039, + "grad_norm": 4.279697895050049, + "learning_rate": 3.1158752113108963e-06, + "log_odds_chosen": 10.188965797424316, + "log_odds_ratio": -0.0001124302507378161, + "logits/chosen": -0.3338659703731537, + "logits/rejected": -0.43446221947669983, + "logps/chosen": -0.0022492276038974524, + "logps/rejected": -2.589181661605835, + "loss": 0.4241, + "nll_loss": 0.10601003468036652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00022492274001706392, + "rewards/margins": 0.25869324803352356, + "rewards/rejected": -0.2589181661605835, + "step": 13649 + }, + { + "epoch": 9.439834024896266, + "grad_norm": 4.09467077255249, + "learning_rate": 3.112033195020747e-06, + "log_odds_chosen": 11.217891693115234, + "log_odds_ratio": -3.357167952344753e-05, + "logits/chosen": 0.32028713822364807, + "logits/rejected": 0.35995882749557495, + "logps/chosen": -0.00047433891450054944, + "logps/rejected": -2.3951239585876465, + "loss": 0.2861, + "nll_loss": 0.07152174413204193, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7433892177650705e-05, + "rewards/margins": 0.23946496844291687, + "rewards/rejected": -0.23951241374015808, + "step": 13650 + }, + { + "epoch": 9.440525587828493, + "grad_norm": 2.9829654693603516, + "learning_rate": 3.108191178730598e-06, + "log_odds_chosen": 11.757881164550781, + "log_odds_ratio": -5.9907364629907534e-05, + "logits/chosen": -0.2075476348400116, + "logits/rejected": -0.11603280901908875, + "logps/chosen": -0.0003911016683559865, + "logps/rejected": -3.1237878799438477, + "loss": 0.3611, + "nll_loss": 0.09027508646249771, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9110171201173216e-05, + "rewards/margins": 0.3123396635055542, + "rewards/rejected": -0.31237876415252686, + "step": 13651 + }, + { + "epoch": 9.44121715076072, + "grad_norm": 2.7247939109802246, + "learning_rate": 3.1043491624404486e-06, + "log_odds_chosen": 11.124319076538086, + "log_odds_ratio": -4.741606244351715e-05, + "logits/chosen": -0.6477873921394348, + "logits/rejected": -0.5830503106117249, + "logps/chosen": -0.00016284243611153215, + "logps/rejected": -1.8327804803848267, + "loss": 0.3479, + "nll_loss": 0.08697590976953506, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6284242519759573e-05, + "rewards/margins": 0.18326175212860107, + "rewards/rejected": -0.18327805399894714, + "step": 13652 + }, + { + "epoch": 9.441908713692946, + "grad_norm": 2.818121910095215, + "learning_rate": 3.1005071461502997e-06, + "log_odds_chosen": 11.644196510314941, + "log_odds_ratio": -1.876071110018529e-05, + "logits/chosen": -0.6093887686729431, + "logits/rejected": -0.6418349742889404, + "logps/chosen": -0.00027470127679407597, + "logps/rejected": -2.701404094696045, + "loss": 0.3011, + "nll_loss": 0.07527416199445724, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.747012877080124e-05, + "rewards/margins": 0.27011293172836304, + "rewards/rejected": -0.2701404094696045, + "step": 13653 + }, + { + "epoch": 9.442600276625173, + "grad_norm": 3.2062249183654785, + "learning_rate": 3.096665129860151e-06, + "log_odds_chosen": 10.758845329284668, + "log_odds_ratio": -8.449627057416365e-05, + "logits/chosen": -0.06728816777467728, + "logits/rejected": -0.12761613726615906, + "logps/chosen": -0.0004660533741116524, + "logps/rejected": -2.3059773445129395, + "loss": 0.3629, + "nll_loss": 0.09072056412696838, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.660533886635676e-05, + "rewards/margins": 0.23055113852024078, + "rewards/rejected": -0.23059773445129395, + "step": 13654 + }, + { + "epoch": 9.4432918395574, + "grad_norm": 5.059345722198486, + "learning_rate": 3.0928231135700017e-06, + "log_odds_chosen": 11.177997589111328, + "log_odds_ratio": -9.584966755937785e-05, + "logits/chosen": -0.2947097420692444, + "logits/rejected": -0.41434311866760254, + "logps/chosen": -0.0005000841920264065, + "logps/rejected": -2.6178932189941406, + "loss": 0.3753, + "nll_loss": 0.09381620585918427, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0008420657832175e-05, + "rewards/margins": 0.2617393136024475, + "rewards/rejected": -0.26178932189941406, + "step": 13655 + }, + { + "epoch": 9.443983402489627, + "grad_norm": 3.8090786933898926, + "learning_rate": 3.088981097279853e-06, + "log_odds_chosen": 11.203605651855469, + "log_odds_ratio": -0.0001385588984703645, + "logits/chosen": -0.11259017884731293, + "logits/rejected": -0.05078551918268204, + "logps/chosen": -0.00046949341776780784, + "logps/rejected": -2.737952470779419, + "loss": 0.3455, + "nll_loss": 0.08635265380144119, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.694933886639774e-05, + "rewards/margins": 0.27374833822250366, + "rewards/rejected": -0.2737952768802643, + "step": 13656 + }, + { + "epoch": 9.444674965421854, + "grad_norm": 2.3689358234405518, + "learning_rate": 3.0851390809897036e-06, + "log_odds_chosen": 10.809409141540527, + "log_odds_ratio": -3.6527962947729975e-05, + "logits/chosen": -0.3216843008995056, + "logits/rejected": -0.37638893723487854, + "logps/chosen": -0.00045874243369325995, + "logps/rejected": -1.871604084968567, + "loss": 0.2578, + "nll_loss": 0.06445614248514175, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5874243369325995e-05, + "rewards/margins": 0.18711453676223755, + "rewards/rejected": -0.1871604323387146, + "step": 13657 + }, + { + "epoch": 9.44536652835408, + "grad_norm": 3.8250749111175537, + "learning_rate": 3.0812970646995543e-06, + "log_odds_chosen": 11.257601737976074, + "log_odds_ratio": -4.2512434447417036e-05, + "logits/chosen": -0.41863155364990234, + "logits/rejected": -0.4046270251274109, + "logps/chosen": -0.0001472465810365975, + "logps/rejected": -2.14986515045166, + "loss": 0.3402, + "nll_loss": 0.08504797518253326, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.472465828555869e-05, + "rewards/margins": 0.21497179567813873, + "rewards/rejected": -0.21498653292655945, + "step": 13658 + }, + { + "epoch": 9.446058091286307, + "grad_norm": 3.0374255180358887, + "learning_rate": 3.0774550484094055e-06, + "log_odds_chosen": 11.082832336425781, + "log_odds_ratio": -8.868123404681683e-05, + "logits/chosen": 0.3002154231071472, + "logits/rejected": 0.32750892639160156, + "logps/chosen": -0.000218449771637097, + "logps/rejected": -2.1682121753692627, + "loss": 0.2893, + "nll_loss": 0.07232128083705902, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1844974980922416e-05, + "rewards/margins": 0.21679937839508057, + "rewards/rejected": -0.21682122349739075, + "step": 13659 + }, + { + "epoch": 9.446749654218534, + "grad_norm": 2.465670108795166, + "learning_rate": 3.0736130321192562e-06, + "log_odds_chosen": 11.993000030517578, + "log_odds_ratio": -1.03414704426541e-05, + "logits/chosen": -0.018259674310684204, + "logits/rejected": -0.09901609271764755, + "logps/chosen": -0.00011617916607065126, + "logps/rejected": -2.3807053565979004, + "loss": 0.2366, + "nll_loss": 0.059150226414203644, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1617918062256649e-05, + "rewards/margins": 0.238058939576149, + "rewards/rejected": -0.238070547580719, + "step": 13660 + }, + { + "epoch": 9.447441217150761, + "grad_norm": 3.112118721008301, + "learning_rate": 3.0697710158291074e-06, + "log_odds_chosen": 11.837477684020996, + "log_odds_ratio": -7.899626507423818e-05, + "logits/chosen": -0.566035807132721, + "logits/rejected": -0.6351016759872437, + "logps/chosen": -0.0003235914628021419, + "logps/rejected": -3.252774238586426, + "loss": 0.3784, + "nll_loss": 0.09459532797336578, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2359148463001475e-05, + "rewards/margins": 0.32524505257606506, + "rewards/rejected": -0.3252774178981781, + "step": 13661 + }, + { + "epoch": 9.448132780082988, + "grad_norm": 3.8702402114868164, + "learning_rate": 3.065928999538958e-06, + "log_odds_chosen": 10.895008087158203, + "log_odds_ratio": -6.9529349275399e-05, + "logits/chosen": -0.025634005665779114, + "logits/rejected": -0.1855071783065796, + "logps/chosen": -0.0006080082966946065, + "logps/rejected": -2.0902671813964844, + "loss": 0.4369, + "nll_loss": 0.10922367870807648, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.080083403503522e-05, + "rewards/margins": 0.20896592736244202, + "rewards/rejected": -0.20902672410011292, + "step": 13662 + }, + { + "epoch": 9.448824343015215, + "grad_norm": 2.9250028133392334, + "learning_rate": 3.0620869832488093e-06, + "log_odds_chosen": 12.242441177368164, + "log_odds_ratio": -3.040230330952909e-05, + "logits/chosen": -0.34707507491111755, + "logits/rejected": -0.39765727519989014, + "logps/chosen": -0.00011096797970822081, + "logps/rejected": -2.8269543647766113, + "loss": 0.3002, + "nll_loss": 0.07505042105913162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1096798516518902e-05, + "rewards/margins": 0.2826843857765198, + "rewards/rejected": -0.282695472240448, + "step": 13663 + }, + { + "epoch": 9.449515905947441, + "grad_norm": 3.9676806926727295, + "learning_rate": 3.05824496695866e-06, + "log_odds_chosen": 10.78483772277832, + "log_odds_ratio": -3.852003283100203e-05, + "logits/chosen": -0.45215755701065063, + "logits/rejected": -0.4137214422225952, + "logps/chosen": -0.00024637990281917155, + "logps/rejected": -2.412273406982422, + "loss": 0.3628, + "nll_loss": 0.0907062217593193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.46379931923002e-05, + "rewards/margins": 0.241202712059021, + "rewards/rejected": -0.24122732877731323, + "step": 13664 + }, + { + "epoch": 9.450207468879668, + "grad_norm": 3.1666104793548584, + "learning_rate": 3.054402950668511e-06, + "log_odds_chosen": 9.979389190673828, + "log_odds_ratio": -0.00020525579748209566, + "logits/chosen": -0.1879369020462036, + "logits/rejected": -0.12144997715950012, + "logps/chosen": -0.0005892362678423524, + "logps/rejected": -1.3971165418624878, + "loss": 0.407, + "nll_loss": 0.10174022614955902, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8923629694618285e-05, + "rewards/margins": 0.13965272903442383, + "rewards/rejected": -0.1397116482257843, + "step": 13665 + }, + { + "epoch": 9.450899031811895, + "grad_norm": 2.9881982803344727, + "learning_rate": 3.050560934378362e-06, + "log_odds_chosen": 10.837393760681152, + "log_odds_ratio": -8.207259816117585e-05, + "logits/chosen": -0.6228085160255432, + "logits/rejected": -0.6411685347557068, + "logps/chosen": -0.0004132247995585203, + "logps/rejected": -2.03163480758667, + "loss": 0.4245, + "nll_loss": 0.1061079204082489, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.13224843214266e-05, + "rewards/margins": 0.20312216877937317, + "rewards/rejected": -0.20316347479820251, + "step": 13666 + }, + { + "epoch": 9.451590594744122, + "grad_norm": 3.928393602371216, + "learning_rate": 3.0467189180882128e-06, + "log_odds_chosen": 11.026008605957031, + "log_odds_ratio": -5.968601908534765e-05, + "logits/chosen": -0.504901647567749, + "logits/rejected": -0.5036959648132324, + "logps/chosen": -0.00010204267164226621, + "logps/rejected": -1.9367821216583252, + "loss": 0.5695, + "nll_loss": 0.14236928522586823, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.020426680042874e-05, + "rewards/margins": 0.19366800785064697, + "rewards/rejected": -0.19367821514606476, + "step": 13667 + }, + { + "epoch": 9.452282157676349, + "grad_norm": 3.4829812049865723, + "learning_rate": 3.0428769017980635e-06, + "log_odds_chosen": 10.596809387207031, + "log_odds_ratio": -0.0004383395134937018, + "logits/chosen": -0.12057967483997345, + "logits/rejected": -0.09670063853263855, + "logps/chosen": -0.00024910306092351675, + "logps/rejected": -2.325828790664673, + "loss": 0.3445, + "nll_loss": 0.08608973771333694, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4910305000958033e-05, + "rewards/margins": 0.2325579822063446, + "rewards/rejected": -0.23258288204669952, + "step": 13668 + }, + { + "epoch": 9.452973720608576, + "grad_norm": 4.093359470367432, + "learning_rate": 3.0390348855079147e-06, + "log_odds_chosen": 10.298482894897461, + "log_odds_ratio": -0.001447304617613554, + "logits/chosen": -0.021594732999801636, + "logits/rejected": 0.001156628131866455, + "logps/chosen": -0.0005646768258884549, + "logps/rejected": -1.9710403680801392, + "loss": 0.5585, + "nll_loss": 0.1394735723733902, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.646768113365397e-05, + "rewards/margins": 0.19704757630825043, + "rewards/rejected": -0.19710403680801392, + "step": 13669 + }, + { + "epoch": 9.453665283540802, + "grad_norm": 4.57317590713501, + "learning_rate": 3.035192869217766e-06, + "log_odds_chosen": 11.619937896728516, + "log_odds_ratio": -0.00013235447113402188, + "logits/chosen": -0.5300256609916687, + "logits/rejected": -0.45142507553100586, + "logps/chosen": -0.00031192644382826984, + "logps/rejected": -2.820838212966919, + "loss": 0.482, + "nll_loss": 0.12048950046300888, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1192645110422745e-05, + "rewards/margins": 0.2820526361465454, + "rewards/rejected": -0.2820838391780853, + "step": 13670 + }, + { + "epoch": 9.45435684647303, + "grad_norm": 3.9143404960632324, + "learning_rate": 3.0313508529276166e-06, + "log_odds_chosen": 11.59218692779541, + "log_odds_ratio": -7.889211701694876e-05, + "logits/chosen": 0.2372472584247589, + "logits/rejected": 0.1737135797739029, + "logps/chosen": -7.292727968888357e-05, + "logps/rejected": -2.157414436340332, + "loss": 0.3916, + "nll_loss": 0.09789082407951355, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.292728241736768e-06, + "rewards/margins": 0.21573415398597717, + "rewards/rejected": -0.21574144065380096, + "step": 13671 + }, + { + "epoch": 9.455048409405256, + "grad_norm": 3.0498509407043457, + "learning_rate": 3.0275088366374674e-06, + "log_odds_chosen": 11.467384338378906, + "log_odds_ratio": -2.097031392622739e-05, + "logits/chosen": -0.2295285165309906, + "logits/rejected": -0.21985048055648804, + "logps/chosen": -5.9011006669607013e-05, + "logps/rejected": -1.7431241273880005, + "loss": 0.2288, + "nll_loss": 0.05719863995909691, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901100848859642e-06, + "rewards/margins": 0.17430651187896729, + "rewards/rejected": -0.17431241273880005, + "step": 13672 + }, + { + "epoch": 9.455739972337483, + "grad_norm": 4.1104416847229, + "learning_rate": 3.0236668203473185e-06, + "log_odds_chosen": 11.44217300415039, + "log_odds_ratio": -0.00010912174911936745, + "logits/chosen": -0.2721630334854126, + "logits/rejected": -0.3509449064731598, + "logps/chosen": -0.00014903565170243382, + "logps/rejected": -2.6721222400665283, + "loss": 0.3536, + "nll_loss": 0.08839452266693115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4903566807333846e-05, + "rewards/margins": 0.2671973705291748, + "rewards/rejected": -0.26721224188804626, + "step": 13673 + }, + { + "epoch": 9.45643153526971, + "grad_norm": 2.9885518550872803, + "learning_rate": 3.0198248040571693e-06, + "log_odds_chosen": 10.360175132751465, + "log_odds_ratio": -0.00012097896251361817, + "logits/chosen": -0.6109101176261902, + "logits/rejected": -0.6060300469398499, + "logps/chosen": -0.000436235626693815, + "logps/rejected": -1.8446308374404907, + "loss": 0.3533, + "nll_loss": 0.08830248564481735, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362356412457302e-05, + "rewards/margins": 0.1844194382429123, + "rewards/rejected": -0.18446308374404907, + "step": 13674 + }, + { + "epoch": 9.457123098201937, + "grad_norm": 5.488043785095215, + "learning_rate": 3.01598278776702e-06, + "log_odds_chosen": 9.340782165527344, + "log_odds_ratio": -0.00013256016245577484, + "logits/chosen": -0.7102746367454529, + "logits/rejected": -0.5713024139404297, + "logps/chosen": -0.0008446794236078858, + "logps/rejected": -2.0710833072662354, + "loss": 0.6072, + "nll_loss": 0.1517745852470398, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446794527117163e-05, + "rewards/margins": 0.20702385902404785, + "rewards/rejected": -0.20710833370685577, + "step": 13675 + }, + { + "epoch": 9.457814661134163, + "grad_norm": 3.401562213897705, + "learning_rate": 3.012140771476871e-06, + "log_odds_chosen": 11.066591262817383, + "log_odds_ratio": -3.361454582773149e-05, + "logits/chosen": -0.09300799667835236, + "logits/rejected": -0.09423156082630157, + "logps/chosen": -0.00017423040117137134, + "logps/rejected": -2.2629432678222656, + "loss": 0.3336, + "nll_loss": 0.08340008556842804, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7423040844732895e-05, + "rewards/margins": 0.22627690434455872, + "rewards/rejected": -0.22629432380199432, + "step": 13676 + }, + { + "epoch": 9.45850622406639, + "grad_norm": 3.094069242477417, + "learning_rate": 3.0082987551867224e-06, + "log_odds_chosen": 12.419988632202148, + "log_odds_ratio": -6.88181398800225e-06, + "logits/chosen": -0.588909924030304, + "logits/rejected": -0.5188385248184204, + "logps/chosen": -9.001026046462357e-05, + "logps/rejected": -3.1227521896362305, + "loss": 0.4008, + "nll_loss": 0.1001872792840004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.001025318866596e-06, + "rewards/margins": 0.31226620078086853, + "rewards/rejected": -0.3122752010822296, + "step": 13677 + }, + { + "epoch": 9.459197786998617, + "grad_norm": 2.294238328933716, + "learning_rate": 3.004456738896573e-06, + "log_odds_chosen": 10.08603286743164, + "log_odds_ratio": -0.00013286015018820763, + "logits/chosen": -0.19353480637073517, + "logits/rejected": -0.13654178380966187, + "logps/chosen": -0.0006431926740333438, + "logps/rejected": -1.8902684450149536, + "loss": 0.2685, + "nll_loss": 0.06711249053478241, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43192688585259e-05, + "rewards/margins": 0.18896251916885376, + "rewards/rejected": -0.1890268474817276, + "step": 13678 + }, + { + "epoch": 9.459889349930844, + "grad_norm": 4.15861701965332, + "learning_rate": 3.000614722606424e-06, + "log_odds_chosen": 11.671398162841797, + "log_odds_ratio": -2.1137140720384195e-05, + "logits/chosen": -0.030270785093307495, + "logits/rejected": -0.15903228521347046, + "logps/chosen": -0.00015941433957777917, + "logps/rejected": -2.824725866317749, + "loss": 0.4703, + "nll_loss": 0.1175684779882431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5941433957777917e-05, + "rewards/margins": 0.282456636428833, + "rewards/rejected": -0.2824726104736328, + "step": 13679 + }, + { + "epoch": 9.46058091286307, + "grad_norm": 2.5023744106292725, + "learning_rate": 2.996772706316275e-06, + "log_odds_chosen": 11.48115348815918, + "log_odds_ratio": -2.6544312277110294e-05, + "logits/chosen": -0.31745773553848267, + "logits/rejected": -0.4156531095504761, + "logps/chosen": -6.688917346764356e-05, + "logps/rejected": -1.7893203496932983, + "loss": 0.2732, + "nll_loss": 0.06830289214849472, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.688916982966475e-06, + "rewards/margins": 0.17892535030841827, + "rewards/rejected": -0.1789320409297943, + "step": 13680 + }, + { + "epoch": 9.461272475795298, + "grad_norm": 3.793719530105591, + "learning_rate": 2.992930690026126e-06, + "log_odds_chosen": 12.253263473510742, + "log_odds_ratio": -0.0001680817367741838, + "logits/chosen": 0.17600244283676147, + "logits/rejected": 0.19702959060668945, + "logps/chosen": -0.001068698475137353, + "logps/rejected": -3.698801279067993, + "loss": 0.307, + "nll_loss": 0.07672946155071259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010686985478969291, + "rewards/margins": 0.369773268699646, + "rewards/rejected": -0.36988016963005066, + "step": 13681 + }, + { + "epoch": 9.461964038727524, + "grad_norm": 3.7785212993621826, + "learning_rate": 2.9890886737359765e-06, + "log_odds_chosen": 11.784876823425293, + "log_odds_ratio": -1.636009619687684e-05, + "logits/chosen": 0.1131448894739151, + "logits/rejected": 0.024499140679836273, + "logps/chosen": -0.00020417144696693867, + "logps/rejected": -2.9145703315734863, + "loss": 0.3781, + "nll_loss": 0.0945122167468071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0417144696693867e-05, + "rewards/margins": 0.29143664240837097, + "rewards/rejected": -0.29145705699920654, + "step": 13682 + }, + { + "epoch": 9.462655601659751, + "grad_norm": 3.6575255393981934, + "learning_rate": 2.9852466574458277e-06, + "log_odds_chosen": 11.061687469482422, + "log_odds_ratio": -9.567139204591513e-05, + "logits/chosen": -0.2089971899986267, + "logits/rejected": -0.25104543566703796, + "logps/chosen": -0.00028398202266544104, + "logps/rejected": -2.3885231018066406, + "loss": 0.3151, + "nll_loss": 0.07875557988882065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8398204449331388e-05, + "rewards/margins": 0.23882392048835754, + "rewards/rejected": -0.23885229229927063, + "step": 13683 + }, + { + "epoch": 9.463347164591978, + "grad_norm": 2.5947320461273193, + "learning_rate": 2.981404641155679e-06, + "log_odds_chosen": 10.604299545288086, + "log_odds_ratio": -6.143797509139404e-05, + "logits/chosen": -0.367992639541626, + "logits/rejected": -0.2641603946685791, + "logps/chosen": -0.0002503315918147564, + "logps/rejected": -1.903537392616272, + "loss": 0.3265, + "nll_loss": 0.08162114024162292, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.503315954527352e-05, + "rewards/margins": 0.19032873213291168, + "rewards/rejected": -0.19035375118255615, + "step": 13684 + }, + { + "epoch": 9.464038727524205, + "grad_norm": 4.084955215454102, + "learning_rate": 2.9775626248655296e-06, + "log_odds_chosen": 11.059584617614746, + "log_odds_ratio": -7.182247645687312e-05, + "logits/chosen": -0.09201770275831223, + "logits/rejected": -0.1854029893875122, + "logps/chosen": -0.0005451062461361289, + "logps/rejected": -2.800729274749756, + "loss": 0.5028, + "nll_loss": 0.12568828463554382, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.451062315842137e-05, + "rewards/margins": 0.2800183892250061, + "rewards/rejected": -0.2800729274749756, + "step": 13685 + }, + { + "epoch": 9.464730290456432, + "grad_norm": 2.895047426223755, + "learning_rate": 2.9737206085753804e-06, + "log_odds_chosen": 12.01188850402832, + "log_odds_ratio": -8.314920705743134e-06, + "logits/chosen": -0.38543713092803955, + "logits/rejected": -0.5218108296394348, + "logps/chosen": -0.00016034372674766928, + "logps/rejected": -3.107083320617676, + "loss": 0.4189, + "nll_loss": 0.10472860932350159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.603437340236269e-05, + "rewards/margins": 0.31069228053092957, + "rewards/rejected": -0.31070831418037415, + "step": 13686 + }, + { + "epoch": 9.465421853388658, + "grad_norm": 2.4275357723236084, + "learning_rate": 2.9698785922852316e-06, + "log_odds_chosen": 11.681462287902832, + "log_odds_ratio": -9.818573016673326e-05, + "logits/chosen": -0.10170315206050873, + "logits/rejected": -0.09424428641796112, + "logps/chosen": -0.0002597036655060947, + "logps/rejected": -2.9518909454345703, + "loss": 0.296, + "nll_loss": 0.0739840418100357, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5970368369598873e-05, + "rewards/margins": 0.2951631247997284, + "rewards/rejected": -0.2951890826225281, + "step": 13687 + }, + { + "epoch": 9.466113416320885, + "grad_norm": 3.9739270210266113, + "learning_rate": 2.9660365759950823e-06, + "log_odds_chosen": 11.264009475708008, + "log_odds_ratio": -7.518980419263244e-05, + "logits/chosen": -0.34022510051727295, + "logits/rejected": -0.4449506402015686, + "logps/chosen": -0.0002356268814764917, + "logps/rejected": -2.1614341735839844, + "loss": 0.398, + "nll_loss": 0.0994982123374939, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3562686692457646e-05, + "rewards/margins": 0.21611987054347992, + "rewards/rejected": -0.2161434292793274, + "step": 13688 + }, + { + "epoch": 9.466804979253112, + "grad_norm": 3.0976598262786865, + "learning_rate": 2.962194559704933e-06, + "log_odds_chosen": 10.38652229309082, + "log_odds_ratio": -7.720041321590543e-05, + "logits/chosen": -0.3017534911632538, + "logits/rejected": -0.3283177614212036, + "logps/chosen": -0.0004530068254098296, + "logps/rejected": -1.8843767642974854, + "loss": 0.3019, + "nll_loss": 0.07546820491552353, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.530068326857872e-05, + "rewards/margins": 0.18839238584041595, + "rewards/rejected": -0.18843768537044525, + "step": 13689 + }, + { + "epoch": 9.467496542185339, + "grad_norm": 4.3513031005859375, + "learning_rate": 2.9583525434147842e-06, + "log_odds_chosen": 12.240751266479492, + "log_odds_ratio": -9.313460395787843e-06, + "logits/chosen": -0.1769258677959442, + "logits/rejected": -0.19462567567825317, + "logps/chosen": -0.00016390776727348566, + "logps/rejected": -3.3067760467529297, + "loss": 0.2872, + "nll_loss": 0.07179521769285202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6390775272157043e-05, + "rewards/margins": 0.33066120743751526, + "rewards/rejected": -0.3306775987148285, + "step": 13690 + }, + { + "epoch": 9.468188105117566, + "grad_norm": 3.3704264163970947, + "learning_rate": 2.954510527124635e-06, + "log_odds_chosen": 11.753616333007812, + "log_odds_ratio": -9.331144246971235e-05, + "logits/chosen": -0.5359658598899841, + "logits/rejected": -0.49007073044776917, + "logps/chosen": -0.0002047703310381621, + "logps/rejected": -2.6890151500701904, + "loss": 0.3271, + "nll_loss": 0.0817599892616272, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0477034922805615e-05, + "rewards/margins": 0.26888102293014526, + "rewards/rejected": -0.2689014971256256, + "step": 13691 + }, + { + "epoch": 9.468879668049793, + "grad_norm": 3.193803548812866, + "learning_rate": 2.950668510834486e-06, + "log_odds_chosen": 11.477971076965332, + "log_odds_ratio": -6.550650869030505e-05, + "logits/chosen": -0.4765413701534271, + "logits/rejected": -0.5337736010551453, + "logps/chosen": -0.00011261247709626332, + "logps/rejected": -2.128232717514038, + "loss": 0.25, + "nll_loss": 0.06248745322227478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1261247891525272e-05, + "rewards/margins": 0.21281200647354126, + "rewards/rejected": -0.212823286652565, + "step": 13692 + }, + { + "epoch": 9.46957123098202, + "grad_norm": 3.092468500137329, + "learning_rate": 2.9468264945443373e-06, + "log_odds_chosen": 11.156269073486328, + "log_odds_ratio": -0.0001080305955838412, + "logits/chosen": -0.4892697036266327, + "logits/rejected": -0.6489784717559814, + "logps/chosen": -0.00033197173615917563, + "logps/rejected": -2.2593331336975098, + "loss": 0.3406, + "nll_loss": 0.08514631539583206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3197175071109086e-05, + "rewards/margins": 0.22590012848377228, + "rewards/rejected": -0.22593331336975098, + "step": 13693 + }, + { + "epoch": 9.470262793914246, + "grad_norm": 3.5633842945098877, + "learning_rate": 2.942984478254188e-06, + "log_odds_chosen": 9.765762329101562, + "log_odds_ratio": -0.000332854047883302, + "logits/chosen": -0.47233837842941284, + "logits/rejected": -0.42558562755584717, + "logps/chosen": -0.00043346098391339183, + "logps/rejected": -2.0893874168395996, + "loss": 0.2935, + "nll_loss": 0.07334748655557632, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.334609548095614e-05, + "rewards/margins": 0.20889541506767273, + "rewards/rejected": -0.20893874764442444, + "step": 13694 + }, + { + "epoch": 9.470954356846473, + "grad_norm": 4.938905715942383, + "learning_rate": 2.939142461964039e-06, + "log_odds_chosen": 11.603968620300293, + "log_odds_ratio": -3.054105764022097e-05, + "logits/chosen": -0.26857519149780273, + "logits/rejected": -0.2991647720336914, + "logps/chosen": -0.0002254693245049566, + "logps/rejected": -2.439152479171753, + "loss": 0.5257, + "nll_loss": 0.13143321871757507, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.254693208669778e-05, + "rewards/margins": 0.24389272928237915, + "rewards/rejected": -0.24391527473926544, + "step": 13695 + }, + { + "epoch": 9.4716459197787, + "grad_norm": 3.45434832572937, + "learning_rate": 2.9353004456738896e-06, + "log_odds_chosen": 11.478617668151855, + "log_odds_ratio": -1.8379865650786087e-05, + "logits/chosen": -0.023225925862789154, + "logits/rejected": -0.0734102874994278, + "logps/chosen": -0.00010731960355769843, + "logps/rejected": -2.3385214805603027, + "loss": 0.3996, + "nll_loss": 0.09990224242210388, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0731960173870903e-05, + "rewards/margins": 0.2338414192199707, + "rewards/rejected": -0.23385214805603027, + "step": 13696 + }, + { + "epoch": 9.472337482710927, + "grad_norm": 2.682950496673584, + "learning_rate": 2.9314584293837407e-06, + "log_odds_chosen": 10.489137649536133, + "log_odds_ratio": -6.562238559126854e-05, + "logits/chosen": -0.08150936663150787, + "logits/rejected": -0.13377483189105988, + "logps/chosen": -0.00015974488633219153, + "logps/rejected": -1.6511993408203125, + "loss": 0.336, + "nll_loss": 0.08398184180259705, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5974488633219153e-05, + "rewards/margins": 0.1651039570569992, + "rewards/rejected": -0.16511991620063782, + "step": 13697 + }, + { + "epoch": 9.473029045643154, + "grad_norm": 2.681863307952881, + "learning_rate": 2.9276164130935915e-06, + "log_odds_chosen": 10.831472396850586, + "log_odds_ratio": -0.00016914252773858607, + "logits/chosen": -0.34048521518707275, + "logits/rejected": -0.32761192321777344, + "logps/chosen": -0.00020769353432115167, + "logps/rejected": -1.6908882856369019, + "loss": 0.2172, + "nll_loss": 0.054294973611831665, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0769353795913048e-05, + "rewards/margins": 0.16906805336475372, + "rewards/rejected": -0.16908882558345795, + "step": 13698 + }, + { + "epoch": 9.47372060857538, + "grad_norm": 3.5782458782196045, + "learning_rate": 2.9237743968034427e-06, + "log_odds_chosen": 10.741741180419922, + "log_odds_ratio": -0.00011454321793280542, + "logits/chosen": -0.36604902148246765, + "logits/rejected": -0.3194882571697235, + "logps/chosen": -0.00023873275495134294, + "logps/rejected": -2.1600375175476074, + "loss": 0.2892, + "nll_loss": 0.07228127121925354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3873277314123698e-05, + "rewards/margins": 0.2159799039363861, + "rewards/rejected": -0.21600376069545746, + "step": 13699 + }, + { + "epoch": 9.474412171507607, + "grad_norm": 3.9181885719299316, + "learning_rate": 2.919932380513294e-06, + "log_odds_chosen": 10.678892135620117, + "log_odds_ratio": -0.0003496372955851257, + "logits/chosen": -0.46870332956314087, + "logits/rejected": -0.5139002799987793, + "logps/chosen": -0.0002608651702757925, + "logps/rejected": -1.6953610181808472, + "loss": 0.3021, + "nll_loss": 0.07547971606254578, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6086519937962294e-05, + "rewards/margins": 0.16951002180576324, + "rewards/rejected": -0.16953611373901367, + "step": 13700 + }, + { + "epoch": 9.475103734439834, + "grad_norm": 3.2762069702148438, + "learning_rate": 2.9160903642231446e-06, + "log_odds_chosen": 10.687110900878906, + "log_odds_ratio": -0.0008537794346921146, + "logits/chosen": -0.38689276576042175, + "logits/rejected": -0.4276360273361206, + "logps/chosen": -0.0003648015554063022, + "logps/rejected": -2.2275314331054688, + "loss": 0.3875, + "nll_loss": 0.09679935872554779, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.648015626822598e-05, + "rewards/margins": 0.22271665930747986, + "rewards/rejected": -0.22275316715240479, + "step": 13701 + }, + { + "epoch": 9.475795297372061, + "grad_norm": 3.8926045894622803, + "learning_rate": 2.9122483479329953e-06, + "log_odds_chosen": 11.083564758300781, + "log_odds_ratio": -5.3327985369833186e-05, + "logits/chosen": -0.26325908303260803, + "logits/rejected": -0.2744675576686859, + "logps/chosen": -0.00030690288986079395, + "logps/rejected": -2.491783857345581, + "loss": 0.4966, + "nll_loss": 0.12413786351680756, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.069029116886668e-05, + "rewards/margins": 0.2491477131843567, + "rewards/rejected": -0.24917840957641602, + "step": 13702 + }, + { + "epoch": 9.476486860304288, + "grad_norm": 4.4134440422058105, + "learning_rate": 2.908406331642846e-06, + "log_odds_chosen": 11.066388130187988, + "log_odds_ratio": -3.332171399961226e-05, + "logits/chosen": -0.23798823356628418, + "logits/rejected": -0.18991810083389282, + "logps/chosen": -0.00045601866440847516, + "logps/rejected": -2.242123603820801, + "loss": 0.6391, + "nll_loss": 0.15978127717971802, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.560186789603904e-05, + "rewards/margins": 0.22416675090789795, + "rewards/rejected": -0.22421236336231232, + "step": 13703 + }, + { + "epoch": 9.477178423236515, + "grad_norm": 3.9003124237060547, + "learning_rate": 2.9045643153526973e-06, + "log_odds_chosen": 11.239566802978516, + "log_odds_ratio": -0.00013829019735567272, + "logits/chosen": -0.3914109170436859, + "logits/rejected": -0.41909319162368774, + "logps/chosen": -0.0008354556048288941, + "logps/rejected": -2.3318867683410645, + "loss": 0.3443, + "nll_loss": 0.0860588327050209, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.354555757250637e-05, + "rewards/margins": 0.2331051081418991, + "rewards/rejected": -0.2331886738538742, + "step": 13704 + }, + { + "epoch": 9.477869986168741, + "grad_norm": 3.461466073989868, + "learning_rate": 2.900722299062548e-06, + "log_odds_chosen": 11.788957595825195, + "log_odds_ratio": -2.456989932397846e-05, + "logits/chosen": -0.1686708927154541, + "logits/rejected": -0.2556487023830414, + "logps/chosen": -0.00015625954256393015, + "logps/rejected": -2.564842700958252, + "loss": 0.4262, + "nll_loss": 0.10655610263347626, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5625955711584538e-05, + "rewards/margins": 0.25646865367889404, + "rewards/rejected": -0.2564842998981476, + "step": 13705 + }, + { + "epoch": 9.478561549100968, + "grad_norm": 3.256856918334961, + "learning_rate": 2.896880282772399e-06, + "log_odds_chosen": 11.125326156616211, + "log_odds_ratio": -7.740782893961295e-05, + "logits/chosen": -0.21685834228992462, + "logits/rejected": -0.22548425197601318, + "logps/chosen": -0.0001516193151473999, + "logps/rejected": -2.313654899597168, + "loss": 0.2757, + "nll_loss": 0.0689086988568306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5161932424234692e-05, + "rewards/margins": 0.23135033249855042, + "rewards/rejected": -0.23136550188064575, + "step": 13706 + }, + { + "epoch": 9.479253112033195, + "grad_norm": 3.6939895153045654, + "learning_rate": 2.8930382664822504e-06, + "log_odds_chosen": 12.723363876342773, + "log_odds_ratio": -5.737080755352508e-06, + "logits/chosen": -0.17502564191818237, + "logits/rejected": -0.16039277613162994, + "logps/chosen": -6.294964259723201e-05, + "logps/rejected": -2.9757256507873535, + "loss": 0.4568, + "nll_loss": 0.11420326679944992, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2949643506726716e-06, + "rewards/margins": 0.297566294670105, + "rewards/rejected": -0.2975725829601288, + "step": 13707 + }, + { + "epoch": 9.479944674965422, + "grad_norm": 3.37601637840271, + "learning_rate": 2.889196250192101e-06, + "log_odds_chosen": 12.085586547851562, + "log_odds_ratio": -1.5522884496022016e-05, + "logits/chosen": -0.3996522128582001, + "logits/rejected": -0.4061252176761627, + "logps/chosen": -9.973048872780055e-05, + "logps/rejected": -2.785703182220459, + "loss": 0.4483, + "nll_loss": 0.11207623779773712, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.973049600375816e-06, + "rewards/margins": 0.2785603702068329, + "rewards/rejected": -0.27857035398483276, + "step": 13708 + }, + { + "epoch": 9.480636237897649, + "grad_norm": 3.2007503509521484, + "learning_rate": 2.885354233901952e-06, + "log_odds_chosen": 12.089580535888672, + "log_odds_ratio": -1.4233486581360921e-05, + "logits/chosen": -0.06223127245903015, + "logits/rejected": -0.054247599095106125, + "logps/chosen": -0.00021037404076196253, + "logps/rejected": -3.1310501098632812, + "loss": 0.3306, + "nll_loss": 0.08265635371208191, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1037403712398373e-05, + "rewards/margins": 0.3130840063095093, + "rewards/rejected": -0.313105046749115, + "step": 13709 + }, + { + "epoch": 9.481327800829876, + "grad_norm": 3.856358051300049, + "learning_rate": 2.881512217611803e-06, + "log_odds_chosen": 9.892733573913574, + "log_odds_ratio": -0.00014180310245137662, + "logits/chosen": -0.11307594925165176, + "logits/rejected": -0.1626780778169632, + "logps/chosen": -0.000550577009562403, + "logps/rejected": -2.0717012882232666, + "loss": 0.3495, + "nll_loss": 0.08736885339021683, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505770241143182e-05, + "rewards/margins": 0.2071150839328766, + "rewards/rejected": -0.20717012882232666, + "step": 13710 + }, + { + "epoch": 9.482019363762102, + "grad_norm": 3.731860637664795, + "learning_rate": 2.8776702013216538e-06, + "log_odds_chosen": 11.344371795654297, + "log_odds_ratio": -0.00012071852688677609, + "logits/chosen": -0.43114885687828064, + "logits/rejected": -0.5573596358299255, + "logps/chosen": -0.00047632993664592505, + "logps/rejected": -3.5855040550231934, + "loss": 0.3692, + "nll_loss": 0.09229099750518799, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.763299511978403e-05, + "rewards/margins": 0.35850274562835693, + "rewards/rejected": -0.35855039954185486, + "step": 13711 + }, + { + "epoch": 9.48271092669433, + "grad_norm": 3.2189559936523438, + "learning_rate": 2.8738281850315045e-06, + "log_odds_chosen": 9.976612091064453, + "log_odds_ratio": -0.00010828935046447441, + "logits/chosen": -0.520516037940979, + "logits/rejected": -0.5231585502624512, + "logps/chosen": -0.001182026695460081, + "logps/rejected": -1.9440940618515015, + "loss": 0.3307, + "nll_loss": 0.08265582472085953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011820268264273182, + "rewards/margins": 0.19429121911525726, + "rewards/rejected": -0.19440940022468567, + "step": 13712 + }, + { + "epoch": 9.483402489626556, + "grad_norm": 3.685671091079712, + "learning_rate": 2.8699861687413553e-06, + "log_odds_chosen": 11.894643783569336, + "log_odds_ratio": -1.2605565643752925e-05, + "logits/chosen": -0.04930278658866882, + "logits/rejected": -0.13070863485336304, + "logps/chosen": -0.00010384486813563854, + "logps/rejected": -2.5991384983062744, + "loss": 0.3739, + "nll_loss": 0.09347648173570633, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0384486813563854e-05, + "rewards/margins": 0.2599034905433655, + "rewards/rejected": -0.2599138915538788, + "step": 13713 + }, + { + "epoch": 9.484094052558783, + "grad_norm": 3.278616189956665, + "learning_rate": 2.8661441524512064e-06, + "log_odds_chosen": 11.319167137145996, + "log_odds_ratio": -0.00026694638654589653, + "logits/chosen": -0.306878924369812, + "logits/rejected": -0.38049980998039246, + "logps/chosen": -0.001009898609481752, + "logps/rejected": -2.2372772693634033, + "loss": 0.3337, + "nll_loss": 0.08339577913284302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010098986967932433, + "rewards/margins": 0.2236267328262329, + "rewards/rejected": -0.2237277328968048, + "step": 13714 + }, + { + "epoch": 9.48478561549101, + "grad_norm": 3.5239064693450928, + "learning_rate": 2.8623021361610576e-06, + "log_odds_chosen": 11.55378532409668, + "log_odds_ratio": -4.032679134979844e-05, + "logits/chosen": -0.12040985375642776, + "logits/rejected": -0.15733376145362854, + "logps/chosen": -0.00027053779922425747, + "logps/rejected": -2.9848201274871826, + "loss": 0.3486, + "nll_loss": 0.08713582158088684, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.705378210521303e-05, + "rewards/margins": 0.29845497012138367, + "rewards/rejected": -0.2984820008277893, + "step": 13715 + }, + { + "epoch": 9.485477178423237, + "grad_norm": 4.460223197937012, + "learning_rate": 2.8584601198709084e-06, + "log_odds_chosen": 12.06035327911377, + "log_odds_ratio": -1.8679675122257322e-05, + "logits/chosen": -0.47401994466781616, + "logits/rejected": -0.42134177684783936, + "logps/chosen": -5.195748235564679e-05, + "logps/rejected": -2.2302098274230957, + "loss": 0.3507, + "nll_loss": 0.08767677843570709, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.19574859936256e-06, + "rewards/margins": 0.22301580011844635, + "rewards/rejected": -0.2230209857225418, + "step": 13716 + }, + { + "epoch": 9.486168741355463, + "grad_norm": 2.486361026763916, + "learning_rate": 2.8546181035807595e-06, + "log_odds_chosen": 11.190108299255371, + "log_odds_ratio": -6.145198130980134e-05, + "logits/chosen": -0.695064902305603, + "logits/rejected": -0.7149726152420044, + "logps/chosen": -0.0001509027642896399, + "logps/rejected": -2.284609794616699, + "loss": 0.236, + "nll_loss": 0.05898681655526161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.509027697466081e-05, + "rewards/margins": 0.22844591736793518, + "rewards/rejected": -0.22846101224422455, + "step": 13717 + }, + { + "epoch": 9.48686030428769, + "grad_norm": 4.049819469451904, + "learning_rate": 2.8507760872906103e-06, + "log_odds_chosen": 12.161651611328125, + "log_odds_ratio": -8.164747669070493e-06, + "logits/chosen": -0.5261566638946533, + "logits/rejected": -0.5017327070236206, + "logps/chosen": -0.00010774727707030252, + "logps/rejected": -2.7548489570617676, + "loss": 0.4114, + "nll_loss": 0.10285009443759918, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0774727343232371e-05, + "rewards/margins": 0.27547410130500793, + "rewards/rejected": -0.2754848599433899, + "step": 13718 + }, + { + "epoch": 9.487551867219917, + "grad_norm": 4.140195846557617, + "learning_rate": 2.846934071000461e-06, + "log_odds_chosen": 8.858113288879395, + "log_odds_ratio": -0.0964498221874237, + "logits/chosen": -0.5958190560340881, + "logits/rejected": -0.6393790245056152, + "logps/chosen": -0.021119430661201477, + "logps/rejected": -1.9574973583221436, + "loss": 0.3022, + "nll_loss": 0.06590069830417633, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.002111943205818534, + "rewards/margins": 0.19363778829574585, + "rewards/rejected": -0.19574972987174988, + "step": 13719 + }, + { + "epoch": 9.488243430152144, + "grad_norm": 3.137244701385498, + "learning_rate": 2.8430920547103118e-06, + "log_odds_chosen": 11.436861038208008, + "log_odds_ratio": -1.8819559045368806e-05, + "logits/chosen": -0.23692496120929718, + "logits/rejected": -0.4107188582420349, + "logps/chosen": -0.0001498030760558322, + "logps/rejected": -2.4895315170288086, + "loss": 0.4337, + "nll_loss": 0.1084178239107132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4980309060774744e-05, + "rewards/margins": 0.2489381730556488, + "rewards/rejected": -0.24895316362380981, + "step": 13720 + }, + { + "epoch": 9.48893499308437, + "grad_norm": 3.764981746673584, + "learning_rate": 2.839250038420163e-06, + "log_odds_chosen": 11.15414810180664, + "log_odds_ratio": -2.7917967599933036e-05, + "logits/chosen": -0.05560293793678284, + "logits/rejected": -0.19690510630607605, + "logps/chosen": -0.00021685943647753447, + "logps/rejected": -2.401160955429077, + "loss": 0.3767, + "nll_loss": 0.09417416900396347, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.168594437534921e-05, + "rewards/margins": 0.24009442329406738, + "rewards/rejected": -0.24011610448360443, + "step": 13721 + }, + { + "epoch": 9.489626556016598, + "grad_norm": 3.811582326889038, + "learning_rate": 2.835408022130014e-06, + "log_odds_chosen": 11.078816413879395, + "log_odds_ratio": -0.00021127502259332687, + "logits/chosen": -0.5352858304977417, + "logits/rejected": -0.5483341217041016, + "logps/chosen": -0.0009039980941452086, + "logps/rejected": -2.568899154663086, + "loss": 0.28, + "nll_loss": 0.06998679786920547, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.039981523528695e-05, + "rewards/margins": 0.25679951906204224, + "rewards/rejected": -0.2568899095058441, + "step": 13722 + }, + { + "epoch": 9.490318118948824, + "grad_norm": 4.578982353210449, + "learning_rate": 2.831566005839865e-06, + "log_odds_chosen": 10.710317611694336, + "log_odds_ratio": -0.00025594281032681465, + "logits/chosen": -0.7265045642852783, + "logits/rejected": -0.7505810260772705, + "logps/chosen": -0.00025931381969712675, + "logps/rejected": -1.9837772846221924, + "loss": 0.4493, + "nll_loss": 0.11230659484863281, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5931381969712675e-05, + "rewards/margins": 0.19835181534290314, + "rewards/rejected": -0.19837772846221924, + "step": 13723 + }, + { + "epoch": 9.491009681881051, + "grad_norm": 3.612924575805664, + "learning_rate": 2.827723989549716e-06, + "log_odds_chosen": 12.415328025817871, + "log_odds_ratio": -2.9161874408600852e-05, + "logits/chosen": -0.05908776819705963, + "logits/rejected": -0.1314144879579544, + "logps/chosen": -0.00014797823678236455, + "logps/rejected": -3.427035331726074, + "loss": 0.5031, + "nll_loss": 0.12576347589492798, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4797822586842813e-05, + "rewards/margins": 0.3426887094974518, + "rewards/rejected": -0.34270352125167847, + "step": 13724 + }, + { + "epoch": 9.491701244813278, + "grad_norm": 4.0577778816223145, + "learning_rate": 2.823881973259567e-06, + "log_odds_chosen": 10.035642623901367, + "log_odds_ratio": -0.00023546107695437968, + "logits/chosen": -0.19884175062179565, + "logits/rejected": -0.31768226623535156, + "logps/chosen": -0.0004935381002724171, + "logps/rejected": -1.8631964921951294, + "loss": 0.3125, + "nll_loss": 0.07809849083423615, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.935381002724171e-05, + "rewards/margins": 0.18627029657363892, + "rewards/rejected": -0.18631964921951294, + "step": 13725 + }, + { + "epoch": 9.492392807745505, + "grad_norm": 3.204608201980591, + "learning_rate": 2.8200399569694176e-06, + "log_odds_chosen": 11.073084831237793, + "log_odds_ratio": -0.0004762558382935822, + "logits/chosen": -0.4759052097797394, + "logits/rejected": -0.5320380926132202, + "logps/chosen": -0.000281669752439484, + "logps/rejected": -2.417051315307617, + "loss": 0.33, + "nll_loss": 0.08244785666465759, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8166976335342042e-05, + "rewards/margins": 0.2416769564151764, + "rewards/rejected": -0.24170511960983276, + "step": 13726 + }, + { + "epoch": 9.493084370677732, + "grad_norm": 4.095341682434082, + "learning_rate": 2.8161979406792687e-06, + "log_odds_chosen": 11.520950317382812, + "log_odds_ratio": -2.6311652618460357e-05, + "logits/chosen": -0.07997718453407288, + "logits/rejected": -0.11879251897335052, + "logps/chosen": -0.00034241483081132174, + "logps/rejected": -2.570692539215088, + "loss": 0.4557, + "nll_loss": 0.11391951143741608, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.424148599151522e-05, + "rewards/margins": 0.2570350170135498, + "rewards/rejected": -0.2570692300796509, + "step": 13727 + }, + { + "epoch": 9.493775933609959, + "grad_norm": 3.238722801208496, + "learning_rate": 2.8123559243891195e-06, + "log_odds_chosen": 11.319939613342285, + "log_odds_ratio": -3.77092364942655e-05, + "logits/chosen": -0.34053710103034973, + "logits/rejected": -0.3158467710018158, + "logps/chosen": -0.00011048711894545704, + "logps/rejected": -1.9384033679962158, + "loss": 0.3732, + "nll_loss": 0.09329046308994293, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1048712622141466e-05, + "rewards/margins": 0.19382929801940918, + "rewards/rejected": -0.19384033977985382, + "step": 13728 + }, + { + "epoch": 9.494467496542185, + "grad_norm": 2.9551427364349365, + "learning_rate": 2.8085139080989706e-06, + "log_odds_chosen": 11.750532150268555, + "log_odds_ratio": -2.050580042123329e-05, + "logits/chosen": -0.5918290615081787, + "logits/rejected": -0.6298559904098511, + "logps/chosen": -9.227452392224222e-05, + "logps/rejected": -2.4829556941986084, + "loss": 0.2679, + "nll_loss": 0.06697001308202744, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.227452210325282e-06, + "rewards/margins": 0.24828633666038513, + "rewards/rejected": -0.24829556047916412, + "step": 13729 + }, + { + "epoch": 9.495159059474412, + "grad_norm": 4.240682601928711, + "learning_rate": 2.8046718918088214e-06, + "log_odds_chosen": 11.138177871704102, + "log_odds_ratio": -9.627740655560046e-05, + "logits/chosen": -0.40648186206817627, + "logits/rejected": -0.4481659531593323, + "logps/chosen": -0.0002279129985254258, + "logps/rejected": -2.282442092895508, + "loss": 0.4667, + "nll_loss": 0.11667248606681824, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.279130058013834e-05, + "rewards/margins": 0.22822141647338867, + "rewards/rejected": -0.22824421525001526, + "step": 13730 + }, + { + "epoch": 9.495850622406639, + "grad_norm": 3.6743686199188232, + "learning_rate": 2.8008298755186726e-06, + "log_odds_chosen": 11.219482421875, + "log_odds_ratio": -3.4768607292789966e-05, + "logits/chosen": -0.47874248027801514, + "logits/rejected": -0.5910977125167847, + "logps/chosen": -0.00014219920558389276, + "logps/rejected": -2.0327465534210205, + "loss": 0.3731, + "nll_loss": 0.09327490627765656, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4219920558389276e-05, + "rewards/margins": 0.2032604068517685, + "rewards/rejected": -0.2032746523618698, + "step": 13731 + }, + { + "epoch": 9.496542185338866, + "grad_norm": 2.6880970001220703, + "learning_rate": 2.7969878592285233e-06, + "log_odds_chosen": 10.376435279846191, + "log_odds_ratio": -0.00012236303882673383, + "logits/chosen": -0.5876057147979736, + "logits/rejected": -0.6403185725212097, + "logps/chosen": -0.00017380820645485073, + "logps/rejected": -1.4591680765151978, + "loss": 0.3207, + "nll_loss": 0.08016160130500793, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7380822100676596e-05, + "rewards/margins": 0.1458994299173355, + "rewards/rejected": -0.14591681957244873, + "step": 13732 + }, + { + "epoch": 9.497233748271093, + "grad_norm": 6.102115631103516, + "learning_rate": 2.793145842938374e-06, + "log_odds_chosen": 10.565109252929688, + "log_odds_ratio": -0.00015378330135717988, + "logits/chosen": -0.5142971873283386, + "logits/rejected": -0.605043888092041, + "logps/chosen": -0.0008334450540132821, + "logps/rejected": -1.8944003582000732, + "loss": 0.3937, + "nll_loss": 0.0984080508351326, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.334450831171125e-05, + "rewards/margins": 0.1893567144870758, + "rewards/rejected": -0.1894400417804718, + "step": 13733 + }, + { + "epoch": 9.49792531120332, + "grad_norm": 4.143298149108887, + "learning_rate": 2.7893038266482252e-06, + "log_odds_chosen": 12.307402610778809, + "log_odds_ratio": -9.185643648379482e-06, + "logits/chosen": -0.2419433444738388, + "logits/rejected": -0.35360732674598694, + "logps/chosen": -0.000124764846987091, + "logps/rejected": -3.047055721282959, + "loss": 0.4336, + "nll_loss": 0.10840722173452377, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2476483789214399e-05, + "rewards/margins": 0.3046931028366089, + "rewards/rejected": -0.30470559000968933, + "step": 13734 + }, + { + "epoch": 9.498616874135546, + "grad_norm": 3.105912685394287, + "learning_rate": 2.785461810358076e-06, + "log_odds_chosen": 11.952037811279297, + "log_odds_ratio": -3.523328632581979e-05, + "logits/chosen": -0.3825800120830536, + "logits/rejected": -0.4227985143661499, + "logps/chosen": -0.0001465064415242523, + "logps/rejected": -2.528470754623413, + "loss": 0.4094, + "nll_loss": 0.10234944522380829, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4650645425717812e-05, + "rewards/margins": 0.25283244252204895, + "rewards/rejected": -0.2528470754623413, + "step": 13735 + }, + { + "epoch": 9.499308437067773, + "grad_norm": 3.376985549926758, + "learning_rate": 2.7816197940679267e-06, + "log_odds_chosen": 11.052566528320312, + "log_odds_ratio": -7.253669900819659e-05, + "logits/chosen": -0.17732563614845276, + "logits/rejected": -0.38987892866134644, + "logps/chosen": -0.00033917598193511367, + "logps/rejected": -2.2939813137054443, + "loss": 0.2221, + "nll_loss": 0.055507708340883255, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3917596738319844e-05, + "rewards/margins": 0.22936421632766724, + "rewards/rejected": -0.22939813137054443, + "step": 13736 + }, + { + "epoch": 9.5, + "grad_norm": 4.159225940704346, + "learning_rate": 2.777777777777778e-06, + "log_odds_chosen": 11.19643783569336, + "log_odds_ratio": -5.351271101972088e-05, + "logits/chosen": -0.29468539357185364, + "logits/rejected": -0.3837074041366577, + "logps/chosen": -0.00014817621558904648, + "logps/rejected": -2.212210178375244, + "loss": 0.3631, + "nll_loss": 0.09078159928321838, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.481762228650041e-05, + "rewards/margins": 0.2212061733007431, + "rewards/rejected": -0.22122101485729218, + "step": 13737 + }, + { + "epoch": 9.500691562932227, + "grad_norm": 4.34866189956665, + "learning_rate": 2.773935761487629e-06, + "log_odds_chosen": 12.640228271484375, + "log_odds_ratio": -1.353957486571744e-05, + "logits/chosen": -0.09187261760234833, + "logits/rejected": -0.09283635020256042, + "logps/chosen": -0.00016267431783489883, + "logps/rejected": -3.7860047817230225, + "loss": 0.3538, + "nll_loss": 0.08845299482345581, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6267431419692002e-05, + "rewards/margins": 0.37858423590660095, + "rewards/rejected": -0.37860050797462463, + "step": 13738 + }, + { + "epoch": 9.501383125864454, + "grad_norm": 4.006698131561279, + "learning_rate": 2.77009374519748e-06, + "log_odds_chosen": 11.847143173217773, + "log_odds_ratio": -4.2443374695722014e-05, + "logits/chosen": -0.14370986819267273, + "logits/rejected": -0.0862513855099678, + "logps/chosen": -0.0001187943562399596, + "logps/rejected": -2.390091896057129, + "loss": 0.3347, + "nll_loss": 0.08366773277521133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1879436897288542e-05, + "rewards/margins": 0.23899732530117035, + "rewards/rejected": -0.23900920152664185, + "step": 13739 + }, + { + "epoch": 9.50207468879668, + "grad_norm": 3.6946158409118652, + "learning_rate": 2.7662517289073306e-06, + "log_odds_chosen": 10.731229782104492, + "log_odds_ratio": -6.909217336215079e-05, + "logits/chosen": -0.6922059059143066, + "logits/rejected": -0.6929417848587036, + "logps/chosen": -0.0004905189271084964, + "logps/rejected": -1.811964511871338, + "loss": 0.3459, + "nll_loss": 0.08647441118955612, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9051894166041166e-05, + "rewards/margins": 0.18114739656448364, + "rewards/rejected": -0.1811964511871338, + "step": 13740 + }, + { + "epoch": 9.502766251728907, + "grad_norm": 3.3442227840423584, + "learning_rate": 2.7624097126171818e-06, + "log_odds_chosen": 9.403704643249512, + "log_odds_ratio": -0.00022261112462729216, + "logits/chosen": -0.5050197839736938, + "logits/rejected": -0.6509999632835388, + "logps/chosen": -0.00037455116398632526, + "logps/rejected": -1.067748785018921, + "loss": 0.3388, + "nll_loss": 0.08468709886074066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7455116398632526e-05, + "rewards/margins": 0.106737419962883, + "rewards/rejected": -0.10677488148212433, + "step": 13741 + }, + { + "epoch": 9.503457814661134, + "grad_norm": 3.956580638885498, + "learning_rate": 2.7585676963270325e-06, + "log_odds_chosen": 11.188089370727539, + "log_odds_ratio": -3.0993913242127746e-05, + "logits/chosen": -0.5652532577514648, + "logits/rejected": -0.6602020263671875, + "logps/chosen": -0.00010985941480612382, + "logps/rejected": -2.1618151664733887, + "loss": 0.4445, + "nll_loss": 0.11113232374191284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0985942026309203e-05, + "rewards/margins": 0.216170534491539, + "rewards/rejected": -0.21618153154850006, + "step": 13742 + }, + { + "epoch": 9.504149377593361, + "grad_norm": 2.5939555168151855, + "learning_rate": 2.7547256800368833e-06, + "log_odds_chosen": 11.381129264831543, + "log_odds_ratio": -3.200870196451433e-05, + "logits/chosen": -0.01486485730856657, + "logits/rejected": -0.04487267881631851, + "logps/chosen": -0.00017249659867957234, + "logps/rejected": -2.412909746170044, + "loss": 0.2677, + "nll_loss": 0.06691596657037735, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7249660231755115e-05, + "rewards/margins": 0.24127374589443207, + "rewards/rejected": -0.24129100143909454, + "step": 13743 + }, + { + "epoch": 9.504840940525588, + "grad_norm": 4.674348831176758, + "learning_rate": 2.7508836637467344e-06, + "log_odds_chosen": 10.176416397094727, + "log_odds_ratio": -9.241919906344265e-05, + "logits/chosen": 0.023514077067375183, + "logits/rejected": -0.02323298156261444, + "logps/chosen": -0.0006039447034709156, + "logps/rejected": -1.9654035568237305, + "loss": 0.3913, + "nll_loss": 0.0978233814239502, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039447180228308e-05, + "rewards/margins": 0.1964799463748932, + "rewards/rejected": -0.19654035568237305, + "step": 13744 + }, + { + "epoch": 9.505532503457815, + "grad_norm": 2.398162364959717, + "learning_rate": 2.7470416474565856e-06, + "log_odds_chosen": 10.539690971374512, + "log_odds_ratio": -0.00018237254698760808, + "logits/chosen": 0.11595743149518967, + "logits/rejected": -0.02506435476243496, + "logps/chosen": -0.00022768854978494346, + "logps/rejected": -1.79641592502594, + "loss": 0.325, + "nll_loss": 0.08123064786195755, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2768854250898585e-05, + "rewards/margins": 0.17961883544921875, + "rewards/rejected": -0.17964158952236176, + "step": 13745 + }, + { + "epoch": 9.506224066390041, + "grad_norm": 2.861630916595459, + "learning_rate": 2.7431996311664363e-06, + "log_odds_chosen": 10.886007308959961, + "log_odds_ratio": -0.00011090746556874365, + "logits/chosen": -0.06716457009315491, + "logits/rejected": -0.08194438368082047, + "logps/chosen": -0.00025152985472232103, + "logps/rejected": -2.21199369430542, + "loss": 0.249, + "nll_loss": 0.06222885847091675, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5152985472232103e-05, + "rewards/margins": 0.2211742103099823, + "rewards/rejected": -0.22119936347007751, + "step": 13746 + }, + { + "epoch": 9.506915629322268, + "grad_norm": 2.145341396331787, + "learning_rate": 2.739357614876287e-06, + "log_odds_chosen": 12.142789840698242, + "log_odds_ratio": -1.9502431314322166e-05, + "logits/chosen": -0.4173874855041504, + "logits/rejected": -0.544904887676239, + "logps/chosen": -0.00026509028975851834, + "logps/rejected": -3.7034568786621094, + "loss": 0.2385, + "nll_loss": 0.059627942740917206, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6509029339649715e-05, + "rewards/margins": 0.3703191578388214, + "rewards/rejected": -0.37034571170806885, + "step": 13747 + }, + { + "epoch": 9.507607192254495, + "grad_norm": 3.220600128173828, + "learning_rate": 2.7355155985861383e-06, + "log_odds_chosen": 11.07638931274414, + "log_odds_ratio": -3.4151064028264955e-05, + "logits/chosen": -0.5607274174690247, + "logits/rejected": -0.6396695375442505, + "logps/chosen": -4.824280767934397e-05, + "logps/rejected": -1.467947006225586, + "loss": 0.3997, + "nll_loss": 0.09991302341222763, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.824280495085986e-06, + "rewards/margins": 0.14678986370563507, + "rewards/rejected": -0.14679470658302307, + "step": 13748 + }, + { + "epoch": 9.508298755186722, + "grad_norm": 2.3197197914123535, + "learning_rate": 2.731673582295989e-06, + "log_odds_chosen": 11.177478790283203, + "log_odds_ratio": -4.273248487152159e-05, + "logits/chosen": -0.18425364792346954, + "logits/rejected": -0.3179050087928772, + "logps/chosen": -0.00013618064986076206, + "logps/rejected": -2.3722050189971924, + "loss": 0.3034, + "nll_loss": 0.07583820074796677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3618064258480445e-05, + "rewards/margins": 0.23720687627792358, + "rewards/rejected": -0.23722049593925476, + "step": 13749 + }, + { + "epoch": 9.508990318118949, + "grad_norm": 2.5504002571105957, + "learning_rate": 2.7278315660058398e-06, + "log_odds_chosen": 11.458822250366211, + "log_odds_ratio": -3.5846809623762965e-05, + "logits/chosen": -0.4976326823234558, + "logits/rejected": -0.6373475790023804, + "logps/chosen": -0.00011164910392835736, + "logps/rejected": -1.9494824409484863, + "loss": 0.302, + "nll_loss": 0.07548534870147705, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1164910574734677e-05, + "rewards/margins": 0.1949370801448822, + "rewards/rejected": -0.1949482411146164, + "step": 13750 + }, + { + "epoch": 9.509681881051176, + "grad_norm": 4.700429916381836, + "learning_rate": 2.723989549715691e-06, + "log_odds_chosen": 11.000699043273926, + "log_odds_ratio": -0.0001611942716408521, + "logits/chosen": -0.09237821400165558, + "logits/rejected": -0.04532511904835701, + "logps/chosen": -0.0004388167290017009, + "logps/rejected": -2.4760220050811768, + "loss": 0.6004, + "nll_loss": 0.15007996559143066, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3881671444978565e-05, + "rewards/margins": 0.24755831062793732, + "rewards/rejected": -0.2476022094488144, + "step": 13751 + }, + { + "epoch": 9.510373443983402, + "grad_norm": 3.0092380046844482, + "learning_rate": 2.720147533425542e-06, + "log_odds_chosen": 12.049249649047852, + "log_odds_ratio": -3.0345461709657684e-05, + "logits/chosen": -0.13598808646202087, + "logits/rejected": -0.16782134771347046, + "logps/chosen": -0.0001091573212761432, + "logps/rejected": -2.8934497833251953, + "loss": 0.3649, + "nll_loss": 0.09121409058570862, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0915731763816439e-05, + "rewards/margins": 0.2893340587615967, + "rewards/rejected": -0.2893449664115906, + "step": 13752 + }, + { + "epoch": 9.51106500691563, + "grad_norm": 4.360344886779785, + "learning_rate": 2.716305517135393e-06, + "log_odds_chosen": 10.946246147155762, + "log_odds_ratio": -0.00011370900028850883, + "logits/chosen": -0.034930601716041565, + "logits/rejected": -0.21249479055404663, + "logps/chosen": -0.00041773245902732015, + "logps/rejected": -2.1995179653167725, + "loss": 0.5481, + "nll_loss": 0.13701015710830688, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1773251723498106e-05, + "rewards/margins": 0.21991004049777985, + "rewards/rejected": -0.2199518233537674, + "step": 13753 + }, + { + "epoch": 9.511756569847856, + "grad_norm": 5.424455642700195, + "learning_rate": 2.7124635008452436e-06, + "log_odds_chosen": 11.442152976989746, + "log_odds_ratio": -2.2443495254265144e-05, + "logits/chosen": -0.3222275376319885, + "logits/rejected": -0.42254069447517395, + "logps/chosen": -0.00016010711260605603, + "logps/rejected": -2.557199001312256, + "loss": 0.4625, + "nll_loss": 0.11561720073223114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6010711988201365e-05, + "rewards/margins": 0.2557039260864258, + "rewards/rejected": -0.2557199001312256, + "step": 13754 + }, + { + "epoch": 9.512448132780083, + "grad_norm": 5.503417491912842, + "learning_rate": 2.7086214845550948e-06, + "log_odds_chosen": 10.576732635498047, + "log_odds_ratio": -0.00010041467612609267, + "logits/chosen": -0.3194338083267212, + "logits/rejected": -0.34131085872650146, + "logps/chosen": -0.0002541765570640564, + "logps/rejected": -2.326648235321045, + "loss": 0.4973, + "nll_loss": 0.12430934607982635, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5417659344384447e-05, + "rewards/margins": 0.2326394021511078, + "rewards/rejected": -0.2326648086309433, + "step": 13755 + }, + { + "epoch": 9.51313969571231, + "grad_norm": 3.766571044921875, + "learning_rate": 2.7047794682649455e-06, + "log_odds_chosen": 12.046109199523926, + "log_odds_ratio": -1.1743547474907245e-05, + "logits/chosen": -0.8128657341003418, + "logits/rejected": -0.7834848165512085, + "logps/chosen": -0.00014546149759553373, + "logps/rejected": -2.5551412105560303, + "loss": 0.3761, + "nll_loss": 0.09401434659957886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4546150850947015e-05, + "rewards/margins": 0.25549960136413574, + "rewards/rejected": -0.25551414489746094, + "step": 13756 + }, + { + "epoch": 9.513831258644537, + "grad_norm": 3.58656907081604, + "learning_rate": 2.7009374519747963e-06, + "log_odds_chosen": 11.6583833694458, + "log_odds_ratio": -3.437104169279337e-05, + "logits/chosen": -0.2658565938472748, + "logits/rejected": -0.35223588347435, + "logps/chosen": -0.00015039359277579933, + "logps/rejected": -2.4674582481384277, + "loss": 0.5114, + "nll_loss": 0.1278354823589325, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5039359823276754e-05, + "rewards/margins": 0.246730774641037, + "rewards/rejected": -0.24674582481384277, + "step": 13757 + }, + { + "epoch": 9.514522821576763, + "grad_norm": 2.4052798748016357, + "learning_rate": 2.6970954356846475e-06, + "log_odds_chosen": 11.306896209716797, + "log_odds_ratio": -3.7681104004150257e-05, + "logits/chosen": -0.21047669649124146, + "logits/rejected": -0.1311059594154358, + "logps/chosen": -0.00017037391080521047, + "logps/rejected": -2.7428433895111084, + "loss": 0.3149, + "nll_loss": 0.07871001958847046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7037391444318928e-05, + "rewards/margins": 0.274267315864563, + "rewards/rejected": -0.27428436279296875, + "step": 13758 + }, + { + "epoch": 9.51521438450899, + "grad_norm": 3.084327220916748, + "learning_rate": 2.693253419394498e-06, + "log_odds_chosen": 11.337088584899902, + "log_odds_ratio": -5.701879126718268e-05, + "logits/chosen": -0.43909841775894165, + "logits/rejected": -0.5297845602035522, + "logps/chosen": -0.00023459625663235784, + "logps/rejected": -2.462244749069214, + "loss": 0.4307, + "nll_loss": 0.10767786204814911, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3459626390831545e-05, + "rewards/margins": 0.2462010383605957, + "rewards/rejected": -0.24622449278831482, + "step": 13759 + }, + { + "epoch": 9.515905947441217, + "grad_norm": 2.876373291015625, + "learning_rate": 2.6894114031043494e-06, + "log_odds_chosen": 11.915162086486816, + "log_odds_ratio": -1.1002990504493937e-05, + "logits/chosen": -0.01239769160747528, + "logits/rejected": -0.014579236507415771, + "logps/chosen": -0.00014055031351745129, + "logps/rejected": -2.851630687713623, + "loss": 0.3221, + "nll_loss": 0.08052629232406616, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4055030987947248e-05, + "rewards/margins": 0.2851490378379822, + "rewards/rejected": -0.2851630747318268, + "step": 13760 + }, + { + "epoch": 9.516597510373444, + "grad_norm": 4.359734535217285, + "learning_rate": 2.6855693868142005e-06, + "log_odds_chosen": 10.538614273071289, + "log_odds_ratio": -0.00028232764452695847, + "logits/chosen": -0.39528805017471313, + "logits/rejected": -0.4545425474643707, + "logps/chosen": -0.0003071337123401463, + "logps/rejected": -1.9423317909240723, + "loss": 0.4818, + "nll_loss": 0.12042504549026489, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.071337050641887e-05, + "rewards/margins": 0.1942024528980255, + "rewards/rejected": -0.19423317909240723, + "step": 13761 + }, + { + "epoch": 9.51728907330567, + "grad_norm": 2.967798948287964, + "learning_rate": 2.6817273705240513e-06, + "log_odds_chosen": 10.072077751159668, + "log_odds_ratio": -0.0020614732056856155, + "logits/chosen": -0.3225874900817871, + "logits/rejected": -0.28027576208114624, + "logps/chosen": -0.016036754474043846, + "logps/rejected": -1.8646742105484009, + "loss": 0.3828, + "nll_loss": 0.09549985826015472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016036754241213202, + "rewards/margins": 0.18486374616622925, + "rewards/rejected": -0.18646740913391113, + "step": 13762 + }, + { + "epoch": 9.517980636237898, + "grad_norm": 5.028452396392822, + "learning_rate": 2.677885354233902e-06, + "log_odds_chosen": 11.795109748840332, + "log_odds_ratio": -0.00018023568554781377, + "logits/chosen": -0.052124328911304474, + "logits/rejected": -0.04959341511130333, + "logps/chosen": -0.000244269089307636, + "logps/rejected": -2.9171204566955566, + "loss": 0.4495, + "nll_loss": 0.11235899478197098, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4426910385955125e-05, + "rewards/margins": 0.29168763756752014, + "rewards/rejected": -0.29171207547187805, + "step": 13763 + }, + { + "epoch": 9.518672199170124, + "grad_norm": 3.2558481693267822, + "learning_rate": 2.674043337943753e-06, + "log_odds_chosen": 11.236143112182617, + "log_odds_ratio": -1.880578747659456e-05, + "logits/chosen": 0.16380754113197327, + "logits/rejected": 0.18336796760559082, + "logps/chosen": -0.0001271188084501773, + "logps/rejected": -2.213346242904663, + "loss": 0.2965, + "nll_loss": 0.07413183152675629, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.271188011742197e-05, + "rewards/margins": 0.22132191061973572, + "rewards/rejected": -0.22133463621139526, + "step": 13764 + }, + { + "epoch": 9.519363762102351, + "grad_norm": 3.0914859771728516, + "learning_rate": 2.670201321653604e-06, + "log_odds_chosen": 11.55090618133545, + "log_odds_ratio": -9.511876123724505e-05, + "logits/chosen": -0.6392191052436829, + "logits/rejected": -0.7223262190818787, + "logps/chosen": -0.00022679113317281008, + "logps/rejected": -3.00004243850708, + "loss": 0.3672, + "nll_loss": 0.09178087115287781, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2679112589685246e-05, + "rewards/margins": 0.29998156428337097, + "rewards/rejected": -0.300004243850708, + "step": 13765 + }, + { + "epoch": 9.520055325034578, + "grad_norm": 2.6469478607177734, + "learning_rate": 2.6663593053634547e-06, + "log_odds_chosen": 11.283838272094727, + "log_odds_ratio": -3.390854908502661e-05, + "logits/chosen": -0.47625666856765747, + "logits/rejected": -0.5266400575637817, + "logps/chosen": -0.00020409503486007452, + "logps/rejected": -1.99119234085083, + "loss": 0.3722, + "nll_loss": 0.09304707497358322, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0409504941198975e-05, + "rewards/margins": 0.19909882545471191, + "rewards/rejected": -0.19911924004554749, + "step": 13766 + }, + { + "epoch": 9.520746887966805, + "grad_norm": 3.793987512588501, + "learning_rate": 2.662517289073306e-06, + "log_odds_chosen": 10.413166046142578, + "log_odds_ratio": -0.0005908824969083071, + "logits/chosen": -0.392555296421051, + "logits/rejected": -0.43396303057670593, + "logps/chosen": -0.0013371091336011887, + "logps/rejected": -2.43149471282959, + "loss": 0.4323, + "nll_loss": 0.10801825672388077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001337109279120341, + "rewards/margins": 0.24301576614379883, + "rewards/rejected": -0.24314947426319122, + "step": 13767 + }, + { + "epoch": 9.521438450899032, + "grad_norm": 4.248869895935059, + "learning_rate": 2.658675272783157e-06, + "log_odds_chosen": 11.54124641418457, + "log_odds_ratio": -5.0189213652629405e-05, + "logits/chosen": -0.0543501079082489, + "logits/rejected": -0.17935310304164886, + "logps/chosen": -0.00014576385729014874, + "logps/rejected": -2.3155760765075684, + "loss": 0.5594, + "nll_loss": 0.1398562639951706, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4576385183318052e-05, + "rewards/margins": 0.23154304921627045, + "rewards/rejected": -0.23155760765075684, + "step": 13768 + }, + { + "epoch": 9.522130013831259, + "grad_norm": 2.860259771347046, + "learning_rate": 2.654833256493008e-06, + "log_odds_chosen": 12.623004913330078, + "log_odds_ratio": -2.0028579456266016e-05, + "logits/chosen": -0.19771257042884827, + "logits/rejected": -0.24933721125125885, + "logps/chosen": -8.964027801994234e-05, + "logps/rejected": -3.1500256061553955, + "loss": 0.314, + "nll_loss": 0.07849594950675964, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.964027983893175e-06, + "rewards/margins": 0.31499359011650085, + "rewards/rejected": -0.31500256061553955, + "step": 13769 + }, + { + "epoch": 9.522821576763485, + "grad_norm": 3.8954451084136963, + "learning_rate": 2.6509912402028586e-06, + "log_odds_chosen": 10.765434265136719, + "log_odds_ratio": -0.0008625364862382412, + "logits/chosen": -0.28756552934646606, + "logits/rejected": -0.36020129919052124, + "logps/chosen": -0.001342854229733348, + "logps/rejected": -2.6182684898376465, + "loss": 0.6883, + "nll_loss": 0.17197641730308533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001342854229733348, + "rewards/margins": 0.2616925835609436, + "rewards/rejected": -0.26182684302330017, + "step": 13770 + }, + { + "epoch": 9.523513139695712, + "grad_norm": 3.4728455543518066, + "learning_rate": 2.6471492239127093e-06, + "log_odds_chosen": 11.453036308288574, + "log_odds_ratio": -3.844147067866288e-05, + "logits/chosen": -0.444953978061676, + "logits/rejected": -0.4889020621776581, + "logps/chosen": -0.00020016248163301498, + "logps/rejected": -2.6657907962799072, + "loss": 0.7643, + "nll_loss": 0.19108229875564575, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0016246708109975e-05, + "rewards/margins": 0.26655906438827515, + "rewards/rejected": -0.2665790915489197, + "step": 13771 + }, + { + "epoch": 9.524204702627939, + "grad_norm": 4.564413547515869, + "learning_rate": 2.6433072076225605e-06, + "log_odds_chosen": 12.367158889770508, + "log_odds_ratio": -1.2168299690529238e-05, + "logits/chosen": 0.1791875958442688, + "logits/rejected": 0.1064046323299408, + "logps/chosen": -0.00012622703798115253, + "logps/rejected": -3.2469067573547363, + "loss": 0.6618, + "nll_loss": 0.1654365360736847, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.262270234292373e-05, + "rewards/margins": 0.32467809319496155, + "rewards/rejected": -0.32469069957733154, + "step": 13772 + }, + { + "epoch": 9.524896265560166, + "grad_norm": 3.1376585960388184, + "learning_rate": 2.6394651913324112e-06, + "log_odds_chosen": 10.973043441772461, + "log_odds_ratio": -0.00015945962513796985, + "logits/chosen": -0.26559609174728394, + "logits/rejected": -0.25664129853248596, + "logps/chosen": -0.0012450111098587513, + "logps/rejected": -2.3380062580108643, + "loss": 0.3606, + "nll_loss": 0.09012934565544128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012450110807549208, + "rewards/margins": 0.23367613554000854, + "rewards/rejected": -0.23380064964294434, + "step": 13773 + }, + { + "epoch": 9.525587828492393, + "grad_norm": 2.822948932647705, + "learning_rate": 2.635623175042262e-06, + "log_odds_chosen": 12.173641204833984, + "log_odds_ratio": -1.9636803699540906e-05, + "logits/chosen": -0.5914163589477539, + "logits/rejected": -0.5313130617141724, + "logps/chosen": -8.995865209726617e-05, + "logps/rejected": -2.55480694770813, + "loss": 0.2704, + "nll_loss": 0.06760506331920624, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995865755423438e-06, + "rewards/margins": 0.25547170639038086, + "rewards/rejected": -0.25548070669174194, + "step": 13774 + }, + { + "epoch": 9.52627939142462, + "grad_norm": 3.493384838104248, + "learning_rate": 2.6317811587521136e-06, + "log_odds_chosen": 11.011516571044922, + "log_odds_ratio": -9.456177212996408e-05, + "logits/chosen": -0.28284674882888794, + "logits/rejected": -0.26557302474975586, + "logps/chosen": -0.0042744167149066925, + "logps/rejected": -2.7121171951293945, + "loss": 0.3721, + "nll_loss": 0.09302199631929398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042744172969833016, + "rewards/margins": 0.27078428864479065, + "rewards/rejected": -0.27121174335479736, + "step": 13775 + }, + { + "epoch": 9.526970954356846, + "grad_norm": 3.317343235015869, + "learning_rate": 2.6279391424619643e-06, + "log_odds_chosen": 12.224468231201172, + "log_odds_ratio": -1.7389818822266534e-05, + "logits/chosen": -0.43882882595062256, + "logits/rejected": -0.4803587794303894, + "logps/chosen": -0.0001255149400094524, + "logps/rejected": -2.820533514022827, + "loss": 0.415, + "nll_loss": 0.10373595356941223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.255149345524842e-05, + "rewards/margins": 0.2820408344268799, + "rewards/rejected": -0.2820533812046051, + "step": 13776 + }, + { + "epoch": 9.527662517289073, + "grad_norm": 4.189540863037109, + "learning_rate": 2.624097126171815e-06, + "log_odds_chosen": 11.56295108795166, + "log_odds_ratio": -4.864947550231591e-05, + "logits/chosen": -0.4536324143409729, + "logits/rejected": -0.4786219596862793, + "logps/chosen": -0.00020644332107622176, + "logps/rejected": -2.809377908706665, + "loss": 0.6721, + "nll_loss": 0.1680191308259964, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0644332835217938e-05, + "rewards/margins": 0.28091713786125183, + "rewards/rejected": -0.2809377610683441, + "step": 13777 + }, + { + "epoch": 9.5283540802213, + "grad_norm": 3.0645949840545654, + "learning_rate": 2.620255109881666e-06, + "log_odds_chosen": 10.801593780517578, + "log_odds_ratio": -6.805037264712155e-05, + "logits/chosen": -0.5784319639205933, + "logits/rejected": -0.5846536159515381, + "logps/chosen": -0.0001794451236492023, + "logps/rejected": -1.995002031326294, + "loss": 0.2643, + "nll_loss": 0.06606464087963104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.794451236492023e-05, + "rewards/margins": 0.199482262134552, + "rewards/rejected": -0.1995002031326294, + "step": 13778 + }, + { + "epoch": 9.529045643153527, + "grad_norm": 4.208901882171631, + "learning_rate": 2.616413093591517e-06, + "log_odds_chosen": 11.72228717803955, + "log_odds_ratio": -2.457138180034235e-05, + "logits/chosen": -0.4067918360233307, + "logits/rejected": -0.502360463142395, + "logps/chosen": -0.000477884488645941, + "logps/rejected": -3.3087949752807617, + "loss": 0.324, + "nll_loss": 0.08099675178527832, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7788445954211056e-05, + "rewards/margins": 0.3308316767215729, + "rewards/rejected": -0.33087947964668274, + "step": 13779 + }, + { + "epoch": 9.529737206085754, + "grad_norm": 2.3598437309265137, + "learning_rate": 2.6125710773013677e-06, + "log_odds_chosen": 11.954383850097656, + "log_odds_ratio": -0.00010405042849015445, + "logits/chosen": -0.6290899515151978, + "logits/rejected": -0.5953158140182495, + "logps/chosen": -0.00013793556718155742, + "logps/rejected": -2.5401864051818848, + "loss": 0.2037, + "nll_loss": 0.05092133581638336, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3793557627650443e-05, + "rewards/margins": 0.2540048360824585, + "rewards/rejected": -0.254018634557724, + "step": 13780 + }, + { + "epoch": 9.53042876901798, + "grad_norm": 3.0388574600219727, + "learning_rate": 2.6087290610112185e-06, + "log_odds_chosen": 12.778547286987305, + "log_odds_ratio": -1.059939677361399e-05, + "logits/chosen": -0.6642726063728333, + "logits/rejected": -0.701655387878418, + "logps/chosen": -0.00012015497486572713, + "logps/rejected": -3.0411171913146973, + "loss": 0.4084, + "nll_loss": 0.102105513215065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2015498214168474e-05, + "rewards/margins": 0.3040997087955475, + "rewards/rejected": -0.3041117191314697, + "step": 13781 + }, + { + "epoch": 9.531120331950207, + "grad_norm": 3.123870849609375, + "learning_rate": 2.6048870447210697e-06, + "log_odds_chosen": 10.889333724975586, + "log_odds_ratio": -8.684580097906291e-05, + "logits/chosen": -0.5890014171600342, + "logits/rejected": -0.6292410492897034, + "logps/chosen": -0.0002101728750858456, + "logps/rejected": -1.6682124137878418, + "loss": 0.3341, + "nll_loss": 0.08351971209049225, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.101728750858456e-05, + "rewards/margins": 0.16680021584033966, + "rewards/rejected": -0.16682124137878418, + "step": 13782 + }, + { + "epoch": 9.531811894882434, + "grad_norm": 2.1983208656311035, + "learning_rate": 2.601045028430921e-06, + "log_odds_chosen": 11.260841369628906, + "log_odds_ratio": -4.079763311892748e-05, + "logits/chosen": -0.6290895342826843, + "logits/rejected": -0.6792348623275757, + "logps/chosen": -7.228052709251642e-05, + "logps/rejected": -1.6622785329818726, + "loss": 0.2882, + "nll_loss": 0.07205353677272797, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.228052709251642e-06, + "rewards/margins": 0.16622062027454376, + "rewards/rejected": -0.16622784733772278, + "step": 13783 + }, + { + "epoch": 9.532503457814661, + "grad_norm": 3.228482484817505, + "learning_rate": 2.5972030121407716e-06, + "log_odds_chosen": 11.143197059631348, + "log_odds_ratio": -2.7463487640488893e-05, + "logits/chosen": -0.19031089544296265, + "logits/rejected": -0.2810859978199005, + "logps/chosen": -0.0001386001822538674, + "logps/rejected": -1.8822040557861328, + "loss": 0.2618, + "nll_loss": 0.06545382738113403, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.38600189529825e-05, + "rewards/margins": 0.1882065385580063, + "rewards/rejected": -0.18822041153907776, + "step": 13784 + }, + { + "epoch": 9.533195020746888, + "grad_norm": 4.018812656402588, + "learning_rate": 2.5933609958506228e-06, + "log_odds_chosen": 11.427385330200195, + "log_odds_ratio": -0.00019970980065409094, + "logits/chosen": -0.5215947031974792, + "logits/rejected": -0.579155445098877, + "logps/chosen": -0.00018272115266881883, + "logps/rejected": -2.2558345794677734, + "loss": 0.4895, + "nll_loss": 0.1223452240228653, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8272115994477645e-05, + "rewards/margins": 0.22556518018245697, + "rewards/rejected": -0.22558343410491943, + "step": 13785 + }, + { + "epoch": 9.533886583679115, + "grad_norm": 3.184753179550171, + "learning_rate": 2.5895189795604735e-06, + "log_odds_chosen": 10.723051071166992, + "log_odds_ratio": -0.00016598444199189544, + "logits/chosen": -0.5750604867935181, + "logits/rejected": -0.712570309638977, + "logps/chosen": -0.00014530037879012525, + "logps/rejected": -2.0900259017944336, + "loss": 0.3324, + "nll_loss": 0.08307978510856628, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4530039152305108e-05, + "rewards/margins": 0.20898807048797607, + "rewards/rejected": -0.20900261402130127, + "step": 13786 + }, + { + "epoch": 9.534578146611342, + "grad_norm": 3.268193483352661, + "learning_rate": 2.5856769632703243e-06, + "log_odds_chosen": 10.619029998779297, + "log_odds_ratio": -3.353383726789616e-05, + "logits/chosen": -0.2768457531929016, + "logits/rejected": -0.2528938949108124, + "logps/chosen": -0.00025384750915691257, + "logps/rejected": -1.9944524765014648, + "loss": 0.3209, + "nll_loss": 0.08022896945476532, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5384753826074302e-05, + "rewards/margins": 0.19941987097263336, + "rewards/rejected": -0.19944524765014648, + "step": 13787 + }, + { + "epoch": 9.535269709543568, + "grad_norm": 3.37003755569458, + "learning_rate": 2.581834946980175e-06, + "log_odds_chosen": 12.37077808380127, + "log_odds_ratio": -1.2251228326931596e-05, + "logits/chosen": -0.27846595644950867, + "logits/rejected": -0.27231597900390625, + "logps/chosen": -0.00015653952141292393, + "logps/rejected": -3.245314121246338, + "loss": 0.3284, + "nll_loss": 0.08210815489292145, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5653953596483916e-05, + "rewards/margins": 0.32451578974723816, + "rewards/rejected": -0.3245314657688141, + "step": 13788 + }, + { + "epoch": 9.535961272475795, + "grad_norm": 3.0098512172698975, + "learning_rate": 2.577992930690026e-06, + "log_odds_chosen": 11.0325927734375, + "log_odds_ratio": -2.921521445387043e-05, + "logits/chosen": -0.5826141238212585, + "logits/rejected": -0.6744647026062012, + "logps/chosen": -0.0001600942632649094, + "logps/rejected": -2.2958791255950928, + "loss": 0.291, + "nll_loss": 0.07275387644767761, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6009425962693058e-05, + "rewards/margins": 0.22957190871238708, + "rewards/rejected": -0.22958789765834808, + "step": 13789 + }, + { + "epoch": 9.536652835408022, + "grad_norm": 4.2000885009765625, + "learning_rate": 2.5741509143998774e-06, + "log_odds_chosen": 11.735769271850586, + "log_odds_ratio": -1.6273430446744896e-05, + "logits/chosen": -0.32315850257873535, + "logits/rejected": -0.39081114530563354, + "logps/chosen": -0.00011309140973025933, + "logps/rejected": -2.3739523887634277, + "loss": 0.5507, + "nll_loss": 0.13766588270664215, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1309141882520635e-05, + "rewards/margins": 0.23738393187522888, + "rewards/rejected": -0.23739522695541382, + "step": 13790 + }, + { + "epoch": 9.537344398340249, + "grad_norm": 3.6808247566223145, + "learning_rate": 2.570308898109728e-06, + "log_odds_chosen": 10.931219100952148, + "log_odds_ratio": -8.441173122264445e-05, + "logits/chosen": -0.44997841119766235, + "logits/rejected": -0.45926445722579956, + "logps/chosen": -0.0002950811176560819, + "logps/rejected": -2.4755983352661133, + "loss": 0.3674, + "nll_loss": 0.09184225648641586, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9508113584597595e-05, + "rewards/margins": 0.24753034114837646, + "rewards/rejected": -0.24755984544754028, + "step": 13791 + }, + { + "epoch": 9.538035961272476, + "grad_norm": 2.8941354751586914, + "learning_rate": 2.5664668818195793e-06, + "log_odds_chosen": 11.258584022521973, + "log_odds_ratio": -3.4192082239314914e-05, + "logits/chosen": -0.1735733300447464, + "logits/rejected": -0.1983909159898758, + "logps/chosen": -0.00016744263120926917, + "logps/rejected": -2.520059585571289, + "loss": 0.3083, + "nll_loss": 0.0770687460899353, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6744263120926917e-05, + "rewards/margins": 0.2519892454147339, + "rewards/rejected": -0.25200599431991577, + "step": 13792 + }, + { + "epoch": 9.538727524204702, + "grad_norm": 3.580810785293579, + "learning_rate": 2.56262486552943e-06, + "log_odds_chosen": 11.275768280029297, + "log_odds_ratio": -2.0234165276633576e-05, + "logits/chosen": -0.2527013421058655, + "logits/rejected": -0.2792891263961792, + "logps/chosen": -8.296287705888972e-05, + "logps/rejected": -1.984834909439087, + "loss": 0.4227, + "nll_loss": 0.10567609965801239, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.296288797282614e-06, + "rewards/margins": 0.198475182056427, + "rewards/rejected": -0.19848348200321198, + "step": 13793 + }, + { + "epoch": 9.53941908713693, + "grad_norm": 3.8068368434906006, + "learning_rate": 2.5587828492392808e-06, + "log_odds_chosen": 11.381519317626953, + "log_odds_ratio": -2.498861067579128e-05, + "logits/chosen": -0.34718751907348633, + "logits/rejected": -0.399546355009079, + "logps/chosen": -0.00018592897686176002, + "logps/rejected": -2.734633445739746, + "loss": 0.5206, + "nll_loss": 0.13015493750572205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8592898413771763e-05, + "rewards/margins": 0.2734447717666626, + "rewards/rejected": -0.2734633684158325, + "step": 13794 + }, + { + "epoch": 9.540110650069156, + "grad_norm": 2.8936984539031982, + "learning_rate": 2.5549408329491315e-06, + "log_odds_chosen": 10.034442901611328, + "log_odds_ratio": -0.0002595040714368224, + "logits/chosen": -0.5466099381446838, + "logits/rejected": -0.4480135142803192, + "logps/chosen": -0.0003597848699428141, + "logps/rejected": -1.7597925662994385, + "loss": 0.206, + "nll_loss": 0.051484182476997375, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.597848626668565e-05, + "rewards/margins": 0.1759432852268219, + "rewards/rejected": -0.17597925662994385, + "step": 13795 + }, + { + "epoch": 9.540802213001383, + "grad_norm": 3.7214016914367676, + "learning_rate": 2.5510988166589827e-06, + "log_odds_chosen": 10.998615264892578, + "log_odds_ratio": -0.0001421015476807952, + "logits/chosen": -0.3131212890148163, + "logits/rejected": -0.41555941104888916, + "logps/chosen": -0.0002672660048119724, + "logps/rejected": -2.176845073699951, + "loss": 0.3728, + "nll_loss": 0.09317442774772644, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6726600481197238e-05, + "rewards/margins": 0.21765778958797455, + "rewards/rejected": -0.21768450736999512, + "step": 13796 + }, + { + "epoch": 9.54149377593361, + "grad_norm": 2.209973096847534, + "learning_rate": 2.5472568003688334e-06, + "log_odds_chosen": 11.791141510009766, + "log_odds_ratio": -7.219630060717463e-05, + "logits/chosen": -0.2285546213388443, + "logits/rejected": -0.38388004899024963, + "logps/chosen": -0.0002000272215809673, + "logps/rejected": -2.9354567527770996, + "loss": 0.2675, + "nll_loss": 0.06686940044164658, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.000272252189461e-05, + "rewards/margins": 0.29352566599845886, + "rewards/rejected": -0.293545663356781, + "step": 13797 + }, + { + "epoch": 9.542185338865837, + "grad_norm": 3.551180124282837, + "learning_rate": 2.5434147840786846e-06, + "log_odds_chosen": 11.445298194885254, + "log_odds_ratio": -1.558000258228276e-05, + "logits/chosen": -0.2221369743347168, + "logits/rejected": -0.2530161440372467, + "logps/chosen": -0.00038774916902184486, + "logps/rejected": -2.6247973442077637, + "loss": 0.4239, + "nll_loss": 0.10597167909145355, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8774916902184486e-05, + "rewards/margins": 0.26244091987609863, + "rewards/rejected": -0.262479692697525, + "step": 13798 + }, + { + "epoch": 9.542876901798063, + "grad_norm": 4.352911472320557, + "learning_rate": 2.539572767788536e-06, + "log_odds_chosen": 11.785818099975586, + "log_odds_ratio": -2.144151039829012e-05, + "logits/chosen": -0.3466078042984009, + "logits/rejected": -0.3404996395111084, + "logps/chosen": -0.00010007733362726867, + "logps/rejected": -2.582352876663208, + "loss": 0.4948, + "nll_loss": 0.12369333952665329, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0007733180827927e-05, + "rewards/margins": 0.2582252621650696, + "rewards/rejected": -0.25823527574539185, + "step": 13799 + }, + { + "epoch": 9.54356846473029, + "grad_norm": 3.131136894226074, + "learning_rate": 2.5357307514983865e-06, + "log_odds_chosen": 11.537412643432617, + "log_odds_ratio": -4.232880746712908e-05, + "logits/chosen": -0.3534148335456848, + "logits/rejected": -0.27187490463256836, + "logps/chosen": -0.00012684140529017895, + "logps/rejected": -2.396341562271118, + "loss": 0.3547, + "nll_loss": 0.08867879211902618, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2684140529017895e-05, + "rewards/margins": 0.23962149024009705, + "rewards/rejected": -0.23963415622711182, + "step": 13800 + }, + { + "epoch": 9.544260027662517, + "grad_norm": 3.412196397781372, + "learning_rate": 2.5318887352082373e-06, + "log_odds_chosen": 11.617633819580078, + "log_odds_ratio": -2.581049375294242e-05, + "logits/chosen": -0.36974507570266724, + "logits/rejected": -0.48222726583480835, + "logps/chosen": -0.0004511342558544129, + "logps/rejected": -3.2871181964874268, + "loss": 0.4721, + "nll_loss": 0.11802138388156891, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.511342558544129e-05, + "rewards/margins": 0.32866671681404114, + "rewards/rejected": -0.3287118077278137, + "step": 13801 + }, + { + "epoch": 9.544951590594744, + "grad_norm": 3.6055257320404053, + "learning_rate": 2.5280467189180885e-06, + "log_odds_chosen": 11.49981689453125, + "log_odds_ratio": -7.038934563752264e-05, + "logits/chosen": -0.3161396384239197, + "logits/rejected": -0.30127403140068054, + "logps/chosen": -0.0005343385855667293, + "logps/rejected": -3.03310489654541, + "loss": 0.378, + "nll_loss": 0.09448744356632233, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3433861467055976e-05, + "rewards/margins": 0.3032570481300354, + "rewards/rejected": -0.3033105134963989, + "step": 13802 + }, + { + "epoch": 9.54564315352697, + "grad_norm": 3.4347176551818848, + "learning_rate": 2.5242047026279392e-06, + "log_odds_chosen": 10.892858505249023, + "log_odds_ratio": -2.7437276003183797e-05, + "logits/chosen": -0.08749085664749146, + "logits/rejected": -0.015900779515504837, + "logps/chosen": -0.00021966373606119305, + "logps/rejected": -2.3338217735290527, + "loss": 0.2875, + "nll_loss": 0.07186231017112732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1966374333715066e-05, + "rewards/margins": 0.23336023092269897, + "rewards/rejected": -0.2333821952342987, + "step": 13803 + }, + { + "epoch": 9.546334716459198, + "grad_norm": 2.9281575679779053, + "learning_rate": 2.52036268633779e-06, + "log_odds_chosen": 10.796745300292969, + "log_odds_ratio": -0.00015486503252759576, + "logits/chosen": -0.3453628420829773, + "logits/rejected": -0.31174102425575256, + "logps/chosen": -0.0006333081400953233, + "logps/rejected": -2.9530837535858154, + "loss": 0.2823, + "nll_loss": 0.07056043297052383, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333081546472386e-05, + "rewards/margins": 0.29524505138397217, + "rewards/rejected": -0.2953084111213684, + "step": 13804 + }, + { + "epoch": 9.547026279391424, + "grad_norm": 3.473527431488037, + "learning_rate": 2.516520670047641e-06, + "log_odds_chosen": 10.990711212158203, + "log_odds_ratio": -4.315027763368562e-05, + "logits/chosen": -0.12099762260913849, + "logits/rejected": -0.1815110743045807, + "logps/chosen": -0.00015486619668081403, + "logps/rejected": -2.163987398147583, + "loss": 0.3474, + "nll_loss": 0.08683499693870544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5486619304283522e-05, + "rewards/margins": 0.21638326346874237, + "rewards/rejected": -0.21639874577522278, + "step": 13805 + }, + { + "epoch": 9.547717842323651, + "grad_norm": 4.62323522567749, + "learning_rate": 2.5126786537574923e-06, + "log_odds_chosen": 10.207351684570312, + "log_odds_ratio": -0.0003335881920065731, + "logits/chosen": -0.27806001901626587, + "logits/rejected": -0.1864968240261078, + "logps/chosen": -0.0035468749701976776, + "logps/rejected": -2.6968271732330322, + "loss": 0.4498, + "nll_loss": 0.11242831498384476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00035468745045363903, + "rewards/margins": 0.2693280279636383, + "rewards/rejected": -0.26968270540237427, + "step": 13806 + }, + { + "epoch": 9.548409405255878, + "grad_norm": 3.1464242935180664, + "learning_rate": 2.508836637467343e-06, + "log_odds_chosen": 10.235457420349121, + "log_odds_ratio": -0.0002480056427884847, + "logits/chosen": -0.6790138483047485, + "logits/rejected": -0.6777001023292542, + "logps/chosen": -0.0003498023725114763, + "logps/rejected": -1.6847186088562012, + "loss": 0.333, + "nll_loss": 0.08322387933731079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4980235795956105e-05, + "rewards/margins": 0.16843688488006592, + "rewards/rejected": -0.16847185790538788, + "step": 13807 + }, + { + "epoch": 9.549100968188105, + "grad_norm": 3.7750470638275146, + "learning_rate": 2.504994621177194e-06, + "log_odds_chosen": 10.81887435913086, + "log_odds_ratio": -8.63458335516043e-05, + "logits/chosen": -0.40970128774642944, + "logits/rejected": -0.42641782760620117, + "logps/chosen": -0.00040343630826100707, + "logps/rejected": -2.3015646934509277, + "loss": 0.3517, + "nll_loss": 0.08792360126972198, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.034363155369647e-05, + "rewards/margins": 0.2301161289215088, + "rewards/rejected": -0.23015648126602173, + "step": 13808 + }, + { + "epoch": 9.549792531120332, + "grad_norm": 4.055153846740723, + "learning_rate": 2.501152604887045e-06, + "log_odds_chosen": 10.715600967407227, + "log_odds_ratio": -5.44217909919098e-05, + "logits/chosen": -0.2824767529964447, + "logits/rejected": -0.22777681052684784, + "logps/chosen": -0.0002521896967664361, + "logps/rejected": -2.063735008239746, + "loss": 0.4458, + "nll_loss": 0.11144724488258362, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5218971131835133e-05, + "rewards/margins": 0.2063482701778412, + "rewards/rejected": -0.20637348294258118, + "step": 13809 + }, + { + "epoch": 9.550484094052559, + "grad_norm": 4.018936634063721, + "learning_rate": 2.4973105885968957e-06, + "log_odds_chosen": 10.957100868225098, + "log_odds_ratio": -2.922149360529147e-05, + "logits/chosen": -0.1165858656167984, + "logits/rejected": -0.140200674533844, + "logps/chosen": -0.000530008168425411, + "logps/rejected": -2.5369997024536133, + "loss": 0.5002, + "nll_loss": 0.12503990530967712, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.300081829773262e-05, + "rewards/margins": 0.25364699959754944, + "rewards/rejected": -0.25369998812675476, + "step": 13810 + }, + { + "epoch": 9.551175656984785, + "grad_norm": 4.327420711517334, + "learning_rate": 2.4934685723067465e-06, + "log_odds_chosen": 11.74692153930664, + "log_odds_ratio": -1.655887535889633e-05, + "logits/chosen": -0.28256115317344666, + "logits/rejected": -0.32909637689590454, + "logps/chosen": -0.0001346078934147954, + "logps/rejected": -2.7365636825561523, + "loss": 0.4023, + "nll_loss": 0.10056796669960022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3460790796671063e-05, + "rewards/margins": 0.273642897605896, + "rewards/rejected": -0.27365636825561523, + "step": 13811 + }, + { + "epoch": 9.551867219917012, + "grad_norm": 3.934828042984009, + "learning_rate": 2.4896265560165977e-06, + "log_odds_chosen": 10.9265775680542, + "log_odds_ratio": -5.796810728497803e-05, + "logits/chosen": -0.14236091077327728, + "logits/rejected": -0.23647207021713257, + "logps/chosen": -0.00025920968619175255, + "logps/rejected": -2.556171178817749, + "loss": 0.4271, + "nll_loss": 0.10676690936088562, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.592097189335618e-05, + "rewards/margins": 0.25559118390083313, + "rewards/rejected": -0.2556171119213104, + "step": 13812 + }, + { + "epoch": 9.552558782849239, + "grad_norm": 3.546806812286377, + "learning_rate": 2.485784539726449e-06, + "log_odds_chosen": 10.0706787109375, + "log_odds_ratio": -9.912428504321724e-05, + "logits/chosen": -0.43360263109207153, + "logits/rejected": -0.4856250584125519, + "logps/chosen": -0.0004174561472609639, + "logps/rejected": -2.019066333770752, + "loss": 0.3284, + "nll_loss": 0.08210201561450958, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174561763647944e-05, + "rewards/margins": 0.20186486840248108, + "rewards/rejected": -0.20190662145614624, + "step": 13813 + }, + { + "epoch": 9.553250345781466, + "grad_norm": 3.4718127250671387, + "learning_rate": 2.4819425234362996e-06, + "log_odds_chosen": 8.758975982666016, + "log_odds_ratio": -0.0022941383067518473, + "logits/chosen": -0.1509694755077362, + "logits/rejected": -0.1860388070344925, + "logps/chosen": -0.016561385244131088, + "logps/rejected": -1.6161742210388184, + "loss": 0.3722, + "nll_loss": 0.09281736612319946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016561385709792376, + "rewards/margins": 0.1599612832069397, + "rewards/rejected": -0.1616174280643463, + "step": 13814 + }, + { + "epoch": 9.553941908713693, + "grad_norm": 2.690258026123047, + "learning_rate": 2.4781005071461503e-06, + "log_odds_chosen": 10.737163543701172, + "log_odds_ratio": -6.156474410090595e-05, + "logits/chosen": -0.4985949695110321, + "logits/rejected": -0.4808202385902405, + "logps/chosen": -0.0002015067293541506, + "logps/rejected": -2.085129737854004, + "loss": 0.2381, + "nll_loss": 0.05951059237122536, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0150675482000224e-05, + "rewards/margins": 0.20849281549453735, + "rewards/rejected": -0.20851297676563263, + "step": 13815 + }, + { + "epoch": 9.55463347164592, + "grad_norm": 3.3727006912231445, + "learning_rate": 2.4742584908560015e-06, + "log_odds_chosen": 9.605585098266602, + "log_odds_ratio": -0.0002903227577917278, + "logits/chosen": -0.68758225440979, + "logits/rejected": -0.6676366925239563, + "logps/chosen": -0.0005176997510716319, + "logps/rejected": -1.6474573612213135, + "loss": 0.4287, + "nll_loss": 0.10714066028594971, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.176998092792928e-05, + "rewards/margins": 0.16469396650791168, + "rewards/rejected": -0.16474571824073792, + "step": 13816 + }, + { + "epoch": 9.555325034578146, + "grad_norm": 3.6826722621917725, + "learning_rate": 2.4704164745658522e-06, + "log_odds_chosen": 9.754058837890625, + "log_odds_ratio": -9.896468691295013e-05, + "logits/chosen": -0.8700242638587952, + "logits/rejected": -0.8466467261314392, + "logps/chosen": -0.0003343577263876796, + "logps/rejected": -1.5087486505508423, + "loss": 0.4219, + "nll_loss": 0.10547720640897751, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3435775549151e-05, + "rewards/margins": 0.15084142982959747, + "rewards/rejected": -0.15087486803531647, + "step": 13817 + }, + { + "epoch": 9.556016597510373, + "grad_norm": 3.3866477012634277, + "learning_rate": 2.466574458275703e-06, + "log_odds_chosen": 11.469823837280273, + "log_odds_ratio": -9.968294034479186e-05, + "logits/chosen": -0.18187442421913147, + "logits/rejected": -0.2844538688659668, + "logps/chosen": -0.00022320245625451207, + "logps/rejected": -2.6314992904663086, + "loss": 0.2389, + "nll_loss": 0.05972164124250412, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2320244170259684e-05, + "rewards/margins": 0.2631275951862335, + "rewards/rejected": -0.2631498873233795, + "step": 13818 + }, + { + "epoch": 9.5567081604426, + "grad_norm": 4.043667316436768, + "learning_rate": 2.462732441985554e-06, + "log_odds_chosen": 11.33417797088623, + "log_odds_ratio": -2.4894938178476878e-05, + "logits/chosen": -0.19683483242988586, + "logits/rejected": -0.26682889461517334, + "logps/chosen": -0.00023350583796855062, + "logps/rejected": -2.2510480880737305, + "loss": 0.3719, + "nll_loss": 0.09298249334096909, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3350585252046585e-05, + "rewards/margins": 0.2250814437866211, + "rewards/rejected": -0.22510480880737305, + "step": 13819 + }, + { + "epoch": 9.557399723374827, + "grad_norm": 4.178382396697998, + "learning_rate": 2.458890425695405e-06, + "log_odds_chosen": 10.40451431274414, + "log_odds_ratio": -0.00011425888078520074, + "logits/chosen": 0.1248646080493927, + "logits/rejected": 0.07189249992370605, + "logps/chosen": -0.0009131749393418431, + "logps/rejected": -2.2126245498657227, + "loss": 0.5634, + "nll_loss": 0.14083731174468994, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.131749538937584e-05, + "rewards/margins": 0.22117114067077637, + "rewards/rejected": -0.22126245498657227, + "step": 13820 + }, + { + "epoch": 9.558091286307054, + "grad_norm": 3.49820613861084, + "learning_rate": 2.455048409405256e-06, + "log_odds_chosen": 10.636669158935547, + "log_odds_ratio": -0.00011038091906812042, + "logits/chosen": -0.16480788588523865, + "logits/rejected": -0.18782483041286469, + "logps/chosen": -0.00019006957882083952, + "logps/rejected": -2.0420327186584473, + "loss": 0.3931, + "nll_loss": 0.09826451539993286, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.900695679069031e-05, + "rewards/margins": 0.20418426394462585, + "rewards/rejected": -0.2042032778263092, + "step": 13821 + }, + { + "epoch": 9.55878284923928, + "grad_norm": 3.947436571121216, + "learning_rate": 2.451206393115107e-06, + "log_odds_chosen": 10.386070251464844, + "log_odds_ratio": -5.714597864425741e-05, + "logits/chosen": -0.2845008671283722, + "logits/rejected": -0.3634602725505829, + "logps/chosen": -0.00021994822600390762, + "logps/rejected": -1.699218988418579, + "loss": 0.3057, + "nll_loss": 0.0764106884598732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.199482150899712e-05, + "rewards/margins": 0.16989991068840027, + "rewards/rejected": -0.1699219048023224, + "step": 13822 + }, + { + "epoch": 9.559474412171507, + "grad_norm": 3.815812110900879, + "learning_rate": 2.447364376824958e-06, + "log_odds_chosen": 10.201431274414062, + "log_odds_ratio": -0.00012514113041106611, + "logits/chosen": -0.5025345683097839, + "logits/rejected": -0.6607664823532104, + "logps/chosen": -0.00035802845377475023, + "logps/rejected": -2.074693202972412, + "loss": 0.5202, + "nll_loss": 0.13002927601337433, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.580284464987926e-05, + "rewards/margins": 0.2074335217475891, + "rewards/rejected": -0.20746931433677673, + "step": 13823 + }, + { + "epoch": 9.560165975103734, + "grad_norm": 3.921386241912842, + "learning_rate": 2.4435223605348088e-06, + "log_odds_chosen": 11.03492546081543, + "log_odds_ratio": -9.24068663152866e-05, + "logits/chosen": -0.23196052014827728, + "logits/rejected": -0.2074502408504486, + "logps/chosen": -0.0001746982743497938, + "logps/rejected": -2.3940048217773438, + "loss": 0.3778, + "nll_loss": 0.09443871676921844, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.74698270711815e-05, + "rewards/margins": 0.2393830120563507, + "rewards/rejected": -0.2394004911184311, + "step": 13824 + }, + { + "epoch": 9.560857538035961, + "grad_norm": 4.664690017700195, + "learning_rate": 2.4396803442446595e-06, + "log_odds_chosen": 10.925382614135742, + "log_odds_ratio": -0.00015570037066936493, + "logits/chosen": -0.31630760431289673, + "logits/rejected": -0.2322784662246704, + "logps/chosen": -0.000406614359235391, + "logps/rejected": -2.5762383937835693, + "loss": 0.5605, + "nll_loss": 0.14011088013648987, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0661438106326386e-05, + "rewards/margins": 0.2575831413269043, + "rewards/rejected": -0.2576238214969635, + "step": 13825 + }, + { + "epoch": 9.561549100968188, + "grad_norm": 3.627612829208374, + "learning_rate": 2.4358383279545107e-06, + "log_odds_chosen": 10.90926742553711, + "log_odds_ratio": -5.2584851800929755e-05, + "logits/chosen": -0.22938194870948792, + "logits/rejected": -0.2999710440635681, + "logps/chosen": -0.0003280373348388821, + "logps/rejected": -2.518974781036377, + "loss": 0.3191, + "nll_loss": 0.0797610729932785, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.280373493907973e-05, + "rewards/margins": 0.2518646717071533, + "rewards/rejected": -0.2518974840641022, + "step": 13826 + }, + { + "epoch": 9.562240663900415, + "grad_norm": 4.754434585571289, + "learning_rate": 2.4319963116643614e-06, + "log_odds_chosen": 11.580238342285156, + "log_odds_ratio": -3.5747376387007535e-05, + "logits/chosen": -0.5771031975746155, + "logits/rejected": -0.5557994842529297, + "logps/chosen": -0.0005113329389132559, + "logps/rejected": -2.7893121242523193, + "loss": 0.6148, + "nll_loss": 0.15368834137916565, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.113328734296374e-05, + "rewards/margins": 0.2788800895214081, + "rewards/rejected": -0.278931200504303, + "step": 13827 + }, + { + "epoch": 9.562932226832642, + "grad_norm": 3.742189645767212, + "learning_rate": 2.4281542953742126e-06, + "log_odds_chosen": 11.10738468170166, + "log_odds_ratio": -0.0001169455936178565, + "logits/chosen": -0.6030516028404236, + "logits/rejected": -0.5496112108230591, + "logps/chosen": -0.00011338586773490533, + "logps/rejected": -2.0595290660858154, + "loss": 0.4938, + "nll_loss": 0.12342959642410278, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1338586773490533e-05, + "rewards/margins": 0.2059415727853775, + "rewards/rejected": -0.20595292747020721, + "step": 13828 + }, + { + "epoch": 9.563623789764868, + "grad_norm": 3.6209042072296143, + "learning_rate": 2.4243122790840634e-06, + "log_odds_chosen": 11.689399719238281, + "log_odds_ratio": -0.0004498852649703622, + "logits/chosen": -0.5256933569908142, + "logits/rejected": -0.429162859916687, + "logps/chosen": -0.0005315897287800908, + "logps/rejected": -2.733124017715454, + "loss": 0.5217, + "nll_loss": 0.1303684264421463, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.315896851243451e-05, + "rewards/margins": 0.2732592225074768, + "rewards/rejected": -0.27331238985061646, + "step": 13829 + }, + { + "epoch": 9.564315352697095, + "grad_norm": 2.6820294857025146, + "learning_rate": 2.4204702627939145e-06, + "log_odds_chosen": 10.56318473815918, + "log_odds_ratio": -0.00020509613386821002, + "logits/chosen": -0.16411474347114563, + "logits/rejected": -0.22590361535549164, + "logps/chosen": -0.0004030780401080847, + "logps/rejected": -2.0240650177001953, + "loss": 0.2231, + "nll_loss": 0.05575563758611679, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0307808376383036e-05, + "rewards/margins": 0.2023661732673645, + "rewards/rejected": -0.20240649580955505, + "step": 13830 + }, + { + "epoch": 9.565006915629322, + "grad_norm": 3.4184882640838623, + "learning_rate": 2.4166282465037653e-06, + "log_odds_chosen": 10.659126281738281, + "log_odds_ratio": -4.517474371823482e-05, + "logits/chosen": -0.4295801818370819, + "logits/rejected": -0.4659000337123871, + "logps/chosen": -0.00013040596968494356, + "logps/rejected": -1.8989781141281128, + "loss": 0.3486, + "nll_loss": 0.08713482320308685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3040596968494356e-05, + "rewards/margins": 0.18988478183746338, + "rewards/rejected": -0.1898978352546692, + "step": 13831 + }, + { + "epoch": 9.565698478561549, + "grad_norm": 3.020599842071533, + "learning_rate": 2.412786230213616e-06, + "log_odds_chosen": 11.195077896118164, + "log_odds_ratio": -4.840535984840244e-05, + "logits/chosen": -0.45517709851264954, + "logits/rejected": -0.4778594970703125, + "logps/chosen": -0.00019780338334385306, + "logps/rejected": -2.1995885372161865, + "loss": 0.3524, + "nll_loss": 0.08808855712413788, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.978034015337471e-05, + "rewards/margins": 0.21993906795978546, + "rewards/rejected": -0.21995887160301208, + "step": 13832 + }, + { + "epoch": 9.566390041493776, + "grad_norm": 2.6750314235687256, + "learning_rate": 2.408944213923467e-06, + "log_odds_chosen": 12.286481857299805, + "log_odds_ratio": -8.945756235334557e-06, + "logits/chosen": -0.7853397130966187, + "logits/rejected": -0.806201696395874, + "logps/chosen": -8.892019832273945e-05, + "logps/rejected": -2.449873447418213, + "loss": 0.3711, + "nll_loss": 0.09277424961328506, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.892019650375005e-06, + "rewards/margins": 0.24497844278812408, + "rewards/rejected": -0.2449873387813568, + "step": 13833 + }, + { + "epoch": 9.567081604426003, + "grad_norm": 2.950885772705078, + "learning_rate": 2.405102197633318e-06, + "log_odds_chosen": 10.587726593017578, + "log_odds_ratio": -0.00012081609747838229, + "logits/chosen": -0.6059221625328064, + "logits/rejected": -0.6290983557701111, + "logps/chosen": -0.0004665871092583984, + "logps/rejected": -2.1101551055908203, + "loss": 0.2502, + "nll_loss": 0.06253961473703384, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6658708015456796e-05, + "rewards/margins": 0.21096885204315186, + "rewards/rejected": -0.211015522480011, + "step": 13834 + }, + { + "epoch": 9.56777316735823, + "grad_norm": 3.7712626457214355, + "learning_rate": 2.401260181343169e-06, + "log_odds_chosen": 10.279624938964844, + "log_odds_ratio": -0.00013441775809042156, + "logits/chosen": -0.5828365683555603, + "logits/rejected": -0.6356889605522156, + "logps/chosen": -0.002119675977155566, + "logps/rejected": -2.069326400756836, + "loss": 0.4315, + "nll_loss": 0.10785016417503357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021196759189479053, + "rewards/margins": 0.20672067999839783, + "rewards/rejected": -0.2069326490163803, + "step": 13835 + }, + { + "epoch": 9.568464730290456, + "grad_norm": 2.1596806049346924, + "learning_rate": 2.3974181650530203e-06, + "log_odds_chosen": 11.440327644348145, + "log_odds_ratio": -7.01888493495062e-05, + "logits/chosen": -0.23839282989501953, + "logits/rejected": -0.2481321394443512, + "logps/chosen": -0.00018663023365661502, + "logps/rejected": -2.5868582725524902, + "loss": 0.2811, + "nll_loss": 0.0702705979347229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8663024093257263e-05, + "rewards/margins": 0.25866714119911194, + "rewards/rejected": -0.258685827255249, + "step": 13836 + }, + { + "epoch": 9.569156293222683, + "grad_norm": 3.1561505794525146, + "learning_rate": 2.393576148762871e-06, + "log_odds_chosen": 13.4840087890625, + "log_odds_ratio": -1.5572994016110897e-05, + "logits/chosen": -0.8446952700614929, + "logits/rejected": -0.8324875235557556, + "logps/chosen": -0.00013882649363949895, + "logps/rejected": -3.4737942218780518, + "loss": 0.3884, + "nll_loss": 0.09709928929805756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3882649909646716e-05, + "rewards/margins": 0.3473655581474304, + "rewards/rejected": -0.3473794162273407, + "step": 13837 + }, + { + "epoch": 9.56984785615491, + "grad_norm": 3.678325891494751, + "learning_rate": 2.3897341324727218e-06, + "log_odds_chosen": 10.045623779296875, + "log_odds_ratio": -0.00022260332480072975, + "logits/chosen": -0.38605496287345886, + "logits/rejected": -0.4284819960594177, + "logps/chosen": -0.00029918731888756156, + "logps/rejected": -1.6647403240203857, + "loss": 0.341, + "nll_loss": 0.08522848039865494, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9918732252554037e-05, + "rewards/margins": 0.16644412279129028, + "rewards/rejected": -0.16647404432296753, + "step": 13838 + }, + { + "epoch": 9.570539419087137, + "grad_norm": 3.3484020233154297, + "learning_rate": 2.3858921161825725e-06, + "log_odds_chosen": 11.903501510620117, + "log_odds_ratio": -2.8621461751754396e-05, + "logits/chosen": -0.12507346272468567, + "logits/rejected": -0.16093148291110992, + "logps/chosen": -0.0002613436954561621, + "logps/rejected": -3.074439525604248, + "loss": 0.2855, + "nll_loss": 0.07138177752494812, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.613437027321197e-05, + "rewards/margins": 0.3074178397655487, + "rewards/rejected": -0.3074439764022827, + "step": 13839 + }, + { + "epoch": 9.571230982019364, + "grad_norm": 3.287785530090332, + "learning_rate": 2.3820500998924237e-06, + "log_odds_chosen": 10.767134666442871, + "log_odds_ratio": -4.5068540202919394e-05, + "logits/chosen": -0.22858965396881104, + "logits/rejected": -0.270693838596344, + "logps/chosen": -0.00025440676836296916, + "logps/rejected": -2.1813197135925293, + "loss": 0.3567, + "nll_loss": 0.08916851878166199, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5440676836296916e-05, + "rewards/margins": 0.21810652315616608, + "rewards/rejected": -0.21813197433948517, + "step": 13840 + }, + { + "epoch": 9.57192254495159, + "grad_norm": 2.4846994876861572, + "learning_rate": 2.3782080836022745e-06, + "log_odds_chosen": 10.437719345092773, + "log_odds_ratio": -0.00014845086843706667, + "logits/chosen": -0.6213191151618958, + "logits/rejected": -0.6971963047981262, + "logps/chosen": -0.00021114877017680556, + "logps/rejected": -1.7383289337158203, + "loss": 0.2353, + "nll_loss": 0.05881960690021515, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1114876290084794e-05, + "rewards/margins": 0.17381176352500916, + "rewards/rejected": -0.17383289337158203, + "step": 13841 + }, + { + "epoch": 9.572614107883817, + "grad_norm": 3.5097603797912598, + "learning_rate": 2.374366067312125e-06, + "log_odds_chosen": 11.36322021484375, + "log_odds_ratio": -4.9892110837390646e-05, + "logits/chosen": -0.5468803644180298, + "logits/rejected": -0.5342287421226501, + "logps/chosen": -0.00011988454207312316, + "logps/rejected": -2.2961316108703613, + "loss": 0.4119, + "nll_loss": 0.1029641330242157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1988455298705958e-05, + "rewards/margins": 0.2296011745929718, + "rewards/rejected": -0.22961315512657166, + "step": 13842 + }, + { + "epoch": 9.573305670816044, + "grad_norm": 3.7719080448150635, + "learning_rate": 2.3705240510219764e-06, + "log_odds_chosen": 11.050481796264648, + "log_odds_ratio": -4.697371332440525e-05, + "logits/chosen": -0.004522927105426788, + "logits/rejected": -0.10563942790031433, + "logps/chosen": -0.0002663819177541882, + "logps/rejected": -2.487330198287964, + "loss": 0.5121, + "nll_loss": 0.1280326545238495, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6638190320227295e-05, + "rewards/margins": 0.2487063854932785, + "rewards/rejected": -0.2487330287694931, + "step": 13843 + }, + { + "epoch": 9.57399723374827, + "grad_norm": 2.42683482170105, + "learning_rate": 2.3666820347318276e-06, + "log_odds_chosen": 10.184989929199219, + "log_odds_ratio": -8.84734108694829e-05, + "logits/chosen": -0.47841382026672363, + "logits/rejected": -0.49283891916275024, + "logps/chosen": -0.0002696047304198146, + "logps/rejected": -1.7976648807525635, + "loss": 0.2798, + "nll_loss": 0.06993197649717331, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6960471586789936e-05, + "rewards/margins": 0.17973953485488892, + "rewards/rejected": -0.1797664910554886, + "step": 13844 + }, + { + "epoch": 9.574688796680498, + "grad_norm": 2.9095869064331055, + "learning_rate": 2.3628400184416783e-06, + "log_odds_chosen": 11.749897003173828, + "log_odds_ratio": -1.533768227091059e-05, + "logits/chosen": -0.5682963728904724, + "logits/rejected": -0.6184790134429932, + "logps/chosen": -0.0009056901326403022, + "logps/rejected": -2.6393790245056152, + "loss": 0.3163, + "nll_loss": 0.0790674239397049, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.056901762960479e-05, + "rewards/margins": 0.26384735107421875, + "rewards/rejected": -0.26393792033195496, + "step": 13845 + }, + { + "epoch": 9.575380359612724, + "grad_norm": 4.509950637817383, + "learning_rate": 2.358998002151529e-06, + "log_odds_chosen": 10.542989730834961, + "log_odds_ratio": -9.281733218813315e-05, + "logits/chosen": -0.3523010015487671, + "logits/rejected": -0.36976903676986694, + "logps/chosen": -0.00023095075448509306, + "logps/rejected": -2.0211093425750732, + "loss": 0.4824, + "nll_loss": 0.12059512734413147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.309507726749871e-05, + "rewards/margins": 0.20208783447742462, + "rewards/rejected": -0.20211093127727509, + "step": 13846 + }, + { + "epoch": 9.576071922544951, + "grad_norm": 3.3705577850341797, + "learning_rate": 2.3551559858613802e-06, + "log_odds_chosen": 10.59005355834961, + "log_odds_ratio": -4.5606426283484325e-05, + "logits/chosen": -0.3730345666408539, + "logits/rejected": -0.30688732862472534, + "logps/chosen": -0.00017936564108822495, + "logps/rejected": -1.9504601955413818, + "loss": 0.3621, + "nll_loss": 0.09051172435283661, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7936563381226733e-05, + "rewards/margins": 0.19502808153629303, + "rewards/rejected": -0.19504602253437042, + "step": 13847 + }, + { + "epoch": 9.576763485477178, + "grad_norm": 3.9667961597442627, + "learning_rate": 2.351313969571231e-06, + "log_odds_chosen": 11.09381103515625, + "log_odds_ratio": -0.0005342273507267237, + "logits/chosen": -0.44047313928604126, + "logits/rejected": -0.476299911737442, + "logps/chosen": -0.0002753309381660074, + "logps/rejected": -2.385202407836914, + "loss": 0.4616, + "nll_loss": 0.11535707116127014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7533096726983786e-05, + "rewards/margins": 0.2384927123785019, + "rewards/rejected": -0.23852024972438812, + "step": 13848 + }, + { + "epoch": 9.577455048409405, + "grad_norm": 3.3474607467651367, + "learning_rate": 2.3474719532810817e-06, + "log_odds_chosen": 10.757822036743164, + "log_odds_ratio": -0.0001743780157994479, + "logits/chosen": -0.35189270973205566, + "logits/rejected": -0.4230746328830719, + "logps/chosen": -0.00025460452889092267, + "logps/rejected": -1.9166150093078613, + "loss": 0.3665, + "nll_loss": 0.09159547090530396, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.546044925111346e-05, + "rewards/margins": 0.19163604080677032, + "rewards/rejected": -0.1916615068912506, + "step": 13849 + }, + { + "epoch": 9.578146611341632, + "grad_norm": 5.058994770050049, + "learning_rate": 2.343629936990933e-06, + "log_odds_chosen": 9.74898624420166, + "log_odds_ratio": -0.04051890969276428, + "logits/chosen": 0.23132355511188507, + "logits/rejected": 0.2241034060716629, + "logps/chosen": -0.015972377732396126, + "logps/rejected": -2.487572431564331, + "loss": 0.4615, + "nll_loss": 0.11132311820983887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015972377732396126, + "rewards/margins": 0.24715998768806458, + "rewards/rejected": -0.2487572282552719, + "step": 13850 + }, + { + "epoch": 9.578838174273859, + "grad_norm": 2.210313558578491, + "learning_rate": 2.339787920700784e-06, + "log_odds_chosen": 9.881656646728516, + "log_odds_ratio": -0.00016877069720067084, + "logits/chosen": -0.5011169910430908, + "logits/rejected": -0.4511227011680603, + "logps/chosen": -0.0003738755185622722, + "logps/rejected": -1.4024717807769775, + "loss": 0.203, + "nll_loss": 0.05072278156876564, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.738755185622722e-05, + "rewards/margins": 0.14020980894565582, + "rewards/rejected": -0.1402471959590912, + "step": 13851 + }, + { + "epoch": 9.579529737206085, + "grad_norm": 3.426076650619507, + "learning_rate": 2.335945904410635e-06, + "log_odds_chosen": 12.249835968017578, + "log_odds_ratio": -3.26341760228388e-05, + "logits/chosen": -0.23818932473659515, + "logits/rejected": -0.2377479523420334, + "logps/chosen": -0.00019521964713931084, + "logps/rejected": -3.2837133407592773, + "loss": 0.3596, + "nll_loss": 0.08990383893251419, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9521965441526845e-05, + "rewards/margins": 0.32835185527801514, + "rewards/rejected": -0.3283713757991791, + "step": 13852 + }, + { + "epoch": 9.580221300138312, + "grad_norm": 3.3171756267547607, + "learning_rate": 2.332103888120486e-06, + "log_odds_chosen": 11.941597938537598, + "log_odds_ratio": -7.733933671261184e-06, + "logits/chosen": -0.16036270558834076, + "logits/rejected": -0.14073586463928223, + "logps/chosen": -5.014177440898493e-05, + "logps/rejected": -2.030078649520874, + "loss": 0.3642, + "nll_loss": 0.0910460352897644, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.014177531847963e-06, + "rewards/margins": 0.20300287008285522, + "rewards/rejected": -0.20300786197185516, + "step": 13853 + }, + { + "epoch": 9.58091286307054, + "grad_norm": 2.973254442214966, + "learning_rate": 2.3282618718303367e-06, + "log_odds_chosen": 11.087392807006836, + "log_odds_ratio": -2.4515626137144864e-05, + "logits/chosen": -0.5213772058486938, + "logits/rejected": -0.6343944072723389, + "logps/chosen": -8.429721492575482e-05, + "logps/rejected": -1.803528070449829, + "loss": 0.2552, + "nll_loss": 0.06380080431699753, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429721674474422e-06, + "rewards/margins": 0.1803443729877472, + "rewards/rejected": -0.1803528070449829, + "step": 13854 + }, + { + "epoch": 9.581604426002766, + "grad_norm": 2.7656378746032715, + "learning_rate": 2.3244198555401875e-06, + "log_odds_chosen": 11.633098602294922, + "log_odds_ratio": -3.7952137063257396e-05, + "logits/chosen": -0.5965179204940796, + "logits/rejected": -0.5844557285308838, + "logps/chosen": -0.00021014529920648783, + "logps/rejected": -2.6297168731689453, + "loss": 0.2653, + "nll_loss": 0.06631910800933838, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1014529920648783e-05, + "rewards/margins": 0.26295068860054016, + "rewards/rejected": -0.2629716992378235, + "step": 13855 + }, + { + "epoch": 9.582295988934993, + "grad_norm": 3.864344358444214, + "learning_rate": 2.3205778392500382e-06, + "log_odds_chosen": 10.258223533630371, + "log_odds_ratio": -0.00022534048184752464, + "logits/chosen": 0.31239843368530273, + "logits/rejected": 0.2385031133890152, + "logps/chosen": -0.00014834718604106456, + "logps/rejected": -1.5856529474258423, + "loss": 0.534, + "nll_loss": 0.13346663117408752, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4834718058409635e-05, + "rewards/margins": 0.1585504710674286, + "rewards/rejected": -0.15856531262397766, + "step": 13856 + }, + { + "epoch": 9.58298755186722, + "grad_norm": 3.465324640274048, + "learning_rate": 2.3167358229598894e-06, + "log_odds_chosen": 11.916043281555176, + "log_odds_ratio": -1.703957786958199e-05, + "logits/chosen": -0.11070622503757477, + "logits/rejected": 0.04024065285921097, + "logps/chosen": -0.0001028151746140793, + "logps/rejected": -2.7856462001800537, + "loss": 0.4053, + "nll_loss": 0.10132232308387756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.028151746140793e-05, + "rewards/margins": 0.27855435013771057, + "rewards/rejected": -0.2785646319389343, + "step": 13857 + }, + { + "epoch": 9.583679114799446, + "grad_norm": 3.2913126945495605, + "learning_rate": 2.3128938066697406e-06, + "log_odds_chosen": 10.219085693359375, + "log_odds_ratio": -7.539245416410267e-05, + "logits/chosen": -0.7612735629081726, + "logits/rejected": -0.6986575126647949, + "logps/chosen": -0.00046573198051191866, + "logps/rejected": -2.052171468734741, + "loss": 0.5792, + "nll_loss": 0.14479374885559082, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6573197323596105e-05, + "rewards/margins": 0.20517057180404663, + "rewards/rejected": -0.2052171379327774, + "step": 13858 + }, + { + "epoch": 9.584370677731673, + "grad_norm": 3.505521774291992, + "learning_rate": 2.3090517903795913e-06, + "log_odds_chosen": 11.715803146362305, + "log_odds_ratio": -5.075123772257939e-05, + "logits/chosen": 0.008179709315299988, + "logits/rejected": -0.07167384028434753, + "logps/chosen": -0.00020146294264122844, + "logps/rejected": -2.7927322387695312, + "loss": 0.3128, + "nll_loss": 0.07819778472185135, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0146295355516486e-05, + "rewards/margins": 0.2792530655860901, + "rewards/rejected": -0.27927321195602417, + "step": 13859 + }, + { + "epoch": 9.5850622406639, + "grad_norm": 2.9111785888671875, + "learning_rate": 2.3052097740894425e-06, + "log_odds_chosen": 10.325887680053711, + "log_odds_ratio": -9.02124447748065e-05, + "logits/chosen": -0.510438084602356, + "logits/rejected": -0.5248683094978333, + "logps/chosen": -0.00034332674113102257, + "logps/rejected": -2.0093259811401367, + "loss": 0.2889, + "nll_loss": 0.07222461700439453, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.433267556829378e-05, + "rewards/margins": 0.20089825987815857, + "rewards/rejected": -0.2009325921535492, + "step": 13860 + }, + { + "epoch": 9.585753803596127, + "grad_norm": 3.1902220249176025, + "learning_rate": 2.3013677577992933e-06, + "log_odds_chosen": 11.665227890014648, + "log_odds_ratio": -2.9300321330083534e-05, + "logits/chosen": -0.01832330971956253, + "logits/rejected": 0.08498133718967438, + "logps/chosen": -0.00017734138236846775, + "logps/rejected": -2.832918643951416, + "loss": 0.398, + "nll_loss": 0.09950891882181168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7734138964442536e-05, + "rewards/margins": 0.2832741439342499, + "rewards/rejected": -0.28329187631607056, + "step": 13861 + }, + { + "epoch": 9.586445366528354, + "grad_norm": 3.167797803878784, + "learning_rate": 2.297525741509144e-06, + "log_odds_chosen": 9.807540893554688, + "log_odds_ratio": -0.00013357223360799253, + "logits/chosen": -0.36445263028144836, + "logits/rejected": -0.37488484382629395, + "logps/chosen": -0.00033911221544258296, + "logps/rejected": -1.630523681640625, + "loss": 0.3235, + "nll_loss": 0.08087349683046341, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3911226637428626e-05, + "rewards/margins": 0.16301846504211426, + "rewards/rejected": -0.16305238008499146, + "step": 13862 + }, + { + "epoch": 9.58713692946058, + "grad_norm": 10.112004280090332, + "learning_rate": 2.2936837252189948e-06, + "log_odds_chosen": 10.343138694763184, + "log_odds_ratio": -0.0001180757099064067, + "logits/chosen": 0.09524346888065338, + "logits/rejected": 0.11811558902263641, + "logps/chosen": -0.00031170775764621794, + "logps/rejected": -2.2311322689056396, + "loss": 0.6984, + "nll_loss": 0.17459022998809814, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.117077721981332e-05, + "rewards/margins": 0.2230820655822754, + "rewards/rejected": -0.22311323881149292, + "step": 13863 + }, + { + "epoch": 9.587828492392807, + "grad_norm": 2.779890537261963, + "learning_rate": 2.289841708928846e-06, + "log_odds_chosen": 11.135278701782227, + "log_odds_ratio": -2.6663194148568437e-05, + "logits/chosen": -0.44592225551605225, + "logits/rejected": -0.49121996760368347, + "logps/chosen": -0.0001397555461153388, + "logps/rejected": -2.0133450031280518, + "loss": 0.305, + "nll_loss": 0.07624665647745132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3975553883938119e-05, + "rewards/margins": 0.20132051408290863, + "rewards/rejected": -0.20133450627326965, + "step": 13864 + }, + { + "epoch": 9.588520055325034, + "grad_norm": 5.535505294799805, + "learning_rate": 2.2859996926386967e-06, + "log_odds_chosen": 11.428829193115234, + "log_odds_ratio": -3.691585880005732e-05, + "logits/chosen": -0.14533650875091553, + "logits/rejected": -0.18048177659511566, + "logps/chosen": -0.00014477927470579743, + "logps/rejected": -2.263044834136963, + "loss": 0.4133, + "nll_loss": 0.10332509130239487, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4477927834377624e-05, + "rewards/margins": 0.2262900173664093, + "rewards/rejected": -0.22630450129508972, + "step": 13865 + }, + { + "epoch": 9.589211618257261, + "grad_norm": 2.9248528480529785, + "learning_rate": 2.282157676348548e-06, + "log_odds_chosen": 10.032796859741211, + "log_odds_ratio": -8.267858356703073e-05, + "logits/chosen": -0.1600610464811325, + "logits/rejected": -0.16781851649284363, + "logps/chosen": -0.00020983436843380332, + "logps/rejected": -1.4508682489395142, + "loss": 0.4767, + "nll_loss": 0.11916402727365494, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.098343611578457e-05, + "rewards/margins": 0.14506584405899048, + "rewards/rejected": -0.14508682489395142, + "step": 13866 + }, + { + "epoch": 9.589903181189488, + "grad_norm": 3.026639223098755, + "learning_rate": 2.278315660058399e-06, + "log_odds_chosen": 11.75421142578125, + "log_odds_ratio": -0.0001594477507751435, + "logits/chosen": 0.08484269678592682, + "logits/rejected": 0.09943962097167969, + "logps/chosen": -7.933591405162588e-05, + "logps/rejected": -2.1240549087524414, + "loss": 0.3204, + "nll_loss": 0.08007805049419403, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.933591405162588e-06, + "rewards/margins": 0.21239754557609558, + "rewards/rejected": -0.2124055027961731, + "step": 13867 + }, + { + "epoch": 9.590594744121715, + "grad_norm": 2.964529514312744, + "learning_rate": 2.2744736437682498e-06, + "log_odds_chosen": 11.529214859008789, + "log_odds_ratio": -6.775275687687099e-05, + "logits/chosen": -0.5893282890319824, + "logits/rejected": -0.5058203935623169, + "logps/chosen": -0.00045186851639300585, + "logps/rejected": -2.9115042686462402, + "loss": 0.3896, + "nll_loss": 0.09739640355110168, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.518685818766244e-05, + "rewards/margins": 0.2911052405834198, + "rewards/rejected": -0.29115042090415955, + "step": 13868 + }, + { + "epoch": 9.591286307053942, + "grad_norm": 2.6916327476501465, + "learning_rate": 2.2706316274781005e-06, + "log_odds_chosen": 11.157115936279297, + "log_odds_ratio": -0.00023880114895291626, + "logits/chosen": -0.18521174788475037, + "logits/rejected": -0.14967837929725647, + "logps/chosen": -0.00012755044735968113, + "logps/rejected": -1.9115530252456665, + "loss": 0.3149, + "nll_loss": 0.07870766520500183, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2755044735968113e-05, + "rewards/margins": 0.19114252924919128, + "rewards/rejected": -0.1911552995443344, + "step": 13869 + }, + { + "epoch": 9.591977869986168, + "grad_norm": 4.349775314331055, + "learning_rate": 2.2667896111879517e-06, + "log_odds_chosen": 11.370203018188477, + "log_odds_ratio": -4.0916835132520646e-05, + "logits/chosen": -0.10869202762842178, + "logits/rejected": -0.1567172408103943, + "logps/chosen": -0.00013811516691930592, + "logps/rejected": -2.493417739868164, + "loss": 0.4697, + "nll_loss": 0.11743083596229553, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.381151560053695e-05, + "rewards/margins": 0.24932795763015747, + "rewards/rejected": -0.24934178590774536, + "step": 13870 + }, + { + "epoch": 9.592669432918395, + "grad_norm": 4.363363742828369, + "learning_rate": 2.2629475948978024e-06, + "log_odds_chosen": 12.449634552001953, + "log_odds_ratio": -5.222107574809343e-06, + "logits/chosen": -0.07508926093578339, + "logits/rejected": -0.06943850219249725, + "logps/chosen": -9.200820932164788e-05, + "logps/rejected": -2.8852505683898926, + "loss": 0.4792, + "nll_loss": 0.11979883909225464, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.200821295962669e-06, + "rewards/margins": 0.2885158658027649, + "rewards/rejected": -0.2885250449180603, + "step": 13871 + }, + { + "epoch": 9.593360995850622, + "grad_norm": 5.529043197631836, + "learning_rate": 2.259105578607653e-06, + "log_odds_chosen": 11.823799133300781, + "log_odds_ratio": -2.6034242182504386e-05, + "logits/chosen": -0.05254924297332764, + "logits/rejected": -0.12436945736408234, + "logps/chosen": -0.00015545799396932125, + "logps/rejected": -2.569683790206909, + "loss": 0.5739, + "nll_loss": 0.14347787201404572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5545800124527887e-05, + "rewards/margins": 0.25695285201072693, + "rewards/rejected": -0.2569683790206909, + "step": 13872 + }, + { + "epoch": 9.594052558782849, + "grad_norm": 3.2023260593414307, + "learning_rate": 2.2552635623175044e-06, + "log_odds_chosen": 10.723604202270508, + "log_odds_ratio": -5.572741065407172e-05, + "logits/chosen": -0.3888397812843323, + "logits/rejected": -0.3862932622432709, + "logps/chosen": -0.00012830989726353437, + "logps/rejected": -1.6381213665008545, + "loss": 0.324, + "nll_loss": 0.08099858462810516, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2830989362555556e-05, + "rewards/margins": 0.16379928588867188, + "rewards/rejected": -0.16381213068962097, + "step": 13873 + }, + { + "epoch": 9.594744121715076, + "grad_norm": 3.7831342220306396, + "learning_rate": 2.2514215460273555e-06, + "log_odds_chosen": 11.306631088256836, + "log_odds_ratio": -6.008523632772267e-05, + "logits/chosen": -0.34363579750061035, + "logits/rejected": -0.3957400321960449, + "logps/chosen": -0.00016849691746756434, + "logps/rejected": -2.431356191635132, + "loss": 0.4075, + "nll_loss": 0.10185706615447998, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6849691746756434e-05, + "rewards/margins": 0.2431187778711319, + "rewards/rejected": -0.24313563108444214, + "step": 13874 + }, + { + "epoch": 9.595435684647303, + "grad_norm": 3.419001340866089, + "learning_rate": 2.2475795297372063e-06, + "log_odds_chosen": 11.561694145202637, + "log_odds_ratio": -0.00010816368012456223, + "logits/chosen": -0.4011830687522888, + "logits/rejected": -0.34620726108551025, + "logps/chosen": -0.00036602304317057133, + "logps/rejected": -2.66555118560791, + "loss": 0.526, + "nll_loss": 0.13147681951522827, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.66023086826317e-05, + "rewards/margins": 0.2665185332298279, + "rewards/rejected": -0.26655513048171997, + "step": 13875 + }, + { + "epoch": 9.59612724757953, + "grad_norm": 4.279079914093018, + "learning_rate": 2.243737513447057e-06, + "log_odds_chosen": 10.130746841430664, + "log_odds_ratio": -0.0002273559512104839, + "logits/chosen": -0.3535163402557373, + "logits/rejected": -0.47031304240226746, + "logps/chosen": -0.0006931457901373506, + "logps/rejected": -2.424685478210449, + "loss": 0.4603, + "nll_loss": 0.11506015807390213, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.931458483450115e-05, + "rewards/margins": 0.24239924550056458, + "rewards/rejected": -0.24246855080127716, + "step": 13876 + }, + { + "epoch": 9.596818810511756, + "grad_norm": 3.7910823822021484, + "learning_rate": 2.239895497156908e-06, + "log_odds_chosen": 10.984574317932129, + "log_odds_ratio": -8.284907380584627e-05, + "logits/chosen": -0.06811004877090454, + "logits/rejected": -0.14354632794857025, + "logps/chosen": -0.00013478229811880738, + "logps/rejected": -2.0187888145446777, + "loss": 0.3664, + "nll_loss": 0.09160007536411285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3478229448082857e-05, + "rewards/margins": 0.20186540484428406, + "rewards/rejected": -0.2018788754940033, + "step": 13877 + }, + { + "epoch": 9.597510373443983, + "grad_norm": 3.4868521690368652, + "learning_rate": 2.236053480866759e-06, + "log_odds_chosen": 11.839643478393555, + "log_odds_ratio": -1.3536369806388393e-05, + "logits/chosen": 0.13439291715621948, + "logits/rejected": 0.010013069957494736, + "logps/chosen": -0.0003978666791226715, + "logps/rejected": -3.200017213821411, + "loss": 0.3056, + "nll_loss": 0.07640783488750458, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.978666791226715e-05, + "rewards/margins": 0.31996193528175354, + "rewards/rejected": -0.3200017213821411, + "step": 13878 + }, + { + "epoch": 9.59820193637621, + "grad_norm": 2.500501871109009, + "learning_rate": 2.2322114645766097e-06, + "log_odds_chosen": 10.714197158813477, + "log_odds_ratio": -5.230196984484792e-05, + "logits/chosen": -0.3795170783996582, + "logits/rejected": -0.44043371081352234, + "logps/chosen": -0.00018136526341550052, + "logps/rejected": -2.1343531608581543, + "loss": 0.3357, + "nll_loss": 0.08392222970724106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8136528524337336e-05, + "rewards/margins": 0.213417187333107, + "rewards/rejected": -0.2134353220462799, + "step": 13879 + }, + { + "epoch": 9.598893499308437, + "grad_norm": 7.815499782562256, + "learning_rate": 2.228369448286461e-06, + "log_odds_chosen": 11.23033618927002, + "log_odds_ratio": -0.00029832214931957424, + "logits/chosen": -0.12109455466270447, + "logits/rejected": -0.20619699358940125, + "logps/chosen": -0.00010856491280719638, + "logps/rejected": -2.1542465686798096, + "loss": 0.7267, + "nll_loss": 0.18164558708667755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0856490916921757e-05, + "rewards/margins": 0.21541380882263184, + "rewards/rejected": -0.21542467176914215, + "step": 13880 + }, + { + "epoch": 9.599585062240664, + "grad_norm": 3.6803181171417236, + "learning_rate": 2.224527431996312e-06, + "log_odds_chosen": 11.81740665435791, + "log_odds_ratio": -4.97671753691975e-05, + "logits/chosen": -0.6466069221496582, + "logits/rejected": -0.6269413232803345, + "logps/chosen": -0.0004883024375885725, + "logps/rejected": -3.5680038928985596, + "loss": 0.4366, + "nll_loss": 0.10914058983325958, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.883024303126149e-05, + "rewards/margins": 0.35675156116485596, + "rewards/rejected": -0.356800377368927, + "step": 13881 + }, + { + "epoch": 9.60027662517289, + "grad_norm": 2.4747352600097656, + "learning_rate": 2.220685415706163e-06, + "log_odds_chosen": 10.944051742553711, + "log_odds_ratio": -2.3117994714993984e-05, + "logits/chosen": -0.5824406743049622, + "logits/rejected": -0.2831963300704956, + "logps/chosen": -0.00017456647765357047, + "logps/rejected": -2.2170677185058594, + "loss": 0.2905, + "nll_loss": 0.07261285185813904, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.745664849295281e-05, + "rewards/margins": 0.22168932855129242, + "rewards/rejected": -0.22170677781105042, + "step": 13882 + }, + { + "epoch": 9.600968188105117, + "grad_norm": 4.029825210571289, + "learning_rate": 2.2168433994160135e-06, + "log_odds_chosen": 10.969319343566895, + "log_odds_ratio": -2.87359634967288e-05, + "logits/chosen": -0.15640632808208466, + "logits/rejected": -0.18583597242832184, + "logps/chosen": -0.00016927471733652055, + "logps/rejected": -2.1790919303894043, + "loss": 0.368, + "nll_loss": 0.09199932962656021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6927471733652055e-05, + "rewards/margins": 0.21789227426052094, + "rewards/rejected": -0.21790921688079834, + "step": 13883 + }, + { + "epoch": 9.601659751037344, + "grad_norm": 3.1205484867095947, + "learning_rate": 2.2130013831258647e-06, + "log_odds_chosen": 12.199348449707031, + "log_odds_ratio": -9.655268513597548e-06, + "logits/chosen": -0.12709780037403107, + "logits/rejected": -0.17708489298820496, + "logps/chosen": -0.00011951341002713889, + "logps/rejected": -3.046433448791504, + "loss": 0.3709, + "nll_loss": 0.09273400157690048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1951340638916008e-05, + "rewards/margins": 0.30463141202926636, + "rewards/rejected": -0.30464333295822144, + "step": 13884 + }, + { + "epoch": 9.60235131396957, + "grad_norm": 2.562211751937866, + "learning_rate": 2.2091593668357155e-06, + "log_odds_chosen": 10.068363189697266, + "log_odds_ratio": -0.00015095957496669143, + "logits/chosen": -0.15639328956604004, + "logits/rejected": -0.19314239919185638, + "logps/chosen": -0.00022999334032647312, + "logps/rejected": -1.699575662612915, + "loss": 0.2787, + "nll_loss": 0.06966718286275864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2999334760243073e-05, + "rewards/margins": 0.16993457078933716, + "rewards/rejected": -0.16995757818222046, + "step": 13885 + }, + { + "epoch": 9.603042876901798, + "grad_norm": 3.483767032623291, + "learning_rate": 2.2053173505455662e-06, + "log_odds_chosen": 12.795426368713379, + "log_odds_ratio": -4.1594525100663304e-05, + "logits/chosen": -0.3526758551597595, + "logits/rejected": -0.4029574990272522, + "logps/chosen": -0.0001118480577133596, + "logps/rejected": -3.419715642929077, + "loss": 0.3472, + "nll_loss": 0.08679346740245819, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.118480577133596e-05, + "rewards/margins": 0.34196043014526367, + "rewards/rejected": -0.34197157621383667, + "step": 13886 + }, + { + "epoch": 9.603734439834025, + "grad_norm": 2.8883213996887207, + "learning_rate": 2.2014753342554174e-06, + "log_odds_chosen": 10.48097038269043, + "log_odds_ratio": -8.504984725732356e-05, + "logits/chosen": -0.3213323950767517, + "logits/rejected": -0.3076537251472473, + "logps/chosen": -0.0004419469041749835, + "logps/rejected": -2.042109966278076, + "loss": 0.2717, + "nll_loss": 0.06790684163570404, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.419469041749835e-05, + "rewards/margins": 0.20416679978370667, + "rewards/rejected": -0.20421099662780762, + "step": 13887 + }, + { + "epoch": 9.604426002766251, + "grad_norm": 3.354330062866211, + "learning_rate": 2.197633317965268e-06, + "log_odds_chosen": 11.06173324584961, + "log_odds_ratio": -2.4618173483759165e-05, + "logits/chosen": -0.22894491255283356, + "logits/rejected": -0.22772037982940674, + "logps/chosen": -0.00034024231717921793, + "logps/rejected": -2.3082427978515625, + "loss": 0.3857, + "nll_loss": 0.09642204642295837, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4024233173113316e-05, + "rewards/margins": 0.23079025745391846, + "rewards/rejected": -0.230824276804924, + "step": 13888 + }, + { + "epoch": 9.605117565698478, + "grad_norm": 4.197360992431641, + "learning_rate": 2.1937913016751193e-06, + "log_odds_chosen": 10.922062873840332, + "log_odds_ratio": -2.6824922315427102e-05, + "logits/chosen": -0.09128378331661224, + "logits/rejected": -0.1841985583305359, + "logps/chosen": -0.00023976791999302804, + "logps/rejected": -2.2560863494873047, + "loss": 0.4823, + "nll_loss": 0.12056770920753479, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.397679418209009e-05, + "rewards/margins": 0.22558467090129852, + "rewards/rejected": -0.22560864686965942, + "step": 13889 + }, + { + "epoch": 9.605809128630705, + "grad_norm": 3.4729881286621094, + "learning_rate": 2.18994928538497e-06, + "log_odds_chosen": 11.6234769821167, + "log_odds_ratio": -1.936111038958188e-05, + "logits/chosen": -0.6802545189857483, + "logits/rejected": -0.603384256362915, + "logps/chosen": -8.280224574264139e-05, + "logps/rejected": -2.281142234802246, + "loss": 0.3669, + "nll_loss": 0.09171170741319656, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2802253018599e-06, + "rewards/margins": 0.22810596227645874, + "rewards/rejected": -0.22811424732208252, + "step": 13890 + }, + { + "epoch": 9.606500691562932, + "grad_norm": 2.6800572872161865, + "learning_rate": 2.1861072690948212e-06, + "log_odds_chosen": 11.265277862548828, + "log_odds_ratio": -1.970949597307481e-05, + "logits/chosen": -0.1551126390695572, + "logits/rejected": -0.20388063788414001, + "logps/chosen": -8.644152694614604e-05, + "logps/rejected": -1.9881235361099243, + "loss": 0.2974, + "nll_loss": 0.07434845715761185, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.644152330816723e-06, + "rewards/margins": 0.19880370795726776, + "rewards/rejected": -0.198812335729599, + "step": 13891 + }, + { + "epoch": 9.607192254495159, + "grad_norm": 3.867112636566162, + "learning_rate": 2.182265252804672e-06, + "log_odds_chosen": 11.430559158325195, + "log_odds_ratio": -2.1698100681533106e-05, + "logits/chosen": -0.36402902007102966, + "logits/rejected": -0.3568218946456909, + "logps/chosen": -0.00016789848450571299, + "logps/rejected": -1.9694077968597412, + "loss": 0.4019, + "nll_loss": 0.1004675030708313, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.678984881436918e-05, + "rewards/margins": 0.19692397117614746, + "rewards/rejected": -0.19694077968597412, + "step": 13892 + }, + { + "epoch": 9.607883817427386, + "grad_norm": 2.5731377601623535, + "learning_rate": 2.1784232365145227e-06, + "log_odds_chosen": 11.45120906829834, + "log_odds_ratio": -1.9578732462832704e-05, + "logits/chosen": -0.4507691264152527, + "logits/rejected": -0.5357175469398499, + "logps/chosen": -0.00018559573800303042, + "logps/rejected": -2.5820508003234863, + "loss": 0.2696, + "nll_loss": 0.06739149987697601, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8559574527898803e-05, + "rewards/margins": 0.2581865191459656, + "rewards/rejected": -0.2582050859928131, + "step": 13893 + }, + { + "epoch": 9.608575380359612, + "grad_norm": 3.959655284881592, + "learning_rate": 2.174581220224374e-06, + "log_odds_chosen": 11.153068542480469, + "log_odds_ratio": -0.00017264412599615753, + "logits/chosen": -0.05517587810754776, + "logits/rejected": -0.016276437789201736, + "logps/chosen": -0.00031824197503738105, + "logps/rejected": -2.672032356262207, + "loss": 0.4601, + "nll_loss": 0.11499620974063873, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.182419823133387e-05, + "rewards/margins": 0.2671714425086975, + "rewards/rejected": -0.2672032415866852, + "step": 13894 + }, + { + "epoch": 9.60926694329184, + "grad_norm": 4.496756553649902, + "learning_rate": 2.1707392039342247e-06, + "log_odds_chosen": 11.771528244018555, + "log_odds_ratio": -1.823817910917569e-05, + "logits/chosen": -0.1999531090259552, + "logits/rejected": -0.2911415696144104, + "logps/chosen": -0.0001995821949094534, + "logps/rejected": -3.107776403427124, + "loss": 0.6024, + "nll_loss": 0.15058927237987518, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9958217308158055e-05, + "rewards/margins": 0.31075769662857056, + "rewards/rejected": -0.3107776641845703, + "step": 13895 + }, + { + "epoch": 9.609958506224066, + "grad_norm": 3.24926495552063, + "learning_rate": 2.166897187644076e-06, + "log_odds_chosen": 10.774518966674805, + "log_odds_ratio": -3.353306601638906e-05, + "logits/chosen": 0.20405946671962738, + "logits/rejected": 0.040008507668972015, + "logps/chosen": -0.0013074527960270643, + "logps/rejected": -2.6293416023254395, + "loss": 0.4048, + "nll_loss": 0.10120494663715363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013074529124423862, + "rewards/margins": 0.26280343532562256, + "rewards/rejected": -0.2629341781139374, + "step": 13896 + }, + { + "epoch": 9.610650069156293, + "grad_norm": 4.835551738739014, + "learning_rate": 2.1630551713539266e-06, + "log_odds_chosen": 10.513887405395508, + "log_odds_ratio": -0.00015845283633098006, + "logits/chosen": -0.0554099939763546, + "logits/rejected": -0.13395971059799194, + "logps/chosen": -0.00025557586923241615, + "logps/rejected": -1.9353229999542236, + "loss": 0.3504, + "nll_loss": 0.08758310228586197, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5557586923241615e-05, + "rewards/margins": 0.19350674748420715, + "rewards/rejected": -0.1935323029756546, + "step": 13897 + }, + { + "epoch": 9.61134163208852, + "grad_norm": 4.209211826324463, + "learning_rate": 2.1592131550637777e-06, + "log_odds_chosen": 10.88747787475586, + "log_odds_ratio": -4.2023919377243146e-05, + "logits/chosen": -0.1938972771167755, + "logits/rejected": -0.1918545663356781, + "logps/chosen": -0.0004351716488599777, + "logps/rejected": -2.025944471359253, + "loss": 0.5448, + "nll_loss": 0.13619840145111084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.351716779638082e-05, + "rewards/margins": 0.2025509476661682, + "rewards/rejected": -0.20259445905685425, + "step": 13898 + }, + { + "epoch": 9.612033195020746, + "grad_norm": 3.5081121921539307, + "learning_rate": 2.1553711387736285e-06, + "log_odds_chosen": 11.063932418823242, + "log_odds_ratio": -3.3935433748411015e-05, + "logits/chosen": -0.4234510660171509, + "logits/rejected": -0.4184904396533966, + "logps/chosen": -0.00022541767975781113, + "logps/rejected": -2.182734727859497, + "loss": 0.5019, + "nll_loss": 0.12546983361244202, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2541767975781113e-05, + "rewards/margins": 0.21825093030929565, + "rewards/rejected": -0.21827347576618195, + "step": 13899 + }, + { + "epoch": 9.612724757952973, + "grad_norm": 3.9560019969940186, + "learning_rate": 2.1515291224834792e-06, + "log_odds_chosen": 10.76880931854248, + "log_odds_ratio": -5.8079334849026054e-05, + "logits/chosen": -0.054325178265571594, + "logits/rejected": -0.10271134972572327, + "logps/chosen": -0.00017941728583537042, + "logps/rejected": -1.9920737743377686, + "loss": 0.3548, + "nll_loss": 0.08869768679141998, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7941729311132804e-05, + "rewards/margins": 0.1991894394159317, + "rewards/rejected": -0.1992073804140091, + "step": 13900 + }, + { + "epoch": 9.6134163208852, + "grad_norm": 2.539076328277588, + "learning_rate": 2.1476871061933304e-06, + "log_odds_chosen": 10.76081657409668, + "log_odds_ratio": -0.00012573970889206976, + "logits/chosen": -0.16937761008739471, + "logits/rejected": -0.2564617395401001, + "logps/chosen": -0.00027681823121383786, + "logps/rejected": -2.3275856971740723, + "loss": 0.2439, + "nll_loss": 0.06097453832626343, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7681826395564713e-05, + "rewards/margins": 0.232730895280838, + "rewards/rejected": -0.23275858163833618, + "step": 13901 + }, + { + "epoch": 9.614107883817427, + "grad_norm": 2.375969648361206, + "learning_rate": 2.143845089903181e-06, + "log_odds_chosen": 10.718599319458008, + "log_odds_ratio": -5.3689509513787925e-05, + "logits/chosen": -0.0198553204536438, + "logits/rejected": -0.153344064950943, + "logps/chosen": -0.0011022513499483466, + "logps/rejected": -1.8870506286621094, + "loss": 0.2744, + "nll_loss": 0.06859680265188217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011022514081560075, + "rewards/margins": 0.18859481811523438, + "rewards/rejected": -0.18870505690574646, + "step": 13902 + }, + { + "epoch": 9.614799446749654, + "grad_norm": 2.5292680263519287, + "learning_rate": 2.1400030736130323e-06, + "log_odds_chosen": 10.61046314239502, + "log_odds_ratio": -5.377128763939254e-05, + "logits/chosen": -0.14776532351970673, + "logits/rejected": -0.16533081233501434, + "logps/chosen": -0.00022588277352042496, + "logps/rejected": -1.6798335313796997, + "loss": 0.2788, + "nll_loss": 0.06968588382005692, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2588277715840377e-05, + "rewards/margins": 0.1679607778787613, + "rewards/rejected": -0.16798336803913116, + "step": 13903 + }, + { + "epoch": 9.61549100968188, + "grad_norm": 4.080808162689209, + "learning_rate": 2.1361610573228835e-06, + "log_odds_chosen": 12.258644104003906, + "log_odds_ratio": -1.09394122773665e-05, + "logits/chosen": -0.3831287920475006, + "logits/rejected": -0.47929805517196655, + "logps/chosen": -0.00010576730710454285, + "logps/rejected": -2.9734280109405518, + "loss": 0.4296, + "nll_loss": 0.10739340633153915, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0576731256151106e-05, + "rewards/margins": 0.297332227230072, + "rewards/rejected": -0.29734277725219727, + "step": 13904 + }, + { + "epoch": 9.616182572614107, + "grad_norm": 2.973907470703125, + "learning_rate": 2.1323190410327343e-06, + "log_odds_chosen": 9.683038711547852, + "log_odds_ratio": -0.0007329158834181726, + "logits/chosen": 0.13456621766090393, + "logits/rejected": 0.10806768387556076, + "logps/chosen": -0.0017268441151827574, + "logps/rejected": -1.5090343952178955, + "loss": 0.2626, + "nll_loss": 0.06558310985565186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017268442024942487, + "rewards/margins": 0.15073075890541077, + "rewards/rejected": -0.15090343356132507, + "step": 13905 + }, + { + "epoch": 9.616874135546334, + "grad_norm": 3.142827272415161, + "learning_rate": 2.128477024742585e-06, + "log_odds_chosen": 11.427976608276367, + "log_odds_ratio": -2.8424015908967704e-05, + "logits/chosen": -0.47292864322662354, + "logits/rejected": -0.47138306498527527, + "logps/chosen": -0.00012114901619497687, + "logps/rejected": -2.115497589111328, + "loss": 0.3664, + "nll_loss": 0.0915897786617279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2114901437598746e-05, + "rewards/margins": 0.2115376591682434, + "rewards/rejected": -0.2115497589111328, + "step": 13906 + }, + { + "epoch": 9.617565698478561, + "grad_norm": 17.765735626220703, + "learning_rate": 2.1246350084524358e-06, + "log_odds_chosen": 10.096508026123047, + "log_odds_ratio": -0.0651700347661972, + "logits/chosen": -0.03107866644859314, + "logits/rejected": -0.19261762499809265, + "logps/chosen": -0.010400773957371712, + "logps/rejected": -3.08225154876709, + "loss": 0.7105, + "nll_loss": 0.17109745740890503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010400773026049137, + "rewards/margins": 0.3071851134300232, + "rewards/rejected": -0.30822518467903137, + "step": 13907 + }, + { + "epoch": 9.618257261410788, + "grad_norm": 3.9724233150482178, + "learning_rate": 2.120792992162287e-06, + "log_odds_chosen": 10.521794319152832, + "log_odds_ratio": -0.00015680433716624975, + "logits/chosen": 0.06511492282152176, + "logits/rejected": 0.00012525171041488647, + "logps/chosen": -0.00021123632905073464, + "logps/rejected": -1.658719778060913, + "loss": 0.5107, + "nll_loss": 0.12765847146511078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1123632905073464e-05, + "rewards/margins": 0.16585084795951843, + "rewards/rejected": -0.1658719778060913, + "step": 13908 + }, + { + "epoch": 9.618948824343015, + "grad_norm": 2.43673038482666, + "learning_rate": 2.1169509758721377e-06, + "log_odds_chosen": 10.04551887512207, + "log_odds_ratio": -0.00024950189981609583, + "logits/chosen": -0.14085373282432556, + "logits/rejected": -0.1827581524848938, + "logps/chosen": -0.0010271634673699737, + "logps/rejected": -2.2036585807800293, + "loss": 0.2702, + "nll_loss": 0.0675291195511818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010271634528180584, + "rewards/margins": 0.22026312351226807, + "rewards/rejected": -0.22036585211753845, + "step": 13909 + }, + { + "epoch": 9.619640387275242, + "grad_norm": 2.886796236038208, + "learning_rate": 2.1131089595819884e-06, + "log_odds_chosen": 11.632570266723633, + "log_odds_ratio": -1.8629045371199027e-05, + "logits/chosen": -0.13659584522247314, + "logits/rejected": -0.128843292593956, + "logps/chosen": -0.00016159014194272459, + "logps/rejected": -2.6940741539001465, + "loss": 0.2098, + "nll_loss": 0.05246026813983917, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.615901419427246e-05, + "rewards/margins": 0.2693912386894226, + "rewards/rejected": -0.26940739154815674, + "step": 13910 + }, + { + "epoch": 9.620331950207468, + "grad_norm": 2.791591167449951, + "learning_rate": 2.1092669432918396e-06, + "log_odds_chosen": 11.185066223144531, + "log_odds_ratio": -2.2798591089667752e-05, + "logits/chosen": -0.33810728788375854, + "logits/rejected": -0.4625110328197479, + "logps/chosen": -0.00020533816132228822, + "logps/rejected": -1.8624236583709717, + "loss": 0.3463, + "nll_loss": 0.08656968921422958, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0533816496026702e-05, + "rewards/margins": 0.18622180819511414, + "rewards/rejected": -0.18624237179756165, + "step": 13911 + }, + { + "epoch": 9.621023513139695, + "grad_norm": 3.6606931686401367, + "learning_rate": 2.1054249270016908e-06, + "log_odds_chosen": 11.881719589233398, + "log_odds_ratio": -4.465238453121856e-05, + "logits/chosen": -0.5093518495559692, + "logits/rejected": -0.41257208585739136, + "logps/chosen": -0.00023769350082147866, + "logps/rejected": -3.3733842372894287, + "loss": 0.2947, + "nll_loss": 0.07367514818906784, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3769349354552105e-05, + "rewards/margins": 0.3373146653175354, + "rewards/rejected": -0.3373384475708008, + "step": 13912 + }, + { + "epoch": 9.621715076071922, + "grad_norm": 2.6280322074890137, + "learning_rate": 2.1015829107115415e-06, + "log_odds_chosen": 9.698561668395996, + "log_odds_ratio": -0.0002886706788558513, + "logits/chosen": -0.5018261075019836, + "logits/rejected": -0.5304703712463379, + "logps/chosen": -0.0005297226598486304, + "logps/rejected": -1.5569900274276733, + "loss": 0.253, + "nll_loss": 0.0632205531001091, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2972267440054566e-05, + "rewards/margins": 0.15564602613449097, + "rewards/rejected": -0.1556989997625351, + "step": 13913 + }, + { + "epoch": 9.622406639004149, + "grad_norm": 3.4627389907836914, + "learning_rate": 2.0977408944213923e-06, + "log_odds_chosen": 11.172958374023438, + "log_odds_ratio": -0.00016042341303545982, + "logits/chosen": -0.2492392212152481, + "logits/rejected": -0.2748255729675293, + "logps/chosen": -0.0007011451525613666, + "logps/rejected": -2.1626133918762207, + "loss": 0.3737, + "nll_loss": 0.09341553598642349, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01145181665197e-05, + "rewards/margins": 0.21619121730327606, + "rewards/rejected": -0.21626132726669312, + "step": 13914 + }, + { + "epoch": 9.623098201936376, + "grad_norm": 4.134411811828613, + "learning_rate": 2.0938988781312434e-06, + "log_odds_chosen": 11.793828010559082, + "log_odds_ratio": -1.4233235560823232e-05, + "logits/chosen": -0.28255489468574524, + "logits/rejected": -0.26436835527420044, + "logps/chosen": -0.00012407053145579994, + "logps/rejected": -2.460082530975342, + "loss": 0.4142, + "nll_loss": 0.10354601591825485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2407054782670457e-05, + "rewards/margins": 0.24599584937095642, + "rewards/rejected": -0.2460082769393921, + "step": 13915 + }, + { + "epoch": 9.623789764868603, + "grad_norm": 3.257009744644165, + "learning_rate": 2.090056861841094e-06, + "log_odds_chosen": 10.670095443725586, + "log_odds_ratio": -0.0003247207496315241, + "logits/chosen": -0.47446343302726746, + "logits/rejected": -0.46082183718681335, + "logps/chosen": -0.0009013573289848864, + "logps/rejected": -2.077688217163086, + "loss": 0.4598, + "nll_loss": 0.11492659151554108, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.013573435368016e-05, + "rewards/margins": 0.20767870545387268, + "rewards/rejected": -0.20776882767677307, + "step": 13916 + }, + { + "epoch": 9.62448132780083, + "grad_norm": 3.5385079383850098, + "learning_rate": 2.086214845550945e-06, + "log_odds_chosen": 10.039594650268555, + "log_odds_ratio": -0.0008481164113618433, + "logits/chosen": -0.4221859872341156, + "logits/rejected": -0.46361225843429565, + "logps/chosen": -0.0007482378277927637, + "logps/rejected": -2.10674786567688, + "loss": 0.3021, + "nll_loss": 0.07544497400522232, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.482377986889333e-05, + "rewards/margins": 0.21059995889663696, + "rewards/rejected": -0.21067479252815247, + "step": 13917 + }, + { + "epoch": 9.625172890733056, + "grad_norm": 3.3730711936950684, + "learning_rate": 2.082372829260796e-06, + "log_odds_chosen": 11.064151763916016, + "log_odds_ratio": -8.68295828695409e-05, + "logits/chosen": -0.40897443890571594, + "logits/rejected": -0.45414721965789795, + "logps/chosen": -0.0001387879892718047, + "logps/rejected": -2.230348587036133, + "loss": 0.3715, + "nll_loss": 0.09286414831876755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3878798199584708e-05, + "rewards/margins": 0.223021000623703, + "rewards/rejected": -0.22303488850593567, + "step": 13918 + }, + { + "epoch": 9.625864453665283, + "grad_norm": 2.669193744659424, + "learning_rate": 2.0785308129706473e-06, + "log_odds_chosen": 11.192930221557617, + "log_odds_ratio": -0.00021025211026426405, + "logits/chosen": -0.32236677408218384, + "logits/rejected": -0.29362133145332336, + "logps/chosen": -0.0008011598256416619, + "logps/rejected": -2.3582253456115723, + "loss": 0.3081, + "nll_loss": 0.07700303196907043, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.011598401935771e-05, + "rewards/margins": 0.23574241995811462, + "rewards/rejected": -0.23582251369953156, + "step": 13919 + }, + { + "epoch": 9.62655601659751, + "grad_norm": 5.058048725128174, + "learning_rate": 2.074688796680498e-06, + "log_odds_chosen": 11.47877311706543, + "log_odds_ratio": -2.503236464690417e-05, + "logits/chosen": -0.035071179270744324, + "logits/rejected": -0.12748485803604126, + "logps/chosen": -0.00045203138142824173, + "logps/rejected": -3.1081864833831787, + "loss": 0.5406, + "nll_loss": 0.13513976335525513, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.520314178080298e-05, + "rewards/margins": 0.31077346205711365, + "rewards/rejected": -0.3108186721801758, + "step": 13920 + }, + { + "epoch": 9.627247579529737, + "grad_norm": 3.9089407920837402, + "learning_rate": 2.0708467803903492e-06, + "log_odds_chosen": 10.992070198059082, + "log_odds_ratio": -0.0001330882078036666, + "logits/chosen": -0.2942034602165222, + "logits/rejected": -0.3216381371021271, + "logps/chosen": -0.0002567889168858528, + "logps/rejected": -2.4235384464263916, + "loss": 0.3938, + "nll_loss": 0.09844039380550385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.567889168858528e-05, + "rewards/margins": 0.24232818186283112, + "rewards/rejected": -0.24235385656356812, + "step": 13921 + }, + { + "epoch": 9.627939142461964, + "grad_norm": 3.0660769939422607, + "learning_rate": 2.0670047641002e-06, + "log_odds_chosen": 11.104628562927246, + "log_odds_ratio": -3.0097862691036426e-05, + "logits/chosen": -0.5589832067489624, + "logits/rejected": -0.6116610765457153, + "logps/chosen": -5.852892354596406e-05, + "logps/rejected": -1.5549559593200684, + "loss": 0.4344, + "nll_loss": 0.10860708355903625, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.852892172697466e-06, + "rewards/margins": 0.1554897427558899, + "rewards/rejected": -0.15549558401107788, + "step": 13922 + }, + { + "epoch": 9.62863070539419, + "grad_norm": 6.13004732131958, + "learning_rate": 2.0631627478100507e-06, + "log_odds_chosen": 10.39303970336914, + "log_odds_ratio": -0.000225274998228997, + "logits/chosen": -0.6050529479980469, + "logits/rejected": -0.6999838352203369, + "logps/chosen": -0.00015310509479604661, + "logps/rejected": -1.6478575468063354, + "loss": 0.5678, + "nll_loss": 0.14191612601280212, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5310510207200423e-05, + "rewards/margins": 0.1647704392671585, + "rewards/rejected": -0.16478575766086578, + "step": 13923 + }, + { + "epoch": 9.629322268326417, + "grad_norm": 3.988724946975708, + "learning_rate": 2.0593207315199015e-06, + "log_odds_chosen": 11.240147590637207, + "log_odds_ratio": -3.00856918329373e-05, + "logits/chosen": -0.26336485147476196, + "logits/rejected": -0.32718032598495483, + "logps/chosen": -0.00012234252062626183, + "logps/rejected": -2.0191023349761963, + "loss": 0.4075, + "nll_loss": 0.10188324004411697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2234251698828302e-05, + "rewards/margins": 0.2018980085849762, + "rewards/rejected": -0.20191024243831635, + "step": 13924 + }, + { + "epoch": 9.630013831258644, + "grad_norm": 4.1808037757873535, + "learning_rate": 2.0554787152297526e-06, + "log_odds_chosen": 11.633804321289062, + "log_odds_ratio": -1.0782297977129929e-05, + "logits/chosen": -0.2822992503643036, + "logits/rejected": -0.29951217770576477, + "logps/chosen": -0.00015155112487263978, + "logps/rejected": -2.631167411804199, + "loss": 0.3955, + "nll_loss": 0.0988800898194313, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5155113032960799e-05, + "rewards/margins": 0.26310157775878906, + "rewards/rejected": -0.263116717338562, + "step": 13925 + }, + { + "epoch": 9.630705394190871, + "grad_norm": 3.0692636966705322, + "learning_rate": 2.051636698939604e-06, + "log_odds_chosen": 10.706825256347656, + "log_odds_ratio": -5.6105287512764335e-05, + "logits/chosen": -0.6887298226356506, + "logits/rejected": -0.6536301374435425, + "logps/chosen": -0.0005047488957643509, + "logps/rejected": -2.0179996490478516, + "loss": 0.222, + "nll_loss": 0.05550269037485123, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0474893214413896e-05, + "rewards/margins": 0.20174948871135712, + "rewards/rejected": -0.20179995894432068, + "step": 13926 + }, + { + "epoch": 9.631396957123098, + "grad_norm": 3.3400537967681885, + "learning_rate": 2.0477946826494546e-06, + "log_odds_chosen": 12.713604927062988, + "log_odds_ratio": -1.0083525921800174e-05, + "logits/chosen": -0.12084333598613739, + "logits/rejected": -0.1845424622297287, + "logps/chosen": -0.00010912872676271945, + "logps/rejected": -3.221863269805908, + "loss": 0.3719, + "nll_loss": 0.09298507869243622, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0912872312474065e-05, + "rewards/margins": 0.32217538356781006, + "rewards/rejected": -0.32218629121780396, + "step": 13927 + }, + { + "epoch": 9.632088520055325, + "grad_norm": 2.9674794673919678, + "learning_rate": 2.0439526663593057e-06, + "log_odds_chosen": 11.599284172058105, + "log_odds_ratio": -2.6393710868433118e-05, + "logits/chosen": -0.552137017250061, + "logits/rejected": -0.6465173363685608, + "logps/chosen": -0.0001403492351528257, + "logps/rejected": -2.0705151557922363, + "loss": 0.3808, + "nll_loss": 0.0952024906873703, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4034923879080452e-05, + "rewards/margins": 0.20703749358654022, + "rewards/rejected": -0.20705154538154602, + "step": 13928 + }, + { + "epoch": 9.632780082987551, + "grad_norm": 4.203634738922119, + "learning_rate": 2.0401106500691565e-06, + "log_odds_chosen": 10.64388656616211, + "log_odds_ratio": -0.00011743738286895677, + "logits/chosen": 0.017155641689896584, + "logits/rejected": 0.012800104916095734, + "logps/chosen": -0.0005038772942498326, + "logps/rejected": -2.2548394203186035, + "loss": 0.371, + "nll_loss": 0.09274850785732269, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038773451815359e-05, + "rewards/margins": 0.22543352842330933, + "rewards/rejected": -0.22548392415046692, + "step": 13929 + }, + { + "epoch": 9.633471645919778, + "grad_norm": 2.5074808597564697, + "learning_rate": 2.0362686337790072e-06, + "log_odds_chosen": 10.935724258422852, + "log_odds_ratio": -0.00020738595048896968, + "logits/chosen": -0.5184868574142456, + "logits/rejected": -0.5800386071205139, + "logps/chosen": -0.00023217473062686622, + "logps/rejected": -1.9821714162826538, + "loss": 0.3235, + "nll_loss": 0.08084598928689957, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.32174716074951e-05, + "rewards/margins": 0.19819393754005432, + "rewards/rejected": -0.19821715354919434, + "step": 13930 + }, + { + "epoch": 9.634163208852005, + "grad_norm": 2.7048091888427734, + "learning_rate": 2.032426617488858e-06, + "log_odds_chosen": 10.152687072753906, + "log_odds_ratio": -9.616788884159178e-05, + "logits/chosen": -0.4461780786514282, + "logits/rejected": -0.5649304389953613, + "logps/chosen": -0.0006437112460844219, + "logps/rejected": -1.70163094997406, + "loss": 0.241, + "nll_loss": 0.060244545340538025, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.437112460844219e-05, + "rewards/margins": 0.17009872198104858, + "rewards/rejected": -0.170163094997406, + "step": 13931 + }, + { + "epoch": 9.634854771784232, + "grad_norm": 3.0434532165527344, + "learning_rate": 2.028584601198709e-06, + "log_odds_chosen": 12.094767570495605, + "log_odds_ratio": -1.862308636191301e-05, + "logits/chosen": -0.30809926986694336, + "logits/rejected": -0.3018673360347748, + "logps/chosen": -0.00010675377416191623, + "logps/rejected": -2.613502264022827, + "loss": 0.3352, + "nll_loss": 0.08379031717777252, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0675378689484205e-05, + "rewards/margins": 0.26133957505226135, + "rewards/rejected": -0.26135024428367615, + "step": 13932 + }, + { + "epoch": 9.635546334716459, + "grad_norm": 4.059986114501953, + "learning_rate": 2.02474258490856e-06, + "log_odds_chosen": 11.331657409667969, + "log_odds_ratio": -7.682020077481866e-05, + "logits/chosen": -0.44414669275283813, + "logits/rejected": -0.4349921643733978, + "logps/chosen": -0.0002738929179031402, + "logps/rejected": -2.484193801879883, + "loss": 0.3711, + "nll_loss": 0.09277647733688354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7389291062718257e-05, + "rewards/margins": 0.2483920156955719, + "rewards/rejected": -0.248419389128685, + "step": 13933 + }, + { + "epoch": 9.636237897648686, + "grad_norm": 3.5578558444976807, + "learning_rate": 2.020900568618411e-06, + "log_odds_chosen": 10.544318199157715, + "log_odds_ratio": -6.59368306514807e-05, + "logits/chosen": -0.18671739101409912, + "logits/rejected": -0.2391272932291031, + "logps/chosen": -0.00027044754824601114, + "logps/rejected": -2.0963134765625, + "loss": 0.2938, + "nll_loss": 0.07343493402004242, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.704475809878204e-05, + "rewards/margins": 0.20960432291030884, + "rewards/rejected": -0.20963135361671448, + "step": 13934 + }, + { + "epoch": 9.636929460580912, + "grad_norm": 3.4241902828216553, + "learning_rate": 2.0170585523282622e-06, + "log_odds_chosen": 10.410786628723145, + "log_odds_ratio": -9.618296462576836e-05, + "logits/chosen": -0.01987304911017418, + "logits/rejected": -0.09989184886217117, + "logps/chosen": -0.00017437424685340375, + "logps/rejected": -1.4026689529418945, + "loss": 0.3551, + "nll_loss": 0.08875397592782974, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7437425412936136e-05, + "rewards/margins": 0.14024946093559265, + "rewards/rejected": -0.14026689529418945, + "step": 13935 + }, + { + "epoch": 9.63762102351314, + "grad_norm": 2.408499240875244, + "learning_rate": 2.013216536038113e-06, + "log_odds_chosen": 10.555112838745117, + "log_odds_ratio": -0.00019298665574751794, + "logits/chosen": -0.09522217512130737, + "logits/rejected": -0.11984743177890778, + "logps/chosen": -0.00025845691561698914, + "logps/rejected": -1.6210821866989136, + "loss": 0.226, + "nll_loss": 0.056473828852176666, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5845693016890436e-05, + "rewards/margins": 0.16208237409591675, + "rewards/rejected": -0.16210821270942688, + "step": 13936 + }, + { + "epoch": 9.638312586445366, + "grad_norm": 3.2268059253692627, + "learning_rate": 2.0093745197479637e-06, + "log_odds_chosen": 10.40433120727539, + "log_odds_ratio": -6.741328252246603e-05, + "logits/chosen": -0.013442234136164188, + "logits/rejected": 0.015229137614369392, + "logps/chosen": -0.0002590430958662182, + "logps/rejected": -1.8909542560577393, + "loss": 0.3726, + "nll_loss": 0.09313929080963135, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5904311769409105e-05, + "rewards/margins": 0.18906950950622559, + "rewards/rejected": -0.18909543752670288, + "step": 13937 + }, + { + "epoch": 9.639004149377593, + "grad_norm": 3.6599769592285156, + "learning_rate": 2.005532503457815e-06, + "log_odds_chosen": 10.732551574707031, + "log_odds_ratio": -0.0011148906778544188, + "logits/chosen": -0.4779934883117676, + "logits/rejected": -0.4871721863746643, + "logps/chosen": -0.0004535532498266548, + "logps/rejected": -2.415609836578369, + "loss": 0.4094, + "nll_loss": 0.10223733633756638, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.535532571026124e-05, + "rewards/margins": 0.2415156364440918, + "rewards/rejected": -0.24156101047992706, + "step": 13938 + }, + { + "epoch": 9.63969571230982, + "grad_norm": 2.5770082473754883, + "learning_rate": 2.0016904871676657e-06, + "log_odds_chosen": 11.127652168273926, + "log_odds_ratio": -2.9959042876726016e-05, + "logits/chosen": -0.504085123538971, + "logits/rejected": -0.5476514101028442, + "logps/chosen": -0.0001619804388610646, + "logps/rejected": -2.0527985095977783, + "loss": 0.3142, + "nll_loss": 0.07854273915290833, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6198046068893746e-05, + "rewards/margins": 0.2052636742591858, + "rewards/rejected": -0.2052798867225647, + "step": 13939 + }, + { + "epoch": 9.640387275242047, + "grad_norm": 2.991600275039673, + "learning_rate": 1.9978484708775164e-06, + "log_odds_chosen": 11.098523139953613, + "log_odds_ratio": -7.594098133267835e-05, + "logits/chosen": -0.1997908055782318, + "logits/rejected": -0.26286157965660095, + "logps/chosen": -0.0006000112625770271, + "logps/rejected": -2.43660569190979, + "loss": 0.3706, + "nll_loss": 0.09263789653778076, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000112625770271e-05, + "rewards/margins": 0.24360056221485138, + "rewards/rejected": -0.243660569190979, + "step": 13940 + }, + { + "epoch": 9.641078838174273, + "grad_norm": 3.1971094608306885, + "learning_rate": 1.9940064545873676e-06, + "log_odds_chosen": 10.228924751281738, + "log_odds_ratio": -0.00011588910274440423, + "logits/chosen": -0.38264912366867065, + "logits/rejected": -0.45704877376556396, + "logps/chosen": -0.000311953917844221, + "logps/rejected": -1.974640965461731, + "loss": 0.4756, + "nll_loss": 0.11888445168733597, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.11953917844221e-05, + "rewards/margins": 0.19743289053440094, + "rewards/rejected": -0.19746409356594086, + "step": 13941 + }, + { + "epoch": 9.6417704011065, + "grad_norm": 3.8516626358032227, + "learning_rate": 1.9901644382972188e-06, + "log_odds_chosen": 11.560503005981445, + "log_odds_ratio": -1.5859755876590498e-05, + "logits/chosen": -0.10931985825300217, + "logits/rejected": -0.006788960192352533, + "logps/chosen": -0.00013140823284629732, + "logps/rejected": -2.5448997020721436, + "loss": 0.3484, + "nll_loss": 0.08708598464727402, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3140824194124434e-05, + "rewards/margins": 0.2544768452644348, + "rewards/rejected": -0.2544899582862854, + "step": 13942 + }, + { + "epoch": 9.642461964038727, + "grad_norm": 3.120718240737915, + "learning_rate": 1.9863224220070695e-06, + "log_odds_chosen": 11.14166259765625, + "log_odds_ratio": -5.944729127804749e-05, + "logits/chosen": -0.23091326653957367, + "logits/rejected": -0.37017300724983215, + "logps/chosen": -0.00030458703986369073, + "logps/rejected": -2.427988290786743, + "loss": 0.3297, + "nll_loss": 0.08242782205343246, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0458708351943642e-05, + "rewards/margins": 0.24276837706565857, + "rewards/rejected": -0.2427988052368164, + "step": 13943 + }, + { + "epoch": 9.643153526970954, + "grad_norm": 3.2770447731018066, + "learning_rate": 1.9824804057169203e-06, + "log_odds_chosen": 10.39407730102539, + "log_odds_ratio": -6.861618021503091e-05, + "logits/chosen": -0.5030243992805481, + "logits/rejected": -0.4254170358181, + "logps/chosen": -0.0003515210119076073, + "logps/rejected": -2.0133328437805176, + "loss": 0.425, + "nll_loss": 0.1062437891960144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.515210119076073e-05, + "rewards/margins": 0.20129813253879547, + "rewards/rejected": -0.20133328437805176, + "step": 13944 + }, + { + "epoch": 9.64384508990318, + "grad_norm": 3.685124635696411, + "learning_rate": 1.9786383894267714e-06, + "log_odds_chosen": 11.571239471435547, + "log_odds_ratio": -1.943923780345358e-05, + "logits/chosen": -0.10035257786512375, + "logits/rejected": -0.12497510015964508, + "logps/chosen": -0.00018849805928766727, + "logps/rejected": -2.77651309967041, + "loss": 0.3319, + "nll_loss": 0.08296985924243927, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8849805201170966e-05, + "rewards/margins": 0.2776325047016144, + "rewards/rejected": -0.2776513397693634, + "step": 13945 + }, + { + "epoch": 9.644536652835408, + "grad_norm": 4.463271617889404, + "learning_rate": 1.974796373136622e-06, + "log_odds_chosen": 11.625158309936523, + "log_odds_ratio": -1.385235464113066e-05, + "logits/chosen": -0.5605673789978027, + "logits/rejected": -0.5917690396308899, + "logps/chosen": -0.00016648485325276852, + "logps/rejected": -2.598931074142456, + "loss": 0.5527, + "nll_loss": 0.13818588852882385, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6648486052872613e-05, + "rewards/margins": 0.25987643003463745, + "rewards/rejected": -0.2598930895328522, + "step": 13946 + }, + { + "epoch": 9.645228215767634, + "grad_norm": 4.580418109893799, + "learning_rate": 1.970954356846473e-06, + "log_odds_chosen": 10.304842948913574, + "log_odds_ratio": -6.0571030189748853e-05, + "logits/chosen": 0.23934561014175415, + "logits/rejected": 0.1578591763973236, + "logps/chosen": -0.0003283784899394959, + "logps/rejected": -2.1980645656585693, + "loss": 0.5329, + "nll_loss": 0.1332293152809143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.283785190433264e-05, + "rewards/margins": 0.21977362036705017, + "rewards/rejected": -0.2198064625263214, + "step": 13947 + }, + { + "epoch": 9.645919778699861, + "grad_norm": 2.5201478004455566, + "learning_rate": 1.9671123405563237e-06, + "log_odds_chosen": 10.01999282836914, + "log_odds_ratio": -6.546937220264226e-05, + "logits/chosen": -0.2158067226409912, + "logits/rejected": -0.15728622674942017, + "logps/chosen": -0.00015559874009341002, + "logps/rejected": -1.4551308155059814, + "loss": 0.2542, + "nll_loss": 0.06354384869337082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.555987364554312e-05, + "rewards/margins": 0.14549751579761505, + "rewards/rejected": -0.14551308751106262, + "step": 13948 + }, + { + "epoch": 9.646611341632088, + "grad_norm": 2.6810781955718994, + "learning_rate": 1.9632703242661753e-06, + "log_odds_chosen": 10.072636604309082, + "log_odds_ratio": -0.0002688949170988053, + "logits/chosen": -0.31802284717559814, + "logits/rejected": -0.34241783618927, + "logps/chosen": -0.00027778628282248974, + "logps/rejected": -1.521261215209961, + "loss": 0.2867, + "nll_loss": 0.07165224850177765, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7778627554653212e-05, + "rewards/margins": 0.15209835767745972, + "rewards/rejected": -0.15212613344192505, + "step": 13949 + }, + { + "epoch": 9.647302904564315, + "grad_norm": 3.1169934272766113, + "learning_rate": 1.959428307976026e-06, + "log_odds_chosen": 11.934995651245117, + "log_odds_ratio": -8.18016087578144e-06, + "logits/chosen": -0.14000201225280762, + "logits/rejected": -0.21201661229133606, + "logps/chosen": -5.927299935137853e-05, + "logps/rejected": -2.194277286529541, + "loss": 0.356, + "nll_loss": 0.08899751305580139, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9273002079862636e-06, + "rewards/margins": 0.21942180395126343, + "rewards/rejected": -0.21942773461341858, + "step": 13950 + }, + { + "epoch": 9.647994467496542, + "grad_norm": 3.84570574760437, + "learning_rate": 1.9555862916858768e-06, + "log_odds_chosen": 10.816938400268555, + "log_odds_ratio": -0.00033466549939475954, + "logits/chosen": 0.020725198090076447, + "logits/rejected": -0.060677967965602875, + "logps/chosen": -0.00038195756496861577, + "logps/rejected": -2.639225721359253, + "loss": 0.3431, + "nll_loss": 0.0857374295592308, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.819575431407429e-05, + "rewards/margins": 0.26388436555862427, + "rewards/rejected": -0.2639225721359253, + "step": 13951 + }, + { + "epoch": 9.648686030428768, + "grad_norm": 3.859541177749634, + "learning_rate": 1.951744275395728e-06, + "log_odds_chosen": 12.209653854370117, + "log_odds_ratio": -7.651913620065898e-06, + "logits/chosen": -0.3404412865638733, + "logits/rejected": -0.3990596532821655, + "logps/chosen": -9.441153815714642e-05, + "logps/rejected": -2.768007516860962, + "loss": 0.5145, + "nll_loss": 0.128626748919487, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.441153451916762e-06, + "rewards/margins": 0.2767913341522217, + "rewards/rejected": -0.2768007516860962, + "step": 13952 + }, + { + "epoch": 9.649377593360995, + "grad_norm": 5.899332046508789, + "learning_rate": 1.9479022591055787e-06, + "log_odds_chosen": 12.843746185302734, + "log_odds_ratio": -2.0442003005882725e-05, + "logits/chosen": -0.026304662227630615, + "logits/rejected": -0.09566555917263031, + "logps/chosen": -0.00028729261248372495, + "logps/rejected": -4.177236557006836, + "loss": 0.4666, + "nll_loss": 0.11665841937065125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8729262339766137e-05, + "rewards/margins": 0.41769489645957947, + "rewards/rejected": -0.4177236557006836, + "step": 13953 + }, + { + "epoch": 9.650069156293222, + "grad_norm": 3.5560855865478516, + "learning_rate": 1.9440602428154294e-06, + "log_odds_chosen": 10.799585342407227, + "log_odds_ratio": -6.642687367275357e-05, + "logits/chosen": -0.3646691143512726, + "logits/rejected": -0.4457007646560669, + "logps/chosen": -0.00032388101681135595, + "logps/rejected": -2.1229469776153564, + "loss": 0.39, + "nll_loss": 0.09749428927898407, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.238810313632712e-05, + "rewards/margins": 0.2122623324394226, + "rewards/rejected": -0.21229471266269684, + "step": 13954 + }, + { + "epoch": 9.650760719225449, + "grad_norm": 3.677841901779175, + "learning_rate": 1.9402182265252806e-06, + "log_odds_chosen": 11.822620391845703, + "log_odds_ratio": -1.5757483197376132e-05, + "logits/chosen": -0.7204559445381165, + "logits/rejected": -0.6808112263679504, + "logps/chosen": -0.00016307370970025659, + "logps/rejected": -2.5352365970611572, + "loss": 0.7112, + "nll_loss": 0.1778022050857544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.630737097002566e-05, + "rewards/margins": 0.2535073757171631, + "rewards/rejected": -0.25352364778518677, + "step": 13955 + }, + { + "epoch": 9.651452282157676, + "grad_norm": 4.258469104766846, + "learning_rate": 1.9363762102351314e-06, + "log_odds_chosen": 12.353096008300781, + "log_odds_ratio": -6.92297635396244e-06, + "logits/chosen": -0.13911697268486023, + "logits/rejected": -0.2312823385000229, + "logps/chosen": -6.1727077991236e-05, + "logps/rejected": -2.645348072052002, + "loss": 0.4578, + "nll_loss": 0.1144505962729454, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172708253870951e-06, + "rewards/margins": 0.26452863216400146, + "rewards/rejected": -0.2645348310470581, + "step": 13956 + }, + { + "epoch": 9.652143845089903, + "grad_norm": 2.751460313796997, + "learning_rate": 1.9325341939449825e-06, + "log_odds_chosen": 12.060667037963867, + "log_odds_ratio": -1.2571328625199385e-05, + "logits/chosen": -0.4585002660751343, + "logits/rejected": -0.45212164521217346, + "logps/chosen": -0.00021719810320064425, + "logps/rejected": -2.7318403720855713, + "loss": 0.3136, + "nll_loss": 0.07839718461036682, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1719810320064425e-05, + "rewards/margins": 0.273162305355072, + "rewards/rejected": -0.27318403124809265, + "step": 13957 + }, + { + "epoch": 9.65283540802213, + "grad_norm": 3.256422996520996, + "learning_rate": 1.9286921776548333e-06, + "log_odds_chosen": 11.365076065063477, + "log_odds_ratio": -2.1767995349364355e-05, + "logits/chosen": -0.08825594931840897, + "logits/rejected": -0.23204001784324646, + "logps/chosen": -0.00021763856057077646, + "logps/rejected": -2.7843446731567383, + "loss": 0.3723, + "nll_loss": 0.09308039397001266, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1763855329481885e-05, + "rewards/margins": 0.27841272950172424, + "rewards/rejected": -0.27843448519706726, + "step": 13958 + }, + { + "epoch": 9.653526970954356, + "grad_norm": 3.9554879665374756, + "learning_rate": 1.9248501613646845e-06, + "log_odds_chosen": 9.385603904724121, + "log_odds_ratio": -0.000505188072565943, + "logits/chosen": -0.32343584299087524, + "logits/rejected": -0.19714388251304626, + "logps/chosen": -0.0008970825583674014, + "logps/rejected": -1.9236663579940796, + "loss": 0.6538, + "nll_loss": 0.16340425610542297, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.970825001597404e-05, + "rewards/margins": 0.19227692484855652, + "rewards/rejected": -0.19236664474010468, + "step": 13959 + }, + { + "epoch": 9.654218533886583, + "grad_norm": 5.494601249694824, + "learning_rate": 1.921008145074535e-06, + "log_odds_chosen": 10.197301864624023, + "log_odds_ratio": -8.86005800566636e-05, + "logits/chosen": -0.22364522516727448, + "logits/rejected": -0.3473617732524872, + "logps/chosen": -0.0006021163426339626, + "logps/rejected": -1.9727119207382202, + "loss": 0.5145, + "nll_loss": 0.128618061542511, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.021163790137507e-05, + "rewards/margins": 0.19721098244190216, + "rewards/rejected": -0.1972711980342865, + "step": 13960 + }, + { + "epoch": 9.65491009681881, + "grad_norm": 4.0596466064453125, + "learning_rate": 1.917166128784386e-06, + "log_odds_chosen": 11.051063537597656, + "log_odds_ratio": -0.0009821861749514937, + "logits/chosen": -0.592627763748169, + "logits/rejected": -0.6205758452415466, + "logps/chosen": -0.0003839967539533973, + "logps/rejected": -2.1661429405212402, + "loss": 0.5071, + "nll_loss": 0.12668871879577637, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8399673940148205e-05, + "rewards/margins": 0.21657592058181763, + "rewards/rejected": -0.21661432087421417, + "step": 13961 + }, + { + "epoch": 9.655601659751037, + "grad_norm": 3.5549631118774414, + "learning_rate": 1.913324112494237e-06, + "log_odds_chosen": 11.564865112304688, + "log_odds_ratio": -4.4520056690089405e-05, + "logits/chosen": -0.10879924893379211, + "logits/rejected": -0.18709474802017212, + "logps/chosen": -0.0003732958575710654, + "logps/rejected": -2.9977054595947266, + "loss": 0.3898, + "nll_loss": 0.09745587408542633, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.732958430191502e-05, + "rewards/margins": 0.2997332215309143, + "rewards/rejected": -0.2997705638408661, + "step": 13962 + }, + { + "epoch": 9.656293222683264, + "grad_norm": 2.9617433547973633, + "learning_rate": 1.909482096204088e-06, + "log_odds_chosen": 10.506338119506836, + "log_odds_ratio": -6.779128307243809e-05, + "logits/chosen": -0.7193613648414612, + "logits/rejected": -0.7034804224967957, + "logps/chosen": -0.00013443415809888393, + "logps/rejected": -1.504642367362976, + "loss": 0.3665, + "nll_loss": 0.0916147232055664, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3443415809888393e-05, + "rewards/margins": 0.15045081079006195, + "rewards/rejected": -0.1504642516374588, + "step": 13963 + }, + { + "epoch": 9.65698478561549, + "grad_norm": 3.313770294189453, + "learning_rate": 1.9056400799139388e-06, + "log_odds_chosen": 9.815048217773438, + "log_odds_ratio": -0.00028208905132487416, + "logits/chosen": -0.2507280111312866, + "logits/rejected": -0.3469865322113037, + "logps/chosen": -0.00044885973329655826, + "logps/rejected": -1.6550946235656738, + "loss": 0.313, + "nll_loss": 0.07822908461093903, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.488597551244311e-05, + "rewards/margins": 0.1654645800590515, + "rewards/rejected": -0.16550946235656738, + "step": 13964 + }, + { + "epoch": 9.657676348547717, + "grad_norm": 2.9807019233703613, + "learning_rate": 1.9017980636237896e-06, + "log_odds_chosen": 10.997095108032227, + "log_odds_ratio": -3.3784810511860996e-05, + "logits/chosen": -0.08487127721309662, + "logits/rejected": -0.13770827651023865, + "logps/chosen": -0.00013816017599310726, + "logps/rejected": -1.9608837366104126, + "loss": 0.2567, + "nll_loss": 0.06416875123977661, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3816017599310726e-05, + "rewards/margins": 0.19607457518577576, + "rewards/rejected": -0.19608840346336365, + "step": 13965 + }, + { + "epoch": 9.658367911479944, + "grad_norm": 3.188283681869507, + "learning_rate": 1.8979560473336408e-06, + "log_odds_chosen": 12.547552108764648, + "log_odds_ratio": -5.1370989240240306e-06, + "logits/chosen": -0.4965924918651581, + "logits/rejected": -0.517026960849762, + "logps/chosen": -0.00013113873137626797, + "logps/rejected": -2.892089366912842, + "loss": 0.3937, + "nll_loss": 0.09843617677688599, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3113873137626797e-05, + "rewards/margins": 0.28919583559036255, + "rewards/rejected": -0.28920894861221313, + "step": 13966 + }, + { + "epoch": 9.659059474412171, + "grad_norm": 3.3707354068756104, + "learning_rate": 1.8941140310434917e-06, + "log_odds_chosen": 11.181507110595703, + "log_odds_ratio": -6.762929842807353e-05, + "logits/chosen": -0.5147676467895508, + "logits/rejected": -0.505560040473938, + "logps/chosen": -0.0001584067940711975, + "logps/rejected": -2.233450174331665, + "loss": 0.3739, + "nll_loss": 0.09346611797809601, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5840680134715512e-05, + "rewards/margins": 0.22332918643951416, + "rewards/rejected": -0.22334501147270203, + "step": 13967 + }, + { + "epoch": 9.659751037344398, + "grad_norm": 3.804871082305908, + "learning_rate": 1.8902720147533425e-06, + "log_odds_chosen": 10.533234596252441, + "log_odds_ratio": -0.0006375403027050197, + "logits/chosen": -0.37808045744895935, + "logits/rejected": -0.4201991558074951, + "logps/chosen": -0.0006506206700578332, + "logps/rejected": -1.9480440616607666, + "loss": 0.2891, + "nll_loss": 0.07221576571464539, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50620786473155e-05, + "rewards/margins": 0.19473935663700104, + "rewards/rejected": -0.19480441510677338, + "step": 13968 + }, + { + "epoch": 9.660442600276625, + "grad_norm": 4.535920143127441, + "learning_rate": 1.8864299984631936e-06, + "log_odds_chosen": 10.594648361206055, + "log_odds_ratio": -4.910709321848117e-05, + "logits/chosen": -0.28199848532676697, + "logits/rejected": -0.3965606093406677, + "logps/chosen": -0.00027819437673315406, + "logps/rejected": -2.096233367919922, + "loss": 0.5034, + "nll_loss": 0.12584403157234192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.781944021990057e-05, + "rewards/margins": 0.20959553122520447, + "rewards/rejected": -0.2096233367919922, + "step": 13969 + }, + { + "epoch": 9.661134163208851, + "grad_norm": 2.5646848678588867, + "learning_rate": 1.8825879821730446e-06, + "log_odds_chosen": 10.840084075927734, + "log_odds_ratio": -5.749518459197134e-05, + "logits/chosen": -0.622244119644165, + "logits/rejected": -0.6063990592956543, + "logps/chosen": -0.00022037388407625258, + "logps/rejected": -2.235664129257202, + "loss": 0.2875, + "nll_loss": 0.07186561077833176, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.20373894990189e-05, + "rewards/margins": 0.22354437410831451, + "rewards/rejected": -0.22356641292572021, + "step": 13970 + }, + { + "epoch": 9.661825726141078, + "grad_norm": 5.075047492980957, + "learning_rate": 1.8787459658828954e-06, + "log_odds_chosen": 12.194637298583984, + "log_odds_ratio": -2.2917814931133762e-05, + "logits/chosen": -0.004379307851195335, + "logits/rejected": -0.012477247044444084, + "logps/chosen": -0.00013443827629089355, + "logps/rejected": -3.050947427749634, + "loss": 0.3824, + "nll_loss": 0.09560485184192657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3443827810988296e-05, + "rewards/margins": 0.305081307888031, + "rewards/rejected": -0.30509474873542786, + "step": 13971 + }, + { + "epoch": 9.662517289073305, + "grad_norm": 3.017207384109497, + "learning_rate": 1.8749039495927465e-06, + "log_odds_chosen": 10.548418045043945, + "log_odds_ratio": -5.83583750994876e-05, + "logits/chosen": -0.512605607509613, + "logits/rejected": -0.5638319253921509, + "logps/chosen": -0.00021537914290092885, + "logps/rejected": -2.041076898574829, + "loss": 0.2392, + "nll_loss": 0.059799715876579285, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1537915017688647e-05, + "rewards/margins": 0.20408615469932556, + "rewards/rejected": -0.20410770177841187, + "step": 13972 + }, + { + "epoch": 9.663208852005532, + "grad_norm": 3.117672920227051, + "learning_rate": 1.8710619333025973e-06, + "log_odds_chosen": 12.20585823059082, + "log_odds_ratio": -2.3021353626972996e-05, + "logits/chosen": -0.308549165725708, + "logits/rejected": -0.14046195149421692, + "logps/chosen": -9.588651300873607e-05, + "logps/rejected": -2.887348175048828, + "loss": 0.2555, + "nll_loss": 0.06386178731918335, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.588651664671488e-06, + "rewards/margins": 0.28872519731521606, + "rewards/rejected": -0.2887347936630249, + "step": 13973 + }, + { + "epoch": 9.663900414937759, + "grad_norm": 2.9554359912872314, + "learning_rate": 1.8672199170124482e-06, + "log_odds_chosen": 11.596789360046387, + "log_odds_ratio": -1.0424893844174221e-05, + "logits/chosen": -0.39971041679382324, + "logits/rejected": -0.46779516339302063, + "logps/chosen": -7.597896183142439e-05, + "logps/rejected": -2.0296382904052734, + "loss": 0.3011, + "nll_loss": 0.07528509944677353, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.597896001243498e-06, + "rewards/margins": 0.2029562145471573, + "rewards/rejected": -0.20296382904052734, + "step": 13974 + }, + { + "epoch": 9.664591977869986, + "grad_norm": 3.62790846824646, + "learning_rate": 1.863377900722299e-06, + "log_odds_chosen": 12.152950286865234, + "log_odds_ratio": -7.841685874154791e-05, + "logits/chosen": -0.5727795362472534, + "logits/rejected": -0.6163936853408813, + "logps/chosen": -0.0001449670089641586, + "logps/rejected": -3.1027400493621826, + "loss": 0.5028, + "nll_loss": 0.12568151950836182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4496701624011621e-05, + "rewards/margins": 0.31025952100753784, + "rewards/rejected": -0.31027403473854065, + "step": 13975 + }, + { + "epoch": 9.665283540802212, + "grad_norm": 3.7316770553588867, + "learning_rate": 1.8595358844321502e-06, + "log_odds_chosen": 10.631937980651855, + "log_odds_ratio": -0.00010631579061737284, + "logits/chosen": 0.005236908793449402, + "logits/rejected": -0.029435843229293823, + "logps/chosen": -0.00042771699372678995, + "logps/rejected": -2.0606510639190674, + "loss": 0.4573, + "nll_loss": 0.1143130213022232, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277170228306204e-05, + "rewards/margins": 0.20602233707904816, + "rewards/rejected": -0.2060651183128357, + "step": 13976 + }, + { + "epoch": 9.66597510373444, + "grad_norm": 5.226464748382568, + "learning_rate": 1.855693868142001e-06, + "log_odds_chosen": 12.1885986328125, + "log_odds_ratio": -7.177217412390746e-06, + "logits/chosen": -0.0014483742415904999, + "logits/rejected": -0.04183807224035263, + "logps/chosen": -0.00012206320388941094, + "logps/rejected": -2.8245863914489746, + "loss": 0.5347, + "nll_loss": 0.1336742490530014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2206321116536856e-05, + "rewards/margins": 0.28244641423225403, + "rewards/rejected": -0.28245866298675537, + "step": 13977 + }, + { + "epoch": 9.666666666666666, + "grad_norm": 3.0053915977478027, + "learning_rate": 1.8518518518518519e-06, + "log_odds_chosen": 10.44801139831543, + "log_odds_ratio": -9.702295938041061e-05, + "logits/chosen": -0.615515410900116, + "logits/rejected": -0.634098470211029, + "logps/chosen": -0.0006284684641286731, + "logps/rejected": -2.4199600219726562, + "loss": 0.3143, + "nll_loss": 0.07857717573642731, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.284684059210122e-05, + "rewards/margins": 0.2419331669807434, + "rewards/rejected": -0.24199600517749786, + "step": 13978 + }, + { + "epoch": 9.667358229598893, + "grad_norm": 3.3845856189727783, + "learning_rate": 1.848009835561703e-06, + "log_odds_chosen": 12.12214469909668, + "log_odds_ratio": -2.5276207452407107e-05, + "logits/chosen": -0.30605417490005493, + "logits/rejected": -0.2163485586643219, + "logps/chosen": -0.00045134034007787704, + "logps/rejected": -3.1922848224639893, + "loss": 0.3106, + "nll_loss": 0.07765016704797745, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.513404201134108e-05, + "rewards/margins": 0.3191833198070526, + "rewards/rejected": -0.31922847032546997, + "step": 13979 + }, + { + "epoch": 9.66804979253112, + "grad_norm": 2.010406017303467, + "learning_rate": 1.8441678192715538e-06, + "log_odds_chosen": 10.314872741699219, + "log_odds_ratio": -6.38093042653054e-05, + "logits/chosen": -0.4727852940559387, + "logits/rejected": -0.4534512758255005, + "logps/chosen": -0.000576147111132741, + "logps/rejected": -1.9994373321533203, + "loss": 0.2105, + "nll_loss": 0.052621982991695404, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.761471766163595e-05, + "rewards/margins": 0.19988611340522766, + "rewards/rejected": -0.19994375109672546, + "step": 13980 + }, + { + "epoch": 9.668741355463347, + "grad_norm": 3.694669008255005, + "learning_rate": 1.8403258029814048e-06, + "log_odds_chosen": 10.413873672485352, + "log_odds_ratio": -9.337106894236058e-05, + "logits/chosen": -0.06772250682115555, + "logits/rejected": -0.3364441394805908, + "logps/chosen": -0.0002473013009876013, + "logps/rejected": -2.0732505321502686, + "loss": 0.5063, + "nll_loss": 0.12656481564044952, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.473013046255801e-05, + "rewards/margins": 0.2073003202676773, + "rewards/rejected": -0.2073250561952591, + "step": 13981 + }, + { + "epoch": 9.669432918395573, + "grad_norm": 5.21959924697876, + "learning_rate": 1.8364837866912555e-06, + "log_odds_chosen": 11.933232307434082, + "log_odds_ratio": -1.4127767826721538e-05, + "logits/chosen": 0.22804135084152222, + "logits/rejected": 0.21710431575775146, + "logps/chosen": -0.0001345960918115452, + "logps/rejected": -2.759626865386963, + "loss": 0.5374, + "nll_loss": 0.13435040414333344, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.345960936305346e-05, + "rewards/margins": 0.27594923973083496, + "rewards/rejected": -0.2759627103805542, + "step": 13982 + }, + { + "epoch": 9.6701244813278, + "grad_norm": 2.4474329948425293, + "learning_rate": 1.8326417704011067e-06, + "log_odds_chosen": 10.466503143310547, + "log_odds_ratio": -0.00013020877668168396, + "logits/chosen": -0.09109814465045929, + "logits/rejected": -0.18959535658359528, + "logps/chosen": -0.00045913312351331115, + "logps/rejected": -2.278885841369629, + "loss": 0.2638, + "nll_loss": 0.06593704223632812, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.591331526171416e-05, + "rewards/margins": 0.22784265875816345, + "rewards/rejected": -0.22788859903812408, + "step": 13983 + }, + { + "epoch": 9.670816044260027, + "grad_norm": 4.067322731018066, + "learning_rate": 1.8287997541109574e-06, + "log_odds_chosen": 12.389582633972168, + "log_odds_ratio": -3.250035297241993e-05, + "logits/chosen": -0.2228613793849945, + "logits/rejected": -0.293897807598114, + "logps/chosen": -0.00021752913016825914, + "logps/rejected": -3.7773804664611816, + "loss": 0.2975, + "nll_loss": 0.07436374574899673, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.175291592720896e-05, + "rewards/margins": 0.3777163028717041, + "rewards/rejected": -0.3777380585670471, + "step": 13984 + }, + { + "epoch": 9.671507607192254, + "grad_norm": 2.84019136428833, + "learning_rate": 1.8249577378208084e-06, + "log_odds_chosen": 11.020377159118652, + "log_odds_ratio": -0.00011550134513527155, + "logits/chosen": -0.524024248123169, + "logits/rejected": -0.569028377532959, + "logps/chosen": -0.0003480208106338978, + "logps/rejected": -2.0050735473632812, + "loss": 0.2781, + "nll_loss": 0.06951781362295151, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4802083973772824e-05, + "rewards/margins": 0.20047254860401154, + "rewards/rejected": -0.20050735771656036, + "step": 13985 + }, + { + "epoch": 9.67219917012448, + "grad_norm": 3.9230411052703857, + "learning_rate": 1.8211157215306596e-06, + "log_odds_chosen": 11.75917911529541, + "log_odds_ratio": -1.6298314221785404e-05, + "logits/chosen": -0.01063506118953228, + "logits/rejected": -0.06654591858386993, + "logps/chosen": -0.00010372167162131518, + "logps/rejected": -2.515432834625244, + "loss": 0.5497, + "nll_loss": 0.1374278962612152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.037216770782834e-05, + "rewards/margins": 0.2515329122543335, + "rewards/rejected": -0.2515432834625244, + "step": 13986 + }, + { + "epoch": 9.672890733056708, + "grad_norm": 3.6679883003234863, + "learning_rate": 1.8172737052405103e-06, + "log_odds_chosen": 11.404784202575684, + "log_odds_ratio": -5.806395711260848e-05, + "logits/chosen": -0.5198478698730469, + "logits/rejected": -0.5719497799873352, + "logps/chosen": -0.0005329885752871633, + "logps/rejected": -3.326045036315918, + "loss": 0.3997, + "nll_loss": 0.09991982579231262, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3298852435545996e-05, + "rewards/margins": 0.3325512409210205, + "rewards/rejected": -0.3326045274734497, + "step": 13987 + }, + { + "epoch": 9.673582295988934, + "grad_norm": 3.1682469844818115, + "learning_rate": 1.813431688950361e-06, + "log_odds_chosen": 11.663154602050781, + "log_odds_ratio": -0.0005123711889609694, + "logits/chosen": -0.014620475471019745, + "logits/rejected": -0.15799535810947418, + "logps/chosen": -0.0006218062480911613, + "logps/rejected": -3.092310667037964, + "loss": 0.2362, + "nll_loss": 0.059004656970500946, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.218061753315851e-05, + "rewards/margins": 0.30916887521743774, + "rewards/rejected": -0.30923107266426086, + "step": 13988 + }, + { + "epoch": 9.674273858921161, + "grad_norm": 2.75168776512146, + "learning_rate": 1.8095896726602122e-06, + "log_odds_chosen": 11.537948608398438, + "log_odds_ratio": -2.661232247191947e-05, + "logits/chosen": -0.8945484161376953, + "logits/rejected": -0.7763729095458984, + "logps/chosen": -6.327818118734285e-05, + "logps/rejected": -1.9054579734802246, + "loss": 0.3813, + "nll_loss": 0.09531690180301666, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.327818482532166e-06, + "rewards/margins": 0.19053946435451508, + "rewards/rejected": -0.19054579734802246, + "step": 13989 + }, + { + "epoch": 9.674965421853388, + "grad_norm": 3.947296142578125, + "learning_rate": 1.8057476563700632e-06, + "log_odds_chosen": 11.823331832885742, + "log_odds_ratio": -3.6443863791646436e-05, + "logits/chosen": -0.24902662634849548, + "logits/rejected": -0.2904735505580902, + "logps/chosen": -0.00020042044343426824, + "logps/rejected": -2.9156477451324463, + "loss": 0.4597, + "nll_loss": 0.11491231620311737, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.004204725380987e-05, + "rewards/margins": 0.29154473543167114, + "rewards/rejected": -0.29156479239463806, + "step": 13990 + }, + { + "epoch": 9.675656984785615, + "grad_norm": 2.841975450515747, + "learning_rate": 1.801905640079914e-06, + "log_odds_chosen": 11.474272727966309, + "log_odds_ratio": -5.509784023161046e-05, + "logits/chosen": -0.17017102241516113, + "logits/rejected": -0.17466890811920166, + "logps/chosen": -0.00014371874567586929, + "logps/rejected": -2.071979522705078, + "loss": 0.3136, + "nll_loss": 0.07839880883693695, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4371873476193286e-05, + "rewards/margins": 0.2071835696697235, + "rewards/rejected": -0.20719794929027557, + "step": 13991 + }, + { + "epoch": 9.676348547717842, + "grad_norm": 3.145448684692383, + "learning_rate": 1.798063623789765e-06, + "log_odds_chosen": 11.523065567016602, + "log_odds_ratio": -1.7595630197320133e-05, + "logits/chosen": -0.3231140971183777, + "logits/rejected": -0.3559204936027527, + "logps/chosen": -0.00013811652024742216, + "logps/rejected": -2.501310348510742, + "loss": 0.2778, + "nll_loss": 0.0694592297077179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3811652024742216e-05, + "rewards/margins": 0.2501172423362732, + "rewards/rejected": -0.2501310408115387, + "step": 13992 + }, + { + "epoch": 9.677040110650069, + "grad_norm": 3.7536604404449463, + "learning_rate": 1.794221607499616e-06, + "log_odds_chosen": 11.987735748291016, + "log_odds_ratio": -1.530145527794957e-05, + "logits/chosen": -0.35948991775512695, + "logits/rejected": -0.2973094582557678, + "logps/chosen": -0.00017380458302795887, + "logps/rejected": -2.8969004154205322, + "loss": 0.3402, + "nll_loss": 0.08504220843315125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.738046012178529e-05, + "rewards/margins": 0.2896726429462433, + "rewards/rejected": -0.2896900475025177, + "step": 13993 + }, + { + "epoch": 9.677731673582295, + "grad_norm": 3.7913312911987305, + "learning_rate": 1.7903795912094668e-06, + "log_odds_chosen": 10.949464797973633, + "log_odds_ratio": -6.61562880850397e-05, + "logits/chosen": -0.39711904525756836, + "logits/rejected": -0.4672422409057617, + "logps/chosen": -0.000134202215122059, + "logps/rejected": -1.9377895593643188, + "loss": 0.3026, + "nll_loss": 0.07563167065382004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3420220057014376e-05, + "rewards/margins": 0.1937655359506607, + "rewards/rejected": -0.19377896189689636, + "step": 13994 + }, + { + "epoch": 9.678423236514522, + "grad_norm": 2.5605897903442383, + "learning_rate": 1.7865375749193176e-06, + "log_odds_chosen": 10.683719635009766, + "log_odds_ratio": -0.00015510033699683845, + "logits/chosen": 0.04777078330516815, + "logits/rejected": -0.020088866353034973, + "logps/chosen": -0.0005237612058408558, + "logps/rejected": -2.3416595458984375, + "loss": 0.2956, + "nll_loss": 0.0738738402724266, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.237612276687287e-05, + "rewards/margins": 0.23411358892917633, + "rewards/rejected": -0.2341659516096115, + "step": 13995 + }, + { + "epoch": 9.679114799446749, + "grad_norm": 2.956864595413208, + "learning_rate": 1.7826955586291687e-06, + "log_odds_chosen": 10.05513858795166, + "log_odds_ratio": -0.0002707884996198118, + "logits/chosen": -0.1300402730703354, + "logits/rejected": -0.1567571759223938, + "logps/chosen": -0.00043549295514822006, + "logps/rejected": -1.9624979496002197, + "loss": 0.2858, + "nll_loss": 0.0714205801486969, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3549294787226245e-05, + "rewards/margins": 0.1962062418460846, + "rewards/rejected": -0.1962497979402542, + "step": 13996 + }, + { + "epoch": 9.679806362378976, + "grad_norm": 3.7831461429595947, + "learning_rate": 1.7788535423390197e-06, + "log_odds_chosen": 10.855228424072266, + "log_odds_ratio": -5.6292657973244786e-05, + "logits/chosen": -0.22141483426094055, + "logits/rejected": -0.22882868349552155, + "logps/chosen": -0.0003413744561839849, + "logps/rejected": -2.0822219848632812, + "loss": 0.4089, + "nll_loss": 0.10222725570201874, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.41374434356112e-05, + "rewards/margins": 0.20818807184696198, + "rewards/rejected": -0.20822221040725708, + "step": 13997 + }, + { + "epoch": 9.680497925311203, + "grad_norm": 3.2380263805389404, + "learning_rate": 1.7750115260488705e-06, + "log_odds_chosen": 11.399898529052734, + "log_odds_ratio": -2.5035338694578968e-05, + "logits/chosen": -0.20180514454841614, + "logits/rejected": -0.18763187527656555, + "logps/chosen": -0.00019273949146736413, + "logps/rejected": -2.396219253540039, + "loss": 0.3219, + "nll_loss": 0.08048289269208908, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9273949874332175e-05, + "rewards/margins": 0.23960265517234802, + "rewards/rejected": -0.23962193727493286, + "step": 13998 + }, + { + "epoch": 9.68118948824343, + "grad_norm": 2.449667453765869, + "learning_rate": 1.7711695097587212e-06, + "log_odds_chosen": 9.883462905883789, + "log_odds_ratio": -0.0003530489047989249, + "logits/chosen": -0.47197049856185913, + "logits/rejected": -0.38646137714385986, + "logps/chosen": -0.0005334184388630092, + "logps/rejected": -1.6608302593231201, + "loss": 0.303, + "nll_loss": 0.07572139799594879, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.33418424311094e-05, + "rewards/margins": 0.1660296767950058, + "rewards/rejected": -0.16608302295207977, + "step": 13999 + }, + { + "epoch": 9.681881051175656, + "grad_norm": 2.9047279357910156, + "learning_rate": 1.7673274934685724e-06, + "log_odds_chosen": 10.272042274475098, + "log_odds_ratio": -0.015053609386086464, + "logits/chosen": -0.09649567306041718, + "logits/rejected": -0.07403193414211273, + "logps/chosen": -0.004163277801126242, + "logps/rejected": -1.9767775535583496, + "loss": 0.2624, + "nll_loss": 0.06408600509166718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0004163277626503259, + "rewards/margins": 0.19726142287254333, + "rewards/rejected": -0.19767774641513824, + "step": 14000 + }, + { + "epoch": 9.682572614107883, + "grad_norm": 2.8445565700531006, + "learning_rate": 1.7634854771784233e-06, + "log_odds_chosen": 11.194666862487793, + "log_odds_ratio": -7.283731974894181e-05, + "logits/chosen": -0.1809304803609848, + "logits/rejected": -0.24531452357769012, + "logps/chosen": -0.00035821396159008145, + "logps/rejected": -2.685930013656616, + "loss": 0.3867, + "nll_loss": 0.09666649252176285, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.582139470381662e-05, + "rewards/margins": 0.26855719089508057, + "rewards/rejected": -0.2685930132865906, + "step": 14001 + }, + { + "epoch": 9.68326417704011, + "grad_norm": 3.799746513366699, + "learning_rate": 1.759643460888274e-06, + "log_odds_chosen": 11.528482437133789, + "log_odds_ratio": -3.163529618177563e-05, + "logits/chosen": -0.36452963948249817, + "logits/rejected": -0.4858834743499756, + "logps/chosen": -7.771144737489522e-05, + "logps/rejected": -1.7862298488616943, + "loss": 0.4304, + "nll_loss": 0.10759089887142181, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.771145646984223e-06, + "rewards/margins": 0.1786152422428131, + "rewards/rejected": -0.1786229908466339, + "step": 14002 + }, + { + "epoch": 9.683955739972337, + "grad_norm": 3.057624101638794, + "learning_rate": 1.7558014445981253e-06, + "log_odds_chosen": 8.92990779876709, + "log_odds_ratio": -0.0007533504394814372, + "logits/chosen": -0.5203484296798706, + "logits/rejected": -0.4924953579902649, + "logps/chosen": -0.000799480068963021, + "logps/rejected": -1.1398868560791016, + "loss": 0.2708, + "nll_loss": 0.06761455535888672, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.994801126187667e-05, + "rewards/margins": 0.11390873789787292, + "rewards/rejected": -0.11398869752883911, + "step": 14003 + }, + { + "epoch": 9.684647302904564, + "grad_norm": 3.033080577850342, + "learning_rate": 1.7519594283079762e-06, + "log_odds_chosen": 10.711329460144043, + "log_odds_ratio": -4.0234081097878516e-05, + "logits/chosen": 0.03341685235500336, + "logits/rejected": 0.015133783221244812, + "logps/chosen": -0.00013347969797905535, + "logps/rejected": -1.9615447521209717, + "loss": 0.3171, + "nll_loss": 0.07927147299051285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3347968888410833e-05, + "rewards/margins": 0.19614112377166748, + "rewards/rejected": -0.19615447521209717, + "step": 14004 + }, + { + "epoch": 9.68533886583679, + "grad_norm": 2.9966540336608887, + "learning_rate": 1.748117412017827e-06, + "log_odds_chosen": 11.109415054321289, + "log_odds_ratio": -0.00030459227855317295, + "logits/chosen": -0.2702929973602295, + "logits/rejected": -0.3118366301059723, + "logps/chosen": -0.0005579779390245676, + "logps/rejected": -2.5292561054229736, + "loss": 0.2598, + "nll_loss": 0.06492118537425995, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.579779826803133e-05, + "rewards/margins": 0.2528698444366455, + "rewards/rejected": -0.2529256343841553, + "step": 14005 + }, + { + "epoch": 9.686030428769017, + "grad_norm": 4.1271209716796875, + "learning_rate": 1.7442753957276781e-06, + "log_odds_chosen": 10.542098999023438, + "log_odds_ratio": -0.00016738972044549882, + "logits/chosen": -0.23534046113491058, + "logits/rejected": -0.4081006348133087, + "logps/chosen": -0.00025860758614726365, + "logps/rejected": -1.8603882789611816, + "loss": 0.3008, + "nll_loss": 0.07518626004457474, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5860757887130603e-05, + "rewards/margins": 0.18601296842098236, + "rewards/rejected": -0.18603883683681488, + "step": 14006 + }, + { + "epoch": 9.686721991701244, + "grad_norm": 4.879711627960205, + "learning_rate": 1.7404333794375289e-06, + "log_odds_chosen": 12.63100814819336, + "log_odds_ratio": -2.0368574041640386e-05, + "logits/chosen": -0.2950138449668884, + "logits/rejected": -0.385187566280365, + "logps/chosen": -0.00016288757615257055, + "logps/rejected": -3.399386167526245, + "loss": 0.4562, + "nll_loss": 0.1140429824590683, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6288757251459174e-05, + "rewards/margins": 0.33992233872413635, + "rewards/rejected": -0.3399386405944824, + "step": 14007 + }, + { + "epoch": 9.687413554633471, + "grad_norm": 3.5236964225769043, + "learning_rate": 1.7365913631473799e-06, + "log_odds_chosen": 10.040060997009277, + "log_odds_ratio": -0.00023548353055957705, + "logits/chosen": -0.4871000051498413, + "logits/rejected": -0.46208345890045166, + "logps/chosen": -0.00017160980496555567, + "logps/rejected": -1.4598802328109741, + "loss": 0.5502, + "nll_loss": 0.13752034306526184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7160980860353447e-05, + "rewards/margins": 0.14597088098526, + "rewards/rejected": -0.14598803222179413, + "step": 14008 + }, + { + "epoch": 9.688105117565698, + "grad_norm": 3.6535000801086426, + "learning_rate": 1.7327493468572306e-06, + "log_odds_chosen": 9.759282112121582, + "log_odds_ratio": -0.00044369022361934185, + "logits/chosen": -0.16132892668247223, + "logits/rejected": -0.22028201818466187, + "logps/chosen": -0.0008336616447195411, + "logps/rejected": -1.7865080833435059, + "loss": 0.4812, + "nll_loss": 0.12024475634098053, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.336616156157106e-05, + "rewards/margins": 0.17856745421886444, + "rewards/rejected": -0.17865081131458282, + "step": 14009 + }, + { + "epoch": 9.688796680497925, + "grad_norm": 4.613658905029297, + "learning_rate": 1.7289073305670818e-06, + "log_odds_chosen": 11.295829772949219, + "log_odds_ratio": -0.00011873643234139308, + "logits/chosen": 0.01779123581945896, + "logits/rejected": -0.0042826710268855095, + "logps/chosen": -0.0004925333778373897, + "logps/rejected": -2.9457249641418457, + "loss": 0.5291, + "nll_loss": 0.13225911557674408, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9253339966526255e-05, + "rewards/margins": 0.2945232689380646, + "rewards/rejected": -0.29457250237464905, + "step": 14010 + }, + { + "epoch": 9.689488243430151, + "grad_norm": 3.4785525798797607, + "learning_rate": 1.7250653142769325e-06, + "log_odds_chosen": 11.194210052490234, + "log_odds_ratio": -0.00019617436919361353, + "logits/chosen": -0.5711617469787598, + "logits/rejected": -0.6833369731903076, + "logps/chosen": -0.0003467965289019048, + "logps/rejected": -2.1996796131134033, + "loss": 0.4503, + "nll_loss": 0.11255437135696411, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.467965143499896e-05, + "rewards/margins": 0.21993330121040344, + "rewards/rejected": -0.21996797621250153, + "step": 14011 + }, + { + "epoch": 9.690179806362378, + "grad_norm": 3.3688061237335205, + "learning_rate": 1.7212232979867835e-06, + "log_odds_chosen": 10.838143348693848, + "log_odds_ratio": -2.946642052847892e-05, + "logits/chosen": 0.006400890648365021, + "logits/rejected": 0.06212189793586731, + "logps/chosen": -0.0001350159727735445, + "logps/rejected": -1.9210996627807617, + "loss": 0.3849, + "nll_loss": 0.09623268991708755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3501597095455509e-05, + "rewards/margins": 0.19209645688533783, + "rewards/rejected": -0.19210997223854065, + "step": 14012 + }, + { + "epoch": 9.690871369294605, + "grad_norm": 4.070370674133301, + "learning_rate": 1.7173812816966347e-06, + "log_odds_chosen": 10.671753883361816, + "log_odds_ratio": -0.00023755105212330818, + "logits/chosen": 0.0998513251543045, + "logits/rejected": 0.1142597496509552, + "logps/chosen": -0.0007802036125212908, + "logps/rejected": -2.0398645401000977, + "loss": 0.3847, + "nll_loss": 0.09615591168403625, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.80203627073206e-05, + "rewards/margins": 0.20390844345092773, + "rewards/rejected": -0.20398646593093872, + "step": 14013 + }, + { + "epoch": 9.691562932226832, + "grad_norm": 3.36007022857666, + "learning_rate": 1.7135392654064854e-06, + "log_odds_chosen": 12.014041900634766, + "log_odds_ratio": -9.326961844635662e-06, + "logits/chosen": -0.37761032581329346, + "logits/rejected": -0.6232584118843079, + "logps/chosen": -0.00013342205784283578, + "logps/rejected": -2.4795708656311035, + "loss": 0.3714, + "nll_loss": 0.0928611233830452, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.33422063299804e-05, + "rewards/margins": 0.24794375896453857, + "rewards/rejected": -0.24795711040496826, + "step": 14014 + }, + { + "epoch": 9.692254495159059, + "grad_norm": 3.4856786727905273, + "learning_rate": 1.7096972491163364e-06, + "log_odds_chosen": 12.255451202392578, + "log_odds_ratio": -7.380766874121036e-06, + "logits/chosen": -0.35279661417007446, + "logits/rejected": -0.3281911015510559, + "logps/chosen": -0.00017538013344164938, + "logps/rejected": -3.205848217010498, + "loss": 0.4235, + "nll_loss": 0.10587650537490845, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.753801370796282e-05, + "rewards/margins": 0.3205672800540924, + "rewards/rejected": -0.320584774017334, + "step": 14015 + }, + { + "epoch": 9.692946058091286, + "grad_norm": 2.7672152519226074, + "learning_rate": 1.7058552328261871e-06, + "log_odds_chosen": 12.378790855407715, + "log_odds_ratio": -9.387677710037678e-05, + "logits/chosen": -0.11450943350791931, + "logits/rejected": -0.2026877999305725, + "logps/chosen": -0.00015812195488251746, + "logps/rejected": -3.1105387210845947, + "loss": 0.2959, + "nll_loss": 0.07396487146615982, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5812194760655984e-05, + "rewards/margins": 0.311038076877594, + "rewards/rejected": -0.3110538721084595, + "step": 14016 + }, + { + "epoch": 9.693637621023512, + "grad_norm": 2.6869497299194336, + "learning_rate": 1.7020132165360383e-06, + "log_odds_chosen": 10.067146301269531, + "log_odds_ratio": -0.00024698564084246755, + "logits/chosen": -0.6013608574867249, + "logits/rejected": -0.6596066951751709, + "logps/chosen": -0.00031246000435203314, + "logps/rejected": -1.901986002922058, + "loss": 0.2878, + "nll_loss": 0.07193221896886826, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.124599970760755e-05, + "rewards/margins": 0.1901673525571823, + "rewards/rejected": -0.190198615193367, + "step": 14017 + }, + { + "epoch": 9.69432918395574, + "grad_norm": 4.2703094482421875, + "learning_rate": 1.698171200245889e-06, + "log_odds_chosen": 9.801267623901367, + "log_odds_ratio": -0.0002336654142709449, + "logits/chosen": -0.11382442712783813, + "logits/rejected": -0.18640612065792084, + "logps/chosen": -0.0006799734546802938, + "logps/rejected": -1.7594125270843506, + "loss": 0.444, + "nll_loss": 0.11097525805234909, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7997352743987e-05, + "rewards/margins": 0.1758732795715332, + "rewards/rejected": -0.17594125866889954, + "step": 14018 + }, + { + "epoch": 9.695020746887966, + "grad_norm": 3.3787882328033447, + "learning_rate": 1.69432918395574e-06, + "log_odds_chosen": 11.508310317993164, + "log_odds_ratio": -7.473472214769572e-05, + "logits/chosen": -0.5702272653579712, + "logits/rejected": -0.5165907144546509, + "logps/chosen": -0.0003059214213863015, + "logps/rejected": -2.46089243888855, + "loss": 0.3218, + "nll_loss": 0.08045011758804321, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.059214213863015e-05, + "rewards/margins": 0.2460586428642273, + "rewards/rejected": -0.24608924984931946, + "step": 14019 + }, + { + "epoch": 9.695712309820193, + "grad_norm": 3.9944427013397217, + "learning_rate": 1.6904871676655912e-06, + "log_odds_chosen": 10.361598014831543, + "log_odds_ratio": -0.0032728288788348436, + "logits/chosen": 0.045859482139348984, + "logits/rejected": 0.14110252261161804, + "logps/chosen": -0.023929793387651443, + "logps/rejected": -2.801180839538574, + "loss": 0.3426, + "nll_loss": 0.0853225439786911, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023929793387651443, + "rewards/margins": 0.27772510051727295, + "rewards/rejected": -0.28011807799339294, + "step": 14020 + }, + { + "epoch": 9.69640387275242, + "grad_norm": 3.809525728225708, + "learning_rate": 1.686645151375442e-06, + "log_odds_chosen": 13.034082412719727, + "log_odds_ratio": -7.79420315666357e-06, + "logits/chosen": 0.15093648433685303, + "logits/rejected": 0.14870363473892212, + "logps/chosen": -0.00018803677812684327, + "logps/rejected": -3.9701337814331055, + "loss": 0.427, + "nll_loss": 0.1067470982670784, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8803677448886447e-05, + "rewards/margins": 0.39699459075927734, + "rewards/rejected": -0.3970133662223816, + "step": 14021 + }, + { + "epoch": 9.697095435684647, + "grad_norm": 3.7040340900421143, + "learning_rate": 1.6828031350852927e-06, + "log_odds_chosen": 10.49951171875, + "log_odds_ratio": -0.00019893118587788194, + "logits/chosen": -0.5705597400665283, + "logits/rejected": -0.540901780128479, + "logps/chosen": -0.00047733585233800113, + "logps/rejected": -1.9048418998718262, + "loss": 0.3755, + "nll_loss": 0.09386002272367477, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.773358887177892e-05, + "rewards/margins": 0.190436452627182, + "rewards/rejected": -0.1904841959476471, + "step": 14022 + }, + { + "epoch": 9.697786998616873, + "grad_norm": 3.799828290939331, + "learning_rate": 1.6789611187951438e-06, + "log_odds_chosen": 11.155202865600586, + "log_odds_ratio": -7.58213282097131e-05, + "logits/chosen": -0.449858158826828, + "logits/rejected": -0.48318931460380554, + "logps/chosen": -0.00021174876019358635, + "logps/rejected": -1.8886882066726685, + "loss": 0.3028, + "nll_loss": 0.07569512724876404, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1174875655560754e-05, + "rewards/margins": 0.1888476461172104, + "rewards/rejected": -0.18886882066726685, + "step": 14023 + }, + { + "epoch": 9.6984785615491, + "grad_norm": 3.440284490585327, + "learning_rate": 1.6751191025049948e-06, + "log_odds_chosen": 10.947023391723633, + "log_odds_ratio": -7.497541810153052e-05, + "logits/chosen": -0.039105042815208435, + "logits/rejected": -0.09824025630950928, + "logps/chosen": -0.0003042893949896097, + "logps/rejected": -2.4186761379241943, + "loss": 0.2919, + "nll_loss": 0.0729638859629631, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0428940590354614e-05, + "rewards/margins": 0.24183718860149384, + "rewards/rejected": -0.24186763167381287, + "step": 14024 + }, + { + "epoch": 9.699170124481327, + "grad_norm": 4.413259983062744, + "learning_rate": 1.6712770862148456e-06, + "log_odds_chosen": 10.910194396972656, + "log_odds_ratio": -0.0002598028222564608, + "logits/chosen": 0.010938696563243866, + "logits/rejected": 0.030111849308013916, + "logps/chosen": -0.00035252771340310574, + "logps/rejected": -2.6153974533081055, + "loss": 0.3088, + "nll_loss": 0.07716973125934601, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5252771340310574e-05, + "rewards/margins": 0.26150450110435486, + "rewards/rejected": -0.2615397572517395, + "step": 14025 + }, + { + "epoch": 9.699861687413554, + "grad_norm": 2.3560967445373535, + "learning_rate": 1.6674350699246965e-06, + "log_odds_chosen": 11.574258804321289, + "log_odds_ratio": -5.458891610032879e-05, + "logits/chosen": -0.3051443099975586, + "logits/rejected": -0.31580889225006104, + "logps/chosen": -0.00020766808302141726, + "logps/rejected": -2.629638910293579, + "loss": 0.2865, + "nll_loss": 0.07160836458206177, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0766809029737487e-05, + "rewards/margins": 0.2629431188106537, + "rewards/rejected": -0.2629638910293579, + "step": 14026 + }, + { + "epoch": 9.70055325034578, + "grad_norm": 2.802356481552124, + "learning_rate": 1.6635930536345477e-06, + "log_odds_chosen": 10.46877384185791, + "log_odds_ratio": -6.196806498337537e-05, + "logits/chosen": -0.28871241211891174, + "logits/rejected": -0.23788052797317505, + "logps/chosen": -0.00011537078535184264, + "logps/rejected": -1.5588353872299194, + "loss": 0.2662, + "nll_loss": 0.06653650850057602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1537079444678966e-05, + "rewards/margins": 0.15587201714515686, + "rewards/rejected": -0.1558835655450821, + "step": 14027 + }, + { + "epoch": 9.701244813278008, + "grad_norm": 2.6260311603546143, + "learning_rate": 1.6597510373443984e-06, + "log_odds_chosen": 9.96330451965332, + "log_odds_ratio": -0.00013285898603498936, + "logits/chosen": -0.005995616316795349, + "logits/rejected": 0.18605396151542664, + "logps/chosen": -0.0004144633130636066, + "logps/rejected": -1.845277190208435, + "loss": 0.2063, + "nll_loss": 0.05155202001333237, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1446332033956423e-05, + "rewards/margins": 0.1844862699508667, + "rewards/rejected": -0.18452772498130798, + "step": 14028 + }, + { + "epoch": 9.701936376210234, + "grad_norm": 2.945845365524292, + "learning_rate": 1.6559090210542492e-06, + "log_odds_chosen": 11.385857582092285, + "log_odds_ratio": -1.8085802366840653e-05, + "logits/chosen": -0.001389726996421814, + "logits/rejected": -0.02717539668083191, + "logps/chosen": -8.305132359964773e-05, + "logps/rejected": -2.020082712173462, + "loss": 0.2967, + "nll_loss": 0.07418528199195862, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.305132723762654e-06, + "rewards/margins": 0.2019999772310257, + "rewards/rejected": -0.20200827717781067, + "step": 14029 + }, + { + "epoch": 9.702627939142461, + "grad_norm": 2.373103141784668, + "learning_rate": 1.6520670047641004e-06, + "log_odds_chosen": 10.701430320739746, + "log_odds_ratio": -8.253100531874225e-05, + "logits/chosen": -0.5777424573898315, + "logits/rejected": -0.654435396194458, + "logps/chosen": -0.0001618588576093316, + "logps/rejected": -2.025099515914917, + "loss": 0.2145, + "nll_loss": 0.053607311099767685, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.618588612473104e-05, + "rewards/margins": 0.20249375700950623, + "rewards/rejected": -0.20250995457172394, + "step": 14030 + }, + { + "epoch": 9.703319502074688, + "grad_norm": 3.8033952713012695, + "learning_rate": 1.6482249884739513e-06, + "log_odds_chosen": 10.59766960144043, + "log_odds_ratio": -0.0005519393598660827, + "logits/chosen": -0.07405143231153488, + "logits/rejected": -0.07039715349674225, + "logps/chosen": -0.0005530752241611481, + "logps/rejected": -2.344395637512207, + "loss": 0.3574, + "nll_loss": 0.08929930627346039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.530752241611481e-05, + "rewards/margins": 0.23438426852226257, + "rewards/rejected": -0.23443956673145294, + "step": 14031 + }, + { + "epoch": 9.704011065006915, + "grad_norm": 4.2601141929626465, + "learning_rate": 1.644382972183802e-06, + "log_odds_chosen": 12.247598648071289, + "log_odds_ratio": -0.00012447117478586733, + "logits/chosen": 0.069765105843544, + "logits/rejected": 0.03893115371465683, + "logps/chosen": -0.00020795888849534094, + "logps/rejected": -3.519665241241455, + "loss": 0.4654, + "nll_loss": 0.11634733527898788, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.079589103232138e-05, + "rewards/margins": 0.351945698261261, + "rewards/rejected": -0.3519665002822876, + "step": 14032 + }, + { + "epoch": 9.704702627939142, + "grad_norm": 3.4472925662994385, + "learning_rate": 1.6405409558936528e-06, + "log_odds_chosen": 10.819717407226562, + "log_odds_ratio": -5.671809412888251e-05, + "logits/chosen": -0.0314161479473114, + "logits/rejected": -0.18641072511672974, + "logps/chosen": -0.00015227627591229975, + "logps/rejected": -1.8129750490188599, + "loss": 0.4014, + "nll_loss": 0.10035184770822525, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5227628864522558e-05, + "rewards/margins": 0.18128228187561035, + "rewards/rejected": -0.18129751086235046, + "step": 14033 + }, + { + "epoch": 9.705394190871369, + "grad_norm": 2.233940839767456, + "learning_rate": 1.636698939603504e-06, + "log_odds_chosen": 10.519308090209961, + "log_odds_ratio": -6.494284025393426e-05, + "logits/chosen": 0.16154597699642181, + "logits/rejected": 0.16004878282546997, + "logps/chosen": -0.00028291059425100684, + "logps/rejected": -1.9881476163864136, + "loss": 0.2677, + "nll_loss": 0.06692343205213547, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.829106233548373e-05, + "rewards/margins": 0.19878648221492767, + "rewards/rejected": -0.1988147497177124, + "step": 14034 + }, + { + "epoch": 9.706085753803595, + "grad_norm": 3.530731439590454, + "learning_rate": 1.632856923313355e-06, + "log_odds_chosen": 10.872820854187012, + "log_odds_ratio": -0.0001081073860405013, + "logits/chosen": -0.13833673298358917, + "logits/rejected": -0.13667529821395874, + "logps/chosen": -0.00039223130443133414, + "logps/rejected": -2.3692827224731445, + "loss": 0.3193, + "nll_loss": 0.07980798184871674, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.922313408111222e-05, + "rewards/margins": 0.23688904941082, + "rewards/rejected": -0.2369282841682434, + "step": 14035 + }, + { + "epoch": 9.706777316735822, + "grad_norm": 3.414837598800659, + "learning_rate": 1.6290149070232057e-06, + "log_odds_chosen": 10.04426383972168, + "log_odds_ratio": -0.00011767448449973017, + "logits/chosen": -0.22231408953666687, + "logits/rejected": -0.1594626009464264, + "logps/chosen": -0.0003671125741675496, + "logps/rejected": -1.8401079177856445, + "loss": 0.4563, + "nll_loss": 0.11405658721923828, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.671126250992529e-05, + "rewards/margins": 0.18397407233715057, + "rewards/rejected": -0.1840108036994934, + "step": 14036 + }, + { + "epoch": 9.707468879668049, + "grad_norm": 3.143347978591919, + "learning_rate": 1.6251728907330569e-06, + "log_odds_chosen": 10.427767753601074, + "log_odds_ratio": -0.00018148086383007467, + "logits/chosen": -0.10258910059928894, + "logits/rejected": -0.12172472476959229, + "logps/chosen": -0.0007102714153006673, + "logps/rejected": -2.601733684539795, + "loss": 0.3016, + "nll_loss": 0.0753839984536171, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.102714153006673e-05, + "rewards/margins": 0.26010236144065857, + "rewards/rejected": -0.26017338037490845, + "step": 14037 + }, + { + "epoch": 9.708160442600276, + "grad_norm": 2.39312744140625, + "learning_rate": 1.6213308744429078e-06, + "log_odds_chosen": 11.521331787109375, + "log_odds_ratio": -6.457349081756547e-05, + "logits/chosen": -0.6590924859046936, + "logits/rejected": -0.7450777888298035, + "logps/chosen": -0.000132291839690879, + "logps/rejected": -2.1752631664276123, + "loss": 0.2123, + "nll_loss": 0.05306620895862579, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.322918433288578e-05, + "rewards/margins": 0.2175130695104599, + "rewards/rejected": -0.21752631664276123, + "step": 14038 + }, + { + "epoch": 9.708852005532503, + "grad_norm": 3.785318613052368, + "learning_rate": 1.6174888581527586e-06, + "log_odds_chosen": 11.339120864868164, + "log_odds_ratio": -0.0005568203050643206, + "logits/chosen": -0.38010960817337036, + "logits/rejected": -0.3457328975200653, + "logps/chosen": -0.0005193545948714018, + "logps/rejected": -3.0819265842437744, + "loss": 0.4132, + "nll_loss": 0.10325435549020767, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1935458031948656e-05, + "rewards/margins": 0.30814072489738464, + "rewards/rejected": -0.3081927001476288, + "step": 14039 + }, + { + "epoch": 9.70954356846473, + "grad_norm": 2.951171636581421, + "learning_rate": 1.6136468418626098e-06, + "log_odds_chosen": 12.310433387756348, + "log_odds_ratio": -1.6809477529022843e-05, + "logits/chosen": -0.5569875836372375, + "logits/rejected": -0.508375346660614, + "logps/chosen": -0.00024335035413969308, + "logps/rejected": -3.2429840564727783, + "loss": 0.3514, + "nll_loss": 0.08783870935440063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.433503686916083e-05, + "rewards/margins": 0.32427406311035156, + "rewards/rejected": -0.3242984116077423, + "step": 14040 + }, + { + "epoch": 9.710235131396956, + "grad_norm": 3.1393754482269287, + "learning_rate": 1.6098048255724605e-06, + "log_odds_chosen": 10.602744102478027, + "log_odds_ratio": -6.755004142178223e-05, + "logits/chosen": -0.24596892297267914, + "logits/rejected": -0.28606274724006653, + "logps/chosen": -0.00040816832915879786, + "logps/rejected": -2.2621121406555176, + "loss": 0.3513, + "nll_loss": 0.08782707154750824, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.081683437107131e-05, + "rewards/margins": 0.2261703908443451, + "rewards/rejected": -0.22621119022369385, + "step": 14041 + }, + { + "epoch": 9.710926694329183, + "grad_norm": 2.1659014225006104, + "learning_rate": 1.6059628092823115e-06, + "log_odds_chosen": 11.307205200195312, + "log_odds_ratio": -0.0003245777916163206, + "logits/chosen": -0.3967916965484619, + "logits/rejected": -0.4056175947189331, + "logps/chosen": -0.0033080969005823135, + "logps/rejected": -2.641173839569092, + "loss": 0.2131, + "nll_loss": 0.05323443189263344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003308096493128687, + "rewards/margins": 0.26378658413887024, + "rewards/rejected": -0.26411738991737366, + "step": 14042 + }, + { + "epoch": 9.71161825726141, + "grad_norm": 2.597486734390259, + "learning_rate": 1.6021207929921622e-06, + "log_odds_chosen": 10.689266204833984, + "log_odds_ratio": -0.0007740746950730681, + "logits/chosen": 0.0743560940027237, + "logits/rejected": 0.11470898985862732, + "logps/chosen": -0.004252062179148197, + "logps/rejected": -3.105828046798706, + "loss": 0.2565, + "nll_loss": 0.06405926495790482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00042520626448094845, + "rewards/margins": 0.3101575970649719, + "rewards/rejected": -0.31058281660079956, + "step": 14043 + }, + { + "epoch": 9.712309820193637, + "grad_norm": 2.9477479457855225, + "learning_rate": 1.5982787767020134e-06, + "log_odds_chosen": 10.683327674865723, + "log_odds_ratio": -0.0007875992450863123, + "logits/chosen": 0.42970871925354004, + "logits/rejected": 0.38266023993492126, + "logps/chosen": -0.000497455068398267, + "logps/rejected": -2.429025411605835, + "loss": 0.2053, + "nll_loss": 0.05125453323125839, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9745507567422464e-05, + "rewards/margins": 0.24285279214382172, + "rewards/rejected": -0.24290254712104797, + "step": 14044 + }, + { + "epoch": 9.713001383125864, + "grad_norm": 3.5443201065063477, + "learning_rate": 1.5944367604118641e-06, + "log_odds_chosen": 11.064702987670898, + "log_odds_ratio": -0.0003790265473071486, + "logits/chosen": -0.3422020673751831, + "logits/rejected": -0.36000049114227295, + "logps/chosen": -0.0004840421606786549, + "logps/rejected": -2.4914891719818115, + "loss": 0.4072, + "nll_loss": 0.10174968838691711, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.840421388507821e-05, + "rewards/margins": 0.24910053610801697, + "rewards/rejected": -0.24914893507957458, + "step": 14045 + }, + { + "epoch": 9.71369294605809, + "grad_norm": 2.883375883102417, + "learning_rate": 1.590594744121715e-06, + "log_odds_chosen": 9.93283748626709, + "log_odds_ratio": -0.00039232539711520076, + "logits/chosen": -0.042561545968055725, + "logits/rejected": -0.27848780155181885, + "logps/chosen": -0.0007758078863844275, + "logps/rejected": -1.8716483116149902, + "loss": 0.2954, + "nll_loss": 0.0738009661436081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.758078572805971e-05, + "rewards/margins": 0.1870872676372528, + "rewards/rejected": -0.18716485798358917, + "step": 14046 + }, + { + "epoch": 9.714384508990317, + "grad_norm": 2.6166839599609375, + "learning_rate": 1.5867527278315663e-06, + "log_odds_chosen": 11.069677352905273, + "log_odds_ratio": -2.7290288926451467e-05, + "logits/chosen": -0.5398693680763245, + "logits/rejected": -0.5795652270317078, + "logps/chosen": -0.00012801631237380207, + "logps/rejected": -2.0744822025299072, + "loss": 0.2607, + "nll_loss": 0.06516466289758682, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.280163269257173e-05, + "rewards/margins": 0.20743544399738312, + "rewards/rejected": -0.20744824409484863, + "step": 14047 + }, + { + "epoch": 9.715076071922544, + "grad_norm": 3.791430711746216, + "learning_rate": 1.582910711541417e-06, + "log_odds_chosen": 10.99785041809082, + "log_odds_ratio": -4.2141480662394315e-05, + "logits/chosen": 0.2726088762283325, + "logits/rejected": 0.2046135663986206, + "logps/chosen": -0.0003061083552893251, + "logps/rejected": -2.4845235347747803, + "loss": 0.3876, + "nll_loss": 0.09688502550125122, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.061083407374099e-05, + "rewards/margins": 0.24842172861099243, + "rewards/rejected": -0.2484523504972458, + "step": 14048 + }, + { + "epoch": 9.715767634854771, + "grad_norm": 4.07421350479126, + "learning_rate": 1.579068695251268e-06, + "log_odds_chosen": 10.782313346862793, + "log_odds_ratio": -3.886540434905328e-05, + "logits/chosen": 0.03747441619634628, + "logits/rejected": -0.0050468891859054565, + "logps/chosen": -0.00014611854567192495, + "logps/rejected": -1.8385167121887207, + "loss": 0.3533, + "nll_loss": 0.08832766115665436, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4611853657697793e-05, + "rewards/margins": 0.18383705615997314, + "rewards/rejected": -0.18385165929794312, + "step": 14049 + }, + { + "epoch": 9.716459197786998, + "grad_norm": 3.4308087825775146, + "learning_rate": 1.5752266789611187e-06, + "log_odds_chosen": 12.339305877685547, + "log_odds_ratio": -7.251634542626562e-06, + "logits/chosen": -0.43659594655036926, + "logits/rejected": -0.44429975748062134, + "logps/chosen": -6.530222890432924e-05, + "logps/rejected": -2.59944486618042, + "loss": 0.3676, + "nll_loss": 0.09190993756055832, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.530222890432924e-06, + "rewards/margins": 0.25993794202804565, + "rewards/rejected": -0.25994449853897095, + "step": 14050 + }, + { + "epoch": 9.717150760719225, + "grad_norm": 2.8448033332824707, + "learning_rate": 1.57138466267097e-06, + "log_odds_chosen": 8.927064895629883, + "log_odds_ratio": -0.0007420360925607383, + "logits/chosen": -0.3754725754261017, + "logits/rejected": -0.4090180993080139, + "logps/chosen": -0.0015167912933975458, + "logps/rejected": -1.7306694984436035, + "loss": 0.3376, + "nll_loss": 0.08433172106742859, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015167915262281895, + "rewards/margins": 0.1729152649641037, + "rewards/rejected": -0.17306694388389587, + "step": 14051 + }, + { + "epoch": 9.717842323651452, + "grad_norm": 4.093862533569336, + "learning_rate": 1.5675426463808206e-06, + "log_odds_chosen": 12.276299476623535, + "log_odds_ratio": -9.109542588703334e-06, + "logits/chosen": -0.23019427061080933, + "logits/rejected": -0.30176666378974915, + "logps/chosen": -9.692131425254047e-05, + "logps/rejected": -2.8041582107543945, + "loss": 0.5366, + "nll_loss": 0.1341451108455658, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.692131243355107e-06, + "rewards/margins": 0.28040611743927, + "rewards/rejected": -0.280415803194046, + "step": 14052 + }, + { + "epoch": 9.718533886583678, + "grad_norm": 3.794600486755371, + "learning_rate": 1.5637006300906716e-06, + "log_odds_chosen": 10.613341331481934, + "log_odds_ratio": -0.00021652613941114396, + "logits/chosen": 0.2754127085208893, + "logits/rejected": 0.1355481892824173, + "logps/chosen": -0.0013486103853210807, + "logps/rejected": -2.005495548248291, + "loss": 0.4825, + "nll_loss": 0.12059873342514038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001348610530840233, + "rewards/margins": 0.2004147171974182, + "rewards/rejected": -0.20054957270622253, + "step": 14053 + }, + { + "epoch": 9.719225449515905, + "grad_norm": 2.87605619430542, + "learning_rate": 1.5598586138005226e-06, + "log_odds_chosen": 11.48611831665039, + "log_odds_ratio": -0.0001008669423754327, + "logits/chosen": -0.7142854332923889, + "logits/rejected": -0.7215863466262817, + "logps/chosen": -0.0003769434697460383, + "logps/rejected": -2.316336154937744, + "loss": 0.3937, + "nll_loss": 0.09841687232255936, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.769434624700807e-05, + "rewards/margins": 0.23159591853618622, + "rewards/rejected": -0.23163361847400665, + "step": 14054 + }, + { + "epoch": 9.719917012448132, + "grad_norm": 4.028690338134766, + "learning_rate": 1.5560165975103735e-06, + "log_odds_chosen": 11.993674278259277, + "log_odds_ratio": -1.3392660548561253e-05, + "logits/chosen": 0.05926985293626785, + "logits/rejected": -0.0457332506775856, + "logps/chosen": -0.0001267297484446317, + "logps/rejected": -2.551068067550659, + "loss": 0.3361, + "nll_loss": 0.08402892202138901, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.267297466256423e-05, + "rewards/margins": 0.25509414076805115, + "rewards/rejected": -0.2551068067550659, + "step": 14055 + }, + { + "epoch": 9.720608575380359, + "grad_norm": 4.912097930908203, + "learning_rate": 1.5521745812202243e-06, + "log_odds_chosen": 11.190658569335938, + "log_odds_ratio": -4.0823182644089684e-05, + "logits/chosen": -0.05123640596866608, + "logits/rejected": -0.08516818284988403, + "logps/chosen": -0.00025188413565047085, + "logps/rejected": -2.3816003799438477, + "loss": 0.4144, + "nll_loss": 0.10360151529312134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5188412109855562e-05, + "rewards/margins": 0.23813486099243164, + "rewards/rejected": -0.23816005885601044, + "step": 14056 + }, + { + "epoch": 9.721300138312586, + "grad_norm": 4.297684192657471, + "learning_rate": 1.5483325649300755e-06, + "log_odds_chosen": 11.821123123168945, + "log_odds_ratio": -2.1368965462897904e-05, + "logits/chosen": -0.24659223854541779, + "logits/rejected": -0.33540570735931396, + "logps/chosen": -0.000154897992615588, + "logps/rejected": -2.5837361812591553, + "loss": 0.3602, + "nll_loss": 0.09005637466907501, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5489800716750324e-05, + "rewards/margins": 0.2583581209182739, + "rewards/rejected": -0.2583736181259155, + "step": 14057 + }, + { + "epoch": 9.721991701244812, + "grad_norm": 2.7587263584136963, + "learning_rate": 1.5444905486399264e-06, + "log_odds_chosen": 10.231828689575195, + "log_odds_ratio": -0.00010190495231654495, + "logits/chosen": -0.07780411839485168, + "logits/rejected": -0.18501782417297363, + "logps/chosen": -0.0002624362532515079, + "logps/rejected": -1.7737529277801514, + "loss": 0.2391, + "nll_loss": 0.059752993285655975, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.624362605274655e-05, + "rewards/margins": 0.1773490607738495, + "rewards/rejected": -0.17737528681755066, + "step": 14058 + }, + { + "epoch": 9.72268326417704, + "grad_norm": 3.6375765800476074, + "learning_rate": 1.5406485323497772e-06, + "log_odds_chosen": 10.606718063354492, + "log_odds_ratio": -4.8315603635273874e-05, + "logits/chosen": -0.718386173248291, + "logits/rejected": -0.7688785195350647, + "logps/chosen": -0.0005027923616580665, + "logps/rejected": -2.3363733291625977, + "loss": 0.3991, + "nll_loss": 0.0997781902551651, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0279242714168504e-05, + "rewards/margins": 0.23358707129955292, + "rewards/rejected": -0.23363733291625977, + "step": 14059 + }, + { + "epoch": 9.723374827109266, + "grad_norm": 4.166598796844482, + "learning_rate": 1.5368065160596281e-06, + "log_odds_chosen": 11.076985359191895, + "log_odds_ratio": -3.0693649023305625e-05, + "logits/chosen": -0.4552866220474243, + "logits/rejected": -0.4488312005996704, + "logps/chosen": -0.0007647222955711186, + "logps/rejected": -2.5392777919769287, + "loss": 0.5003, + "nll_loss": 0.1250789910554886, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.647223537787795e-05, + "rewards/margins": 0.2538512945175171, + "rewards/rejected": -0.2539277672767639, + "step": 14060 + }, + { + "epoch": 9.724066390041493, + "grad_norm": 3.4372000694274902, + "learning_rate": 1.532964499769479e-06, + "log_odds_chosen": 10.523962020874023, + "log_odds_ratio": -0.0003658041823655367, + "logits/chosen": -0.061703141778707504, + "logits/rejected": -0.15716056525707245, + "logps/chosen": -0.0014027312863618135, + "logps/rejected": -2.742238759994507, + "loss": 0.2906, + "nll_loss": 0.07262028753757477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00014027312863618135, + "rewards/margins": 0.27408361434936523, + "rewards/rejected": -0.27422386407852173, + "step": 14061 + }, + { + "epoch": 9.72475795297372, + "grad_norm": 3.768315553665161, + "learning_rate": 1.52912248347933e-06, + "log_odds_chosen": 10.620694160461426, + "log_odds_ratio": -8.276679000118747e-05, + "logits/chosen": 0.28818756341934204, + "logits/rejected": 0.23599982261657715, + "logps/chosen": -0.00031609414145350456, + "logps/rejected": -1.734850525856018, + "loss": 0.4504, + "nll_loss": 0.11258061230182648, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.160941560054198e-05, + "rewards/margins": 0.1734534502029419, + "rewards/rejected": -0.17348507046699524, + "step": 14062 + }, + { + "epoch": 9.725449515905947, + "grad_norm": 3.50580096244812, + "learning_rate": 1.525280467189181e-06, + "log_odds_chosen": 10.935100555419922, + "log_odds_ratio": -5.6851513363653794e-05, + "logits/chosen": 0.04261190444231033, + "logits/rejected": 0.08111607283353806, + "logps/chosen": -0.0002661603211890906, + "logps/rejected": -2.5173263549804688, + "loss": 0.3063, + "nll_loss": 0.07656723260879517, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6616033210302703e-05, + "rewards/margins": 0.25170600414276123, + "rewards/rejected": -0.25173258781433105, + "step": 14063 + }, + { + "epoch": 9.726141078838173, + "grad_norm": 3.284005880355835, + "learning_rate": 1.5214384508990318e-06, + "log_odds_chosen": 11.105777740478516, + "log_odds_ratio": -2.1458101400639862e-05, + "logits/chosen": -0.54180508852005, + "logits/rejected": -0.5738696455955505, + "logps/chosen": -0.0002820981899276376, + "logps/rejected": -2.3394103050231934, + "loss": 0.3549, + "nll_loss": 0.08873511850833893, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8209818992763758e-05, + "rewards/margins": 0.23391284048557281, + "rewards/rejected": -0.23394104838371277, + "step": 14064 + }, + { + "epoch": 9.7268326417704, + "grad_norm": 2.8504526615142822, + "learning_rate": 1.517596434608883e-06, + "log_odds_chosen": 11.10925579071045, + "log_odds_ratio": -8.551261998945847e-05, + "logits/chosen": 0.4728356897830963, + "logits/rejected": 0.6037408113479614, + "logps/chosen": -0.00030483852606266737, + "logps/rejected": -2.6879568099975586, + "loss": 0.2293, + "nll_loss": 0.05732450261712074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0483850423479453e-05, + "rewards/margins": 0.2687651813030243, + "rewards/rejected": -0.2687956690788269, + "step": 14065 + }, + { + "epoch": 9.727524204702627, + "grad_norm": 3.3343968391418457, + "learning_rate": 1.5137544183187337e-06, + "log_odds_chosen": 10.605195999145508, + "log_odds_ratio": -0.00013343783211894333, + "logits/chosen": -0.3856803774833679, + "logits/rejected": -0.45289888978004456, + "logps/chosen": -0.00033911195350810885, + "logps/rejected": -1.8947904109954834, + "loss": 0.3649, + "nll_loss": 0.09120230376720428, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.391119389561936e-05, + "rewards/margins": 0.1894451379776001, + "rewards/rejected": -0.1894790530204773, + "step": 14066 + }, + { + "epoch": 9.728215767634854, + "grad_norm": 3.192932367324829, + "learning_rate": 1.5099124020285846e-06, + "log_odds_chosen": 11.377513885498047, + "log_odds_ratio": -3.7108355172676966e-05, + "logits/chosen": -0.3142525851726532, + "logits/rejected": -0.3797409236431122, + "logps/chosen": -9.22659964999184e-05, + "logps/rejected": -2.035445213317871, + "loss": 0.3276, + "nll_loss": 0.08189854025840759, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.22660001378972e-06, + "rewards/margins": 0.2035352885723114, + "rewards/rejected": -0.2035445272922516, + "step": 14067 + }, + { + "epoch": 9.72890733056708, + "grad_norm": 2.9361958503723145, + "learning_rate": 1.5060703857384356e-06, + "log_odds_chosen": 11.60842514038086, + "log_odds_ratio": -2.894986027968116e-05, + "logits/chosen": -0.3391823470592499, + "logits/rejected": -0.36785149574279785, + "logps/chosen": -0.00030659729964099824, + "logps/rejected": -3.0317115783691406, + "loss": 0.3211, + "nll_loss": 0.08026459068059921, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.06597285089083e-05, + "rewards/margins": 0.3031404912471771, + "rewards/rejected": -0.30317115783691406, + "step": 14068 + }, + { + "epoch": 9.729598893499308, + "grad_norm": 2.9858357906341553, + "learning_rate": 1.5022283694482866e-06, + "log_odds_chosen": 12.086821556091309, + "log_odds_ratio": -3.983519127359614e-05, + "logits/chosen": -0.3994476795196533, + "logits/rejected": -0.456809937953949, + "logps/chosen": -0.00023153756046667695, + "logps/rejected": -3.2445569038391113, + "loss": 0.3513, + "nll_loss": 0.08781701326370239, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3153756046667695e-05, + "rewards/margins": 0.32443252205848694, + "rewards/rejected": -0.3244556784629822, + "step": 14069 + }, + { + "epoch": 9.730290456431534, + "grad_norm": 3.037654161453247, + "learning_rate": 1.4983863531581375e-06, + "log_odds_chosen": 11.219244003295898, + "log_odds_ratio": -1.625965887797065e-05, + "logits/chosen": -0.147754967212677, + "logits/rejected": -0.2532964050769806, + "logps/chosen": -0.00011137408000649884, + "logps/rejected": -2.091094493865967, + "loss": 0.3681, + "nll_loss": 0.0920318067073822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1137407454953063e-05, + "rewards/margins": 0.20909832417964935, + "rewards/rejected": -0.20910947024822235, + "step": 14070 + }, + { + "epoch": 9.730982019363761, + "grad_norm": 4.7024455070495605, + "learning_rate": 1.4945443368679883e-06, + "log_odds_chosen": 10.624773025512695, + "log_odds_ratio": -7.10480599082075e-05, + "logits/chosen": -0.6009021997451782, + "logits/rejected": -0.5314351320266724, + "logps/chosen": -0.0009751567849889398, + "logps/rejected": -2.0029373168945312, + "loss": 0.3716, + "nll_loss": 0.09289713203907013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.75156799540855e-05, + "rewards/margins": 0.20019623637199402, + "rewards/rejected": -0.20029374957084656, + "step": 14071 + }, + { + "epoch": 9.731673582295988, + "grad_norm": 3.3876678943634033, + "learning_rate": 1.4907023205778394e-06, + "log_odds_chosen": 11.144800186157227, + "log_odds_ratio": -0.00040390901267528534, + "logits/chosen": -0.40883949398994446, + "logits/rejected": -0.3468576669692993, + "logps/chosen": -0.0008895749342627823, + "logps/rejected": -2.9034881591796875, + "loss": 0.2573, + "nll_loss": 0.06428197026252747, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.895749488146976e-05, + "rewards/margins": 0.2902598977088928, + "rewards/rejected": -0.2903488278388977, + "step": 14072 + }, + { + "epoch": 9.732365145228215, + "grad_norm": 3.847625732421875, + "learning_rate": 1.4868603042876902e-06, + "log_odds_chosen": 11.44306468963623, + "log_odds_ratio": -7.029860717011616e-05, + "logits/chosen": -0.3494161069393158, + "logits/rejected": -0.381717711687088, + "logps/chosen": -0.00027682489599101245, + "logps/rejected": -2.269761085510254, + "loss": 0.3172, + "nll_loss": 0.07928379625082016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7682490326697007e-05, + "rewards/margins": 0.22694844007492065, + "rewards/rejected": -0.22697609663009644, + "step": 14073 + }, + { + "epoch": 9.733056708160442, + "grad_norm": 3.9256770610809326, + "learning_rate": 1.4830182879975412e-06, + "log_odds_chosen": 11.631043434143066, + "log_odds_ratio": -9.395475353812799e-05, + "logits/chosen": -0.08485838770866394, + "logits/rejected": -0.20589959621429443, + "logps/chosen": -0.00025275791995227337, + "logps/rejected": -2.6631438732147217, + "loss": 0.3937, + "nll_loss": 0.09841006249189377, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5275794541812502e-05, + "rewards/margins": 0.2662891149520874, + "rewards/rejected": -0.26631438732147217, + "step": 14074 + }, + { + "epoch": 9.733748271092669, + "grad_norm": 3.187096118927002, + "learning_rate": 1.4791762717073921e-06, + "log_odds_chosen": 10.009942054748535, + "log_odds_ratio": -0.0011052628979086876, + "logits/chosen": -0.5161486268043518, + "logits/rejected": -0.5463173389434814, + "logps/chosen": -0.0007038781768642366, + "logps/rejected": -1.7462892532348633, + "loss": 0.695, + "nll_loss": 0.17363958060741425, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.038781768642366e-05, + "rewards/margins": 0.17455855011940002, + "rewards/rejected": -0.17462894320487976, + "step": 14075 + }, + { + "epoch": 9.734439834024897, + "grad_norm": 3.8338570594787598, + "learning_rate": 1.475334255417243e-06, + "log_odds_chosen": 10.597758293151855, + "log_odds_ratio": -7.467473187716678e-05, + "logits/chosen": -0.2802301347255707, + "logits/rejected": -0.3332068622112274, + "logps/chosen": -0.00017298806051257998, + "logps/rejected": -1.433288335800171, + "loss": 0.3614, + "nll_loss": 0.09034281224012375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7298805687460117e-05, + "rewards/margins": 0.1433115303516388, + "rewards/rejected": -0.14332884550094604, + "step": 14076 + }, + { + "epoch": 9.735131396957122, + "grad_norm": 5.281632423400879, + "learning_rate": 1.471492239127094e-06, + "log_odds_chosen": 10.167672157287598, + "log_odds_ratio": -0.0002352109004277736, + "logits/chosen": 0.035262420773506165, + "logits/rejected": 0.0025625228881835938, + "logps/chosen": -0.0005891511682420969, + "logps/rejected": -1.637753963470459, + "loss": 0.7929, + "nll_loss": 0.1982031613588333, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891511682420969e-05, + "rewards/margins": 0.16371646523475647, + "rewards/rejected": -0.16377539932727814, + "step": 14077 + }, + { + "epoch": 9.73582295988935, + "grad_norm": 4.715962886810303, + "learning_rate": 1.4676502228369448e-06, + "log_odds_chosen": 10.590758323669434, + "log_odds_ratio": -9.746826253831387e-05, + "logits/chosen": -0.19473972916603088, + "logits/rejected": -0.21724197268486023, + "logps/chosen": -0.00022266368614509702, + "logps/rejected": -1.9685781002044678, + "loss": 0.425, + "nll_loss": 0.1062331572175026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2266369342105463e-05, + "rewards/margins": 0.19683553278446198, + "rewards/rejected": -0.19685781002044678, + "step": 14078 + }, + { + "epoch": 9.736514522821576, + "grad_norm": 3.3531792163848877, + "learning_rate": 1.4638082065467957e-06, + "log_odds_chosen": 9.810626029968262, + "log_odds_ratio": -0.0006292449543252587, + "logits/chosen": -0.11490876972675323, + "logits/rejected": -0.23583844304084778, + "logps/chosen": -0.0012257093330845237, + "logps/rejected": -1.5976797342300415, + "loss": 0.5769, + "nll_loss": 0.1441582441329956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012257092748768628, + "rewards/margins": 0.15964540839195251, + "rewards/rejected": -0.1597679853439331, + "step": 14079 + }, + { + "epoch": 9.737206085753805, + "grad_norm": 3.5185587406158447, + "learning_rate": 1.459966190256647e-06, + "log_odds_chosen": 10.632923126220703, + "log_odds_ratio": -0.00013407410006038845, + "logits/chosen": -0.056553907692432404, + "logits/rejected": -0.1727689951658249, + "logps/chosen": -0.0005626532947644591, + "logps/rejected": -1.8834426403045654, + "loss": 0.2605, + "nll_loss": 0.06510130316019058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6265325838467106e-05, + "rewards/margins": 0.18828797340393066, + "rewards/rejected": -0.18834425508975983, + "step": 14080 + }, + { + "epoch": 9.73789764868603, + "grad_norm": 3.742751359939575, + "learning_rate": 1.4561241739664977e-06, + "log_odds_chosen": 10.546586990356445, + "log_odds_ratio": -0.0005286220693960786, + "logits/chosen": -0.020138412714004517, + "logits/rejected": -0.12044990062713623, + "logps/chosen": -0.0017503878334537148, + "logps/rejected": -2.42435359954834, + "loss": 0.4216, + "nll_loss": 0.10534273833036423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00017503878916613758, + "rewards/margins": 0.24226033687591553, + "rewards/rejected": -0.24243536591529846, + "step": 14081 + }, + { + "epoch": 9.738589211618258, + "grad_norm": 3.0572142601013184, + "learning_rate": 1.4522821576763486e-06, + "log_odds_chosen": 8.781017303466797, + "log_odds_ratio": -0.0013535015750676394, + "logits/chosen": 0.25028395652770996, + "logits/rejected": 0.33531999588012695, + "logps/chosen": -0.001614319160580635, + "logps/rejected": -1.6745295524597168, + "loss": 0.4157, + "nll_loss": 0.10378843545913696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00016143193352036178, + "rewards/margins": 0.167291522026062, + "rewards/rejected": -0.16745296120643616, + "step": 14082 + }, + { + "epoch": 9.739280774550483, + "grad_norm": 3.526397943496704, + "learning_rate": 1.4484401413861996e-06, + "log_odds_chosen": 11.265777587890625, + "log_odds_ratio": -5.912484266445972e-05, + "logits/chosen": -0.6020369529724121, + "logits/rejected": -0.6356661915779114, + "logps/chosen": -0.00014268027734942734, + "logps/rejected": -1.9638330936431885, + "loss": 0.3327, + "nll_loss": 0.08316794037818909, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4268028280639555e-05, + "rewards/margins": 0.19636905193328857, + "rewards/rejected": -0.19638332724571228, + "step": 14083 + }, + { + "epoch": 9.739972337482712, + "grad_norm": 3.143381357192993, + "learning_rate": 1.4445981250960506e-06, + "log_odds_chosen": 12.47369384765625, + "log_odds_ratio": -2.576132283138577e-05, + "logits/chosen": -0.23411519825458527, + "logits/rejected": -0.39735114574432373, + "logps/chosen": -0.0001566018327139318, + "logps/rejected": -3.5539331436157227, + "loss": 0.3253, + "nll_loss": 0.0813322439789772, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.566018363519106e-05, + "rewards/margins": 0.3553776741027832, + "rewards/rejected": -0.35539335012435913, + "step": 14084 + }, + { + "epoch": 9.740663900414937, + "grad_norm": 3.986825942993164, + "learning_rate": 1.4407561088059015e-06, + "log_odds_chosen": 10.651996612548828, + "log_odds_ratio": -6.728620792273432e-05, + "logits/chosen": 0.058619819581508636, + "logits/rejected": 0.007906101644039154, + "logps/chosen": -0.0003832450311165303, + "logps/rejected": -2.0348761081695557, + "loss": 0.5029, + "nll_loss": 0.12572330236434937, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8324506022036076e-05, + "rewards/margins": 0.2034492790699005, + "rewards/rejected": -0.2034876048564911, + "step": 14085 + }, + { + "epoch": 9.741355463347166, + "grad_norm": 3.143564224243164, + "learning_rate": 1.4369140925157523e-06, + "log_odds_chosen": 10.742366790771484, + "log_odds_ratio": -0.00018134075799025595, + "logits/chosen": -0.2008328139781952, + "logits/rejected": -0.19422832131385803, + "logps/chosen": -0.00031903735361993313, + "logps/rejected": -2.401066780090332, + "loss": 0.6903, + "nll_loss": 0.17255018651485443, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.190373536199331e-05, + "rewards/margins": 0.2400747835636139, + "rewards/rejected": -0.2401067018508911, + "step": 14086 + }, + { + "epoch": 9.74204702627939, + "grad_norm": 3.724709987640381, + "learning_rate": 1.4330720762256032e-06, + "log_odds_chosen": 11.134361267089844, + "log_odds_ratio": -0.00011482177069410682, + "logits/chosen": -0.2082558125257492, + "logits/rejected": -0.3872357904911041, + "logps/chosen": -0.00015495551633648574, + "logps/rejected": -1.985574722290039, + "loss": 0.4225, + "nll_loss": 0.10561499744653702, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5495550542254932e-05, + "rewards/margins": 0.19854196906089783, + "rewards/rejected": -0.19855748116970062, + "step": 14087 + }, + { + "epoch": 9.74273858921162, + "grad_norm": 3.1600730419158936, + "learning_rate": 1.4292300599354542e-06, + "log_odds_chosen": 11.408288955688477, + "log_odds_ratio": -1.736992817313876e-05, + "logits/chosen": -0.055584512650966644, + "logits/rejected": -0.17049917578697205, + "logps/chosen": -0.000266480928985402, + "logps/rejected": -2.9400196075439453, + "loss": 0.3232, + "nll_loss": 0.08080445230007172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6648091079550795e-05, + "rewards/margins": 0.293975293636322, + "rewards/rejected": -0.2940019369125366, + "step": 14088 + }, + { + "epoch": 9.743430152143844, + "grad_norm": 2.75327467918396, + "learning_rate": 1.4253880436453051e-06, + "log_odds_chosen": 9.775684356689453, + "log_odds_ratio": -0.00013458194734994322, + "logits/chosen": -0.45873522758483887, + "logits/rejected": -0.43231844902038574, + "logps/chosen": -0.0002898464153986424, + "logps/rejected": -1.6174246072769165, + "loss": 0.3435, + "nll_loss": 0.08585477620363235, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.898464117606636e-05, + "rewards/margins": 0.16171349585056305, + "rewards/rejected": -0.16174247860908508, + "step": 14089 + }, + { + "epoch": 9.744121715076073, + "grad_norm": 4.789213180541992, + "learning_rate": 1.4215460273551559e-06, + "log_odds_chosen": 11.066625595092773, + "log_odds_ratio": -0.00024053329252637923, + "logits/chosen": 0.1454014629125595, + "logits/rejected": 0.29094868898391724, + "logps/chosen": -0.00014530331827700138, + "logps/rejected": -2.399749279022217, + "loss": 0.7863, + "nll_loss": 0.1965598315000534, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.453033291909378e-05, + "rewards/margins": 0.23996040225028992, + "rewards/rejected": -0.23997493088245392, + "step": 14090 + }, + { + "epoch": 9.744813278008298, + "grad_norm": 3.5538330078125, + "learning_rate": 1.417704011065007e-06, + "log_odds_chosen": 10.93421745300293, + "log_odds_ratio": -0.0001779910089680925, + "logits/chosen": -0.421045184135437, + "logits/rejected": -0.3911956548690796, + "logps/chosen": -0.00013790714729111642, + "logps/rejected": -2.344021797180176, + "loss": 0.4124, + "nll_loss": 0.10307969152927399, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3790715456707403e-05, + "rewards/margins": 0.23438839614391327, + "rewards/rejected": -0.23440217971801758, + "step": 14091 + }, + { + "epoch": 9.745504840940526, + "grad_norm": 3.080880641937256, + "learning_rate": 1.413861994774858e-06, + "log_odds_chosen": 13.354349136352539, + "log_odds_ratio": -5.88564853387652e-06, + "logits/chosen": -0.5883753895759583, + "logits/rejected": -0.5448213219642639, + "logps/chosen": -7.660967821720988e-05, + "logps/rejected": -3.779228687286377, + "loss": 0.2712, + "nll_loss": 0.06779833137989044, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.660968549316749e-06, + "rewards/margins": 0.37791526317596436, + "rewards/rejected": -0.3779228925704956, + "step": 14092 + }, + { + "epoch": 9.746196403872752, + "grad_norm": 4.574988842010498, + "learning_rate": 1.4100199784847088e-06, + "log_odds_chosen": 11.685251235961914, + "log_odds_ratio": -0.0002756851026788354, + "logits/chosen": 0.12553559243679047, + "logits/rejected": 0.0865488052368164, + "logps/chosen": -0.0003497231809888035, + "logps/rejected": -2.762530565261841, + "loss": 0.4701, + "nll_loss": 0.11749804019927979, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4972315916093066e-05, + "rewards/margins": 0.27621808648109436, + "rewards/rejected": -0.2762530446052551, + "step": 14093 + }, + { + "epoch": 9.74688796680498, + "grad_norm": 2.5818259716033936, + "learning_rate": 1.4061779621945597e-06, + "log_odds_chosen": 12.163631439208984, + "log_odds_ratio": -4.104728577658534e-05, + "logits/chosen": -0.267192542552948, + "logits/rejected": -0.366630882024765, + "logps/chosen": -0.00015620008343830705, + "logps/rejected": -2.9856929779052734, + "loss": 0.2523, + "nll_loss": 0.06308251619338989, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5620007616234943e-05, + "rewards/margins": 0.2985536754131317, + "rewards/rejected": -0.29856929183006287, + "step": 14094 + }, + { + "epoch": 9.747579529737205, + "grad_norm": 2.803849697113037, + "learning_rate": 1.4023359459044107e-06, + "log_odds_chosen": 10.526567459106445, + "log_odds_ratio": -5.086886085337028e-05, + "logits/chosen": -0.4517253637313843, + "logits/rejected": -0.5013437271118164, + "logps/chosen": -0.00023212407540995628, + "logps/rejected": -1.738023042678833, + "loss": 0.2217, + "nll_loss": 0.05542437359690666, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3212407540995628e-05, + "rewards/margins": 0.17377911508083344, + "rewards/rejected": -0.17380231618881226, + "step": 14095 + }, + { + "epoch": 9.748271092669434, + "grad_norm": 3.0401809215545654, + "learning_rate": 1.3984939296142617e-06, + "log_odds_chosen": 10.050030708312988, + "log_odds_ratio": -0.0003560371696949005, + "logits/chosen": 0.07473733276128769, + "logits/rejected": 0.1357569545507431, + "logps/chosen": -0.0013845351058989763, + "logps/rejected": -1.765272617340088, + "loss": 0.3357, + "nll_loss": 0.0838976800441742, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0001384535280521959, + "rewards/margins": 0.17638880014419556, + "rewards/rejected": -0.1765272468328476, + "step": 14096 + }, + { + "epoch": 9.748962655601659, + "grad_norm": 2.90578556060791, + "learning_rate": 1.3946519133241126e-06, + "log_odds_chosen": 11.149063110351562, + "log_odds_ratio": -3.0750688893022016e-05, + "logits/chosen": 0.1374821811914444, + "logits/rejected": 0.033144012093544006, + "logps/chosen": -0.00010943791130557656, + "logps/rejected": -2.0966885089874268, + "loss": 0.3125, + "nll_loss": 0.07813267409801483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0943791494355537e-05, + "rewards/margins": 0.20965790748596191, + "rewards/rejected": -0.2096688449382782, + "step": 14097 + }, + { + "epoch": 9.749654218533887, + "grad_norm": 2.961550712585449, + "learning_rate": 1.3908098970339634e-06, + "log_odds_chosen": 10.718459129333496, + "log_odds_ratio": -0.00010540042421780527, + "logits/chosen": -0.46274954080581665, + "logits/rejected": -0.42374998331069946, + "logps/chosen": -0.0003471905365586281, + "logps/rejected": -2.034414291381836, + "loss": 0.3213, + "nll_loss": 0.08032146096229553, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.471905438345857e-05, + "rewards/margins": 0.2034067064523697, + "rewards/rejected": -0.20344141125679016, + "step": 14098 + }, + { + "epoch": 9.750345781466113, + "grad_norm": 3.322176218032837, + "learning_rate": 1.3869678807438145e-06, + "log_odds_chosen": 11.455936431884766, + "log_odds_ratio": -4.330392403062433e-05, + "logits/chosen": -0.07885990291833878, + "logits/rejected": -0.1312030702829361, + "logps/chosen": -9.869838686427101e-05, + "logps/rejected": -2.3764195442199707, + "loss": 0.3969, + "nll_loss": 0.09921538084745407, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.86983832262922e-06, + "rewards/margins": 0.23763209581375122, + "rewards/rejected": -0.23764196038246155, + "step": 14099 + }, + { + "epoch": 9.751037344398341, + "grad_norm": 2.997025966644287, + "learning_rate": 1.3831258644536653e-06, + "log_odds_chosen": 11.968637466430664, + "log_odds_ratio": -1.6224121281993575e-05, + "logits/chosen": -0.6310082077980042, + "logits/rejected": -0.5447397828102112, + "logps/chosen": -4.5612454414367676e-05, + "logps/rejected": -1.7740814685821533, + "loss": 0.3439, + "nll_loss": 0.08597853779792786, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.561245532386238e-06, + "rewards/margins": 0.17740359902381897, + "rewards/rejected": -0.1774081587791443, + "step": 14100 + }, + { + "epoch": 9.751728907330566, + "grad_norm": 3.8272266387939453, + "learning_rate": 1.3792838481635163e-06, + "log_odds_chosen": 11.665167808532715, + "log_odds_ratio": -0.00018376082880422473, + "logits/chosen": -0.4063437581062317, + "logits/rejected": -0.3796899914741516, + "logps/chosen": -0.0002080218109767884, + "logps/rejected": -2.5441761016845703, + "loss": 0.3762, + "nll_loss": 0.09402955323457718, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.080218109767884e-05, + "rewards/margins": 0.25439679622650146, + "rewards/rejected": -0.2544175982475281, + "step": 14101 + }, + { + "epoch": 9.752420470262795, + "grad_norm": 3.6981446743011475, + "learning_rate": 1.3754418318733672e-06, + "log_odds_chosen": 10.13759994506836, + "log_odds_ratio": -0.00014721702609676868, + "logits/chosen": 0.0600414052605629, + "logits/rejected": -0.016490664333105087, + "logps/chosen": -0.0003654182655736804, + "logps/rejected": -1.8190926313400269, + "loss": 0.3529, + "nll_loss": 0.08821893483400345, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.65418272849638e-05, + "rewards/margins": 0.18187272548675537, + "rewards/rejected": -0.18190926313400269, + "step": 14102 + }, + { + "epoch": 9.75311203319502, + "grad_norm": 3.9282379150390625, + "learning_rate": 1.3715998155832182e-06, + "log_odds_chosen": 11.93588924407959, + "log_odds_ratio": -1.2728256479022093e-05, + "logits/chosen": -0.49063944816589355, + "logits/rejected": -0.5796186923980713, + "logps/chosen": -0.0003584186197258532, + "logps/rejected": -2.8449249267578125, + "loss": 0.5295, + "nll_loss": 0.1323769986629486, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.584186197258532e-05, + "rewards/margins": 0.28445667028427124, + "rewards/rejected": -0.28449252247810364, + "step": 14103 + }, + { + "epoch": 9.753803596127248, + "grad_norm": 3.6572389602661133, + "learning_rate": 1.3677577992930691e-06, + "log_odds_chosen": 11.625504493713379, + "log_odds_ratio": -2.5438281227252446e-05, + "logits/chosen": -0.12430409342050552, + "logits/rejected": -0.2552037835121155, + "logps/chosen": -9.886729822028428e-05, + "logps/rejected": -2.3856372833251953, + "loss": 0.4087, + "nll_loss": 0.10217204689979553, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.886729458230548e-06, + "rewards/margins": 0.23855382204055786, + "rewards/rejected": -0.23856371641159058, + "step": 14104 + }, + { + "epoch": 9.754495159059474, + "grad_norm": 3.4196536540985107, + "learning_rate": 1.3639157830029199e-06, + "log_odds_chosen": 11.092937469482422, + "log_odds_ratio": -0.00010019134788308293, + "logits/chosen": -0.506554901599884, + "logits/rejected": -0.5191491842269897, + "logps/chosen": -0.000254131096880883, + "logps/rejected": -2.1024882793426514, + "loss": 0.41, + "nll_loss": 0.10248668491840363, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5413108232896775e-05, + "rewards/margins": 0.21022343635559082, + "rewards/rejected": -0.21024884283542633, + "step": 14105 + }, + { + "epoch": 9.755186721991702, + "grad_norm": 3.407672166824341, + "learning_rate": 1.360073766712771e-06, + "log_odds_chosen": 10.763035774230957, + "log_odds_ratio": -6.49708672426641e-05, + "logits/chosen": -0.11657628417015076, + "logits/rejected": -0.12170778959989548, + "logps/chosen": -0.0002784933312796056, + "logps/rejected": -2.243659019470215, + "loss": 0.3516, + "nll_loss": 0.08789139986038208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7849335310747847e-05, + "rewards/margins": 0.22433805465698242, + "rewards/rejected": -0.22436591982841492, + "step": 14106 + }, + { + "epoch": 9.755878284923927, + "grad_norm": 2.614415168762207, + "learning_rate": 1.3562317504226218e-06, + "log_odds_chosen": 10.69253158569336, + "log_odds_ratio": -3.6323992389952764e-05, + "logits/chosen": -0.21887744963169098, + "logits/rejected": -0.16397805511951447, + "logps/chosen": -8.981427527032793e-05, + "logps/rejected": -1.5139155387878418, + "loss": 0.3785, + "nll_loss": 0.09461327642202377, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.981427527032793e-06, + "rewards/margins": 0.15138258039951324, + "rewards/rejected": -0.15139156579971313, + "step": 14107 + }, + { + "epoch": 9.756569847856156, + "grad_norm": 3.89406681060791, + "learning_rate": 1.3523897341324728e-06, + "log_odds_chosen": 10.90273380279541, + "log_odds_ratio": -0.0001827479136409238, + "logits/chosen": -0.27213039994239807, + "logits/rejected": -0.33823931217193604, + "logps/chosen": -0.0006309926393441856, + "logps/rejected": -2.185246467590332, + "loss": 0.4562, + "nll_loss": 0.11403346806764603, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309926538961008e-05, + "rewards/margins": 0.21846157312393188, + "rewards/rejected": -0.21852466464042664, + "step": 14108 + }, + { + "epoch": 9.75726141078838, + "grad_norm": 3.9044744968414307, + "learning_rate": 1.3485477178423237e-06, + "log_odds_chosen": 10.96763801574707, + "log_odds_ratio": -2.465880061208736e-05, + "logits/chosen": -0.09640151262283325, + "logits/rejected": -0.16491639614105225, + "logps/chosen": -0.0002413954061921686, + "logps/rejected": -2.499875545501709, + "loss": 0.4665, + "nll_loss": 0.11662641167640686, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.413954098301474e-05, + "rewards/margins": 0.2499634176492691, + "rewards/rejected": -0.24998754262924194, + "step": 14109 + }, + { + "epoch": 9.75795297372061, + "grad_norm": 2.787626028060913, + "learning_rate": 1.3447057015521747e-06, + "log_odds_chosen": 10.05321216583252, + "log_odds_ratio": -6.740433309460059e-05, + "logits/chosen": -0.26694992184638977, + "logits/rejected": -0.28863584995269775, + "logps/chosen": -0.00020133465295657516, + "logps/rejected": -1.5156182050704956, + "loss": 0.3212, + "nll_loss": 0.08028507977724075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0133464204263873e-05, + "rewards/margins": 0.15154169499874115, + "rewards/rejected": -0.15156181156635284, + "step": 14110 + }, + { + "epoch": 9.758644536652834, + "grad_norm": 2.6926894187927246, + "learning_rate": 1.3408636852620256e-06, + "log_odds_chosen": 11.286189079284668, + "log_odds_ratio": -2.019215389736928e-05, + "logits/chosen": -0.7049732208251953, + "logits/rejected": -0.7106414437294006, + "logps/chosen": -8.282619091914967e-05, + "logps/rejected": -1.9253034591674805, + "loss": 0.3249, + "nll_loss": 0.08121532201766968, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.282619091914967e-06, + "rewards/margins": 0.1925220787525177, + "rewards/rejected": -0.19253036379814148, + "step": 14111 + }, + { + "epoch": 9.759336099585063, + "grad_norm": 3.2615058422088623, + "learning_rate": 1.3370216689718764e-06, + "log_odds_chosen": 11.126974105834961, + "log_odds_ratio": -2.0196584955556318e-05, + "logits/chosen": -0.35881438851356506, + "logits/rejected": -0.39509907364845276, + "logps/chosen": -0.00017172233492601663, + "logps/rejected": -2.2962188720703125, + "loss": 0.3438, + "nll_loss": 0.08595031499862671, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7172233128803782e-05, + "rewards/margins": 0.22960472106933594, + "rewards/rejected": -0.22962188720703125, + "step": 14112 + }, + { + "epoch": 9.760027662517288, + "grad_norm": 2.589125871658325, + "learning_rate": 1.3331796526817274e-06, + "log_odds_chosen": 9.937051773071289, + "log_odds_ratio": -0.0012398697435855865, + "logits/chosen": -0.1563034951686859, + "logits/rejected": -0.029356352984905243, + "logps/chosen": -0.009704858995974064, + "logps/rejected": -1.7757169008255005, + "loss": 0.261, + "nll_loss": 0.06513293832540512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009704858530312777, + "rewards/margins": 0.17660120129585266, + "rewards/rejected": -0.17757169902324677, + "step": 14113 + }, + { + "epoch": 9.760719225449517, + "grad_norm": 3.666938066482544, + "learning_rate": 1.3293376363915785e-06, + "log_odds_chosen": 11.526802062988281, + "log_odds_ratio": -4.0625600377097726e-05, + "logits/chosen": 0.01854725182056427, + "logits/rejected": -0.040144093334674835, + "logps/chosen": -0.00013797509018331766, + "logps/rejected": -2.309964895248413, + "loss": 0.3983, + "nll_loss": 0.09956299513578415, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3797509382129647e-05, + "rewards/margins": 0.2309826910495758, + "rewards/rejected": -0.2309964895248413, + "step": 14114 + }, + { + "epoch": 9.761410788381742, + "grad_norm": 2.951451063156128, + "learning_rate": 1.3254956201014293e-06, + "log_odds_chosen": 11.107068061828613, + "log_odds_ratio": -6.79503646097146e-05, + "logits/chosen": -0.41734617948532104, + "logits/rejected": -0.5176043510437012, + "logps/chosen": -0.0003165987436659634, + "logps/rejected": -2.4256505966186523, + "loss": 0.3921, + "nll_loss": 0.0980195701122284, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.165987436659634e-05, + "rewards/margins": 0.24253341555595398, + "rewards/rejected": -0.2425650656223297, + "step": 14115 + }, + { + "epoch": 9.76210235131397, + "grad_norm": 2.88873553276062, + "learning_rate": 1.3216536038112802e-06, + "log_odds_chosen": 10.593223571777344, + "log_odds_ratio": -6.995137664489448e-05, + "logits/chosen": -0.2237984538078308, + "logits/rejected": -0.32712095975875854, + "logps/chosen": -0.002185999881476164, + "logps/rejected": -2.6842517852783203, + "loss": 0.2768, + "nll_loss": 0.06920219212770462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00021859999105799943, + "rewards/margins": 0.26820656657218933, + "rewards/rejected": -0.2684251666069031, + "step": 14116 + }, + { + "epoch": 9.762793914246195, + "grad_norm": 4.580874919891357, + "learning_rate": 1.317811587521131e-06, + "log_odds_chosen": 11.170866012573242, + "log_odds_ratio": -2.7959044018643908e-05, + "logits/chosen": -0.21377623081207275, + "logits/rejected": -0.22888220846652985, + "logps/chosen": -0.00028878834564238787, + "logps/rejected": -2.9187369346618652, + "loss": 0.3573, + "nll_loss": 0.08931108564138412, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8878834200440906e-05, + "rewards/margins": 0.29184481501579285, + "rewards/rejected": -0.2918736934661865, + "step": 14117 + }, + { + "epoch": 9.763485477178424, + "grad_norm": 2.4250340461730957, + "learning_rate": 1.3139695712309822e-06, + "log_odds_chosen": 10.966585159301758, + "log_odds_ratio": -3.959906462114304e-05, + "logits/chosen": -0.29696375131607056, + "logits/rejected": -0.23120468854904175, + "logps/chosen": -0.0001043882584781386, + "logps/rejected": -1.4906551837921143, + "loss": 0.3578, + "nll_loss": 0.08944513648748398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0438825484015979e-05, + "rewards/margins": 0.14905507862567902, + "rewards/rejected": -0.1490655243396759, + "step": 14118 + }, + { + "epoch": 9.76417704011065, + "grad_norm": 4.123467445373535, + "learning_rate": 1.310127554940833e-06, + "log_odds_chosen": 10.684322357177734, + "log_odds_ratio": -3.5656423278851435e-05, + "logits/chosen": -0.05151619762182236, + "logits/rejected": -0.14460456371307373, + "logps/chosen": -0.000281482411082834, + "logps/rejected": -2.3089118003845215, + "loss": 0.3749, + "nll_loss": 0.09372323751449585, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8148242563474923e-05, + "rewards/margins": 0.2308630496263504, + "rewards/rejected": -0.23089119791984558, + "step": 14119 + }, + { + "epoch": 9.764868603042878, + "grad_norm": 4.185392379760742, + "learning_rate": 1.3062855386506839e-06, + "log_odds_chosen": 11.940038681030273, + "log_odds_ratio": -7.481678494514199e-06, + "logits/chosen": -0.09267014265060425, + "logits/rejected": -0.2242162525653839, + "logps/chosen": -0.00013214690261520445, + "logps/rejected": -2.61649227142334, + "loss": 0.4487, + "nll_loss": 0.11216644197702408, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3214690625318326e-05, + "rewards/margins": 0.26163601875305176, + "rewards/rejected": -0.2616492211818695, + "step": 14120 + }, + { + "epoch": 9.765560165975103, + "grad_norm": 3.458895683288574, + "learning_rate": 1.3024435223605348e-06, + "log_odds_chosen": 10.002806663513184, + "log_odds_ratio": -0.00016798570868559182, + "logits/chosen": 0.08603809773921967, + "logits/rejected": 0.07021744549274445, + "logps/chosen": -0.00020438554929569364, + "logps/rejected": -1.3508172035217285, + "loss": 0.5933, + "nll_loss": 0.14831441640853882, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0438554201973602e-05, + "rewards/margins": 0.13506127893924713, + "rewards/rejected": -0.1350817084312439, + "step": 14121 + }, + { + "epoch": 9.766251728907331, + "grad_norm": 2.6571707725524902, + "learning_rate": 1.2986015060703858e-06, + "log_odds_chosen": 11.517476081848145, + "log_odds_ratio": -3.799406476900913e-05, + "logits/chosen": -0.2700203061103821, + "logits/rejected": -0.21803030371665955, + "logps/chosen": -0.0003085459757130593, + "logps/rejected": -3.0179555416107178, + "loss": 0.2957, + "nll_loss": 0.0739310160279274, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0854600481688976e-05, + "rewards/margins": 0.30176469683647156, + "rewards/rejected": -0.3017955422401428, + "step": 14122 + }, + { + "epoch": 9.766943291839558, + "grad_norm": 4.420147895812988, + "learning_rate": 1.2947594897802368e-06, + "log_odds_chosen": 11.551900863647461, + "log_odds_ratio": -0.00017292052507400513, + "logits/chosen": -0.1685771644115448, + "logits/rejected": -0.20248129963874817, + "logps/chosen": -0.00016953478916548193, + "logps/rejected": -2.5348751544952393, + "loss": 0.483, + "nll_loss": 0.12072944641113281, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.695347782515455e-05, + "rewards/margins": 0.2534705698490143, + "rewards/rejected": -0.2534875273704529, + "step": 14123 + }, + { + "epoch": 9.767634854771785, + "grad_norm": 3.5160269737243652, + "learning_rate": 1.2909174734900875e-06, + "log_odds_chosen": 11.154373168945312, + "log_odds_ratio": -4.2867439333349466e-05, + "logits/chosen": -0.006218772381544113, + "logits/rejected": -0.006272992119193077, + "logps/chosen": -0.0006437020492739975, + "logps/rejected": -3.0088915824890137, + "loss": 0.4935, + "nll_loss": 0.12338031828403473, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43702078377828e-05, + "rewards/margins": 0.3008247911930084, + "rewards/rejected": -0.30088916420936584, + "step": 14124 + }, + { + "epoch": 9.768326417704012, + "grad_norm": 2.4684836864471436, + "learning_rate": 1.2870754571999387e-06, + "log_odds_chosen": 10.261589050292969, + "log_odds_ratio": -0.00013145655975677073, + "logits/chosen": -0.29296594858169556, + "logits/rejected": -0.2342432290315628, + "logps/chosen": -0.00031795038376003504, + "logps/rejected": -1.6441881656646729, + "loss": 0.3265, + "nll_loss": 0.08161911368370056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.179503983119503e-05, + "rewards/margins": 0.16438701748847961, + "rewards/rejected": -0.1644188016653061, + "step": 14125 + }, + { + "epoch": 9.769017980636239, + "grad_norm": 2.7318315505981445, + "learning_rate": 1.2832334409097896e-06, + "log_odds_chosen": 11.669432640075684, + "log_odds_ratio": -2.1845677110832185e-05, + "logits/chosen": -0.42241886258125305, + "logits/rejected": -0.42704257369041443, + "logps/chosen": -0.0001758452272042632, + "logps/rejected": -2.379481315612793, + "loss": 0.2661, + "nll_loss": 0.06652377545833588, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7584523448022082e-05, + "rewards/margins": 0.2379305362701416, + "rewards/rejected": -0.23794810473918915, + "step": 14126 + }, + { + "epoch": 9.769709543568466, + "grad_norm": 4.480816841125488, + "learning_rate": 1.2793914246196404e-06, + "log_odds_chosen": 12.366174697875977, + "log_odds_ratio": -6.5960643951257225e-06, + "logits/chosen": -0.36253371834754944, + "logits/rejected": -0.18785274028778076, + "logps/chosen": -5.130483987159096e-05, + "logps/rejected": -2.4808356761932373, + "loss": 0.3719, + "nll_loss": 0.09298262000083923, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130484169058036e-06, + "rewards/margins": 0.24807843565940857, + "rewards/rejected": -0.24808356165885925, + "step": 14127 + }, + { + "epoch": 9.770401106500692, + "grad_norm": 4.40105676651001, + "learning_rate": 1.2755494083294913e-06, + "log_odds_chosen": 11.451199531555176, + "log_odds_ratio": -2.284867878188379e-05, + "logits/chosen": 0.037913352251052856, + "logits/rejected": -0.0362299308180809, + "logps/chosen": -0.0001210991686093621, + "logps/rejected": -2.365833282470703, + "loss": 0.4228, + "nll_loss": 0.10570620000362396, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2109916497138329e-05, + "rewards/margins": 0.23657123744487762, + "rewards/rejected": -0.23658335208892822, + "step": 14128 + }, + { + "epoch": 9.77109266943292, + "grad_norm": 3.923491954803467, + "learning_rate": 1.2717073920393423e-06, + "log_odds_chosen": 11.70336627960205, + "log_odds_ratio": -3.302631012047641e-05, + "logits/chosen": -0.0079636350274086, + "logits/rejected": -0.04412021487951279, + "logps/chosen": -0.00017817021580412984, + "logps/rejected": -2.770371437072754, + "loss": 0.3479, + "nll_loss": 0.08698327839374542, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7817021216615103e-05, + "rewards/margins": 0.27701929211616516, + "rewards/rejected": -0.2770371437072754, + "step": 14129 + }, + { + "epoch": 9.771784232365146, + "grad_norm": 2.299215078353882, + "learning_rate": 1.2678653757491933e-06, + "log_odds_chosen": 12.437353134155273, + "log_odds_ratio": -7.882959835114889e-06, + "logits/chosen": -0.9748541116714478, + "logits/rejected": -1.050576090812683, + "logps/chosen": -8.047391020227224e-05, + "logps/rejected": -2.585092067718506, + "loss": 0.3165, + "nll_loss": 0.07913664728403091, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.047391020227224e-06, + "rewards/margins": 0.2585011422634125, + "rewards/rejected": -0.25850921869277954, + "step": 14130 + }, + { + "epoch": 9.772475795297373, + "grad_norm": 3.3570504188537598, + "learning_rate": 1.2640233594590442e-06, + "log_odds_chosen": 10.04572582244873, + "log_odds_ratio": -0.00029344053473323584, + "logits/chosen": -0.30483633279800415, + "logits/rejected": -0.3217681646347046, + "logps/chosen": -0.0002906577428802848, + "logps/rejected": -1.6494433879852295, + "loss": 0.3324, + "nll_loss": 0.08306828141212463, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9065777198411524e-05, + "rewards/margins": 0.1649152934551239, + "rewards/rejected": -0.1649443656206131, + "step": 14131 + }, + { + "epoch": 9.7731673582296, + "grad_norm": 3.0047953128814697, + "learning_rate": 1.260181343168895e-06, + "log_odds_chosen": 10.727274894714355, + "log_odds_ratio": -6.0682545154122636e-05, + "logits/chosen": -0.1944286823272705, + "logits/rejected": -0.11104271560907364, + "logps/chosen": -9.876063995761797e-05, + "logps/rejected": -1.8163423538208008, + "loss": 0.4689, + "nll_loss": 0.11722764372825623, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.876064723357558e-06, + "rewards/margins": 0.18162435293197632, + "rewards/rejected": -0.18163424730300903, + "step": 14132 + }, + { + "epoch": 9.773858921161827, + "grad_norm": 3.318375825881958, + "learning_rate": 1.2563393268787462e-06, + "log_odds_chosen": 10.844013214111328, + "log_odds_ratio": -4.2838320950977504e-05, + "logits/chosen": -0.18170973658561707, + "logits/rejected": -0.3086509704589844, + "logps/chosen": -0.00013860626495443285, + "logps/rejected": -1.9294129610061646, + "loss": 0.438, + "nll_loss": 0.10948415100574493, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3860626495443285e-05, + "rewards/margins": 0.19292744994163513, + "rewards/rejected": -0.1929413080215454, + "step": 14133 + }, + { + "epoch": 9.774550484094053, + "grad_norm": 4.006042957305908, + "learning_rate": 1.252497310588597e-06, + "log_odds_chosen": 11.966079711914062, + "log_odds_ratio": -3.672724051284604e-05, + "logits/chosen": 0.12438063323497772, + "logits/rejected": 0.23474116623401642, + "logps/chosen": -0.0010203744750469923, + "logps/rejected": -3.957035779953003, + "loss": 0.4223, + "nll_loss": 0.1055690348148346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010203745478065684, + "rewards/margins": 0.3956015408039093, + "rewards/rejected": -0.39570358395576477, + "step": 14134 + }, + { + "epoch": 9.77524204702628, + "grad_norm": 3.421947717666626, + "learning_rate": 1.2486552942984479e-06, + "log_odds_chosen": 10.71963119506836, + "log_odds_ratio": -9.687233250588179e-05, + "logits/chosen": -0.14968188107013702, + "logits/rejected": -0.2112322747707367, + "logps/chosen": -0.00012315776257310063, + "logps/rejected": -1.5105361938476562, + "loss": 0.3276, + "nll_loss": 0.08189380168914795, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.231577516591642e-05, + "rewards/margins": 0.15104131400585175, + "rewards/rejected": -0.15105360746383667, + "step": 14135 + }, + { + "epoch": 9.775933609958507, + "grad_norm": 3.2914109230041504, + "learning_rate": 1.2448132780082988e-06, + "log_odds_chosen": 11.381830215454102, + "log_odds_ratio": -2.1068624846520834e-05, + "logits/chosen": -0.35839736461639404, + "logits/rejected": -0.292026162147522, + "logps/chosen": -0.00011267801892245188, + "logps/rejected": -2.0828683376312256, + "loss": 0.3396, + "nll_loss": 0.08490855246782303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.126780261984095e-05, + "rewards/margins": 0.20827557146549225, + "rewards/rejected": -0.2082868218421936, + "step": 14136 + }, + { + "epoch": 9.776625172890734, + "grad_norm": 2.6957051753997803, + "learning_rate": 1.2409712617181498e-06, + "log_odds_chosen": 11.469804763793945, + "log_odds_ratio": -1.659486224525608e-05, + "logits/chosen": -0.4436866044998169, + "logits/rejected": -0.41482651233673096, + "logps/chosen": -7.341312448261306e-05, + "logps/rejected": -1.9058144092559814, + "loss": 0.3009, + "nll_loss": 0.0752357617020607, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.341312084463425e-06, + "rewards/margins": 0.19057410955429077, + "rewards/rejected": -0.19058147072792053, + "step": 14137 + }, + { + "epoch": 9.77731673582296, + "grad_norm": 3.438163995742798, + "learning_rate": 1.2371292454280007e-06, + "log_odds_chosen": 10.702985763549805, + "log_odds_ratio": -0.00013088583364151418, + "logits/chosen": -0.2541540265083313, + "logits/rejected": -0.26329177618026733, + "logps/chosen": -0.0002986131585203111, + "logps/rejected": -2.1693620681762695, + "loss": 0.4543, + "nll_loss": 0.1135587990283966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9861312214052305e-05, + "rewards/margins": 0.2169063538312912, + "rewards/rejected": -0.21693623065948486, + "step": 14138 + }, + { + "epoch": 9.778008298755188, + "grad_norm": 3.345736265182495, + "learning_rate": 1.2332872291378515e-06, + "log_odds_chosen": 12.125631332397461, + "log_odds_ratio": -3.3365773560944945e-05, + "logits/chosen": -0.47693562507629395, + "logits/rejected": -0.5634940266609192, + "logps/chosen": -0.0001726750488160178, + "logps/rejected": -3.035978317260742, + "loss": 0.3919, + "nll_loss": 0.09797894209623337, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7267502698814496e-05, + "rewards/margins": 0.3035805821418762, + "rewards/rejected": -0.3035978376865387, + "step": 14139 + }, + { + "epoch": 9.778699861687414, + "grad_norm": 4.212039947509766, + "learning_rate": 1.2294452128477025e-06, + "log_odds_chosen": 10.795297622680664, + "log_odds_ratio": -0.00023799219343345612, + "logits/chosen": -0.22696000337600708, + "logits/rejected": -0.37522876262664795, + "logps/chosen": -0.00017699907766655087, + "logps/rejected": -1.976933479309082, + "loss": 0.5068, + "nll_loss": 0.12667426466941833, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7699907402857207e-05, + "rewards/margins": 0.1976756453514099, + "rewards/rejected": -0.1976933479309082, + "step": 14140 + }, + { + "epoch": 9.779391424619641, + "grad_norm": 3.0429298877716064, + "learning_rate": 1.2256031965575534e-06, + "log_odds_chosen": 9.816993713378906, + "log_odds_ratio": -0.00025687177549116313, + "logits/chosen": -0.4343181848526001, + "logits/rejected": -0.4792379140853882, + "logps/chosen": -0.004632133059203625, + "logps/rejected": -2.4986414909362793, + "loss": 0.2904, + "nll_loss": 0.07257814705371857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000463213276816532, + "rewards/margins": 0.24940095841884613, + "rewards/rejected": -0.24986416101455688, + "step": 14141 + }, + { + "epoch": 9.780082987551868, + "grad_norm": 3.5660789012908936, + "learning_rate": 1.2217611802674044e-06, + "log_odds_chosen": 11.582379341125488, + "log_odds_ratio": -1.1961707059526816e-05, + "logits/chosen": -0.11018684506416321, + "logits/rejected": -0.1884918063879013, + "logps/chosen": -8.266264921985567e-05, + "logps/rejected": -2.0520644187927246, + "loss": 0.4035, + "nll_loss": 0.10088561475276947, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.266264558187686e-06, + "rewards/margins": 0.20519816875457764, + "rewards/rejected": -0.20520645380020142, + "step": 14142 + }, + { + "epoch": 9.780774550484095, + "grad_norm": 3.748652219772339, + "learning_rate": 1.2179191639772553e-06, + "log_odds_chosen": 10.329841613769531, + "log_odds_ratio": -0.000146003148984164, + "logits/chosen": 0.018064171075820923, + "logits/rejected": -0.08140605688095093, + "logps/chosen": -0.0004367720102891326, + "logps/rejected": -2.023275375366211, + "loss": 0.477, + "nll_loss": 0.1192474290728569, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3677198846125975e-05, + "rewards/margins": 0.2022838294506073, + "rewards/rejected": -0.20232751965522766, + "step": 14143 + }, + { + "epoch": 9.781466113416322, + "grad_norm": 2.5951290130615234, + "learning_rate": 1.2140771476871063e-06, + "log_odds_chosen": 10.93802547454834, + "log_odds_ratio": -3.0638646421721205e-05, + "logits/chosen": -0.2951814532279968, + "logits/rejected": -0.36579304933547974, + "logps/chosen": -0.00030578882433474064, + "logps/rejected": -2.4859442710876465, + "loss": 0.3166, + "nll_loss": 0.07915446907281876, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.05788817058783e-05, + "rewards/margins": 0.24856385588645935, + "rewards/rejected": -0.24859443306922913, + "step": 14144 + }, + { + "epoch": 9.782157676348548, + "grad_norm": 3.2371087074279785, + "learning_rate": 1.2102351313969573e-06, + "log_odds_chosen": 10.779736518859863, + "log_odds_ratio": -0.0002104683080688119, + "logits/chosen": -0.3973849415779114, + "logits/rejected": -0.3972107172012329, + "logps/chosen": -0.00013605685671791434, + "logps/rejected": -1.7934966087341309, + "loss": 0.3629, + "nll_loss": 0.09071143716573715, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3605684216599911e-05, + "rewards/margins": 0.1793360561132431, + "rewards/rejected": -0.17934966087341309, + "step": 14145 + }, + { + "epoch": 9.782849239280775, + "grad_norm": 2.9736454486846924, + "learning_rate": 1.206393115106808e-06, + "log_odds_chosen": 11.041149139404297, + "log_odds_ratio": -3.184582237736322e-05, + "logits/chosen": -0.7083456516265869, + "logits/rejected": -0.7434857487678528, + "logps/chosen": -0.0002840912784449756, + "logps/rejected": -2.2934787273406982, + "loss": 0.3777, + "nll_loss": 0.09442107379436493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8409125661710277e-05, + "rewards/margins": 0.2293194681406021, + "rewards/rejected": -0.22934786975383759, + "step": 14146 + }, + { + "epoch": 9.783540802213002, + "grad_norm": 4.897305965423584, + "learning_rate": 1.202551098816659e-06, + "log_odds_chosen": 11.59496784210205, + "log_odds_ratio": -2.3090786271495745e-05, + "logits/chosen": -0.11693152785301208, + "logits/rejected": -0.05875653773546219, + "logps/chosen": -0.00021892010408919305, + "logps/rejected": -2.939328908920288, + "loss": 0.3021, + "nll_loss": 0.07552653551101685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1892010408919305e-05, + "rewards/margins": 0.2939109802246094, + "rewards/rejected": -0.2939329147338867, + "step": 14147 + }, + { + "epoch": 9.784232365145229, + "grad_norm": 4.216211318969727, + "learning_rate": 1.1987090825265101e-06, + "log_odds_chosen": 11.53899097442627, + "log_odds_ratio": -1.641822382225655e-05, + "logits/chosen": -0.5299835801124573, + "logits/rejected": -0.5817488431930542, + "logps/chosen": -0.00015015192911960185, + "logps/rejected": -2.3932838439941406, + "loss": 0.3286, + "nll_loss": 0.0821424350142479, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5015193639555946e-05, + "rewards/margins": 0.23931337893009186, + "rewards/rejected": -0.23932838439941406, + "step": 14148 + }, + { + "epoch": 9.784923928077456, + "grad_norm": 6.387366771697998, + "learning_rate": 1.1948670662363609e-06, + "log_odds_chosen": 10.867435455322266, + "log_odds_ratio": -0.00010485449456609786, + "logits/chosen": -0.01297275722026825, + "logits/rejected": -0.08890549838542938, + "logps/chosen": -0.00029303666087798774, + "logps/rejected": -2.1026883125305176, + "loss": 0.8159, + "nll_loss": 0.20396147668361664, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9303666451596655e-05, + "rewards/margins": 0.21023951470851898, + "rewards/rejected": -0.21026882529258728, + "step": 14149 + }, + { + "epoch": 9.785615491009683, + "grad_norm": 3.6339125633239746, + "learning_rate": 1.1910250499462119e-06, + "log_odds_chosen": 12.274009704589844, + "log_odds_ratio": -3.182486034347676e-05, + "logits/chosen": -0.05865704268217087, + "logits/rejected": 0.04080052673816681, + "logps/chosen": -0.0001506828557467088, + "logps/rejected": -3.2266464233398438, + "loss": 0.3411, + "nll_loss": 0.08526080846786499, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5068284483277239e-05, + "rewards/margins": 0.3226495683193207, + "rewards/rejected": -0.32266464829444885, + "step": 14150 + }, + { + "epoch": 9.78630705394191, + "grad_norm": 3.0847744941711426, + "learning_rate": 1.1871830336560626e-06, + "log_odds_chosen": 10.046998977661133, + "log_odds_ratio": -0.0005083397263661027, + "logits/chosen": -0.4494885206222534, + "logits/rejected": -0.44672513008117676, + "logps/chosen": -0.0005137314437888563, + "logps/rejected": -1.7720887660980225, + "loss": 0.313, + "nll_loss": 0.07818809151649475, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1373142923694104e-05, + "rewards/margins": 0.17715752124786377, + "rewards/rejected": -0.17720890045166016, + "step": 14151 + }, + { + "epoch": 9.786998616874136, + "grad_norm": 2.7055211067199707, + "learning_rate": 1.1833410173659138e-06, + "log_odds_chosen": 12.727792739868164, + "log_odds_ratio": -6.9703046392533e-06, + "logits/chosen": -0.5745997428894043, + "logits/rejected": -0.5524734258651733, + "logps/chosen": -8.036150393309072e-05, + "logps/rejected": -2.9690189361572266, + "loss": 0.2696, + "nll_loss": 0.06739067286252975, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.036150575208012e-06, + "rewards/margins": 0.2968938946723938, + "rewards/rejected": -0.2969019114971161, + "step": 14152 + }, + { + "epoch": 9.787690179806363, + "grad_norm": 4.3523850440979, + "learning_rate": 1.1794990010757645e-06, + "log_odds_chosen": 11.877840042114258, + "log_odds_ratio": -1.3758017303189263e-05, + "logits/chosen": 0.049047283828258514, + "logits/rejected": -0.08200374990701675, + "logps/chosen": -0.0001703656162135303, + "logps/rejected": -2.9096250534057617, + "loss": 0.483, + "nll_loss": 0.12073956429958344, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7036563804140314e-05, + "rewards/margins": 0.29094547033309937, + "rewards/rejected": -0.2909625172615051, + "step": 14153 + }, + { + "epoch": 9.78838174273859, + "grad_norm": 2.886366605758667, + "learning_rate": 1.1756569847856155e-06, + "log_odds_chosen": 11.11473274230957, + "log_odds_ratio": -5.9133606555406004e-05, + "logits/chosen": 0.02898319810628891, + "logits/rejected": -0.15263637900352478, + "logps/chosen": -0.0001661498099565506, + "logps/rejected": -2.322352409362793, + "loss": 0.2655, + "nll_loss": 0.06635863333940506, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6614982087048702e-05, + "rewards/margins": 0.23221862316131592, + "rewards/rejected": -0.23223522305488586, + "step": 14154 + }, + { + "epoch": 9.789073305670817, + "grad_norm": 4.71960973739624, + "learning_rate": 1.1718149684954664e-06, + "log_odds_chosen": 10.76363468170166, + "log_odds_ratio": -0.00012997673184145242, + "logits/chosen": -0.12062954157590866, + "logits/rejected": -0.13581763207912445, + "logps/chosen": -0.0002602715394459665, + "logps/rejected": -2.5428872108459473, + "loss": 0.3783, + "nll_loss": 0.09456643462181091, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6027153580798768e-05, + "rewards/margins": 0.25426268577575684, + "rewards/rejected": -0.2542887032032013, + "step": 14155 + }, + { + "epoch": 9.789764868603044, + "grad_norm": 3.4646763801574707, + "learning_rate": 1.1679729522053174e-06, + "log_odds_chosen": 10.0863037109375, + "log_odds_ratio": -0.00011547702160896733, + "logits/chosen": -0.5279322266578674, + "logits/rejected": -0.5816812515258789, + "logps/chosen": -0.0002697250456549227, + "logps/rejected": -1.5220682621002197, + "loss": 0.327, + "nll_loss": 0.08173598349094391, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.69725078396732e-05, + "rewards/margins": 0.15217985212802887, + "rewards/rejected": -0.15220682322978973, + "step": 14156 + }, + { + "epoch": 9.79045643153527, + "grad_norm": 3.5090792179107666, + "learning_rate": 1.1641309359151684e-06, + "log_odds_chosen": 10.63144588470459, + "log_odds_ratio": -0.0001910420978674665, + "logits/chosen": -0.49909472465515137, + "logits/rejected": -0.4897599518299103, + "logps/chosen": -0.0013054630253463984, + "logps/rejected": -1.859399676322937, + "loss": 0.3322, + "nll_loss": 0.08304300904273987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013054630835540593, + "rewards/margins": 0.1858094334602356, + "rewards/rejected": -0.1859399676322937, + "step": 14157 + }, + { + "epoch": 9.791147994467497, + "grad_norm": 3.027678966522217, + "learning_rate": 1.1602889196250191e-06, + "log_odds_chosen": 11.527877807617188, + "log_odds_ratio": -1.5956939023453742e-05, + "logits/chosen": -0.35433340072631836, + "logits/rejected": -0.3891632556915283, + "logps/chosen": -0.00012688693823292851, + "logps/rejected": -2.255835771560669, + "loss": 0.3571, + "nll_loss": 0.08926960825920105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.268869345949497e-05, + "rewards/margins": 0.2255708873271942, + "rewards/rejected": -0.22558358311653137, + "step": 14158 + }, + { + "epoch": 9.791839557399724, + "grad_norm": 4.357656478881836, + "learning_rate": 1.1564469033348703e-06, + "log_odds_chosen": 10.750991821289062, + "log_odds_ratio": -0.00014336322783492506, + "logits/chosen": -0.34541675448417664, + "logits/rejected": -0.3815138638019562, + "logps/chosen": -0.00023925396089907736, + "logps/rejected": -2.113788604736328, + "loss": 0.451, + "nll_loss": 0.11273743957281113, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3925394998514093e-05, + "rewards/margins": 0.21135492622852325, + "rewards/rejected": -0.21137885749340057, + "step": 14159 + }, + { + "epoch": 9.792531120331951, + "grad_norm": 2.9158616065979004, + "learning_rate": 1.1526048870447213e-06, + "log_odds_chosen": 10.863975524902344, + "log_odds_ratio": -9.102724288823083e-05, + "logits/chosen": 0.1784595102071762, + "logits/rejected": 0.03506646305322647, + "logps/chosen": -0.00021359164384193718, + "logps/rejected": -2.2265512943267822, + "loss": 0.291, + "nll_loss": 0.07275011390447617, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.13591647479916e-05, + "rewards/margins": 0.22263376414775848, + "rewards/rejected": -0.22265511751174927, + "step": 14160 + }, + { + "epoch": 9.793222683264178, + "grad_norm": 3.547389268875122, + "learning_rate": 1.148762870754572e-06, + "log_odds_chosen": 9.15870475769043, + "log_odds_ratio": -0.000654935953207314, + "logits/chosen": -0.08512475341558456, + "logits/rejected": 0.048164308071136475, + "logps/chosen": -0.0006864020833745599, + "logps/rejected": -1.2858558893203735, + "loss": 0.5277, + "nll_loss": 0.13187168538570404, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.864021270303056e-05, + "rewards/margins": 0.12851695716381073, + "rewards/rejected": -0.1285855919122696, + "step": 14161 + }, + { + "epoch": 9.793914246196405, + "grad_norm": 4.508517265319824, + "learning_rate": 1.144920854464423e-06, + "log_odds_chosen": 10.289685249328613, + "log_odds_ratio": -0.06541527807712555, + "logits/chosen": -0.5938727855682373, + "logits/rejected": -0.6513862609863281, + "logps/chosen": -0.01044410653412342, + "logps/rejected": -2.716381311416626, + "loss": 0.3712, + "nll_loss": 0.08626425266265869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010444107465445995, + "rewards/margins": 0.2705937325954437, + "rewards/rejected": -0.2716381549835205, + "step": 14162 + }, + { + "epoch": 9.794605809128631, + "grad_norm": 3.4274098873138428, + "learning_rate": 1.141078838174274e-06, + "log_odds_chosen": 11.125181198120117, + "log_odds_ratio": -4.395114228827879e-05, + "logits/chosen": -0.3660215437412262, + "logits/rejected": -0.4066369831562042, + "logps/chosen": -0.00020414634491316974, + "logps/rejected": -2.4690892696380615, + "loss": 0.3781, + "nll_loss": 0.09451229870319366, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0414634491316974e-05, + "rewards/margins": 0.24688851833343506, + "rewards/rejected": -0.24690893292427063, + "step": 14163 + }, + { + "epoch": 9.795297372060858, + "grad_norm": 2.3276455402374268, + "learning_rate": 1.1372368218841249e-06, + "log_odds_chosen": 10.200767517089844, + "log_odds_ratio": -0.00020588882034644485, + "logits/chosen": -0.2072562575340271, + "logits/rejected": -0.2718925178050995, + "logps/chosen": -0.0003796897944994271, + "logps/rejected": -1.9515058994293213, + "loss": 0.2726, + "nll_loss": 0.06811818480491638, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.796897362917662e-05, + "rewards/margins": 0.19511263072490692, + "rewards/rejected": -0.19515059888362885, + "step": 14164 + }, + { + "epoch": 9.795988934993085, + "grad_norm": 3.7078750133514404, + "learning_rate": 1.1333948055939758e-06, + "log_odds_chosen": 10.788431167602539, + "log_odds_ratio": -7.90195626905188e-05, + "logits/chosen": 0.46831220388412476, + "logits/rejected": 0.37939536571502686, + "logps/chosen": -0.00032136685331352055, + "logps/rejected": -1.9700994491577148, + "loss": 0.3841, + "nll_loss": 0.09601810574531555, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.213668242096901e-05, + "rewards/margins": 0.19697780907154083, + "rewards/rejected": -0.19700995087623596, + "step": 14165 + }, + { + "epoch": 9.796680497925312, + "grad_norm": 5.544235706329346, + "learning_rate": 1.1295527893038266e-06, + "log_odds_chosen": 11.110427856445312, + "log_odds_ratio": -5.360724389902316e-05, + "logits/chosen": -0.31555014848709106, + "logits/rejected": -0.44030508399009705, + "logps/chosen": -0.0003340440453030169, + "logps/rejected": -2.5095624923706055, + "loss": 0.4651, + "nll_loss": 0.11626134067773819, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.340440889587626e-05, + "rewards/margins": 0.2509228587150574, + "rewards/rejected": -0.2509562373161316, + "step": 14166 + }, + { + "epoch": 9.797372060857539, + "grad_norm": 2.5396666526794434, + "learning_rate": 1.1257107730136778e-06, + "log_odds_chosen": 9.936513900756836, + "log_odds_ratio": -0.00018028414342552423, + "logits/chosen": -0.5710381269454956, + "logits/rejected": -0.5857884287834167, + "logps/chosen": -0.00018568903033155948, + "logps/rejected": -1.4176135063171387, + "loss": 0.2561, + "nll_loss": 0.06399574130773544, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8568902305560187e-05, + "rewards/margins": 0.1417427808046341, + "rewards/rejected": -0.14176134765148163, + "step": 14167 + }, + { + "epoch": 9.798063623789766, + "grad_norm": 4.6397809982299805, + "learning_rate": 1.1218687567235285e-06, + "log_odds_chosen": 11.111590385437012, + "log_odds_ratio": -8.788368722889572e-05, + "logits/chosen": -0.14068683981895447, + "logits/rejected": -0.2126733958721161, + "logps/chosen": -0.00029392243595793843, + "logps/rejected": -2.6638545989990234, + "loss": 0.5023, + "nll_loss": 0.12556704878807068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9392243959591724e-05, + "rewards/margins": 0.26635608077049255, + "rewards/rejected": -0.2663854658603668, + "step": 14168 + }, + { + "epoch": 9.798755186721992, + "grad_norm": 2.9909815788269043, + "learning_rate": 1.1180267404333795e-06, + "log_odds_chosen": 11.690807342529297, + "log_odds_ratio": -1.2376316590234637e-05, + "logits/chosen": 0.055607229471206665, + "logits/rejected": -0.10118089616298676, + "logps/chosen": -0.0001790410024113953, + "logps/rejected": -2.558742046356201, + "loss": 0.331, + "nll_loss": 0.08274025470018387, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.790409987734165e-05, + "rewards/margins": 0.25585630536079407, + "rewards/rejected": -0.2558741867542267, + "step": 14169 + }, + { + "epoch": 9.79944674965422, + "grad_norm": 3.224093198776245, + "learning_rate": 1.1141847241432304e-06, + "log_odds_chosen": 9.783575057983398, + "log_odds_ratio": -0.00012982710904907435, + "logits/chosen": -0.068440280854702, + "logits/rejected": -0.05813627690076828, + "logps/chosen": -0.0009000495774671435, + "logps/rejected": -1.8843883275985718, + "loss": 0.3821, + "nll_loss": 0.09550738334655762, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.000496356748044e-05, + "rewards/margins": 0.18834882974624634, + "rewards/rejected": -0.18843884766101837, + "step": 14170 + }, + { + "epoch": 9.800138312586446, + "grad_norm": 3.4567954540252686, + "learning_rate": 1.1103427078530814e-06, + "log_odds_chosen": 11.08468246459961, + "log_odds_ratio": -0.00014502073463518173, + "logits/chosen": -0.5457359552383423, + "logits/rejected": -0.5854384899139404, + "logps/chosen": -0.00023909546143840998, + "logps/rejected": -1.9390183687210083, + "loss": 0.4471, + "nll_loss": 0.11176192760467529, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3909547962830402e-05, + "rewards/margins": 0.1938779354095459, + "rewards/rejected": -0.19390185177326202, + "step": 14171 + }, + { + "epoch": 9.800829875518673, + "grad_norm": 3.547675609588623, + "learning_rate": 1.1065006915629324e-06, + "log_odds_chosen": 11.780421257019043, + "log_odds_ratio": -1.080137190001551e-05, + "logits/chosen": -0.40957897901535034, + "logits/rejected": -0.3727770745754242, + "logps/chosen": -6.516015127999708e-05, + "logps/rejected": -2.1233139038085938, + "loss": 0.3879, + "nll_loss": 0.09698373079299927, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.516015218949178e-06, + "rewards/margins": 0.21232487261295319, + "rewards/rejected": -0.2123313844203949, + "step": 14172 + }, + { + "epoch": 9.8015214384509, + "grad_norm": 3.0268282890319824, + "learning_rate": 1.1026586752727831e-06, + "log_odds_chosen": 10.072689056396484, + "log_odds_ratio": -0.00012022092414554209, + "logits/chosen": 0.08040404319763184, + "logits/rejected": 0.019931059330701828, + "logps/chosen": -0.00023253823746927083, + "logps/rejected": -1.420956015586853, + "loss": 0.5183, + "nll_loss": 0.1295713484287262, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3253825929714367e-05, + "rewards/margins": 0.14207234978675842, + "rewards/rejected": -0.14209561049938202, + "step": 14173 + }, + { + "epoch": 9.802213001383127, + "grad_norm": 2.7813079357147217, + "learning_rate": 1.098816658982634e-06, + "log_odds_chosen": 11.032231330871582, + "log_odds_ratio": -0.0002524368173908442, + "logits/chosen": -0.7510668635368347, + "logits/rejected": -0.7767488360404968, + "logps/chosen": -0.0005029549356549978, + "logps/rejected": -2.3083441257476807, + "loss": 0.3432, + "nll_loss": 0.08577017486095428, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.029549356549978e-05, + "rewards/margins": 0.2307841032743454, + "rewards/rejected": -0.23083440959453583, + "step": 14174 + }, + { + "epoch": 9.802904564315353, + "grad_norm": 3.758674144744873, + "learning_rate": 1.094974642692485e-06, + "log_odds_chosen": 11.322364807128906, + "log_odds_ratio": -4.581090615829453e-05, + "logits/chosen": -0.10903534293174744, + "logits/rejected": -0.1322764754295349, + "logps/chosen": -0.00021902378648519516, + "logps/rejected": -2.3018741607666016, + "loss": 0.5231, + "nll_loss": 0.13077309727668762, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.190238046750892e-05, + "rewards/margins": 0.2301655113697052, + "rewards/rejected": -0.23018741607666016, + "step": 14175 + }, + { + "epoch": 9.80359612724758, + "grad_norm": 4.369509220123291, + "learning_rate": 1.091132626402336e-06, + "log_odds_chosen": 11.108938217163086, + "log_odds_ratio": -0.000450789782917127, + "logits/chosen": -0.150197371840477, + "logits/rejected": -0.30005329847335815, + "logps/chosen": -0.0004888575640507042, + "logps/rejected": -2.422581434249878, + "loss": 0.6459, + "nll_loss": 0.16144119203090668, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.88857549498789e-05, + "rewards/margins": 0.2422092705965042, + "rewards/rejected": -0.24225814640522003, + "step": 14176 + }, + { + "epoch": 9.804287690179807, + "grad_norm": 3.787135601043701, + "learning_rate": 1.087290610112187e-06, + "log_odds_chosen": 10.38783073425293, + "log_odds_ratio": -0.00010526390542509034, + "logits/chosen": -0.14729399979114532, + "logits/rejected": -0.11241314560174942, + "logps/chosen": -0.0001371078978991136, + "logps/rejected": -1.7678358554840088, + "loss": 0.3392, + "nll_loss": 0.08480089157819748, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3710789062315598e-05, + "rewards/margins": 0.17676988244056702, + "rewards/rejected": -0.17678357660770416, + "step": 14177 + }, + { + "epoch": 9.804979253112034, + "grad_norm": 3.134108304977417, + "learning_rate": 1.083448593822038e-06, + "log_odds_chosen": 11.161537170410156, + "log_odds_ratio": -2.4501694497303106e-05, + "logits/chosen": -0.44808104634284973, + "logits/rejected": -0.3272436261177063, + "logps/chosen": -0.00018843442376237363, + "logps/rejected": -2.60626220703125, + "loss": 0.483, + "nll_loss": 0.12075001746416092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8843442376237363e-05, + "rewards/margins": 0.26060739159584045, + "rewards/rejected": -0.2606262266635895, + "step": 14178 + }, + { + "epoch": 9.80567081604426, + "grad_norm": 3.9424610137939453, + "learning_rate": 1.0796065775318889e-06, + "log_odds_chosen": 11.937067031860352, + "log_odds_ratio": -2.6453697500983253e-05, + "logits/chosen": 0.09776285290718079, + "logits/rejected": -0.04009261727333069, + "logps/chosen": -0.00026516837533563375, + "logps/rejected": -3.3824143409729004, + "loss": 0.4357, + "nll_loss": 0.10891351103782654, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6516838261159137e-05, + "rewards/margins": 0.3382148742675781, + "rewards/rejected": -0.33824142813682556, + "step": 14179 + }, + { + "epoch": 9.806362378976488, + "grad_norm": 3.355971097946167, + "learning_rate": 1.0757645612417396e-06, + "log_odds_chosen": 11.36531925201416, + "log_odds_ratio": -2.120831049978733e-05, + "logits/chosen": 0.018348708748817444, + "logits/rejected": -0.00679410994052887, + "logps/chosen": -0.00022142543457448483, + "logps/rejected": -2.782313346862793, + "loss": 0.2885, + "nll_loss": 0.07211624085903168, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2142543457448483e-05, + "rewards/margins": 0.2782091796398163, + "rewards/rejected": -0.27823132276535034, + "step": 14180 + }, + { + "epoch": 9.807053941908714, + "grad_norm": 3.0975801944732666, + "learning_rate": 1.0719225449515906e-06, + "log_odds_chosen": 11.767681121826172, + "log_odds_ratio": -1.4651730452897027e-05, + "logits/chosen": -0.28424689173698425, + "logits/rejected": -0.37487995624542236, + "logps/chosen": -0.00011077235831180587, + "logps/rejected": -2.540308713912964, + "loss": 0.3767, + "nll_loss": 0.0941721498966217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1077236194978468e-05, + "rewards/margins": 0.2540197968482971, + "rewards/rejected": -0.25403088331222534, + "step": 14181 + }, + { + "epoch": 9.807745504840941, + "grad_norm": 2.4416236877441406, + "learning_rate": 1.0680805286614418e-06, + "log_odds_chosen": 10.600057601928711, + "log_odds_ratio": -6.607848627027124e-05, + "logits/chosen": -0.12181997299194336, + "logits/rejected": -0.22408144176006317, + "logps/chosen": -0.00020478527585510164, + "logps/rejected": -1.9726923704147339, + "loss": 0.2484, + "nll_loss": 0.06209355592727661, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0478526494116522e-05, + "rewards/margins": 0.19724875688552856, + "rewards/rejected": -0.1972692459821701, + "step": 14182 + }, + { + "epoch": 9.808437067773168, + "grad_norm": 2.202310562133789, + "learning_rate": 1.0642385123712925e-06, + "log_odds_chosen": 10.932657241821289, + "log_odds_ratio": -0.00032081958488561213, + "logits/chosen": -0.07163320481777191, + "logits/rejected": -0.02166604995727539, + "logps/chosen": -0.0005710614495910704, + "logps/rejected": -2.6383872032165527, + "loss": 0.2048, + "nll_loss": 0.05117820203304291, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7106146414298564e-05, + "rewards/margins": 0.2637816071510315, + "rewards/rejected": -0.2638387084007263, + "step": 14183 + }, + { + "epoch": 9.809128630705395, + "grad_norm": 2.7330756187438965, + "learning_rate": 1.0603964960811435e-06, + "log_odds_chosen": 11.486774444580078, + "log_odds_ratio": -3.485183697193861e-05, + "logits/chosen": 0.036774277687072754, + "logits/rejected": 0.06198891997337341, + "logps/chosen": -0.00046196073526516557, + "logps/rejected": -3.1498825550079346, + "loss": 0.3413, + "nll_loss": 0.08531951159238815, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.619607352651656e-05, + "rewards/margins": 0.31494206190109253, + "rewards/rejected": -0.31498825550079346, + "step": 14184 + }, + { + "epoch": 9.809820193637622, + "grad_norm": 3.961125135421753, + "learning_rate": 1.0565544797909942e-06, + "log_odds_chosen": 11.124430656433105, + "log_odds_ratio": -4.4068317947676405e-05, + "logits/chosen": -0.37122130393981934, + "logits/rejected": -0.46189895272254944, + "logps/chosen": -0.00010646507143974304, + "logps/rejected": -2.1137115955352783, + "loss": 0.3482, + "nll_loss": 0.08704251050949097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0646506780176423e-05, + "rewards/margins": 0.21136051416397095, + "rewards/rejected": -0.21137115359306335, + "step": 14185 + }, + { + "epoch": 9.810511756569849, + "grad_norm": 3.631211519241333, + "learning_rate": 1.0527124635008454e-06, + "log_odds_chosen": 10.711901664733887, + "log_odds_ratio": -5.7884266425389796e-05, + "logits/chosen": -0.3625379204750061, + "logits/rejected": -0.2845163345336914, + "logps/chosen": -0.0003204490931238979, + "logps/rejected": -2.4358041286468506, + "loss": 0.4626, + "nll_loss": 0.11565632373094559, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2044910767581314e-05, + "rewards/margins": 0.24354836344718933, + "rewards/rejected": -0.2435804307460785, + "step": 14186 + }, + { + "epoch": 9.811203319502075, + "grad_norm": 3.038205623626709, + "learning_rate": 1.0488704472106961e-06, + "log_odds_chosen": 10.539594650268555, + "log_odds_ratio": -0.0004320595180615783, + "logits/chosen": -0.3175621032714844, + "logits/rejected": -0.3884882926940918, + "logps/chosen": -0.000516570289619267, + "logps/rejected": -2.3882803916931152, + "loss": 0.3221, + "nll_loss": 0.0804857388138771, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.16570289619267e-05, + "rewards/margins": 0.23877638578414917, + "rewards/rejected": -0.23882803320884705, + "step": 14187 + }, + { + "epoch": 9.811894882434302, + "grad_norm": 4.937509059906006, + "learning_rate": 1.045028430920547e-06, + "log_odds_chosen": 11.531312942504883, + "log_odds_ratio": -1.8641001588548534e-05, + "logits/chosen": -0.37507709860801697, + "logits/rejected": -0.44480010867118835, + "logps/chosen": -0.000322213425533846, + "logps/rejected": -2.9958252906799316, + "loss": 0.8522, + "nll_loss": 0.21304495632648468, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.222134546376765e-05, + "rewards/margins": 0.299550324678421, + "rewards/rejected": -0.2995825409889221, + "step": 14188 + }, + { + "epoch": 9.812586445366529, + "grad_norm": 3.532475709915161, + "learning_rate": 1.041186414630398e-06, + "log_odds_chosen": 9.091978073120117, + "log_odds_ratio": -0.0005241170874796808, + "logits/chosen": -0.5417889952659607, + "logits/rejected": -0.5371156334877014, + "logps/chosen": -0.0008866861462593079, + "logps/rejected": -1.4519448280334473, + "loss": 0.3514, + "nll_loss": 0.08780322968959808, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.866861753631383e-05, + "rewards/margins": 0.14510582387447357, + "rewards/rejected": -0.14519450068473816, + "step": 14189 + }, + { + "epoch": 9.813278008298756, + "grad_norm": 3.668975591659546, + "learning_rate": 1.037344398340249e-06, + "log_odds_chosen": 10.498470306396484, + "log_odds_ratio": -0.00035588949685916305, + "logits/chosen": -0.26290363073349, + "logits/rejected": -0.25256627798080444, + "logps/chosen": -0.0015520071610808372, + "logps/rejected": -2.535429000854492, + "loss": 0.3628, + "nll_loss": 0.09067076444625854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00015520071610808372, + "rewards/margins": 0.2533876895904541, + "rewards/rejected": -0.2535429000854492, + "step": 14190 + }, + { + "epoch": 9.813969571230983, + "grad_norm": 3.6465094089508057, + "learning_rate": 1.0335023820501e-06, + "log_odds_chosen": 11.360379219055176, + "log_odds_ratio": -0.00039515478420071304, + "logits/chosen": -0.20723837614059448, + "logits/rejected": -0.36116114258766174, + "logps/chosen": -0.0006092398543842137, + "logps/rejected": -2.817495584487915, + "loss": 0.4128, + "nll_loss": 0.10316009819507599, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0923983255634084e-05, + "rewards/margins": 0.2816886305809021, + "rewards/rejected": -0.28174954652786255, + "step": 14191 + }, + { + "epoch": 9.81466113416321, + "grad_norm": 2.419128656387329, + "learning_rate": 1.0296603657599507e-06, + "log_odds_chosen": 10.698506355285645, + "log_odds_ratio": -7.404476491501555e-05, + "logits/chosen": -0.17025601863861084, + "logits/rejected": -0.19649499654769897, + "logps/chosen": -0.0002077743411064148, + "logps/rejected": -2.078397750854492, + "loss": 0.23, + "nll_loss": 0.057494472712278366, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0777435565833002e-05, + "rewards/margins": 0.20781899988651276, + "rewards/rejected": -0.20783977210521698, + "step": 14192 + }, + { + "epoch": 9.815352697095436, + "grad_norm": 3.329080581665039, + "learning_rate": 1.025818349469802e-06, + "log_odds_chosen": 11.043357849121094, + "log_odds_ratio": -3.2028674468165264e-05, + "logits/chosen": -0.07440190017223358, + "logits/rejected": -0.053846318274736404, + "logps/chosen": -0.0005089318146929145, + "logps/rejected": -2.821354627609253, + "loss": 0.4336, + "nll_loss": 0.10839445888996124, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0893184379674494e-05, + "rewards/margins": 0.282084584236145, + "rewards/rejected": -0.2821354866027832, + "step": 14193 + }, + { + "epoch": 9.816044260027663, + "grad_norm": 3.651942491531372, + "learning_rate": 1.0219763331796529e-06, + "log_odds_chosen": 10.818357467651367, + "log_odds_ratio": -0.0002754127490334213, + "logits/chosen": -0.3202933669090271, + "logits/rejected": -0.3279436528682709, + "logps/chosen": -0.0005293386057019234, + "logps/rejected": -2.5842478275299072, + "loss": 0.3804, + "nll_loss": 0.09506859630346298, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2933864935766906e-05, + "rewards/margins": 0.25837188959121704, + "rewards/rejected": -0.2584247887134552, + "step": 14194 + }, + { + "epoch": 9.81673582295989, + "grad_norm": 2.4412009716033936, + "learning_rate": 1.0181343168895036e-06, + "log_odds_chosen": 11.217656135559082, + "log_odds_ratio": -6.202785152709112e-05, + "logits/chosen": -0.5744768977165222, + "logits/rejected": -0.6186306476593018, + "logps/chosen": -0.00016856074216775596, + "logps/rejected": -2.4487104415893555, + "loss": 0.2595, + "nll_loss": 0.06486619263887405, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6856072761584073e-05, + "rewards/margins": 0.24485419690608978, + "rewards/rejected": -0.24487105011940002, + "step": 14195 + }, + { + "epoch": 9.817427385892117, + "grad_norm": 2.520139455795288, + "learning_rate": 1.0142923005993546e-06, + "log_odds_chosen": 10.983305931091309, + "log_odds_ratio": -0.00019241197151131928, + "logits/chosen": -0.5942064523696899, + "logits/rejected": -0.6731418371200562, + "logps/chosen": -0.0005326925893314183, + "logps/rejected": -3.039681911468506, + "loss": 0.2778, + "nll_loss": 0.06942148506641388, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.326926111592911e-05, + "rewards/margins": 0.3039149343967438, + "rewards/rejected": -0.3039681911468506, + "step": 14196 + }, + { + "epoch": 9.818118948824344, + "grad_norm": 4.07489013671875, + "learning_rate": 1.0104502843092055e-06, + "log_odds_chosen": 10.513197898864746, + "log_odds_ratio": -5.2534029236994684e-05, + "logits/chosen": -0.5547770261764526, + "logits/rejected": -0.7435232996940613, + "logps/chosen": -0.0002200988819822669, + "logps/rejected": -1.8414217233657837, + "loss": 0.3489, + "nll_loss": 0.08720815181732178, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.200988819822669e-05, + "rewards/margins": 0.18412016332149506, + "rewards/rejected": -0.18414217233657837, + "step": 14197 + }, + { + "epoch": 9.81881051175657, + "grad_norm": 2.957580804824829, + "learning_rate": 1.0066082680190565e-06, + "log_odds_chosen": 10.992133140563965, + "log_odds_ratio": -4.3501433538040146e-05, + "logits/chosen": -0.5260941386222839, + "logits/rejected": -0.648362934589386, + "logps/chosen": -0.00034256701474078, + "logps/rejected": -2.743771553039551, + "loss": 0.3164, + "nll_loss": 0.07908672839403152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.425670001888648e-05, + "rewards/margins": 0.2743428945541382, + "rewards/rejected": -0.27437716722488403, + "step": 14198 + }, + { + "epoch": 9.819502074688797, + "grad_norm": 3.9296913146972656, + "learning_rate": 1.0027662517289075e-06, + "log_odds_chosen": 11.788595199584961, + "log_odds_ratio": -1.3948605555924587e-05, + "logits/chosen": -0.25325828790664673, + "logits/rejected": -0.32087087631225586, + "logps/chosen": -8.637905557407066e-05, + "logps/rejected": -2.1206068992614746, + "loss": 0.4042, + "nll_loss": 0.10105835646390915, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.637905921204947e-06, + "rewards/margins": 0.21205206215381622, + "rewards/rejected": -0.21206068992614746, + "step": 14199 + }, + { + "epoch": 9.820193637621024, + "grad_norm": 3.424304723739624, + "learning_rate": 9.989242354387582e-07, + "log_odds_chosen": 11.120806694030762, + "log_odds_ratio": -5.018915544496849e-05, + "logits/chosen": -0.07096761465072632, + "logits/rejected": -0.2540345788002014, + "logps/chosen": -0.0002843011461663991, + "logps/rejected": -2.281825065612793, + "loss": 0.4318, + "nll_loss": 0.10793668031692505, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8430116799427196e-05, + "rewards/margins": 0.22815406322479248, + "rewards/rejected": -0.22818250954151154, + "step": 14200 + }, + { + "epoch": 9.820885200553251, + "grad_norm": 4.0108819007873535, + "learning_rate": 9.950822191486094e-07, + "log_odds_chosen": 12.207027435302734, + "log_odds_ratio": -5.891701675864169e-06, + "logits/chosen": -0.18299826979637146, + "logits/rejected": -0.17729896306991577, + "logps/chosen": -0.0001339554728474468, + "logps/rejected": -2.995695114135742, + "loss": 0.521, + "nll_loss": 0.13025128841400146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3395549103734083e-05, + "rewards/margins": 0.2995561361312866, + "rewards/rejected": -0.2995695471763611, + "step": 14201 + }, + { + "epoch": 9.821576763485478, + "grad_norm": 3.378380060195923, + "learning_rate": 9.912402028584601e-07, + "log_odds_chosen": 10.768424034118652, + "log_odds_ratio": -6.376580859068781e-05, + "logits/chosen": -0.41383251547813416, + "logits/rejected": -0.4662688076496124, + "logps/chosen": -0.00010562510578893125, + "logps/rejected": -1.8373764753341675, + "loss": 0.4542, + "nll_loss": 0.1135462075471878, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0562511306488886e-05, + "rewards/margins": 0.18372708559036255, + "rewards/rejected": -0.18373766541481018, + "step": 14202 + }, + { + "epoch": 9.822268326417705, + "grad_norm": 3.4288904666900635, + "learning_rate": 9.87398186568311e-07, + "log_odds_chosen": 11.275004386901855, + "log_odds_ratio": -8.114479715004563e-05, + "logits/chosen": -0.4122001528739929, + "logits/rejected": -0.5066218972206116, + "logps/chosen": -0.00015522413013968617, + "logps/rejected": -2.3961973190307617, + "loss": 0.347, + "nll_loss": 0.08673498779535294, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5522411558777094e-05, + "rewards/margins": 0.23960421979427338, + "rewards/rejected": -0.23961973190307617, + "step": 14203 + }, + { + "epoch": 9.822959889349931, + "grad_norm": 3.5420782566070557, + "learning_rate": 9.835561702781618e-07, + "log_odds_chosen": 12.537866592407227, + "log_odds_ratio": -6.367015885189176e-05, + "logits/chosen": 0.0796017125248909, + "logits/rejected": 0.002951107919216156, + "logps/chosen": -0.00025438476586714387, + "logps/rejected": -4.141080856323242, + "loss": 0.3596, + "nll_loss": 0.08990590274333954, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.543847767810803e-05, + "rewards/margins": 0.4140826165676117, + "rewards/rejected": -0.4141080677509308, + "step": 14204 + }, + { + "epoch": 9.823651452282158, + "grad_norm": 4.7772088050842285, + "learning_rate": 9.79714153988013e-07, + "log_odds_chosen": 11.233759880065918, + "log_odds_ratio": -4.31876651418861e-05, + "logits/chosen": -0.37256962060928345, + "logits/rejected": -0.3746577501296997, + "logps/chosen": -0.0002783732197713107, + "logps/rejected": -2.4954299926757812, + "loss": 0.5566, + "nll_loss": 0.13915464282035828, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.783732270472683e-05, + "rewards/margins": 0.24951516091823578, + "rewards/rejected": -0.2495429962873459, + "step": 14205 + }, + { + "epoch": 9.824343015214385, + "grad_norm": 2.7360641956329346, + "learning_rate": 9.75872137697864e-07, + "log_odds_chosen": 11.81240463256836, + "log_odds_ratio": -2.6226813133689575e-05, + "logits/chosen": -0.2089729905128479, + "logits/rejected": -0.3485751748085022, + "logps/chosen": -0.00027663970831781626, + "logps/rejected": -2.7306594848632812, + "loss": 0.3143, + "nll_loss": 0.07858486473560333, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.766397301456891e-05, + "rewards/margins": 0.27303826808929443, + "rewards/rejected": -0.2730659246444702, + "step": 14206 + }, + { + "epoch": 9.825034578146612, + "grad_norm": 4.1154866218566895, + "learning_rate": 9.720301214077147e-07, + "log_odds_chosen": 12.137218475341797, + "log_odds_ratio": -1.3980280527903233e-05, + "logits/chosen": -0.16195784509181976, + "logits/rejected": -0.3471846878528595, + "logps/chosen": -0.00012617021275218576, + "logps/rejected": -3.104794979095459, + "loss": 0.3952, + "nll_loss": 0.09880155324935913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2617021639016457e-05, + "rewards/margins": 0.3104668855667114, + "rewards/rejected": -0.3104795217514038, + "step": 14207 + }, + { + "epoch": 9.825726141078839, + "grad_norm": 3.9353859424591064, + "learning_rate": 9.681881051175657e-07, + "log_odds_chosen": 10.912803649902344, + "log_odds_ratio": -0.00021392188500612974, + "logits/chosen": 0.3478098213672638, + "logits/rejected": 0.3224194049835205, + "logps/chosen": -0.0004368519294075668, + "logps/rejected": -2.136322021484375, + "loss": 0.5987, + "nll_loss": 0.1496632695198059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3685191485565156e-05, + "rewards/margins": 0.21358852088451385, + "rewards/rejected": -0.21363219618797302, + "step": 14208 + }, + { + "epoch": 9.826417704011066, + "grad_norm": 3.2916836738586426, + "learning_rate": 9.643460888274166e-07, + "log_odds_chosen": 10.82497787475586, + "log_odds_ratio": -9.243319072993472e-05, + "logits/chosen": -0.21201613545417786, + "logits/rejected": -0.23995202779769897, + "logps/chosen": -0.0002386215201113373, + "logps/rejected": -2.293454170227051, + "loss": 0.337, + "nll_loss": 0.08425004780292511, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.386215237493161e-05, + "rewards/margins": 0.22932155430316925, + "rewards/rejected": -0.2293454110622406, + "step": 14209 + }, + { + "epoch": 9.827109266943292, + "grad_norm": 2.628417730331421, + "learning_rate": 9.605040725372676e-07, + "log_odds_chosen": 9.22122573852539, + "log_odds_ratio": -0.00034783576847985387, + "logits/chosen": -0.1604822874069214, + "logits/rejected": -0.29971182346343994, + "logps/chosen": -0.000420909549575299, + "logps/rejected": -1.3741449117660522, + "loss": 0.259, + "nll_loss": 0.06471599638462067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209095277474262e-05, + "rewards/margins": 0.13737240433692932, + "rewards/rejected": -0.13741448521614075, + "step": 14210 + }, + { + "epoch": 9.82780082987552, + "grad_norm": 3.577942371368408, + "learning_rate": 9.566620562471186e-07, + "log_odds_chosen": 10.938081741333008, + "log_odds_ratio": -8.411614544456825e-05, + "logits/chosen": 0.007461972534656525, + "logits/rejected": -0.1085021048784256, + "logps/chosen": -0.0009497880819253623, + "logps/rejected": -2.315056562423706, + "loss": 0.4293, + "nll_loss": 0.10732435435056686, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.497880819253623e-05, + "rewards/margins": 0.2314106822013855, + "rewards/rejected": -0.2315056473016739, + "step": 14211 + }, + { + "epoch": 9.828492392807746, + "grad_norm": 3.1475579738616943, + "learning_rate": 9.528200399569694e-07, + "log_odds_chosen": 11.785881042480469, + "log_odds_ratio": -0.00021281295630615205, + "logits/chosen": -0.15384456515312195, + "logits/rejected": -0.16469359397888184, + "logps/chosen": -9.269881411455572e-05, + "logps/rejected": -2.4875898361206055, + "loss": 0.327, + "nll_loss": 0.08173118531703949, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.269881047657691e-06, + "rewards/margins": 0.24874970316886902, + "rewards/rejected": -0.24875898659229279, + "step": 14212 + }, + { + "epoch": 9.829183955739973, + "grad_norm": 2.7176268100738525, + "learning_rate": 9.489780236668204e-07, + "log_odds_chosen": 12.190725326538086, + "log_odds_ratio": -2.826091986207757e-05, + "logits/chosen": -0.07545314729213715, + "logits/rejected": -0.1277007907629013, + "logps/chosen": -0.0003076986758969724, + "logps/rejected": -3.1703543663024902, + "loss": 0.416, + "nll_loss": 0.10398771613836288, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0769868317293e-05, + "rewards/margins": 0.3170046806335449, + "rewards/rejected": -0.3170354664325714, + "step": 14213 + }, + { + "epoch": 9.8298755186722, + "grad_norm": 3.433530330657959, + "learning_rate": 9.451360073766712e-07, + "log_odds_chosen": 10.720812797546387, + "log_odds_ratio": -8.576504478696734e-05, + "logits/chosen": -0.3028118908405304, + "logits/rejected": -0.3917955160140991, + "logps/chosen": -0.0011436090571805835, + "logps/rejected": -2.5318808555603027, + "loss": 0.2916, + "nll_loss": 0.07288795709609985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011436091153882444, + "rewards/margins": 0.25307372212409973, + "rewards/rejected": -0.2531880736351013, + "step": 14214 + }, + { + "epoch": 9.830567081604427, + "grad_norm": 3.285998821258545, + "learning_rate": 9.412939910865223e-07, + "log_odds_chosen": 10.454912185668945, + "log_odds_ratio": -0.00021035922691226006, + "logits/chosen": -0.2515740990638733, + "logits/rejected": -0.34814438223838806, + "logps/chosen": -0.00033893357613123953, + "logps/rejected": -1.6399266719818115, + "loss": 0.4488, + "nll_loss": 0.11218146979808807, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.389335688552819e-05, + "rewards/margins": 0.16395878791809082, + "rewards/rejected": -0.16399268805980682, + "step": 14215 + }, + { + "epoch": 9.831258644536653, + "grad_norm": 3.471129894256592, + "learning_rate": 9.374519747963733e-07, + "log_odds_chosen": 11.554468154907227, + "log_odds_ratio": -0.00011103285942226648, + "logits/chosen": -0.4782823324203491, + "logits/rejected": -0.5865257382392883, + "logps/chosen": -0.0003853056114166975, + "logps/rejected": -3.031780242919922, + "loss": 0.3246, + "nll_loss": 0.08112723380327225, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.853056114166975e-05, + "rewards/margins": 0.30313950777053833, + "rewards/rejected": -0.30317801237106323, + "step": 14216 + }, + { + "epoch": 9.83195020746888, + "grad_norm": 2.7864818572998047, + "learning_rate": 9.336099585062241e-07, + "log_odds_chosen": 10.391328811645508, + "log_odds_ratio": -0.00019343834719620645, + "logits/chosen": -0.4431155025959015, + "logits/rejected": -0.5533031821250916, + "logps/chosen": -0.00038062711246311665, + "logps/rejected": -1.70412278175354, + "loss": 0.2584, + "nll_loss": 0.06458286941051483, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8062717067077756e-05, + "rewards/margins": 0.17037422955036163, + "rewards/rejected": -0.17041230201721191, + "step": 14217 + }, + { + "epoch": 9.832641770401107, + "grad_norm": 3.4246859550476074, + "learning_rate": 9.297679422160751e-07, + "log_odds_chosen": 11.085762023925781, + "log_odds_ratio": -3.4930937545141205e-05, + "logits/chosen": -0.5329622030258179, + "logits/rejected": -0.5232717394828796, + "logps/chosen": -0.00013040719204582274, + "logps/rejected": -2.0230469703674316, + "loss": 0.3623, + "nll_loss": 0.09056548774242401, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3040720659773797e-05, + "rewards/margins": 0.20229166746139526, + "rewards/rejected": -0.2023046910762787, + "step": 14218 + }, + { + "epoch": 9.833333333333334, + "grad_norm": 4.81691837310791, + "learning_rate": 9.259259259259259e-07, + "log_odds_chosen": 11.22233772277832, + "log_odds_ratio": -0.00020713395497296005, + "logits/chosen": 0.19287648797035217, + "logits/rejected": -0.06851596385240555, + "logps/chosen": -0.00026251928647980094, + "logps/rejected": -2.5511860847473145, + "loss": 0.322, + "nll_loss": 0.08047536760568619, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6251927920384333e-05, + "rewards/margins": 0.2550923526287079, + "rewards/rejected": -0.25511860847473145, + "step": 14219 + }, + { + "epoch": 9.83402489626556, + "grad_norm": 2.9679412841796875, + "learning_rate": 9.220839096357769e-07, + "log_odds_chosen": 10.381444931030273, + "log_odds_ratio": -0.00014931659097783267, + "logits/chosen": -0.21991831064224243, + "logits/rejected": -0.24379611015319824, + "logps/chosen": -0.0001824432547437027, + "logps/rejected": -1.773210883140564, + "loss": 0.327, + "nll_loss": 0.08174421638250351, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.824432547437027e-05, + "rewards/margins": 0.17730283737182617, + "rewards/rejected": -0.17732109129428864, + "step": 14220 + }, + { + "epoch": 9.834716459197788, + "grad_norm": 2.674473762512207, + "learning_rate": 9.182418933456278e-07, + "log_odds_chosen": 10.52938461303711, + "log_odds_ratio": -0.0003815424279309809, + "logits/chosen": -0.16022010147571564, + "logits/rejected": -0.19140568375587463, + "logps/chosen": -0.0003706804709509015, + "logps/rejected": -2.0917491912841797, + "loss": 0.2643, + "nll_loss": 0.06602489948272705, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.706804636749439e-05, + "rewards/margins": 0.20913787186145782, + "rewards/rejected": -0.20917494595050812, + "step": 14221 + }, + { + "epoch": 9.835408022130014, + "grad_norm": 5.377455711364746, + "learning_rate": 9.143998770554787e-07, + "log_odds_chosen": 12.041780471801758, + "log_odds_ratio": -2.566107468737755e-05, + "logits/chosen": -0.07172747701406479, + "logits/rejected": -0.10550010949373245, + "logps/chosen": -0.00014010498125571758, + "logps/rejected": -3.049236297607422, + "loss": 0.5647, + "nll_loss": 0.14116749167442322, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4010498489369638e-05, + "rewards/margins": 0.3049096465110779, + "rewards/rejected": -0.3049236536026001, + "step": 14222 + }, + { + "epoch": 9.836099585062241, + "grad_norm": 3.151463270187378, + "learning_rate": 9.105578607653298e-07, + "log_odds_chosen": 12.666389465332031, + "log_odds_ratio": -1.4467575965682045e-05, + "logits/chosen": -0.38619983196258545, + "logits/rejected": -0.42439472675323486, + "logps/chosen": -0.0003368236939422786, + "logps/rejected": -3.4165196418762207, + "loss": 0.3543, + "nll_loss": 0.08858034014701843, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.36823686666321e-05, + "rewards/margins": 0.34161829948425293, + "rewards/rejected": -0.341651976108551, + "step": 14223 + }, + { + "epoch": 9.836791147994468, + "grad_norm": 3.383626699447632, + "learning_rate": 9.067158444751805e-07, + "log_odds_chosen": 11.358930587768555, + "log_odds_ratio": -8.952614007284865e-05, + "logits/chosen": -0.5634051561355591, + "logits/rejected": -0.5803734064102173, + "logps/chosen": -0.0001293225068366155, + "logps/rejected": -1.9455944299697876, + "loss": 0.3954, + "nll_loss": 0.09883418679237366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2932249774166849e-05, + "rewards/margins": 0.19454652070999146, + "rewards/rejected": -0.19455945491790771, + "step": 14224 + }, + { + "epoch": 9.837482710926695, + "grad_norm": 3.4224886894226074, + "learning_rate": 9.028738281850316e-07, + "log_odds_chosen": 11.326183319091797, + "log_odds_ratio": -2.355646574869752e-05, + "logits/chosen": -0.05391174927353859, + "logits/rejected": -0.19636398553848267, + "logps/chosen": -0.00014157703844830394, + "logps/rejected": -2.18880295753479, + "loss": 0.2927, + "nll_loss": 0.07316230237483978, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4157703844830394e-05, + "rewards/margins": 0.21886613965034485, + "rewards/rejected": -0.218880295753479, + "step": 14225 + }, + { + "epoch": 9.838174273858922, + "grad_norm": 3.9258923530578613, + "learning_rate": 8.990318118948824e-07, + "log_odds_chosen": 12.06605339050293, + "log_odds_ratio": -1.743772554618772e-05, + "logits/chosen": -0.12857088446617126, + "logits/rejected": -0.2564074397087097, + "logps/chosen": -9.755059727467597e-05, + "logps/rejected": -2.5448834896087646, + "loss": 0.3866, + "nll_loss": 0.09664377570152283, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.755060091265477e-06, + "rewards/margins": 0.2544785737991333, + "rewards/rejected": -0.25448834896087646, + "step": 14226 + }, + { + "epoch": 9.838865836791149, + "grad_norm": 3.148038864135742, + "learning_rate": 8.951897956047334e-07, + "log_odds_chosen": 10.782970428466797, + "log_odds_ratio": -9.98452742351219e-05, + "logits/chosen": -0.19792483747005463, + "logits/rejected": -0.1644812673330307, + "logps/chosen": -0.00033488357439637184, + "logps/rejected": -2.11006236076355, + "loss": 0.2978, + "nll_loss": 0.07445169985294342, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.348835889482871e-05, + "rewards/margins": 0.21097274124622345, + "rewards/rejected": -0.21100623905658722, + "step": 14227 + }, + { + "epoch": 9.839557399723375, + "grad_norm": 2.8470401763916016, + "learning_rate": 8.913477793145844e-07, + "log_odds_chosen": 11.369443893432617, + "log_odds_ratio": -4.5292501454241574e-05, + "logits/chosen": -0.07345202565193176, + "logits/rejected": -0.16434796154499054, + "logps/chosen": -0.0002378990175202489, + "logps/rejected": -2.261381149291992, + "loss": 0.4839, + "nll_loss": 0.12097690999507904, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3789905753801577e-05, + "rewards/margins": 0.22611434757709503, + "rewards/rejected": -0.2261381298303604, + "step": 14228 + }, + { + "epoch": 9.840248962655602, + "grad_norm": 2.4687888622283936, + "learning_rate": 8.875057630244352e-07, + "log_odds_chosen": 11.013246536254883, + "log_odds_ratio": -0.00010485532402526587, + "logits/chosen": -0.04445496201515198, + "logits/rejected": 0.013508342206478119, + "logps/chosen": -0.00022190046729519963, + "logps/rejected": -2.511152505874634, + "loss": 0.2488, + "nll_loss": 0.062182098627090454, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2190048184711486e-05, + "rewards/margins": 0.2510930597782135, + "rewards/rejected": -0.2511152923107147, + "step": 14229 + }, + { + "epoch": 9.840940525587829, + "grad_norm": 2.7241017818450928, + "learning_rate": 8.836637467342862e-07, + "log_odds_chosen": 10.521742820739746, + "log_odds_ratio": -5.390686419559643e-05, + "logits/chosen": -0.00449778139591217, + "logits/rejected": -0.0959646999835968, + "logps/chosen": -0.00020149025658611208, + "logps/rejected": -1.9966009855270386, + "loss": 0.3227, + "nll_loss": 0.08067253232002258, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0149025658611208e-05, + "rewards/margins": 0.1996399611234665, + "rewards/rejected": -0.19966010749340057, + "step": 14230 + }, + { + "epoch": 9.841632088520056, + "grad_norm": 4.300022125244141, + "learning_rate": 8.79821730444137e-07, + "log_odds_chosen": 10.618481636047363, + "log_odds_ratio": -0.00010173715418204665, + "logits/chosen": -0.24378979206085205, + "logits/rejected": -0.3016761839389801, + "logps/chosen": -0.00034635435440577567, + "logps/rejected": -1.9258100986480713, + "loss": 0.4667, + "nll_loss": 0.11666956543922424, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.463543544057757e-05, + "rewards/margins": 0.19254638254642487, + "rewards/rejected": -0.19258102774620056, + "step": 14231 + }, + { + "epoch": 9.842323651452283, + "grad_norm": 4.205569267272949, + "learning_rate": 8.759797141539881e-07, + "log_odds_chosen": 11.185318946838379, + "log_odds_ratio": -2.8937647584825754e-05, + "logits/chosen": -0.1755310446023941, + "logits/rejected": -0.21790622174739838, + "logps/chosen": -0.0003062895266339183, + "logps/rejected": -2.576779365539551, + "loss": 0.6641, + "nll_loss": 0.1660199612379074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.062895484617911e-05, + "rewards/margins": 0.257647305727005, + "rewards/rejected": -0.25767794251441956, + "step": 14232 + }, + { + "epoch": 9.84301521438451, + "grad_norm": 3.5787975788116455, + "learning_rate": 8.721376978638391e-07, + "log_odds_chosen": 11.742012023925781, + "log_odds_ratio": -0.00013934174785390496, + "logits/chosen": -0.05586977303028107, + "logits/rejected": -0.16371804475784302, + "logps/chosen": -0.00023717660224065185, + "logps/rejected": -2.860356092453003, + "loss": 0.3523, + "nll_loss": 0.08806522935628891, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3717660951660946e-05, + "rewards/margins": 0.28601187467575073, + "rewards/rejected": -0.28603559732437134, + "step": 14233 + }, + { + "epoch": 9.843706777316736, + "grad_norm": 3.3110666275024414, + "learning_rate": 8.682956815736899e-07, + "log_odds_chosen": 10.989479064941406, + "log_odds_ratio": -8.622042514616624e-05, + "logits/chosen": -0.6725688576698303, + "logits/rejected": -0.6157367825508118, + "logps/chosen": -0.00035073357867076993, + "logps/rejected": -2.6909520626068115, + "loss": 0.382, + "nll_loss": 0.09548350423574448, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.507336077746004e-05, + "rewards/margins": 0.2690601348876953, + "rewards/rejected": -0.26909518241882324, + "step": 14234 + }, + { + "epoch": 9.844398340248963, + "grad_norm": 3.361088991165161, + "learning_rate": 8.644536652835409e-07, + "log_odds_chosen": 11.052905082702637, + "log_odds_ratio": -0.0004103026003576815, + "logits/chosen": -0.20317518711090088, + "logits/rejected": -0.28955215215682983, + "logps/chosen": -0.0008011145400814712, + "logps/rejected": -2.3988914489746094, + "loss": 0.4576, + "nll_loss": 0.1143680214881897, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.011145837372169e-05, + "rewards/margins": 0.2398090362548828, + "rewards/rejected": -0.23988914489746094, + "step": 14235 + }, + { + "epoch": 9.84508990318119, + "grad_norm": 2.968040704727173, + "learning_rate": 8.606116489933917e-07, + "log_odds_chosen": 10.519956588745117, + "log_odds_ratio": -6.622553337365389e-05, + "logits/chosen": -0.32500743865966797, + "logits/rejected": -0.26284950971603394, + "logps/chosen": -0.00019432193948887289, + "logps/rejected": -1.7805821895599365, + "loss": 0.3513, + "nll_loss": 0.08781195431947708, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9432191038504243e-05, + "rewards/margins": 0.17803877592086792, + "rewards/rejected": -0.1780582070350647, + "step": 14236 + }, + { + "epoch": 9.845781466113417, + "grad_norm": 3.02480149269104, + "learning_rate": 8.567696327032427e-07, + "log_odds_chosen": 11.432319641113281, + "log_odds_ratio": -6.157150346552953e-05, + "logits/chosen": -0.03514774143695831, + "logits/rejected": -0.14978571236133575, + "logps/chosen": -0.00016551982844248414, + "logps/rejected": -2.245384693145752, + "loss": 0.3142, + "nll_loss": 0.07853604853153229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6551983208046295e-05, + "rewards/margins": 0.2245219349861145, + "rewards/rejected": -0.22453849017620087, + "step": 14237 + }, + { + "epoch": 9.846473029045644, + "grad_norm": 4.3203253746032715, + "learning_rate": 8.529276164130936e-07, + "log_odds_chosen": 11.395495414733887, + "log_odds_ratio": -1.157735641754698e-05, + "logits/chosen": -0.4227936565876007, + "logits/rejected": -0.3641412854194641, + "logps/chosen": -0.00022150250151753426, + "logps/rejected": -2.7379443645477295, + "loss": 0.4385, + "nll_loss": 0.10962338745594025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.215025233454071e-05, + "rewards/margins": 0.27377229928970337, + "rewards/rejected": -0.2737944424152374, + "step": 14238 + }, + { + "epoch": 9.84716459197787, + "grad_norm": 3.5373475551605225, + "learning_rate": 8.490856001229445e-07, + "log_odds_chosen": 11.616133689880371, + "log_odds_ratio": -3.7899713788647205e-05, + "logits/chosen": 0.08863887190818787, + "logits/rejected": 0.13405123353004456, + "logps/chosen": -0.0002468510647304356, + "logps/rejected": -2.6139259338378906, + "loss": 0.5372, + "nll_loss": 0.1342896819114685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4685105017852038e-05, + "rewards/margins": 0.26136791706085205, + "rewards/rejected": -0.26139259338378906, + "step": 14239 + }, + { + "epoch": 9.847856154910097, + "grad_norm": 3.6691622734069824, + "learning_rate": 8.452435838327956e-07, + "log_odds_chosen": 10.332157135009766, + "log_odds_ratio": -8.147610060404986e-05, + "logits/chosen": -0.35312798619270325, + "logits/rejected": -0.38212132453918457, + "logps/chosen": -0.0007140946108847857, + "logps/rejected": -2.3038883209228516, + "loss": 0.4133, + "nll_loss": 0.10331538319587708, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.140946399886161e-05, + "rewards/margins": 0.23031742870807648, + "rewards/rejected": -0.2303888499736786, + "step": 14240 + }, + { + "epoch": 9.848547717842324, + "grad_norm": 2.72764253616333, + "learning_rate": 8.414015675426463e-07, + "log_odds_chosen": 9.962644577026367, + "log_odds_ratio": -6.466775084845722e-05, + "logits/chosen": -0.2865466773509979, + "logits/rejected": -0.30098193883895874, + "logps/chosen": -0.0002605429326649755, + "logps/rejected": -1.6753520965576172, + "loss": 0.3607, + "nll_loss": 0.09016528725624084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6054294721689075e-05, + "rewards/margins": 0.16750916838645935, + "rewards/rejected": -0.1675352305173874, + "step": 14241 + }, + { + "epoch": 9.849239280774551, + "grad_norm": 4.229187488555908, + "learning_rate": 8.375595512524974e-07, + "log_odds_chosen": 11.752243041992188, + "log_odds_ratio": -2.328364280401729e-05, + "logits/chosen": -0.46780213713645935, + "logits/rejected": -0.4287949204444885, + "logps/chosen": -0.0002484232245478779, + "logps/rejected": -3.091888904571533, + "loss": 0.4013, + "nll_loss": 0.10031718015670776, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.484232209098991e-05, + "rewards/margins": 0.3091640770435333, + "rewards/rejected": -0.3091889023780823, + "step": 14242 + }, + { + "epoch": 9.849930843706778, + "grad_norm": 3.541060447692871, + "learning_rate": 8.337175349623483e-07, + "log_odds_chosen": 11.745586395263672, + "log_odds_ratio": -1.5079102013260126e-05, + "logits/chosen": -0.46697476506233215, + "logits/rejected": -0.46221745014190674, + "logps/chosen": -0.0005979898851364851, + "logps/rejected": -2.6756162643432617, + "loss": 0.4069, + "nll_loss": 0.10171540081501007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979898924124427e-05, + "rewards/margins": 0.2675018310546875, + "rewards/rejected": -0.2675616145133972, + "step": 14243 + }, + { + "epoch": 9.850622406639005, + "grad_norm": 3.7117698192596436, + "learning_rate": 8.298755186721992e-07, + "log_odds_chosen": 10.491707801818848, + "log_odds_ratio": -0.0003969599201809615, + "logits/chosen": -0.2933369576931, + "logits/rejected": -0.44635993242263794, + "logps/chosen": -0.00018704970716498792, + "logps/rejected": -1.8389921188354492, + "loss": 0.4396, + "nll_loss": 0.10986747592687607, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.870496998890303e-05, + "rewards/margins": 0.1838805079460144, + "rewards/rejected": -0.18389922380447388, + "step": 14244 + }, + { + "epoch": 9.851313969571232, + "grad_norm": 4.036135196685791, + "learning_rate": 8.260335023820502e-07, + "log_odds_chosen": 11.864362716674805, + "log_odds_ratio": -9.471379598835483e-06, + "logits/chosen": -0.03377307951450348, + "logits/rejected": -0.16422481834888458, + "logps/chosen": -7.505207759095356e-05, + "logps/rejected": -2.2845170497894287, + "loss": 0.6574, + "nll_loss": 0.16434094309806824, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.505208486691117e-06, + "rewards/margins": 0.2284441888332367, + "rewards/rejected": -0.2284516990184784, + "step": 14245 + }, + { + "epoch": 9.852005532503458, + "grad_norm": 2.320402145385742, + "learning_rate": 8.22191486091901e-07, + "log_odds_chosen": 11.187734603881836, + "log_odds_ratio": -2.727216633502394e-05, + "logits/chosen": -0.40191370248794556, + "logits/rejected": -0.43743768334388733, + "logps/chosen": -0.0002643515763338655, + "logps/rejected": -2.5183866024017334, + "loss": 0.256, + "nll_loss": 0.06400003284215927, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6435158360982314e-05, + "rewards/margins": 0.251812219619751, + "rewards/rejected": -0.25183865427970886, + "step": 14246 + }, + { + "epoch": 9.852697095435685, + "grad_norm": 2.3957173824310303, + "learning_rate": 8.18349469801752e-07, + "log_odds_chosen": 10.161579132080078, + "log_odds_ratio": -0.0006505093188025057, + "logits/chosen": -0.6251837015151978, + "logits/rejected": -0.6660484075546265, + "logps/chosen": -0.00022856263967696577, + "logps/rejected": -1.7181360721588135, + "loss": 0.2409, + "nll_loss": 0.060165874660015106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2856263967696577e-05, + "rewards/margins": 0.17179076373577118, + "rewards/rejected": -0.17181360721588135, + "step": 14247 + }, + { + "epoch": 9.853388658367912, + "grad_norm": 2.6903257369995117, + "learning_rate": 8.145074535116028e-07, + "log_odds_chosen": 11.195083618164062, + "log_odds_ratio": -8.327865361934528e-05, + "logits/chosen": -0.16145284473896027, + "logits/rejected": -0.25972306728363037, + "logps/chosen": -0.00040092156268656254, + "logps/rejected": -3.030973434448242, + "loss": 0.2734, + "nll_loss": 0.06833411753177643, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0092156268656254e-05, + "rewards/margins": 0.3030572533607483, + "rewards/rejected": -0.30309736728668213, + "step": 14248 + }, + { + "epoch": 9.854080221300139, + "grad_norm": 5.38814640045166, + "learning_rate": 8.106654372214539e-07, + "log_odds_chosen": 11.149696350097656, + "log_odds_ratio": -2.1174480934860185e-05, + "logits/chosen": 0.13153231143951416, + "logits/rejected": -0.09815803915262222, + "logps/chosen": -0.00029714504489675164, + "logps/rejected": -2.774975538253784, + "loss": 0.5003, + "nll_loss": 0.12506981194019318, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9714505217270926e-05, + "rewards/margins": 0.27746787667274475, + "rewards/rejected": -0.2774975597858429, + "step": 14249 + }, + { + "epoch": 9.854771784232366, + "grad_norm": 3.2264857292175293, + "learning_rate": 8.068234209313049e-07, + "log_odds_chosen": 11.523049354553223, + "log_odds_ratio": -6.451080844271928e-05, + "logits/chosen": -0.28169524669647217, + "logits/rejected": -0.28970491886138916, + "logps/chosen": -0.0004202952259220183, + "logps/rejected": -2.4245822429656982, + "loss": 0.4129, + "nll_loss": 0.10322895646095276, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.202951822662726e-05, + "rewards/margins": 0.24241620302200317, + "rewards/rejected": -0.2424582540988922, + "step": 14250 + }, + { + "epoch": 9.855463347164592, + "grad_norm": 3.9890167713165283, + "learning_rate": 8.029814046411557e-07, + "log_odds_chosen": 10.884989738464355, + "log_odds_ratio": -4.29791361966636e-05, + "logits/chosen": -0.07643628865480423, + "logits/rejected": -0.24363578855991364, + "logps/chosen": -0.00019140413496643305, + "logps/rejected": -2.163062334060669, + "loss": 0.5284, + "nll_loss": 0.13209296762943268, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9140414224239066e-05, + "rewards/margins": 0.21628707647323608, + "rewards/rejected": -0.21630622446537018, + "step": 14251 + }, + { + "epoch": 9.85615491009682, + "grad_norm": 3.9284536838531494, + "learning_rate": 7.991393883510067e-07, + "log_odds_chosen": 11.530202865600586, + "log_odds_ratio": -3.462812674115412e-05, + "logits/chosen": -0.32096803188323975, + "logits/rejected": -0.4255814552307129, + "logps/chosen": -0.00027573731495067477, + "logps/rejected": -2.7849910259246826, + "loss": 0.3758, + "nll_loss": 0.09394600987434387, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7573729312280193e-05, + "rewards/margins": 0.27847155928611755, + "rewards/rejected": -0.27849912643432617, + "step": 14252 + }, + { + "epoch": 9.856846473029046, + "grad_norm": 3.034029483795166, + "learning_rate": 7.952973720608575e-07, + "log_odds_chosen": 11.338737487792969, + "log_odds_ratio": -3.79706580133643e-05, + "logits/chosen": -0.006503105163574219, + "logits/rejected": -0.08917421102523804, + "logps/chosen": -0.0002537562686484307, + "logps/rejected": -2.6884939670562744, + "loss": 0.3293, + "nll_loss": 0.08232768625020981, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5375627956236713e-05, + "rewards/margins": 0.26882404088974, + "rewards/rejected": -0.2688494026660919, + "step": 14253 + }, + { + "epoch": 9.857538035961273, + "grad_norm": 4.269557476043701, + "learning_rate": 7.914553557707085e-07, + "log_odds_chosen": 10.948267936706543, + "log_odds_ratio": -8.185338811017573e-05, + "logits/chosen": -0.30610525608062744, + "logits/rejected": -0.3892078399658203, + "logps/chosen": -0.0004585545975714922, + "logps/rejected": -2.365933418273926, + "loss": 0.335, + "nll_loss": 0.08375367522239685, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.585546048474498e-05, + "rewards/margins": 0.23654749989509583, + "rewards/rejected": -0.2365933507680893, + "step": 14254 + }, + { + "epoch": 9.8582295988935, + "grad_norm": 3.503242015838623, + "learning_rate": 7.876133394805594e-07, + "log_odds_chosen": 11.935919761657715, + "log_odds_ratio": -1.5749861631775275e-05, + "logits/chosen": -0.49925288558006287, + "logits/rejected": -0.5630971789360046, + "logps/chosen": -7.692461076658219e-05, + "logps/rejected": -2.2525434494018555, + "loss": 0.412, + "nll_loss": 0.1030036062002182, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.692462531849742e-06, + "rewards/margins": 0.22524665296077728, + "rewards/rejected": -0.2252543419599533, + "step": 14255 + }, + { + "epoch": 9.858921161825727, + "grad_norm": 3.271296501159668, + "learning_rate": 7.837713231904103e-07, + "log_odds_chosen": 10.795654296875, + "log_odds_ratio": -0.0001503465318819508, + "logits/chosen": 0.001009523868560791, + "logits/rejected": -0.06132131814956665, + "logps/chosen": -0.0003437385312281549, + "logps/rejected": -2.3663485050201416, + "loss": 0.3405, + "nll_loss": 0.085112065076828, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.437385385041125e-05, + "rewards/margins": 0.23660047352313995, + "rewards/rejected": -0.23663485050201416, + "step": 14256 + }, + { + "epoch": 9.859612724757953, + "grad_norm": 2.5387489795684814, + "learning_rate": 7.799293069002613e-07, + "log_odds_chosen": 10.57435131072998, + "log_odds_ratio": -6.567249511135742e-05, + "logits/chosen": -0.35600507259368896, + "logits/rejected": -0.3159923851490021, + "logps/chosen": -0.00017069655586965382, + "logps/rejected": -1.6613383293151855, + "loss": 0.2971, + "nll_loss": 0.07425770163536072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.706965485936962e-05, + "rewards/margins": 0.16611677408218384, + "rewards/rejected": -0.1661338359117508, + "step": 14257 + }, + { + "epoch": 9.86030428769018, + "grad_norm": 2.987234354019165, + "learning_rate": 7.760872906101121e-07, + "log_odds_chosen": 10.598260879516602, + "log_odds_ratio": -9.142129420069978e-05, + "logits/chosen": -0.21032246947288513, + "logits/rejected": -0.27841508388519287, + "logps/chosen": -0.00016935166786424816, + "logps/rejected": -1.6317389011383057, + "loss": 0.2541, + "nll_loss": 0.06352090835571289, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6935167877818458e-05, + "rewards/margins": 0.16315695643424988, + "rewards/rejected": -0.1631738841533661, + "step": 14258 + }, + { + "epoch": 9.860995850622407, + "grad_norm": 3.952584743499756, + "learning_rate": 7.722452743199632e-07, + "log_odds_chosen": 10.416267395019531, + "log_odds_ratio": -5.154755126568489e-05, + "logits/chosen": -0.133428692817688, + "logits/rejected": -0.1537066251039505, + "logps/chosen": -0.0003270559827797115, + "logps/rejected": -2.1095032691955566, + "loss": 0.4201, + "nll_loss": 0.10501537472009659, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.270560046075843e-05, + "rewards/margins": 0.2109176069498062, + "rewards/rejected": -0.2109503298997879, + "step": 14259 + }, + { + "epoch": 9.861687413554634, + "grad_norm": 3.255803346633911, + "learning_rate": 7.684032580298141e-07, + "log_odds_chosen": 11.39590835571289, + "log_odds_ratio": -7.392850966425613e-05, + "logits/chosen": -0.34807223081588745, + "logits/rejected": -0.4470960199832916, + "logps/chosen": -0.00012161783524788916, + "logps/rejected": -2.456490993499756, + "loss": 0.4438, + "nll_loss": 0.11094728112220764, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2161783160991035e-05, + "rewards/margins": 0.2456369251012802, + "rewards/rejected": -0.24564911425113678, + "step": 14260 + }, + { + "epoch": 9.86237897648686, + "grad_norm": 2.4818081855773926, + "learning_rate": 7.64561241739665e-07, + "log_odds_chosen": 11.260456085205078, + "log_odds_ratio": -2.046274494205136e-05, + "logits/chosen": -0.101976178586483, + "logits/rejected": -0.2396450936794281, + "logps/chosen": -6.300826498772949e-05, + "logps/rejected": -1.7347753047943115, + "loss": 0.248, + "nll_loss": 0.061990223824977875, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300826498772949e-06, + "rewards/margins": 0.17347121238708496, + "rewards/rejected": -0.17347753047943115, + "step": 14261 + }, + { + "epoch": 9.863070539419088, + "grad_norm": 2.9214489459991455, + "learning_rate": 7.607192254495159e-07, + "log_odds_chosen": 11.829577445983887, + "log_odds_ratio": -2.8550321076181717e-05, + "logits/chosen": -0.16604715585708618, + "logits/rejected": -0.19383998215198517, + "logps/chosen": -0.0003231268492527306, + "logps/rejected": -2.782902717590332, + "loss": 0.3157, + "nll_loss": 0.07892406731843948, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2312687835656106e-05, + "rewards/margins": 0.27825796604156494, + "rewards/rejected": -0.2782903015613556, + "step": 14262 + }, + { + "epoch": 9.863762102351314, + "grad_norm": 3.5143449306488037, + "learning_rate": 7.568772091593668e-07, + "log_odds_chosen": 11.174112319946289, + "log_odds_ratio": -6.027439667377621e-05, + "logits/chosen": -0.12853315472602844, + "logits/rejected": -0.09809570014476776, + "logps/chosen": -0.00025325047317892313, + "logps/rejected": -2.588475227355957, + "loss": 0.2653, + "nll_loss": 0.06632782518863678, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5325047317892313e-05, + "rewards/margins": 0.2588222026824951, + "rewards/rejected": -0.25884753465652466, + "step": 14263 + }, + { + "epoch": 9.864453665283541, + "grad_norm": 2.2648425102233887, + "learning_rate": 7.530351928692178e-07, + "log_odds_chosen": 10.080967903137207, + "log_odds_ratio": -0.00010812430264195427, + "logits/chosen": -0.5540417432785034, + "logits/rejected": -0.5870547294616699, + "logps/chosen": -0.0004334770201239735, + "logps/rejected": -1.931509017944336, + "loss": 0.2214, + "nll_loss": 0.05533323436975479, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.334770346758887e-05, + "rewards/margins": 0.19310756027698517, + "rewards/rejected": -0.19315090775489807, + "step": 14264 + }, + { + "epoch": 9.865145228215768, + "grad_norm": 3.299065351486206, + "learning_rate": 7.491931765790688e-07, + "log_odds_chosen": 10.757427215576172, + "log_odds_ratio": -0.00043512610136531293, + "logits/chosen": -0.007109713740646839, + "logits/rejected": -0.14529041945934296, + "logps/chosen": -0.0008351364522241056, + "logps/rejected": -2.858060359954834, + "loss": 0.3195, + "nll_loss": 0.07981950789690018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.351364522241056e-05, + "rewards/margins": 0.285722553730011, + "rewards/rejected": -0.2858060598373413, + "step": 14265 + }, + { + "epoch": 9.865836791147995, + "grad_norm": 3.2676212787628174, + "learning_rate": 7.453511602889197e-07, + "log_odds_chosen": 11.02478313446045, + "log_odds_ratio": -6.122445483924821e-05, + "logits/chosen": -0.12963497638702393, + "logits/rejected": -0.22275590896606445, + "logps/chosen": -0.00026319074095226824, + "logps/rejected": -2.292569637298584, + "loss": 0.4047, + "nll_loss": 0.10117470473051071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6319075914216228e-05, + "rewards/margins": 0.22923067212104797, + "rewards/rejected": -0.22925697267055511, + "step": 14266 + }, + { + "epoch": 9.866528354080222, + "grad_norm": 3.312446355819702, + "learning_rate": 7.415091439987706e-07, + "log_odds_chosen": 11.558563232421875, + "log_odds_ratio": -3.94942908314988e-05, + "logits/chosen": -0.3568577170372009, + "logits/rejected": -0.5602548122406006, + "logps/chosen": -8.418185461778194e-05, + "logps/rejected": -2.275566577911377, + "loss": 0.3074, + "nll_loss": 0.07684757560491562, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.418184734182432e-06, + "rewards/margins": 0.22754821181297302, + "rewards/rejected": -0.22755663096904755, + "step": 14267 + }, + { + "epoch": 9.867219917012449, + "grad_norm": 3.094496011734009, + "learning_rate": 7.376671277086215e-07, + "log_odds_chosen": 11.407001495361328, + "log_odds_ratio": -0.00010264647426083684, + "logits/chosen": -0.46122875809669495, + "logits/rejected": -0.34138795733451843, + "logps/chosen": -0.0001607881422387436, + "logps/rejected": -2.7173984050750732, + "loss": 0.2876, + "nll_loss": 0.07190103828907013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6078816770459525e-05, + "rewards/margins": 0.27172374725341797, + "rewards/rejected": -0.2717398405075073, + "step": 14268 + }, + { + "epoch": 9.867911479944675, + "grad_norm": 4.279833793640137, + "learning_rate": 7.338251114184724e-07, + "log_odds_chosen": 12.136764526367188, + "log_odds_ratio": -2.284317451994866e-05, + "logits/chosen": -0.23695890605449677, + "logits/rejected": -0.2460167109966278, + "logps/chosen": -0.00017700533499009907, + "logps/rejected": -3.117013931274414, + "loss": 0.3232, + "nll_loss": 0.08079381287097931, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.770053495420143e-05, + "rewards/margins": 0.311683714389801, + "rewards/rejected": -0.3117014169692993, + "step": 14269 + }, + { + "epoch": 9.868603042876902, + "grad_norm": 4.246036052703857, + "learning_rate": 7.299830951283235e-07, + "log_odds_chosen": 11.855365753173828, + "log_odds_ratio": -4.492932930588722e-05, + "logits/chosen": -0.1479170173406601, + "logits/rejected": -0.1711617112159729, + "logps/chosen": -0.0001270500069949776, + "logps/rejected": -3.0261948108673096, + "loss": 0.4588, + "nll_loss": 0.11470459401607513, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2704999790003058e-05, + "rewards/margins": 0.3026067912578583, + "rewards/rejected": -0.30261948704719543, + "step": 14270 + }, + { + "epoch": 9.869294605809129, + "grad_norm": 4.595867156982422, + "learning_rate": 7.261410788381743e-07, + "log_odds_chosen": 12.388121604919434, + "log_odds_ratio": -9.911271263263188e-06, + "logits/chosen": -0.35695046186447144, + "logits/rejected": -0.4349959194660187, + "logps/chosen": -0.00011795929458457977, + "logps/rejected": -3.168016195297241, + "loss": 0.4223, + "nll_loss": 0.10557819902896881, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1795929822255857e-05, + "rewards/margins": 0.316789835691452, + "rewards/rejected": -0.31680163741111755, + "step": 14271 + }, + { + "epoch": 9.869986168741356, + "grad_norm": 3.5730643272399902, + "learning_rate": 7.222990625480253e-07, + "log_odds_chosen": 10.779461860656738, + "log_odds_ratio": -7.836138684069738e-05, + "logits/chosen": -0.30118629336357117, + "logits/rejected": -0.34218257665634155, + "logps/chosen": -0.0006731522735208273, + "logps/rejected": -2.5577590465545654, + "loss": 0.3795, + "nll_loss": 0.09486451745033264, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.731522444169968e-05, + "rewards/margins": 0.25570860505104065, + "rewards/rejected": -0.25577589869499207, + "step": 14272 + }, + { + "epoch": 9.870677731673583, + "grad_norm": 4.24649715423584, + "learning_rate": 7.184570462578761e-07, + "log_odds_chosen": 12.680334091186523, + "log_odds_ratio": -1.203614374389872e-05, + "logits/chosen": -0.012453213334083557, + "logits/rejected": -0.04921615123748779, + "logps/chosen": -7.618685049237683e-05, + "logps/rejected": -3.263230323791504, + "loss": 0.4144, + "nll_loss": 0.10360294580459595, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.618685231136624e-06, + "rewards/margins": 0.32631543278694153, + "rewards/rejected": -0.3263230621814728, + "step": 14273 + }, + { + "epoch": 9.87136929460581, + "grad_norm": 3.6585779190063477, + "learning_rate": 7.146150299677271e-07, + "log_odds_chosen": 10.362115859985352, + "log_odds_ratio": -4.786492354469374e-05, + "logits/chosen": 0.03239941596984863, + "logits/rejected": -0.016241123899817467, + "logps/chosen": -0.0001966757990885526, + "logps/rejected": -1.8835418224334717, + "loss": 0.4046, + "nll_loss": 0.10113903880119324, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.96675810002489e-05, + "rewards/margins": 0.18833452463150024, + "rewards/rejected": -0.18835417926311493, + "step": 14274 + }, + { + "epoch": 9.872060857538036, + "grad_norm": 3.5085196495056152, + "learning_rate": 7.107730136775779e-07, + "log_odds_chosen": 10.342247009277344, + "log_odds_ratio": -0.00015586627705488354, + "logits/chosen": -0.18690171837806702, + "logits/rejected": -0.18983352184295654, + "logps/chosen": -0.00023725997016299516, + "logps/rejected": -1.7420761585235596, + "loss": 0.3701, + "nll_loss": 0.09251439571380615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3725997380097397e-05, + "rewards/margins": 0.1741839051246643, + "rewards/rejected": -0.1742076277732849, + "step": 14275 + }, + { + "epoch": 9.872752420470263, + "grad_norm": 2.949233055114746, + "learning_rate": 7.06930997387429e-07, + "log_odds_chosen": 11.369131088256836, + "log_odds_ratio": -3.7902838812442496e-05, + "logits/chosen": -0.4845868647098541, + "logits/rejected": -0.5072664022445679, + "logps/chosen": -0.00026072681066580117, + "logps/rejected": -2.3432700634002686, + "loss": 0.3909, + "nll_loss": 0.09773064404726028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6072681066580117e-05, + "rewards/margins": 0.23430094122886658, + "rewards/rejected": -0.2343270182609558, + "step": 14276 + }, + { + "epoch": 9.87344398340249, + "grad_norm": 4.4616522789001465, + "learning_rate": 7.030889810972799e-07, + "log_odds_chosen": 11.328014373779297, + "log_odds_ratio": -3.821388236247003e-05, + "logits/chosen": -0.10368916392326355, + "logits/rejected": -0.19956597685813904, + "logps/chosen": -0.0010826927609741688, + "logps/rejected": -2.758289337158203, + "loss": 0.5929, + "nll_loss": 0.1482183188199997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010826926882145926, + "rewards/margins": 0.2757207155227661, + "rewards/rejected": -0.2758289575576782, + "step": 14277 + }, + { + "epoch": 9.874135546334717, + "grad_norm": 2.6527774333953857, + "learning_rate": 6.992469648071308e-07, + "log_odds_chosen": 11.004871368408203, + "log_odds_ratio": -0.00011215964332222939, + "logits/chosen": -0.6274237036705017, + "logits/rejected": -0.6948424577713013, + "logps/chosen": -0.0003716836217790842, + "logps/rejected": -2.4248266220092773, + "loss": 0.2473, + "nll_loss": 0.061821553856134415, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.716836727107875e-05, + "rewards/margins": 0.24244548380374908, + "rewards/rejected": -0.24248266220092773, + "step": 14278 + }, + { + "epoch": 9.874827109266944, + "grad_norm": 2.395679473876953, + "learning_rate": 6.954049485169817e-07, + "log_odds_chosen": 12.594627380371094, + "log_odds_ratio": -1.518072713224683e-05, + "logits/chosen": -0.3285084366798401, + "logits/rejected": -0.3908994793891907, + "logps/chosen": -0.00010526237019803375, + "logps/rejected": -3.351914405822754, + "loss": 0.2661, + "nll_loss": 0.06652521342039108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0526237019803375e-05, + "rewards/margins": 0.33518093824386597, + "rewards/rejected": -0.3351914584636688, + "step": 14279 + }, + { + "epoch": 9.87551867219917, + "grad_norm": 4.559573650360107, + "learning_rate": 6.915629322268326e-07, + "log_odds_chosen": 12.000591278076172, + "log_odds_ratio": -7.7763048466295e-05, + "logits/chosen": -0.10347125679254532, + "logits/rejected": -0.0704481452703476, + "logps/chosen": -0.00014326543896459043, + "logps/rejected": -2.82658314704895, + "loss": 0.4159, + "nll_loss": 0.1039554625749588, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4326544260256924e-05, + "rewards/margins": 0.28264403343200684, + "rewards/rejected": -0.28265830874443054, + "step": 14280 + }, + { + "epoch": 9.876210235131397, + "grad_norm": 3.346869707107544, + "learning_rate": 6.877209159366836e-07, + "log_odds_chosen": 11.646493911743164, + "log_odds_ratio": -3.573457070160657e-05, + "logits/chosen": -0.6695969700813293, + "logits/rejected": -0.6724919676780701, + "logps/chosen": -0.0005850328598171473, + "logps/rejected": -2.9864587783813477, + "loss": 0.3558, + "nll_loss": 0.08893509954214096, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.850328670931049e-05, + "rewards/margins": 0.2985873818397522, + "rewards/rejected": -0.29864588379859924, + "step": 14281 + }, + { + "epoch": 9.876901798063624, + "grad_norm": 4.491700172424316, + "learning_rate": 6.838788996465346e-07, + "log_odds_chosen": 11.605969429016113, + "log_odds_ratio": -3.0743733077542856e-05, + "logits/chosen": 0.2914671003818512, + "logits/rejected": 0.125943124294281, + "logps/chosen": -0.0001813894195947796, + "logps/rejected": -2.674973726272583, + "loss": 0.5663, + "nll_loss": 0.14156340062618256, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8138942323275842e-05, + "rewards/margins": 0.2674792408943176, + "rewards/rejected": -0.26749736070632935, + "step": 14282 + }, + { + "epoch": 9.877593360995851, + "grad_norm": 3.6197402477264404, + "learning_rate": 6.800368833563855e-07, + "log_odds_chosen": 11.6904296875, + "log_odds_ratio": -3.449645373621024e-05, + "logits/chosen": -0.25123652815818787, + "logits/rejected": -0.260647714138031, + "logps/chosen": -0.0001352946856059134, + "logps/rejected": -2.788188934326172, + "loss": 0.2937, + "nll_loss": 0.07341036945581436, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3529470379580744e-05, + "rewards/margins": 0.27880537509918213, + "rewards/rejected": -0.27881887555122375, + "step": 14283 + }, + { + "epoch": 9.878284923928078, + "grad_norm": 4.248296737670898, + "learning_rate": 6.761948670662364e-07, + "log_odds_chosen": 12.074800491333008, + "log_odds_ratio": -2.439486706862226e-05, + "logits/chosen": -0.20641836524009705, + "logits/rejected": -0.26033246517181396, + "logps/chosen": -0.0001606778532732278, + "logps/rejected": -2.9791653156280518, + "loss": 0.4337, + "nll_loss": 0.10842004418373108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6067786418716423e-05, + "rewards/margins": 0.2979004681110382, + "rewards/rejected": -0.2979165315628052, + "step": 14284 + }, + { + "epoch": 9.878976486860305, + "grad_norm": 2.8815829753875732, + "learning_rate": 6.723528507760873e-07, + "log_odds_chosen": 12.093220710754395, + "log_odds_ratio": -1.1571889444894623e-05, + "logits/chosen": -0.5799591541290283, + "logits/rejected": -0.5917587280273438, + "logps/chosen": -5.5582597269676626e-05, + "logps/rejected": -2.379427909851074, + "loss": 0.2728, + "nll_loss": 0.06819252669811249, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.558259545068722e-06, + "rewards/margins": 0.23793722689151764, + "rewards/rejected": -0.23794278502464294, + "step": 14285 + }, + { + "epoch": 9.879668049792532, + "grad_norm": 2.925414800643921, + "learning_rate": 6.685108344859382e-07, + "log_odds_chosen": 11.433218955993652, + "log_odds_ratio": -7.641038246219978e-05, + "logits/chosen": 0.006106570363044739, + "logits/rejected": -0.09651975333690643, + "logps/chosen": -0.0002864729904104024, + "logps/rejected": -2.8932034969329834, + "loss": 0.8084, + "nll_loss": 0.20209553837776184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8647300496231765e-05, + "rewards/margins": 0.28929170966148376, + "rewards/rejected": -0.28932034969329834, + "step": 14286 + }, + { + "epoch": 9.880359612724758, + "grad_norm": 4.226961612701416, + "learning_rate": 6.646688181957893e-07, + "log_odds_chosen": 10.29315185546875, + "log_odds_ratio": -0.00028602348174899817, + "logits/chosen": -0.4428179860115051, + "logits/rejected": -0.55384761095047, + "logps/chosen": -0.0006280643865466118, + "logps/rejected": -2.452889919281006, + "loss": 0.454, + "nll_loss": 0.11347101628780365, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.280643719946966e-05, + "rewards/margins": 0.2452262043952942, + "rewards/rejected": -0.24528899788856506, + "step": 14287 + }, + { + "epoch": 9.881051175656985, + "grad_norm": 3.1403894424438477, + "learning_rate": 6.608268019056401e-07, + "log_odds_chosen": 11.154643058776855, + "log_odds_ratio": -3.0978779250290245e-05, + "logits/chosen": -0.10836577415466309, + "logits/rejected": -0.1866854578256607, + "logps/chosen": -0.00019304529996588826, + "logps/rejected": -2.0414133071899414, + "loss": 0.3154, + "nll_loss": 0.07883745431900024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.930452708620578e-05, + "rewards/margins": 0.20412203669548035, + "rewards/rejected": -0.20414134860038757, + "step": 14288 + }, + { + "epoch": 9.881742738589212, + "grad_norm": 2.4305622577667236, + "learning_rate": 6.569847856154911e-07, + "log_odds_chosen": 11.386059761047363, + "log_odds_ratio": -6.501846655737609e-05, + "logits/chosen": -0.32185888290405273, + "logits/rejected": -0.4893759787082672, + "logps/chosen": -8.296041050925851e-05, + "logps/rejected": -1.9257011413574219, + "loss": 0.2682, + "nll_loss": 0.06703435629606247, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.296042324218433e-06, + "rewards/margins": 0.1925618201494217, + "rewards/rejected": -0.19257010519504547, + "step": 14289 + }, + { + "epoch": 9.882434301521439, + "grad_norm": 4.274883270263672, + "learning_rate": 6.531427693253419e-07, + "log_odds_chosen": 10.623971939086914, + "log_odds_ratio": -5.016976501792669e-05, + "logits/chosen": 0.021292902529239655, + "logits/rejected": -0.05526788532733917, + "logps/chosen": -0.0001984376722248271, + "logps/rejected": -1.9755374193191528, + "loss": 0.4367, + "nll_loss": 0.1091596782207489, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.984376649488695e-05, + "rewards/margins": 0.19753390550613403, + "rewards/rejected": -0.19755373895168304, + "step": 14290 + }, + { + "epoch": 9.883125864453666, + "grad_norm": 3.6415889263153076, + "learning_rate": 6.493007530351929e-07, + "log_odds_chosen": 10.185497283935547, + "log_odds_ratio": -0.0002156754635507241, + "logits/chosen": -0.5327581167221069, + "logits/rejected": -0.5787546038627625, + "logps/chosen": -0.0012383005814626813, + "logps/rejected": -1.8759710788726807, + "loss": 0.393, + "nll_loss": 0.09823187440633774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00012383004650473595, + "rewards/margins": 0.18747326731681824, + "rewards/rejected": -0.1875970959663391, + "step": 14291 + }, + { + "epoch": 9.883817427385893, + "grad_norm": 2.7681779861450195, + "learning_rate": 6.454587367450438e-07, + "log_odds_chosen": 10.560806274414062, + "log_odds_ratio": -0.00027869484620168805, + "logits/chosen": -0.5162404179573059, + "logits/rejected": -0.6527389883995056, + "logps/chosen": -0.0005228912341408432, + "logps/rejected": -1.8825963735580444, + "loss": 0.3032, + "nll_loss": 0.0757603719830513, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228912050370127e-05, + "rewards/margins": 0.18820734322071075, + "rewards/rejected": -0.18825963139533997, + "step": 14292 + }, + { + "epoch": 9.88450899031812, + "grad_norm": 4.2536211013793945, + "learning_rate": 6.416167204548948e-07, + "log_odds_chosen": 10.498558044433594, + "log_odds_ratio": -9.042402962222695e-05, + "logits/chosen": -0.6295912265777588, + "logits/rejected": -0.5925350785255432, + "logps/chosen": -0.0004221365961711854, + "logps/rejected": -2.2527432441711426, + "loss": 0.5501, + "nll_loss": 0.13752688467502594, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.221366179990582e-05, + "rewards/margins": 0.2252321094274521, + "rewards/rejected": -0.22527432441711426, + "step": 14293 + }, + { + "epoch": 9.885200553250346, + "grad_norm": 2.9185569286346436, + "learning_rate": 6.377747041647457e-07, + "log_odds_chosen": 11.82805061340332, + "log_odds_ratio": -1.629330108698923e-05, + "logits/chosen": -0.11088446527719498, + "logits/rejected": -0.16331283748149872, + "logps/chosen": -0.0006101735634729266, + "logps/rejected": -3.084566593170166, + "loss": 0.2856, + "nll_loss": 0.0713898241519928, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101735198171809e-05, + "rewards/margins": 0.308395653963089, + "rewards/rejected": -0.3084566593170166, + "step": 14294 + }, + { + "epoch": 9.885892116182573, + "grad_norm": 2.9352762699127197, + "learning_rate": 6.339326878745966e-07, + "log_odds_chosen": 10.382514953613281, + "log_odds_ratio": -0.00020576248061843216, + "logits/chosen": -0.45164620876312256, + "logits/rejected": -0.571010172367096, + "logps/chosen": -0.0007919521303847432, + "logps/rejected": -2.304452896118164, + "loss": 0.3793, + "nll_loss": 0.09479214251041412, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.91952115832828e-05, + "rewards/margins": 0.2303660809993744, + "rewards/rejected": -0.2304452806711197, + "step": 14295 + }, + { + "epoch": 9.8865836791148, + "grad_norm": 2.45688796043396, + "learning_rate": 6.300906715844475e-07, + "log_odds_chosen": 11.476516723632812, + "log_odds_ratio": -8.425705891568214e-05, + "logits/chosen": -0.10602383315563202, + "logits/rejected": -0.13981786370277405, + "logps/chosen": -0.00018015142995864153, + "logps/rejected": -1.9917879104614258, + "loss": 0.2963, + "nll_loss": 0.07405611127614975, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8015143723459914e-05, + "rewards/margins": 0.19916076958179474, + "rewards/rejected": -0.1991787850856781, + "step": 14296 + }, + { + "epoch": 9.887275242047027, + "grad_norm": 4.807474136352539, + "learning_rate": 6.262486552942985e-07, + "log_odds_chosen": 11.048713684082031, + "log_odds_ratio": -3.208504858775996e-05, + "logits/chosen": 0.037749409675598145, + "logits/rejected": 0.01808573305606842, + "logps/chosen": -0.00015323254046961665, + "logps/rejected": -2.082486391067505, + "loss": 0.4806, + "nll_loss": 0.12015396356582642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5323254046961665e-05, + "rewards/margins": 0.2082333266735077, + "rewards/rejected": -0.20824863016605377, + "step": 14297 + }, + { + "epoch": 9.887966804979254, + "grad_norm": 3.227276563644409, + "learning_rate": 6.224066390041494e-07, + "log_odds_chosen": 9.382593154907227, + "log_odds_ratio": -0.0006197717739269137, + "logits/chosen": 0.034344106912612915, + "logits/rejected": -0.012226209044456482, + "logps/chosen": -0.0031241520773619413, + "logps/rejected": -2.3328871726989746, + "loss": 0.3996, + "nll_loss": 0.09983502328395844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003124152426607907, + "rewards/margins": 0.23297631740570068, + "rewards/rejected": -0.2332887351512909, + "step": 14298 + }, + { + "epoch": 9.88865836791148, + "grad_norm": 5.094573020935059, + "learning_rate": 6.185646227140004e-07, + "log_odds_chosen": 11.684231758117676, + "log_odds_ratio": -4.3627602281048894e-05, + "logits/chosen": -0.16069498658180237, + "logits/rejected": -0.2361924946308136, + "logps/chosen": -0.00020580008276738226, + "logps/rejected": -2.831536293029785, + "loss": 0.571, + "nll_loss": 0.1427527368068695, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058000973192975e-05, + "rewards/margins": 0.28313305974006653, + "rewards/rejected": -0.2831536531448364, + "step": 14299 + }, + { + "epoch": 9.889349930843707, + "grad_norm": 2.746864080429077, + "learning_rate": 6.147226064238512e-07, + "log_odds_chosen": 12.112071990966797, + "log_odds_ratio": -2.832992686307989e-05, + "logits/chosen": -0.23692499101161957, + "logits/rejected": -0.21913860738277435, + "logps/chosen": -0.00011797455226769671, + "logps/rejected": -2.8220291137695312, + "loss": 0.3201, + "nll_loss": 0.08001617342233658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1797455954365432e-05, + "rewards/margins": 0.28219112753868103, + "rewards/rejected": -0.28220292925834656, + "step": 14300 + }, + { + "epoch": 9.890041493775934, + "grad_norm": 2.84747314453125, + "learning_rate": 6.108805901337022e-07, + "log_odds_chosen": 10.872987747192383, + "log_odds_ratio": -3.52791539626196e-05, + "logits/chosen": -0.15608876943588257, + "logits/rejected": -0.1359274983406067, + "logps/chosen": -0.00016623694682493806, + "logps/rejected": -2.1368963718414307, + "loss": 0.3956, + "nll_loss": 0.09889744222164154, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6623695046291687e-05, + "rewards/margins": 0.21367302536964417, + "rewards/rejected": -0.2136896550655365, + "step": 14301 + }, + { + "epoch": 9.89073305670816, + "grad_norm": 3.633258819580078, + "learning_rate": 6.070385738435532e-07, + "log_odds_chosen": 10.81205940246582, + "log_odds_ratio": -5.5275151680689305e-05, + "logits/chosen": -0.2276846170425415, + "logits/rejected": -0.21954035758972168, + "logps/chosen": -0.00034371885703876615, + "logps/rejected": -2.4575982093811035, + "loss": 0.7281, + "nll_loss": 0.1820145845413208, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4371885703876615e-05, + "rewards/margins": 0.24572545289993286, + "rewards/rejected": -0.24575982987880707, + "step": 14302 + }, + { + "epoch": 9.891424619640388, + "grad_norm": 4.360518455505371, + "learning_rate": 6.03196557553404e-07, + "log_odds_chosen": 10.949689865112305, + "log_odds_ratio": -0.0003759180544875562, + "logits/chosen": -0.17199602723121643, + "logits/rejected": -0.18793082237243652, + "logps/chosen": -0.000553618127014488, + "logps/rejected": -2.2656283378601074, + "loss": 0.3991, + "nll_loss": 0.09972595423460007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5361815611831844e-05, + "rewards/margins": 0.22650748491287231, + "rewards/rejected": -0.22656285762786865, + "step": 14303 + }, + { + "epoch": 9.892116182572614, + "grad_norm": 3.440701961517334, + "learning_rate": 5.993545412632551e-07, + "log_odds_chosen": 11.22562313079834, + "log_odds_ratio": -3.425776958465576e-05, + "logits/chosen": -0.30449116230010986, + "logits/rejected": -0.35918286442756653, + "logps/chosen": -0.00014760816702619195, + "logps/rejected": -2.243481159210205, + "loss": 0.4307, + "nll_loss": 0.10767467319965363, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4760817066417076e-05, + "rewards/margins": 0.22433334589004517, + "rewards/rejected": -0.22434811294078827, + "step": 14304 + }, + { + "epoch": 9.892807745504841, + "grad_norm": 3.14928936958313, + "learning_rate": 5.955125249731059e-07, + "log_odds_chosen": 11.337374687194824, + "log_odds_ratio": -3.201282015652396e-05, + "logits/chosen": -0.1693303883075714, + "logits/rejected": -0.17716658115386963, + "logps/chosen": -0.00022396880376618356, + "logps/rejected": -2.830944538116455, + "loss": 0.3083, + "nll_loss": 0.07706800103187561, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.239688183180988e-05, + "rewards/margins": 0.2830720543861389, + "rewards/rejected": -0.2830944359302521, + "step": 14305 + }, + { + "epoch": 9.893499308437068, + "grad_norm": 3.699977159500122, + "learning_rate": 5.916705086829569e-07, + "log_odds_chosen": 10.842156410217285, + "log_odds_ratio": -0.0004522549279499799, + "logits/chosen": -0.06826915591955185, + "logits/rejected": -0.2108014076948166, + "logps/chosen": -0.0010594420600682497, + "logps/rejected": -2.3867008686065674, + "loss": 0.3698, + "nll_loss": 0.09239698201417923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00010594420018605888, + "rewards/margins": 0.238564133644104, + "rewards/rejected": -0.23867008090019226, + "step": 14306 + }, + { + "epoch": 9.894190871369295, + "grad_norm": 2.9475674629211426, + "learning_rate": 5.878284923928077e-07, + "log_odds_chosen": 11.217729568481445, + "log_odds_ratio": -9.593695722287521e-05, + "logits/chosen": -0.11455540359020233, + "logits/rejected": -0.13295046985149384, + "logps/chosen": -0.0002060092519968748, + "logps/rejected": -2.5001935958862305, + "loss": 0.3923, + "nll_loss": 0.09807466715574265, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0600926291081123e-05, + "rewards/margins": 0.24999874830245972, + "rewards/rejected": -0.2500193417072296, + "step": 14307 + }, + { + "epoch": 9.894882434301522, + "grad_norm": 3.6130259037017822, + "learning_rate": 5.839864761026587e-07, + "log_odds_chosen": 10.944160461425781, + "log_odds_ratio": -5.0609152822289616e-05, + "logits/chosen": -0.17907792329788208, + "logits/rejected": -0.35081222653388977, + "logps/chosen": -0.0004822844348382205, + "logps/rejected": -3.0487985610961914, + "loss": 0.7491, + "nll_loss": 0.1872725784778595, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.822844493901357e-05, + "rewards/margins": 0.3048316538333893, + "rewards/rejected": -0.3048798739910126, + "step": 14308 + }, + { + "epoch": 9.895573997233749, + "grad_norm": 3.746443748474121, + "learning_rate": 5.801444598125096e-07, + "log_odds_chosen": 12.997480392456055, + "log_odds_ratio": -1.1382105185475666e-05, + "logits/chosen": -0.10519464313983917, + "logits/rejected": -0.11598517000675201, + "logps/chosen": -0.00033332454040646553, + "logps/rejected": -4.251444339752197, + "loss": 0.2877, + "nll_loss": 0.07191663980484009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.333245331305079e-05, + "rewards/margins": 0.42511114478111267, + "rewards/rejected": -0.4251444637775421, + "step": 14309 + }, + { + "epoch": 9.896265560165975, + "grad_norm": 3.0396363735198975, + "learning_rate": 5.763024435223606e-07, + "log_odds_chosen": 11.321372985839844, + "log_odds_ratio": -5.7202312746085227e-05, + "logits/chosen": 0.09867669641971588, + "logits/rejected": -0.03381189703941345, + "logps/chosen": -0.00016327225603163242, + "logps/rejected": -2.585301637649536, + "loss": 0.356, + "nll_loss": 0.08898219466209412, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.632722523936536e-05, + "rewards/margins": 0.25851383805274963, + "rewards/rejected": -0.2585301697254181, + "step": 14310 + }, + { + "epoch": 9.896957123098202, + "grad_norm": 3.3631041049957275, + "learning_rate": 5.724604272322115e-07, + "log_odds_chosen": 11.977582931518555, + "log_odds_ratio": -0.00012897477427031845, + "logits/chosen": -0.14217886328697205, + "logits/rejected": -0.2045181840658188, + "logps/chosen": -0.0004048075061291456, + "logps/rejected": -3.6177620887756348, + "loss": 0.4012, + "nll_loss": 0.10028345882892609, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.048075061291456e-05, + "rewards/margins": 0.3617357313632965, + "rewards/rejected": -0.361776202917099, + "step": 14311 + }, + { + "epoch": 9.89764868603043, + "grad_norm": 3.0941312313079834, + "learning_rate": 5.686184109420624e-07, + "log_odds_chosen": 12.279654502868652, + "log_odds_ratio": -5.036395668867044e-05, + "logits/chosen": -0.18718120455741882, + "logits/rejected": -0.17902915179729462, + "logps/chosen": -0.00018711428856477141, + "logps/rejected": -3.5618364810943604, + "loss": 0.3191, + "nll_loss": 0.07976827025413513, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8711431039264426e-05, + "rewards/margins": 0.35616499185562134, + "rewards/rejected": -0.3561837077140808, + "step": 14312 + }, + { + "epoch": 9.898340248962656, + "grad_norm": 2.8663947582244873, + "learning_rate": 5.647763946519133e-07, + "log_odds_chosen": 11.31735897064209, + "log_odds_ratio": -4.939009886584245e-05, + "logits/chosen": -0.44607430696487427, + "logits/rejected": -0.4172176718711853, + "logps/chosen": -0.00024118662986438721, + "logps/rejected": -2.5889973640441895, + "loss": 0.2682, + "nll_loss": 0.06704474240541458, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.411866262264084e-05, + "rewards/margins": 0.25887560844421387, + "rewards/rejected": -0.2588997483253479, + "step": 14313 + }, + { + "epoch": 9.899031811894883, + "grad_norm": 3.3275301456451416, + "learning_rate": 5.609343783617643e-07, + "log_odds_chosen": 11.67763900756836, + "log_odds_ratio": -2.294234036526177e-05, + "logits/chosen": -0.5921186208724976, + "logits/rejected": -0.5702080130577087, + "logps/chosen": -0.0003165464149788022, + "logps/rejected": -3.1312780380249023, + "loss": 0.4145, + "nll_loss": 0.10362927615642548, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.165464295307174e-05, + "rewards/margins": 0.31309616565704346, + "rewards/rejected": -0.3131278157234192, + "step": 14314 + }, + { + "epoch": 9.89972337482711, + "grad_norm": 2.9337375164031982, + "learning_rate": 5.570923620716152e-07, + "log_odds_chosen": 12.45496940612793, + "log_odds_ratio": -1.6639531168038957e-05, + "logits/chosen": -0.36686575412750244, + "logits/rejected": -0.34807348251342773, + "logps/chosen": -0.00012428374611772597, + "logps/rejected": -3.021192789077759, + "loss": 0.3055, + "nll_loss": 0.07638576626777649, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2428373338480014e-05, + "rewards/margins": 0.3021068871021271, + "rewards/rejected": -0.30211928486824036, + "step": 14315 + }, + { + "epoch": 9.900414937759336, + "grad_norm": 2.6304478645324707, + "learning_rate": 5.532503457814662e-07, + "log_odds_chosen": 10.704986572265625, + "log_odds_ratio": -7.087891572155058e-05, + "logits/chosen": -0.03459363430738449, + "logits/rejected": -0.02272067219018936, + "logps/chosen": -0.0003902169701177627, + "logps/rejected": -2.3840231895446777, + "loss": 0.452, + "nll_loss": 0.113001748919487, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9021695556584746e-05, + "rewards/margins": 0.23836329579353333, + "rewards/rejected": -0.2384023219347, + "step": 14316 + }, + { + "epoch": 9.901106500691563, + "grad_norm": 3.5170085430145264, + "learning_rate": 5.49408329491317e-07, + "log_odds_chosen": 12.182526588439941, + "log_odds_ratio": -0.00010460546764079481, + "logits/chosen": -0.009993776679039001, + "logits/rejected": -0.03831420838832855, + "logps/chosen": -0.0008481538970954716, + "logps/rejected": -3.2034897804260254, + "loss": 0.3519, + "nll_loss": 0.08796828985214233, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.481539407512173e-05, + "rewards/margins": 0.32026416063308716, + "rewards/rejected": -0.32034897804260254, + "step": 14317 + }, + { + "epoch": 9.90179806362379, + "grad_norm": 2.901359796524048, + "learning_rate": 5.45566313201168e-07, + "log_odds_chosen": 10.570993423461914, + "log_odds_ratio": -6.009342541801743e-05, + "logits/chosen": -0.05773099139332771, + "logits/rejected": -0.17567658424377441, + "logps/chosen": -0.00017942303384188563, + "logps/rejected": -1.751425862312317, + "loss": 0.2901, + "nll_loss": 0.07250886410474777, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7942304111784324e-05, + "rewards/margins": 0.1751246452331543, + "rewards/rejected": -0.1751425862312317, + "step": 14318 + }, + { + "epoch": 9.902489626556017, + "grad_norm": 3.4182724952697754, + "learning_rate": 5.41724296911019e-07, + "log_odds_chosen": 10.695033073425293, + "log_odds_ratio": -4.265415191184729e-05, + "logits/chosen": -0.0365283340215683, + "logits/rejected": -0.08216466009616852, + "logps/chosen": -0.00021733081666752696, + "logps/rejected": -2.2307868003845215, + "loss": 0.34, + "nll_loss": 0.08500487357378006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.17330834857421e-05, + "rewards/margins": 0.22305697202682495, + "rewards/rejected": -0.22307869791984558, + "step": 14319 + }, + { + "epoch": 9.903181189488244, + "grad_norm": 4.249139785766602, + "learning_rate": 5.378822806208698e-07, + "log_odds_chosen": 11.20050048828125, + "log_odds_ratio": -2.2386917407857254e-05, + "logits/chosen": -0.02520899474620819, + "logits/rejected": -0.03752049803733826, + "logps/chosen": -0.0003164797672070563, + "logps/rejected": -2.747769832611084, + "loss": 0.3104, + "nll_loss": 0.07758967578411102, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.164797817589715e-05, + "rewards/margins": 0.2747453451156616, + "rewards/rejected": -0.27477699518203735, + "step": 14320 + }, + { + "epoch": 9.90387275242047, + "grad_norm": 4.353235721588135, + "learning_rate": 5.340402643307209e-07, + "log_odds_chosen": 11.509032249450684, + "log_odds_ratio": -6.0765403759432957e-05, + "logits/chosen": -0.13491703569889069, + "logits/rejected": -0.15714114904403687, + "logps/chosen": -0.00016875413712114096, + "logps/rejected": -2.7386293411254883, + "loss": 0.4921, + "nll_loss": 0.12302062660455704, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.687541589490138e-05, + "rewards/margins": 0.2738460600376129, + "rewards/rejected": -0.27386295795440674, + "step": 14321 + }, + { + "epoch": 9.904564315352697, + "grad_norm": 5.303290367126465, + "learning_rate": 5.301982480405717e-07, + "log_odds_chosen": 11.984261512756348, + "log_odds_ratio": -3.151076452923007e-05, + "logits/chosen": -0.14473620057106018, + "logits/rejected": -0.23054178059101105, + "logps/chosen": -0.00011740185436792672, + "logps/rejected": -2.885850191116333, + "loss": 0.6141, + "nll_loss": 0.15351364016532898, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.174018416350009e-05, + "rewards/margins": 0.288573294878006, + "rewards/rejected": -0.28858503699302673, + "step": 14322 + }, + { + "epoch": 9.905255878284924, + "grad_norm": 4.3790740966796875, + "learning_rate": 5.263562317504227e-07, + "log_odds_chosen": 12.246095657348633, + "log_odds_ratio": -1.5059587894938886e-05, + "logits/chosen": -0.03567267954349518, + "logits/rejected": -0.15627005696296692, + "logps/chosen": -7.786623609717935e-05, + "logps/rejected": -2.4729762077331543, + "loss": 0.3954, + "nll_loss": 0.09885958582162857, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.786624337313697e-06, + "rewards/margins": 0.24728982150554657, + "rewards/rejected": -0.24729761481285095, + "step": 14323 + }, + { + "epoch": 9.905947441217151, + "grad_norm": 2.7396340370178223, + "learning_rate": 5.225142154602735e-07, + "log_odds_chosen": 11.351285934448242, + "log_odds_ratio": -1.8908889614976943e-05, + "logits/chosen": -0.6820241212844849, + "logits/rejected": -0.6081110835075378, + "logps/chosen": -0.00010962403030134737, + "logps/rejected": -2.1085121631622314, + "loss": 0.3534, + "nll_loss": 0.08834048360586166, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0962403393932618e-05, + "rewards/margins": 0.21084025502204895, + "rewards/rejected": -0.21085122227668762, + "step": 14324 + }, + { + "epoch": 9.906639004149378, + "grad_norm": 3.673875093460083, + "learning_rate": 5.186721991701245e-07, + "log_odds_chosen": 11.471330642700195, + "log_odds_ratio": -9.9008837423753e-05, + "logits/chosen": -0.03928861767053604, + "logits/rejected": -0.2071848213672638, + "logps/chosen": -0.00031385914189741015, + "logps/rejected": -3.0085816383361816, + "loss": 0.5239, + "nll_loss": 0.1309712678194046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.13859163725283e-05, + "rewards/margins": 0.3008267879486084, + "rewards/rejected": -0.30085819959640503, + "step": 14325 + }, + { + "epoch": 9.907330567081605, + "grad_norm": 3.081761598587036, + "learning_rate": 5.148301828799754e-07, + "log_odds_chosen": 9.377728462219238, + "log_odds_ratio": -0.0008418531506322324, + "logits/chosen": -0.3373812437057495, + "logits/rejected": -0.3121347427368164, + "logps/chosen": -0.00022931865532882512, + "logps/rejected": -1.6012835502624512, + "loss": 0.2925, + "nll_loss": 0.07303423434495926, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2931868443265557e-05, + "rewards/margins": 0.16010543704032898, + "rewards/rejected": -0.16012835502624512, + "step": 14326 + }, + { + "epoch": 9.908022130013832, + "grad_norm": 3.6323537826538086, + "learning_rate": 5.109881665898264e-07, + "log_odds_chosen": 10.823930740356445, + "log_odds_ratio": -0.0002582712040748447, + "logits/chosen": -0.5242524147033691, + "logits/rejected": -0.5194587707519531, + "logps/chosen": -0.00021303680841811, + "logps/rejected": -2.167452096939087, + "loss": 0.2817, + "nll_loss": 0.07040915638208389, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.130368011421524e-05, + "rewards/margins": 0.21672390401363373, + "rewards/rejected": -0.21674521267414093, + "step": 14327 + }, + { + "epoch": 9.908713692946058, + "grad_norm": 3.7610743045806885, + "learning_rate": 5.071461502996773e-07, + "log_odds_chosen": 11.104131698608398, + "log_odds_ratio": -7.880770863266662e-05, + "logits/chosen": -0.23604810237884521, + "logits/rejected": -0.311259388923645, + "logps/chosen": -0.0001373798295389861, + "logps/rejected": -2.1140708923339844, + "loss": 0.4371, + "nll_loss": 0.10926076024770737, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3737982044403907e-05, + "rewards/margins": 0.2113933563232422, + "rewards/rejected": -0.21140709519386292, + "step": 14328 + }, + { + "epoch": 9.909405255878285, + "grad_norm": 3.5242393016815186, + "learning_rate": 5.033041340095282e-07, + "log_odds_chosen": 9.724529266357422, + "log_odds_ratio": -0.0002068439789582044, + "logits/chosen": -0.14855721592903137, + "logits/rejected": -0.25745317339897156, + "logps/chosen": -0.00022229237947613, + "logps/rejected": -1.5355801582336426, + "loss": 0.3654, + "nll_loss": 0.09133844077587128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2229240130400285e-05, + "rewards/margins": 0.15353579819202423, + "rewards/rejected": -0.15355801582336426, + "step": 14329 + }, + { + "epoch": 9.910096818810512, + "grad_norm": 3.3911397457122803, + "learning_rate": 4.994621177193791e-07, + "log_odds_chosen": 10.772411346435547, + "log_odds_ratio": -0.00016185276035685092, + "logits/chosen": -0.0470561645925045, + "logits/rejected": -0.04483966901898384, + "logps/chosen": -0.0006423345184884965, + "logps/rejected": -2.230781316757202, + "loss": 0.4028, + "nll_loss": 0.10068729519844055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.423345621442422e-05, + "rewards/margins": 0.22301389276981354, + "rewards/rejected": -0.22307811677455902, + "step": 14330 + }, + { + "epoch": 9.910788381742739, + "grad_norm": 2.422025680541992, + "learning_rate": 4.956201014292301e-07, + "log_odds_chosen": 9.661433219909668, + "log_odds_ratio": -0.000198134541278705, + "logits/chosen": -0.24157515168190002, + "logits/rejected": -0.36891329288482666, + "logps/chosen": -0.00032799746259115636, + "logps/rejected": -1.5236164331436157, + "loss": 0.2083, + "nll_loss": 0.05205736309289932, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.279974771430716e-05, + "rewards/margins": 0.15232884883880615, + "rewards/rejected": -0.1523616462945938, + "step": 14331 + }, + { + "epoch": 9.911479944674966, + "grad_norm": 3.357443332672119, + "learning_rate": 4.917780851390809e-07, + "log_odds_chosen": 11.180157661437988, + "log_odds_ratio": -3.0052715374040417e-05, + "logits/chosen": -0.16521982848644257, + "logits/rejected": -0.12277151644229889, + "logps/chosen": -6.565694638993591e-05, + "logps/rejected": -1.5582237243652344, + "loss": 0.267, + "nll_loss": 0.06674351543188095, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.565694548044121e-06, + "rewards/margins": 0.15581580996513367, + "rewards/rejected": -0.15582238137722015, + "step": 14332 + }, + { + "epoch": 9.912171507607193, + "grad_norm": 3.570207357406616, + "learning_rate": 4.87936068848932e-07, + "log_odds_chosen": 11.282625198364258, + "log_odds_ratio": -4.635823279386386e-05, + "logits/chosen": -0.20219162106513977, + "logits/rejected": -0.32793179154396057, + "logps/chosen": -0.00024112232495099306, + "logps/rejected": -2.8642830848693848, + "loss": 0.3108, + "nll_loss": 0.07770770788192749, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4112232495099306e-05, + "rewards/margins": 0.28640419244766235, + "rewards/rejected": -0.2864283323287964, + "step": 14333 + }, + { + "epoch": 9.91286307053942, + "grad_norm": 3.489907741546631, + "learning_rate": 4.840940525587828e-07, + "log_odds_chosen": 11.93216323852539, + "log_odds_ratio": -2.781835246423725e-05, + "logits/chosen": -0.2622598111629486, + "logits/rejected": -0.3802410066127777, + "logps/chosen": -0.00022485022782348096, + "logps/rejected": -2.870073080062866, + "loss": 0.4957, + "nll_loss": 0.12392807006835938, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2485022782348096e-05, + "rewards/margins": 0.2869848608970642, + "rewards/rejected": -0.28700733184814453, + "step": 14334 + }, + { + "epoch": 9.913554633471646, + "grad_norm": 3.2331087589263916, + "learning_rate": 4.802520362686338e-07, + "log_odds_chosen": 11.829715728759766, + "log_odds_ratio": -2.7710057111107744e-05, + "logits/chosen": -0.19258259236812592, + "logits/rejected": -0.24621924757957458, + "logps/chosen": -0.00016499309276696295, + "logps/rejected": -2.9383063316345215, + "loss": 0.3404, + "nll_loss": 0.0851057916879654, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6499310731887817e-05, + "rewards/margins": 0.29381415247917175, + "rewards/rejected": -0.29383063316345215, + "step": 14335 + }, + { + "epoch": 9.914246196403873, + "grad_norm": 4.226347923278809, + "learning_rate": 4.764100199784847e-07, + "log_odds_chosen": 11.72370719909668, + "log_odds_ratio": -2.9156955861253664e-05, + "logits/chosen": -0.29745885729789734, + "logits/rejected": -0.3365001678466797, + "logps/chosen": -0.0002458348171785474, + "logps/rejected": -2.796137809753418, + "loss": 0.4286, + "nll_loss": 0.10715927183628082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4583481717854738e-05, + "rewards/margins": 0.2795892059803009, + "rewards/rejected": -0.27961376309394836, + "step": 14336 + }, + { + "epoch": 9.9149377593361, + "grad_norm": 3.6933679580688477, + "learning_rate": 4.725680036883356e-07, + "log_odds_chosen": 12.086358070373535, + "log_odds_ratio": -8.461339348286856e-06, + "logits/chosen": -0.27729731798171997, + "logits/rejected": -0.46082037687301636, + "logps/chosen": -0.00010570999438641593, + "logps/rejected": -2.9097843170166016, + "loss": 0.5001, + "nll_loss": 0.1250213086605072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0570998711045831e-05, + "rewards/margins": 0.2909678816795349, + "rewards/rejected": -0.29097843170166016, + "step": 14337 + }, + { + "epoch": 9.915629322268327, + "grad_norm": 3.8641040325164795, + "learning_rate": 4.6872598739818663e-07, + "log_odds_chosen": 11.767168045043945, + "log_odds_ratio": -9.326574399892706e-06, + "logits/chosen": -0.46089547872543335, + "logits/rejected": -0.692821204662323, + "logps/chosen": -7.577265205327421e-05, + "logps/rejected": -2.1478333473205566, + "loss": 0.3482, + "nll_loss": 0.08706062287092209, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.577265478175832e-06, + "rewards/margins": 0.21477577090263367, + "rewards/rejected": -0.21478332579135895, + "step": 14338 + }, + { + "epoch": 9.916320885200554, + "grad_norm": 3.4284377098083496, + "learning_rate": 4.6488397110803754e-07, + "log_odds_chosen": 8.728742599487305, + "log_odds_ratio": -0.0006372688221745193, + "logits/chosen": -0.48407578468322754, + "logits/rejected": -0.44575226306915283, + "logps/chosen": -0.0006253526080399752, + "logps/rejected": -1.2256580591201782, + "loss": 0.31, + "nll_loss": 0.07742569595575333, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.253526225918904e-05, + "rewards/margins": 0.12250328063964844, + "rewards/rejected": -0.12256580591201782, + "step": 14339 + }, + { + "epoch": 9.91701244813278, + "grad_norm": 2.6981019973754883, + "learning_rate": 4.6104195481788845e-07, + "log_odds_chosen": 9.52665901184082, + "log_odds_ratio": -0.00023078435333445668, + "logits/chosen": -0.554850161075592, + "logits/rejected": -0.513104259967804, + "logps/chosen": -0.0002982397563755512, + "logps/rejected": -1.2079306840896606, + "loss": 0.1931, + "nll_loss": 0.0482604056596756, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9823975637555122e-05, + "rewards/margins": 0.12076324224472046, + "rewards/rejected": -0.12079307436943054, + "step": 14340 + }, + { + "epoch": 9.917704011065007, + "grad_norm": 2.396088123321533, + "learning_rate": 4.5719993852773936e-07, + "log_odds_chosen": 11.458711624145508, + "log_odds_ratio": -5.126070755068213e-05, + "logits/chosen": -0.5153904557228088, + "logits/rejected": -0.5878928899765015, + "logps/chosen": -0.00014841034135315567, + "logps/rejected": -2.667332410812378, + "loss": 0.2098, + "nll_loss": 0.05243738740682602, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.484103540860815e-05, + "rewards/margins": 0.26671838760375977, + "rewards/rejected": -0.26673322916030884, + "step": 14341 + }, + { + "epoch": 9.918395573997234, + "grad_norm": 2.856847047805786, + "learning_rate": 4.5335792223759026e-07, + "log_odds_chosen": 10.811798095703125, + "log_odds_ratio": -5.029054227634333e-05, + "logits/chosen": 0.1117456704378128, + "logits/rejected": 0.1272241324186325, + "logps/chosen": -0.0001538070064270869, + "logps/rejected": -1.848840355873108, + "loss": 0.2877, + "nll_loss": 0.07192511856555939, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.538070137030445e-05, + "rewards/margins": 0.18486866354942322, + "rewards/rejected": -0.18488404154777527, + "step": 14342 + }, + { + "epoch": 9.91908713692946, + "grad_norm": 3.0931427478790283, + "learning_rate": 4.495159059474412e-07, + "log_odds_chosen": 9.95556640625, + "log_odds_ratio": -0.00023802714713383466, + "logits/chosen": -0.2813582420349121, + "logits/rejected": -0.35545971989631653, + "logps/chosen": -0.00039546735933981836, + "logps/rejected": -1.5701475143432617, + "loss": 0.3322, + "nll_loss": 0.08301551640033722, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9546735933981836e-05, + "rewards/margins": 0.15697520971298218, + "rewards/rejected": -0.15701475739479065, + "step": 14343 + }, + { + "epoch": 9.919778699861688, + "grad_norm": 2.814404249191284, + "learning_rate": 4.456738896572922e-07, + "log_odds_chosen": 10.750186920166016, + "log_odds_ratio": -0.00018831017951015383, + "logits/chosen": -0.25252482295036316, + "logits/rejected": -0.23979459702968597, + "logps/chosen": -0.0008598081185482442, + "logps/rejected": -2.1662676334381104, + "loss": 0.3028, + "nll_loss": 0.07567182928323746, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.598080603405833e-05, + "rewards/margins": 0.21654078364372253, + "rewards/rejected": -0.21662676334381104, + "step": 14344 + }, + { + "epoch": 9.920470262793915, + "grad_norm": 3.1870203018188477, + "learning_rate": 4.418318733671431e-07, + "log_odds_chosen": 11.540773391723633, + "log_odds_ratio": -2.0432918972801417e-05, + "logits/chosen": -0.2521243095397949, + "logits/rejected": -0.26861798763275146, + "logps/chosen": -0.0001973059552256018, + "logps/rejected": -2.646104097366333, + "loss": 0.3702, + "nll_loss": 0.09253590553998947, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.973059625015594e-05, + "rewards/margins": 0.26459068059921265, + "rewards/rejected": -0.2646104097366333, + "step": 14345 + }, + { + "epoch": 9.921161825726141, + "grad_norm": 5.119821071624756, + "learning_rate": 4.3798985707699405e-07, + "log_odds_chosen": 11.276300430297852, + "log_odds_ratio": -0.00011375157191650942, + "logits/chosen": -0.19327619671821594, + "logits/rejected": -0.32824161648750305, + "logps/chosen": -0.0003952342667616904, + "logps/rejected": -2.8914079666137695, + "loss": 0.4067, + "nll_loss": 0.10165176540613174, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.95234274037648e-05, + "rewards/margins": 0.2891013026237488, + "rewards/rejected": -0.2891407907009125, + "step": 14346 + }, + { + "epoch": 9.921853388658368, + "grad_norm": 3.130152940750122, + "learning_rate": 4.3414784078684496e-07, + "log_odds_chosen": 11.177153587341309, + "log_odds_ratio": -2.2892505512572825e-05, + "logits/chosen": -0.23346176743507385, + "logits/rejected": -0.3023627996444702, + "logps/chosen": -8.725168299861252e-05, + "logps/rejected": -1.9590811729431152, + "loss": 0.329, + "nll_loss": 0.08225492388010025, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.725168299861252e-06, + "rewards/margins": 0.19589939713478088, + "rewards/rejected": -0.1959080994129181, + "step": 14347 + }, + { + "epoch": 9.922544951590595, + "grad_norm": 3.5128986835479736, + "learning_rate": 4.3030582449669587e-07, + "log_odds_chosen": 9.824265480041504, + "log_odds_ratio": -0.00019305248861201108, + "logits/chosen": -0.335892915725708, + "logits/rejected": -0.4563968777656555, + "logps/chosen": -0.00021023111185058951, + "logps/rejected": -1.5053505897521973, + "loss": 0.2965, + "nll_loss": 0.07410359382629395, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.102311009366531e-05, + "rewards/margins": 0.15051403641700745, + "rewards/rejected": -0.15053506195545197, + "step": 14348 + }, + { + "epoch": 9.923236514522822, + "grad_norm": 3.2810566425323486, + "learning_rate": 4.264638082065468e-07, + "log_odds_chosen": 11.708654403686523, + "log_odds_ratio": -4.994563278160058e-05, + "logits/chosen": -0.03914834186434746, + "logits/rejected": -0.14430376887321472, + "logps/chosen": -0.00014019644004292786, + "logps/rejected": -2.522564172744751, + "loss": 0.3258, + "nll_loss": 0.08145511150360107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4019644368090667e-05, + "rewards/margins": 0.25224241614341736, + "rewards/rejected": -0.25225645303726196, + "step": 14349 + }, + { + "epoch": 9.923928077455049, + "grad_norm": 3.1786246299743652, + "learning_rate": 4.226217919163978e-07, + "log_odds_chosen": 9.759382247924805, + "log_odds_ratio": -0.00024910017964430153, + "logits/chosen": -0.1662512570619583, + "logits/rejected": -0.286848783493042, + "logps/chosen": -0.000488889985717833, + "logps/rejected": -1.896082878112793, + "loss": 0.3639, + "nll_loss": 0.09095057845115662, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8889003664953634e-05, + "rewards/margins": 0.18955940008163452, + "rewards/rejected": -0.18960830569267273, + "step": 14350 + }, + { + "epoch": 9.924619640387276, + "grad_norm": 3.9964568614959717, + "learning_rate": 4.187797756262487e-07, + "log_odds_chosen": 11.948275566101074, + "log_odds_ratio": -3.0497600164380856e-05, + "logits/chosen": -0.1427125781774521, + "logits/rejected": -0.2857535481452942, + "logps/chosen": -0.00014905542775522918, + "logps/rejected": -2.870856285095215, + "loss": 0.349, + "nll_loss": 0.08725368976593018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4905541320331395e-05, + "rewards/margins": 0.28707072138786316, + "rewards/rejected": -0.287085622549057, + "step": 14351 + }, + { + "epoch": 9.925311203319502, + "grad_norm": 3.8953027725219727, + "learning_rate": 4.149377593360996e-07, + "log_odds_chosen": 11.3707275390625, + "log_odds_ratio": -7.19654854037799e-05, + "logits/chosen": -0.15170668065547943, + "logits/rejected": -0.09434667229652405, + "logps/chosen": -0.00013707912876270711, + "logps/rejected": -2.4071221351623535, + "loss": 0.4143, + "nll_loss": 0.10356798022985458, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3707913240068592e-05, + "rewards/margins": 0.24069853127002716, + "rewards/rejected": -0.2407122403383255, + "step": 14352 + }, + { + "epoch": 9.92600276625173, + "grad_norm": 3.3584933280944824, + "learning_rate": 4.110957430459505e-07, + "log_odds_chosen": 11.33407974243164, + "log_odds_ratio": -0.0006362693966366351, + "logits/chosen": -0.029478400945663452, + "logits/rejected": 0.0747109204530716, + "logps/chosen": -0.0005637967842631042, + "logps/rejected": -2.222421884536743, + "loss": 0.2766, + "nll_loss": 0.06908503919839859, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.637968206428923e-05, + "rewards/margins": 0.22218582034111023, + "rewards/rejected": -0.22224220633506775, + "step": 14353 + }, + { + "epoch": 9.926694329183956, + "grad_norm": 2.657400369644165, + "learning_rate": 4.072537267558014e-07, + "log_odds_chosen": 11.29609489440918, + "log_odds_ratio": -5.056093505118042e-05, + "logits/chosen": -0.36670732498168945, + "logits/rejected": -0.37637168169021606, + "logps/chosen": -0.00039156334241852164, + "logps/rejected": -2.510091781616211, + "loss": 0.2492, + "nll_loss": 0.06230463087558746, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.915633715223521e-05, + "rewards/margins": 0.2509700059890747, + "rewards/rejected": -0.25100916624069214, + "step": 14354 + }, + { + "epoch": 9.927385892116183, + "grad_norm": 2.7479970455169678, + "learning_rate": 4.0341171046565244e-07, + "log_odds_chosen": 11.697278022766113, + "log_odds_ratio": -0.00018010164785664529, + "logits/chosen": -0.36440762877464294, + "logits/rejected": -0.36176443099975586, + "logps/chosen": -0.0001285705075133592, + "logps/rejected": -2.5904908180236816, + "loss": 0.2776, + "nll_loss": 0.06938271224498749, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2857051842729561e-05, + "rewards/margins": 0.25903621315956116, + "rewards/rejected": -0.25904908776283264, + "step": 14355 + }, + { + "epoch": 9.92807745504841, + "grad_norm": 3.3048360347747803, + "learning_rate": 3.9956969417550335e-07, + "log_odds_chosen": 10.689544677734375, + "log_odds_ratio": -4.125951454625465e-05, + "logits/chosen": -0.082699716091156, + "logits/rejected": -0.11700746417045593, + "logps/chosen": -0.0004147653526160866, + "logps/rejected": -2.2804675102233887, + "loss": 0.3935, + "nll_loss": 0.09838270395994186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.147654180997051e-05, + "rewards/margins": 0.2280052900314331, + "rewards/rejected": -0.22804677486419678, + "step": 14356 + }, + { + "epoch": 9.928769017980636, + "grad_norm": 3.060988187789917, + "learning_rate": 3.9572767788535425e-07, + "log_odds_chosen": 10.875423431396484, + "log_odds_ratio": -0.0001337130379397422, + "logits/chosen": -0.5028753280639648, + "logits/rejected": -0.475473552942276, + "logps/chosen": -0.00013390640378929675, + "logps/rejected": -1.8304393291473389, + "loss": 0.2411, + "nll_loss": 0.06026024371385574, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3390640560828615e-05, + "rewards/margins": 0.18303054571151733, + "rewards/rejected": -0.1830439269542694, + "step": 14357 + }, + { + "epoch": 9.929460580912863, + "grad_norm": 3.6875088214874268, + "learning_rate": 3.9188566159520516e-07, + "log_odds_chosen": 11.18298625946045, + "log_odds_ratio": -3.897779970429838e-05, + "logits/chosen": -0.215055450797081, + "logits/rejected": -0.2545468211174011, + "logps/chosen": -0.0002388485736446455, + "logps/rejected": -2.675607442855835, + "loss": 0.415, + "nll_loss": 0.10374531149864197, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3884858819656074e-05, + "rewards/margins": 0.26753684878349304, + "rewards/rejected": -0.267560750246048, + "step": 14358 + }, + { + "epoch": 9.93015214384509, + "grad_norm": 3.312593460083008, + "learning_rate": 3.8804364530505607e-07, + "log_odds_chosen": 10.246174812316895, + "log_odds_ratio": -4.975176489097066e-05, + "logits/chosen": -0.3240154981613159, + "logits/rejected": -0.3295287489891052, + "logps/chosen": -0.00044594379141926765, + "logps/rejected": -1.9773529767990112, + "loss": 0.2867, + "nll_loss": 0.0716669112443924, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.459438059711829e-05, + "rewards/margins": 0.1976906955242157, + "rewards/rejected": -0.19773529469966888, + "step": 14359 + }, + { + "epoch": 9.930843706777317, + "grad_norm": 2.8286046981811523, + "learning_rate": 3.8420162901490703e-07, + "log_odds_chosen": 13.840531349182129, + "log_odds_ratio": -4.998999429517426e-06, + "logits/chosen": -0.17049254477024078, + "logits/rejected": -0.17025065422058105, + "logps/chosen": -6.283095717662945e-05, + "logps/rejected": -4.116872787475586, + "loss": 0.3128, + "nll_loss": 0.0782066360116005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.283095899561886e-06, + "rewards/margins": 0.41168099641799927, + "rewards/rejected": -0.4116872549057007, + "step": 14360 + }, + { + "epoch": 9.931535269709544, + "grad_norm": 3.902099132537842, + "learning_rate": 3.8035961272475794e-07, + "log_odds_chosen": 11.704643249511719, + "log_odds_ratio": -1.2796294868167024e-05, + "logits/chosen": -0.47484564781188965, + "logits/rejected": -0.4412471055984497, + "logps/chosen": -0.00021034966630395502, + "logps/rejected": -2.4094064235687256, + "loss": 0.4744, + "nll_loss": 0.11860089749097824, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.103496626659762e-05, + "rewards/margins": 0.24091961979866028, + "rewards/rejected": -0.2409406453371048, + "step": 14361 + }, + { + "epoch": 9.93222683264177, + "grad_norm": 2.629956007003784, + "learning_rate": 3.765175964346089e-07, + "log_odds_chosen": 10.24508285522461, + "log_odds_ratio": -0.00032763052149675786, + "logits/chosen": -0.47297993302345276, + "logits/rejected": -0.3988914489746094, + "logps/chosen": -0.001132260193116963, + "logps/rejected": -1.8652898073196411, + "loss": 0.3144, + "nll_loss": 0.07857255637645721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00011322601494612172, + "rewards/margins": 0.18641576170921326, + "rewards/rejected": -0.1865289807319641, + "step": 14362 + }, + { + "epoch": 9.932918395573997, + "grad_norm": 3.8364431858062744, + "learning_rate": 3.7267558014445986e-07, + "log_odds_chosen": 10.027873992919922, + "log_odds_ratio": -0.0002950678754132241, + "logits/chosen": -0.2743924558162689, + "logits/rejected": -0.29987969994544983, + "logps/chosen": -0.000396082759834826, + "logps/rejected": -1.961113691329956, + "loss": 0.3966, + "nll_loss": 0.09912268817424774, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.96082759834826e-05, + "rewards/margins": 0.19607174396514893, + "rewards/rejected": -0.19611136615276337, + "step": 14363 + }, + { + "epoch": 9.933609958506224, + "grad_norm": 3.0874459743499756, + "learning_rate": 3.6883356385431077e-07, + "log_odds_chosen": 10.096710205078125, + "log_odds_ratio": -0.00023568206233903766, + "logits/chosen": -0.04615066200494766, + "logits/rejected": -0.14368771016597748, + "logps/chosen": -0.00041334485285915434, + "logps/rejected": -1.6168841123580933, + "loss": 0.2994, + "nll_loss": 0.07481654733419418, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.133448237553239e-05, + "rewards/margins": 0.16164706647396088, + "rewards/rejected": -0.1616884171962738, + "step": 14364 + }, + { + "epoch": 9.934301521438451, + "grad_norm": 3.1047251224517822, + "learning_rate": 3.6499154756416173e-07, + "log_odds_chosen": 10.717859268188477, + "log_odds_ratio": -0.00011625559272943065, + "logits/chosen": -0.3506580591201782, + "logits/rejected": -0.4321751892566681, + "logps/chosen": -0.00015862606232985854, + "logps/rejected": -2.001960039138794, + "loss": 0.303, + "nll_loss": 0.0757325291633606, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5862606232985854e-05, + "rewards/margins": 0.20018012821674347, + "rewards/rejected": -0.20019599795341492, + "step": 14365 + }, + { + "epoch": 9.934993084370678, + "grad_norm": 2.378512382507324, + "learning_rate": 3.6114953127401264e-07, + "log_odds_chosen": 10.845108985900879, + "log_odds_ratio": -8.855470514390618e-05, + "logits/chosen": -0.43037089705467224, + "logits/rejected": -0.4957068860530853, + "logps/chosen": -0.00022176709899213165, + "logps/rejected": -2.273205041885376, + "loss": 0.2282, + "nll_loss": 0.05704750120639801, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2176711354404688e-05, + "rewards/margins": 0.2272983193397522, + "rewards/rejected": -0.22732050716876984, + "step": 14366 + }, + { + "epoch": 9.935684647302905, + "grad_norm": 4.0295281410217285, + "learning_rate": 3.5730751498386355e-07, + "log_odds_chosen": 11.803579330444336, + "log_odds_ratio": -0.00016219766985159367, + "logits/chosen": -0.608587384223938, + "logits/rejected": -0.47692131996154785, + "logps/chosen": -0.0001110218872781843, + "logps/rejected": -2.783219814300537, + "loss": 0.4327, + "nll_loss": 0.10816709697246552, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1102188182121608e-05, + "rewards/margins": 0.2783108949661255, + "rewards/rejected": -0.2783219814300537, + "step": 14367 + }, + { + "epoch": 9.936376210235132, + "grad_norm": 4.525236129760742, + "learning_rate": 3.534654986937145e-07, + "log_odds_chosen": 10.090860366821289, + "log_odds_ratio": -0.00014848806313239038, + "logits/chosen": -0.38946732878685, + "logits/rejected": -0.33470335602760315, + "logps/chosen": -0.0006121266633272171, + "logps/rejected": -1.7874327898025513, + "loss": 0.467, + "nll_loss": 0.11672414094209671, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.121266778791323e-05, + "rewards/margins": 0.17868207395076752, + "rewards/rejected": -0.17874330282211304, + "step": 14368 + }, + { + "epoch": 9.937067773167358, + "grad_norm": 2.479583263397217, + "learning_rate": 3.496234824035654e-07, + "log_odds_chosen": 11.412271499633789, + "log_odds_ratio": -1.3052333088126034e-05, + "logits/chosen": -0.3329862356185913, + "logits/rejected": -0.31148213148117065, + "logps/chosen": -0.00014164020831231028, + "logps/rejected": -2.179635524749756, + "loss": 0.3027, + "nll_loss": 0.07568074017763138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4164021195028909e-05, + "rewards/margins": 0.21794936060905457, + "rewards/rejected": -0.21796351671218872, + "step": 14369 + }, + { + "epoch": 9.937759336099585, + "grad_norm": 3.3530638217926025, + "learning_rate": 3.457814661134163e-07, + "log_odds_chosen": 11.313865661621094, + "log_odds_ratio": -1.7188747733598575e-05, + "logits/chosen": -0.4860305190086365, + "logits/rejected": -0.5446900129318237, + "logps/chosen": -0.00016923845396377146, + "logps/rejected": -2.2236130237579346, + "loss": 0.3453, + "nll_loss": 0.08631302416324615, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6923844668781385e-05, + "rewards/margins": 0.22234439849853516, + "rewards/rejected": -0.22236132621765137, + "step": 14370 + }, + { + "epoch": 9.938450899031812, + "grad_norm": 4.311180114746094, + "learning_rate": 3.419394498232673e-07, + "log_odds_chosen": 12.050071716308594, + "log_odds_ratio": -2.2124790120869875e-05, + "logits/chosen": 0.15515440702438354, + "logits/rejected": 0.11022113263607025, + "logps/chosen": -0.00015703440294601023, + "logps/rejected": -3.03157639503479, + "loss": 0.4012, + "nll_loss": 0.10030660778284073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5703441022196785e-05, + "rewards/margins": 0.30314192175865173, + "rewards/rejected": -0.30315762758255005, + "step": 14371 + }, + { + "epoch": 9.939142461964039, + "grad_norm": 3.2761802673339844, + "learning_rate": 3.380974335331182e-07, + "log_odds_chosen": 11.795350074768066, + "log_odds_ratio": -2.7355852580512874e-05, + "logits/chosen": -0.21269749104976654, + "logits/rejected": -0.24452780187129974, + "logps/chosen": -0.00038405804662033916, + "logps/rejected": -2.8474626541137695, + "loss": 0.2882, + "nll_loss": 0.07203909754753113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.840580757241696e-05, + "rewards/margins": 0.2847078740596771, + "rewards/rejected": -0.28474628925323486, + "step": 14372 + }, + { + "epoch": 9.939834024896266, + "grad_norm": 4.4887309074401855, + "learning_rate": 3.342554172429691e-07, + "log_odds_chosen": 10.536214828491211, + "log_odds_ratio": -5.4568004998145625e-05, + "logits/chosen": -0.05506886541843414, + "logits/rejected": -0.09337669610977173, + "logps/chosen": -0.00011480246030259877, + "logps/rejected": -1.565328598022461, + "loss": 0.5551, + "nll_loss": 0.13875915110111237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1480246030259877e-05, + "rewards/margins": 0.15652137994766235, + "rewards/rejected": -0.1565328687429428, + "step": 14373 + }, + { + "epoch": 9.940525587828493, + "grad_norm": 3.0688681602478027, + "learning_rate": 3.3041340095282006e-07, + "log_odds_chosen": 12.513391494750977, + "log_odds_ratio": -1.2958620573044755e-05, + "logits/chosen": -0.49493831396102905, + "logits/rejected": -0.532113790512085, + "logps/chosen": -0.00010285632743034512, + "logps/rejected": -3.2977731227874756, + "loss": 0.3057, + "nll_loss": 0.07642143964767456, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0285633834428154e-05, + "rewards/margins": 0.32976701855659485, + "rewards/rejected": -0.3297773003578186, + "step": 14374 + }, + { + "epoch": 9.94121715076072, + "grad_norm": 3.959526300430298, + "learning_rate": 3.2657138466267097e-07, + "log_odds_chosen": 11.419652938842773, + "log_odds_ratio": -0.0004229422484058887, + "logits/chosen": 0.07121730595827103, + "logits/rejected": -0.13295045495033264, + "logps/chosen": -0.0003118419263046235, + "logps/rejected": -2.7346346378326416, + "loss": 0.3539, + "nll_loss": 0.0884229764342308, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.118419408565387e-05, + "rewards/margins": 0.27343228459358215, + "rewards/rejected": -0.2734634578227997, + "step": 14375 + }, + { + "epoch": 9.941908713692946, + "grad_norm": 3.3876402378082275, + "learning_rate": 3.227293683725219e-07, + "log_odds_chosen": 11.020099639892578, + "log_odds_ratio": -8.45960748847574e-05, + "logits/chosen": -0.23261260986328125, + "logits/rejected": -0.3242484927177429, + "logps/chosen": -0.00022357783745974302, + "logps/rejected": -2.103334903717041, + "loss": 0.3831, + "nll_loss": 0.09575633704662323, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2357784473570064e-05, + "rewards/margins": 0.2103111296892166, + "rewards/rejected": -0.21033348143100739, + "step": 14376 + }, + { + "epoch": 9.942600276625173, + "grad_norm": 4.13154411315918, + "learning_rate": 3.1888735208237284e-07, + "log_odds_chosen": 10.373748779296875, + "log_odds_ratio": -8.665035420563072e-05, + "logits/chosen": -0.2737635374069214, + "logits/rejected": -0.3789776563644409, + "logps/chosen": -0.00020896026398986578, + "logps/rejected": -1.7434680461883545, + "loss": 0.5489, + "nll_loss": 0.1372080147266388, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0896026398986578e-05, + "rewards/margins": 0.17432589828968048, + "rewards/rejected": -0.17434681951999664, + "step": 14377 + }, + { + "epoch": 9.9432918395574, + "grad_norm": 3.0957772731781006, + "learning_rate": 3.1504533579222375e-07, + "log_odds_chosen": 10.974006652832031, + "log_odds_ratio": -4.768053986481391e-05, + "logits/chosen": -0.23501087725162506, + "logits/rejected": -0.29930877685546875, + "logps/chosen": -0.0003055653069168329, + "logps/rejected": -2.1016600131988525, + "loss": 0.2729, + "nll_loss": 0.06820801645517349, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.055652996408753e-05, + "rewards/margins": 0.21013543009757996, + "rewards/rejected": -0.21016600728034973, + "step": 14378 + }, + { + "epoch": 9.943983402489627, + "grad_norm": 3.494986057281494, + "learning_rate": 3.112033195020747e-07, + "log_odds_chosen": 11.643218040466309, + "log_odds_ratio": -3.819255289272405e-05, + "logits/chosen": 0.0929754376411438, + "logits/rejected": -0.049822524189949036, + "logps/chosen": -0.00013487892283592373, + "logps/rejected": -2.6793737411499023, + "loss": 0.3038, + "nll_loss": 0.07595498859882355, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3487893738783896e-05, + "rewards/margins": 0.26792389154434204, + "rewards/rejected": -0.26793739199638367, + "step": 14379 + }, + { + "epoch": 9.944674965421854, + "grad_norm": 3.9710512161254883, + "learning_rate": 3.073613032119256e-07, + "log_odds_chosen": 11.226348876953125, + "log_odds_ratio": -5.1108327170368284e-05, + "logits/chosen": -0.392134428024292, + "logits/rejected": -0.4214463531970978, + "logps/chosen": -0.00013843349006492645, + "logps/rejected": -2.1756904125213623, + "loss": 0.3873, + "nll_loss": 0.09682579338550568, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3843347915099002e-05, + "rewards/margins": 0.2175552099943161, + "rewards/rejected": -0.21756905317306519, + "step": 14380 + }, + { + "epoch": 9.94536652835408, + "grad_norm": 4.002619743347168, + "learning_rate": 3.035192869217766e-07, + "log_odds_chosen": 12.835500717163086, + "log_odds_ratio": -1.0827752703335136e-05, + "logits/chosen": -0.45156607031822205, + "logits/rejected": -0.5535336136817932, + "logps/chosen": -7.671228377148509e-05, + "logps/rejected": -3.319952964782715, + "loss": 0.4184, + "nll_loss": 0.10460923612117767, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67122855904745e-06, + "rewards/margins": 0.3319876194000244, + "rewards/rejected": -0.33199530839920044, + "step": 14381 + }, + { + "epoch": 9.946058091286307, + "grad_norm": 3.6625680923461914, + "learning_rate": 2.9967727063162754e-07, + "log_odds_chosen": 11.213696479797363, + "log_odds_ratio": -4.3532319978112355e-05, + "logits/chosen": 0.1309901475906372, + "logits/rejected": -0.013646259903907776, + "logps/chosen": -0.00030758522916585207, + "logps/rejected": -2.5861880779266357, + "loss": 0.6305, + "nll_loss": 0.15761451423168182, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.075852509937249e-05, + "rewards/margins": 0.258588045835495, + "rewards/rejected": -0.2586188018321991, + "step": 14382 + }, + { + "epoch": 9.946749654218534, + "grad_norm": 3.9107625484466553, + "learning_rate": 2.9583525434147844e-07, + "log_odds_chosen": 11.015143394470215, + "log_odds_ratio": -0.00011202752648387104, + "logits/chosen": -0.4123598635196686, + "logits/rejected": -0.41012248396873474, + "logps/chosen": -0.00012248425628058612, + "logps/rejected": -1.9108706712722778, + "loss": 0.3176, + "nll_loss": 0.0793963372707367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2248425264260732e-05, + "rewards/margins": 0.19107483327388763, + "rewards/rejected": -0.19108708202838898, + "step": 14383 + }, + { + "epoch": 9.947441217150761, + "grad_norm": 4.414669990539551, + "learning_rate": 2.9199323805132935e-07, + "log_odds_chosen": 11.332856178283691, + "log_odds_ratio": -1.5987745427992195e-05, + "logits/chosen": -0.03796715661883354, + "logits/rejected": -0.1524902582168579, + "logps/chosen": -0.0013471555430442095, + "logps/rejected": -2.820084571838379, + "loss": 0.729, + "nll_loss": 0.1822463423013687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00013471556303557009, + "rewards/margins": 0.28187376260757446, + "rewards/rejected": -0.28200846910476685, + "step": 14384 + }, + { + "epoch": 9.948132780082988, + "grad_norm": 3.4981753826141357, + "learning_rate": 2.881512217611803e-07, + "log_odds_chosen": 11.192922592163086, + "log_odds_ratio": -5.592240268015303e-05, + "logits/chosen": -0.16741448640823364, + "logits/rejected": -0.2337617129087448, + "logps/chosen": -0.00023815588792786002, + "logps/rejected": -2.6667916774749756, + "loss": 0.3633, + "nll_loss": 0.09081831574440002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.381558806519024e-05, + "rewards/margins": 0.2666553258895874, + "rewards/rejected": -0.26667916774749756, + "step": 14385 + }, + { + "epoch": 9.948824343015215, + "grad_norm": 4.5940260887146, + "learning_rate": 2.843092054710312e-07, + "log_odds_chosen": 10.830406188964844, + "log_odds_ratio": -0.000504222116433084, + "logits/chosen": -0.00815871637314558, + "logits/rejected": -0.0809616893529892, + "logps/chosen": -0.00018051249207928777, + "logps/rejected": -2.215250015258789, + "loss": 0.4995, + "nll_loss": 0.12483423948287964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.80512506631203e-05, + "rewards/margins": 0.22150695323944092, + "rewards/rejected": -0.22152499854564667, + "step": 14386 + }, + { + "epoch": 9.949515905947441, + "grad_norm": 3.0476293563842773, + "learning_rate": 2.8046718918088213e-07, + "log_odds_chosen": 11.848611831665039, + "log_odds_ratio": -3.5934321203967556e-05, + "logits/chosen": 0.06482765823602676, + "logits/rejected": 0.010766156017780304, + "logps/chosen": -0.00028337494586594403, + "logps/rejected": -3.363632917404175, + "loss": 0.2712, + "nll_loss": 0.06780073046684265, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.833749749697745e-05, + "rewards/margins": 0.3363349735736847, + "rewards/rejected": -0.3363633155822754, + "step": 14387 + }, + { + "epoch": 9.950207468879668, + "grad_norm": 5.000711917877197, + "learning_rate": 2.766251728907331e-07, + "log_odds_chosen": 11.645959854125977, + "log_odds_ratio": -1.2625767340068705e-05, + "logits/chosen": -0.46102869510650635, + "logits/rejected": -0.5022953748703003, + "logps/chosen": -9.16571807465516e-05, + "logps/rejected": -2.1655919551849365, + "loss": 0.2875, + "nll_loss": 0.07186735421419144, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.165718438453041e-06, + "rewards/margins": 0.21655002236366272, + "rewards/rejected": -0.21655918657779694, + "step": 14388 + }, + { + "epoch": 9.950899031811895, + "grad_norm": 3.3046367168426514, + "learning_rate": 2.72783156600584e-07, + "log_odds_chosen": 11.407812118530273, + "log_odds_ratio": -2.3163374862633646e-05, + "logits/chosen": 0.016744054853916168, + "logits/rejected": -0.06968782097101212, + "logps/chosen": -0.00015086446364875883, + "logps/rejected": -2.4506261348724365, + "loss": 0.489, + "nll_loss": 0.12225218862295151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5086447092471644e-05, + "rewards/margins": 0.24504750967025757, + "rewards/rejected": -0.24506260454654694, + "step": 14389 + }, + { + "epoch": 9.951590594744122, + "grad_norm": 4.441773891448975, + "learning_rate": 2.689411403104349e-07, + "log_odds_chosen": 11.36505126953125, + "log_odds_ratio": -3.684817784233019e-05, + "logits/chosen": -0.2129819393157959, + "logits/rejected": -0.16193649172782898, + "logps/chosen": -0.0001590220199432224, + "logps/rejected": -2.540721893310547, + "loss": 0.4842, + "nll_loss": 0.12104177474975586, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.590220199432224e-05, + "rewards/margins": 0.25405630469322205, + "rewards/rejected": -0.2540721893310547, + "step": 14390 + }, + { + "epoch": 9.952282157676349, + "grad_norm": 3.1770615577697754, + "learning_rate": 2.6509912402028587e-07, + "log_odds_chosen": 12.067096710205078, + "log_odds_ratio": -1.1090942280134186e-05, + "logits/chosen": -0.5020029544830322, + "logits/rejected": -0.47750067710876465, + "logps/chosen": -6.147479871287942e-05, + "logps/rejected": -2.369724750518799, + "loss": 0.3183, + "nll_loss": 0.07957428693771362, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.147480235085823e-06, + "rewards/margins": 0.2369663417339325, + "rewards/rejected": -0.23697249591350555, + "step": 14391 + }, + { + "epoch": 9.952973720608576, + "grad_norm": 3.6418378353118896, + "learning_rate": 2.612571077301368e-07, + "log_odds_chosen": 11.277714729309082, + "log_odds_ratio": -2.0814361050724983e-05, + "logits/chosen": -0.0488487109541893, + "logits/rejected": -0.10118186473846436, + "logps/chosen": -0.00013960029173176736, + "logps/rejected": -2.152407169342041, + "loss": 0.4439, + "nll_loss": 0.11096364259719849, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3960028809378855e-05, + "rewards/margins": 0.21522676944732666, + "rewards/rejected": -0.2152407169342041, + "step": 14392 + }, + { + "epoch": 9.953665283540802, + "grad_norm": 2.9789321422576904, + "learning_rate": 2.574150914399877e-07, + "log_odds_chosen": 11.723701477050781, + "log_odds_ratio": -6.824049341958016e-05, + "logits/chosen": -0.2967926561832428, + "logits/rejected": -0.3067656457424164, + "logps/chosen": -0.00016774365212768316, + "logps/rejected": -2.2390599250793457, + "loss": 0.4298, + "nll_loss": 0.10745257139205933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6774365576566197e-05, + "rewards/margins": 0.22388924658298492, + "rewards/rejected": -0.223906010389328, + "step": 14393 + }, + { + "epoch": 9.95435684647303, + "grad_norm": 3.5029759407043457, + "learning_rate": 2.5357307514983864e-07, + "log_odds_chosen": 10.046087265014648, + "log_odds_ratio": -0.00013034732546657324, + "logits/chosen": -0.19458447396755219, + "logits/rejected": -0.22558461129665375, + "logps/chosen": -0.00021201715571805835, + "logps/rejected": -1.4606382846832275, + "loss": 0.3774, + "nll_loss": 0.09434570372104645, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1201714844210073e-05, + "rewards/margins": 0.1460426300764084, + "rewards/rejected": -0.14606383442878723, + "step": 14394 + }, + { + "epoch": 9.955048409405256, + "grad_norm": 4.0619635581970215, + "learning_rate": 2.4973105885968955e-07, + "log_odds_chosen": 12.413434982299805, + "log_odds_ratio": -8.567772601963952e-05, + "logits/chosen": 0.010890178382396698, + "logits/rejected": -0.04578394815325737, + "logps/chosen": -0.0005771232536062598, + "logps/rejected": -3.222485065460205, + "loss": 0.4735, + "nll_loss": 0.11836060881614685, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771232827100903e-05, + "rewards/margins": 0.3221907615661621, + "rewards/rejected": -0.32224851846694946, + "step": 14395 + }, + { + "epoch": 9.955739972337483, + "grad_norm": 2.319875955581665, + "learning_rate": 2.4588904256954046e-07, + "log_odds_chosen": 10.2796049118042, + "log_odds_ratio": -0.0003484278277028352, + "logits/chosen": -0.867690920829773, + "logits/rejected": -0.8488110303878784, + "logps/chosen": -0.0004555814084596932, + "logps/rejected": -1.552926778793335, + "loss": 0.3101, + "nll_loss": 0.07749606668949127, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5558139390777797e-05, + "rewards/margins": 0.15524712204933167, + "rewards/rejected": -0.15529268980026245, + "step": 14396 + }, + { + "epoch": 9.95643153526971, + "grad_norm": 3.616581916809082, + "learning_rate": 2.420470262793914e-07, + "log_odds_chosen": 11.643377304077148, + "log_odds_ratio": -3.979198299930431e-05, + "logits/chosen": -0.4350857734680176, + "logits/rejected": -0.5141991972923279, + "logps/chosen": -0.0005210883100517094, + "logps/rejected": -3.020252227783203, + "loss": 0.3239, + "nll_loss": 0.08097027242183685, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.210883682593703e-05, + "rewards/margins": 0.3019731044769287, + "rewards/rejected": -0.3020251989364624, + "step": 14397 + }, + { + "epoch": 9.957123098201937, + "grad_norm": 3.0220160484313965, + "learning_rate": 2.3820500998924236e-07, + "log_odds_chosen": 10.704514503479004, + "log_odds_ratio": -0.000209915975574404, + "logits/chosen": -0.10353732109069824, + "logits/rejected": -0.1952836513519287, + "logps/chosen": -0.000387487409170717, + "logps/rejected": -2.056887149810791, + "loss": 0.3214, + "nll_loss": 0.08033111691474915, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8748738006688654e-05, + "rewards/margins": 0.2056499719619751, + "rewards/rejected": -0.2056887298822403, + "step": 14398 + }, + { + "epoch": 9.957814661134163, + "grad_norm": 3.386103630065918, + "learning_rate": 2.3436299369909332e-07, + "log_odds_chosen": 10.593547821044922, + "log_odds_ratio": -0.00010035329614765942, + "logits/chosen": -0.3432830572128296, + "logits/rejected": -0.4551471769809723, + "logps/chosen": -0.00039374135667458177, + "logps/rejected": -1.9529142379760742, + "loss": 0.375, + "nll_loss": 0.0937325730919838, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9374139305436984e-05, + "rewards/margins": 0.19525204598903656, + "rewards/rejected": -0.1952914148569107, + "step": 14399 + }, + { + "epoch": 9.95850622406639, + "grad_norm": 3.4891092777252197, + "learning_rate": 2.3052097740894422e-07, + "log_odds_chosen": 11.950043678283691, + "log_odds_ratio": -7.900898708612658e-06, + "logits/chosen": -0.31247663497924805, + "logits/rejected": -0.3481077551841736, + "logps/chosen": -0.00016259380208794028, + "logps/rejected": -2.870516300201416, + "loss": 0.3762, + "nll_loss": 0.09405548125505447, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6259382391581312e-05, + "rewards/margins": 0.28703537583351135, + "rewards/rejected": -0.28705161809921265, + "step": 14400 + }, + { + "epoch": 9.959197786998617, + "grad_norm": 4.95497989654541, + "learning_rate": 2.2667896111879513e-07, + "log_odds_chosen": 11.518646240234375, + "log_odds_ratio": -2.0016508642584085e-05, + "logits/chosen": -0.09684181213378906, + "logits/rejected": -0.07883979380130768, + "logps/chosen": -0.00022995812469162047, + "logps/rejected": -2.7280092239379883, + "loss": 0.4319, + "nll_loss": 0.1079646497964859, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.299581319675781e-05, + "rewards/margins": 0.2727779150009155, + "rewards/rejected": -0.27280092239379883, + "step": 14401 + }, + { + "epoch": 9.959889349930844, + "grad_norm": 2.1735763549804688, + "learning_rate": 2.228369448286461e-07, + "log_odds_chosen": 11.362403869628906, + "log_odds_ratio": -0.00016163713007699698, + "logits/chosen": -0.5279222726821899, + "logits/rejected": -0.39347508549690247, + "logps/chosen": -0.0002045204018941149, + "logps/rejected": -2.1512932777404785, + "loss": 0.2093, + "nll_loss": 0.05231678858399391, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0452042008400895e-05, + "rewards/margins": 0.21510887145996094, + "rewards/rejected": -0.21512934565544128, + "step": 14402 + }, + { + "epoch": 9.96058091286307, + "grad_norm": 3.0434203147888184, + "learning_rate": 2.1899492853849703e-07, + "log_odds_chosen": 11.224910736083984, + "log_odds_ratio": -3.419286076677963e-05, + "logits/chosen": -0.719974160194397, + "logits/rejected": -0.7234589457511902, + "logps/chosen": -0.00011317985627101734, + "logps/rejected": -1.707182765007019, + "loss": 0.3209, + "nll_loss": 0.08021647483110428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1317985809000675e-05, + "rewards/margins": 0.17070695757865906, + "rewards/rejected": -0.17071828246116638, + "step": 14403 + }, + { + "epoch": 9.961272475795298, + "grad_norm": 3.3985514640808105, + "learning_rate": 2.1515291224834794e-07, + "log_odds_chosen": 11.151140213012695, + "log_odds_ratio": -2.503224641259294e-05, + "logits/chosen": -0.7156893610954285, + "logits/rejected": -0.7925446629524231, + "logps/chosen": -0.00011096397793153301, + "logps/rejected": -1.8769021034240723, + "loss": 0.3496, + "nll_loss": 0.08739632368087769, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1096398338850122e-05, + "rewards/margins": 0.18767911195755005, + "rewards/rejected": -0.18769021332263947, + "step": 14404 + }, + { + "epoch": 9.961964038727524, + "grad_norm": 2.7494256496429443, + "learning_rate": 2.113108959581989e-07, + "log_odds_chosen": 11.359611511230469, + "log_odds_ratio": -2.5823699616012163e-05, + "logits/chosen": -0.7222087979316711, + "logits/rejected": -0.7345014810562134, + "logps/chosen": -0.00012434877862688154, + "logps/rejected": -2.3068008422851562, + "loss": 0.4386, + "nll_loss": 0.10965168476104736, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2434877135092393e-05, + "rewards/margins": 0.2306676208972931, + "rewards/rejected": -0.23068007826805115, + "step": 14405 + }, + { + "epoch": 9.962655601659751, + "grad_norm": 3.3446850776672363, + "learning_rate": 2.074688796680498e-07, + "log_odds_chosen": 11.147770881652832, + "log_odds_ratio": -2.3483353288611397e-05, + "logits/chosen": -0.2715800106525421, + "logits/rejected": -0.38775211572647095, + "logps/chosen": -0.00018793967319652438, + "logps/rejected": -2.3913774490356445, + "loss": 0.3135, + "nll_loss": 0.07836674153804779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8793965864460915e-05, + "rewards/margins": 0.23911894857883453, + "rewards/rejected": -0.23913775384426117, + "step": 14406 + }, + { + "epoch": 9.963347164591978, + "grad_norm": 3.412565231323242, + "learning_rate": 2.036268633779007e-07, + "log_odds_chosen": 11.36739730834961, + "log_odds_ratio": -0.00020149120246060193, + "logits/chosen": -0.3841862082481384, + "logits/rejected": -0.37498146295547485, + "logps/chosen": -0.0002477857342455536, + "logps/rejected": -2.7530517578125, + "loss": 0.3472, + "nll_loss": 0.08676967024803162, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4778573788353242e-05, + "rewards/margins": 0.2752804458141327, + "rewards/rejected": -0.27530521154403687, + "step": 14407 + }, + { + "epoch": 9.964038727524205, + "grad_norm": 3.8611674308776855, + "learning_rate": 1.9978484708775167e-07, + "log_odds_chosen": 11.597522735595703, + "log_odds_ratio": -2.0960025722160935e-05, + "logits/chosen": -0.5457533001899719, + "logits/rejected": -0.5970411896705627, + "logps/chosen": -0.00017053255578503013, + "logps/rejected": -2.5950927734375, + "loss": 0.4092, + "nll_loss": 0.10229329019784927, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7053256669896655e-05, + "rewards/margins": 0.25949224829673767, + "rewards/rejected": -0.25950929522514343, + "step": 14408 + }, + { + "epoch": 9.964730290456432, + "grad_norm": 5.490063667297363, + "learning_rate": 1.9594283079760258e-07, + "log_odds_chosen": 11.967035293579102, + "log_odds_ratio": -8.80186416907236e-06, + "logits/chosen": -0.34099629521369934, + "logits/rejected": -0.3364622890949249, + "logps/chosen": -6.214046879904345e-05, + "logps/rejected": -2.297642230987549, + "loss": 0.731, + "nll_loss": 0.18274804949760437, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.214047061803285e-06, + "rewards/margins": 0.22975800931453705, + "rewards/rejected": -0.22976422309875488, + "step": 14409 + }, + { + "epoch": 9.965421853388658, + "grad_norm": 3.1003382205963135, + "learning_rate": 1.9210081450745352e-07, + "log_odds_chosen": 11.369587898254395, + "log_odds_ratio": -1.9927889297832735e-05, + "logits/chosen": -0.5328479409217834, + "logits/rejected": -0.5892972946166992, + "logps/chosen": -0.00016084310482256114, + "logps/rejected": -2.1601061820983887, + "loss": 0.3009, + "nll_loss": 0.07521713525056839, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6084311937447637e-05, + "rewards/margins": 0.21599456667900085, + "rewards/rejected": -0.21601064503192902, + "step": 14410 + }, + { + "epoch": 9.966113416320885, + "grad_norm": 3.565310001373291, + "learning_rate": 1.8825879821730445e-07, + "log_odds_chosen": 10.874410629272461, + "log_odds_ratio": -0.0001317542337346822, + "logits/chosen": -0.225718691945076, + "logits/rejected": -0.2878406047821045, + "logps/chosen": -0.00030120619339868426, + "logps/rejected": -2.397109270095825, + "loss": 0.4632, + "nll_loss": 0.11577460914850235, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.012062188645359e-05, + "rewards/margins": 0.23968079686164856, + "rewards/rejected": -0.23971092700958252, + "step": 14411 + }, + { + "epoch": 9.966804979253112, + "grad_norm": 2.7302663326263428, + "learning_rate": 1.8441678192715538e-07, + "log_odds_chosen": 10.82535457611084, + "log_odds_ratio": -7.440607441822067e-05, + "logits/chosen": -0.397928923368454, + "logits/rejected": -0.3490590453147888, + "logps/chosen": -0.00016319297719746828, + "logps/rejected": -2.080110549926758, + "loss": 0.2296, + "nll_loss": 0.05740056559443474, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.631929808354471e-05, + "rewards/margins": 0.2079947590827942, + "rewards/rejected": -0.20801107585430145, + "step": 14412 + }, + { + "epoch": 9.967496542185339, + "grad_norm": 3.359323501586914, + "learning_rate": 1.8057476563700632e-07, + "log_odds_chosen": 11.5607271194458, + "log_odds_ratio": -2.955112540803384e-05, + "logits/chosen": -0.5813517570495605, + "logits/rejected": -0.6620681285858154, + "logps/chosen": -0.0001213313516927883, + "logps/rejected": -2.29302978515625, + "loss": 0.4426, + "nll_loss": 0.11064665019512177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.213313498737989e-05, + "rewards/margins": 0.22929087281227112, + "rewards/rejected": -0.22930298745632172, + "step": 14413 + }, + { + "epoch": 9.968188105117566, + "grad_norm": 5.813520908355713, + "learning_rate": 1.7673274934685725e-07, + "log_odds_chosen": 9.556093215942383, + "log_odds_ratio": -0.1622939556837082, + "logits/chosen": 0.14559586346149445, + "logits/rejected": 0.039752352982759476, + "logps/chosen": -0.033193714916706085, + "logps/rejected": -2.0233352184295654, + "loss": 0.4383, + "nll_loss": 0.09335532784461975, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.003319371724501252, + "rewards/margins": 0.19901415705680847, + "rewards/rejected": -0.20233353972434998, + "step": 14414 + }, + { + "epoch": 9.968879668049793, + "grad_norm": 2.3858280181884766, + "learning_rate": 1.7289073305670816e-07, + "log_odds_chosen": 9.702201843261719, + "log_odds_ratio": -0.0002256479929201305, + "logits/chosen": -0.3610384166240692, + "logits/rejected": -0.35918182134628296, + "logps/chosen": -0.0002743570366874337, + "logps/rejected": -1.2810546159744263, + "loss": 0.2601, + "nll_loss": 0.0649990364909172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7435704396339133e-05, + "rewards/margins": 0.12807804346084595, + "rewards/rejected": -0.12810547649860382, + "step": 14415 + }, + { + "epoch": 9.96957123098202, + "grad_norm": 3.2431888580322266, + "learning_rate": 1.690487167665591e-07, + "log_odds_chosen": 10.424958229064941, + "log_odds_ratio": -8.65029142005369e-05, + "logits/chosen": -0.08212536573410034, + "logits/rejected": 0.022407136857509613, + "logps/chosen": -0.00048625317867845297, + "logps/rejected": -2.2663631439208984, + "loss": 0.4031, + "nll_loss": 0.10076534748077393, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.862531568505801e-05, + "rewards/margins": 0.22658771276474, + "rewards/rejected": -0.22663632035255432, + "step": 14416 + }, + { + "epoch": 9.970262793914246, + "grad_norm": 2.993499994277954, + "learning_rate": 1.6520670047641003e-07, + "log_odds_chosen": 12.383956909179688, + "log_odds_ratio": -1.850670378189534e-05, + "logits/chosen": -0.055138956755399704, + "logits/rejected": -0.17104335129261017, + "logps/chosen": -0.00026264588814228773, + "logps/rejected": -3.508105516433716, + "loss": 0.3427, + "nll_loss": 0.08568139374256134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6264588086633012e-05, + "rewards/margins": 0.3507842719554901, + "rewards/rejected": -0.35081055760383606, + "step": 14417 + }, + { + "epoch": 9.970954356846473, + "grad_norm": 2.7699737548828125, + "learning_rate": 1.6136468418626094e-07, + "log_odds_chosen": 10.448410034179688, + "log_odds_ratio": -0.0002700125623960048, + "logits/chosen": -0.24522686004638672, + "logits/rejected": -0.2643146812915802, + "logps/chosen": -0.0003112297272309661, + "logps/rejected": -1.7951382398605347, + "loss": 0.2979, + "nll_loss": 0.0744408369064331, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1122970540309325e-05, + "rewards/margins": 0.17948269844055176, + "rewards/rejected": -0.1795138269662857, + "step": 14418 + }, + { + "epoch": 9.9716459197787, + "grad_norm": 3.2262375354766846, + "learning_rate": 1.5752266789611187e-07, + "log_odds_chosen": 11.268388748168945, + "log_odds_ratio": -6.620370550081134e-05, + "logits/chosen": -0.6279283761978149, + "logits/rejected": -0.6332384943962097, + "logps/chosen": -0.000160843541380018, + "logps/rejected": -2.012922763824463, + "loss": 0.329, + "nll_loss": 0.08223171532154083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6084355593193322e-05, + "rewards/margins": 0.20127618312835693, + "rewards/rejected": -0.2012922763824463, + "step": 14419 + }, + { + "epoch": 9.972337482710927, + "grad_norm": 3.0317883491516113, + "learning_rate": 1.536806516059628e-07, + "log_odds_chosen": 11.24959659576416, + "log_odds_ratio": -2.1257339540170506e-05, + "logits/chosen": -0.3580717444419861, + "logits/rejected": -0.34256482124328613, + "logps/chosen": -0.00010173609189223498, + "logps/rejected": -1.8382275104522705, + "loss": 0.2935, + "nll_loss": 0.07337658852338791, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0173609553021379e-05, + "rewards/margins": 0.18381257355213165, + "rewards/rejected": -0.18382275104522705, + "step": 14420 + }, + { + "epoch": 9.973029045643154, + "grad_norm": 3.33316969871521, + "learning_rate": 1.4983863531581377e-07, + "log_odds_chosen": 11.102269172668457, + "log_odds_ratio": -6.263488467084244e-05, + "logits/chosen": -0.5760700106620789, + "logits/rejected": -0.757520318031311, + "logps/chosen": -0.00015352622722275555, + "logps/rejected": -2.201449394226074, + "loss": 0.3569, + "nll_loss": 0.08921611309051514, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5352623449871317e-05, + "rewards/margins": 0.22012959420681, + "rewards/rejected": -0.22014492750167847, + "step": 14421 + }, + { + "epoch": 9.97372060857538, + "grad_norm": 3.053219795227051, + "learning_rate": 1.4599661902566468e-07, + "log_odds_chosen": 12.511886596679688, + "log_odds_ratio": -1.9515664462232962e-05, + "logits/chosen": -0.4584193825721741, + "logits/rejected": -0.47777336835861206, + "logps/chosen": -0.00012212220462970436, + "logps/rejected": -3.465627670288086, + "loss": 0.3561, + "nll_loss": 0.08903485536575317, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2212220099172555e-05, + "rewards/margins": 0.3465505838394165, + "rewards/rejected": -0.34656277298927307, + "step": 14422 + }, + { + "epoch": 9.974412171507607, + "grad_norm": 3.221226215362549, + "learning_rate": 1.421546027355156e-07, + "log_odds_chosen": 10.802603721618652, + "log_odds_ratio": -6.027729614288546e-05, + "logits/chosen": 0.020891718566417694, + "logits/rejected": 0.010172158479690552, + "logps/chosen": -0.00024104956537485123, + "logps/rejected": -1.9035903215408325, + "loss": 0.3388, + "nll_loss": 0.08469771593809128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.410495471849572e-05, + "rewards/margins": 0.19033494591712952, + "rewards/rejected": -0.19035905599594116, + "step": 14423 + }, + { + "epoch": 9.975103734439834, + "grad_norm": 3.4250340461730957, + "learning_rate": 1.3831258644536654e-07, + "log_odds_chosen": 10.85274887084961, + "log_odds_ratio": -7.843859930289909e-05, + "logits/chosen": -0.16926470398902893, + "logits/rejected": -0.16885489225387573, + "logps/chosen": -0.00040010723751038313, + "logps/rejected": -2.4334042072296143, + "loss": 0.3691, + "nll_loss": 0.09226728975772858, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.001072375103831e-05, + "rewards/margins": 0.2433004081249237, + "rewards/rejected": -0.2433404177427292, + "step": 14424 + }, + { + "epoch": 9.975795297372061, + "grad_norm": 3.7283456325531006, + "learning_rate": 1.3447057015521745e-07, + "log_odds_chosen": 10.44284439086914, + "log_odds_ratio": -0.00015514253755100071, + "logits/chosen": 0.31527790427207947, + "logits/rejected": 0.3427479863166809, + "logps/chosen": -0.00020504721032921225, + "logps/rejected": -1.915432095527649, + "loss": 0.3656, + "nll_loss": 0.09138018637895584, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0504719941527583e-05, + "rewards/margins": 0.1915227174758911, + "rewards/rejected": -0.19154320657253265, + "step": 14425 + }, + { + "epoch": 9.976486860304288, + "grad_norm": 4.250152111053467, + "learning_rate": 1.306285538650684e-07, + "log_odds_chosen": 11.869510650634766, + "log_odds_ratio": -9.183246220345609e-06, + "logits/chosen": -0.340200275182724, + "logits/rejected": -0.3542943000793457, + "logps/chosen": -0.00025191018357872963, + "logps/rejected": -2.6849536895751953, + "loss": 0.51, + "nll_loss": 0.12750062346458435, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.519101690268144e-05, + "rewards/margins": 0.2684701979160309, + "rewards/rejected": -0.2684953808784485, + "step": 14426 + }, + { + "epoch": 9.977178423236515, + "grad_norm": 3.831174850463867, + "learning_rate": 1.2678653757491932e-07, + "log_odds_chosen": 11.098943710327148, + "log_odds_ratio": -3.759023820748553e-05, + "logits/chosen": -0.3505597412586212, + "logits/rejected": -0.391792356967926, + "logps/chosen": -0.00024021898570936173, + "logps/rejected": -2.4518837928771973, + "loss": 0.2844, + "nll_loss": 0.07108572125434875, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.402190148131922e-05, + "rewards/margins": 0.24516433477401733, + "rewards/rejected": -0.2451883852481842, + "step": 14427 + }, + { + "epoch": 9.977869986168741, + "grad_norm": 5.215520858764648, + "learning_rate": 1.2294452128477023e-07, + "log_odds_chosen": 11.317865371704102, + "log_odds_ratio": -2.5791119696805254e-05, + "logits/chosen": -0.09801247715950012, + "logits/rejected": -0.0976279005408287, + "logps/chosen": -0.00020278405281715095, + "logps/rejected": -2.345594882965088, + "loss": 0.5098, + "nll_loss": 0.12745140492916107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0278404917917214e-05, + "rewards/margins": 0.2345392107963562, + "rewards/rejected": -0.23455950617790222, + "step": 14428 + }, + { + "epoch": 9.978561549100968, + "grad_norm": 3.929835081100464, + "learning_rate": 1.1910250499462118e-07, + "log_odds_chosen": 12.225106239318848, + "log_odds_ratio": -0.00011829940194729716, + "logits/chosen": -0.17743200063705444, + "logits/rejected": -0.2416902482509613, + "logps/chosen": -0.0002096697426168248, + "logps/rejected": -3.256639242172241, + "loss": 0.3582, + "nll_loss": 0.08954370021820068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0966976080671884e-05, + "rewards/margins": 0.32564300298690796, + "rewards/rejected": -0.3256639540195465, + "step": 14429 + }, + { + "epoch": 9.979253112033195, + "grad_norm": 2.8752622604370117, + "learning_rate": 1.1526048870447211e-07, + "log_odds_chosen": 11.139347076416016, + "log_odds_ratio": -9.415207023266703e-05, + "logits/chosen": -0.59767746925354, + "logits/rejected": -0.6264057159423828, + "logps/chosen": -0.00012206398241687566, + "logps/rejected": -1.9091721773147583, + "loss": 0.3657, + "nll_loss": 0.09141746163368225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2206397514091805e-05, + "rewards/margins": 0.19090501964092255, + "rewards/rejected": -0.1909172236919403, + "step": 14430 + }, + { + "epoch": 9.979944674965422, + "grad_norm": 2.5739731788635254, + "learning_rate": 1.1141847241432305e-07, + "log_odds_chosen": 10.908953666687012, + "log_odds_ratio": -3.49894653481897e-05, + "logits/chosen": -0.10505393147468567, + "logits/rejected": -0.11533316969871521, + "logps/chosen": -0.0002562586741987616, + "logps/rejected": -1.855210304260254, + "loss": 0.2667, + "nll_loss": 0.06666852533817291, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.562586814747192e-05, + "rewards/margins": 0.18549540638923645, + "rewards/rejected": -0.18552103638648987, + "step": 14431 + }, + { + "epoch": 9.980636237897649, + "grad_norm": 3.690086603164673, + "learning_rate": 1.0757645612417397e-07, + "log_odds_chosen": 12.060749053955078, + "log_odds_ratio": -1.1755011655623093e-05, + "logits/chosen": -0.6370924711227417, + "logits/rejected": -0.655462384223938, + "logps/chosen": -0.00011227516370126978, + "logps/rejected": -2.708317995071411, + "loss": 0.3352, + "nll_loss": 0.08378816395998001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1227516552025918e-05, + "rewards/margins": 0.2708205580711365, + "rewards/rejected": -0.270831823348999, + "step": 14432 + }, + { + "epoch": 9.981327800829876, + "grad_norm": 3.6096031665802, + "learning_rate": 1.037344398340249e-07, + "log_odds_chosen": 10.562856674194336, + "log_odds_ratio": -0.00021144159836694598, + "logits/chosen": -0.02058953046798706, + "logits/rejected": -0.1223272830247879, + "logps/chosen": -0.0002965645689982921, + "logps/rejected": -2.3732357025146484, + "loss": 0.3817, + "nll_loss": 0.09541618824005127, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.965645762742497e-05, + "rewards/margins": 0.23729392886161804, + "rewards/rejected": -0.2373235821723938, + "step": 14433 + }, + { + "epoch": 9.982019363762102, + "grad_norm": 3.1935596466064453, + "learning_rate": 9.989242354387584e-08, + "log_odds_chosen": 10.641152381896973, + "log_odds_ratio": -0.0001305602490901947, + "logits/chosen": -0.2632865607738495, + "logits/rejected": -0.11204138398170471, + "logps/chosen": -0.00028467128868214786, + "logps/rejected": -2.1090238094329834, + "loss": 0.6777, + "nll_loss": 0.16941051185131073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8467127776821144e-05, + "rewards/margins": 0.21087393164634705, + "rewards/rejected": -0.2109023928642273, + "step": 14434 + }, + { + "epoch": 9.98271092669433, + "grad_norm": 2.954907178878784, + "learning_rate": 9.605040725372676e-08, + "log_odds_chosen": 11.83912181854248, + "log_odds_ratio": -2.0430359654710628e-05, + "logits/chosen": -0.29502013325691223, + "logits/rejected": -0.32154542207717896, + "logps/chosen": -6.92913745297119e-05, + "logps/rejected": -2.161543846130371, + "loss": 0.3088, + "nll_loss": 0.0771905928850174, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.92913727107225e-06, + "rewards/margins": 0.21614745259284973, + "rewards/rejected": -0.21615436673164368, + "step": 14435 + }, + { + "epoch": 9.983402489626556, + "grad_norm": 3.4380013942718506, + "learning_rate": 9.220839096357769e-08, + "log_odds_chosen": 11.68551254272461, + "log_odds_ratio": -1.8098944565281272e-05, + "logits/chosen": -0.4443289339542389, + "logits/rejected": -0.5476394891738892, + "logps/chosen": -9.755351493367925e-05, + "logps/rejected": -2.384434223175049, + "loss": 0.4065, + "nll_loss": 0.10162050276994705, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.755351129570045e-06, + "rewards/margins": 0.23843365907669067, + "rewards/rejected": -0.23844340443611145, + "step": 14436 + }, + { + "epoch": 9.984094052558783, + "grad_norm": 4.005263328552246, + "learning_rate": 8.836637467342863e-08, + "log_odds_chosen": 11.748692512512207, + "log_odds_ratio": -1.6541533113922924e-05, + "logits/chosen": -0.19354397058486938, + "logits/rejected": -0.20091736316680908, + "logps/chosen": -0.00013413293345365673, + "logps/rejected": -2.7356362342834473, + "loss": 0.423, + "nll_loss": 0.10575605928897858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3413294254860375e-05, + "rewards/margins": 0.27355021238327026, + "rewards/rejected": -0.2735636234283447, + "step": 14437 + }, + { + "epoch": 9.98478561549101, + "grad_norm": 3.510989189147949, + "learning_rate": 8.452435838327955e-08, + "log_odds_chosen": 11.33831787109375, + "log_odds_ratio": -3.435353210079484e-05, + "logits/chosen": -0.33414196968078613, + "logits/rejected": -0.3761923015117645, + "logps/chosen": -0.0003766388981603086, + "logps/rejected": -2.5318145751953125, + "loss": 0.3936, + "nll_loss": 0.09838636219501495, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7663892726413906e-05, + "rewards/margins": 0.2531437873840332, + "rewards/rejected": -0.25318145751953125, + "step": 14438 + }, + { + "epoch": 9.985477178423237, + "grad_norm": 2.6459414958953857, + "learning_rate": 8.068234209313047e-08, + "log_odds_chosen": 11.280380249023438, + "log_odds_ratio": -2.8480833861976862e-05, + "logits/chosen": -0.7788600921630859, + "logits/rejected": -0.7748812437057495, + "logps/chosen": -6.958026642678306e-05, + "logps/rejected": -1.900242567062378, + "loss": 0.305, + "nll_loss": 0.07623657584190369, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.958027370274067e-06, + "rewards/margins": 0.19001729786396027, + "rewards/rejected": -0.1900242567062378, + "step": 14439 + }, + { + "epoch": 9.986168741355463, + "grad_norm": 3.695897340774536, + "learning_rate": 7.68403258029814e-08, + "log_odds_chosen": 11.930622100830078, + "log_odds_ratio": -1.7742391719366424e-05, + "logits/chosen": -0.3199276030063629, + "logits/rejected": -0.32262054085731506, + "logps/chosen": -0.00020379522175062448, + "logps/rejected": -2.6338376998901367, + "loss": 0.4075, + "nll_loss": 0.101873017847538, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0379520719870925e-05, + "rewards/margins": 0.2633633613586426, + "rewards/rejected": -0.26338374614715576, + "step": 14440 + }, + { + "epoch": 9.98686030428769, + "grad_norm": 3.20121169090271, + "learning_rate": 7.299830951283234e-08, + "log_odds_chosen": 11.908764839172363, + "log_odds_ratio": -9.826524546951987e-06, + "logits/chosen": 0.030937325209379196, + "logits/rejected": 0.012108508497476578, + "logps/chosen": -0.0002181961026508361, + "logps/rejected": -2.7337958812713623, + "loss": 0.439, + "nll_loss": 0.10974864661693573, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1819610992679372e-05, + "rewards/margins": 0.2733577787876129, + "rewards/rejected": -0.2733795940876007, + "step": 14441 + }, + { + "epoch": 9.987551867219917, + "grad_norm": 2.5336647033691406, + "learning_rate": 6.915629322268327e-08, + "log_odds_chosen": 10.848555564880371, + "log_odds_ratio": -0.00010116137855220586, + "logits/chosen": -0.2958114445209503, + "logits/rejected": -0.3266344666481018, + "logps/chosen": -0.0001003117358777672, + "logps/rejected": -1.5897066593170166, + "loss": 0.2334, + "nll_loss": 0.05832758545875549, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.003117358777672e-05, + "rewards/margins": 0.15896062552928925, + "rewards/rejected": -0.1589706540107727, + "step": 14442 + }, + { + "epoch": 9.988243430152144, + "grad_norm": 3.3334195613861084, + "learning_rate": 6.53142769325342e-08, + "log_odds_chosen": 10.656942367553711, + "log_odds_ratio": -5.3099036449566483e-05, + "logits/chosen": -0.02133992314338684, + "logits/rejected": -0.08297806978225708, + "logps/chosen": -0.0001351845421595499, + "logps/rejected": -1.7620038986206055, + "loss": 0.4026, + "nll_loss": 0.10065347701311111, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.351845457975287e-05, + "rewards/margins": 0.17618687450885773, + "rewards/rejected": -0.17620038986206055, + "step": 14443 + }, + { + "epoch": 9.98893499308437, + "grad_norm": 3.3461174964904785, + "learning_rate": 6.147226064238511e-08, + "log_odds_chosen": 9.42183780670166, + "log_odds_ratio": -0.00031376894912682474, + "logits/chosen": -0.0702032744884491, + "logits/rejected": -0.15273018181324005, + "logps/chosen": -0.0004545687697827816, + "logps/rejected": -1.6560826301574707, + "loss": 0.3266, + "nll_loss": 0.08162922412157059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5456879888661206e-05, + "rewards/margins": 0.16556280851364136, + "rewards/rejected": -0.1656082570552826, + "step": 14444 + }, + { + "epoch": 9.989626556016598, + "grad_norm": 3.5137791633605957, + "learning_rate": 5.7630244352236056e-08, + "log_odds_chosen": 10.902608871459961, + "log_odds_ratio": -0.00014107978495303541, + "logits/chosen": -0.15415823459625244, + "logits/rejected": -0.16600894927978516, + "logps/chosen": -0.00030136233544908464, + "logps/rejected": -2.6479830741882324, + "loss": 0.2951, + "nll_loss": 0.0737719014286995, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.013623609149363e-05, + "rewards/margins": 0.26476815342903137, + "rewards/rejected": -0.26479828357696533, + "step": 14445 + }, + { + "epoch": 9.990318118948824, + "grad_norm": 2.9514148235321045, + "learning_rate": 5.3788228062086984e-08, + "log_odds_chosen": 11.803411483764648, + "log_odds_ratio": -1.0912965990428347e-05, + "logits/chosen": -0.056035421788692474, + "logits/rejected": -0.1221289113163948, + "logps/chosen": -0.00013051855785306543, + "logps/rejected": -2.565654993057251, + "loss": 0.3331, + "nll_loss": 0.08327168971300125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3051856512902305e-05, + "rewards/margins": 0.25655242800712585, + "rewards/rejected": -0.25656551122665405, + "step": 14446 + }, + { + "epoch": 9.991009681881051, + "grad_norm": 4.212923049926758, + "learning_rate": 4.994621177193792e-08, + "log_odds_chosen": 9.935128211975098, + "log_odds_ratio": -0.00034432156826369464, + "logits/chosen": -0.46043860912323, + "logits/rejected": -0.47046327590942383, + "logps/chosen": -0.0035317661240696907, + "logps/rejected": -2.0355629920959473, + "loss": 0.3522, + "nll_loss": 0.08801926672458649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003531765833031386, + "rewards/margins": 0.20320314168930054, + "rewards/rejected": -0.20355631411075592, + "step": 14447 + }, + { + "epoch": 9.991701244813278, + "grad_norm": 4.088373184204102, + "learning_rate": 4.6104195481788846e-08, + "log_odds_chosen": 12.469947814941406, + "log_odds_ratio": -1.4285373254097067e-05, + "logits/chosen": -0.07809141278266907, + "logits/rejected": -0.1154666543006897, + "logps/chosen": -0.00018460096907801926, + "logps/rejected": -3.5400447845458984, + "loss": 0.3694, + "nll_loss": 0.09235501289367676, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8460097635397688e-05, + "rewards/margins": 0.3539860248565674, + "rewards/rejected": -0.35400447249412537, + "step": 14448 + }, + { + "epoch": 9.992392807745505, + "grad_norm": 2.9412035942077637, + "learning_rate": 4.2262179191639774e-08, + "log_odds_chosen": 11.648869514465332, + "log_odds_ratio": -2.198399670305662e-05, + "logits/chosen": -0.9148153066635132, + "logits/rejected": -0.9539530277252197, + "logps/chosen": -0.00016754664829932153, + "logps/rejected": -2.500732421875, + "loss": 0.4408, + "nll_loss": 0.1101963147521019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6754665921325795e-05, + "rewards/margins": 0.25005650520324707, + "rewards/rejected": -0.25007325410842896, + "step": 14449 + }, + { + "epoch": 9.993084370677732, + "grad_norm": 4.903641700744629, + "learning_rate": 3.84201629014907e-08, + "log_odds_chosen": 10.70071029663086, + "log_odds_ratio": -0.00033215072471648455, + "logits/chosen": -0.1328817903995514, + "logits/rejected": -0.20120401680469513, + "logps/chosen": -0.00018549786182120442, + "logps/rejected": -2.3652138710021973, + "loss": 0.4958, + "nll_loss": 0.12390975654125214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8549788364907727e-05, + "rewards/margins": 0.23650284111499786, + "rewards/rejected": -0.236521378159523, + "step": 14450 + }, + { + "epoch": 9.993775933609959, + "grad_norm": 4.2302985191345215, + "learning_rate": 3.4578146611341636e-08, + "log_odds_chosen": 10.842260360717773, + "log_odds_ratio": -4.8358084313804284e-05, + "logits/chosen": -0.30342334508895874, + "logits/rejected": -0.3700042963027954, + "logps/chosen": -0.00016310744103975594, + "logps/rejected": -1.9715077877044678, + "loss": 0.5477, + "nll_loss": 0.13690924644470215, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6310745195369236e-05, + "rewards/margins": 0.19713447988033295, + "rewards/rejected": -0.1971507966518402, + "step": 14451 + }, + { + "epoch": 9.994467496542185, + "grad_norm": 2.7045373916625977, + "learning_rate": 3.073613032119256e-08, + "log_odds_chosen": 11.430469512939453, + "log_odds_ratio": -2.6685371267376468e-05, + "logits/chosen": -0.035802312195301056, + "logits/rejected": -0.12288626283407211, + "logps/chosen": -0.00012464431347325444, + "logps/rejected": -2.233880043029785, + "loss": 0.3057, + "nll_loss": 0.07642503082752228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2464431165426504e-05, + "rewards/margins": 0.22337555885314941, + "rewards/rejected": -0.22338801622390747, + "step": 14452 + }, + { + "epoch": 9.995159059474412, + "grad_norm": 3.083169460296631, + "learning_rate": 2.6894114031043492e-08, + "log_odds_chosen": 10.873865127563477, + "log_odds_ratio": -0.00016370532102882862, + "logits/chosen": -0.3911985158920288, + "logits/rejected": -0.4098545014858246, + "logps/chosen": -0.00013889935507904738, + "logps/rejected": -1.9613475799560547, + "loss": 0.466, + "nll_loss": 0.11647483706474304, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3889934962207917e-05, + "rewards/margins": 0.19612087309360504, + "rewards/rejected": -0.1961347460746765, + "step": 14453 + }, + { + "epoch": 9.995850622406639, + "grad_norm": 2.367278814315796, + "learning_rate": 2.3052097740894423e-08, + "log_odds_chosen": 11.183568000793457, + "log_odds_ratio": -3.5808894608635455e-05, + "logits/chosen": -0.21817639470100403, + "logits/rejected": -0.1968151032924652, + "logps/chosen": -0.0003093630075454712, + "logps/rejected": -2.3852624893188477, + "loss": 0.3001, + "nll_loss": 0.07503216713666916, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.093630220973864e-05, + "rewards/margins": 0.23849530518054962, + "rewards/rejected": -0.23852625489234924, + "step": 14454 + }, + { + "epoch": 9.996542185338866, + "grad_norm": 3.72048020362854, + "learning_rate": 1.921008145074535e-08, + "log_odds_chosen": 12.282805442810059, + "log_odds_ratio": -1.186850386147853e-05, + "logits/chosen": -0.5011136531829834, + "logits/rejected": -0.5989029407501221, + "logps/chosen": -0.00011271586117800325, + "logps/rejected": -2.921992778778076, + "loss": 0.3838, + "nll_loss": 0.09595651924610138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1271586117800325e-05, + "rewards/margins": 0.29218801856040955, + "rewards/rejected": -0.2921992838382721, + "step": 14455 + }, + { + "epoch": 9.997233748271093, + "grad_norm": 3.6513216495513916, + "learning_rate": 1.536806516059628e-08, + "log_odds_chosen": 12.021650314331055, + "log_odds_ratio": -1.8507635104469955e-05, + "logits/chosen": -0.26748454570770264, + "logits/rejected": -0.48302775621414185, + "logps/chosen": -0.00019425532082095742, + "logps/rejected": -2.976384162902832, + "loss": 0.4706, + "nll_loss": 0.11765141785144806, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9425533537287265e-05, + "rewards/margins": 0.2976189851760864, + "rewards/rejected": -0.2976384162902832, + "step": 14456 + }, + { + "epoch": 9.99792531120332, + "grad_norm": 3.2580089569091797, + "learning_rate": 1.1526048870447212e-08, + "log_odds_chosen": 11.5446195602417, + "log_odds_ratio": -3.38861791533418e-05, + "logits/chosen": -0.2588220536708832, + "logits/rejected": -0.3084482252597809, + "logps/chosen": -0.00026432055165059865, + "logps/rejected": -2.583191394805908, + "loss": 0.3673, + "nll_loss": 0.09181944280862808, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6432056984049268e-05, + "rewards/margins": 0.25829267501831055, + "rewards/rejected": -0.2583191394805908, + "step": 14457 + }, + { + "epoch": 9.998616874135546, + "grad_norm": 2.574554204940796, + "learning_rate": 7.68403258029814e-09, + "log_odds_chosen": 10.828826904296875, + "log_odds_ratio": -6.853970262454823e-05, + "logits/chosen": -0.13539712131023407, + "logits/rejected": -0.27097612619400024, + "logps/chosen": -0.0007511146832257509, + "logps/rejected": -2.2712390422821045, + "loss": 0.2194, + "nll_loss": 0.05484360456466675, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.511146395700052e-05, + "rewards/margins": 0.22704878449440002, + "rewards/rejected": -0.2271239161491394, + "step": 14458 + }, + { + "epoch": 9.999308437067773, + "grad_norm": 2.449514865875244, + "learning_rate": 3.84201629014907e-09, + "log_odds_chosen": 10.208294868469238, + "log_odds_ratio": -0.0011579160345718265, + "logits/chosen": -0.21203842759132385, + "logits/rejected": -0.27797165513038635, + "logps/chosen": -0.002736873459070921, + "logps/rejected": -2.4325757026672363, + "loss": 0.2492, + "nll_loss": 0.062181442975997925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0002736873284447938, + "rewards/margins": 0.2429838627576828, + "rewards/rejected": -0.2432575672864914, + "step": 14459 + }, + { + "epoch": 10.0, + "grad_norm": 2.7562191486358643, + "learning_rate": 0.0, + "log_odds_chosen": 11.591655731201172, + "log_odds_ratio": -3.828236003755592e-05, + "logits/chosen": -0.5259432196617126, + "logits/rejected": -0.5628337264060974, + "logps/chosen": -0.00029507066938094795, + "logps/rejected": -2.732250452041626, + "loss": 0.337, + "nll_loss": 0.08423374593257904, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9507067665690556e-05, + "rewards/margins": 0.2731955647468567, + "rewards/rejected": -0.2732250690460205, + "step": 14460 + } + ], + "logging_steps": 1, + "max_steps": 14460, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}